Numerical Python

numpy part 1

코드로 방정식 표현하기

coefficient_matrix = [[2, 2, 1], [2, -1, 2], [1, -1, 2]]
constant_vector = [9, 6, 5]

다양한 Matrix 계산을 어떻게 만들 것인가?
굉장히 큰 Matrix에 대한 표현
처리 속도 문제 - python은 Interpreter 언어
적절한 패키지의 활용

파이썬 과학 처리 패키지

Numpy

Numerical Python
파이썬의 고성능 과학 계산용 패키지
Matrix와 Vector와 같은 Array 연산의 사실상의 표준
한글로 넘파이로 주로 통칭
누군가는 넘피/늄파이라고 부르기도 함

Numpy의 특징

일반 List에 비해 빠르고, 메모리 효율적
반복문 없이 데이터 배열에 대한 처리를 지원함
선형대수와 관련된 다양한 기능을 제공함
C, C++, 포트란 등의 언어와 통합 가능

ndarray

import

import numpy as np

numpy의 호출 방법
일반적으로 numpy는 np라는 alias(별칭)을 이용해서 호출함
특별한 이유는 없음, 세계적인 약속 같은 것

Array creation

test_array = np.array([1, 4, 5, 8], float)
print(test_array)
type(test_array[3])
[1. 4. 5. 8.]
numpy.float64

numpy는 np.array 함수를 활용하여 배열을 생성함
-> ndarray
numpy는 하나의 데이터 type만 배열에 넣을 수 있음
List와 가장 큰 차이점 → dynamic typing not supported
C의 Array를 사용하여 배열을 생성함

Array의 생성

a = [1, 2, 3, 4, 5]
b = [5, 4, 3, 2, 1]

a = np.array(a, int)
a
array([1, 2, 3, 4, 5])

a
array([1, 2, 3, 4, 5])

test_array = np.array(["1", "4", 5, 8], float)
test_array
array([1., 4., 5., 8.])

type(test_array)
numpy.ndarray

type(test_array[3])
numpy.float64

test_array = np.array([1, 4, 5, "8"], float)    # String Type의 데이터를 입력해도
test_array
array([1., 4., 5., 8.])

type(test_array[3])     # Float Type으로 자동 형변환을 실시
numpy.float64

a = [1, 2, 3, 4, 5]
b = [5, 4, 3, 2, 1]
a[0] is b[-1]
True

a[0] is b[-1]
True

a = np.array(a)
b = np.array(b)
a[0] is b[-1]
False

shape: numpy array의 dimension 구성을 반환함
dtype: numpy array의 데이터 type을 반환함

test_array = np.array([1, 4, 5, "8"], float)    # String Type의 데이터를 입력해도
print(test_array)
print(type(test_array[3]))                      # Float Type으로 자동 형변환을 실시
print(test_array.dtype)                         # Array(배열) 전체의 데이터 Type을 반환함
print(test_array.shape)                         # Array(배열)의 shape을 반환함
[1. 4. 5. 8.]
<class 'numpy.float64'>
float64
(4,)

test_array.dtype    # Array(배열) 전체의 데이터 Type을 반환함
dtype('float64')

a = [[1, 2, 3], [4, 5, 6], [4, 5, 6]]
np.array(a).shape
(3, 3)

Array shape

array의 RANK에 따라 불리는 이름이 있음

Rank	Name	Example
0	scalar	7
1	vector	[10, 10]
2	matrix	[[10, 10], [15, 15]]
3	3-tensor	[[[1, 5, 9], [2, 6, 10]], [[3, 7, 11], [4, 8, 12]]]
n	n-tensor

Array shape (vector)

shape: array의 크기, 형태 등에 대한 정보

ndarray의 구성 -> ndarray의 shape(type: tuple)

test_array = np.array([1, 4, 5, "8"], float)
test_array

Array shape (matrix)

matrix = [[1, 2, 5, 8], [1, 2, 5, 8], [1, 2, 5, 8]]
np.array(matrix, int).shape

Array shape (3rd order tensor)

ndim - number of dimensions
size - data의 개수

tensor = [[[1, 2, 5, 8], [1, 2, 5, 8], [1, 2, 5, 8]],
          [[1, 2, 5, 8], [1, 2, 5, 8], [1, 2, 5, 8]],
          [[1, 2, 5, 8], [1, 2, 5, 8], [1, 2, 5, 8]],
          [[1, 2, 5, 8], [1, 2, 5, 8], [1, 2, 5, 8]]]
np.array(tensor, int).shape
(4, 3, 4)

np.array(tensor, int).ndim
3

np.array(tensor, int).size
48

numpy dtype

Array dtype

ndarray의 single element가 가지는 data type
각 element가 차지하는 memory의 크기가 결정됨
C의 data type과 Compatible

float64를 많이 사용함

np.array([[1, 2, 3], [4.5, 5, 6]], dtype=int)   #Data type을 integer로 선언
array([[1, 2, 3],
       [4, 5, 6]])
       
np.array([[1, 2, 3], [4.5, "5", "6"]], dtype=np.float32)    #Data type을 float로 선언
array([[1. , 2. , 3. ],
       [4.5, 5. , 6. ]], dtype=float32)

Array nbytes

nbytes - ndarray object의 메모리 크기를 반환함

np.array([[1, 2, 3], [4.5, "5", "6"]], dtype=np.float32).nbytes     #32bits = 4bytes -> 6 * 4bytes
24

np.array([[1, 2, 3], [4.5, "5", "6"]], dtype=np.int8).nbytes        #8bits = 1bytes -> 6 * 1bytes
6

np.array([[1, 2, 3], [4.5, "5", "6"]], dtype=np.float64).nbytes     #64bits = 8bytes -> 6 * 8bytes
48

numpy2

Handling shape

reshape

reshape: Array의 shape의 크기를 변경함, element의 갯수는 동일

test_matrix = [[1, 2, 3, 4], [1, 2, 5, 8]]
np.array(test_matrix).shape
(2, 4)

np.array(test_matrix).reshape(8,)
array([1, 2, 3, 4, 1, 2, 5, 8])

np.array(test_matrix).reshape(8,).shape
(8,)

np.array(test_matrix).reshape(2, 4).shape
(2, 4)

np.array(test_matrix).reshape(-1, 2).shape
(4, 2)

np.array(test_matrix).reshape(2, 2, 2)
array([[[1, 2],
        [3, 4]],

       [[1, 2],
        [5, 8]]])

np.array(test_matrix).reshape(2, 2, 2).shape
(2, 2, 2)

flat or flatten()

flatten: 다차원 array를 1차원 array로 변환

test_matrix = [[[1, 2, 3, 4], [1, 2, 5, 8]], [[1, 2, 3, 4], [1, 2, 5, 8]]]
np.array(test_matrix).flatten()
array([1, 2, 3, 4, 1, 2, 5, 8, 1, 2, 3, 4, 1, 2, 5, 8])

test_matrix = np.array(test_matrix)
test_matrix.shape
(2, 2, 4)

np.array(test_matrix).flatten()
array([1, 2, 3, 4, 1, 2, 5, 8, 1, 2, 3, 4, 1, 2, 5, 8])

np.array(test_matrix).flatten().shape
(16,)

indexing & slicing

indexing for numpy array

list와 달리 이차원 배열에서 [0,0] 표기법을 제공함
matrix 일 경우 앞은 row 뒤는 column을 의미함

a = np.array([[1, 2, 3], [4.5, 5, 6]], int)
print(a)
print(a[0,0])   # Two dimensional array representation #1
print(a[0][0])  # Two dimensional array representation #2

a[0,0] = 12 # Matrix 0,0 에 12 할당
print(a)
a[0][0] = 5 # Matrix 0,0 에 5 할당
print(a)
[[1 2 3]
 [4 5 6]]
1
1
[[12  2  3]
 [ 4  5  6]]
[[5 2 3]
 [4 5 6]]
 
import numpy as np
test_example = np.array([[1, 2, 3], [4.5, 5, 6]], int)
test_example
array([[1, 2, 3],
       [4, 5, 6]])
       
test_example[0][2]
3

test_example[0, 2]
3

test_example[0, 0] = 10     # Matrix 0, 0 에 10 할당
test_example
array([[10,  2,  3],
       [ 4,  5,  6]])

test_example[1, 2] = 5      # Matrix 1, 2 에 5 할당
test_example[1, 2]
5

test_example
array([[10,  2,  3],
       [ 4,  5,  5]])

slicing

test_example = np.array([[1, 2, 5, 8], [1, 2, 5, 8], [1, 2, 5, 8], [1, 2, 5, 8]], int)
test_example
array([[1, 2, 5, 8],
       [1, 2, 5, 8],
       [1, 2, 5, 8],
       [1, 2, 5, 8]])
       
test_example[:, 2:]
array([[5, 8],
       [5, 8],
       [5, 8],
       [5, 8]])

test_example
array([[1, 2, 5, 8],
       [1, 2, 5, 8],
       [1, 2, 5, 8],
       [1, 2, 5, 8]])
       
test_example[:, 1:3].shape
(4, 2)

test_example
array([[1, 2, 5, 8],
       [1, 2, 5, 8],
       [1, 2, 5, 8],
       [1, 2, 5, 8]])
       
test_example[1:2, :2]
array([[1, 2]])

slicing for numpy array

list와 달리 행과 열 부분을 나눠서 slicing이 가능함
matrix의 부분 집합을 추출할 때 유용함

test_example = np.array([[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]], int)
test_example
array([[ 1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10]])
       
test_example[1]     # 1 Row의 전체
array([ 6,  7,  8,  9, 10])

test_example[1:3]   # 1 Row ~ 2Row의 전체
array([[ 6,  7,  8,  9, 10]])

a = np.arange(100).reshape(10, 10)
a
array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14, 15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24, 25, 26, 27, 28, 29],
       [30, 31, 32, 33, 34, 35, 36, 37, 38, 39],
       [40, 41, 42, 43, 44, 45, 46, 47, 48, 49],
       [50, 51, 52, 53, 54, 55, 56, 57, 58, 59],
       [60, 61, 62, 63, 64, 65, 66, 67, 68, 69],
       [70, 71, 72, 73, 74, 75, 76, 77, 78, 79],
       [80, 81, 82, 83, 84, 85, 86, 87, 88, 89],
       [90, 91, 92, 93, 94, 95, 96, 97, 98, 99]])
       
a[:, -1]
array([ 9, 19, 29, 39, 49, 59, 69, 79, 89, 99])

a = np.array([[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]], int)
a[:, 2:]    # 전체 Row의 2열 이상
a[1, 1:3]   # 1Row의 1열 ~ 2열
a[1:3]      # 1 Row ~ 2Row의 전체
array([[ 6,  7,  8,  9, 10]])

creation function

array의 범위를 지정하여, 값의 list를 생성하는 명령어

np.arange(30)   # range: List의 range와 같은 효과, integer로 0부터 29까지 배열추출
array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29])
       
np.arange(0, 5, 0.5)    # floating point도 표시가능함
array([0. , 0.5, 1. , 1.5, 2. , 2.5, 3. , 3.5, 4. , 4.5])

np.arange(30).reshape(5,6)
array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23],
       [24, 25, 26, 27, 28, 29]])

arange

list(range(0, 10, 3))
[0, 3, 6, 9]

type(list(range(30)))
list

# range: List의 range와 같은 효과,
# integer로 0부터 29까지 배열추출
np.arange(30)
array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29])

np.arange(0, 10, 0.5)
array([0. , 0.5, 1. , 1.5, 2. , 2.5, 3. , 3.5, 4. , 4.5, 5. , 5.5, 6. ,
       6.5, 7. , 7.5, 8. , 8.5, 9. , 9.5])
       
np.arange(0, 5, 0.5)    # floating point도 표시가능함
array([0. , 0.5, 1. , 1.5, 2. , 2.5, 3. , 3.5, 4. , 4.5])

np.arange(30).reshape(5, 6)
array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23],
       [24, 25, 26, 27, 28, 29]])

ones, zeros & empty

zeros - 0으로 가득 찬 ndarray 생성

np.zeros(shape, dtype, order)

np.zeros(shape=(10,), dtype=np.int8)    # 10 - zero vector 생성
array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int8)

np.zeros((2, 5))    # 2 by 5 - zero matrix 생성
array([[0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.]])

ones - 1로 가득 찬 ndarray 생성

np.ones(shape, dtype, order)

np.ones(shape=(10,), dtype=np.int8)
array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1], dtype=int8)

np.ones((2, 5))
array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])

empty - shape만 주어지고 비어있는 ndarray 생성

(memory initialization이 되지 않음)

np.empty(shape=(10,), dtype=np.int8)
array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1], dtype=int8)

np.empty((3, 5))
array([[ 2.76169574e-316,  3.85371204e-322,  0.00000000e+000,
         0.00000000e+000, -1.42800187e-101],
       [ 1.50008929e+248,  4.31174539e-096,  9.80058441e+252,
         1.23971686e+224,  1.05206415e-153],
       [ 9.03292329e+271,  9.08366793e+223,  1.06244660e-153,
         3.44981369e+175,  6.81019663e-310]])

기존 ndarray의 shape 크기만큼 1, 0 또는 empty array를 반환

test_matrix = np.arange(30).reshape(5, 6)
test_matrix.shape
(5, 6)

test_matrix
array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23],
       [24, 25, 26, 27, 28, 29]])

np.zeros_like(test_matrix, dtype=np.float32)
array([[0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0.]], dtype=float32)

eye, identity & diagonal

np.identity(n=5, dtype=np.int8)
array([[1, 0, 0, 0, 0],
       [0, 1, 0, 0, 0],
       [0, 0, 1, 0, 0],
       [0, 0, 0, 1, 0],
       [0, 0, 0, 0, 1]], dtype=int8)
       
np.eye(N=3, M=5, dtype=np.int8)
array([[1, 0, 0, 0, 0],
       [0, 1, 0, 0, 0],
       [0, 0, 1, 0, 0]], dtype=int8)
       
np.eye(3)
array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

np.eye(3, 5, k=2)
array([[0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.]])

matrix = np.arange(9).reshape(3, 3)
matrix
array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])
       
np.diag(matrix, k=-1)
array([3, 7])

identity

단위행렬(i 행렬)을 생성함

n -> number of rows

np.identity(n=3, dtype=np.int8)
array([[1, 0, 0],
       [0, 1, 0],
       [0, 0, 1]], dtype=int8)
       
np.identity(5)
array([[1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.]])

eye

대각선이 1인 행렬, k값의 시작 index의 변경이 가능

np.eye(3)
array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

np.eye(3, 5, k=2)
array([[0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.]])

np.eye(N=3, M=5, dtype=np.int8)
array([[1, 0, 0, 0, 0],
       [0, 1, 0, 0, 0],
       [0, 0, 1, 0, 0]], dtype=int8)

diag

대각 행렬의 값을 추출함

matrix = np.arange(9).reshape(3, 3)
np.diag(matrix)
array([0, 4, 8])

np.diag(matrix, k=1)    # k -> start index
array([1, 5])

random sampling

데이터 분포에 따른 sampling으로 array를 생성

np.random.uniform(0, 1, 10).reshape(2, 5)    # 균등분포
array([[0.28148877, 0.39096213, 0.8688948 , 0.75583441, 0.0974156 ],
       [0.63874029, 0.89183884, 0.10185038, 0.53817901, 0.23567787]])
       
np.random.normal(0, 1, 10).reshape(2, 5)    # 정규분포
array([[ 0.2214428 , -0.19452303,  1.56907102, -0.59203375, -1.44884377],
       [-0.15457402, -0.74602628,  0.31767523,  0.39039759,  1.70415728]])
       
np.random.exponential(scale=2, size=100)
array([ 1.43650613,  5.98904744,  0.05297811,  1.28599404,  4.81650772,
        0.59883965,  7.87592719,  3.38402755,  0.14934426, 10.10969981,
        0.51442332,  0.96138876,  0.44146638,  0.95549804,  1.11217727,
        0.40839213,  2.25400866,  0.02174743,  0.7400785 ,  4.36583526,
        0.41430991,  1.3198711 ,  0.08029711,  1.19866709,  0.86826583,
        2.17804921,  3.10930786,  2.94439585,  0.10205564,  0.36131767,
        3.93830995,  8.09234396,  0.78814556,  0.66893399,  3.10155757,
        0.24308198,  1.17406288,  1.40631782,  3.07294024,  0.01854137,
        4.15483117,  7.51690715,  0.4229553 ,  1.02218354,  4.15164633,
        2.75376998,  2.06286244,  0.46185446,  0.80212423,  1.03983023,
        0.37336122,  0.54373863,  1.69744393,  0.55939618,  0.13405293,
        0.76280061,  2.25226669,  2.4357386 ,  0.93445028,  0.63296579,
        5.92024152, 10.28789799,  1.26083273,  0.23218126,  3.56793215,
        9.98640007,  0.63800226,  0.94782236,  2.70165123,  1.37156731,
        2.34877294,  1.02617748,  3.84053988,  0.10290869,  0.01295792,
        0.62858765,  0.27713061,  2.31272343,  0.78942903,  3.49218441,
        3.50591872,  0.13878279,  5.4718107 ,  0.6421617 ,  4.64185023,
        3.11468181,  0.43955209,  0.28343166,  3.50594796,  2.11322927,
        0.70796652,  3.1750745 ,  2.371161  ,  0.18013178,  3.70909884,
        1.15206655,  4.87385253,  1.53893689,  2.54807892,  0.48986662])

operation functions

sum

ndarray의 element들 간의 합을 구함, list의 sum 기능과 동일

test_array = np.arange(1, 11)
test_array
array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

test_array.sum(dtype=float)    # np.float은 NumPy 1.20에서 deprecated, 1.24에서 제거됨 → 내장 float 사용
55.0

import numpy as np
test_array = np.arange(1, 11)
test_array
array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

test_array.sum()
55

test_array = np.arange(1, 13).reshape(3, 4)

axis

모든 operation function을 실행할 때 기준이 되는 dimension 축

test_array = np.arange(1, 13).reshape(3, 4)
test_array
array([[ 1,  2,  3,  4],
       [ 5,  6,  7,  8],
       [ 9, 10, 11, 12]])

test_array.sum(axis=1), test_array.sum(axis=0)
(array([10, 26, 42]), array([15, 18, 21, 24]))

third_order_tensor = np.array([test_array, test_array, test_array])
third_order_tensor
array([[[ 1,  2,  3,  4],
        [ 5,  6,  7,  8],
        [ 9, 10, 11, 12]],

       [[ 1,  2,  3,  4],
        [ 5,  6,  7,  8],
        [ 9, 10, 11, 12]],

       [[ 1,  2,  3,  4],
        [ 5,  6,  7,  8],
        [ 9, 10, 11, 12]]])

third_order_tensor.sum(axis=2)
array([[10, 26, 42],
       [10, 26, 42],
       [10, 26, 42]])

third_order_tensor.sum(axis=1)
array([[15, 18, 21, 24],
       [15, 18, 21, 24],
       [15, 18, 21, 24]])
       
third_order_tensor.sum(axis=0)
array([[ 3,  6,  9, 12],
       [15, 18, 21, 24],
       [27, 30, 33, 36]])
       
test_array = np.arange(1, 13).reshape(3, 4)

mean & std

ndarray의 element들 간의 평균 또는 표준 편차를 반환

test_array = np.arange(1, 13).reshape(3, 4)
test_array
array([[ 1,  2,  3,  4],
       [ 5,  6,  7,  8],
       [ 9, 10, 11, 12]])
       
test_array.mean(), test_array.mean(axis=0)
(6.5, array([5., 6., 7., 8.]))

test_array.std(), test_array.std(axis=0)
(3.452052529534663, array([3.26598632, 3.26598632, 3.26598632, 3.26598632]))

mathematical functions

그 외에도 다양한 수학 연산자를 제공함 (np.something 호출)

exponential: exp, expm1, exp2, log, log10, log1p, log2, power, sqrt
trigonometric: sin, cos, tan, arcsin, arccos, arctan
hyperbolic: sinh, cosh, tanh, arcsinh, arccosh, arctanh

np.sqrt(test_array)
array([[1.        , 1.41421356, 1.73205081, 2.        ],
       [2.23606798, 2.44948974, 2.64575131, 2.82842712],
       [3.        , 3.16227766, 3.31662479, 3.46410162]])

np.exp(test_array)
array([[2.71828183e+00, 7.38905610e+00, 2.00855369e+01, 5.45981500e+01],
       [1.48413159e+02, 4.03428793e+02, 1.09663316e+03, 2.98095799e+03],
       [8.10308393e+03, 2.20264658e+04, 5.98741417e+04, 1.62754791e+05]])

concatenate

numpy array를 합치는(붙이는) 함수

a = np.array([1, 2, 3])
b = np.array([2, 3, 4])
np.vstack((a,b))
array([[1, 2, 3],
       [2, 3, 4]])
       
a = np.array([ [1], [2], [3]])
b = np.array([ [2], [3], [4]])
np.hstack((a,b))
array([[1, 2],
       [2, 3],
       [3, 4]])
       
a = np.array([1, 2, 3])
b = np.array([2, 3, 4])
np.concatenate((a,b), axis=0)
array([1, 2, 3, 2, 3, 4])

a = np.array([[1, 2], [3, 4]])
b = np.array([[5], [6]])
np.concatenate( (a,b), axis=1)
array([[1, 2, 5],
       [3, 4, 6]])
       
a = np.array([[1, 2], [3, 4]])
b = np.array([5, 6])

b = b[np.newaxis, :]
np.concatenate((a, b.T), axis=1)
array([[1, 2, 5],
       [3, 4, 6]])

Array operations

numpy는 array 간의 기본적인 사칙 연산을 지원함

test_a = np.array([[1, 2, 3], [4, 5, 6]], float)
test_a + test_a # Matrix + Matrix 연산
array([[ 2.,  4.,  6.],
       [ 8., 10., 12.]])
       
test_a - test_a # Matrix - Matrix 연산
array([[0., 0., 0.],
       [0., 0., 0.]])
       
test_a * test_a # Matrix내 element들 간 같은 위치에 있는 값들끼리 연산
array([[ 1.,  4.,  9.],
       [16., 25., 36.]])

Element-wise operations

Array 간 shape이 같을 때 일어나는 연산

matrix_a = np.arange(1, 13).reshape(3, 4)
matrix_a * matrix_a
array([[  1,   4,   9,  16],
       [ 25,  36,  49,  64],
       [ 81, 100, 121, 144]])

Dot product

Matrix의 기본 연산, dot 함수 사용

test_a = np.arange(1, 7).reshape(2, 3)
test_b = np.arange(7, 13).reshape(3, 2)
test_b
array([[ 7,  8],
       [ 9, 10],
       [11, 12]])
       
test_a.dot(test_b)
array([[ 58,  64],
       [139, 154]])
       
test_a = np.arange(1, 7).reshape(2, 3)
test_a
array([[1, 2, 3],
       [4, 5, 6]])

transpose

transpose 또는 T attribute 사용

test_a.transpose()
array([[1, 4],
       [2, 5],
       [3, 6]])
       
test_a.T
array([[1, 4],
       [2, 5],
       [3, 6]])
       
test_a.T.dot(test_a)
array([[17, 22, 27],
       [22, 29, 36],
       [27, 36, 45]])

broadcasting

Shape이 다른 배열 간 연산을 지원하는 기능
Scalar - vector 외에도 vector - matrix 간의 연산도 지원

test_matrix = np.array([[1, 2, 3], [4, 5, 6]], float)
scalar = 3
test_matrix
array([[1., 2., 3.],
       [4., 5., 6.]])
       
test_matrix + scalar    # Matrix - Scalar 덧셈
array([[4., 5., 6.],
       [7., 8., 9.]])
       
test_matrix - scalar    # Matrix - Scalar 뺄셈
array([[-2., -1.,  0.],
       [ 1.,  2.,  3.]])
       
test_matrix * 5         # Matrix - Scalar 곱셈
array([[ 5., 10., 15.],
       [20., 25., 30.]])
       
test_matrix / 5         # Matrix - Scalar 나눗셈
array([[0.2, 0.4, 0.6],
       [0.8, 1. , 1.2]])
       
test_matrix // 2          # Matrix - Scalar 몫
array([[0., 1., 1.],
       [2., 2., 3.]])
       
test_matrix ** 2         # Matrix - Scalar 제곱
array([[ 1.,  4.,  9.],
       [16., 25., 36.]])
       
test_matrix = np.arange(1, 13).reshape(4, 3)
test_vector = np.arange(10, 40, 10)

test_vector
array([10, 20, 30])

test_vector.reshape(-1, 3).T + test_vector
array([[20, 30, 40],
       [30, 40, 50],
       [40, 50, 60]])
       
test_matrix + test_vector
array([[11, 22, 33],
       [14, 25, 36],
       [17, 28, 39],
       [20, 31, 42]])

Numpy performance #1

timeit: jupyter 환경에서 코드의 퍼포먼스를 체크하는 함수

Numpy performance #2

일반적으로 속도는 아래 순 for loop < list comprehension < numpy
100,000,000번의 loop이 돌 때, 약 4배 이상의 성능 차이를 보임
Numpy는 C로 구현되어 있어, 성능을 확보하는 대신 파이썬의 가장 큰 특징인 dynamic typing을 포기함
대용량 계산에서는 가장 흔히 사용됨
Concatenate처럼 계산이 아닌, 할당에서는 연산 속도의 이점이 없음

def sclar_vector_product(scalar, vector):
    """Return a new list holding each element of ``vector`` times ``scalar``.

    Deliberately written as an explicit ``for`` loop with ``append``: this
    function is the plain-loop baseline in the timing comparison that follows.
    """
    scaled = []
    for element in vector:
        scaled.append(scalar * element)
    return scaled

iternation_max = 100000000

vector = list(range(iternation_max))
scalar = 2

%timeit sclar_vector_product(scalar, vector)    # for loop을 이용한 성능
%timeit [scalar * value for value in range(iternation_max)]
# list comprehension을 이용한 성능
%timeit np.arange(iternation_max) * scalar  # numpy를 이용한 성능
19.4 s ± 1.66 s per loop (mean ± std. dev. of 7 runs, 1 loop each)
13.5 s ± 931 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
282 ms ± 8.62 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)

Numpy part3

comparisons

All & Any

Array의 데이터 전부(and) 또는 일부(or)가 조건에 만족 여부 반환

a = np.arange(10)
a
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

a < 4
array([ True,  True,  True,  True, False, False, False, False, False,
       False])
       
a < 1
array([ True, False, False, False, False, False, False, False, False,
       False])
       
a < 10
array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True])
        
np.all(a < 10)
True

a < 5
array([ True,  True,  True,  True,  True, False, False, False, False,
       False])
       
np.all(a < 5)
False

np.any(a > 5)
True

np.any(a < 0)
False

np.any(a>5), np.any(a<0)    # any -> 하나라도 조건에 만족한다면 true
(True, False)

np.all(a>5), np.all(a < 10) # all -> 모두가 조건에 만족한다면 true
(False, True)

comparison operation #1

numpy는 배열의 크기가 동일할 때 element 간 비교의 결과를 Boolean type으로 반환

test_a = np.array([1, 3, 0], float)
test_b = np.array([5, 2, 1], float)

test_a > test_b
array([False,  True, False])

test_a >= test_b
array([False,  True, False])

(test_a > test_b).any()
True

(test_a > test_b).all()
False

comparison operation #2

a = np.array([1, 3, 0], float)
np.logical_and(a > 0, a < 3)    # and 조건의 condition
array([ True, False, False])

b = np.array([True, False, True], bool)
np.logical_not(b)   # NOT 조건의 condition
array([False,  True, False])

c = np.array([False, True, False], bool)
np.logical_or(b, c) # OR 조건의 condition
array([ True,  True,  True])

np.where

a
array([ 1., nan, inf])

np.where(a > 5)
(array([2]),)

np.where(a < 3)[0]
array([0])

np.where(a < 3, 3, 2)   # where(condition, TRUE, FALSE)
array([3, 2, 2])

np.where(a > 0, 3, 2)   # where(condition, TRUE, FALSE)
array([3, 2, 3])

a = np.arange(5, 15)
a
array([ 5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

np.where(a > 10)
(array([6, 7, 8, 9]),)

a = np.arange(10)       # Index 값 반환
np.where(a>5)
(array([6, 7, 8, 9]),)

a = np.array([1, np.nan, np.inf], float)
np.isnan(a)     # Not a Number
array([False,  True, False])

np.isfinite(a)      # is finite number
array([ True, False, False])

argmax & argmin

array 내 최댓값 또는 최솟값의 index를 반환함

a = np.array([1, 2, 4, 5, 8, 78, 23, 3])
np.argmax(a), np.argmin(a)
(5, 0)

a.argsort()
array([0, 1, 7, 2, 3, 4, 6, 5])

a.argsort()[::-1]
array([5, 6, 4, 3, 2, 7, 1, 0])
       
np.argmax(a)
5

np.argmin(a)
0

axis 기반의 반환

a = np.array([[1, 2, 4, 7], [9, 88, 6, 45], [9, 76, 3, 4]])
np.argmax(a, axis=1), np.argmin(a, axis=0)
(array([3, 1, 1]), array([0, 0, 2, 2]))

boolean & fancy index

특정 조건에 따른 값을 배열 형태로 추출
Comparison operation 함수들도 모두 사용가능

boolean index

test_array = np.array([1, 4, 0, 2, 3, 8, 9, 7], float)
test_array > 3
array([False,  True, False, False, False,  True,  True,  True])

test_array.shape
(8,)

test_array[test_array > 3]  # 조건이 True인 Index의 element만 추출
array([4., 8., 9., 7.])

condition = test_array < 3
test_array[condition]
array([1., 0., 2.])

fancy index

numpy는 array를 index value로 사용해서 값 추출

a = np.array([2, 4, 6, 8], float)
cond = np.array([1, 1, 1, 2, 1, 1, 1, 3])

a.take(cond)
array([4., 4., 4., 6., 4., 4., 4., 8.])

a[a > 4]
array([6., 8.])

a.take(cond)    # take 함수: bracket index와 같은 효과
array([4., 4., 4., 6., 4., 4., 4., 8.])

matrix 형태의 데이터도 가능

a = np.array([[1, 4], [9, 16]], float)
b = np.array([0, 0, 1, 1, 0], int)
c = np.array([0, 1, 1, 1, 1], int)
a[b, c]     # b를 row_index, c를 column index로 변환하여 표시함
array([ 1.,  4., 16., 16.,  4.])

a = np.array([[1, 4], [9, 16]], float)
a[b]
array([[ 1.,  4.],
       [ 1.,  4.],
       [ 9., 16.],
       [ 9., 16.],
       [ 1.,  4.]])

loadtxt & savetxt

text type의 데이터를 읽고, 저장하는 기능

load txt

a = np.loadtxt("./populations.txt", delimiter="\t")
a

a_int = a.astype(int)
a_int[:3]
array([[ 1,  4],
       [ 9, 16]])
       
np.savetxt("int_data_2.csv", a_int, fmt="%.2e", delimiter=",")

numpy object - npy

np.save("npy_test", arr=a_int)

a_test = np.load(file="npy_test.npy")
a_test
array([[ 1,  4],
       [ 9, 16]])
       
npy_array = np.load(file="npy_test.npy")
npy_array[:3]
array([[ 1,  4],
       [ 9, 16]])

저작자표시 (새창열림)

'BOOTCAMP > boostcamp AI Tech Pre-Course' 카테고리의 다른 글

선형독립과 선형종속 (0)	2023.01.01
선형방정식과 선형시스템 (0)	2022.12.29
Mathematics for Artificial Intelligence (0)	2022.12.28
File / Exception / Log Handling (0)	2022.12.27
Module and Project (0)	2022.12.27