Python Data Structure

Python Data Structure

2022. 12. 25. 23:18ㆍBOOTCAMP/boostcourse AI BASIC 2023

stack(스택)

나중에 넣은 데이터를 먼저 반환하도록 설계된 메모리 구조
Last In First Out(LIFO)
Data의 입력을 Push, 출력을 Pop이라고 함

stack(스택) with list object

리스트를 사용하여 스택 구조를 구현 가능
push를 append(), pop을 pop()를 사용

a = [1,2,3,4,5]
a
[1, 2, 3, 4, 5]

a.append(10)
a
[1, 2, 3, 4, 5, 10]

a.append(20)
a
[1, 2, 3, 4, 5, 10, 20]

c = a.pop()     # 20 출력
a
[1, 2, 3, 4, 5, 10]

d = a.pop()     # 10 출력
a
[1, 2, 3, 4, 5]

c
20

d
10

stack example

스택 구조를 활용, 입력된 글자를 역순으로 출력

word = input("Input a word : ")     # Input Word
word_list = list(word)              # String to List
# print(word_list)
for i in range(len(word_list)):
    print(word_list.pop())         # 하나씩 빼면서 출력
    print(word_list)
    
Input a word : kakao
o
['k', 'a', 'k', 'a']
a
['k', 'a', 'k']
k
['k', 'a']
a
['k']
k
[]

Queue(큐)

먼저 넣은 데이터를 먼저 반환하도록 설계된 메모리 구조
First In First Out(FIFO)
Stack과 반대되는 개념

Queue(큐) with list object

파이썬은 리스트를 사용하여 큐 구조를 활용
put를 append(), get을 pop(0)를 사용

a = [1,2,3,4,5]
a.append(10)
a.append(20)

a.pop(0)
1

a.pop(0)
2

first = a.pop(0)
a
[4, 5, 10, 20]

first
3

Tuple(튜플)

값의 변경이 불가능한 리스트
선언 시 "[ ]"가 아닌 "( )"를 사용
리스트의 연산, 인덱싱, 슬라이싱 등을 동일하게 사용
프로그램을 작동하는 동안 변경되지 않은 데이터의 저장 ex) 학번, 이름, 우편번호 등등
함수의 반환 값 등 사용자의 실수에 의한 에러를 사전에 방지

t = (1,2,3)
print (t + t, t* 2)
(1, 2, 3, 1, 2, 3) (1, 2, 3, 1, 2, 3)

len(t)
3

t = (1)     # 일반정수로 인식
type(t)
int

t = (1, )   # 값이 하나인 Tuple은 반드시 ","를 붙여야 함
type(t)
tuple

집합(set)

값을 순서 없이 저장, 중복 불허하는 자료형
set 객체 선언을 이용하여 객체 생성

s = set([1,2,3,1,2,3])      # set 함수를 사용 1,2,3을 집합 객체 생성, a = {1,2,3,4,5} 도 가능
s
{1, 2, 3}

s.add(1)                    # 한 원소 1만 추가, 추가, 중복 불허로 추가 되지 않음
s
{1, 2, 3}

s.remove(1)                 # 1 삭제
s
{2, 3}

s.update([1,4,5,6,7])       # [1,4,5,6,7] 추가
s
{1, 2, 3, 4, 5, 6, 7}

s.discard(3)                # 3 삭제
s
{1, 2, 4, 5, 6, 7}

s.clear()                   # 모든 원소 삭제

집합의 연산

수학에서 활용하는 다양한 집합 연산 가능

s1 = set([1,2,3,4,5])
s2 = set([3,4,5,6,7])

s1.union(s2)        # s1과 s2의 합집합
{1, 2, 3, 4, 5, 6, 7}

s1 | s2             # set([1, 2, 3, 4, 5, 6, 7])
{1, 2, 3, 4, 5, 6, 7}

s1. intersection(s2)    # s1과 s2의 교집합
{3, 4, 5}

s1 & s2                 # set([3, 4, 5])
{3, 4, 5}

s1.difference(s2)        # s1과 s2의 차집합
{1, 2}

s1 - s2                 # set([1, 2])
{1, 2}

사전(dictionary)

데이터를 저장할 때는 구분 지을 수 있는 값을 함께 저장 예) 주민등록번호, 제품 모델 번호
구분을 위한 데이터 고유 값을 Identifier 또는 Key라고 함
Key 값을 활용하여, 데이터 값(Value)을 관리함
key와 value를 매칭하여 key로 value를 검색
다른 언어에서는 Hash Table이라는 용어를 사용
{Key1:Value1, Key2:Value2, Key3:Value3...} 형태

student_info = {20140012: 'Sungchul', 20140059: 'Jiyong', 20140058: 'Jare'}

student_info[20140012]
student_info[20140012] = 'Janhyeok'
student_info[20140012]
student_info[20140039] = 'wonchul'
student_info
{20140012: 'Janhyeok',
 20140059: 'Jiyong',
 20140058: 'Jare',
 20140039: 'wonchul'}
 
country_code = {}   # Dict 생성. country_code = dict() 도 가능
country_code = {"America": 1, "Korea": 82, "China": 86, "Japan": 81}
country_code
{'America': 1, 'Korea': 82, 'China': 86, 'Japan': 81}

for dict_items in country_code.items(): # Dict 데이터 출력
    print(type(dict_items))
    
<class 'tuple'>
<class 'tuple'>
<class 'tuple'>
<class 'tuple'>
<class 'tuple'>

country_code.keys()     # Dict 키 값만 출력
dict_keys(['America', 'Korea', 'China', 'Japan', 'German'])

country_code["German"] = 49 # Dict 추가
country_code
{'America': 1, 'Korea': 82, 'China': 86, 'Japan': 81, 'German': 49}

country_code.values()   # Dict Value만 출력

dict_values([1, 82, 86, 81, 49])

for k, v in country_code.items():
    print ("key: ", k)
    print ("Value :", v)
key:  America
Value : 1
key:  Korea
Value : 82
key:  China
Value : 86
key:  Japan
Value : 81
key:  German
Value : 49

"Korea" in country_code.keys()  # Key값에 "Korea"가 있는지 확인
True

82 in country_code.values() # Vlaue 값에 82가 있는지 확인
True

collections

List, Tuple, Dict에 대한 Python Built-in 확장 자료 구조(모듈)
편의성, 실행 효율 등을 사용자에게 제공함
아래의 모듈이 존재함

from collections import deque
from collections import Counter
from collections import OrderedDict
from collections import defaultdict
from collections import namedtuple

deque

Stack과 Queue를 지원하는 모듈
List에 비해 효율적인 = 빠른 자료 저장 방식을 지원함
rotate, reverse 등 Linked List의 특성을 지원함
기존 list 형태의 함수를 모두 지원함
deque는 기존 list보다 효율적인 자료구조를 제공
효율적 메모리 구조로 처리 속도 향상

from collections import deque

deque_list = deque()
for i in range(5):
    deque_list.append(i)
print(deque_list)
deque_list.appendleft(10)
print(deque_list)
deque([0, 1, 2, 3, 4])
deque([10, 0, 1, 2, 3, 4])

deque_list.rotate(2)
deque_list
deque([3, 4, 10, 0, 1, 2])

deque_list.extend([5,6,7])
deque_list.extendleft([5,6,7])

def general_list():
    just_list = []
    for i in range(100):
        for i in range(100):
            just_list.append(i)
            just_list.pop()

deque_list.append(100)
deque_list
deque([7, 6, 5, 3, 4, 10, 0, 1, 2, 5, 6, 7, 100])

deque_list.append(200)
deque_list
deque([7, 6, 5, 3, 4, 10, 0, 1, 2, 5, 6, 7, 100, 200])

deque_list.rotate(1)
deque_list
deque([200, 7, 6, 5, 3, 4, 10, 0, 1, 2, 5, 6, 7, 100])

deque_list.extend([5,6,7])
deque_list.extendleft([5,6,7])

def general_list():
    just_list = []
    for i in range(100):
        for i in range(100):
            just_list.append(i)
            just_list.pop()
%timeit general_list()

1.83 ms ± 197 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)

def deque_list():
    deque_list = deque()
    for i in range(100):
        for i in range(100):
            deque_list.append(i)
            deque_list.pop()

%timeit deque_list()
1.01 ms ± 10.6 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)

OrderedDict

Dict와 달리, 데이터를 입력한 순서대로 dict를 반환함
그러나 dict도 python 3.6부터 입력한 순서를 보장하여 출력함

from collections import OrderedDict

d = {}
d['x'] = 100
d['z'] = 300
d['y'] = 200
d['l'] = 500

for k, v in d.items():
    print(k, v)
x 100
z 300
y 200
l 500

def get_key(x):
    return x[0]
   
sorted(d.items(), key=lambda t: t[0])
[('l', 500), ('x', 100), ('y', 200), ('z', 300)]

for k, v in OrderedDict(sorted(d.items(), key=lambda t: t[0])).items():
    print(k, v)
l 500
x 100
y 200
z 300

defaultdict

Dict type의 값에 기본 값을 지정, 신규값 생성 시 사용하는 방법
하나의 지문에 각 단어들이 몇 개나 있는지 세고 싶을경우?
Text-mining 접근법 - Vector Space Model

def default_value():
    return 0
    
from collections import defaultdict
d = defaultdict(default_value)
d

defaultdict(<function __main__.default_value()>, {})

d["first"]
0

text = """A press release is the quickest and easiest way to get free publicity. If well written, a press release can result in multiple published articles about your firm and its products. And that can mean new prospects contacting you asking you to sell to them.""".lower().split()

def get_key(x):
    return x[1]

from collections import OrderedDict
d = defaultdict(lambda : 0)
d

defaultdict(<function __main__.<lambda>()>, {})

for word in text:
    d[word] += 1

sorted_dict = OrderedDict()
for i, v in sorted(d.items(), key=get_key, reverse = True):
    sorted_dict[i] = v
print(sorted_dict)
sorted_dict["to"]
OrderedDict([('and', 6), ('to', 6), ('a', 4), ('press', 4), ('release', 4), ('can', 4), ('you', 4), ('is', 2), ('the', 2), ('quickest', 2), ('easiest', 2), ('way', 2), ('get', 2), ('free', 2), ('publicity.', 2), ('if', 2), ('well', 2), ('written,', 2), ('result', 2), ('in', 2), ('multiple', 2), ('published', 2), ('articles', 2), ('about', 2), ('your', 2), ('firm', 2), ('its', 2), ('products.', 2), ('that', 2), ('mean', 2), ('new', 2), ('prospects', 2), ('contacting', 2), ('asking', 2), ('sell', 2), ('them.', 2)])
6

Counter

Sequence type의 data element들의 개수를 dict 형태로 반환
Set의 연산들을 지원함

rom collections import Counter

c = Counter()               # a new, empty counter
c = Counter('gallahad')     # a new counter from an iterable
print(c)
Counter({'a': 3, 'l': 2, 'g': 1, 'h': 1, 'd': 1})

from collections import Counter

ball_or_strike_list = ["B", "S", "S", "S", "S", "B", "B"]
c = Counter(ball_or_strike_list)
c
Counter({'B': 3, 'S': 4})

c = Counter({'red': 4, 'blue': 2})
print(list(c.elements()))
['red', 'red', 'red', 'red', 'blue', 'blue']

c = Counter(cats=4, dogs=8)
print(list(c.elements()))
['cats', 'cats', 'cats', 'cats', 'dogs', 'dogs', 'dogs', 'dogs', 'dogs', 'dogs', 'dogs', 'dogs']

c = Counter(a=4, b=2, c=0, d=-2)
d = Counter(a=1, b=2, c=3, d=4)
c.subtract(d)
print(c)

Counter({'a': 3, 'b': 0, 'c': -3, 'd': -6})

c = Counter(a=4, b=2, c=0, d=-2)
d = Counter(a=1, b=2, c=3, d=4)
print(c + d)
print(c & d)
print(c | d)

Counter({'a': 5, 'b': 4, 'c': 3, 'd': 2})
Counter({'b': 2, 'a': 1})
Counter({'a': 4, 'd': 4, 'c': 3, 'b': 2})

text = """A press release is the quickest and easiest"""
Counter(text)
Counter({'A': 1,
         ' ': 7,
         'p': 1,
         'r': 2,
         'e': 8,
         's': 7,
         'l': 1,
         'a': 3,
         'i': 3,
         't': 3,
         'h': 1,
         'q': 1,
         'u': 1,
         'c': 1,
         'k': 1,
         'n': 1,
         'd': 1})
         
sorted(Counter(text).items(), key=lambda t : t[1], reverse = True)
print(Counter(text)["a"])
3

namedtuple

Tuple 형태로 Data 구조체를 저장하는 방법
저장되는 data의 variable을 사전에 지정해서 저장함

from collections import namedtuple
Point = namedtuple('Point', ['x', 'y'])
p = Point(11, y=22)
print(p[0] + p[1])
33

'BOOTCAMP > boostcourse AI BASIC 2023' 카테고리의 다른 글

[부스트코스] 코칭스터디 9기 : AI Basic 2023 3주차 미션(Q1, Q2, Q3, Q4) 변형 문제 풀이 (0)	2023.02.07
[부스트코스] 코칭스터디 9기 : AI Basic 2023 2주차 미션(Q1, Q2) 변형 문제 풀이 (0)	2023.02.01
[부스트코스] 코칭스터디 9기 : AI Basic 2023 1주차 미션(Q3, Q4, Q5) 변형 문제 풀이 (0)	2023.01.15
[부스트코스] 코칭스터디 9기 : AI Basic 2023 1주차 미션(Q1, Q2) 변형 문제 풀이 (0)	2023.01.13
Python Object Oriented Programming (2)	2022.12.26

HIGHLLIGHT

HIGHLLIGHT

태그

최근글

댓글

공지사항

아카이브

stack(스택)

Queue(큐)

Tuple(튜플)

집합(set)

사전(dictionary)

collections

deque

OrderedDict

defaultdict

Counter

namedtuple

'BOOTCAMP > boostcourse AI BASIC 2023' 카테고리의 다른 글

관련글

티스토리툴바