데이터분석
[23.07.05] 데이터 시각화(WordCloud) - 24(1)
gmwoo
2023. 7. 5. 16:10
- KoNLPy 패키지 설치
-
- JDK 설치 : JAVA JDK로 검색해서 OS에 맞춰 설치
-
- KoNLPy 의존성 패키지 설치
- pip install jpype1
- KoNLPy 의존성 패키지 설치
-
- KoNLPy 설치
- pip install konlpy
- KoNLPy 설치
-
- (C:\Users\A\Anaconda3) C:\Users\Anaconda3>python3
- import nltk
- nltk.download()
- --> stopwords & punkt duble click download
- (C:\Users\A\Anaconda3) C:\Users\Anaconda3>python3
-
- 워드 클라우드
- pip install wordcloud
- 워드 클라우드
-
- gensim 설치
- pip install gensim
- gensim 설치
-
2. 한글 자연어 처리 기초¶
In [1]:
import warnings
warnings.filterwarnings(action='ignore')
In [2]:
from konlpy.tag import Kkma # 꼬꼬마 사전
kkma = Kkma()
In [3]:
kkma.nouns('한국어 분석을 시작합니다 재미있어요~~')
Out[3]:
['한국어', '분석']
In [4]:
kkma.sentences('한국어 분석을 시작합니다 재미있어요~~')
Out[4]:
['한국어 분석을 시작합니다', '재미있어요~~']
In [5]:
kkma.pos('한국어 분석을 시작합니다 재미있어요~~') # POS(part of speech) 형태소 품사
Out[5]:
[('한국어', 'NNG'),
('분석', 'NNG'),
('을', 'JKO'),
('시작하', 'VV'),
('ㅂ니다', 'EFN'),
('재미있', 'VA'),
('어요', 'EFN'),
('~~', 'SW')]
In [6]:
from konlpy.tag import Hannanum
hannanum = Hannanum()
In [7]:
hannanum.nouns('한국어 분석을 시작합니다 재미있어요~~')
Out[7]:
['한국어', '분석', '시작']
In [8]:
hannanum.morphs('한국어 분석을 시작합니다 재미있어요~~')
Out[8]:
['한국어', '분석', '을', '시작', '하', 'ㅂ니다', '재미있', '어요', '~~']
In [9]:
# pos : 형태소 분석
hannanum.pos('한국어 분석을 시작합니다 재미있어요~~')
Out[9]:
[('한국어', 'N'),
('분석', 'N'),
('을', 'J'),
('시작', 'N'),
('하', 'X'),
('ㅂ니다', 'E'),
('재미있', 'P'),
('어요', 'E'),
('~~', 'S')]
In [10]:
# 트위터 분석
from konlpy.tag import Okt
t = Okt()
In [11]:
t.nouns('한국어 분석을 시작합니다 재미있어요~~')
Out[11]:
['한국어', '분석', '시작']
In [12]:
t.morphs('한국어 분석을 시작합니다 재미있어요~~')
Out[12]:
['한국어', '분석', '을', '시작', '합니다', '재미있어요', '~~']
In [13]:
t.pos('한국어 분석을 시작합니다 재미있어요~~')
Out[13]:
[('한국어', 'Noun'),
('분석', 'Noun'),
('을', 'Josa'),
('시작', 'Noun'),
('합니다', 'Verb'),
('재미있어요', 'Adjective'),
('~~', 'Punctuation')]
3. 워드 클라우드¶
- WordCloud 설치 : pip install wordcloud
- PIL(Python Imaging Library)
In [14]:
from wordcloud import WordCloud, STOPWORDS
import numpy as np
from PIL import Image
In [15]:
text = open('../../data/09. a_new_hope.txt').read()
alice_mask = np.array(Image.open('../../data/09. alice_mask.png'))
stopwords = set(STOPWORDS)
stopwords.add("said")
In [16]:
import matplotlib.pyplot as plt
from matplotlib import font_manager, rc
plt.rcParams['axes.unicode_minus'] = False # - 기호 깨지는 것 방지
# f_path = "/Library/Fonts/AppleGothic.ttf" -> MAC
f_path = "C:/Windows/Fonts/malgun.ttf"
font_name = font_manager.FontProperties(fname=f_path).get_name()
rc('font', family=font_name)
In [17]:
plt.figure(figsize=(8,8))
plt.imshow(alice_mask, cmap=plt.cm.gray, interpolation='bilinear') # Reds
plt.axis('off')
plt.show()
In [18]:
wc = WordCloud(background_color='white', max_words=2000,
mask=alice_mask, stopwords=stopwords)
wc = wc.generate(text) # 엘리스라는 문서에서 단어를 뽑아옴
wc.words_
Out[18]:
{'Luke': 1.0,
'HAN': 0.49053030303030304,
'DEATH STAR': 0.42424242424242425,
'THREEPIO': 0.3977272727272727,
'Ben': 0.2897727272727273,
'Leia': 0.21401515151515152,
'ship': 0.21401515151515152,
'Artoo': 0.1875,
'one': 0.18181818181818182,
'X WING': 0.17992424242424243,
'look': 0.17424242424242425,
'RED LEADER': 0.16856060606060605,
'back': 0.1553030303030303,
'move': 0.14772727272727273,
'two': 0.14583333333333334,
'toward': 0.14015151515151514,
'going': 0.13825757575757575,
'Biggs': 0.13068181818181818,
'INT DEATH': 0.12878787878787878,
'TIE fighter': 0.12878787878787878,
'see': 0.1268939393939394,
'EXT SPACE': 0.1268939393939394,
'Vader': 0.125,
'MILLENNIUM FALCON': 0.12121212121212122,
'right': 0.11742424242424243,
'LUKE X': 0.11742424242424243,
'come': 0.11553030303030302,
'around': 0.11363636363636363,
'trooper': 0.11363636363636363,
'robot': 0.10795454545454546,
'small': 0.10606060606060606,
'side': 0.10416666666666667,
'head': 0.10416666666666667,
'make': 0.09848484848484848,
'now': 0.09848484848484848,
'WING FIGHTER': 0.09848484848484848,
'take': 0.09659090909090909,
'begin': 0.09659090909090909,
'away': 0.08901515151515152,
'fire': 0.08901515151515152,
'will': 0.08901515151515152,
'INT MILLENNIUM': 0.08901515151515152,
'turn': 0.08712121212121213,
'stand': 0.08712121212121213,
'Chewbacca': 0.08712121212121213,
'stormtrooper': 0.08712121212121213,
'INT LUKE': 0.08712121212121213,
'way': 0.08333333333333333,
'control': 0.08333333333333333,
'know': 0.07954545454545454,
'pirateship': 0.07954545454545454,
'COCKPIT Luke': 0.07954545454545454,
'sir': 0.07765151515151515,
'think': 0.07765151515151515,
'DARTH VADER': 0.07765151515151515,
'GOLD LEADER': 0.07765151515151515,
'huge': 0.07575757575757576,
'got': 0.07575757575757576,
'us': 0.07575757575757576,
'time': 0.07386363636363637,
'computer': 0.07196969696969698,
'several': 0.07196969696969698,
'Rebel': 0.07007575757575757,
'long': 0.06818181818181818,
'door': 0.06818181818181818,
'OFFICER': 0.06818181818181818,
'across': 0.06818181818181818,
'OWEN': 0.06818181818181818,
'EXT': 0.06628787878787878,
'hallway': 0.06628787878787878,
'something': 0.06628787878787878,
'VOICE': 0.06628787878787878,
'FIGHTER COCKPIT': 0.06628787878787878,
'blast': 0.06439393939393939,
'behind': 0.06439393939393939,
'hit': 0.06439393939393939,
'light': 0.06439393939393939,
'go': 0.06439393939393939,
'EXT TATOOINE': 0.06439393939393939,
'WING COCKPIT': 0.06439393939393939,
'Imperial': 0.0625,
'Suddenly': 0.0625,
'troops': 0.0625,
'INT': 0.06060606060606061,
'eye': 0.06060606060606061,
'let': 0.06060606060606061,
'start': 0.06060606060606061,
'TARKIN': 0.06060606060606061,
'hand': 0.058712121212121215,
'surface': 0.058712121212121215,
'another': 0.058712121212121215,
'EXT SURFACE': 0.058712121212121215,
'little': 0.056818181818181816,
'face': 0.054924242424242424,
'large': 0.054924242424242424,
'wall': 0.054924242424242424,
'creature': 0.054924242424242424,
'Luke look': 0.054924242424242424,
'laserfire': 0.05303030303030303,
'give': 0.05303030303030303,
'run': 0.05303030303030303,
'pilot': 0.05303030303030303,
'Chewie': 0.05303030303030303,
'race': 0.05303030303030303,
'seem': 0.05113636363636364,
'three': 0.05113636363636364,
'Force': 0.05113636363636364,
'SPACE AROUND': 0.05113636363636364,
'INT RED': 0.05113636363636364,
'power': 0.04924242424242424,
'system': 0.04924242424242424,
'stop': 0.04924242424242424,
'Well': 0.04924242424242424,
'Jawa': 0.04924242424242424,
'looking': 0.04924242424242424,
'Solo': 0.04924242424242424,
'Star surface': 0.04924242424242424,
'past': 0.04734848484848485,
'Alderaan': 0.04734848484848485,
'pull': 0.04734848484848485,
'speaker': 0.04734848484848485,
'COCKPIT Red': 0.04734848484848485,
'old': 0.045454545454545456,
'must': 0.045454545454545456,
'MASSASSI OUTPOST': 0.045454545454545456,
'Y WING': 0.045454545454545456,
'Princess': 0.043560606060606064,
'sound': 0.043560606060606064,
'hold': 0.043560606060606064,
'say': 0.043560606060606064,
'help': 0.043560606060606064,
'INT MASSASSI': 0.043560606060606064,
'WAR ROOM': 0.043560606060606064,
'RED TEN': 0.043560606060606064,
'want': 0.041666666666666664,
'good': 0.041666666666666664,
'droid': 0.041666666666666664,
'continue': 0.041666666666666664,
'dive': 0.041666666666666664,
'INT DARTH': 0.041666666666666664,
'GOLD FIVE': 0.041666666666666664,
'planet': 0.03977272727272727,
'sure': 0.03977272727272727,
'front': 0.03977272727272727,
'friend': 0.03977272727272727,
'wait': 0.03977272727272727,
'find': 0.03977272727272727,
'standing': 0.03977272727272727,
'speeder': 0.03977272727272727,
'gun': 0.03977272727272727,
'comlink': 0.03977272727272727,
'VADER COCKPIT': 0.03977272727272727,
'COCKPIT Vader': 0.03977272727272727,
'starship': 0.03787878787878788,
'firing': 0.03787878787878788,
'laser': 0.03787878787878788,
'sit': 0.03787878787878788,
'moment': 0.03787878787878788,
'enter': 0.03787878787878788,
'thing': 0.03787878787878788,
'coming': 0.03787878787878788,
'Obi Wan': 0.03787878787878788,
'FALCON COCKPIT': 0.03787878787878788,
'LEADER COCKPIT': 0.03787878787878788,
'close': 0.03598484848484849,
'group': 0.03598484848484849,
'attack': 0.03598484848484849,
'Princess Leia': 0.03598484848484849,
'MOS EISLEY': 0.03598484848484849,
'INT GOLD': 0.03598484848484849,
'targeting device': 0.03598484848484849,
'short': 0.03409090909090909,
'much': 0.03409090909090909,
'watches': 0.03409090909090909,
'work': 0.03409090909090909,
'put': 0.03409090909090909,
'Oh': 0.03409090909090909,
'father': 0.03409090909090909,
'position': 0.03409090909090909,
'point': 0.03409090909090909,
'trench': 0.03409090909090909,
'headset': 0.03409090909090909,
'adjust': 0.03409090909090909,
'docking bay': 0.03409090909090909,
'OUTPOST WAR': 0.03409090909090909,
'weapon': 0.032196969696969696,
'along': 0.032196969696969696,
'hear': 0.032196969696969696,
'shut': 0.032196969696969696,
'heard': 0.032196969696969696,
'last': 0.032196969696969696,
'even': 0.032196969696969696,
'guard': 0.032196969696969696,
'laserbolt': 0.032196969696969696,
'appear': 0.032196969696969696,
'Artoo Detoo': 0.032196969696969696,
'far': 0.030303030303030304,
'Empire': 0.030303030303030304,
'station': 0.030303030303030304,
'enough': 0.030303030303030304,
'explosion': 0.030303030303030304,
'floor': 0.030303030303030304,
'follow': 0.030303030303030304,
'sight': 0.030303030303030304,
'four': 0.030303030303030304,
'kid': 0.030303030303030304,
'jump': 0.030303030303030304,
'Sandpeople': 0.030303030303030304,
'open': 0.030303030303030304,
'Jabba': 0.030303030303030304,
'screen': 0.030303030303030304,
'shake': 0.030303030303030304,
'plan': 0.030303030303030304,
'explode': 0.030303030303030304,
'Dodonna': 0.030303030303030304,
'zoom': 0.030303030303030304,
'INTERCOM VOICE': 0.030303030303030304,
'COCKPIT Gold': 0.030303030303030304,
'streak': 0.028409090909090908,
'electronic': 0.028409090909090908,
'boy': 0.028409090909090908,
'next': 0.028409090909090908,
'minute': 0.028409090909090908,
'goes': 0.028409090909090908,
'Commander': 0.028409090909090908,
'man': 0.028409090909090908,
'almost': 0.028409090909090908,
'tell': 0.028409090909090908,
'area': 0.028409090909090908,
'watch': 0.028409090909090908,
'WEDGE': 0.028409090909090908,
'formation': 0.028409090909090908,
'little Artoo': 0.028409090909090908,
'slowly': 0.026515151515151516,
'first': 0.026515151515151516,
'tiny': 0.026515151515151516,
'giant': 0.026515151515151516,
'better': 0.026515151515151516,
'DAY': 0.026515151515151516,
'end': 0.026515151515151516,
'speak': 0.026515151515151516,
'dead': 0.026515151515151516,
'talking': 0.026515151515151516,
'thought': 0.026515151515151516,
'big': 0.026515151515151516,
'second': 0.026515151515151516,
'really': 0.026515151515151516,
'Sandcrawler': 0.026515151515151516,
'beep': 0.026515151515151516,
'shoulder': 0.026515151515151516,
'Stay': 0.026515151515151516,
'arm': 0.026515151515151516,
'Wookiee': 0.026515151515151516,
'moon': 0.026515151515151516,
'battle station': 0.026515151515151516,
'LUKE Well': 0.026515151515151516,
'looks around': 0.026515151515151516,
'beeping sound': 0.026515151515151516,
'laser cannon': 0.026515151515151516,
'outside': 0.02462121212121212,
'corridor': 0.02462121212121212,
'feet': 0.02462121212121212,
'place': 0.02462121212121212,
'subhallway': 0.02462121212121212,
'rushes': 0.02462121212121212,
'found': 0.02462121212121212,
'great': 0.02462121212121212,
'Rebellion': 0.02462121212121212,
'cut': 0.02462121212121212,
'spot': 0.02462121212121212,
'Listen': 0.02462121212121212,
'trying': 0.02462121212121212,
'never': 0.02462121212121212,
'need': 0.02462121212121212,
'Okay': 0.02462121212121212,
'reaches': 0.02462121212121212,
'year': 0.02462121212121212,
'still': 0.02462121212121212,
'try': 0.02462121212121212,
'laser pistol': 0.02462121212121212,
'Artoo beeps': 0.02462121212121212,
'Wan Kenobi': 0.02462121212121212,
'Imperial TIE': 0.02462121212121212,
'FALCON GUNPORT': 0.02462121212121212,
"BIGGS' COCKPIT": 0.02462121212121212,
'main': 0.022727272727272728,
'battle': 0.022727272727272728,
'battered': 0.022727272727272728,
'aim': 0.022727272727272728,
'moving': 0.022727272727272728,
'Hey': 0.022727272727272728,
'ROOM': 0.022727272727272728,
'lot': 0.022727272727272728,
'keep': 0.022727272727272728,
'anything': 0.022727272727272728,
'leave': 0.022727272727272728,
'canyon': 0.022727272727272728,
'quite': 0.022727272727272728,
'speed': 0.022727272727272728,
'table': 0.022727272727272728,
'disappear': 0.022727272727272728,
'button': 0.022727272727272728,
'use': 0.022727272727272728,
'board': 0.022727272727272728,
'target': 0.022727272727272728,
'grab': 0.022727272727272728,
'R2 unit': 0.022727272727272728,
'Aunt Beru': 0.022727272727272728,
'Ben Kenobi': 0.022727272727272728,
'TIE ship': 0.022727272727272728,
'STAR MAIN': 0.022727272727272728,
'FORWARD BAY': 0.022727272727272728,
'Imperial stormtrooper': 0.022727272727272728,
"INT BIGGS'": 0.022727272727272728,
'Rebel fighter': 0.022727272727272728,
'LEADER Y': 0.022727272727272728,
'STAR TRENCH': 0.022727272727272728,
'hope': 0.020833333333333332,
'struggle': 0.020833333333333332,
'smoke': 0.020833333333333332,
'near': 0.020833333333333332,
'horizon': 0.020833333333333332,
'air': 0.020833333333333332,
'smile': 0.020833333333333332,
'distance': 0.020833333333333332,
'direction': 0.020833333333333332,
'approaching': 0.020833333333333332,
'hatch': 0.020833333333333332,
'narrow': 0.020833333333333332,
'life': 0.020833333333333332,
'bad': 0.020833333333333332,
'uncle': 0.020833333333333332,
'blaster': 0.020833333333333332,
'seen': 0.020833333333333332,
'line': 0.020833333333333332,
'master': 0.020833333333333332,
'become': 0.020833333333333332,
'range': 0.020833333333333332,
'cell': 0.020833333333333332,
'port': 0.020833333333333332,
'wingmen': 0.020833333333333332,
'REBEL BLOCKADE': 0.020833333333333332,
'BLOCKADE RUNNER': 0.020833333333333332,
'old Ben': 0.020833333333333332,
'Rebel base': 0.020833333333333332,
'CONTROL ROOM': 0.020833333333333332,
'COCKPIT TRAVELING': 0.020833333333333332,
'new': 0.01893939393939394,
'home': 0.01893939393939394,
'half': 0.01893939393939394,
'Hurry': 0.01893939393939394,
'young': 0.01893939393939394,
'soldier': 0.01893939393939394,
'getting': 0.01893939393939394,
'course': 0.01893939393939394,
'others': 0.01893939393939394,
'worry': 0.01893939393939394,
'fight': 0.01893939393939394,
'part': 0.01893939393939394,
'approaches': 0.01893939393939394,
'sand': 0.01893939393939394,
'whistle': 0.01893939393939394,
'distant': 0.01893939393939394,
'left': 0.01893939393939394,
'might': 0.01893939393939394,
'bit': 0.01893939393939394,
'shoot': 0.01893939393939394,
'followed': 0.01893939393939394,
'Uh': 0.01893939393939394,
'hangar': 0.01893939393939394,
'able': 0.01893939393939394,
'blow': 0.01893939393939394,
'bartender': 0.01893939393939394,
'fast': 0.01893939393939394,
'check': 0.01893939393939394,
'GREEDO': 0.01893939393939394,
'green': 0.01893939393939394,
'crew': 0.01893939393939394,
'flame': 0.01893939393939394,
'tower': 0.01893939393939394,
'Uncle Owen': 0.01893939393939394,
'HOLD AREA': 0.01893939393939394,
'TATOOINE MOS': 0.01893939393939394,
'Han Solo': 0.01893939393939394,
'control panel': 0.01893939393939394,
'Imperial fighter': 0.01893939393939394,
'MAIN FORWARD': 0.01893939393939394,
'three TIE': 0.01893939393939394,
'galaxy': 0.017045454545454544,
'aboard': 0.017045454545454544,
'human': 0.017045454545454544,
'rush': 0.017045454545454544,
'filled': 0.017045454545454544,
'morning': 0.017045454545454544,
'seven': 0.017045454545454544,
'Dark': 0.017045454545454544,
'mission': 0.017045454545454544,
'call': 0.017045454545454544,
'rest': 0.017045454545454544,
'background': 0.017045454545454544,
'shot': 0.017045454545454544,
'happened': 0.017045454545454544,
'came': 0.017045454545454544,
'low': 0.017045454545454544,
'gets': 0.017045454545454544,
'may': 0.017045454545454544,
'HOMESTEAD': 0.017045454545454544,
'quickly': 0.017045454545454544,
'garage': 0.017045454545454544,
'Yeah': 0.017045454545454544,
'trouble': 0.017045454545454544,
'tries': 0.017045454545454544,
'pushes': 0.017045454545454544,
'gone': 0.017045454545454544,
'yell': 0.017045454545454544,
'closer': 0.017045454545454544,
'step': 0.017045454545454544,
'return': 0.017045454545454544,
'lightsaber': 0.017045454545454544,
'thousand': 0.017045454545454544,
'together': 0.017045454545454544,
'information': 0.017045454545454544,
'readout': 0.017045454545454544,
'ready': 0.017045454545454544,
'burst': 0.017045454545454544,
'elevator': 0.017045454545454544,
'enemy': 0.017045454545454544,
'climb': 0.017045454545454544,
'shadow': 0.017045454545454544,
'Dark Lord': 0.017045454545454544,
'LARS HOMESTEAD': 0.017045454545454544,
'old man': 0.017045454545454544,
'tractor beam': 0.017045454545454544,
'GARBAGE ROOM': 0.017045454545454544,
'exhaust port': 0.017045454545454544,
'INT WEDGE': 0.017045454545454544,
'COCKPIT Wedge': 0.017045454545454544,
'ROOM Leia': 0.017045454545454544,
'control stick': 0.017045454545454544,
'trench INT': 0.017045454545454544,
'armored': 0.015151515151515152,
'shaking': 0.015151515151515152,
'scream': 0.015151515151515152,
'creating': 0.015151515151515152,
'desert': 0.015151515151515152,
'white': 0.015151515151515152,
'Landspeeder': 0.015151515151515152,
'leaving': 0.015151515151515152,
'quiet': 0.015151515151515152,
'heading': 0.015151515151515152,
'bring': 0.015151515151515152,
'set': 0.015151515151515152,
'told': 0.015151515151515152,
'yelling': 0.015151515151515152,
'easy': 0.015151515151515152,
'nothing': 0.015151515151515152,
'made': 0.015151515151515152,
'reply': 0.015151515151515152,
'pay': 0.015151515151515152,
'best': 0.015151515151515152,
'strange': 0.015151515151515152,
'approach': 0.015151515151515152,
'pop': 0.015151515151515152,
'edge': 0.015151515151515152,
'blue': 0.015151515151515152,
'lost': 0.015151515151515152,
'center': 0.015151515151515152,
'mind': 0.015151515151515152,
'bar': 0.015151515151515152,
'massive': 0.015151515151515152,
'view': 0.015151515151515152,
'walk': 0.015151515151515152,
'everything': 0.015151515151515152,
'corner': 0.015151515151515152,
'transmission': 0.015151515151515152,
'twenty': 0.015151515151515152,
'round': 0.015151515151515152,
'charge': 0.015151515151515152,
'helmet': 0.015151515151515152,
'copy': 0.015151515151515152,
'cover': 0.015151515151515152,
'Yavin': 0.015151515151515152,
'peel': 0.015151515151515152,
'Lord Vader': 0.015151515151515152,
'TATOOINE ROCK': 0.015151515151515152,
'EISLEY STREET': 0.015151515151515152,
'cockpit window': 0.015151515151515152,
'WEDGE COCKPIT': 0.015151515151515152,
'base': 0.013257575757575758,
'evil': 0.013257575757575758,
'entire': 0.013257575757575758,
'craft': 0.013257575757575758,
'tall': 0.013257575757575758,
'understand': 0.013257575757575758,
'escape': 0.013257575757575758,
'making': 0.013257575757575758,
'heavy': 0.013257575757575758,
'hole': 0.013257575757575758,
'holding': 0.013257575757575758,
'barely': 0.013257575757575758,
'nowhere': 0.013257575757575758,
'lifepod': 0.013257575757575758,
'flash': 0.013257575757575758,
'captain': 0.013257575757575758,
'loose': 0.013257575757575758,
'soon': 0.013257575757575758,
'stumbles': 0.013257575757575758,
'believe': 0.013257575757575758,
'senator': 0.013257575757575758,
'dangerous': 0.013257575757575758,
'send': 0.013257575757575758,
'signal': 0.013257575757575758,
'season': 0.013257575757575758,
'CENTRAL': 0.013257575757575758,
'join': 0.013257575757575758,
'always': 0.013257575757575758,
'feel': 0.013257575757575758,
'powerful': 0.013257575757575758,
'glow': 0.013257575757575758,
'eight': 0.013257575757575758,
'size': 0.013257575757575758,
'beam': 0.013257575757575758,
'metal': 0.013257575757575758,
'chamber': 0.013257575757575758,
'pick': 0.013257575757575758,
'slimy': 0.013257575757575758,
'stick': 0.013257575757575758,
'grey': 0.013257575757575758,
'spaceship': 0.013257575757575758,
'laughing': 0.013257575757575758,
'many': 0.013257575757575758,
'afraid': 0.013257575757575758,
'message': 0.013257575757575758,
'bolt': 0.013257575757575758,
'high': 0.013257575757575758,
'deep': 0.013257575757575758,
'lies': 0.013257575757575758,
'Jedi': 0.013257575757575758,
'nervously': 0.013257575757575758,
'general': 0.013257575757575758,
'precise': 0.013257575757575758,
'fighter': 0.013257575757575758,
'lock': 0.013257575757575758,
'ramp': 0.013257575757575758,
'hyperspace': 0.013257575757575758,
'lever': 0.013257575757575758,
'number': 0.013257575757575758,
'closing': 0.013257575757575758,
'shaft': 0.013257575757575758,
'bridge': 0.013257575757575758,
'swing': 0.013257575757575758,
'manage': 0.013257575757575758,
'GUNPORT': 0.013257575757575758,
'temple': 0.013257575757575758,
'maneuver': 0.013257575757575758,
'flying': 0.013257575757575758,
'Flak': 0.013257575757575758,
'little robot': 0.013257575757575758,
'Imperial Officer': 0.013257575757575758,
'POWER STATION': 0.013257575757575758,
'ROCK CANYON': 0.013257575757575758,
'FIRST TROOPER': 0.013257575757575758,
'THREEPIO Oh': 0.013257575757575758,
'STAR INTERCOM': 0.013257575757575758,
'STAR GARBAGE': 0.013257575757575758,
'laser gun': 0.013257575757575758,
'Gold Two': 0.013257575757575758,
'heavens': 0.011363636363636364,
'hidden': 0.011363636363636364,
'destroy': 0.011363636363636364,
'awesome': 0.011363636363636364,
'causing': 0.011363636363636364,
'passageway': 0.011363636363636364,
'metallic': 0.011363636363636364,
'doomed': 0.011363636363636364,
'loud': 0.011363636363636364,
'nervous': 0.011363636363636364,
'figure': 0.011363636363636364,
'belt': 0.011363636363636364,
'transport': 0.011363636363636364,
'circle': 0.011363636363636364,
'every': 0.011363636363636364,
'break': 0.011363636363636364,
'panic': 0.011363636363636364,
'dome': 0.011363636363636364,
'neck': 0.011363636363636364,
'done': 0.011363636363636364,
'scurry': 0.011363636363636364,
'fear': 0.011363636363636364,
'anger': 0.011363636363636364,
'body': 0.011363636363636364,
'dust': 0.011363636363636364,
'dozen': 0.011363636363636364,
'Fixer': 0.011363636363636364,
'noise': 0.011363636363636364,
'surprise': 0.011363636363636364,
'hot': 0.011363636363636364,
'working': 0.011363636363636364,
'forget': 0.011363636363636364,
'hard': 0.011363636363636364,
'play': 0.011363636363636364,
'respond': 0.011363636363636364,
'rock': 0.011363636363636364,
'fault': 0.011363636363636364,
'mouth': 0.011363636363636364,
'whole': 0.011363636363636364,
'taken': 0.011363636363636364,
'men': 0.011363636363636364,
'beside': 0.011363636363636364,
'carefully': 0.011363636363636364,
'form': 0.011363636363636364,
'thirty': 0.011363636363636364,
'lead': 0.011363636363636364,
'pointing': 0.011363636363636364,
'Thank': 0.011363636363636364,
'notice': 0.011363636363636364,
'wonder': 0.011363636363636364,
'either': 0.011363636363636364,
'knew': 0.011363636363636364,
'mean': 0.011363636363636364,
'watching': 0.011363636363636364,
'top': 0.011363636363636364,
'laugh': 0.011363636363636364,
'TAGGE': 0.011363636363636364,
'Motti': 0.011363636363636364,
'cantina': 0.011363636363636364,
'remain': 0.011363636363636364,
'TEN': 0.011363636363636364,
'reach': 0.011363636363636364,
'doorway': 0.011363636363636364,
'Five': 0.011363636363636364,
'alarm': 0.011363636363636364,
'lose': 0.011363636363636364,
'window': 0.011363636363636364,
'seeker': 0.011363636363636364,
'shudder': 0.011363636363636364,
'clear': 0.011363636363636364,
'exit': 0.011363636363636364,
'already': 0.011363636363636364,
'reward': 0.011363636363636364,
'barrage': 0.011363636363636364,
'alert': 0.011363636363636364,
'whip': 0.011363636363636364,
'wave': 0.011363636363636364,
'jungle': 0.011363636363636364,
'Hang': 0.011363636363636364,
'engine': 0.011363636363636364,
'Imperial Stardestroyer': 0.011363636363636364,
'Laserbolts streak': 0.011363636363636364,
'main passageway': 0.011363636363636364,
'farm boy': 0.011363636363636364,
'Artoo whistles': 0.011363636363636364,
'let go': 0.011363636363636364,
'little droid': 0.011363636363636364,
'ROCK MESA': 0.011363636363636364,
'Tusken Raider': 0.011363636363636364,
'Moff Tarkin': 0.011363636363636364,
'old Jedi': 0.011363636363636364,
'gantry officer': 0.011363636363636364,
'BAY SERVICE': 0.011363636363636364,
'SERVICE PANEL': 0.011363636363636364,
'target screen': 0.011363636363636364,
'headset Red': 0.011363636363636364,
'FIVE Y': 0.011363636363636364,
'vast': 0.00946969696969697,
'bronze': 0.00946969696969697,
'reactor': 0.00946969696969697,
'destroyed': 0.00946969696969697,
'trust': 0.00946969696969697,
'oil': 0.00946969696969697,
'six': 0.00946969696969697,
'waiting': 0.00946969696969697,
'throws': 0.00946969696969697,
'Sith': 0.00946969696969697,
'Everyone': 0.00946969696969697,
'warrior': 0.00946969696969697,
'opening': 0.00946969696969697,
'broken': 0.00946969696969697,
'bodies': 0.00946969696969697,
'astro': 0.00946969696969697,
'somebody': 0.00946969696969697,
'carrying': 0.00946969696969697,
'terrified': 0.00946969696969697,
'CHIEF': 0.00946969696969697,
'response': 0.00946969696969697,
'empty': 0.00946969696969697,
'STREET': 0.00946969696969697,
'rugged': 0.00946969696969697,
'Camie': 0.00946969696969697,
'wildly': 0.00946969696969697,
'sitting': 0.00946969696969697,
'blasted': 0.00946969696969697,
'Yes': 0.00946969696969697,
'dune': 0.00946969696969697,
'fall': 0.00946969696969697,
'forth': 0.00946969696969697,
'technical': 0.00946969696969697,
'rather': 0.00946969696969697,
'ridge': 0.00946969696969697,
'frantically': 0.00946969696969697,
'inside': 0.00946969696969697,
'wide': 0.00946969696969697,
'stunned': 0.00946969696969697,
'sorry': 0.00946969696969697,
'care': 0.00946969696969697,
'shrouded': 0.00946969696969697,
'fill': 0.00946969696969697,
'alone': 0.00946969696969697,
'slight': 0.00946969696969697,
'flicker': 0.00946969696969697,
'eerie': 0.00946969696969697,
'onto': 0.00946969696969697,
'middle': 0.00946969696969697,
'conversation': 0.00946969696969697,
'tracks': 0.00946969696969697,
'bounce': 0.00946969696969697,
'language': 0.00946969696969697,
'real': 0.00946969696969697,
'entry': 0.00946969696969697,
'late': 0.00946969696969697,
'muffled': 0.00946969696969697,
'finally': 0.00946969696969697,
'chrome': 0.00946969696969697,
'remove': 0.00946969696969697,
'cruiser': 0.00946969696969697,
'twelve': 0.00946969696969697,
'question': 0.00946969696969697,
'data': 0.00946969696969697,
'kind': 0.00946969696969697,
'immediately': 0.00946969696969697,
'concern': 0.00946969696969697,
'fine': 0.00946969696969697,
'agreement': 0.00946969696969697,
'entrance': 0.00946969696969697,
'final': 0.00946969696969697,
'furry': 0.00946969696969697,
'wrong': 0.00946969696969697,
'terror': 0.00946969696969697,
'exotic': 0.00946969696969697,
'piece': 0.00946969696969697,
'chatter': 0.00946969696969697,
'fool': 0.00946969696969697,
'direct': 0.00946969696969697,
'location': 0.00946969696969697,
'alien': 0.00946969696969697,
'hundred': 0.00946969696969697,
'show': 0.00946969696969697,
'drink': 0.00946969696969697,
'ear': 0.00946969696969697,
'sword': 0.00946969696969697,
'paces': 0.00946969696969697,
'attempt': 0.00946969696969697,
'avoid': 0.00946969696969697,
'fly': 0.00946969696969697,
'money': 0.00946969696969697,
'idea': 0.00946969696969697,
'alleyway': 0.00946969696969697,
'secure': 0.00946969696969697,
'passing': 0.00946969696969697,
'couple': 0.00946969696969697,
'flashes': 0.00946969696969697,
'quick': 0.00946969696969697,
'pulled': 0.00946969696969697,
'monitor': 0.00946969696969697,
'socket': 0.00946969696969697,
'level': 0.00946969696969697,
'ball': 0.00946969696969697,
"ain't": 0.00946969696969697,
'camera': 0.00946969696969697,
'crewmen': 0.00946969696969697,
'worried': 0.00946969696969697,
'guy': 0.00946969696969697,
'leading': 0.00946969696969697,
'lean': 0.00946969696969697,
'READ': 0.00946969696969697,
'draw': 0.00946969696969697,
'technician': 0.00946969696969697,
'TATOOINE DESERT': 0.00946969696969697,
'Luke Skywalker': 0.00946969696969697,
'makes beeping': 0.00946969696969697,
'Yes sir': 0.00946969696969697,
'PLANET TATOOINE': 0.00946969696969697,
'dune sea': 0.00946969696969697,
'far end': 0.00946969696969697,
'astro droid': 0.00946969696969697,
'restraining bolt': 0.00946969696969697,
'Jedi Knight': 0.00946969696969697,
'Grand Moff': 0.00946969696969697,
'door slides': 0.00946969696969697,
'giant Wookiee': 0.00946969696969697,
'darkly clad': 0.00946969696969697,
'slides open': 0.00946969696969697,
'clad creature': 0.00946969696969697,
'Governor Tarkin': 0.00946969696969697,
'races past': 0.00946969696969697,
'command office': 0.00946969696969697,
'MAIN HANGAR': 0.00946969696969697,
'planet Yavin': 0.00946969696969697,
'targeting computer': 0.00946969696969697,
'fire button': 0.00946969696969697,
'Vader wingman': 0.00946969696969697,
'sinister': 0.007575757575757576,
'stolen': 0.007575757575757576,
'save': 0.007575757575757576,
'yellow': 0.007575757575757576,
'spacecraft': 0.007575757575757576,
'deadly': 0.007575757575757576,
'bouncing': 0.007575757575757576,
'radar': 0.007575757575757576,
'equipment': 0.007575757575757576,
'smaller': 0.007575757575757576,
'tremendous': 0.007575757575757576,
'duck': 0.007575757575757576,
'known': 0.007575757575757576,
'settle': 0.007575757575757576,
'lone': 0.007575757575757576,
'wind': 0.007575757575757576,
'pipe': 0.007575757575757576,
'sky': 0.007575757575757576,
'electrobinoculars': 0.007575757575757576,
'ground': 0.007575757575757576,
'tight': 0.007575757575757576,
'joint': 0.007575757575757576,
'smoldering': 0.007575757575757576,
'Emperor': 0.007575757575757576,
'black': 0.007575757575757576,
'sweeps': 0.007575757575757576,
'woman': 0.007575757575757576,
'attention': 0.007575757575757576,
'alcove': 0.007575757575757576,
'girl': 0.007575757575757576,
'companion': 0.007575757575757576,
'gruesome': 0.007575757575757576,
'apart': 0.007575757575757576,
'muted': 0.007575757575757576,
'pod': 0.007575757575757576,
'debris': 0.007575757575757576,
'vehicle': 0.007575757575757576,
'flies': 0.007575757575757576,
'city': 0.007575757575757576,
'sharp': 0.007575757575757576,
'appearance': 0.007575757575757576,
'hug': 0.007575757575757576,
'slightly': 0.007575757575757576,
'binoculars': 0.007575757575757576,
'scanning': 0.007575757575757576,
'freighter': 0.007575757575757576,
'shrugs': 0.007575757575757576,
'obvious': 0.007575757575757576,
'squad': 0.007575757575757576,
'killed': 0.007575757575757576,
'snap': 0.007575757575757576,
'meet': 0.007575757575757576,
'frozen': 0.007575757575757576,
'malfunctioning': 0.007575757575757576,
'within': 0.007575757575757576,
'trail': 0.007575757575757576,
'Skyhopper': 0.007575757575757576,
'gotten': 0.007575757575757576,
'amazed': 0.007575757575757576,
'chance': 0.007575757575757576,
'learn': 0.007575757575757576,
'starting': 0.007575757575757576,
'wish': 0.007575757575757576,
'Maybe': 0.007575757575757576,
'gargantuan': 0.007575757575757576,
'dim': 0.007575757575757576,
'array': 0.007575757575757576,
'simply': 0.007575757575757576,
'mechanical': 0.007575757575757576,
'bangs': 0.007575757575757576,
'block': 0.007575757575757576,
'covered': 0.007575757575757576,
'choice': 0.007575757575757576,
'sale': 0.007575757575757576,
'similar': 0.007575757575757576,
'box': 0.007575757575757576,
'sparks': 0.007575757575757576,
'damaged': 0.007575757575757576,
'harvest': 0.007575757575757576,
'matter': 0.007575757575757576,
'answer': 0.007575757575757576,
'recording': 0.007575757575757576,
'Pardon': 0.007575757575757576,
'anyone': 0.007575757575757576,
'hurries': 0.007575757575757576,
'food': 0.007575757575757576,
'name': 0.007575757575757576,
"that'll": 0.007575757575757576,
'deactivate': 0.007575757575757576,
'foreground': 0.007575757575757576,
'monstrous': 0.007575757575757576,
'ride': 0.007575757575757576,
'talk': 0.007575757575757576,
'million': 0.007575757575757576,
'react': 0.007575757575757576,
'local': 0.007575757575757576,
'frightened': 0.007575757575757576,
'ancient': 0.007575757575757576,
'yet': 0.007575757575757576,
'belong': 0.007575757575757576,
'remember': 0.007575757575757576,
'pit': 0.007575757575757576,
'switch': 0.007575757575757576,
'listening': 0.007575757575757576,
'operational': 0.007575757575757576,
'chair': 0.007575757575757576,
'gain': 0.007575757575757576,
'bow': 0.007575757575757576,
'suggest': 0.007575757575757576,
'fortress': 0.007575757575757576,
'Bantha': 0.007575757575757576,
'burning': 0.007575757575757576,
'stopped': 0.007575757575757576,
'business': 0.007575757575757576,
'rough': 0.007575757575757576,
'giving': 0.007575757575757576,
'quietly': 0.007575757575757576,
'careful': 0.007575757575757576,
'monster': 0.007575757575757576,
'suit': 0.007575757575757576,
'advance': 0.007575757575757576,
'asking': 0.007575757575757576,
'drop': 0.007575757575757576,
'sign': 0.007575757575757576,
'full': 0.007575757575757576,
'locked': 0.007575757575757576,
'visible': 0.007575757575757576,
'insect': 0.007575757575757576,
'grim': 0.007575757575757576,
'type': 0.007575757575757576,
'transmitter': 0.007575757575757576,
'overhead': 0.007575757575757576,
'scope': 0.007575757575757576,
'violently': 0.007575757575757576,
'Dantooine': 0.007575757575757576,
'remote': 0.007575757575757576,
'anyway': 0.007575757575757576,
'lighted': 0.007575757575757576,
'crosses': 0.007575757575757576,
'straight': 0.007575757575757576,
'pirate': 0.007575757575757576,
'running': 0.007575757575757576,
'buzz': 0.007575757575757576,
'NINE': 0.007575757575757576,
'checked': 0.007575757575757576,
'command': 0.007575757575757576,
'howl': 0.007575757575757576,
'rescue': 0.007575757575757576,
'glance': 0.007575757575757576,
'situation': 0.007575757575757576,
'buzzing': 0.007575757575757576,
'negative': 0.007575757575757576,
'emerge': 0.007575757575757576,
'rumble': 0.007575757575757576,
'pressure': 0.007575757575757576,
'relief': 0.007575757575757576,
'abyss': 0.007575757575757576,
'ease': 0.007575757575757576,
'cloak': 0.007575757575757576,
'spectacular': 0.007575757575757576,
'ladder': 0.007575757575757576,
'soars': 0.007575757575757576,
'Willard': 0.007575757575757576,
'activity': 0.007575757575757576,
'lower': 0.007575757575757576,
'mike': 0.007575757575757576,
'rotate': 0.007575757575757576,
'tail': 0.007575757575757576,
'pursuit': 0.007575757575757576,
'wingman': 0.007575757575757576,
'presses': 0.007575757575757576,
'mark': 0.007575757575757576,
'repair': 0.007575757575757576,
'DESERT WASTELAND': 0.007575757575757576,
'utility belt': 0.007575757575757576,
'beautiful young': 0.007575757575757576,
'STATION DAY': 0.007575757575757576,
'Imperial Senate': 0.007575757575757576,
'new droids': 0.007575757575757576,
'laser rifle': 0.007575757575757576,
'CONFERENCE ROOM': 0.007575757575757576,
'TATOOINE WASTELAND': 0.007575757575757576,
'EISLEY SPACEPORT': 0.007575757575757576,
'speeder lot': 0.007575757575757576,
'TARKIN Yes': 0.007575757575757576,
'DETENTION AREA': 0.007575757575757576,
'Chewbacca growls': 0.007575757575757576,
'computer readout': 0.007575757575757576,
'HANGAR DECK': 0.007575757575757576,
'bridge overhang': 0.007575757575757576,
'projected target': 0.007575757575757576,
'projected screen': 0.007575757575757576,
'FOURTH MOON': 0.007575757575757576,
'GUN EMPLACEMENTS': 0.007575757575757576,
'ship spins': 0.007575757575757576,
'ago': 0.005681818181818182,
'infinity': 0.005681818181818182,
'Galactic': 0.005681818181818182,
'managed': 0.005681818181818182,
...}
In [19]:
plt.figure(figsize=(12,12))
plt.imshow(wc, interpolation='bilinear')
plt.axis('off')
plt.show()
In [20]:
text = open('../../data/09. a_new_hope.txt').read()
text = text.replace('HAN', 'Han')
text = text.replace("LUKE'S", "Luke")
mask = np.array(Image.open('../../data/09. stormtrooper_mask.png'))
In [21]:
stopwords = set(STOPWORDS)
stopwords.add("int")
stopwords.add("ext")
In [22]:
wc = WordCloud(max_words=1000, random_state=1, margin=10,
mask=mask, stopwords=stopwords).generate(text)
default_colors = wc.to_array()
In [23]:
wc.words_
Out[23]:
{'Luke': 1.0,
'Han': 0.45121951219512196,
'DEATH STAR': 0.3902439024390244,
'THREEPIO': 0.36585365853658536,
'Ben': 0.2665505226480836,
'Leia': 0.19686411149825783,
'ship': 0.19686411149825783,
'Artoo': 0.17247386759581881,
'one': 0.1672473867595819,
'X WING': 0.16550522648083624,
'look': 0.1602787456445993,
'RED LEADER': 0.15505226480836237,
'back': 0.14285714285714285,
'move': 0.13588850174216027,
'two': 0.13414634146341464,
'toward': 0.1289198606271777,
'going': 0.12717770034843207,
'Biggs': 0.12020905923344948,
'TIE fighter': 0.11846689895470383,
'see': 0.11672473867595819,
'Vader': 0.11498257839721254,
'MILLENNIUM FALCON': 0.11149825783972125,
'SPACE': 0.10975609756097561,
'surface': 0.10801393728222997,
'right': 0.10801393728222997,
'Luke X': 0.10801393728222997,
'come': 0.10627177700348432,
'around': 0.10452961672473868,
'trooper': 0.10452961672473868,
'robot': 0.09930313588850175,
'small': 0.0975609756097561,
'side': 0.09581881533101046,
'head': 0.09581881533101046,
'make': 0.09059233449477352,
'now': 0.09059233449477352,
'WING FIGHTER': 0.09059233449477352,
'take': 0.08885017421602788,
'begin': 0.08885017421602788,
'away': 0.08188153310104529,
'fire': 0.08188153310104529,
'will': 0.08188153310104529,
'turn': 0.08013937282229965,
'stand': 0.08013937282229965,
'Chewbacca': 0.08013937282229965,
'stormtrooper': 0.08013937282229965,
'way': 0.07665505226480836,
'control': 0.07665505226480836,
'know': 0.07317073170731707,
'pirateship': 0.07317073170731707,
'COCKPIT Luke': 0.07317073170731707,
'sir': 0.07142857142857142,
'think': 0.07142857142857142,
'DARTH VADER': 0.07142857142857142,
'GOLD LEADER': 0.07142857142857142,
'huge': 0.06968641114982578,
'got': 0.06968641114982578,
'us': 0.06968641114982578,
'time': 0.06794425087108014,
'computer': 0.06620209059233449,
'several': 0.06620209059233449,
'Rebel': 0.06445993031358885,
'long': 0.0627177700348432,
'door': 0.0627177700348432,
'OFFICER': 0.0627177700348432,
'across': 0.0627177700348432,
'OWEN': 0.0627177700348432,
'hallway': 0.06097560975609756,
'something': 0.06097560975609756,
'VOICE': 0.06097560975609756,
'FIGHTER COCKPIT': 0.06097560975609756,
'blast': 0.059233449477351915,
'behind': 0.059233449477351915,
'hit': 0.059233449477351915,
'light': 0.059233449477351915,
'go': 0.059233449477351915,
'WING COCKPIT': 0.059233449477351915,
'Imperial': 0.05749128919860627,
'Suddenly': 0.05749128919860627,
'troops': 0.05749128919860627,
'eye': 0.05574912891986063,
'let': 0.05574912891986063,
'start': 0.05574912891986063,
'TARKIN': 0.05574912891986063,
'hand': 0.05400696864111498,
'another': 0.05400696864111498,
'little': 0.05226480836236934,
'face': 0.050522648083623695,
'large': 0.050522648083623695,
'wall': 0.050522648083623695,
'creature': 0.050522648083623695,
'Luke look': 0.050522648083623695,
'laserfire': 0.04878048780487805,
'give': 0.04878048780487805,
'run': 0.04878048780487805,
'pilot': 0.04878048780487805,
'Chewie': 0.04878048780487805,
'race': 0.04878048780487805,
'seem': 0.047038327526132406,
'three': 0.047038327526132406,
'Force': 0.047038327526132406,
'trench': 0.047038327526132406,
'SPACE AROUND': 0.047038327526132406,
'power': 0.04529616724738676,
'system': 0.04529616724738676,
'stop': 0.04529616724738676,
'Well': 0.04529616724738676,
'Jawa': 0.04529616724738676,
'looking': 0.04529616724738676,
'Solo': 0.04529616724738676,
'Star surface': 0.04529616724738676,
'past': 0.04355400696864112,
'Alderaan': 0.04355400696864112,
'pull': 0.04355400696864112,
'speaker': 0.04355400696864112,
'COCKPIT Red': 0.04355400696864112,
'old': 0.041811846689895474,
'must': 0.041811846689895474,
'WEDGE': 0.041811846689895474,
'MASSASSI OUTPOST': 0.041811846689895474,
'Y WING': 0.041811846689895474,
'Princess': 0.04006968641114982,
'sound': 0.04006968641114982,
'hold': 0.04006968641114982,
'say': 0.04006968641114982,
'help': 0.04006968641114982,
'WAR ROOM': 0.04006968641114982,
'RED TEN': 0.04006968641114982,
'want': 0.03832752613240418,
'good': 0.03832752613240418,
'droid': 0.03832752613240418,
'continue': 0.03832752613240418,
'dive': 0.03832752613240418,
'GOLD FIVE': 0.03832752613240418,
'planet': 0.036585365853658534,
'sure': 0.036585365853658534,
'front': 0.036585365853658534,
'friend': 0.036585365853658534,
'wait': 0.036585365853658534,
'find': 0.036585365853658534,
'standing': 0.036585365853658534,
'speeder': 0.036585365853658534,
'gun': 0.036585365853658534,
'comlink': 0.036585365853658534,
'VADER COCKPIT': 0.036585365853658534,
'COCKPIT Vader': 0.036585365853658534,
'starship': 0.03484320557491289,
'firing': 0.03484320557491289,
'laser': 0.03484320557491289,
'sit': 0.03484320557491289,
'moment': 0.03484320557491289,
'enter': 0.03484320557491289,
'thing': 0.03484320557491289,
'coming': 0.03484320557491289,
'Obi Wan': 0.03484320557491289,
'FALCON COCKPIT': 0.03484320557491289,
'LEADER COCKPIT': 0.03484320557491289,
'close': 0.033101045296167246,
'group': 0.033101045296167246,
'attack': 0.033101045296167246,
'Princess Leia': 0.033101045296167246,
'MOS EISLEY': 0.033101045296167246,
'targeting device': 0.033101045296167246,
'short': 0.0313588850174216,
'much': 0.0313588850174216,
'watches': 0.0313588850174216,
'work': 0.0313588850174216,
'put': 0.0313588850174216,
'Oh': 0.0313588850174216,
'father': 0.0313588850174216,
'position': 0.0313588850174216,
'point': 0.0313588850174216,
'headset': 0.0313588850174216,
'adjust': 0.0313588850174216,
'docking bay': 0.0313588850174216,
'OUTPOST WAR': 0.0313588850174216,
'weapon': 0.029616724738675958,
'along': 0.029616724738675958,
'hear': 0.029616724738675958,
'shut': 0.029616724738675958,
'heard': 0.029616724738675958,
'last': 0.029616724738675958,
'even': 0.029616724738675958,
'guard': 0.029616724738675958,
'laserbolt': 0.029616724738675958,
'appear': 0.029616724738675958,
'Artoo Detoo': 0.029616724738675958,
'far': 0.027874564459930314,
'Empire': 0.027874564459930314,
'station': 0.027874564459930314,
'enough': 0.027874564459930314,
'explosion': 0.027874564459930314,
'floor': 0.027874564459930314,
'follow': 0.027874564459930314,
'sight': 0.027874564459930314,
'four': 0.027874564459930314,
'kid': 0.027874564459930314,
'jump': 0.027874564459930314,
'Sandpeople': 0.027874564459930314,
'open': 0.027874564459930314,
'Jabba': 0.027874564459930314,
'screen': 0.027874564459930314,
'shake': 0.027874564459930314,
'plan': 0.027874564459930314,
'explode': 0.027874564459930314,
'Dodonna': 0.027874564459930314,
'zoom': 0.027874564459930314,
'INTERCOM VOICE': 0.027874564459930314,
'COCKPIT Gold': 0.027874564459930314,
'streak': 0.02613240418118467,
'electronic': 0.02613240418118467,
'boy': 0.02613240418118467,
'next': 0.02613240418118467,
'minute': 0.02613240418118467,
'goes': 0.02613240418118467,
'Commander': 0.02613240418118467,
'man': 0.02613240418118467,
'almost': 0.02613240418118467,
'tell': 0.02613240418118467,
'area': 0.02613240418118467,
'watch': 0.02613240418118467,
'formation': 0.02613240418118467,
'little Artoo': 0.02613240418118467,
'slowly': 0.024390243902439025,
'first': 0.024390243902439025,
'tiny': 0.024390243902439025,
'giant': 0.024390243902439025,
'better': 0.024390243902439025,
'DAY': 0.024390243902439025,
'end': 0.024390243902439025,
'speak': 0.024390243902439025,
'dead': 0.024390243902439025,
'talking': 0.024390243902439025,
'thought': 0.024390243902439025,
'big': 0.024390243902439025,
'second': 0.024390243902439025,
'really': 0.024390243902439025,
'Sandcrawler': 0.024390243902439025,
'beep': 0.024390243902439025,
'shoulder': 0.024390243902439025,
'Stay': 0.024390243902439025,
'arm': 0.024390243902439025,
'Wookiee': 0.024390243902439025,
'moon': 0.024390243902439025,
'battle station': 0.024390243902439025,
'LUKE Well': 0.024390243902439025,
'looks around': 0.024390243902439025,
'beeping sound': 0.024390243902439025,
'laser cannon': 0.024390243902439025,
'TATOOINE': 0.02264808362369338,
'outside': 0.02264808362369338,
'corridor': 0.02264808362369338,
'feet': 0.02264808362369338,
'place': 0.02264808362369338,
'subhallway': 0.02264808362369338,
'rushes': 0.02264808362369338,
'found': 0.02264808362369338,
'great': 0.02264808362369338,
'Rebellion': 0.02264808362369338,
'cut': 0.02264808362369338,
'spot': 0.02264808362369338,
'Listen': 0.02264808362369338,
'trying': 0.02264808362369338,
'never': 0.02264808362369338,
'need': 0.02264808362369338,
'Okay': 0.02264808362369338,
'reaches': 0.02264808362369338,
'year': 0.02264808362369338,
'still': 0.02264808362369338,
'try': 0.02264808362369338,
'laser pistol': 0.02264808362369338,
'Artoo beeps': 0.02264808362369338,
'Wan Kenobi': 0.02264808362369338,
'Imperial TIE': 0.02264808362369338,
'FALCON GUNPORT': 0.02264808362369338,
"BIGGS' COCKPIT": 0.02264808362369338,
'main': 0.020905923344947737,
'battle': 0.020905923344947737,
'battered': 0.020905923344947737,
'aim': 0.020905923344947737,
'moving': 0.020905923344947737,
'Hey': 0.020905923344947737,
'ROOM': 0.020905923344947737,
'lot': 0.020905923344947737,
'keep': 0.020905923344947737,
'anything': 0.020905923344947737,
'leave': 0.020905923344947737,
'canyon': 0.020905923344947737,
'quite': 0.020905923344947737,
'speed': 0.020905923344947737,
'table': 0.020905923344947737,
'disappear': 0.020905923344947737,
'button': 0.020905923344947737,
'use': 0.020905923344947737,
'board': 0.020905923344947737,
'target': 0.020905923344947737,
'grab': 0.020905923344947737,
'R2 unit': 0.020905923344947737,
'Aunt Beru': 0.020905923344947737,
'Ben Kenobi': 0.020905923344947737,
'TIE ship': 0.020905923344947737,
'STAR MAIN': 0.020905923344947737,
'FORWARD BAY': 0.020905923344947737,
'Imperial stormtrooper': 0.020905923344947737,
'Rebel fighter': 0.020905923344947737,
'LEADER Y': 0.020905923344947737,
'STAR TRENCH': 0.020905923344947737,
'hope': 0.01916376306620209,
'struggle': 0.01916376306620209,
'smoke': 0.01916376306620209,
'near': 0.01916376306620209,
'horizon': 0.01916376306620209,
'air': 0.01916376306620209,
'smile': 0.01916376306620209,
'distance': 0.01916376306620209,
'direction': 0.01916376306620209,
'approaching': 0.01916376306620209,
'hatch': 0.01916376306620209,
'narrow': 0.01916376306620209,
'life': 0.01916376306620209,
'bad': 0.01916376306620209,
'uncle': 0.01916376306620209,
'blaster': 0.01916376306620209,
'seen': 0.01916376306620209,
'line': 0.01916376306620209,
'master': 0.01916376306620209,
'become': 0.01916376306620209,
'range': 0.01916376306620209,
'cell': 0.01916376306620209,
'port': 0.01916376306620209,
'wingmen': 0.01916376306620209,
'REBEL BLOCKADE': 0.01916376306620209,
'BLOCKADE RUNNER': 0.01916376306620209,
'old Ben': 0.01916376306620209,
'Rebel base': 0.01916376306620209,
'CONTROL ROOM': 0.01916376306620209,
'COCKPIT TRAVELING': 0.01916376306620209,
'new': 0.017421602787456445,
'home': 0.017421602787456445,
'half': 0.017421602787456445,
'Hurry': 0.017421602787456445,
'young': 0.017421602787456445,
'soldier': 0.017421602787456445,
'getting': 0.017421602787456445,
'course': 0.017421602787456445,
'others': 0.017421602787456445,
'worry': 0.017421602787456445,
'fight': 0.017421602787456445,
'part': 0.017421602787456445,
'approaches': 0.017421602787456445,
'sand': 0.017421602787456445,
'whistle': 0.017421602787456445,
'distant': 0.017421602787456445,
'left': 0.017421602787456445,
'might': 0.017421602787456445,
'bit': 0.017421602787456445,
'shoot': 0.017421602787456445,
'followed': 0.017421602787456445,
'Uh': 0.017421602787456445,
'hangar': 0.017421602787456445,
'able': 0.017421602787456445,
'blow': 0.017421602787456445,
'bartender': 0.017421602787456445,
'fast': 0.017421602787456445,
'check': 0.017421602787456445,
'GREEDO': 0.017421602787456445,
'green': 0.017421602787456445,
'crew': 0.017421602787456445,
'flame': 0.017421602787456445,
'tower': 0.017421602787456445,
'Uncle Owen': 0.017421602787456445,
'HOLD AREA': 0.017421602787456445,
'TATOOINE MOS': 0.017421602787456445,
'Han Solo': 0.017421602787456445,
'control panel': 0.017421602787456445,
'Imperial fighter': 0.017421602787456445,
'MAIN FORWARD': 0.017421602787456445,
'three TIE': 0.017421602787456445,
'galaxy': 0.0156794425087108,
'aboard': 0.0156794425087108,
'human': 0.0156794425087108,
'rush': 0.0156794425087108,
'filled': 0.0156794425087108,
'morning': 0.0156794425087108,
'seven': 0.0156794425087108,
'Dark': 0.0156794425087108,
'mission': 0.0156794425087108,
'RED': 0.0156794425087108,
'call': 0.0156794425087108,
'rest': 0.0156794425087108,
'background': 0.0156794425087108,
'shot': 0.0156794425087108,
'happened': 0.0156794425087108,
'came': 0.0156794425087108,
'low': 0.0156794425087108,
'gets': 0.0156794425087108,
'may': 0.0156794425087108,
'HOMESTEAD': 0.0156794425087108,
'quickly': 0.0156794425087108,
'garage': 0.0156794425087108,
'Yeah': 0.0156794425087108,
'trouble': 0.0156794425087108,
'tries': 0.0156794425087108,
'pushes': 0.0156794425087108,
'gone': 0.0156794425087108,
'yell': 0.0156794425087108,
'closer': 0.0156794425087108,
'step': 0.0156794425087108,
'return': 0.0156794425087108,
'lightsaber': 0.0156794425087108,
'thousand': 0.0156794425087108,
'together': 0.0156794425087108,
'information': 0.0156794425087108,
'readout': 0.0156794425087108,
'ready': 0.0156794425087108,
'burst': 0.0156794425087108,
'elevator': 0.0156794425087108,
'enemy': 0.0156794425087108,
'climb': 0.0156794425087108,
'shadow': 0.0156794425087108,
'Dark Lord': 0.0156794425087108,
'LARS HOMESTEAD': 0.0156794425087108,
'old man': 0.0156794425087108,
'tractor beam': 0.0156794425087108,
'GARBAGE ROOM': 0.0156794425087108,
'exhaust port': 0.0156794425087108,
'COCKPIT Wedge': 0.0156794425087108,
'ROOM Leia': 0.0156794425087108,
'control stick': 0.0156794425087108,
'armored': 0.013937282229965157,
'shaking': 0.013937282229965157,
'scream': 0.013937282229965157,
'creating': 0.013937282229965157,
'desert': 0.013937282229965157,
'white': 0.013937282229965157,
'Landspeeder': 0.013937282229965157,
'leaving': 0.013937282229965157,
'quiet': 0.013937282229965157,
'heading': 0.013937282229965157,
'bring': 0.013937282229965157,
'set': 0.013937282229965157,
'told': 0.013937282229965157,
'yelling': 0.013937282229965157,
'easy': 0.013937282229965157,
'nothing': 0.013937282229965157,
'made': 0.013937282229965157,
'reply': 0.013937282229965157,
'pay': 0.013937282229965157,
'said': 0.013937282229965157,
'best': 0.013937282229965157,
'strange': 0.013937282229965157,
'approach': 0.013937282229965157,
'pop': 0.013937282229965157,
'edge': 0.013937282229965157,
'blue': 0.013937282229965157,
'lost': 0.013937282229965157,
'center': 0.013937282229965157,
'mind': 0.013937282229965157,
'bar': 0.013937282229965157,
'massive': 0.013937282229965157,
'view': 0.013937282229965157,
'walk': 0.013937282229965157,
'everything': 0.013937282229965157,
'corner': 0.013937282229965157,
'transmission': 0.013937282229965157,
'twenty': 0.013937282229965157,
'round': 0.013937282229965157,
'charge': 0.013937282229965157,
'helmet': 0.013937282229965157,
'copy': 0.013937282229965157,
'cover': 0.013937282229965157,
'Yavin': 0.013937282229965157,
'peel': 0.013937282229965157,
'Lord Vader': 0.013937282229965157,
'TATOOINE ROCK': 0.013937282229965157,
'EISLEY STREET': 0.013937282229965157,
'cockpit window': 0.013937282229965157,
'WEDGE COCKPIT': 0.013937282229965157,
'base': 0.012195121951219513,
'evil': 0.012195121951219513,
'entire': 0.012195121951219513,
'craft': 0.012195121951219513,
'tall': 0.012195121951219513,
'understand': 0.012195121951219513,
'escape': 0.012195121951219513,
'making': 0.012195121951219513,
'heavy': 0.012195121951219513,
'hole': 0.012195121951219513,
'holding': 0.012195121951219513,
'barely': 0.012195121951219513,
'nowhere': 0.012195121951219513,
'lifepod': 0.012195121951219513,
'flash': 0.012195121951219513,
'captain': 0.012195121951219513,
'loose': 0.012195121951219513,
'soon': 0.012195121951219513,
'stumbles': 0.012195121951219513,
'believe': 0.012195121951219513,
'senator': 0.012195121951219513,
'dangerous': 0.012195121951219513,
'send': 0.012195121951219513,
'signal': 0.012195121951219513,
'season': 0.012195121951219513,
'CENTRAL': 0.012195121951219513,
'join': 0.012195121951219513,
'always': 0.012195121951219513,
'feel': 0.012195121951219513,
'powerful': 0.012195121951219513,
'glow': 0.012195121951219513,
'eight': 0.012195121951219513,
'size': 0.012195121951219513,
'beam': 0.012195121951219513,
'metal': 0.012195121951219513,
'chamber': 0.012195121951219513,
'pick': 0.012195121951219513,
'slimy': 0.012195121951219513,
'stick': 0.012195121951219513,
'grey': 0.012195121951219513,
'spaceship': 0.012195121951219513,
'laughing': 0.012195121951219513,
'many': 0.012195121951219513,
'afraid': 0.012195121951219513,
'message': 0.012195121951219513,
'bolt': 0.012195121951219513,
'high': 0.012195121951219513,
'deep': 0.012195121951219513,
'lies': 0.012195121951219513,
'Jedi': 0.012195121951219513,
'nervously': 0.012195121951219513,
'general': 0.012195121951219513,
'precise': 0.012195121951219513,
'fighter': 0.012195121951219513,
'lock': 0.012195121951219513,
'ramp': 0.012195121951219513,
'hyperspace': 0.012195121951219513,
'lever': 0.012195121951219513,
'number': 0.012195121951219513,
'closing': 0.012195121951219513,
'shaft': 0.012195121951219513,
'bridge': 0.012195121951219513,
'swing': 0.012195121951219513,
'manage': 0.012195121951219513,
'GUNPORT': 0.012195121951219513,
'temple': 0.012195121951219513,
'maneuver': 0.012195121951219513,
'flying': 0.012195121951219513,
'Flak': 0.012195121951219513,
'little robot': 0.012195121951219513,
'Imperial Officer': 0.012195121951219513,
'POWER STATION': 0.012195121951219513,
'ROCK CANYON': 0.012195121951219513,
'FIRST TROOPER': 0.012195121951219513,
'THREEPIO Oh': 0.012195121951219513,
'STAR INTERCOM': 0.012195121951219513,
'STAR GARBAGE': 0.012195121951219513,
'laser gun': 0.012195121951219513,
'Gold Two': 0.012195121951219513,
'heavens': 0.010452961672473868,
'hidden': 0.010452961672473868,
'destroy': 0.010452961672473868,
'awesome': 0.010452961672473868,
'causing': 0.010452961672473868,
'passageway': 0.010452961672473868,
'metallic': 0.010452961672473868,
'doomed': 0.010452961672473868,
'loud': 0.010452961672473868,
'nervous': 0.010452961672473868,
'figure': 0.010452961672473868,
'belt': 0.010452961672473868,
'transport': 0.010452961672473868,
'circle': 0.010452961672473868,
'every': 0.010452961672473868,
'break': 0.010452961672473868,
'panic': 0.010452961672473868,
'dome': 0.010452961672473868,
'neck': 0.010452961672473868,
'done': 0.010452961672473868,
'scurry': 0.010452961672473868,
'fear': 0.010452961672473868,
'anger': 0.010452961672473868,
'body': 0.010452961672473868,
'dust': 0.010452961672473868,
'dozen': 0.010452961672473868,
'Fixer': 0.010452961672473868,
'noise': 0.010452961672473868,
'surprise': 0.010452961672473868,
'hot': 0.010452961672473868,
'working': 0.010452961672473868,
'forget': 0.010452961672473868,
'hard': 0.010452961672473868,
'play': 0.010452961672473868,
'respond': 0.010452961672473868,
'rock': 0.010452961672473868,
'fault': 0.010452961672473868,
'mouth': 0.010452961672473868,
'whole': 0.010452961672473868,
'taken': 0.010452961672473868,
'men': 0.010452961672473868,
'beside': 0.010452961672473868,
'carefully': 0.010452961672473868,
'form': 0.010452961672473868,
'thirty': 0.010452961672473868,
'lead': 0.010452961672473868,
'pointing': 0.010452961672473868,
'Thank': 0.010452961672473868,
'notice': 0.010452961672473868,
'wonder': 0.010452961672473868,
'either': 0.010452961672473868,
'knew': 0.010452961672473868,
'mean': 0.010452961672473868,
'watching': 0.010452961672473868,
'top': 0.010452961672473868,
'laugh': 0.010452961672473868,
'TAGGE': 0.010452961672473868,
'Motti': 0.010452961672473868,
'cantina': 0.010452961672473868,
'remain': 0.010452961672473868,
'TEN': 0.010452961672473868,
'reach': 0.010452961672473868,
'doorway': 0.010452961672473868,
'Five': 0.010452961672473868,
'alarm': 0.010452961672473868,
'lose': 0.010452961672473868,
'window': 0.010452961672473868,
'seeker': 0.010452961672473868,
'shudder': 0.010452961672473868,
'clear': 0.010452961672473868,
'exit': 0.010452961672473868,
'already': 0.010452961672473868,
'reward': 0.010452961672473868,
'barrage': 0.010452961672473868,
'alert': 0.010452961672473868,
'whip': 0.010452961672473868,
'wave': 0.010452961672473868,
'jungle': 0.010452961672473868,
'Hang': 0.010452961672473868,
'engine': 0.010452961672473868,
'Imperial Stardestroyer': 0.010452961672473868,
'Laserbolts streak': 0.010452961672473868,
'main passageway': 0.010452961672473868,
'farm boy': 0.010452961672473868,
'Artoo whistles': 0.010452961672473868,
'let go': 0.010452961672473868,
'little droid': 0.010452961672473868,
'ROCK MESA': 0.010452961672473868,
'Tusken Raider': 0.010452961672473868,
'Moff Tarkin': 0.010452961672473868,
'old Jedi': 0.010452961672473868,
'gantry officer': 0.010452961672473868,
'BAY SERVICE': 0.010452961672473868,
'SERVICE PANEL': 0.010452961672473868,
'target screen': 0.010452961672473868,
'headset Red': 0.010452961672473868,
'FIVE Y': 0.010452961672473868,
'vast': 0.008710801393728223,
'DEATH': 0.008710801393728223,
'bronze': 0.008710801393728223,
'reactor': 0.008710801393728223,
'destroyed': 0.008710801393728223,
'trust': 0.008710801393728223,
'oil': 0.008710801393728223,
'six': 0.008710801393728223,
'waiting': 0.008710801393728223,
'throws': 0.008710801393728223,
'Sith': 0.008710801393728223,
'Everyone': 0.008710801393728223,
'warrior': 0.008710801393728223,
'opening': 0.008710801393728223,
'broken': 0.008710801393728223,
'bodies': 0.008710801393728223,
'astro': 0.008710801393728223,
'somebody': 0.008710801393728223,
'carrying': 0.008710801393728223,
'terrified': 0.008710801393728223,
'CHIEF': 0.008710801393728223,
'response': 0.008710801393728223,
'empty': 0.008710801393728223,
'STREET': 0.008710801393728223,
'rugged': 0.008710801393728223,
'Camie': 0.008710801393728223,
'wildly': 0.008710801393728223,
'sitting': 0.008710801393728223,
'blasted': 0.008710801393728223,
'Yes': 0.008710801393728223,
'dune': 0.008710801393728223,
'fall': 0.008710801393728223,
'forth': 0.008710801393728223,
'technical': 0.008710801393728223,
'rather': 0.008710801393728223,
'ridge': 0.008710801393728223,
'frantically': 0.008710801393728223,
'inside': 0.008710801393728223,
'wide': 0.008710801393728223,
'stunned': 0.008710801393728223,
'sorry': 0.008710801393728223,
'care': 0.008710801393728223,
'shrouded': 0.008710801393728223,
'fill': 0.008710801393728223,
'alone': 0.008710801393728223,
'slight': 0.008710801393728223,
'flicker': 0.008710801393728223,
'eerie': 0.008710801393728223,
'onto': 0.008710801393728223,
'middle': 0.008710801393728223,
'conversation': 0.008710801393728223,
'tracks': 0.008710801393728223,
'bounce': 0.008710801393728223,
'language': 0.008710801393728223,
'real': 0.008710801393728223,
'entry': 0.008710801393728223,
'late': 0.008710801393728223,
'muffled': 0.008710801393728223,
'finally': 0.008710801393728223,
'chrome': 0.008710801393728223,
'remove': 0.008710801393728223,
'cruiser': 0.008710801393728223,
'twelve': 0.008710801393728223,
'question': 0.008710801393728223,
'data': 0.008710801393728223,
'kind': 0.008710801393728223,
'immediately': 0.008710801393728223,
'concern': 0.008710801393728223,
'fine': 0.008710801393728223,
'agreement': 0.008710801393728223,
'entrance': 0.008710801393728223,
'final': 0.008710801393728223,
'furry': 0.008710801393728223,
'wrong': 0.008710801393728223,
'terror': 0.008710801393728223,
'exotic': 0.008710801393728223,
'piece': 0.008710801393728223,
'chatter': 0.008710801393728223,
'fool': 0.008710801393728223,
'direct': 0.008710801393728223,
'location': 0.008710801393728223,
'alien': 0.008710801393728223,
'hundred': 0.008710801393728223,
'show': 0.008710801393728223,
'drink': 0.008710801393728223,
'ear': 0.008710801393728223,
'sword': 0.008710801393728223,
'paces': 0.008710801393728223,
'attempt': 0.008710801393728223,
'avoid': 0.008710801393728223,
'fly': 0.008710801393728223,
'money': 0.008710801393728223,
'idea': 0.008710801393728223,
'alleyway': 0.008710801393728223,
'secure': 0.008710801393728223,
'passing': 0.008710801393728223,
'couple': 0.008710801393728223,
'flashes': 0.008710801393728223,
'quick': 0.008710801393728223,
'pulled': 0.008710801393728223,
'monitor': 0.008710801393728223,
'socket': 0.008710801393728223,
'level': 0.008710801393728223,
'ball': 0.008710801393728223,
"ain't": 0.008710801393728223,
'camera': 0.008710801393728223,
'crewmen': 0.008710801393728223,
'worried': 0.008710801393728223,
'guy': 0.008710801393728223,
'leading': 0.008710801393728223,
'lean': 0.008710801393728223,
'MASSASSI': 0.008710801393728223,
'READ': 0.008710801393728223,
'draw': 0.008710801393728223,
'technician': 0.008710801393728223,
'TATOOINE DESERT': 0.008710801393728223,
'Luke Skywalker': 0.008710801393728223,
'makes beeping': 0.008710801393728223,
'Yes sir': 0.008710801393728223,
'PLANET TATOOINE': 0.008710801393728223,
'dune sea': 0.008710801393728223,
'far end': 0.008710801393728223,
'astro droid': 0.008710801393728223,
'restraining bolt': 0.008710801393728223,
'Jedi Knight': 0.008710801393728223,
'Grand Moff': 0.008710801393728223,
'door slides': 0.008710801393728223,
'giant Wookiee': 0.008710801393728223,
'darkly clad': 0.008710801393728223,
'slides open': 0.008710801393728223,
'clad creature': 0.008710801393728223,
'Governor Tarkin': 0.008710801393728223,
'races past': 0.008710801393728223,
'command office': 0.008710801393728223,
'MAIN HanGAR': 0.008710801393728223,
'planet Yavin': 0.008710801393728223,
'targeting computer': 0.008710801393728223,
'fire button': 0.008710801393728223,
'Vader wingman': 0.008710801393728223,
'sinister': 0.006968641114982578,
'stolen': 0.006968641114982578,
'save': 0.006968641114982578,
'yellow': 0.006968641114982578,
'spacecraft': 0.006968641114982578,
'deadly': 0.006968641114982578,
'bouncing': 0.006968641114982578,
'radar': 0.006968641114982578,
'equipment': 0.006968641114982578,
'smaller': 0.006968641114982578,
'tremendous': 0.006968641114982578,
'duck': 0.006968641114982578,
'known': 0.006968641114982578,
'settle': 0.006968641114982578,
'lone': 0.006968641114982578,
'wind': 0.006968641114982578,
'pipe': 0.006968641114982578,
'sky': 0.006968641114982578,
'electrobinoculars': 0.006968641114982578,
'ground': 0.006968641114982578,
'tight': 0.006968641114982578,
'joint': 0.006968641114982578,
'smoldering': 0.006968641114982578,
'Emperor': 0.006968641114982578,
'black': 0.006968641114982578,
'sweeps': 0.006968641114982578,
'woman': 0.006968641114982578,
'attention': 0.006968641114982578,
'alcove': 0.006968641114982578,
'girl': 0.006968641114982578,
'companion': 0.006968641114982578,
'gruesome': 0.006968641114982578,
'apart': 0.006968641114982578,
'muted': 0.006968641114982578,
'pod': 0.006968641114982578,
'debris': 0.006968641114982578,
'vehicle': 0.006968641114982578,
'flies': 0.006968641114982578,
'city': 0.006968641114982578,
'sharp': 0.006968641114982578,
'appearance': 0.006968641114982578,
'hug': 0.006968641114982578,
'slightly': 0.006968641114982578,
'binoculars': 0.006968641114982578,
'scanning': 0.006968641114982578,
'freighter': 0.006968641114982578,
'shrugs': 0.006968641114982578,
'obvious': 0.006968641114982578,
'squad': 0.006968641114982578,
'killed': 0.006968641114982578,
'snap': 0.006968641114982578,
'meet': 0.006968641114982578,
'frozen': 0.006968641114982578,
'malfunctioning': 0.006968641114982578,
'within': 0.006968641114982578,
'trail': 0.006968641114982578,
'Skyhopper': 0.006968641114982578,
'gotten': 0.006968641114982578,
"BIGGS'": 0.006968641114982578,
'amazed': 0.006968641114982578,
'chance': 0.006968641114982578,
'learn': 0.006968641114982578,
'starting': 0.006968641114982578,
'wish': 0.006968641114982578,
'Maybe': 0.006968641114982578,
'gargantuan': 0.006968641114982578,
'dim': 0.006968641114982578,
'array': 0.006968641114982578,
'simply': 0.006968641114982578,
'mechanical': 0.006968641114982578,
'bangs': 0.006968641114982578,
'block': 0.006968641114982578,
'covered': 0.006968641114982578,
'choice': 0.006968641114982578,
'sale': 0.006968641114982578,
'similar': 0.006968641114982578,
'box': 0.006968641114982578,
'sparks': 0.006968641114982578,
'damaged': 0.006968641114982578,
'harvest': 0.006968641114982578,
'matter': 0.006968641114982578,
'answer': 0.006968641114982578,
'recording': 0.006968641114982578,
'Pardon': 0.006968641114982578,
'anyone': 0.006968641114982578,
'hurries': 0.006968641114982578,
'food': 0.006968641114982578,
'name': 0.006968641114982578,
"that'll": 0.006968641114982578,
'deactivate': 0.006968641114982578,
'foreground': 0.006968641114982578,
'monstrous': 0.006968641114982578,
'ride': 0.006968641114982578,
'talk': 0.006968641114982578,
'million': 0.006968641114982578,
'react': 0.006968641114982578,
'local': 0.006968641114982578,
'frightened': 0.006968641114982578,
'ancient': 0.006968641114982578,
'yet': 0.006968641114982578,
'belong': 0.006968641114982578,
'remember': 0.006968641114982578,
'pit': 0.006968641114982578,
'switch': 0.006968641114982578,
'listening': 0.006968641114982578,
'operational': 0.006968641114982578,
'chair': 0.006968641114982578,
'gain': 0.006968641114982578,
'bow': 0.006968641114982578,
'suggest': 0.006968641114982578,
'fortress': 0.006968641114982578,
'Bantha': 0.006968641114982578,
'burning': 0.006968641114982578,
'stopped': 0.006968641114982578,
'business': 0.006968641114982578,
'rough': 0.006968641114982578,
'giving': 0.006968641114982578,
'quietly': 0.006968641114982578,
'careful': 0.006968641114982578,
'monster': 0.006968641114982578,
'suit': 0.006968641114982578,
'advance': 0.006968641114982578,
'asking': 0.006968641114982578,
'drop': 0.006968641114982578,
'sign': 0.006968641114982578,
'full': 0.006968641114982578,
'locked': 0.006968641114982578,
'visible': 0.006968641114982578,
'insect': 0.006968641114982578,
'grim': 0.006968641114982578,
'type': 0.006968641114982578,
'transmitter': 0.006968641114982578,
'overhead': 0.006968641114982578,
'scope': 0.006968641114982578,
'violently': 0.006968641114982578,
'Dantooine': 0.006968641114982578,
'remote': 0.006968641114982578,
'anyway': 0.006968641114982578,
'lighted': 0.006968641114982578,
'crosses': 0.006968641114982578,
'straight': 0.006968641114982578,
'pirate': 0.006968641114982578,
'running': 0.006968641114982578,
'buzz': 0.006968641114982578,
'NINE': 0.006968641114982578,
'checked': 0.006968641114982578,
'command': 0.006968641114982578,
'howl': 0.006968641114982578,
'rescue': 0.006968641114982578,
'glance': 0.006968641114982578,
'situation': 0.006968641114982578,
'buzzing': 0.006968641114982578,
'negative': 0.006968641114982578,
'emerge': 0.006968641114982578,
'rumble': 0.006968641114982578,
'pressure': 0.006968641114982578,
'relief': 0.006968641114982578,
'abyss': 0.006968641114982578,
'ease': 0.006968641114982578,
'cloak': 0.006968641114982578,
'spectacular': 0.006968641114982578,
'ladder': 0.006968641114982578,
'soars': 0.006968641114982578,
'Willard': 0.006968641114982578,
'activity': 0.006968641114982578,
'lower': 0.006968641114982578,
'mike': 0.006968641114982578,
'rotate': 0.006968641114982578,
'tail': 0.006968641114982578,
'pursuit': 0.006968641114982578,
'wingman': 0.006968641114982578,
'presses': 0.006968641114982578,
'mark': 0.006968641114982578,
'repair': 0.006968641114982578,
'DESERT WASTELAND': 0.006968641114982578,
'utility belt': 0.006968641114982578,
'beautiful young': 0.006968641114982578,
'STATION DAY': 0.006968641114982578,
'Imperial Senate': 0.006968641114982578,
'new droids': 0.006968641114982578,
'laser rifle': 0.006968641114982578,
'CONFERENCE ROOM': 0.006968641114982578,
'TATOOINE WASTELAND': 0.006968641114982578,
'EISLEY SPACEPORT': 0.006968641114982578,
'speeder lot': 0.006968641114982578,
'TARKIN Yes': 0.006968641114982578,
'DETENTION AREA': 0.006968641114982578,
'Chewbacca growls': 0.006968641114982578,
'computer readout': 0.006968641114982578,
'HanGAR DECK': 0.006968641114982578,
'bridge overhang': 0.006968641114982578,
'projected target': 0.006968641114982578,
'projected screen': 0.006968641114982578,
'FOURTH MOON': 0.006968641114982578,
'GUN EMPLACEMENTS': 0.006968641114982578,
'ship spins': 0.006968641114982578,
'ago': 0.005226480836236934,
'infinity': 0.005226480836236934,
'Galactic': 0.005226480836236934,
'managed': 0.005226480836236934,
'secret': 0.005226480836236934,
'silver': 0.005226480836236934,
'armed': 0.005226480836236934,
'mass': 0.005226480836236934,
'series': 0.005226480836236934,
"There'll": 0.005226480836236934,
'beeping': 0.005226480836236934,
'score': 0.005226480836236934}
In [24]:
import random
def grey_color_func(word, font_size, position, orientation,
random_state=None, **kwargs):
return 'hsl(0, 0%%, %d%%)' % random.randint(60,100) # hsl: 색상, 채도, 명도
In [25]:
plt.figure(figsize=(12,12))
plt.imshow(wc.recolor(color_func=grey_color_func,random_state=3),
interpolation='bilinear')
plt.axis('off')
plt.show()
육아휴직 관련 법안 1809890호¶
In [26]:
import nltk
In [27]:
from konlpy.corpus import kobill
files_ko = kobill.fileids()
doc_ko = kobill.open('1809890.txt').read()
In [28]:
# doc_ko
In [29]:
from konlpy.tag import Twitter
t = Twitter()
tokens_ko = t.nouns(doc_ko)
# tokens_ko
In [30]:
ko = nltk.Text(tokens_ko)
In [31]:
print(len(ko.tokens))
print(len(set(ko.tokens)))
ko.vocab()
735
250
Out[31]:
FreqDist({'육아휴직': 38, '발생': 19, '만': 18, '이하': 18, '비용': 17, '액': 17, '경우': 16, '세': 16, '자녀': 14, '고용': 14, ...})
In [32]:
plt.figure(figsize=(12,6))
ko.plot(50)
plt.show()
In [33]:
# 불용어 처리
stop_words = ['.', '(', ')', ',', "'", '%', '-', 'X', ').', '×',
'의','자','에','안','번','호','을','이','다','만',
'로','가','를','발','인','액','수','세','중','제','월','곳',
'위','이하','것','표','명', '및']
ko = [each_word for each_word in ko if each_word not in stop_words]
# ko
In [34]:
# stop_words
In [35]:
ko = nltk.Text(ko)
plt.figure(figsize=(12,6))
ko.plot(50) #Plot sorted frequency of top 50 tokens
plt.show()
In [36]:
ko.count('초등학교')
Out[36]:
6
In [37]:
# 단어의 문장내 위치
plt.figure(figsize=(12,6))
ko.dispersion_plot(['육아휴직', '초등학교', '공무원'])
In [38]:
ko.concordance('초등학교')
Displaying 6 of 6 matches:
여상규 안규백 황영철 박영아 김정훈 김학송 의원 제안 이유 내용 초등학교 저학년 경우 부모 사랑 필요 나이 현재 공무원 자녀 양육 육아휴직
부모 사랑 필요 나이 현재 공무원 자녀 양육 육아휴직 자녀 나이 초등학교 저학년 자녀 해당 부모님 일자리 곧 출산 의욕 저하 문제 것임 따
률 지방공무원법 일부 개정 법률 지방공무원법 일부 다음 개정 항제 초등학교 취학 전 자녀 취학 중인 경우 초등학교 학년 말 자녀 부 칙 법
원법 일부 다음 개정 항제 초등학교 취학 전 자녀 취학 중인 경우 초등학교 학년 말 자녀 부 칙 법 공포 날 시행 신 구조 문대비 현 행 개
사유 직 임용 휴직 다만 경우 대통령령 정 사정 직 생 략 현행 초등학교 취 취학 중인 경우 학 전 자녀 양육 초등학교 학년 여 여자 공무
정 직 생 략 현행 초등학교 취 취학 중인 경우 학 전 자녀 양육 초등학교 학년 여 여자 공무원 말 자녀 임신 출산 때 생 략 생 략 현행
In [39]:
# ko.collocations() 문장 내에서 연이어 나타난다
ko.collocation_list()
Out[39]:
[('초등학교', '저학년'),
('공무원', '근로자'),
('근로자', '육아휴직'),
('대상자', '육아휴직'),
('육아휴직', '대상자'),
('공무원', '육아휴직'),
('육아휴직', '육아휴직')]
In [40]:
data = ko.vocab().most_common(150)
wordcloud = WordCloud(font_path='c:Windows/Fonts/malgun.ttf',
relative_scaling=0.1,
background_color='white'
).generate_from_frequencies(dict(data))
plt.figure(figsize=(12,6))
plt.imshow(wordcloud)
plt.axis('off')
plt.show()
In [41]:
# 잘난체 검색 > 여기어때.잘난체 > ttf download
data = ko.vocab().most_common(150)
wordcloud = WordCloud(font_path='C:/Users/Playdata/Jalnan.ttf',
relative_scaling=0.1,
background_color='white',
colormap='winter' # gist_heat
).generate_from_frequencies(dict(data))
plt.figure(figsize=(12,8))
plt.imshow(wordcloud)
plt.axis('off')
plt.show()
In [42]:
mask1 = np.array(Image.open('../../data/pan.png'))
data = ko.vocab().most_common(150)
wordcloud = WordCloud(font_path='C:/Users/Playdata/Jalnan.ttf',
relative_scaling=0.1,
background_color='white',
colormap='winter', # gist_heat
mask=mask1
).generate_from_frequencies(dict(data))
plt.figure(figsize=(12,8))
plt.imshow(wordcloud)
plt.axis('off')
plt.show()
5. Naive Bayes Classifier의 이해 - 영문¶
In [43]:
from nltk.tokenize import word_tokenize
import nltk
In [44]:
# 지도 학습
train = [('i like you', 'pos'),
('i hate you', 'neg'),
('you like me', 'neg',
('i like her', 'pos'))]
In [45]:
print(train[0][0])
print(word_tokenize(train[0][0]))
i like you
['i', 'like', 'you']
In [46]:
for i in train:
for j in word_tokenize(i[0]):
print(j.lower())
i
like
you
i
hate
you
you
like
me
In [47]:
# 말 뭉치
all_words = set(word.lower() for sentence in train
for word in word_tokenize(sentence[0]))
all_words
Out[47]:
{'hate', 'i', 'like', 'me', 'you'}
In [48]:
t = [({word: (word in word_tokenize(x[0])) for word in all_words}, x[1]) for x in train]
t
Out[48]:
[({'like': True, 'me': False, 'i': True, 'hate': False, 'you': True}, 'pos'),
({'like': False, 'me': False, 'i': True, 'hate': True, 'you': True}, 'neg'),
({'like': True, 'me': True, 'i': False, 'hate': False, 'you': True}, 'neg')]
In [49]:
classifier = nltk.NaiveBayesClassifier.train(t)
classifier.show_most_informative_features()
Most Informative Features
hate = False pos : neg = 1.5 : 1.0
i = True pos : neg = 1.5 : 1.0
like = True pos : neg = 1.5 : 1.0
me = False pos : neg = 1.5 : 1.0
you = True neg : pos = 1.0 : 1.0
6. Naive Bayes Calssifier의 이해 - 한글¶
In [50]:
from konlpy.tag import Twitter
In [51]:
pos_tagger = Twitter()
In [52]:
train = [('메리가 좋아', 'pos'),
('고양이도 좋아', 'pos'),
('난 수업이 지루해', 'neg'),
('메리는 이쁜 고양이야', 'pos'),
('난 마치고 메리랑 놀거야', 'pos'),]
In [53]:
# 말 뭉치
all_words = set(word.lower() for sentence in train
for word in word_tokenize(sentence[0]))
all_words
Out[53]:
{'고양이도',
'고양이야',
'난',
'놀거야',
'마치고',
'메리가',
'메리는',
'메리랑',
'수업이',
'이쁜',
'좋아',
'지루해'}
In [54]:
# 말뭉치에 있는지 여부를 나타냄(5개의 훈련 세트)
t= [({word: (word in word_tokenize(x[0])) for word in all_words}, x[1]) for x in train]
t
Out[54]:
[({'수업이': False,
'난': False,
'고양이도': False,
'좋아': True,
'지루해': False,
'놀거야': False,
'메리는': False,
'이쁜': False,
'고양이야': False,
'마치고': False,
'메리랑': False,
'메리가': True},
'pos'),
({'수업이': False,
'난': False,
'고양이도': True,
'좋아': True,
'지루해': False,
'놀거야': False,
'메리는': False,
'이쁜': False,
'고양이야': False,
'마치고': False,
'메리랑': False,
'메리가': False},
'pos'),
({'수업이': True,
'난': True,
'고양이도': False,
'좋아': False,
'지루해': True,
'놀거야': False,
'메리는': False,
'이쁜': False,
'고양이야': False,
'마치고': False,
'메리랑': False,
'메리가': False},
'neg'),
({'수업이': False,
'난': False,
'고양이도': False,
'좋아': False,
'지루해': False,
'놀거야': False,
'메리는': True,
'이쁜': True,
'고양이야': True,
'마치고': False,
'메리랑': False,
'메리가': False},
'pos'),
({'수업이': False,
'난': True,
'고양이도': False,
'좋아': False,
'지루해': False,
'놀거야': True,
'메리는': False,
'이쁜': False,
'고양이야': False,
'마치고': True,
'메리랑': True,
'메리가': False},
'pos')]
In [55]:
classifier = nltk.NaiveBayesClassifier.train(t)
classifier.show_most_informative_features()
Most Informative Features
난 = True neg : pos = 2.5 : 1.0
좋아 = False neg : pos = 1.5 : 1.0
고양이도 = False neg : pos = 1.1 : 1.0
고양이야 = False neg : pos = 1.1 : 1.0
놀거야 = False neg : pos = 1.1 : 1.0
마치고 = False neg : pos = 1.1 : 1.0
메리가 = False neg : pos = 1.1 : 1.0
메리는 = False neg : pos = 1.1 : 1.0
메리랑 = False neg : pos = 1.1 : 1.0
이쁜 = False neg : pos = 1.1 : 1.0
In [56]:
test_sentense = '난 수업이 마치면 메리랑 놀거야'
In [57]:
test_sent_feature = {word.lower():
(word in word_tokenize(test_sentense.lower()))
for word in all_words}
test_sent_feature
Out[57]:
{'수업이': True,
'난': True,
'고양이도': False,
'좋아': False,
'지루해': False,
'놀거야': True,
'메리는': False,
'이쁜': False,
'고양이야': False,
'마치고': False,
'메리랑': True,
'메리가': False}
In [58]:
from konlpy.tag import Twitter
twitter = Twitter()
print(twitter.pos('이것도 재미 있습니당ㅋㅋ'))
print('--------------------------------------------------------------------')
print(twitter.pos('이것도 재미 있습니당ㅋㅋ', norm=True))
print('--------------------------------------------------------------------')
print(twitter.pos('이것도 재미 있습니당ㅋㅋ', norm=True, stem=True))
[('이', 'Determiner'), ('것', 'Noun'), ('도', 'Josa'), ('재미', 'Noun'), ('있', 'Adjective'), ('습', 'Noun'), ('니당', 'Verb'), ('ㅋㅋ', 'KoreanParticle')]
--------------------------------------------------------------------
[('이', 'Determiner'), ('것', 'Noun'), ('도', 'Josa'), ('재미', 'Noun'), ('있습니다', 'Adjective'), ('ㅋㅋ', 'KoreanParticle')]
--------------------------------------------------------------------
[('이', 'Determiner'), ('것', 'Noun'), ('도', 'Josa'), ('재미', 'Noun'), ('있다', 'Adjective'), ('ㅋㅋ', 'KoreanParticle')]
In [59]:
# stem=True : 원형, norm=True : 정규화
def tokenize(doc):
return ['/'.join(t) for t in pos_tagger.pos(doc, norm=True, stem=True)]
In [60]:
for row in train:
print(tokenize(row[0]))
['메리/Noun', '가/Josa', '좋다/Adjective']
['고양이/Noun', '도/Josa', '좋다/Adjective']
['난/Noun', '수업/Noun', '이/Josa', '지루하다/Adjective']
['메리/Noun', '는/Josa', '이쁘다/Adjective', '고양이/Noun', '야/Josa']
['난/Noun', '마치/Noun', '고/Josa', '메리/Noun', '랑/Josa', '놀다/Verb']
In [61]:
train_docs = [(tokenize(row[0]), row[1]) for row in train]
train_docs
Out[61]:
[(['메리/Noun', '가/Josa', '좋다/Adjective'], 'pos'),
(['고양이/Noun', '도/Josa', '좋다/Adjective'], 'pos'),
(['난/Noun', '수업/Noun', '이/Josa', '지루하다/Adjective'], 'neg'),
(['메리/Noun', '는/Josa', '이쁘다/Adjective', '고양이/Noun', '야/Josa'], 'pos'),
(['난/Noun', '마치/Noun', '고/Josa', '메리/Noun', '랑/Josa', '놀다/Verb'], 'pos')]
In [62]:
tokens = [t for d in train_docs for t in d[0]]
tokens
Out[62]:
['메리/Noun',
'가/Josa',
'좋다/Adjective',
'고양이/Noun',
'도/Josa',
'좋다/Adjective',
'난/Noun',
'수업/Noun',
'이/Josa',
'지루하다/Adjective',
'메리/Noun',
'는/Josa',
'이쁘다/Adjective',
'고양이/Noun',
'야/Josa',
'난/Noun',
'마치/Noun',
'고/Josa',
'메리/Noun',
'랑/Josa',
'놀다/Verb']
In [63]:
def term_exists(doc):
return {word: (word in set(doc)) for word in tokens}
In [64]:
for d, c in train_docs:
print(d, c)
['메리/Noun', '가/Josa', '좋다/Adjective'] pos
['고양이/Noun', '도/Josa', '좋다/Adjective'] pos
['난/Noun', '수업/Noun', '이/Josa', '지루하다/Adjective'] neg
['메리/Noun', '는/Josa', '이쁘다/Adjective', '고양이/Noun', '야/Josa'] pos
['난/Noun', '마치/Noun', '고/Josa', '메리/Noun', '랑/Josa', '놀다/Verb'] pos
In [65]:
# '메리가 좋아', '고양이도 좋아'
# 토큰라이징한 후 문장이 말뭉치에 있는지 확인
train_xy = [(term_exists(d), c) for d,c in train_docs]
train_xy
Out[65]:
[({'메리/Noun': True,
'가/Josa': True,
'좋다/Adjective': True,
'고양이/Noun': False,
'도/Josa': False,
'난/Noun': False,
'수업/Noun': False,
'이/Josa': False,
'지루하다/Adjective': False,
'는/Josa': False,
'이쁘다/Adjective': False,
'야/Josa': False,
'마치/Noun': False,
'고/Josa': False,
'랑/Josa': False,
'놀다/Verb': False},
'pos'),
({'메리/Noun': False,
'가/Josa': False,
'좋다/Adjective': True,
'고양이/Noun': True,
'도/Josa': True,
'난/Noun': False,
'수업/Noun': False,
'이/Josa': False,
'지루하다/Adjective': False,
'는/Josa': False,
'이쁘다/Adjective': False,
'야/Josa': False,
'마치/Noun': False,
'고/Josa': False,
'랑/Josa': False,
'놀다/Verb': False},
'pos'),
({'메리/Noun': False,
'가/Josa': False,
'좋다/Adjective': False,
'고양이/Noun': False,
'도/Josa': False,
'난/Noun': True,
'수업/Noun': True,
'이/Josa': True,
'지루하다/Adjective': True,
'는/Josa': False,
'이쁘다/Adjective': False,
'야/Josa': False,
'마치/Noun': False,
'고/Josa': False,
'랑/Josa': False,
'놀다/Verb': False},
'neg'),
({'메리/Noun': True,
'가/Josa': False,
'좋다/Adjective': False,
'고양이/Noun': True,
'도/Josa': False,
'난/Noun': False,
'수업/Noun': False,
'이/Josa': False,
'지루하다/Adjective': False,
'는/Josa': True,
'이쁘다/Adjective': True,
'야/Josa': True,
'마치/Noun': False,
'고/Josa': False,
'랑/Josa': False,
'놀다/Verb': False},
'pos'),
({'메리/Noun': True,
'가/Josa': False,
'좋다/Adjective': False,
'고양이/Noun': False,
'도/Josa': False,
'난/Noun': True,
'수업/Noun': False,
'이/Josa': False,
'지루하다/Adjective': False,
'는/Josa': False,
'이쁘다/Adjective': False,
'야/Josa': False,
'마치/Noun': True,
'고/Josa': True,
'랑/Josa': True,
'놀다/Verb': True},
'pos')]
In [66]:
classifier = nltk.NaiveBayesClassifier.train(train_xy)
In [67]:
test_sentense = [('난 수업이 마치면 메리랑 놀거야')]
In [68]:
test_docs = pos_tagger.pos(test_sentense[0])
test_docs
Out[68]:
[('난', 'Noun'),
('수업', 'Noun'),
('이', 'Josa'),
('마치', 'Noun'),
('면', 'Josa'),
('메리', 'Noun'),
('랑', 'Josa'),
('놀거야', 'Verb')]
In [69]:
test_sent_features = {word: (word in tokens) for word in test_docs}
test_sent_features
Out[69]:
{('난', 'Noun'): False,
('수업', 'Noun'): False,
('이', 'Josa'): False,
('마치', 'Noun'): False,
('면', 'Josa'): False,
('메리', 'Noun'): False,
('랑', 'Josa'): False,
('놀거야', 'Verb'): False}
In [70]:
classifier.classify(test_sent_features)
Out[70]:
'pos'
반응형