<aside> ✅ 정규 표현식

</aside>

<aside> ✅ 파이썬에서 정규 표현식 사용

</aside>

<aside> ✅ 단어의 토큰화

</aside>

import nltk

# Sample sentence reused by the word-tokenization examples below.
sentence = 'word is very difference'
# Split on single spaces. str.split already returns a fresh list, so wrapping
# it in an identity comprehension only made a redundant copy.
tokens = sentence.split(' ')

from nltk.tokenize import word_tokenize

# Fetch the 'punkt' resource through NLTK's downloader before tokenizing.
# NOTE(review): newer NLTK releases may require 'punkt_tab' instead; verify
# against the installed version.
nltk.download('punkt')

# Tokenize the same sentence with NLTK; this rebinds `tokens`.
tokens = word_tokenize(sentence)
print(tokens)

<aside> ✅ 문장의 토큰화

</aside>

from nltk import sent_tokenize

# Three sentences separated by real newline characters. The snippet previously
# contained '\\n' (a literal backslash followed by 'n'), so the text printed on
# one line and left no whitespace after each period, which likely gives the
# sentence tokenizer no boundary to split on.
sentences = 'The world is peace.\nbut The world is still proceeding war.\nso we be careful saving peace.'

print(sentences)

# Split the text into sentences with NLTK's sent_tokenize and print the result.
tokens_v2 = sent_tokenize(sentences)
print(tokens_v2)