Tokenize Text - Machine Learning

Tokenize Text

Preliminaries

# Load library
from nltk.tokenize import word_tokenize, sent_tokenize

Create Text Data

# Create a sample text containing two sentences to demonstrate tokenization.
# NOTE(review): the name `string` shadows the stdlib `string` module — fine in
# this throwaway example, but worth renaming (e.g. `text`) in real code.
string = "The science of today is the technology of tomorrow. Tomorrow is today."

Tokenize Words

# Tokenize words: split the text into individual tokens.
# As the output below shows, punctuation ('.') comes back as separate tokens.
# Bare expression — in a notebook the resulting list is displayed; in a plain
# script the return value would be discarded.
word_tokenize(string)
['The',
 'science',
 'of',
 'today',
 'is',
 'the',
 'technology',
 'of',
 'tomorrow',
 '.',
 'Tomorrow',
 'is',
 'today',
 '.']

Tokenize Sentences

# Tokenize sentences: split the text into a list of sentence strings,
# as shown by the two-element output below.
# NOTE(review): both tokenizers require the NLTK 'punkt' model to be
# downloaded (nltk.download('punkt')) — confirm it is installed before running.
sent_tokenize(string)
['The science of today is the technology of tomorrow.', 'Tomorrow is today.']