Remove Stop Words - Machine Learning

Remove Stop Words

Authors: Chris Albon

Preliminaries

# Load library
from nltk.corpus import stopwords

# Download the stop word corpus. This only has to fetch data the first time;
# on later runs NLTK reports the package is already up to date and returns True.
import nltk
nltk.download('stopwords')
[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/chrisalbon/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!





True

Create Word Tokens

# Example sentence, already split into lowercase word tokens
tokenized_words = [
    'i', 'am', 'going', 'to', 'go',
    'to', 'the', 'store', 'and', 'park',
]

Load Stop Words

# Load NLTK's English stop word list (used below to filter the tokens)
stop_words = stopwords.words('english')

# Preview the first five stop words
stop_words[:5]
['i', 'me', 'my', 'myself', 'we']

Remove Stop Words

# Remove stop words.
# Build a set once so each membership test is O(1) instead of scanning the
# whole stop word list for every token; the result and its order are unchanged.
stop_word_set = set(stop_words)
[word for word in tokenized_words if word not in stop_word_set]
['going', 'go', 'store', 'park']