Deleting Missing Values - Machine Learning

Deleting Missing Values

Preliminaries

# Load libraries
import numpy as np
import pandas as pd

Create Feature Matrix

# Build the feature matrix: five observations (rows) with two features
# (columns) each; the last observation is missing its first feature.
X = np.array(
    [
        [1, 2],
        [6, 3],
        [8, 4],
        [9, 5],
        [np.nan, 4],
    ],
    dtype=float,  # same dtype NumPy infers once np.nan is present
)

Drop Missing Values Using NumPy

# Keep only the observations (rows) in which every value is present,
# i.e. no entry in the row is NaN
X[np.all(~np.isnan(X), axis=1)]
array([[ 1.,  2.],
       [ 6.,  3.],
       [ 8.,  4.],
       [ 9.,  5.]])

Drop Missing Values Using pandas

# Wrap the feature matrix in a data frame with named columns
df = pd.DataFrame(data=X, columns=['feature_1', 'feature_2'])

# Drop every row that contains at least one missing value
# (returns a new data frame; df itself is left unchanged)
df.dropna(axis=0, how='any')
   feature_1  feature_2
0        1.0        2.0
1        6.0        3.0
2        8.0        4.0
3        9.0        5.0