Handling Missing Values In Time Series - Machine Learning

Handling Missing Values In Time Series

Preliminaries

# Load libraries
import pandas as pd
import numpy as np

Create Date Data With Gap In Values

# Create a month-end date index: five periods starting in January 2010.
# An explicit MonthEnd offset object is used instead of the string alias 'M',
# which pandas 2.2 deprecated in favour of 'ME'; the offset object is accepted
# by both older and newer pandas. The ISO 8601 start date avoids any
# day-first / month-first ambiguity.
time_index = pd.date_range('2010-01-01', periods=5, freq=pd.offsets.MonthEnd())

# Create data frame, set index
df = pd.DataFrame(index=time_index)

# Create feature with a gap of missing values (two consecutive NaNs)
df['Sales'] = [1.0, 2.0, np.nan, np.nan, 5.0]

Interpolate Missing Values

# Fill the gap by linear interpolation between the neighbouring known values
df.interpolate(method='linear')
Sales
2010-01-31 1.0
2010-02-28 2.0
2010-03-31 3.0
2010-04-30 4.0
2010-05-31 5.0

Forward-fill Missing Values

# Forward-fill: carry the last valid observation forward into the gap
df.ffill()
Sales
2010-01-31 1.0
2010-02-28 2.0
2010-03-31 2.0
2010-04-30 2.0
2010-05-31 5.0

Back-fill Missing Values

# Back-fill: carry the next valid observation backward into the gap
df.bfill()
Sales
2010-01-31 1.0
2010-02-28 2.0
2010-03-31 5.0
2010-04-30 5.0
2010-05-31 5.0

Interpolate Missing Values, But Fill At Most One Consecutive Value

# Interpolate linearly, but fill at most one consecutive NaN, working forward
# from the last valid observation; the remainder of the gap stays NaN
df.interpolate(
    method='linear',
    limit_direction='forward',
    limit=1,
)
Sales
2010-01-31 1.0
2010-02-28 2.0
2010-03-31 3.0
2010-04-30 NaN
2010-05-31 5.0