Preliminaries
# Load library
import pandas as pd
Create Data
# Build the example housing data in a single constructor call;
# the final row (116 bathrooms, 48000 square feet) is the extreme observation
houses = pd.DataFrame({
    'Price': [534433, 392333, 293222, 4322032],
    'Bathrooms': [2, 3.5, 2, 116],
    'Square_Feet': [1500, 2500, 1500, 48000],
})
# Display the DataFrame
houses

     Price  Bathrooms  Square_Feet
0   534433        2.0         1500
1   392333        3.5         2500
2   293222        2.0         1500
3  4322032      116.0        48000
Option 1: Drop
# Keep only the rows with fewer than 20 bathrooms
# (observations at or above that cutoff are left out of the result)
houses.loc[houses['Bathrooms'] < 20]

    Price  Bathrooms  Square_Feet
0  534433        2.0         1500
1  392333        3.5         2500
2  293222        2.0         1500
Option 2: Mark
# Load library
import numpy as np

# Add a 0/1 indicator column: 0 where Bathrooms is under 20, 1 otherwise
houses['Outlier'] = np.where(houses['Bathrooms'].lt(20), 0, 1)

# Show data
houses

     Price  Bathrooms  Square_Feet  Outlier
0   534433        2.0         1500        0
1   392333        3.5         2500        0
2   293222        2.0         1500        0
3  4322032      116.0        48000        1
Option 3: Rescale
# Log feature: take the natural log of Square_Feet with a single vectorized
# NumPy call on the whole column instead of a Python-level loop per element
houses['Log_Of_Square_Feet'] = np.log(houses['Square_Feet'])
# Show data
houses

     Price  Bathrooms  Square_Feet  Outlier  Log_Of_Square_Feet
0   534433        2.0         1500        0            7.313220
1   392333        3.5         2500        0            7.824046
2   293222        2.0         1500        0            7.313220
3  4322032      116.0        48000        1           10.778956