Make Simulated Data For Regression - Machine Learning

Make Simulated Data For Regression

Preliminaries

import pandas as pd
from sklearn.datasets import make_regression

Create Simulated Data

# Simulate a regression dataset and keep the features, the outputs, and the
# true coefficients used to generate the data:
#   - 100 samples with three features, only two of which are informative
#   - a single target value per observation
#   - Gaussian noise with a standard deviation of 0.0 (i.e. no noise)
#   - coef=True so the true coefficients are returned alongside the data
features, output, coef = make_regression(
    n_samples=100,
    n_features=3,
    n_informative=2,
    n_targets=1,
    noise=0.0,
    coef=True,
)

View Simulated Data

# Show the first five rows of the feature matrix, one labelled column per feature
pd.DataFrame(
    features,
    columns=['Store 1', 'Store 2', 'Store 3'],
).head(n=5)
Store 1 Store 2 Store 3
0 -0.166697 -0.177142 -2.329568
1 -0.093566 -0.544292 0.685165
2 0.625958 -0.193049 1.168012
3 -0.843925 -0.567444 -0.193631
4 -1.079227 -0.819236 1.609171
# Show the first five target values as a single labelled column
pd.DataFrame(
    output,
    columns=['Sales'],
).head(n=5)
Sales
0 -149.387162
1 -4.164344
2 52.166904
3 -56.996180
4 27.246575
# Show the true coefficients returned by make_regression, one row per feature
pd.DataFrame(
    coef,
    columns=['True Coefficient Values'],
)
True Coefficient Values
0 0.000000
1 80.654346
2 57.993548