Title:
Exercise: Multi-collinearity vs Model Predictions
Description:
The goal of this exercise is to see how multi-collinearity can affect a model's predictions.
To do this, perform a multi-linear regression on the given dataset and compare its coefficients with those from simple linear regressions on the individual predictors.
Data Description:
The dataset colinearity.csv contains four predictor variables, x1 through x4, and a single response variable.
Instructions:
- Read the dataset colinearity.csv as a dataframe.
- For each predictor variable, create a simple linear regression model with the same response variable.
- Compute the coefficient of each model and store them in a list.
- Fit all predictors together using a separate multi-linear regression object.
- Calculate the coefficients of this model.
- Compare the coefficients of the multi-linear regression model with those of the simple linear regression models (the synthetic sketch below shows the effect you should expect).
DISCUSSION: Why do you think the coefficients change, and what does that mean?
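Before starting, it may help to see the effect in isolation. Below is a minimal, ungraded sketch on synthetic data (all names and numbers here are made up for illustration and are not taken from colinearity.csv): two nearly identical predictors are generated, and the simple and multi-linear coefficients are compared.

# Illustrative sketch on synthetic data -- not part of the graded exercise
import numpy as np
from sklearn.linear_model import LinearRegression

rng = np.random.default_rng(0)
n = 500
x1 = rng.normal(size=n)
x2 = x1 + 0.1 * rng.normal(size=n)      # x2 is almost a copy of x1
y = 2 * x1 + 3 * x2 + rng.normal(size=n)

# Simple regressions: each predictor soaks up the other's effect (~5)
for col in (x1, x2):
    print(LinearRegression().fit(col.reshape(-1, 1), y).coef_)

# Multi-linear regression: the effect is split between the two (~2 and ~3)
print(LinearRegression().fit(np.column_stack([x1, x2]), y).coef_)

Because x1 and x2 carry almost the same information, each simple regression attributes their combined effect to a single variable, while the joint fit divides it between them; the exercise below reproduces this with the given dataset.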
Hints:
pd.read_csv(filename) Returns a pandas dataframe containing the data and labels from the file.
pd.DataFrame.drop() Drops specified labels from rows or columns.
sklearn.linear_model.LinearRegression Returns a linear regression object from the sklearn library.
sklearn.linear_model.LinearRegression.coef_ This attribute returns the coefficient(s) of the linear regression object.
sklearn.linear_model.LinearRegression.fit() Fits the linear model to the data.
np.ndarray.reshape() Returns an np.ndarray with the values in the specified shape.
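On the reshape hint: sklearn's fit() expects the predictors as a 2-D array. Selecting a column with double brackets already yields a 2-D dataframe, so reshaping is just one of two equivalent options (the column name 'x1' below is only for illustration):

# Two equivalent ways to build a 2-D predictor matrix for sklearn
x_2d = df[['x1']]                      # one-column dataframe, shape (n, 1)
x_2d = df['x1'].values.reshape(-1, 1)  # 1-D series -> (n, 1) ndarray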
Note: This exercise is auto-graded and you can try multiple attempts.
# Import necessary libraries
import numpy as np
import pandas as pd
import seaborn as sns
from pprint import pprint
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
%matplotlib inline
# Read the file named "colinearity.csv" into a Pandas dataframe
df = pd.read_csv(___)
# Take a quick look at the dataset
df.head()
Creation of Linear Regression Objects
# Choose all the predictors as the variable 'X' (note capitalization of X for multiple features)
X = df.drop([___],axis=1)
# Choose the response variable 'y'
y = df.___
### edTest(test_coeff) ###
# Initialize a list to store the beta values for each linear regression model
linear_coef = []
# Loop over all the predictors
# In each loop "i" holds the name of the predictor
for i in X:
    # Set the current predictor as the variable x
    x = df[[___]]

    # Create a linear regression object
    linreg = ___

    # Fit the model with the training data
    # Remember to choose only one column at a time, i.e. given by x
    linreg.fit(___,___)

    # Add the coefficient value of the model to the list
    linear_coef.append(linreg.coef_)
Multi-Linear Regression using all variables
# Perform multi-linear regression with all predictors
multi_linear = LinearRegression()
# Fit the multi-linear regression on all features of the entire data
multi_linear.fit(___,___)
# Get the coefficients (plural) of the model
multi_coef = multi_linear.coef_
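An optional, ungraded sanity check once the blanks above are filled in: coef_ returns one value per feature, in the column order of X, so the coefficients can be paired with their predictor names explicitly.

# Pair each coefficient with its predictor (coef_ follows the column order of X)
for name, beta in zip(X.columns, multi_linear.coef_):
    print(f'{name}: beta = {beta:.2f}')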
Printing the individual $\beta$ values
# Helper code to see the beta values of the simple linear regression models
print('By simple (one variable) linear regression for each variable:', sep = '\n')
for i in range(4):
    pprint(f'Value of beta{i+1} = {linear_coef[i][0]:.2f}')
### edTest(test_multi_coeff) ###
# Helper code to compare with the values from the multi-linear regression
print('By multi-linear regression on all variables:')
for i in range(4):
    pprint(f'Value of beta{i+1} = {round(multi_coef[i],2)}')
⏸ Why do you think the $\beta$ values are different in the two cases?
A. Because the random seed selected is not as random as we would imagine.
B. Because of collinearity between $x_1$ and $x_4$
C. Because multi-linear regression is not a stable model
D. Because of the measurement error in the data
### edTest(test_chow1) ###
# Submit an answer choice as a string below
# (Eg. if you choose option C, put 'C')
answer1 = '___'
# Helper code to visualize the heatmap of the correlation matrix
corrMatrix = df[['x1','x2','x3','x4']].corr()
sns.heatmap(corrMatrix, annot=True)
plt.show()
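If the heatmap points to a strongly correlated pair, you can also read the exact value off the correlation matrix; for example, the sketch below checks the pair singled out in option B (using the predictor names from the heatmap cell):

# Exact correlation between a specific pair of predictors
print(corrMatrix.loc['x1', 'x4'])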