Key Word(s): Lasso, Ridge, Error, Bias, Variance, Regularization, Generalization
Title:
Exercise: Variation of Coefficients for Lasso and Ridge Regression
Description:
The goal of this exercise is to understand how the coefficients of the predictors vary with the value of the regularization parameter $\alpha$ in Lasso and Ridge regularization.
Below is a sample plot for Ridge ($L_2$ regularization)
Data Description:
Instructions:
- Read the dataset bacteria_train.csv and assign the predictor and response variables. The predictor is 'Spreading_factor' and the response variable is 'Perc_population'.
- Use PolynomialFeatures with a maximum degree of up to 7 (the code skeleton below sets maxdeg = 4) to make a new predictor x_poly.
- Make a list of alpha values.
- For each value of $\alpha$:
    - Fit a multi-linear regression using $L_2$ regularization.
    - Compute the coefficients of the predictors and store them to plot later.
- Make a plot of the coefficients against the alpha values.
- Make a new alpha list as per the code in the exercise.
- Implement Lasso regularization by repeating the above steps for each value of alpha.
- Make another plot of the coefficients against the new alpha values.
Hints:
np.linspace() Returns evenly spaced numbers over a specified interval.
np.transpose() Reverses or permutes the axes of an array; returns the modified array.
sklearn.PolynomialFeatures() Generates a new feature matrix consisting of all polynomial combinations of the features with degree less than or equal to the specified degree.
sklearn.fit_transform() Fits the transformer to the data and returns a transformed version of it.
sklearn.LinearRegression() Fits a linear regression model.
sklearn.fit() Fits the linear model to the training data.
sklearn.predict() Predicts using the fitted linear model.
mean_squared_error() Mean squared error regression loss.
sklearn.coef_ Returns the coefficients of the predictors in the model.
sklearn.Lasso() Linear model trained with an $L_1$ prior as the regularizer.
sklearn.Ridge() Linear least squares with $L_2$ regularization.
Note: This exercise is auto-graded and you can try multiple attempts.
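Before starting, it can help to see how the hinted functions fit together. Below is a minimal sketch on made-up toy data; it is illustrative only, not the graded solution, and every value in it is arbitrary.

# Minimal sketch on made-up toy data -- illustrative only, not the solution
import numpy as np
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import Ridge

x_toy = np.linspace(0, 1, 20).reshape(-1, 1)       # 20 points in [0, 1]
y_toy = 2 * x_toy.ravel()**2 + 0.5                 # a simple quadratic response

# Expand the single predictor into polynomial features (columns: 1, x, x^2)
x_toy_poly = PolynomialFeatures(degree=2).fit_transform(x_toy)

# Fit a Ridge model on the expanded features and inspect its coefficients
toy_model = Ridge(alpha=1.0).fit(x_toy_poly, y_toy)
print(toy_model.coef_)                             # one coefficient per feature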
# Import necessary libraries
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge
from sklearn.preprocessing import PolynomialFeatures
# Helper code to alter plot properties
large = 22; med = 16; small = 10
params = {'legend.fontsize': med,
          'figure.figsize': (16, 10),
          'axes.labelsize': med,
          'axes.titlesize': med,
          'axes.linewidth': 2,
          'xtick.labelsize': med,
          'ytick.labelsize': med,
          'figure.titlesize': large}
plt.style.use('seaborn-white')
plt.rcParams.update(params)
# Read the file "bacteria_train.csv" as a dataframe
df = pd.read_csv("bacteria_train.csv")
# Take a quick look of your dataset
df.head()
# Set the values of 'Spreading_factor' as the predictor
x = df[[___]]
# Set the values of 'Perc_population' as the response
y = df[___]
# Select the degree of the polynomial
maxdeg = 4
# Compute the polynomial features on the data
x_poly = PolynomialFeatures(___).fit_transform(___)
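If you want to sanity-check what fit_transform returns, here is a tiny illustration on a made-up 3-row predictor (the values are arbitrary); the first column is the bias term.

# Illustration only: degree-2 polynomial features of a made-up 3x1 array
demo_x = np.array([[1.0], [2.0], [3.0]])
print(PolynomialFeatures(2).fit_transform(demo_x))
# Columns are [1, x, x^2]:
# [[1. 1. 1.]
#  [1. 2. 4.]
#  [1. 3. 9.]]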
# Get a list of 1000 alpha values ranging from 10 to 120
# np.linspace includes the endpoint by default, unlike np.arange
alpha_list = np.linspace(___,___,___)
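To see the endpoint behaviour the comment above refers to, compare the two functions on illustrative values:

# Endpoint check (illustrative values): linspace includes the stop value,
# while arange stops before it
print(np.linspace(10, 120, 5))     # [ 10.   37.5  65.   92.5 120. ]
print(np.arange(10, 120, 27.5))    # [ 10.   37.5  65.   92.5]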
### edTest(test_ridge_fit) ###
# Make an empty list called coeff_list to store the coefficients of each model
coeff_list = []
# Loop over all alpha values
for i in alpha_list:

    # Initialize a Ridge regression object with the current alpha value
    # and set normalize as True
    ridge_reg = Ridge(alpha=___, normalize=___)

    # Fit on the transformed data
    ridge_reg.fit(___, ___)

    # Append the coeff_list with the coefficients of the trained model
    coeff_list.append(___)
# Take the transpose of the list to get the variation in the
# coefficient values per degree
trend = np.array(coeff_list).T
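As a quick illustration of why the transpose is taken: each row of coeff_list holds all coefficients for one alpha value, and transposing regroups the array so that each row tracks a single coefficient across all alphas. A made-up 2x3 example:

# Made-up example: 2 alpha values x 3 coefficients
demo_coeffs = np.array([[1, 2, 3],
                        [4, 5, 6]])
print(demo_coeffs.T)   # 3 rows, one per coefficient, each of length 2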
# Helper code to plot the variation of the coefficients as per the alpha value
# Just adding some nice colors. Note: this list covers up to degree 7,
# so extend it if you plan to use a higher degree
colors = ['#5059E8','#9FC131FF','#D91C1C','#9400D3','#FF2F92','#336600','black']
fig, ax = plt.subplots(figsize = (10,6))
for i in range(maxdeg):
    ax.plot(alpha_list, np.abs(trend[i+1]), color=colors[i], alpha=0.9, label=f'Degree {i+1}', lw=2.2)
ax.legend(loc='best',fontsize=10)
ax.set_xlabel(r'$\alpha$ values', fontsize=20)
ax.set_ylabel(r'$\beta$ values', fontsize=20)
fig.suptitle(r'Ridge ($L_2$) Regression')
plt.show();
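A note in case your environment is newer than this exercise: the normalize argument was removed from Ridge and Lasso in scikit-learn 1.2. A rough substitute, once you have filled in x_poly and y above, is to standardize inside a pipeline. This is an assumption on my part and is not numerically identical to normalize=True, since that option scaled columns by their norms and the effective alpha changes.

# Rough stand-in for normalize=True on scikit-learn >= 1.2 (not identical)
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

ridge_pipe = make_pipeline(StandardScaler(), Ridge(alpha=10))
ridge_pipe.fit(x_poly, y)
print(ridge_pipe[-1].coef_)   # coefficients of the Ridge step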
Compare the results of Ridge regression with the Lasso variant
# Select a list of 1000 alpha values ranging from 1e-4 to 1e-1
alpha_list = np.linspace(___,___,___)
### edTest(test_lasso_fit) ###
# Make an empty list called coeff_list to store the coefficients of each model
coeff_list = []
# Loop over all the alpha values
for i in alpha_list:

    # Initialize a Lasso regression model with the current alpha
    # and set normalize as True
    lasso_reg = Lasso(alpha=___, max_iter=250000, normalize=___)

    # Fit on the transformed data
    lasso_reg.fit(___, ___)

    # Append the coeff_list with the coefficients of the model
    coeff_list.append(___)
# Get the transpose of the list to get the variation in the
# coefficient values per degree
trend = np.array(coeff_list).T
# Helper code below to plot the variation of the coefficients as per the alpha value
colors = ['#5059E8','#9FC131FF','#D91C1C','#9400D3','#FF2F92','#336600','black']
fig, ax = plt.subplots(figsize = (10,6))
for i in range(maxdeg):
    ax.plot(alpha_list, np.abs(trend[i+1]), color=colors[i], alpha=0.9, label=f'Degree {i+1}', lw=2)
ax.legend(loc='best',fontsize=10)
ax.set_xlabel(r'$\alpha$ values', fontsize=20)
ax.set_ylabel(r'$\beta$ values', fontsize=20)
fig.suptitle(r'Lasso ($L_1$) Regression')
plt.show();
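The qualitative difference to look for in the two plots: Ridge shrinks coefficients smoothly toward zero as alpha grows, while Lasso drives some of them exactly to zero. A minimal made-up demonstration of that sparsity:

# Made-up demonstration: Lasso produces exact zeros, Ridge only shrinks
rng = np.random.default_rng(0)
X_demo = rng.normal(size=(50, 5))                           # 5 features
y_demo = 3 * X_demo[:, 0] + rng.normal(scale=0.1, size=50)  # only one matters

print(Ridge(alpha=5).fit(X_demo, y_demo).coef_)     # all small but nonzero
print(Lasso(alpha=0.5).fit(X_demo, y_demo).coef_)   # irrelevant ones exactly 0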