# Import necessary libraries
%matplotlib inline
import numpy as np
import pandas as pd
from scipy import stats
import matplotlib.pyplot as plt
from sklearn import preprocessing
from sklearn.metrics import mean_squared_error
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures


# Read the `Advertising.csv` dataframe
# (the path is relative, so the file must be in the current working directory)
df = pd.read_csv('Advertising.csv')

# Take a quick look at the data
# (as the last expression of a notebook cell the preview is displayed;
# when run as a plain script this line has no visible effect)
df.head()


# Helper function to compute the residual standard error, i.e. the estimated
# standard deviation (not the variance) of the error term
def error_func(y, y_p):
    """Return the residual standard error of predictions ``y_p`` against ``y``.

    Computes ``sqrt(sum((y - y_p) ** 2) / (n - 2))`` with ``n`` the number of
    observations. The ``n - 2`` divisor is the residual degrees of freedom of
    a regression with two fitted coefficients (intercept plus one slope).

    Parameters
    ----------
    y : array-like
        Observed response values (list, NumPy array, pandas Series, or a
        single-column frame).
    y_p : array-like
        Predicted values for the same observations, or a scalar.

    Returns
    -------
    numpy.float64
        The residual standard error.

    Raises
    ------
    ValueError
        If fewer than three observations are given, because the divisor
        ``n - 2`` would be zero or negative and the result undefined.
    """
    # Flatten both inputs so that a column-shaped (n, 1) array and a flat
    # (n,) array subtract element-wise instead of broadcasting to (n, n).
    observed = np.asarray(y, dtype=float).ravel()
    predicted = np.asarray(y_p, dtype=float).ravel()

    n = observed.size
    if n <= 2:
        raise ValueError(
            f"error_func needs at least 3 observations, got {n}"
        )

    residual_sum_of_squares = np.sum((observed - predicted) ** 2)
    return np.sqrt(residual_sum_of_squares / (n - 2))


# Set the number of bootstraps (how many resampled datasets are drawn and fitted)
numboot = 1000

# Set the budget as per the instructions given 
# (`___` is an exercise blank to be replaced with the budget value)
# Use 2D list to facilitate model prediction (sklearn.LinearRegression requires input as a 2d array)
# i.e. one row holding one feature value
budget = [[___]]

# Initialize an empty list to store sales predictions for each bootstrap
# (one value is appended per loop iteration)
sales_list = []


# Loop through each bootstrap
# Every `___` below is an exercise blank; the comment above each step states
# what is expected there. Each iteration resamples the data, refits the model,
# and records one noisy sales prediction for `budget`.
for i in range(___):

    # Create bootstrapped version of the data using the sample function
    # Set frac=1 and replace=True to get a bootstrap
    # (same number of rows as df, drawn with replacement)
    df_new = df.sample(___, replace=___)

    # Get the predictor data ('TV') from the new bootstrapped data
    # (double brackets keep it a 2D DataFrame, as the model's fit expects)
    x = df_new[[___]]

    # Get the response data ('Sales') from the new bootstrapped data
    y = df_new.___

    # Initialize a Linear Regression model
    # (a fresh model per iteration, so fits do not carry over)
    linreg = LinearRegression()

    # Fit the model on the new data
    linreg.fit(___,___)

    # Predict on the budget from the original data
    prediction = linreg.predict(budget)

    # Predict on the bootstrapped data
    # (needed only to measure the residual spread of this fit)
    y_pred = linreg.predict(x) 

    # Draw one random noise term from a normal distribution with mean 0 and
    # standard deviation given by the helper function error_func
    error = np.random.normal(0,error_func(y,y_pred))
    
    # The final sales prediction is the sum of the model prediction 
    # and the error term
    sales = ___

    # Convert the sales to float type and append to the list
    sales_list.append(np.float64(___))


### edTest(test_sales) ###
# Sort the list containing sales predictions in ascending order 
# (list.sort() works in place, so later uses of sales_list see the sorted order)
sales_list.sort()

# Find the 95% confidence interval using np.percentile function 
# at 2.5% and 97.5%
# The result is a (lower, upper) tuple; `___` are exercise blanks.
sales_CI = (np.percentile(___,___),np.percentile(___, ___))


# Helper function to plot the histogram of simulated values along
# with the 95% confidence interval
def plot_simulation(simulation, confidence, *, xlabel='Beta value',
                    hist_label='beta distribution'):
    """Plot a normalised histogram of ``simulation`` with interval markers.

    Parameters
    ----------
    simulation : sequence of float
        The simulated (e.g. bootstrapped) values to histogram.
    confidence : sequence of two floats
        ``(lower, upper)`` bounds of the interval; each is drawn as a red
        vertical line.
    xlabel : str, keyword-only
        Label for the x-axis. Defaults to the original 'Beta value'.
    hist_label : str, keyword-only
        Legend entry for the histogram. Defaults to the original
        'beta distribution'.

    Returns
    -------
    None
        The figure is shown via ``plt.show()``.
    """
    lower, upper = confidence[0], confidence[1]

    # NOTE(review): align='left' centres each bar on its left bin edge, so
    # the bars sit half a bin width to the left of the data that the
    # vertical lines refer to. Kept as in the original.
    plt.hist(simulation, bins=30, label=hist_label, align='left',
             density=True, edgecolor='k')

    # Upper bound is drawn first to keep the original legend order.
    plt.axvline(upper, 0, 1, color='r', label='Right Interval')
    plt.axvline(lower, 0, 1, color='r', label='Left Interval')

    plt.xlabel(xlabel)
    # density=True normalises the bars to a probability density, so the
    # y-axis is a density rather than a count of occurrences.
    plt.ylabel('Density')
    plt.legend(frameon=False, loc='upper right')
    plt.show()


# Call the plot_simulation function above with the computed sales 
# distribution and the confidence intervals computed earlier
# (sales_list holds one prediction per bootstrap; sales_CI is the (lower, upper) tuple)
plot_simulation(sales_list,sales_CI)


# Print the computed values
# budget[0][0] unwraps the single value from the 2D budget list.
# Both interval bounds are formatted to two decimal places.
# The trailing backslash continues the f-string onto the next line, so the
# second print emits a single line of output.
print(f"With a TV advertising budget of ${budget[0][0]},")
print(f"we can expect an increase of sales anywhere between {sales_CI[0]:0.2f} to {sales_CI[1]:.2f}\
 with a 95% confidence interval")


### edTest(test_chow1) ###
# Type your answer within the quotes given
# ('___' is the placeholder to replace)
answer1 = '___'

# Title:

# Description:

# Data Description:

# Instructions:

# Hints: