Title:
Exercise: Decision Boundaries

Description:
In this exercise we will compare the classification boundaries obtained from regularized and unregularized logistic regression models.
Don't forget the LogisticRegression documentation.
import pandas as pd
import numpy as np
import sklearn as sk
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import PolynomialFeatures
import statsmodels.api as sm
heart = pd.read_csv('Heart.csv')
# Force the response into a binary indicator:
heart['AHD'] = 1*(heart['AHD'] == "Yes")
heart.describe()
# split train and test data
heart_train, heart_test = train_test_split(heart, test_size=0.3, random_state = 109)
Fit an unregularized logistic regression model (logit1) to predict AHD from Age and MaxHR in the training set (with penalty='none' and max_iter=5000). Print out the coefficient estimates, and interpret general trends.
### edTest(test_logit1) ###
degree = 1
predictors = ['Age','MaxHR']
X_train1 = PolynomialFeatures(degree=degree,include_bias=False).fit_transform(heart_train[predictors])
y_train = heart_train['AHD']
logit1 = ___
print("Logistic Regression Estimated Betas:",
logit1.intercept_,logit1.coef_)
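If you are unsure of the call, here is a minimal sketch of an unregularized fit. It is only one possible completion of the blank above, and the exact spelling of the penalty argument depends on your scikit-learn version:

# Sketch, not the only valid form: versions of scikit-learn before 1.2 use
# penalty='none', while 1.2 and later spell it penalty=None.
logit1_sketch = LogisticRegression(penalty='none', max_iter=5000).fit(X_train1, y_train)
print("Sketch of unregularized betas:", logit1_sketch.intercept_, logit1_sketch.coef_)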
Fit an unregularized 4th order polynomial (with interactions) logistic regression model (logit4) to predict AHD from Age and MaxHR in the training set (with penalty='none' and max_iter=5000). Print out the coefficient estimates.
degree = ___
predictors = ['Age','MaxHR']
X_train4 = PolynomialFeatures(degree=degree,include_bias=False).fit_transform(heart_train[predictors])
logit4 = ___
print("Logistic Regression Estimated Betas:",
logit4.intercept_,logit4.coef_)
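To see exactly which terms the "with interactions" expansion creates, you can ask the transformer for its generated feature names. The sketch below assumes scikit-learn 1.0 or newer (older versions use get_feature_names instead):

# Optional sketch: list the 14 polynomial/interaction terms that a degree-4
# expansion of Age and MaxHR produces.
poly4 = PolynomialFeatures(degree=4, include_bias=False).fit(heart_train[predictors])
print(poly4.get_feature_names_out(predictors))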
Evaluate both models based on their misclassification rate in the test set.
### edTest(test_misclass) ###
######
# your code here
######
predictors = ['Age','MaxHR']
X_test1 = PolynomialFeatures(degree=1,include_bias=False).fit_transform(heart_test[predictors])
X_test4 = PolynomialFeatures(degree=4,include_bias=False).fit_transform(heart_test[predictors])
y_test = heart_test['AHD']
misclass_logit1 = ___
misclass_logit4 = ___
print("Overall misclassification rate in test for logit1:",misclass_logit1)
print("Overall misclassification rate in test for logit4:",misclass_logit4)
The code below computes classification predictions from logit1 over a grid spanning the range of the two predictors. The predictions and the training data are then shown together in a scatterplot in the second code chunk:
n = 100
x1=np.linspace(np.min(heart[['Age']]),np.max(heart[['Age']]),n)
x2=np.linspace(np.min(heart[['MaxHR']]),np.max(heart[['MaxHR']]),n)
x1v, x2v = np.meshgrid(x1, x2)
# To do the predictions and keep the yhats on 2-D (to match the dummy predictor shapes), use this
X = np.c_[x1v.ravel(), x2v.ravel()]
X_dummy = PolynomialFeatures(degree=1,include_bias=False).fit_transform(X)
yhat1 = logit1.predict(X_dummy)
plt.pcolormesh(x1v, x2v, yhat1.reshape(x1v.shape),alpha = 0.05)
plt.scatter(heart_train['Age'],heart_train['MaxHR'],c=heart_train['AHD'])
plt.ylabel("MaxHR")
plt.xlabel("Age")
plt.title("Yellow = Predicted to have AHD, Purple = Predicted to not have AHD")
plt.colorbar()
plt.show()
X_dummy = PolynomialFeatures(degree=4,include_bias=False).fit_transform(X)
yhat4 = logit4.predict(X_dummy)
plt.pcolormesh(x1v, x2v, yhat4.reshape(x1v.shape),alpha = 0.05)
plt.scatter(heart_train['Age'],heart_train['MaxHR'],c=heart_train['AHD'])
plt.ylabel("MaxHR")
plt.xlabel("Age")
plt.title("Yellow = Predicted to have AHD, Purple = Predicted to not have AHD")
plt.colorbar()
plt.show()
Compare how the two models above form their classification boundaries. Which is more likely to be overfit? How would regularization affect these boundaries?
your answer here
Fit a ridge-like logistic regression model (logit_ridge) with C=0.0001 and max_iter=5000 on the 4th order polynomial features as before. Compare this regularized model with the unregularized one by examining the classification boundaries.
### edTest(test_ridge) ###
logit_ridge = LogisticRegression(___).fit(X_train4, y_train)
yhat_ridge = logit_ridge.predict(X_dummy)
plt.pcolormesh(x1v, x2v, yhat_ridge.reshape(x1v.shape),alpha = 0.05)
plt.scatter(heart_train['Age'],heart_train['MaxHR'],c=heart_train['AHD'])
plt.ylabel("MaxHR")
plt.xlabel("Age")
plt.title("Yellow = Predicted to have AHD, Purple = Predicted to not have AHD")
plt.colorbar()
plt.show()
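Keep in mind that in scikit-learn's LogisticRegression, C is the inverse of the regularization strength, so C=0.0001 corresponds to a very heavy penalty. A minimal sketch of such a ridge-like fit, assuming the default 'l2' penalty is what is wanted:

# Sketch of a ridge-like fit: L2 is scikit-learn's default penalty, and the small
# C (inverse regularization strength) shrinks the polynomial coefficients hard.
logit_ridge_sketch = LogisticRegression(C=0.0001, max_iter=5000).fit(X_train4, y_train)
print("Sketch of ridge-like betas:", logit_ridge_sketch.intercept_, logit_ridge_sketch.coef_)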
your answer here
Perfect Separation
We modify the data to demonstrate perfect separation: adding 100 to the Age of every patient with AHD makes the two classes completely separable on Age, so an unregularized maximum-likelihood fit cannot converge to a finite Age coefficient.
predictors = ['Age','MaxHR']
X_train_new = heart_train[predictors].copy()
X_train_new['Age'] = X_train_new['Age'] + 100*y_train.values
plt.plot(X_train_new['Age'], y_train ,'o', markersize=7,color="#011DAD",label="Data")
plt.xlabel("Age")
plt.ylabel("AHD")
plt.yticks((0,1), labels=('No', 'Yes'))
plt.legend()
plt.show()
# Try to train a logistic regression model
X_train_new = sm.add_constant(X_train_new)
try:
    logreg = sm.Logit(y_train, X_train_new).fit()
except Exception as e:
    print(e)
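For contrast, a penalized fit stays well behaved under perfect separation because the regularization keeps the coefficients finite. The sketch below drops the constant column that sm.add_constant created, since scikit-learn fits its own intercept:

# Sketch: the default L2-penalized scikit-learn fit converges even though the
# classes are perfectly separable, because the penalty bounds the coefficients.
logreg_sk = LogisticRegression(max_iter=5000).fit(X_train_new.drop(columns='const'), y_train)
print("Penalized coefficients under perfect separation (sketch):", logreg_sk.coef_)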