# Import necessary libraries
# Feel free to import other modules and libraries as you deem fit
import sklearn
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from prettytable import PrettyTable
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from IPython.core.interactiveshell import InteractiveShell
from sklearn.metrics import classification_report,roc_auc_score, roc_curve, accuracy_score

# IPython magic (not plain Python): render matplotlib figures inline in the notebook output
%matplotlib inline
# Ask IPython to display the value of every top-level expression in a cell,
# rather than only the last one
InteractiveShell.ast_node_interactivity = "all"


                        # Read the train and test data
# Take a look at the data to understand the features and response

# Your code here


                        # define X_train, y_train, X_test, and y_test 
# Urgency is the response variable, all other variables are the predictors                        

# Your code here


                        # Perform GridSearchCV to get the best C value for a Logistic Regression model
# Feel free to use the cv and set of C values of your choice
# Remember to keep track of your best C value

# Your code here


                        # Using the C value from above, initialize a Logistic Regression model
# Fit the model on the train data
# Predict on the test data

# Your code here


                        # Compute the accuracy of the model
# Placeholder: replace ___ with the accuracy of the Logistic Regression predictions
# on the test data. The value must be numeric: the summary table further down
# calls round(logistic_acc, 3).
logistic_acc = ___


                        # Perform GridSearchCV to get the best k value for a kNN Classification model
# Feel free to use the cv and set of k values of your choice
# Remember to keep track of your best k value

# Your code here


                        # Using the k value from above, initialize a kNN Classification model
# Fit the model on the train data
# Predict on the test data

# Your code here


                        # Compute the accuracy of the model
# Placeholder: replace ___ with the accuracy of the kNN Classification predictions
# on the test data. The value must be numeric: the summary table further down
# calls round(knn_acc, 3).
knn_acc = ___


                        # Store the Confusion Matrix of the trained Logistic Regression Model on the test data in a variable


# Your code here


                        # Store the Confusion Matrix of the trained kNN Classification Model on the test data in a variable

# Your code here


                        # Plot of the Confusion Matrix for the Logistic Regression and kNN Classification models

# Your code here


                        # Compute the Sensitivity for the Logistic Regression model
# Placeholder: replace ___ with the sensitivity (recall) of the Logistic Regression
# model, i.e. TP / (TP + FN). Shown in the "Sensitivity(Recall)" row of the summary table.
logistic_recall = ___

# Compute the Sensitivity (recall) for the kNN Classification model in the same way
# (placeholder: replace ___ with the numeric value)
knn_recall = ___


                        # Compute the Specificity for the Logistic Regression model
# Placeholder: replace ___ with the specificity of the Logistic Regression model,
# i.e. TN / (TN + FP).
# NOTE(review): the `_fpr` suffix suggests "false positive rate", but the surrounding
# comments and the "Specificity" row of the summary table indicate this variable is
# meant to hold specificity (= 1 - FPR). The name is kept because the summary table
# reads `logistic_fpr` and `knn_fpr`.
logistic_fpr = ___

# Compute the Specificity for the kNN Classification model in the same way
# (placeholder: replace ___ with the numeric value)
knn_fpr = ___


                        # Compute the Precision for the Logistic Regression model
# Placeholder: replace ___ with the precision (positive predictive value) of the
# Logistic Regression model, i.e. TP / (TP + FP). Shown in the "Precision" row of
# the summary table.
logistic_precision = ___

# Compute the Precision for the kNN Classification model in the same way
# (placeholder: replace ___ with the numeric value)
knn_precision = ___


                        # Compute the F1-Score for the Logistic Regression model
# Placeholder: replace ___ with the F1-Score of the Logistic Regression model,
# i.e. the harmonic mean of precision and recall:
# 2 * precision * recall / (precision + recall). Shown in the "F1 Score" row of
# the summary table.
logistic_fscore = ___

# Compute the F1-Score for the kNN Classification model in the same way
# (placeholder: replace ___ with the numeric value)
knn_fscore = ___


                        # Helper code to bring everything together
# Build a side-by-side comparison table of the metrics computed above
pt = PrettyTable()
pt.field_names = ["Metric", "Logistic Regression", "kNN Classification"]

# One entry per table row: (row label, Logistic Regression value, kNN value)
summary_rows = (
    ("Accuracy", logistic_acc, knn_acc),
    ("Sensitivity(Recall)", logistic_recall, knn_recall),
    ("Specificity", logistic_fpr, knn_fpr),
    ("Precision", logistic_precision, knn_precision),
    ("F1 Score", logistic_fscore, knn_fscore),
)

# Every metric is reported rounded to three decimal places
for metric_label, logistic_value, knn_value in summary_rows:
    pt.add_row([metric_label, round(logistic_value, 3), round(knn_value, 3)])

print(pt)


                        # Compute the area under the ROC curve for the Logistic Regression model
# Placeholder: replace ___ with the area under the ROC curve for the Logistic
# Regression model (roc_auc_score is imported at the top of the file, presumably
# for this purpose)
logreg_auc = ___

# Compute the area under the ROC curve for the kNN Classification model
# (placeholder: replace ___ with the numeric value)
knnreg_auc = ___


                        # Plot the ROC curve for the Logistic Regression model and kNN Classification model
# You can refer to the end of homework 5 for example code

# Your code here


                        # Area under curve - Logistic regression & kNN
# along with the boundary conditions

# Your code here

Title:

Hints:

COVID-19 Machine Learning Dataset®

Intended For Educational Use Only

Should this data be used for research?

And most importantly:

Calling the dataset

GridsearchCV for Logistic Regression

Fitting the data and making predictions

GridsearchCV for KNN classification

Fitting the data and making predictions

What is a Confusion Matrix?

True Positive

False Positive

True Negative

False Negative

The boy who cried wolf: Data Science edition

Predicted wolf, but no wolf

Predicted no wolf, but actually wolf

Plot the Confusion Matrix

Sensitivity

Specificity

Precision (Positive Predicted Value)

F1 score

BACK TO THE LECTURE

Bayes Theorem & Diagnostic testing

ROC Curve

Which classifier to choose?

Choice of classifier Scenario 1 - BRAZIL

Choice of classifier Scenario 2 - GERMANY

Choice of classifier Scenario 3 - INDIA

ROC curve with boundary conditions

BACK TO THE LECTURE