diff --git a/modules/machine_learning/classification.py b/modules/machine_learning/classification.py
index 78c553a0c725eb85fcf4d69be957e051f70ba867..8607a124b7038d4c0a6108800dadb3bb156ac8ac 100644
--- a/modules/machine_learning/classification.py
+++ b/modules/machine_learning/classification.py
@@ -7,6 +7,7 @@ import matplotlib.pyplot as plt
 from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, jaccard_score, f1_score, precision_score, recall_score, roc_auc_score
 from sklearn.model_selection import train_test_split
 from sklearn.linear_model import LogisticRegression
+from sklearn.neighbors import KNeighborsClassifier
 
 
 def get_csv_as_dataframe(csv_file):
@@ -58,6 +59,29 @@ def logistic_regression(x, y):
     yhat = LR.predict(x_test)
     yhat_prob = LR.predict_proba(x_test)
 
+    print(confusion_matrix(y_test, yhat, labels=[1,0]))
+    print('Accuracy: ', accuracy_score(y_test, yhat))
+    print('Jaccard Idx: ', jaccard_score(y_test, yhat))
+    print('Precision: ', precision_score(y_test, yhat))
+    print('Recall: ', recall_score(y_test, yhat))
+    print('F1: ', f1_score(y_test, yhat))
+    print('AUC: ', roc_auc_score(y_test, yhat_prob[:, 1]))
+
+
+def k_nearest_neighbors(x, y, k=4):
+    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=4)
+    print('Train set:', x_train.shape, y_train.shape)
+    print('Test set:', x_test.shape, y_test.shape)
+
+    x_train = np.where(np.isnan(x_train), 0, x_train)
+    x_test = np.where(np.isnan(x_test), 0, x_test)
+    print('num no icing test: ', np.sum(y_test == 0))
+    print('num icing test: ', np.sum(y_test == 1))
+
+    KN_C = KNeighborsClassifier(n_neighbors=k).fit(x_train, y_train)
+    yhat = KN_C.predict(x_test)
+    yhat_prob = KN_C.predict_proba(x_test)
+
     print(confusion_matrix(y_test, yhat, labels=[1,0]))
     print('Accuracy: ', accuracy_score(y_test, yhat))
     print('Jaccard Idx: ', jaccard_score(y_test, yhat))
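
A minimal usage sketch (not part of the patch) showing how the new k_nearest_neighbors could be exercised alongside the existing logistic_regression. The CSV file name and the 'icing' label column are assumptions for illustration; only get_csv_as_dataframe, logistic_regression, and k_nearest_neighbors come from this module, and the import path assumes the repository layout shown in the diff.

import numpy as np
from modules.machine_learning.classification import (
    get_csv_as_dataframe, logistic_regression, k_nearest_neighbors)

# Hypothetical CSV and label column; adjust to the real training data.
df = get_csv_as_dataframe('icing_training_samples.csv')
y = df['icing'].values                    # assumed binary target: 1 = icing, 0 = no icing
x = df.drop(columns=['icing']).values     # remaining columns used as features

logistic_regression(x, y)       # prints confusion matrix, accuracy, Jaccard, precision, recall, F1, AUC
k_nearest_neighbors(x, y, k=4)  # same metric printout for the new KNN classifier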