From bfcfa753b235bc16f50f470b1c934b9de2630bcb Mon Sep 17 00:00:00 2001 From: tomrink <rink@ssec.wisc.edu> Date: Wed, 24 Apr 2024 12:37:35 -0500 Subject: [PATCH] snapshot... --- modules/machine_learning/classification.py | 24 ++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/modules/machine_learning/classification.py b/modules/machine_learning/classification.py index 035ef544..757bafe8 100644 --- a/modules/machine_learning/classification.py +++ b/modules/machine_learning/classification.py @@ -8,6 +8,8 @@ from sklearn.metrics import confusion_matrix, classification_report, accuracy_sc from sklearn.model_selection import train_test_split from sklearn.linear_model import LogisticRegression from sklearn.neighbors import KNeighborsClassifier +from sklearn.tree import DecisionTreeClassifier +import sklearn.tree as tree def get_csv_as_dataframe(csv_file): @@ -82,6 +84,28 @@ def k_nearest_neighbors(x, y, k=4): yhat = KN_C.predict(x_test) yhat_prob = KN_C.predict_proba(x_test) + print('Accuracy: ', "{:.4f}".format(accuracy_score(y_test, yhat))) + print('Jaccard Idx: ', "{:.4f}".format(jaccard_score(y_test, yhat))) + print('Precision: ', "{:.4f}".format(precision_score(y_test, yhat))) + print('Recall: ', "{:.4f}".format(recall_score(y_test, yhat))) + print('F1: ', "{:.4f}".format(f1_score(y_test, yhat))) + print('AUC: ', "{:.4f}".format(roc_auc_score(y_test, yhat_prob[:, 1]))) + + +def decision_tree(x, y, max_depth=4): + x_train, x_test, y_train, y_test = train_test_split( x, y, test_size=0.2, random_state=4) + print('Train set:', x_train.shape, y_train.shape) + print('Test set:', x_test.shape, y_test.shape) + + x_train = np.where(np.isnan(x_train), 0, x_train) + x_test = np.where(np.isnan(x_test), 0, x_test) + print('num no icing test: ', np.sum(y_test == 0)) + print('num icing test: ', np.sum(y_test == 1)) + + DT = DecisionTreeClassifier(criterion="entropy", max_depth=max_depth).fit(x_train, y_train) + yhat = DT.predict(x_test) + yhat_prob = DT.predict_proba(x_test) + print('Accuracy: ', "{:.4f}".format(accuracy_score(y_test, yhat))) print('Jaccard Idx: ', "{:.4f}".format(jaccard_score(y_test, yhat))) print('Precision: ', "{:.4f}".format(precision_score(y_test, yhat))) -- GitLab