From bfcfa753b235bc16f50f470b1c934b9de2630bcb Mon Sep 17 00:00:00 2001
From: tomrink <rink@ssec.wisc.edu>
Date: Wed, 24 Apr 2024 12:37:35 -0500
Subject: [PATCH] Add decision_tree classifier to classification module

---
 modules/machine_learning/classification.py | 24 ++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/modules/machine_learning/classification.py b/modules/machine_learning/classification.py
index 035ef544..757bafe8 100644
--- a/modules/machine_learning/classification.py
+++ b/modules/machine_learning/classification.py
@@ -8,6 +8,8 @@ from sklearn.metrics import confusion_matrix, classification_report, accuracy_sc
 from sklearn.model_selection import train_test_split
 from sklearn.linear_model import LogisticRegression
 from sklearn.neighbors import KNeighborsClassifier
+from sklearn.tree import DecisionTreeClassifier
+import sklearn.tree as tree
 
 
 def get_csv_as_dataframe(csv_file):
@@ -82,6 +84,28 @@ def k_nearest_neighbors(x, y, k=4):
     yhat = KN_C.predict(x_test)
     yhat_prob = KN_C.predict_proba(x_test)
 
+    print('Accuracy:    ', "{:.4f}".format(accuracy_score(y_test, yhat)))
+    print('Jaccard Idx: ', "{:.4f}".format(jaccard_score(y_test, yhat)))
+    print('Precision:   ', "{:.4f}".format(precision_score(y_test, yhat)))
+    print('Recall:      ', "{:.4f}".format(recall_score(y_test, yhat)))
+    print('F1:          ', "{:.4f}".format(f1_score(y_test, yhat)))
+    print('AUC:         ', "{:.4f}".format(roc_auc_score(y_test, yhat_prob[:, 1])))
+
+
+def decision_tree(x, y, max_depth=4):
+    x_train, x_test, y_train, y_test = train_test_split( x, y, test_size=0.2, random_state=4)
+    print('Train set:', x_train.shape,  y_train.shape)
+    print('Test set:', x_test.shape,  y_test.shape)
+
+    x_train = np.where(np.isnan(x_train), 0, x_train)
+    x_test = np.where(np.isnan(x_test), 0, x_test)
+    print('num no icing test: ', np.sum(y_test == 0))
+    print('num icing test: ', np.sum(y_test == 1))
+
+    DT = DecisionTreeClassifier(criterion="entropy", max_depth=max_depth).fit(x_train, y_train)
+    yhat = DT.predict(x_test)
+    yhat_prob = DT.predict_proba(x_test)
+
     print('Accuracy:    ', "{:.4f}".format(accuracy_score(y_test, yhat)))
     print('Jaccard Idx: ', "{:.4f}".format(jaccard_score(y_test, yhat)))
     print('Precision:   ', "{:.4f}".format(precision_score(y_test, yhat)))
-- 
GitLab