From c50ecf7290558602b89c65460f7cd693b8390db3 Mon Sep 17 00:00:00 2001
From: tomrink <rink@ssec.wisc.edu>
Date: Wed, 24 Apr 2024 14:31:06 -0500
Subject: [PATCH] snapshot...

---
 modules/machine_learning/classification.py | 40 ++++++++++++++++++++++
 1 file changed, 40 insertions(+)

diff --git a/modules/machine_learning/classification.py b/modules/machine_learning/classification.py
index 93564c92..e29641b4 100644
--- a/modules/machine_learning/classification.py
+++ b/modules/machine_learning/classification.py
@@ -96,6 +96,46 @@ def k_nearest_neighbors(x, y, k=4):
     print('AUC:         ', "{:.4f}".format(roc_auc_score(y_test, yhat_prob[:, 1])))
 
 
def k_nearest_neighbors_all(x, y, k_s=10):
    """Sweep KNN classifiers over k = 1 .. k_s - 1 and report/plot accuracy.

    Splits (x, y) 80/20, fits a KNeighborsClassifier for each neighbor
    count, prints the standard classification metrics per k, then plots
    mean accuracy with +/- 1 and +/- 3 standard-error bands.

    Parameters
    ----------
    x : array-like, shape (n_samples, n_features)
        Feature matrix; NaNs are replaced with 0 before fitting.
    y : array-like, shape (n_samples,)
        Binary labels (0 = no icing, 1 = icing, per the printed summary).
    k_s : int, default 10
        Exclusive upper bound on the number of neighbors tried
        (k runs from 1 to k_s - 1, matching the original loop bounds).

    Returns
    -------
    tuple
        (best_k, mean_acc, std_acc) where best_k is the k with the
        highest test accuracy, and mean_acc / std_acc are arrays of
        length k_s - 1 indexed by k - 1.
    """
    x_train, x_test, y_train, y_test = train_test_split( x, y, test_size=0.2, random_state=4)
    print('Train set:', x_train.shape,  y_train.shape)
    print('Test set:', x_test.shape,  y_test.shape)

    # KNN cannot handle NaNs; zero-fill after the split so train/test
    # stay independent.
    x_train = np.where(np.isnan(x_train), 0, x_train)
    x_test = np.where(np.isnan(x_test), 0, x_test)
    print('num no icing test: ', np.sum(y_test == 0))
    print('num icing test: ', np.sum(y_test == 1))

    # BUG FIX: the original re-assigned k_s = 10 here, silently
    # discarding the caller-supplied parameter value.
    mean_acc = np.zeros((k_s - 1))
    std_acc = np.zeros((k_s - 1))

    for n in range(1, k_s):
        KN_C = KNeighborsClassifier(n_neighbors=n).fit(x_train, y_train)
        yhat = KN_C.predict(x_test)
        yhat_prob = KN_C.predict_proba(x_test)
        print('Accuracy:    ', "{:.4f}".format(accuracy_score(y_test, yhat)))
        print('Jaccard Idx: ', "{:.4f}".format(jaccard_score(y_test, yhat)))
        print('Precision:   ', "{:.4f}".format(precision_score(y_test, yhat)))
        print('Recall:      ', "{:.4f}".format(recall_score(y_test, yhat)))
        print('F1:          ', "{:.4f}".format(f1_score(y_test, yhat)))
        print('AUC:         ', "{:.4f}".format(roc_auc_score(y_test, yhat_prob[:, 1])))

        mean_acc[n - 1] = accuracy_score(y_test, yhat)
        # Standard error of the per-sample correctness indicator.
        std_acc[n - 1] = np.std(yhat == y_test) / np.sqrt(yhat.shape[0])

    best_k = int(mean_acc.argmax()) + 1
    print("The best accuracy was with", mean_acc.max(), "with k=", best_k)

    plt.plot(range(1, k_s), mean_acc, 'g')
    plt.fill_between(range(1, k_s), mean_acc - 1 * std_acc, mean_acc + 1 * std_acc, alpha=0.10)
    plt.fill_between(range(1, k_s), mean_acc - 3 * std_acc, mean_acc + 3 * std_acc, alpha=0.10, color="green")
    plt.legend(('Accuracy ', '+/- 1xstd', '+/- 3xstd'))
    plt.ylabel('Accuracy ')
    plt.xlabel('Number of Neighbors (K)')
    plt.tight_layout()
    plt.show()

    return best_k, mean_acc, std_acc
+
 def decision_tree(x, y, max_depth=4):
     x_train, x_test, y_train, y_test = train_test_split( x, y, test_size=0.2, random_state=4)
     print('Train set:', x_train.shape,  y_train.shape)
-- 
GitLab