def k_nearest_neighbors_all(x, y, k_s=10):
    """Sweep KNN classifiers over k = 1 .. k_s - 1 and plot accuracy vs. k.

    For each neighbor count the classifier is fit on an 80/20 train/test
    split (fixed random_state=4 for reproducibility), a battery of metrics
    is printed, and the per-k test accuracy (with its standard error) is
    collected for the final plot.

    Parameters
    ----------
    x : array-like
        Feature matrix; NaNs are zero-filled before fitting.
    y : array-like
        Binary labels (0 = no icing, 1 = icing, per the printed counts).
    k_s : int, optional
        Exclusive upper bound on the number of neighbors to try
        (k runs from 1 to k_s - 1). Defaults to 10.
    """
    x_train, x_test, y_train, y_test = train_test_split( x, y, test_size=0.2, random_state=4)
    print('Train set:', x_train.shape, y_train.shape)
    print('Test set:', x_test.shape, y_test.shape)

    # NaNs would poison the distance computations, so zero-fill them.
    x_train = np.where(np.isnan(x_train), 0, x_train)
    x_test = np.where(np.isnan(x_test), 0, x_test)
    print('num no icing test: ', np.sum(y_test == 0))
    print('num icing test: ', np.sum(y_test == 1))

    # BUG FIX: the original body re-assigned k_s = 10 here, silently
    # discarding the caller-supplied k_s argument. The parameter is now
    # honored; behavior is unchanged for the default call.
    mean_acc = np.zeros((k_s - 1))
    std_acc = np.zeros((k_s - 1))

    for n in range(1, k_s):
        KN_C = KNeighborsClassifier(n_neighbors=n).fit(x_train, y_train)
        yhat = KN_C.predict(x_test)
        yhat_prob = KN_C.predict_proba(x_test)
        print('Accuracy: ', "{:.4f}".format(accuracy_score(y_test, yhat)))
        print('Jaccard Idx: ', "{:.4f}".format(jaccard_score(y_test, yhat)))
        print('Precision: ', "{:.4f}".format(precision_score(y_test, yhat)))
        print('Recall: ', "{:.4f}".format(recall_score(y_test, yhat)))
        print('F1: ', "{:.4f}".format(f1_score(y_test, yhat)))
        print('AUC: ', "{:.4f}".format(roc_auc_score(y_test, yhat_prob[:, 1])))

        mean_acc[n - 1] = accuracy_score(y_test, yhat)
        # Standard error of the per-sample correctness indicator (yhat == y_test).
        std_acc[n - 1] = np.std(yhat == y_test) / np.sqrt(yhat.shape[0])

    # argmax is 0-based while the k sweep starts at 1, hence the +1.
    print("The best accuracy was with", mean_acc.max(), "with k=", mean_acc.argmax() + 1)

    plt.plot(range(1, k_s), mean_acc, 'g')
    plt.fill_between(range(1, k_s), mean_acc - 1 * std_acc, mean_acc + 1 * std_acc, alpha=0.10)
    plt.fill_between(range(1, k_s), mean_acc - 3 * std_acc, mean_acc + 3 * std_acc, alpha=0.10, color="green")
    plt.legend(('Accuracy ', '+/- 1xstd', '+/- 3xstd'))
    plt.ylabel('Accuracy ')
    plt.xlabel('Number of Neighbors (K)')
    plt.tight_layout()
    plt.show()