From fb5dfb20f553b617075da6e7499e351668802cd7 Mon Sep 17 00:00:00 2001
From: tomrink <rink@ssec.wisc.edu>
Date: Wed, 24 Apr 2024 15:35:31 -0500
Subject: [PATCH] snapshot: add SVM and random_forest classifiers to classification.py

---
 modules/machine_learning/classification.py | 50 ++++++++++++++++++++--
 1 file changed, 46 insertions(+), 4 deletions(-)

diff --git a/modules/machine_learning/classification.py b/modules/machine_learning/classification.py
index e29641b4..dcbfa2de 100644
--- a/modules/machine_learning/classification.py
+++ b/modules/machine_learning/classification.py
@@ -2,13 +2,14 @@ import pandas as pd
 import pylab as pl
 import numpy as np
 import scipy.optimize as opt
-from sklearn import preprocessing
+from sklearn import preprocessing, svm
 import matplotlib.pyplot as plt
 from sklearn.metrics import confusion_matrix, accuracy_score, jaccard_score, f1_score, precision_score, recall_score, roc_auc_score
 from sklearn.model_selection import train_test_split
 from sklearn.linear_model import LogisticRegression
 from sklearn.neighbors import KNeighborsClassifier
 from sklearn.tree import DecisionTreeClassifier
+from sklearn.ensemble import RandomForestClassifier
 import sklearn.tree as tree
 from sklearn.tree import export_graphviz
 
@@ -136,7 +137,7 @@ def k_nearest_neighbors_all(x, y, k_s=10):
     plt.show()
 
 
-def decision_tree(x, y, max_depth=4):
+def decision_tree(x, y, criterion='entropy', max_depth=4):
     x_train, x_test, y_train, y_test = train_test_split( x, y, test_size=0.2, random_state=4)
     print('Train set:', x_train.shape,  y_train.shape)
     print('Test set:', x_test.shape,  y_test.shape)
@@ -146,7 +147,7 @@ def decision_tree(x, y, max_depth=4):
     print('num no icing test: ', np.sum(y_test == 0))
     print('num icing test: ', np.sum(y_test == 1))
 
-    DT = DecisionTreeClassifier(criterion="entropy", max_depth=max_depth).fit(x_train, y_train)
+    DT = DecisionTreeClassifier(criterion=criterion, max_depth=max_depth).fit(x_train, y_train)
     yhat = DT.predict(x_test)
     yhat_prob = DT.predict_proba(x_test)
 
@@ -158,7 +159,48 @@ def decision_tree(x, y, max_depth=4):
     print('AUC:         ', "{:.4f}".format(roc_auc_score(y_test, yhat_prob[:, 1])))
 
     return DT
-
 # export_graphviz(DT, out_file='tree.dot', filled=True, feature_names=['cld_geo_thick', 'cld_temp_acha', 'conv_cloud_fraction', 'supercooled_cloud_fraction', 'cld_reff_dcomp', 'cld_opd_dcomp', 'iwc_dcomp'])
 # !dot -Tpng tree.dot -o tree.png
 
+
+def SVM(x, y, kernel='rbf'):
+    x_train, x_test, y_train, y_test = train_test_split( x, y, test_size=0.2, random_state=4)
+    print('Train set:', x_train.shape,  y_train.shape)
+    print('Test set:', x_test.shape,  y_test.shape)
+
+    x_train = np.where(np.isnan(x_train), 0, x_train)
+    x_test = np.where(np.isnan(x_test), 0, x_test)
+    print('num no icing test: ', np.sum(y_test == 0))
+    print('num icing test: ', np.sum(y_test == 1))
+
+    clf = svm.SVC(kernel=kernel)
+    clf = clf.fit(x_train, y_train)
+    yhat = clf.predict(x_test)
+
+    print('Accuracy:    ', "{:.4f}".format(accuracy_score(y_test, yhat)))
+    print('Jaccard Idx: ', "{:.4f}".format(jaccard_score(y_test, yhat)))
+    print('Precision:   ', "{:.4f}".format(precision_score(y_test, yhat)))
+    print('Recall:      ', "{:.4f}".format(recall_score(y_test, yhat)))
+    print('F1:          ', "{:.4f}".format(f1_score(y_test, yhat)))
+
+
+def random_forest(x, y, criterion='entropy', max_depth=4):
+    x_train, x_test, y_train, y_test = train_test_split( x, y, test_size=0.2, random_state=4)
+    print('Train set:', x_train.shape,  y_train.shape)
+    print('Test set:', x_test.shape,  y_test.shape)
+
+    x_train = np.where(np.isnan(x_train), 0, x_train)
+    x_test = np.where(np.isnan(x_test), 0, x_test)
+    print('num no icing test: ', np.sum(y_test == 0))
+    print('num icing test: ', np.sum(y_test == 1))
+
+    rnd_clf = RandomForestClassifier(criterion=criterion, max_depth=max_depth).fit(x_train, y_train)
+    yhat = rnd_clf.predict(x_test)
+    yhat_prob = rnd_clf.predict_proba(x_test)
+
+    print('Accuracy:    ', "{:.4f}".format(accuracy_score(y_test, yhat)))
+    print('Jaccard Idx: ', "{:.4f}".format(jaccard_score(y_test, yhat)))
+    print('Precision:   ', "{:.4f}".format(precision_score(y_test, yhat)))
+    print('Recall:      ', "{:.4f}".format(recall_score(y_test, yhat)))
+    print('F1:          ', "{:.4f}".format(f1_score(y_test, yhat)))
+    print('AUC:         ', "{:.4f}".format(roc_auc_score(y_test, yhat_prob[:, 1])))
-- 
GitLab
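
Reviewer note: a minimal, hypothetical usage sketch for the two helpers this patch adds (SVM and random_forest). Only the function names and keyword arguments come from the diff; the import path assumes the repository layout shown in the patch header is importable as a package, and the feature matrix and labels below are random stand-ins for the icing dataset the module's other functions operate on.

    import numpy as np
    from modules.machine_learning.classification import SVM, random_forest  # assumed import path

    # Hypothetical stand-in data: 500 samples, 7 features, binary icing labels.
    rng = np.random.default_rng(4)
    x = rng.normal(size=(500, 7))
    y = (rng.random(500) > 0.7).astype(int)

    # RBF-kernel support vector classifier; prints accuracy, Jaccard, precision, recall, F1.
    SVM(x, y, kernel='rbf')

    # Random forest with the same entropy criterion / max_depth=4 defaults as decision_tree;
    # additionally prints AUC computed from predict_proba.
    random_forest(x, y, criterion='entropy', max_depth=4)

Both new functions follow the existing decision_tree pattern (80/20 split with random_state=4, NaN-to-zero imputation, printed metrics); unlike decision_tree, neither returns its fitted estimator.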