From fb5dfb20f553b617075da6e7499e351668802cd7 Mon Sep 17 00:00:00 2001
From: tomrink <rink@ssec.wisc.edu>
Date: Wed, 24 Apr 2024 15:35:31 -0500
Subject: [PATCH] snapshot...

---
 modules/machine_learning/classification.py | 50 ++++++++++++++++++++--
 1 file changed, 46 insertions(+), 4 deletions(-)

diff --git a/modules/machine_learning/classification.py b/modules/machine_learning/classification.py
index e29641b4..dcbfa2de 100644
--- a/modules/machine_learning/classification.py
+++ b/modules/machine_learning/classification.py
@@ -2,13 +2,14 @@ import pandas as pd
 import pylab as pl
 import numpy as np
 import scipy.optimize as opt
-from sklearn import preprocessing
+from sklearn import preprocessing, svm
 import matplotlib.pyplot as plt
 from sklearn.metrics import confusion_matrix, accuracy_score, jaccard_score, f1_score, precision_score, recall_score, roc_auc_score
 from sklearn.model_selection import train_test_split
 from sklearn.linear_model import LogisticRegression
 from sklearn.neighbors import KNeighborsClassifier
 from sklearn.tree import DecisionTreeClassifier
+from sklearn.ensemble import RandomForestClassifier
 import sklearn.tree as tree
 from sklearn.tree import export_graphviz
 
@@ -136,7 +137,7 @@ def k_nearest_neighbors_all(x, y, k_s=10):
     plt.show()
 
 
-def decision_tree(x, y, max_depth=4):
+def decision_tree(x, y, criterion='entropy', max_depth=4):
     x_train, x_test, y_train, y_test = train_test_split( x, y, test_size=0.2, random_state=4)
     print('Train set:', x_train.shape, y_train.shape)
     print('Test set:', x_test.shape, y_test.shape)
@@ -146,7 +147,7 @@ def decision_tree(x, y, max_depth=4):
     print('num no icing test: ', np.sum(y_test == 0))
     print('num icing test: ', np.sum(y_test == 1))
 
-    DT = DecisionTreeClassifier(criterion="entropy", max_depth=max_depth).fit(x_train, y_train)
+    DT = DecisionTreeClassifier(criterion=criterion, max_depth=max_depth).fit(x_train, y_train)
     yhat = DT.predict(x_test)
     yhat_prob = DT.predict_proba(x_test)
 
@@ -158,7 +159,48 @@
     print('AUC: ', "{:.4f}".format(roc_auc_score(y_test, yhat_prob[:, 1])))
 
     return DT
-    # export_graphviz(DT, out_file='tree.dot', filled=True, feature_names=['cld_geo_thick', 'cld_temp_acha', 'conv_cloud_fraction', 'supercooled_cloud_fraction', 'cld_reff_dcomp', 'cld_opd_dcomp', 'iwc_dcomp'])
     # !dot -Tpng tree.dot -o tree.png
 
+
+def SVM(x, y, kernel='rbf'):
+    x_train, x_test, y_train, y_test = train_test_split( x, y, test_size=0.2, random_state=4)
+    print('Train set:', x_train.shape, y_train.shape)
+    print('Test set:', x_test.shape, y_test.shape)
+
+    x_train = np.where(np.isnan(x_train), 0, x_train)
+    x_test = np.where(np.isnan(x_test), 0, x_test)
+    print('num no icing test: ', np.sum(y_test == 0))
+    print('num icing test: ', np.sum(y_test == 1))
+
+    clf = svm.SVC(kernel=kernel)
+    clf = clf.fit(x_train, y_train)
+    yhat = clf.predict(x_test)
+
+    print('Accuracy: ', "{:.4f}".format(accuracy_score(y_test, yhat)))
+    print('Jaccard Idx: ', "{:.4f}".format(jaccard_score(y_test, yhat)))
+    print('Precision: ', "{:.4f}".format(precision_score(y_test, yhat)))
+    print('Recall: ', "{:.4f}".format(recall_score(y_test, yhat)))
+    print('F1: ', "{:.4f}".format(f1_score(y_test, yhat)))
+
+
+def random_forest(x, y, criterion='entropy', max_depth=4):
+    x_train, x_test, y_train, y_test = train_test_split( x, y, test_size=0.2, random_state=4)
+    print('Train set:', x_train.shape, y_train.shape)
+    print('Test set:', x_test.shape, y_test.shape)
+
+    x_train = np.where(np.isnan(x_train), 0, x_train)
+    x_test = np.where(np.isnan(x_test), 0, x_test)
+    print('num no icing test: ', np.sum(y_test == 0))
+    print('num icing test: ', np.sum(y_test == 1))
+
+    rnd_clf = RandomForestClassifier(criterion=criterion, max_depth=max_depth).fit(x_train, y_train)
+    yhat = rnd_clf.predict(x_test)
+    yhat_prob = rnd_clf.predict_proba(x_test)
+
+    print('Accuracy: ', "{:.4f}".format(accuracy_score(y_test, yhat)))
+    print('Jaccard Idx: ', "{:.4f}".format(jaccard_score(y_test, yhat)))
+    print('Precision: ', "{:.4f}".format(precision_score(y_test, yhat)))
+    print('Recall: ', "{:.4f}".format(recall_score(y_test, yhat)))
+    print('F1: ', "{:.4f}".format(f1_score(y_test, yhat)))
+    print('AUC: ', "{:.4f}".format(roc_auc_score(y_test, yhat_prob[:, 1])))
 
--
GitLab
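
Usage note (reviewer sketch, not part of the patch): the snippet below shows how the new SVM and random_forest entry points might be called alongside the updated decision_tree. The CSV path, the 'icing' label column, and the package-style import are assumptions for illustration; only the feature names reused from the export_graphviz comment come from the file itself.

# Hypothetical usage sketch; file path, label column, and import path are assumptions.
import numpy as np
import pandas as pd

from modules.machine_learning import classification  # assumes the repo root is on sys.path

# Assumed per-sample table with cloud features and a binary icing label (0/1).
df = pd.read_csv('icing_samples.csv')  # hypothetical file

feature_cols = ['cld_temp_acha', 'cld_reff_dcomp', 'cld_opd_dcomp']  # illustrative subset
x = df[feature_cols].values.astype(np.float32)
y = df['icing'].values.astype(np.int32)

# decision_tree and random_forest now share the criterion/max_depth keywords.
dt_model = classification.decision_tree(x, y, criterion='entropy', max_depth=4)
classification.random_forest(x, y, criterion='gini', max_depth=8)

# SVM exposes sklearn.svm.SVC's kernel argument; NaNs are zero-filled inside each function.
classification.SVM(x, y, kernel='rbf')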