From 61038eee84c1ffd7aa656a61943ff1293c908e50 Mon Sep 17 00:00:00 2001
From: tomrink <rink@ssec.wisc.edu>
Date: Fri, 10 May 2024 10:27:44 -0500
Subject: [PATCH] Drop cld_cwp_dcomp from params; persist scaler and GBM with joblib

---
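
Why the scaler is saved: StandardScaler learns the training-set mean and
variance, and those same statistics must be reapplied to any sample scored
later, so the fitted object is dumped with joblib alongside the training
run. A minimal sketch of reloading it at inference time, assuming the
pickle written by this patch exists; standardize_like_training() is
illustrative and not part of the module:

    import joblib
    import numpy as np

    # Reload the scaler fitted in get_feature_target_data().
    scaler = joblib.load('/Users/tomrink/stdSclr_4.pkl')

    def standardize_like_training(x_raw):
        # Mirror training: transform with the saved statistics, then zero out NaNs.
        x = scaler.transform(np.asarray(x_raw))
        return np.where(np.isnan(x), 0, x)
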
 modules/machine_learning/classification.py | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)
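
The trained classifier is persisted the same way when save_model=True, so
it can score new data without retraining. A minimal sketch, assuming the
pickle below exists and the input has already been standardized with the
saved scaler; the zero-filled sample is a placeholder:

    import joblib
    import numpy as np

    gbm = joblib.load('/Users/tomrink/icing_gbm.pkl')

    # One placeholder sample with the four `params` features, already standardized.
    x_new = np.zeros((1, 4))
    yhat = gbm.predict(x_new)
    yhat_prob = gbm.predict_proba(x_new)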

diff --git a/modules/machine_learning/classification.py b/modules/machine_learning/classification.py
index 9832ee74..8b8cfcc6 100644
--- a/modules/machine_learning/classification.py
+++ b/modules/machine_learning/classification.py
@@ -10,13 +10,14 @@ from sklearn.linear_model import LogisticRegression
 from sklearn.neighbors import KNeighborsClassifier
 from sklearn.tree import DecisionTreeClassifier
 from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, GradientBoostingRegressor
 import itertools
+import joblib
 import sklearn.tree as tree
 from sklearn.tree import export_graphviz
 
 # The independent variables (features) we want to use:
-params = ['cld_temp_acha', 'supercooled_cloud_fraction', 'cld_reff_dcomp', 'cld_opd_dcomp', 'cld_cwp_dcomp']
-# params = ['cld_temp_acha', 'supercooled_cloud_fraction', 'cld_reff_dcomp', 'cld_opd_dcomp']
+# params = ['cld_temp_acha', 'supercooled_cloud_fraction', 'cld_reff_dcomp', 'cld_opd_dcomp', 'cld_cwp_dcomp']
+params = ['cld_temp_acha', 'supercooled_cloud_fraction', 'cld_reff_dcomp', 'cld_opd_dcomp']
 
 
 def metrics(y_true, y_pred, y_pred_prob=None):
@@ -93,8 +94,11 @@ def get_feature_target_data(csv_file, reduce_frac=1.0, random_state=42, standard
 
     x = np.asarray(icing_df[params])
     if standardize:
-        x = preprocessing.StandardScaler().fit(x).transform(x)
+        scaler = preprocessing.StandardScaler()
+        x = scaler.fit_transform(x)
         x = np.where(np.isnan(x), 0, x)
+        # Persist the fitted scaler so inference can reuse the training-set statistics.
+        joblib.dump(scaler, '/Users/tomrink/stdSclr_4.pkl')
 
     # The dependent variable (target) --------------------------------------------
     y = np.asarray(icing_df['icing_intensity'])
@@ -231,7 +235,7 @@ def random_forest(x_train, y_train, x_test, y_test, criterion='entropy', max_dep
     metrics(y_test, yhat, y_pred_prob=yhat_prob)
 
 
-def gradient_boosting(x_train, y_train, x_test, y_test, n_estimators=100, max_depth=3, learning_rate=0.1):
+def gradient_boosting(x_train, y_train, x_test, y_test, n_estimators=100, max_depth=3, learning_rate=0.1, save_model=True):
 
-    gbm = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=3)
+    gbm = GradientBoostingClassifier(n_estimators=n_estimators, learning_rate=learning_rate, max_depth=max_depth)
     gbm.fit(x_train, y_train)
@@ -239,3 +243,6 @@ def gradient_boosting(x_train, y_train, x_test, y_test, n_estimators=100, max_de
     yhat_prob = gbm.predict_proba(x_test)
 
     metrics(y_test, yhat, y_pred_prob=yhat_prob)
+
+    if save_model:
+        joblib.dump(gbm, '/Users/tomrink/icing_gbm.pkl')
-- 
GitLab