From 61038eee84c1ffd7aa656a61943ff1293c908e50 Mon Sep 17 00:00:00 2001
From: tomrink <rink@ssec.wisc.edu>
Date: Fri, 10 May 2024 10:27:44 -0500
Subject: [PATCH] snapshot...

---
 modules/machine_learning/classification.py | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/modules/machine_learning/classification.py b/modules/machine_learning/classification.py
index 9832ee74..8b8cfcc6 100644
--- a/modules/machine_learning/classification.py
+++ b/modules/machine_learning/classification.py
@@ -10,13 +10,13 @@ from sklearn.linear_model import LogisticRegression
 from sklearn.neighbors import KNeighborsClassifier
 from sklearn.tree import DecisionTreeClassifier
 from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, GradientBoostingRegressor
-import itertools
+import itertools, joblib
 import sklearn.tree as tree
 from sklearn.tree import export_graphviz
 
 # The independent variables (features) we want to use:
-params = ['cld_temp_acha', 'supercooled_cloud_fraction', 'cld_reff_dcomp', 'cld_opd_dcomp', 'cld_cwp_dcomp']
-# params = ['cld_temp_acha', 'supercooled_cloud_fraction', 'cld_reff_dcomp', 'cld_opd_dcomp']
+# params = ['cld_temp_acha', 'supercooled_cloud_fraction', 'cld_reff_dcomp', 'cld_opd_dcomp', 'cld_cwp_dcomp']
+params = ['cld_temp_acha', 'supercooled_cloud_fraction', 'cld_reff_dcomp', 'cld_opd_dcomp']
 
 
 def metrics(y_true, y_pred, y_pred_prob=None):
@@ -93,8 +93,11 @@ def get_feature_target_data(csv_file, reduce_frac=1.0, random_state=42, standard
     x = np.asarray(icing_df[params])
 
     if standardize:
-        x = preprocessing.StandardScaler().fit(x).transform(x)
+        stdSclr = preprocessing.StandardScaler()
+        stdSclr.fit(x)
+        x = stdSclr.transform(x)
         x = np.where(np.isnan(x), 0, x)
+        joblib.dump(stdSclr, '/Users/tomrink/stdSclr_4.pkl')
 
     # The dependent variable (target) --------------------------------------------
     y = np.asarray(icing_df['icing_intensity'])
@@ -231,7 +234,7 @@ def random_forest(x_train, y_train, x_test, y_test, criterion='entropy', max_dep
     metrics(y_test, yhat, y_pred_prob=yhat_prob)
 
 
-def gradient_boosting(x_train, y_train, x_test, y_test, n_estimators=100, max_depth=3, learning_rate=0.1):
+def gradient_boosting(x_train, y_train, x_test, y_test, n_estimators=100, max_depth=3, learning_rate=0.1, saveModel=True):
     gbm = GradientBoostingClassifier(n_estimators=100, learning_rate=0.1, max_depth=3)
 
     gbm.fit(x_train, y_train)
@@ -239,3 +242,6 @@ def gradient_boosting(x_train, y_train, x_test, y_test, n_estimators=100, max_de
     yhat = gbm.predict(x_test)
     yhat_prob = gbm.predict_proba(x_test)
     metrics(y_test, yhat, y_pred_prob=yhat_prob)
+
+    if saveModel:
+        joblib.dump(gbm, '/Users/tomrink/icing_gbm.pkl')
--
GitLab
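
Note (reviewer comment, not part of the patch): gradient_boosting() now accepts n_estimators, max_depth, learning_rate, and saveModel, but the GradientBoostingClassifier(...) call is an unchanged context line that still hard-codes n_estimators=100, learning_rate=0.1, max_depth=3, so the first three keyword arguments are silently ignored. A possible follow-up inside classification.py, sketched under the assumption that the arguments are meant to be honored:

    def gradient_boosting(x_train, y_train, x_test, y_test,
                          n_estimators=100, max_depth=3, learning_rate=0.1, saveModel=True):
        # Wire the keyword arguments through instead of hard-coding the values.
        gbm = GradientBoostingClassifier(n_estimators=n_estimators,
                                         learning_rate=learning_rate,
                                         max_depth=max_depth)
        gbm.fit(x_train, y_train)

        yhat = gbm.predict(x_test)
        yhat_prob = gbm.predict_proba(x_test)
        metrics(y_test, yhat, y_pred_prob=yhat_prob)

        if saveModel:
            joblib.dump(gbm, '/Users/tomrink/icing_gbm.pkl')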
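
Note (reviewer comment, not part of the patch): with this change both the fitted StandardScaler and the trained GradientBoostingClassifier are persisted with joblib, so new data can be scored without refitting. A minimal inference-time sketch of how the two artifacts would presumably be reloaded and applied together; the paths are the hard-coded ones from the patch, predict_icing is a hypothetical helper name, and the NaN-to-zero step is repeated because in the training path the scaler output is only cleaned after transform:

    import numpy as np
    import joblib

    def predict_icing(x_new):
        # x_new: shape (n_samples, len(params)), columns in the same order as `params`.
        scaler = joblib.load('/Users/tomrink/stdSclr_4.pkl')   # fitted StandardScaler
        gbm = joblib.load('/Users/tomrink/icing_gbm.pkl')      # trained classifier

        x = scaler.transform(np.asarray(x_new))
        x = np.where(np.isnan(x), 0, x)  # mirror the training-time NaN handling
        return gbm.predict(x), gbm.predict_proba(x)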