From a569eda81c23937f59869f2e79a496dfa5df27f3 Mon Sep 17 00:00:00 2001 From: tomrink <rink@ssec.wisc.edu> Date: Tue, 30 Apr 2024 14:07:04 -0500 Subject: [PATCH] snapshot... --- modules/machine_learning/classification.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/modules/machine_learning/classification.py b/modules/machine_learning/classification.py index 72f2227e..263ebc8c 100644 --- a/modules/machine_learning/classification.py +++ b/modules/machine_learning/classification.py @@ -81,14 +81,17 @@ def get_csv_as_dataframe(csv_file, reduce_frac=None, random_state=42): def get_feature_target_data(data_frame, standardize=True): icing_df = data_frame - # The independent variables (features) we want to use: - params = ['cld_temp_acha', 'conv_cloud_fraction', 'supercooled_cloud_fraction', 'cld_reff_dcomp', - 'cld_opd_dcomp', 'iwc_dcomp', 'cld_cwp_dcomp'] - # Remove this column + # Remove these, more than half seem to be NaN icing_df = icing_df.drop('lwc_dcomp', axis=1) - # Remove this column + icing_df = icing_df.drop('iwc_dcomp', axis=1) + + # Remove this column for now. icing_df = icing_df.drop('cld_geo_thick', axis=1) + # The independent variables (features) we want to use: + params = ['cld_temp_acha', 'conv_cloud_fraction', 'supercooled_cloud_fraction', 'cld_reff_dcomp', + 'cld_opd_dcomp', 'cld_cwp_dcomp'] + # Remove rows with NaN values # icing_df = icing_df.dropna() @@ -97,7 +100,7 @@ def get_feature_target_data(data_frame, standardize=True): if standardize: x = preprocessing.StandardScaler().fit(x).transform(x) - # The dependent variable (target) ------------------------------ + # The dependent variable (target) -------------------------------------------- y = np.asarray(icing_df['icing_intensity']) y = np.where(y == -1, 0, y) y = np.where(y >= 1, 1, y) -- GitLab