From e83957518ca474a4b1868dfdc939e99f8c49d3e4 Mon Sep 17 00:00:00 2001 From: tomrink <rink@ssec.wisc.edu> Date: Thu, 25 Apr 2024 12:55:43 -0500 Subject: [PATCH] snapshot... --- modules/machine_learning/classification.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/modules/machine_learning/classification.py b/modules/machine_learning/classification.py index 9ab5ed31..814af787 100644 --- a/modules/machine_learning/classification.py +++ b/modules/machine_learning/classification.py @@ -60,9 +60,10 @@ def get_csv_as_dataframe(csv_file, reduce_frac=None): return icing_df -def get_train_test_data(data_frame, standardize=True): +def get_feature_target_data(data_frame, standardize=True): icing_df = data_frame - # The independent variables we want to use: + + # The independent variables (features) we want to use: params = ['cld_geo_thick', 'cld_temp_acha', 'conv_cloud_fraction', 'supercooled_cloud_fraction', 'cld_reff_dcomp', 'cld_opd_dcomp', 'iwc_dcomp'] # Remove this column @@ -71,17 +72,17 @@ def get_train_test_data(data_frame, standardize=True): # Remove rows with NaN values # icing_df = icing_df.dropna() - print(icing_df.shape) - # icing_df = icing_df.dropna() - print(icing_df.shape) + print('num obs, features: ', icing_df.shape) x = np.asarray(icing_df[params]) if standardize: x = preprocessing.StandardScaler().fit(x).transform(x) + + # The dependent variable (target) ------------------------------ y = np.asarray(icing_df['icing_intensity']) y = np.where(y == -1, 0, y) y = np.where(y >= 1, 1, y) - print(x.shape, y.shape) + print('num no icing: ', np.sum(y == 0)) print('num icing: ', np.sum(y == 1)) -- GitLab