From e83957518ca474a4b1868dfdc939e99f8c49d3e4 Mon Sep 17 00:00:00 2001
From: tomrink <rink@ssec.wisc.edu>
Date: Thu, 25 Apr 2024 12:55:43 -0500
Subject: [PATCH] Rename get_train_test_data to get_feature_target_data and tidy debug prints

---
 modules/machine_learning/classification.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/modules/machine_learning/classification.py b/modules/machine_learning/classification.py
index 9ab5ed31..814af787 100644
--- a/modules/machine_learning/classification.py
+++ b/modules/machine_learning/classification.py
@@ -60,9 +60,10 @@ def get_csv_as_dataframe(csv_file, reduce_frac=None):
     return icing_df
 
 
-def get_train_test_data(data_frame, standardize=True):
+def get_feature_target_data(data_frame, standardize=True):
     icing_df = data_frame
-    # The independent variables we want to use:
+
+    # The independent variables (features) we want to use:
     params = ['cld_geo_thick', 'cld_temp_acha', 'conv_cloud_fraction', 'supercooled_cloud_fraction', 'cld_reff_dcomp',
               'cld_opd_dcomp', 'iwc_dcomp']
     # Remove this column
@@ -71,17 +72,17 @@ def get_train_test_data(data_frame, standardize=True):
     # Remove rows with NaN values
     # icing_df = icing_df.dropna()
 
-    print(icing_df.shape)
-    # icing_df = icing_df.dropna()
-    print(icing_df.shape)
+    print('num obs, features: ', icing_df.shape)
 
     x = np.asarray(icing_df[params])
     if standardize:
         x = preprocessing.StandardScaler().fit(x).transform(x)
+
+    # The dependent variable (target) ------------------------------
     y = np.asarray(icing_df['icing_intensity'])
     y = np.where(y == -1, 0, y)
     y = np.where(y >= 1, 1, y)
-    print(x.shape, y.shape)
+
     print('num no icing: ', np.sum(y == 0))
     print('num icing: ', np.sum(y == 1))
 
-- 
GitLab