From a569eda81c23937f59869f2e79a496dfa5df27f3 Mon Sep 17 00:00:00 2001
From: tomrink <rink@ssec.wisc.edu>
Date: Tue, 30 Apr 2024 14:07:04 -0500
Subject: [PATCH] classification: drop mostly-NaN iwc_dcomp column from the feature set

---
 modules/machine_learning/classification.py | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/modules/machine_learning/classification.py b/modules/machine_learning/classification.py
index 72f2227e..263ebc8c 100644
--- a/modules/machine_learning/classification.py
+++ b/modules/machine_learning/classification.py
@@ -81,14 +81,17 @@ def get_csv_as_dataframe(csv_file, reduce_frac=None, random_state=42):
 def get_feature_target_data(data_frame, standardize=True):
     icing_df = data_frame
 
-    # The independent variables (features) we want to use:
-    params = ['cld_temp_acha', 'conv_cloud_fraction', 'supercooled_cloud_fraction', 'cld_reff_dcomp',
-              'cld_opd_dcomp', 'iwc_dcomp', 'cld_cwp_dcomp']
-    # Remove this column
+    # Remove these columns: more than half of their values appear to be NaN.
     icing_df = icing_df.drop('lwc_dcomp', axis=1)
-    # Remove this column
+    icing_df = icing_df.drop('iwc_dcomp', axis=1)
+
+    # Remove this column for now.
     icing_df = icing_df.drop('cld_geo_thick', axis=1)
 
+    # The independent variables (features) we want to use:
+    params = ['cld_temp_acha', 'conv_cloud_fraction', 'supercooled_cloud_fraction', 'cld_reff_dcomp',
+              'cld_opd_dcomp', 'cld_cwp_dcomp']
+
     # Remove rows with NaN values
     # icing_df = icing_df.dropna()
 
@@ -97,7 +100,7 @@ def get_feature_target_data(data_frame, standardize=True):
     if standardize:
         x = preprocessing.StandardScaler().fit(x).transform(x)
 
-    # The dependent variable (target) ------------------------------
+    # The dependent variable (target) --------------------------------------------
     y = np.asarray(icing_df['icing_intensity'])
     y = np.where(y == -1, 0, y)
     y = np.where(y >= 1, 1, y)
-- 
GitLab