From 2c47ece998e3c2522deb409f51c45cf00c17e309 Mon Sep 17 00:00:00 2001
From: tomrink <rink@ssec.wisc.edu>
Date: Fri, 26 Apr 2024 10:54:10 -0500
Subject: [PATCH] snapshot...

---
 modules/machine_learning/classification.py | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/modules/machine_learning/classification.py b/modules/machine_learning/classification.py
index 3e30531f..0d4c3993 100644
--- a/modules/machine_learning/classification.py
+++ b/modules/machine_learning/classification.py
@@ -57,6 +57,18 @@ def get_csv_as_dataframe(csv_file, reduce_frac=None, random_state=42):
     if reduce_frac is not None:
         icing_df = icing_df.sample(axis=0, frac=reduce_frac, random_state=random_state)
 
+    # # remove approximately half of the rows where column_name equals column_value
+    # column_name = 'icing_intensity'
+    # column_value = -1
+    # if column_name in icing_df.columns:
+    #     df_to_reduce = icing_df[icing_df[column_name] == column_value]
+    #     icing_df = icing_df[icing_df[column_name] != column_value]
+    #
+    #     if reduce_frac is not None:
+    #         df_to_reduce = df_to_reduce.sample(axis=0, frac=0.5, random_state=random_state)
+    #
+    #     icing_df = pd.concat([icing_df, df_to_reduce])
+
     return icing_df
 
 
-- 
GitLab