From 2c47ece998e3c2522deb409f51c45cf00c17e309 Mon Sep 17 00:00:00 2001 From: tomrink <rink@ssec.wisc.edu> Date: Fri, 26 Apr 2024 10:54:10 -0500 Subject: [PATCH] snapshot... --- modules/machine_learning/classification.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/modules/machine_learning/classification.py b/modules/machine_learning/classification.py index 3e30531f..0d4c3993 100644 --- a/modules/machine_learning/classification.py +++ b/modules/machine_learning/classification.py @@ -57,6 +57,18 @@ def get_csv_as_dataframe(csv_file, reduce_frac=None, random_state=42): if reduce_frac is not None: icing_df = icing_df.sample(axis=0, frac=reduce_frac, random_state=random_state) + # # remove approximately half of the rows where column_name equals column_value + # column_name = 'icing_intensity' + # column_value = -1 + # if column_name in icing_df.columns: + # df_to_reduce = icing_df[icing_df[column_name] == column_value] + # icing_df = icing_df[icing_df[column_name] != column_value] + # + # if reduce_frac is not None: + # df_to_reduce = df_to_reduce.sample(axis=0, frac=0.5, random_state=random_state) + # + # icing_df = pd.concat([icing_df, df_to_reduce]) + return icing_df -- GitLab