From 0cd1d076c7f3a98a4c4ed9f5127fdb922d2b910e Mon Sep 17 00:00:00 2001 From: tomrink <rink@ssec.wisc.edu> Date: Thu, 2 May 2024 11:06:45 -0500 Subject: [PATCH] snapshot... --- modules/machine_learning/classification.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/modules/machine_learning/classification.py b/modules/machine_learning/classification.py index 3ebf3b8a..c5f97d62 100644 --- a/modules/machine_learning/classification.py +++ b/modules/machine_learning/classification.py @@ -17,7 +17,6 @@ from sklearn.tree import export_graphviz # The independent variables (features) we want to use: params = ['cld_temp_acha', 'conv_cloud_fraction', 'supercooled_cloud_fraction', 'cld_reff_dcomp', 'cld_opd_dcomp', 'cld_cwp_dcomp'] -# params = ['supercooled_cloud_fraction', 'cld_temp_acha'] def metrics(y_true, y_pred, y_pred_prob=None): @@ -72,7 +71,7 @@ def plot_confusion_matrix(cm, classes, plt.xlabel('Predicted label') -def get_feature_target_data(csv_file, reduce_frac=1.0, random_state=42, standardize=True): +def get_feature_target_data(csv_file, reduce_frac=1.0, random_state=42, standardize=True, remove_nan=False): icing_df = pd.read_csv(csv_file) # Random selection of reduce_frac of the rows @@ -85,11 +84,12 @@ def get_feature_target_data(csv_file, reduce_frac=1.0, random_state=42, standard # Remove this column for now. icing_df = icing_df.drop('cld_geo_thick', axis=1) - # Remove rows with NaN values - # icing_df = icing_df.dropna() + print('num obs, features: ', icing_df.shape) + if remove_nan: + icing_df = icing_df.dropna() + print('NaN removed num obs, features: ', icing_df.shape) x = np.asarray(icing_df[params]) - print('num obs, features: ', x.shape) if standardize: x = preprocessing.StandardScaler().fit(x).transform(x) x = np.where(np.isnan(x), 0, x) -- GitLab