diff --git a/modules/machine_learning/classification.py b/modules/machine_learning/classification.py index c5f97d628e56ece39f0634e1ac8e78aea6eef977..fcbf6c2656f2f06769c6f2c3c33ffa25152b078a 100644 --- a/modules/machine_learning/classification.py +++ b/modules/machine_learning/classification.py @@ -15,8 +15,7 @@ import sklearn.tree as tree from sklearn.tree import export_graphviz # The independent variables (features) we want to use: -params = ['cld_temp_acha', 'conv_cloud_fraction', 'supercooled_cloud_fraction', 'cld_reff_dcomp', - 'cld_opd_dcomp', 'cld_cwp_dcomp'] +params = ['cld_temp_acha', 'supercooled_cloud_fraction', 'cld_reff_dcomp', 'cld_opd_dcomp', 'cld_cwp_dcomp'] def metrics(y_true, y_pred, y_pred_prob=None): @@ -89,6 +88,8 @@ def get_feature_target_data(csv_file, reduce_frac=1.0, random_state=42, standard icing_df = icing_df.dropna() print('NaN removed num obs, features: ', icing_df.shape) + # icing_df = icing_df[icing_df.cld_temp_acha < 273.5] + x = np.asarray(icing_df[params]) if standardize: x = preprocessing.StandardScaler().fit(x).transform(x)