Skip to content
Snippets Groups Projects
Commit 0cd1d076 authored by tomrink
Browse files

snapshot...

parent f0360e57
No related branches found
No related tags found
No related merge requests found
......@@ -17,7 +17,6 @@ from sklearn.tree import export_graphviz
# The independent variables (features) we want to use:
params = ['cld_temp_acha', 'conv_cloud_fraction', 'supercooled_cloud_fraction', 'cld_reff_dcomp',
'cld_opd_dcomp', 'cld_cwp_dcomp']
# params = ['supercooled_cloud_fraction', 'cld_temp_acha']
def metrics(y_true, y_pred, y_pred_prob=None):
......@@ -72,7 +71,7 @@ def plot_confusion_matrix(cm, classes,
plt.xlabel('Predicted label')
def get_feature_target_data(csv_file, reduce_frac=1.0, random_state=42, standardize=True):
def get_feature_target_data(csv_file, reduce_frac=1.0, random_state=42, standardize=True, remove_nan=False):
icing_df = pd.read_csv(csv_file)
# Random selection of reduce_frac of the rows
......@@ -85,11 +84,12 @@ def get_feature_target_data(csv_file, reduce_frac=1.0, random_state=42, standard
# Remove this column for now.
icing_df = icing_df.drop('cld_geo_thick', axis=1)
# Remove rows with NaN values
# icing_df = icing_df.dropna()
print('num obs, features: ', icing_df.shape)
if remove_nan:
icing_df = icing_df.dropna()
print('NaN removed num obs, features: ', icing_df.shape)
x = np.asarray(icing_df[params])
print('num obs, features: ', x.shape)
if standardize:
x = preprocessing.StandardScaler().fit(x).transform(x)
x = np.where(np.isnan(x), 0, x)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment