Skip to content
Snippets Groups Projects
Commit 99573c54 authored by tomrink's avatar tomrink
Browse files

snapshot...

parent d9cad14a
No related branches found
No related tags found
No related merge requests found
......@@ -15,8 +15,14 @@ import sklearn.tree as tree
from sklearn.tree import export_graphviz
# The independent variables (features) we want to use:
# params = ['cld_temp_acha', 'supercooled_cloud_fraction', 'cld_reff_dcomp', 'cld_opd_dcomp', 'cld_cwp_dcomp']
params = ['cld_temp_acha', 'supercooled_cloud_fraction', 'cld_reff_dcomp', 'cld_opd_dcomp']
# feature_params = ['cld_temp_acha', 'supercooled_cloud_fraction', 'cld_reff_dcomp', 'cld_opd_dcomp', 'cld_cwp_dcomp']
# feature_params = ['cld_temp_acha', 'supercooled_cloud_fraction', 'cld_reff_dcomp', 'cld_opd_dcomp']
feature_params = ['cld_temp_acha', 'supercooled_cloud_fraction', 'cld_reff_acha', 'cld_opd_acha']
# The dependent variable (target)
target_param = 'icing_intensity'
params = feature_params + [target_param]
def metrics(y_true, y_pred, y_pred_prob=None):
......@@ -74,15 +80,16 @@ def plot_confusion_matrix(cm, classes,
def get_feature_target_data(csv_file, reduce_frac=1.0, random_state=42, standardize=True, remove_nan=False):
icing_df = pd.read_csv(csv_file)
icing_df = icing_df[params]
# Random selection of reduce_frac of the rows
icing_df = icing_df.sample(axis=0, frac=reduce_frac, random_state=random_state)
# Remove these, more than half seem to be NaN
icing_df = icing_df.drop('lwc_dcomp', axis=1)
icing_df = icing_df.drop('iwc_dcomp', axis=1)
# Remove this column for now.
icing_df = icing_df.drop('cld_geo_thick', axis=1)
# # Remove these, more than half seem to be NaN
# icing_df = icing_df.drop('lwc_dcomp', axis=1)
# icing_df = icing_df.drop('iwc_dcomp', axis=1)
# # Remove this column for now.
# icing_df = icing_df.drop('cld_geo_thick', axis=1)
print('num obs, features: ', icing_df.shape)
if remove_nan:
......@@ -91,7 +98,7 @@ def get_feature_target_data(csv_file, reduce_frac=1.0, random_state=42, standard
# icing_df = icing_df[icing_df.cld_temp_acha < 273.10]
x = np.asarray(icing_df[params])
x = np.asarray(icing_df[feature_params])
if standardize:
stdSclr = preprocessing.StandardScaler()
stdSclr.fit(x)
......@@ -100,7 +107,7 @@ def get_feature_target_data(csv_file, reduce_frac=1.0, random_state=42, standard
joblib.dump(stdSclr, '/Users/tomrink/stdSclr_4.pkl')
# The dependent variable (target) --------------------------------------------
y = np.asarray(icing_df['icing_intensity'])
y = np.asarray(icing_df[target_param])
y = np.where(y == -1, 0, y)
y = np.where(y >= 1, 1, y)
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment