Commit ed9e6b64 authored by tomrink

snapshot...

parent a569eda8
@@ -15,6 +15,17 @@ import sklearn.tree as tree
 from sklearn.tree import export_graphviz
+def metrics(y_true, y_pred, y_pred_prob=None):
+    print(confusion_matrix(y_true, y_pred, labels=[1,0]))
+    print('Accuracy: ', "{:.4f}".format(accuracy_score(y_true, y_pred)))
+    print('Jaccard Idx: ', "{:.4f}".format(jaccard_score(y_true, y_pred)))
+    print('Precision: ', "{:.4f}".format(precision_score(y_true, y_pred)))
+    print('Recall: ', "{:.4f}".format(recall_score(y_true, y_pred)))
+    print('F1: ', "{:.4f}".format(f1_score(y_true, y_pred)))
+    if y_pred_prob is not None:
+        print('AUC: ', "{:.4f}".format(roc_auc_score(y_true, y_pred_prob[:, 1])))
 def analyze(dataFrame):
     no_icing_df = dataFrame[dataFrame['icing_intensity'] == -1]
     icing_df = dataFrame[dataFrame['icing_intensity'] >= 1]
@@ -99,6 +110,7 @@ def get_feature_target_data(data_frame, standardize=True):
     print('num obs, features: ', x.shape)
     if standardize:
         x = preprocessing.StandardScaler().fit(x).transform(x)
+        x = np.where(np.isnan(x), 0, x)
     # The dependent variable (target) --------------------------------------------
     y = np.asarray(icing_df['icing_intensity'])
@@ -111,12 +123,8 @@ def get_feature_target_data(data_frame, standardize=True):
     return x, y
-def logistic_regression(x, y, x_test=None, y_test=None):
-    if x_test is None:
-        x_train, x_test, y_train, y_test = train_test_split( x, y, test_size=0.2, random_state=4)
-    else:
-        x_train = x
-        y_train = y
+def logistic_regression(x_train, y_train, x_test, y_test):
     print('Train set:', x_train.shape, y_train.shape)
     print('Test set:', x_test.shape, y_test.shape)
@@ -129,17 +137,19 @@ def logistic_regression(x, y, x_test=None, y_test=None):
     yhat = LR.predict(x_test)
     yhat_prob = LR.predict_proba(x_test)
-    print(confusion_matrix(y_test, yhat, labels=[1,0]))
-    print('Accuracy: ', "{:.4f}".format(accuracy_score(y_test, yhat)))
-    print('Jaccard Idx: ', "{:.4f}".format(jaccard_score(y_test, yhat)))
-    print('Precision: ', "{:.4f}".format(precision_score(y_test, yhat)))
-    print('Recall: ', "{:.4f}".format(recall_score(y_test, yhat)))
-    print('F1: ', "{:.4f}".format(f1_score(y_test, yhat)))
-    print('AUC: ', "{:.4f}".format(roc_auc_score(y_test, yhat_prob[:, 1])))
+    metrics(y_test, yhat, y_pred_prob=yhat_prob)
+    # print(confusion_matrix(y_test, yhat, labels=[1,0]))
+    # print('Accuracy: ', "{:.4f}".format(accuracy_score(y_test, yhat)))
+    # print('Jaccard Idx: ', "{:.4f}".format(jaccard_score(y_test, yhat)))
+    # print('Precision: ', "{:.4f}".format(precision_score(y_test, yhat)))
+    # print('Recall: ', "{:.4f}".format(recall_score(y_test, yhat)))
+    # print('F1: ', "{:.4f}".format(f1_score(y_test, yhat)))
+    # print('AUC: ', "{:.4f}".format(roc_auc_score(y_test, yhat_prob[:, 1])))
-def k_nearest_neighbors(x, y, k=4):
-    x_train, x_test, y_train, y_test = train_test_split( x, y, test_size=0.2, random_state=4)
+def k_nearest_neighbors(x_train, y_train, x_test, y_test, k=4):
     print('Train set:', x_train.shape, y_train.shape)
     print('Test set:', x_test.shape, y_test.shape)
@@ -152,12 +162,14 @@ def k_nearest_neighbors(x, y, k=4):
     yhat = KN_C.predict(x_test)
     yhat_prob = KN_C.predict_proba(x_test)
-    print('Accuracy: ', "{:.4f}".format(accuracy_score(y_test, yhat)))
-    print('Jaccard Idx: ', "{:.4f}".format(jaccard_score(y_test, yhat)))
-    print('Precision: ', "{:.4f}".format(precision_score(y_test, yhat)))
-    print('Recall: ', "{:.4f}".format(recall_score(y_test, yhat)))
-    print('F1: ', "{:.4f}".format(f1_score(y_test, yhat)))
-    print('AUC: ', "{:.4f}".format(roc_auc_score(y_test, yhat_prob[:, 1])))
+    metrics(y_test, yhat, y_pred_prob=yhat_prob)
+    # print('Accuracy: ', "{:.4f}".format(accuracy_score(y_test, yhat)))
+    # print('Jaccard Idx: ', "{:.4f}".format(jaccard_score(y_test, yhat)))
+    # print('Precision: ', "{:.4f}".format(precision_score(y_test, yhat)))
+    # print('Recall: ', "{:.4f}".format(recall_score(y_test, yhat)))
+    # print('F1: ', "{:.4f}".format(f1_score(y_test, yhat)))
+    # print('AUC: ', "{:.4f}".format(roc_auc_score(y_test, yhat_prob[:, 1])))
 def k_nearest_neighbors_all(x, y, k_s=10):
@@ -178,12 +190,13 @@ def k_nearest_neighbors_all(x, y, k_s=10):
         KN_C = KNeighborsClassifier(n_neighbors=n).fit(x_train, y_train)
         yhat = KN_C.predict(x_test)
         yhat_prob = KN_C.predict_proba(x_test)
-        print('Accuracy: ', "{:.4f}".format(accuracy_score(y_test, yhat)))
-        print('Jaccard Idx: ', "{:.4f}".format(jaccard_score(y_test, yhat)))
-        print('Precision: ', "{:.4f}".format(precision_score(y_test, yhat)))
-        print('Recall: ', "{:.4f}".format(recall_score(y_test, yhat)))
-        print('F1: ', "{:.4f}".format(f1_score(y_test, yhat)))
-        print('AUC: ', "{:.4f}".format(roc_auc_score(y_test, yhat_prob[:, 1])))
+        metrics(y_test, yhat, y_pred_prob=yhat_prob)
+        # print('Accuracy: ', "{:.4f}".format(accuracy_score(y_test, yhat)))
+        # print('Jaccard Idx: ', "{:.4f}".format(jaccard_score(y_test, yhat)))
+        # print('Precision: ', "{:.4f}".format(precision_score(y_test, yhat)))
+        # print('Recall: ', "{:.4f}".format(recall_score(y_test, yhat)))
+        # print('F1: ', "{:.4f}".format(f1_score(y_test, yhat)))
+        # print('AUC: ', "{:.4f}".format(roc_auc_score(y_test, yhat_prob[:, 1])))
         mean_acc[n - 1] = accuracy_score(y_test, yhat)
         std_acc[n - 1] = np.std(yhat == y_test) / np.sqrt(yhat.shape[0])
@@ -201,7 +214,7 @@ def k_nearest_neighbors_all(x, y, k_s=10):
 def decision_tree(x_train, y_train, x_test, y_test, criterion='entropy', max_depth=4):
+    # x_train, x_test, y_train, y_test = train_test_split( x, y, test_size=0.2, random_state=4)
     print('Train set:', x_train.shape, y_train.shape)
     print('Test set:', x_test.shape, y_test.shape)
@@ -214,21 +227,22 @@ def decision_tree(x_train, y_train, x_test, y_test, criterion='entropy', max_depth=4):
     yhat = DT.predict(x_test)
     yhat_prob = DT.predict_proba(x_test)
-    print(confusion_matrix(y_test, yhat, labels=[1, 0]))
-    print('Accuracy: ', "{:.4f}".format(accuracy_score(y_test, yhat)))
-    print('Jaccard Idx: ', "{:.4f}".format(jaccard_score(y_test, yhat)))
-    print('Precision: ', "{:.4f}".format(precision_score(y_test, yhat)))
-    print('Recall: ', "{:.4f}".format(recall_score(y_test, yhat)))
-    print('F1: ', "{:.4f}".format(f1_score(y_test, yhat)))
-    print('AUC: ', "{:.4f}".format(roc_auc_score(y_test, yhat_prob[:, 1])))
+    metrics(y_test, yhat, y_pred_prob=yhat_prob)
+    # print(confusion_matrix(y_test, yhat, labels=[1, 0]))
+    # print('Accuracy: ', "{:.4f}".format(accuracy_score(y_test, yhat)))
+    # print('Jaccard Idx: ', "{:.4f}".format(jaccard_score(y_test, yhat)))
+    # print('Precision: ', "{:.4f}".format(precision_score(y_test, yhat)))
+    # print('Recall: ', "{:.4f}".format(recall_score(y_test, yhat)))
+    # print('F1: ', "{:.4f}".format(f1_score(y_test, yhat)))
+    # print('AUC: ', "{:.4f}".format(roc_auc_score(y_test, yhat_prob[:, 1])))
     return DT
     # export_graphviz(DT, out_file='tree.dot', filled=True, feature_names=['cld_geo_thick', 'cld_temp_acha', 'conv_cloud_fraction', 'supercooled_cloud_fraction', 'cld_reff_dcomp', 'cld_opd_dcomp', 'iwc_dcomp'])
     # !dot -Tpng tree.dot -o tree.png
-def SVM(x, y, kernel='rbf'):
-    x_train, x_test, y_train, y_test = train_test_split( x, y, test_size=0.2, random_state=4)
+def SVM(x_train, y_train, x_test, y_test, kernel='rbf'):
     print('Train set:', x_train.shape, y_train.shape)
     print('Test set:', x_test.shape, y_test.shape)
@@ -241,19 +255,17 @@ def SVM(x, y, kernel='rbf'):
     clf = clf.fit(x_train, y_train)
     yhat = clf.predict(x_test)
-    print('Accuracy: ', "{:.4f}".format(accuracy_score(y_test, yhat)))
-    print('Jaccard Idx: ', "{:.4f}".format(jaccard_score(y_test, yhat)))
-    print('Precision: ', "{:.4f}".format(precision_score(y_test, yhat)))
-    print('Recall: ', "{:.4f}".format(recall_score(y_test, yhat)))
-    print('F1: ', "{:.4f}".format(f1_score(y_test, yhat)))
+    metrics(y_test, yhat)
+    # print('Accuracy: ', "{:.4f}".format(accuracy_score(y_test, yhat)))
+    # print('Jaccard Idx: ', "{:.4f}".format(jaccard_score(y_test, yhat)))
+    # print('Precision: ', "{:.4f}".format(precision_score(y_test, yhat)))
+    # print('Recall: ', "{:.4f}".format(recall_score(y_test, yhat)))
+    # print('F1: ', "{:.4f}".format(f1_score(y_test, yhat)))
-def random_forest(x, y, x_test=None, y_test=None, criterion='entropy', max_depth=4):
-    if x_test is None:
-        x_train, x_test, y_train, y_test = train_test_split( x, y, test_size=0.2, random_state=4)
-    else:
-        x_train = x
-        y_train = y
+def random_forest(x_train, y_train, x_test, y_test, criterion='entropy', max_depth=4):
     print('Train set:', x_train.shape, y_train.shape)
     print('Test set:', x_test.shape, y_test.shape)
@@ -266,9 +278,10 @@ def random_forest(x, y, x_test=None, y_test=None, criterion='entropy', max_depth=4):
     yhat = rnd_clf.predict(x_test)
     yhat_prob = rnd_clf.predict_proba(x_test)
-    print('Accuracy: ', "{:.4f}".format(accuracy_score(y_test, yhat)))
-    print('Jaccard Idx: ', "{:.4f}".format(jaccard_score(y_test, yhat)))
-    print('Precision: ', "{:.4f}".format(precision_score(y_test, yhat)))
-    print('Recall: ', "{:.4f}".format(recall_score(y_test, yhat)))
-    print('F1: ', "{:.4f}".format(f1_score(y_test, yhat)))
-    print('AUC: ', "{:.4f}".format(roc_auc_score(y_test, yhat_prob[:, 1])))
+    metrics(y_test, yhat, y_pred_prob=yhat_prob)
+    # print('Accuracy: ', "{:.4f}".format(accuracy_score(y_test, yhat)))
+    # print('Jaccard Idx: ', "{:.4f}".format(jaccard_score(y_test, yhat)))
+    # print('Precision: ', "{:.4f}".format(precision_score(y_test, yhat)))
+    # print('Recall: ', "{:.4f}".format(recall_score(y_test, yhat)))
+    # print('F1: ', "{:.4f}".format(f1_score(y_test, yhat)))
+    # print('AUC: ', "{:.4f}".format(roc_auc_score(y_test, yhat_prob[:, 1])))
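Below is a minimal usage sketch (not part of the commit) of the refactored interface: after this change the caller performs the train/test split once and passes the same arrays to every model function, and each function reports its scores through the new metrics() helper. The pandas import, the hypothetical icing.csv path, and the top-level script layout are illustrative assumptions only.

# Hedged sketch: assumes the functions changed in this commit (get_feature_target_data,
# logistic_regression, k_nearest_neighbors, decision_tree, SVM, random_forest) are in scope;
# 'icing.csv' is a hypothetical input file with an 'icing_intensity' column.
import pandas as pd
from sklearn.model_selection import train_test_split

df = pd.read_csv('icing.csv')
x, y = get_feature_target_data(df, standardize=True)   # NaNs are now replaced with 0 after scaling

# Split once so every classifier sees identical train/test data and the metrics are comparable.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=4)

logistic_regression(x_train, y_train, x_test, y_test)        # prints via metrics(), including AUC
k_nearest_neighbors(x_train, y_train, x_test, y_test, k=4)
decision_tree(x_train, y_train, x_test, y_test)
SVM(x_train, y_train, x_test, y_test, kernel='rbf')          # metrics() without AUC (no predict_proba used)
random_forest(x_train, y_train, x_test, y_test)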