Skip to content
Snippets Groups Projects
Commit ed9e6b64 authored by tomrink's avatar tomrink
Browse files

snapshot...

parent a569eda8
No related branches found
No related tags found
No related merge requests found
......@@ -15,6 +15,17 @@ import sklearn.tree as tree
from sklearn.tree import export_graphviz
def metrics(y_true, y_pred, y_pred_prob=None):
    """Print a confusion matrix and summary scores for a set of predictions.

    The confusion matrix is requested with label order [1, 0]. Accuracy,
    Jaccard index, precision, recall and F1 are then printed, each formatted
    to four decimal places. AUC is printed only when probabilities are given.

    Args:
        y_true: ground-truth labels.
        y_pred: predicted labels.
        y_pred_prob: optional 2-D array of class probabilities; column 1 is
            used as the score for ROC AUC (presumably the positive class --
            confirm against the classifier's class ordering).
    """
    print(confusion_matrix(y_true, y_pred, labels=[1, 0]))

    # Label/scorer pairs, evaluated and printed in this order.
    label_scorers = (
        ('Accuracy: ', accuracy_score),
        ('Jaccard Idx: ', jaccard_score),
        ('Precision: ', precision_score),
        ('Recall: ', recall_score),
        ('F1: ', f1_score),
    )
    for label, scorer in label_scorers:
        print(label, format(scorer(y_true, y_pred), '.4f'))

    # AUC needs probability scores; skip it when none were supplied.
    if y_pred_prob is None:
        return
    auc_value = roc_auc_score(y_true, y_pred_prob[:, 1])
    print('AUC: ', format(auc_value, '.4f'))
def analyze(dataFrame):
no_icing_df = dataFrame[dataFrame['icing_intensity'] == -1]
icing_df = dataFrame[dataFrame['icing_intensity'] >= 1]
......@@ -99,6 +110,7 @@ def get_feature_target_data(data_frame, standardize=True):
print('num obs, features: ', x.shape)
if standardize:
x = preprocessing.StandardScaler().fit(x).transform(x)
x = np.where(np.isnan(x), 0, x)
# The dependent variable (target) --------------------------------------------
y = np.asarray(icing_df['icing_intensity'])
......@@ -111,12 +123,8 @@ def get_feature_target_data(data_frame, standardize=True):
return x, y
def logistic_regression(x, y, x_test=None, y_test=None):
if x_test is None:
x_train, x_test, y_train, y_test = train_test_split( x, y, test_size=0.2, random_state=4)
else:
x_train = x
y_train = y
def logistic_regression(x_train, y_train, x_test, y_test):
print('Train set:', x_train.shape, y_train.shape)
print('Test set:', x_test.shape, y_test.shape)
......@@ -129,17 +137,19 @@ def logistic_regression(x, y, x_test=None, y_test=None):
yhat = LR.predict(x_test)
yhat_prob = LR.predict_proba(x_test)
print(confusion_matrix(y_test, yhat, labels=[1,0]))
print('Accuracy: ', "{:.4f}".format(accuracy_score(y_test, yhat)))
print('Jaccard Idx: ', "{:.4f}".format(jaccard_score(y_test, yhat)))
print('Precision: ', "{:.4f}".format(precision_score(y_test, yhat)))
print('Recall: ', "{:.4f}".format(recall_score(y_test, yhat)))
print('F1: ', "{:.4f}".format(f1_score(y_test, yhat)))
print('AUC: ', "{:.4f}".format(roc_auc_score(y_test, yhat_prob[:, 1])))
metrics(y_test, yhat, y_pred_prob=yhat_prob)
# print(confusion_matrix(y_test, yhat, labels=[1,0]))
# print('Accuracy: ', "{:.4f}".format(accuracy_score(y_test, yhat)))
# print('Jaccard Idx: ', "{:.4f}".format(jaccard_score(y_test, yhat)))
# print('Precision: ', "{:.4f}".format(precision_score(y_test, yhat)))
# print('Recall: ', "{:.4f}".format(recall_score(y_test, yhat)))
# print('F1: ', "{:.4f}".format(f1_score(y_test, yhat)))
# print('AUC: ', "{:.4f}".format(roc_auc_score(y_test, yhat_prob[:, 1])))
def k_nearest_neighbors(x_train, y_train, x_test, y_test, k=4):
def k_nearest_neighbors(x, y, k=4):
x_train, x_test, y_train, y_test = train_test_split( x, y, test_size=0.2, random_state=4)
print('Train set:', x_train.shape, y_train.shape)
print('Test set:', x_test.shape, y_test.shape)
......@@ -152,12 +162,14 @@ def k_nearest_neighbors(x, y, k=4):
yhat = KN_C.predict(x_test)
yhat_prob = KN_C.predict_proba(x_test)
print('Accuracy: ', "{:.4f}".format(accuracy_score(y_test, yhat)))
print('Jaccard Idx: ', "{:.4f}".format(jaccard_score(y_test, yhat)))
print('Precision: ', "{:.4f}".format(precision_score(y_test, yhat)))
print('Recall: ', "{:.4f}".format(recall_score(y_test, yhat)))
print('F1: ', "{:.4f}".format(f1_score(y_test, yhat)))
print('AUC: ', "{:.4f}".format(roc_auc_score(y_test, yhat_prob[:, 1])))
metrics(y_test, yhat, y_pred_prob=yhat_prob)
# print('Accuracy: ', "{:.4f}".format(accuracy_score(y_test, yhat)))
# print('Jaccard Idx: ', "{:.4f}".format(jaccard_score(y_test, yhat)))
# print('Precision: ', "{:.4f}".format(precision_score(y_test, yhat)))
# print('Recall: ', "{:.4f}".format(recall_score(y_test, yhat)))
# print('F1: ', "{:.4f}".format(f1_score(y_test, yhat)))
# print('AUC: ', "{:.4f}".format(roc_auc_score(y_test, yhat_prob[:, 1])))
def k_nearest_neighbors_all(x, y, k_s=10):
......@@ -178,12 +190,13 @@ def k_nearest_neighbors_all(x, y, k_s=10):
KN_C = KNeighborsClassifier(n_neighbors=n).fit(x_train, y_train)
yhat = KN_C.predict(x_test)
yhat_prob = KN_C.predict_proba(x_test)
print('Accuracy: ', "{:.4f}".format(accuracy_score(y_test, yhat)))
print('Jaccard Idx: ', "{:.4f}".format(jaccard_score(y_test, yhat)))
print('Precision: ', "{:.4f}".format(precision_score(y_test, yhat)))
print('Recall: ', "{:.4f}".format(recall_score(y_test, yhat)))
print('F1: ', "{:.4f}".format(f1_score(y_test, yhat)))
print('AUC: ', "{:.4f}".format(roc_auc_score(y_test, yhat_prob[:, 1])))
metrics(y_test, yhat, y_pred_prob=yhat_prob)
# print('Accuracy: ', "{:.4f}".format(accuracy_score(y_test, yhat)))
# print('Jaccard Idx: ', "{:.4f}".format(jaccard_score(y_test, yhat)))
# print('Precision: ', "{:.4f}".format(precision_score(y_test, yhat)))
# print('Recall: ', "{:.4f}".format(recall_score(y_test, yhat)))
# print('F1: ', "{:.4f}".format(f1_score(y_test, yhat)))
# print('AUC: ', "{:.4f}".format(roc_auc_score(y_test, yhat_prob[:, 1])))
mean_acc[n - 1] = accuracy_score(y_test, yhat)
std_acc[n - 1] = np.std(yhat == y_test) / np.sqrt(yhat.shape[0])
......@@ -201,7 +214,7 @@ def k_nearest_neighbors_all(x, y, k_s=10):
def decision_tree(x_train, y_train, x_test, y_test, criterion='entropy', max_depth=4):
# x_train, x_test, y_train, y_test = train_test_split( x, y, test_size=0.2, random_state=4)
print('Train set:', x_train.shape, y_train.shape)
print('Test set:', x_test.shape, y_test.shape)
......@@ -214,21 +227,22 @@ def decision_tree(x_train, y_train, x_test, y_test, criterion='entropy', max_dep
yhat = DT.predict(x_test)
yhat_prob = DT.predict_proba(x_test)
print(confusion_matrix(y_test, yhat, labels=[1, 0]))
print('Accuracy: ', "{:.4f}".format(accuracy_score(y_test, yhat)))
print('Jaccard Idx: ', "{:.4f}".format(jaccard_score(y_test, yhat)))
print('Precision: ', "{:.4f}".format(precision_score(y_test, yhat)))
print('Recall: ', "{:.4f}".format(recall_score(y_test, yhat)))
print('F1: ', "{:.4f}".format(f1_score(y_test, yhat)))
print('AUC: ', "{:.4f}".format(roc_auc_score(y_test, yhat_prob[:, 1])))
metrics(y_test, yhat, y_pred_prob=yhat_prob)
# print(confusion_matrix(y_test, yhat, labels=[1, 0]))
# print('Accuracy: ', "{:.4f}".format(accuracy_score(y_test, yhat)))
# print('Jaccard Idx: ', "{:.4f}".format(jaccard_score(y_test, yhat)))
# print('Precision: ', "{:.4f}".format(precision_score(y_test, yhat)))
# print('Recall: ', "{:.4f}".format(recall_score(y_test, yhat)))
# print('F1: ', "{:.4f}".format(f1_score(y_test, yhat)))
# print('AUC: ', "{:.4f}".format(roc_auc_score(y_test, yhat_prob[:, 1])))
return DT
# export_graphviz(DT, out_file='tree.dot', filled=True, feature_names=['cld_geo_thick', 'cld_temp_acha', 'conv_cloud_fraction', 'supercooled_cloud_fraction', 'cld_reff_dcomp', 'cld_opd_dcomp', 'iwc_dcomp'])
# !dot -Tpng tree.dot -o tree.png
def SVM(x, y, kernel='rbf'):
x_train, x_test, y_train, y_test = train_test_split( x, y, test_size=0.2, random_state=4)
def SVM(x_train, y_train, x_test, y_test, kernel='rbf'):
print('Train set:', x_train.shape, y_train.shape)
print('Test set:', x_test.shape, y_test.shape)
......@@ -241,19 +255,17 @@ def SVM(x, y, kernel='rbf'):
clf = clf.fit(x_train, y_train)
yhat = clf.predict(x_test)
print('Accuracy: ', "{:.4f}".format(accuracy_score(y_test, yhat)))
print('Jaccard Idx: ', "{:.4f}".format(jaccard_score(y_test, yhat)))
print('Precision: ', "{:.4f}".format(precision_score(y_test, yhat)))
print('Recall: ', "{:.4f}".format(recall_score(y_test, yhat)))
print('F1: ', "{:.4f}".format(f1_score(y_test, yhat)))
metrics(y_test, yhat)
# print('Accuracy: ', "{:.4f}".format(accuracy_score(y_test, yhat)))
# print('Jaccard Idx: ', "{:.4f}".format(jaccard_score(y_test, yhat)))
# print('Precision: ', "{:.4f}".format(precision_score(y_test, yhat)))
# print('Recall: ', "{:.4f}".format(recall_score(y_test, yhat)))
# print('F1: ', "{:.4f}".format(f1_score(y_test, yhat)))
def random_forest(x_train, y_train, x_test, y_test, criterion='entropy', max_depth=4):
def random_forest(x, y, x_test=None, y_test=None, criterion='entropy', max_depth=4):
if x_test is None:
x_train, x_test, y_train, y_test = train_test_split( x, y, test_size=0.2, random_state=4)
else:
x_train = x
y_train = y
print('Train set:', x_train.shape, y_train.shape)
print('Test set:', x_test.shape, y_test.shape)
......@@ -266,9 +278,10 @@ def random_forest(x, y, x_test=None, y_test=None, criterion='entropy', max_depth
yhat = rnd_clf.predict(x_test)
yhat_prob = rnd_clf.predict_proba(x_test)
print('Accuracy: ', "{:.4f}".format(accuracy_score(y_test, yhat)))
print('Jaccard Idx: ', "{:.4f}".format(jaccard_score(y_test, yhat)))
print('Precision: ', "{:.4f}".format(precision_score(y_test, yhat)))
print('Recall: ', "{:.4f}".format(recall_score(y_test, yhat)))
print('F1: ', "{:.4f}".format(f1_score(y_test, yhat)))
print('AUC: ', "{:.4f}".format(roc_auc_score(y_test, yhat_prob[:, 1])))
metrics(y_test, yhat, y_pred_prob=yhat_prob)
# print('Accuracy: ', "{:.4f}".format(accuracy_score(y_test, yhat)))
# print('Jaccard Idx: ', "{:.4f}".format(jaccard_score(y_test, yhat)))
# print('Precision: ', "{:.4f}".format(precision_score(y_test, yhat)))
# print('Recall: ', "{:.4f}".format(recall_score(y_test, yhat)))
# print('F1: ', "{:.4f}".format(f1_score(y_test, yhat)))
# print('AUC: ', "{:.4f}".format(roc_auc_score(y_test, yhat_prob[:, 1])))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment