Skip to content
Snippets Groups Projects
Commit fb5dfb20 authored by tomrink's avatar tomrink
Browse files

snapshot...

parent c50ecf72
No related branches found
No related tags found
No related merge requests found
......@@ -2,13 +2,14 @@ import pandas as pd
import pylab as pl
import numpy as np
import scipy.optimize as opt
from sklearn import preprocessing
from sklearn import preprocessing, svm
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, accuracy_score, jaccard_score, f1_score, precision_score, recall_score, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
import sklearn.tree as tree
from sklearn.tree import export_graphviz
......@@ -136,7 +137,7 @@ def k_nearest_neighbors_all(x, y, k_s=10):
plt.show()
def decision_tree(x, y, max_depth=4):
def decision_tree(x, y, criterion='entropy', max_depth=4):
x_train, x_test, y_train, y_test = train_test_split( x, y, test_size=0.2, random_state=4)
print('Train set:', x_train.shape, y_train.shape)
print('Test set:', x_test.shape, y_test.shape)
......@@ -146,7 +147,7 @@ def decision_tree(x, y, max_depth=4):
print('num no icing test: ', np.sum(y_test == 0))
print('num icing test: ', np.sum(y_test == 1))
DT = DecisionTreeClassifier(criterion="entropy", max_depth=max_depth).fit(x_train, y_train)
DT = DecisionTreeClassifier(criterion=criterion, max_depth=max_depth).fit(x_train, y_train)
yhat = DT.predict(x_test)
yhat_prob = DT.predict_proba(x_test)
......@@ -158,7 +159,48 @@ def decision_tree(x, y, max_depth=4):
print('AUC: ', "{:.4f}".format(roc_auc_score(y_test, yhat_prob[:, 1])))
return DT
# export_graphviz(DT, out_file='tree.dot', filled=True, feature_names=['cld_geo_thick', 'cld_temp_acha', 'conv_cloud_fraction', 'supercooled_cloud_fraction', 'cld_reff_dcomp', 'cld_opd_dcomp', 'iwc_dcomp'])
# !dot -Tpng tree.dot -o tree.png
def SVM(x, y, kernel='rbf'):
    """Fit a support-vector classifier and print hold-out metrics.

    The data is split 80/20 (fixed ``random_state=4``), NaN feature values
    are replaced with 0, an ``svm.SVC`` is trained on the training part and
    accuracy, Jaccard index, precision, recall and F1 are printed for the
    test part.

    Args:
        x: Feature matrix; must work with ``np.isnan`` (numeric values).
        y: Target labels. The printed class counts treat 0 as "no icing"
            and 1 as "icing"; the metrics are called with their default
            settings, so binary labels are presumably expected -- confirm
            against the caller.
        kernel: Kernel name passed straight through to ``svm.SVC``.

    Returns:
        The fitted ``svm.SVC`` instance, so the trained model can be reused
        by the caller (mirrors ``decision_tree``, which returns its model).
    """
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=4)
    print('Train set:', x_train.shape, y_train.shape)
    print('Test set:', x_test.shape, y_test.shape)

    # SVC cannot handle NaN inputs; missing feature values are zero-filled.
    x_train = np.where(np.isnan(x_train), 0, x_train)
    x_test = np.where(np.isnan(x_test), 0, x_test)

    print('num no icing test: ', np.sum(y_test == 0))
    print('num icing test: ', np.sum(y_test == 1))

    clf = svm.SVC(kernel=kernel)
    clf = clf.fit(x_train, y_train)
    yhat = clf.predict(x_test)

    print('Accuracy: ', "{:.4f}".format(accuracy_score(y_test, yhat)))
    print('Jaccard Idx: ', "{:.4f}".format(jaccard_score(y_test, yhat)))
    print('Precision: ', "{:.4f}".format(precision_score(y_test, yhat)))
    print('Recall: ', "{:.4f}".format(recall_score(y_test, yhat)))
    print('F1: ', "{:.4f}".format(f1_score(y_test, yhat)))

    # Hand the trained model back instead of discarding it.
    return clf
def random_forest(x, y, criterion='entropy', max_depth=4):
    """Fit a random-forest classifier and print hold-out metrics.

    The data is split 80/20 (fixed ``random_state=4``), NaN feature values
    are replaced with 0, a ``RandomForestClassifier`` is trained on the
    training part and accuracy, Jaccard index, precision, recall, F1 and
    ROC AUC are printed for the test part.

    Args:
        x: Feature matrix; must work with ``np.isnan`` (numeric values).
        y: Target labels. The printed class counts treat 0 as "no icing"
            and 1 as "icing"; the AUC uses column 1 of ``predict_proba``,
            so binary labels are presumably expected -- confirm against
            the caller.
        criterion: Split-quality criterion passed to the classifier.
        max_depth: Maximum tree depth passed to the classifier.

    Returns:
        The fitted ``RandomForestClassifier``, so the trained model can be
        reused by the caller (mirrors ``decision_tree``, which returns its
        model).
    """
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=4)
    print('Train set:', x_train.shape, y_train.shape)
    print('Test set:', x_test.shape, y_test.shape)

    # Missing feature values are zero-filled before fitting/predicting.
    x_train = np.where(np.isnan(x_train), 0, x_train)
    x_test = np.where(np.isnan(x_test), 0, x_test)

    print('num no icing test: ', np.sum(y_test == 0))
    print('num icing test: ', np.sum(y_test == 1))

    # NOTE(review): the forest itself is not seeded, so metrics may vary
    # slightly from run to run even though the split is fixed.
    rnd_clf = RandomForestClassifier(criterion=criterion, max_depth=max_depth).fit(x_train, y_train)
    yhat = rnd_clf.predict(x_test)
    yhat_prob = rnd_clf.predict_proba(x_test)

    print('Accuracy: ', "{:.4f}".format(accuracy_score(y_test, yhat)))
    print('Jaccard Idx: ', "{:.4f}".format(jaccard_score(y_test, yhat)))
    print('Precision: ', "{:.4f}".format(precision_score(y_test, yhat)))
    print('Recall: ', "{:.4f}".format(recall_score(y_test, yhat)))
    print('F1: ', "{:.4f}".format(f1_score(y_test, yhat)))
    # Column 1 of predict_proba is the probability of the second class.
    print('AUC: ', "{:.4f}".format(roc_auc_score(y_test, yhat_prob[:, 1])))

    # Hand the trained model back instead of discarding it.
    return rnd_clf
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment