Skip to content
Snippets Groups Projects
Commit 7716eb71 authored by tomrink's avatar tomrink
Browse files

snapshot...

parent 7d7f4e5a
No related branches found
No related tags found
No related merge requests found
...@@ -4,52 +4,64 @@ import numpy as np ...@@ -4,52 +4,64 @@ import numpy as np
import scipy.optimize as opt import scipy.optimize as opt
from sklearn import preprocessing from sklearn import preprocessing
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, jaccard_score, f1_score, precision_score, recall_score, roc_auc_score
from sklearn.model_selection import train_test_split from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression from sklearn.linear_model import LogisticRegression
icing_df = pd.read_csv('/Users/tomrink/train_L2_DAY_1D.csv')
#print(icing_df.head(20)) def get_csv_as_dataframe(csv_file):
#print(icing_df.describe()) icing_df = pd.read_csv(csv_file)
print(icing_df.shape) print(icing_df.describe())
print(icing_df.shape)
# Remove rows with NaN values return icing_df
# icing_df = icing_df.dropna()
#Access rows def get_train_test_data(data_frame, normalize=True):
print(icing_df.iloc[0]) #First row of DataFrame icing_df = data_frame
print('--------------------------------------') # The independent variables we want to use:
print(icing_df.iloc[200]) #Eleventh row of DataFrame params = ['cld_geo_thick', 'cld_temp_acha', 'conv_cloud_fraction', 'supercooled_cloud_fraction', 'cld_reff_dcomp',
'cld_opd_dcomp', 'iwc_dcomp']
#Access columns # Remove this column
print(icing_df['lwc_dcomp']) #Replace COLUMN_NAME with the name of column icing_df = icing_df.drop('lwc_dcomp', axis=1)
#Remove column # Remove rows with NaN values
icing_df = icing_df.drop('lwc_dcomp', axis=1) # icing_df = icing_df.dropna()
print(icing_df.shape) print(icing_df.shape)
# icing_df = icing_df.dropna() # icing_df = icing_df.dropna()
print(icing_df.shape) print(icing_df.shape)
params = ['cld_geo_thick', 'cld_temp_acha', 'conv_cloud_fraction', 'supercooled_cloud_fraction', 'cld_reff_dcomp', 'cld_opd_dcomp', 'iwc_dcomp'] x = np.asarray(icing_df[params])
X = np.asarray(icing_df[params]) if normalize:
# X = preprocessing.StandardScaler().fit(X).transform(X) x = preprocessing.StandardScaler().fit(x).transform(x)
y = np.asarray(icing_df['icing_intensity']) y = np.asarray(icing_df['icing_intensity'])
y = np.where(y == -1, 0, y) y = np.where(y == -1, 0, y)
print(X.shape, y.shape) y = np.where(y >= 1, 1, y)
print('num no icing: ', np.sum(y == 0)) print(x.shape, y.shape)
print('num icing: ', np.sum(y == 1)) print('num no icing: ', np.sum(y == 0))
print('num icing: ', np.sum(y == 1))
X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.2, random_state=4)
print ('Train set:', X_train.shape, y_train.shape) return x, y
print ('Test set:', X_test.shape, y_test.shape)
X_train = np.where(np.isnan(X_train), 0, X_train)
X_test = np.where(np.isnan(X_test), 0, X_test) def logistic_regression(x, y):
print('num no icing test: ', np.sum(y_test == 0)) x_train, x_test, y_train, y_test = train_test_split( x, y, test_size=0.2, random_state=4)
print('num icing test: ', np.sum(y_test == 1)) print('Train set:', x_train.shape, y_train.shape)
print('Test set:', x_test.shape, y_test.shape)
LR = LogisticRegression(C=0.01, solver='liblinear').fit(X_train,y_train) x_train = np.where(np.isnan(x_train), 0, x_train)
yhat = LR.predict(X_test) x_test = np.where(np.isnan(x_test), 0, x_test)
yhat_prob = LR.predict_proba(X_test) print('num no icing test: ', np.sum(y_test == 0))
print(confusion_matrix(y_test, yhat, labels=[1,0])) print('num icing test: ', np.sum(y_test == 1))
\ No newline at end of file
LR = LogisticRegression(C=0.01, solver='liblinear').fit(x_train, y_train)
yhat = LR.predict(x_test)
yhat_prob = LR.predict_proba(x_test)
print(confusion_matrix(y_test, yhat, labels=[1,0]))
print('Accuracy: ', accuracy_score(y_test, yhat))
print('Jaccard Idx: ', jaccard_score(y_test, yhat))
print('Precision: ', precision_score(y_test, yhat))
print('Recall: ', recall_score(y_test, yhat))
print('F1: ', f1_score(y_test, yhat))
print('AUC: ', roc_auc_score(y_test, yhat_prob[:, 1]))
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment