Skip to content
Snippets Groups Projects
Commit 7716eb71 authored by tomrink's avatar tomrink
Browse files

snapshot...

parent 7d7f4e5a
No related branches found
No related tags found
No related merge requests found
......@@ -4,52 +4,64 @@ import numpy as np
import scipy.optimize as opt
from sklearn import preprocessing
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix
from sklearn.metrics import confusion_matrix, classification_report, accuracy_score, jaccard_score, f1_score, precision_score, recall_score, roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
# Exploratory script: load the icing training table, print a few views of it,
# then fit a logistic-regression classifier on the 'icing_intensity' label.
icing_df = pd.read_csv('/Users/tomrink/train_L2_DAY_1D.csv')
# Optional inspection helpers, currently disabled:
#print(icing_df.head(20))
#print(icing_df.describe())
print(icing_df.shape)
# Remove rows with NaN values (disabled; NaNs are zero-filled after the split instead)
# icing_df = icing_df.dropna()
# Access rows by integer position
print(icing_df.iloc[0]) # First row of the DataFrame
print('--------------------------------------')
print(icing_df.iloc[200]) # Row at positional index 200 (i.e. the 201st row)
# Access a column by name
print(icing_df['lwc_dcomp']) # Show the 'lwc_dcomp' column before it is dropped below
# Remove the 'lwc_dcomp' column; it is not one of the predictors listed in `params`
icing_df = icing_df.drop('lwc_dcomp', axis=1)
print(icing_df.shape)
# Row-dropping is disabled, so the shape printed next matches the one just above.
# icing_df = icing_df.dropna()
print(icing_df.shape)
# Predictor (independent-variable) columns fed to the classifier.
params = ['cld_geo_thick', 'cld_temp_acha', 'conv_cloud_fraction', 'supercooled_cloud_fraction', 'cld_reff_dcomp', 'cld_opd_dcomp', 'iwc_dcomp']
X = np.asarray(icing_df[params])
# Feature standardization is disabled here; the raw predictor values are used.
# X = preprocessing.StandardScaler().fit(X).transform(X)
y = np.asarray(icing_df['icing_intensity'])
# Relabel -1 as 0; the counts printed below report 0 as "no icing" and 1 as "icing".
# NOTE(review): labels greater than 1, if present, are left as-is here — confirm intended.
y = np.where(y == -1, 0, y)
print(X.shape, y.shape)
print('num no icing: ', np.sum(y == 0))
print('num icing: ', np.sum(y == 1))
# Hold out 20% of the rows for testing; the fixed random_state keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.2, random_state=4)
print ('Train set:', X_train.shape, y_train.shape)
print ('Test set:', X_test.shape, y_test.shape)
# Replace NaN predictor values with 0 so the affected rows are kept rather than dropped.
X_train = np.where(np.isnan(X_train), 0, X_train)
X_test = np.where(np.isnan(X_test), 0, X_test)
print('num no icing test: ', np.sum(y_test == 0))
print('num icing test: ', np.sum(y_test == 1))
# Fit the classifier (C is scikit-learn's inverse regularization strength).
LR = LogisticRegression(C=0.01, solver='liblinear').fit(X_train,y_train)
yhat = LR.predict(X_test)
# Per-class probabilities for the test rows; computed but not used further in this script.
yhat_prob = LR.predict_proba(X_test)
# labels=[1,0] puts class 1 first in the matrix's rows and columns.
print(confusion_matrix(y_test, yhat, labels=[1,0]))
\ No newline at end of file
def get_csv_as_dataframe(csv_file):
    """Load a CSV into a pandas DataFrame and print a quick overview of it.

    Args:
        csv_file: Anything ``pd.read_csv`` accepts (a path or a file-like
            object).

    Returns:
        The DataFrame read from ``csv_file``.
    """
    icing_df = pd.read_csv(csv_file)
    # Summary statistics followed by (rows, columns), as a sanity check on
    # what was loaded.
    print(icing_df.describe())
    print(icing_df.shape)
    return icing_df
def get_train_test_data(data_frame, normalize=True):
    """Build the predictor matrix and binary icing label from the icing table.

    Despite the name, no train/test split happens here: the full ``(x, y)``
    pair is returned and any split is left to the caller.

    Args:
        data_frame: DataFrame containing the predictor columns listed in
            ``params`` below and an ``icing_intensity`` column. An
            ``lwc_dcomp`` column, if present, is ignored.
        normalize: When True, standardize the predictor columns with
            ``preprocessing.StandardScaler``.

    Returns:
        Tuple ``(x, y)``. ``x`` is a 2-D array with one column per entry in
        ``params``. ``y`` is the ``icing_intensity`` array with -1 mapped to
        0 and every value >= 1 mapped to 1.

    Raises:
        KeyError: If a predictor column or ``icing_intensity`` is missing.
    """
    # The independent variables we want to use:
    params = ['cld_geo_thick', 'cld_temp_acha', 'conv_cloud_fraction', 'supercooled_cloud_fraction', 'cld_reff_dcomp',
              'cld_opd_dcomp', 'iwc_dcomp']

    # 'lwc_dcomp' is not a predictor. Drop it when present; errors='ignore'
    # keeps tables that never had the column from raising KeyError.
    icing_df = data_frame.drop(columns='lwc_dcomp', errors='ignore')
    print(icing_df.shape)

    x = np.asarray(icing_df[params])
    if normalize:
        # NOTE(review): the scaler is fit on the whole data set, before any
        # train/test split, so held-out rows influence the scaling statistics.
        x = preprocessing.StandardScaler().fit(x).transform(x)

    # Collapse the label to two classes: -1 -> 0, anything >= 1 -> 1.
    y = np.asarray(icing_df['icing_intensity'])
    y = np.where(y == -1, 0, y)
    y = np.where(y >= 1, 1, y)

    print(x.shape, y.shape)
    print('num no icing: ', np.sum(y == 0))
    print('num icing: ', np.sum(y == 1))

    return x, y
def logistic_regression(x, y):
    """Fit a logistic-regression classifier on a held-out split and print metrics.

    The data is split 80/20 with a fixed seed, NaN predictor values are
    replaced by 0, a ``LogisticRegression(C=0.01, solver='liblinear')`` model
    is fit on the training part, and a confusion matrix plus accuracy,
    Jaccard index, precision, recall, F1 and ROC AUC for the test part are
    printed.

    Args:
        x: 2-D array of predictors, one row per sample.
        y: 1-D array of labels. The counts printed here and the AUC (taken
            from probability column 1) treat 0 as "no icing" and 1 as "icing".

    Returns:
        None. All results are written to stdout.
    """
    # Fixed random_state keeps the split repeatable between runs.
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=4)
    print('Train set:', x_train.shape, y_train.shape)
    print('Test set:', x_test.shape, y_test.shape)

    # Replace NaN predictor values with 0 so affected rows are kept rather
    # than dropped.
    x_train = np.where(np.isnan(x_train), 0, x_train)
    x_test = np.where(np.isnan(x_test), 0, x_test)

    print('num no icing test: ', np.sum(y_test == 0))
    print('num icing test: ', np.sum(y_test == 1))

    LR = LogisticRegression(C=0.01, solver='liblinear').fit(x_train, y_train)

    yhat = LR.predict(x_test)
    yhat_prob = LR.predict_proba(x_test)

    # labels=[1,0] puts class 1 first in the matrix's rows and columns.
    print(confusion_matrix(y_test, yhat, labels=[1,0]))
    print('Accuracy:    ', accuracy_score(y_test, yhat))
    print('Jaccard Idx: ', jaccard_score(y_test, yhat))
    print('Precision:   ', precision_score(y_test, yhat))
    print('Recall:      ', recall_score(y_test, yhat))
    print('F1:          ', f1_score(y_test, yhat))
    # AUC is computed from the probability column for class 1, not from the
    # hard predictions.
    print('AUC:         ', roc_auc_score(y_test, yhat_prob[:, 1]))
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment