diff --git a/modules/machine_learning/__init__.py b/modules/machine_learning/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/modules/machine_learning/logistic_regression.py b/modules/machine_learning/logistic_regression.py
new file mode 100644
index 0000000000000000000000000000000000000000..2660f4aaec1c16704f52cc985fbc75495f6d3f58
--- /dev/null
+++ b/modules/machine_learning/logistic_regression.py
@@ -0,0 +1,55 @@
+import pandas as pd
+import pylab as pl
+import numpy as np
+import scipy.optimize as opt
+from sklearn import preprocessing
+import matplotlib.pyplot as plt
+from sklearn.metrics import confusion_matrix
+from sklearn.model_selection import train_test_split
+from sklearn.linear_model import LogisticRegression
+
+icing_df = pd.read_csv('/Users/tomrink/train_L2_DAY_1D.csv')
+#print(icing_df.head(20))
+#print(icing_df.describe())
+print(icing_df.shape)
+
+# Remove rows with NaN values (currently disabled; NaNs in X_train/X_test are replaced with 0 after the split)
+# icing_df = icing_df.dropna()
+
+#Access rows
+print(icing_df.iloc[0]) #First row of DataFrame
+print('--------------------------------------')
+print(icing_df.iloc[200]) #Row at position 200 (the 201st row) of DataFrame
+
+#Access columns
+print(icing_df['lwc_dcomp']) #Print the 'lwc_dcomp' column
+
+#Remove the 'lwc_dcomp' column
+icing_df = icing_df.drop('lwc_dcomp', axis=1)
+
+print(icing_df.shape)
+# icing_df = icing_df.dropna()
+print(icing_df.shape)
+
+params = ['cld_geo_thick', 'cld_temp_acha', 'conv_cloud_fraction', 'supercooled_cloud_fraction', 'cld_reff_dcomp', 'cld_opd_dcomp', 'iwc_dcomp']
+X = np.asarray(icing_df[params])
+# X = preprocessing.StandardScaler().fit(X).transform(X)
+y = np.asarray(icing_df['icing_intensity'])
+y = np.where(y == -1, 0, y)
+print(X.shape, y.shape)
+print('num no icing: ', np.sum(y == 0))
+print('num icing: ', np.sum(y == 1))
+
+X_train, X_test, y_train, y_test = train_test_split( X, y, test_size=0.2, random_state=4)
+print ('Train set:', X_train.shape,  y_train.shape)
+print ('Test set:', X_test.shape,  y_test.shape)
+X_train = np.where(np.isnan(X_train), 0, X_train)
+X_test = np.where(np.isnan(X_test), 0, X_test)
+print('num no icing test: ', np.sum(y_test == 0))
+print('num icing test: ', np.sum(y_test == 1))
+
+
+LR = LogisticRegression(C=0.01, solver='liblinear').fit(X_train,y_train)
+yhat = LR.predict(X_test)
+yhat_prob = LR.predict_proba(X_test)
+print(confusion_matrix(y_test, yhat, labels=[1,0]))
\ No newline at end of file