Testing files for creating the cross_correlation check. correlate.py has finished product

8e33e7dc · Alex Diebold · d22cc721 · 8e33e7dc · 8e33e7dc
Commit 8e33e7dc authored 7 years ago by Alex Diebold
--- a/testing/correlate.py
+++ b/testing/correlate.py
+from sys import argv
+import os
+import time
+
+import pandas as pd
+import numpy as np
+import scipy.signal
+import matplotlib.pyplot as plt
+from sklearn.preprocessing import MinMaxScaler
+
+from aeri_tools.io.dmv import housekeeping
+from aeri_tools.io.dmv import radiance
+
+def graph_correlation(test_name, path, save_option=False):
+    #read data
+    rad = radiance.get_radiance_from_rnc(path)
+    hk = housekeeping.get_all_housekeeping(path)
+
+    #get only relavent data points
+    rad_test = rad.xs(str.encode('H'), level='scene').iloc[:, ((rad.xs(str.encode('H'), level='scene').columns > 560) & (rad.xs(str.encode('H'), level='scene').columns < 600))].mean(axis=1)
+    hk_test = hk['SCEtemp'].loc[rad_test.index]
+
+    #filter data
+    if len(rad_test.values) > 51:
+        #filter data
+        rad_test = rad_test - scipy.signal.savgol_filter(rad_test.values, 51, 4)
+        hk_test = hk_test - scipy.signal.savgol_filter(hk_test.values, 51, 4)
+    else:
+        rad_test = rad_test - scipy.signal.savgol_filter(rad_test.values, 3, 2)
+        hk_test = hk_test - scipy.signal.savgol_filter(hk_test.values, 3, 2)
+
+    #remove outliers
+    hk_test.drop(rad_test.loc[rad_test < (np.mean(rad_test) - 4*np.std(rad_test))].index, inplace=True)
+    rad_test.drop(rad_test.loc[rad_test < (np.mean(rad_test) - 4*np.std(rad_test))].index, inplace=True)
+    ind = rad_test.index
+
+    #copy original values
+    orig_rad = rad_test.copy()
+
+    #normalize data to range of (0,1)
+    rad_test = rad_test.values
+    rad_test = rad_test.reshape((len(rad_test),1))
+    hk_test = hk_test.values
+    hk_test = hk_test.reshape((len(hk_test),1))
+    scaler = MinMaxScaler(feature_range=(0,1))
+    scaler = scaler.fit(hk_test)
+    hk_test = pd.Series(np.concatenate(scaler.transform(hk_test), axis=0), index=ind)
+    scaler = MinMaxScaler(feature_range=(0,1))
+    scaler = scaler.fit(rad_test)
+    rad_test = pd.Series(np.concatenate(scaler.transform(rad_test), axis=0), index=ind)
+
+    #print radiance
+    ax = plt.subplot(4,1,1)
+    ax.set_title('radiance filtered')
+    plt.plot(rad_test)
+
+    #print SCEtemp
+    ax = plt.subplot(4,1,2)
+    ax.set_title('SCEtemp filtered')
+    plt.plot(hk_test)
+
+    #print correlation
+    ax = plt.subplot(4,1,3)
+    ax.set_title('correlation')
+    ax.set_ylim(-1, 1)
+    curr_num = 0
+    curr_roll = 15
+    corr_array = []
+
+    #rolling correlation
+    for x in range(len(hk_test.values)):
+        #use Pearson's correlation coefficient
+        corr_array.append(np.corrcoef(hk_test.values[curr_num:curr_num+curr_roll], rad_test.values[curr_num:curr_num+curr_roll])[0,1])
+        if curr_roll < 30 and curr_num == 0:
+            curr_roll += 1
+        elif curr_roll + curr_num == len(hk_test.values):
+            curr_num += 1
+            curr_roll -= 1
+        else:
+            curr_num += 1
+
+    #plot QC levels
+    plt.plot(hk_test.index, np.array(corr_array))
+    plt.plot(hk_test.index, [0.5]*len(hk_test.index))
+    plt.plot(hk_test.index, [0.6]*len(hk_test.index))
+    plt.plot(hk_test.index, [0.7]*len(hk_test.index))
+    plt.plot(hk_test.index, [0.8]*len(hk_test.index))
+    plt.plot(hk_test.index, [0.9]*len(hk_test.index))
+
+    #plot QC percent
+    ax = plt.subplot(4,1,4)
+    ax.set_title('QC percent')
+    ax.set_ylim(0,1)
+
+    #calculate QC
+    corr_array = np.array(corr_array)
+    scale = np.log(corr_array*2.1 + corr_array**2)
+    correlation_qc = ((corr_array > 0.5) & (corr_array < 0.9)) * scale + (corr_array > 0.9)*1
+
+    #remove spikes
+    for x,v in enumerate(correlation_qc):
+        if v > 0:
+            for a in range(x, x+25):
+                if correlation_qc[a] == 0 or a == len(correlation_qc) - 1:
+                    for b in range(x, x-25, -1):
+                        if correlation_qc[b] == 0 or x-b == 0:
+                            for c in range(x, a):
+                                correlation_qc[c] = 0
+                            break
+                    break
+
+    #plot QC
+    plt.fill_between(hk_test.index, correlation_qc, color='black')
+
+    #calculate correlation coefficient
+    # corr = np.corrcoef(hk_test.values, rad_test.values)[0,1]
+
+    #calculate mean amplitude
+    temp = pd.DataFrame()
+    temp['rad_test'] = orig_rad
+    temp['corr_array'] = corr_array
+    if all(corr_array < 0.5):
+        amp = 0
+    else:
+        amp = np.mean(abs(temp['rad_test'].loc[temp['corr_array'].dropna() > 0.5].dropna()))
+    t = path + ' ||| ' + str(amp)
+    plt.suptitle(t, fontsize=20)
+
+    #save graph
+    if save_option:
+        print(t)
+        plt.savefig('/Users/adiebold/aeri_quality_control/testing/pngs/correlation/' + path[:3] + '_' + path[6:12] + '_awr.png')
+    else:
+        savename = path.split('/')[0] + '_' + path[-12:-6] + '_' + test_name + '.png'
+        plt.savefig('/Users/adiebold/aeri_quality_control/testing/pngs/diagnostics/' + savename)
+
+    plt.clf()
+
+def graph_everything(tf):
+    for f in tf:
+        graph_correlation(None, f, True)
+
+def cross_correlation(pathname):
+    #read data
+    rad = radiance.get_radiance_from_rnc(pathname)
+    hk = housekeeping.get_all_housekeeping(pathname)
+    #get only relavent data points
+    hk = hk.loc[~hk.index.duplicated()]
+    rad_test = rad.xs(str.encode('H'), level='scene').iloc[:, ((rad.xs(str.encode('H'), level='scene').columns > 560) & (rad.xs(str.encode('H'), level='scene').columns < 600))].mean(axis=1)
+    data = pd.concat((hk, rad_test.rename('radiance')), axis=1)
+    hk_test = hk['SCEtemp'].loc[data.radiance.dropna().index]
+
+    #filter data
+    rad_test = rad_test - scipy.signal.savgol_filter(rad_test.values, 51, 4)
+    hk_test = hk_test - scipy.signal.savgol_filter(hk_test.values, 51, 4)
+
+    #remove outliers
+    hk_test.drop(rad_test.loc[rad_test < (np.mean(rad_test) - 4*np.std(rad_test))].index, inplace=True)
+    rad_test.drop(rad_test.loc[rad_test < (np.mean(rad_test) - 4*np.std(rad_test))].index, inplace=True)
+    ind = rad_test.index
+
+    #normalize data to range of (0,1)
+    rad_test = rad_test.values
+    rad_test = rad_test.reshape((len(rad_test),1))
+    hk_test = hk_test.values
+    hk_test = hk_test.reshape((len(hk_test),1))
+    scaler = MinMaxScaler(feature_range=(0,1))
+    scaler = scaler.fit(hk_test)
+    hk_test = pd.Series(np.concatenate(scaler.transform(hk_test), axis=0), index=ind)
+    scaler = MinMaxScaler(feature_range=(0,1))
+    scaler = scaler.fit(rad_test)
+    rad_test = pd.Series(np.concatenate(scaler.transform(rad_test), axis=0), index=ind)
+
+    #prepare for rolling correlation
+    curr_num = 0
+    curr_roll = 15
+    corr_array = []
+
+    #rolling correlation
+    for x in range(len(hk_test.values)):
+        corr_array.append(np.corrcoef(hk_test.values[curr_num:curr_num+curr_roll], rad_test.values[curr_num:curr_num+curr_roll])[0,1])
+        if curr_roll < 30 and curr_num == 0:
+            curr_roll += 1
+        elif curr_roll + curr_num == len(hk_test.values):
+            curr_num += 1
+            curr_roll -= 1
+        else:
+            curr_num += 1
+
+    #scale 0.6-0.9 as 0.1-1.0
+    corr_array = np.array(corr_array)
+    scale = np.exp((corr_array-0.9)/0.15)
+    correlation_qc = ((corr_array > 0.6) & (corr_array < 0.9)) * scale + (corr_array > 0.9)*1
+
+    #remove spikes
+    for x,v in enumerate(correlation_qc):
+        if v > 0:
+            for a,b in enumerate(correlation_qc[x:]):
+                if (b == 0 or x+a == len(correlation_qc)-1) and a < 25:
+                    for c in range(x, x+a+1):
+                        correlation_qc[c] = 0
+                    break
+
+    #put qc results into a Series
+    correlation_qc = pd.Series(correlation_qc, index=rad_test.index)
+    test = pd.Series(index=hk.index)
+    test.loc[correlation_qc.index] = correlation_qc
+    test.fillna(0.0, inplace=True)
+    print(test)
+    print('SCEtemp len = ', str(len(hk['SCEtemp'])))
+    print('rad_test len = ', str(len(rad_test)))
+    print('hk_test len = ', str(len(hk_test)))
+    print('correlation_qc len = ', str(len(correlation_qc)))
+    '''
+    #finished result
+    return correlation_qc
+    '''
--- a/testing/correlation_test.py
+++ b/testing/correlation_test.py
@@ -14,12 +14,15 @@ from aeri_tools.io.dmv import radiance
 def main(path):

    #get data from B1.CXS file
-    dmv = radiance.get_radiance_from_rnc(path)
+    rad = radiance.get_radiance_from_rnc(path)
    hk = housekeeping.get_all_housekeeping(path)

-    #only use dmv data from scene H
-    data = pd.DataFrame(index=hk.index, columns=('dmv', 'hk'))
-    data['dmv'] = dmv.xs(str.encode('H'), level='scene').mean(axis=1)
+    #only use rad data from scene H
+    data = pd.DataFrame(index=hk.index, columns=('rad', 'hk'))
+    data['rad'] = rad.xs(str.encode('H'), level='scene').mean(axis=1)
+    # rad.xs(str.encode('H'), level='scene').iloc[:,
+    # ((rad.xs(str.encode('H'), level='scene').columns > 560) &
+    # (rad.xs(str.encode('H'), level='scene').columns < 600))].mean(axis=1)
    data['hk'] = hk['SCEtemp']
    #get rid of hk rows where scene isn't H
    data.dropna(inplace=True)
@@ -32,7 +35,7 @@ def main(path):
            data[x] = data[x] - tmp

        #correlate the data
-        correlation = np.correlate(data['dmv'].values[:],
+        correlation = np.correlate(data['rad'].values[:],
                                    data['hk'].values[:], mode='same')

        print('max = ', np.amax(correlation), ' : min = ',
@@ -41,7 +44,7 @@ def main(path):
        fig, ax = plt.subplots(1, figsize=(15,10), sharex=True)

        '''
-        for x, val in ('dmv', 'hk'):
+        for x, val in ('rad', 'hk'):
            plt.sca(ax[x])
            plt.plot(list(data.index), data[val].values[:])
        '''
@@ -94,3 +97,62 @@ if __name__ == '__main__':

    print('execution time: %d minute(s), %.2f second(s)' %
        ((time.time()-start_time)//60, (time.time()-start_time)%60))
+
+
+
+for f in tf:
+    rad = radiance.get_radiance_from_rnc(f)
+    hk = housekeeping.get_all_housekeeping(f)
+    rad_test = rad.xs(str.encode('H'), level='scene').iloc[:, ((rad.xs(str.encode('H'), level='scene').columns > 560) & (rad.xs(str.encode('H'), level='scene').columns < 600))].mean(axis=1)
+    #hk_test = hk['SCEtemp'].loc[rad_test.index[0]:rad_test.index[len(rad_test.index)-1]]
+    hk_test = hk['SCEtemp'].loc[rad_test.index]
+    ax = plt.subplot(4,1,1)
+    ax.set_title('radiance filtered')
+    rad_test = rad_test - scipy.signal.savgol_filter(rad_test.values, 51, 4)
+    plt.plot(rad_test)
+    ax = plt.subplot(4,1,2)
+    ax.set_title('SCEtemp raw')
+    plt.plot(hk_test)
+    ax = plt.subplot(4,1,3)
+    ax.set_title('SCEtemp filtered')
+    hk_test = hk_test - scipy.signal.savgol_filter(hk_test.values, 51, 4)
+    plt.plot(hk_test)
+    ax = plt.subplot(4,1,4)
+    ax.set_title('correlation')
+    #print(len(rad_test.index), ' -- ', len(np.correlate(rad_test.values, hk_test.values, 'same')))
+    plt.plot(rad_test.index, np.correlate(hk_test.values, rad_test.values, 'same'))
+    corr = np.correlate(hk_test.values, rad_test.values)[0]
+    t = f + ' -- ' + 'reduced -- ' + str(corr)
+    plt.suptitle(t, fontsize=20)
+    print(t)
+    plt.show()
+    plt.clf()
+
+for c,f in enumerate(tf):
+    print(f)
+    rad = radiance.get_radiance_from_rnc(f)
+    hk = housekeeping.get_all_housekeeping(f)
+    rad_test = rad.xs(str.encode('H'), level='scene').iloc[:, ((rad.xs(str.encode('H'), level='scene').columns > 560) & (rad.xs(str.encode('H'), level='scene').columns < 60
+    0))].mean(axis=1)
+    hk_test = hk['SCEtemp'].loc[rad_test.index]
+    rad_test = rad_test - scipy.signal.savgol_filter(rad_test.values, 51, 4)
+    hk_test = hk_test - scipy.signal.savgol_filter(hk_test.values, 51, 4)
+    curr_num = 0
+    curr_roll = max_roll / 2
+    corr = []
+    for x in range(len(hk_test.values)):
+        corr.append(max(np.correlate(hk_test.values[curr_num:curr_num+curr_roll], rad_test.values[curr_num:curr_num+curr_roll], 'full')[curr_roll-1:curr_roll+1]))
+        if curr_roll < max_roll and curr_num == 0:
+            curr_roll += 1
+        elif curr_roll + curr_num == len(hk_test.values):
+            curr_num += 1
+            curr_roll -= 1
+        else:
+            curr_num += 1
+    #ax = plt.subplot(len(tf), 1, c+1)
+    ax = plt.subplot(111)
+    ax.set_title(f + ' ||| ' + str(np.corrcoef(hk_test.values, rad_test.values)[0,1]))
+    ax.set_ylim(-0.1, 1.0)
+    plt.plot(corr)
+    plt.savefig('/Users/adiebold/aeri_quality_control/testing/pngs/correlation/' + f[:3] + '_' + f[6:12] + '.png')
+    plt.clf()