Skip to content
Snippets Groups Projects
Commit 8e33e7dc authored by Alex Diebold's avatar Alex Diebold
Browse files

Testing files for creating the cross_correlation check. correlate.py has finished product

parent d22cc721
No related branches found
No related tags found
No related merge requests found
from sys import argv
import os
import time
import pandas as pd
import numpy as np
import scipy.signal
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from aeri_tools.io.dmv import housekeeping
from aeri_tools.io.dmv import radiance
def graph_correlation(test_name, path, save_option=False):
#read data
rad = radiance.get_radiance_from_rnc(path)
hk = housekeeping.get_all_housekeeping(path)
#get only relavent data points
rad_test = rad.xs(str.encode('H'), level='scene').iloc[:, ((rad.xs(str.encode('H'), level='scene').columns > 560) & (rad.xs(str.encode('H'), level='scene').columns < 600))].mean(axis=1)
hk_test = hk['SCEtemp'].loc[rad_test.index]
#filter data
if len(rad_test.values) > 51:
#filter data
rad_test = rad_test - scipy.signal.savgol_filter(rad_test.values, 51, 4)
hk_test = hk_test - scipy.signal.savgol_filter(hk_test.values, 51, 4)
else:
rad_test = rad_test - scipy.signal.savgol_filter(rad_test.values, 3, 2)
hk_test = hk_test - scipy.signal.savgol_filter(hk_test.values, 3, 2)
#remove outliers
hk_test.drop(rad_test.loc[rad_test < (np.mean(rad_test) - 4*np.std(rad_test))].index, inplace=True)
rad_test.drop(rad_test.loc[rad_test < (np.mean(rad_test) - 4*np.std(rad_test))].index, inplace=True)
ind = rad_test.index
#copy original values
orig_rad = rad_test.copy()
#normalize data to range of (0,1)
rad_test = rad_test.values
rad_test = rad_test.reshape((len(rad_test),1))
hk_test = hk_test.values
hk_test = hk_test.reshape((len(hk_test),1))
scaler = MinMaxScaler(feature_range=(0,1))
scaler = scaler.fit(hk_test)
hk_test = pd.Series(np.concatenate(scaler.transform(hk_test), axis=0), index=ind)
scaler = MinMaxScaler(feature_range=(0,1))
scaler = scaler.fit(rad_test)
rad_test = pd.Series(np.concatenate(scaler.transform(rad_test), axis=0), index=ind)
#print radiance
ax = plt.subplot(4,1,1)
ax.set_title('radiance filtered')
plt.plot(rad_test)
#print SCEtemp
ax = plt.subplot(4,1,2)
ax.set_title('SCEtemp filtered')
plt.plot(hk_test)
#print correlation
ax = plt.subplot(4,1,3)
ax.set_title('correlation')
ax.set_ylim(-1, 1)
curr_num = 0
curr_roll = 15
corr_array = []
#rolling correlation
for x in range(len(hk_test.values)):
#use Pearson's correlation coefficient
corr_array.append(np.corrcoef(hk_test.values[curr_num:curr_num+curr_roll], rad_test.values[curr_num:curr_num+curr_roll])[0,1])
if curr_roll < 30 and curr_num == 0:
curr_roll += 1
elif curr_roll + curr_num == len(hk_test.values):
curr_num += 1
curr_roll -= 1
else:
curr_num += 1
#plot QC levels
plt.plot(hk_test.index, np.array(corr_array))
plt.plot(hk_test.index, [0.5]*len(hk_test.index))
plt.plot(hk_test.index, [0.6]*len(hk_test.index))
plt.plot(hk_test.index, [0.7]*len(hk_test.index))
plt.plot(hk_test.index, [0.8]*len(hk_test.index))
plt.plot(hk_test.index, [0.9]*len(hk_test.index))
#plot QC percent
ax = plt.subplot(4,1,4)
ax.set_title('QC percent')
ax.set_ylim(0,1)
#calculate QC
corr_array = np.array(corr_array)
scale = np.log(corr_array*2.1 + corr_array**2)
correlation_qc = ((corr_array > 0.5) & (corr_array < 0.9)) * scale + (corr_array > 0.9)*1
#remove spikes
for x,v in enumerate(correlation_qc):
if v > 0:
for a in range(x, x+25):
if correlation_qc[a] == 0 or a == len(correlation_qc) - 1:
for b in range(x, x-25, -1):
if correlation_qc[b] == 0 or x-b == 0:
for c in range(x, a):
correlation_qc[c] = 0
break
break
#plot QC
plt.fill_between(hk_test.index, correlation_qc, color='black')
#calculate correlation coefficient
# corr = np.corrcoef(hk_test.values, rad_test.values)[0,1]
#calculate mean amplitude
temp = pd.DataFrame()
temp['rad_test'] = orig_rad
temp['corr_array'] = corr_array
if all(corr_array < 0.5):
amp = 0
else:
amp = np.mean(abs(temp['rad_test'].loc[temp['corr_array'].dropna() > 0.5].dropna()))
t = path + ' ||| ' + str(amp)
plt.suptitle(t, fontsize=20)
#save graph
if save_option:
print(t)
plt.savefig('/Users/adiebold/aeri_quality_control/testing/pngs/correlation/' + path[:3] + '_' + path[6:12] + '_awr.png')
else:
savename = path.split('/')[0] + '_' + path[-12:-6] + '_' + test_name + '.png'
plt.savefig('/Users/adiebold/aeri_quality_control/testing/pngs/diagnostics/' + savename)
plt.clf()
def graph_everything(tf):
for f in tf:
graph_correlation(None, f, True)
def cross_correlation(pathname):
#read data
rad = radiance.get_radiance_from_rnc(pathname)
hk = housekeeping.get_all_housekeeping(pathname)
#get only relavent data points
hk = hk.loc[~hk.index.duplicated()]
rad_test = rad.xs(str.encode('H'), level='scene').iloc[:, ((rad.xs(str.encode('H'), level='scene').columns > 560) & (rad.xs(str.encode('H'), level='scene').columns < 600))].mean(axis=1)
data = pd.concat((hk, rad_test.rename('radiance')), axis=1)
hk_test = hk['SCEtemp'].loc[data.radiance.dropna().index]
#filter data
rad_test = rad_test - scipy.signal.savgol_filter(rad_test.values, 51, 4)
hk_test = hk_test - scipy.signal.savgol_filter(hk_test.values, 51, 4)
#remove outliers
hk_test.drop(rad_test.loc[rad_test < (np.mean(rad_test) - 4*np.std(rad_test))].index, inplace=True)
rad_test.drop(rad_test.loc[rad_test < (np.mean(rad_test) - 4*np.std(rad_test))].index, inplace=True)
ind = rad_test.index
#normalize data to range of (0,1)
rad_test = rad_test.values
rad_test = rad_test.reshape((len(rad_test),1))
hk_test = hk_test.values
hk_test = hk_test.reshape((len(hk_test),1))
scaler = MinMaxScaler(feature_range=(0,1))
scaler = scaler.fit(hk_test)
hk_test = pd.Series(np.concatenate(scaler.transform(hk_test), axis=0), index=ind)
scaler = MinMaxScaler(feature_range=(0,1))
scaler = scaler.fit(rad_test)
rad_test = pd.Series(np.concatenate(scaler.transform(rad_test), axis=0), index=ind)
#prepare for rolling correlation
curr_num = 0
curr_roll = 15
corr_array = []
#rolling correlation
for x in range(len(hk_test.values)):
corr_array.append(np.corrcoef(hk_test.values[curr_num:curr_num+curr_roll], rad_test.values[curr_num:curr_num+curr_roll])[0,1])
if curr_roll < 30 and curr_num == 0:
curr_roll += 1
elif curr_roll + curr_num == len(hk_test.values):
curr_num += 1
curr_roll -= 1
else:
curr_num += 1
#scale 0.6-0.9 as 0.1-1.0
corr_array = np.array(corr_array)
scale = np.exp((corr_array-0.9)/0.15)
correlation_qc = ((corr_array > 0.6) & (corr_array < 0.9)) * scale + (corr_array > 0.9)*1
#remove spikes
for x,v in enumerate(correlation_qc):
if v > 0:
for a,b in enumerate(correlation_qc[x:]):
if (b == 0 or x+a == len(correlation_qc)-1) and a < 25:
for c in range(x, x+a+1):
correlation_qc[c] = 0
break
#put qc results into a Series
correlation_qc = pd.Series(correlation_qc, index=rad_test.index)
test = pd.Series(index=hk.index)
test.loc[correlation_qc.index] = correlation_qc
test.fillna(0.0, inplace=True)
print(test)
print('SCEtemp len = ', str(len(hk['SCEtemp'])))
print('rad_test len = ', str(len(rad_test)))
print('hk_test len = ', str(len(hk_test)))
print('correlation_qc len = ', str(len(correlation_qc)))
'''
#finished result
return correlation_qc
'''
......@@ -14,12 +14,15 @@ from aeri_tools.io.dmv import radiance
def main(path):
#get data from B1.CXS file
dmv = radiance.get_radiance_from_rnc(path)
rad = radiance.get_radiance_from_rnc(path)
hk = housekeeping.get_all_housekeeping(path)
#only use dmv data from scene H
data = pd.DataFrame(index=hk.index, columns=('dmv', 'hk'))
data['dmv'] = dmv.xs(str.encode('H'), level='scene').mean(axis=1)
#only use rad data from scene H
data = pd.DataFrame(index=hk.index, columns=('rad', 'hk'))
data['rad'] = rad.xs(str.encode('H'), level='scene').mean(axis=1)
# rad.xs(str.encode('H'), level='scene').iloc[:,
# ((rad.xs(str.encode('H'), level='scene').columns > 560) &
# (rad.xs(str.encode('H'), level='scene').columns < 600))].mean(axis=1)
data['hk'] = hk['SCEtemp']
#get rid of hk rows where scene isn't H
data.dropna(inplace=True)
......@@ -32,7 +35,7 @@ def main(path):
data[x] = data[x] - tmp
#correlate the data
correlation = np.correlate(data['dmv'].values[:],
correlation = np.correlate(data['rad'].values[:],
data['hk'].values[:], mode='same')
print('max = ', np.amax(correlation), ' : min = ',
......@@ -41,7 +44,7 @@ def main(path):
fig, ax = plt.subplots(1, figsize=(15,10), sharex=True)
'''
for x, val in ('dmv', 'hk'):
for x, val in ('rad', 'hk'):
plt.sca(ax[x])
plt.plot(list(data.index), data[val].values[:])
'''
......@@ -94,3 +97,62 @@ if __name__ == '__main__':
print('execution time: %d minute(s), %.2f second(s)' %
((time.time()-start_time)//60, (time.time()-start_time)%60))
for f in tf:
rad = radiance.get_radiance_from_rnc(f)
hk = housekeeping.get_all_housekeeping(f)
rad_test = rad.xs(str.encode('H'), level='scene').iloc[:, ((rad.xs(str.encode('H'), level='scene').columns > 560) & (rad.xs(str.encode('H'), level='scene').columns < 600))].mean(axis=1)
#hk_test = hk['SCEtemp'].loc[rad_test.index[0]:rad_test.index[len(rad_test.index)-1]]
hk_test = hk['SCEtemp'].loc[rad_test.index]
ax = plt.subplot(4,1,1)
ax.set_title('radiance filtered')
rad_test = rad_test - scipy.signal.savgol_filter(rad_test.values, 51, 4)
plt.plot(rad_test)
ax = plt.subplot(4,1,2)
ax.set_title('SCEtemp raw')
plt.plot(hk_test)
ax = plt.subplot(4,1,3)
ax.set_title('SCEtemp filtered')
hk_test = hk_test - scipy.signal.savgol_filter(hk_test.values, 51, 4)
plt.plot(hk_test)
ax = plt.subplot(4,1,4)
ax.set_title('correlation')
#print(len(rad_test.index), ' -- ', len(np.correlate(rad_test.values, hk_test.values, 'same')))
plt.plot(rad_test.index, np.correlate(hk_test.values, rad_test.values, 'same'))
corr = np.correlate(hk_test.values, rad_test.values)[0]
t = f + ' -- ' + 'reduced -- ' + str(corr)
plt.suptitle(t, fontsize=20)
print(t)
plt.show()
plt.clf()
for c,f in enumerate(tf):
print(f)
rad = radiance.get_radiance_from_rnc(f)
hk = housekeeping.get_all_housekeeping(f)
rad_test = rad.xs(str.encode('H'), level='scene').iloc[:, ((rad.xs(str.encode('H'), level='scene').columns > 560) & (rad.xs(str.encode('H'), level='scene').columns < 60
0))].mean(axis=1)
hk_test = hk['SCEtemp'].loc[rad_test.index]
rad_test = rad_test - scipy.signal.savgol_filter(rad_test.values, 51, 4)
hk_test = hk_test - scipy.signal.savgol_filter(hk_test.values, 51, 4)
curr_num = 0
curr_roll = max_roll / 2
corr = []
for x in range(len(hk_test.values)):
corr.append(max(np.correlate(hk_test.values[curr_num:curr_num+curr_roll], rad_test.values[curr_num:curr_num+curr_roll], 'full')[curr_roll-1:curr_roll+1]))
if curr_roll < max_roll and curr_num == 0:
curr_roll += 1
elif curr_roll + curr_num == len(hk_test.values):
curr_num += 1
curr_roll -= 1
else:
curr_num += 1
#ax = plt.subplot(len(tf), 1, c+1)
ax = plt.subplot(111)
ax.set_title(f + ' ||| ' + str(np.corrcoef(hk_test.values, rad_test.values)[0,1]))
ax.set_ylim(-0.1, 1.0)
plt.plot(corr)
plt.savefig('/Users/adiebold/aeri_quality_control/testing/pngs/correlation/' + f[:3] + '_' + f[6:12] + '.png')
plt.clf()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment