Skip to content
Snippets Groups Projects
Commit e2fdb84b authored by Matthew Westphall's avatar Matthew Westphall
Browse files

removed some obsolete files and updated README

parent 03b954d1
Branches dev main
No related tags found
No related merge requests found
Flag correlation between interior temperature and radiance in SGP data
## AERI AC
Project to develop QC flags for various temperature-based parameters
### 1. Air Conditioner - Radiance Correlation
Flag correlation between interior temperature and radiance in SGP data
Largely unfinished; Main file: `flags_from_sum.py`
### 2. Detector Temperature Periodicity
Flag strongly sinusoidal patterns in detectorTemp
Reasonably close to completion; Main file: `detector_temp_flag.py`
### 3. Air Conditioner - HBBTemp Correlation
Flag correlation between interior temperature and HBBTemperature
Not started, no main file.
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
SEG_LEN = 48
def cross_corr(df,exclude=['Time']):
df_out = df[exclude]
for i,col1 in enumerate(df.columns[:-1]):
if not col1 in exclude:
for col2 in df.columns[i+1:]:
if not col2 in exclude:
df_out[' '.join([col1,col2])] = df[col1]*df[col2]
return df_out
def generate_threshold_table():
cols = ["SCEtemp","rad"]
idx = ["BBsupportStructureTemp","airNearBBsTemp",
"atmosphericRelativeHumidity","calibrationHBBtemp","rad"]
table =[[.003, .12, .04, 1.2],
[.003, .12, .04, 1.2],
[.03 , .9, .1, 10],
[2e-5,9e-4, 2e-4,8e-3],
[.03 , .9, None,None]]
columns = pd.MultiIndex.from_product([cols,['min','max',]])
thresh_table = pd.DataFrame(table,columns=columns,index=idx)
return thresh_table
def percent_from_flags(flag_df):
'''
Output a single 0-1 floating point flag from a set of boolean flags
'''
flags = pd.Series(np.zeros(flag_df.shape[0]))
for i,_ in enumerate(flags):
#SCE -> rad is the big one:
weight = 1./9
if flag_df["detrend SCEtemp detrend rad"].loc[i]:
flags[i] += .6
weight = .4/9
elif flag_df["detrend atmosphericRelativeHumidity detrend rad"].loc[i]:
flags[i] += .5
weight = .5/9
flags[i] += weight * flag_df.iloc[i].sum()
return flags
def cross_corr_bool_flags(df, kind = "both"):
thresh_table = generate_threshold_table()
flag_df = pd.DataFrame(np.zeros(df.shape),columns=df.columns,index=df.index)
for col in df.columns[1:]:
thresh = np.inf
if 'SCEtemp' in col and kind in ("SCEtemp","both"):
thresh = thresh_table['SCEtemp']['min'].loc[col.split()[-1]]
elif 'rad' in col and kind in ("rad","both"):
thresh = thresh_table['rad']['min'].loc[col.split()[1]]
thresh = np.max((thresh,df[col].std()))
flag_df[col]=df[col] > thresh
#flags /=(thresh_table['rad']['weight'].sum()+thresh_table['SCEtemp']['weight'].sum())
return flag_df
def smooth_corr_flags(flag_df):
return flag_df#.rolling(20).max()
def flag_corrs(df):
c_corrs = cross_corr(df)
c_corr_bools = cross_corr_bool_flags(c_corrs)
c_corr_flags = percent_from_flags(c_corr_bools)
smooth_c_corr_flags = smooth_corr_flags(c_corr_flags)
return smooth_c_corr_flags
def detrend_rolling_mean(series, window = SEG_LEN):
return series - series.rolling(window,center=True).mean()
import pandas as pd
import numpy as np
from scipy import signal
from scipy.fftpack import fft
TEMP_CORR_KEYS = ["SCEtemp","BBsupportStructureTemp","airNearBBsTemp",
"atmosphericRelativeHumidity"]
def calc_autocorr(series):
"""from the source of from pandas.tools.plotting.autocorrelation_plot
"""
n = len(series)
data = np.asarray(series)
mean = np.mean(data)
c0 = np.sum((data - mean) ** 2) / float(n)
def autocorr_f(index):
return ((data[:n - index] - mean) *
(data[index:] - mean)).sum() / float(n) / c0
x = np.arange(n) + 1
autocorr = np.array(list(map(autocorr_f, x)))
return autocorr
def cross_corr_all(x,y):
"""lambda from http://stackoverflow.com/a/37215839"""
return np.asarray(list(map(lambda i: x.corr(y.shift(i)),range(x.size))))
def fft_x(x):
return np.append(((fft(x)/x.size))[:x.size//2],np.zeros(x.size - x.size//2))
def flag_fr_ffts(fftx):
#find the 3 dominant frequencies in each fft
def dom_freqs(fft):
fft = pd.Series(fft.copy())
return np.array([(fft.argmax(),fft.pop(fft.argmax()))
for i in range(3)]).T
x_frq,x_amp = dom_freqs(fftx)
#normalize
x_amp /= np.max(x_amp)
return x_frq,x_amp
def present_fft_flags(fftx,nitems):
x_frq, x_amp, = flag_fr_ffts(fftx)
sort_i = np.argsort(x_frq)
x_frq = x_frq[sort_i]
x_amp = x_amp[sort_i]
x_sum = np.sum(x_amp)
bounds = 0,0
xout = np.empty(nitems)
for xa,xf in zip(x_amp,x_frq):
bounds = [bounds[-1],bounds[-1] + xa/x_sum * nitems]
xout[bounds[0]:bounds[1]] = xf
return pd.Series(xout)
def flag_ac(df):
df_out = pd.DataFrame({'Time':df['Time']})
new_series = lambda: pd.Series(np.empty(df_out['Time'].size))
partitions = np.array(np.linspace(0,df['Time'].size,48),dtype=np.int)
for key in TEMP_CORR_KEYS:
df_out[key] = signal.savgol_filter(df[key],41,4)
df_out['sig %s'%key] = new_series()
for p0,pf in zip(partitions[::2],partitions[1::2]):
#replace each hour of df_out data with a line of best fit through
#that hour
df_out['sig %s'%key][p0:pf] = np.poly1d(np.polyfit(df['Time'][p0:pf],
df_out[key][p0:pf],1))(df['Time'][p0:pf])
#df_out['sig %s'%key][pf-2] = np.nan
for p0,pf in zip(partitions[1::2],partitions[2::2]):
p_ends = [p0-1,pf+1]
df_out['sig %s'%key][p0:pf] = np.poly1d(np.polyfit(df['Time'][p_ends],
df_out['sig %s'%key][p_ends],1))(df['Time'][p0:pf])
#df_out['sig %s'%key][pf] = np.nan
df_out['detrend %s'%key] = df[key] - df_out['sig %s'%key]
'''
df_out['corr %s'%key] = new_series()
for p0,pf in zip(partitions,partitions[1:]):
x = df_out['detrend %s'%key][p0:pf]
real_fft = np.abs(fft_x(x).real)
df_out['corr %s'%key][p0:pf] = present_fft_flags(real_fft,x.size)
'''
return df_out,partitions
import pandas as pd
import numpy as np
from scipy import signal
TEMP_CORR_KEYS = ["SCEtemp","BBsupportStructureTemp","airNearBBsTemp",
"atmosphericRelativeHumidity","calibrationHBBtemp"]
SEG_LEN = 48
def detrend_linreg(series, nseg = SEG_LEN):
segments = series.copy()
partitions = np.array(np.linspace(0,series.size,nseg),dtype=np.int)
for p0,pf in zip(partitions[::2],partitions[1::2]):
#replace each segment of the series with a line of best fit through
#that segment
segments[p0:pf] = np.poly1d(np.polyfit(np.arange(p0,pf),
series[p0:pf],1))(np.arange(p0,pf))
for p0,pf in zip(partitions[1::2],partitions[2::2]):
segments[p0:pf] = np.poly1d(np.polyfit([p0-1,pf+1],
segments[[p0-1,pf+1]],1))(np.arange(p0,pf))
return series - segments
def detrend_rolling_mean(series, window = SEG_LEN):
return series - series.rolling(window,center=True).mean()
detrend = detrend_rolling_mean
def powspec(series):
return(signal.periodogram(series,series.size))
def string_powspecs_together(series, nseg = SEG_LEN):
partitions = np.array(np.linspace(0,series.size,nseg),dtype=np.int)
powspecs = series.copy()
for p0,pf in zip(partitions,partitions[1:]):
_,powspec_slice = powspec(series[p0:pf])
powspec_slice/=powspec_slice.max()
powspecs[p0: p0+powspec_slice.size] = powspec_slice#/powspec_slice.max()
powspecs[p0+powspec_slice.size:pf] = np.nan
return powspecs
def flag_ac(df):
df_out = pd.DataFrame({'Time':df['Time']})
#new_series = lambda: pd.Series(np.empty(df_out['Time'].size))
for key in TEMP_CORR_KEYS:
df_out['detrend %s'%key] = detrend(df[key])
return df_out
def main():
pass
main() if __name__ == '__main__' else None
\ No newline at end of file
for i in range(10000000):
pass
150601
150105
150801
import glob
import os
import subprocess
from random import shuffle
glob_files = glob.glob("dreadnaught/*ENG.cdf")
shuffle(glob_files)
for g in glob_files:
print(g)
g_date = os.path.basename(os.path.splitext(g)[0]).replace('ENG','')
print(g_date)
subprocess.call(['python','test_ac_flag.py','-d',g_date])
#!/usr/bin/env python
USAGE="""
usage: python flag_ac.py start_time end_time site
OR
python flag_ac.py -f file_name
"""
#from matplotlib import pyplot as plt
from sys import argv
from dummy_housekeeping_dataframe import housekeeping_dataset_from_cdf_or_API
from flag_ac import flag_ac
from pandas.tools.plotting import autocorrelation_plot
from matplotlib import pyplot as plt
from scipy import signal
import netCDF4
import numpy as np
def autocorr(df,keys,ax=None):
for key in keys:
print(df[key].autocorr(2))
ax = autocorrelation_plot(df[key],ax=ax)
return ax
def show(df,keys,ax=None,kind='line',sy=False):
ax = df.plot(x='Time',y=keys,ax=ax,kind=kind,secondary_y =sy, lw=2)
ax.grid(True)
return ax
def axvlines(time,where,color='k'):
for t in time[where]:
plt.axvline(t,color=color)
def get_aeri_rad():
return netCDF4.Dataset(
argv[3] if argv[1] == '-f' else 'dreadnaught/%sC1.cdf'%argv[2]
).variables['mean_rad'][:,41:167].mean(axis=1)
get_aeri_wnum_range = lambda: netCDF4.Dataset(
argv[3] if argv[1] == '-f' else 'dreadnaught/%sC1.cdf'%argv[2]
).variables['wnum'if argv[1] == '-f'else'wnum1'][[41,167]]
def main():
if len(argv) < 2:
print(USAGE)
exit(1)
keep_going = True
while (keep_going):
OTHER_KEY='airNearBBsTemp'
df = housekeeping_dataset_from_cdf_or_API(*argv)
df_out,partitions = flag_ac(df)
aeri_rad = get_aeri_rad()
wnum_range = tuple(get_aeri_wnum_range())
#guess at appropriate ylims
detrend_ylim = (np.asarray(df_out[['detrend SCEtemp',
'detrend %s'%OTHER_KEY]]).min(),
np.asarray(df_out[['detrend SCEtemp',
'detrend %s'%OTHER_KEY]]).max())
#show(df,["SCEtemp","BBsupportStructureTemp"],ax=ax[0])
#show(df_out,["SCEtemp","BBsupportStructureTemp"],ax=ax[0])
hours = 0,12,24
for t0,tf in zip(hours,hours[1:]):
fig,ax=plt.subplots(3,figsize=(15,10),sharex=True)
t_i = (df_out['Time'] >= t0) & (df_out['Time'] <= tf)
df_out_t = df_out[t_i]
df_t = df[t_i]
rad_t = aeri_rad[np.asarray(t_i)]
rad_ymin = np.mean(rad_t) - 2.5 * np.std(rad_t)
rad_ymax = np.mean(rad_t) + 2.5 * np.std(rad_t)
show(df_t,["SCEtemp",OTHER_KEY],ax=ax[0],sy=["SCEtemp"])
show(df_out_t,["detrend SCEtemp","detrend %s"%OTHER_KEY],ax=ax[2])
#show(df_out,["corr SCEtemp","corr BBsupportStructureTemp"],ax=ax[1])
plt.sca(ax[2])
plt.ylim(detrend_ylim)
plt.ylabel("dT (K)")
#axvlines(df['Time'],partitions)
plt.sca(ax[1])
plt.ylabel("Radiance (RU)")
plt.plot(df_t['Time'],rad_t,'r',
label='AERI Mean Radiance, %d-%d cm-1'%wnum_range)
plt.ylim(rad_ymin,rad_ymax)
#plt.plot(df['Time'],signal.savgol_filter(get_aeri_rad(),41,4),'b',label='smoothed radiance')
plt.legend(loc='best')
plt.grid()
#axvlines(df['Time'],partitions)
plt.sca(ax[0])
plt.ylabel("Temperature (K)")
#axvlines(df['Time'],partitions)
plt.suptitle('%s: %s-%s hours'%(argv[2],t0,tf),fontsize=14)
#plt.show()
plt.savefig('%s_%d_%d.png'%(argv[2],t0,tf),bbox_inches='tight')
plt.close()
#more fancy command line mangling.
#if a list of files is given, use them all at once, otherwise keep
#popping dates from the stack until it's empty
if argv[1] == '-f':
keep_going = False
if argv[1] == '-d':
argv.pop(2)
if len(argv) < 3:
keep_going = False
if __name__ == '__main__':
main()
\ No newline at end of file
from sys import argv
from dummy_housekeeping_dataframe import housekeeping_dataset_from_cdf_or_API
from pow_dense_testing.signal_crawler import SignalCrawler
import numpy as np
import netCDF4
from matplotlib import pyplot as plt
import flag_ac2 as flag_ac
import pandas as pd
import cross_corr
def get_aeri_rad():
return netCDF4.Dataset(
argv[3] if argv[1] == '-f' else 'dreadnaught/%sC1.cdf'%argv[2]
).variables['mean_rad'][:,41:167].mean(axis=1)
get_aeri_wnum_range = lambda: netCDF4.Dataset(
argv[3] if argv[1] == '-f' else 'dreadnaught/%sC1.cdf'%argv[2]
).variables['wnum'if argv[1] == '-f'else'wnum1'][[41,167]]
def main():
keep_going = True
while (keep_going):
try:
aeri_rad = get_aeri_rad()
#wnum_range = get_aeri_wnum_range()
df = housekeeping_dataset_from_cdf_or_API(*argv)
df_out = flag_ac.flag_ac(df)
rad_dt = flag_ac.detrend(pd.Series(aeri_rad))
df_out['detrend rad'] = rad_dt
corr = cross_corr.flag_corrs(df_out)
except Exception as e:
raise e
else:
if argv[-1] == 'flag':
OTHER_KEY='atmosphericRelativeHumidity'
fig,ax=plt.subplots(3,figsize=(15,10),sharex=True)
plt.sca(ax[0])
plt.title("Engineering Data")
df.plot('Time',['SCEtemp',OTHER_KEY],secondary_y ='SCEtemp',ax=ax[0])
plt.grid()
plt.sca(ax[1])
plt.title("540-600cm-1 radiance")
plt.ylabel("Radiance (RU)")
plt.plot(np.asarray(df['Time']),aeri_rad,'r',label="Radiance")
plt.legend(loc='best')
plt.grid()
plt.sca(ax[2])
plt.grid()
plt.plot(df['Time'],corr)
#cross_corr.plot_cross_corr(corr)
plt.ylim(0,1)
plt.title("Data Correlation Flags")
plt.ylabel("Probability of Correlation")
plt.xlabel("Time (Hours UTC)")
plt.savefig("%s_flags.png"%argv[2],bbox_inches='tight')
plt.close()
if argv[-1] == 'debug':
for col in corr.columns[1:]:
if 'SCEtemp' in col or 'rad' in col:
if not ('SCEtemp' in col and 'rad' in col):
OTHER_KEY = col.split()[-1 if 'SCEtemp' in col else 1]
fig,ax=plt.subplots(3,figsize=(15,10),sharex=True)
plt.sca(ax[0])
plt.title("Engineering Data")
df.plot('Time',['SCEtemp',OTHER_KEY],secondary_y='SCEtemp',ax=ax[0])
plt.grid()
plt.sca(ax[1])
plt.title("540-600cm-1 radiance")
plt.ylabel("Radiance (RU)")
plt.plot(np.asarray(df['Time']),aeri_rad,'r',label="Radiance")
plt.legend(loc='best')
plt.grid()
plt.sca(ax[2])
#rolling correlation
corr.plot('Time',[col],ax=ax[2])
plt.xlabel('Time (Hours UTC)')
plt.title('Product of Detrended Variables')
thresh_table=cross_corr.generate_threshold_table()
if 'SCEtemp' in col:
thresh=thresh_table['SCEtemp']['min'].loc[col.split()[-1]]
elif 'rad' in col:
thresh=thresh_table['rad']['min'].loc[col.split()[1]]
plt.grid(True)
plt.axhline(-thresh,ls='--',color='k')
plt.axhline(thresh,ls='--',color='k')
ylim = np.max((10*corr[col].abs().median(), 5 * thresh))
plt.ylim(-ylim,ylim)
plt.suptitle(argv[2],fontsize=14)
plt.savefig("%s_%s.png"%(col.replace(' ','_'),argv[2]),
bbox_inches='tight')
plt.close()
#'''
if argv[1] == '-f':
keep_going = False
if argv[1] == '-d':
argv.pop(2)
if len(argv) < 3:
keep_going = False
if __name__ == '__main__':
main()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment