Skip to content
Snippets Groups Projects
Commit d22cc721 authored by Alex Diebold's avatar Alex Diebold
Browse files

Minor improvements made to some tests. Completely re-did hbb_std_dev,...

Minor improvements made to some tests. Completely re-did hbb_std_dev, hbb_lw_nen, hbb_sw_nen checks. Created cross_correlation check
parent 6f6784ef
No related branches found
No related tags found
No related merge requests found
......@@ -100,7 +100,6 @@ class Checklist(object):
frame = check(checklist, frame, parameters)
another_check_completed = True
uncompleted_checks.remove(check)
assert uncompleted_checks == []
return frame
......
......@@ -110,7 +110,7 @@ def detector_temp_check(frame, parameters):
flags,pds = flag_detector_temp_oscillations(detector_temp,win_type=WIN_TYPE)
flags = flatten_out(flags.copy(), pds)
return flags
return flags.replace(True, 0.4)
'''
@checklist.add_check(depends=['detectorTemp', 'datetime'], affects_calibration=False, description='test for strong periodicity in temperature on the order of 2-10 mins - intermediary value')
......
......@@ -32,6 +32,7 @@ def find_bb_outliers(frame, parameters, bb):
if bb == 'HBB':
delta_thresh = .001
# second_thresh = .001 --- change delta_thresh to .01
elif bb == 'ABB':
delta_thresh = .01
elif bb == 'BST':
......@@ -49,7 +50,11 @@ def find_bb_outliers(frame, parameters, bb):
bbb_outliers = np.array(list(_scan_for_outliers(frame, '{}bottomTemp'.format(bb), delta_thresh)))
bba_outliers = np.array(list(_scan_for_outliers(frame, '{}apexTemp'.format(bb), delta_thresh)))
bbt_outliers = np.array(list(_scan_for_outliers(frame, '{}topTemp'.format(bb), delta_thresh)))
# if bb == 'HBB':
# bbb_outliers = bbb_outliers * 0.5 + np.array(list(_scan_for_outliers(frame, '{}bottomTemp'.format(bb), second_thresh))) * 0.5
# bba_outliers = bba_outliers * 0.5 + np.array(list(_scan_for_outliers(frame, '{}apexTemp'.format(bb), second_thresh))) * 0.5
# bbt_outliers = bbt_outliers * 0.5 + np.array(list(_scan_for_outliers(frame, '{}topTemp'.format(bb), second_thresh))) * 0.5
# return ( np.array([max((bbb_outliers[x], bba_outliers[x], bbt_outliers[x])) for x in range(len(bbb_outliers))]))
return ( bbb_outliers | bba_outliers | bbt_outliers)
def _scan_for_outliers(frame, variable, delta_thresh):
......
......@@ -6,6 +6,7 @@ import re
from collections import defaultdict, OrderedDict
import netCDF4
from aeri_tools.io.dmv.housekeeping import get_all_housekeeping
from aeri_tools.io.dmv.radiance import get_radiance_from_rnc
import pandas as pd
import numpy as np
from zipfile import ZipFile
......@@ -233,7 +234,6 @@ def update_all(ftp_dir, sci_dir, checklist, parameters=None, hint=None):
print(ftp_dir)
# Find all CXS files
cxs_files = glob(os.path.join(os.path.abspath(ftp_dir),'AE*','*B1.CXS'))
print(cxs_files)
# For each CXS file find a matching SUM file and possible QC filename
for qc_file, cxs_file, sum_file in files_to_update(cxs_files, hint=hint):
print('Performing quality control for {}'.format(cxs_file))
......@@ -242,6 +242,13 @@ def update_all(ftp_dir, sci_dir, checklist, parameters=None, hint=None):
if parameters is None:
parameters = {}
radiance_data = get_radiance_from_rnc(cxs_file)
radiance_data = radiance_data.xs(str.encode('H'), level='scene').iloc[:,
((radiance_data.xs(str.encode('H'), level='scene').columns > 560) &
(radiance_data.xs(str.encode('H'), level='scene').columns < 600))].mean(axis=1)
parameters['radiance'] = radiance_data
parameters['SCEtemp'] = get_all_housekeeping(cxs_file).SCEtemp.loc[radiance_data.index]
frame = prepare_frame(cxs_file, sum_file, sci_dir, parameters)
# Perform qc on housekeeping frame
......
......@@ -23,21 +23,20 @@ def imaginary_radiance_check(frame, parameters):
@checklist.add_check(depends=['HBBviewStdDevRadiance985_990'], affects_calibration=True, description='HBB radiance has too much variance', hides=hide_data_if_invalid)
def hbb_std_dev_check(frame, parameters):
hbb_std_dist = scipy.stats.chi2.fit(frame.HBBviewStdDevRadiance985_990)
_, hbb_std_dist_hi = scipy.stats.chi2.interval(.995, *hbb_std_dist)
return frame.HBBviewStdDevRadiance985_990 > hbb_std_dist_hi
mean = np.mean(frame.HBBviewStdDevRadiance985_990)
std_dev = np.std(frame.HBBviewStdDevRadiance985_990)
return frame.HBBviewStdDevRadiance985_990 > mean + 3 * std_dev
@checklist.add_check(depends=['LW_HBB_NEN'], affects_calibration=True, description='Longwave HBB radiance NEN too high', hides=hide_data_if_invalid)
def hbb_lw_nen_check(frame, parameters):
lw_hbb_nen_dist = scipy.stats.chi2.fit(frame.LW_HBB_NEN)
_, lw_hbb_nen_hi = scipy.stats.chi2.interval(.995, *lw_hbb_nen_dist)
return frame.LW_HBB_NEN > lw_hbb_nen_hi
return check_bounds(frame, 'LW_HBB_NEN', 0.0, 0.3, 0.0, 0.5)
@checklist.add_check(depends=['SW_HBB_NEN'], affects_calibration=True, description='Shortwave HBB radiance NEN too high', hides=hide_data_if_invalid)
def hbb_sw_nen_check(frame, parameters):
sw_hbb_nen_dist = scipy.stats.chi2.fit(frame.SW_HBB_NEN)
_, sw_hbb_nen_hi = scipy.stats.chi2.interval(.995, *sw_hbb_nen_dist)
return frame.SW_HBB_NEN > sw_hbb_nen_hi
return check_bounds(frame, 'SW_HBB_NEN', 0.0, 0.015, 0.0, 0.1)
def check_bounds(frame, var, green_low, green_high, yellow_low, yellow_high):
return ((((frame[var] < green_low) | (frame[var] > green_high)) * 0.1) + (((frame[var] < yellow_low) | (frame[var] > yellow_high)) * 0.9))
@checklist.add_check(depends=['LWresponsivity'], affects_calibration=False, description='Longwave responsivity outlier', hides=hide_data_if_invalid)
def lw_responsivity_check(frame, parameters):
......
......@@ -2,7 +2,7 @@ import pandas as pd
import numpy as np
from aeri_qc.all_checks import checklist
@checklist.add_check(depends=['detectorTemp'], affects_calibration=True, hides=['missing_data_flag_check'], description='detector temperature too high')
@checklist.add_check(depends=['detectorTemp', 'Time'], affects_calibration=True, hides=['missing_data_flag_check'], description='detector temperature too high')
def detector_check(frame, parameters):
"""
Check that the detector temp is in range
......@@ -11,8 +11,20 @@ def detector_check(frame, parameters):
1 x > 90
}
"""
temp = ~(frame.detectorTemp.diff() / frame.Time.diff() > 20) * 1
for x,v in enumerate(temp):
if x == 0:
if temp[x+1] == 1 and v == 0:
temp[x] = 1
elif x == len(temp) - 1:
if temp[x-1] == 1 and v == 0:
temp[x] = 1
else:
if temp[x-1] == 1 and temp[x+1] == 1 and v == 0:
temp[x] = 1
softmax = np.exp((frame['detectorTemp'] - 90)/5)
return (((frame['detectorTemp'] > 80) & (frame['detectorTemp'] <= 90)) * softmax + (frame['detectorTemp'] > 90)*1).fillna(0.0)
return ((((frame['detectorTemp'] > 80) & (frame['detectorTemp'] <= 90)) * softmax + (frame['detectorTemp'] > 90)*1).fillna(0.0)) * temp
@checklist.add_check(depends=['HBBapexTemp','HBBbottomTemp','HBBtopTemp'], affects_calibration=True, description='HBB thermistors outside range')
def hbb_thermistor_check(frame, parameters):
......@@ -20,7 +32,7 @@ def hbb_thermistor_check(frame, parameters):
@checklist.add_check(depends=['ABBapexTemp','ABBbottomTemp','ABBtopTemp'], affects_calibration=True, description='ABB thermistors outside range')
def abb_thermistor_check(frame, parameters):
return thermistor_check(frame, 'ABB', 150, 335)
return thermistor_check(frame, 'ABB', 200, 318)
@checklist.add_check(depends=['datetime', 'HBBbottomTemp','HBBtopTemp','HBBapexTemp'], affects_calibration=True, description='HBB temperature is changing too quickly')
def hbb_stable_check(frame, parameters):
......@@ -33,7 +45,7 @@ def hbb_stable_check(frame, parameters):
hbba_diff_problem = abs(hbba_diff.fillna(0)) > hsr
hbbt_diff_problem = abs(hbbt_diff.fillna(0)) > hsr
return (hbbb_diff_problem | hbbt_diff_problem | hbbt_diff_problem)
return (hbbb_diff_problem | hbba_diff_problem | hbbt_diff_problem)
def thermistor_check(frame, bb, low, high):
......
import sklearn.covariance
from sklearn.preprocessing import MinMaxScaler
import scipy.stats
import scipy.signal
import numpy as np
from aeri_qc.all_checks import checklist
import statsmodels.api as sm
import pandas as pd
@checklist.add_check(depends=['HBBbottomTemp','HBBtopTemp','HBBapexTemp'], affects_calibration=True, description='HBB thermistors disagree')
def hbb_covariance_check(frame, parameters):
......@@ -19,28 +22,108 @@ def hbb_covariance_check(frame, parameters):
distances = envelope.mahalanobis(frame[['HBBbottomTemp','HBBtopTemp','HBBapexTemp']]) ** .5
return (distances > 10) * 1
@checklist.add_check(depends=['SCEtemp', 'Time', 'dateYYMMDD', 'airNearBBsTemp', 'BBsupportStructureTemp'], affects_calibration=True, description='absolute deviation of temperature too high')
@checklist.add_check(depends=['SCEtemp', 'timeHHMMSS'], affects_calibration=True, description='absolute deviation of temperature too high')
def sce_temp_deviation_check(frame, parameters):
temps = frame['SCEtemp'].copy()
output = []
temps_to_avg = []
index = 0
upper_bound = len(frame['Time']) - 1
lower_bound = 0
while(np.isnan(frame['Time'][upper_bound])):
upper_bound -= 1
while(np.isnan(frame['Time'][lower_bound])):
while(np.isnan(frame['timeHHMMSS'][lower_bound])):
lower_bound += 1
time_difference = frame['Time'][upper_bound] - frame['Time'][lower_bound]
if frame['dateYYMMDD'][upper_bound] != frame['dateYYMMDD'][lower_bound]:
time_difference += 24
rolling_number = int(len(frame['Time'])/time_difference)
for x, temp in temps.iteritems():
for x,v in enumerate(frame.timeHHMMSS):
if v - 200 > frame.timeHHMMSS[lower_bound]:
rolling_number = x - lower_bound + 1
break
for x, temp in frame.SCEtemp.iteritems():
if len(temps_to_avg) < rolling_number:
temps_to_avg.append(temp)
else:
temps_to_avg[index] = temp
index = (index + 1) % rolling_number
absdev = sm.robust.scale.mad(temps_to_avg)
softmax = np.exp((absdev - 4) / 5)
temps.loc[x] = ((((absdev > 1.5) & (absdev <= 4)) * softmax + (absdev > 4) * 1))
return temps.fillna(0.0)
\ No newline at end of file
softmax = (0.4 * (((absdev >= 0.5) & (absdev < 1.5)) * (absdev - 0.5) +
(absdev >= 1.5)))
output.append(softmax)
return pd.Series(data=output, index=frame.index).fillna(0.0)
@checklist.add_check(depends=['datetime'], affects_calibration=True, description='test for correlation between SCEtemp and radiance')
def cross_correlation_check(frame, parameters):
#acquire data
radiance = parameters['radiance']
sce_temp = parameters['SCEtemp'].loc[radiance.index]
if len(radiance.values) > 51:
#filter data
radiance = radiance - scipy.signal.savgol_filter(radiance.values, 51, 4)
sce_temp = sce_temp - scipy.signal.savgol_filter(sce_temp.values, 51, 4)
else:
radiance = radiance - scipy.signal.savgol_filter(radiance.values, 3, 2)
sce_temp = sce_temp - scipy.signal.savgol_filter(sce_temp.values, 3, 2)
#remove outliers
sce_temp.drop(radiance.loc[radiance < (np.mean(radiance) - 4*np.std(radiance))].index, inplace=True)
radiance.drop(radiance.loc[radiance < (np.mean(radiance) - 4*np.std(radiance))].index, inplace=True)
ind = radiance.index
#normalize data to range of (0,1)
radiance = radiance.values
radiance = radiance.reshape((len(radiance),1))
sce_temp = sce_temp.values
sce_temp = sce_temp.reshape((len(sce_temp),1))
scaler = MinMaxScaler(feature_range=(0,1))
scaler = scaler.fit(sce_temp)
sce_temp = pd.Series(np.concatenate(scaler.transform(sce_temp), axis=0), index=ind)
scaler = MinMaxScaler(feature_range=(0,1))
scaler = scaler.fit(radiance)
radiance = pd.Series(np.concatenate(scaler.transform(radiance), axis=0), index=ind)
#prepare for rolling correlation
curr_num = 0
curr_roll = 15
corr_array = []
#rolling correlation
for x in range(len(sce_temp.values)):
corr_array.append(np.corrcoef(sce_temp.values[curr_num:curr_num+curr_roll], radiance.values[curr_num:curr_num+curr_roll])[0,1])
if curr_roll < 30 and curr_num == 0:
curr_roll += 1
elif curr_roll + curr_num == len(sce_temp.values):
curr_num += 1
curr_roll -= 1
else:
curr_num += 1
#scale 0.6-0.9 as 0.1-1.0
corr_array = np.array(corr_array)
scale = np.exp((corr_array-0.9)/0.15)
correlation_qc = ((corr_array > 0.6) & (corr_array < 0.9)) * scale + (corr_array > 0.9)*1
#remove spikes
for x,v in enumerate(correlation_qc):
if v > 0:
for a in range(x, x+25):
if correlation_qc[a] == 0 or a == len(correlation_qc) - 1:
for b in range(x, x-25, -1):
if correlation_qc[b] == 0 or x-b == 0:
for c in range(x, a):
correlation_qc[c] = 0
break
break
#put correlation qc into a Series
correlation_qc = pd.Series(correlation_qc, index=radiance.index)
temp = pd.Series(index=frame.datetime.values)
temp.loc[correlation_qc.index] = correlation_qc
correlation_qc = temp.fillna(0.0)
correlation_qc.index = frame.index
for x,v in enumerate(correlation_qc):
if v > 0 and x != 0:
for a in range(1,25):
if correlation_qc[x-a] > 0 or x-a == 0:
for b in range(1,a):
correlation_qc.loc[x-b] = v
break
#finished result
return correlation_qc
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment