diff --git a/testing/correlation_test.py b/testing/correlation_test.py index d367c5ef5c553db759e748637d73354ba1273cd3..4e2d7e2f6dc8314334894d9e2a8008bfa89afb72 100644 --- a/testing/correlation_test.py +++ b/testing/correlation_test.py @@ -1,12 +1,15 @@ +from sys import argv +import os +import time + import pandas as pd import numpy as np import scipy.signal +import matplotlib.pyplot as plt + from aeri_tools.io.dmv import housekeeping from aeri_tools.io.dmv import radiance -from sys import argv -import os -import time -import matplotlib.pyplot as plt + def main(path): @@ -18,10 +21,10 @@ def main(path): data = pd.DataFrame(index=hk.index, columns=('dmv', 'hk')) data['dmv'] = dmv.xs(str.encode('H'), level='scene').mean(axis=1) data['hk'] = hk['SCEtemp'] - #get rid of hk rows where scene isn't H data.dropna(inplace=True) + #account for errors when using the savgol_filter try: #subtract the rolling average for x in data.columns: @@ -29,9 +32,11 @@ def main(path): data[x] = data[x] - tmp #correlate the data - correlation = np.correlate(data['dmv'].values[:], data['hk'].values[:], mode='same') + correlation = np.correlate(data['dmv'].values[:], + data['hk'].values[:], mode='same') - print('max = ', np.amax(correlation), ' : min = ', np.amin(correlation)) + print('max = ', np.amax(correlation), ' : min = ', + np.amin(correlation)) fig, ax = plt.subplots(1, figsize=(15,10), sharex=True) @@ -43,11 +48,10 @@ def main(path): plt.plot(data.index, correlation) - #plt.show() + plt.show() name = '/Users/adiebold/Documents/corr_pngs/' + path[-12:-6] + '.png' - plt.savefig(name) + # plt.savefig(name) plt.clf() - #probably the scipy.signal.savgol_filter failing except: print('FAIL') @@ -55,28 +59,38 @@ def main(path): if __name__ == '__main__': start_time = time.time() - - pathname = argv[1] + filepath = argv[1] skip_num = 0 curr_num = 0 - if os.path.isdir(pathname): - for filename in os.listdir(pathname): - full_path = pathname + '/' + filename - if os.path.isdir(full_path): - for fname in os.listdir(full_path): - fuller_path = full_path + '/' + fname - if os.path.isfile(fuller_path) and 'B1.CXS' in fname: - print(curr_num, ': ', fuller_path) - if curr_num >= skip_num: - main(fuller_path) + print('skip_num = ', skip_num, '\n') + if os.path.isdir(filepath): + for filename_1 in os.listdir(filepath): + filename_1 = filepath + '/' + filename_1 + filename_1 = filename_1.replace('//', '/') + if os.path.isdir(filename_1): + for filename_2 in os.listdir(filename_1): + filename_2 = filename_1 + '/' + filename_2 + filename_2.replace('//', '/') + if (os.path.isfile(filename_2) and + filename_2.endswith('B1.CXS')): curr_num += 1 - elif os.path.isfile(full_path) and 'B1.CXS' in filename: - if curr_num >= skip_num: - main(full_path) + if curr_num >= skip_num: + print(curr_num, ': ', filename_2) + main(filename_2) + else: + print(curr_num, ': ', filename_2, ' -- SKIPPED') + elif os.path.isfile(filename_1) and filename_1.endswith('B1.CXS'): curr_num += 1 - print(curr_num, ': ', filename) - else: - main(pathname) - - print('execution time: %d minute(s), %.2f second(s)' % ((time.time()-start_time)//60, (time.time()-start_time)%60)) + if curr_num >= skip_num: + print(curr_num, ': ', filename_1) + main(filename_1) + else: + print(curr_num, ': ', filename_1, ' -- SKIPPED') + elif os.path.isfile(filepath): + if filepath.endswith('B1.CXS'): + print(filepath) + main(filepath) + + print('execution time: %d minute(s), %.2f second(s)' % + ((time.time()-start_time)//60, (time.time()-start_time)%60)) diff --git a/testing/quick_vis.py b/testing/quick_vis.py index 6823f8c2f2e8f29f5721687618fff2055b33d7f2..09e285b080aae1a13388903381ef4e248481a1fc 100644 --- a/testing/quick_vis.py +++ b/testing/quick_vis.py @@ -9,7 +9,7 @@ import numpy as np import pandas as pd -def run(filename): +def main(filename): subplot_start_time = time.time() print('program running...') @@ -40,8 +40,8 @@ def run(filename): low_ind = x break #calculate average time increment between times for inferring NaT values - time_increment = (old_data['time'][high_ind] - old_data['time'][low_ind]) - / len(old_data['time'][low_ind:high_ind]) + time_increment = ((old_data['time'][high_ind] - old_data['time'][low_ind]) + / len(old_data['time'][low_ind:high_ind+1])) data = pd.DataFrame(index=range(len(old_data['time'])), columns=old_data.keys()) @@ -52,11 +52,10 @@ def run(filename): for var_name in old_data: if var_name not in variable_order and var_name != 'time': print('*********\n', var_name, '\n&&&&&&&&&') - else: - data[var_name] = old_data[var_name] #infers times for NaT values data.loc[data['time'] < 0] = 0 + data['missing_data_flag_check'].loc[data['time'] == 0] = 1 for t in range(len(data['time'])): if data['time'].iloc[t] == 0 and t != 0: data['time'].iloc[t] = data['time'].iloc[t-1] + time_increment @@ -67,12 +66,12 @@ def run(filename): break #converts times from nanoseconds to hours - data['time'] = data['time']/1000000000/60/60 - - data['time'].iloc[0]/1000000000/60/60 + data['time'] = (data['time']/1000000000/60/60 + - data['time'].iloc[0]/1000000000/60/60) data.set_index('time', inplace=True) #calculate how much of qc_percent is valid - qc_percent_num = 100 * (1 - sum(data['qc_percent']) - / len(data['qc_percent'])) + qc_percent_num = (100 * (1 - sum(data['qc_percent']) + / len(data['qc_percent']))) plt.figure(1) curr_plot_num = 0 @@ -128,15 +127,16 @@ def run(filename): print('saving...') end_of_name = filename.split('/')[-1].split('.')[0] + '.png' - plt.savefig('/Users/adiebold/awr_pngs/' + end_of_name) + plt.savefig('/Users/adiebold/aeri_quality_control/testing/pngs/awr/' + end_of_name) + #comment out plt.show() when doing a directory + # plt.show() + plt.clf() print('finished') print('subplot execution time: %d minute(s), %.2f second(s)' % ((time.time() - subplot_start_time) // 60, (time.time() - subplot_start_time) % 60)) #comment out plt.show() when creating pngs for a directory - # plt.show() - plt.clf() if __name__ == '__main__': start_time = time.time() @@ -146,27 +146,36 @@ if __name__ == '__main__': print(args.filepath) #amount of files to skip - skip_num = 393 + skip_num = 0 curr_num = 0 - print('skip_num = ', skip_num) + print('skip_num = ', skip_num, '\n') if os.path.isdir(args.filepath): for filename_1 in os.listdir(args.filepath): filename_1 = args.filepath + '/' + filename_1 + filename_1 = filename_1.replace('//', '/') if os.path.isdir(filename_1): for filename_2 in os.listdir(filename_1): filename_2 = filename_1 + '/' + filename_2 - if filename_2.endswith('QC.nc'): + filename_2 = filename_2.replace('//', '/') + if (os.path.isfile(filename_2) + and filename_2.endswith('QC.nc')): curr_num += 1 if curr_num >= skip_num: - print(curr_num, ' -- ', filename_2) - run(filename_2) + print(curr_num, ': ', filename_2) + main(filename_2) else: - print(curr_num, ' -- ', filename_2, ' -- SKIPPED') - elif filename_1.endswith('QC.nc') - run(filename_1) + print(curr_num, ': ', filename_2, ' -- SKIPPED') + elif os.path.isfile(filename_1) and filename_1.endswith('QC.nc'): + curr_num += 1 + if curr_num >= skip_num: + print(curr_num, ': ', filename_1) + main(filename_1) + else: + print(curr_num, ': ', filename_1, ' -- SKIPPED') elif os.path.isfile(args.filepath): if args.filepath.endswith('QC.nc'): - run(args.filepath) + print(args.filepath) + main(args.filepath) - print('total execution time: %d minute(s), %.2f second(s)' % ((time.time() - - start_time) // 60, (time.time() - start_time) % 60)) + print('total execution time: %d minute(s), %.2f second(s)' % + ((time.time()-start_time)//60, (time.time()-start_time)%60)) diff --git a/testing/testing_quick_vis.py b/testing/testing_quick_vis.py index c110d3f615895e3bb3b40bb6219e115519737c75..6823f8c2f2e8f29f5721687618fff2055b33d7f2 100644 --- a/testing/testing_quick_vis.py +++ b/testing/testing_quick_vis.py @@ -1,22 +1,36 @@ +import argparse +import os +import time + import netCDF4 import matplotlib.pyplot as plt -import matplotlib.patches as patches -import time from scipy import arange -import os import numpy as np import pandas as pd -import argparse + def run(filename): subplot_start_time = time.time() - print('program running...') plt.rcParams['figure.figsize'] = [25, 15] + #ensures the graphs are created in the correct order + variable_order = [ + 'qc_percent', 'hatch_check', 'missing_data_flag_check', + 'safing_check', 'encoder_check', 'detector_check', + 'hbb_thermistor_check', 'abb_thermistor_check', 'spike_check', + 'hbb_temp_outlier_check', 'abb_temp_outlier_check', + 'bst_temp_outlier_check', 'sce_temp_deviation_check', + 'hbb_stable_check', 'hbb_covariance_check', 'imaginary_radiance_check', + 'detector_temp_check', + 'sky_brightness_temp_spectral_averages_ch1_check', + 'sky_brightness_temp_spectral_averages_ch2_check', + 'hbb_std_dev_check', 'hbb_lw_nen_check', 'hbb_sw_nen_check', + 'lw_responsivity_check', 'sw_responsivity_check', + ] old_data = netCDF4.Dataset(filename).variables - #need to account for times when the first or last values are not a time (NaT) + #accounts for times when the first or last values are not a time (NaT) for x, val in reversed(list(enumerate(old_data['time']))): if val > 0: high_ind = x @@ -25,34 +39,23 @@ def run(filename): if val > 0: low_ind = x break - #calculate average time increment between times for inferring NaT values - time_increment = (old_data['time'][high_ind] - old_data['time'][low_ind]) / len(old_data['time'][low_ind:high_ind]) + time_increment = (old_data['time'][high_ind] - old_data['time'][low_ind]) + / len(old_data['time'][low_ind:high_ind]) - data = pd.DataFrame(index=range(len(old_data['time'])), columns=old_data.keys()) - - #put data into DataFrame + data = pd.DataFrame(index=range(len(old_data['time'])), + columns=old_data.keys()) for key in old_data: data[key] = old_data[key] - #makes sure the graphs are put in the correct order - variable_order = ['qc_percent', 'hatch_check', 'missing_data_flag_check', 'safing_check', 'encoder_check', 'detector_check', - 'hbb_thermistor_check', 'abb_thermistor_check', 'spike_check', 'hbb_temp_outlier_check', 'abb_temp_outlier_check', - 'bst_temp_outlier_check', - 'sce_temp_deviation_check', 'hbb_stable_check', 'hbb_covariance_check', 'imaginary_radiance_check', - 'detector_temp_check', - 'sky_brightness_temp_spectral_averages_ch1_check', 'sky_brightness_temp_spectral_averages_ch2_check', - 'hbb_std_dev_check', - 'hbb_lw_nen_check', 'hbb_sw_nen_check', 'lw_responsivity_check', 'sw_responsivity_check', ] - - #checks for any variables in the DataFrame not accounted for in variable_order + #checks for any variables in the QC file not in variable_order for var_name in old_data: if var_name not in variable_order and var_name != 'time': print('*********\n', var_name, '\n&&&&&&&&&') else: data[var_name] = old_data[var_name] - #infers any times for NaT values + #infers times for NaT values data.loc[data['time'] < 0] = 0 for t in range(len(data['time'])): if data['time'].iloc[t] == 0 and t != 0: @@ -64,49 +67,17 @@ def run(filename): break #converts times from nanoseconds to hours - data['time'] = data['time']/1000000000/60/60 - data['time'].iloc[0]/1000000000/60/60 - + data['time'] = data['time']/1000000000/60/60 + - data['time'].iloc[0]/1000000000/60/60 data.set_index('time', inplace=True) - #calculate how much of qc_percent is valid - qc_percent_num = 100 * (1 - sum(data['qc_percent']) / len(data['qc_percent'])) - - ''' - data['time'] = [] - qc_percent_num = 0.0 - for var_name in ['time',] + variable_order: - print(var_name) - greater_than_zero[var_name] = False - if var_name == 'time': - for x, curr_time in enumerate(old_data['time']): - if curr_time < 0: - if x > 0: - curr_time = old_data['time'][x-1] + time_increment - else: - curr_time = old_data['time'][low_ind] - time_increment*(low_ind-x) - data['time'].append(curr_time) - else: - data[var_name] = [] - for x, value in enumerate(old_data[var_name]): - if var_name != 'qc_percent' and var_name != 'hatch_check' and var_name != 'missing_data_flag_check': - if data['missing_data_flag_check'][x] == 0.0: - data[var_name].append(value) - else: - data[var_name].append(0) - else: - data[var_name].append(value) - if var_name == 'qc_percent' and value == 0: - qc_percent_num += 1.0 - if value > 0.0 and not greater_than_zero[var_name]: - greater_than_zero[var_name] = True - ''' - + qc_percent_num = 100 * (1 - sum(data['qc_percent']) + / len(data['qc_percent'])) plt.figure(1) curr_plot_num = 0 print('creating subplots...') for value in variable_order: - #alter name to make it better formatted for the graph #use 30 spaces because that's what works best if '_check' in value: @@ -124,58 +95,46 @@ def run(filename): var_name = 'qc_percent ({:3.2f}%)'.format(qc_percent_num) + ' '*30 curr_plot_num += 1 - - #doublecheck that key is actually a key + #doublecheck that value is actually a key if value not in data.keys(): - print('subplot ', curr_plot_num, ' of ', len(variable_order), ' -- ', value, ' --- missing') + print('subplot ', curr_plot_num, ' of ', len(variable_order), + ' -- ', value, ' --- missing') else: - print('subplot ', curr_plot_num, ' of ', len(variable_order), ' -- ', var_name) + print('subplot ', curr_plot_num, ' of ', len(variable_order), + ' -- ', var_name) ax = plt.subplot(len(variable_order), 1, curr_plot_num) - - #turn background light blue so anywhere not graphed is distinguishable + #turn background light blue so anywhere not graphed is + #distinguishable from where the value is 0 ax.set_axis_bgcolor((0.8,1.0,1.0)) plt.ylabel(var_name, rotation=0) # 0-24 for 24 hours, 0-1 either valid or invalid plt.axis([0, 24, 0, 1]) - # x ticks only on every third to reduce clutter - plt.xticks( arange(0,25) ) - plt.yticks((0,1)) + plt.xticks(arange(0,25)) plt.setp(ax.xaxis.get_ticklabels()[1::3], visible=False) plt.setp(ax.xaxis.get_ticklabels()[2::3], visible=False) + plt.yticks((0,1)) - ''' - plt.bar(data['time'], old_data[value], width=time_increment - plt.plot(data['time'], data[value], color='black') - print(value, ' ---- ', data[value]) - ''' - - #fill in the area below values as black and above as white to get rid - #of the blue background + #fill in the area below values as black and above as white to get + #rid of the blue background if any(data[value] > 0): plt.fill_between(data.index, data[value], y2=1, color='white') plt.fill_between(data.index, data[value], color='black') plt.tight_layout(h_pad=0.1) plt.subplots_adjust(wspace=1.0) - #if none of the values are above 0 change background to gray else: ax.set_axis_bgcolor((0.87,0.87,0.87)) print('saving...') - # plt.savefig('/Users/adiebold/rooftop_pngs/' + filename.split('/')[-1].split('.')[0] + '.png') - # plt.savefig('/Users/adiebold/qc/' + filename.split('/')[-1].split('.')[0] + '.png') - # plt.savefig('/Users/adiebold/archive_test/' + filename.split('/')[-1].split('.')[0] + '.png') - # plt.savefig('/Users/adiebold/archive_pngs/' + filename.split('/')[-1].split('.')[0] + '.png') - # plt.savefig('/Users/adiebold/ena_data_pics/' + filename.split('/')[-1].split('.')[0] + '.png') - # plt.savefig('/Users/adiebold/sgp-c1_pics/' + filename.split('/')[-1].split('.')[0] + '.png') - # plt.savefig('/Users/adiebold/sgp_pngs/' + filename.split('/')[-1].split('.')[0] + '.png') - # plt.savefig('/Users/adiebold/' + filename.split('/')[-1].split('.')[0] + '.png') - plt.savefig('/Users/adiebold/awr_pngs/' + filename.split('/')[-1].split('.')[0] + '.png') + end_of_name = filename.split('/')[-1].split('.')[0] + '.png' + plt.savefig('/Users/adiebold/awr_pngs/' + end_of_name) print('finished') - print('subplot execution time: %d minute(s), %.2f second(s)' % ((time.time() - subplot_start_time) // 60, + print('subplot execution time: %d minute(s), %.2f second(s)' % + ((time.time() - subplot_start_time) // 60, (time.time() - subplot_start_time) % 60)) + #comment out plt.show() when creating pngs for a directory # plt.show() plt.clf() @@ -183,31 +142,31 @@ if __name__ == '__main__': start_time = time.time() parser = argparse.ArgumentParser() parser.add_argument('filepath') - args = parser.parse_args() - print(args.filepath) #amount of files to skip skip_num = 393 curr_num = 0 + print('skip_num = ', skip_num) if os.path.isdir(args.filepath): - for filename in os.listdir(args.filepath): - filename = args.filepath + '/' + filename - if os.path.isdir(filename): - for file_name in os.listdir(filename): - file_name = filename + '/' + file_name - if 'QC.nc' in file_name: + for filename_1 in os.listdir(args.filepath): + filename_1 = args.filepath + '/' + filename_1 + if os.path.isdir(filename_1): + for filename_2 in os.listdir(filename_1): + filename_2 = filename_1 + '/' + filename_2 + if filename_2.endswith('QC.nc'): curr_num += 1 - print(curr_num, ' -- ', file_name) if curr_num >= skip_num: - run(file_name) + print(curr_num, ' -- ', filename_2) + run(filename_2) else: - print('SKIP') - elif 'QC.nc' in filename: - run(filename) + print(curr_num, ' -- ', filename_2, ' -- SKIPPED') + elif filename_1.endswith('QC.nc') + run(filename_1) elif os.path.isfile(args.filepath): - if 'QC.nc' in args.filepath: + if args.filepath.endswith('QC.nc'): run(args.filepath) - print('total execution time: %d minute(s), %.2f second(s)' % ((time.time() - start_time) // 60, (time.time() - start_time) % 60)) + print('total execution time: %d minute(s), %.2f second(s)' % ((time.time() + - start_time) // 60, (time.time() - start_time) % 60))