# Newer
# Older
import argparse
import os
import time
import netCDF4
import matplotlib.pyplot as plt
from scipy import arange
import numpy as np
import pandas as pd
# Ensures the graphs are created in the correct order.
_VARIABLE_ORDER = (
    'qc_percent', 'hatch_check', 'missing_data_flag_check',
    'safing_check', 'encoder_check', 'detector_check',
    'hbb_thermistor_check', 'abb_thermistor_check', 'spike_check',
    'hbb_temp_outlier_check', 'abb_temp_outlier_check',
    'bst_temp_outlier_check', 'sce_temp_deviation_check',
    'hbb_stable_check', 'hbb_covariance_check', 'imaginary_radiance_check',
    'detector_temp_check',
    'sky_brightness_temp_spectral_averages_ch1_check',
    'sky_brightness_temp_spectral_averages_ch2_check',
    'hbb_std_dev_check', 'hbb_lw_nen_check', 'hbb_sw_nen_check',
    'lw_responsivity_check', 'sw_responsivity_check',
)

# Divisor used to convert the time axis from nanoseconds to hours.
_NS_PER_HOUR = 1000000000 * 60 * 60


def _infer_times(times):
    """Return ``(filled, missing)`` for a sequence of raw time stamps.

    Any value that is not strictly positive is treated as missing (NaT).
    ``filled`` is a float array in which every missing entry has been
    replaced by an inferred time; ``missing`` is the boolean mask of the
    entries that were inferred.

    Raises:
        ValueError: if no entry of *times* is positive.
    """
    filled = np.array(times, dtype=float)
    valid = filled > 0
    valid_idx = np.flatnonzero(valid)
    if valid_idx.size == 0:
        raise ValueError('no valid (positive) time values found')
    low_ind, high_ind = int(valid_idx[0]), int(valid_idx[-1])

    # Average spacing between consecutive samples: the span between the
    # first and last valid time covers (high_ind - low_ind) intervals.
    if high_ind > low_ind:
        time_increment = ((filled[high_ind] - filled[low_ind])
                          / (high_ind - low_ind))
    else:
        time_increment = 0.0

    # A missing first sample is extrapolated backwards from the first valid
    # one; every other missing sample continues from its predecessor (which
    # may itself have been inferred, hence the in-order loop).
    if not valid[0]:
        filled[0] = filled[low_ind] - low_ind * time_increment
    for i in np.flatnonzero(~valid[1:]) + 1:
        filled[i] = filled[i - 1] + time_increment
    return filled, ~valid


def _subplot_label(value, qc_percent_num):
    """Return the y-axis label for the variable *value*.

    The label is padded with 30 trailing spaces because that is what
    positions it best next to the subplot.
    """
    suffix = '_check'
    name = value[:-len(suffix)] if value.endswith(suffix) else value
    if 'spike' in name:
        name = 'igm_spike'
    if 'sky_brightness_temp_spectral_averages' in name:
        if 'ch1' in name:
            name = 'surface_bt_ch1'
        elif 'ch2' in name:
            name = 'surface_bt_ch2'
    if 'qc_percent' in name:
        name = 'qc_percent ({:3.2f}%)'.format(qc_percent_num)
    return name + ' '*30


def _set_background(ax, color):
    """Set the axes background, supporting old and new matplotlib APIs."""
    # set_axis_bgcolor was replaced by set_facecolor in matplotlib 2.x.
    setter = getattr(ax, 'set_facecolor', None)
    if setter is None:
        setter = ax.set_axis_bgcolor
    setter(color)


def main(filename,
         output_dir='/Users/adiebold/aeri_quality_control/testing/pngs/awr/'):
    """Plot every QC flag of one netCDF file as a stack of subplots.

    One subplot is drawn per entry of ``_VARIABLE_ORDER`` against time in
    hours since the first sample, and the figure is saved as a PNG named
    after the part of the input file name before its first dot.

    Args:
        filename: path of the QC netCDF file to read.
        output_dir: directory the PNG is written to. It must already exist.

    Raises:
        ValueError: if the file contains no positive time value.
        KeyError: if the file has no 'time' or 'qc_percent' variable.
    """
    subplot_start_time = time.time()
    print('program running...')
    plt.rcParams['figure.figsize'] = [25, 15]

    # Copy everything into a DataFrame while the file is open so that the
    # dataset can be closed before any plotting happens.
    with netCDF4.Dataset(filename) as dataset:
        variables = dataset.variables
        data = pd.DataFrame(index=range(len(variables['time'])),
                            columns=list(variables))
        for key in variables:
            data[key] = variables[key]

    # Report any variables in the QC file that are not in _VARIABLE_ORDER.
    for var_name in data.columns:
        if var_name not in _VARIABLE_ORDER and var_name != 'time':
            print('*********\n', var_name, '\n&&&&&&&&&')

    # Infer times for NaT values. Rows whose time is negative carry no
    # usable data, so all of their columns are zeroed before being flagged.
    raw_times = np.asarray(data['time'])
    negative = raw_times < 0
    times, missing = _infer_times(raw_times)
    data.loc[negative] = 0
    if 'missing_data_flag_check' in data.columns:
        data.loc[missing, 'missing_data_flag_check'] = 1

    # Convert times from nanoseconds to hours since the first sample.
    data['time'] = (times - times[0]) / _NS_PER_HOUR
    data = data.set_index('time')

    # Calculate how much of qc_percent is valid.
    qc_percent_num = 100 * (
        1 - np.asarray(data['qc_percent'], dtype=float).mean())

    plt.figure(1)
    print('creating subplots...')
    total_plots = len(_VARIABLE_ORDER)
    needs_layout = False
    # Subplot indices are 1-based in matplotlib.
    for curr_plot_num, value in enumerate(_VARIABLE_ORDER, start=1):
        var_name = _subplot_label(value, qc_percent_num)

        # A variable absent from the file keeps its slot but is not drawn.
        if value not in data.columns:
            print('subplot ', curr_plot_num, ' of ', total_plots,
                  ' -- ', value, ' --- missing')
            continue
        print('subplot ', curr_plot_num, ' of ', total_plots,
              ' -- ', var_name)

        ax = plt.subplot(total_plots, 1, curr_plot_num)
        # Turn the background light blue so anywhere not graphed is
        # distinguishable from where the value is 0.
        _set_background(ax, (0.8, 1.0, 1.0))
        plt.ylabel(var_name, rotation=0)
        # 0-24 for 24 hours, 0-1 either valid or invalid;
        # x tick labels only on every third tick to reduce clutter.
        plt.xticks(np.arange(0, 25))
        plt.setp(ax.xaxis.get_ticklabels()[1::3], visible=False)
        plt.setp(ax.xaxis.get_ticklabels()[2::3], visible=False)
        plt.yticks((0, 1))

        if (data[value] > 0).any():
            # Fill the area below the values black and above them white to
            # get rid of the blue background.
            plt.fill_between(data.index, data[value], y2=1, color='white')
            plt.fill_between(data.index, data[value], color='black')
            needs_layout = True
        else:
            # None of the values are above 0: change background to gray.
            _set_background(ax, (0.87, 0.87, 0.87))

    # The layout only needs to be computed once, after all subplots exist.
    if needs_layout:
        plt.tight_layout(h_pad=0.1)
        plt.subplots_adjust(wspace=1.0)

    end_of_name = os.path.basename(filename).split('.')[0] + '.png'
    plt.savefig(os.path.join(output_dir, end_of_name))
    # Call plt.show() here to inspect a single file interactively; leave it
    # out when creating pngs for a whole directory.
    plt.clf()

    elapsed = time.time() - subplot_start_time
    print('subplot execution time: %d minute(s), %.2f second(s)' %
          divmod(elapsed, 60))
def find_qc_files(root):
    """Return the QC netCDF files found in *root* and its sub-directories.

    Only files whose name ends with 'QC.nc' are returned, and only *root*
    itself and the directories directly inside it are searched. Entries are
    visited in sorted order so that the result (and therefore a skip count
    applied to it) is reproducible between runs.
    """
    found = []
    for entry in sorted(os.listdir(root)):
        path = os.path.join(root, entry)
        if os.path.isdir(path):
            for child in sorted(os.listdir(path)):
                child_path = os.path.join(path, child)
                if os.path.isfile(child_path) and child_path.endswith('QC.nc'):
                    found.append(child_path)
        elif os.path.isfile(path) and path.endswith('QC.nc'):
            found.append(path)
    return found


if __name__ == '__main__':
    start_time = time.time()
    parser = argparse.ArgumentParser()
    parser.add_argument('filepath')
    parser.add_argument('--skip', type=int, default=0,
                        help='amount of files to skip (directory input only)')
    args = parser.parse_args()
    print(args.filepath)
    # Amount of files to skip, e.g. to resume an interrupted directory run.
    skip_num = args.skip
    print('skip_num = ', skip_num, '\n')
    if os.path.isdir(args.filepath):
        # Files are numbered from 1; the first skip_num of them are skipped.
        for curr_num, qc_file in enumerate(find_qc_files(args.filepath),
                                           start=1):
            if curr_num > skip_num:
                print(curr_num, ': ', qc_file)
                main(qc_file)
            else:
                print(curr_num, ': ', qc_file, ' -- SKIPPED')
    elif os.path.isfile(args.filepath) and args.filepath.endswith('QC.nc'):
        print(args.filepath)
        main(args.filepath)
    elapsed = time.time() - start_time
    print('total execution time: %d minute(s), %.2f second(s)' %
          divmod(elapsed, 60))