From cf12ac4617a1b37490ed03f6b0be3f9078f9974c Mon Sep 17 00:00:00 2001
From: Alex Diebold <adiebold@ella.ssec.wisc.edu>
Date: Thu, 2 Mar 2017 14:09:01 -0600
Subject: [PATCH] Added all the QC files I've been working on

---
 testing/correlation_test.py   |  77 ++++++++++++
 testing/detector_temp_flag.py | 200 +++++++++++++++++++++++++++++++
 testing/quick_vis.py          | 174 +++++++++++++++++++++++++++
 testing/testing_quick_vis.py  | 213 ++++++++++++++++++++++++++++++++++
 4 files changed, 664 insertions(+)
 create mode 100644 testing/correlation_test.py
 create mode 100644 testing/detector_temp_flag.py
 create mode 100644 testing/quick_vis.py
 create mode 100644 testing/testing_quick_vis.py

diff --git a/testing/correlation_test.py b/testing/correlation_test.py
new file mode 100644
index 0000000..580375f
--- /dev/null
+++ b/testing/correlation_test.py
@@ -0,0 +1,77 @@
+import pandas as pd
+import numpy as np
+import scipy.signal
+from aeri_tools.io.dmv import housekeeping
+from aeri_tools.io.dmv import radiance
+from sys import argv
+import os
+import time
+import matplotlib.pyplot as plt
+
+def main(path):
+
+    #get data from B1.CXS file
+    dmv = radiance.get_radiance_from_rnc(path)
+    hk = housekeeping.get_all_housekeeping(path)
+
+    data = pd.DataFrame(index=hk.index, columns=('dmv', 'hk'))
+    data['dmv'] = dmv.xs(str.encode('H'), level='scene').mean(axis=1)
+    data['hk'] = hk['SCEtemp']
+
+    data.dropna(inplace=True)
+
+    try:
+        for x in data.columns:
+            tmp = scipy.signal.savgol_filter(data[x].values[:], 201, 3)
+            data[x] = data[x] - tmp
+
+        correlation = np.correlate(data['dmv'].values[:], data['hk'].values[:], mode='same')
+
+        print('max = ', np.amax(correlation), ' : min = ', np.amin(correlation))
+
+        fig, ax = plt.subplots(1, figsize=(15,10), sharex=True)
+
+        '''
+        for x, val in ('dmv', 'hk'):
+            plt.sca(ax[x])
+            plt.plot(list(data.index), data[val].values[:])
+        '''
+
+        plt.plot(data.index, correlation)
+
+        #plt.show()
+        name = '/Users/adiebold/Documents/corr_pngs/' + path[-12:-6] + '.png'
+        plt.savefig(name)
+        plt.clf()
+
+    except:
+        print('FAIL')
+
+
+if __name__ == '__main__':
+    start_time = time.time()
+
+    pathname = argv[1]
+
+    skip_num = 0
+    curr_num = 0
+    if os.path.isdir(pathname):
+        for filename in os.listdir(pathname):
+            full_path = pathname + '/' + filename
+            if os.path.isdir(full_path):
+                for fname in os.listdir(full_path):
+                    fuller_path = full_path + '/' + fname
+                    if os.path.isfile(fuller_path) and 'B1.CXS' in fname:
+                        print(curr_num, ': ', fuller_path)
+                        if curr_num >= skip_num:
+                            main(fuller_path)
+                        curr_num += 1
+            elif os.path.isfile(full_path) and 'B1.CXS' in filename:
+                if curr_num >= skip_num:
+                    main(full_path)
+                curr_num += 1
+                print(curr_num, ': ', filename)
+    else:
+        main(pathname)
+
+    print('execution time: %d minute(s), %.2f second(s)' % ((time.time()-start_time)//60, (time.time()-start_time)%60))
diff --git a/testing/detector_temp_flag.py b/testing/detector_temp_flag.py
new file mode 100644
index 0000000..28f0bd1
--- /dev/null
+++ b/testing/detector_temp_flag.py
@@ -0,0 +1,200 @@
+from __future__ import print_function
+from scipy.signal import periodogram
+import pandas as pd
+import numpy as np
+from aeri_tools.io.dmv import housekeeping
+from matplotlib import pyplot as plt
+import os
+WIN_SIZE = 40
+DETECTOR_TEMP_KEY = 'detectorTemp'
+TIME_WINDOW = (60,600) #seconds
+MIN_AMP = .02
+WIN_TYPE = 'blackmanharris'
+def sig_finder_function(rolling_window,**kwargs):
+    """Function applied to detectorTemp dataset with pd.rolling_apply
+    Calculates the dominant period and
+    """
+
+    #calculate the time difference and average frequency
+    nsamples = rolling_window.size
+    times = pd.Series(rolling_window.index)
+    win_type = kwargs.get('win_type',WIN_TYPE)
+    try:
+        #get the time span covered by the window
+        dt = (times.iloc[-1] - times.iloc[0]).seconds
+    except AttributeError:
+        #catch NaT indicies
+        return np.nan
+    else:
+        #compute the dominant frequency/period
+        fs = nsamples/dt
+        f, Pxx_den = periodogram(rolling_window,fs,window=win_type)
+        max_i = Pxx_den.argmax()
+        if f[max_i] != 0:
+            primary_T = 1./f[max_i]
+        else:
+            primary_T = float("inf")
+        return primary_T
+def series_rolling_apply(series,window,func,**kwargs):
+    """rolling applies function to series by passing subsection of series,
+    rather than np array
+    """
+    #lambda to map the passed function to a rolling window
+    map_f=lambda i:np.nan if i<window else func(series.iloc[i-window:i],**kwargs)
+    return pd.Series(map(map_f,range(series.size)),index = series.index)
+
+def flag_detector_temp_oscillations(path_or_dmv,window_size = WIN_SIZE,std=10,**kwargs):
+    """Performs power spectral density analysis on detector temperature,
+    and flags the data if a consistent period is found
+    """
+    #read in the housekeeping dataframe
+    housekeeping_df = housekeeping.get_all_housekeeping(path_or_dmv)
+    #get the detector temp variable
+    detector_temp = housekeeping_df[DETECTOR_TEMP_KEY]
+    #detrend by subtracting the day's mean
+    detrend_dtemp = detector_temp - detector_temp.mean()
+    #approximate periodicity in data using a rolling periodogram
+    periods = series_rolling_apply(detrend_dtemp,window_size,sig_finder_function,**kwargs)
+    #check for big spikes in the data (these throw off the periodogram)
+    #currently disabled
+    dtt_roller = detrend_dtemp.rolling(window_size)
+    amps = False & ((dtt_roller.max()-dtt_roller.min()) > .2)
+    #look for low standard deviation in the period (= a consistent periodicity)
+    stds = series_rolling_apply(periods,window_size,lambda x:x.std() < std)
+    #flag as bad if there's a consistent periodicity or large jump in the data
+    flags = stds | amps
+    return flags, periods
+
+def flatten_out(original_flags, pds):
+    '''incrementing windows to help flatten out the 0/1 value as either
+    0 or 1; removes small blips
+    '''
+
+    flags = original_flags.copy()
+    values_dict = flags.value_counts()
+
+    num_half_hour = int(len(flags)/48)
+    curr_flags = []
+    std_data = {}
+    std_data['curr_std'] = []
+    std_data['mean_std'] = []
+    std_data['mean'] = []
+    std_data['std_curr_std'] = []
+    std_data['std_mean_std'] = []
+
+    lower = 0
+    upper = 1
+
+    while upper <= len(flags):
+        values_dict = original_flags[lower:upper].value_counts()
+        if True in values_dict.keys():
+            mean = values_dict[True] / len(flags[lower:upper])
+        else:
+            mean = 1 - (values_dict[False] / len(flags[lower:upper]))
+        curr_std = np.std(pds[lower:upper])
+        mean_std = np.mean(pds[lower:upper])
+        std_data['mean'].append(mean)
+        std_data['curr_std'].append(curr_std)
+        std_data['mean_std'].append(mean_std)
+        std_curr_std = np.std(std_data['curr_std'][lower:upper])
+        std_data['std_curr_std'].append(std_curr_std)
+        std_mean_std = np.std(std_data['mean_std'][lower:upper])
+        std_data['std_mean_std'].append(std_mean_std)
+        if std_mean_std < 20 or np.isnan(std_mean_std):
+            flags[lower:upper] = flags[lower:upper].replace(False, True)
+        else:
+            flags[lower:upper] = flags[lower:upper].replace(True, False)
+
+        upper += 1
+        if upper - lower > num_half_hour:
+            lower += 1
+
+    new_flags = flags & (np.asarray(std_data['mean']) > 0.35)
+    # new_flags = np.asarray(std_data['mean']) > 0.8
+    # fig, ax = plt.subplots(2, figsize=(15,10),sharex=True)
+    # plt.plot(flags)
+    # plt.plot(new_flags)
+    # plt.show()
+    # plt.clf()
+    return new_flags, std_data
+
+def main(path):
+    """Test function that displays detectorTemp alongside the computed flag
+    using different windowing functions
+    """
+    dmv = housekeeping.get_all_housekeeping(path)
+    detector_temp = dmv[DETECTOR_TEMP_KEY]
+    fig,ax = plt.subplots(5,figsize=(15,10),sharex=True)
+    plt.sca(ax[0])
+    plt.plot(dmv['Time'],detector_temp,'r')
+    # plt.ylim(76.9,77.6)
+    plt.ylabel("Detector Temperature (K)")
+    plt.grid()
+
+    flags,pds = flag_detector_temp_oscillations(path,win_type=WIN_TYPE)
+    flags, std_data = flatten_out(flags.copy(), pds)
+
+    plt.sca(ax[1])
+    plt.plot(dmv['Time'],pds,lw=2,label=WIN_TYPE)
+    plt.grid(True)
+    plt.ylabel("Dominant Period (s)")
+    #plt.legend()
+
+    plt.sca(ax[2])
+    plt.plot(dmv['Time'],flags,lw=2,label=WIN_TYPE)
+    plt.grid(True)
+    plt.ylabel("Standard Deviation Flag")
+    plt.xlabel("Time (Hours UTC)")
+    plt.ylim(-0.1,1.1)
+    plt.sca(ax[3])
+    plt.plot(dmv['Time'], np.asarray(std_data['mean']))
+
+    title = os.path.split(path)[-1][:6]
+    plt.suptitle(title,fontsize=14)
+
+    plt.sca(ax[4])
+    plt.plot(dmv['Time'], std_data['curr_std'])     #blue
+    plt.plot(dmv['Time'], std_data['mean_std'])     #green
+    plt.plot(dmv['Time'], std_data['std_curr_std']) #red
+    plt.plot(dmv['Time'], std_data['std_mean_std'])
+
+    print('title: ', title)
+    plt.savefig("sgp_detect_pngs/detector_temp_flag_test_%s.png"%title,bbox_inches='tight')
+    # plt.savefig("detector_temp_flag_test_%s_2.png"%title,bbox_inches='tight')
+    # plt.show()
+    plt.clf()
+    plt.close()
+
+if __name__ == '__main__':
+    from sys import argv
+    import os
+    import time
+
+    start_time = time.time()
+
+    pathname = argv[1]
+
+    skip_num = 0
+    curr_num = 0
+    print('skip_num is ', skip_num)
+
+    if os.path.isdir(pathname):
+        for filename in os.listdir(pathname):
+            full_path = pathname + '/' + filename
+            if os.path.isdir(full_path):
+                for fname in os.listdir(full_path):
+                    fuller_path = full_path + '/' + fname
+                    if os.path.isfile(fuller_path) and 'B1.CXS' in fname:
+                        if curr_num >= skip_num:
+                            main(fuller_path)
+                        curr_num += 1
+                        print(curr_num, ': ', fuller_path)
+            elif os.path.isfile(full_path) and 'B1.CXS' in filename:
+                if curr_num >= skip_num:
+                    main(full_path)
+                curr_num += 1
+                print(curr_num, ': ', filename)
+    else:
+        main(pathname)
+
+    print('execution time: %d minute(s), %.2f second(s)' % ((time.time()-start_time)//60, (time.time()-start_time)%60))
diff --git a/testing/quick_vis.py b/testing/quick_vis.py
new file mode 100644
index 0000000..34d273f
--- /dev/null
+++ b/testing/quick_vis.py
@@ -0,0 +1,174 @@
+import netCDF4
+import matplotlib.pyplot as plt
+import matplotlib.patches as patches
+import time
+from scipy import arange
+import os
+import time
+import numpy as np
+
+def run(filename):
+
+    subplot_start_time = time.time()
+
+    print('program running...')
+    plt.rcParams['figure.figsize'] = [25, 15]
+
+    old_data = netCDF4.Dataset(filename).variables
+    data = {}
+    greater_than_zero = {}
+    for x, val in reversed(list(enumerate(old_data['time']))):
+        if val > 0:
+            high_ind = x
+            break
+    for x, val in enumerate(old_data['time']):
+        if val > 0:
+            low_ind = x
+            break
+    time_increment = (old_data['time'][high_ind] - old_data['time'][low_ind]) / len(old_data['time'][low_ind:high_ind])
+    print('time_increment: ', time_increment)
+    print(time.localtime(old_data['time'][0]/1000000000))
+    variable_order = ['qc_percent', 'hatch_check', 'missing_data_flag_check', 'safing_check', 'encoder_check', 'detector_check',
+                        'hbb_thermistor_check', 'abb_thermistor_check', 'spike_check', 'hbb_temp_outlier_check', 'abb_temp_outlier_check',
+                        'bst_temp_outlier_check',
+                        'sce_temp_deviation_check', 'hbb_stable_check', 'hbb_covariance_check', 'imaginary_radiance_check',
+                        'detector_temp_check',
+                        'sky_brightness_temp_spectral_averages_ch1_check', 'sky_brightness_temp_spectral_averages_ch2_check',
+                        'hbb_std_dev_check',
+                        'hbb_lw_nen_check', 'hbb_sw_nen_check', 'lw_responsivity_check', 'sw_responsivity_check', ]
+    print('*********')
+    for var_name in old_data:
+        if var_name not in variable_order and var_name != 'time':
+            print(var_name)
+    print('&&&&&&&&&')
+    data['time'] = []
+    qc_percent_num = 0.0
+    for var_name in ['time',] + variable_order:
+        print(var_name)
+        greater_than_zero[var_name] = False
+        if var_name == 'time':
+            for x, curr_time in enumerate(old_data['time']):
+                if curr_time < 0:
+                    if x > 0:
+                        curr_time = data['time'][x-1] + time_increment
+                    else:
+                        curr_time = old_data['time'][low_ind] - time_increment*(low_ind-x)
+                data['time'].append(curr_time)
+        else:
+            data[var_name] = []
+            for x, value in enumerate(old_data[var_name]):
+                if var_name != 'qc_percent' and var_name != 'hatch_check' and var_name != 'missing_data_flag_check':
+                    if data['missing_data_flag_check'][x] == 0.0:
+                        data[var_name].append(value)
+                    else:
+                        data[var_name].append(0.0)
+                else:
+                    data[var_name].append(value)
+                    if var_name == 'qc_percent' and value == 0.0:
+                        qc_percent_num += 1.0
+                if value > 0.0 and not greater_than_zero[var_name]:
+                    greater_than_zero[var_name] = True
+
+    time_array = np.asarray(data['time'])/1000000000.0/60.0/60.0 - data['time'][0]/1000000000.0/60.0/60.0
+    print(type(time_array))
+    for x in range(len(data['missing_data_flag_check'])):
+        if data['missing_data_flag_check'][x] == data['hbb_stable_check'][x] and data['hbb_stable_check'][x] != 0:
+            print(data['missing_data_flag_check'][x], ' ----- ', data['hbb_stable_check'][x])
+    plt.figure(1)
+    curr_plot_num = 0
+    print('creating subplots...')
+    for value in variable_order:
+        if '_check' in value:
+            var_name = value[:-6] + ' '*30
+        else:
+            var_name = value + ' '*30
+        if 'spike' in var_name:
+            var_name = 'igm_spike' + ' '*30
+        if 'sky_brightness_temp_spectral_averages' in var_name:
+            if 'ch1' in var_name:
+                var_name = 'surface_bt_ch1' + ' '*30
+            elif 'ch2' in var_name:
+                var_name = 'surface_bt_ch2' + ' '*30
+        if 'qc_percent' in var_name:
+            var_name = 'qc_percent ({:3.2f}%)'.format(100.0 * qc_percent_num / len(data['qc_percent'])) + ' '*30
+        curr_plot_num += 1
+        if value not in old_data.keys():
+            print('subplot ', curr_plot_num, ' of ', len(variable_order), ' -- ', value, ' --- missing')
+        else:
+            print('subplot ', curr_plot_num, ' of ', len(variable_order), ' -- ', var_name)
+            ax = plt.subplot(len(variable_order), 1, curr_plot_num)
+            # if not greater_than_zero[value]:
+            #     ax.set_axis_bgcolor((0.87,0.87,0.87))
+            ax.set_axis_bgcolor((0.8,1.0,1.0))
+            plt.ylabel(var_name, rotation=0)
+            plt.axis([0, 24, 0, 1])
+            plt.xticks( arange(0,25) )
+            plt.yticks((0,1))
+            plt.setp(ax.xaxis.get_ticklabels()[1::3], visible=False)
+            plt.setp(ax.xaxis.get_ticklabels()[2::3], visible=False)
+            # plt.bar(data['time'], old_data[value], width=time_increment
+            # plt.plot(data['time'], data[value], color='black')
+            # print(value, ' ---- ', data[value])
+
+            if greater_than_zero[value]:
+                plt.fill_between(time_array, data[value], y2=1, color='white')
+                plt.fill_between(time_array, data[value], color='black')
+                # x_vals = arange(data['time'][len(data['time'])-1], 25)
+                # plt.fill_between(x_vals, [1]*len(x_vals), color='yellow')
+            else:
+                ax.set_axis_bgcolor((0.87,0.87,0.87))
+            plt.tight_layout(h_pad=0.1)
+            plt.subplots_adjust(wspace=1.0)
+    print('saving...')
+    # plt.savefig('/Users/adiebold/rooftop_pngs/' + filename.split('/')[-1].split('.')[0] + '.png')
+    # plt.savefig('/Users/adiebold/qc/' + filename.split('/')[-1].split('.')[0] + '.png')
+    # plt.savefig('/Users/adiebold/archive_test/' + filename.split('/')[-1].split('.')[0] + '.png')
+    # plt.savefig('/Users/adiebold/archive_pngs/' + filename.split('/')[-1].split('.')[0] + '.png')
+    # plt.savefig('/Users/adiebold/ena_data_pics/' + filename.split('/')[-1].split('.')[0] + '.png')
+    # plt.savefig('/Users/adiebold/sgp-c1_pics/' + filename.split('/')[-1].split('.')[0] + '.png')
+    plt.savefig('/Users/adiebold/test_sgp_pngs/' + filename.split('/')[-1].split('.')[0] + '.png')
+    # plt.savefig('/Users/adiebold/' + filename.split('/')[-1].split('.')[0] + '.png')
+    # plt.savefig('/Users/adiebold/awr_pngs/' + filename.split('/')[-1].split('.')[0] + '.png')
+
+    print('finished')
+    print('subplot execution time: %d minute(s), %.2f second(s)' % ((time.time() - subplot_start_time) // 60,
+            (time.time() - subplot_start_time) % 60))
+    # plt.show()
+    plt.clf()
+
+if __name__ == '__main__':
+    start_time = time.time()
+    import argparse
+    parser = argparse.ArgumentParser()
+    parser.add_argument('filepath')
+
+    args = parser.parse_args()
+
+    print(args.filepath)
+#skip 23 for 1607*
+#skip 54 for 1608*
+#skip 62 for 1609*
+#skip 91 for 1610*
+    skip_num = 222         #172
+    curr_num = 0
+    if os.path.isdir(args.filepath):
+        for filename in os.listdir(args.filepath):
+            filename = args.filepath + '/' + filename
+            if os.path.isdir(filename):
+                for file_name in os.listdir(filename):
+                    file_name = filename + '/' + file_name
+                    if 'QC.nc' in file_name:
+                        # if not os.path.isfile('/Users/adiebold/sgp_new_pngs/' + file_name.split('/')[-1].split('.')[0] + '.png'):
+                        curr_num += 1
+                        print(curr_num, ' -- ', file_name)
+                        if curr_num >= skip_num:
+                            run(file_name)
+                        else:
+                            print('SKIP')
+            elif 'QC.nc' in filename:
+                run(filename)
+    elif os.path.isfile(args.filepath):
+        if 'QC.nc' in args.filepath:
+            run(args.filepath)
+
+    print('total execution time: %d minute(s), %.2f second(s)' % ((time.time() - start_time) // 60, (time.time() - start_time) % 60))
diff --git a/testing/testing_quick_vis.py b/testing/testing_quick_vis.py
new file mode 100644
index 0000000..c110d3f
--- /dev/null
+++ b/testing/testing_quick_vis.py
@@ -0,0 +1,213 @@
+import netCDF4
+import matplotlib.pyplot as plt
+import matplotlib.patches as patches
+import time
+from scipy import arange
+import os
+import numpy as np
+import pandas as pd
+import argparse
+
+def run(filename):
+
+    subplot_start_time = time.time()
+
+    print('program running...')
+    plt.rcParams['figure.figsize'] = [25, 15]
+
+    old_data = netCDF4.Dataset(filename).variables
+    #need to account for times when the first or last values are not a time (NaT)
+    for x, val in reversed(list(enumerate(old_data['time']))):
+        if val > 0:
+            high_ind = x
+            break
+    for x, val in enumerate(old_data['time']):
+        if val > 0:
+            low_ind = x
+            break
+
+    #calculate average time increment between times for inferring NaT values
+    time_increment = (old_data['time'][high_ind] - old_data['time'][low_ind]) / len(old_data['time'][low_ind:high_ind])
+
+    data = pd.DataFrame(index=range(len(old_data['time'])), columns=old_data.keys())
+
+    #put data into DataFrame
+    for key in old_data:
+        data[key] = old_data[key]
+
+    #makes sure the graphs are put in the correct order
+    variable_order = ['qc_percent', 'hatch_check', 'missing_data_flag_check', 'safing_check', 'encoder_check', 'detector_check',
+                        'hbb_thermistor_check', 'abb_thermistor_check', 'spike_check', 'hbb_temp_outlier_check', 'abb_temp_outlier_check',
+                        'bst_temp_outlier_check',
+                        'sce_temp_deviation_check', 'hbb_stable_check', 'hbb_covariance_check', 'imaginary_radiance_check',
+                        'detector_temp_check',
+                        'sky_brightness_temp_spectral_averages_ch1_check', 'sky_brightness_temp_spectral_averages_ch2_check',
+                        'hbb_std_dev_check',
+                        'hbb_lw_nen_check', 'hbb_sw_nen_check', 'lw_responsivity_check', 'sw_responsivity_check', ]
+
+    #checks for any variables in the DataFrame not accounted for in variable_order
+    for var_name in old_data:
+        if var_name not in variable_order and var_name != 'time':
+            print('*********\n', var_name, '\n&&&&&&&&&')
+        else:
+            data[var_name] = old_data[var_name]
+
+    #infers any times for NaT values
+    data.loc[data['time'] < 0] = 0
+    for t in range(len(data['time'])):
+        if data['time'].iloc[t] == 0 and t != 0:
+            data['time'].iloc[t] = data['time'].iloc[t-1] + time_increment
+        elif data['time'].iloc[t] == 0 and t == 0:
+            for x, val in enumerate(data['time']):
+                if val > 0:
+                    data[t] = val - x*time_increment
+                    break
+
+    #converts times from nanoseconds to hours
+    data['time'] = data['time']/1000000000/60/60 - data['time'].iloc[0]/1000000000/60/60
+
+    data.set_index('time', inplace=True)
+
+    #calculate how much of qc_percent is valid
+    qc_percent_num = 100 * (1 - sum(data['qc_percent']) / len(data['qc_percent']))
+
+    '''
+    data['time'] = []
+    qc_percent_num = 0.0
+    for var_name in ['time',] + variable_order:
+        print(var_name)
+        greater_than_zero[var_name] = False
+        if var_name == 'time':
+            for x, curr_time in enumerate(old_data['time']):
+                if curr_time < 0:
+                    if x > 0:
+                        curr_time = old_data['time'][x-1] + time_increment
+                    else:
+                        curr_time = old_data['time'][low_ind] - time_increment*(low_ind-x)
+                data['time'].append(curr_time)
+        else:
+            data[var_name] = []
+            for x, value in enumerate(old_data[var_name]):
+                if var_name != 'qc_percent' and var_name != 'hatch_check' and var_name != 'missing_data_flag_check':
+                    if data['missing_data_flag_check'][x] == 0.0:
+                        data[var_name].append(value)
+                    else:
+                        data[var_name].append(0)
+                else:
+                    data[var_name].append(value)
+                    if var_name == 'qc_percent' and value == 0:
+                        qc_percent_num += 1.0
+                if value > 0.0 and not greater_than_zero[var_name]:
+                    greater_than_zero[var_name] = True
+    '''
+
+    plt.figure(1)
+    curr_plot_num = 0
+
+    print('creating subplots...')
+    for value in variable_order:
+
+        #alter name to make it better formatted for the graph
+        #use 30 spaces because that's what works best
+        if '_check' in value:
+            var_name = value[:-6] + ' '*30
+        else:
+            var_name = value + ' '*30
+        if 'spike' in var_name:
+            var_name = 'igm_spike' + ' '*30
+        if 'sky_brightness_temp_spectral_averages' in var_name:
+            if 'ch1' in var_name:
+                var_name = 'surface_bt_ch1' + ' '*30
+            elif 'ch2' in var_name:
+                var_name = 'surface_bt_ch2' + ' '*30
+        if 'qc_percent' in var_name:
+            var_name = 'qc_percent ({:3.2f}%)'.format(qc_percent_num) + ' '*30
+
+        curr_plot_num += 1
+
+        #doublecheck that key is actually a key
+        if value not in data.keys():
+            print('subplot ', curr_plot_num, ' of ', len(variable_order), ' -- ', value, ' --- missing')
+        else:
+            print('subplot ', curr_plot_num, ' of ', len(variable_order), ' -- ', var_name)
+            ax = plt.subplot(len(variable_order), 1, curr_plot_num)
+
+            #turn background light blue so anywhere not graphed is distinguishable
+            ax.set_axis_bgcolor((0.8,1.0,1.0))
+            plt.ylabel(var_name, rotation=0)
+            # 0-24 for 24 hours, 0-1 either valid or invalid
+            plt.axis([0, 24, 0, 1])
+
+            # x ticks only on every third to reduce clutter
+            plt.xticks( arange(0,25) )
+            plt.yticks((0,1))
+            plt.setp(ax.xaxis.get_ticklabels()[1::3], visible=False)
+            plt.setp(ax.xaxis.get_ticklabels()[2::3], visible=False)
+
+            '''
+            plt.bar(data['time'], old_data[value], width=time_increment
+            plt.plot(data['time'], data[value], color='black')
+            print(value, ' ---- ', data[value])
+            '''
+
+            #fill in the area below values as black and above as white to get rid
+            #of the blue background
+            if any(data[value] > 0):
+                plt.fill_between(data.index, data[value], y2=1, color='white')
+                plt.fill_between(data.index, data[value], color='black')
+                plt.tight_layout(h_pad=0.1)
+                plt.subplots_adjust(wspace=1.0)
+
+            #if none of the values are above 0 change background to gray
+            else:
+                ax.set_axis_bgcolor((0.87,0.87,0.87))
+
+    print('saving...')
+    # plt.savefig('/Users/adiebold/rooftop_pngs/' + filename.split('/')[-1].split('.')[0] + '.png')
+    # plt.savefig('/Users/adiebold/qc/' + filename.split('/')[-1].split('.')[0] + '.png')
+    # plt.savefig('/Users/adiebold/archive_test/' + filename.split('/')[-1].split('.')[0] + '.png')
+    # plt.savefig('/Users/adiebold/archive_pngs/' + filename.split('/')[-1].split('.')[0] + '.png')
+    # plt.savefig('/Users/adiebold/ena_data_pics/' + filename.split('/')[-1].split('.')[0] + '.png')
+    # plt.savefig('/Users/adiebold/sgp-c1_pics/' + filename.split('/')[-1].split('.')[0] + '.png')
+    # plt.savefig('/Users/adiebold/sgp_pngs/' + filename.split('/')[-1].split('.')[0] + '.png')
+    # plt.savefig('/Users/adiebold/' + filename.split('/')[-1].split('.')[0] + '.png')
+    plt.savefig('/Users/adiebold/awr_pngs/' + filename.split('/')[-1].split('.')[0] + '.png')
+
+    print('finished')
+    print('subplot execution time: %d minute(s), %.2f second(s)' % ((time.time() - subplot_start_time) // 60,
+            (time.time() - subplot_start_time) % 60))
+    # plt.show()
+    plt.clf()
+
+if __name__ == '__main__':
+    start_time = time.time()
+    parser = argparse.ArgumentParser()
+    parser.add_argument('filepath')
+
+    args = parser.parse_args()
+
+    print(args.filepath)
+
+    #amount of files to skip
+    skip_num = 393
+    curr_num = 0
+    if os.path.isdir(args.filepath):
+        for filename in os.listdir(args.filepath):
+            filename = args.filepath + '/' + filename
+            if os.path.isdir(filename):
+                for file_name in os.listdir(filename):
+                    file_name = filename + '/' + file_name
+                    if 'QC.nc' in file_name:
+                        curr_num += 1
+                        print(curr_num, ' -- ', file_name)
+                        if curr_num >= skip_num:
+                            run(file_name)
+                        else:
+                            print('SKIP')
+            elif 'QC.nc' in filename:
+                run(filename)
+    elif os.path.isfile(args.filepath):
+        if 'QC.nc' in args.filepath:
+            run(args.filepath)
+
+    print('total execution time: %d minute(s), %.2f second(s)' % ((time.time() - start_time) // 60, (time.time() - start_time) % 60))
-- 
GitLab