Majorly updated/improved quick_vis. It now also includes the newly added tests.

339a93ca · Alex Diebold · 8e33e7dc · 339a93ca
Commit 339a93ca authored 7 years ago by Alex Diebold
--- a/testing/testing_quick_vis.py
+++ b/testing/testing_quick_vis.py
@@ -7,12 +7,13 @@ import matplotlib.pyplot as plt
 from scipy import arange
 import numpy as np
 import pandas as pd
+import scipy.stats

 import file_finder
 import diagnostic_plotter


-def main(filename):
+def main(filename, save_option):

    subplot_start_time = time.time()
    print('program running...')
@@ -23,9 +24,10 @@ def main(filename):
        'safing_check', 'encoder_check', 'detector_check',
        'hbb_thermistor_check', 'abb_thermistor_check', 'spike_check',
        'hbb_temp_outlier_check', 'abb_temp_outlier_check',
-        'bst_temp_outlier_check', 'sce_temp_deviation_check',
-        'hbb_stable_check', 'hbb_covariance_check', 'imaginary_radiance_check',
-        'detector_temp_check',
+        'bst_temp_outlier_check', 'air_interferometer_outlier_check',
+        'sce_temp_deviation_check',
+        'cross_correlation_check', 'hbb_stable_check', 'hbb_covariance_check',
+        'imaginary_radiance_check', 'detector_temp_check',
        'sky_brightness_temp_spectral_averages_ch1_check',
        'sky_brightness_temp_spectral_averages_ch2_check',
        'hbb_std_dev_check', 'hbb_lw_nen_check', 'hbb_sw_nen_check',
@@ -42,43 +44,14 @@ def main(filename):
    ]

    old_data = netCDF4.Dataset(filename).variables
-    #accounts for times when the first or last values are not a time (NaT)
-    for x, val in reversed(list(enumerate(old_data['time']))):
-        if val > 0:
-            high_ind = x
-            break
-    for x, val in enumerate(old_data['time']):
-        if val > 0:
-            low_ind = x
-            break
-    #calculate average time increment between times for inferring NaT values
-    time_increment = ((old_data['time'][high_ind] - old_data['time'][low_ind])
-                        / len(old_data['time'][low_ind:high_ind+1]))
-
    data = pd.DataFrame(index=range(len(old_data['time'])),
                        columns=old_data.keys())
-    #dictionary of percentages that each variable is valid
-    percents = {}
-    #text file or a report of the varialbes' validity
-    report_name = 'reports/' + filename.split('/')[0] + '_'
-    report_name += filename[-11:-5] + '.txt'
-    #create and save plots of variables when the test is <80% valid
-    #write percentages to report file
+    for key in variable_order:
+        data[key] = old_data[key]
    data['time'] = old_data['time']
-    with open(report_name, 'w') as f:
-        f.write(filename.split('/')[0] + '_' + report_name[-10:-4] + '\n\n')
-        for key in variable_order:
-            data[key] = old_data[key]
-            percents[key] = (100 * (1 - sum(data[key])
-                                / len(data[key])))
-            if percents[key] < 80 and key not in vars_to_skip:
-                if key in sum_tests:
-                    diagnostic_plotter.plot(key, filename[:-5] + '.SUM')
-                else:
-                    diagnostic_plotter.plot(key, filename[:-5] + 'B1.CXS')
-            print(key, ' = ', percents[key])
-            if percents[key] < 100:
-                f.write(key + ' = ' + '{:3.2f}'.format(percents[key]) + '%\n')
+
+    #find the median time difference to use later in NaT value inference
+    time_increment = np.median(data['time'].diff().dropna())

    #checks for any variables in the QC file not in variable_order
    for var_name in old_data:
@@ -88,6 +61,7 @@ def main(filename):
    #infers times for NaT values
    data.loc[data['time'] < 0] = 0
    data['missing_data_flag_check'].loc[data['time'] == 0] = 1
+    data['qc_percent'].loc[data['time'] == 0] = 1
    for t in range(len(data['time'])):
        if data['time'].iloc[t] == 0 and t != 0:
            data['time'].iloc[t] = data['time'].iloc[t-1] + time_increment
@@ -97,13 +71,43 @@ def main(filename):
                    data[t] = val - x*time_increment
                    break

+    #dictionary of percentages that each variable is valid
+    percents = {}
+    #text file for a report of the variables' validity
+    report_name = 'pngs/reports/' + filename.split('/')[0] + '_'
+    report_name += filename[-11:-5] + '.txt'
+    #create and save plots of variables when the test is <90% valid
+    #write percentages to report file
+    with open(report_name, 'w') as f:
+        f.write(filename.split('/')[0] + '_' + report_name[-10:-4] + '\n\n')
+        for key in variable_order:
+            percents[key] = (100 * (1 - sum(data[key])
+                                / len(data[key])))
+            if percents[key] < 90 and key not in vars_to_skip:
+                if key in sum_tests:
+                    diagnostic_plotter.plot(key, filename[:-5] + '.SUM')
+                else:
+                    diagnostic_plotter.plot(key, filename[:-5] + 'B1.CXS')
+            print(key, ' = ', percents[key])
+            if percents[key] < 90:
+                f.write(key + ' = ' + '{:3.2f}'.format(percents[key]) + '%\n')
+
    #converts times from nanoseconds to hours
    data['time'] = (data['time']/1000000000/60/60
                    - data['time'].iloc[0]/1000000000/60/60)
+
+    #check for gaps in time
+    gaps = []
+    for x,v in enumerate(scipy.stats.zscore(data['time'].diff().dropna())):
+        if abs(v) > 20:
+            gaps.append((data['time'][x],data['time'][x+1]))
+
+    #set time column as index
    data.set_index('time', inplace=True)
    plt.figure(1)
    curr_plot_num = 0

+
    print('creating subplots...')
    for value in variable_order:
        #alter name to make it better formatted for the graph
@@ -153,6 +157,8 @@ def main(filename):
            if any(data[value] > 0):
                plt.fill_between(data.index, data[value], y2=1, color='white')
                plt.fill_between(data.index, data[value], color='black')
+                for points in gaps:
+                    plt.fill_between(points, 0, y2=1, color=(0.8,1.0,1.0))
                plt.tight_layout(h_pad=0.1)
                plt.subplots_adjust(wspace=1.0)
            #if none of the values are above 0 change background to gray
@@ -161,11 +167,13 @@ def main(filename):

    print('saving...')
    end_of_name = filename.split('/')[-1].split('.')[0] + '.png'
-    plt.savefig('/Users/adiebold/aeri_quality_control/testing/pngs/awr_new/' + end_of_name)
-    # plt.savefig('/Users/adiebold/aeri_quality_control/testing/testing_bst/pngs/' + end_of_name)
-    # plt.savefig('/Users/adiebold/aeri_quality_control/testing/' + end_of_name)
-    #comment out plt.show() when doing a directory
-    # plt.show()
+    if save_option != 'show':
+        plt.savefig('/Users/adiebold/aeri_quality_control/testing/pngs/' + save_option + '/' + end_of_name)
+        # plt.savefig('/Users/adiebold/aeri_quality_control/testing/testing_bst/pngs/' + end_of_name)
+        # plt.savefig('/Users/adiebold/aeri_quality_control/testing/' + end_of_name)
+    else:
+        #save_option 'show' displays the figure instead of saving it; avoid when doing a directory
+        plt.show()
    plt.clf()

    print('finished')
@@ -178,15 +186,18 @@ if __name__ == '__main__':
    start_time = time.time()
    parser = argparse.ArgumentParser()
    parser.add_argument('filepath')
+    parser.add_argument('save_option')
+    parser.add_argument('skip_number')
+    # parser.add_argument('--skip_number', nargs='?', const=0, type=int, default=0)
    args = parser.parse_args()
    print(args.filepath)

    #amount of files to skip
-    skip_num = 0    #215 for sgp; 187 for awr
+    skip_num = int(args.skip_number)
    curr_num = 0
    print('skip_num = ', skip_num, '\n')
    for f in file_finder.traversal(args.filepath, skip_num, 'QC.nc'):
-        main(f)
+        main(f, args.save_option)
    '''
    if os.path.isdir(args.filepath):
        for filename_1 in os.listdir(args.filepath):