# quick_vis.py

import argparse
import os
import time

import netCDF4
import matplotlib.pyplot as plt
from scipy import arange
import numpy as np
import pandas as pd


# Display order of the QC variables: one subplot each, top to bottom.
_VARIABLE_ORDER = [
    'qc_percent', 'hatch_check', 'missing_data_flag_check',
    'safing_check', 'encoder_check', 'detector_check',
    'hbb_thermistor_check', 'abb_thermistor_check', 'spike_check',
    'hbb_temp_outlier_check', 'abb_temp_outlier_check',
    'bst_temp_outlier_check', 'sce_temp_deviation_check',
    'hbb_stable_check', 'hbb_covariance_check', 'imaginary_radiance_check',
    'detector_temp_check',
    'sky_brightness_temp_spectral_averages_ch1_check',
    'sky_brightness_temp_spectral_averages_ch2_check',
    'hbb_std_dev_check', 'hbb_lw_nen_check', 'hbb_sw_nen_check',
    'lw_responsivity_check', 'sw_responsivity_check',
]

# Trailing padding appended to every y-axis label; 30 spaces is what the
# original author found to position the rotated labels best.
_LABEL_PAD = ' ' * 30

# Conversion factor used to express the time axis in hours.
_NS_PER_HOUR = 1000000000 * 60 * 60


def _display_label(value, qc_percent_num):
    """Return the padded y-axis label shown for QC variable ``value``."""
    base = value[:-len('_check')] if value.endswith('_check') else value
    if 'spike' in base:
        base = 'igm_spike'
    if 'sky_brightness_temp_spectral_averages' in base:
        if 'ch1' in base:
            base = 'surface_bt_ch1'
        elif 'ch2' in base:
            base = 'surface_bt_ch2'
    if 'qc_percent' in base:
        base = 'qc_percent ({:3.2f}%)'.format(qc_percent_num)
    return base + _LABEL_PAD


def _fill_missing_times(times, time_increment):
    """Return a float copy of ``times`` with zero entries replaced.

    A zero marks a missing timestamp.  A missing first entry is
    extrapolated backwards from the first positive entry; every other
    missing entry is the previous entry plus ``time_increment``.
    """
    filled = np.array(times, dtype=float)
    if filled.size == 0:
        return filled
    if filled[0] == 0:
        valid = np.flatnonzero(filled > 0)
        if valid.size:
            first = valid[0]
            filled[0] = filled[first] - first * time_increment
    for t in range(1, filled.size):
        if filled[t] == 0:
            filled[t] = filled[t - 1] + time_increment
    return filled


def main(filename, output_dir=None):
    """Plot every QC flag in a netCDF QC file as a stack of subplots.

    Each variable listed in ``_VARIABLE_ORDER`` that is present in the file
    gets one subplot over a 0-24 hour x axis: black where the flag value is
    above zero, white above it, light blue where nothing was drawn, and gray
    when the variable is never above zero.  Timestamps that are not positive
    are treated as missing (NaT): their rows are zeroed, flagged in
    ``missing_data_flag_check`` and given a time inferred from the average
    spacing of the valid timestamps.

    Args:
        filename: path of the netCDF QC file to visualise.
        output_dir: optional directory.  When given, the figure is saved
            there as ``<file stem>.png`` and not shown; when ``None``
            (default) the figure is shown interactively and nothing is
            saved.

    Raises:
        ValueError: if the file contains no positive time value.
    """
    subplot_start_time = time.time()
    print('program running...')
    plt.rcParams['figure.figsize'] = [25, 15]

    # Copy every variable into the DataFrame while the file is open, then
    # close the dataset so the handle is not leaked across a directory run.
    dataset = netCDF4.Dataset(filename)
    try:
        old_data = dataset.variables
        data = pd.DataFrame(index=range(len(old_data['time'])),
                            columns=old_data.keys())
        for key in old_data:
            data[key] = old_data[key]
    finally:
        dataset.close()

    # The first/last positive timestamps bound the valid range; entries
    # outside it (or not positive) are NaT and get inferred below.
    raw_times = np.asarray(data['time'], dtype=float)
    valid = np.flatnonzero(raw_times > 0)
    if valid.size == 0:
        raise ValueError('no valid (positive) time values in %s' % filename)
    low_ind, high_ind = valid[0], valid[-1]
    # Average step between consecutive samples: the span divided by the
    # number of intervals (not the number of points).
    if high_ind > low_ind:
        time_increment = ((raw_times[high_ind] - raw_times[low_ind])
                          / (high_ind - low_ind))
    else:
        time_increment = 0.0

    # Report any variables in the QC file that are not in _VARIABLE_ORDER.
    for var_name in data.columns:
        if var_name not in _VARIABLE_ORDER and var_name != 'time':
            print('*********\n', var_name, '\n&&&&&&&&&')

    # Zero out rows whose time is NaT, flag them as missing data, then
    # infer their timestamps.  Single .loc calls are used because chained
    # assignment does not reliably write back to the frame.
    data.loc[data['time'] < 0] = 0
    if 'missing_data_flag_check' in data.columns:
        data.loc[data['time'] == 0, 'missing_data_flag_check'] = 1
    times = _fill_missing_times(data['time'], time_increment)

    # Convert from nanoseconds to hours since the first sample.
    data['time'] = (times - times[0]) / _NS_PER_HOUR
    data.set_index('time', inplace=True)

    # 100 * (1 - mean of qc_percent), shown in the qc_percent label.
    if 'qc_percent' in data.columns:
        qc_values = np.asarray(data['qc_percent'])
        qc_percent_num = 100 * (1 - np.sum(qc_values) / len(qc_values))
    else:
        qc_percent_num = None

    plt.figure(1)
    total_plots = len(_VARIABLE_ORDER)
    needs_layout = False

    print('creating subplots...')
    for curr_plot_num, value in enumerate(_VARIABLE_ORDER, 1):
        if value not in data.columns:
            print('subplot ', curr_plot_num, ' of ', total_plots,
                  ' -- ', value, ' --- missing')
            continue

        var_name = _display_label(value, qc_percent_num)
        print('subplot ', curr_plot_num, ' of ', total_plots,
              ' -- ', var_name)
        ax = plt.subplot(total_plots, 1, curr_plot_num)
        # Light-blue background so anywhere not graphed is distinguishable
        # from where the value is 0.  set_facecolor replaces the removed
        # Axes.set_axis_bgcolor.
        ax.set_facecolor((0.8, 1.0, 1.0))
        plt.ylabel(var_name, rotation=0)
        # 0-24 for 24 hours, 0-1 either valid or invalid.
        plt.axis([0, 24, 0, 1])
        # Label only every third x tick to reduce clutter.
        plt.xticks(np.arange(0, 25))
        plt.setp(ax.xaxis.get_ticklabels()[1::3], visible=False)
        plt.setp(ax.xaxis.get_ticklabels()[2::3], visible=False)
        plt.yticks((0, 1))

        if (data[value] > 0).any():
            # Black below the values and white above them, covering the
            # blue background wherever data exists.
            plt.fill_between(data.index, data[value], y2=1, color='white')
            plt.fill_between(data.index, data[value], color='black')
            needs_layout = True
        else:
            # Nothing above 0: gray background instead.
            ax.set_facecolor((0.87, 0.87, 0.87))

    # Lay the figure out once, after all subplots exist, rather than once
    # per subplot.
    if needs_layout:
        plt.tight_layout(h_pad=0.1)
        plt.subplots_adjust(wspace=1.0)

    print('saving...')
    end_of_name = os.path.splitext(os.path.basename(filename))[0] + '.png'
    if output_dir is not None:
        plt.savefig(os.path.join(output_dir, end_of_name))
    else:
        plt.show()
    plt.clf()

    print('finished')
    elapsed = time.time() - subplot_start_time
    print('subplot execution time: %d minute(s), %.2f second(s)' %
          (elapsed // 60, elapsed % 60))

def find_qc_files(directory, suffix='QC.nc'):
    """Return the QC files found in ``directory`` and its direct subfolders.

    Only two levels are searched: files directly inside ``directory`` and
    files directly inside each of its immediate subdirectories.  Entries are
    visited in sorted order so the result (and therefore which files a skip
    count applies to) is the same on every run.

    Args:
        directory: path of the directory to search.
        suffix: filename ending that identifies a QC file.

    Returns:
        A list of matching file paths, each built with ``os.path.join``.
    """
    found = []
    for entry in sorted(os.listdir(directory)):
        entry_path = os.path.join(directory, entry)
        if os.path.isdir(entry_path):
            for child in sorted(os.listdir(entry_path)):
                child_path = os.path.join(entry_path, child)
                if os.path.isfile(child_path) and child_path.endswith(suffix):
                    found.append(child_path)
        elif os.path.isfile(entry_path) and entry_path.endswith(suffix):
            found.append(entry_path)
    return found


if __name__ == '__main__':
    start_time = time.time()
    parser = argparse.ArgumentParser()
    parser.add_argument('filepath')
    parser.add_argument('--skip', type=int, default=0,
                        help='number of leading QC files to skip when '
                             'filepath is a directory')
    args = parser.parse_args()
    print(args.filepath)

    # Number of files to skip at the start of a directory run.
    skip_num = args.skip
    print('skip_num = ', skip_num, '\n')
    if os.path.isdir(args.filepath):
        for curr_num, qc_file in enumerate(find_qc_files(args.filepath), 1):
            # curr_num is 1-based, so exactly skip_num files are skipped.
            if curr_num > skip_num:
                print(curr_num, ': ', qc_file)
                main(qc_file)
            else:
                print(curr_num, ': ', qc_file, ' -- SKIPPED')
    elif os.path.isfile(args.filepath):
        if args.filepath.endswith('QC.nc'):
            print(args.filepath)
            main(args.filepath)

    total_elapsed = time.time() - start_time
    print('total execution time: %d minute(s), %.2f second(s)' %
          (total_elapsed // 60, total_elapsed % 60))