diff --git a/modules/util/abi_surfrad.py b/modules/util/abi_surfrad.py index fb03c1ecfd58f95ce3fa2756b893ab6781127942..19faccf1c8cdcc074699a34c3b8642e9a21e84a6 100644 --- a/modules/util/abi_surfrad.py +++ b/modules/util/abi_surfrad.py @@ -3,38 +3,14 @@ import h5py from util.util import get_grid_values, is_day import glob -keep_out_opd = ['/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/arm/2019/11/02/clavrx_VNP02IMG.A2019306.1912.001.2019307003236.uwssec.nc', - '/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/arm/2019/04/13/clavrx_VNP02IMG.A2019103.1918.001.2019104005120.uwssec.nc', - '/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/sioux_falls/2019/05/25/clavrx_VNP02IMG.A2019145.1936.001.2019146005424.uwssec.nc', - '/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/sioux_falls/2019/11/01/clavrx_VNP02IMG.A2019305.1936.001.2019306005913.uwssec.nc', - '/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/sioux_falls/2019/03/01/clavrx_VNP02IMG.A2019060.1930.001.2019061005942.uwssec.nc', - '/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/table_mountain/2019/12/01/clavrx_VNP02IMG.A2019335.2012.001.2019336013827.uwssec.nc', - '/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/table_mountain/2019/05/18/clavrx_VNP02IMG.A2019138.2006.001.2019139013059.uwssec.nc', - '/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/fort_peck/2019/01/28/clavrx_VNP02IMG.A2019028.1930.001.2019029005408.uwssec.nc', - '/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/fort_peck/2019/08/08/clavrx_VNP02IMG.A2019220.1930.001.2019221010714.uwssec.nc', - '/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/madison/2019/10/13/clavrx_VNP02IMG.A2019286.1848.001.2019287001722.uwssec.nc', - '/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/madison/2019/03/20/clavrx_VNP02IMG.A2019079.1830.001.2019079235918.uwssec.nc', - '/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/madison/2019/12/26/clavrx_VNP02IMG.A2019360.1900.001.2019361001327.uwssec.nc', - '/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/desert_rock/2019/02/05/clavrx_VNP02IMG.A2019036.2018.001.2019037030301.uwssec.nc', - '/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/desert_rock/2019/03/30/clavrx_VNP02IMG.A2019089.2024.001.2019090015614.uwssec.nc', - '/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/bondville_il/2019/11/03/clavrx_VNP02IMG.A2019307.1854.001.2019308001716.uwssec.nc', - '/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/goodwin_creek/2019/04/15/clavrx_VNP02IMG.A2019105.1842.001.2019106001003.uwssec.nc', - '/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/penn_state/2019/07/18/clavrx_VNP02IMG.A2019199.1742.001.2019199230925.uwssec.nc', - '/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/penn_state/2019/02/02/clavrx_VNP02IMG.A2019033.1754.001.2019034011318.uwssec.nc'] - -keep_out = keep_out_opd - - -# target_param = 'cloud_probability' -target_param = 'cld_opd_dcomp' +target_param = 'cloud_probability' +# target_param = 'cld_opd_dcomp' group_name_i = 'super/' group_name_m = 'orig/' solzen_name = group_name_m + 'solar_zenith' -# params_i = [group_name_i+'temp_11_0um', group_name_i+'refl_0_65um', group_name_i+target_param] -# params_m = [group_name_m+'temp_11_0um', group_name_m+'refl_0_65um', group_name_m+target_param] params_i = [group_name_i+'temp_ch38', group_name_i+'refl_ch01', group_name_i+target_param] params_m = [group_name_m+'temp_ch38', group_name_m+'refl_ch01', group_name_m+target_param] @@ -86,7 +62,6 @@ def process_cld_opd(grd_k): grd_k = np.where(np.invert(keep), 0, grd_k) # Convert NaN to 0 return grd_k -# glob.glob('/ships19/cloud/scratch/cphillips/super_abi_l2/goodwin_creek_v3/2020/*/0[1-4]/*/*.nc', recursive=True) def run_all(directory, out_directory, day_night='ANY', pattern='clavrx_*.nc', start=10): cnt = start @@ -97,21 +72,19 @@ def run_all(directory, out_directory, day_night='ANY', pattern='clavrx_*.nc', st path = directory + '**' + '/' + pattern all_files = glob.glob(path, recursive=True) - data_files = [f for f in all_files if f not in keep_out] - # data_files = glob.glob(path, recursive=True) + valid_files = glob.glob(directory + '*/0[1-4]/*/*.nc', recursive=True) + train_files = [f for f in all_files if f not in valid_files] - valid_tiles_i = [] - train_tiles_i = [] - valid_tiles_m = [] - train_tiles_m = [] + data_tiles_i = [] + data_tiles_m = [] f_cnt = 0 - num_files = len(data_files) + num_files = len(all_files) print('Start, number of files: ', num_files) total_num_not_missing = 0 - for idx, data_f in enumerate(data_files): + for idx, data_f in enumerate(valid_files): # if idx % 4 == 0: # if we want to skip some files if True: try: @@ -121,8 +94,7 @@ def run_all(directory, out_directory, day_night='ANY', pattern='clavrx_*.nc', st continue try: - num_not_missing = run(h5f, params_m, train_tiles_m, valid_tiles_m, - params_i, train_tiles_i, valid_tiles_i, + num_not_missing = run(h5f, params_m, data_tiles_m, params_i, data_tiles_i, num_keep_x_tiles=num_keep_x_tiles, tile_width=16, kernel_size=4, factor=4, day_night=day_night) except Exception as e: print(e) @@ -134,59 +106,98 @@ def run_all(directory, out_directory, day_night='ANY', pattern='clavrx_*.nc', st total_num_not_missing += num_not_missing - if len(train_tiles_m) == 0 and len(valid_tiles_m) == 0: + if len(data_tiles_m) == 0: continue if (f_cnt % 20) == 0: num_valid_samples = 0 - if len(valid_tiles_m) > 0: - valid_i = np.stack(valid_tiles_i) - valid_m = np.stack(valid_tiles_m) + if len(data_tiles_m) > 0: + valid_i = np.stack(data_tiles_i) + valid_m = np.stack(data_tiles_m) np.save(out_directory + 'valid_mres_' + str(cnt), valid_m) np.save(out_directory + 'valid_ires_' + str(cnt), valid_i) num_valid_samples = valid_m.shape[0] - num_train_samples = 0 - if len(train_tiles_m) > 0: - train_i = np.stack(train_tiles_i) - train_m = np.stack(train_tiles_m) - np.save(out_directory + 'train_ires_' + str(cnt), train_i) - np.save(out_directory + 'train_mres_' + str(cnt), train_m) - num_train_samples = train_m.shape[0] - - valid_tiles_i = [] - train_tiles_i = [] - valid_tiles_m = [] - train_tiles_m = [] + data_tiles_i = [] + data_tiles_m = [] - print(' num_train_samples, num_valid_samples, progress % : ', num_train_samples, num_valid_samples, int((f_cnt/num_files)*100)) - total_num_train_samples += num_train_samples + print(' num_valid_samples, progress % : ', num_valid_samples, int((f_cnt/num_files)*100)) total_num_valid_samples += num_valid_samples - print('total_num_train_samples, total_num_valid_samples, total_num_not_missing: ', total_num_train_samples, - total_num_valid_samples, total_num_not_missing) + print('total_num_valid_samples, total_num_not_missing: ', total_num_valid_samples, total_num_not_missing) print('--------------------------------------------------') cnt += 1 # Write out leftover, if any. Maybe make this better someday num_valid_samples = 0 - if len(valid_tiles_m) > 0: - valid_i = np.stack(valid_tiles_i) - valid_m = np.stack(valid_tiles_m) + if len(data_tiles_m) > 0: + valid_i = np.stack(data_tiles_i) + valid_m = np.stack(data_tiles_m) np.save(out_directory + 'valid_mres_' + str(cnt), valid_m) np.save(out_directory + 'valid_ires_' + str(cnt), valid_i) num_valid_samples = valid_m.shape[0] + data_tiles_i = [] + data_tiles_m = [] + f_cnt = 0 + total_num_not_missing = 0 + + for idx, data_f in enumerate(train_files): + # if idx % 4 == 0: # if we want to skip some files + if True: + try: + h5f = h5py.File(data_f, 'r') + except: + print('cant open file: ', data_f) + continue + + try: + num_not_missing = run(h5f, params_m, data_tiles_m, params_i, data_tiles_i, + num_keep_x_tiles=num_keep_x_tiles, tile_width=16, kernel_size=4, factor=4, day_night=day_night) + except Exception as e: + print(e) + h5f.close() + continue + print(data_f) + f_cnt += 1 + h5f.close() + + total_num_not_missing += num_not_missing + + if len(data_tiles_m) == 0: + continue + + if (f_cnt % 20) == 0: + num_train_samples = 0 + if len(data_tiles_m) > 0: + train_i = np.stack(data_tiles_i) + train_m = np.stack(data_tiles_m) + np.save(out_directory + 'train_ires_' + str(cnt), train_i) + np.save(out_directory + 'train_mres_' + str(cnt), train_m) + num_train_samples = train_m.shape[0] + + data_tiles_i = [] + data_tiles_m = [] + + print(' num_train_samples, progress % : ', num_train_samples, int((f_cnt/num_files)*100)) + total_num_train_samples += num_train_samples + print('total_num_train_samples, total_num_not_missing: ', total_num_train_samples, total_num_not_missing) + print('--------------------------------------------------') + + cnt += 1 + + # Write out leftover, if any. Maybe make this better someday num_train_samples = 0 - if len(train_tiles_m) > 0: - train_i = np.stack(train_tiles_i) - train_m = np.stack(train_tiles_m) + if len(data_tiles_m) > 0: + train_i = np.stack(data_tiles_i) + train_m = np.stack(data_tiles_m) np.save(out_directory + 'train_ires_' + str(cnt), train_i) np.save(out_directory + 'train_mres_' + str(cnt), train_m) num_train_samples = train_m.shape[0] print(' num_train_samples, num_valid_samples, progress % : ', num_train_samples, num_valid_samples, int((f_cnt / num_files) * 100)) + total_num_train_samples += num_train_samples total_num_valid_samples += num_valid_samples print('total_num_train_samples, total_num_valid_samples, total_num_not_missing: ', total_num_train_samples, @@ -198,8 +209,7 @@ def run_all(directory, out_directory, day_night='ANY', pattern='clavrx_*.nc', st # tile_width: Must be even! # kernel_size: Must be odd! -def run(h5f, params_m, train_tiles_m, valid_tiles_m, params_i, train_tiles_i, valid_tiles_i, - num_keep_x_tiles=8, tile_width=64, kernel_size=3, factor=2, day_night='ANY'): +def run(h5f, params_m, data_tiles_m, params_i, data_tiles_i, num_keep_x_tiles=8, tile_width=64, kernel_size=3, factor=2, day_night='ANY'): border = int((kernel_size - 1)/2) + 1 # Need to add for interpolation with no edge effects @@ -235,15 +245,11 @@ def run(h5f, params_m, train_tiles_m, valid_tiles_m, params_i, train_tiles_i, va i_skip = tile_width j_skip = tile_width - # i_start = int(num_pixels / 2) - int((num_keep_x_tiles * tile_width) / 2) - # j_start = 0 i_start = border - 1 # zero-based j_start = border - 1 # zero-based num_y_tiles = int(num_lines / tile_width) - 1 - data_tiles_m = [] - data_tiles_i = [] num_not_missing = 0 for j in range(num_y_tiles): @@ -270,16 +276,16 @@ def run(h5f, params_m, train_tiles_m, valid_tiles_m, params_i, train_tiles_i, va data_tiles_m.append(nda_m) data_tiles_i.append(nda_i) - num_tiles = len(data_tiles_i) - num_valid = int(num_tiles * 0.10) - num_train = num_tiles - num_valid - - for k in range(num_train): - train_tiles_m.append(data_tiles_m[k]) - train_tiles_i.append(data_tiles_i[k]) - - for k in range(num_valid): - valid_tiles_m.append(data_tiles_m[num_train + k]) - valid_tiles_i.append(data_tiles_i[num_train + k]) + # num_tiles = len(data_tiles_i) + # num_valid = int(num_tiles * 0.10) + # num_train = num_tiles - num_valid + # + # for k in range(num_train): + # train_tiles_m.append(data_tiles_m[k]) + # train_tiles_i.append(data_tiles_i[k]) + # + # for k in range(num_valid): + # valid_tiles_m.append(data_tiles_m[num_train + k]) + # valid_tiles_i.append(data_tiles_i[num_train + k]) return num_not_missing