viirs_surfrad.py 12.08 KiB
import numpy as np
import h5py
from util.util import get_grid_values, get_grid_values_all, is_night, is_day, compute_lwc_iwc, get_fill_attrs
import glob
import os
from aeolus.datasource import CLAVRx_VIIRS
from icing.moon_phase import *
from pathlib import Path
keep_out_opd = ['/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/arm/2019/11/02/clavrx_VNP02IMG.A2019306.1912.001.2019307003236.uwssec.nc',
'/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/arm/2019/04/13/clavrx_VNP02IMG.A2019103.1918.001.2019104005120.uwssec.nc',
'/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/sioux_falls/2019/05/25/clavrx_VNP02IMG.A2019145.1936.001.2019146005424.uwssec.nc',
'/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/sioux_falls/2019/11/01/clavrx_VNP02IMG.A2019305.1936.001.2019306005913.uwssec.nc',
'/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/sioux_falls/2019/03/01/clavrx_VNP02IMG.A2019060.1930.001.2019061005942.uwssec.nc',
'/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/table_mountain/2019/12/01/clavrx_VNP02IMG.A2019335.2012.001.2019336013827.uwssec.nc',
'/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/table_mountain/2019/05/18/clavrx_VNP02IMG.A2019138.2006.001.2019139013059.uwssec.nc',
'/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/fort_peck/2019/01/28/clavrx_VNP02IMG.A2019028.1930.001.2019029005408.uwssec.nc',
'/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/fort_peck/2019/08/08/clavrx_VNP02IMG.A2019220.1930.001.2019221010714.uwssec.nc',
'/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/madison/2019/10/13/clavrx_VNP02IMG.A2019286.1848.001.2019287001722.uwssec.nc',
'/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/madison/2019/03/20/clavrx_VNP02IMG.A2019079.1830.001.2019079235918.uwssec.nc',
'/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/madison/2019/12/26/clavrx_VNP02IMG.A2019360.1900.001.2019361001327.uwssec.nc',
'/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/desert_rock/2019/02/05/clavrx_VNP02IMG.A2019036.2018.001.2019037030301.uwssec.nc',
'/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/desert_rock/2019/03/30/clavrx_VNP02IMG.A2019089.2024.001.2019090015614.uwssec.nc',
'/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/bondville_il/2019/11/03/clavrx_VNP02IMG.A2019307.1854.001.2019308001716.uwssec.nc',
'/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/goodwin_creek/2019/04/15/clavrx_VNP02IMG.A2019105.1842.001.2019106001003.uwssec.nc',
'/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/penn_state/2019/07/18/clavrx_VNP02IMG.A2019199.1742.001.2019199230925.uwssec.nc',
'/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/penn_state/2019/02/02/clavrx_VNP02IMG.A2019033.1754.001.2019034011318.uwssec.nc']
keep_out = keep_out_opd
target_param = 'cloud_probability'
# target_param = 'cld_opd_dcomp'
group_name_i = 'super/'
group_name_m = 'orig/'
solzen_name = group_name_m + 'solar_zenith'
params_i = [group_name_i+'refl_0_65um', group_name_i+target_param]
params_m = [group_name_m+'temp_11_0um', group_name_m+'refl_0_65um', group_name_m+target_param]
param_idx_m = params_m.index(group_name_m + target_param)
param_idx_i = params_i.index(group_name_i + target_param)
def is_missing(p_idx, tile):
keep = np.invert(np.isnan(tile[p_idx, ]))
if np.sum(keep) / keep.size < 0.98:
return True
def keep_tile(p_idx, tile):
grd_k = tile[p_idx, ].copy()
if target_param == 'cloud_probability':
grd_k = process_cld_prob(grd_k)
elif target_param == 'cld_opd_dcomp':
grd_k = process_cld_opd(grd_k)
if grd_k is not None:
tile[p_idx, ] = grd_k
return tile
else:
return None
# def process_cld_prob(grd_k):
# keep = np.invert(np.isnan(grd_k))
# num_keep = np.sum(keep)
# keep_clr = np.where(keep, grd_k < 0.20, False)
# frac_keep = np.sum(keep_clr)/num_keep
# if not (0.30 < frac_keep < 0.70):
# return None
# grd_k = np.where(np.invert(keep), 0, grd_k) # Convert NaN to 0
# return grd_k
def process_cld_prob(grd_k):
keep = np.invert(np.isnan(grd_k))
num_keep = np.sum(keep)
keep_clr = np.where(keep, grd_k < 0.20, False)
keep_cld = np.where(keep, grd_k > 0.80, False)
frac_clr = np.sum(keep_clr)/num_keep
frac_cld = np.sum(keep_cld)/num_keep
if not (frac_clr >= 0.23 and frac_cld >= 0.23):
return None
grd_k = np.where(np.invert(keep), 0, grd_k) # Convert NaN to 0
return grd_k
def process_cld_opd(grd_k):
keep = np.invert(np.isnan(grd_k))
num_keep = np.sum(keep)
grd_k = np.where(np.invert(keep), 0, grd_k)
keep = np.where(keep, np.logical_and(0.1 < grd_k, grd_k < 158.0), False)
frac_keep = np.sum(keep)/num_keep
if frac_keep < 0.50:
return None
return grd_k
def run_all(directory, out_directory, day_night='ANY', pattern='clavrx_*.nc', start=10):
cnt = start
total_num_train_samples = 0
total_num_valid_samples = 0
num_keep_x_tiles = 14
path = directory + '**' + '/' + pattern
all_files = glob.glob(path, recursive=True)
data_files = [f for f in all_files if f not in keep_out]
valid_tiles_i = []
train_tiles_i = []
valid_tiles_m = []
train_tiles_m = []
f_cnt = 0
num_files = len(data_files)
print('Start, number of files: ', num_files)
total_num_not_missing = 0
for idx, data_f in enumerate(data_files):
# if idx % 4 == 0: # if we want to skip some files
if True:
try:
h5f = h5py.File(data_f, 'r')
except:
print('cant open file: ', data_f)
continue
try:
num_not_missing = run(h5f, params_m, train_tiles_m, valid_tiles_m,
params_i, train_tiles_i, valid_tiles_i,
num_keep_x_tiles=num_keep_x_tiles, tile_width=64, kernel_size=7, day_night=day_night)
except Exception as e:
print(e)
h5f.close()
continue
print(data_f)
f_cnt += 1
h5f.close()
total_num_not_missing += num_not_missing
if len(train_tiles_m) == 0 and len(valid_tiles_m) == 0:
continue
if (f_cnt % 5) == 0:
num_valid_samples = 0
if len(valid_tiles_m) > 0:
valid_i = np.stack(valid_tiles_i)
valid_m = np.stack(valid_tiles_m)
np.save(out_directory + 'valid_mres_' + str(cnt), valid_m)
np.save(out_directory + 'valid_ires_' + str(cnt), valid_i)
num_valid_samples = valid_m.shape[0]
num_train_samples = 0
if len(train_tiles_m) > 0:
train_i = np.stack(train_tiles_i)
train_m = np.stack(train_tiles_m)
np.save(out_directory + 'train_ires_' + str(cnt), train_i)
np.save(out_directory + 'train_mres' + str(cnt), train_m)
num_train_samples = train_m.shape[0]
valid_tiles_i = []
train_tiles_i = []
valid_tiles_m = []
train_tiles_m = []
print(' num_train_samples, num_valid_samples, progress % : ', num_train_samples, num_valid_samples, int((f_cnt/num_files)*100))
total_num_train_samples += num_train_samples
total_num_valid_samples += num_valid_samples
print('total_num_train_samples, total_num_valid_samples, total_num_not_missing: ', total_num_train_samples,
total_num_valid_samples, total_num_not_missing)
print('--------------------------------------------------')
cnt += 1
# Write out leftover, if any. Maybe make this better someday
num_valid_samples = 0
if len(valid_tiles_m) > 0:
valid_i = np.stack(valid_tiles_i)
valid_m = np.stack(valid_tiles_m)
np.save(out_directory + 'valid_mres_' + str(cnt), valid_m)
np.save(out_directory + 'valid_ires_' + str(cnt), valid_i)
num_valid_samples = valid_m.shape[0]
num_train_samples = 0
if len(train_tiles_m) > 0:
train_i = np.stack(train_tiles_i)
train_m = np.stack(train_tiles_m)
np.save(out_directory + 'train_ires_' + str(cnt), train_i)
np.save(out_directory + 'train_mres' + str(cnt), train_m)
num_train_samples = train_m.shape[0]
print(' num_train_samples, num_valid_samples, progress % : ', num_train_samples, num_valid_samples,
int((f_cnt / num_files) * 100))
total_num_train_samples += num_train_samples
total_num_valid_samples += num_valid_samples
print('total_num_train_samples, total_num_valid_samples, total_num_not_missing: ', total_num_train_samples,
total_num_valid_samples, total_num_not_missing)
print('--------------------------------------------------')
print('** total_num_train_samples, total_num_valid_samples: ', total_num_train_samples, total_num_valid_samples)
# tile_width: Must be even!
# kernel_size: Must be odd!
def run(h5f, params_m, train_tiles_m, valid_tiles_m, params_i, train_tiles_i, valid_tiles_i,
num_keep_x_tiles=8, tile_width=64, kernel_size=3, day_night='ANY'):
border = int((kernel_size - 1)/2) + 1 # Need to add for interpolation with no edge effects
param_name = params_m[0]
num_lines = h5f[param_name].shape[0]
num_pixels = h5f[param_name].shape[1] # Must be even
if day_night != 'ANY':
solzen = get_grid_values(h5f, solzen_name, 0, 0, None, num_lines, num_pixels)
grd_s = []
for param in params_m:
try:
grd = get_grid_values(h5f, param, 0, 0, None, num_lines, num_pixels)
grd_s.append(grd)
except Exception as e:
print(e)
return
data_m = np.stack(grd_s)
grd_s = []
for param in params_i:
try:
grd = get_grid_values(h5f, param, 0, 0, None, num_lines*2, num_pixels*2)
grd_s.append(grd)
except Exception as e:
print(e)
return
data_i = np.stack(grd_s)
tile_width += 2 * border
i_skip = tile_width
j_skip = tile_width
i_start = int(num_pixels / 2) - int((num_keep_x_tiles * tile_width) / 2)
j_start = 0
num_y_tiles = int(num_lines / tile_width) - 1
data_tiles_m = []
data_tiles_i = []
num_not_missing = 0
for j in range(num_y_tiles):
j_a = j_start + j * j_skip
j_b = j_a + tile_width
for i in range(num_keep_x_tiles):
i_a = i_start + i * i_skip
i_b = i_a + tile_width
if day_night == 'DAY' and not is_day(solzen[j_a:j_b, i_a:i_b]):
continue
elif day_night == 'NIGHT' and is_day(solzen[j_a:j_b, i_a:i_b]):
continue
nda_m = data_m[:, j_a:j_b, i_a:i_b]
nda_i = data_i[:, j_a*2:j_b*2, i_a*2:i_b*2]
if is_missing(param_idx_i, nda_i):
continue
num_not_missing += 1
nda_i = keep_tile(param_idx_i, nda_i)
if nda_i is not None:
data_tiles_m.append(nda_m)
data_tiles_i.append(nda_i)
num_tiles = len(data_tiles_i)
num_valid = int(num_tiles * 0.10)
num_train = num_tiles - num_valid
for k in range(num_train):
train_tiles_m.append(data_tiles_m[k])
train_tiles_i.append(data_tiles_i[k])
for k in range(num_valid):
valid_tiles_m.append(data_tiles_m[num_train + k])
valid_tiles_i.append(data_tiles_i[num_train + k])
return num_not_missing