Skip to content
Snippets Groups Projects
Commit e0fa7621 authored by tomrink's avatar tomrink
Browse files

snapshot...

parent f5c5dcda
No related branches found
No related tags found
No related merge requests found
import numpy as np import numpy as np
import h5py import h5py
from util.util import get_grid_values from util.util import get_grid_values, is_day
import glob import glob
keep_out_opd = ['/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/arm/2019/11/02/clavrx_VNP02IMG.A2019306.1912.001.2019307003236.uwssec.nc',
target_param = 'cloud_probability' '/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/arm/2019/04/13/clavrx_VNP02IMG.A2019103.1918.001.2019104005120.uwssec.nc',
# target_param = 'cld_opd_dcomp' '/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/sioux_falls/2019/05/25/clavrx_VNP02IMG.A2019145.1936.001.2019146005424.uwssec.nc',
'/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/sioux_falls/2019/11/01/clavrx_VNP02IMG.A2019305.1936.001.2019306005913.uwssec.nc',
'/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/sioux_falls/2019/03/01/clavrx_VNP02IMG.A2019060.1930.001.2019061005942.uwssec.nc',
'/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/table_mountain/2019/12/01/clavrx_VNP02IMG.A2019335.2012.001.2019336013827.uwssec.nc',
'/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/table_mountain/2019/05/18/clavrx_VNP02IMG.A2019138.2006.001.2019139013059.uwssec.nc',
'/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/fort_peck/2019/01/28/clavrx_VNP02IMG.A2019028.1930.001.2019029005408.uwssec.nc',
'/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/fort_peck/2019/08/08/clavrx_VNP02IMG.A2019220.1930.001.2019221010714.uwssec.nc',
'/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/madison/2019/10/13/clavrx_VNP02IMG.A2019286.1848.001.2019287001722.uwssec.nc',
'/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/madison/2019/03/20/clavrx_VNP02IMG.A2019079.1830.001.2019079235918.uwssec.nc',
'/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/madison/2019/12/26/clavrx_VNP02IMG.A2019360.1900.001.2019361001327.uwssec.nc',
'/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/desert_rock/2019/02/05/clavrx_VNP02IMG.A2019036.2018.001.2019037030301.uwssec.nc',
'/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/desert_rock/2019/03/30/clavrx_VNP02IMG.A2019089.2024.001.2019090015614.uwssec.nc',
'/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/bondville_il/2019/11/03/clavrx_VNP02IMG.A2019307.1854.001.2019308001716.uwssec.nc',
'/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/goodwin_creek/2019/04/15/clavrx_VNP02IMG.A2019105.1842.001.2019106001003.uwssec.nc',
'/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/penn_state/2019/07/18/clavrx_VNP02IMG.A2019199.1742.001.2019199230925.uwssec.nc',
'/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/penn_state/2019/02/02/clavrx_VNP02IMG.A2019033.1754.001.2019034011318.uwssec.nc']
keep_out = keep_out_opd
# target_param = 'cloud_probability'
target_param = 'cld_opd_dcomp'
group_name_i = 'super/' group_name_i = 'super/'
group_name_m = 'orig/' group_name_m = 'orig/'
solzen_name = group_name_m + 'solar_zenith' solzen_name = group_name_m + 'solar_zenith'
label_params = [group_name_i+target_param] # params_i = [group_name_i+'temp_11_0um', group_name_i+'refl_0_65um', group_name_i+target_param]
data_params = [group_name_m+'temp_ch31', group_name_m+'refl_ch01', group_name_m+target_param] # params_m = [group_name_m+'temp_11_0um', group_name_m+'refl_0_65um', group_name_m+target_param]
params_i = [group_name_i+'temp_ch38', group_name_i+'refl_ch01', group_name_i+target_param]
params_m = [group_name_m+'temp_ch38', group_name_m+'refl_ch01', group_name_m+target_param]
param_idx_m = params_m.index(group_name_m + target_param)
param_idx_i = params_i.index(group_name_i + target_param)
def is_missing(p_idx, tile):
keep = np.invert(np.isnan(tile[p_idx, ]))
if np.sum(keep) / keep.size < 0.98:
return True
def keep_tile(param, param_s, tile):
k = param_s.index(param) def keep_tile(p_idx, tile):
grd_k = tile[k, ].copy() grd_k = tile[p_idx, ].copy()
if target_param == 'cloud_probability': if target_param == 'cloud_probability':
grd_k = process_cld_prob_(grd_k) grd_k = process_cld_prob(grd_k)
elif target_param == 'cld_opd_dcomp': elif target_param == 'cld_opd_dcomp':
grd_k = process_cld_opd_(grd_k) grd_k = process_cld_opd(grd_k)
if grd_k is not None: if grd_k is not None:
tile[k, ] = grd_k tile[p_idx, ] = grd_k
return tile return tile
else: else:
return None return None
def process_cld_prob_(grd_k): def process_cld_prob(grd_k):
keep = np.invert(np.isnan(grd_k)) keep = np.invert(np.isnan(grd_k))
num_keep = np.sum(keep) num_keep = np.sum(keep)
# if num_keep / grd_k.size < 0.98: keep_clr = np.where(keep, grd_k < 0.30, False)
# return None keep_cld = np.where(keep, grd_k > 0.70, False)
keep = np.where(keep, np.logical_and(0.10 < grd_k, grd_k < 0.90), False) frac_clr = np.sum(keep_clr)/num_keep
if np.sum(keep)/num_keep < 0.25: frac_cld = np.sum(keep_cld)/num_keep
if not (frac_clr >= 0.20 and frac_cld >= 0.20):
return None return None
grd_k = np.where(np.invert(keep), 0, grd_k) grd_k = np.where(np.invert(keep), 0, grd_k) # Convert NaN to 0
return grd_k return grd_k
def process_cld_opd_(grd_k): def process_cld_opd(grd_k):
keep = np.invert(np.isnan(grd_k)) keep = np.invert(np.isnan(grd_k))
num_keep = np.sum(keep) num_keep = np.sum(keep)
# if num_keep / grd_k.size < 0.98: keep_cld = np.where(keep, np.logical_and(0.1 < grd_k, grd_k < 158.0), False)
# return None frac_cld = np.sum(keep_cld)/num_keep
grd_k = np.where(np.invert(keep), 0, grd_k) if not (0.10 < frac_cld < 0.90):
keep = np.where(keep, np.logical_and(0.1 < grd_k, grd_k < 158.0), False)
if np.sum(keep)/num_keep < 0.50:
return None return None
grd_k = np.where(np.invert(keep), 0, grd_k) # Convert NaN to 0
return grd_k return grd_k
def run_all(directory, out_directory, pattern='clavrx_*.nc', start=10): def run_all(directory, out_directory, day_night='ANY', pattern='clavrx_*.nc', start=10):
cnt = start cnt = start
total_num_samples = 0 total_num_train_samples = 0
total_num_valid_samples = 0
num_keep_x_tiles = 4
path = directory + '**' + '/' + pattern path = directory + '**' + '/' + pattern
files = glob.glob(path, recursive=True) all_files = glob.glob(path, recursive=True)
data_files = [f for f in all_files if f not in keep_out]
# data_files = glob.glob(path, recursive=True)
label_tiles = [] valid_tiles_i = []
data_tiles = [] train_tiles_i = []
valid_tiles_m = []
train_tiles_m = []
f_cnt = 0 f_cnt = 0
num_files = len(files) num_files = len(data_files)
print('Start, number of files: ', num_files) print('Start, number of files: ', num_files)
for idx, f in enumerate(files): total_num_not_missing = 0
for idx, data_f in enumerate(data_files):
# if idx % 4 == 0: # if we want to skip some files # if idx % 4 == 0: # if we want to skip some files
if True: if True:
try: try:
h5f = h5py.File(f, 'r') h5f = h5py.File(data_f, 'r')
except: except:
print('cant open file: ', f) print('cant open file: ', data_f)
continue continue
try: try:
run(h5f, data_params, data_tiles, label_params, label_tiles, kernel_size=5) num_not_missing = run(h5f, params_m, train_tiles_m, valid_tiles_m,
params_i, train_tiles_i, valid_tiles_i,
num_keep_x_tiles=num_keep_x_tiles, tile_width=16, kernel_size=4, factor=4, day_night=day_night)
except Exception as e: except Exception as e:
print(e) print(e)
h5f.close() h5f.close()
continue continue
print(data_f)
print(f)
f_cnt += 1 f_cnt += 1
h5f.close() h5f.close()
if len(data_tiles) == 0: total_num_not_missing += num_not_missing
continue
if (f_cnt % 100) == 0:
num_samples = 0
if len(data_tiles) > 0:
label = np.stack(label_tiles)
data = np.stack(data_tiles)
#np.save(out_directory + 'label_' + str(cnt), label)
#np.save(out_directory + 'data_' + str(cnt), data)
num_samples = data.shape[0]
label_tiles = [] if len(train_tiles_m) == 0 and len(valid_tiles_m) == 0:
data_tiles = [] continue
print(' num_samples, progress % : ', num_samples, int((f_cnt/num_files)*100)) if (f_cnt % 20) == 0:
total_num_samples += num_samples num_valid_samples = 0
print('total_num_samples: ', total_num_samples) if len(valid_tiles_m) > 0:
print('------------------------------------------------------------') valid_i = np.stack(valid_tiles_i)
valid_m = np.stack(valid_tiles_m)
np.save(out_directory + 'valid_mres_' + str(cnt), valid_m)
np.save(out_directory + 'valid_ires_' + str(cnt), valid_i)
num_valid_samples = valid_m.shape[0]
num_train_samples = 0
if len(train_tiles_m) > 0:
train_i = np.stack(train_tiles_i)
train_m = np.stack(train_tiles_m)
np.save(out_directory + 'train_ires_' + str(cnt), train_i)
np.save(out_directory + 'train_mres_' + str(cnt), train_m)
num_train_samples = train_m.shape[0]
valid_tiles_i = []
train_tiles_i = []
valid_tiles_m = []
train_tiles_m = []
print(' num_train_samples, num_valid_samples, progress % : ', num_train_samples, num_valid_samples, int((f_cnt/num_files)*100))
total_num_train_samples += num_train_samples
total_num_valid_samples += num_valid_samples
print('total_num_train_samples, total_num_valid_samples, total_num_not_missing: ', total_num_train_samples,
total_num_valid_samples, total_num_not_missing)
print('--------------------------------------------------')
cnt += 1 cnt += 1
print('** total_num_samples: ', total_num_samples) # Write out leftover, if any. Maybe make this better someday
num_valid_samples = 0
if len(valid_tiles_m) > 0:
valid_i = np.stack(valid_tiles_i)
valid_m = np.stack(valid_tiles_m)
np.save(out_directory + 'valid_mres_' + str(cnt), valid_m)
np.save(out_directory + 'valid_ires_' + str(cnt), valid_i)
num_valid_samples = valid_m.shape[0]
num_train_samples = 0
if len(train_tiles_m) > 0:
train_i = np.stack(train_tiles_i)
train_m = np.stack(train_tiles_m)
np.save(out_directory + 'train_ires_' + str(cnt), train_i)
np.save(out_directory + 'train_mres_' + str(cnt), train_m)
num_train_samples = train_m.shape[0]
print(' num_train_samples, num_valid_samples, progress % : ', num_train_samples, num_valid_samples,
int((f_cnt / num_files) * 100))
total_num_train_samples += num_train_samples
total_num_valid_samples += num_valid_samples
print('total_num_train_samples, total_num_valid_samples, total_num_not_missing: ', total_num_train_samples,
total_num_valid_samples, total_num_not_missing)
print('--------------------------------------------------')
print('** total_num_train_samples, total_num_valid_samples: ', total_num_train_samples, total_num_valid_samples)
# tile_width: Must be even! # tile_width: Must be even!
# kernel_size: Must be odd! # kernel_size: Must be odd!
def run(h5f, param_s, tiles, lbl_param_s, lbl_tiles, kernel_size=3): def run(h5f, params_m, train_tiles_m, valid_tiles_m, params_i, train_tiles_i, valid_tiles_i,
num_keep_x_tiles=8, tile_width=64, kernel_size=3, factor=2, day_night='ANY'):
border = int((kernel_size - 1)/2) + 1 # Need to add for interpolation with no edge effects border = int((kernel_size - 1)/2) + 1 # Need to add for interpolation with no edge effects
param_name = param_s[0] param_name = params_m[0]
num_lines = h5f[param_name].shape[0] num_lines = h5f[param_name].shape[0]
num_pixels = h5f[param_name].shape[1] # Must be even num_pixels = h5f[param_name].shape[1] # Must be even
if day_night != 'ANY':
solzen = get_grid_values(h5f, solzen_name, 0, 0, None, num_lines, num_pixels)
grd_s = [] grd_s = []
for param in param_s: for param in params_m:
try: try:
grd = get_grid_values(h5f, param, 0, 0, None, num_lines, num_pixels) grd = get_grid_values(h5f, param, 0, 0, None, num_lines, num_pixels)
grd_s.append(grd) grd_s.append(grd)
except Exception as e: except Exception as e:
print(e) print(e)
return return
data = np.stack(grd_s) data_m = np.stack(grd_s)
grd_s = [] grd_s = []
for param in lbl_param_s: for param in params_i:
try: try:
grd = get_grid_values(h5f, param, 0, 0, None, num_lines*2, num_pixels*2) grd = get_grid_values(h5f, param, 0, 0, None, num_lines*factor, num_pixels*factor)
grd_s.append(grd) grd_s.append(grd)
except Exception as e: except Exception as e:
print(e) print(e)
return return
label = np.stack(grd_s) data_i = np.stack(grd_s)
tile_width += 2 * border
i_skip = tile_width
j_skip = tile_width
# i_start = int(num_pixels / 2) - int((num_keep_x_tiles * tile_width) / 2)
# j_start = 0
i_start = border - 1 # zero-based
j_start = border - 1 # zero-based
num_y_tiles = int(num_lines / tile_width) - 1
data_tiles_m = []
data_tiles_i = []
num_not_missing = 0
for j in range(num_y_tiles):
j_a = j_start + j * j_skip
j_b = j_a + tile_width
for i in range(num_keep_x_tiles):
i_a = i_start + i * i_skip
i_b = i_a + tile_width
if day_night == 'DAY' and not is_day(solzen[j_a:j_b, i_a:i_b]):
continue
elif day_night == 'NIGHT' and is_day(solzen[j_a:j_b, i_a:i_b]):
continue
nda_m = data_m[:, j_a:j_b, i_a:i_b]
nda_i = data_i[:, j_a*factor:j_b*factor, i_a*factor:i_b*factor]
if is_missing(param_idx_i, nda_i):
continue
num_not_missing += 1
nda = data[:, :, :] nda_i = keep_tile(param_idx_i, nda_i)
nda = keep_tile(group_name_m + target_param, param_s, nda) if nda_i is not None:
if nda is None: # if none, no need to check the next one data_tiles_m.append(nda_m)
return data_tiles_i.append(nda_i)
nda_lbl = label[:, :, :] num_tiles = len(data_tiles_i)
nda_lbl = keep_tile(group_name_i + target_param, lbl_param_s, nda_lbl) num_valid = int(num_tiles * 0.10)
num_train = num_tiles - num_valid
if nda_lbl is not None: for k in range(num_train):
tiles.append(nda) train_tiles_m.append(data_tiles_m[k])
lbl_tiles.append(nda_lbl) train_tiles_i.append(data_tiles_i[k])
for k in range(num_valid):
valid_tiles_m.append(data_tiles_m[num_train + k])
valid_tiles_i.append(data_tiles_i[num_train + k])
return num_not_missing
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment