Skip to content
Snippets Groups Projects
Commit e0fa7621 authored by tomrink's avatar tomrink
Browse files

snapshot...

parent f5c5dcda
No related branches found
No related tags found
No related merge requests found
import numpy as np
import h5py
from util.util import get_grid_values
from util.util import get_grid_values, is_day
import glob
# Target variable to super-resolve. Toggle between the two supported options;
# the screening logic in process_cld_prob/process_cld_opd keys off this value.
# target_param = 'cloud_probability'
target_param = 'cld_opd_dcomp'

# Granules withheld from the train/valid sets (reserved for independent testing).
keep_out_opd = ['/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/arm/2019/11/02/clavrx_VNP02IMG.A2019306.1912.001.2019307003236.uwssec.nc',
                '/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/arm/2019/04/13/clavrx_VNP02IMG.A2019103.1918.001.2019104005120.uwssec.nc',
                '/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/sioux_falls/2019/05/25/clavrx_VNP02IMG.A2019145.1936.001.2019146005424.uwssec.nc',
                '/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/sioux_falls/2019/11/01/clavrx_VNP02IMG.A2019305.1936.001.2019306005913.uwssec.nc',
                '/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/sioux_falls/2019/03/01/clavrx_VNP02IMG.A2019060.1930.001.2019061005942.uwssec.nc',
                '/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/table_mountain/2019/12/01/clavrx_VNP02IMG.A2019335.2012.001.2019336013827.uwssec.nc',
                '/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/table_mountain/2019/05/18/clavrx_VNP02IMG.A2019138.2006.001.2019139013059.uwssec.nc',
                '/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/fort_peck/2019/01/28/clavrx_VNP02IMG.A2019028.1930.001.2019029005408.uwssec.nc',
                '/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/fort_peck/2019/08/08/clavrx_VNP02IMG.A2019220.1930.001.2019221010714.uwssec.nc',
                '/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/madison/2019/10/13/clavrx_VNP02IMG.A2019286.1848.001.2019287001722.uwssec.nc',
                '/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/madison/2019/03/20/clavrx_VNP02IMG.A2019079.1830.001.2019079235918.uwssec.nc',
                '/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/madison/2019/12/26/clavrx_VNP02IMG.A2019360.1900.001.2019361001327.uwssec.nc',
                '/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/desert_rock/2019/02/05/clavrx_VNP02IMG.A2019036.2018.001.2019037030301.uwssec.nc',
                '/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/desert_rock/2019/03/30/clavrx_VNP02IMG.A2019089.2024.001.2019090015614.uwssec.nc',
                '/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/bondville_il/2019/11/03/clavrx_VNP02IMG.A2019307.1854.001.2019308001716.uwssec.nc',
                '/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/goodwin_creek/2019/04/15/clavrx_VNP02IMG.A2019105.1842.001.2019106001003.uwssec.nc',
                '/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/penn_state/2019/07/18/clavrx_VNP02IMG.A2019199.1742.001.2019199230925.uwssec.nc',
                '/ships19/cloud/scratch/cphillips/clavrx/run_viirs_superres/sites_super_l2/penn_state/2019/02/02/clavrx_VNP02IMG.A2019033.1754.001.2019034011318.uwssec.nc']
keep_out = keep_out_opd

# HDF5 group prefixes: 'super/' holds the high-resolution (I-band) fields,
# 'orig/' the native-resolution (M-band) fields.
group_name_i = 'super/'
group_name_m = 'orig/'

solzen_name = group_name_m + 'solar_zenith'

label_params = [group_name_i+target_param]
data_params = [group_name_m+'temp_ch31', group_name_m+'refl_ch01', group_name_m+target_param]

# Alternate channel naming, kept for reference:
# params_i = [group_name_i+'temp_11_0um', group_name_i+'refl_0_65um', group_name_i+target_param]
# params_m = [group_name_m+'temp_11_0um', group_name_m+'refl_0_65um', group_name_m+target_param]
params_i = [group_name_i+'temp_ch38', group_name_i+'refl_ch01', group_name_i+target_param]
params_m = [group_name_m+'temp_ch38', group_name_m+'refl_ch01', group_name_m+target_param]

# Channel index of the target parameter within each parameter stack.
param_idx_m = params_m.index(group_name_m + target_param)
param_idx_i = params_i.index(group_name_i + target_param)
def is_missing(p_idx, tile):
    """Return True when channel ``p_idx`` of ``tile`` has too much missing data.

    tile: array whose first axis indexes parameters/channels.
    A tile is "missing" when fewer than 98% of its values are non-NaN.
    (Previously this returned None implicitly on the good path; it now
    returns False explicitly — both are falsy, so truthiness callers are
    unaffected.)
    """
    keep = np.invert(np.isnan(tile[p_idx, ]))
    return np.sum(keep) / keep.size < 0.98
def keep_tile(p_idx, tile):
    """Screen and normalize channel ``p_idx`` of ``tile`` for the active target_param.

    Dispatches to the target-specific screener (process_cld_prob or
    process_cld_opd). Returns ``tile`` with the screened channel written back
    in place, or None when the tile fails screening and should be discarded.
    """
    grd_k = tile[p_idx, ].copy()
    if target_param == 'cloud_probability':
        grd_k = process_cld_prob(grd_k)
    elif target_param == 'cld_opd_dcomp':
        grd_k = process_cld_opd(grd_k)
    if grd_k is not None:
        tile[p_idx, ] = grd_k  # write the screened channel back into the stack
        return tile
    else:
        return None
def process_cld_prob(grd_k):
    """Screen a cloud_probability grid; return the screened grid or None to reject.

    A tile is accepted when at least 25% of its valid (non-NaN) pixels are
    "uncertain" (0.10 < p < 0.90), or, failing that, when both the
    low-uncertain (0.10 < p < 0.30) and high-uncertain (0.70 < p < 0.90)
    fractions are each at least 20% of the valid pixels.
    """
    keep = np.invert(np.isnan(grd_k))
    num_keep = np.sum(keep)
    keep = np.where(keep, np.logical_and(0.10 < grd_k, grd_k < 0.90), False)
    if np.sum(keep)/num_keep < 0.25:
        keep_clr = np.where(keep, grd_k < 0.30, False)
        keep_cld = np.where(keep, grd_k > 0.70, False)
        frac_clr = np.sum(keep_clr)/num_keep
        frac_cld = np.sum(keep_cld)/num_keep
        if not (frac_clr >= 0.20 and frac_cld >= 0.20):
            return None
    # NOTE: `keep` was reassigned above, so this zeroes every pixel outside
    # the uncertain range — NaNs included, but also valid values <= 0.10
    # or >= 0.90.
    grd_k = np.where(np.invert(keep), 0, grd_k)
    return grd_k
def process_cld_opd(grd_k):
    """Screen a cld_opd_dcomp grid; return the screened grid or None to reject.

    NaNs are first replaced with 0. A tile is accepted when at least 50% of
    its valid pixels are "cloudy" (0.1 < opd < 158.0), or when the cloudy
    fraction is strictly between 10% and 90%. Pixels outside the cloudy
    range are set to 0 in the returned grid.
    """
    keep = np.invert(np.isnan(grd_k))
    num_keep = np.sum(keep)
    grd_k = np.where(np.invert(keep), 0, grd_k)  # NaN -> 0
    keep = np.where(keep, np.logical_and(0.1 < grd_k, grd_k < 158.0), False)
    frac_cld = np.sum(keep)/num_keep
    if frac_cld < 0.50:
        # Lower-coverage tiles still pass when the cloudy fraction is 10-90%.
        # (With frac_cld < 0.50 this reduces to frac_cld > 0.10.)
        if not (0.10 < frac_cld < 0.90):
            return None
    # Zero everything outside the kept (cloudy) range, e.g. opd >= 158.
    grd_k = np.where(np.invert(keep), 0, grd_k)
    return grd_k
def _save_split(out_directory, cnt, train_tiles_m, train_tiles_i, valid_tiles_m, valid_tiles_i):
    """Stack and save the accumulated train/valid tiles as .npy files, then
    clear the tile lists in place. Returns (num_train_samples, num_valid_samples)."""
    num_valid_samples = 0
    if len(valid_tiles_m) > 0:
        valid_i = np.stack(valid_tiles_i)
        valid_m = np.stack(valid_tiles_m)
        np.save(out_directory + 'valid_mres_' + str(cnt), valid_m)
        np.save(out_directory + 'valid_ires_' + str(cnt), valid_i)
        num_valid_samples = valid_m.shape[0]
    num_train_samples = 0
    if len(train_tiles_m) > 0:
        train_i = np.stack(train_tiles_i)
        train_m = np.stack(train_tiles_m)
        np.save(out_directory + 'train_ires_' + str(cnt), train_i)
        np.save(out_directory + 'train_mres_' + str(cnt), train_m)
        num_train_samples = train_m.shape[0]
    # Clear in place so the caller's references (also passed to run()) stay valid.
    train_tiles_m.clear()
    train_tiles_i.clear()
    valid_tiles_m.clear()
    valid_tiles_i.clear()
    return num_train_samples, num_valid_samples


def run_all(directory, out_directory, day_night='ANY', pattern='clavrx_*.nc', start=10):
    """Recursively find granules under ``directory``, tile each via run(), and
    periodically (every 20 processed files) save the accumulated train/valid
    tile stacks to ``out_directory``.

    day_night: 'ANY', 'DAY' or 'NIGHT' — passed through to run() for
        solar-zenith tile filtering.
    start: starting index for the numbered output files.
    """
    cnt = start
    total_num_train_samples = 0
    total_num_valid_samples = 0
    total_num_not_missing = 0
    num_keep_x_tiles = 4

    path = directory + '**' + '/' + pattern
    all_files = glob.glob(path, recursive=True)
    # Exclude the held-out test granules.
    data_files = [f for f in all_files if f not in keep_out]

    valid_tiles_i = []
    train_tiles_i = []
    valid_tiles_m = []
    train_tiles_m = []
    f_cnt = 0
    num_files = len(data_files)
    print('Start, number of files: ', num_files)
    if num_files == 0:  # avoid ZeroDivisionError in the progress prints below
        return

    for data_f in data_files:
        try:
            h5f = h5py.File(data_f, 'r')
        except Exception:
            print('cant open file: ', data_f)
            continue
        try:
            num_not_missing = run(h5f, params_m, train_tiles_m, valid_tiles_m,
                                  params_i, train_tiles_i, valid_tiles_i,
                                  num_keep_x_tiles=num_keep_x_tiles, tile_width=16,
                                  kernel_size=4, factor=4, day_night=day_night)
        except Exception as e:
            print(e)
            continue
        finally:
            h5f.close()
        print(data_f)
        f_cnt += 1

        # run() returns None when a required grid could not be read.
        if num_not_missing is not None:
            total_num_not_missing += num_not_missing
        if len(train_tiles_m) == 0 and len(valid_tiles_m) == 0:
            continue

        if (f_cnt % 20) == 0:
            num_train_samples, num_valid_samples = _save_split(
                out_directory, cnt, train_tiles_m, train_tiles_i, valid_tiles_m, valid_tiles_i)
            print(' num_train_samples, num_valid_samples, progress % : ', num_train_samples, num_valid_samples, int((f_cnt/num_files)*100))
            total_num_train_samples += num_train_samples
            total_num_valid_samples += num_valid_samples
            print('total_num_train_samples, total_num_valid_samples, total_num_not_missing: ', total_num_train_samples,
                  total_num_valid_samples, total_num_not_missing)
            print('--------------------------------------------------')
            cnt += 1

    # Write out leftover, if any.
    num_train_samples, num_valid_samples = _save_split(
        out_directory, cnt, train_tiles_m, train_tiles_i, valid_tiles_m, valid_tiles_i)
    print(' num_train_samples, num_valid_samples, progress % : ', num_train_samples, num_valid_samples,
          int((f_cnt / num_files) * 100))
    total_num_train_samples += num_train_samples
    total_num_valid_samples += num_valid_samples
    print('total_num_train_samples, total_num_valid_samples, total_num_not_missing: ', total_num_train_samples,
          total_num_valid_samples, total_num_not_missing)
    print('--------------------------------------------------')
    print('** total_num_train_samples, total_num_valid_samples: ', total_num_train_samples, total_num_valid_samples)
# tile_width: Must be even!
# border: (kernel_size-1)//2 + 1 extra pixels are added on each side of every
# tile so later interpolation has no edge effects.
def run(h5f, params_m, train_tiles_m, valid_tiles_m, params_i, train_tiles_i, valid_tiles_i,
        num_keep_x_tiles=8, tile_width=64, kernel_size=3, factor=2, day_night='ANY'):
    """Extract paired native-resolution (params_m) and high-resolution
    (params_i, ``factor`` x finer) tiles from one open granule ``h5f``.

    Tiles passing the missing-data and target screens are appended to the
    supplied train/valid lists with a ~90/10 split (validation drawn from the
    end of the accepted tiles). ``day_night`` of 'DAY'/'NIGHT' filters tiles
    by solar zenith via is_day().

    Returns the number of tiles that passed the missing-data screen, or None
    when a required grid could not be read.
    """
    border = int((kernel_size - 1)/2) + 1  # pad for edge-effect-free interpolation
    param_name = params_m[0]
    num_lines = h5f[param_name].shape[0]
    num_pixels = h5f[param_name].shape[1]  # Must be even
    if day_night != 'ANY':
        solzen = get_grid_values(h5f, solzen_name, 0, 0, None, num_lines, num_pixels)

    # Read the native-resolution parameter stack.
    grd_s = []
    for param in params_m:
        try:
            grd = get_grid_values(h5f, param, 0, 0, None, num_lines, num_pixels)
            grd_s.append(grd)
        except Exception as e:
            print(e)
            return
    data_m = np.stack(grd_s)

    # Read the high-resolution parameter stack (factor x the grid size).
    grd_s = []
    for param in params_i:
        try:
            grd = get_grid_values(h5f, param, 0, 0, None, num_lines*factor, num_pixels*factor)
            grd_s.append(grd)
        except Exception as e:
            print(e)
            return
    data_i = np.stack(grd_s)

    tile_width += 2 * border
    i_skip = tile_width
    j_skip = tile_width
    i_start = border - 1  # zero-based
    j_start = border - 1  # zero-based
    num_y_tiles = int(num_lines / tile_width) - 1

    data_tiles_m = []
    data_tiles_i = []
    num_not_missing = 0
    for j in range(num_y_tiles):
        j_a = j_start + j * j_skip
        j_b = j_a + tile_width
        for i in range(num_keep_x_tiles):
            i_a = i_start + i * i_skip
            i_b = i_a + tile_width
            if day_night == 'DAY' and not is_day(solzen[j_a:j_b, i_a:i_b]):
                continue
            elif day_night == 'NIGHT' and is_day(solzen[j_a:j_b, i_a:i_b]):
                continue
            nda_m = data_m[:, j_a:j_b, i_a:i_b]
            nda_i = data_i[:, j_a*factor:j_b*factor, i_a*factor:i_b*factor]
            if is_missing(param_idx_i, nda_i):
                continue
            num_not_missing += 1
            nda_i = keep_tile(param_idx_i, nda_i)
            if nda_i is not None:
                data_tiles_m.append(nda_m)
                data_tiles_i.append(nda_i)

    # ~90/10 train/validation split; validation taken from the end.
    num_tiles = len(data_tiles_i)
    num_valid = int(num_tiles * 0.10)
    num_train = num_tiles - num_valid
    train_tiles_m.extend(data_tiles_m[:num_train])
    train_tiles_i.extend(data_tiles_i[:num_train])
    valid_tiles_m.extend(data_tiles_m[num_train:])
    valid_tiles_i.extend(data_tiles_i[num_train:])
    return num_not_missing
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment