Newer
Older
# Alternative target parameters (disabled). target_param must already be bound
# by the time params_i / params_m are built below.
# target_param = 'cld_opd_dcomp_2'
# target_param = 'cld_opd_dcomp_3'
# Name prefixes prepended to every parameter name; the '_i' prefix feeds
# params_i and the '_m' prefix feeds params_m.
group_name_i = 'super/'
group_name_m = 'orig/'
# Solar zenith lives under the 'orig/' prefix; run() reads it when day_night != 'ANY'.
solzen_name = group_name_m + 'solar_zenith'
# Fully qualified parameter names for the '_i' set; the target parameter is last.
params_i = [group_name_i+'temp_ch38', group_name_i+'refl_ch01', group_name_i+target_param]
# Fully qualified parameter names for the '_m' set.
params_m = [group_name_m+'temp_ch38', group_name_m+'refl_ch01',
group_name_m+'refl_submin_ch01', group_name_m+'refl_submax_ch01', group_name_m+'refl_substddev_ch01',
# Position of the target parameter inside each list. These indices select the
# target layer from stacked tiles (histograms use param_idx_m, the tile
# filters in run() use param_idx_i). list.index raises ValueError if absent.
param_idx_m = params_m.index(group_name_m + target_param)
param_idx_i = params_i.index(group_name_i + target_param)
# Value range handed to np.histogram for the target parameter.
hist_range = [0.0, 1.0]
# hist_range = [0.0, 160.0]
# Module-level tiling settings.
# NOTE(review): run() declares its own defaults (tile_width=64, kernel_size=3,
# factor=2) -- confirm these module values are what actually gets passed in.
tile_width = 32
kernel_size = 5
factor = 4
# Alternative tiling settings (disabled):
# tile_width = 64
# kernel_size = 7
# factor = 4
def is_missing(p_idx, tile):
    """Report whether too much of one layer of a tile is NaN.

    Args:
        p_idx: Index along the first axis of ``tile`` selecting the layer to
            check.
        tile: NumPy array; only ``tile[p_idx]`` is examined.

    Returns:
        bool: True when fewer than 98% of the selected layer's elements are
        non-NaN, or when the layer has no elements at all; False otherwise.
    """
    layer = tile[p_idx, ]
    if layer.size == 0:
        # An empty layer holds no usable data; treating it as missing also
        # avoids a 0/0 fraction (NaN plus a RuntimeWarning) below.
        return True
    valid_fraction = np.count_nonzero(~np.isnan(layer)) / layer.size
    # Always hand back a plain bool instead of falling through with None.
    return bool(valid_fraction < 0.98)
# grd_k = process_cld_prob(grd_k)
grd_k = process_cloud_frac(grd_k)
# keep marks the elements of grd_k that are not NaN.
keep = np.invert(np.isnan(grd_k))
grd_by_cat = get_cloud_frac_5cat(grd_k)
# keep_mix selects cells whose category lies strictly between 0 and 4;
# frac_mix is the fraction of all cells that fall in that range.
keep_mix = np.logical_and(grd_by_cat > 0, grd_by_cat < 4)
frac_mix = np.sum(keep_mix)/keep_mix.size
grd_k = np.where(np.invert(keep), 0, grd_k) # Convert NaN to 0
return grd_k
# Replace NaN with 0, then binarize: values below 0.5 become 0, all others 1.
grd_k = np.where(np.isnan(grd_k), 0, grd_k)
grd_k = np.where(grd_k < 0.5, 0, 1)
# Add up the 16 strided sub-grids (row and column offsets 0..3, step 4), so
# each element of s is the number of 1s in one non-overlapping 4x4 cell (0..16).
# NOTE(review): the slices only have matching shapes when both leading
# dimensions are multiples of 4 -- confirm callers guarantee this.
s = grd_k[0::4, 0::4] + grd_k[1::4, 0::4] + grd_k[2::4, 0::4] + grd_k[3::4, 0::4] + \
grd_k[0::4, 1::4] + grd_k[1::4, 1::4] + grd_k[2::4, 1::4] + grd_k[3::4, 1::4] + \
grd_k[0::4, 2::4] + grd_k[1::4, 2::4] + grd_k[2::4, 2::4] + grd_k[3::4, 2::4] + \
grd_k[0::4, 3::4] + grd_k[1::4, 3::4] + grd_k[2::4, 3::4] + grd_k[3::4, 3::4]
# Bin the counts: 0 -> cat_0, 1..5 -> cat_1, 11..15 -> cat_3, 16 -> cat_4.
# NOTE(review): cat_2 is used below; presumably it covers the remaining
# counts 6..10 between cat_1 and cat_3 -- confirm.
cat_0 = np.logical_and(s >= 0, s < 1)
cat_1 = np.logical_and(s >= 1, s < 6)
cat_3 = np.logical_and(s >= 11, s <= 15)
cat_4 = np.logical_and(s > 15, s <= 16)
# Every mask is computed before s is overwritten, so an earlier relabel
# cannot leak into a later mask.
s[cat_0] = 0
s[cat_1] = 1
s[cat_2] = 2
s[cat_3] = 3
s[cat_4] = 4
return s
keep_cld = np.where(keep, np.logical_and(2.0 < grd_k, grd_k < 158.0), False)
# Build validation and training tile sets from the files found under
# `directory`, save them as numbered .npy arrays in `out_directory`
# ('valid_mres_'/'valid_ires_' and 'train_mres_'/'train_ires_' prefixes), and
# accumulate 16-bin histograms of the target parameter for each set.
def run_all(directory, out_directory, day_night='ANY', pattern='clavrx_*.nc', start=10, is_snow_covered=None):
# Test files match '.../01/...', validation files match '.../0[2-6]/...'.
# NOTE(review): recursive=True only has an effect when the pattern contains
# '**'; neither pattern does.
test_files = glob.glob(directory + '*_v3/202?/*/01/*/*.nc', recursive=True)
valid_files = glob.glob(directory + '*_v3/202?/*/0[2-6]/*/*.nc', recursive=True)
# Everything not claimed by the validation or test globs is used for training.
# NOTE(review): `valid_files + test_files` is rebuilt for every element of
# all_files and searched linearly; a precomputed set would avoid the quadratic scan.
train_files = [f for f in all_files if f not in valid_files + test_files]
num_files = len(valid_files)
print('Start, number of valid files: ', num_files)
# Histogram accumulators; 16 entries to match bins=16 in the np.histogram calls below.
param_train_hist = np.zeros([16], dtype=np.int64)
param_valid_hist = np.zeros([16], dtype=np.int64)
# run() appends accepted tiles to data_tiles_m / data_tiles_i in place.
num_not_missing, num_snow_covered = \
run(h5f, params_m, data_tiles_m, params_i, data_tiles_i,
except Exception as e:
# Any failure inside run() is printed, the file is closed, and the file is skipped.
print(e)
h5f.close()
continue
if len(data_tiles_m) > 0:
# Stack the per-tile arrays along a new leading sample axis.
valid_i = np.stack(data_tiles_i)
valid_m = np.stack(data_tiles_m)
print('problem: number of samples dont match', valid_m.shape, valid_i.shape)
continue
np.save(out_directory + 'valid_mres_' + f'{cnt:04d}', valid_m)
np.save(out_directory + 'valid_ires_' + f'{cnt:04d}', valid_i)
# Histogram only the target-parameter layer of the '_m' tiles.
param_valid_hist += np.histogram(valid_m[:, param_idx_m, :, :], bins=16, range=hist_range)[0]
print(' num_valid_samples, progress % : ', num_valid_samples, int((f_cnt/(num_files/num_skip))*100))
print('total_num_valid_samples, total_num_not_missing: ', total_num_valid_samples, total_num_not_missing)
# Write out leftover, if any. Maybe make this better someday
num_valid_samples = 0
if len(data_tiles_m) > 0:
valid_i = np.stack(data_tiles_i)
valid_m = np.stack(data_tiles_m)
print('problem: number of samples dont match', valid_m.shape, valid_i.shape)
elif DO_WRITE_OUTFILE:
np.save(out_directory + 'valid_mres_' + f'{cnt:04d}', valid_m)
np.save(out_directory + 'valid_ires_' + f'{cnt:04d}', valid_i)
param_valid_hist += np.histogram(valid_m[:, param_idx_m, :, :], bins=16, range=hist_range)[0]
total_num_valid_samples += num_valid_samples
print('total_num_valid_samples, total_num_not_missing: ', total_num_valid_samples, total_num_not_missing)
print('--------------------------------------------------')
print('----------------------------------------------------------------')
# Training pass: same per-file procedure, applied to train_files.
num_files = len(train_files)
print('Start, number of train files: ', num_files)
try:
h5f = h5py.File(data_f, 'r')
# NOTE(review): a bare except also traps KeyboardInterrupt/SystemExit;
# `except OSError` is likely what is meant here -- confirm.
except:
print('cant open file: ', data_f)
continue
try:
# NOTE(review): run() has bare `return` paths; unpacking the resulting None
# raises TypeError, which the handler below catches and treats as a skipped file.
num_not_missing, num_snow_covered = \
run(h5f, params_m, data_tiles_m, params_i, data_tiles_i,
except Exception as e:
print(e)
h5f.close()
continue
print(data_f)
f_cnt += 1
h5f.close()
total_num_not_missing += num_not_missing
# Nothing accepted from this file: move on without writing.
if len(data_tiles_m) == 0:
continue
num_train_samples = 0
if len(data_tiles_m) > 0:
train_i = np.stack(data_tiles_i)
train_m = np.stack(data_tiles_m)
np.save(out_directory + 'train_ires_' + f'{cnt:04d}', train_i)
np.save(out_directory + 'train_mres_' + f'{cnt:04d}', train_m)
param_train_hist += np.histogram(train_m[:, param_idx_m, :, :], bins=16, range=hist_range)[0]
print(' num_train_samples, progress % : ', num_train_samples, int((f_cnt/(num_files/num_skip))*100))
total_num_train_samples += num_train_samples
print('total_num_train_samples, total_num_not_missing: ', total_num_train_samples, total_num_not_missing)
print('--------------------------------------------------')
# cnt numbers the output files (formatted as four digits in the names above).
cnt += 1
# Write out leftover, if any. Maybe make this better someday
if len(data_tiles_m) > 0:
train_i = np.stack(data_tiles_i)
train_m = np.stack(data_tiles_m)
np.save(out_directory + 'train_ires_' + f'{cnt:04d}', train_i)
np.save(out_directory + 'train_mres_' + f'{cnt:04d}', train_m)
param_train_hist += np.histogram(train_m[:, param_idx_m, :, :], bins=16, range=hist_range)[0]
print('total_num_train_samples, total_num_not_missing: ', total_num_train_samples, total_num_not_missing)
print('*** total_num_train_samples, total_num_valid_samples: ', total_num_train_samples, total_num_valid_samples)
# Cut tiles out of one open file: read the params_m / params_i grids from h5f,
# walk the grid tile by tile, and append each accepted tile pair to
# data_tiles_m / data_tiles_i in place.
# tile_width: Must be even!
# kernel_size: Must be odd!
def run(h5f, params_m, data_tiles_m, params_i, data_tiles_i, tile_width=64, kernel_size=3, factor=2,
# e.g. kernel_size=3 -> border=2, kernel_size=5 -> border=3.
border = int((kernel_size - 1)/2) + 1 # Need to add for interpolation with no edge effects
num_lines = h5f[param_name].shape[0]
num_pixels = h5f[param_name].shape[1] # Must be even
# Solar zenith and snow grids are only loaded when the matching filter is requested.
if day_night != 'ANY':
solzen = get_grid_values(h5f, solzen_name, 0, 0, None, num_lines, num_pixels)
if is_snow_covered is not None:
snow = get_grid_values(h5f, snow_class_name, 0, 0, None, num_lines, num_pixels)
try:
grd = get_grid_values(h5f, param, 0, 0, None, num_lines, num_pixels)
grd_s.append(grd)
except Exception as e:
print(e)
# NOTE(review): this bare return yields None; a caller that unpacks two
# values from run() will raise TypeError on this path.
return
# Same read, but requested with both dimensions scaled by factor.
grd = get_grid_values(h5f, param, 0, 0, None, num_lines*factor, num_pixels*factor)
grd_s.append(grd)
except Exception as e:
print(e)
return
data_i = np.stack(grd_s)
# Grow the tile by the border on both sides; the stride equals the grown width.
tile_width += 2 * border
i_skip = tile_width
j_skip = tile_width
i_start = border - 1 # zero-based
j_start = border - 1 # zero-based
# NOTE(review): with a start offset of border - 1, the last tile's end index is
# border - 1 + num_y_tiles * tile_width, which exceeds num_lines when num_lines
# is an exact multiple of tile_width; slicing would then silently return a
# shorter tile. Same for the pixel direction -- confirm.
num_y_tiles = int(num_lines / tile_width)
num_x_tiles = int(num_pixels / tile_width)
for j in range(num_y_tiles):
j_a = j_start + j * j_skip
j_b = j_a + tile_width
# Optional per-tile filters: snow cover first, then day/night.
if is_snow_covered is not None:
if is_snow_covered:
if not snow_covered(snow[j_a:j_b, i_a:i_b]):
continue
if day_night == 'DAY' and not is_day(solzen[j_a:j_b, i_a:i_b]):
continue
elif day_night == 'NIGHT' and is_day(solzen[j_a:j_b, i_a:i_b]):
continue
nda_m = data_m[:, j_a:j_b, i_a:i_b]
# Same window in the factor-scaled grid.
nda_i = data_i[:, j_a*factor:j_b*factor, i_a*factor:i_b*factor]
# Skip tiles whose target layer has too many NaNs; count the rest.
if is_missing(param_idx_i, nda_i):
continue
num_not_missing += 1
# keep_tile may reject a tile by returning None.
nda_i = keep_tile(param_idx_i, nda_i)
if nda_i is not None:
data_tiles_m.append(nda_m)
data_tiles_i.append(nda_i)