Skip to content
Snippets Groups Projects
Commit 16c28757 authored by tomrink's avatar tomrink
Browse files

snapshot...

parent 3f22145f
No related branches found
No related tags found
No related merge requests found
import numpy as np
import h5py
from util.util import get_grid_values, is_day
import glob
# target_param = 'cloud_probability'
# target_param = 'cld_opd_dcomp'
target_param = 'cld_opd_dcomp_1'
# target_param = 'cld_opd_dcomp_2'
# target_param = 'cld_opd_dcomp_3'
group_name_i = 'super/'
group_name_m = 'orig/'
solzen_name = group_name_m + 'solar_zenith'
snow_class_name = group_name_m + 'snow_class'
params_i = [group_name_i+'temp_ch38', group_name_i+'refl_ch01', group_name_i+'cloud_probability',
group_name_i+target_param]
params_m = [group_name_m+'temp_ch38', group_name_m+'refl_ch01',
group_name_m+'refl_submin_ch01', group_name_m+'refl_submax_ch01', group_name_m+'refl_substddev_ch01',
group_name_m+'cloud_probability',
group_name_m+target_param]
param_idx_m = params_m.index(group_name_m + target_param)
param_idx_i = params_i.index(group_name_i + target_param)
cp_idx_i = params_i.index(group_name_i + 'cloud_probability')
DO_WRITE_OUTFILE = True
def snow_covered(tile):
return np.any(tile > 1)
def is_missing(p_idx, tile):
keep = np.invert(np.isnan(tile[p_idx, ]))
if np.sum(keep) / keep.size < 0.98:
return True
def keep_tile(cp_idx, opd_idx, tile):
opd = tile[opd_idx, ].copy()
cp = tile[cp_idx, ].copy()
cp = process_cld_prob(cp)
if cp is None:
return None
opd = process_cld_opd(opd)
if opd is not None:
tile[opd_idx, ] = opd
return tile
else:
return None
def process_cld_prob(cld_prob):
keep = np.invert(np.isnan(cld_prob))
num_keep = np.sum(keep)
keep = np.where(keep, np.logical_or(cld_prob < 0.35, cld_prob > 0.65), False)
frac_keep = np.sum(keep)/num_keep
if not (frac_keep > 0.70):
return None
grd_k = np.where(np.invert(keep), 0, cld_prob) # Convert NaN to 0
return grd_k
def process_cloud_frac(grd_k):
pass
def get_cloud_frac_5cat(grd_k):
grd_k = np.where(np.isnan(grd_k), 0, grd_k)
grd_k = np.where(grd_k < 0.5, 0, 1)
s = grd_k[:, 0::4, 0::4] + grd_k[:, 1::4, 0::4] + grd_k[:, 2::4, 0::4] + grd_k[:, 3::4, 0::4] + \
grd_k[:, 0::4, 1::4] + grd_k[:, 1::4, 1::4] + grd_k[:, 2::4, 1::4] + grd_k[:, 3::4, 1::4] + \
grd_k[:, 0::4, 2::4] + grd_k[:, 1::4, 2::4] + grd_k[:, 2::4, 2::4] + grd_k[:, 3::4, 2::4] + \
grd_k[:, 0::4, 3::4] + grd_k[:, 1::4, 3::4] + grd_k[:, 2::4, 3::4] + grd_k[:, 3::4, 3::4]
cat_0 = np.logical_and(s >= 0, s < 2)
cat_1 = np.logical_and(s >= 2, s < 6)
cat_2 = np.logical_and(s >= 6, s < 11)
cat_3 = np.logical_and(s >= 11, s < 15)
cat_4 = np.logical_and(s >= 15, s <= 16)
s[cat_0] = 0
s[cat_1] = 1
s[cat_2] = 2
s[cat_3] = 3
s[cat_4] = 4
return s
def process_cld_opd(grd_k):
keep = np.invert(np.isnan(grd_k))
num_keep = np.sum(keep)
keep_cld = np.where(keep, np.logical_and(2.0 < grd_k, grd_k < 158.0), False)
# keep_cld = np.where(keep, 2.0 < grd_k, False)
frac_cld = np.sum(keep_cld)/num_keep
if not (0.20 < frac_cld < 0.90):
# if not (0.20 < frac_cld):
return None
grd_k = np.where(np.invert(keep), 0, grd_k) # Convert NaN to 0
return grd_k
def run_all(directory, out_directory, day_night='ANY', pattern='clavrx_*.nc', start=10, is_snow_covered=None):
cnt = start
total_num_train_samples = 0
total_num_valid_samples = 0
# path = directory + '**' + '/' + pattern
path = directory + '*_v3/2020/' + '**' + '/' + pattern
all_files = glob.glob(path, recursive=True)
test_files = glob.glob(directory + '*_v3/2020/*/01/*/*.nc', recursive=True)
valid_files = glob.glob(directory + '*_v3/2020/*/0[2-6]/*/*.nc', recursive=True)
train_files = [f for f in all_files if f not in valid_files + test_files]
data_tiles_i = []
data_tiles_m = []
f_cnt = 0
num_files = len(valid_files)
print('Start, number of valid files: ', num_files)
total_num_not_missing = 0
num_skip = 3
param_train_hist = np.zeros([16], dtype=np.int64)
param_valid_hist = np.zeros([16], dtype=np.int64)
# cloud_prob to cloud fraction
# ----------------------------
tile_width = 32
kernel_size = 5
factor = 4
# tile_width = 64
# kernel_size = 7
# factor = 4
# hist_range = [0.0, 1.0]
hist_range = [0.0, 160.0]
for idx, data_f in enumerate(valid_files):
if idx % num_skip == 0: # if we want to skip some files
try:
h5f = h5py.File(data_f, 'r')
except:
print('cant open file: ', data_f)
continue
try:
num_not_missing, num_snow_covered = \
run(h5f, params_m, data_tiles_m, params_i, data_tiles_i,
tile_width=tile_width, kernel_size=kernel_size, factor=factor,
day_night=day_night, is_snow_covered=is_snow_covered)
except Exception as e:
print(e)
h5f.close()
continue
print(data_f)
f_cnt += 1
h5f.close()
total_num_not_missing += num_not_missing
if len(data_tiles_m) == 0:
continue
if (f_cnt % 100) == 0:
num_valid_samples = 0
if len(data_tiles_m) > 0:
valid_i = np.stack(data_tiles_i)
valid_m = np.stack(data_tiles_m)
if DO_WRITE_OUTFILE:
np.save(out_directory + 'valid_mres_' + f'{cnt:04d}', valid_m)
np.save(out_directory + 'valid_ires_' + f'{cnt:04d}', valid_i)
num_valid_samples = valid_m.shape[0]
param_valid_hist += np.histogram(valid_m[:, param_idx_m, :, :], bins=16, range=hist_range)[0]
data_tiles_i = []
data_tiles_m = []
print(' num_valid_samples, progress % : ', num_valid_samples, int((f_cnt/(num_files/num_skip))*100))
total_num_valid_samples += num_valid_samples
print('total_num_valid_samples, total_num_not_missing: ', total_num_valid_samples, total_num_not_missing)
print('--------------------------------------------------')
cnt += 1
# Write out leftover, if any. Maybe make this better someday
num_valid_samples = 0
if len(data_tiles_m) > 0:
valid_i = np.stack(data_tiles_i)
valid_m = np.stack(data_tiles_m)
if DO_WRITE_OUTFILE:
np.save(out_directory + 'valid_mres_' + f'{cnt:04d}', valid_m)
np.save(out_directory + 'valid_ires_' + f'{cnt:04d}', valid_i)
num_valid_samples = valid_m.shape[0]
param_valid_hist += np.histogram(valid_m[:, param_idx_m, :, :], bins=16, range=hist_range)[0]
total_num_valid_samples += num_valid_samples
print('total_num_valid_samples, total_num_not_missing: ', total_num_valid_samples, total_num_not_missing)
print(param_valid_hist)
print('--------------------------------------------------')
print('----------------------------------------------------------------')
data_tiles_i = []
data_tiles_m = []
f_cnt = 0
cnt = start
total_num_not_missing = 0
num_files = len(train_files)
print('Start, number of train files: ', num_files)
for idx, data_f in enumerate(train_files):
if idx % num_skip == 0: # if we want to skip some files
try:
h5f = h5py.File(data_f, 'r')
except:
print('cant open file: ', data_f)
continue
try:
num_not_missing, num_snow_covered = \
run(h5f, params_m, data_tiles_m, params_i, data_tiles_i,
tile_width=tile_width, kernel_size=kernel_size, factor=factor,
day_night=day_night, is_snow_covered=is_snow_covered)
except Exception as e:
print(e)
h5f.close()
continue
print(data_f)
f_cnt += 1
h5f.close()
total_num_not_missing += num_not_missing
if len(data_tiles_m) == 0:
continue
if (f_cnt % 100) == 0:
num_train_samples = 0
if len(data_tiles_m) > 0:
train_i = np.stack(data_tiles_i)
train_m = np.stack(data_tiles_m)
if DO_WRITE_OUTFILE:
np.save(out_directory + 'train_ires_' + f'{cnt:04d}', train_i)
np.save(out_directory + 'train_mres_' + f'{cnt:04d}', train_m)
num_train_samples = train_m.shape[0]
param_train_hist += np.histogram(train_m[:, param_idx_m, :, :], bins=16, range=hist_range)[0]
data_tiles_i = []
data_tiles_m = []
print(' num_train_samples, progress % : ', num_train_samples, int((f_cnt/(num_files/num_skip))*100))
total_num_train_samples += num_train_samples
print('total_num_train_samples, total_num_not_missing: ', total_num_train_samples, total_num_not_missing)
print('--------------------------------------------------')
cnt += 1
# Write out leftover, if any. Maybe make this better someday
num_train_samples = 0
if len(data_tiles_m) > 0:
train_i = np.stack(data_tiles_i)
train_m = np.stack(data_tiles_m)
if DO_WRITE_OUTFILE:
np.save(out_directory + 'train_ires_' + f'{cnt:04d}', train_i)
np.save(out_directory + 'train_mres_' + f'{cnt:04d}', train_m)
num_train_samples = train_m.shape[0]
param_train_hist += np.histogram(train_m[:, param_idx_m, :, :], bins=16, range=hist_range)[0]
total_num_train_samples += num_train_samples
print('total_num_train_samples, total_num_not_missing: ', total_num_train_samples, total_num_not_missing)
print(param_train_hist)
print('--------------------------------------------------')
print('*** total_num_train_samples, total_num_valid_samples: ', total_num_train_samples, total_num_valid_samples)
# tile_width: Must be even!
# kernel_size: Must be odd!
def run(h5f, params_m, data_tiles_m, params_i, data_tiles_i, tile_width=64, kernel_size=3, factor=2,
day_night='ANY', is_snow_covered=None):
border = int((kernel_size - 1)/2) + 1 # Need to add for interpolation with no edge effects
param_name = params_m[0]
num_lines = h5f[param_name].shape[0]
num_pixels = h5f[param_name].shape[1] # Must be even
if day_night != 'ANY':
solzen = get_grid_values(h5f, solzen_name, 0, 0, None, num_lines, num_pixels)
if is_snow_covered is not None:
snow = get_grid_values(h5f, snow_class_name, 0, 0, None, num_lines, num_pixels)
grd_s = []
for param in params_m:
try:
grd = get_grid_values(h5f, param, 0, 0, None, num_lines, num_pixels)
grd_s.append(grd)
except Exception as e:
print(e)
return
data_m = np.stack(grd_s)
grd_s = []
for param in params_i:
try:
grd = get_grid_values(h5f, param, 0, 0, None, num_lines*factor, num_pixels*factor)
grd_s.append(grd)
except Exception as e:
print(e)
return
data_i = np.stack(grd_s)
tile_width += 2 * border
i_skip = tile_width
j_skip = tile_width
i_start = border - 1 # zero-based
j_start = border - 1 # zero-based
num_y_tiles = int(num_lines / tile_width)
num_x_tiles = int(num_pixels / tile_width)
num_not_missing = 0
num_snow_covered = 0
for j in range(num_y_tiles):
j_a = j_start + j * j_skip
j_b = j_a + tile_width
for i in range(num_x_tiles):
i_a = i_start + i * i_skip
i_b = i_a + tile_width
if is_snow_covered is not None:
if is_snow_covered:
if not snow_covered(snow[j_a:j_b, i_a:i_b]):
continue
num_snow_covered += 1
else:
if snow_covered(snow[j_a:j_b, i_a:i_b]):
num_snow_covered += 1
continue
if day_night == 'DAY' and not is_day(solzen[j_a:j_b, i_a:i_b]):
continue
elif day_night == 'NIGHT' and is_day(solzen[j_a:j_b, i_a:i_b]):
continue
nda_m = data_m[:, j_a:j_b, i_a:i_b]
nda_i = data_i[:, j_a*factor:j_b*factor, i_a*factor:i_b*factor]
if is_missing(param_idx_i, nda_i):
continue
num_not_missing += 1
nda_i = keep_tile(cp_idx_i, param_idx_i, nda_i)
if nda_i is not None:
data_tiles_m.append(nda_m)
data_tiles_i.append(nda_i)
return num_not_missing, num_snow_covered
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment