Skip to content
Snippets Groups Projects
Commit f4dc032f authored by tomrink's avatar tomrink
Browse files

snapshot...

parent ee8bb10f
Branches
No related tags found
No related merge requests found
...@@ -31,20 +31,20 @@ cld_opd_norm_hist = [7.31926378e-01, 9.52482193e-02, 4.62747706e-02, 3.15450036e ...@@ -31,20 +31,20 @@ cld_opd_norm_hist = [7.31926378e-01, 9.52482193e-02, 4.62747706e-02, 3.15450036e
6.50404531e-04, 1.73557144e-02] 6.50404531e-04, 1.73557144e-02]
def keep_tile(param_s, tile):
    """Screen one tile on the target parameter and return it if it passes.

    The row of ``tile`` holding ``group_name + target_param`` is copied and
    handed to the filter that matches the module-level ``target_param``.
    When the filter accepts the grid, the (possibly NaN-filled) grid is
    written back into ``tile`` in place.

    Args:
        param_s: Sequence of parameter names; its ``index`` method is used to
            locate ``group_name + target_param``.
        tile: Array indexed by parameter along its first axis. Modified in
            place when the tile is kept.

    Returns:
        A ``(tile, bflag)`` pair. ``tile`` is ``None`` when the filter
        rejected the grid. ``bflag`` is the flag reported by the filter
        (``True`` when the grid was rejected for having too few non-NaN
        values) and ``False`` when no filter applies to ``target_param``.

    Raises:
        ValueError: Propagated from ``param_s.index`` when the target
            parameter name is not present.
    """
    k = param_s.index(group_name + target_param)
    grd_k = tile[k].copy()

    # Target parameters without a dedicated filter pass through unchanged.
    # Without this default, bflag would be unbound at the return below.
    bflag = False
    if target_param == 'cloud_probability':
        grd_k, bflag = process_cld_prob_(grd_k)
    elif target_param == 'cld_opd_dcomp':
        grd_k, bflag = process_cld_opd_(grd_k)

    if grd_k is None:
        return None, bflag

    tile[k] = grd_k
    return tile, bflag
def process_cld_prob(param_s, tile): def process_cld_prob(param_s, tile):
...@@ -58,18 +58,17 @@ def process_cld_prob(param_s, tile): ...@@ -58,18 +58,17 @@ def process_cld_prob(param_s, tile):
return None return None
def process_cld_prob_(grd_k, min_valid_frac=0.98, clear_max=0.20,
                      clear_frac_range=(0.40, 0.60)):
    """Accept a cloud-probability grid only if it is mostly valid and mixed.

    Args:
        grd_k: Array of values in which NaN marks a missing value.
        min_valid_frac: Minimum fraction of non-NaN values required.
        clear_max: Values strictly below this count as "clear".
        clear_frac_range: ``(low, high)`` exclusive bounds on the fraction of
            non-NaN values that must be clear for the grid to be kept.

    Returns:
        A ``(grid, missing)`` pair. ``grid`` is a copy of ``grd_k`` with NaN
        replaced by 0, or ``None`` when the grid is rejected. ``missing`` is
        ``True`` only when the rejection is due to too few non-NaN values
        (an empty grid is treated that way too).
    """
    # An empty grid has no valid data; guarding here also avoids 0/0 below,
    # which would otherwise yield NaN and report the grid as "not missing".
    if grd_k.size == 0:
        return None, True

    valid = np.invert(np.isnan(grd_k))
    num_valid = np.sum(valid)
    if num_valid / grd_k.size < min_valid_frac:
        return None, True

    # Count clear values among the valid ones only.
    clear = np.where(valid, grd_k < clear_max, False)
    clear_frac = np.sum(clear) / num_valid
    low, high = clear_frac_range
    if not (low < clear_frac < high):
        return None, False

    # Convert NaN to 0
    return np.where(valid, grd_k, 0), False
def process_cld_opd(param_s, tile): def process_cld_opd(param_s, tile):
...@@ -83,23 +82,24 @@ def process_cld_opd(param_s, tile): ...@@ -83,23 +82,24 @@ def process_cld_opd(param_s, tile):
return None return None
def process_cld_opd_(grd_k, min_valid_frac=0.98, opd_range=(0.1, 158.0),
                     min_in_range_frac=0.50):
    """Accept an optical-depth grid only if it is mostly valid and in range.

    Args:
        grd_k: Array of values in which NaN marks a missing value.
        min_valid_frac: Minimum fraction of non-NaN values required.
        opd_range: ``(low, high)`` exclusive bounds a value must lie within
            to count as in range.
        min_in_range_frac: Minimum fraction of non-NaN values that must be in
            range for the grid to be kept.

    Returns:
        A ``(grid, missing)`` pair. ``grid`` is a copy of ``grd_k`` with NaN
        replaced by 0, or ``None`` when the grid is rejected. ``missing`` is
        ``True`` only when the rejection is due to too few non-NaN values
        (an empty grid is treated that way too).
    """
    # An empty grid has no valid data; guarding here also avoids 0/0 below,
    # which would otherwise yield NaN and report the grid as "not missing".
    if grd_k.size == 0:
        return None, True

    valid = np.invert(np.isnan(grd_k))
    num_valid = np.sum(valid)
    if num_valid / grd_k.size < min_valid_frac:
        return None, True

    # Convert NaN to 0 before the range test so no NaN takes part in it.
    filled = np.where(valid, grd_k, 0)
    low, high = opd_range
    in_range = valid & (low < filled) & (filled < high)
    if np.sum(in_range) / num_valid < min_in_range_frac:
        return None, False

    return filled, False
def run_all(directory, out_directory, day_night='ANY', start=10): def run_all(directory, out_directory, day_night='ANY', start=10):
cnt = start cnt = start
total_num_train_samples = 0 total_num_train_samples = 0
total_num_valid_samples = 0 total_num_valid_samples = 0
total_num_not_missing = 0
num_keep_x_tiles = 14 num_keep_x_tiles = 14
# pattern = 'clavrx_VNP02MOD*.highres.nc.level2.nc' # pattern = 'clavrx_VNP02MOD*.highres.nc.level2.nc'
...@@ -127,7 +127,7 @@ def run_all(directory, out_directory, day_night='ANY', start=10): ...@@ -127,7 +127,7 @@ def run_all(directory, out_directory, day_night='ANY', start=10):
continue continue
try: try:
run(data_h5f, data_params, data_train_tiles, data_valid_tiles, None, num_keep_x_tiles=num_keep_x_tiles, tile_width=128, kernel_size=11, day_night=day_night) num_not_missing = run(data_h5f, data_params, data_train_tiles, data_valid_tiles, num_keep_x_tiles=num_keep_x_tiles, tile_width=128, kernel_size=11, day_night=day_night)
except Exception as e: except Exception as e:
print(e) print(e)
data_h5f.close() data_h5f.close()
...@@ -159,7 +159,9 @@ def run_all(directory, out_directory, day_night='ANY', start=10): ...@@ -159,7 +159,9 @@ def run_all(directory, out_directory, day_night='ANY', start=10):
print(' num_train_samples, num_valid_samples, progress % : ', num_train_samples, num_valid_samples, int((f_cnt/num_files)*100)) print(' num_train_samples, num_valid_samples, progress % : ', num_train_samples, num_valid_samples, int((f_cnt/num_files)*100))
total_num_train_samples += num_train_samples total_num_train_samples += num_train_samples
total_num_valid_samples += num_valid_samples total_num_valid_samples += num_valid_samples
print('total_num_train_samples, total_num_valid_samples: ', total_num_train_samples, total_num_valid_samples) total_num_not_missing += num_not_missing
print('total_num_train_samples, total_num_valid_samples, total_num_not_missing: ',
total_num_train_samples, total_num_valid_samples, total_num_not_missing)
print('---------------------------------------------------------') print('---------------------------------------------------------')
cnt += 1 cnt += 1
...@@ -169,7 +171,7 @@ def run_all(directory, out_directory, day_night='ANY', start=10): ...@@ -169,7 +171,7 @@ def run_all(directory, out_directory, day_night='ANY', start=10):
# tile_width: Must be even! # tile_width: Must be even!
# kernel_size: Must be odd! # kernel_size: Must be odd!
def run(data_h5f, param_s, train_tiles, valid_tiles, dum, num_keep_x_tiles=8, tile_width=64, kernel_size=9, day_night='ANY'): def run(data_h5f, param_s, train_tiles, valid_tiles, num_keep_x_tiles=8, tile_width=64, kernel_size=9, day_night='ANY'):
border = int((kernel_size - 1)/2) border = int((kernel_size - 1)/2)
...@@ -201,6 +203,7 @@ def run(data_h5f, param_s, train_tiles, valid_tiles, dum, num_keep_x_tiles=8, ti ...@@ -201,6 +203,7 @@ def run(data_h5f, param_s, train_tiles, valid_tiles, dum, num_keep_x_tiles=8, ti
num_y_tiles = int(num_lines / tile_width) - 1 num_y_tiles = int(num_lines / tile_width) - 1
tiles = [] tiles = []
num_not_missing = 0
for j in range(num_y_tiles): for j in range(num_y_tiles):
j_a = j_start + j * j_skip j_a = j_start + j * j_skip
...@@ -216,7 +219,9 @@ def run(data_h5f, param_s, train_tiles, valid_tiles, dum, num_keep_x_tiles=8, ti ...@@ -216,7 +219,9 @@ def run(data_h5f, param_s, train_tiles, valid_tiles, dum, num_keep_x_tiles=8, ti
continue continue
nda = data[:, j_a:j_b, i_a:i_b] nda = data[:, j_a:j_b, i_a:i_b]
nda = keep_tile(param_s, nda, dum) nda, missing_flag = keep_tile(param_s, nda)
if not missing_flag:
num_not_missing += 1
if nda is not None: if nda is not None:
tiles.append(nda) tiles.append(nda)
...@@ -229,6 +234,8 @@ def run(data_h5f, param_s, train_tiles, valid_tiles, dum, num_keep_x_tiles=8, ti ...@@ -229,6 +234,8 @@ def run(data_h5f, param_s, train_tiles, valid_tiles, dum, num_keep_x_tiles=8, ti
for k in range(num_valid): for k in range(num_valid):
valid_tiles.append(tiles[num_train + k]) valid_tiles.append(tiles[num_train + k])
return num_not_missing
def scan(directory): def scan(directory):
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment