Skip to content
Snippets Groups Projects
Commit 2f2f044b authored by tomrink's avatar tomrink
Browse files

snapshot...

parent 823df891
No related branches found
No related tags found
No related merge requests found
...@@ -25,40 +25,40 @@ def keep_tile(param, param_s, tile): ...@@ -25,40 +25,40 @@ def keep_tile(param, param_s, tile):
grd_k = tile[k, ].copy() grd_k = tile[k, ].copy()
if target_param == 'cloud_probability': if target_param == 'cloud_probability':
grd_k = process_cld_prob_(grd_k) grd_k, bflag = process_cld_prob_(grd_k)
elif target_param == 'cld_opd_dcomp': elif target_param == 'cld_opd_dcomp':
grd_k = process_cld_opd_(grd_k) grd_k, bflag = process_cld_opd_(grd_k)
if grd_k is not None: if grd_k is not None:
tile[k, ] = grd_k tile[k, ] = grd_k
return tile return tile, bflag
else: else:
return None return None, bflag
def process_cld_prob_(grd_k):
    """Screen one cloud-probability tile and zero-fill its NaNs.

    Args:
        grd_k: NumPy array holding the tile values; NaN marks a missing
            value. (Presumably a 2-D tile slice -- confirm against the
            caller; only ``.size`` and element-wise operations are used.)

    Returns:
        A ``(tile, missing_flag)`` tuple:

        * ``(None, True)``  -- the tile is empty or fewer than 98% of its
          values are non-NaN, i.e. it is rejected for missing data.
        * ``(None, False)`` -- enough data is present, but the fraction of
          valid values below 0.20 is not strictly between 0.40 and 0.60.
        * ``(array, False)`` -- the tile is accepted; the returned array is
          a copy of ``grd_k`` with NaNs replaced by 0.
    """
    # An empty tile has nothing to evaluate. Without this guard the ratio
    # below is 0/0 (NaN plus a RuntimeWarning) and the tile would fall
    # through and be reported as "not missing".
    if grd_k.size == 0:
        return None, True

    valid = np.invert(np.isnan(grd_k))
    num_valid = np.sum(valid)
    if num_valid / grd_k.size < 0.98:
        return None, True

    # Only valid (non-NaN) values may count toward the below-0.20 fraction.
    below_thresh = np.where(valid, grd_k < 0.20, False)
    frac_below = np.sum(below_thresh) / num_valid
    if not (0.40 < frac_below < 0.60):
        return None, False

    # Convert NaNs to 0
    return np.where(valid, grd_k, 0), False
def process_cld_opd_(grd_k):
    """Screen one ``cld_opd_dcomp`` tile and zero-fill its NaNs.

    Args:
        grd_k: NumPy array holding the tile values; NaN marks a missing
            value. (Presumably a 2-D tile slice -- confirm against the
            caller; only ``.size`` and element-wise operations are used.)

    Returns:
        A ``(tile, missing_flag)`` tuple:

        * ``(None, True)``  -- the tile is empty or fewer than 98% of its
          values are non-NaN, i.e. it is rejected for missing data.
        * ``(None, False)`` -- enough data is present, but fewer than 50% of
          the valid values lie strictly between 0.1 and 158.0.
        * ``(array, False)`` -- the tile is accepted; the returned array is
          a copy of ``grd_k`` with NaNs replaced by 0.
    """
    # An empty tile has nothing to evaluate. Without this guard the ratio
    # below is 0/0 (NaN plus a RuntimeWarning) and the tile would fall
    # through and be reported as "not missing".
    if grd_k.size == 0:
        return None, True

    valid = np.invert(np.isnan(grd_k))
    num_valid = np.sum(valid)
    if num_valid / grd_k.size < 0.98:
        return None, True

    # Zero-fill NaNs before the range test so the comparison never sees NaN.
    filled = np.where(valid, grd_k, 0)

    # Positions that were NaN are excluded even though they now hold 0.
    in_range = np.where(valid, np.logical_and(0.1 < filled, filled < 158.0), False)
    if np.sum(in_range) / num_valid < 0.50:
        return None, False

    return filled, False
def run_all(directory, out_directory, day_night='ANY', pattern='clavrx_*.nc', start=10): def run_all(directory, out_directory, day_night='ANY', pattern='clavrx_*.nc', start=10):
...@@ -80,6 +80,8 @@ def run_all(directory, out_directory, day_night='ANY', pattern='clavrx_*.nc', st ...@@ -80,6 +80,8 @@ def run_all(directory, out_directory, day_night='ANY', pattern='clavrx_*.nc', st
num_files = len(data_files) num_files = len(data_files)
print('Start, number of files: ', num_files) print('Start, number of files: ', num_files)
total_num_not_missing = 0
for idx, data_f in enumerate(data_files): for idx, data_f in enumerate(data_files):
# if idx % 4 == 0: # if we want to skip some files # if idx % 4 == 0: # if we want to skip some files
if True: if True:
...@@ -90,9 +92,9 @@ def run_all(directory, out_directory, day_night='ANY', pattern='clavrx_*.nc', st ...@@ -90,9 +92,9 @@ def run_all(directory, out_directory, day_night='ANY', pattern='clavrx_*.nc', st
continue continue
try: try:
run(h5f, data_params, data_train_tiles, data_valid_tiles, num_not_missing = run(h5f, data_params, data_train_tiles, data_valid_tiles,
label_params, label_train_tiles, label_valid_tiles, label_params, label_train_tiles, label_valid_tiles,
num_keep_x_tiles=num_keep_x_tiles, tile_width=64, kernel_size=7, day_night=day_night) num_keep_x_tiles=num_keep_x_tiles, tile_width=64, kernel_size=7, day_night=day_night)
except Exception as e: except Exception as e:
print(e) print(e)
h5f.close() h5f.close()
...@@ -129,7 +131,9 @@ def run_all(directory, out_directory, day_night='ANY', pattern='clavrx_*.nc', st ...@@ -129,7 +131,9 @@ def run_all(directory, out_directory, day_night='ANY', pattern='clavrx_*.nc', st
print(' num_train_samples, num_valid_samples, progress % : ', num_train_samples, num_valid_samples, int((f_cnt/num_files)*100)) print(' num_train_samples, num_valid_samples, progress % : ', num_train_samples, num_valid_samples, int((f_cnt/num_files)*100))
total_num_train_samples += num_train_samples total_num_train_samples += num_train_samples
total_num_valid_samples += num_valid_samples total_num_valid_samples += num_valid_samples
print('total_num_train_samples, total_num_valid_samples: ', total_num_train_samples, total_num_valid_samples) total_num_not_missing += num_not_missing
print('total_num_train_samples, total_num_valid_samples, total_num_not_missing: ', total_num_train_samples,
total_num_valid_samples, total_num_not_missing)
print('--------------------------------------------------') print('--------------------------------------------------')
cnt += 1 cnt += 1
...@@ -179,12 +183,13 @@ def run(h5f, param_s, train_tiles, valid_tiles, lbl_param_s, lbl_train_tiles, lb ...@@ -179,12 +183,13 @@ def run(h5f, param_s, train_tiles, valid_tiles, lbl_param_s, lbl_train_tiles, lb
i_start = int(num_pixels / 2) - int((num_keep_x_tiles * tile_width) / 2) i_start = int(num_pixels / 2) - int((num_keep_x_tiles * tile_width) / 2)
j_start = 0 j_start = 0
num_keep_y_tiles = int(num_lines / tile_width) - 3 num_y_tiles = int(num_lines / tile_width) - 1
num_y_valid = int(num_keep_y_tiles * 0.15) + 1 data_tiles = []
num_y_train = num_keep_y_tiles - num_y_valid - 1 lbl_tiles = []
num_not_missing = 0
for j in range(num_y_train): for j in range(num_y_tiles):
j_a = j_start + j * j_skip j_a = j_start + j * j_skip
j_b = j_a + tile_width j_b = j_a + tile_width
...@@ -199,31 +204,23 @@ def run(h5f, param_s, train_tiles, valid_tiles, lbl_param_s, lbl_train_tiles, lb ...@@ -199,31 +204,23 @@ def run(h5f, param_s, train_tiles, valid_tiles, lbl_param_s, lbl_train_tiles, lb
nda = data[:, j_a:j_b, i_a:i_b] nda = data[:, j_a:j_b, i_a:i_b]
nda_lbl = label[:, j_a*2:j_b*2, i_a*2:i_b*2] nda_lbl = label[:, j_a*2:j_b*2, i_a*2:i_b*2]
nda_lbl = keep_tile(group_name_i+target_param, lbl_param_s, nda_lbl) nda_lbl, missing_flag = keep_tile(group_name_i+target_param, lbl_param_s, nda_lbl)
if not missing_flag:
num_not_missing += 1
if nda_lbl is not None: if nda_lbl is not None:
train_tiles.append(nda) data_tiles.append(nda)
lbl_train_tiles.append(nda_lbl) lbl_tiles.append(nda_lbl)
j_start = num_y_train * tile_width + 2*tile_width
for j in range(num_y_valid):
j_a = j_start + j * j_skip
j_b = j_a + tile_width
for i in range(num_keep_x_tiles): num_tiles = len(lbl_tiles)
i_a = i_start + i * i_skip num_valid = int(num_tiles * 0.10)
i_b = i_a + tile_width num_train = num_tiles - num_valid
if day_night == 'DAY' and not is_day(solzen[j_a:j_b, i_a:i_b]): for k in range(num_train):
continue train_tiles.append(data_tiles[k])
elif day_night == 'NIGHT' and is_day(solzen[j_a:j_b, i_a:i_b]): lbl_train_tiles.append(lbl_tiles[k])
continue for k in range(num_valid):
valid_tiles.append(data_tiles[num_train + k])
nda = data[:, j_a:j_b, i_a:i_b] lbl_valid_tiles.append(lbl_tiles[num_train + k])
nda_lbl = label[:, j_a * 2:j_b * 2, i_a * 2:i_b * 2]
nda_lbl = keep_tile(group_name_i+target_param, lbl_param_s, nda_lbl)
if nda_lbl is not None:
valid_tiles.append(nda)
lbl_valid_tiles.append(nda_lbl)
return num_not_missing
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment