Skip to content
Snippets Groups Projects
Commit af8cacc4 authored by tomrink
Browse files

snapshot...

parent 29de3703
No related branches found
No related tags found
No related merge requests found
......@@ -6,8 +6,8 @@ from aeolus.datasource import CLAVRx_VIIRS
from icing.moon_phase import *
# target_param = 'cloud_probability'
target_param = 'cld_opd_dcomp'
target_param = 'cloud_probability'
# target_param = 'cld_opd_dcomp'
# group_name = ''
group_name = 'super/'
......@@ -21,7 +21,7 @@ label_params = l2_params
data_params = l2_params
def keep_tile(param_s, tile):
def keep_tile(param_s, tile, hist_10):
k = param_s.index(group_name + target_param)
grd_k = tile[k, ].copy()
......@@ -48,11 +48,12 @@ def process_cld_prob(param_s, tile):
return None
def process_cld_prob_(grd_k):
def process_cld_prob_(grd_k, hist_10):
keep = np.invert(np.isnan(grd_k))
num_keep = np.sum(keep)
if num_keep / grd_k.size < 0.98:
return None
hist_10 += np.histogram(grd_k.flatten(), range=[0.0, 1.0], bins=10)[0]
keep = np.where(keep, np.logical_and(0.05 < grd_k, grd_k < 0.95), False)
if np.sum(keep)/num_keep < 0.50:
return None
......@@ -102,6 +103,7 @@ def run_all(directory, out_directory, day_night='ANY', start=10):
num_files = len(data_files)
print('Start, number of files: ', num_files)
hist_10 = np.zeros((10), dtype=np.int64)
for idx, data_f in enumerate(data_files):
# if idx % 4 == 0: # if we want to skip some files
......@@ -113,12 +115,12 @@ def run_all(directory, out_directory, day_night='ANY', start=10):
continue
try:
total, kept = run(data_h5f, data_params, data_train_tiles, data_valid_tiles, num_keep_x_tiles=num_keep_x_tiles, tile_width=128, kernel_size=11, day_night=day_night)
total, kept = run(data_h5f, data_params, data_train_tiles, data_valid_tiles, hist_10, num_keep_x_tiles=num_keep_x_tiles, tile_width=128, kernel_size=11, day_night=day_night)
except Exception as e:
print(e)
data_h5f.close()
continue
print(data_f, int(100 * (kept/total)))
print(data_f, int(100 * (kept/total)), hist_10, (hist_10 / np.sum(hist_10)))
f_cnt += 1
data_h5f.close()
......@@ -126,33 +128,33 @@ def run_all(directory, out_directory, day_night='ANY', start=10):
if len(data_train_tiles) == 0:
continue
if (f_cnt % 5) == 0:
num_valid_samples = 0
if len(data_valid_tiles) > 0:
data_valid = np.stack(data_valid_tiles)
np.save(out_directory + 'data_valid_' + str(cnt), data_valid)
num_valid_samples = data_valid.shape[0]
data_train = np.stack(data_train_tiles)
np.save(out_directory+'data_train_' + str(cnt), data_train)
num_train_samples = data_train.shape[0]
data_valid_tiles = []
data_train_tiles = []
print(' num_train_samples, num_valid_samples, progress % : ', num_train_samples, num_valid_samples, int((f_cnt/num_files)*100))
total_num_train_samples += num_train_samples
total_num_valid_samples += num_valid_samples
print('total_num_train_samples, total_num_valid_samples: ', total_num_train_samples, total_num_valid_samples)
cnt += 1
# if (f_cnt % 5) == 0:
# num_valid_samples = 0
# if len(data_valid_tiles) > 0:
# data_valid = np.stack(data_valid_tiles)
# np.save(out_directory + 'data_valid_' + str(cnt), data_valid)
# num_valid_samples = data_valid.shape[0]
#
# data_train = np.stack(data_train_tiles)
# np.save(out_directory+'data_train_' + str(cnt), data_train)
# num_train_samples = data_train.shape[0]
#
# data_valid_tiles = []
# data_train_tiles = []
#
# print(' num_train_samples, num_valid_samples, progress % : ', num_train_samples, num_valid_samples, int((f_cnt/num_files)*100))
# total_num_train_samples += num_train_samples
# total_num_valid_samples += num_valid_samples
# print('total_num_train_samples, total_num_valid_samples: ', total_num_train_samples, total_num_valid_samples)
#
# cnt += 1
print('** total_num_train_samples, total_num_valid_samples: ', total_num_train_samples, total_num_valid_samples)
# tile_width: Must be even!
# kernel_size: Must be odd!
def run(data_h5f, param_s, train_tiles, valid_tiles, num_keep_x_tiles=8, tile_width=64, kernel_size=9, day_night='ANY'):
def run(data_h5f, param_s, train_tiles, valid_tiles, hist_10, num_keep_x_tiles=8, tile_width=64, kernel_size=9, day_night='ANY'):
border = int((kernel_size - 1)/2)
......@@ -205,7 +207,7 @@ def run(data_h5f, param_s, train_tiles, valid_tiles, num_keep_x_tiles=8, tile_wi
continue
nda = data[:, j_a:j_b, i_a:i_b]
nda = keep_tile(param_s, nda)
nda = keep_tile(param_s, nda, hist_10)
if nda is not None:
train_tiles.append(nda)
cnt_kept += 1
......@@ -227,7 +229,7 @@ def run(data_h5f, param_s, train_tiles, valid_tiles, num_keep_x_tiles=8, tile_wi
continue
nda = data[:, j_a:j_b, i_a:i_b]
nda = keep_tile(param_s, nda)
nda = keep_tile(param_s, nda, hist_10)
if nda is not None:
valid_tiles.append(nda)
cnt_kept += 1
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment