Skip to content
Snippets Groups Projects
Commit 910f1676 authored by tomrink's avatar tomrink
Browse files

initial commit

parent d7592b6b
No related branches found
No related tags found
No related merge requests found
import numpy as np
import h5py
from util.util import get_grid_values, get_grid_values_all, is_night, is_day, compute_lwc_iwc, get_fill_attrs
import glob
import os
from aeolus.datasource import CLAVRx_VIIRS
from icing.moon_phase import *
from pathlib import Path
# target_param = 'cloud_probability'
target_param = 'cld_opd_dcomp'
group_name_i = 'super/'
group_name_m = 'orig/'
solzen_name = group_name_m + 'solar_zenith'
label_params = [group_name_i+target_param]
data_params = [group_name_m+'temp_11_0um', group_name_m+'refl_0_65um', group_name_m+target_param]
def keep_tile(param, param_s, tile):
k = param_s.index(param)
grd_k = tile[k, ].copy()
if target_param == 'cloud_probability':
grd_k = process_cld_prob_(grd_k)
elif target_param == 'cld_opd_dcomp':
grd_k = process_cld_opd_(grd_k)
if grd_k is not None:
tile[k, ] = grd_k
return tile
else:
return None
def process_cld_prob_(grd_k):
keep = np.invert(np.isnan(grd_k))
num_keep = np.sum(keep)
if num_keep / grd_k.size < 0.98:
return None
keep = np.where(keep, np.logical_and(0.05 < grd_k, grd_k < 0.95), False)
if np.sum(keep)/num_keep < 0.50:
return None
grd_k = np.where(np.invert(keep), 0, grd_k)
return grd_k
def process_cld_opd_(grd_k):
keep = np.invert(np.isnan(grd_k))
num_keep = np.sum(keep)
if num_keep / grd_k.size < 0.98:
return None
grd_k = np.where(np.invert(keep), 0, grd_k)
keep = np.where(keep, np.logical_and(0.1 < grd_k, grd_k < 158.0), False)
if np.sum(keep)/num_keep < 0.50:
return None
return grd_k
def run_all(directory, out_directory, pattern='clavrx_*.nc', start=10):
cnt = start
total_num_train_samples = 0
total_num_valid_samples = 0
path = directory + '**' + '/' + pattern
files = glob.glob(path, recursive=True)
label_tiles = []
data_tiles = []
f_cnt = 0
num_files = len(files)
print('Start, number of files: ', num_files)
for idx, f in enumerate(files):
# if idx % 4 == 0: # if we want to skip some files
if True:
try:
h5f = h5py.File(f, 'r')
except:
print('cant open file: ', f)
continue
try:
run(h5f, data_params, data_tiles, label_params, label_tiles, tile_width=64, kernel_size=5)
except Exception as e:
print(e)
h5f.close()
continue
print(f)
f_cnt += 1
h5f.close()
if len(data_tiles) == 0:
continue
if (f_cnt % 100) == 0:
num_valid_samples = 0
label = np.stack(label_tiles)
data = np.stack(data_tiles)
np.save(out_directory + 'label_train_' + str(cnt), label)
np.save(out_directory + 'data_train_' + str(cnt), data)
num_samples = data.shape[0]
label_tiles = []
data_tiles = []
# print(' num_train_samples, num_valid_samples, progress % : ', num_train_samples, num_valid_samples, int((f_cnt/num_files)*100))
# total_num_train_samples += num_samples
# total_num_valid_samples += num_valid_samples
# print('total_num_train_samples, total_num_valid_samples: ', total_num_train_samples, total_num_valid_samples)
cnt += 1
print('** total_num_train_samples, total_num_valid_samples: ', total_num_train_samples, total_num_valid_samples)
# tile_width: Must be even!
# kernel_size: Must be odd!
def run(h5f, param_s, tiles, lbl_param_s, lbl_tiles, tile_width=64, kernel_size=3):
border = int((kernel_size - 1)/2) + 1 # Need to add for interpolation with no edge effects
param_name = param_s[0]
num_lines = h5f[param_name].shape[0]
num_pixels = h5f[param_name].shape[1] # Must be even
grd_s = []
for param in param_s:
try:
grd = get_grid_values(h5f, param, 0, 0, None, num_lines, num_pixels)
grd_s.append(grd)
except Exception as e:
print(e)
return
data = np.stack(grd_s)
grd_s = []
for param in lbl_param_s:
try:
grd = get_grid_values(h5f, param, 0, 0, None, num_lines*2, num_pixels*2)
grd_s.append(grd)
except Exception as e:
print(e)
return
label = np.stack(grd_s)
nda = data[:, :, :]
nda = keep_tile(group_name_m + target_param, param_s, nda)
if nda is None: # if none, no need to check the next one
return
nda_lbl = label[:, :, :]
nda_lbl = keep_tile(group_name_i + target_param, lbl_param_s, nda_lbl)
if nda_lbl is not None:
tiles.append(nda)
lbl_tiles.append(nda_lbl)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment