# viirs_l1b_l2.py 10.01 KiB
import numpy as np
import h5py
from util.util import get_grid_values, get_grid_values_all, is_night, is_day, compute_lwc_iwc
import glob
from aeolus.datasource import CLAVRx_VIIRS
from icing.moon_phase import *
# Parameter whose values decide whether a tile is kept (see keep_tile).
target_param = 'cloud_probability'
# target_param = 'cld_opd_dcomp'

# Prefix prepended to every dataset name read from the input files.
# group_name = ''
group_name = 'super/'

# Dataset names read for every tile; the target parameter is listed last.
# l2_params = [group_name+'temp_11_0um_nom', group_name+'refl_0_65um_nom', group_name+target_param]
l2_params = [
    group_name + field
    for field in ('temp_11_0um', 'refl_0_65um', target_param)
]

# Dataset name of the solar zenith angle used for day/night screening.
# solzen_name = group_name + 'solar_zenith_angle'
solzen_name = group_name + 'solar_zenith'

# Labels and inputs deliberately share the very same list object.
label_params = data_params = l2_params

# Position of the target parameter inside data_params.
param_idx = data_params.index(group_name + target_param)

# Presumably a normalized histogram of cloud_probability (14 bins).
# range = [0.0, 1.0]
cld_prob_norm_hist = [
    0.34458323,
    0.03729378,
    0.01817725,
    0.01246574,
    0.00991681,
    0.00826515,
    0.00785976,
    0.00595133,
    0.00567965,
    0.00579926,
    0.00642895,
    0.00797761,
    0.01218471,
    0.51741677,
]

# Presumably a normalized histogram of cld_opd_dcomp (20 bins).
# range = [0.0, 160.0]
cld_opd_norm_hist = [
    7.31926378e-01,
    9.52482193e-02,
    4.62747706e-02,
    3.15450036e-02,
    1.98358694e-02,
    1.33123841e-02,
    1.03378429e-02,
    7.95560979e-03,
    5.77925319e-03,
    4.82856215e-03,
    3.31576300e-03,
    2.86789405e-03,
    2.50456177e-03,
    1.79184632e-03,
    1.51077739e-03,
    1.29144749e-03,
    9.20514553e-04,
    7.47183923e-04,
    6.50404531e-04,
    1.73557144e-02,
]
def is_missing(p_idx, tile):
    """Report whether too much of one parameter plane of a tile is NaN.

    Args:
        p_idx: Index along the first axis of ``tile`` selecting the plane
            to inspect.
        tile: NumPy array indexed as ``tile[p_idx]``.

    Returns:
        bool: True when fewer than 98% of the selected values are non-NaN,
        or when the selected plane holds no values at all; False otherwise.
    """
    keep = np.invert(np.isnan(tile[p_idx]))
    if keep.size == 0:
        # An empty plane has nothing usable; this also avoids a 0/0 division.
        return True
    return bool(np.sum(keep) / keep.size < 0.98)
def keep_tile(p_idx, tile):
    """Screen a tile on its target-parameter plane.

    A copy of plane ``p_idx`` is passed to the screening function that
    matches the module-level ``target_param``. If ``target_param`` matches
    neither known name, the plane is left unscreened.

    Args:
        p_idx: Index of the target-parameter plane along the first axis.
        tile: Array holding one tile; modified in place when accepted.

    Returns:
        The same ``tile`` object with plane ``p_idx`` overwritten by the
        screened plane, or None when the screening function rejects it.
    """
    screeners = {
        'cloud_probability': process_cld_prob,
        'cld_opd_dcomp': process_cld_opd,
    }

    plane = tile[p_idx, ].copy()
    screen = screeners.get(target_param)
    if screen is not None:
        plane = screen(plane)

    if plane is None:
        return None

    tile[p_idx, ] = plane
    return tile
def process_cld_prob(grd_k):
    """Accept a cloud-probability plane only if it is roughly half clear.

    A value counts as "clear" when it is non-NaN and below 0.20. The plane
    is accepted when the clear fraction of the non-NaN values lies strictly
    between 0.38 and 0.62.

    Args:
        grd_k: NumPy array of cloud-probability values; NaN marks missing.

    Returns:
        A new array with NaN replaced by 0 when the plane is accepted,
        otherwise None (including when every value is NaN).
    """
    keep = np.invert(np.isnan(grd_k))
    num_keep = np.sum(keep)
    if num_keep == 0:
        # No valid values: reject explicitly instead of dividing 0 by 0.
        return None

    keep_clr = np.logical_and(keep, grd_k < 0.20)
    frac_keep = np.sum(keep_clr) / num_keep
    if not (0.38 < frac_keep < 0.62):
        return None

    return np.where(keep, grd_k, 0)  # Convert NaN to 0
def process_cld_opd(grd_k):
    """Accept an optical-depth plane only if enough values are in range.

    A value is "in range" when it is non-NaN and strictly between 0.1 and
    158.0. The plane is accepted when at least half of the non-NaN values
    are in range.

    Args:
        grd_k: NumPy array of optical-depth values; NaN marks missing.

    Returns:
        A new array with NaN replaced by 0 when the plane is accepted,
        otherwise None (including when every value is NaN).
    """
    valid = np.invert(np.isnan(grd_k))
    num_valid = np.sum(valid)
    if num_valid == 0:
        # Without this guard the fraction is NaN, `nan < 0.50` is False and
        # an all-missing plane would be accepted as all zeros.
        return None

    grd_k = np.where(valid, grd_k, 0)
    in_range = np.logical_and(valid, np.logical_and(0.1 < grd_k, grd_k < 158.0))
    frac_keep = np.sum(in_range) / num_valid
    if frac_keep < 0.50:
        return None
    return grd_k
def _save_tile_batch(out_directory, batch_id, train_tiles, valid_tiles):
    """Stack each non-empty tile list and write it with ``np.save``.

    Returns:
        tuple: ``(num_train_samples, num_valid_samples)`` written to disk.
    """
    num_valid_samples = 0
    if valid_tiles:
        data_valid = np.stack(valid_tiles)
        np.save(out_directory + 'data_valid_' + str(batch_id), data_valid)
        num_valid_samples = data_valid.shape[0]

    num_train_samples = 0
    if train_tiles:
        data_train = np.stack(train_tiles)
        np.save(out_directory + 'data_train_' + str(batch_id), data_train)
        num_train_samples = data_train.shape[0]

    return num_train_samples, num_valid_samples


def run_all(directory, out_directory, day_night='ANY', start=10):
    """Extract tiles from every ``clavrx_*.nc`` file and save them in batches.

    Files are found recursively below ``directory``. Each file is handed to
    ``run``, which appends tiles to the running train/valid lists. Whenever
    the number of processed files reaches a multiple of 20 and tiles are
    pending, the lists are saved as ``data_train_<n>`` / ``data_valid_<n>``
    and cleared. Tiles still pending after the last file are saved as one
    final batch.

    Args:
        directory: Root directory to search; concatenated directly with the
            glob pattern, so it must end with a path separator.
        out_directory: Prefix for the output file names; concatenated
            directly, so a directory must end with a path separator.
        day_night: Passed through to ``run`` ('ANY', 'DAY' or 'NIGHT').
        start: Number used for the first saved batch; incremented per batch.

    Returns:
        None. Progress and totals are printed.
    """
    cnt = start
    total_num_train_samples = 0
    total_num_valid_samples = 0
    total_num_not_missing = 0
    num_keep_x_tiles = 14

    # pattern = 'clavrx_VNP02MOD*.highres.nc.level2.nc'
    pattern = 'clavrx_*.nc'
    path = directory + '**' + '/' + pattern
    data_files = glob.glob(path, recursive=True)

    data_valid_tiles = []
    data_train_tiles = []
    f_cnt = 0

    num_files = len(data_files)
    print('Start, number of files: ', num_files)

    for data_f in data_files:
        try:
            data_h5f = h5py.File(data_f, 'r')
        except Exception:
            print('cant open file: ', data_f)
            continue

        try:
            num_not_missing = run(data_h5f, data_params, data_train_tiles, data_valid_tiles,
                                  num_keep_x_tiles=num_keep_x_tiles, tile_width=128, kernel_size=11,
                                  day_night=day_night)
        except Exception as e:
            print(e)
            continue
        finally:
            # Close the file whether or not tile extraction succeeded.
            data_h5f.close()

        print(data_f)
        f_cnt += 1
        # Count every processed file; tolerate run() returning None.
        total_num_not_missing += num_not_missing or 0

        if not data_train_tiles and not data_valid_tiles:
            continue

        if (f_cnt % 20) == 0:
            num_train_samples, num_valid_samples = _save_tile_batch(
                out_directory, cnt, data_train_tiles, data_valid_tiles)
            data_valid_tiles = []
            data_train_tiles = []

            print('  num_train_samples, num_valid_samples, progress % : ', num_train_samples, num_valid_samples, int((f_cnt/num_files)*100))
            total_num_train_samples += num_train_samples
            total_num_valid_samples += num_valid_samples
            print('total_num_train_samples, total_num_valid_samples, total_num_not_missing: ',
                  total_num_train_samples, total_num_valid_samples, total_num_not_missing)
            print('---------------------------------------------------------')

            cnt += 1

    # Save whatever accumulated since the last full batch so it is not lost.
    if data_train_tiles or data_valid_tiles:
        num_train_samples, num_valid_samples = _save_tile_batch(
            out_directory, cnt, data_train_tiles, data_valid_tiles)
        print('  num_train_samples, num_valid_samples, progress % : ', num_train_samples, num_valid_samples, int((f_cnt/num_files)*100))
        total_num_train_samples += num_train_samples
        total_num_valid_samples += num_valid_samples
        print('total_num_train_samples, total_num_valid_samples, total_num_not_missing: ',
              total_num_train_samples, total_num_valid_samples, total_num_not_missing)
        print('---------------------------------------------------------')

    print('** Done, total_num_train_samples, total_num_valid_samples: ', total_num_train_samples, total_num_valid_samples)
# tile_width: Must be even!
# kernel_size: Must be odd!
def run(data_h5f, param_s, train_tiles, valid_tiles, num_keep_x_tiles=8, tile_width=64, kernel_size=9, day_night='ANY'):
    """Cut one open file into square tiles and append the accepted ones.

    Every parameter in ``param_s`` is read in full and stacked along a new
    first axis. The effective tile width is ``tile_width`` plus a border of
    ``(kernel_size - 1) / 2`` on each side. ``num_keep_x_tiles`` tiles per
    row are taken, centred on the middle pixel column; the last partial row
    group is not used. A tile is dropped when it fails the day/night filter,
    when ``is_missing`` flags the target plane (module-level ``param_idx``),
    or when ``keep_tile`` rejects it.

    Args:
        data_h5f: Open file object indexable by dataset name.
        param_s: Dataset names to read; the first one defines the grid size.
        train_tiles: List extended in place with the first ~90% of accepted
            tiles.
        valid_tiles: List extended in place with the remaining ~10%.
        num_keep_x_tiles: Number of tiles taken across each row.
        tile_width: Tile width before the border is added. Must be even.
        kernel_size: Kernel size that determines the border. Must be odd.
        day_night: 'ANY' keeps everything, 'DAY' keeps tiles for which
            ``is_day`` is true, 'NIGHT' keeps tiles for which it is false.

    Returns:
        int: Number of tiles that passed the ``is_missing`` check; 0 when a
        parameter could not be read.
    """
    border = (kernel_size - 1) // 2

    param_name = param_s[0]
    num_lines = data_h5f[param_name].shape[0]
    num_pixels = data_h5f[param_name].shape[1]  # Must be even

    if day_night != 'ANY':
        solzen = get_grid_values(data_h5f, solzen_name, 0, 0, None, num_lines, num_pixels)

    grd_s = []
    for param in param_s:
        try:
            grd_s.append(get_grid_values(data_h5f, param, 0, 0, None, num_lines, num_pixels))
        except Exception as e:
            print(e)
            # Keep the return type an int so callers can always add it up.
            return 0
    data = np.stack(grd_s)

    tile_width += 2 * border

    i_skip = tile_width
    j_skip = tile_width
    i_start = num_pixels // 2 - (num_keep_x_tiles * tile_width) // 2
    j_start = 0

    num_y_tiles = num_lines // tile_width - 1

    tiles = []
    num_not_missing = 0

    for j in range(num_y_tiles):
        j_a = j_start + j * j_skip
        j_b = j_a + tile_width

        for i in range(num_keep_x_tiles):
            i_a = i_start + i * i_skip
            i_b = i_a + tile_width

            # A column range outside the grid would wrap around or be
            # truncated, giving tiles of the wrong content or shape.
            if i_a < 0 or i_b > num_pixels:
                continue

            if day_night == 'DAY' and not is_day(solzen[j_a:j_b, i_a:i_b]):
                continue
            elif day_night == 'NIGHT' and is_day(solzen[j_a:j_b, i_a:i_b]):
                continue

            nda = data[:, j_a:j_b, i_a:i_b]

            if is_missing(param_idx, nda):
                continue
            num_not_missing += 1

            nda = keep_tile(param_idx, nda)
            if nda is not None:
                tiles.append(nda)

    # The last 10% (rounded down) of accepted tiles go to validation.
    num_tiles = len(tiles)
    num_valid = int(num_tiles * 0.10)
    num_train = num_tiles - num_valid

    train_tiles.extend(tiles[:num_train])
    valid_tiles.extend(tiles[num_train:])

    return num_not_missing
def scan(directory):
    """Print every file for which ``is_night`` and ``moon_phase`` both hold.

    Files and their times come from ``CLAVRx_VIIRS(directory)``. A file whose
    'solar_zenith_angle' cannot be read is skipped silently.

    Args:
        directory: Passed to ``CLAVRx_VIIRS`` to build the file list.

    Returns:
        None. Matching file names are printed.
    """
    data_src = CLAVRx_VIIRS(directory)
    files = data_src.flist

    for idx, file in enumerate(files):
        ts = data_src.ftimes[idx][0]
        # The context manager closes the file on every path, including
        # when is_night() or moon_phase() raises.
        with h5py.File(file, 'r') as h5f:
            try:
                solzen = get_grid_values_all(h5f, 'solar_zenith_angle')
            except Exception:
                # Best effort: unreadable files are skipped.
                continue

            # if is_day(solzen) and moon_phase(ts):
            if is_night(solzen) and moon_phase(ts):
                print(file)
def scan_for_location(txt_file, lon_range=(111.0, 130.0), lat_range=(14.0, 32.0)):
    """Print the listed files whose sampled pixel falls inside a lon/lat box.

    ``txt_file`` holds one file path per line; blank lines are ignored. For
    each file, 'longitude' and 'latitude' are read with ``stride=4`` and the
    value at index ``[406, 400]`` is compared against the open intervals
    ``lon_range`` and ``lat_range``. Files that cannot be read are skipped.

    Args:
        txt_file: Path of the text file listing the files to check.
        lon_range: ``(min, max)`` longitude bounds, both exclusive.
        lat_range: ``(min, max)`` latitude bounds, both exclusive.

    Returns:
        None. Matching file paths are printed.
    """
    with open(txt_file) as file:
        for fpath in file:
            fpath = fpath.strip()
            if not fpath:
                continue

            # Close each file on success as well as on failure.
            with h5py.File(fpath, 'r') as h5f:
                try:
                    lon_s = get_grid_values_all(h5f, 'longitude', stride=4)
                    lat_s = get_grid_values_all(h5f, 'latitude', stride=4)
                    # NOTE(review): [406, 400] is presumably the granule
                    # centre on the stride-4 grid -- confirm.
                    c_lon, c_lat = lon_s[406, 400], lat_s[406, 400]
                except Exception:
                    # Best effort: unreadable or too-small files are skipped.
                    continue

                if (lon_range[0] < c_lon < lon_range[1]) and (lat_range[0] < c_lat < lat_range[1]):
                    print(fpath)
def test_nlcomp(file):
    """Compute LWC/IWC from the NLCOMP cloud fields of one file.

    Reads 'cloud_phase', 'cld_reff_nlcomp', 'cld_opd_nlcomp' and
    'cld_geo_thick', keeps only the pixels where all four are non-NaN and
    the thickness exceeds 5.0, and passes them to ``compute_lwc_iwc``.

    Args:
        file: Path of the file to open with ``h5py.File``.

    Returns:
        The ``(lwc_c, iwc_c)`` pair returned by ``compute_lwc_iwc``.
    """
    # The context manager guarantees the file is closed, also on errors.
    with h5py.File(file, 'r') as h5f:
        cld_phs = get_grid_values_all(h5f, 'cloud_phase', scale_factor_name=None, range_name=None)
        keep_0 = np.invert(np.isnan(cld_phs))

        reff = get_grid_values_all(h5f, 'cld_reff_nlcomp')
        keep_1 = np.invert(np.isnan(reff))

        opd = get_grid_values_all(h5f, 'cld_opd_nlcomp')
        keep_2 = np.invert(np.isnan(opd))

        cld_dz = get_grid_values_all(h5f, 'cld_geo_thick')
        keep_3 = np.logical_and(np.invert(np.isnan(cld_dz)), cld_dz > 5.0)

        keep = keep_0 & keep_1 & keep_2 & keep_3

        cld_phs = cld_phs[keep]
        reff = reff[keep]
        opd = opd[keep]
        cld_dz = cld_dz[keep]

        lwc_c, iwc_c = compute_lwc_iwc(cld_phs, reff, opd, cld_dz)

    return lwc_c, iwc_c
# def run_mean_std(directory):
#
# data_dct = {name: [] for name in mod_res_params}
# mean_dct = {name: 0 for name in mod_res_params}
# std_dct = {name: 0 for name in mod_res_params}
#
# for p in os.scandir(directory):
# if not p.is_dir():
# continue
# mod_files = glob.glob(directory+p.name+'/'+'VNP02MOD*.uwssec.nc')
#
# for idx, mfile in enumerate(mod_files):
# if idx % 8 == 0:
# h5f = h5py.File(mfile, 'r')
# for param in mod_res_params:
# name = 'observation_data/'+param
# gvals = get_grid_values_all(h5f, name, range_name=None, stride=10)
# data_dct[param].append(gvals.flatten())
# print(mfile)
# h5f.close()
#
# for param in mod_res_params:
# data = data_dct[param]
# data = np.concatenate(data)
#
# mean_dct[param] = np.nanmean(data)
# std_dct[param] = np.nanstd(data)