Skip to content
Snippets Groups Projects
Commit 6f04c860 authored by tomrink's avatar tomrink
Browse files

initial commit...

parent 069f3aaf
No related branches found
No related tags found
No related merge requests found
import numpy as np
import h5py
from util.util import get_grid_values, get_grid_values_all, is_night, is_day, compute_lwc_iwc, get_fill_attrs
import glob
import os
from aeolus.datasource import CLAVRx_VIIRS
from icing.moon_phase import *
from pathlib import Path
# --- CLAVRx Radiometric parameters and metadata ------------------------------------------------
l1b_ds_list = ['temp_10_4um_nom', 'temp_11_0um_nom', 'temp_12_0um_nom', 'temp_13_3um_nom', 'temp_3_75um_nom',
'temp_6_2um_nom', 'temp_6_7um_nom', 'temp_7_3um_nom', 'temp_8_5um_nom', 'temp_9_7um_nom',
'refl_0_47um_nom', 'refl_0_65um_nom', 'refl_0_86um_nom', 'refl_1_38um_nom', 'refl_1_60um_nom']
l1b_ds_types = {ds: 'f4' for ds in l1b_ds_list}
l1b_ds_fill = {l1b_ds_list[i]: -32767 for i in range(10)}
l1b_ds_fill.update({l1b_ds_list[i+10]: -32768 for i in range(5)})
l1b_ds_range = {ds: 'actual_range' for ds in l1b_ds_list}
# --- CLAVRx L2 parameters and metadata
ds_list = ['cld_height_acha', 'cld_geo_thick', 'cld_press_acha', 'sensor_zenith_angle', 'supercooled_prob_acha',
'supercooled_cloud_fraction', 'cld_temp_acha', 'cld_opd_acha', 'solar_zenith_angle',
'cld_reff_acha', 'cld_reff_dcomp', 'cld_reff_dcomp_1', 'cld_reff_dcomp_2', 'cld_reff_dcomp_3',
'cld_opd_dcomp', 'cld_opd_dcomp_1', 'cld_opd_dcomp_2', 'cld_opd_dcomp_3', 'cld_cwp_dcomp', 'iwc_dcomp',
'lwc_dcomp', 'cld_emiss_acha', 'conv_cloud_fraction', 'cloud_type', 'cloud_phase', 'cloud_mask']
ds_types = {ds_list[i]: 'f4' for i in range(23)}
ds_types.update({ds_list[i+23]: 'i1' for i in range(3)})
ds_fill = {ds_list[i]: -32768 for i in range(23)}
ds_fill.update({ds_list[i+23]: -128 for i in range(3)})
ds_range = {ds_list[i]: 'actual_range' for i in range(23)}
ds_range.update({ds_list[i]: None for i in range(3)})
ds_types.update(l1b_ds_types)
ds_fill.update(l1b_ds_fill)
ds_range.update(l1b_ds_range)
ds_types.update({'temp_3_9um_nom': 'f4'})
ds_types.update({'cloud_fraction': 'f4'})
ds_fill.update({'temp_3_9um_nom': -32767})
ds_fill.update({'cloud_fraction': -32768})
ds_range.update({'temp_3_9um_nom': 'actual_range'})
ds_range.update({'cloud_fraction': 'actual_range'})
emis_params = ['temp_10_4um_nom', 'temp_11_0um_nom', 'temp_12_0um_nom', 'temp_13_3um_nom', 'temp_3_9um_nom',
'temp_6_7um_nom']
# refl_params = ['refl_0_47um_nom', 'refl_0_65um_nom', 'refl_0_86um_nom', 'refl_1_38um_nom', 'refl_1_60um_nom']
# data_params = refl_params + emis_params
# data_params = emis_params
# target_param = 'cloud_probability'
target_param = 'cld_opd_dcomp'
group_name_i = 'super/'
group_name_m = 'orig/'
solzen_name = group_name_m + 'solar_zenith'
label_params = group_name_i+target_param
data_params = [group_name_m+'temp_11_0um', group_name_m+'temp_12_0um', group_name_m+'refl_0_65um', group_name_m+target_param]
def keep_tile(param_s, tile):
k = param_s.index(group_name_m + target_param)
grd_k = tile[k, ].copy()
if target_param == 'cloud_probability':
grd_k = process_cld_prob_(grd_k)
elif target_param == 'cld_opd_dcomp':
grd_k = process_cld_opd_(grd_k)
if grd_k is not None:
tile[k, ] = grd_k
return tile
else:
return None
def process_cld_prob_(grd_k):
keep = np.invert(np.isnan(grd_k))
num_keep = np.sum(keep)
if num_keep / grd_k.size < 0.98:
return None
keep = np.where(keep, np.logical_and(0.05 < grd_k, grd_k < 0.95), False)
if np.sum(keep)/num_keep < 0.50:
return None
grd_k = np.where(np.invert(keep), 0, grd_k)
return grd_k
def process_cld_opd_(grd_k):
keep = np.invert(np.isnan(grd_k))
num_keep = np.sum(keep)
if num_keep / grd_k.size < 0.98:
return None
grd_k = np.where(np.invert(keep), 0, grd_k)
keep = np.where(keep, np.logical_and(0.1 < grd_k, grd_k < 158.0), False)
if np.sum(keep)/num_keep < 0.50:
return None
return grd_k
def run_all(directory, out_directory, day_night='ANY', start=10):
cnt = start
total_num_train_samples = 0
total_num_valid_samples = 0
num_keep_x_tiles = 8
pattern = 'clavrx_VNP02MOD*.highres.nc.level2.nc'
pattern = 'clavrx_*.nc'
path = directory + '**' + '/' + pattern
data_files = glob.glob(path, recursive=True)
label_valid_tiles = []
label_train_tiles = []
data_valid_tiles = []
data_train_tiles = []
f_cnt = 0
num_files = len(data_files)
print('Start, number of files: ', num_files)
for idx, data_f in enumerate(data_files):
# if idx % 4 == 0: # if we want to skip some files
if True:
try:
h5f = h5py.File(data_f, 'r')
except:
print('cant open file: ', data_f)
continue
try:
run(h5f, data_params, data_train_tiles, data_valid_tiles, label_params, label_train_tiles, label_valid_tiles,
num_keep_x_tiles=num_keep_x_tiles, tile_width=64, kernel_size=3, day_night=day_night)
except Exception as e:
print(e)
h5f.close()
# label_h5f.close()
continue
print(data_f)
f_cnt += 1
h5f.close()
# label_h5f.close()
if len(data_train_tiles) == 0:
continue
if (f_cnt % 5) == 0:
num_valid_samples = 0
if len(data_valid_tiles) > 0:
# label_valid = np.stack(label_valid_tiles)
data_valid = np.stack(data_valid_tiles)
np.save(out_directory + 'data_valid_' + str(cnt), data_valid)
# np.save(out_directory+'label_valid_' + str(cnt), label_valid)
num_valid_samples = data_valid.shape[0]
# label_train = np.stack(label_train_tiles)
# np.save(out_directory+'label_train_' + str(cnt), label_train)
data_train = np.stack(data_train_tiles)
np.save(out_directory+'data_train_' + str(cnt), data_train)
num_train_samples = data_train.shape[0]
label_valid_tiles = []
label_train_tiles = []
data_valid_tiles = []
data_train_tiles = []
print(' num_train_samples, num_valid_samples, progress % : ', num_train_samples, num_valid_samples, int((f_cnt/num_files)*100))
total_num_train_samples += num_train_samples
total_num_valid_samples += num_valid_samples
print('total_num_train_samples, total_num_valid_samples: ', total_num_train_samples, total_num_valid_samples)
cnt += 1
print('** total_num_train_samples, total_num_valid_samples: ', total_num_train_samples, total_num_valid_samples)
# tile_width: Must be even!
# kernel_size: Must be odd!
def run(h5f, param_s, train_tiles, valid_tiles, lbl_param_s, lbl_train_tiles, lbl_valid_tiles,
num_keep_x_tiles=8, tile_width=64, kernel_size=3, day_night='DAY'):
border = int((kernel_size - 1)/2) + 1
param_name = param_s[0]
num_lines = h5f[param_name].shape[0]
num_pixels = h5f[param_name].shape[1] # Must be even
if day_night != 'BOTH':
solzen = get_grid_values(h5f, solzen_name, 0, 0, None, num_lines, num_pixels)
grd_s = []
for param in param_s:
fill_value, fill_value_name = get_fill_attrs(param)
try:
grd = get_grid_values(h5f, param, 0, 0, None, num_lines, num_pixels, fill_value_name=fill_value_name, fill_value=fill_value)
grd_s.append(grd)
except Exception as e:
print(e)
return
data = np.stack(grd_s)
grd_s = []
for param in lbl_param_s:
fill_value, fill_value_name = get_fill_attrs(param)
try:
grd = get_grid_values(h5f, param, 0, 0, None, num_lines*2, num_pixels*2, fill_value_name=fill_value_name, fill_value=fill_value)
grd_s.append(grd)
except Exception as e:
print(e)
return
label = np.stack(grd_s)
tile_width += 2 * border
i_skip = tile_width
j_skip = tile_width
i_start = int(num_pixels / 2) - int((num_keep_x_tiles * tile_width) / 2)
j_start = 0
num_keep_y_tiles = int(num_lines / tile_width) - 3
num_y_valid = int(num_keep_y_tiles * 0.1) + 1
num_y_train = num_keep_y_tiles - num_y_valid - 1
for j in range(num_y_train):
j_a = j_start + j * j_skip
j_b = j_a + tile_width
for i in range(num_keep_x_tiles):
i_a = i_start + i * i_skip
i_b = i_a + tile_width
if day_night == 'DAY' and not is_day(solzen[j_a:j_b, i_a:i_b]):
continue
elif day_night == 'NIGHT' and is_day(solzen[j_a:j_b, i_a:i_b]):
continue
nda = data[:, j_a:j_b, i_a:i_b]
nda_lbl = label[:, j_a*2:j_b*2, i_a*2:i_b*2]
nda = keep_tile(param_s, nda)
if nda is not None:
train_tiles.append(nda)
lbl_train_tiles.append(nda_lbl)
j_start = num_y_train * tile_width + 2*tile_width
for j in range(num_y_valid):
j_a = j_start + j * j_skip
j_b = j_a + tile_width
for i in range(num_keep_x_tiles):
i_a = i_start + i * i_skip
i_b = i_a + tile_width
if day_night == 'DAY' and not is_day(solzen[j_a:j_b, i_a:i_b]):
continue
elif day_night == 'NIGHT' and is_day(solzen[j_a:j_b, i_a:i_b]):
continue
nda = data[:, j_a:j_b, i_a:i_b]
nda_lbl = label[:, j_a * 2:j_b * 2, i_a * 2:i_b * 2]
nda = keep_tile(param_s, nda)
if nda is not None:
valid_tiles.append(nda)
lbl_valid_tiles.append(nda_lbl)
def scan(directory):
data_src = CLAVRx_VIIRS(directory)
files = data_src.flist
for idx, file in enumerate(files):
h5f = h5py.File(file, 'r')
ts = data_src.ftimes[idx][0]
try:
solzen = get_grid_values_all(h5f, 'solar_zenith_angle')
except Exception as e:
# print(e)
h5f.close()
continue
# if is_day(solzen) and moon_phase(ts):
if is_night(solzen) and moon_phase(ts):
print(file)
h5f.close()
def scan_for_location(txt_file, lon_range=[111.0, 130.0], lat_range=[14.0, 32.0]):
with open(txt_file) as file:
for idx, fpath in enumerate(file):
fpath = fpath.strip()
h5f = h5py.File(fpath, 'r')
try:
lon_s = get_grid_values_all(h5f, 'longitude', stride=4)
lat_s = get_grid_values_all(h5f, 'latitude', stride=4)
c_lon, c_lat = lon_s[406, 400], lat_s[406, 400]
if (lon_range[0] < c_lon < lon_range[1]) and (lat_range[0] < c_lat < lat_range[1]):
print(fpath)
except Exception as e:
# print(e)
h5f.close()
continue
def test_nlcomp(file):
h5f = h5py.File(file, 'r')
cld_phs = get_grid_values_all(h5f, 'cloud_phase', scale_factor_name=None, range_name=None)
keep_0 = np.invert(np.isnan(cld_phs))
reff = get_grid_values_all(h5f, 'cld_reff_nlcomp')
keep_1 = np.invert(np.isnan(reff))
opd = get_grid_values_all(h5f, 'cld_opd_nlcomp')
keep_2 = np.invert(np.isnan(opd))
cld_dz = get_grid_values_all(h5f, 'cld_geo_thick')
keep_3 = np.logical_and(np.invert(np.isnan(cld_dz)), cld_dz > 5.0)
keep = keep_0 & keep_1 & keep_2 & keep_3
cld_phs = cld_phs[keep]
reff = reff[keep]
opd = opd[keep]
cld_dz = cld_dz[keep]
lwc_c, iwc_c = compute_lwc_iwc(cld_phs, reff, opd, cld_dz)
return lwc_c, iwc_c
# def run_mean_std(directory):
#
# data_dct = {name: [] for name in mod_res_params}
# mean_dct = {name: 0 for name in mod_res_params}
# std_dct = {name: 0 for name in mod_res_params}
#
# for p in os.scandir(directory):
# if not p.is_dir():
# continue
# mod_files = glob.glob(directory+p.name+'/'+'VNP02MOD*.uwssec.nc')
#
# for idx, mfile in enumerate(mod_files):
# if idx % 8 == 0:
# h5f = h5py.File(mfile, 'r')
# for param in mod_res_params:
# name = 'observation_data/'+param
# gvals = get_grid_values_all(h5f, name, range_name=None, stride=10)
# data_dct[param].append(gvals.flatten())
# print(mfile)
# h5f.close()
#
# for param in mod_res_params:
# data = data_dct[param]
# data = np.concatenate(data)
#
# mean_dct[param] = np.nanmean(data)
# std_dct[param] = np.nanstd(data)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment