# viirs_l1b_l2.py
import numpy as np
import h5py
from util.util import get_grid_values, get_grid_values_all, is_night, is_day, compute_lwc_iwc
import glob
import os
from aeolus.datasource import CLAVRx_VIIRS
from icing.moon_phase import *
from pathlib import Path
# Dataset names that `run` reads from the data file and stacks into the
# input tiles.
emis_params = [
    'temp_10_4um_nom',
    'temp_11_0um_nom',
    'temp_12_0um_nom',
    'temp_13_3um_nom',
    'temp_3_75um_nom',
    'temp_6_7um_nom',
    'temp_6_2um_nom',
    'temp_7_3um_nom',
    'temp_8_5um_nom',
    'temp_9_7um_nom',
]

# The reflectance datasets are currently not part of the inputs:
# refl_params = ['refl_0_47um_nom', 'refl_0_65um_nom', 'refl_0_86um_nom', 'refl_1_38um_nom', 'refl_1_60um_nom']
# data_params = refl_params + emis_params
data_params = emis_params

# Dataset names that `run` reads from the label file and stacks into the
# label tiles.
l2_params = [
    'cloud_fraction',
    'cld_temp_acha',
    'cld_press_acha',
    'cld_opd_acha',
    'cld_reff_acha',
]
label_params = l2_params

# Alternative configurations kept for reference:
# data_params = ['cloud_fraction']
# label_params = ['cloud_fraction']
# data_params = ['observation_data/M15']
# label_params = ['observation_data/M15_highres']
def run_all(directory, out_directory):
    """Build training/validation tile arrays from every sub-directory of `directory`.

    Each immediate sub-directory is searched for files matching
    ``clavrx_snpp_viirs*.uwssec*.h5``.  Every file is handed to `run`
    (``mod_tile_width=16``, ``border=7``); the first four tiles it yields go
    to the validation set and the remaining ones to the training set (a file
    yielding four tiles or fewer therefore contributes to validation only).
    Files that cannot be opened, or for which `run` raises, are reported with
    ``print`` and skipped.

    The accumulated tiles are stacked and written with ``np.save`` as
    ``data_train_<n>``, ``data_valid_<n>``, ``label_train_<n>`` and
    ``label_valid_<n>``: once after every 41 contributing files and once more
    for whatever is left when a sub-directory is exhausted.  ``<n>`` starts at
    11 and grows by one per written chunk.

    Args:
        directory: Directory whose immediate sub-directories hold the inputs.
        out_directory: String prepended verbatim to every output file name;
            it must end with a path separator to denote a directory.

    Returns:
        None.  The total number of saved train/valid samples is printed.
    """
    num_train_samples, num_valid_samples = 0, 0
    cnt = 10
    num_valid_per_file = 4

    for p in os.scandir(directory):
        if not p.is_dir():
            continue

        # p.path already joins `directory` and the entry name, so this works
        # whether or not `directory` carries a trailing separator.
        data_files = glob.glob(os.path.join(p.path, 'clavrx_snpp_viirs*.uwssec*.h5'))
        # data_files = glob.glob(directory + p.name + '/' + 'VNP02MOD*.uwssec.nc')

        label_valid_tiles, label_train_tiles = [], []
        data_valid_tiles, data_train_tiles = [], []
        f_cnt = 0

        for data_f in data_files:
            # Labels currently come from the same file as the inputs.
            label_f = data_f
            # label_f = w_o_ext+'.highres'+ext
            # label_f = label_f.replace('snpp_viirs', 'VNP02MOD')
            if not os.path.exists(label_f):
                continue

            tiles = _read_tiles(data_f, label_f)
            if tiles is None:
                continue
            data_tiles, label_tiles = tiles

            if len(data_tiles) == 0 or len(label_tiles) == 0:
                continue
            if len(data_tiles) != len(label_tiles):
                print('weirdness: ', data_f)
                continue

            # Slicing (instead of indexing 0..3) tolerates files that yield
            # fewer than `num_valid_per_file` tiles.
            label_valid_tiles.extend(label_tiles[:num_valid_per_file])
            label_train_tiles.extend(label_tiles[num_valid_per_file:])
            data_valid_tiles.extend(data_tiles[:num_valid_per_file])
            data_train_tiles.extend(data_tiles[num_valid_per_file:])

            # f_cnt runs 0..40, so a chunk is flushed on every 41st
            # contributing file (that file's tiles included).
            if f_cnt == 40:
                saved = _stack_and_save(out_directory, cnt + 1,
                                        data_train_tiles, data_valid_tiles,
                                        label_train_tiles, label_valid_tiles)
                if saved is not None:
                    cnt += 1
                    num_train_samples += saved[0]
                    num_valid_samples += saved[1]
                label_valid_tiles, label_train_tiles = [], []
                data_valid_tiles, data_train_tiles = [], []
                f_cnt = 0
            else:
                f_cnt += 1

        # Flush whatever is left over for this sub-directory.
        saved = _stack_and_save(out_directory, cnt + 1,
                                data_train_tiles, data_valid_tiles,
                                label_train_tiles, label_valid_tiles)
        if saved is not None:
            cnt += 1
            num_train_samples += saved[0]
            num_valid_samples += saved[1]

    print('num_train_samples, num_valid_samples: ', num_train_samples, num_valid_samples)


def _read_tiles(data_f, label_f):
    """Open both files, run the tiler and return ``(data_tiles, label_tiles)``.

    Returns None (after printing the reason) when either file cannot be
    opened or `run` raises.  Both files are closed before returning.
    """
    try:
        data_h5f = h5py.File(data_f, 'r')
    except Exception:
        print('cant open file: ', data_f)
        return None

    with data_h5f:
        try:
            label_h5f = h5py.File(label_f, 'r')
        except Exception:
            print('cant open file: ', label_f)
            return None

        with label_h5f:
            data_tiles = []
            label_tiles = []
            try:
                run(data_h5f, label_h5f, data_tiles, label_tiles, mod_tile_width=16, border=7)
            except Exception as e:
                print(e)
                return None

    return data_tiles, label_tiles


def _stack_and_save(out_directory, cnt, data_train_tiles, data_valid_tiles,
                    label_train_tiles, label_valid_tiles):
    """Stack the accumulated tiles and write the four arrays of chunk `cnt`.

    Returns ``(n_train, n_valid)``, the leading dimensions of the saved data
    arrays, or None when nothing was written (no training tiles, or the
    data/label training lists differ in length).
    """
    if len(label_train_tiles) == 0 or len(data_train_tiles) == 0:
        return None
    if len(label_train_tiles) != len(data_train_tiles):
        print('weirdness')
        return None

    label_valid = np.stack(label_valid_tiles)
    label_train = np.stack(label_train_tiles)
    data_valid = np.stack(data_valid_tiles)
    data_train = np.stack(data_train_tiles)

    np.save(out_directory+'data_train_' + str(cnt), data_train)
    np.save(out_directory+'data_valid_' + str(cnt), data_valid)
    np.save(out_directory+'label_train_' + str(cnt), label_train)
    np.save(out_directory+'label_valid_' + str(cnt), label_valid)

    return data_train.shape[0], data_valid.shape[0]
def run(data_h5f, label_h5f, data_tiles, label_tiles, mod_tile_width=64, border=9):
    """Cut matching input/label tiles out of one granule and append them to the lists.

    Every dataset named in the module-level `data_params` is read from
    `data_h5f` and every dataset named in `label_params` from `label_h5f`
    (via `get_grid_values`); each group is stacked along a new leading axis.
    Tiles are then taken from a single column of positions, stepping
    ``3 * mod_tile_width`` lines at a time for at most 48 rows.  A position
    whose padded extent would reach the last line/pixel of the data grid is
    skipped.

    A data tile spans ``mod_tile_width + 2 * border`` lines and pixels; the
    corresponding label tile spans ``mod_tile_width * factor`` and starts at
    the un-padded origin scaled by ``factor``, where ``factor`` is the
    integer ratio of label-grid to data-grid pixel counts.

    Args:
        data_h5f: Open file object indexed by dataset name for the inputs.
        label_h5f: Same for the labels; ``None`` means use `data_h5f`.
        data_tiles: List that receives the input tiles (mutated in place).
        label_tiles: List that receives the label tiles (mutated in place).
        mod_tile_width: Un-padded tile width in data-grid pixels.
        border: Padding added on every side of a data tile.

    Returns:
        None.  If any dataset cannot be read the error is printed and the
        function returns without appending anything.
    """
    if label_h5f is None:
        label_h5f = data_h5f

    # Grid sizes are taken from the first dataset of each group.
    data_shape = data_h5f[data_params[0]].shape
    n_lines, n_pixels = data_shape[0], data_shape[1]
    label_shape = label_h5f[label_params[0]].shape
    n_label_lines, n_label_pixels = label_shape[0], label_shape[1]

    factor = int(n_label_pixels / n_pixels)
    img_tile_width = mod_tile_width * factor

    def _read_grids(h5f, names, num_lines, num_pixels):
        # Full-grid read of each named dataset; None signals a failed read.
        grids = []
        for name in names:
            try:
                grids.append(get_grid_values(h5f, name, 0, 0, None, num_lines, num_pixels, range_name=None))
            except Exception as err:
                print(err)
                return None
        return grids

    data_grids = _read_grids(data_h5f, data_params, n_lines, n_pixels)
    if data_grids is None:
        return
    label_grids = _read_grids(label_h5f, label_params, n_label_lines, n_label_pixels)
    if label_grids is None:
        return

    mod_data = np.stack(data_grids)
    img_data = np.stack(label_grids)

    tiles_across = 1   # previously 3
    tiles_down = 48    # previously 16
    stride = 3 * mod_tile_width
    # Left edge of the tile column: half a tile width left of the mid pixel.
    first_col = int(n_pixels / 2) - int((mod_tile_width) / 2)

    for row in range(tiles_down):
        top = row * stride + border
        bottom = top + mod_tile_width + border
        if bottom > n_lines - 1:
            continue

        for col in range(tiles_across):
            left = col * stride + first_col + border
            right = left + mod_tile_width + border
            if right > n_pixels - 1:
                continue

            data_tiles.append(mod_data[:, top - border:bottom, left - border:right])

            label_top = top * factor
            label_left = left * factor
            label_tiles.append(
                img_data[:, label_top:label_top + img_tile_width, label_left:label_left + img_tile_width]
            )
def scan(directory):
    """Print the path of every file that passes the night/moon-phase test.

    The file list and per-file times come from ``CLAVRx_VIIRS(directory)``
    (``flist`` and ``ftimes``).  For each file the ``solar_zenith_angle``
    grid is read; the path is printed when ``is_night(solzen)`` and
    ``moon_phase(ts)`` are both truthy, where ``ts`` is the first element of
    that file's ``ftimes`` entry.  Files whose solar zenith grid cannot be
    read are skipped silently.

    Args:
        directory: Location passed to ``CLAVRx_VIIRS``.
    """
    data_src = CLAVRx_VIIRS(directory)

    for idx, file in enumerate(data_src.flist):
        # The context manager closes the file on every path, including when
        # the time lookup or one of the predicates raises.
        with h5py.File(file, 'r') as h5f:
            ts = data_src.ftimes[idx][0]

            try:
                solzen = get_grid_values_all(h5f, 'solar_zenith_angle')
            except Exception:
                # Deliberately best-effort: unreadable files are ignored.
                continue

            # if is_day(solzen) and moon_phase(ts):
            if is_night(solzen) and moon_phase(ts):
                print(file)
def scan_for_location(txt_file, lon_range=[111.0, 130.0], lat_range=[14.0, 32.0]):
    """Print the listed files whose reference grid point lies inside a lon/lat box.

    `txt_file` is read line by line, one file path per line.  For each path
    the ``longitude`` and ``latitude`` grids are read and the values at index
    ``[1624, 1600]`` are compared (strictly) against the given ranges; paths
    that fall inside both ranges are printed.  Files whose grids cannot be
    read or indexed are skipped silently.

    Args:
        txt_file: Text file containing one path per line; blank lines are
            ignored.
        lon_range: ``[min, max]`` longitude bounds (exclusive).  Only read,
            never mutated.
        lat_range: ``[min, max]`` latitude bounds (exclusive).  Only read,
            never mutated.
    """
    with open(txt_file) as listing:
        for line in listing:
            # Lines keep their trailing newline when iterating a file; strip
            # it so the path can actually be opened and prints cleanly.
            fpath = line.strip()
            if not fpath:
                continue

            # The context manager closes the file on success as well as on
            # failure.
            with h5py.File(fpath, 'r') as h5f:
                try:
                    lon_s = get_grid_values_all(h5f, 'longitude')
                    lat_s = get_grid_values_all(h5f, 'latitude')
                    # NOTE(review): fixed grid position, presumably near the
                    # granule centre; confirm against the grid dimensions.
                    c_lon, c_lat = lon_s[1624, 1600], lat_s[1624, 1600]
                    if (lon_range[0] < c_lon < lon_range[1]) and (lat_range[0] < c_lat < lat_range[1]):
                        print(fpath)
                except Exception:
                    # Deliberately best-effort: unreadable files are ignored.
                    continue
def test_nlcomp(file):
    """Compute LWC/IWC from the NLCOMP cloud products of one file.

    Reads ``cloud_phase``, ``cld_reff_nlcomp``, ``cld_opd_nlcomp`` and
    ``cld_geo_thick``, keeps only the grid points where none of them is NaN
    and ``cld_geo_thick`` exceeds 5.0, and passes the selected values to
    `compute_lwc_iwc`.

    Args:
        file: Path of the HDF5 file to read.

    Returns:
        The ``(lwc_c, iwc_c)`` pair returned by `compute_lwc_iwc`.
    """
    # All work happens inside the context manager so the file is open for as
    # long as the grids are in use and is always closed afterwards.
    with h5py.File(file, 'r') as h5f:
        cld_phs = get_grid_values_all(h5f, 'cloud_phase', scale_factor_name=None, range_name=None)
        reff = get_grid_values_all(h5f, 'cld_reff_nlcomp')
        opd = get_grid_values_all(h5f, 'cld_opd_nlcomp')
        cld_dz = get_grid_values_all(h5f, 'cld_geo_thick')

        keep_0 = np.invert(np.isnan(cld_phs))
        keep_1 = np.invert(np.isnan(reff))
        keep_2 = np.invert(np.isnan(opd))
        # NOTE(review): 5.0 is a minimum-thickness cut-off; its unit is not
        # visible here, confirm against the product definition.
        keep_3 = np.logical_and(np.invert(np.isnan(cld_dz)), cld_dz > 5.0)

        keep = keep_0 & keep_1 & keep_2 & keep_3

        lwc_c, iwc_c = compute_lwc_iwc(cld_phs[keep], reff[keep], opd[keep], cld_dz[keep])

        return lwc_c, iwc_c
# def run_mean_std(directory):
#
# data_dct = {name: [] for name in mod_res_params}
# mean_dct = {name: 0 for name in mod_res_params}
# std_dct = {name: 0 for name in mod_res_params}
#
# for p in os.scandir(directory):
# if not p.is_dir():
# continue
# mod_files = glob.glob(directory+p.name+'/'+'VNP02MOD*.uwssec.nc')
#
# for idx, mfile in enumerate(mod_files):
# if idx % 8 == 0:
# h5f = h5py.File(mfile, 'r')
# for param in mod_res_params:
# name = 'observation_data/'+param
# gvals = get_grid_values_all(h5f, name, range_name=None, stride=10)
# data_dct[param].append(gvals.flatten())
# print(mfile)
# h5f.close()
#
# for param in mod_res_params:
# data = data_dct[param]
# data = np.concatenate(data)
#
# mean_dct[param] = np.nanmean(data)
# std_dct[param] = np.nanstd(data)