# viirs_surfrad.py 7.46 KiB
import numpy as np
import h5py
from util.util import get_grid_values, get_grid_values_all, is_night, is_day, compute_lwc_iwc, get_fill_attrs
import glob
import os
from aeolus.datasource import CLAVRx_VIIRS
from icing.moon_phase import *
from pathlib import Path
# Field used as the learning target: its 'super/' version is the label and its
# 'orig/' version is one of the input channels (see label_params / data_params).
target_param = 'cloud_probability'
# Alternative target:
# target_param = 'cld_opd_dcomp'

# Group prefixes inside the input file. Label fields under 'super/' are read
# with twice the line and pixel count of the fields under 'orig/' (see run()).
group_name_i = 'super/'
group_name_m = 'orig/'

# Full path of the solar-zenith field used for day/night screening.
solzen_name = group_name_m + 'solar_zenith'

# Fields read as labels and as input data, in stacking order.
label_params = [group_name_i + name for name in (target_param,)]
data_params = [group_name_m + name
               for name in ('temp_11_0um', 'refl_0_65um', target_param)]
def keep_tile(param, param_s, tile):
    """Screen one channel of a tile and decide whether the tile is kept.

    The channel of ``tile`` at the position of ``param`` within ``param_s`` is
    copied and passed through the screening function that matches the
    module-level ``target_param``. If ``target_param`` matches neither known
    name the copy is used unchanged.

    :param param: name of the channel to screen; must be present in ``param_s``.
    :param param_s: list of channel names, ordered like the first axis of ``tile``.
    :param tile: array indexed by channel on its first axis; modified in place
        when the tile is kept.
    :return: ``tile`` with the screened channel written back, or ``None`` if
        the screening function rejected it.
    :raises ValueError: if ``param`` is not in ``param_s`` (from ``list.index``).
    """
    channel = param_s.index(param)
    screened = tile[channel, ].copy()

    if target_param == 'cloud_probability':
        screened = process_cld_prob_(screened)
    elif target_param == 'cld_opd_dcomp':
        screened = process_cld_opd_(screened)

    # Guard clause: a rejected tile is reported as None and left untouched.
    if screened is None:
        return None

    tile[channel, ] = screened
    return tile
def process_cld_prob_(grd_k):
    """Screen a cloud-probability grid for coverage and clear/cloudy balance.

    The grid is rejected (``None``) when fewer than 98% of its values are
    non-NaN, or when the fraction of non-NaN values below 0.20 is not strictly
    between 0.40 and 0.60.

    :param grd_k: numpy array of values, possibly containing NaN.
    :return: a new array equal to ``grd_k`` with NaNs replaced by 0, or
        ``None`` if the grid is rejected. The input is not modified.
    """
    valid = np.logical_not(np.isnan(grd_k))
    n_valid = np.sum(valid)
    if n_valid / grd_k.size < 0.98:
        return None

    # Count valid pixels whose value is below the 0.20 threshold.
    n_below = np.sum(np.logical_and(valid, grd_k < 0.20))
    below_frac = n_below / n_valid
    # Written as a negated chained comparison so a NaN fraction is rejected too.
    if not (0.40 < below_frac < 0.60):
        return None

    # Replace NaNs with 0, keep every valid value as is.
    return np.where(valid, grd_k, 0)
def process_cld_opd_(grd_k):
    """Screen a cloud-optical-depth grid for coverage and in-range values.

    The grid is rejected (``None``) when fewer than 98% of its values are
    non-NaN, or when fewer than 50% of the non-NaN values lie strictly between
    0.1 and 158.0.

    :param grd_k: numpy array of values, possibly containing NaN.
    :return: a new array equal to ``grd_k`` with NaNs replaced by 0, or
        ``None`` if the grid is rejected. The input is not modified.
    """
    valid = np.logical_not(np.isnan(grd_k))
    n_valid = np.sum(valid)
    if n_valid / grd_k.size < 0.98:
        return None

    # NaNs become 0 before the range test, so the comparisons never see NaN.
    filled = np.where(valid, grd_k, 0)
    in_range = np.logical_and(valid, np.logical_and(0.1 < filled, filled < 158.0))
    if np.sum(in_range) / n_valid < 0.50:
        return None

    return filled
def run_all(directory, out_directory, day_night='ANY', pattern='clavrx_*.nc', start=10):
    """Extract training/validation tiles from every matching file and save them.

    Files are found with a recursive glob of ``directory + '**/' + pattern``.
    Each file is opened with h5py and handed to ``run``, which appends tiles to
    the running train/validation lists. After every 10th successfully processed
    file the accumulated tiles are stacked and written with ``np.save`` as
    ``data_train_<n>``, ``label_train_<n>``, ``data_valid_<n>`` and
    ``label_valid_<n>`` (``<n>`` starts at ``start`` and increases by one per
    batch). Tiles still pending after the last file are written as a final
    batch so that no samples are dropped.

    :param directory: search root; concatenated as-is with ``'**/' + pattern``,
        so it should end with a path separator.
    :param out_directory: prefix concatenated as-is with the output file names,
        so it should end with a path separator.
    :param day_night: forwarded unchanged to ``run``.
    :param pattern: glob pattern for the input file names.
    :param start: number used for the first batch of output files.
    :return: None. Progress and sample counts are printed.
    """
    cnt = start
    total_num_train_samples = 0
    total_num_valid_samples = 0
    num_keep_x_tiles = 8

    path = directory + '**' + '/' + pattern
    data_files = glob.glob(path, recursive=True)

    label_valid_tiles = []
    label_train_tiles = []
    data_valid_tiles = []
    data_train_tiles = []
    f_cnt = 0

    num_files = len(data_files)
    print('Start, number of files: ', num_files)

    def flush_batch():
        """Write the pending tiles as batch ``cnt``, report, and reset the lists."""
        nonlocal cnt, total_num_train_samples, total_num_valid_samples
        num_train_samples, num_valid_samples = _save_tile_batch(
            out_directory, cnt,
            data_train_tiles, label_train_tiles,
            data_valid_tiles, label_valid_tiles)

        # Cleared in place: the same list objects are handed to run() again.
        for tiles in (label_valid_tiles, label_train_tiles, data_valid_tiles, data_train_tiles):
            tiles.clear()

        print(' num_train_samples, num_valid_samples, progress % : ', num_train_samples, num_valid_samples, int((f_cnt/num_files)*100))
        total_num_train_samples += num_train_samples
        total_num_valid_samples += num_valid_samples
        print('total_num_train_samples, total_num_valid_samples: ', total_num_train_samples, total_num_valid_samples)
        print('--------------------------------------------------')

        cnt += 1

    for data_f in data_files:
        # To process only a subset of files, filter data_files here.
        try:
            h5f = h5py.File(data_f, 'r')
        except Exception:
            # Best effort: an unreadable file is reported and skipped.
            # (Not a bare except, so Ctrl-C still interrupts the run.)
            print('cant open file: ', data_f)
            continue

        try:
            # The context manager closes the file on success and on failure.
            with h5f:
                run(h5f, data_params, data_train_tiles, data_valid_tiles,
                    label_params, label_train_tiles, label_valid_tiles,
                    num_keep_x_tiles=num_keep_x_tiles, tile_width=64, kernel_size=7, day_night=day_night)
        except Exception as e:
            print(e)
            continue
        print(data_f)
        f_cnt += 1

        if not data_train_tiles and not data_valid_tiles:
            continue

        if (f_cnt % 10) == 0:
            flush_batch()

    # Tiles gathered since the last full batch of 10 files would otherwise be lost.
    if data_train_tiles or data_valid_tiles:
        flush_batch()

    print('** total_num_train_samples, total_num_valid_samples: ', total_num_train_samples, total_num_valid_samples)


def _save_tile_batch(out_directory, cnt, data_train_tiles, label_train_tiles,
                     data_valid_tiles, label_valid_tiles):
    """Stack the given tile lists and save the non-empty ones as .npy files.

    :return: ``(num_train_samples, num_valid_samples)``, the first-axis sizes
        of the stacked data arrays that were written (0 when a list was empty).
    """
    num_valid_samples = 0
    if len(data_valid_tiles) > 0:
        label_valid = np.stack(label_valid_tiles)
        data_valid = np.stack(data_valid_tiles)
        np.save(out_directory + 'data_valid_' + str(cnt), data_valid)
        np.save(out_directory + 'label_valid_' + str(cnt), label_valid)
        num_valid_samples = data_valid.shape[0]

    num_train_samples = 0
    if len(data_train_tiles) > 0:
        label_train = np.stack(label_train_tiles)
        data_train = np.stack(data_train_tiles)
        np.save(out_directory + 'label_train_' + str(cnt), label_train)
        np.save(out_directory + 'data_train_' + str(cnt), data_train)
        num_train_samples = data_train.shape[0]

    return num_train_samples, num_valid_samples
# tile_width: Must be even!
# kernel_size: Must be odd!
def run(h5f, param_s, train_tiles, valid_tiles, lbl_param_s, lbl_train_tiles, lbl_valid_tiles,
        num_keep_x_tiles=8, tile_width=64, kernel_size=3, day_night='DAY'):
    """Cut one opened file into training and validation tiles.

    The fields in ``param_s`` are read at the grid size of ``param_s[0]`` and
    stacked as the data array; the fields in ``lbl_param_s`` are read at twice
    that line and pixel count and stacked as the label array. The grid is then
    walked in square tiles of ``tile_width + 2 * border`` pixels,
    ``num_keep_x_tiles`` per row, centred across the pixel axis. The first
    block of tile rows feeds the training lists; after a gap of two tile rows,
    the next block feeds the validation lists. A tile is stored only if
    ``keep_tile`` accepts its label.

    :param h5f: opened input file, indexable by field name.
    :param param_s: data field names; the first one defines the grid size.
    :param train_tiles: list that accepted training data tiles are appended to.
    :param valid_tiles: list that accepted validation data tiles are appended to.
    :param lbl_param_s: label field names.
    :param lbl_train_tiles: list that accepted training label tiles are appended to.
    :param lbl_valid_tiles: list that accepted validation label tiles are appended to.
    :param num_keep_x_tiles: number of tiles taken per tile row.
    :param tile_width: tile size before the border is added. Must be even.
    :param kernel_size: used to derive the border width. Must be odd.
    :param day_night: 'DAY' keeps only tiles for which ``is_day`` is true,
        'NIGHT' only tiles for which it is false; any other value keeps all
        tiles. With 'BOTH' the solar-zenith field is not read at all.
    :return: None. Results are delivered through the four tile lists. Returns
        early (after printing the error) if a data or label field cannot be read.
    """
    border = int((kernel_size - 1)/2) + 1  # Need to add for interpolation with no edge effects

    grid_shape = h5f[param_s[0]].shape
    num_lines = grid_shape[0]
    num_pixels = grid_shape[1]  # Must be even

    # Always bound, so the tiling helper can take it even when it is not needed.
    solzen = None
    if day_night != 'BOTH':
        solzen = get_grid_values(h5f, solzen_name, 0, 0, None, num_lines, num_pixels)

    data = _read_grids(h5f, param_s, num_lines, num_pixels)
    if data is None:
        return

    label = _read_grids(h5f, lbl_param_s, num_lines * 2, num_pixels * 2)
    if label is None:
        return

    tile_width += 2 * border

    i_start = int(num_pixels / 2) - int((num_keep_x_tiles * tile_width) / 2)

    num_keep_y_tiles = int(num_lines / tile_width) - 3
    num_y_valid = int(num_keep_y_tiles * 0.15) + 1
    num_y_train = num_keep_y_tiles - num_y_valid - 1

    _collect_tiles(data, label, solzen, lbl_param_s, day_night,
                   0, num_y_train, i_start, num_keep_x_tiles, tile_width,
                   train_tiles, lbl_train_tiles)

    # Leave a gap of two tile rows between the training and validation areas.
    valid_j_start = num_y_train * tile_width + 2*tile_width
    _collect_tiles(data, label, solzen, lbl_param_s, day_night,
                   valid_j_start, num_y_valid, i_start, num_keep_x_tiles, tile_width,
                   valid_tiles, lbl_valid_tiles)


def _read_grids(h5f, names, num_lines, num_pixels):
    """Read every named field with get_grid_values and stack them on a new first axis.

    :return: the stacked array, or ``None`` (after printing the error) if any
        field could not be read.
    """
    grids = []
    for name in names:
        try:
            grids.append(get_grid_values(h5f, name, 0, 0, None, num_lines, num_pixels))
        except Exception as e:
            print(e)
            return None
    return np.stack(grids)


def _collect_tiles(data, label, solzen, lbl_param_s, day_night,
                   j_start, num_rows, i_start, num_cols, tile_width,
                   out_tiles, out_lbl_tiles):
    """Append the accepted data/label tiles of ``num_rows`` x ``num_cols`` tile positions.

    Tile positions start at (``j_start``, ``i_start``) and advance by
    ``tile_width`` in both directions. The label tile covers the same area at
    doubled indices.
    """
    max_j, max_i = data.shape[1], data.shape[2]
    lbl_max_j, lbl_max_i = label.shape[1], label.shape[2]

    for j in range(num_rows):
        j_a = j_start + j * tile_width
        j_b = j_a + tile_width
        for i in range(num_cols):
            i_a = i_start + i * tile_width
            i_b = i_a + tile_width

            # A tile reaching outside the grid would be clipped (or wrapped by
            # negative indexing) to a different shape and could not be stacked
            # with the others later, so it is skipped.
            if (j_a < 0 or i_a < 0 or j_b > max_j or i_b > max_i
                    or j_b * 2 > lbl_max_j or i_b * 2 > lbl_max_i):
                continue

            if day_night == 'DAY' and not is_day(solzen[j_a:j_b, i_a:i_b]):
                continue
            elif day_night == 'NIGHT' and is_day(solzen[j_a:j_b, i_a:i_b]):
                continue

            nda = data[:, j_a:j_b, i_a:i_b]
            nda_lbl = label[:, j_a*2:j_b*2, i_a*2:i_b*2]
            nda_lbl = keep_tile(group_name_i+target_param, lbl_param_s, nda_lbl)
            if nda_lbl is not None:
                # Store copies: a slice is a view that would keep the whole
                # per-file array alive for as long as the tile is held.
                out_tiles.append(nda.copy())
                out_lbl_tiles.append(nda_lbl.copy())