# pirep_goes.py
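"""Collocate PIREP icing reports with CLAVR-x cloud products and GOES-16 ABI
L1b brightness temperatures/reflectances, and build HDF5 training files for
icing prediction."""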
from icing.pireps import pirep_icing
import numpy as np
import pickle
import matplotlib.pyplot as plt
import os
from util.util import get_time_tuple_utc, GenericException, add_time_range_to_filename, is_night, is_day
from aeolus.datasource import CLAVRx, GOESL1B
from util.geos_nav import GEOSNavigation
import h5py
import re
import datetime
from datetime import timezone
import glob
goes_date_format = '%Y%j%H'
goes16_directory = '/arcdata/goes/grb/goes16' # /year/date/abi/L1b/RadC
clavrx_dir = '/ships19/cloud/scratch/ICING/'
dir_fmt = '%Y_%m_%d_%j'
# dir_list = [f.path for f in os.scandir('.') if f.is_dir()]
ds_dct = {}
goes_ds_dct = {}
#pirep_file = '/home/rink/data/pireps/pireps_2019010000_2019063023.csv'
pirep_file = '/home/rink/data/pireps/pireps_20180101_20200331.csv'
l1b_ds_list = ['temp_10_4um_nom', 'temp_11_0um_nom', 'temp_12_0um_nom', 'temp_13_3um_nom', 'temp_3_75um_nom',
'temp_6_2um_nom', 'temp_6_7um_nom', 'temp_7_3um_nom', 'temp_8_5um_nom', 'temp_9_7um_nom',
'refl_0_47um_nom', 'refl_0_55um_nom', 'refl_0_65um_nom', 'refl_0_86um_nom', 'refl_1_38um_nom',
'refl_1_60um_nom']
l1b_ds_types = ['f4'] * len(l1b_ds_list)
ds_list = ['cld_height_acha', 'cld_geo_thick', 'cld_press_acha', 'sensor_zenith_angle', 'supercooled_prob_acha',
'supercooled_cloud_fraction', 'cld_temp_acha', 'cld_opd_acha', 'solar_zenith_angle',
'cld_reff_acha', 'cld_reff_dcomp', 'cld_reff_dcomp_1', 'cld_reff_dcomp_2', 'cld_reff_dcomp_3',
'cld_opd_dcomp', 'cld_opd_dcomp_1', 'cld_opd_dcomp_2', 'cld_opd_dcomp_3', 'cld_cwp_dcomp', 'iwc_dcomp',
'lwc_dcomp', 'cld_emiss_acha', 'conv_cloud_fraction', 'cloud_type', 'cloud_phase', 'cloud_mask']
ds_types = ['f4'] * (len(ds_list) - 3) + ['i4'] * 3  # cloud_type, cloud_phase, cloud_mask are integer flags
a_clvr_file = '/home/rink/data/clavrx/clavrx_OR_ABI-L1b-RadC-M3C01_G16_s20190020002186.level2.nc'
icing_files = glob.glob('/data/Personal/rink/icing2_ml/icing_2*.h5')
icing_l1b_files = []
no_icing_files = glob.glob('/data/Personal/rink/icing2_ml/no_icing_2*.h5')
no_icing_l1b_files = []
train_params_day = ['cld_height_acha', 'cld_geo_thick', 'supercooled_cloud_fraction', 'cld_temp_acha', 'cld_press_acha',
'solar_zenith_angle', 'cld_reff_dcomp', 'cld_opd_dcomp', 'cld_cwp_dcomp', 'iwc_dcomp', 'lwc_dcomp',
'cloud_phase', 'cloud_mask']
train_params_night = ['cld_height_acha', 'cld_geo_thick', 'supercooled_cloud_fraction', 'cld_temp_acha',
'cld_press_acha', 'solar_zenith_angle', 'cld_reff_acha', 'cld_opd_acha', 'cloud_phase', 'cloud_mask']
def setup():
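    """Parse the PIREP CSV into icing, no-icing, and negative-icing report dictionaries keyed by time."""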
ice_dict, no_ice_dict, neg_ice_dict = pirep_icing(pirep_file)
return ice_dict, no_ice_dict, neg_ice_dict
def get_clavrx_datasource(timestamp):
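    """Return a CLAVRx datasource for the UTC day containing timestamp, caching one per day directory."""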
dt_obj, time_tup = get_time_tuple_utc(timestamp)
date_dir_str = dt_obj.strftime(dir_fmt)
ds = ds_dct.get(date_dir_str)
if ds is None:
ds = CLAVRx(clavrx_dir + date_dir_str + '/')
ds_dct[date_dir_str] = ds
return ds
def get_goes_datasource(timestamp):
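    """Return a GOES-16 L1b RadC datasource for the UTC day containing timestamp, caching one per day directory."""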
dt_obj, time_tup = get_time_tuple_utc(timestamp)
yr_dir = str(dt_obj.timetuple().tm_year)
date_dir = dt_obj.strftime(dir_fmt)
files_path = goes16_directory + '/' + yr_dir + '/' + date_dir + '/abi' + '/L1b' + '/RadC/'
ds = goes_ds_dct.get(date_dir)
if ds is None:
ds = GOESL1B(files_path)
goes_ds_dct[date_dir] = ds
return ds
def get_grid_values(h5f, grid_name, j_c, i_c, half_width, scale_factor_name='scale_factor', add_offset_name='add_offset'):
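    """Extract a (2*half_width + 1) square box centered at (j_c, i_c) from grid_name.

    Common fill values are replaced with NaN, and scale/offset attributes are
    applied when present. Returns None if the box extends past the grid edge.
    """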
hfds = h5f[grid_name]
attrs = hfds.attrs
ylen, xlen = hfds.shape
j_l = j_c-half_width
i_l = i_c-half_width
if j_l < 0 or i_l < 0:
return None
j_r = j_c+half_width+1
i_r = i_c+half_width+1
if j_r >= ylen or i_r >= xlen:
return None
grd_vals = hfds[j_l:j_r, i_l:i_r]
    grd_vals = np.where(np.isin(grd_vals, (-999, -127, -32768)), np.nan, grd_vals)  # fill values -> NaN
if attrs is None:
return grd_vals
    if scale_factor_name is not None:
        scale_factor = attrs.get(scale_factor_name)
        if scale_factor is not None:
            grd_vals = grd_vals * scale_factor[0]
    if add_offset_name is not None:
        add_offset = attrs.get(add_offset_name)
        if add_offset is not None:
            grd_vals = grd_vals + add_offset[0]
return grd_vals
def create_file(filename, data_dct, ds_list, ds_types, lon_c, lat_c, time_s, fl_alt_s, icing_intensity, unq_ids):
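    """Write the extracted grids plus PIREP longitude/latitude/time/altitude/intensity
    to a new HDF5 file, copying per-dataset attributes from a template CLAVR-x file."""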
h5f_expl = h5py.File(a_clvr_file, 'r')
h5f = h5py.File(filename, 'w')
for idx, ds_name in enumerate(ds_list):
data = data_dct[ds_name]
h5f.create_dataset(ds_name, data=data, dtype=ds_types[idx])
lon_ds = h5f.create_dataset('longitude', data=lon_c, dtype='f4')
lon_ds.dims[0].label = 'time'
lon_ds.attrs.create('units', data='degrees_east')
lon_ds.attrs.create('long_name', data='PIREP longitude')
lat_ds = h5f.create_dataset('latitude', data=lat_c, dtype='f4')
lat_ds.dims[0].label = 'time'
lat_ds.attrs.create('units', data='degrees_north')
lat_ds.attrs.create('long_name', data='PIREP latitude')
time_ds = h5f.create_dataset('time', data=time_s)
time_ds.dims[0].label = 'time'
time_ds.attrs.create('units', data='seconds since 1970-1-1 00:00:00')
time_ds.attrs.create('long_name', data='PIREP time')
ice_alt_ds = h5f.create_dataset('icing_altitude', data=fl_alt_s, dtype='f4')
ice_alt_ds.dims[0].label = 'time'
ice_alt_ds.attrs.create('units', data='m')
ice_alt_ds.attrs.create('long_name', data='PIREP altitude')
if icing_intensity is not None:
icing_int_ds = h5f.create_dataset('icing_intensity', data=icing_intensity, dtype='i4')
icing_int_ds.attrs.create('long_name', data='From PIREP. 0:No intensity report, 1:Trace, 2:Light, 3:Light Moderate, 4:Moderate, 5:Moderate Severe, 6:Severe')
unq_ids_ds = h5f.create_dataset('unique_id', data=unq_ids, dtype='i4')
unq_ids_ds.attrs.create('long_name', data='ID mapping to PIREP icing dictionary: see pireps.py')
# copy relevant attributes
for ds_name in ds_list:
h5f_ds = h5f[ds_name]
h5f_ds.attrs.create('standard_name', data=h5f_expl[ds_name].attrs.get('standard_name'))
h5f_ds.attrs.create('long_name', data=h5f_expl[ds_name].attrs.get('long_name'))
h5f_ds.attrs.create('units', data=h5f_expl[ds_name].attrs.get('units'))
h5f_ds.dims[0].label = 'time'
h5f_ds.dims[1].label = 'y'
h5f_ds.dims[2].label = 'x'
h5f.close()
h5f_expl.close()
def run(pirep_dct, outfile=None, outfile_l1b=None, dt_str_start=None, dt_str_end=None, reduce=False):
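    """Collocate PIREP reports with CLAVR-x product and L1b grids and write both files.

    Optionally restrict to [dt_str_start, dt_str_end] (format '%Y-%m-%d_%H:%M').
    If reduce is True, keep at most one report per timestamp.
    """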
time_keys = list(pirep_dct.keys())
l1b_grd_dct = {name: [] for name in l1b_ds_list}
ds_grd_dct = {name: [] for name in ds_list}
t_start = None
t_end = None
if (dt_str_start is not None) and (dt_str_end is not None):
        dto = datetime.datetime.strptime(dt_str_start, '%Y-%m-%d_%H:%M').replace(tzinfo=timezone.utc)
        t_start = dto.timestamp()
        dto = datetime.datetime.strptime(dt_str_end, '%Y-%m-%d_%H:%M').replace(tzinfo=timezone.utc)
        t_end = dto.timestamp()
nav = GEOSNavigation(sub_lon=-75.0, CFAC=5.6E-05, COFF=-0.101332, LFAC=-5.6E-05, LOFF=0.128212, num_elems=2500, num_lines=1500)
lon_s = np.zeros(1)
lat_s = np.zeros(1)
last_clvr_file = None
last_h5f = None
lon_c = []
lat_c = []
time_s = []
fl_alt_s = []
ice_int_s = []
unq_ids = []
for idx, time in enumerate(time_keys):
if t_start is not None:
if time < t_start:
continue
if time > t_end:
continue
try:
clvr_ds = get_clavrx_datasource(time)
except Exception:
continue
clvr_file = clvr_ds.get_file(time)[0]
if clvr_file is None:
continue
if clvr_file != last_clvr_file:
            try:
                h5f = h5py.File(clvr_file, 'r')
            except Exception:
                # do not touch h5f here: it may be unassigned on the first pass,
                # or still referenced by last_h5f
                print('Problem with file: ', clvr_file)
                continue
if last_h5f is not None:
last_h5f.close()
last_h5f = h5f
last_clvr_file = clvr_file
else:
h5f = last_h5f
reports = pirep_dct[time]
for tup in reports:
            lat, lon, fl, intensity, uid, rpt_str = tup
lat_s[0] = lat
lon_s[0] = lon
cc, ll = nav.earth_to_lc_s(lon_s, lat_s)
if cc[0] < 0:
continue
cnt_a = 0
for didx, ds_name in enumerate(ds_list):
gvals = get_grid_values(h5f, ds_name, ll[0], cc[0], 20)
if gvals is not None:
ds_grd_dct[ds_name].append(gvals)
cnt_a += 1
cnt_b = 0
for didx, ds_name in enumerate(l1b_ds_list):
gvals = get_grid_values(h5f, ds_name, ll[0], cc[0], 20)
if gvals is not None:
l1b_grd_dct[ds_name].append(gvals)
cnt_b += 1
            if cnt_a > 0 and cnt_a != len(ds_list):
                raise GenericException('inconsistent number of CLAVR-x product grids extracted')
            if cnt_b > 0 and cnt_b != len(l1b_ds_list):
                raise GenericException('inconsistent number of L1b grids extracted')
if cnt_a == len(ds_list) and cnt_b == len(l1b_ds_list):
lon_c.append(lon_s[0])
lat_c.append(lat_s[0])
time_s.append(time)
fl_alt_s.append(fl)
                ice_int_s.append(intensity)
unq_ids.append(uid)
            if reduce:
                break
if len(time_s) == 0:
return
    t_start = time_s[0]
    t_end = time_s[-1]
data_dct = {}
for ds_name in ds_list:
data_dct[ds_name] = np.array(ds_grd_dct[ds_name])
lon_c = np.array(lon_c)
lat_c = np.array(lat_c)
time_s = np.array(time_s)
fl_alt_s = np.array(fl_alt_s)
ice_int_s = np.array(ice_int_s)
unq_ids = np.array(unq_ids)
if outfile is not None:
outfile = add_time_range_to_filename(outfile, t_start, t_end)
create_file(outfile, data_dct, ds_list, ds_types, lon_c, lat_c, time_s, fl_alt_s, ice_int_s, unq_ids)
data_dct = {}
for ds_name in l1b_ds_list:
data_dct[ds_name] = np.array(l1b_grd_dct[ds_name])
if outfile_l1b is not None:
outfile_l1b = add_time_range_to_filename(outfile_l1b, t_start, t_end)
create_file(outfile_l1b, data_dct, l1b_ds_list, l1b_ds_types, lon_c, lat_c, time_s, fl_alt_s, ice_int_s, unq_ids)
def analyze(ice_dct, no_ice_dct):
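    """Print sorted GOES start-time strings for icing files: those sharing times
    with no-icing files, plus up to 8000 randomly selected others."""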
last_file = None
ice_files = []
ice_times = []
for ts in list(ice_dct.keys()):
try:
ds = get_goes_datasource(ts)
goes_file, t_0, _ = ds.get_file(ts)
if goes_file is not None and goes_file != last_file:
ice_files.append(goes_file)
ice_times.append(t_0)
last_file = goes_file
except Exception:
continue
last_file = None
no_ice_files = []
no_ice_times = []
for ts in list(no_ice_dct.keys()):
try:
ds = get_goes_datasource(ts)
goes_file, t_0, _ = ds.get_file(ts)
if goes_file is not None and goes_file != last_file:
no_ice_files.append(goes_file)
no_ice_times.append(t_0)
last_file = goes_file
except Exception:
continue
ice_times = np.array(ice_times)
no_ice_times = np.array(no_ice_times)
itrsct_vals, comm1, comm2 = np.intersect1d(no_ice_times, ice_times, return_indices=True)
ice_indexes = np.arange(len(ice_times))
ucomm2 = np.setxor1d(comm2, ice_indexes)
np.random.seed(42)
np.random.shuffle(ucomm2)
ucomm2 = ucomm2[0:8000]
files_comm = []
for i in comm2:
files_comm.append(ice_files[i])
files_extra = []
times_extra = []
for i in ucomm2:
files_extra.append(ice_files[i])
times_extra.append(ice_times[i])
files = files_comm + files_extra
times = itrsct_vals.tolist() + times_extra
times = np.array(times)
sidxs = np.argsort(times)
for i in sidxs:
filename = os.path.split(files[i])[1]
        so = re.search(r'_s\d{11}', filename)
dt_str = so.group()
print(dt_str[2:])
def process_2(ice_dct, no_ice_dct, neg_ice_dct):
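    """Subsample the report dictionaries by icing intensity: keep all intensity 1
    and 3-6 timestamps, at most 60000 intensity-2, 150000 no-icing, and 10000
    negative-icing."""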
new_ice_dct = {}
new_no_ice_dct = {}
new_neg_ice_dct = {}
ice_keys_5_6 = []
ice_keys_1 = []
ice_keys_4 = []
ice_keys_3 = []
ice_keys_2 = []
print('num keys ice, no_ice, neg_ice: ', len(ice_dct), len(no_ice_dct), len(neg_ice_dct))
no_intensity_cnt = 0
    num_ice_reports = 0
for ts in list(ice_dct.keys()):
rpts = ice_dct[ts]
for tup in rpts:
            num_ice_reports += 1
if tup[3] == 5 or tup[3] == 6:
ice_keys_5_6.append(ts)
elif tup[3] == 1:
ice_keys_1.append(ts)
elif tup[3] == 4:
ice_keys_4.append(ts)
elif tup[3] == 3:
ice_keys_3.append(ts)
elif tup[3] == 2:
ice_keys_2.append(ts)
else:
no_intensity_cnt += 1
no_ice_keys = []
for ts in list(no_ice_dct.keys()):
rpts = no_ice_dct[ts]
for tup in rpts:
no_ice_keys.append(ts)
neg_ice_keys = []
for ts in list(neg_ice_dct.keys()):
rpts = neg_ice_dct[ts]
for tup in rpts:
neg_ice_keys.append(ts)
    print('num ice reports, no ice, neg ice: ', num_ice_reports, len(no_ice_keys), len(neg_ice_keys))
print('------------------------------------------------')
ice_keys_5_6 = np.array(ice_keys_5_6)
print('5_6: ', ice_keys_5_6.shape)
ice_keys_4 = np.array(ice_keys_4)
print('4: ', ice_keys_4.shape)
ice_keys_3 = np.array(ice_keys_3)
print('3: ', ice_keys_3.shape)
ice_keys_2 = np.array(ice_keys_2)
print('2: ', ice_keys_2.shape)
np.random.seed(42)
np.random.shuffle(ice_keys_2)
ice_keys_2 = ice_keys_2[0:60000]
ice_keys_1 = np.array(ice_keys_1)
print('1: ', ice_keys_1.shape)
print('no intensity: ', no_intensity_cnt)
ice_keys = np.concatenate([ice_keys_5_6, ice_keys_1, ice_keys_2, ice_keys_3, ice_keys_4])
uniq_sorted_keys = np.unique(ice_keys)
print('ice: ', ice_keys.shape, uniq_sorted_keys.shape)
uniq_sorted_keys = uniq_sorted_keys.tolist()
for key in uniq_sorted_keys:
new_ice_dct[key] = ice_dct[key]
no_ice_keys = np.array(no_ice_keys)
print('no ice total: ', no_ice_keys.shape)
np.random.seed(42)
np.random.shuffle(no_ice_keys)
no_ice_keys = no_ice_keys[0:150000]
uniq_sorted_no_ice = np.unique(no_ice_keys)
print('no ice: ', no_ice_keys.shape, uniq_sorted_no_ice.shape)
uniq_sorted_no_ice = uniq_sorted_no_ice.tolist()
for key in uniq_sorted_no_ice:
new_no_ice_dct[key] = no_ice_dct[key]
neg_ice_keys = np.array(neg_ice_keys)
print('neg ice total: ', neg_ice_keys.shape)
np.random.seed(42)
np.random.shuffle(neg_ice_keys)
neg_ice_keys = neg_ice_keys[0:10000]
uniq_sorted_neg_ice = np.unique(neg_ice_keys)
print('neg ice: ', neg_ice_keys.shape, uniq_sorted_neg_ice.shape)
for key in uniq_sorted_neg_ice:
new_neg_ice_dct[key] = neg_ice_dct[key]
return new_ice_dct, new_no_ice_dct, new_neg_ice_dct
def process_1(ice_dct, no_ice_dct, neg_ice_dct):
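    """Like process_2, but only for reports with a matching GOES file: keep at
    most 30000 intensity-2, 50000 no-icing, and 5000 negative-icing timestamps."""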
new_ice_dct = {}
new_no_ice_dct = {}
new_neg_ice_dct = {}
last_file = None
ice_files_5_6 = []
ice_times_5_6 = []
ice_keys_5_6 = []
ice_files_1 = []
ice_times_1 = []
ice_keys_1 = []
ice_files_4 = []
ice_times_4 = []
ice_keys_4 = []
ice_files_3 = []
ice_times_3 = []
ice_keys_3 = []
ice_files_2 = []
ice_times_2 = []
ice_keys_2 = []
print('num keys ice, no_ice, neg_ice: ', len(ice_dct), len(no_ice_dct), len(neg_ice_dct))
for ts in list(ice_dct.keys()):
try:
ds = get_goes_datasource(ts)
goes_file, t_0, _ = ds.get_file(ts)
if goes_file is not None and goes_file != last_file:
rpts = ice_dct[ts]
for tup in rpts:
if tup[3] == 5 or tup[3] == 6:
ice_files_5_6.append(goes_file)
ice_times_5_6.append(t_0)
ice_keys_5_6.append(ts)
elif tup[3] == 1:
ice_files_1.append(goes_file)
ice_times_1.append(t_0)
ice_keys_1.append(ts)
elif tup[3] == 4:
ice_files_4.append(goes_file)
ice_times_4.append(t_0)
ice_keys_4.append(ts)
elif tup[3] == 3:
ice_files_3.append(goes_file)
ice_times_3.append(t_0)
ice_keys_3.append(ts)
else:
ice_files_2.append(goes_file)
ice_times_2.append(t_0)
ice_keys_2.append(ts)
last_file = goes_file
except Exception:
continue
last_file = None
no_ice_files = []
no_ice_times = []
no_ice_keys = []
for ts in list(no_ice_dct.keys()):
try:
ds = get_goes_datasource(ts)
goes_file, t_0, _ = ds.get_file(ts)
if goes_file is not None and goes_file != last_file:
rpts = no_ice_dct[ts]
for tup in rpts:
no_ice_files.append(goes_file)
no_ice_times.append(t_0)
no_ice_keys.append(ts)
last_file = goes_file
except Exception:
continue
last_file = None
neg_ice_files = []
neg_ice_times = []
neg_ice_keys = []
for ts in list(neg_ice_dct.keys()):
try:
ds = get_goes_datasource(ts)
goes_file, t_0, _ = ds.get_file(ts)
if goes_file is not None and goes_file != last_file:
rpts = neg_ice_dct[ts]
for tup in rpts:
neg_ice_files.append(goes_file)
neg_ice_times.append(t_0)
neg_ice_keys.append(ts)
last_file = goes_file
except Exception:
continue
ice_times_5_6 = np.array(ice_times_5_6)
ice_keys_5_6 = np.array(ice_keys_5_6)
print('5_6: ', ice_times_5_6.shape)
ice_times_4 = np.array(ice_times_4)
ice_keys_4 = np.array(ice_keys_4)
print('4: ', ice_times_4.shape)
ice_times_3 = np.array(ice_times_3)
ice_keys_3 = np.array(ice_keys_3)
print('3: ', ice_times_3.shape)
ice_times_2 = np.array(ice_times_2)
ice_keys_2 = np.array(ice_keys_2)
print('2: ', ice_times_2.shape)
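    # shuffle times and keys with the same seed so the two arrays stay aligned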
np.random.seed(42)
np.random.shuffle(ice_times_2)
np.random.seed(42)
np.random.shuffle(ice_keys_2)
ice_keys_2 = ice_keys_2[0:30000]
ice_times_1 = np.array(ice_times_1)
ice_keys_1 = np.array(ice_keys_1)
print('1: ', ice_times_1.shape)
ice_times = np.concatenate([ice_times_5_6, ice_times_1, ice_times_2, ice_times_3, ice_times_4])
ice_keys = np.concatenate([ice_keys_5_6, ice_keys_1, ice_keys_2, ice_keys_3, ice_keys_4])
uniq_sorted = np.unique(ice_times)
uniq_sorted_keys = np.unique(ice_keys)
print(ice_times.shape, uniq_sorted.shape)
print(ice_keys.shape, uniq_sorted_keys.shape)
uniq_sorted_keys = uniq_sorted_keys.tolist()
for key in uniq_sorted_keys:
new_ice_dct[key] = ice_dct[key]
no_ice_times = np.array(no_ice_times)
neg_ice_times = np.array(neg_ice_times)
print('no ice: ', no_ice_times.shape)
print('neg ice: ', neg_ice_times.shape)
no_ice_keys = np.array(no_ice_keys)
np.random.seed(42)
np.random.shuffle(no_ice_keys)
no_ice_keys = no_ice_keys[0:50000]
uniq_sorted_no_ice = np.unique(no_ice_keys)
print(no_ice_keys.shape, uniq_sorted_no_ice.shape)
uniq_sorted_no_ice = uniq_sorted_no_ice.tolist()
for key in uniq_sorted_no_ice:
new_no_ice_dct[key] = no_ice_dct[key]
neg_ice_keys = np.array(neg_ice_keys)
np.random.seed(42)
np.random.shuffle(neg_ice_keys)
neg_ice_keys = neg_ice_keys[0:5000]
uniq_sorted_neg_ice = np.unique(neg_ice_keys)
print(neg_ice_keys.shape, uniq_sorted_neg_ice.shape)
for key in uniq_sorted_neg_ice:
new_neg_ice_dct[key] = neg_ice_dct[key]
return new_ice_dct, new_no_ice_dct, new_neg_ice_dct
def analyze2(filename, filename_l1b):
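    """Plot histograms of ACHA and DCOMP cloud optical depth over the inner
    20x20 box of each PIREP sample."""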
f = h5py.File(filename, 'r')
icing_alt = f['icing_altitude'][:]
cld_top_hgt = f['cld_height_acha'][:, 10:30, 10:30]
cld_phase = f['cloud_phase'][:, 10:30, 10:30]
cld_opd_dc = f['cld_opd_dcomp'][:, 10:30, 10:30]
cld_opd = f['cld_opd_acha'][:, 10:30, 10:30]
solzen = f['solar_zenith_angle'][:, 10:30, 10:30]
f_l1b = h5py.File(filename_l1b, 'r')
bt_11um = f_l1b['temp_11_0um_nom'][:, 10:30, 10:30]
cld_opd = cld_opd.flatten()
cld_opd_dc = cld_opd_dc.flatten()
solzen = solzen.flatten()
keep1 = np.invert(np.isnan(cld_opd))
keep2 = np.invert(np.isnan(solzen))
keep = keep1 & keep2
    cld_opd = cld_opd[keep]
cld_opd_dc = cld_opd_dc[keep]
solzen = solzen[keep]
plt.hist(cld_opd, bins=20)
plt.show()
    plt.hist(cld_opd_dc, bins=20)
    plt.show()
    f.close()
    f_l1b.close()
def run_daynight(filename, filename_l1b, day_night='ANY'):
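    """Split a collocation file and its matching L1b file by day/night (via the
    solar zenith angle box) and write copies suffixed with day_night."""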
f = h5py.File(filename, 'r')
f_l1b = h5py.File(filename_l1b, 'r')
solzen = f['solar_zenith_angle'][:, 10:30, 10:30]
num_obs = solzen.shape[0]
idxs = []
    for i in range(num_obs):
        if day_night == 'NIGHT':
            if not is_day(solzen[i,]):
                idxs.append(i)
        elif day_night == 'DAY':
            if is_day(solzen[i,]):
                idxs.append(i)
        else:  # 'ANY': keep every observation
            idxs.append(i)
keep_idxs = np.array(idxs)
data_dct = {}
for didx, ds_name in enumerate(ds_list):
data_dct[ds_name] = f[ds_name][keep_idxs,]
lon_c = f['longitude'][keep_idxs]
lat_c = f['latitude'][keep_idxs]
time_s = f['time'][keep_idxs]
fl_alt_s = f['icing_altitude'][keep_idxs]
ice_int_s = f['icing_intensity'][keep_idxs]
unq_ids = f['unique_id'][keep_idxs]
path, fname = os.path.split(filename)
fbase, fext = os.path.splitext(fname)
outfile = path + '/' + fbase + '_' + day_night + fext
create_file(outfile, data_dct, ds_list, ds_types, lon_c, lat_c, time_s, fl_alt_s, ice_int_s, unq_ids)
data_dct = {}
for didx, ds_name in enumerate(l1b_ds_list):
data_dct[ds_name] = f_l1b[ds_name][keep_idxs]
path, fname = os.path.split(filename_l1b)
fbase, fext = os.path.splitext(fname)
    outfile_l1b = path + '/' + fbase + '_' + day_night + fext
create_file(outfile_l1b, data_dct, l1b_ds_list, l1b_ds_types, lon_c, lat_c, time_s, fl_alt_s, ice_int_s, unq_ids)
f.close()
f_l1b.close()
def run_qc(filename, filename_l1b, day_night='ANY'):
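    """Apply icing-PIREP quality control and write *_QC_<day_night> copies of
    the product and L1b files."""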
f = h5py.File(filename, 'r')
icing_alt = f['icing_altitude'][:]
cld_top_hgt = f['cld_height_acha'][:, 10:30, 10:30]
cld_phase = f['cloud_phase'][:, 10:30, 10:30]
if day_night == 'DAY':
cld_opd = f['cld_opd_dcomp'][:, 10:30, 10:30]
else:
cld_opd = f['cld_opd_acha'][:, 10:30, 10:30]
cld_mask = f['cloud_mask'][:, 10:30, 10:30]
sol_zen = f['solar_zenith_angle'][:, 10:30, 10:30]
f_l1b = h5py.File(filename_l1b, 'r')
bt_11um = f_l1b['temp_11_0um_nom'][:, 10:30, 10:30]
print('num pireps all: ', len(icing_alt))
mask, idxs = apply_qc_icing_pireps(icing_alt, cld_top_hgt, cld_phase, cld_opd, cld_mask, bt_11um, sol_zen, day_night=day_night)
print('num pireps day_night: ', len(mask), day_night)
keep_idxs = []
for i in range(len(mask)):
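        # keep PIREPs where more than 20% of the 20x20 (400 pixel) box passes QC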
if (np.sum(mask[i]) / 400) > 0.20:
keep_idxs.append(idxs[i])
print('num valid pireps: ', len(keep_idxs))
keep_idxs = np.array(keep_idxs)
data_dct = {}
for didx, ds_name in enumerate(ds_list):
data_dct[ds_name] = f[ds_name][keep_idxs,]
lon_c = f['longitude'][keep_idxs]
lat_c = f['latitude'][keep_idxs]
time_s = f['time'][keep_idxs]
fl_alt_s = f['icing_altitude'][keep_idxs]
ice_int_s = f['icing_intensity'][keep_idxs]
unq_ids = f['unique_id'][keep_idxs]
path, fname = os.path.split(filename)
fbase, fext = os.path.splitext(fname)
outfile = path + '/' + fbase + '_' + 'QC' + '_' + day_night + fext
create_file(outfile, data_dct, ds_list, ds_types, lon_c, lat_c, time_s, fl_alt_s, ice_int_s, unq_ids)
data_dct = {}
for didx, ds_name in enumerate(l1b_ds_list):
data_dct[ds_name] = f_l1b[ds_name][keep_idxs]
path, fname = os.path.split(filename_l1b)
fbase, fext = os.path.splitext(fname)
outfile_l1b = path + '/' + fbase + '_' + 'QC' + '_' + day_night + fext
create_file(outfile_l1b, data_dct, l1b_ds_list, l1b_ds_types, lon_c, lat_c, time_s, fl_alt_s, ice_int_s, unq_ids)
f.close()
f_l1b.close()
def apply_qc_icing_pireps(icing_alt, cld_top_hgt, cld_phase, cld_opd, cld_mask, bt_11um, solzen, day_night='ANY'):
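    """Build per-PIREP boolean QC masks over the flattened 20x20 box.

    Returns (mask, idxs): one boolean array per report that passes the day/night
    filter, plus the report indexes they correspond to.
    """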
opd_threshold = 2
if day_night == 'DAY':
opd_threshold = 20
closeness = 100.0 # meters
num_obs = len(icing_alt)
cld_mask = cld_mask.reshape((num_obs, -1))
cld_top_hgt = cld_top_hgt.reshape((num_obs, -1))
cld_phase = cld_phase.reshape((num_obs, -1))
cld_opd = cld_opd.reshape((num_obs, -1))
bt_11um = bt_11um.reshape((num_obs, -1))
mask = []
idxs = []
for i in range(num_obs):
if day_night == 'NIGHT':
if is_day(solzen[i,]):
continue
elif day_night == 'DAY':
if not is_day(solzen[i,]):
continue
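        # QC chain: start from cloudy, non-NaN pixels; require the cloud top above
        # the report altitude; reject supercooled tops within +/- closeness of the
        # report altitude; require optically thick supercooled cloud above the
        # report; and keep 11um brightness temperature within [228 K, 270 K].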
keep_0 = np.logical_or(cld_mask[i,] == 2, cld_mask[i,] == 3) # cloudy
keep_1 = np.invert(np.isnan(cld_top_hgt[i,]))
keep_2 = np.invert(np.isnan(bt_11um[i,]))
keep_3 = np.invert(np.isnan(cld_opd[i,]))
keep = keep_0 & keep_1 & keep_2 & keep_3
keep = np.where(keep, cld_top_hgt[i,] > icing_alt[i], False)
keep = np.where(keep,
np.invert((cld_phase[i,] == 4) &
np.logical_and(cld_top_hgt[i,]+closeness > icing_alt[i], cld_top_hgt[i,]-closeness < icing_alt[i])),
False)
keep = np.where(keep, (cld_opd[i,] >= opd_threshold) & (cld_phase[i,] == 4) & (cld_top_hgt[i,] > icing_alt[i]), False)
keep = np.where(keep, np.invert((cld_phase[i,] == 4) & (cld_opd[i,] < 0.1) & (cld_top_hgt[i,] > icing_alt[i])), False)
keep = np.where(keep, np.invert(bt_11um[i,] > 270.0), False)
keep = np.where(keep, np.invert(bt_11um[i,] < 228.0), False)
mask.append(keep)
idxs.append(i)
return mask, idxs
def fov_extract(outfile='/home/rink/fovs_out.h5', train_params=train_params_day):
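    """Sample individual cloudy, cold-top (< 273 K) pixels from the extraction
    files (up to 20 per icing PIREP, 10 per no-icing PIREP) and write one
    time-sorted HDF5 training file."""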
ice_times = []
icing_int_s = []
ice_lons = []
ice_lats = []
no_ice_times = []
no_ice_lons = []
no_ice_lats = []
h5_s_icing = []
h5_s_no_icing = []
icing_data_dct = {ds: [] for ds in train_params}
no_icing_data_dct = {ds: [] for ds in train_params}
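    # 20x20 = 400 candidate pixels per PIREP box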
sub_indexes = np.arange(400)
num_ice = 0
for fidx in range(len(icing_files)):
fname = icing_files[fidx]
f = h5py.File(fname, 'r')
h5_s_icing.append(f)
times = f['time'][:]
num_obs = len(times)
icing_int = f['icing_intensity'][:]
lons = f['longitude'][:]
lats = f['latitude'][:]
cld_mask = f['cloud_mask'][:, 10:30, 10:30]
cld_mask = cld_mask.reshape((num_obs, -1))
cld_top_temp = f['cld_temp_acha'][:, 10:30, 10:30]
cld_top_temp = cld_top_temp.reshape((num_obs, -1))
for i in range(num_obs):
keep_0 = np.logical_or(cld_mask[i,] == 2, cld_mask[i,] == 3) # cloudy
keep_1 = np.invert(np.isnan(cld_top_temp[i,]))
keep = keep_0 & keep_1
keep = np.where(keep, cld_top_temp[i,] < 273.0, False)
k_idxs = sub_indexes[keep]
np.random.shuffle(k_idxs)
            k_idxs = k_idxs[0:20]  # at most 20 pixels per icing PIREP
num_ice += len(k_idxs)
for ds_name in train_params:
dat = f[ds_name][i, 10:30, 10:30].flatten()
icing_data_dct[ds_name].append(dat[k_idxs])
icing_int_s.append(np.full(len(k_idxs), icing_int[i]))
ice_times.append(np.full(len(k_idxs), times[i]))
ice_lons.append(np.full(len(k_idxs), lons[i]))
ice_lats.append(np.full(len(k_idxs), lats[i]))
print(fname)
for ds_name in train_params:
lst = icing_data_dct[ds_name]
icing_data_dct[ds_name] = np.concatenate(lst)
icing_int_s = np.concatenate(icing_int_s)
ice_times = np.concatenate(ice_times)
ice_lons = np.concatenate(ice_lons)
ice_lats = np.concatenate(ice_lats)
num_no_ice = 0
for fidx in range(len(no_icing_files)):
fname = no_icing_files[fidx]
f = h5py.File(fname, 'r')
h5_s_no_icing.append(f)
times = f['time']
num_obs = len(times)
lons = f['longitude']
lats = f['latitude']
cld_mask = f['cloud_mask'][:, 10:30, 10:30]
cld_mask = cld_mask.reshape((num_obs, -1))
cld_top_temp = f['cld_temp_acha'][:, 10:30, 10:30]
cld_top_temp = cld_top_temp.reshape((num_obs, -1))
for i in range(num_obs):
keep_0 = np.logical_or(cld_mask[i,] == 2, cld_mask[i,] == 3) # cloudy
keep_1 = np.invert(np.isnan(cld_top_temp[i,]))
keep = keep_0 & keep_1
keep = np.where(keep, cld_top_temp[i,] < 273.0, False)
k_idxs = sub_indexes[keep]
np.random.shuffle(k_idxs)
            k_idxs = k_idxs[0:10]  # at most 10 pixels per no-icing PIREP
num_no_ice += len(k_idxs)
no_ice_times.append(np.full(len(k_idxs), times[i]))
no_ice_lons.append(np.full(len(k_idxs), lons[i]))
no_ice_lats.append(np.full(len(k_idxs), lats[i]))
for ds_name in train_params:
dat = f[ds_name][i, 10:30, 10:30].flatten()
no_icing_data_dct[ds_name].append(dat[k_idxs])
print(fname)
for ds_name in train_params:
lst = no_icing_data_dct[ds_name]
no_icing_data_dct[ds_name] = np.concatenate(lst)
no_icing_int_s = np.full(num_no_ice, -1)
no_ice_times = np.concatenate(no_ice_times)
no_ice_lons = np.concatenate(no_ice_lons)
no_ice_lats = np.concatenate(no_ice_lats)
icing_intensity = np.concatenate([icing_int_s, no_icing_int_s])
icing_times = np.concatenate([ice_times, no_ice_times])
icing_lons = np.concatenate([ice_lons, no_ice_lons])
icing_lats = np.concatenate([ice_lats, no_ice_lats])
data_dct = {}
for ds_name in train_params:
data_dct[ds_name] = np.concatenate([icing_data_dct[ds_name], no_icing_data_dct[ds_name]])
# apply shuffle indexes
# ds_indexes = np.arange(num_ice + num_no_ice)
# np.random.shuffle(ds_indexes)
#
# for ds_name in train_params:
# data_dct[ds_name] = data_dct[ds_name][ds_indexes]
# icing_intensity = icing_intensity[ds_indexes]
# icing_times = icing_times[ds_indexes]
# icing_lons = icing_lons[ds_indexes]
# icing_lats = icing_lats[ds_indexes]
# do sort
ds_indexes = np.argsort(icing_times)
for ds_name in train_params:
data_dct[ds_name] = data_dct[ds_name][ds_indexes]
icing_intensity = icing_intensity[ds_indexes]
icing_times = icing_times[ds_indexes]
icing_lons = icing_lons[ds_indexes]
icing_lats = icing_lats[ds_indexes]
h5f_expl = h5py.File(a_clvr_file, 'r')
h5f_out = h5py.File(outfile, 'w')
for idx, ds_name in enumerate(train_params):
dt = ds_types[ds_list.index(ds_name)]
data = data_dct[ds_name]
h5f_out.create_dataset(ds_name, data=data, dtype=dt)
icing_int_ds = h5f_out.create_dataset('icing_intensity', data=icing_intensity, dtype='i4')
icing_int_ds.attrs.create('long_name', data='From PIREP. -1:No Icing, 1:Trace, 2:Light, 3:Light Moderate, 4:Moderate, 5:Moderate Severe, 6:Severe')
    time_ds = h5f_out.create_dataset('time', data=icing_times, dtype='f8')  # f8: epoch seconds exceed f4 precision
time_ds.attrs.create('units', data='seconds since 1970-1-1 00:00:00')
time_ds.attrs.create('long_name', data='PIREP time')
lon_ds = h5f_out.create_dataset('longitude', data=icing_lons, dtype='f4')
lon_ds.attrs.create('units', data='degrees_east')
lon_ds.attrs.create('long_name', data='PIREP longitude')
lat_ds = h5f_out.create_dataset('latitude', data=icing_lats, dtype='f4')
lat_ds.attrs.create('units', data='degrees_north')
lat_ds.attrs.create('long_name', data='PIREP latitude')
# copy relevant attributes
for ds_name in train_params:
h5f_ds = h5f_out[ds_name]
h5f_ds.attrs.create('standard_name', data=h5f_expl[ds_name].attrs.get('standard_name'))
h5f_ds.attrs.create('long_name', data=h5f_expl[ds_name].attrs.get('long_name'))
h5f_ds.attrs.create('units', data=h5f_expl[ds_name].attrs.get('units'))
attr = h5f_expl[ds_name].attrs.get('actual_range')
if attr is not None:
h5f_ds.attrs.create('actual_range', data=attr)
attr = h5f_expl[ds_name].attrs.get('flag_values')
if attr is not None:
h5f_ds.attrs.create('flag_values', data=attr)
# --- close files
for h5f in h5_s_icing:
h5f.close()
for h5f in h5_s_no_icing:
h5f.close()
h5f_out.close()
h5f_expl.close()
def tile_extract(outfile='/home/rink/tiles_out.h5', train_params=train_params_day):
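    """Extract the center 16x16 tile (indexes 12:28) for every PIREP sample and
    write one time-sorted HDF5 training file."""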
icing_int_s = []
ice_time_s = []
no_ice_time_s = []
ice_lon_s = []
no_ice_lon_s = []
ice_lat_s = []
no_ice_lat_s = []
h5_s_icing = []
h5_s_no_icing = []
icing_data_dct = {ds: [] for ds in train_params}
no_icing_data_dct = {ds: [] for ds in train_params}
for fidx in range(len(icing_files)):
fname = icing_files[fidx]
f = h5py.File(fname, 'r')
h5_s_icing.append(f)
times = f['time'][:]
num_obs = len(times)
lons = f['longitude']
lats = f['latitude']
icing_int = f['icing_intensity'][:]
for i in range(num_obs):
for ds_name in train_params:
dat = f[ds_name][i, 12:28, 12:28]
icing_data_dct[ds_name].append(dat)
icing_int_s.append(icing_int[i])
ice_time_s.append(times[i])
ice_lon_s.append(lons[i])
ice_lat_s.append(lats[i])
print(fname)
for ds_name in train_params:
lst = icing_data_dct[ds_name]
icing_data_dct[ds_name] = np.stack(lst, axis=0)
icing_int_s = np.array(icing_int_s)
ice_time_s = np.array(ice_time_s)
ice_lon_s = np.array(ice_lon_s)
ice_lat_s = np.array(ice_lat_s)
num_ice = icing_int_s.shape[0]
num_no_ice = 0
for fidx in range(len(no_icing_files)):
fname = no_icing_files[fidx]
f = h5py.File(fname, 'r')
h5_s_no_icing.append(f)
times = f['time']
num_obs = len(times)
lons = f['longitude']
lats = f['latitude']
for i in range(num_obs):
for ds_name in train_params:
dat = f[ds_name][i, 12:28, 12:28]
no_icing_data_dct[ds_name].append(dat)
num_no_ice += 1
no_ice_time_s.append(times[i])
no_ice_lon_s.append(lons[i])
no_ice_lat_s.append(lats[i])
print(fname)
for ds_name in train_params:
lst = no_icing_data_dct[ds_name]
no_icing_data_dct[ds_name] = np.stack(lst, axis=0)
no_icing_int_s = np.full(num_no_ice, -1)
no_ice_time_s = np.array(no_ice_time_s)
no_ice_lon_s = np.array(no_ice_lon_s)
no_ice_lat_s = np.array(no_ice_lat_s)
icing_intensity = np.concatenate([icing_int_s, no_icing_int_s])
icing_times = np.concatenate([ice_time_s, no_ice_time_s])
icing_lons = np.concatenate([ice_lon_s, no_ice_lon_s])
icing_lats = np.concatenate([ice_lat_s, no_ice_lat_s])
data_dct = {}
for ds_name in train_params:
data_dct[ds_name] = np.concatenate([icing_data_dct[ds_name], no_icing_data_dct[ds_name]])
# Do shuffle
# ds_indexes = np.arange(num_ice + num_no_ice)
# np.random.shuffle(ds_indexes)
#
# for ds_name in train_params:
# data_dct[ds_name] = data_dct[ds_name][ds_indexes]
# icing_intensity = icing_intensity[ds_indexes]
# icing_times = icing_times[ds_indexes]
# icing_lons = icing_lons[ds_indexes]
# icing_lats = icing_lats[ds_indexes]
# do sort
ds_indexes = np.argsort(icing_times)
for ds_name in train_params:
data_dct[ds_name] = data_dct[ds_name][ds_indexes]
icing_intensity = icing_intensity[ds_indexes]
icing_times = icing_times[ds_indexes]
icing_lons = icing_lons[ds_indexes]
icing_lats = icing_lats[ds_indexes]
h5f_expl = h5py.File(a_clvr_file, 'r')
h5f_out = h5py.File(outfile, 'w')
for idx, ds_name in enumerate(train_params):
dt = ds_types[ds_list.index(ds_name)]
data = data_dct[ds_name]
h5f_out.create_dataset(ds_name, data=data, dtype=dt)
icing_int_ds = h5f_out.create_dataset('icing_intensity', data=icing_intensity, dtype='i4')
icing_int_ds.attrs.create('long_name', data='From PIREP. -1:No Icing, 1:Trace, 2:Light, 3:Light Moderate, 4:Moderate, 5:Moderate Severe, 6:Severe')
    time_ds = h5f_out.create_dataset('time', data=icing_times, dtype='f8')  # f8: epoch seconds exceed f4 precision
time_ds.attrs.create('units', data='seconds since 1970-1-1 00:00:00')
time_ds.attrs.create('long_name', data='PIREP time')
lon_ds = h5f_out.create_dataset('longitude', data=icing_lons, dtype='f4')
lon_ds.attrs.create('units', data='degrees_east')
lon_ds.attrs.create('long_name', data='PIREP longitude')
lat_ds = h5f_out.create_dataset('latitude', data=icing_lats, dtype='f4')
lat_ds.attrs.create('units', data='degrees_north')
lat_ds.attrs.create('long_name', data='PIREP latitude')
# copy relevant attributes
for ds_name in train_params:
h5f_ds = h5f_out[ds_name]
h5f_ds.attrs.create('standard_name', data=h5f_expl[ds_name].attrs.get('standard_name'))
h5f_ds.attrs.create('long_name', data=h5f_expl[ds_name].attrs.get('long_name'))
h5f_ds.attrs.create('units', data=h5f_expl[ds_name].attrs.get('units'))
attr = h5f_expl[ds_name].attrs.get('actual_range')
if attr is not None:
h5f_ds.attrs.create('actual_range', data=attr)
attr = h5f_expl[ds_name].attrs.get('flag_values')
if attr is not None:
h5f_ds.attrs.create('flag_values', data=attr)
# --- close files
for h5f in h5_s_icing:
h5f.close()
for h5f in h5_s_no_icing:
h5f.close()
h5f_out.close()
h5f_expl.close()
def run_mean_std(check_cloudy=False, no_icing_to_icing_ratio=5):
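    """Compute per-dataset mean and standard deviation over the icing and
    no-icing extraction files, optionally over cloudy pixels only, combining the
    two as a weighted average using the assumed no-icing to icing ratio."""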
ds_list = ['cld_height_acha', 'cld_geo_thick', 'cld_press_acha',
'supercooled_cloud_fraction', 'cld_temp_acha', 'cld_opd_acha',
'cld_reff_acha', 'cld_reff_dcomp', 'cld_reff_dcomp_1', 'cld_reff_dcomp_2', 'cld_reff_dcomp_3',
'cld_opd_dcomp', 'cld_opd_dcomp_1', 'cld_opd_dcomp_2', 'cld_opd_dcomp_3', 'cld_cwp_dcomp', 'iwc_dcomp',
'lwc_dcomp', 'cld_emiss_acha', 'conv_cloud_fraction']
mean_dct = {}
std_dct = {}
    ice_flist = glob.glob('/home/rink/data/icing/icing_2*.h5')
    no_ice_flist = glob.glob('/home/rink/data/icing/no_icing_2*.h5')
ice_h5f_lst = [h5py.File(f, 'r') for f in ice_flist]
no_ice_h5f_lst = [h5py.File(f, 'r') for f in no_ice_flist]
if check_cloudy:
cld_msk_i = []
cld_msk_ni = []
for idx, ice_h5f in enumerate(ice_h5f_lst):
no_ice_h5f = no_ice_h5f_lst[idx]
cld_msk_i.append(ice_h5f['cloud_mask'][:,].flatten())
cld_msk_ni.append(no_ice_h5f['cloud_mask'][:,].flatten())
cld_msk_i = np.concatenate(cld_msk_i)
cld_msk_ni = np.concatenate(cld_msk_ni)
for dname in ds_list:
data_i = []
data_ni = []
for idx, ice_h5f in enumerate(ice_h5f_lst):
no_ice_h5f = no_ice_h5f_lst[idx]
data_i.append(ice_h5f[dname][:,].flatten())
data_ni.append(no_ice_h5f[dname][:,].flatten())
data_i = np.concatenate(data_i)
if check_cloudy:
keep = np.logical_or(cld_msk_i == 2, cld_msk_i == 3)
data_i = data_i[keep]
        mean_i = np.nanmean(data_i)
        std_i = np.nanstd(data_i)
data_ni = np.concatenate(data_ni)
if check_cloudy:
keep = np.logical_or(cld_msk_ni == 2, cld_msk_ni == 3)
data_ni = data_ni[keep]
        mean_ni = np.nanmean(data_ni)
        std_ni = np.nanstd(data_ni)
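        # combine icing and no-icing statistics as a weighted average, weighting
        # no-icing by the assumed no_icing_to_icing_ratio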
mean = (mean_i + no_icing_to_icing_ratio*mean_ni)/(no_icing_to_icing_ratio + 1)
std = (std_i + no_icing_to_icing_ratio*std_ni)/(no_icing_to_icing_ratio + 1)
print(dname,': (', mean, mean_i, mean_ni, ') (', std, std_i, std_ni, ')')
mean_dct[dname] = mean
std_dct[dname] = std
    for h5f in ice_h5f_lst:
        h5f.close()
    for h5f in no_ice_h5f_lst:
        h5f.close()
return mean_dct, std_dct
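if __name__ == '__main__':
    # Minimal usage sketch (not part of the original pipeline driver; the output
    # paths and date range below are hypothetical examples): parse and subsample
    # the PIREPs, then extract collocated CLAVR-x product and L1b grids.
    ice_dct, no_ice_dct, neg_ice_dct = setup()
    ice_dct, no_ice_dct, neg_ice_dct = process_2(ice_dct, no_ice_dct, neg_ice_dct)
    run(ice_dct, outfile='/tmp/icing.h5', outfile_l1b='/tmp/icing_l1b.h5',
        dt_str_start='2019-01-01_00:00', dt_str_end='2019-03-31_23:59')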