import os
import datetime
from datetime import timezone
import glob

import h5py
import numpy as np

from util.util import get_time_tuple_utc, GenericException, add_time_range_to_filename, is_night, is_day
# Assumed import paths, inferred from usage below (pirep_icing, CLAVRx, CLAVRx_VIIRS):
from icing.pireps import pirep_icing
from aeolus.datasource import CLAVRx, CLAVRx_VIIRS
goes16_directory = '/arcdata/goes/grb/goes16' # /year/date/abi/L1b/RadC
clavrx_viirs_dir = '/apollo/cloud/scratch/Satellite_Output/NASA-SNPP_VIIRS/global/2019_DNB_for_Rink_wDBfix/level2_h5/'
dir_fmt = '%Y_%m_%d_%j'
# dir_list = [f.path for f in os.scandir('.') if f.is_dir()]
ds_dct = {}
# --- CLAVRx Radiometric parameters and metadata ------------------------------------------------
l1b_ds_list = ['temp_10_4um_nom', 'temp_11_0um_nom', 'temp_12_0um_nom', 'temp_13_3um_nom', 'temp_3_75um_nom',
'temp_6_2um_nom', 'temp_6_7um_nom', 'temp_7_3um_nom', 'temp_8_5um_nom', 'temp_9_7um_nom',
'refl_0_47um_nom', 'refl_0_65um_nom', 'refl_0_86um_nom', 'refl_1_38um_nom', 'refl_1_60um_nom']
l1b_ds_fill = [-32767 for i in range(10)] + [-32768 for i in range(5)]
l1b_ds_types = ['f4' for i in range(15)]  # assumed to parallel ds_types below; referenced in run/run_viirs
l1b_ds_range = ['actual_range' for i in range(15)]  # assumed; referenced in run/run_viirs
ds_list = ['cld_height_acha', 'cld_geo_thick', 'cld_press_acha', 'sensor_zenith_angle', 'supercooled_prob_acha',
'supercooled_cloud_fraction', 'cld_temp_acha', 'cld_opd_acha', 'solar_zenith_angle',
'cld_reff_acha', 'cld_reff_dcomp', 'cld_reff_dcomp_1', 'cld_reff_dcomp_2', 'cld_reff_dcomp_3',
'cld_opd_dcomp', 'cld_opd_dcomp_1', 'cld_opd_dcomp_2', 'cld_opd_dcomp_3', 'cld_cwp_dcomp', 'iwc_dcomp',
'lwc_dcomp', 'cld_emiss_acha', 'conv_cloud_fraction', 'cloud_type', 'cloud_phase', 'cloud_mask']
ds_types = ['f4' for i in range(23)] + ['i1' for i in range(3)]
ds_fill = [-32768 for i in range(23)] + [-128 for i in range(3)]
ds_range = ['actual_range' for i in range(23)] + [None for i in range(3)]
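# Sanity check (illustrative addition): the metadata lists above are indexed by
# position (didx) in the extraction loops below, so they must stay aligned with
# ds_list and l1b_ds_list.
assert len(ds_list) == len(ds_types) == len(ds_fill) == len(ds_range)
assert len(l1b_ds_list) == len(l1b_ds_fill) == len(l1b_ds_types) == len(l1b_ds_range)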
# --- CLAVRx VIIRS L2 parameters and metadata
# ds_list = ['cld_height_acha', 'cld_geo_thick', 'cld_press_acha', 'sensor_zenith_angle', 'supercooled_prob_acha',
# 'supercooled_cloud_fraction', 'cld_temp_acha', 'cld_opd_acha', 'solar_zenith_angle',
# 'cld_reff_acha', 'cld_reff_dcomp', 'cld_reff_dcomp_1', 'cld_reff_dcomp_2', 'cld_reff_dcomp_3',
# 'cld_opd_dcomp', 'cld_opd_dcomp_1', 'cld_opd_dcomp_2', 'cld_opd_dcomp_3', 'cld_cwp_dcomp', 'iwc_dcomp',
# 'lwc_dcomp', 'cld_emiss_acha', 'conv_cloud_fraction', 'cld_opd_nlcomp', 'cld_reff_nlcomp', 'cloud_type',
# 'cloud_phase', 'cloud_mask']
# ds_types = ['f4' for i in range(25)] + ['i1' for i in range(3)]
# ds_fill = [-32768 for i in range(25)] + [-128 for i in range(3)]
# ds_range = ['actual_range' for i in range(25)] + [None for i in range(3)]
# --------------------------------------------
a_clvr_file = homedir+'data/clavrx/clavrx_OR_ABI-L1b-RadC-M3C01_G16_s20190020002186.level2.nc'
#a_clvr_file = homedir+'data/clavrx/RadC/265/clavrx_OR_ABI-L1b-RadC-M6C01_G16_s20212651711172.level2.nc'
#a_clvr_file = homedir+'data/clavrx/clavrx_snpp_viirs.A2019071.0000.001.2019071061610.uwssec_B00038187.level2.h5'
# icing_files = [f for f in glob.glob('/data/Personal/rink/icing_ml/icing_2*_DAY.h5')]
# icing_files = [f for f in glob.glob('/data/Personal/rink/icing_ml/icing_l1b_2*_DAY.h5')]
# icing_files = [f for f in glob.glob('/data/Personal/rink/icing_ml/icing_l1b_2*_ANY.h5')]
# no_icing_files = [f for f in glob.glob('/data/Personal/rink/icing_ml/no_icing_2*_DAY.h5')]
# no_icing_files = [f for f in glob.glob('/data/Personal/rink/icing_ml/no_icing_l1b_2*_DAY.h5')]
# no_icing_files = [f for f in glob.glob('/data/Personal/rink/icing_ml/no_icing_l1b_2*_ANY.h5')]
train_params_day = ['cld_height_acha', 'cld_geo_thick', 'supercooled_cloud_fraction', 'cld_temp_acha', 'cld_press_acha',
'solar_zenith_angle', 'cld_reff_dcomp', 'cld_opd_dcomp', 'cld_cwp_dcomp', 'iwc_dcomp', 'lwc_dcomp',
'cloud_phase', 'cloud_mask']
train_params_night = ['cld_height_acha', 'cld_geo_thick', 'supercooled_cloud_fraction', 'cld_temp_acha',
'cld_press_acha', 'cld_reff_acha', 'cld_opd_acha', 'cloud_phase', 'cloud_mask']
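# Sanity check (illustrative addition): every training input must be one of the
# extracted L2 datasets above.
assert set(train_params_day).issubset(ds_list)
assert set(train_params_night).issubset(ds_list)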
# Just a wrapper for the conversion of PIREP reports to in-memory dictionaries
def setup():  # function name assumed; the original def line was elided
    ice_dict, no_ice_dict, neg_ice_dict = pirep_icing(pirep_file)
    return ice_dict, no_ice_dict, neg_ice_dict
def get_clavrx_datasource(timestamp, platform, file_time_span=10):
    # Dispatch on platform; the def line and GOES branch were elided and are restored here.
    if platform == 'GOES':
        return get_clavrx_datasource_goes(timestamp, file_time_span=file_time_span)
    elif platform == 'VIIRS':
        return get_clavrx_datasource_viirs(timestamp)
def get_clavrx_datasource_goes(timestamp, file_time_span=10):
    dt_obj, time_tup = get_time_tuple_utc(timestamp)
    date_dir_str = dt_obj.strftime(dir_fmt)
    ds = ds_dct.get(date_dir_str)
    if ds is None:
        ds = CLAVRx(clavrx_dir + date_dir_str + '/', file_time_span=file_time_span)
        ds_dct[date_dir_str] = ds
    return ds
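# Usage sketch (illustrative epoch seconds): datasources are cached per date
# directory in ds_dct, so repeated lookups on the same day reuse one instance.
# ds = get_clavrx_datasource(1546387200.0, 'GOES')  # 2019-01-02 00:00:00 UTC
# clvr_file = ds.get_file(1546387200.0)[0]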
def get_clavrx_datasource_viirs(timestamp):
    dt_obj, time_tup = get_time_tuple_utc(timestamp)
    date_dir_str = dt_obj.strftime('%j')
    ds = ds_dct.get(date_dir_str)
    if ds is None:
        # Body assumed to mirror the GOES variant above; the original lines were elided.
        ds = CLAVRx_VIIRS(clavrx_viirs_dir + date_dir_str + '/')
        ds_dct[date_dir_str] = ds
    return ds
def get_goes_datasource(timestamp):
    dt_obj, time_tup = get_time_tuple_utc(timestamp)

    yr_dir = str(dt_obj.timetuple().tm_year)
    date_dir = dt_obj.strftime(dir_fmt)

    files_path = goes16_directory + '/' + yr_dir + '/' + date_dir + '/abi' + '/L1b' + '/RadC/'
def get_grid_values(h5f, grid_name, j_c, i_c, half_width, num_j=None, num_i=None,
                    scale_factor_name='scale_factor', add_offset_name='add_offset',
                    fill_value_name='_FillValue', range_name='actual_range', fill_value=None):
    try:
        hfds = h5f[grid_name]
    except Exception:
        return None

    attrs = hfds.attrs
    if attrs is None:
        raise GenericException('No attributes object for: '+grid_name)

    ylen, xlen = hfds.shape

    # Window bounds: either a square window of 2*half_width+1, or an explicit
    # num_j x num_i window anchored at (j_c, i_c).
    if half_width is not None:
        j_l = j_c-half_width
        i_l = i_c-half_width
        if j_l < 0 or i_l < 0:
            return None
        j_r = j_c+half_width+1
        i_r = i_c+half_width+1
        if j_r >= ylen or i_r >= xlen:
            return None
    else:
        j_l = j_c
        j_r = j_c + num_j + 1
        i_l = i_c
        i_r = i_c + num_i + 1

    grd_vals = hfds[j_l:j_r, i_l:i_r]

    if fill_value is not None:
        grd_vals = np.where(grd_vals == fill_value, np.nan, grd_vals)

    if scale_factor_name is not None:
        attr = attrs.get(scale_factor_name)
        if attr is not None:
            if np.isscalar(attr):
                scale_factor = attr
            else:
                scale_factor = attr[0]
            grd_vals = grd_vals * scale_factor

    if add_offset_name is not None:
        attr = attrs.get(add_offset_name)
        if attr is not None:
            if np.isscalar(attr):
                add_offset = attr
            else:
                add_offset = attr[0]
            grd_vals = grd_vals + add_offset

    if range_name is not None:
        attr = attrs.get(range_name)
        if attr is None:
            raise GenericException('Attribute: '+range_name+' not found for dataset: '+grid_name)
        low = attr[0]
        high = attr[1]
        grd_vals = np.where(grd_vals < low, np.nan, grd_vals)
        grd_vals = np.where(grd_vals > high, np.nan, grd_vals)
    elif fill_value_name is not None:
        attr = attrs.get(fill_value_name)
        if attr is None:
            raise GenericException('Attribute: '+fill_value_name+' not found for dataset: '+grid_name)
        if np.isscalar(attr):
            fill_value = attr
        else:
            fill_value = attr[0]
        grd_vals = np.where(grd_vals == fill_value, np.nan, grd_vals)

    return grd_vals
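# Usage sketch (hypothetical indices): pull a 41x41 window of cloud-top height
# centered on pixel (line=1000, element=1500), masking values outside actual_range.
# h5f = h5py.File(a_clvr_file, 'r')
# cth = get_grid_values(h5f, 'cld_height_acha', 1000, 1500, 20,
#                       fill_value_name=None, range_name='actual_range', fill_value=-32768)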
def create_file(filename, data_dct, ds_list, ds_types, lon_c, lat_c, time_s, fl_alt_s, icing_intensity, unq_ids, mask=None):
    h5f_expl = h5py.File(a_clvr_file, 'r')  # template CLAVRx file providing dataset attributes
    h5f = h5py.File(filename, 'w')

    lon_ds = h5f.create_dataset('longitude', data=lon_c, dtype='f4')
    lon_ds.dims[0].label = 'time'
    lon_ds.attrs.create('units', data='degrees_east')

    lat_ds = h5f.create_dataset('latitude', data=lat_c, dtype='f4')
    lat_ds.dims[0].label = 'time'
    lat_ds.attrs.create('units', data='degrees_north')

    time_ds = h5f.create_dataset('time', data=time_s)
    time_ds.dims[0].label = 'time'
    time_ds.attrs.create('units', data='seconds since 1970-1-1 00:00:00')

    ice_alt_ds = h5f.create_dataset('icing_altitude', data=fl_alt_s, dtype='f4')

    icing_int_ds = h5f.create_dataset('icing_intensity', data=icing_intensity, dtype='i4')
    icing_int_ds.attrs.create('long_name', data='From PIREP. 0:No intensity report, 1:Trace, 2:Light, 3:Light Moderate, 4:Moderate, 5:Moderate Severe, 6:Severe')

    unq_ids_ds = h5f.create_dataset('unique_id', data=unq_ids, dtype='i4')
    unq_ids_ds.attrs.create('long_name', data='ID mapping to PIREP icing dictionary: see pireps.py')

    if mask is not None:
        mask = mask.astype(np.byte)
        mask_ds = h5f.create_dataset('FOV_mask', data=mask, dtype=np.byte)
        mask_ds.attrs.create('long_name', data='The FOVs which pass the cloudy icing report test')
        mask_ds.dims[0].label = 'time'
        mask_ds.dims[1].label = 'y'
        mask_ds.dims[2].label = 'x'

    for didx, ds_name in enumerate(ds_list):
        h5f.create_dataset(ds_name, data=data_dct[ds_name], dtype=ds_types[didx])
        h5f_ds = h5f[ds_name]
        # Copy descriptive attributes from the template CLAVRx file.
        h5f_ds.attrs.create('standard_name', data=h5f_expl[ds_name].attrs.get('standard_name'))
        h5f_ds.attrs.create('long_name', data=h5f_expl[ds_name].attrs.get('long_name'))
        h5f_ds.attrs.create('units', data=h5f_expl[ds_name].attrs.get('units'))
        h5f_ds.dims[0].label = 'time'
        h5f_ds.dims[1].label = 'y'
        h5f_ds.dims[2].label = 'x'

    h5f_expl.close()
    h5f.close()
def run(pirep_dct, platform, outfile=None, outfile_l1b=None, dt_str_start=None, dt_str_end=None, skip=True, file_time_span=10):
    # Elided lines in this function are restored by analogy with run_viirs below.
    time_keys = list(pirep_dct.keys())
    l1b_grd_dct = {name: [] for name in l1b_ds_list}
    ds_grd_dct = {name: [] for name in ds_list}

    t_start = None
    t_end = None
    if (dt_str_start is not None) and (dt_str_end is not None):
        dto = datetime.datetime.strptime(dt_str_start, '%Y-%m-%d_%H:%M').replace(tzinfo=timezone.utc)
        t_start = dto.timestamp()
        dto = datetime.datetime.strptime(dt_str_end, '%Y-%m-%d_%H:%M').replace(tzinfo=timezone.utc)
        t_end = dto.timestamp()

    lon_s = np.zeros(1)
    lat_s = np.zeros(1)
    last_clvr_file = None
    last_h5f = None
    h5f = None
    nav = None

    lon_c = []
    lat_c = []
    time_s = []
    fl_alt_s = []
    ice_int_s = []
    unq_ids = []

    for idx, time in enumerate(time_keys):
        if t_start is not None:
            if time < t_start:
                continue
            if time > t_end:
                continue

        try:
            clvr_ds = get_clavrx_datasource(time, platform, file_time_span=file_time_span)
        except Exception:
            print('run: Problem retrieving Datasource', get_time_tuple_utc(time))
            continue

        clvr_file = clvr_ds.get_file(time)[0]
        if clvr_file is None:
            continue

        if clvr_file != last_clvr_file:
            try:
                h5f = h5py.File(clvr_file, 'r')
                nav = clvr_ds.get_navigation(h5f)
            except Exception:
                if h5f is not None:
                    h5f.close()
                print('Problem with file: ', clvr_file)
                continue
            if last_h5f is not None:
                last_h5f.close()
            last_h5f = h5f
            last_clvr_file = clvr_file
        else:
            h5f = last_h5f

        cc = ll = -1
        reports = pirep_dct[time]
        for tup in reports:
            lat, lon, fl, I, uid, rpt_str = tup
            lat_s[0] = lat
            lon_s[0] = lon

            cc_a, ll_a = nav.earth_to_lc_s(lon_s, lat_s)
            if cc_a[0] < 0:  # non-navigable, skip
                continue
            if skip and (cc_a[0] == cc and ll_a[0] == ll):  # time adjacent duplicate, skip
                continue
            else:
                cc = cc_a[0]
                ll = ll_a[0]

            ds_lst = []
            for didx, ds_name in enumerate(ds_list):
                gvals = get_grid_values(h5f, ds_name, ll_a[0], cc_a[0], HALF_WIDTH, fill_value_name=None, range_name=ds_range[didx], fill_value=ds_fill[didx])
                if gvals is not None:
                    ds_lst.append(gvals)

            l1b_lst = []
            for didx, ds_name in enumerate(l1b_ds_list):
                gvals = get_grid_values(h5f, ds_name, ll_a[0], cc_a[0], HALF_WIDTH, fill_value_name=None, range_name=l1b_ds_range[didx], fill_value=l1b_ds_fill[didx])
                if gvals is not None:
                    l1b_lst.append(gvals)

            # Keep the report only if every dataset was fully extracted.
            if len(ds_lst) == len(ds_list) and len(l1b_lst) == len(l1b_ds_list):
                for didx, ds_name in enumerate(ds_list):
                    ds_grd_dct[ds_name].append(ds_lst[didx])
                for didx, ds_name in enumerate(l1b_ds_list):
                    l1b_grd_dct[ds_name].append(l1b_lst[didx])
                lon_c.append(lon_s[0])
                lat_c.append(lat_s[0])
                time_s.append(time)
                fl_alt_s.append(fl)
                ice_int_s.append(I)
                unq_ids.append(uid)

    if len(time_s) == 0:
        return
    t_start = time_s[0]
    t_end = time_s[len(time_s)-1]

    lon_c = np.array(lon_c)
    lat_c = np.array(lat_c)
    time_s = np.array(time_s)
    fl_alt_s = np.array(fl_alt_s)
    ice_int_s = np.array(ice_int_s)
    unq_ids = np.array(unq_ids)

    data_dct = {}
    for ds_name in ds_list:
        data_dct[ds_name] = np.array(ds_grd_dct[ds_name])
    if outfile is not None:
        outfile = add_time_range_to_filename(outfile, t_start, t_end)
        create_file(outfile, data_dct, ds_list, ds_types, lon_c, lat_c, time_s, fl_alt_s, ice_int_s, unq_ids)

    data_dct = {}
    for ds_name in l1b_ds_list:
        data_dct[ds_name] = np.array(l1b_grd_dct[ds_name])
    if outfile_l1b is not None:
        outfile_l1b = add_time_range_to_filename(outfile_l1b, t_start, t_end)
        create_file(outfile_l1b, data_dct, l1b_ds_list, l1b_ds_types, lon_c, lat_c, time_s, fl_alt_s, ice_int_s, unq_ids)
def run_viirs(pirep_dct, platform='VIIRS', outfile=None, outfile_l1b=None, dt_str_start=None, dt_str_end=None):
    time_keys = list(pirep_dct.keys())
    l1b_grd_dct = {name: [] for name in l1b_ds_list}
    ds_grd_dct = {name: [] for name in ds_list}

    t_start = None
    t_end = None
    if (dt_str_start is not None) and (dt_str_end is not None):
        dto = datetime.datetime.strptime(dt_str_start, '%Y-%m-%d_%H:%M').replace(tzinfo=timezone.utc)
        t_start = dto.timestamp()
        dto = datetime.datetime.strptime(dt_str_end, '%Y-%m-%d_%H:%M').replace(tzinfo=timezone.utc)
        t_end = dto.timestamp()

    lon_s = np.zeros(1)
    lat_s = np.zeros(1)
    last_clvr_file = None
    last_h5f = None
    h5f = None
    nav = None

    lon_c = []
    lat_c = []
    time_s = []
    fl_alt_s = []
    ice_int_s = []
    unq_ids = []

    num_rpts = 0
    num_rpts_match = 0

    for idx, time in enumerate(time_keys):
        if t_start is not None:
            if time < t_start:
                continue
            if time > t_end:
                continue
        num_rpts += 1

        try:
            clvr_ds = get_clavrx_datasource(time, platform)
        except Exception:
            print('run: Problem retrieving Datasource')
            continue

        clvr_file = clvr_ds.get_file_containing_time(time)[0]
        if clvr_file is None:
            continue

        if clvr_file != last_clvr_file:
            try:
                h5f = h5py.File(clvr_file, 'r')
                nav = clvr_ds.get_navigation(h5f)
            except Exception:
                if h5f is not None:
                    h5f.close()
                print('Problem with file: ', clvr_file)
                continue
            if last_h5f is not None:
                last_h5f.close()
            last_h5f = h5f
            last_clvr_file = clvr_file
        else:
            h5f = last_h5f

        cc = ll = -1
        reports = pirep_dct[time]
        for tup in reports:
            lat, lon, fl, I, uid, rpt_str = tup
            lat_s[0] = lat
            lon_s[0] = lon
            print(' ', lon, lat)

            if not nav.check_inside(lon, lat):
                print('missed range check')
                continue

            cc_a, ll_a = nav.earth_to_lc_s(lon_s, lat_s)
            if cc_a[0] < 0:  # non-navigable, skip
                print('cannot navigate')
                continue
            if cc_a[0] == cc and ll_a[0] == ll:  # time adjacent duplicate, skip
                continue
            else:
                cc = cc_a[0]
                ll = ll_a[0]

            cnt_a = 0
            for didx, ds_name in enumerate(ds_list):
                gvals = get_grid_values(h5f, ds_name, ll_a[0], cc_a[0], 20, fill_value_name=None, range_name=ds_range[didx], fill_value=ds_fill[didx])
                if gvals is not None:
                    ds_grd_dct[ds_name].append(gvals)
                    cnt_a += 1

            cnt_b = 0
            for didx, ds_name in enumerate(l1b_ds_list):
                gvals = get_grid_values(h5f, ds_name, ll_a[0], cc_a[0], 20, fill_value_name=None, range_name=l1b_ds_range[didx], fill_value=l1b_ds_fill[didx])
                if gvals is not None:
                    l1b_grd_dct[ds_name].append(gvals)
                    cnt_b += 1

            if cnt_a > 0 and cnt_a != len(ds_list):
                raise GenericException('weirdness: only a subset of the L2 datasets was extracted')
            if cnt_b > 0 and cnt_b != len(l1b_ds_list):
                raise GenericException('weirdness: only a subset of the L1b datasets was extracted')

            if cnt_a == len(ds_list) and cnt_b == len(l1b_ds_list):
                num_rpts_match += 1
                lon_c.append(lon_s[0])
                lat_c.append(lat_s[0])
                time_s.append(time)
                fl_alt_s.append(fl)
                ice_int_s.append(I)
                unq_ids.append(uid)

    print('num reports, num matched: ', num_rpts, num_rpts_match)

    if len(time_s) == 0:
        return
    t_start = time_s[0]
    t_end = time_s[len(time_s)-1]

    data_dct = {}
    for ds_name in ds_list:
        data_dct[ds_name] = np.array(ds_grd_dct[ds_name])

    lon_c = np.array(lon_c)
    lat_c = np.array(lat_c)
    time_s = np.array(time_s)
    fl_alt_s = np.array(fl_alt_s)
    ice_int_s = np.array(ice_int_s)
    unq_ids = np.array(unq_ids)

    if outfile is not None:
        outfile = add_time_range_to_filename(outfile, t_start, t_end)
        create_file(outfile, data_dct, ds_list, ds_types, lon_c, lat_c, time_s, fl_alt_s, ice_int_s, unq_ids)

    data_dct = {}
    for ds_name in l1b_ds_list:
        data_dct[ds_name] = np.array(l1b_grd_dct[ds_name])

    if outfile_l1b is not None:
        outfile_l1b = add_time_range_to_filename(outfile_l1b, t_start, t_end)
        create_file(outfile_l1b, data_dct, l1b_ds_list, l1b_ds_types, lon_c, lat_c, time_s, fl_alt_s, ice_int_s, unq_ids)
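# Example driver (illustrative paths and dates, not part of the pipeline):
# ice_dct, no_ice_dct, neg_ice_dct = setup()
# run_viirs(ice_dct, outfile=homedir+'icing_viirs.h5', outfile_l1b=homedir+'icing_viirs_l1b.h5',
#           dt_str_start='2019-03-12_00:00', dt_str_end='2019-03-13_00:00')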
def pirep_info(pirep_dct):
    time_keys = list(pirep_dct.keys())

    lat_s = []
    lon_s = []
    flt_lvl_s = []
    ice_intensity_s = []
    for tkey in time_keys:
        reports = pirep_dct[tkey]
        for tup in reports:
            lat, lon, fl, I, uid, rpt_str = tup
            lat_s.append(lat)
            lon_s.append(lon)
            flt_lvl_s.append(fl)
            ice_intensity_s.append(I)

    lat_s = np.array(lat_s)
    lon_s = np.array(lon_s)
    flt_lvl_s = np.array(flt_lvl_s)
    ice_intensity_s = np.array(ice_intensity_s)

    return flt_lvl_s, ice_intensity_s, lat_s, lon_s
lon_space_hdeg = np.linspace(-180, 180, 721)
lat_space_hdeg = np.linspace(-90, 90, 361)
def check_no_overlap(lon, lat, ts, grd_bins, t_delta=600.0):
    grd_x_hi = lon_space_hdeg.shape[0] - 1
    grd_y_hi = lat_space_hdeg.shape[0] - 1

    lon_idx = np.searchsorted(lon_space_hdeg, lon)
    lat_idx = np.searchsorted(lat_space_hdeg, lat)

    if lon_idx < 0 or lon_idx > grd_x_hi:
        return False
    if lat_idx < 0 or lat_idx > grd_y_hi:
        return False

    last_ts = grd_bins[lat_idx, lon_idx]
    if ts - last_ts > t_delta:
        grd_bins[lat_idx, lon_idx] = ts
        return True
    else:
        return False


def check_no_overlap_alt(lon, lat, hgt, ts, grd_bins, t_delta=600.0):
    grd_x_hi = lon_space_hdeg.shape[0] - 1
    grd_y_hi = lat_space_hdeg.shape[0] - 1
    grd_z_hi = hgt_space_3000.shape[0] - 1

    lon_idx = np.searchsorted(lon_space_hdeg, lon)
    lat_idx = np.searchsorted(lat_space_hdeg, lat)
    hgt_idx = np.searchsorted(hgt_space_3000, hgt)

    if lon_idx < 0 or lon_idx > grd_x_hi:
        return False
    if lat_idx < 0 or lat_idx > grd_y_hi:
        return False
    if hgt_idx < 0 or hgt_idx > grd_z_hi:
        return False

    last_ts = grd_bins[hgt_idx, lat_idx, lon_idx]
    if ts - last_ts > t_delta:
        grd_bins[hgt_idx, lat_idx, lon_idx] = ts
        return True
    else:
        return False
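# Illustrative example (hypothetical values, not part of the pipeline): two
# reports in the same half-degree cell pass only if more than t_delta seconds apart.
def _example_overlap_check():
    grd = np.full((lat_space_hdeg.shape[0], lon_space_hdeg.shape[0]), -(600.0 + 1))
    a = check_no_overlap(-90.0, 40.0, 1000.0, grd, t_delta=600.0)  # True: cell is empty
    b = check_no_overlap(-90.0, 40.0, 1300.0, grd, t_delta=600.0)  # False: only 300 s later
    c = check_no_overlap(-90.0, 40.0, 1700.0, grd, t_delta=600.0)  # True: 700 s after the first
    return a, b, c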
# This mostly reduces some categories for a degree of class balancing and removes no intensity reports
def process(ice_dct, no_ice_dct, neg_ice_dct):  # function name and signature assumed; the def line was elided
    t_delta = 600  # seconds

    new_ice_dct = {}
    new_no_ice_dct = {}
    new_neg_ice_dct = {}

    ice_keys_5_6 = []
    print('num keys ice, no_ice, neg_ice: ', len(ice_dct), len(no_ice_dct), len(neg_ice_dct))
    no_intensity_cnt = 0

    # (elided here: loops over ice_dct and no_ice_dct that fill ice_keys_1..ice_keys_5_6,
    #  ice_tidx_1..ice_tidx_5_6, no_ice_keys, no_ice_tidx, num_ice_reports, and no_intensity_cnt)

    neg_ice_keys = []
    neg_ice_tidx = []
    for ts in list(neg_ice_dct.keys()):
        rpts = neg_ice_dct[ts]
        for idx, tup in enumerate(rpts):
            neg_ice_keys.append(ts)
            neg_ice_tidx.append(idx)

    print('num ice reports, no ice, neg ice: ', num_ice_reports, len(no_ice_keys), len(neg_ice_keys))
    ice_keys = np.concatenate([ice_keys_1, ice_keys_2, ice_keys_3, ice_keys_4, ice_keys_5_6])
    ice_tidx = np.concatenate([ice_tidx_1, ice_tidx_2, ice_tidx_3, ice_tidx_4, ice_tidx_5_6])
    sidxs = np.argsort(ice_keys)
    ice_keys = ice_keys[sidxs]
    ice_tidx = ice_tidx[sidxs]

    no_ice_tidx = np.array(no_ice_tidx)
    print('no ice total: ', no_ice_keys.shape[0])
    ridxs = np.random.permutation(np.arange(no_ice_keys.shape[0]))
    no_ice_keys = no_ice_keys[ridxs]
    no_ice_tidx = no_ice_tidx[ridxs]
    sidxs = np.argsort(no_ice_keys)
    no_ice_keys = no_ice_keys[sidxs]
    no_ice_tidx = no_ice_tidx[sidxs]

    all_which = np.concatenate([np.full(len(ice_keys), 1), np.full(len(no_ice_keys), 0)])
    all_keys = np.concatenate([ice_keys, no_ice_keys])
    all_tidx = np.concatenate([ice_tidx, no_ice_tidx])
    sidxs = np.argsort(all_keys)
    all_keys = all_keys[sidxs]
    all_tidx = all_tidx[sidxs]
    all_which = all_which[sidxs]
    grd_bins = np.full((lat_space_hdeg.shape[0], lon_space_hdeg.shape[0]), -(t_delta+1))
    # grd_bins = np.full((hgt_space_3000.shape[0], lat_space_hdeg.shape[0], lon_space_hdeg.shape[0]), -(t_delta+1))

    cnt_i = 0
    cnt_ni = 0
    for idx, key in enumerate(all_keys):
        i_ni = all_which[idx]
        if i_ni == 0:
            rpts = no_ice_dct[key]
            tup = rpts[all_tidx[idx]]
        elif i_ni == 1:
            rpts = ice_dct[key]
            tup = rpts[all_tidx[idx]]
        lat, lon, falt, I, uid, rpt_str = tup  # restored unpack; the original line was elided

        # Keep only reports that don't overlap an earlier one in space and time.
        # if not check_no_overlap_alt(lon, lat, falt, key, grd_bins, t_delta=t_delta):
        if not check_no_overlap(lon, lat, key, grd_bins, t_delta=t_delta):
            continue

        if i_ni == 0:
            cnt_ni += 1
            n_rpts = new_no_ice_dct.get(key)
            if n_rpts is None:
                n_rpts = []
                new_no_ice_dct[key] = n_rpts
            n_rpts.append(tup)
        elif i_ni == 1:
            cnt_i += 1
            n_rpts = new_ice_dct.get(key)
            if n_rpts is None:
                n_rpts = []
                new_ice_dct[key] = n_rpts
            n_rpts.append(tup)

    print('no overlap ICE: ', cnt_i)
    print('no overlap NO ICE: ', cnt_ni)
    # -----------------------------------------------------
    neg_ice_tidx = np.array(neg_ice_tidx)

    # grd_bins = np.full((hgt_space_3000.shape[0], lat_space_hdeg.shape[0], lon_space_hdeg.shape[0]), -(t_delta+1))
    grd_bins = np.full((lat_space_hdeg.shape[0], lon_space_hdeg.shape[0]), -(t_delta+1))

    cnt = 0
    for idx, key in enumerate(neg_ice_keys):
        rpts = neg_ice_dct[key]
        tup = rpts[neg_ice_tidx[idx]]
        lat, lon, falt, I, uid, rpt_str = tup  # restored unpack; the original line was elided

        # if not check_no_overlap_alt(lon, lat, falt, key, grd_bins, t_delta=t_delta):
        if not check_no_overlap(lon, lat, key, grd_bins, t_delta=t_delta):
            continue

        cnt += 1
        n_rpts = new_neg_ice_dct.get(key)
        if n_rpts is None:
            n_rpts = []
            new_neg_ice_dct[key] = n_rpts
        n_rpts.append(tup)

    print('neg icing total no overlap: ', cnt)

    return new_ice_dct, new_no_ice_dct, new_neg_ice_dct  # assumed return; the original tail was elided
# -------------------------------------------------
def analyze2(filename, filename_l1b):
    f = h5py.File(filename, 'r')
    f_l1b = h5py.File(filename_l1b, 'r')  # restored; referenced by the commented L1b reads below

    iint = f['icing_intensity'][:]
    icing_alt = f['icing_altitude'][:]  # was 'flight_altitude'; create_file above writes 'icing_altitude'
    num_obs = iint.shape[0]

    # Broadcast the per-report values across the per-report FOV window (256 pixels), then flatten.
    iint = np.broadcast_to(iint.reshape(num_obs, 1), (num_obs, 256)).flatten()
    icing_alt = np.broadcast_to(icing_alt.reshape(num_obs, 1), (num_obs, 256)).flatten()

    keep_mask = f['FOV_mask'][:, :, :].reshape([num_obs, -1])
    keep_mask = keep_mask.astype(bool)  # np.bool was removed in newer NumPy
    keep_mask = keep_mask.flatten()

    iint = iint[keep_mask]
    icing_alt = icing_alt[keep_mask]

    no_ice = iint == -1  # All no icing
    ice = iint >= 1  # All icing
    tr_ice = (iint == 1) | (iint == 2)  # True for icing cats 1 and 2
    tr_plus_ice = iint > 2  # True for icing cats 3 and above

    print('num no ice: ', np.sum(no_ice))
    print('num ice: ', np.sum(ice))
    print('trace ice: ', np.sum(tr_ice))
    print('trace plus ice: ', np.sum(tr_plus_ice))

    # keep_no_1_2 = (iint == -1) | (iint == 1) | (iint == 2)
    # keep_no_3_4_5 = (iint == -1) | (iint > 2)
    # # iint = iint[keep_no_1_2]
    # iint = iint[keep_no_3_4_5]
    # # icing_alt = icing_alt[keep_no_1_2]
    # icing_alt = icing_alt[keep_no_3_4_5]
    # no_ice = iint == -1
    # ice = iint > 2

    cld_top_hgt = f['cld_height_acha'][:, :, :].flatten()
    cld_top_tmp = f['cld_temp_acha'][:, :, :].flatten()
    sc_cld_frac = f['supercooled_cloud_fraction'][:, :, :].flatten()
    cld_mask = f['cloud_mask'][:, :, :].flatten()
    cld_opd = f['cld_opd_dcomp'][:, :, :].flatten()
    # cld_opd = f['cld_opd_acha'][:, :, :].flatten()
    cld_reff = f['cld_reff_dcomp'][:, :, :].flatten()
    cld_lwc = f['lwc_dcomp'][:, :, :].flatten()
    cld_iwc = f['iwc_dcomp'][:, :, :].flatten()
    # cld_reff = f['cld_reff_acha'][:, :, :].flatten()
    cld_emiss = f['cld_emiss_acha'][:, :, :].flatten()
    # cld_phase = f['cloud_phase'][:, :, :].flatten()
    # solzen = f['solar_zenith_angle'][:, :, :].flatten()
    # bt_10_4 = f_l1b['temp_10_4um_nom'][:, :, :].flatten()

    cld_top_hgt = cld_top_hgt[keep_mask]
    cld_top_tmp = cld_top_tmp[keep_mask]
    sc_cld_frac = sc_cld_frac[keep_mask]
    cld_opd = cld_opd[keep_mask]
    cld_reff = cld_reff[keep_mask]
    cld_lwc = cld_lwc[keep_mask]
    cld_iwc = cld_iwc[keep_mask]
    cld_mask = cld_mask[keep_mask]
    # cld_frac = np.sum(cld_mask, axis=1) / 256  # need to do this earlier
    # Simple model: everything below the melting point is icing=True, else icing=False ------------------------------
    preds = cld_top_tmp < 273.15  # restored predictor; threshold assumed to be 0 C in Kelvin

    true_ice = ice & preds
    false_ice = no_ice & preds
    true_no_ice = no_ice & np.invert(preds)
    false_no_ice = ice & np.invert(preds)

    tp = np.sum(true_ice).astype(np.float32)
    tn = np.sum(true_no_ice).astype(np.float32)
    fp = np.sum(false_ice).astype(np.float32)
    fn = np.sum(false_no_ice).astype(np.float32)

    acc = (tp + tn) / (tp + tn + fp + fn)  # restored; printed below
    recall = tp / (tp + fn)
    precision = tp / (tp + fp)
    f1 = 2 * (precision * recall) / (precision + recall)
    mcc = ((tp * tn) - (fp * fn)) / np.sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn))
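    # Worked example (synthetic counts, for intuition): tp=80, tn=90, fp=10, fn=20
    # gives acc=0.85, recall=0.80, precision~0.889, F1~0.842, and
    # MCC = (80*90 - 10*20) / sqrt(90*100*100*110) ~ 0.704.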
    print('Total (Positive/Icing Prediction): ')
    print('True icing: ', np.sum(true_ice))
    print('-------------------------')
    print('False no icing (False Negative/Miss): ', np.sum(false_no_ice))
    print('---------------------------------------------------')
    print('---------------------------------------------------')
    print('Total (Negative/No Icing Prediction): ')
    print('True no icing: ', np.sum(true_no_ice))
    print('-------------------------')
    print('* False icing (False Positive/False Alarm) *: ', np.sum(false_ice))
    print('-------------------------')
    print('ACC: ', acc)
    print('Recall: ', recall)
    print('Precision: ', precision)
    print('F1: ', f1)
    print('MCC: ', mcc)
    # ------------------------------------------------------------------------------------------------------------
    # (residue of an elided plotting/statistics call: the paired no-icing/icing
    #  samples below were its arguments, and x_a/x_b defined its axis bounds)
    #     cld_top_hgt[no_ice], cld_top_hgt[ice],
    #     cld_top_tmp[no_ice], cld_top_tmp[ice],
    #     cld_reff[no_ice], cld_reff[ice],
    #     cld_lwc[no_ice], cld_lwc[ice],
    #     cld_iwc[no_ice], cld_iwc[ice],
    #     sc_cld_frac[no_ice], sc_cld_frac[ice]
    # y_a = x_a
    # y_b = x_b
    # nx = ny = (x_b - x_a)
    # nx_x_ny = nx * ny
def run_daynight(filename, filename_l1b, day_night='ANY'):
    f = h5py.File(filename, 'r')
    f_l1b = h5py.File(filename_l1b, 'r')

    # Assumed reads; the original lines defining solzen/satzen were elided.
    solzen = f['solar_zenith_angle'][:]
    satzen = f['sensor_zenith_angle'][:]
    num_obs = solzen.shape[0]

    idxs = []
    for i in range(num_obs):
        if not check_oblique(satzen[i,]):
            continue
        if day_night == 'NIGHT' and is_night(solzen[i,]):
            idxs.append(i)
        elif day_night == 'DAY' and is_day(solzen[i,]):
            idxs.append(i)
        elif day_night == 'ANY':
            if is_day(solzen[i,]) or is_night(solzen[i,]):
                idxs.append(i)
    keep_idxs = np.array(idxs)

    data_dct = {}
    for didx, ds_name in enumerate(ds_list):
        data_dct[ds_name] = f[ds_name][keep_idxs,]

    lon_c = f['longitude'][keep_idxs]
    lat_c = f['latitude'][keep_idxs]
    time_s = f['time'][keep_idxs]
    fl_alt_s = f['icing_altitude'][keep_idxs]
    ice_int_s = f['icing_intensity'][keep_idxs]
    unq_ids = f['unique_id'][keep_idxs]

    path, fname = os.path.split(filename)
    fbase, fext = os.path.splitext(fname)
    outfile = path + '/' + fbase + '_' + day_night + fext
    create_file(outfile, data_dct, ds_list, ds_types, lon_c, lat_c, time_s, fl_alt_s, ice_int_s, unq_ids)

    data_dct = {}
    for didx, ds_name in enumerate(l1b_ds_list):
        data_dct[ds_name] = f_l1b[ds_name][keep_idxs]

    path, fname = os.path.split(filename_l1b)
    fbase, fext = os.path.splitext(fname)
    outfile_l1b = path + '/' + fbase + '_' + day_night + fext  # restored to mirror the L2 output path above
    create_file(outfile_l1b, data_dct, l1b_ds_list, l1b_ds_types, lon_c, lat_c, time_s, fl_alt_s, ice_int_s, unq_ids)

    f.close()
    f_l1b.close()
def run_qc(filename, filename_l1b, day_night='ANY', pass_thresh_frac=0.20, cloudy_frac=0.5, icing=True):
    f = h5py.File(filename, 'r')
    icing_alt = f['icing_altitude'][:]