Skip to content
Snippets Groups Projects
Commit f4b16bfb authored by tomrink's avatar tomrink
Browse files

Improved processing of the PIREP dictionary: check for spatial and temporal overlaps

parent f63a254e
No related branches found
No related tags found
No related merge requests found
...@@ -474,8 +474,36 @@ def analyze(ice_dct, no_ice_dct): ...@@ -474,8 +474,36 @@ def analyze(ice_dct, no_ice_dct):
print(dt_str[2:]) print(dt_str[2:])
# Half-degree bin edges used to de-duplicate reports in space:
# 721 longitude points over [-180, 180] and 361 latitude points over [-90, 90].
lon_space_hdeg = np.linspace(-180, 180, 721)
lat_space_hdeg = np.linspace(-90, 90, 361)


def check_no_overlap(lon, lat, ts, grd_bins, t_delta=600.0):
    """Return True if a report does not overlap an earlier one in its grid cell.

    The report is assigned to a cell of the half-degree grid defined by
    ``lon_space_hdeg`` / ``lat_space_hdeg``.  It is accepted when more than
    ``t_delta`` has elapsed since the time stored for that cell, in which case
    the cell is stamped with ``ts``.

    Args:
        lon: Longitude of the report, looked up in ``lon_space_hdeg``.
        lat: Latitude of the report, looked up in ``lat_space_hdeg``.
        ts: Time of the report, in the same units as ``t_delta``.
        grd_bins: 2-D array indexed ``[lat_idx, lon_idx]`` holding the last
            accepted time per cell.  Updated in place when the report is
            accepted.
        t_delta: Minimum time separation required between two accepted
            reports falling in the same cell.

    Returns:
        True if the report was accepted (and ``grd_bins`` updated), False if
        it lies beyond the upper end of the grid or is too close in time to
        the last accepted report in its cell.
    """
    # The bounds must come from the same arrays that searchsorted indexes
    # into; otherwise valid cells are rejected (or the names are undefined).
    grd_x_hi = lon_space_hdeg.shape[0] - 1
    grd_y_hi = lat_space_hdeg.shape[0] - 1

    # searchsorted returns an insertion index in [0, len]; it is never
    # negative, so only the upper end can fall outside the grid.  Values below
    # the first grid point land in index 0.
    lon_idx = np.searchsorted(lon_space_hdeg, lon)
    lat_idx = np.searchsorted(lat_space_hdeg, lat)
    if lon_idx > grd_x_hi or lat_idx > grd_y_hi:
        return False

    last_ts = grd_bins[lat_idx, lon_idx]
    if ts - last_ts > t_delta:
        grd_bins[lat_idx, lon_idx] = ts
        return True
    return False
# This mostly reduces some categories for a degree of class balancing and removes no intensity reports # This mostly reduces some categories for a degree of class balancing and removes no intensity reports
def process(ice_dct, no_ice_dct, neg_ice_dct): def process(ice_dct, no_ice_dct, neg_ice_dct):
t_delta = 600 # seconds
new_ice_dct = {} new_ice_dct = {}
new_no_ice_dct = {} new_no_ice_dct = {}
new_neg_ice_dct = {} new_neg_ice_dct = {}
...@@ -526,10 +554,12 @@ def process(ice_dct, no_ice_dct, neg_ice_dct): ...@@ -526,10 +554,12 @@ def process(ice_dct, no_ice_dct, neg_ice_dct):
no_ice_tidx.append(idx) no_ice_tidx.append(idx)
neg_ice_keys = [] neg_ice_keys = []
neg_ice_tidx = []
for ts in list(neg_ice_dct.keys()): for ts in list(neg_ice_dct.keys()):
rpts = neg_ice_dct[ts] rpts = neg_ice_dct[ts]
for tup in rpts: for idx, tup in enumerate(rpts):
neg_ice_keys.append(ts) neg_ice_keys.append(ts)
neg_ice_tidx.append(idx)
print('num ice reports, no ice, neg ice: ', num_ice_reports, len(no_ice_keys), len(neg_ice_keys)) print('num ice reports, no ice, neg ice: ', num_ice_reports, len(no_ice_keys), len(neg_ice_keys))
print('------------------------------------------------') print('------------------------------------------------')
...@@ -571,15 +601,23 @@ def process(ice_dct, no_ice_dct, neg_ice_dct): ...@@ -571,15 +601,23 @@ def process(ice_dct, no_ice_dct, neg_ice_dct):
ice_keys = ice_keys[sidxs] ice_keys = ice_keys[sidxs]
ice_tidx = ice_tidx[sidxs] ice_tidx = ice_tidx[sidxs]
grd_bins = np.full((lat_space_hdeg.shape[0], lon_space_hdeg.shape[0]), -(t_delta+1))
cnt = 0
for idx, key in enumerate(ice_keys): for idx, key in enumerate(ice_keys):
rpts = ice_dct[key] rpts = ice_dct[key]
tup = rpts[ice_tidx[idx]] tup = rpts[ice_tidx[idx]]
lat, lon = tup[0], tup[1]
if not check_no_overlap(lon, lat, key, grd_bins, t_delta=t_delta):
continue
cnt += 1
n_rpts = new_ice_dct.get(key) n_rpts = new_ice_dct.get(key)
if n_rpts is None: if n_rpts is None:
n_rpts = [] n_rpts = []
new_ice_dct[key] = n_rpts new_ice_dct[key] = n_rpts
n_rpts.append(tup) n_rpts.append(tup)
print('icing total no overlap: ', cnt)
# ----------------------------------------------------- # -----------------------------------------------------
no_ice_keys = np.array(no_ice_keys) no_ice_keys = np.array(no_ice_keys)
...@@ -589,35 +627,68 @@ def process(ice_dct, no_ice_dct, neg_ice_dct): ...@@ -589,35 +627,68 @@ def process(ice_dct, no_ice_dct, neg_ice_dct):
ridxs = np.random.permutation(np.arange(no_ice_keys.shape[0])) ridxs = np.random.permutation(np.arange(no_ice_keys.shape[0]))
no_ice_keys = no_ice_keys[ridxs] no_ice_keys = no_ice_keys[ridxs]
no_ice_tidx = no_ice_tidx[ridxs] no_ice_tidx = no_ice_tidx[ridxs]
no_ice_keys = no_ice_keys[::10] no_ice_keys = no_ice_keys[::5]
no_ice_tidx = no_ice_tidx[::10] no_ice_tidx = no_ice_tidx[::5]
print('no ice reduced: ', no_ice_keys.shape[0]) print('no ice reduced: ', no_ice_keys.shape[0])
sidxs = np.argsort(no_ice_keys) sidxs = np.argsort(no_ice_keys)
no_ice_keys = no_ice_keys[sidxs] no_ice_keys = no_ice_keys[sidxs]
no_ice_tidx = no_ice_tidx[sidxs] no_ice_tidx = no_ice_tidx[sidxs]
grd_bins = np.full((lat_space_hdeg.shape[0], lon_space_hdeg.shape[0]), -(t_delta+1))
cnt = 0
for idx, key in enumerate(no_ice_keys): for idx, key in enumerate(no_ice_keys):
rpts = no_ice_dct[key] rpts = no_ice_dct[key]
tup = rpts[no_ice_tidx[idx]] tup = rpts[no_ice_tidx[idx]]
lat, lon = tup[0], tup[1]
if not check_no_overlap(lon, lat, key, grd_bins, t_delta=t_delta):
continue
cnt += 1
n_rpts = new_no_ice_dct.get(key) n_rpts = new_no_ice_dct.get(key)
if n_rpts is None: if n_rpts is None:
n_rpts = [] n_rpts = []
new_no_ice_dct[key] = n_rpts new_no_ice_dct[key] = n_rpts
n_rpts.append(tup) n_rpts.append(tup)
print('no icing total no overlap: ', cnt)
# ------------------------------------------------- # -------------------------------------------------
# Previous for neg icing
# neg_ice_keys = np.array(neg_ice_keys)
# print('neg ice total: ', neg_ice_keys.shape[0])
# np.random.seed(42)
# np.random.shuffle(neg_ice_keys)
# neg_ice_keys = neg_ice_keys[0:12000]
# uniq_sorted_neg_ice = np.unique(neg_ice_keys)
# print('neg ice reduced: ', uniq_sorted_neg_ice.shape)
#
# for key in uniq_sorted_neg_ice:
# new_neg_ice_dct[key] = neg_ice_dct[key]
# -----------------------------------------------------
neg_ice_keys = np.array(neg_ice_keys) neg_ice_keys = np.array(neg_ice_keys)
neg_ice_tidx = np.array(neg_ice_tidx)
print('neg ice total: ', neg_ice_keys.shape[0]) print('neg ice total: ', neg_ice_keys.shape[0])
np.random.seed(42)
np.random.shuffle(neg_ice_keys)
neg_ice_keys = neg_ice_keys[0:12000]
uniq_sorted_neg_ice = np.unique(neg_ice_keys)
print('neg ice reduced: ', uniq_sorted_neg_ice.shape)
for key in uniq_sorted_neg_ice: grd_bins = np.full((lat_space_hdeg.shape[0], lon_space_hdeg.shape[0]), -(t_delta+1))
new_neg_ice_dct[key] = neg_ice_dct[key] cnt = 0
for idx, key in enumerate(neg_ice_keys):
rpts = neg_ice_dct[key]
tup = rpts[neg_ice_tidx[idx]]
lat, lon = tup[0], tup[1]
if not check_no_overlap(lon, lat, key, grd_bins, t_delta=t_delta):
continue
cnt += 1
n_rpts = new_neg_ice_dct.get(key)
if n_rpts is None:
n_rpts = []
new_neg_ice_dct[key] = n_rpts
n_rpts.append(tup)
print('neg icing total no overlap: ', cnt)
# -------------------------------------------------
return new_ice_dct, new_no_ice_dct, new_neg_ice_dct return new_ice_dct, new_no_ice_dct, new_neg_ice_dct
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment