aeolus.py 17.49 KiB
import datetime
from datetime import timezone
import glob
import numpy as np
# from cartopy import *
from metpy import *
import h5py
from netCDF4 import Dataset
from util.lon_lat_grid import earth_to_indexs
from util.geos_nav import GEOSNavigation
from util.util import bin_data_by, get_bin_ranges
import math
class MyGenericException(Exception):
def __init__(self, message):
self.message = message
datapath = '/apollo/cloud/personal/stevew/IWW15/G16/fulldisk/CLAVRX'
datadirs = ['20190825', '20190826', '20190827', '20190828', '20190829', '20190830', '20190831', '20190901', '20190902', '20190903', '20190904', '20190905']
amv_hdr_list = ['lat', 'lon', 'tbox', 'sbox', 'spd', 'dir', 'pres', 'lowl', 'mspd', 'mdir',
'alb', 'corr', 'tmet', 'perr', 'qi', 'cqi', 'qif']
raob_hdr_list = ['pres', 'temp', 'dir', 'spd']
raob_spd_idx = raob_hdr_list.index('spd')
raob_dir_idx = raob_hdr_list.index('dir')
amv_lon_idx = amv_hdr_list.index('lon')
amv_lat_idx = amv_hdr_list.index('lat')
amv_pres_idx = amv_hdr_list.index('pres')
raob_pres_idx = raob_hdr_list.index('pres')
amv_spd_idx = amv_hdr_list.index('spd')
amv_dir_idx = amv_hdr_list.index('dir')
amv_prs_idx = amv_hdr_list.index('pres')
amv_qi_idx = amv_hdr_list.index('qi')
amv_cqi_idx = amv_hdr_list.index('cqi')
amv_qif_idx = amv_hdr_list.index('qif')
first_time = True
ftimes = []
flist = None
# H08 range we'll use for now
H08_lon_range = [64, 216] # 0, 360
H08_lat_range = [-70, 70]
G16_lon_range = [-130.0, -30.0]
G16_lat_range = [-64.0, 64.0]
bin_ranges = []
bin_ranges.append([300.0, 2000.0])
bin_ranges.append([2000.0, 8000.0])
bin_ranges.append([8000.0, 20000.0])
lat_ranges = get_bin_ranges(-70, 70, 10)
half_width = 30
num_elems = 5424
num_lines = 5424
def get_filelist():
filelist = []
for i in range(len(datadirs)):
dir = datadirs[i]
dirpath = os.path.join(datapath, dir)
list = glob.glob(os.path.join(dirpath, "*.nc"))
filelist = filelist + list
return filelist
def get_datetime(pathname):
fname = os.path.split(pathname)[1]
toks = fname.split('_')
dtstr = toks[4]
dtstr = dtstr[1:12]
dto = datetime.datetime.strptime(dtstr, '%Y%j%H%M').replace(tzinfo=timezone.utc)
return dto
def get_datetime_2(pathname):
fname = os.path.split(pathname)[1]
toks = fname.split('.')
dstr = toks[2]
tstr = toks[3]
dtstr = dstr+'.'+tstr
dto = datetime.datetime.strptime(dtstr, '%Y%j.%H%M').replace(tzinfo=timezone.utc)
return dto
def get_datetime_3(pathname):
fname = os.path.split(pathname)[1]
toks = fname.split('_')
dstr = toks[4]
tstr = toks[5]
dtstr = dstr+tstr
dto = datetime.datetime.strptime(dtstr, '%Y%j%H%M').replace(tzinfo=timezone.utc)
return dto
def get_file_containing_time(timestamp, files_path, dto_func, file_time_span):
global first_time, ftimes, flist
if first_time is True:
if files_path is not None:
flist = glob.glob(files_path + '*.nc')
else:
flist = get_filelist()
for pname in flist: # TODO: make better with regular expressions (someday)
dto = dto_func(pname)
dto_start = dto
dto_end = dto + datetime.timedelta(minutes=file_time_span)
ftimes.append((dto_start.timestamp(), dto_end.timestamp()))
first_time = False
k = -1
for i in range(len(ftimes)):
if (timestamp >= ftimes[i][0]) and (timestamp < ftimes[i][1]):
k = i
break
if k < 0:
return None, None, None
return flist[k], ftimes[k], k
# imports the S4 NOAA output
# filename: full path as a string, '/home/user/filename'
# returns a dict: time -> list of profiles (a profile is a list of levels)
def get_aeolus_time_dict(filename, lon360=False, do_sort=True):
time_dict = {}
with open(filename) as file:
while True:
prof_hdr = file.readline()
if not prof_hdr:
break
toks = prof_hdr.split()
yr = int(float(toks[0]))
mon = int(float(toks[1]))
dy = int(float(toks[2]))
hr = int(float(toks[3]))
mn = int(float(toks[4]))
ss = int(float(toks[5]))
lon = float(toks[6])
lat = float(toks[7])
nlevs = int(toks[8])
if lon360:
if lon < 0:
lon += 360.0
else:
if lon > 180.0:
lon -= 360.0
dto = datetime.datetime(year=yr, month=mon, day=dy, hour=hr, minute=mn, second=ss)
dto = dto.replace(tzinfo=timezone.utc)
timestamp = dto.timestamp()
prof = []
if time_dict.get(timestamp, -1) == -1:
prof_s = []
prof_s.append(prof)
time_dict[timestamp] = prof_s
else:
prof_s = time_dict.get(timestamp)
prof_s.append(prof)
for k in range(nlevs):
line = file.readline()
toks = line.split()
lvlidx = int(toks[0])
hhh = float(toks[1]) * 1000.0
hht = float(toks[2]) * 1000.0
hhb = float(toks[3]) * 1000.0
err = float(toks[4])
azm = float(toks[5])
ws = float(toks[6])
len = float(toks[7])
tup = (lat, lon, hhh, hht, hhb, azm, ws)
prof.append(tup)
file.close()
if do_sort:
keys = np.array(list(time_dict.keys()))
keys.sort()
keys = keys.tolist()
sorted_time_dict = {}
for key in keys:
sorted_time_dict[key] = time_dict.get(key)
time_dict = sorted_time_dict
return time_dict
def time_dict_to_cld_layers(time_dict):
time_dict_layers = {}
keys = list(time_dict.keys())
for key in keys:
prof_s = time_dict[key]
layers = []
prof = prof_s[0]
if len(prof) == 1:
tup = prof[0]
layers.append((tup[0], tup[1], tup[3], tup[4]))
time_dict_layers[key] = layers
continue
top = -9999.9
last_bot = -9999.9
tup = None
for i in range(len(prof)):
tup = prof[i]
if i == 0:
top = tup[3]
bot = tup[4]
last_bot = bot
else:
if math.fabs(last_bot - tup[3]) > 10.0:
layers.append((tup[0], tup[1], top, last_bot))
top = tup[3]
last_bot = tup[4]
layers.append((tup[0], tup[1], top, tup[4]))
time_dict_layers[key] = layers
return time_dict_layers
def time_dict_to_nd_2(time_dict):
keys = list(time_dict.keys())
for key in keys:
vals = time_dict[key]
if vals is not None:
time_dict[key] = np.stack(vals)
return time_dict
# make each profile at a timestamp a numpy array
def time_dict_to_nd(time_dict):
keys = list(time_dict.keys())
for key in keys:
vals = time_dict[key]
if vals is not None:
for i in range(len(vals)):
nda = np.array(vals[i])
vals[i] = nda
return time_dict
def get_cloud_layers_dict(filename, lon360=False):
a_d = get_aeolus_time_dict(filename, lon360=lon360)
c_d = time_dict_to_cld_layers(a_d)
cld_lyr_dct = time_dict_to_nd_2(c_d)
return cld_lyr_dct
def compare_aeolus_max_height(aeolus_dict, files_path, grid_value_name='cld_height_acha', one_cld_layer_only=False, highest_layer=False):
if files_path is not None:
f_list = glob.glob(files_path + '*.nc')
else:
f_list = get_filelist()
num_files = len(f_list)
keys = list(aeolus_dict.keys())
a_lons = [[] for i in range(num_files)]
a_lats = [[] for i in range(num_files)]
a_hgts = [[] for i in range(num_files)]
a_profs = [[] for i in range(num_files)]
a_times = [[] for i in range(num_files)]
for key in keys:
_, _, f_idx = get_file_containing_time(key, files_path, get_datetime, 10)
if f_idx is None:
continue
layers = aeolus_dict.get(key)
if layers is None:
continue
lat = layers[0, 0]
lon = layers[0, 1]
# check range
if lat > G16_lat_range[1] or lat < G16_lat_range[0]:
continue
if lon > G16_lon_range[1] or lon < G16_lon_range[0]:
continue
# ------------------------------------------------
a_lons[f_idx].append(lon)
a_lats[f_idx].append(lat)
a_profs[f_idx].append(layers)
a_times[f_idx].append(key)
for f_idx in range(num_files):
a_lons[f_idx] = np.array(a_lons[f_idx])
a_lats[f_idx] = np.array(a_lats[f_idx])
a_hgts[f_idx] = np.array(a_hgts[f_idx])
a_times[f_idx] = np.array(a_times[f_idx])
a_profs[f_idx] = np.array(a_profs[f_idx])
grd_hgt = []
grd_mean = []
grd_std = []
hits = []
num_layers = []
lon = []
lat = []
layer_thickness = []
hgt_diff = []
hgt_top_max_diff_s = []
layer_bot = []
layer_top = []
times = []
# Loop over all Datasets
vld_grd = 0
num_hits = 0
total = 0
for f_idx, file in enumerate(f_list):
try:
h5f = h5py.File(file, 'r')
except Exception:
h5f.close()
print('Problem with file: ', file)
continue
try:
grd_vals = get_grid_values(h5f, grid_value_name)
except Exception:
print('can\'t read grid')
h5f.close()
continue
nn_idxs = earth_to_indexs(a_lons[f_idx], a_lats[f_idx], grd_vals.shape[0])
if len(nn_idxs) == 0:
continue
on_earth = nn_idxs != -1
nn_idxs = nn_idxs[on_earth]
grd_crds = np.unravel_index(nn_idxs, grd_vals.shape)
a_lons[f_idx] = a_lons[f_idx][on_earth]
a_lats[f_idx] = a_lats[f_idx][on_earth]
a_profs[f_idx] = a_profs[f_idx][on_earth]
a_times[f_idx] = a_times[f_idx][on_earth]
total += len(nn_idxs)
# loop over Aeolus profile valid in the file
for k in range(len(nn_idxs)):
j = grd_crds[0][k]
i = grd_crds[1][k]
grd_cntr_val = grd_vals[j, i]
grd_vals_3x3 = grd_vals[j-1:j+2, i-1:i+2]
grd_vals_3x3 = grd_vals_3x3.flatten()
# if np.sum(np.isnan(grd_vals_3x3)) > 4:
# continue
keep = np.invert(np.isnan(grd_vals_3x3))
grd_3x3_mean = np.mean(grd_vals_3x3[keep])
grd_3x3_std = np.std(grd_vals_3x3[keep])
# grd_max = np.max(grd_vals_3x3[keep])
# bt_vals_5x5 = bt_vals[j-2:j+3, i-2:i+3]
# bt_vals_5x5 = bt_vals_5x5.flatten()
# keep = np.invert(np.isnan(bt_vals_5x5))
# bt_5x5_mean = np.mean(bt_vals_5x5[keep])
# bt_5x5_std = np.std(bt_vals_5x5[keep])
grd_vals_1x1 = grd_vals[j, i]
grd_vals_1x1 = grd_vals_1x1.flatten()
if (np.isnan(grd_vals_1x1[0])):
continue
prof = a_profs[f_idx][k]
max_idx = np.argmax(prof[:, 2])
hht_max = prof[max_idx, 2]
nlayers = prof.shape[0]
lyr_idxs = [*range(nlayers)]
if highest_layer:
lyr_idxs = [max_idx]
if one_cld_layer_only and nlayers > 1:
continue
vld_grd += 1
hit = False
lyr_width = None
hgt_top_diff = None
hgt_top_max_diff = None
tops = []
bots = []
for j in lyr_idxs:
hhb = prof[j, 3]
hht = prof[j, 2]
bots.append(hhb)
tops.append(hht)
lyr_width = hht - hhb
hhb -= 100
hht += 100
cnt = 0
for i in range(len(grd_vals_1x1)):
ghgt = grd_vals_1x1[i]
if np.isnan(ghgt):
continue
hgt_top_diff = ghgt - hht
hgt_top_max_diff = ghgt - hht_max
if ghgt >= hhb and ghgt <= hht:
cnt += 1
if cnt > 0:
hit = True
num_hits += 1
break
hits.append(hit)
grd_hgt.append(grd_vals_1x1)
grd_mean.append(grd_3x3_mean)
grd_std.append(grd_3x3_std)
lon.append(a_lons[f_idx][k])
lat.append(a_lats[f_idx][k])
times.append(a_times[f_idx][k])
layer_thickness.append(lyr_width)
hgt_diff.append(hgt_top_diff)
hgt_top_max_diff_s.append(hgt_top_max_diff)
num_layers.append(nlayers)
layer_bot.append(bots)
layer_top.append(tops)
h5f.close()
print('Overall: ', num_hits, vld_grd, total)
print('*')
hits = np.array(hits)
grd_hgt = np.array(grd_hgt)
grd_mean = np.array(grd_mean)
grd_std = np.array(grd_std)
num_layers = np.array(num_layers)
lon = np.array(lon)
lat = np.array(lat)
hgt_diff = np.array(hgt_diff)
hgt_top_max_diff_s = np.array(hgt_top_max_diff_s)
layer_thickness = np.array(layer_thickness)
layer_bot = np.array(layer_bot)
layer_top = np.array(layer_top)
times = np.array(times)
print('AEOLUS num layers: ')
print(np.average(num_layers), np.std(num_layers))
print('--------------------')
print('Grid cntr height: ')
print(np.average(grd_hgt[hits]))
print(np.average(grd_hgt[np.invert(hits)]))
print('--------------------')
print('3x3 mean: ')
print(np.average(grd_mean[hits]))
print(np.average(grd_mean[np.invert(hits)]))
print('--------------------')
print('3x3 std: ')
print(np.average(grd_std[hits]))
print(np.average(grd_std[np.invert(hits)]))
print('--------------------')
print('layer thickness: ')
print(np.average(layer_thickness[hits]))
print(np.average(layer_thickness[np.invert(hits)]))
print('---------------------')
print('CLAVR - AEOLUS TOP: ')
print(np.average(hgt_diff[hits]), np.std(hgt_diff[hits]))
print(np.average(hgt_top_max_diff_s[np.invert(hits)]))
print('---------------------')
print('CLVR grid cntr height histo: ')
print(np.histogram(grd_hgt, bins=8))
print('----------------------')
bdat = bin_data_by(hgt_top_max_diff_s, grd_hgt[:, 0], bin_ranges=bin_ranges)
print('CLAVR - (HIGHEST AEOLUS CLD LYR TOP: ')
print('All: ')
print(np.average(hgt_top_max_diff_s), np.std(hgt_top_max_diff_s))
print('Low: ')
print(np.average(bdat[0]), np.std(bdat[0]))
print('Mid: ')
print(np.average(bdat[1]), np.std(bdat[1]))
print('High: ')
print(np.average(bdat[2]), np.std(bdat[2]))
print('------------------------')
# bdat = bin_data_by(hgt_top_max_diff_s, lat[:, 0], bin_ranges=lat_ranges)
# lat_cntrs = []
# avg_dz = []
# std_dz = []
# for i in range(len(lat_ranges)):
# lat_cntrs.append((lat_ranges[i][0] + lat_ranges[i][1])/2)
# avg_dz.append(np.average(bdat[i]))
# std_dz.append(np.std(bdat[i]))
return layer_bot, layer_top, grd_hgt, lat, lon, times
def get_grid_values(h5f, grid_name, scale_factor_name='scale_factor', add_offset_name='add_offset'):
hfds = h5f[grid_name]
attrs = hfds.attrs
grd_vals = hfds[:, :]
grd_vals = np.where(grd_vals == -999, np.nan, grd_vals)
grd_vals = np.where(grd_vals == -32768, np.nan, grd_vals)
if scale_factor_name is not None:
scale_factor = attrs.get(scale_factor_name)[0]
grd_vals = grd_vals * scale_factor
if add_offset_name is not None:
add_offset = attrs.get(add_offset_name)[0]
grd_vals = grd_vals + add_offset
return grd_vals
def analyze_aelolus_cld_layers(cld_layer_dct):
keys = list(cld_layer_dct.keys())
num_single_layer = 0
num_multi_layer = 0
num_cld_layers = 0
lats = []
lons = []
lats_sl = []
lons_sl = []
cld_tops = []
cld_thickness = []
cld_cntr = []
num_layers = []
max_top = []
for key in keys:
layers = cld_layer_dct[key]
lat = layers[0, 0]
lon = layers[0, 1]
n_lyrs = layers.shape[0]
if n_lyrs > 1:
num_multi_layer += 1
else:
num_single_layer += 1
num_cld_layers += 1
num_layers.append(n_lyrs)
idx_max = np.argmax(layers[:, 2])
max_top.append(layers[idx_max, 2])
lats_sl.append(lat)
lons_sl.append(lon)
for k in range(n_lyrs):
# hhh = layers[k, 2]
hht = layers[k, 2]
hhb = layers[k, 3]
cld_tops.append(hht)
cld_thickness.append(hht - hhb)
cld_cntr.append((hht - hhb)/2)
lats.append(lat)
lons.append(lon)
lats = np.array(lats)
lons = np.array(lons)
cld_tops = np.array(cld_tops)
cld_thickness = np.array(cld_thickness)
cld_cntr = np.array(cld_cntr)
max_top = np.array(max_top)
lats_sl = np.array(lats_sl)
lons_sl = np.array(lons_sl)
print(num_cld_layers, num_single_layer, num_multi_layer)
mt_by_lat = bin_data_by(max_top, lats_sl, lat_ranges)
lat_cntrs = []
avg_maxz = []
std_maxz = []
for i in range(len(lat_ranges)):
lat_cntrs.append((lat_ranges[i][0] + lat_ranges[i][1])/2)
avg_maxz.append(np.average(mt_by_lat[i]))
std_maxz.append(np.std(mt_by_lat[i]))
return cld_thickness, max_top, avg_maxz, std_maxz, lat_cntrs