Skip to content
Snippets Groups Projects
Commit 8b083835 authored by Coda Phillips
Browse files

update l2bc_utils.py

parent f92fcc58
No related branches found
No related tags found
No related merge requests found
......@@ -14,33 +14,39 @@ import random
import warnings
from collections import defaultdict
import xarray as xr
import subprocess
import os
# Default directory that scan_files()/get_files() search for PATMOS-x
# L2b-C NetCDF files. Previously assigned twice back to back; only the
# second value was ever in effect, so the dead first assignment is dropped.
ROOT = Path('/media/coda-drive/patmosx_l2bc/l2bc')
def scan_files(root=ROOT):
    """Index every ``*.nc`` file under ``root`` by the fields in its name.

    File names are expected to look like
    ``patmosx_v06r00_NOAA-09_asc_d19880428_c20210810.cloud_probability.nc``:
    a six-field, underscore-separated stem, then the variable key, then the
    ``nc`` suffix.

    Parameters
    ----------
    root : str or Path
        Directory searched recursively. Defaults to the module-level ``ROOT``.

    Returns
    -------
    pandas.DataFrame
        One row per file with the columns ``platform``, ``node``, ``date``,
        ``key``, ``path`` and ``created``. When no file matches, the frame
        is empty and has no columns.

    Raises
    ------
    ValueError
        If a file name does not split into the expected number of fields,
        or a date field does not match ``d%Y%m%d`` / ``c%Y%m%d``.
    """
    root = Path(root)
    records = []
    with tqdm(root.rglob('*.nc')) as bar:
        for f in bar:
            # patmosx_v06r00_NOAA-09_asc_d19880428_c20210810.cloud_probability.nc
            stem, key, _ = f.name.split('.')
            # Unpack the stem once and emit exactly one record per file; the
            # stale duplicate unpack/append produced a second row per file
            # that lacked the 'created' field.
            _, _, platform, node, date, created = stem.split('_')
            records.append({
                'platform': platform,
                'node': node,
                'date': datetime.strptime(date, 'd%Y%m%d'),
                'key': key,
                'path': f,
                'created': datetime.strptime(created, 'c%Y%m%d'),
            })
    return pd.DataFrame(records)
def get_files(root=ROOT):
    """Scan ``root`` and fail loudly when it contains no ``.nc`` files.

    This definition only validates: it has no ``return`` statement, so it
    yields ``None`` when at least one file is found.

    NOTE(review): a second ``def get_files`` further down this file rebinds
    the name, so this version is shadowed once the module is imported.
    Confirm whether it is a leftover that should be removed.

    Raises
    ------
    ValueError
        If the scan of ``root`` comes back empty.
    """
    root = Path(root)
    df = scan_files(root)
    if not df.empty:
        return None
    raise ValueError(f'No .nc files found in {root}')
def pivot_files(df):
    """Pivot a file listing into one row per (platform, node, date).

    Parameters
    ----------
    df : pandas.DataFrame
        Listing with the columns ``platform``, ``node``, ``date``, ``key``,
        ``path`` and ``created``. It is not modified.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``(platform, node, date)`` and sorted on the ``date``
        level, with one column per ``key`` holding the matching ``path``.
        Combinations with no file for a given key hold NaN.
    """
    # Sort a copy rather than in place so the caller's frame keeps its row
    # order; ascending 'created' makes keep='last' retain the newest file
    # among duplicates of the same (platform, node, date, key).
    newest_last = df.sort_values('created')
    unique = newest_last.drop_duplicates(
        subset=['platform', 'node', 'date', 'key'], keep='last')
    files = unique.pivot(
        index=['platform', 'node', 'date'], columns='key', values='path'
    ).sort_index(level='date')
    return files
def get_files(root=ROOT):
    """Scan ``root`` for ``.nc`` files and return them as a pivoted table.

    Parameters
    ----------
    root : str or Path
        Directory to scan. Defaults to the module-level ``ROOT``.

    Returns
    -------
    pandas.DataFrame
        The result of ``pivot_files`` applied to the ``scan_files`` listing.

    Raises
    ------
    ValueError
        If no ``.nc`` files are found under ``root``.
    """
    root = Path(root)
    df = scan_files(root=root)
    if df.empty:
        # An empty scan yields a frame without a 'created' column, which
        # would otherwise surface as an opaque KeyError inside pivot_files().
        raise ValueError(f'No .nc files found in {root}')
    return pivot_files(df)
def _read_worker(q, out_q):
while True:
try:
......@@ -62,7 +68,45 @@ def clear_queue(q):
except Empty:
return
def fast_iter_nc_var(files, *args):
def mask_any(*args):
    """Return a boolean array that is True wherever any input is masked.

    The result takes the shape of the first argument. Inputs that are not
    masked arrays contribute an all-False mask.
    """
    combined = np.zeros(args[0].shape, dtype=bool)
    for arr in args:
        # Accumulate in place so every input must fit the first one's shape.
        np.logical_or(combined, np.ma.getmaskarray(arr), out=combined)
    return combined
def iter_nc_var(files, *args, progress=True, maskany=False):
    """Yield the requested variables for every row that has all of them.

    Parameters
    ----------
    files : pandas.DataFrame
        Table indexed by ``(platform, node, date)`` with one column of file
        paths per variable key.
    *args : str
        Variable keys to read. Each key is both the column looked up in
        ``files`` and the variable name read from that file.
    progress : bool
        Show a tqdm progress bar when True.
    maskany : bool
        When True, mask every array wherever any of them is masked and
        append the inverted combined mask as a final list element.

    Yields
    ------
    tuple
        ``(key, data)`` where ``key`` is ``(platform, date, node)`` (note
        that this is not the index order) and ``data`` is a list holding
        index 0 of each requested variable, in ``args`` order.
    """
    args = list(args)
    # Rows missing a path for any requested key are skipped entirely.
    file_subset = files[args].dropna()
    n_rows = len(file_subset)
    with tqdm(file_subset.iterrows(), disable=not progress, total=n_rows) as bar:
        for i, ((platform, node, date), paths) in enumerate(bar):
            key = platform, date, node
            if i + 1 < n_rows:
                next_row = file_subset.iloc[i + 1]
                for k in args:
                    # Fire-and-forget read of the next row's file while this
                    # row is processed; presumably a page-cache prefetch.
                    subprocess.Popen(['cat', str(next_row[k])],
                                     stdout=subprocess.DEVNULL)
            data = []
            for k in args:
                nc = netCDF4.Dataset(paths[k])
                try:
                    with warnings.catch_warnings():
                        warnings.simplefilter("ignore")
                        data.append(nc.variables[k][0])
                finally:
                    # Close the dataset even when the read raises.
                    nc.close()
                with open(paths[k]) as fp:
                    # Advise the kernel that this file's cached pages are no
                    # longer needed now that it has been read.
                    os.posix_fadvise(fp.fileno(), 0, 0, os.POSIX_FADV_DONTNEED)
            if maskany:
                mask = mask_any(*data)
                for d in data:
                    d[mask] = np.ma.masked
                # Final element is True where no input was masked.
                data.append(~mask)
            yield key, data
def fast_iter_nc_var(files, *args, thread=True):
args = list(args)
file_subset = files[args].dropna()
task_queue = Queue()
......@@ -364,7 +408,9 @@ PLATFORM_COLORS = {'noaa-06': 'C0',
'noaa-18': 'C11',
'metop-a': 'C12',
'noaa-19': 'C13',
'metop-b': 'C14'}
'metop-b': 'C14',
'metop-c': 'C15',
}
def plot_mean(mean):
import matplotlib.pyplot as plt
......
0% Loading. Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment