Skip to content
Snippets Groups Projects
Commit 8b083835 authored by Coda Phillips's avatar Coda Phillips
Browse files

update l2bc_utils.py

parent f92fcc58
No related branches found
No related tags found
No related merge requests found
...@@ -14,33 +14,39 @@ import random ...@@ -14,33 +14,39 @@ import random
import warnings import warnings
from collections import defaultdict from collections import defaultdict
import xarray as xr import xarray as xr
import subprocess
import os
# Default directory that scan_files()/get_files() search for .nc files.
ROOT = Path('/media/coda-drive/patmosx_l2bc/l2bc')
def scan_files(root=ROOT):
    """Recursively catalogue every ``.nc`` file under *root* by its name fields.

    File names are expected to look like
    ``patmosx_v06r00_NOAA-09_asc_d19880428_c20210810.cloud_probability.nc``:
    three dot-separated parts, the first of which has six
    underscore-separated fields.

    Parameters
    ----------
    root : str or Path
        Directory to search; converted with ``Path(root)``.

    Returns
    -------
    pandas.DataFrame
        One row per file with columns ``platform``, ``node``, ``date``,
        ``key``, ``path`` and ``created``.  The columns are present even
        when no file is found.

    Raises
    ------
    ValueError
        If a file name does not split into the expected number of parts,
        or a date field does not match ``d%Y%m%d`` / ``c%Y%m%d``.
    """
    root = Path(root)
    columns = ['platform', 'node', 'date', 'key', 'path', 'created']
    records = []
    with tqdm(root.rglob('*.nc')) as bar:
        for f in bar:
            # patmosx_v06r00_NOAA-09_asc_d19880428_c20210810.cloud_probability.nc
            stem, key, _ = f.name.split('.')
            _, _, platform, node, date, created = stem.split('_')
            records.append({
                'platform': platform,
                'node': node,
                'date': datetime.strptime(date, 'd%Y%m%d'),
                'key': key,
                'path': f,
                'created': datetime.strptime(created, 'c%Y%m%d'),
            })
    # Passing the column list keeps the schema intact for an empty scan, so
    # callers can still sort/filter on these columns without a KeyError.
    return pd.DataFrame(records, columns=columns)
def pivot_files(df):
    """Pivot a file catalogue into one row per (platform, node, date).

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with columns ``platform``, ``node``, ``date``, ``key``,
        ``path`` and ``created``.  It is not modified.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``(platform, node, date)`` and sorted on the ``date``
        level, with one column per ``key`` holding the file ``path``.
        When several rows share the same platform/node/date/key, the one
        with the greatest ``created`` value is kept.
    """
    # Sort into a new frame rather than in place so the caller's frame keeps
    # its original row order.
    ordered = df.sort_values('created')
    # After sorting by creation date, "last" is the most recently created file.
    latest = ordered.drop_duplicates(subset=['platform', 'node', 'date', 'key'], keep='last')
    files = latest.pivot(index=['platform', 'node', 'date'], columns='key', values='path')
    return files.sort_index(level='date')
def get_files(root=ROOT):
    """Scan *root* for ``.nc`` files and return them pivoted by key.

    Parameters
    ----------
    root : str or Path
        Directory handed to ``scan_files``.

    Returns
    -------
    pandas.DataFrame
        The result of ``pivot_files`` applied to the scan.

    Raises
    ------
    ValueError
        If the scan finds no ``.nc`` files under *root*.
    """
    df = scan_files(root=root)
    # Fail with a clear message instead of letting the pivot step trip over
    # a frame that has no rows to work with.
    if df.empty:
        raise ValueError(f'No .nc files found in {root}')
    return pivot_files(df)
def _read_worker(q, out_q): def _read_worker(q, out_q):
while True: while True:
try: try:
...@@ -62,7 +68,45 @@ def clear_queue(q): ...@@ -62,7 +68,45 @@ def clear_queue(q):
except Empty: except Empty:
return return
def fast_iter_nc_var(files, *args):
def mask_any(*args):
    """Return a boolean array that is True wherever any input is masked.

    Parameters
    ----------
    *args : array-like
        One or more arrays (masked or plain).  The result takes the shape of
        the first one; every other mask is OR-ed into it in place, so it
        must broadcast to that shape.

    Returns
    -------
    numpy.ndarray
        Boolean array, True where at least one input has a masked element.

    Raises
    ------
    IndexError
        If called with no arguments.
    """
    combined = np.zeros(args[0].shape, dtype=bool)
    for arr in args:
        # getmaskarray always yields a full boolean array, even for inputs
        # that carry no mask at all.
        combined |= np.ma.getmaskarray(arr)
    return combined
def iter_nc_var(files, *args, progress=True, maskany=False):
    """Yield the requested variables for each row of *files*, one row at a time.

    Parameters
    ----------
    files : pandas.DataFrame
        Table whose columns are variable keys and whose values are file
        paths; each row label is unpacked as ``(platform, node, date)``.
    *args : str
        Keys to read.  Each is used both as the column to take the path from
        and as the variable name inside that file.  Rows missing any of the
        requested paths are dropped.
    progress : bool
        Show a tqdm progress bar when True.
    maskany : bool
        When True, mask every array wherever any of them is masked and
        append the inverted combined mask (True = unmasked in all inputs)
        as the last element of ``data``.

    Yields
    ------
    (key, data)
        ``key`` is ``(platform, date, node)`` -- note the order differs from
        the index order.  ``data`` is a list holding ``variables[k][0]`` for
        each requested key, plus the extra mask when ``maskany`` is set.
    """
    args = list(args)
    file_subset = files[args].dropna()
    n_rows = len(file_subset)
    with tqdm(file_subset.iterrows(), disable=not progress, total=n_rows) as bar:
        for i, ((platform, node, date), paths) in enumerate(bar):
            key = platform, date, node
            data = []
            if i + 1 < n_rows:
                # Fire-and-forget `cat` of the next row's files, output
                # discarded -- presumably to pull them into the OS page cache
                # while this row is being processed.
                next_row = file_subset.iloc[i + 1]
                for k in args:
                    subprocess.Popen(['cat', str(next_row[k])], stdout=subprocess.DEVNULL)
            for k in args:
                # The context manager closes the dataset even if the read fails.
                with netCDF4.Dataset(paths[k]) as nc:
                    with warnings.catch_warnings():
                        warnings.simplefilter("ignore")
                        data.append(nc.variables[k][0])
                # Tell the kernel this file's cached pages are no longer needed.
                with open(paths[k]) as fp:
                    os.posix_fadvise(fp.fileno(), 0, 0, os.POSIX_FADV_DONTNEED)
            if maskany:
                mask = mask_any(*data)
                for d in data:
                    d[mask] = np.ma.masked
                data.append(~mask)
            yield key, data
def fast_iter_nc_var(files, *args, thread=True):
args = list(args) args = list(args)
file_subset = files[args].dropna() file_subset = files[args].dropna()
task_queue = Queue() task_queue = Queue()
...@@ -364,7 +408,9 @@ PLATFORM_COLORS = {'noaa-06': 'C0', ...@@ -364,7 +408,9 @@ PLATFORM_COLORS = {'noaa-06': 'C0',
'noaa-18': 'C11', 'noaa-18': 'C11',
'metop-a': 'C12', 'metop-a': 'C12',
'noaa-19': 'C13', 'noaa-19': 'C13',
'metop-b': 'C14'} 'metop-b': 'C14',
'metop-c': 'C15',
}
def plot_mean(mean): def plot_mean(mean):
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment