# aggregate.py (6.89 KiB) -- authored by Greg Quinn
import numpy
import os
import sys
from pyhdf.SD import SD, SDC
from util import HdfWriter
# helper functions to allow the classes below to be used with either
# one or two dimensions
def histogram_init_helper(num_bins, dtype):
    """Return a zeroed histogram counts array.

    num_bins is either an int (1-D histogram) or a tuple of ints
    (2-D histogram); dtype is the numpy element type.
    """
    # isinstance() is the idiomatic type check; unlike type() == it
    # also accepts tuple subclasses
    if isinstance(num_bins, tuple):
        return numpy.zeros(num_bins, dtype)
    return numpy.zeros((num_bins,), dtype)
def histogram_helper(arr, **kwargs):
    """Histogram arr and return only the counts array.

    arr is either a single array (1-D histogram) or an (x, y) tuple of
    arrays (2-D histogram).  Extra keyword arguments (bins, range,
    weights, ...) are passed straight through to the underlying numpy
    routine.
    """
    # isinstance() rather than type() == : accepts tuple subclasses too
    if isinstance(arr, tuple):
        return numpy.histogram2d(arr[0], arr[1], **kwargs)[0]
    return numpy.histogram(arr, **kwargs)[0]
# class for building up a histogram in parts
class Histogram:
    """A histogram over a fixed value range, accumulated incrementally
    through repeated calls to add()."""

    def __init__(self, min_value, max_value, num_bins):
        self.range = (min_value, max_value)
        self.num_bins = num_bins
        # running per-bin counts
        self.hist = histogram_init_helper(num_bins, numpy.int32)

    def add(self, arr):
        """Bin the samples in arr and fold them into the running counts."""
        counts = histogram_helper(arr, bins=self.num_bins, range=self.range)
        self.hist += counts

    def get(self):
        """Return the accumulated counts array."""
        return self.hist
# class for taking statistics on some value (e.g., brightness
# temperature differences between AIRS and MODIS) in terms of some
# basis (e.g. Earth location or instrument scan angle). like
# Histogram, designed for building up these stats via successive
# calls to add()
class Stats:
    """Accumulate per-bin count, mean, and standard deviation of some
    value, binned by a basis quantity."""

    def __init__(self, min_basis, max_basis, num_bins):
        self.num_bins = num_bins
        self.range = (min_basis, max_basis)
        # per-bin sample counts
        self.hist = Histogram(min_basis, max_basis, num_bins)
        # per-bin running sums of values and of squared values
        self.sum = histogram_init_helper(num_bins, numpy.float64)
        self.squared_sum = histogram_init_helper(num_bins, numpy.float64)

    def add(self, basis, values):
        """Fold a batch of (basis, values) samples into the statistics."""
        self.hist.add(basis)
        self.sum += histogram_helper(basis,
                                     bins=self.num_bins,
                                     range=self.range,
                                     weights=values)
        squared_values = values * values
        self.squared_sum += histogram_helper(basis,
                                             bins=self.num_bins,
                                             range=self.range,
                                             weights=squared_values)

    def get(self):
        """Return (count, mean, std) arrays; empty bins yield NaN
        mean/std."""
        n = self.hist.get()
        # empty bins divide 0.0 by 0 -> NaN; silence numpy's warning
        # instead of spamming stderr
        with numpy.errstate(divide='ignore', invalid='ignore'):
            mean = self.sum / n
            variance = (self.squared_sum / n) - (mean * mean)
        # BUG FIX: the original returned the variance under the name
        # "std".  Take the square root, clamping tiny negative values
        # produced by floating-point round-off to 0 (NaN propagates
        # through maximum/sqrt, so empty bins stay NaN).
        std = numpy.sqrt(numpy.maximum(variance, 0.0))
        return n, mean, std
# helper class to save repetition for all the per-band data
# structures needed below
class PerBandHelper:
    """List of per-band objects, one per instrument band (16 by default).

    Each element is constructed independently as ctor(*args).  The band
    count can be overridden with the keyword argument num_bands; it is
    accepted via **kwargs (not a keyword-only parameter) so the code
    stays valid under Python 2 and existing PerBandHelper(ctor, ...)
    calls are unaffected.
    """

    DEFAULT_NUM_BANDS = 16

    def __init__(self, ctor, *args, **kwargs):
        num_bands = kwargs.pop('num_bands', self.DEFAULT_NUM_BANDS)
        if kwargs:
            raise TypeError('unexpected keyword arguments: %s'
                            % ', '.join(kwargs))
        # one independent instance per band
        self.data = [ctor(*args) for band in range(num_bands)]

    def __getitem__(self, key):
        return self.data[key]

    def __len__(self):
        return len(self.data)

    def get(self):
        """Return the underlying list of per-band objects."""
        return self.data
# verify command line inputs
if len(sys.argv) != 3:
    # parenthesized single-argument print works under both Python 2
    # and Python 3 (the original bare print statement is Py2-only)
    print('usage: python %s <input_dir> <output_file>' % sys.argv[0])
    sys.exit(1)
input_dir = sys.argv[1]
output_file = sys.argv[2]

# set up per-band histogram and stats objects.
# brightness-temperature histograms span 200-350 K; the AIRS-MODIS
# difference histogram spans -10 to +10 K
airs_bt_hist = PerBandHelper(Histogram, 200.0, 350.0, 150)
modis_bt_hist = PerBandHelper(Histogram, 200.0, 350.0, 150)
bt_diff_hist = PerBandHelper(Histogram, -10.0, 10.0, 100)
# stats binned on a 1x1 degree latitude/longitude grid
latlon_airs_stats = PerBandHelper(Stats,
                                  (-90.0, 90.0),
                                  (-180.0, 180.0),
                                  (180, 360))
latlon_diff_stats = PerBandHelper(Stats,
                                  (-90.0, 90.0),
                                  (-180.0, 180.0),
                                  (180, 360))
# stats of the BT difference binned by AIRS BT, scan angle, and solar
# zenith angle respectively
bt_diff_stats = PerBandHelper(Stats, 200.0, 350.0, 150)
scan_ang_diff_stats = PerBandHelper(Stats, -55.0, 55.0, 110)
sol_zen_diff_stats = PerBandHelper(Stats, 0.0, 180.0, 180)
# loop over each granule in the input directory
for gran in range(1, 241):
    print('Processing granule %d' % gran)

    # make sure all the needed files are present
    airs_file = '%s/intercal.a2m.2008.12.31.%03d.hdf' % (input_dir, gran)
    modis_file = '%s/intercal.m2a.2008.12.31.%03d.hdf' % (input_dir, gran)
    meta_file = '%s/intercal.meta.2008.12.31.%03d.hdf' % (input_dir, gran)
    if not (os.path.exists(airs_file) and
            os.path.exists(modis_file) and
            os.path.exists(meta_file)):
        print(' Granule %d missing' % gran)
        continue

    # grab AIRS data
    airs_sd = SD(airs_file)
    airs_bt = airs_sd.select('AIRS_Brightness_Temp').get()
    airs_flags = airs_sd.select('AIRS_Flags').get()

    # MODIS data
    modis_sd = SD(modis_file)
    modis_bt = modis_sd.select('MODIS_Brightness_Temp').get()
    modis_flags = modis_sd.select('MODIS_Flags').get()

    # "meta" data
    meta_sd = SD(meta_file)
    lats = meta_sd.select('Latitude').get()
    lons = meta_sd.select('Longitude').get()
    scan_ang = meta_sd.select('Scan_Angle').get()
    sol_zen = meta_sd.select('Solar_Zenith').get()

    # AIRS - MODIS BTs
    bt_diff = airs_bt - modis_bt

    # mask off all pixels where the data is suspect for either
    # instrument, or where the difference is NaN.  ~isnan(...) is the
    # idiomatic spelling of "isnan(...) == False"
    mask = numpy.logical_and(airs_flags == 0, modis_flags == 0)
    mask = numpy.logical_and(mask, ~numpy.isnan(bt_diff))

    # now update our per-band statistics
    for band in range(16):
        # set up some shorthands for convenience and to avoid
        # duplicate mask applications.  BTs/flags are indexed per
        # band; lat/lon/angles appear shared across bands (assumes
        # mask[band] is shape-compatible with lats -- TODO confirm)
        m = mask[band]
        abt = airs_bt[band][m]
        mbt = modis_bt[band][m]
        btd = bt_diff[band][m]
        la = lats[m]
        lo = lons[m]
        sa = scan_ang[m]
        sz = sol_zen[m]

        # do the actual updates
        airs_bt_hist[band].add(abt)
        modis_bt_hist[band].add(mbt)
        bt_diff_hist[band].add(btd)
        latlon_airs_stats[band].add((la, lo), abt)
        latlon_diff_stats[band].add((la, lo), btd)
        bt_diff_stats[band].add(abt, btd)
        scan_ang_diff_stats[band].add(sa, btd)
        sol_zen_diff_stats[band].add(sz, btd)
# output results to HDF
# FIXME: handle the limits more coherently
writer = HdfWriter(output_file)

# raw per-band histograms, each with its bucket limits
for name, helper, lo_bucket, hi_bucket in (
        ('AIRS_BT_Hist', airs_bt_hist, 200.0, 350.0),
        ('MODIS_BT_Hist', modis_bt_hist, 200.0, 350.0),
        ('BT_Diff_Hist', bt_diff_hist, -10.0, 10.0)):
    writer.write_seq(name,
                     [o.get() for o in helper.get()],
                     min_bucket=lo_bucket, max_bucket=hi_bucket)

# per-band mean fields (element [1] of Stats.get()); the lat/lon grids
# are written without bucket limits
writer.write_seq('AIRS_BT_By_Location',
                 [o.get()[1] for o in latlon_airs_stats.get()])
writer.write_seq('BT_Diff_By_Location',
                 [o.get()[1] for o in latlon_diff_stats.get()])
for name, helper, lo_bucket, hi_bucket in (
        ('BT_Diff_By_AIRS_BT', bt_diff_stats, 200.0, 350.0),
        ('BT_Diff_By_Scan_Angle', scan_ang_diff_stats, -55.0, 55.0),
        ('BT_Diff_By_Solar_Zenith', sol_zen_diff_stats, 0.0, 180.0)):
    writer.write_seq(name,
                     [o.get()[1] for o in helper.get()],
                     min_bucket=lo_bucket, max_bucket=hi_bucket)