# aggregate.py
import numpy
import os
import sys

from pyhdf.SD import SD, SDC
from util import HdfWriter

# helper functions to allow the classes below to be used with either
# one or two dimensions

def histogram_init_helper(num_bins, dtype):
    """Allocate a zeroed histogram-count array.

    num_bins may be an int (1-D histogram) or a tuple of ints
    (N-D histogram).  numpy.zeros accepts either form directly,
    so no case split is needed.
    """
    return numpy.zeros(num_bins, dtype)

def histogram_helper(arr, **kwargs):
    """Histogram some data, returning only the bin counts.

    If arr is a tuple it is treated as (x, y) sample arrays for a
    2-D histogram; otherwise a 1-D histogram is computed.  Keyword
    arguments (bins, range, weights, ...) pass through to numpy.
    Uses isinstance rather than a type() comparison so tuple
    subclasses are handled as well.
    """
    if isinstance(arr, tuple):
        return numpy.histogram2d(arr[0], arr[1], **kwargs)[0]
    return numpy.histogram(arr, **kwargs)[0]

class Histogram:
    """A histogram built up incrementally via repeated calls to add()."""

    def __init__(self, min_value, max_value, num_bins):
        # bin layout is fixed at construction; counts start at zero
        self.num_bins = num_bins
        self.range = (min_value, max_value)
        self.hist = histogram_init_helper(num_bins, numpy.int32)

    def add(self, arr):
        """Bin the samples in arr and fold them into the running counts."""
        counts = histogram_helper(arr,
                                  bins=self.num_bins,
                                  range=self.range)
        self.hist += counts

    def get(self):
        """Return the accumulated bin counts."""
        return self.hist

class Stats:
    """Per-bin statistics of some value in terms of some basis.

    Takes statistics on a value (e.g., brightness temperature
    differences between AIRS and MODIS) binned by a basis (e.g.,
    Earth location or instrument scan angle).  Like Histogram, it
    is designed to be built up through successive calls to add().
    """

    def __init__(self, min_basis, max_basis, num_bins):
        self.num_bins = num_bins
        self.range = (min_basis, max_basis)
        self.hist = Histogram(min_basis, max_basis, num_bins)
        self.sum = histogram_init_helper(num_bins, numpy.float64)
        self.squared_sum = histogram_init_helper(num_bins, numpy.float64)

    def add(self, basis, values):
        """Fold one batch of (basis, values) samples into the accumulators."""
        self.hist.add(basis)
        binning = dict(bins=self.num_bins, range=self.range)
        self.sum += histogram_helper(basis, weights=values, **binning)
        self.squared_sum += histogram_helper(basis,
                                             weights=values * values,
                                             **binning)

    def get(self):
        """Return (count, mean, spread) arrays, one element per bin.

        Empty bins divide by zero and yield nan/inf entries.
        NOTE(review): the third return value is E[x^2] - E[x]^2, i.e.
        the variance, not the standard deviation its local name
        suggests; only the mean is written out by this script, so
        the computation is left unchanged.
        """
        n = self.hist.get()
        mean = self.sum / n
        std = (self.squared_sum / n) - (mean * mean)
        return n, mean, std

class PerBandHelper:
    """One ctor(*args) instance per band, to avoid repeating the
    per-band data-structure setup below (16 bands throughout this
    script)."""

    def __init__(self, ctor, *args):
        # build an independent object for each of the 16 bands
        self.data = [ctor(*args) for _ in range(16)]

    def __getitem__(self, key):
        """Index directly into the per-band list."""
        return self.data[key]

    def get(self):
        """Return the full per-band list."""
        return self.data

# verify command line inputs
# (parenthesized single-argument print behaves identically under
# Python 2 and Python 3, unlike the bare print statement)
if len(sys.argv) != 3:
    print('usage: python %s <input_dir> <output_file>' % sys.argv[0])
    sys.exit(1)
input_dir = sys.argv[1]
output_file = sys.argv[2]

# set up per-band histogram and stats objects
# brightness-temperature histograms: 150 bins over [200, 350] (1-unit bins)
airs_bt_hist = PerBandHelper(Histogram, 200.0, 350.0, 150)
modis_bt_hist = PerBandHelper(Histogram, 200.0, 350.0, 150)
# AIRS - MODIS difference histogram: 100 bins over [-10, 10]
bt_diff_hist = PerBandHelper(Histogram, -10.0, 10.0, 100)
# 2-D stats binned by (latitude, longitude) on a 1-degree global grid
latlon_airs_stats = PerBandHelper(Stats,
                                  (-90.0, 90.0),
                                  (-180.0, 180.0),
                                  (180, 360))
latlon_diff_stats = PerBandHelper(Stats,
                                  (-90.0, 90.0),
                                  (-180.0, 180.0),
                                  (180, 360))
# 1-D stats of the BT difference binned by AIRS BT, scan angle,
# and solar zenith angle respectively (1-unit bins in each case)
bt_diff_stats = PerBandHelper(Stats, 200.0, 350.0, 150)
scan_ang_diff_stats = PerBandHelper(Stats, -55.0, 55.0, 110)
sol_zen_diff_stats = PerBandHelper(Stats, 0.0, 180.0, 180)

# loop over each granule in the input directory
for gran in range(1, 241):

    # parenthesized print works identically under Python 2 and 3
    print('Processing granule %d' % gran)

    # make sure all the needed files are present
    airs_file = '%s/intercal.a2m.2008.12.31.%03d.hdf' % (input_dir, gran)
    modis_file = '%s/intercal.m2a.2008.12.31.%03d.hdf' % (input_dir, gran)
    meta_file = '%s/intercal.meta.2008.12.31.%03d.hdf' % (input_dir, gran)
    if not (os.path.exists(airs_file) and
            os.path.exists(modis_file) and
            os.path.exists(meta_file)):
        print('  Granule %d missing' % gran)
        continue

    # grab AIRS data; close each HDF file once its arrays are read so
    # we don't leak file handles across up to 240 iterations
    airs_sd = SD(airs_file)
    airs_bt = airs_sd.select('AIRS_Brightness_Temp').get()
    airs_flags = airs_sd.select('AIRS_Flags').get()
    airs_sd.end()

    # MODIS data
    modis_sd = SD(modis_file)
    modis_bt = modis_sd.select('MODIS_Brightness_Temp').get()
    modis_flags = modis_sd.select('MODIS_Flags').get()
    modis_sd.end()

    # "meta" data
    meta_sd = SD(meta_file)
    lats = meta_sd.select('Latitude').get()
    lons = meta_sd.select('Longitude').get()
    scan_ang = meta_sd.select('Scan_Angle').get()
    sol_zen = meta_sd.select('Solar_Zenith').get()
    meta_sd.end()

    # AIRS - MODIS BTs
    bt_diff = airs_bt - modis_bt

    # we'll mask off all pixels where the data is suspect for either
    # instrument, or where the difference came out as NaN
    mask = numpy.logical_and(airs_flags == 0, modis_flags == 0)
    mask = numpy.logical_and(mask, numpy.isnan(bt_diff) == False)

    # now update our per-band statistics
    for band in range(16):

        # set up some shorthands for convenience and to avoid
        # duplicate mask applications
        m = mask[band]
        abt = airs_bt[band][m]
        mbt = modis_bt[band][m]
        btd = bt_diff[band][m]
        la = lats[m]
        lo = lons[m]
        sa = scan_ang[m]
        sz = sol_zen[m]

        # do the actual updates
        airs_bt_hist[band].add(abt)
        modis_bt_hist[band].add(mbt)
        bt_diff_hist[band].add(btd)
        latlon_airs_stats[band].add((la, lo), abt)
        latlon_diff_stats[band].add((la, lo), btd)
        bt_diff_stats[band].add(abt, btd)
        scan_ang_diff_stats[band].add(sa, btd)
        sol_zen_diff_stats[band].add(sz, btd)

# output results to HDF
# FIXME: handle the limits more coherently
writer = HdfWriter(output_file)

# histograms: write the raw per-band bin counts
for name, helper, lo, hi in [
        ('AIRS_BT_Hist', airs_bt_hist, 200.0, 350.0),
        ('MODIS_BT_Hist', modis_bt_hist, 200.0, 350.0),
        ('BT_Diff_Hist', bt_diff_hist, -10.0, 10.0)]:
    writer.write_seq(name,
                     [o.get() for o in helper.get()],
                     min_bucket=lo, max_bucket=hi)

# location-binned stats: write only the per-bin mean
# (element 1 of the (count, mean, spread) tuple from Stats.get())
writer.write_seq('AIRS_BT_By_Location',
                 [o.get()[1] for o in latlon_airs_stats.get()])
writer.write_seq('BT_Diff_By_Location',
                 [o.get()[1] for o in latlon_diff_stats.get()])

# 1-D stats: per-bin mean plus the bucket limits
for name, helper, lo, hi in [
        ('BT_Diff_By_AIRS_BT', bt_diff_stats, 200.0, 350.0),
        ('BT_Diff_By_Scan_Angle', scan_ang_diff_stats, -55.0, 55.0),
        ('BT_Diff_By_Solar_Zenith', sol_zen_diff_stats, 0.0, 180.0)]:
    writer.write_seq(name,
                     [o.get()[1] for o in helper.get()],
                     min_bucket=lo, max_bucket=hi)