diff --git a/aggregate.py b/aggregate.py
new file mode 100644
index 0000000000000000000000000000000000000000..f00546da60d8de1d7281ab5653e8840c95bec733
--- /dev/null
+++ b/aggregate.py
@@ -0,0 +1,208 @@
+import numpy
+import os
+import sys
+
+from pyhdf.SD import SD, SDC
+from util import HdfWriter
+
+# helper functions to allow the classes below to be used with either
+# one or two dimensions
+
+def histogram_init_helper(num_bins, dtype):
+
+    if type(num_bins) == tuple:
+        return numpy.zeros(num_bins, dtype)
+    else:
+        return numpy.zeros((num_bins,), dtype)
+
+def histogram_helper(arr, **kwargs):
+
+    if type(arr) == tuple:
+        return numpy.histogram2d(arr[0], arr[1], **kwargs)[0]
+    else:
+        return numpy.histogram(arr, **kwargs)[0]
+
+# class for building up a histogram in parts
+class Histogram:
+
+    def __init__(self, min_value, max_value, num_bins):
+
+        self.range = (min_value, max_value)
+        self.num_bins = num_bins
+        self.hist = histogram_init_helper(num_bins, numpy.int32)
+
+    def add(self, arr):
+
+        self.hist += histogram_helper(arr,
+                                      bins=self.num_bins,
+                                      range=self.range)
+
+    def get(self):
+
+        return self.hist
+
+# class for taking statistics on some value (e.g., brightness
+# temperature differences between AIRS and MODIS) in terms of some
+# basis (e.g., Earth location or instrument scan angle). like
+# Histogram, designed for building up these stats via successive
+# calls to add()
+class Stats:
+
+    def __init__(self, min_basis, max_basis, num_bins):
+
+        self.num_bins = num_bins
+        self.range = (min_basis, max_basis)
+        self.hist = Histogram(min_basis, max_basis, num_bins)
+        self.sum = histogram_init_helper(num_bins, numpy.float64)
+        self.squared_sum = histogram_init_helper(num_bins, numpy.float64)
+
+    def add(self, basis, values):
+
+        self.hist.add(basis)
+
+        self.sum += histogram_helper(basis,
+                                     bins=self.num_bins,
+                                     range=self.range,
+                                     weights=values)
+
+        squared_values = values * values
+        self.squared_sum += histogram_helper(basis,
+                                             bins=self.num_bins,
+                                             range=self.range,
+                                             weights=squared_values)
+
+    def get(self):
+
+        n = self.hist.get()
+        mean = self.sum / n
+        std = numpy.sqrt((self.squared_sum / n) - (mean * mean))
+        return n, mean, std
+
+# helper class to save repetition for all the per-band data
+# structures needed below
+class PerBandHelper:
+
+    def __init__(self, ctor, *args):
+
+        self.data = [ctor(*args) for band in range(16)]
+
+    def __getitem__(self, key):
+
+        return self.data[key]
+
+    def get(self):
+
+        return self.data
+
+# verify command line inputs
+if len(sys.argv) != 3:
+    print 'usage: python %s <input_dir> <output_file>' % sys.argv[0]
+    sys.exit(1)
+input_dir = sys.argv[1]
+output_file = sys.argv[2]
+
+# set up per-band histogram and stats objects
+airs_bt_hist = PerBandHelper(Histogram, 200.0, 350.0, 150)
+modis_bt_hist = PerBandHelper(Histogram, 200.0, 350.0, 150)
+bt_diff_hist = PerBandHelper(Histogram, -10.0, 10.0, 100)
+latlon_airs_stats = PerBandHelper(Stats,
+                                  (-90.0, 90.0),
+                                  (-180.0, 180.0),
+                                  (180, 360))
+latlon_diff_stats = PerBandHelper(Stats,
+                                  (-90.0, 90.0),
+                                  (-180.0, 180.0),
+                                  (180, 360))
+bt_diff_stats = PerBandHelper(Stats, 200.0, 350.0, 150)
+scan_ang_diff_stats = PerBandHelper(Stats, -55.0, 55.0, 110)
+sol_zen_diff_stats = PerBandHelper(Stats, 0.0, 180.0, 180)
+
+# loop over each granule in the input directory
+for gran in range(1, 241):
+
+    print 'Processing granule %d' % gran
+
+    # make sure all the needed files are present
+    airs_file = '%s/a2m_%03d.hdf' % (input_dir, gran)
+    modis_file = '%s/m2a_%03d.hdf' % (input_dir, gran)
+    meta_file = '%s/meta_%03d.hdf' % (input_dir, gran)
+    if not (os.path.exists(airs_file) and
+            os.path.exists(modis_file) and
+            os.path.exists(meta_file)):
+        print '  Granule %d missing' % gran
+        continue
+
+    # grab AIRS data
+    airs_sd = SD(airs_file)
+    airs_bt = airs_sd.select('AIRS_Brightness_Temp').get()
+    airs_flags = airs_sd.select('AIRS_Flags').get()
+
+    # MODIS data
+    modis_sd = SD(modis_file)
+    modis_bt = modis_sd.select('MODIS_Brightness_Temp').get()
+    modis_flags = modis_sd.select('MODIS_Flags').get()
+
+    # "meta" data
+    meta_sd = SD(meta_file)
+    lats = meta_sd.select('Latitude').get()
+    lons = meta_sd.select('Longitude').get()
+    scan_ang = meta_sd.select('Scan_Angle').get()
+    sol_zen = meta_sd.select('Solar_Zenith').get()
+
+    # AIRS - MODIS BTs
+    bt_diff = airs_bt - modis_bt
+
+    # we'll mask off all pixels where the data is suspect for either
+    # instrument
+    mask = numpy.logical_and(airs_flags == 0, modis_flags == 0)
+    mask = numpy.logical_and(mask, numpy.isnan(bt_diff) == False)
+
+    # now update our per-band statistics
+    for band in range(16):
+
+        # set up some shorthands for convenience and to avoid
+        # duplicate mask applications
+        m = mask[band]
+        abt = airs_bt[band][m]
+        mbt = modis_bt[band][m]
+        btd = bt_diff[band][m]
+        la = lats[m]
+        lo = lons[m]
+        sa = scan_ang[m]
+        sz = sol_zen[m]
+
+        # do the actual updates
+        airs_bt_hist[band].add(abt)
+        modis_bt_hist[band].add(mbt)
+        bt_diff_hist[band].add(btd)
+        latlon_airs_stats[band].add((la, lo), abt)
+        latlon_diff_stats[band].add((la, lo), btd)
+        bt_diff_stats[band].add(abt, btd)
+        scan_ang_diff_stats[band].add(sa, btd)
+        sol_zen_diff_stats[band].add(sz, btd)
+
+# output results to HDF
+# FIXME: handle the limits more coherently
+writer = HdfWriter(output_file)
+writer.write_seq('AIRS_BT_Hist',
+                 [o.get() for o in airs_bt_hist.get()],
+                 min_bucket=200.0, max_bucket=350.0)
+writer.write_seq('MODIS_BT_Hist',
+                 [o.get() for o in modis_bt_hist.get()],
+                 min_bucket=200.0, max_bucket=350.0)
+writer.write_seq('BT_Diff_Hist',
+                 [o.get() for o in bt_diff_hist.get()],
+                 min_bucket=-10.0, max_bucket=10.0)
+writer.write_seq('AIRS_BT_By_Location',
+                 [o.get()[1] for o in latlon_airs_stats.get()])
+writer.write_seq('BT_Diff_By_Location',
+                 [o.get()[1] for o in latlon_diff_stats.get()])
+writer.write_seq('BT_Diff_By_AIRS_BT',
+                 [o.get()[1] for o in bt_diff_stats.get()],
+                 min_bucket=200.0, max_bucket=350.0)
+writer.write_seq('BT_Diff_By_Scan_Angle',
+                 [o.get()[1] for o in scan_ang_diff_stats.get()],
+                 min_bucket=-55.0, max_bucket=55.0)
+writer.write_seq('BT_Diff_By_Solar_Zenith',
+                 [o.get()[1] for o in sol_zen_diff_stats.get()],
+                 min_bucket=0.0, max_bucket=180.0)
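Note: the Stats class in aggregate.py above keeps only a per-bin count, sum, and sum of squares, so per-bin means and standard deviations can be accumulated granule by granule without holding every sample in memory. Below is a minimal, self-contained sketch of that accumulation pattern; the bin limits and the randomly generated stand-in "granule" arrays are purely illustrative and are not taken from the real AIRS/MODIS data.

import numpy

# accumulate AIRS-MODIS BT differences into scan-angle bins, one granule at a time
num_bins, lo, hi = 110, -55.0, 55.0
counts = numpy.zeros(num_bins, numpy.int32)
sums = numpy.zeros(num_bins, numpy.float64)
squared_sums = numpy.zeros(num_bins, numpy.float64)

for _ in range(3):
    # stand-ins for one granule's scan angles and BT differences
    scan_ang = numpy.random.uniform(lo, hi, 1000)
    bt_diff = numpy.random.normal(0.5, 1.0, 1000)

    counts += numpy.histogram(scan_ang, bins=num_bins, range=(lo, hi))[0]
    sums += numpy.histogram(scan_ang, bins=num_bins, range=(lo, hi),
                            weights=bt_diff)[0]
    squared_sums += numpy.histogram(scan_ang, bins=num_bins, range=(lo, hi),
                                    weights=bt_diff * bt_diff)[0]

# per-bin mean and standard deviation; bins that never saw a sample come out
# as NaN here, just as they do in Stats.get()
mean = sums / counts
std = numpy.sqrt(squared_sums / counts - mean * mean)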
diff --git a/airs2modis.py b/airs2modis.py
index 3e38f9d9900e398bbf7cf6a2afd4560eff7ee10b..613774a6e056909c713dd0b44afa4eb692502f14 100644
--- a/airs2modis.py
+++ b/airs2modis.py
@@ -5,6 +5,7 @@ import sys
 
 from modis_bright import modis_bright_from_airs
 from pyhdf.SD import SD, SDC
+from util import HdfWriter
 
 # verify command line inputs
 if len(sys.argv) != 3:
@@ -61,10 +62,7 @@ high_mask = ((in_rads > high_values)[numpy.newaxis,:,:,:] &
 out_flags[high_mask] |= flags.HIGH
 
 # write results out to HDF
-output_sd = SD(output_filename, SDC.WRITE | SDC.CREATE | SDC.TRUNC)
-rad_sds = output_sd.create('AIRS_Radiance', SDC.FLOAT64, out_rads.shape)
-bt_sds = output_sd.create('AIRS_Brightness_Temp', SDC.FLOAT64, out_bts.shape)
-flags_sds = output_sd.create('AIRS_Flags', SDC.INT8, out_flags.shape)
-rad_sds[:] = out_rads
-bt_sds[:] = out_bts
-flags_sds[:] = out_flags
+writer = HdfWriter(output_filename)
+writer.write('AIRS_Radiance', out_rads)
+writer.write('AIRS_Brightness_Temp', out_bts)
+writer.write('AIRS_Flags', out_flags)
diff --git a/airs2modis_pre.py b/airs2modis_pre.py
index 848a16f5addb851a9a6885aa827baa0c7034e17c..66147feda324c0ec6122ec2c8a097fb5cbdb54f7 100644
--- a/airs2modis_pre.py
+++ b/airs2modis_pre.py
@@ -14,6 +14,7 @@ import sys
 
 from bright import inverse_bright_wavenumber
 from pyhdf.SD import SD, SDC
+from util import HdfWriter
 
 # returns a dictionary of objects providing spectral response
 # function information for each MODIS band, based on the given MODIS
@@ -106,13 +107,9 @@ noise = (inverse_bright_wavenumber(freqs, bt250 + nedts) -
 
 bt350 = 350.0 * numpy.ones((freqs.size,), numpy.float64)
 r350 = inverse_bright_wavenumber(freqs, bt350)
 
-# write out the mask and weights to HDF
-sd = SD('airs2modis.hdf', SDC.WRITE | SDC.CREATE | SDC.TRUNC)
-mask_sds = sd.create('Channel_Mask', SDC.INT8, mask.shape)
-response_sds = sd.create('Channel_Weights', SDC.FLOAT64, response.shape)
-noise_sds = sd.create('Channel_Noise_Rating', SDC.FLOAT64, noise.shape)
-r350_sds = sd.create('Channel_350K_Radiance', SDC.FLOAT64, r350.shape)
-mask_sds[:] = mask
-response_sds[:] = response
-noise_sds[:] = noise
-r350_sds[:] = r350
+# write out the arrays to HDF
+writer = HdfWriter('airs2modis.hdf')
+writer.write('Channel_Mask', mask)
+writer.write('Channel_Weights', response)
+writer.write('Channel_Noise_Rating', noise)
+writer.write('Channel_350K_Radiance', r350)
diff --git a/get_meta.py b/get_meta.py
index 4fcf93122462148febdf8d1f453e359ff72560bb..d8c5c1053d957798ea473a4ddd147a1c126d74b0 100644
--- a/get_meta.py
+++ b/get_meta.py
@@ -1,6 +1,7 @@
 import sys
 
 from pyhdf.SD import SD, SDC
+from util import HdfWriter
 
 # verify command line inputs
 if len(sys.argv) != 3:
@@ -17,14 +18,8 @@ scan_ang = sd.select('scanang').get()
 sol_zen = sd.select('solzen').get()
 
 # output the selected arrays to a new HDF file
-output_sd = SD(output_filename, SDC.WRITE | SDC.CREATE | SDC.TRUNC)
-lat_sds = output_sd.create('Latitude', SDC.FLOAT64, lat.shape)
-lon_sds = output_sd.create('Longitude', SDC.FLOAT64, lon.shape)
-scan_ang_sds = output_sd.create('Scan_Angle', SDC.FLOAT32, scan_ang.shape)
-sol_zen_sds = output_sd.create('Solar_Zenith',
-                               SDC.FLOAT32,
-                               sol_zen.shape)
-lat_sds[:] = lat
-lon_sds[:] = lon
-scan_ang_sds[:] = scan_ang
-sol_zen_sds[:] = sol_zen
+writer = HdfWriter(output_filename)
+writer.write('Latitude', lat)
+writer.write('Longitude', lon)
+writer.write('Scan_Angle', scan_ang)
+writer.write('Solar_Zenith', sol_zen)
diff --git a/modis2airs.py b/modis2airs.py
index 0ec523888f8590d47173fa931fd943837ccbd997..2e93460725e888fe1f56cb0af38f270bcb1000d4 100644
--- a/modis2airs.py
+++ b/modis2airs.py
@@ -3,6 +3,7 @@ import sys
 
 from flags import *
 from pyhdf.SD import SD, SDC
+from util import HdfWriter
 
 # helper function for reading in MODIS radiances.  returns a numpy
 # array with dimensions:
@@ -84,17 +85,8 @@ for airs_row in range(135):
         rads_squared_sum[:] = (rads * rads).sum(axis=1)
 
 # write results to HDF
-
-sd = SD(output_file, SDC.WRITE | SDC.CREATE | SDC.TRUNC)
-
-n_sds = sd.create('MODIS_N', SDC.INT32, n_arr.shape)
-rad_sum_sds = sd.create('MODIS_Radiance_Sum', SDC.FLOAT64, rad_sum_arr.shape)
-rad_squared_sum_sds = sd.create('MODIS_Radiance_Squared_Sum',
-                                SDC.FLOAT64,
-                                rad_squared_sum_arr.shape)
-flags_sds = sd.create('MODIS_Flags', SDC.INT8, flags_arr.shape)
-
-n_sds[:] = n_arr
-rad_sum_sds[:] = rad_sum_arr
-rad_squared_sum_sds[:] = rad_squared_sum_arr
-flags_sds[:] = flags_arr
+writer = HdfWriter(output_file)
+writer.write('MODIS_N', n_arr)
+writer.write('MODIS_Radiance_Sum', rad_sum_arr)
+writer.write('MODIS_Radiance_Squared_Sum', rad_squared_sum_arr)
+writer.write('MODIS_Flags', flags_arr)
diff --git a/modis2airs_collect.py b/modis2airs_collect.py
index fbc78db6be127ef259f57279840f304e15974339..2106ef4a14fbdfb65afe331714af8b49604197b0 100644
--- a/modis2airs_collect.py
+++ b/modis2airs_collect.py
@@ -3,6 +3,7 @@ import sys
 
 from modis_bright import modis_bright
 from pyhdf.SD import SD, SDC
+from util import HdfWriter
 
 # verify command line arguments
 if len(sys.argv) < 2:
@@ -54,24 +55,10 @@ bt_mean[missing_idxs] = -9999.0
 std_bt[missing_idxs] = -9999.0
 
 # output to HDF
-
-sd = SD(output_file, SDC.WRITE | SDC.CREATE | SDC.TRUNC)
-
-n_sds = sd.create('MODIS_N', SDC.INT32, n.shape)
-rad_mean_sds = sd.create('MODIS_Radiance', SDC.FLOAT64, rad_mean.shape)
-rad_std_sds = sd.create('MODIS_Radiance_Std', SDC.FLOAT64, rad_std.shape)
-bt_mean_sds = sd.create('MODIS_Brightness_Temp', SDC.FLOAT64, bt_mean.shape)
-std_bt_sds = sd.create('MODIS_Std_Brightness', SDC.FLOAT64, std_bt.shape)
-flags_sds = sd.create('MODIS_Flags', SDC.INT8, flags.shape)
-
-rad_mean_sds.setfillvalue(-9999.0)
-rad_std_sds.setfillvalue(-9999.0)
-bt_mean_sds.setfillvalue(-9999.0)
-std_bt_sds.setfillvalue(-9999.0)
-
-n_sds[:] = n
-rad_mean_sds[:] = rad_mean
-rad_std_sds[:] = rad_std
-bt_mean_sds[:] = bt_mean
-std_bt_sds[:] = std_bt
-flags_sds[:] = flags
+writer = HdfWriter(output_file)
+writer.write('MODIS_N', n)
+writer.write('MODIS_Radiance', rad_mean, fill=-9999.0)
+writer.write('MODIS_Radiance_Std', rad_std, fill=-9999.0)
+writer.write('MODIS_Brightness_Temp', bt_mean, fill=-9999.0)
+writer.write('MODIS_Std_Brightness', std_bt, fill=-9999.0)
+writer.write('MODIS_Flags', flags)
diff --git a/plot.py b/plot.py
new file mode 100644
index 0000000000000000000000000000000000000000..6f97a54cc1fdfb1fd79e87212022d30d5d0431ca
--- /dev/null
+++ b/plot.py
@@ -0,0 +1,186 @@
+import matplotlib
+import numpy
+import sys
+
+from matplotlib.backends.backend_agg import FigureCanvasAgg
+from matplotlib.figure import Figure
+from mpl_toolkits.basemap import Basemap
+from pyhdf.SD import SD
+
+# helper function for creating plots
+def plot_helper(out_file, data,
+                x_range,
+                title, x_label, y_label=None,
+                data_labels=None):
+
+    # plot setup; params determined via trial and error
+    fig = Figure((4, 2))
+    ax = fig.add_axes((0.15, 0.15, 0.8, 0.7))
+
+    # plot the given data
+    x = numpy.linspace(x_range[0], x_range[1], len(data[0]))
+    for y in data:
+        ax.plot(x, y)
+
+    # add labels and such
+    ax.set_xlim(x_range[0], x_range[1])
+    ax.set_title(title)
+    ax.set_xlabel(x_label)
+    if y_label:
+        ax.set_ylabel(y_label)
+    if data_labels:
+        ax.legend(data_labels)
+
+    # write out the image file
+    canvas = FigureCanvasAgg(fig)
+    canvas.print_figure(out_file)
+
+# helper for plotting a single line based on the given SDS
+def plot_sds(sds, band, out_file, **kwargs):
+
+    # just grab the data and x-axis limits and pass everything to
+    # plot_helper
+    data = (sds[band],)
+    x_range = (sds.min_bucket, sds.max_bucket)
+    plot_helper(out_file, data, x_range, **kwargs)
+
+# helper function for creating map plots
+def map_helper(out_file, data, title):
+
+    # imshow doesn't deal well with NaNs, so mask them off
+    data = numpy.ma.masked_array(data, mask=numpy.isnan(data))
+
+    # plot setup; params determined via trial and error
+    fig = Figure((5, 2.25))
+    ax = fig.add_axes((0.01, 0.01, 0.89, 0.88))
+    color_ax = fig.add_axes((0.91, 0.05, 0.02, 0.90))
+
+    # use basemap to plot the given data and draw geographical
+    # boundaries
+    bm = Basemap(projection='cyl', ax=ax)
+    data = bm.transform_scalar(data,
+                               numpy.arange(-179.5, 180.0, 1.0),
+                               numpy.arange(-89.5, 90.0, 1.0),
+                               360, 180)
+    im = bm.imshow(data)
+    bm.drawmapboundary()
+    bm.drawcoastlines()
+
+    # add the given title and a colorbar
+    ax.set_title(title)
+    fig.colorbar(im, cax=color_ax)
+
+    # write out the image file
+    canvas = FigureCanvasAgg(fig)
+    canvas.print_figure(out_file)
+
+# plot histograms of all BT samples for both AIRS and MODIS
+def plot_bt_hist(sd, band, out_file):
+
+    # grab the data and x-axis limits and invoke plot_helper
+    airs_sds = sd.select('AIRS_BT_Hist')
+    modis_sds = sd.select('MODIS_BT_Hist')
+    data = (airs_sds[band], modis_sds[band])
+    x_range = (airs_sds.min_bucket, airs_sds.max_bucket)
+    plot_helper(out_file,
+                data,
+                x_range,
+                title='BT Histogram',
+                x_label='BT (K)',
+                data_labels=('AIRS', 'MODIS'))
+
+# plot histogram of BT differences
+def plot_bt_diff_hist(sd, band, out_file):
+
+    sds = sd.select('BT_Diff_Hist')
+    plot_sds(sds, band, out_file,
+             title='BT Difference Histogram',
+             x_label='AIRS-MODIS BT Diff (K)')
+
+# plot mean BT difference as a function of BT
+def plot_bt_diff_by_airs_bt(sd, band, out_file):
+
+    sds = sd.select('BT_Diff_By_AIRS_BT')
+    plot_sds(sds, band, out_file,
+             title='BT Difference By AIRS BT',
+             x_label='AIRS BT (K)',
+             y_label='Mean AIRS-MODIS BT Diff (K)')
+
+# plot mean BT difference as a function of scan angle
+def plot_bt_diff_by_scan_angle(sd, band, out_file):
+
+    sds = sd.select('BT_Diff_By_Scan_Angle')
+    plot_sds(sds, band, out_file,
+             title='BT Difference By Scan Angle',
+             x_label='Scan Angle (Degrees)',
+             y_label='Mean AIRS-MODIS BT Diff (K)')
+
+# plot mean BT difference as a function of solar zenith angle
+def plot_bt_diff_by_solar_zenith(sd, band, out_file):
+
+    sds = sd.select('BT_Diff_By_Solar_Zenith')
+    plot_sds(sds, band, out_file,
+             title='BT Difference By Solar Zenith Angle',
+             x_label='Solar Zenith Angle (Degrees)',
+             y_label='Mean AIRS-MODIS BT Diff (K)')
+
+# create a map showing AIRS BTs
+def plot_airs_bt_by_location(sd, band, out_file):
+
+    bt = sd.select('AIRS_BT_By_Location')[band]
+    map_helper(out_file, bt, 'Mean AIRS BT (K)')
+
+# create a map showing BT differences
+def plot_bt_diff_by_location(sd, band, out_file):
+
+    diff = sd.select('BT_Diff_By_Location')[band]
+    map_helper(out_file, diff, 'Mean AIRS-MODIS BT Diff (K)')
+
+# check command-line arguments
+if len(sys.argv) != 3:
+    print 'usage: python %s <aggregate_file> <output_dir>' % sys.argv[0]
+    sys.exit(1)
+aggr_file = sys.argv[1]
+output_dir = sys.argv[2]
+
+# the default size for plot titles and labels is too big
+matplotlib.rc('font', size=8)
+
+# open the aggregate file and produce plots for all bands
+sd = SD(aggr_file)
+for band in range(16):
+    plot_bt_hist(
+        sd,
+        band,
+        '%s/bt_hist_%02d.png' % (output_dir, band)
+    )
+    plot_bt_diff_hist(
+        sd,
+        band,
+        '%s/bt_diff_hist_%02d.png' % (output_dir, band)
+    )
+    plot_bt_diff_by_airs_bt(
+        sd,
+        band,
+        '%s/bt_diff_by_airs_bt_%02d.png' % (output_dir, band)
+    )
+    plot_bt_diff_by_scan_angle(
+        sd,
+        band,
+        '%s/bt_diff_by_scan_angle_%02d.png' % (output_dir, band)
+    )
+    plot_bt_diff_by_solar_zenith(
+        sd,
+        band,
+        '%s/bt_diff_by_solar_zenith_%02d.png' % (output_dir, band)
+    )
+    plot_airs_bt_by_location(
+        sd,
+        band,
+        '%s/airs_bt_by_location_%02d.png' % (output_dir, band)
+    )
+    plot_bt_diff_by_location(
+        sd,
+        band,
+        '%s/bt_diff_by_location_%02d.png' % (output_dir, band)
+    )
diff --git a/util.py b/util.py
new file mode 100644
index 0000000000000000000000000000000000000000..a9046d6c27f2fcb96c867578672ef76e8de5f4dc
--- /dev/null
+++ b/util.py
@@ -0,0 +1,66 @@
+import numpy
+
+from pyhdf.SD import SD, SDC
+
+# for mapping numpy types to HDF type constants; this used to be set
+# up as a dictionary (like python_type_map, below), but the
+# dictionary was misbehaving. perhaps numpy types aren't correctly
+# hashable?
+def map_numpy_type(t):
+    if t == numpy.dtype('bool8'):
+        return SDC.INT8
+    if t == numpy.dtype('int8'):
+        return SDC.INT8
+    if t == numpy.dtype('int16'):
+        return SDC.INT16
+    if t == numpy.dtype('int32'):
+        return SDC.INT32
+    if t == numpy.dtype('float32'):
+        return SDC.FLOAT32
+    if t == numpy.dtype('float64'):
+        return SDC.FLOAT64
+
+# for mapping python types to HDF type constants
+python_type_map = {
+    int: SDC.INT32,
+    float: SDC.FLOAT64,
+    str: SDC.CHAR8
+}
+
+# helper class for writing HDF output from the intercal scripts
+class HdfWriter:
+
+    # construct a HdfWriter object with the HDF filename
+    def __init__(self, filename):
+
+        self.sd = SD(filename, SDC.WRITE | SDC.CREATE | SDC.TRUNC)
+
+    # helper for handling attributes for the write* functions
+    def handle_attrs(self, sds, attrs):
+
+        # fill value is a predefined attribute so gets special-cased
+        if 'fill' in attrs:
+            sds.setfillvalue(attrs['fill'])
+            del attrs['fill']
+
+        # the rest get set via the general attribute interface
+        for attr, value in attrs.iteritems():
+            sds.attr(attr).set(python_type_map[type(value)], value)
+
+    # write out the array arr under the variable name var; kwargs can
+    # be used for setting attributes
+    def write(self, var, arr, **kwargs):
+
+        sds = self.sd.create(var, map_numpy_type(arr.dtype), arr.shape)
+        self.handle_attrs(sds, kwargs)
+        sds[:] = arr
+
+    # write out the given sequence of arrays (which must all be the
+    # same shape)
+    def write_seq(self, var, seq, **kwargs):
+
+        shape = (len(seq),) + seq[0].shape
+        sds = self.sd.create(var, map_numpy_type(seq[0].dtype), shape)
+        self.handle_attrs(sds, kwargs)
+        for i, arr in enumerate(seq):
+            sds[i] = arr
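Note: a minimal sketch of how the HdfWriter class defined in util.py above is meant to be driven, and of how the attributes written by aggregate.py come back out in plot.py through pyhdf's attribute access. The file name 'example.hdf' and the array contents are hypothetical; the write(), write_seq(), fill, min_bucket, and max_bucket usage follows the code in this change.

import numpy
from pyhdf.SD import SD
from util import HdfWriter

writer = HdfWriter('example.hdf')

# a single array with a fill value, as in modis2airs_collect.py
writer.write('Example_Var',
             numpy.zeros((3, 4), numpy.float64),
             fill=-9999.0)

# a per-band sequence with bucket-limit attributes, as in aggregate.py
writer.write_seq('Example_Hist',
                 [numpy.zeros(150, numpy.int32) for band in range(16)],
                 min_bucket=200.0, max_bucket=350.0)

# flush the file before re-opening it for reading; HdfWriter keeps the
# underlying SD handle in self.sd
writer.sd.end()

# read it back the way plot.py does: write_seq stacked the arrays along a
# leading band axis, and the keyword attributes became SDS attributes
sd = SD('example.hdf')
sds = sd.select('Example_Hist')
band0 = sds[0]
x_range = (sds.min_bucket, sds.max_bucket)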