Unverified Commit 069ff562 authored by David Hoese

Fix netcdf variable creation to be alphabetical for data variables

parent dd95488c

@@ -154,7 +154,7 @@ def create_variables(nc_file, first_stamp, database, chunk_sizes=None, zlib=Fals
         # if 'time' in key:
         #     variable.calendar = 'gregorian'
-    for entry in database:
+    for entry in sorted(database.keys()):
         if entry == 'stamp':
             continue
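
The one-line change above is the fix named in the commit message: iterating over sorted(database.keys()) instead of the dict itself makes createVariable get called in alphabetical order, so the data variables land in the output file alphabetically regardless of how the schema dict happens to be ordered. A minimal sketch of the effect (the three-entry database below is invented for illustration; the real one comes from schema.database and carries full variable metadata):

# Minimal sketch; the entries below are invented for illustration only.
import netCDF4

database = {
    'wind_speed': {},
    'air_temp': {},
    'rel_hum': {},
    'stamp': {},  # skipped, just like in the real loop
}

nc_file = netCDF4.Dataset('example.nc', 'w')
nc_file.createDimension('time', None)

# sorted() guarantees alphabetical creation order no matter how the
# schema dict is ordered in memory.
for entry in sorted(database.keys()):
    if entry == 'stamp':
        continue
    nc_file.createVariable(entry, 'f8', dimensions=('time',))

print(list(nc_file.variables))  # ['air_temp', 'rel_hum', 'wind_speed']
nc_file.close()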

@@ -364,7 +364,7 @@ def write_vars(nc_file, frame, database):
             fileVar['qc_' + varName][:] |= 0b1
-def write_global_attributes(nc_file, input_sources):
+def write_global_attributes(nc_file, input_sources, interval=None, datastream=None):
     # create global attributes
     nc_file.source = 'surface observation'
     nc_file.conventions = 'ARM-1.2 CF-1.6'

@@ -375,7 +375,14 @@ def write_global_attributes(nc_file, input_sources):
     # monthly files end with .month.nc
     # these end with .day.nc
-    nc_file.datastream = 'aoss.tower.nc-1d-1m.b1.v{}'.format(SOFTWARE_VERSION)
+    if datastream:
+        nc_file.datastream = datastream
+    elif interval in ['1D']:
+        # assume this is a monthly file, averaged daily
+        nc_file.datastream = 'aoss.tower.nc-1mo-1d.b1.v{software_version}'.format(software_version=SOFTWARE_VERSION)
+    elif interval in ['1T', '1min']:
+        # assume this is a daily file, averaged
+        nc_file.datastream = 'aoss.tower.nc-1d-1m.b1.v{software_version}'.format(software_version=SOFTWARE_VERSION)
     nc_file.software_version = SOFTWARE_VERSION
     nc_file.command_line = " ".join(sys.argv)
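
The new block above chooses the datastream global attribute from either an explicit override or the averaging interval, where '1D' and '1T'/'1min' are pandas offset aliases for daily and one-minute averaging. A hypothetical standalone helper with the same selection logic (pick_datastream and the placeholder SOFTWARE_VERSION value are inventions for this sketch):

# Hypothetical helper mirroring the selection above; SOFTWARE_VERSION is
# a placeholder value used only for this illustration.
SOFTWARE_VERSION = '00'

def pick_datastream(interval=None, datastream=None):
    if datastream:
        # an explicit override always wins
        return datastream
    if interval in ['1D']:
        # daily averages -> monthly file
        return 'aoss.tower.nc-1mo-1d.b1.v{}'.format(SOFTWARE_VERSION)
    if interval in ['1T', '1min']:
        # minute averages -> daily file
        return 'aoss.tower.nc-1d-1m.b1.v{}'.format(SOFTWARE_VERSION)
    return None

print(pick_datastream(interval='1T'))        # aoss.tower.nc-1d-1m.b1.v00
print(pick_datastream(datastream='custom'))  # custom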

@@ -388,7 +395,7 @@ def write_global_attributes(nc_file, input_sources):
 def create_giant_netcdf(input_files, output_fn, zlib, chunk_size,
                         start=None, end=None, interval_width=None,
                         summary=False,
-                        database=schema.database):
+                        database=schema.database, datastream=None):
     frame = get_data(input_files)
     if frame.empty:
         raise ValueError("No data found from input files: {}".format(", ".join(input_files)))

@@ -432,7 +439,10 @@ def create_giant_netcdf(input_files, output_fn, zlib, chunk_size,
     write_dimensions(nc_file)
     create_variables(nc_file, first_stamp, database, chunk_sizes, zlib)
     write_vars(nc_file, frame, database)
-    write_global_attributes(nc_file, input_files)
+    write_global_attributes(nc_file,
+                            [os.path.basename(x) for x in input_files],
+                            interval=interval_width,
+                            datastream=datastream)
     nc_file.close()
     return nc_file
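
The call site now strips directory components from the input paths, so the global attribute records file names rather than full local paths, and it forwards the new interval and datastream keywords. A tiny sketch of the basename step (the paths here are invented):

# The paths below are invented for illustration only.
import os

input_files = [
    '/path/to/tower_2017-01-01.ascii',
    '/path/to/tower_2017-01-02.ascii',
]
print([os.path.basename(x) for x in input_files])
# ['tower_2017-01-01.ascii', 'tower_2017-01-02.ascii']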

@@ -474,6 +484,7 @@ http://pandas.pydata.org/pandas-docs/stable/timeseries.html#offset-aliases""")
                         help="Variable names to include in the NetCDF file (base name, no suffixes)")
     parser.add_argument('--chunk-size', type=int, help='chunk size for the netCDF file')
     parser.add_argument('-z', '--zlib', action='store_true', help='compress netCDF file with zlib')
+    parser.add_argument('--data-stream', help="'datastream' global attribute to put in output file")
     parser.add_argument('-i', '--input', dest='input_files', required=True, nargs="+",
                         help="aoss_tower level_00 paths. Use @filename to red a list of paths from that file.")

@@ -513,7 +524,7 @@ each input file is mapped to the corresponding output file.
             create_giant_netcdf(in_files, out_fn, args.zlib,
                                 args.chunk_size, args.start_time,
                                 args.end_time, args.interval, args.summary,
-                                mini_database)
+                                mini_database, args.data_stream)
             success = True
         except (ValueError, TypeError):
             LOG.error("Could not generate NetCDF file for {}".format(in_files), exc_info=True)