
ancil.py

    # python3
    
    # Copyright 2021 University of Wisconsin Regents
    # 
    # This file is part of csppfetch.
    # 
    # csppfetch is free software: you can redistribute it and/or modify
    # it under the terms of the GNU General Public License as published by
    # the Free Software Foundation, either version 3 of the License, or
    # (at your option) any later version.
    # 
    # csppfetch is distributed in the hope that it will be useful,
    # but WITHOUT ANY WARRANTY; without even the implied warranty of
    # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    # GNU General Public License for more details.
    # 
    # You should have received a copy of the GNU General Public License
    # along with csppfetch.  If not, see <https://www.gnu.org/licenses/>.
    
    import os
    import sys
    import datetime as dt
    import csppfetch
    import csppfetch.daterange
    from csppfetch.roundtozero import roundtozero
    
    ################################################################################
    #
    # Common settings
    #
    
    PACKAGE_ENV_ID = 'CSPP_GEO_AITF_ANCIL_'
    CSPP_GEO_AITF_URL_BASE = "https://geodb.ssec.wisc.edu/ancillary/"
    CSPP_GEO_AITF_URL_DIR_PART = "%Y_%m_%d_%j/"
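    # These strftime-style templates are presumably expanded by csppfetch;
    # e.g. CSPP_GEO_AITF_URL_DIR_PART for 2019-05-27 would render as
    # "2019_05_27_147/" (%j is the zero-padded day of the year).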
    
    
    ################################################################################
    #
    # Data: AVHRR Sea Surface Temperature
    #
    # This is a relatively basic use of csppfetch.Downloader.
    
    SST_FILENAME = "avhrr-only-v2.%Y%m%d{priority}.nc"
    SST_LOCAL_DIR = 'oisst_daily/'
    SST = csppfetch.Downloader(
        name = "AVHRR Sea Surface temperature",
        package_env_id = PACKAGE_ENV_ID,
        url_base = CSPP_GEO_AITF_URL_BASE,
        url_relative = CSPP_GEO_AITF_URL_DIR_PART+SST_FILENAME,
        local = SST_LOCAL_DIR+SST_FILENAME,
        period = dt.timedelta(days=1),
        epoch_start = dt.datetime(2010,1,1,0,0,0),
        priorities = ['', '_preliminary'],
        oldest_usable = dt.timedelta(days=7),
        expected_newest = dt.timedelta(hours=1)
        )
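
    # A sketch of what these settings imply (assuming csppfetch substitutes
    # each entry of "priorities" for {priority}, in order): for 2021-01-02,
    # the downloader would try avhrr-only-v2.20210102.nc first, then fall
    # back to avhrr-only-v2.20210102_preliminary.nc.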
    
    
    ###############################################################################
    #
    # Data: Global Forecast System
    #
    # GFS is unusual; we need a pair of forecasts that bracket the time we
    # want to process, while a full cache is just a pile of stuff.
    # Only accept forecasts 3 hours through 12 hours into the future.
    #
    # So, to process 2019-05-27 22:00Z, we could use either of
    #   gfs.t18z.190527.pgrb2f03 + gfs.t18z.190527.pgrb2f06 or
    #   gfs.t12z.190527.pgrb2f09 + gfs.t12z.190527.pgrb2f12
    # We can NOT use
    #   gfs.t06z.190527.pgrb2f15 + gfs.t06z.190527.pgrb2f18
    # as we won't use forecasts more than 12 hours into the future.
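    #
    # The bracketing arithmetic, sketched with plain datetime values:
    #   scan = dt.datetime(2019, 5, 27, 22, 0)   # time to process
    #   run  = dt.datetime(2019, 5, 27, 18, 0)   # nearest preceding 6-hour run
    #   offset = scan - run                      # 4:00:00
    # Rounding the 4-hour offset down to the 3-hour forecast grid gives f03,
    # so the 18Z run brackets the scan with f03+f06; the 12Z run (offset 10h,
    # rounds to f09) gives f09+f12; the 06Z run (offset 16h, rounds to f15)
    # would exceed the 12-hour limit.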
    
    class GFSDownloader(csppfetch.Downloader):
        def __init__(self):
            remote_filename = "gfs.t%Hz.%y%m%d.pgrb2f{priority}"
            local_filename = "gfs.t%Hz.pgrbf{priority}"
            localdir = "gfs_grib2_0.5deg/%Y/%m/%d/"
    
            # Why 5 hours?  
            #
            # Examination of geodb in 2019, covering 2019-03-01 through
            # 2019-06-12, and in 2021, covering 2021-07-02 through 2021-09-01,
            # suggests the data is usually available within about 65 minutes,
            # and is almost always available within 4 hours and 30 minutes.
            # Round up to 5 arbitrarily.
            #
            # As we don't really have use for the data until 3 hours after
            # nominal generation and don't _need_ it until 6 hours, this seems
            # a reasonable number.
            expected_newest = dt.timedelta(hours=5)
    
            super().__init__(
                name="Global Forecast System",
                package_env_id = PACKAGE_ENV_ID,
                url_base = CSPP_GEO_AITF_URL_BASE,
                url_relative = CSPP_GEO_AITF_URL_DIR_PART+remote_filename,
                local = localdir+local_filename,
                period = dt.timedelta(hours=6),
                epoch_start = dt.datetime(2010,1,1,0,0,0),
                expected_newest = expected_newest,
                )
    
            # Fields we're not using:
            # oldest_usable - Not meaningful here; the real limit is how far
            #               into the future a forecast we're willing to use,
            #               which appears as "{priority}" in the filename
            #               templates above.
    
            # Forecasts are available this often
            self.forecast_step = dt.timedelta(hours=3)
            # and the first forecast is (starting point for forecast_steps)
            self.first_forecast = dt.timedelta(hours=0)
            # Given the above, valid forecasts are 0, 3, 6, ... hours.
    
            # We can use forecasts from this far into the future
            self.shortest_valid_forecast = dt.timedelta(hours=3)
            # ...through this far into the future.
            self.longest_valid_forecast = dt.timedelta(hours=12)
            # Given that, valid forecasts are 3, 6, 9, and 12.
    
            valid_multiple = (self.shortest_valid_forecast-self.first_forecast)/self.forecast_step
            if not valid_multiple.is_integer():
                raise RuntimeError(f"shortest_valid_forecast ({self.shortest_valid_forecast}) - first_forecast ({self.first_forecast}) is not an integer multiple of forecast_step ({self.forecast_step})")
    
        def url_to_file_for_time(self, time, forecast_hours):
            hours = int(forecast_hours / dt.timedelta(hours=1))
            hours_str = f"{hours:02d}"
            url = self._expand(self.url_relative, time, hours_str)
            local = self._expand(self.local, time, hours_str)
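            # A sketch of the result (assuming self._expand applies strftime
            # to the template and substitutes {priority}): for 2019-05-27
            # 18:00Z and forecast_hours of three hours, this should map
            #   .../2019_05_27_147/gfs.t18z.190527.pgrb2f03
            # to
            #   gfs_grib2_0.5deg/2019/05/27/gfs.t18z.pgrbf03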
            return {url:local}
    
        def get_cache_filesets(self, start = None, end = None):
    
            # A copy of Downloader's implementation, but we don't use
            # self._daterange; we call daterange.daterange directly so we can pass
            # in a step of self.forecast_step instead of self.period. We do this
            # because although our period is only every 6 hours, we change which
            # files someone might want every _3_. That is, to process 5Z data, we'd
            # use the 0Z GFS data's 3 and 6 hour forecasts, but for 7Z data, we'd
            # use the 0Z GFS data's 6 and 9 hour forecasts.
            #
            # Perhaps the ability to override the period here should be in the
            # parent class? Or maybe this case is too specialized to bother.
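            #
            # For example (a sketch):
            #   GFS.get_cache_filesets(start=dt.datetime(2019, 5, 27, 0, 0),
            #                          end=dt.datetime(2019, 5, 27, 22, 0))
            # walks backwards from 22:00Z to 00:00Z in 3-hour steps and
            # returns one list of FileSets per step.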
    
            if self.oldest_cache < dt.timedelta(days=0):
                raise ValueError(f"oldest_cache should be positive; it is {self.oldest_cache}")
    
            if end is None: end = dt.datetime.now()
            if start is None: start = end - self.oldest_cache
    
            if start > end:
                raise ValueError(f"start ({start}) should be before end ({end})")
    
            fileset_list_list = []
            for time in csppfetch.daterange.daterange(end, start, -self.forecast_step, inclusive=True):
                fileset_list = self.get_filesets_for_time(time)
                fileset_list_list.append(fileset_list)
            return fileset_list_list
    
        def get_filesets_for_time(self, scan_time):
            generated_time = self._nearest_preceeding_time(scan_time)
            time_since_gen = scan_time - generated_time
            if time_since_gen < dt.timedelta(0):
                raise RuntimeError(f"self._nearest_preceeding_time({scan_time}) returned {generated_time}, but that's AFTER the start; should be BEFORE or SAME.")
            forecast_start = roundtozero(time_since_gen, self.forecast_step, self.first_forecast)
    
            # There is a non-looping way to do this, 
            # but my brain is fried at the moment.
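            # (A non-looping equivalent, as a sketch: with
            #    k = max(0, math.ceil((self.shortest_valid_forecast
            #                          - forecast_start) / self.period))
            # one could do forecast_start += k*self.period and
            # generated_time -= k*self.period; that needs "import math".)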
            while forecast_start < self.shortest_valid_forecast:
                forecast_start += self.period
                generated_time -= self.period
    
            filesets = []
            while (forecast_start+self.forecast_step) <= self.longest_valid_forecast:
                end = forecast_start + self.forecast_step
                urls_to_files = {}
                urls_to_files.update(self.url_to_file_for_time(generated_time, forecast_start))
                urls_to_files.update(self.url_to_file_for_time(generated_time, end))
    
                expected = self.is_expected(generated_time)
                startstr = str(int(forecast_start.total_seconds() / (60*60)))
                endstr = str(int(end.total_seconds() / (60*60)))
    
                description = f"{self.name} for {generated_time} forecast {startstr} and {endstr} hours into the future"
                fs = csppfetch.FileSet(urls_to_files, expected, description)
                filesets.append(fs)
    
                forecast_start += self.period
                generated_time -= self.period
            return filesets

    GFS = GFSDownloader()
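
    if __name__ == "__main__":
        # A minimal demonstration sketch, not part of the original module:
        # print the candidate GFS filesets for one scan time, using only the
        # classes defined above.  (Assumes csppfetch.FileSet objects have a
        # readable repr; adjust the print to taste.)
        scan_time = dt.datetime(2019, 5, 27, 22, 0, 0)
        for fileset in GFS.get_filesets_for_time(scan_time):
            print(fileset)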