From 3e6611a0d1a2372e2a99159ace8904b7cf67d69f Mon Sep 17 00:00:00 2001
From: Paolo Veglio <paolo.veglio@ssec.wisc.edu>
Date: Fri, 27 Jan 2023 20:18:57 +0000
Subject: [PATCH] read_data tested. working as intended

---
 read_data.py | 126 +++++++++++++++------------------------------------
 1 file changed, 37 insertions(+), 89 deletions(-)

diff --git a/read_data.py b/read_data.py
index bffd73b..a2e64c5 100644
--- a/read_data.py
+++ b/read_data.py
@@ -55,8 +55,8 @@ class CollectInputs(object):
     ancillary_dir: str = field(default='', validator=[validators.instance_of(str), ])
     sst_file: str = field(default='', validator=[validators.instance_of(str), ])
     ndvi_file: str = field(default='', validator=[validators.instance_of(str), ])
-    geos_file_1: str = field(default='', validator=[validators.instance_of(str), ])
-    geos_file_2: str = field(default='', validator=[validators.instance_of(str), ])
+    geos_file_before: str = field(default='', validator=[validators.instance_of(str), ])
+    geos_file_after: str = field(default='', validator=[validators.instance_of(str), ])
     geos_land: str = field(default='', validator=[validators.instance_of(str), ])
     geos_ocean: str = field(default='', validator=[validators.instance_of(str), ])
     geos_constants: str = field(default='', validator=[validators.instance_of(str), ])
@@ -147,11 +147,14 @@ class ReadData(CollectInputs):
         return rad_data
 
     def preprocess_viirs(self,
+                         geo_data: xr.Dataset,
                          viirs: xr.Dataset) -> xr.Dataset:
         """Create band combinations (e.g. ratios, differences) that are used by some spectral tests.
 
         Parameters
         ----------
+        geo_data: xarray.Dataset
+            dataset containing all geolocation data
         viirs: xr.Dataset
             VIIRS L1b data
 
@@ -209,6 +212,9 @@ class ReadData(CollectInputs):
         # temp value to force the code to work
         viirs['M128'] = (self.dims, np.zeros(viirs.M15.shape))
 
+        viirs.update(geo_data)
+        viirs = viirs.set_coords(['latitude', 'longitude'])
+
         logging.debug('Viirs preprocessing completed successfully.')
         return viirs
 
@@ -368,7 +374,7 @@ class ReadAncillary(CollectInputs):
         """
         geos_variables = ['tpw', 'snow_fraction', 'ice_fraction', 'ocean_fraction',
                           'land_ice_fraction', 'surface_temperature']
-        geos_data = {var: np.empty(self.out_shape, dtype=np.float32) for var in geos_variables}
+        geos_data = {var: np.empty(self.out_shape, dtype=np.float32).ravel() for var in geos_variables}
 
         geos_data = anc.py_get_GEOS(self.latitude.ravel(), self.longitude.ravel(), self.resolution,
                                     self.get_granule_time(), self.ancillary_dir, self.geos_file_before,
@@ -381,79 +387,40 @@ class ReadAncillary(CollectInputs):
         logging.debug('GEOS data read successfully')
         return geos_data
 
+    def pack_data(self) -> xr.Dataset:
+        """Pack all the ancillary data into a single dataset
 
-def correct_reflectances():
-    pass
+        Parameters
+        ----------
 
+        Returns
+        -------
+        ancillary_data: xr.Dataset
+            dataset containing all the ancillary data
+        """
+        ancillary_data = xr.Dataset()
+        ancillary_data['sst'] = (self.dims, self.get_sst())
+        ancillary_data['ndvi'] = (self.dims, self.get_ndvi())
 
-def read_ancillary_data(file_names: str,
-                        latitude: np.ndarray,
-                        longitude: np.ndarray,
-                        resolution: int = 1) -> xr.Dataset:
-    """Read ancillary data using C functions from original MVCM
+        geos_tmp = self.get_geos()
+        for var in list(geos_tmp.keys()):
+            ancillary_data[f'geos_{var}'] = (self.dims, geos_tmp[var])
 
-    Parameters
-    ----------
-    file_names: str
-    latitude: np.ndarray
-    longitude: np.ndarray
-    resolution: int
+        return ancillary_data
 
-    Returns
-    -------
-    data: xarray.Dataset
-    """
-    # Ancillary files temporarily defined here. Eventually we will find a better way to pass these
-    anc_dir = file_names['ANC_DIR']
-    sst_file = file_names['SST']
-    ndvi_file = file_names['NDVI']
-    geos_cnst = file_names['GEOS_constants']
-    geos_lnd = file_names['GEOS_land']
-    geos_ocn = file_names['GEOS_ocean']
-    geos1 = file_names['GEOS_atm_1']
-    geos2 = file_names['GEOS_atm_2']
-    vnptime = '.'.join(os.path.basename(file_names['MOD02']).split('.')[1:3])
-    start_time = dt.strftime(dt.strptime(vnptime, 'A%Y%j.%H%M'), '%Y-%m-%d %H:%M:%S.000')
-
-    out_shape = latitude.shape
-
-    sst = np.empty((np.prod(out_shape), ), dtype=np.float32)
-    ndvi = np.empty((np.prod(out_shape), ), dtype=np.float32)
-    eco = np.empty((np.prod(out_shape), ), dtype=np.ubyte)
-
-    geos_data = {'tpw': np.empty((np.prod(out_shape), ), dtype=np.float32),
-                 'snowfr': np.empty((np.prod(out_shape), ), dtype=np.float32),
-                 'icefr': np.empty((np.prod(out_shape), ), dtype=np.float32),
-                 'ocnfr': np.empty((np.prod(out_shape), ), dtype=np.float32),
-                 'landicefr': np.empty((np.prod(out_shape), ), dtype=np.float32),
-                 'sfct': np.empty((np.prod(out_shape), ), dtype=np.float32),
-                 }
-
-    sst = anc.py_get_Reynolds_SST(latitude.ravel(), longitude.ravel(), resolution, anc_dir, sst_file, sst)
-    logging.info('SST read successfully')
-
-    ndvi = anc.py_get_NDVI_background(latitude.ravel(), longitude.ravel(),
-                                      resolution, anc_dir, ndvi_file, ndvi)
-    logging.info('NDVI read successfully')
-
-    eco = anc.py_get_Olson_eco(latitude.ravel(), longitude.ravel(), resolution, anc_dir, eco)
-    logging.info('Olson eco read successfully')
-
-    geos = anc.py_get_GEOS(latitude.ravel(), longitude.ravel(), resolution, start_time, anc_dir,
-                           geos1, geos2, geos_lnd, geos_ocn, geos_cnst, geos_data)
-    logging.info('GEOS5 data read successfully')
-
-    ancillary = {'sst': sst.reshape(out_shape),
-                 'ndvi': ndvi.reshape(out_shape),
-                 'eco': eco.reshape(out_shape)
-                 }
-    for var in list(geos):
-        ancillary[f'geos_{var}'] = geos[var].reshape(out_shape)
-
-    dims = ('number_of_lines', 'number_of_pixels')
-    data = xr.Dataset.from_dict({var: {'dims': dims, 'data': ancillary[var]} for var in list(ancillary)})
+    scene_flags = scn.find_scene(viirs, sunglint_angle)
+    scene = scn.scene_id(scene_flags)
+
+    scene_xr = xr.Dataset()
+    for s in scn._scene_list:
+        scene_xr[s] = (('number_of_lines', 'number_of_pixels'), scene[s])
+
+    scene['lat'] = viirs.latitude
+    scene['lon'] = viirs.longitude
+
+    data = xr.Dataset(viirs, coords=scene_xr)
+    data.drop_vars(['latitude', 'longitude'])
 
-    return data
 
 
 def get_data(satellite: str,
@@ -567,22 +534,3 @@ def get_data(satellite: str,
     data.drop_vars(['latitude', 'longitude'])
 
     return data
-
-
-def get_data_new(satellite: str,
-                 sensor: str,
-                 file_names: Dict[str, str],
-                 sunglint_angle: float,
-                 hires: bool = False) -> xr.Dataset:
-    """Reads satellite and ancillary data and packs everything in an xarray dataset for the main function
-    """
-    mod02 = file_names['MOD02']
-    mod03 = file_names['MOD03']
-
-    viirs = read_viirs(sensor, f'{mod02}', f'{mod03}')
-    ancillary_data = read_ancillary_data(file_names, viirs.latitude.values, viirs.longitude.values)
-
-
-
-
-
-- 
GitLab