From 7f5429f4abbffa5265dec885b2f1cc8e806aa969 Mon Sep 17 00:00:00 2001
From: "(no author)" <(no author)@8a9318a1-56ba-4d59-b755-99d26321be01>
Date: Fri, 1 Jul 2011 04:35:37 +0000
Subject: [PATCH] a draft of a great circle distance function; fixed the bin
 range and epsilon comparison in colocation; added a check for no-data to stop
 scatter plot from crashing bin/tuple mode

git-svn-id: https://svn.ssec.wisc.edu/repos/glance/trunk@145 8a9318a1-56ba-4d59-b755-99d26321be01
---
 pyglance/glance/collocation.py   | 93 +++-----------------------------
 pyglance/glance/compare.py       | 46 ++++++++++++++--
 pyglance/glance/delta.py         | 27 ++++++++++
 pyglance/glance/io.py            |  5 ++
 pyglance/glance/plotcreatefns.py |  2 +
 pyglance/setup.py                |  2 +-
 6 files changed, 83 insertions(+), 92 deletions(-)

diff --git a/pyglance/glance/collocation.py b/pyglance/glance/collocation.py
index ea34629..db4ab00 100644
--- a/pyglance/glance/collocation.py
+++ b/pyglance/glance/collocation.py
@@ -11,91 +11,11 @@ Copyright (c) 2010 University of Wisconsin SSEC. All rights reserved.
 import logging
 import numpy as np
 
+import glance.delta as delta
+
+
 LOG = logging.getLogger(__name__)
 
-# TODO, move to using this class (not yet finished)
-class CollocationMapping :
-    """
-    This class represents a mapping that collocates points in two
-    sets of data within certain tolerances. These points may be
-    filtered based on secondary criteria beyond spatial tolerances,
-    but once the inital mapping is defined, an instance of this class
-    can only be used to convert size-compatable data from the two
-    original sets into the final, collocated order.
-    
-    Note: the mapping will include all pairs of points that match
-    within the given epsilon and additional data criteria given,
-    this means an individual a or b point may be repeated if it matches
-    multiple points in the other data set satisfactorily
-    
-    WARNING: The algorithm used may fail to find all possible matches
-    if a longitude/latitude epsilon of greater than or equal to one
-    degree is given TODO
-    """
-    
-    """
-    raw_lon_lat_matches  - the number of points matched, based on the longitude and latitude alone
-    raw_unmatchable_in_a - the number of unmacthed pointes in a, based on longitude and latitude alone
-    raw_unmatchable_in_b - the number of unmacthed pointes in b, based on longitude and latitude alone
-    
-    a_point_mapping      - mapping of points in a to points in b; TODO give form
-    b_point_mapping      - mapping of points in b to points in a; TODO give form
-    
-    matched_longitude    - the list of matched longitude values
-    matched_latitude     - the list of matched latitude  values
-    
-    unmatchable_longitude_a - the list of longitude values in a that could not be matched
-    unmatchable_latitude_a  - the list of latitude  values in a that could not be matched
-    unmatchable_longitude_b - the list of longitude values in b that could not be matched
-    unmatchable_latitude_b  - the list of latitude  values in b that could not be matched
-    """
-    
-    def __init__(self,
-                 (a_longitude, a_latitude),
-                 (b_longitude, b_latitude),
-                 lon_lat_epsilon,
-                 valid_in_a_mask=None, valid_in_b_mask=None,
-                 additional_data_criteria=[ ],
-                 additional_filter_functions=[ ]) :
-        """
-        Build collocation mapping data based on two sets of
-        longitude and latitude, as well as an epsilon that
-        describes acceptable differences in degrees.
-        
-        Note: additional "criteria" variable data may be inculuded.
-        additional_filter_functions will be called in the form:
-        
-            additional_filter_functions[i](aRow, aCol, bRow, bCol, additional_data_criteria[i])
-        
-        a return of True or False is expected, to indicate whether or not
-        the match should be accepted
-        """
-        pass
-    
-    def _create_basic_mapping_from_lon_lat((alongitude, alatitude),
-                                           (blongitude, blatitude),
-                                           lonlatEpsilon,
-                                           invalidAMask=None, invalidBMask=None) :
-        """
-        match points together based on their longitude and latitude values
-        to match points must be within lonlatEpsilon degrees in both longitude and latitude
-        
-        if the longitude and latitude variables contain invalid data the invalidAMask and
-        invalidBMask should be passed with the appropriate masking to remove the invalid values
-        
-        the return will be in the form of two dictionaries of points, one from a and one from b,
-        indexed on the index number in the A or B data where they can be found. Each entry will
-        consist of a list of:
-            [longitudeValue, latitudeValue, indexNumber, [list of matching indexes in the other set]]
-        
-        Note: the return will include all pairs of points that match,
-        this means an individual a or b point may be repeated if it matches
-        multiple points within the lonlatEpsilon provided
-        
-        Warning: This algorithm will fail to find all matching points if the lonlatEpsilon is set to a
-        value greater than or equal to 1.0 degrees. This is related to the bin size used for searching
-        thoretically the bin size could be corrected to scale with the lonlatEpsilon in the future. TODO
-        """
 
 def create_colocation_mapping_within_epsilon((alongitude, alatitude),
                                              (blongitude, blatitude),
@@ -194,8 +114,8 @@ def create_colocation_mapping_within_epsilon((alongitude, alatitude),
         
         # figure out the lat/lon of the 9 bins that are "near" this one
         toSearch = [ ]
-        for latValue in range(binLatitude - 1, binLatitude + 1) :
-            for lonValue in range(binLongitude -1, binLongitude + 1) :
+        for latValue in (binLatitude - 1, binLatitude, binLatitude + 1) : # the old broken code: range(binLatitude - 1, binLatitude + 1) :
+            for lonValue in (binLongitude - 1, binLongitude, binLongitude + 1) : # the old broken code: range(binLongitude - 1, binLongitude + 1) :
                 toSearch.append((latValue, lonValue))
         
         # for each A pt in this bin
@@ -211,7 +131,8 @@ def create_colocation_mapping_within_epsilon((alongitude, alatitude),
                     # for each data point in the B bin, check if it matches our current A point
                     for bLat, bLon, bIndex, bMatches in bBins[(latValue, lonValue)] :
                         
-                        if (abs(bLat - aLat) < lonlatEpsilon) and (abs(aLon - bLon) < lonlatEpsilon) :
+                        # if the difference is is less than or equal to epsilon, this is an acceptable match
+                        if (abs(bLat - aLat) <= lonlatEpsilon) and (abs(aLon - bLon) <= lonlatEpsilon) :
                             totalMatches = totalMatches + 1
                             
                             # put the point on our matched lists
diff --git a/pyglance/glance/compare.py b/pyglance/glance/compare.py
index 5fbcb30..0663465 100644
--- a/pyglance/glance/compare.py
+++ b/pyglance/glance/compare.py
@@ -106,6 +106,19 @@ def _check_file_names(fileAObject, fileBObject) :
     uniqueToANames = aNames - commonNames
     uniqueToBNames = bNames - commonNames
     
+    return _check_shared_names(set(fileAObject()), set(fileBObject()))
+
+def _check_shared_names (nameSetA, nameSetB) :
+    """
+    compare the names in the two sets
+    """
+    
+    # what names do they have in common?
+    commonNames = nameSetA.intersection(nameSetB)
+    # what names are unique to each set?
+    uniqueToANames = nameSetA - commonNames
+    uniqueToBNames = nameSetB - commonNames
+    
     return {'sharedVars': commonNames,  'uniqueToAVars': uniqueToANames, 'uniqueToBVars': uniqueToBNames}
 
 def _resolve_names(fileAObject, fileBObject, defaultValues,
@@ -426,6 +439,10 @@ def _get_and_analyze_lon_lat (fileObject,
     invalidLongitude = (longitudeData < -180)   | (longitudeData > 360) | ~isfinite(longitudeData)
     spaciallyInvalidMask = invalidLatitude | invalidLongitude
     
+    # get the missing value as well
+    longitudeMissingVal = fileObject.file_object.missing_value(longitudeVariableName)
+    latitudeMissingVal  = fileObject.file_object.missing_value( latitudeVariableName)
+    
     # analyze our spacially invalid data
     percentageOfSpaciallyInvalidPts, numberOfSpaciallyInvalidPts = _get_percentage_from_mask(spaciallyInvalidMask)
     
@@ -434,7 +451,8 @@ def _get_and_analyze_lon_lat (fileObject,
                        'perInvPts':    percentageOfSpaciallyInvalidPts
                        }
     
-    return dataobj.DataObject(longitudeData, ignoreMask=invalidLongitude), dataobj.DataObject(latitudeData, ignoreMask=invalidLatitude), spatialStatInfo
+    return dataobj.DataObject(longitudeData, fillValue=longitudeMissingVal, ignoreMask=invalidLongitude), \
+           dataobj.DataObject(latitudeData,  fillValue=latitudeMissingVal,  ignoreMask=invalidLatitude), spatialStatInfo
 
 def _get_percentage_from_mask(dataMask) :
     """
@@ -668,9 +686,27 @@ def _handle_lon_lat_info (lon_lat_settings, a_file_object, b_file_object, output
         latitude_common      = None
     
     # FUTURE, return the lon/lat objects instead?
-    return {'a':      {"lon": longitude_a_object.data,      "lat": latitude_a_object.data,      "inv_mask": longitude_a_object.masks.ignore_mask},
-            'b':      {"lon": longitude_b_object.data,      "lat": latitude_b_object.data,      "inv_mask": longitude_b_object.masks.ignore_mask},
-            'common': {"lon": longitude_common,             "lat": latitude_common,             "inv_mask": spaciallyInvalidMask}   }, \
+    return {
+            'a':      {
+                       "lon":       longitude_a_object.data,
+                       "lat":       latitude_a_object.data,
+                       "inv_mask":  longitude_a_object.masks.ignore_mask,
+                       "lon_fill":  longitude_a_object.fill_value,
+                       "lat_fill":  latitude_a_object.fill_value
+                       },
+            'b':      {
+                       "lon":       longitude_b_object.data,
+                       "lat":       latitude_b_object.data,
+                       "inv_mask":  longitude_b_object.masks.ignore_mask,
+                       "lon_fill":  longitude_b_object.fill_value,
+                       "lat_fill":  latitude_b_object.fill_value
+                       },
+            'common': {
+                       "lon":       longitude_common,
+                       "lat":       latitude_common,
+                       "inv_mask":  spaciallyInvalidMask
+                       }
+            }, \
            spatialInfo
 
 def _open_and_process_files (args, numFilesExpected):
@@ -1316,7 +1352,7 @@ def reportGen_library_call (a_path, b_path, var_list=[ ],
         technical_name, b_variable_technical_name, \
                 explanationName = _get_name_info_for_variable(displayName, varRunInfo)
         
-        print('analyzing: ' + explanationName + ')')
+        print('analyzing: ' + explanationName)
         
         # load the variable data
         aData = _load_variable_data(aFile.file_object, technical_name,
diff --git a/pyglance/glance/delta.py b/pyglance/glance/delta.py
index 9dcab83..a100b9b 100644
--- a/pyglance/glance/delta.py
+++ b/pyglance/glance/delta.py
@@ -8,6 +8,7 @@ Copyright (c) 2009 University of Wisconsin SSEC. All rights reserved.
 """
 
 import logging
+import math
 import numpy as np
 from numpy import * # todo, remove this line
 
@@ -15,8 +16,34 @@ from scipy.stats import pearsonr
 
 LOG = logging.getLogger(__name__)
 
+# for calculating the great circle distance, this is the radius well will
+# assume that the spherical model of the earth has, in km
+SPHERICAL_EARTH_RADIUS = 6373.0
+
 # -------------- generic data manipulation and analysis --------------
 
+# TODO have someone double check the math here
+def great_circle_distance (latitudeA, longitudeA, latitudeB, longitudeB) :
+    """
+    Calculate the great circle distance (in km) between the A and B points
+    given in the input parameters, the inputs are expected to be in degrees
+    
+    note: This method uses the spherical law of cosines, and is best suited
+    for smaller distances.
+    """
+    
+    # convert to radians
+    latARad = math.radians(latitudeA)
+    lonARad = math.radians(longitudeA)
+    latBRad = math.radians(latitudeB)
+    lonBRad = math.radians(longitudeB)
+    
+    distToReturn = math.acos(math.sin(latARad) * math.sin(latBRad) +
+                             math.cos(latARad) * math.cos(latBRad) *
+                             math.cos(lonBRad - lonARad)) * SPHERICAL_EARTH_RADIUS
+    
+    return distToReturn
+
 def min_with_mask(data, goodMask=None) :
     """
     get the minimum of the values in data,
diff --git a/pyglance/glance/io.py b/pyglance/glance/io.py
index 9da2be8..02abd6c 100644
--- a/pyglance/glance/io.py
+++ b/pyglance/glance/io.py
@@ -97,10 +97,13 @@ class hdf(SD):
         data_type = None 
         scaling_method = None
         
+        print ("***** getting " + name + " from file")
+        
         # get the variable object and use it to
         # get our raw data and scaling info
         variable_object = self.get_variable_object(name)
         raw_data_copy = variable_object[:]
+        print ("****** raw data loaded")
         try :
             # TODO, this currently won't work with geocat data, work around it for now
             scale_factor, scale_factor_error, add_offset, add_offset_error, data_type = SDS.getcal(variable_object)
@@ -117,6 +120,8 @@ class hdf(SD):
                 scaling_method = temp_attributes['scaling_method']
         SDS.endaccess(variable_object)
         
+        print ("***** scaling information loaded")
+        
         # don't do lots of work if we don't need to scale things
         if (scale_factor == 1.0) and (add_offset == 0.0) :
             return raw_data_copy
diff --git a/pyglance/glance/plotcreatefns.py b/pyglance/glance/plotcreatefns.py
index 4ed53d6..fcc090e 100644
--- a/pyglance/glance/plotcreatefns.py
+++ b/pyglance/glance/plotcreatefns.py
@@ -845,6 +845,8 @@ class BinTupleAnalysisFunctionFactory (PlottingFunctionFactory) :
         # our list of functions that will later create the plots
         functionsToReturn = { }
         
+        if (aData.size <= 0) :
+            return functionsToReturn
         
         # create the scatter plot with colors for each section
         scatterPlotList = [ ]
diff --git a/pyglance/setup.py b/pyglance/setup.py
index 5be9183..737d362 100644
--- a/pyglance/setup.py
+++ b/pyglance/setup.py
@@ -22,7 +22,7 @@ easy_install -d $HOME/Library/Python -vi http://larch.ssec.wisc.edu/eggs/repos g
 from setuptools import setup, find_packages
 
 setup( name="glance", 
-       version="0.2.6.25-20110328-adl-rkg", 
+       version="0.2.6.26", 
        zip_safe = True,
        entry_points = { 'console_scripts': [ 'glance = glance.compare:main' ] },
        packages = find_packages('.'),
-- 
GitLab