From c7910dc1374f4eba6b12290b95d23ccd0abf5311 Mon Sep 17 00:00:00 2001 From: "(no author)" <(no author)@8a9318a1-56ba-4d59-b755-99d26321be01> Date: Mon, 17 May 2010 20:22:40 +0000 Subject: [PATCH] moving collocation methods in to a separate module git-svn-id: https://svn.ssec.wisc.edu/repos/glance/trunk@109 8a9318a1-56ba-4d59-b755-99d26321be01 --- pyglance/glance/collocation.py | 389 +++++++++++++++++++++++++++++++++ pyglance/glance/compare.py | 35 +-- pyglance/glance/delta.py | 311 -------------------------- 3 files changed, 407 insertions(+), 328 deletions(-) create mode 100644 pyglance/glance/collocation.py diff --git a/pyglance/glance/collocation.py b/pyglance/glance/collocation.py new file mode 100644 index 0000000..35b1401 --- /dev/null +++ b/pyglance/glance/collocation.py @@ -0,0 +1,389 @@ +#!/usr/bin/env python +# encoding: utf-8 +""" +This module handles physical collocation of two data sets. The code +in this module is based on previous versions of delta.py. + +Created by evas Apr 2010. +Copyright (c) 2010 University of Wisconsin SSEC. All rights reserved. +""" + +import logging +import numpy as np + +LOG = logging.getLogger(__name__) + +# TODO, move to using this class (not yet finished) +class CollocationMapping : + """ + This class represents a mapping that collocates points in two + sets of data within certain tolerances. These points may be + filtered based on secondary criteria beyond spatial tolerances, + but once the inital mapping is defined, an instance of this class + can only be used to convert size-compatable data from the two + original sets into the final, collocated order. + + Note: the mapping will include all pairs of points that match + within the given epsilon and additional data criteria given, + this means an individual a or b point may be repeated if it matches + multiple points in the other data set satisfactorily + + WARNING: The algorithm used may fail to find all possible matches + if a longitude/latitude epsilon of greater than or equal to one + degree is given TODO + """ + + """ + number_of_matches = -1 + number_unmatchable_in_a = -1 + number_unmatchable_in_b = -1 + + a_point_mapping = None + b_point_mapping = None + + matched_longitude = None + matched_latitude = None + + unmatchable_longitude_a = None + unmatchable_latitude_a = None + unmatchable_longitude_b = None + unmatchable_latitude_b = None + """ + + def __init__(self, + (a_longitude, a_latitude), + (b_longitude, b_latitude), + lon_lat_epsilon, + valid_in_a_mask=None, valid_in_b_mask=None, + additional_data_criteria=[ ], + additional_filter_functions=[ ]) : + """ + Build collocation mapping data based on two sets of + longitude and latitude, as well as an epsilon that + describes acceptable differences in degrees. + + Note: additional "criteria" variable data may be inculuded. + additional_filter_functions will be called in the form: + + additional_filter_functions[i](aRow, aCol, bRow, bCol, additional_data_criteria[i]) + + a return of True or False is expected, to indicate whether or not + the match should be accepted + """ + pass + + +def create_colocation_mapping_within_epsilon((alongitude, alatitude), + (blongitude, blatitude), + lonlatEpsilon, + invalidAMask=None, invalidBMask=None): + """ + match points together based on their longitude and latitude values + to match points must be within lonlatEpsilon degrees in both longitude and latitude + + if the longitude and latitude variables contain invalid data the invalidAMask and + invalidBMask should be passed with the appropriate masking to remove the invalid values + + the return will be in the form of two dictionaries of points, one from a and one from b, + indexed on the index number in the A or B data where they can be found. Each entry will + consist of a list of: + [longitudeValue, latitudeValue, indexNumber, [list of matching indexes in the other set]] + + Note: the return will include all pairs of points that match, + this means an individual a or b point may be repeated if it matches + multiple points within the lonlatEpsilon provided + + Warning: This algorithm will fail to find all matching points if the lonlatEpsilon is set to a + value greater than or equal to 1.0 degrees. This is related to the bin size used for searching + thoretically the bin size could be corrected to scale with the lonlatEpsilon in the future. TODO + """ + assert(alongitude.shape == alatitude.shape) + assert(blongitude.shape == blatitude.shape) + assert(lonlatEpsilon >= 0.0) + + LOG.debug("Preparing to colocate longitude and latitude points (acceptable epsilon: " + str(lonlatEpsilon) + " degrees)") + LOG.debug("size of A: " + str(alongitude.shape)) + LOG.debug("size of B: " + str(blongitude.shape)) + + # make blank invalid masks if none were passed in + if invalidAMask is None : + invalidAMask = np.zeros(alongitude.shape, dtype=bool) + if invalidBMask is None : + invalidBMask = np.zeros(blongitude.shape, dtype=bool) + + # make flat versions of our longitude and latitude + # so that our index correlations will be simple + flatALatitude = alatitude.ravel() + flatALongitude = alongitude.ravel() + flatBLatitude = blatitude.ravel() + flatBLongitude = blongitude.ravel() + + # find the ranges of the longitude and latitude + minLatitude = np.min(np.min(flatALatitude), np.min(flatBLatitude)) + maxLatitude = np.max(np.max(flatALatitude), np.max(flatBLatitude)) + minLongitude = np.min(np.min(flatALongitude), np.min(flatBLongitude)) + maxLongitude = np.max(np.max(flatALongitude), np.max(flatBLongitude)) + + # make the bins for the data in longitude/latitude space + aBins = { } + bBins = { } + allAPts = { } + allBPts = { } + + # loop to put all the aData in the bins + for index in range(flatALatitude.size) : + filingLat = int( flatALatitude[index]) + filingLon = int(flatALongitude[index]) + + if (filingLat, filingLon) not in aBins : + aBins[(filingLat, filingLon)] = [ ] + + # create the simple list holding that point in the form: + # the lon/lat values (for ease of comparison), the index number in A, and the list of matches + aPoint = [flatALatitude[index], flatALongitude[index], index, [ ]] + + # put the point in the list and bin + allAPts[index] = aPoint + aBins[(filingLat, filingLon)].append(aPoint) + + # loop to put all the bData in the bins + for index in range(flatBLatitude.size) : + filingLat = int( flatBLatitude[index]) + filingLon = int(flatBLongitude[index]) + + if (filingLat, filingLon) not in bBins : + bBins[(filingLat, filingLon)] = [ ] + + # create the simple list holding that point in the form: + # the lon/lat values (for ease of comparison), the index number in A, and the list of matches + bPoint = [flatBLatitude[index], flatBLongitude[index], index, [ ]] + + # put the point in the list and bin + allBPts[index] = bPoint + bBins[(filingLat, filingLon)].append(bPoint) + + # for debugging purposes + totalMatches = 0 + + # look in all the aData bins and select point pairs that match within epsilon + for binLatitude, binLongitude in aBins.keys() : + + # figure out the lat/lon of the 9 bins that are "near" this one + toSearch = [ ] + for latValue in range(binLatitude - 1, binLatitude + 1) : + for lonValue in range(binLongitude -1, binLongitude + 1) : + toSearch.append((latValue, lonValue)) + + # for each A pt in this bin + for aLat, aLon, aIndex, aMatches in aBins[(binLatitude, binLongitude)] : + + # look through my nearby B bins and find any + # "matching" points that fall within epsilon + for latValue, lonValue in toSearch : + + # if there's anything in that B bin + if ((latValue, lonValue) in bBins) and (bBins[(latValue, lonValue)] is not None) : + + # for each data point in the B bin, check if it matches our current A point + for bLat, bLon, bIndex, bMatches in bBins[(latValue, lonValue)] : + + if (abs(bLat - aLat) < lonlatEpsilon) and (abs(aLon - bLon) < lonlatEpsilon) : + totalMatches = totalMatches + 1 + + # put the point on our matched lists + aMatches.append(bIndex) + bMatches.append(aIndex) + + LOG.debug('Found ' + str(totalMatches) + ' matched pairs.') + + return allAPts, allBPts, totalMatches + +def create_colocated_lonlat_with_lon_lat_colocation(listOfColocatedALonLat, listOfColocatedBLonLat, + totalMatches, + aLongitude, aLatitude, + bLongitude, bLatitude) : + """ + given a pre colocated list of A and B lon/lat info from create_colocation_mapping_within_epsilon, + match up the longitude and latitude and return the colocated sets + """ + + # some general statistics + multipleMatchesInA = 0 + multipleMatchesInB = 0 + totalValidMatchedPairs = 0 + + # our final data sets + matchedLongitude = np.zeros(totalMatches, dtype=aLongitude.dtype) + matchedLatitide = np.zeros(totalMatches, dtype=aLatitude.dtype) + + # we don't know how many unmatched points we may have + unmatchedALongitude = [ ] + unmatchedALatitude = [ ] + unmatchedBLongitude = [ ] + unmatchedBLatitude = [ ] + + # go through the A list and collect all the matches + currentIndex = 0 + # look through all the A points + for aIndex in sorted(listOfColocatedALonLat.keys()) : + + [aLon, aLat, aIndex, aMatches] = listOfColocatedALonLat[aIndex] + tempMatches = 0 + + # for each match you found on a given a point + for matchIndex in sorted(aMatches) : + + [bLon, bLat, bIndex, bMatches] = listOfColocatedBLonLat[matchIndex] + + # copy the lon/lat info + matchedLongitude[currentIndex] = (aLon + bLon) / 2 + matchedLatitide[currentIndex] = (aLat + bLat) / 2 + + currentIndex = currentIndex + 1 + tempMatches = tempMatches + 1 + + # update statistics based on the number of matches + totalValidMatchedPairs = totalValidMatchedPairs + tempMatches + if tempMatches > 1 : + multipleMatchesInA = multipleMatchesInA + tempMatches + elif tempMatches <= 0 : + unmatchedALatitude.append(aLat) + unmatchedALongitude.append(aLon) + + # gather some additional statistics from the B list + # go through each b point + for bIndex in sorted(listOfColocatedBLonLat) : + + [bLon, bLat, bIndex, bMatches] = listOfColocatedBLonLat[bIndex] + tempMatches = len(bMatches) + + # update some statistics based on the number of matches + if tempMatches > 1 : + multipleMatchesInB = multipleMatchesInB + tempMatches + elif tempMatches <= 0 : + unmatchedBLatitude.append(bLat) + unmatchedBLongitude.append(bLon) + + # make the unmatched lists into proper numpy arrays + unmatchedALatitude = np.array(unmatchedALatitude, dtype=aLatitude.dtype) + unmatchedALongitude = np.array(unmatchedALongitude, dtype=aLongitude.dtype) + unmatchedBLatitude = np.array(unmatchedBLatitude, dtype=bLatitude.dtype) + unmatchedBLongitude = np.array(unmatchedBLongitude, dtype=bLongitude.dtype) + + LOG.debug("Total matched pairs of longitude/latitide: " + str(totalValidMatchedPairs)) + + return (matchedLongitude, matchedLatitide, (multipleMatchesInA, multipleMatchesInB)), \ + (unmatchedALongitude, unmatchedALatitude), \ + (unmatchedBLongitude, unmatchedBLatitude) + +def create_colocated_data_with_lon_lat_colocation(listOfColocatedALonLat, listOfColocatedBLonLat, + colocatedLongitude, colocatedLatitude, + aData, bData, + missingData=None, altMissingDataInB=None, + invalidAMask=None, invalidBMask=None) : + """ + given a pre colocated list of A and B lon/lat info from create_colocation_mapping_within_epsilon, + match up the valid data in two data sets and return the list of valid data, padded with missing + values so that it will match the original longitude and latitude + """ + + if altMissingDataInB is None : + altMissingDataInB = missingData + + # some general statistics + multipleMatchesInA = 0 + multipleMatchesInB = 0 + totalValidMatchedPairs = 0 + + # our final data sets + matchedAPoints = np.ones(colocatedLatitude.shape, dtype=aData.dtype) * missingData + matchedBPoints = np.ones(colocatedLatitude.shape, dtype=bData.dtype) * altMissingDataInB + + # we don't know how many unmatched points we may have + unmatchedAPoints = [ ] + unmatchedBPoints = [ ] + unmatchedALongitude = [ ] + unmatchedALatitude = [ ] + unmatchedBLongitude = [ ] + unmatchedBLatitude = [ ] + + # go through the A list and sort all the valid matches + currentIndex = 0 + # go through all the a points + for aIndex in sorted(listOfColocatedALonLat.keys()) : + + [aLon, aLat, aIndex, aMatches] = listOfColocatedALonLat[aIndex] + tempMatches = 0 + isInvalidA = invalidAMask[aIndex] + + # for each point that matched to a given a point + for matchIndex in sorted(aMatches) : + + [bLon, bLat, bIndex, bMatches] = listOfColocatedBLonLat[matchIndex] + isInvalidB = invalidBMask[bIndex] + + # if either of our data points is invalid, then the data doesn't match + if isInvalidA or isInvalidB : + # fill in missing data in the matches + matchedAPoints[currentIndex] = missingData + matchedBPoints[currentIndex] = altMissingDataInB + else: # we have a valid match! + tempMatches = tempMatches + 1 + matchedAPoints[currentIndex] = aData[aIndex] + matchedBPoints[currentIndex] = bData[bIndex] + + currentIndex = currentIndex + 1 + + totalValidMatchedPairs = totalValidMatchedPairs + tempMatches + if tempMatches > 1 : + multipleMatchesInA = multipleMatchesInA + tempMatches + elif (tempMatches <= 0) and (not isInvalidA) : + unmatchedAPoints.append(aData[aIndex]) + unmatchedALongitude.append(aLon) + unmatchedALatitude.append(aLat) + + # gather some additional statistics from the B list + # go through all the b points + for bIndex in sorted(listOfColocatedBLonLat.keys()) : + + [bLon, bLat, bIndex, bMatches] = listOfColocatedBLonLat[bIndex] + tempMatches = 0 + isInvalidB = invalidBMask[bIndex] + + # for each point that matched to a given b point + for matchIndex in sorted(bMatches) : + + [aLon, aLat, aIndex, aMatches] = listOfColocatedALonLat[matchIndex] + isInvalidA = invalidAMask[aIndex] + + # if either of our data points is invalid, then the data doesn't match + if isInvalidB or isInvalidA : + # we've already built our matched data, so no need to missing it out + pass + else: # we have a valid match! + tempMatches = tempMatches + 1 + + if tempMatches > 1 : + multipleMatchesInB = multipleMatchesInB + tempMatches + elif (tempMatches <= 0) and (not isInvalidB) : + unmatchedBPoints.append(bData[bIndex]) + unmatchedBLongitude.append(bLon) + unmatchedBLatitude.append(bLat) + + # make the unmatched lists into proper numpy arrays + unmatchedAPoints = np.array(unmatchedAPoints, dtype=aData.dtype) + unmatchedBPoints = np.array(unmatchedBPoints, dtype=bData.dtype) + unmatchedALongitude = np.array(unmatchedALongitude, dtype=colocatedLongitude.dtype) + unmatchedALatitude = np.array(unmatchedALatitude, dtype=colocatedLatitude.dtype) + unmatchedBLongitude = np.array(unmatchedBLongitude, dtype=colocatedLongitude.dtype) + unmatchedBLatitude = np.array(unmatchedBLatitude, dtype=colocatedLatitude.dtype) + + LOG.debug("Total matched data point pairs found: " + str(totalValidMatchedPairs)) + + return (matchedAPoints, matchedBPoints, (multipleMatchesInA, multipleMatchesInB)), \ + (unmatchedAPoints, unmatchedALongitude, unmatchedALatitude), \ + (unmatchedBPoints, unmatchedBLongitude, unmatchedBLatitude) + +if __name__=='__main__': + import doctest + doctest.testmod() diff --git a/pyglance/glance/compare.py b/pyglance/glance/compare.py index 1cd3af2..e8f8419 100644 --- a/pyglance/glance/compare.py +++ b/pyglance/glance/compare.py @@ -22,6 +22,7 @@ import glance.delta as delta import glance.plot as plot import glance.report as report import glance.stats as statistics +import glance.collocation as collocation import glance.plotcreatefns as plotcreate LOG = logging.getLogger(__name__) @@ -899,18 +900,18 @@ def colocateToFile_library_call(a_path, b_path, var_list=[ ], # handle the longitude and latitude colocation LOG.info("Colocating raw longitude and latitude information") aColocationInfomation, bColocationInformation, totalNumberOfMatchedPoints = \ - delta.create_colocation_mapping_within_epsilon((lon_lat_data['a']['lon'], lon_lat_data['a']['lat']), - (lon_lat_data['b']['lon'], lon_lat_data['b']['lat']), - runInfo['lon_lat_epsilon'], - invalidAMask=lon_lat_data['a']['inv_mask'], - invalidBMask=lon_lat_data['b']['inv_mask']) + collocation.create_colocation_mapping_within_epsilon((lon_lat_data['a']['lon'], lon_lat_data['a']['lat']), + (lon_lat_data['b']['lon'], lon_lat_data['b']['lat']), + runInfo['lon_lat_epsilon'], + invalidAMask=lon_lat_data['a']['inv_mask'], + invalidBMask=lon_lat_data['b']['inv_mask']) (colocatedLongitude, colocatedLatitude, (numMultipleMatchesInA, numMultipleMatchesInB)), \ (unmatchedALongitude, unmatchedALatitude), \ (unmatchedBLongitude, unmatchedBLatitude) = \ - delta.create_colocated_lonlat_with_lon_lat_colocation(aColocationInfomation, bColocationInformation, - totalNumberOfMatchedPoints, - lon_lat_data['a']['lon'], lon_lat_data['a']['lat'], - lon_lat_data['b']['lon'], lon_lat_data['b']['lat']) + collocation.create_colocated_lonlat_with_lon_lat_colocation(aColocationInfomation, bColocationInformation, + totalNumberOfMatchedPoints, + lon_lat_data['a']['lon'], lon_lat_data['a']['lat'], + lon_lat_data['b']['lon'], lon_lat_data['b']['lat']) # TODO, based on unmatched, issue warnings and record info in the file? LOG.debug("colocated shape of the longitude: " + str(colocatedLongitude.shape)) @@ -956,14 +957,14 @@ def colocateToFile_library_call(a_path, b_path, var_list=[ ], (aData, bData, (numberOfMultipleMatchesInA, numberOfMultipleMatchesInB)), \ (aUnmatchedData, unmatchedALongitude, unmatchedALatitude), \ (bUnmatchedData, unmatchedBLongitude, unmatchedBLatitude) = \ - delta.create_colocated_data_with_lon_lat_colocation(aColocationInfomation, bColocationInformation, - colocatedLongitude, colocatedLatitude, - aData, bData, - missingData=varRunInfo['missing_value'], - altMissingDataInB=varRunInfo['missing_value_alt_in_b'], - # TODO, should missing data be considered? - invalidAMask=invalidA, - invalidBMask=invalidB) + collocation.create_colocated_data_with_lon_lat_colocation(aColocationInfomation, bColocationInformation, + colocatedLongitude, colocatedLatitude, + aData, bData, + missingData=varRunInfo['missing_value'], + altMissingDataInB=varRunInfo['missing_value_alt_in_b'], + # TODO, should missing data be considered? + invalidAMask=invalidA, + invalidBMask=invalidB) LOG.debug(str(numberOfMultipleMatchesInA) + " data pairs contain A data points used for multiple matches.") LOG.debug(str(numberOfMultipleMatchesInB) + " data pairs contain B data points used for multiple matches.") diff --git a/pyglance/glance/delta.py b/pyglance/glance/delta.py index 23178a2..31f69db 100644 --- a/pyglance/glance/delta.py +++ b/pyglance/glance/delta.py @@ -282,317 +282,6 @@ def max_with_mask(data, mask=None) : toReturn = temp[temp.argmax()] return toReturn -def create_colocation_mapping_within_epsilon((alongitude, alatitude), - (blongitude, blatitude), - lonlatEpsilon, - invalidAMask=None, invalidBMask=None): - """ - match points together based on their longitude and latitude values - to match points must be within lonlatEpsilon degrees in both longitude and latitude - - if the longitude and latitude variables contain invalid data the invalidAMask and - invalidBMask should be passed with the appropriate masking to remove the invalid values - - the return will be in the form of two dictionaries of points, one from a and one from b, - indexed on the index number in the A or B data where they can be found. Each entry will - consist of a list of: - [longitudeValue, latitudeValue, indexNumber, [list of matching indexes in the other set]] - - Note: the return will include all pairs of points that match, - this means an individual a or b point may be repeated if it matches - multiple points within the lonlatEpsilon provided - - Warning: This algorithm will fail to find all matching points if the lonlatEpsilon is set to a - value greater than or equal to 1.0 degrees. This is related to the bin size used for searching - thoretically the bin size could be corrected to scale with the lonlatEpsilon in the future. TODO - """ - assert(alongitude.shape == alatitude.shape) - assert(blongitude.shape == blatitude.shape) - assert(lonlatEpsilon >= 0.0) - - LOG.debug("Preparing to colocate longitude and latitude points (acceptable epsilon: " + str(lonlatEpsilon) + " degrees)") - LOG.debug("size of A: " + str(alongitude.shape)) - LOG.debug("size of B: " + str(blongitude.shape)) - - # make blank invalid masks if none were passed in - if invalidAMask is None : - invalidAMask = np.zeros(alongitude.shape, dtype=bool) - if invalidBMask is None : - invalidBMask = np.zeros(blongitude.shape, dtype=bool) - - # make flat versions of our longitude and latitude - # so that our index correlations will be simple - flatALatitude = alatitude.ravel() - flatALongitude = alongitude.ravel() - flatBLatitude = blatitude.ravel() - flatBLongitude = blongitude.ravel() - - # find the ranges of the longitude and latitude - minLatitude = min(min(flatALatitude), min(flatBLatitude)) - maxLatitude = max(max(flatALatitude), max(flatBLatitude)) - minLongitude = min(min(flatALongitude), min(flatBLongitude)) - maxLongitude = max(max(flatALongitude), max(flatBLongitude)) - - # make the bins for the data in longitude/latitude space - aBins = { } - bBins = { } - allAPts = { } - allBPts = { } - - # loop to put all the aData in the bins - for index in range(flatALatitude.size) : - filingLat = int( flatALatitude[index]) - filingLon = int(flatALongitude[index]) - - if (filingLat, filingLon) not in aBins : - aBins[(filingLat, filingLon)] = [ ] - - # create the simple list holding that point in the form: - # the lon/lat values (for ease of comparison), the index number in A, and the list of matches - aPoint = [flatALatitude[index], flatALongitude[index], index, [ ]] - - # put the point in the list and bin - allAPts[index] = aPoint - aBins[(filingLat, filingLon)].append(aPoint) - - # loop to put all the bData in the bins - for index in range(flatBLatitude.size) : - filingLat = int( flatBLatitude[index]) - filingLon = int(flatBLongitude[index]) - - if (filingLat, filingLon) not in bBins : - bBins[(filingLat, filingLon)] = [ ] - - # create the simple list holding that point in the form: - # the lon/lat values (for ease of comparison), the index number in A, and the list of matches - bPoint = [flatBLatitude[index], flatBLongitude[index], index, [ ]] - - # put the point in the list and bin - allBPts[index] = bPoint - bBins[(filingLat, filingLon)].append(bPoint) - - # for debugging purposes - totalMatches = 0 - - # look in all the aData bins and select point pairs that match within epsilon - for binLatitude, binLongitude in aBins.keys() : - - # figure out the lat/lon of the 9 bins that are "near" this one - toSearch = [ ] - for latValue in range(binLatitude - 1, binLatitude + 1) : - for lonValue in range(binLongitude -1, binLongitude + 1) : - toSearch.append((latValue, lonValue)) - - # for each A pt in this bin - for aLat, aLon, aIndex, aMatches in aBins[(binLatitude, binLongitude)] : - - # look through my nearby B bins and find any - # "matching" points that fall within epsilon - for latValue, lonValue in toSearch : - - # if there's anything in that B bin - if ((latValue, lonValue) in bBins) and (bBins[(latValue, lonValue)] is not None) : - - # for each data point in the B bin, check if it matches our current A point - for bLat, bLon, bIndex, bMatches in bBins[(latValue, lonValue)] : - - if (abs(bLat - aLat) < lonlatEpsilon) and (abs(aLon - bLon) < lonlatEpsilon) : - totalMatches = totalMatches + 1 - - # put the point on our matched lists - aMatches.append(bIndex) - bMatches.append(aIndex) - - LOG.debug('Found ' + str(totalMatches) + ' matched pairs.') - - return allAPts, allBPts, totalMatches - -def create_colocated_lonlat_with_lon_lat_colocation(listOfColocatedALonLat, listOfColocatedBLonLat, - totalMatches, - aLongitude, aLatitude, - bLongitude, bLatitude) : - """ - given a pre colocated list of A and B lon/lat info from create_colocation_mapping_within_epsilon, - match up the longitude and latitude and return the colocated sets - """ - - # some general statistics - multipleMatchesInA = 0 - multipleMatchesInB = 0 - totalValidMatchedPairs = 0 - - # our final data sets - matchedLongitude = np.zeros(totalMatches, dtype=aLongitude.dtype) - matchedLatitide = np.zeros(totalMatches, dtype=aLatitude.dtype) - - # we don't know how many unmatched points we may have - unmatchedALongitude = [ ] - unmatchedALatitude = [ ] - unmatchedBLongitude = [ ] - unmatchedBLatitude = [ ] - - # go through the A list and collect all the matches - currentIndex = 0 - # look through all the A points - for aIndex in sorted(listOfColocatedALonLat.keys()) : - - [aLon, aLat, aIndex, aMatches] = listOfColocatedALonLat[aIndex] - tempMatches = 0 - - # for each match you found on a given a point - for matchIndex in sorted(aMatches) : - - [bLon, bLat, bIndex, bMatches] = listOfColocatedBLonLat[matchIndex] - - # copy the lon/lat info - matchedLongitude[currentIndex] = (aLon + bLon) / 2 - matchedLatitide[currentIndex] = (aLat + bLat) / 2 - - currentIndex = currentIndex + 1 - tempMatches = tempMatches + 1 - - # update statistics based on the number of matches - totalValidMatchedPairs = totalValidMatchedPairs + tempMatches - if tempMatches > 1 : - multipleMatchesInA = multipleMatchesInA + tempMatches - elif tempMatches <= 0 : - unmatchedALatitude.append(aLat) - unmatchedALongitude.append(aLon) - - # gather some additional statistics from the B list - # go through each b point - for bIndex in sorted(listOfColocatedBLonLat) : - - [bLon, bLat, bIndex, bMatches] = listOfColocatedBLonLat[bIndex] - tempMatches = len(bMatches) - - # update some statistics based on the number of matches - if tempMatches > 1 : - multipleMatchesInB = multipleMatchesInB + tempMatches - elif tempMatches <= 0 : - unmatchedBLatitude.append(bLat) - unmatchedBLongitude.append(bLon) - - # make the unmatched lists into proper numpy arrays - unmatchedALatitude = np.array(unmatchedALatitude, dtype=aLatitude.dtype) - unmatchedALongitude = np.array(unmatchedALongitude, dtype=aLongitude.dtype) - unmatchedBLatitude = np.array(unmatchedBLatitude, dtype=bLatitude.dtype) - unmatchedBLongitude = np.array(unmatchedBLongitude, dtype=bLongitude.dtype) - - LOG.debug("Total matched pairs of longitude/latitide: " + str(totalValidMatchedPairs)) - - return (matchedLongitude, matchedLatitide, (multipleMatchesInA, multipleMatchesInB)), \ - (unmatchedALongitude, unmatchedALatitude), \ - (unmatchedBLongitude, unmatchedBLatitude) - -def create_colocated_data_with_lon_lat_colocation(listOfColocatedALonLat, listOfColocatedBLonLat, - colocatedLongitude, colocatedLatitude, - aData, bData, - missingData=None, altMissingDataInB=None, - invalidAMask=None, invalidBMask=None) : - """ - given a pre colocated list of A and B lon/lat info from create_colocation_mapping_within_epsilon, - match up the valid data in two data sets and return the list of valid data, padded with missing - values so that it will match the original longitude and latitude - """ - - if altMissingDataInB is None : - altMissingDataInB = missingData - - # some general statistics - multipleMatchesInA = 0 - multipleMatchesInB = 0 - totalValidMatchedPairs = 0 - - # our final data sets - matchedAPoints = np.ones(colocatedLatitude.shape, dtype=aData.dtype) * missingData - matchedBPoints = np.ones(colocatedLatitude.shape, dtype=bData.dtype) * altMissingDataInB - - # we don't know how many unmatched points we may have - unmatchedAPoints = [ ] - unmatchedBPoints = [ ] - unmatchedALongitude = [ ] - unmatchedALatitude = [ ] - unmatchedBLongitude = [ ] - unmatchedBLatitude = [ ] - - # go through the A list and sort all the valid matches - currentIndex = 0 - # go through all the a points - for aIndex in sorted(listOfColocatedALonLat.keys()) : - - [aLon, aLat, aIndex, aMatches] = listOfColocatedALonLat[aIndex] - tempMatches = 0 - isInvalidA = invalidAMask[aIndex] - - # for each point that matched to a given a point - for matchIndex in sorted(aMatches) : - - [bLon, bLat, bIndex, bMatches] = listOfColocatedBLonLat[matchIndex] - isInvalidB = invalidBMask[bIndex] - - # if either of our data points is invalid, then the data doesn't match - if isInvalidA or isInvalidB : - # fill in missing data in the matches - matchedAPoints[currentIndex] = missingData - matchedBPoints[currentIndex] = altMissingDataInB - else: # we have a valid match! - tempMatches = tempMatches + 1 - matchedAPoints[currentIndex] = aData[aIndex] - matchedBPoints[currentIndex] = bData[bIndex] - - currentIndex = currentIndex + 1 - - totalValidMatchedPairs = totalValidMatchedPairs + tempMatches - if tempMatches > 1 : - multipleMatchesInA = multipleMatchesInA + tempMatches - elif (tempMatches <= 0) and (not isInvalidA) : - unmatchedAPoints.append(aData[aIndex]) - unmatchedALongitude.append(aLon) - unmatchedALatitude.append(aLat) - - # gather some additional statistics from the B list - # go through all the b points - for bIndex in sorted(listOfColocatedBLonLat.keys()) : - - [bLon, bLat, bIndex, bMatches] = listOfColocatedBLonLat[bIndex] - tempMatches = 0 - isInvalidB = invalidBMask[bIndex] - - # for each point that matched to a given b point - for matchIndex in sorted(bMatches) : - - [aLon, aLat, aIndex, aMatches] = listOfColocatedALonLat[matchIndex] - isInvalidA = invalidAMask[aIndex] - - # if either of our data points is invalid, then the data doesn't match - if isInvalidB or isInvalidA : - # we've already built our matched data, so no need to missing it out - pass - else: # we have a valid match! - tempMatches = tempMatches + 1 - - if tempMatches > 1 : - multipleMatchesInB = multipleMatchesInB + tempMatches - elif (tempMatches <= 0) and (not isInvalidB) : - unmatchedBPoints.append(bData[bIndex]) - unmatchedBLongitude.append(bLon) - unmatchedBLatitude.append(bLat) - - # make the unmatched lists into proper numpy arrays - unmatchedAPoints = np.array(unmatchedAPoints, dtype=aData.dtype) - unmatchedBPoints = np.array(unmatchedBPoints, dtype=bData.dtype) - unmatchedALongitude = np.array(unmatchedALongitude, dtype=colocatedLongitude.dtype) - unmatchedALatitude = np.array(unmatchedALatitude, dtype=colocatedLatitude.dtype) - unmatchedBLongitude = np.array(unmatchedBLongitude, dtype=colocatedLongitude.dtype) - unmatchedBLatitude = np.array(unmatchedBLatitude, dtype=colocatedLatitude.dtype) - - LOG.debug("Total matched data point pairs found: " + str(totalValidMatchedPairs)) - - return (matchedAPoints, matchedBPoints, (multipleMatchesInA, multipleMatchesInB)), \ - (unmatchedAPoints, unmatchedALongitude, unmatchedALatitude), \ - (unmatchedBPoints, unmatchedBLongitude, unmatchedBLatitude) - if __name__=='__main__': import doctest doctest.testmod() -- GitLab