# delta.py -- authored by Eva Schiffer (web-page header residue converted to a comment)
#!/usr/bin/env python
# encoding: utf-8
"""
Routines to do assorted difference and comparison calculations and statistics
Created by rayg Apr 2009.
Copyright (c) 2009 University of Wisconsin SSEC. All rights reserved.
"""
import logging
import math
import numpy as numpy
from numpy import * # todo, remove this line
from scipy.stats import pearsonr
LOG = logging.getLogger(__name__)
# for calculating the great circle distance, this is the radius well will
# assume that the spherical model of the earth has, in km
SPHERICAL_EARTH_RADIUS = 6373.0
# -------------- generic data manipulation and analysis --------------
# TODO have someone double check the math here
def great_circle_distance (latitudeA, longitudeA, latitudeB, longitudeB) :
"""
Calculate the great circle distance (in km) between the A and B points
given in the input parameters, the inputs are expected to be in degrees
note: This method uses the spherical law of cosines, and is best suited
for smaller distances.
"""
# convert to radians
latARad = math.radians(latitudeA)
lonARad = math.radians(longitudeA)
latBRad = math.radians(latitudeB)
lonBRad = math.radians(longitudeB)
distToReturn = math.acos(math.sin(latARad) * math.sin(latBRad) +
math.cos(latARad) * math.cos(latBRad) *
math.cos(lonBRad - lonARad)) * SPHERICAL_EARTH_RADIUS
return distToReturn
def min_with_mask(data, goodMask=None) :
    """
    get the minimum of the values in data,
    inspecting only the values selected in the goodMask

    if no mask is passed and a masked array is given the
    mask associated with the array will be used
    if no mask is passed and any other kind of data is
    given, all values are assumed to be good
    """

    # if the data array is a masked array and we weren't given a mask,
    # derive one from the array's own mask; note that a masked array's
    # mask is True where data is INVALID, so we must invert it to get
    # the "good points" sense used by this function
    # (the old check `type(data) is numpy.ma` compared against a module
    # and could never be true, so the masked-array case silently failed)
    if (goodMask is None) and isinstance(data, numpy.ma.MaskedArray) :
        goodMask = ~numpy.ma.getmaskarray(data)

    # select only the good data values; with no mask at all, use everything
    goodData = data[goodMask] if goodMask is not None else data

    # if we have any good data, get the minimum
    return numpy.min(goodData) if goodData.size > 0 else None
def max_with_mask(data, goodMask=None) :
    """
    get the maximum of the values in data,
    inspecting only the values selected in the goodMask

    if no mask is passed and a masked array is given the
    mask associated with the array will be used
    if no mask is passed and any other kind of data is
    given, all values are assumed to be good
    """

    # if the data array is a masked array and we weren't given a mask,
    # derive one from the array's own mask; note that a masked array's
    # mask is True where data is INVALID, so we must invert it to get
    # the "good points" sense used by this function
    # (the old check `type(data) is numpy.ma` compared against a module
    # and could never be true, so the masked-array case silently failed)
    if (goodMask is None) and isinstance(data, numpy.ma.MaskedArray) :
        goodMask = ~numpy.ma.getmaskarray(data)

    # select only the good data values; with no mask at all, use everything
    goodData = data[goodMask] if goodMask is not None else data

    # if we have any good data, get the maximum
    return numpy.max(goodData) if goodData.size > 0 else None
def compute_correlation(xData, yData, goodMask, compute_r_function=pearsonr):
    """
    compute the correlation coefficient of two data sets
    given a mask describing good data values in the sets
    """

    # the two data sets and the mask must all share one shape
    assert(xData.shape == yData.shape)
    assert(xData.shape == goodMask.shape)

    # restrict ourselves to the points flagged as good
    filtered_x = xData[goodMask]
    filtered_y = yData[goodMask]

    # any non-finite values surviving the mask are a caller error
    assert(numpy.all(numpy.isfinite(filtered_x)))
    assert(numpy.all(numpy.isfinite(filtered_y)))

    # a correlation is only meaningful with at least two points per set
    if (filtered_x.size < 2) or (filtered_y.size < 2) :
        return numpy.nan

    # the r function returns (coefficient, p-value); keep the coefficient
    return compute_r_function(filtered_x, filtered_y)[0]
def calculate_root_mean_square (data, goodMask=None) :
    """
    calculate the root mean square of the data,
    possibly selecting only the points in the given
    goodMask, if no mask is given, all points will
    be used
    """

    # when no mask is given, use every point; the old code indexed with
    # data[None], which only worked by accident (None is numpy.newaxis)
    goodData = data if goodMask is None else data[goodMask]

    # get a count of how many good data points we have
    numGoodPoints = goodData.size

    # guard against an empty selection so we don't divide by zero
    # (matches the nan the old 0/0 produced, but without the warning)
    if numGoodPoints == 0 :
        return numpy.nan

    return numpy.sqrt( numpy.sum( goodData ** 2 ) / numGoodPoints )
# TODO, should the name of this function be changed?
def convert_mag_dir_to_U_V_vector(magnitude_data, direction_data, invalidMask=None, offset_degrees=180):
    """
    This method is intended to convert magnitude and direction data into (U, V) vector data.
    An invalid mask may be given if some of the points in the set should be masked out.

    TODO, this method is not fully tested
    """

    # with no mask given, treat every point as valid
    if invalidMask is None :
        invalidMask = numpy.zeros(magnitude_data.shape, dtype=bool)

    # shift the directions by the requested offset before converting
    # (presumably to flip between "direction from" and "direction toward"
    # conventions, given the 180 degree default -- TODO confirm)
    new_direction_data = direction_data[:] + offset_degrees

    LOG.debug ("direction data: " + str(new_direction_data[~invalidMask]))

    # use numpy.deg2rad explicitly instead of the bare deg2rad that only
    # resolved through the module's `from numpy import *` (slated for removal)
    uData = numpy.zeros(magnitude_data.shape, dtype=float)
    uData[ invalidMask] = numpy.nan
    uData[~invalidMask] = magnitude_data[~invalidMask] * numpy.sin (numpy.deg2rad(new_direction_data[~invalidMask]))

    vData = numpy.zeros(magnitude_data.shape, dtype=float)
    vData[ invalidMask] = numpy.nan
    vData[~invalidMask] = magnitude_data[~invalidMask] * numpy.cos (numpy.deg2rad(new_direction_data[~invalidMask]))

    return uData, vData
# ------------- bin/tuple related functions --------------------
class BinTupleMapping (object) :
    """
    This class represents a bin / tuple data remapping.
    It encapsulates information about the dimensions that are considered the
    bin and tuple dimensions and is able to transform data into the
    [bin][case][tuple] form. It also allows for the reverse calculation of
    indexes so that you can recreate positioning information in the original
    data set based on the new shape of the case dimension.

    internal instance variables:
    bin_dimension_index   - the original index of the bin dimension
    tuple_dimension_index - the original index of the tuple dimension
    original_data_shape   - the shape the data was before it was reordered
    new_data_shape        - the data shape after it's been reordered
    new_index_order       - a mapping that lists the order of the new dimension indexes
    original_case_shape   - the shape of the case dimension(s) before being flattened
    reverse_case_index    - a reverse index for finding the original positions of
                            flattened case indexes

    TODO, in the long run, find a way to get rid of the reverse_case_index
    """

    def __init__ (self, dataShape, binIndexNumber=0, tupleIndexNumber=None) :
        """
        Given information on the original data and the desired bin/tuple,
        build the mapping object
        """

        # minimally, we need to have a shape
        assert(dataShape is not None)

        # get the number of dimensions present in our data
        numberOfDimensions = len(dataShape)

        # we need at least a bin dimension and a tuple dimension
        assert(numberOfDimensions >= 2)

        # store as a tuple so later comparisons against numpy's .shape work
        # even if the caller handed us a list
        self.original_data_shape = tuple(dataShape)

        # default the tuple dimension to the last axis if it wasn't selected
        if (tupleIndexNumber is None) :
            tupleIndexNumber = numberOfDimensions - 1

        # are the bin and tuple ok?
        assert(binIndexNumber is not None)
        assert(binIndexNumber   >= 0)
        assert(binIndexNumber   <  numberOfDimensions)
        assert(tupleIndexNumber >= 0)
        assert(tupleIndexNumber <  numberOfDimensions)

        self.bin_dimension_index   = binIndexNumber
        self.tuple_dimension_index = tupleIndexNumber

        # get the new index ordering for the data (bin first, tuple last)
        self.new_index_order = BinTupleMapping._make_new_index_list(numberOfDimensions,
                                                                    self.bin_dimension_index,
                                                                    self.tuple_dimension_index)

        # the shape after transposing, before the case dims are flattened
        temp_data_shape = tuple(self.original_data_shape[index] for index in self.new_index_order)

        # everything between the bin and tuple axes forms the case dimensions
        self.original_case_shape = temp_data_shape[1:-1]

        # figure out the new size with the flattened cases and build the
        # reverse index for looking up flat case indexes
        if len(self.original_case_shape) > 0 :
            number_of_cases         = int(numpy.prod(self.original_case_shape))
            self.new_data_shape     = (temp_data_shape[0], number_of_cases, temp_data_shape[-1])
            self.reverse_case_index = numpy.arange(number_of_cases).reshape(self.original_case_shape)
        else :
            # only two dimensions: no case axis at all
            self.new_data_shape     = (temp_data_shape[0], temp_data_shape[-1])
            self.reverse_case_index = None

    @staticmethod
    def _make_new_index_list(numberOfIndexes, firstIndexNumber, lastIndexNumber) :
        """
        a utility method to make a list of index numbers for reordering a
        multi-dimensional array

        the first and last index numbers represent the dimensions you want to be
        first and last (respectively) when the list is reordered; any other indexes
        will retain their relative ordering

        Note: This is a private method of the BinTupleMapping class and assumes
        that the index numbers passed to it will have been preverified to be
        acceptable.
        """

        # make the new list; list() is required because in python 3 a bare
        # range object does not support item deletion
        newIndexList = list(range(numberOfIndexes))

        # remove our two "important" indexes, highest position first so the
        # second deletion isn't shifted by the first
        maxSpecial = max(firstIndexNumber, lastIndexNumber)
        minSpecial = min(firstIndexNumber, lastIndexNumber)
        del newIndexList[maxSpecial]
        del newIndexList[minSpecial]

        # add our two important indexes back into the list in their new places
        return [firstIndexNumber] + newIndexList + [lastIndexNumber]

    def reorder_for_bin_tuple (self, data) :
        """
        reorder the data so that the bin index is first, the tuple index is last,
        and any additional dimensions are flattened into a middle "case" index

        the reordered data and the shape of flattened case indexes will be returned
        (note: the shape of the data must match the shape with which the BinTupleMatching
        object was originally constructed)
        """

        assert(data.shape == self.original_data_shape)

        # put the bin and tuple dimensions in the correct places
        newData = data.transpose(self.new_index_order)

        # flatten the case dimensions
        newData = newData.reshape(self.new_data_shape)

        return newData

    def determine_case_indecies (self, flatIndex) :
        """
        determine the original indexes of the case from the flat case index number

        Note: this method requires the object to hold a large data structure
        TODO, find a better way of doing this? does numpy guarantee reshaping strategy?
        TODO, can I find information on reshape and do this with pure math?
        """

        # with no case dimensions there is nothing to look up
        if self.reverse_case_index is None :
            return None

        # find the flat index in our reverse case index
        positionOfIndex = numpy.where(self.reverse_case_index == flatIndex)

        return positionOfIndex
# when run as a script, execute any doctests found in this module
if __name__=='__main__':
    import doctest
    doctest.testmod()