class FileInfo (object) :
    """
    This class represents information about a file on disk. It may or may not include
    an open file object for that file.

    The following member variables are available from this class:

        path          - the file path to reach the original file on disk, exactly as it was given
        md5_sum       - the md5 hex digest of the contents of the original file
        last_modified - the time that the file was last modified; when calculated here this is
                        the local time string produced by datetime.ctime()
        file_object   - the file object that can be used to access the data in the file, may be None

    If the file does not exist, md5_sum, last_modified, and file_object are all None.
    """

    # how many bytes to read at a time when calculating an md5 sum
    _MD5_CHUNK_SIZE = 65536

    def __init__(self, pathToFile, md5sum=None, lastModifiedTime=None, fileObject=None) :
        """
        Create the file info object using the values given.

        pathToFile       - the path to the file, a leading "~" is expanded before the file is accessed
        md5sum           - the md5 sum of the file, calculated from the file contents if None
        lastModifiedTime - the last modified time of the file, read from the file system if None
        fileObject       - an already opened file object for the file, may be None

        Note: if the md5 sum is not given and no file object was passed in,
        the file object will also be loaded.
        """

        self.path = pathToFile

        # normalize before checking for existence so paths like "~/file" are found
        normalizedPath = os.path.abspath(os.path.expanduser(self.path))

        # if the file doesn't exist, stop
        # TODO, is this the right strategy?
        if not os.path.exists(normalizedPath) :
            LOG.warning("Requested file " + self.path + " could not be opened because it does not exist.")
            self.md5_sum       = None
            self.last_modified = None
            self.file_object   = None
            return

        # if the md5 sum isn't given, load the file and figure it out
        if md5sum is None :

            LOG.info("Opening " + self.path)
            LOG.debug("Provided path after normalization and symbol expansion: " + normalizedPath)

            # only open the file if the caller did not already hand us a file object
            # NOTE(review): `io` is presumably glance's io module (the previous call passed an
            # allowWrite keyword, which was never defined in this scope) - confirm it is imported
            if fileObject is None :
                fileObject = io.open(normalizedPath)

            md5sum = self._calculate_md5_sum(normalizedPath)
            LOG.info("File md5sum: " + str(md5sum))

        self.md5_sum     = md5sum
        self.file_object = fileObject

        # if the last modified time isn't given, figure it out
        if lastModifiedTime is None :

            modifiedSeconds  = os.stat(normalizedPath).st_mtime
            # this is local time; should time zone be forced?
            lastModifiedTime = datetime.datetime.fromtimestamp(modifiedSeconds).ctime()
            LOG.info("File was last modified: " + lastModifiedTime)

        self.last_modified = lastModifiedTime

    @staticmethod
    def _calculate_md5_sum(filePath) :
        """
        Return the md5 hex digest of the contents of the file at filePath.

        The file is read in chunks so large files are never held in memory all at once.
        This is done in process rather than by running an external md5sum command, so it
        does not depend on that tool being installed or on how the path would be quoted.
        """
        import hashlib # local import so this class does not rely on a file level import

        digest = hashlib.md5()
        with open(filePath, 'rb') as rawFile :
            for chunk in iter(lambda : rawFile.read(FileInfo._MD5_CHUNK_SIZE), b'') :
                digest.update(chunk)

        return digest.hexdigest()

    def get_version_without_file_object (self) :
        """
        get a version of this object without a file object
        (this method is useful if you want file information but do not need access and want to save space)

        If this object already has no file object it is returned as is, otherwise a new
        FileInfo is built from this object's path, md5 sum, and last modified time.
        """
        if self.file_object is None :
            return self

        return FileInfo(self.path, self.md5_sum, self.last_modified)
# build a hexbin plot of the x,y points and show the density of the point distribution
def create_hexbin_plot(dataX, dataY, title, xLabel, yLabel) :
    """
    Build a figure holding a hexagonal bin density plot of dataX against dataY.

    The bins are drawn with the jet color map using log scaled counts, the view is limited
    to the minimum and maximum of each data set, and a labeled color bar is added.
    The title and axis labels given are applied and the finished figure is returned.
    """

    # set up a new figure with a single set of axes
    figure = plt.figure()
    axes   = figure.add_subplot(111)

    # draw the binned data, then limit the view to the extent of the data
    plt.hexbin(dataX, dataY, bins='log', cmap=cm.jet)
    dataExtent = [dataX.min(), dataX.max(), dataY.min(), dataY.max()]
    plt.axis(dataExtent)

    # the color bar shows how the colors relate to the counts
    colorBar = plt.colorbar()
    colorBar.set_label('log10 (count + 1)')

    # the title and the axis labels
    axes.set_title(title)
    plt.xlabel(xLabel)
    plt.ylabel(yLabel)

    # give each axis its own formatter so the tick labels display gracefully
    for axisToFormat in (axes.yaxis, axes.xaxis) :
        axisToFormat.set_major_formatter(FormatStrFormatter("%4.4g"))

    return figure