Skip to content
Snippets Groups Projects
Commit 6891b5d4 authored by (no author)'s avatar (no author)
Browse files

added new hexplot image; added initial rough draft of FileInfo class for later refactoring

git-svn-id: https://svn.ssec.wisc.edu/repos/glance/trunk@128 8a9318a1-56ba-4d59-b755-99d26321be01
parent 3520cfac
No related branches found
No related tags found
No related merge requests found
...@@ -8,6 +8,7 @@ Copyright (c) 2010 University of Wisconsin SSEC. All rights reserved. ...@@ -8,6 +8,7 @@ Copyright (c) 2010 University of Wisconsin SSEC. All rights reserved.
""" """
import logging import logging
import os, subprocess, datetime
import numpy as np import numpy as np
import glance.delta as delta import glance.delta as delta
...@@ -277,6 +278,78 @@ class DiffInfoObject (object) : ...@@ -277,6 +278,78 @@ class DiffInfoObject (object) :
return diff_data_object return diff_data_object
class FileInfo(object):
    """
    This class represents information about a file object. It may or may not include the actual file object.

    The following member variables are available from this class:

        path          - the file path to reach the original file on disk (stored exactly as given)
        md5_sum       - an md5 sum calculated from the original file (hex digest string), or None
        last_modified - the time that the file was last modified; when calculated here it is the
                        string produced by datetime.ctime() in local time
                        (TODO, what form should this be in?)
        file_object   - the file object that can be used to access the data in the file, may be None
    """

    def __init__(self, pathToFile, md5sum=None, lastModifiedTime=None, fileObject=None, allowWrite=False):
        """
        Create the file info object using the values given.

        If the md5 sum and last modified time aren't given, the initialization will figure them out.
        Note: if the md5 sum is not given, the file object will also be loaded
        (unless the caller already supplied one, in which case that one is kept).

        pathToFile       - path to the file; "~" and relative paths are expanded before use
        md5sum           - a previously calculated md5 sum, or None to calculate it now
        lastModifiedTime - a previously determined modification time, or None to look it up now
        fileObject       - an already opened file object, or None
        allowWrite       - passed through to io.open when the file has to be opened here

        If the file does not exist a warning is logged and md5_sum, last_modified
        and file_object are all set to None.
        """
        self.path = pathToFile

        # normalize once so the existence check, the open, the md5 sum and the
        # stat call all look at the same location (e.g. "~/data.hdf")
        fullPath = os.path.abspath(os.path.expanduser(self.path))

        # if the file doesn't exist, stop
        # TODO, is this the right strategy?
        if not os.path.exists(fullPath):
            LOG.warning("Requested file " + self.path + " could not be opened because it does not exist.")
            self.md5_sum = None
            self.last_modified = None
            self.file_object = None
            return

        # if the md5 sum isn't given, load the file and figure it out
        if md5sum is None:
            LOG.info("Opening " + self.path)
            LOG.debug("Provided path after normalization and symbol expansion: " + fullPath)
            # don't throw away a file object the caller already opened for us
            if fileObject is None:
                # NOTE(review): `io` is presumably the project's io module imported elsewhere
                # in this file (the standard library io.open takes no allowWrite) -- confirm
                fileObject = io.open(fullPath, allowWrite=allowWrite)
            md5sum = self._compute_md5(fullPath)
            LOG.info("File md5sum: " + str(md5sum))
        self.md5_sum = md5sum
        self.file_object = fileObject

        # if the last modified time isn't given, figure it out
        if lastModifiedTime is None:
            statsForFile = os.stat(fullPath)
            lastModifiedTime = datetime.datetime.fromtimestamp(statsForFile.st_mtime).ctime()  # should time zone be forced?
            LOG.info("File was last modified: " + lastModifiedTime)
        self.last_modified = lastModifiedTime

    @staticmethod
    def _compute_md5(filePath, chunkSize=65536):
        """
        Return the md5 hex digest of the file at filePath.

        The file is read in chunkSize byte pieces so large data files are never
        held in memory all at once. This is done in-process rather than by
        shelling out to `md5sum`, so it works on paths containing quotes and on
        systems without that command.
        """
        import hashlib  # local import: only needed when a sum must be calculated

        digest = hashlib.md5()
        with open(filePath, 'rb') as rawFile:
            for chunk in iter(lambda: rawFile.read(chunkSize), b''):
                digest.update(chunk)
        return digest.hexdigest()

    def get_version_without_file_object(self):
        """
        get a version of this object without a file object
        (this method is useful if you want file information but do not need access and want to save space)

        Returns this same object when it holds no file object; otherwise returns a
        new FileInfo built from the path, md5 sum and last modified time.
        """
        if self.file_object is None:
            return self
        return FileInfo(self.path, self.md5_sum, self.last_modified)
if __name__=='__main__': if __name__=='__main__':
import doctest import doctest
doctest.testmod() doctest.testmod()
...@@ -13,13 +13,14 @@ matplotlib.use('Agg') # use the Anti-Grain Geometry rendering engine ...@@ -13,13 +13,14 @@ matplotlib.use('Agg') # use the Anti-Grain Geometry rendering engine
from pylab import * from pylab import *
import matplotlib.cm as cm
import matplotlib.pyplot as plt import matplotlib.pyplot as plt
import matplotlib.colors as colors import matplotlib.colors as colors
from matplotlib.ticker import FormatStrFormatter from matplotlib.ticker import FormatStrFormatter
import logging import logging
import numpy as np import numpy as np
from numpy import ma from numpy import ma
import glance.graphics as maps import glance.graphics as maps
import glance.delta as delta import glance.delta as delta
...@@ -236,6 +237,34 @@ def create_scatter_plot(dataX, dataY, title, xLabel, yLabel, badMask=None, epsil ...@@ -236,6 +237,34 @@ def create_scatter_plot(dataX, dataY, title, xLabel, yLabel, badMask=None, epsil
return figure return figure
# render the x,y points as a hexagonally binned plot so the density of the distribution is visible
def create_hexbin_plot(dataX, dataY, title, xLabel, yLabel) :
    """
    Build and return a figure containing a hexbin plot of dataX against dataY.

    dataX, dataY   - the point coordinates; .min() and .max() are called on both
                     (presumably numpy arrays -- confirm against callers)
    title          - text placed above the plot
    xLabel, yLabel - text used to label the x and y axes

    The view is limited to the extent of the data and a color bar is added.
    """
    # a new figure holding a single set of axes
    hexFigure = plt.figure()
    plotAxes = hexFigure.add_subplot(111)

    # bin the points into hexagons, with the bin colors on a log scale
    plt.hexbin(dataX, dataY, bins='log', cmap=cm.jet)

    # limit the view to exactly the range the data covers
    dataBounds = [dataX.min(), dataX.max(), dataY.min(), dataY.max()]
    plt.axis(dataBounds)

    # the color bar explains what the bin colors mean
    colorBar = plt.colorbar()
    colorBar.set_label('log10 (count + 1)')

    # title and axis labels
    plotAxes.set_title(title)
    plt.xlabel(xLabel)
    plt.ylabel(yLabel)

    # give each axis its own formatter so tick labels display gracefully (y first, then x)
    for oneAxis in (plotAxes.yaxis, plotAxes.xaxis) :
        oneAxis.set_major_formatter(FormatStrFormatter("%4.4g"))

    return hexFigure
# build a histogram figure of the given data with the given title and number of bins # build a histogram figure of the given data with the given title and number of bins
def create_histogram(data, bins, title, xLabel, yLabel, displayStats=False) : def create_histogram(data, bins, title, xLabel, yLabel, displayStats=False) :
......
...@@ -273,6 +273,18 @@ class BasicComparisonPlotsFunctionFactory (PlottingFunctionFactory) : ...@@ -273,6 +273,18 @@ class BasicComparisonPlotsFunctionFactory (PlottingFunctionFactory) :
"scatter plot of file a values vs file b values for " + variableDisplayName, "scatter plot of file a values vs file b values for " + variableDisplayName,
"Scatter.png", compared_fig_list) "Scatter.png", compared_fig_list)
# make a hexplot, which is like a scatter plot with density
if ('do_plot_hex' not in doPlotSettingsDict) or (doPlotSettingsDict['do_plot_hex']) :
assert(aData.shape == bData.shape)
assert(bData.shape == goodInBothMask.shape)
functionsToReturn['scatterD'] = ((lambda : figures.create_hexbin_plot(aData[goodInBothMask], bData[goodInBothMask],
"Value in File A vs Value in File B",
"File A Value", "File B Value")),
"density of file a values vs file b values for " + variableDisplayName,
"Hex.png", compared_fig_list)
return functionsToReturn return functionsToReturn
""" """
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment