Skip to content
Snippets Groups Projects
Commit d057ae40 authored by William Roberts's avatar William Roberts
Browse files

Change str.replace to a.replace and b.replace

parent 08e5d17d
No related branches found
No related tags found
No related merge requests found
"""These functions are helper functions to the individual instrument tidy """These functions are helper functions to the individual instrument tidy
modules. They move incoming data to its correct destination reliably. See modules. They move incoming data to its correct destination reliably. See
individual functions for specific information on how files are moved. individual functions for specific information on how files are moved.
""" """
__docformat__ = 'Epytext'

import errno
import logging
import os
import re
import shutil
import string
from subprocess import check_call as sh
# Module-level logger used by every helper in this file.
LOG = logging.getLogger(__name__)

# Matches one path component, i.e. a run of characters that are not the
# platform path separator.
core = re.compile('[^' + re.escape(os.sep) + ']+')
# where is directory b, relative to directory a?
# Originally copied from rays cvs playgrd; now delegates to os.path.relpath.
def relativedir(a, b):
    """Return the relative path that leads from directory C{a} to directory
    C{b}.

    Both arguments are made absolute first, so relative inputs are resolved
    against the current working directory. The result is always a relative
    path, including when the two directories share nothing but the
    filesystem root.

    @param a: The directory the result is relative to (the starting point).
    @type a: string
    @param b: The directory to reach.
    @type b: string
    @return: A relative path from C{a} to C{b}.
    @rtype: string
    """
    start = os.path.abspath(a)
    target = os.path.abspath(b)
    return os.path.relpath(target, start)
def _imm_copy_raw(incoming, raw, new_files, renamed=None):
    """Copy each new file from C{incoming} into its raw directory.

    Missing raw directories are created first. Files are copied with
    C{shutil.copy2} and the source files are left in place.

    @param incoming: Directory that holds the new files.
    @type incoming: string
    @param raw: Destination directory for each file, in the same order as
        C{new_files}.
    @type raw: list
    @param new_files: File names found in C{incoming}.
    @type new_files: list
    @param renamed: Destination file names, in the same order as
        C{new_files}. If empty or None the original names are kept.
    @type renamed: list
    """
    LOG.debug("Copying new files from %s to raw directories", incoming)
    if not renamed:
        renamed = new_files

    for raw_dir in raw:
        if os.path.exists(raw_dir):
            continue
        LOG.debug("Creating directory %s", raw_dir)
        try:
            os.makedirs(raw_dir)
        except OSError as err:
            # Another process may have created it since the exists() check.
            if err.errno != errno.EEXIST:
                raise

    for raw_dir, fn, rn in zip(raw, new_files, renamed):
        shutil.copy2(os.path.join(incoming, fn), os.path.join(raw_dir, rn))
def _imm_move_raw(incoming, raw, new_files, renamed=None):
    """Move each new file from C{incoming} into its raw directory.

    Missing raw directories are created first. Every file is copied with
    C{shutil.copy}, and only after all copies have succeeded are the source
    files removed from C{incoming}.

    @param incoming: Directory that holds the new files.
    @type incoming: string
    @param raw: Destination directory for each file, in the same order as
        C{new_files}.
    @type raw: list
    @param new_files: File names found in C{incoming}.
    @type new_files: list
    @param renamed: Destination file names, in the same order as
        C{new_files}. If empty or None the original names are kept.
    @type renamed: list
    """
    LOG.debug("Moving new files from %s to raw directories", incoming)
    if not renamed:
        renamed = new_files

    for raw_dir in raw:
        if os.path.exists(raw_dir):
            continue
        LOG.debug("Creating directory %s", raw_dir)
        try:
            os.makedirs(raw_dir)
        except OSError as err:
            # Another process may have created it since the exists() check.
            if err.errno != errno.EEXIST:
                raise

    # Had to change to a copy then delete because shutil.move will use copy2
    # (which copies owner/group) if on separate disks.
    for raw_dir, fn, rn in zip(raw, new_files, renamed):
        shutil.copy(os.path.join(incoming, fn), os.path.join(raw_dir, rn))

    # Zip over the same three lists so exactly the files that were copied
    # above are the ones removed.
    for _raw_dir, fn, _rn in zip(raw, new_files, renamed):
        os.remove(os.path.join(incoming, fn))
def _imm_link_raw(raw, cache, renamed):
    """Create a symbolic link in each cache directory that points at the
    matching file in the raw directory.

    Missing cache directories are created first. Link targets are relative
    paths computed with L{relativedir}. C{os.symlink} is called
    unconditionally, so an already existing link name raises C{OSError}.

    @param raw: Raw directory for each file.
    @type raw: list
    @param cache: Cache directory for each file, in the same order as C{raw}.
    @type cache: list
    @param renamed: File name for each file, in the same order as C{raw}.
    @type renamed: list
    """
    LOG.debug("Linking new_files from cache -> secondary raw")

    for cache_dir in cache:
        if os.path.exists(cache_dir):
            continue
        LOG.debug("Creating directory %s", cache_dir)
        try:
            os.makedirs(cache_dir)
        except OSError as err:
            # Another process may have created it since the exists() check.
            if err.errno != errno.EEXIST:
                raise

    for raw_dir, cache_dir, rn in zip(raw, cache, renamed):
        rel_p = relativedir(cache_dir, raw_dir)
        os.symlink(os.path.join(rel_p, rn), os.path.join(cache_dir, rn))
def imm_manage_raw(incoming, primary, secondary, cache, new_files, renamed=None):
    """Function that calls all other functions for this movement type.

    The immediate movement type copies each new file to the secondary raw
    directory (if given), moves it to the primary raw directory (if given,
    which deletes it from incoming), and links it into the cache directory
    (if given).

    There are two expected storage configurations that should use this
    function:
        - One primary raw storage location and one secondary location that
          contains a copy of the primary raw data and stores all the created
          products.
          The expected directories would be:
            - primary = <primary location>
            - secondary = <secondary location on separate disk>
            - cache = <cache location on same disk as secondary>
        - One storage location, one directory being the primary raw location
          and the other being the generated products directory. One storage
          location means that it is one physical disk or one filesystem.
          The expected directories would be:
            - primary = <primary location>
            - secondary = None
            - cache = <cache location on same disk as primary>

    @param incoming: The directory that the new_files were found
    @type incoming: string
    @param primary: A list of primary raw directories. These directories
        should be on the same volume as incoming for optimization, but is not
        needed. The list must be in the same order as the new_files that will
        be put in those directories. If the directory is one string then a
        list the size of new_files will be filled and all new files will be
        put in that one directory.
    @type primary: list or string
    @param secondary: A list of the secondary raw directories. These
        directories do not need to be on the same volume as incoming. Same
        ordering restrictions apply for secondary as for primary.
    @type secondary: list or string
    @param cache: A list of cache directories. These directories also follow
        the ordering restrictions that primary and secondary do.
    @type cache: list or string
    @param new_files: A list of new files found in the incoming directory
    @type new_files: list
    @param renamed: A list the same size as new_files that represents what
        the new files will be called if so desired. If None(default), files
        will keep the same name as in incoming.
    @type renamed: list
    @raise ValueError: If incoming is not a string.
    """
    if not isinstance(incoming, str):
        raise ValueError("Incoming must be one directory represented as a string")

    # A single directory string applies to every file.
    if isinstance(primary, str):
        primary = [primary] * len(new_files)
    if isinstance(secondary, str):
        secondary = [secondary] * len(new_files)
    if isinstance(cache, str):
        cache = [cache] * len(new_files)
    if renamed is None:
        renamed = new_files

    # Copy new files from incoming directory to secondary raw (lagoon)
    if secondary is not None:
        _imm_copy_raw(incoming, secondary, new_files, renamed=renamed)

    # Move new files from incoming directory to primary raw (beach)
    if primary is not None:
        _imm_move_raw(incoming, primary, new_files, renamed=renamed)

    # Link new files from the cache location (lagoon) to the secondary raw
    # (lagoon), or to the primary raw for the single-storage configuration.
    if cache is not None:
        if secondary is not None:
            _imm_link_raw(secondary, cache, renamed)
        elif primary is not None:
            _imm_link_raw(primary, cache, renamed)
def _daily_copy_raw(incoming, raw, new_files, renamed=None):
    """Rsync each new file from C{incoming} into its raw directory.

    Missing raw directories are created first. Each file is transferred with
    C{rsync -a --partial}; a non-zero rsync exit status raises
    C{subprocess.CalledProcessError}. Source files are left in place.

    @param incoming: Directory that holds the new files.
    @type incoming: string
    @param raw: Destination directory for each file, in the same order as
        C{new_files}.
    @type raw: list
    @param new_files: File names found in C{incoming}.
    @type new_files: list
    @param renamed: Destination file names, in the same order as
        C{new_files}. If empty or None the original names are kept.
    @type renamed: list
    """
    LOG.debug("Copying new files from %s to raw directories", incoming)
    if not renamed:
        renamed = new_files

    for raw_dir in raw:
        if os.path.exists(raw_dir):
            continue
        LOG.debug("Creating directory %s", raw_dir)
        try:
            os.makedirs(raw_dir)
        except OSError as err:
            # Another process may have created it since the exists() check.
            if err.errno != errno.EEXIST:
                raise

    for raw_dir, fn, rn in zip(raw, new_files, renamed):
        sh(['rsync', '-a', '--partial',
            os.path.join(incoming, fn), os.path.join(raw_dir, rn)])
def _daily_move_raw(incoming, raw, new_files, renamed=None, remove=None):
    """Rsync each new file from C{incoming} into its raw directory and
    optionally delete the source afterwards.

    Missing raw directories are created first. Each file is transferred with
    C{rsync -a --partial}; a non-zero rsync exit status raises
    C{subprocess.CalledProcessError}. Source files are removed only after
    every transfer has succeeded, and only where the matching C{remove}
    entry is true.

    @param incoming: Directory that holds the new files.
    @type incoming: string
    @param raw: Destination directory for each file, in the same order as
        C{new_files}.
    @type raw: list
    @param new_files: File names found in C{incoming}.
    @type new_files: list
    @param renamed: Destination file names, in the same order as
        C{new_files}. If empty or None the original names are kept.
    @type renamed: list
    @param remove: One boolean per new file saying whether to delete it from
        C{incoming}. If empty or None nothing is removed.
    @type remove: list
    @raise ValueError: If C{remove} is not a list or its length differs from
        C{new_files}.
    """
    LOG.debug("Moving new files from %s to raw directories", incoming)

    # Default renamed before using its length; the old order called
    # len(None) when both renamed and remove were omitted.
    if not renamed:
        renamed = new_files
    if not remove:
        remove = [False] * len(renamed)
    if not isinstance(remove, list):
        raise ValueError("remove must be a list of booleans")
    if len(remove) != len(new_files):
        raise ValueError("remove does not contain the same amount of elements as new_files")

    for raw_dir in raw:
        if os.path.exists(raw_dir):
            continue
        LOG.debug("Creating directory %s", raw_dir)
        try:
            os.makedirs(raw_dir)
        except OSError as err:
            # Another process may have created it since the exists() check.
            if err.errno != errno.EEXIST:
                raise

    for raw_dir, fn, rn in zip(raw, new_files, renamed):
        sh(['rsync', '-a', '--partial',
            os.path.join(incoming, fn), os.path.join(raw_dir, rn)])

    for fn, rm_bool in zip(new_files, remove):
        if rm_bool:
            os.remove(os.path.join(incoming, fn))
def _daily_link_raw(raw, cache, renamed):
    """Create a symbolic link in each cache directory that points at the
    matching file in the raw directory, skipping names that already exist.

    Missing cache directories are created first. Link targets are relative
    paths computed with L{relativedir}. A link is only created when
    C{os.path.exists} reports that nothing is present at the link path.

    @param raw: Raw directory for each file.
    @type raw: list
    @param cache: Cache directory for each file, in the same order as C{raw}.
    @type cache: list
    @param renamed: File name for each file, in the same order as C{raw}.
    @type renamed: list
    """
    LOG.debug("Linking new_files from cache -> secondary raw")

    for cache_dir in cache:
        if os.path.exists(cache_dir):
            continue
        LOG.debug("Creating directory %s", cache_dir)
        try:
            os.makedirs(cache_dir)
        except OSError as err:
            # Another process may have created it since the exists() check.
            if err.errno != errno.EEXIST:
                raise

    for raw_dir, cache_dir, rn in zip(raw, cache, renamed):
        link_path = os.path.join(cache_dir, rn)
        # Files are re-synced daily, so links from an earlier run are kept.
        if os.path.exists(link_path):
            continue
        rel_p = relativedir(cache_dir, raw_dir)
        os.symlink(os.path.join(rel_p, rn), link_path)
def daily_manage_raw(incoming, primary, secondary, cache, new_files, renamed=None, remove=None):
    """Function that calls all other functions for this movement type. This
    daily movement type will remove files that it rsyncs if the remove
    parameter is True(False default).

    There are two expected storage configurations that should use this
    function:
        - One primary raw storage location and one secondary location that
          contains a copy of the primary raw data and stores all the created
          products.
          The expected directories would be:
            - primary = <primary location>
            - secondary = <secondary location on separate disk>
            - cache = <cache location on same disk as secondary>
        - One storage location total, that is one primary raw storage
          location and on the same physical disk a cache location. No
          secondary storage.
          The expected directories would be:
            - primary = <primary location>
            - secondary = None
            - cache = <cache location on same disk as primary>

    @param incoming: The directory that the new_files were found
    @type incoming: string
    @param primary: A list of primary raw directories. These directories
        should be on the same volume as incoming for optimization, but is not
        needed. The list must be in the same order as the new_files that will
        be put in those directories. If the directory is one string then a
        list the size of new_files will be filled and all new files will be
        put in that one directory.
    @type primary: list or string
    @param secondary: A list of the secondary raw directories. These
        directories do not need to be on the same volume as incoming. Same
        ordering restrictions apply for secondary as for primary.
    @type secondary: list or string
    @param cache: A list of cache directories. These directories also follow
        the ordering restrictions that primary and secondary do.
    @type cache: list or string
    @param new_files: A list of new files found in the incoming directory
    @type new_files: list
    @param renamed: A list the same size as new_files that represents what
        the new files will be called if so desired. If None(default), files
        will keep the same name as in incoming.
    @type renamed: list
    @param remove: A list of booleans to specify whether or not to remove the
        file after it has been successfully moved to the two raw directories.
    @type remove: list
    @raise ValueError: If incoming is not a string, or if remove is not a
        list with one entry per new file.
    """
    if not isinstance(incoming, str):
        raise ValueError("Incoming must be one directory represented as a string")

    # A single directory string applies to every file.
    if isinstance(primary, str):
        primary = [primary] * len(new_files)
    if isinstance(secondary, str):
        secondary = [secondary] * len(new_files)
    if isinstance(cache, str):
        cache = [cache] * len(new_files)
    if renamed is None:
        renamed = new_files
    if remove is None:
        remove = [False] * len(renamed)
    if not isinstance(remove, list):
        raise ValueError("remove must be a list of booleans")
    if len(remove) != len(new_files):
        raise ValueError("remove does not contain the same amount of elements as new_files")

    # Rsync new files from incoming directory to secondary raw (lagoon)
    if secondary is not None:
        _daily_copy_raw(incoming, secondary, new_files, renamed=renamed)

    # Rsync new files from incoming directory to primary raw (beach), remove
    # if specified.
    # Don't remove for current system because files get reproduced
    if primary is not None:
        _daily_move_raw(incoming, primary, new_files, renamed=renamed, remove=remove)

    # Link new files from the cache location (lagoon) to the secondary raw
    # (lagoon), or to the primary raw for the single-storage configuration.
    if cache is not None:
        if secondary is not None:
            _daily_link_raw(secondary, cache, renamed)
        elif primary is not None:
            _daily_link_raw(primary, cache, renamed)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment