From 3064976cc4a4d72257d0c72ecb48933d94768571 Mon Sep 17 00:00:00 2001
From: Bruce Flynn <brucef@ssec.wisc.edu>
Date: Fri, 8 Apr 2016 21:04:14 +0000
Subject: [PATCH] merge: Fix bug where merge was not considering size in packet selection

Because merge was only considering stamp/apid, packets for the same
stamp/apid with different byte sizes were being treated as equal. It now
always prefers the packet/group with more bytes.
---
 edosl0util/merge.py | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/edosl0util/merge.py b/edosl0util/merge.py
index 99cbfcc..af4b604 100644
--- a/edosl0util/merge.py
+++ b/edosl0util/merge.py
@@ -10,7 +10,7 @@
 """
 import os
 import logging
-from collections import deque
+from collections import deque, OrderedDict
 
 LOG = logging.getLogger(__name__)
 
@@ -44,7 +44,7 @@ class _Ptr(object):
 
     # instances with same stamp/apid will compare the same
     def __hash__(self):
-        return hash((self.stamp, self.apid))
+        return hash((self.stamp, self.apid, self.size))
 
     def bytes(self):
         self.fobj.seek(self.offset, os.SEEK_SET)
@@ -93,6 +93,18 @@ def _sort_by_time_apid(index, order=None):
     return sorted(index, key=lambda p: p.stamp)
 
 
+def _filter_duplicates_by_size(index):
+    filtered = OrderedDict()
+    for ptr in index:
+        key = (ptr.stamp, ptr.apid)
+        if key in filtered:
+            if ptr.size > filtered[key].size:
+                filtered[key] = ptr
+        else:
+            filtered[key] = ptr
+    return filtered.values()
+
+
 def merge(streams, output, trunc_to=None, apid_order=None):
     """
     Merge packets from multiple streams to an output file. Duplicate packets
@@ -116,6 +128,8 @@ def merge(streams, output, trunc_to=None, apid_order=None):
     LOG.debug('sorting index with %d pointers', len(index))
     index = _sort_by_time_apid(index, order=apid_order)
 
+    index = _filter_duplicates_by_size(index)
+
     LOG.debug('writing index to %s', output)
     for ptr in index:
         if trunc_to:
--
GitLab
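
For context, a minimal standalone sketch of the size-based de-duplication this patch introduces. The Ptr namedtuple and the sample values are illustrative stand-ins for edosl0util's internal _Ptr objects, not part of the library; only the fields the filter actually uses (stamp, apid, size) are modeled.

from collections import OrderedDict, namedtuple

# Hypothetical stand-in for edosl0util's _Ptr index entries.
Ptr = namedtuple('Ptr', 'stamp apid size')


def filter_duplicates_by_size(index):
    """Keep only the largest packet/group per (stamp, apid) pair,
    preserving the time/apid order of the already-sorted index."""
    filtered = OrderedDict()
    for ptr in index:
        key = (ptr.stamp, ptr.apid)
        if key not in filtered or ptr.size > filtered[key].size:
            filtered[key] = ptr
    return filtered.values()


index = [
    Ptr(stamp=100, apid=11, size=886),   # duplicate stamp/apid, fewer bytes
    Ptr(stamp=100, apid=11, size=1018),  # same stamp/apid, more bytes -> kept
    Ptr(stamp=101, apid=11, size=886),   # unique stamp/apid -> kept
]
print([p.size for p in filter_duplicates_by_size(index)])  # [1018, 886]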