diff --git a/edosl0util/merge.py b/edosl0util/merge.py index 99cbfcccab26ded83276ea1902430afacc7c71b2..af4b604f1c8691c2c545cb833d9b4cc508fc91b9 100644 --- a/edosl0util/merge.py +++ b/edosl0util/merge.py @@ -10,7 +10,7 @@ """ import os import logging -from collections import deque +from collections import deque, OrderedDict LOG = logging.getLogger(__name__) @@ -44,7 +44,7 @@ class _Ptr(object): # instances with same stamp/apid will compare the same def __hash__(self): - return hash((self.stamp, self.apid)) + return hash((self.stamp, self.apid, self.size)) def bytes(self): self.fobj.seek(self.offset, os.SEEK_SET) @@ -93,6 +93,18 @@ def _sort_by_time_apid(index, order=None): return sorted(index, key=lambda p: p.stamp) +def _filter_duplicates_by_size(index): + filtered = OrderedDict() + for ptr in index: + key = (ptr.stamp, ptr.apid) + if key in filtered: + if ptr.size > filtered[key].size: + filtered[key] = ptr + else: + filtered[key] = ptr + return filtered.values() + + def merge(streams, output, trunc_to=None, apid_order=None): """ Merge packets from multiple streams to an output file. Duplicate packets @@ -116,6 +128,8 @@ def merge(streams, output, trunc_to=None, apid_order=None): LOG.debug('sorting index with %d pointers', len(index)) index = _sort_by_time_apid(index, order=apid_order) + index = _filter_duplicates_by_size(index) + LOG.debug('writing index to %s', output) for ptr in index: if trunc_to: