From 3064976cc4a4d72257d0c72ecb48933d94768571 Mon Sep 17 00:00:00 2001
From: Bruce Flynn <brucef@ssec.wisc.edu>
Date: Fri, 8 Apr 2016 21:04:14 +0000
Subject: [PATCH] merge: Fix bug where merge was not considering size in packet
 selection

Because merge was only considering stamp/apid, packets for the same
stamp/apid with different byte sizes were being treated as equal. It now
always treats the packet/group with more bytes as the better one.
---
 edosl0util/merge.py | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/edosl0util/merge.py b/edosl0util/merge.py
index 99cbfcc..af4b604 100644
--- a/edosl0util/merge.py
+++ b/edosl0util/merge.py
@@ -10,7 +10,7 @@
 """
 import os
 import logging
-from collections import deque
+from collections import deque, OrderedDict
 
 LOG = logging.getLogger(__name__)
 
@@ -44,7 +44,7 @@ class _Ptr(object):
 
     # instances with same stamp/apid will compare the same
     def __hash__(self):
-        return hash((self.stamp, self.apid))
+        return hash((self.stamp, self.apid, self.size))
 
     def bytes(self):
         self.fobj.seek(self.offset, os.SEEK_SET)
@@ -93,6 +93,18 @@ def _sort_by_time_apid(index, order=None):
     return sorted(index, key=lambda p: p.stamp)
 
 
+def _filter_duplicates_by_size(index):  # keep one ptr per (stamp, apid): the largest
+    filtered = OrderedDict()  # (stamp, apid) -> best ptr so far, in first-seen key order
+    for ptr in index:
+        key = (ptr.stamp, ptr.apid)
+        if key in filtered:
+            if ptr.size > filtered[key].size:  # strictly larger wins; ties keep the earlier ptr
+                filtered[key] = ptr  # replaces the value in place; key position is unchanged
+        else:
+            filtered[key] = ptr
+    return filtered.values()  # values in key insertion order (a view on Python 3, a list on 2)
+
+
 def merge(streams, output, trunc_to=None, apid_order=None):
     """
     Merge packets from multiple streams to an output file. Duplicate packets
@@ -116,6 +128,8 @@ def merge(streams, output, trunc_to=None, apid_order=None):
     LOG.debug('sorting index with %d pointers', len(index))
     index = _sort_by_time_apid(index, order=apid_order)
 
+    index = _filter_duplicates_by_size(index)
+
     LOG.debug('writing index to %s', output)
     for ptr in index:
         if trunc_to:
-- 
GitLab