137 lines
4.1 KiB
Python
137 lines
4.1 KiB
Python
import argparse
import code
import collections
import json
from pprint import pprint
from weakref import proxy

try:
    from collections.abc import MutableSet
except ImportError:  # very old interpreters keep the ABCs in ``collections``
    from collections import MutableSet
|
|
#code.interact(local=dict(globals(), **locals()))
|
|
|
|
class Link(object):
    """Node of the doubly linked list that OrderedSet threads its keys on.

    Only the neighbour pointers and the stored key are kept; '__weakref__'
    is listed so that instances can be the target of weakref proxies.
    """

    __slots__ = (
        'prev',
        'next',
        'key',
        '__weakref__',
    )
|
|
|
|
class OrderedSet(MutableSet):
    """Set that remembers the order in which elements were added.

    Big-O running times for all methods are the same as for regular sets.

    Implementation notes:

    * ``self.__map`` maps each key to its link in a circular doubly linked
      list.
    * The list starts and ends with a sentinel link that is never deleted,
      which removes every empty-list special case from add/discard.
    * Neighbouring links point at each other through weakref proxies so the
      list itself does not keep links alive; the hard reference to each link
      is the one stored in ``self.__map``, and it disappears when the key is
      discarded.
    """

    def __init__(self, iterable=None):
        """Create the set, optionally filled from *iterable* in its order."""
        self.__root = root = Link()  # sentinel node for doubly linked list
        root.prev = root.next = root
        self.__map = {}  # key --> link
        if iterable is not None:
            for key in iterable:
                self.add(key)

    def __len__(self):
        """Return the number of keys in the set."""
        return len(self.__map)

    def __contains__(self, key):
        """Return True when *key* is in the set."""
        return key in self.__map

    def add(self, key):
        """Append *key* at the end of the order; do nothing if present."""
        if key not in self.__map:
            self.__map[key] = link = Link()
            root = self.__root
            last = root.prev
            link.prev, link.next, link.key = last, root, key
            # Neighbours only get a proxy; self.__map owns the link.
            last.next = root.prev = proxy(link)

    def discard(self, key):
        """Remove *key* if it is present; silently ignore it otherwise."""
        # Dropping the map entry releases the only hard reference to the
        # link; it is unhooked by joining its predecessor and successor.
        link = self.__map.pop(key, None)
        if link is not None:
            link.prev.next = link.next
            link.next.prev = link.prev

    def __iter__(self):
        """Yield the keys in insertion order."""
        root = self.__root
        curr = root.next
        while curr is not root:
            yield curr.key
            curr = curr.next

    def __reversed__(self):
        """Yield the keys in reverse insertion order."""
        root = self.__root
        curr = root.prev
        while curr is not root:
            yield curr.key
            curr = curr.prev

    def pop(self, last=True):
        """Remove and return the newest key, or the oldest if *last* is false.

        Raises:
            KeyError: if the set is empty.
        """
        if not self:
            raise KeyError('set is empty')
        key = next(reversed(self)) if last else next(iter(self))
        self.discard(key)
        return key

    def __repr__(self):
        """Return e.g. ``OrderedSet([1, 2])``, or ``OrderedSet()`` if empty."""
        if not self:
            return '%s()' % (self.__class__.__name__,)
        return '%s(%r)' % (self.__class__.__name__, list(self))

    def __eq__(self, other):
        """Compare with another collection.

        Against another OrderedSet the comparison is order-sensitive.
        Against any other iterable of hashable items it is a plain set
        comparison (same members, order ignored).  Anything else yields
        NotImplemented so Python falls back to its default handling instead
        of raising.
        """
        if isinstance(other, OrderedSet):
            return len(self) == len(other) and list(self) == list(other)
        try:
            other_keys = set(other)
        except TypeError:
            # Not iterable, or contains unhashable items: cannot be equal.
            return NotImplemented
        return set(self) == other_keys
|
|
|
|
|
|
def datalist(f):
    """Load the JSON document stored at path *f*.

    When the top-level value is an object with exactly one key, the value
    under that key is returned instead of the wrapping object; any other
    document is returned as parsed.
    """
    with open(f) as handle:
        payload = json.load(handle)

    is_single_key_wrapper = isinstance(payload, dict) and len(payload) == 1
    if is_single_key_wrapper:
        (payload,) = payload.values()
    return payload
|
|
|
|
def compareby(l1, l2, field):
    """Print the differences between two record lists keyed on *field*.

    Three things are written to stdout: the *field* values found only in
    ``l2`` ("not in LEFT"), the values found only in ``l1`` ("not in
    RIGHT"), and every value present on both sides whose remaining record
    contents differ.

    Every record must support ``record[field]``.  When a *field* value is
    repeated inside one list, the last record carrying it is the one that
    gets compared.
    """
    left_keys = [entry[field] for entry in l1]
    right_keys = [entry[field] for entry in l2]

    left_only = OrderedSet(left_keys) - right_keys
    right_only = OrderedSet(right_keys) - left_keys

    print('')
    print('[%s] not in LEFT:' % field)
    pprint(right_only)
    print('')
    print('[%s] not in RIGHT:' % field)
    pprint(left_only)
    print('')

    # Union minus both one-sided sets leaves the keys present on both
    # sides, ordered by first appearance in l1.
    every_key = OrderedSet(left_keys) | OrderedSet(right_keys)
    shared_keys = list(every_key - left_only - right_only)

    def rest_by_key(records):
        # Map each record's *field* value to the rest of that record.
        table = {}
        for entry in records:
            table[entry[field]] = {
                name: entry[name] for name in entry if name != field
            }
        return table

    left_rest = rest_by_key(l1)
    right_rest = rest_by_key(l2)

    for key in shared_keys:
        mine, theirs = left_rest[key], right_rest[key]
        if mine != theirs:
            print('records do not match for [%s]=%s' % (field, key))
            print(' LEFT: ' + str(mine))
            print(' RIGHT: ' + str(theirs))
            print('')
|
|
|
|
#code.interact(local=dict(globals(), **locals()))
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Command-line entry point: load the LEFT and RIGHT JSON files and print
    # how their records differ when matched on the given field.
    cli = argparse.ArgumentParser(
        description='Show delta to get from LEFT file to RIGHT file')
    positional_args = (
        ('f1', 'LEFT file'),
        ('f2', 'RIGHT file'),
        ('field', 'FIELD to compare by'),
    )
    for arg_name, arg_help in positional_args:
        cli.add_argument(arg_name, type=str, help=arg_help)
    options = cli.parse_args()

    left_records = datalist(options.f1)
    right_records = datalist(options.f2)
    compareby(left_records, right_records, options.field)
|
|
|
|
#code.interact(local=dict(globals(), **locals()))
|
|
|