# emo-server/Tools/diff_json_simple.py
#
# 137 lines
# 4.1 KiB
# Python

import argparse
import code
import collections
import collections.abc
import json
from pprint import pprint
from weakref import proxy
#code.interact(local=dict(globals(), **locals()))
class Link(object):
    """Node of the doubly linked list that backs ``OrderedSet``.

    ``__weakref__`` is listed in ``__slots__`` so that ``weakref.proxy`` can
    point at instances even though they carry no ``__dict__``.
    """

    __slots__ = 'prev', 'next', 'key', '__weakref__'


class OrderedSet(collections.abc.MutableSet):
    """Set that remembers the order elements were added."""

    # Big-O running times for all methods are the same as for regular sets.
    # The internal self.__map dictionary maps keys to links in a doubly linked list.
    # The circular doubly linked list starts and ends with a sentinel element.
    # The sentinel element never gets deleted (this simplifies the algorithm).
    # The prev/next links are weakref proxies (to prevent circular references).
    # Individual links are kept alive by the hard reference in self.__map.
    # Those hard references disappear when a key is deleted from an OrderedSet.

    def __init__(self, iterable=None):
        """Create an empty set, then add every element of ``iterable`` in order."""
        self.__root = root = Link()  # sentinel node for doubly linked list
        root.prev = root.next = root
        self.__map = {}  # key --> link
        if iterable is not None:
            self |= iterable

    def __len__(self):
        """Return the number of distinct keys stored."""
        return len(self.__map)

    def __contains__(self, key):
        """Return True if ``key`` is in the set."""
        return key in self.__map

    def add(self, key):
        """Append ``key`` to the end of the ordering unless already present."""
        # Store new key in a new link at the end of the linked list
        if key not in self.__map:
            self.__map[key] = link = Link()
            root = self.__root
            last = root.prev
            link.prev, link.next, link.key = last, root, key
            # Neighbours only hold a proxy; self.__map owns the hard reference.
            last.next = root.prev = proxy(link)

    def discard(self, key):
        """Remove ``key`` if present; do nothing otherwise."""
        # Remove an existing item using self.__map to find the link which is
        # then removed by updating the links in the predecessor and successors.
        if key in self.__map:
            link = self.__map.pop(key)
            link.prev.next = link.next
            link.next.prev = link.prev

    def __iter__(self):
        """Yield keys in insertion order."""
        # Traverse the linked list in order.
        root = self.__root
        curr = root.next
        while curr is not root:
            yield curr.key
            curr = curr.next

    def __reversed__(self):
        """Yield keys in reverse insertion order."""
        # Traverse the linked list in reverse order.
        root = self.__root
        curr = root.prev
        while curr is not root:
            yield curr.key
            curr = curr.prev

    def pop(self, last=True):
        """Remove and return the newest key (or the oldest if ``last`` is false).

        Raises:
            KeyError: if the set is empty.
        """
        if not self:
            raise KeyError('set is empty')
        key = next(reversed(self)) if last else next(iter(self))
        self.discard(key)
        return key

    def __repr__(self):
        if not self:
            return '%s()' % (self.__class__.__name__,)
        return '%s(%r)' % (self.__class__.__name__, list(self))

    def __eq__(self, other):
        """Compare order-sensitively with an OrderedSet, by contents otherwise."""
        if isinstance(other, OrderedSet):
            return len(self) == len(other) and list(self) == list(other)
        # Against anything else, equality means "same elements". The previous
        # ``not self.isdisjoint(other)`` reported any overlap as equal.
        try:
            return set(self) == set(other)
        except TypeError:
            # ``other`` is not iterable or holds unhashable items.
            return NotImplemented
def datalist(f):
    """Load the JSON document stored at path ``f``.

    When the top-level value is an object with exactly one key, the value
    under that key is returned instead of the wrapping object. Any other
    document is returned unchanged.
    """
    with open(f) as handle:
        payload = json.load(handle)

    is_single_key_wrapper = isinstance(payload, dict) and len(payload) == 1
    if not is_single_key_wrapper:
        return payload

    # Exactly one entry: unpack its value.
    (inner,) = payload.values()
    return inner
def compareby(l1, l2, field):
    """Print the differences between two record lists, matched on ``field``.

    ``l1`` (LEFT) and ``l2`` (RIGHT) are iterables of mappings that each
    contain ``field``. The report lists the ``field`` values missing from
    each side, then every shared value whose remaining fields differ.
    Nothing is returned.
    """
    def without_key(record):
        # Everything in the record except the field used for matching.
        return {name: value for name, value in record.items() if name != field}

    left_keys = [record[field] for record in l1]
    right_keys = [record[field] for record in l2]
    only_left = OrderedSet(left_keys) - right_keys
    only_right = OrderedSet(right_keys) - left_keys

    sections = (
        ('[%s] not in LEFT:', only_right),
        ('[%s] not in RIGHT:', only_left),
    )
    for heading, missing in sections:
        print('')
        print(heading % field)
        pprint(missing)
    print('')

    left_by_key = {record[field]: without_key(record) for record in l1}
    right_by_key = {record[field]: without_key(record) for record in l2}

    # Values present on both sides, in LEFT first-seen order.
    shared = [key for key in OrderedSet(left_keys) if key in right_by_key]
    for key in shared:
        left_rest = left_by_key[key]
        right_rest = right_by_key[key]
        if left_rest != right_rest:
            print('records do not match for [%s]=%s' % (field, key))
            print(' LEFT: ' + str(left_rest))
            print(' RIGHT: ' + str(right_rest))
            print('')
#code.interact(local=dict(globals(), **locals()))
if __name__ == "__main__":
    # Command line: LEFT file, RIGHT file, and the field that identifies a record.
    cli = argparse.ArgumentParser(description='Show delta to get from LEFT file to RIGHT file')
    positional = (
        ('f1', 'LEFT file'),
        ('f2', 'RIGHT file'),
        ('field', 'FIELD to compare by'),
    )
    for arg_name, arg_help in positional:
        cli.add_argument(arg_name, type=str, help=arg_help)
    options = cli.parse_args()

    left_records = datalist(options.f1)
    right_records = datalist(options.f2)
    compareby(left_records, right_records, options.field)
#code.interact(local=dict(globals(), **locals()))