utils.py 4.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114
  1. #
  2. # Metrix++, Copyright 2009-2019, Metrix++ Project
  3. # Link: https://github.com/metrixplusplus/metrixplusplus
  4. #
  5. # This file is a part of Metrix++ Tool.
  6. #
  7. import mpp.internal.py2xml
  8. import mpp.internal.py2txt
  9. import logging
  10. import re
  11. import os
  class FileRegionsMatcher(object):
      """Pair the regions of a file with the regions of its previous version.

      Every current region is paired with at most one previous region:
      first by equal checksum (recorded as not modified), then, for the
      regions still unpaired, by equal name (recorded as modified). A
      previous region is never handed out twice.
      """

      class FileRegionsDisposableGetter(object):
          """Index of regions that hands out each region id at most once."""

          def __init__(self, file_data):
              """Index the regions of file_data by checksum and by name.

              file_data must provide iterate_regions(); each region must
              provide get_id(), get_name() and get_checksum().
              """
              # checksum -> [(region id, name), ...] in iteration order
              self.checksums = {}
              # name -> [(region id, checksum), ...] in iteration order
              self.names = {}
              for region in file_data.iterate_regions():
                  region_id = region.get_id()
                  name = region.get_name()
                  checksum = region.get_checksum()
                  self.checksums.setdefault(checksum, []).append((region_id, name))
                  self.names.setdefault(name, []).append((region_id, checksum))

          def get_next_id_once_by_checksum(self, checksum):
              """Return the id of the next unused region with this checksum.

              Returns None when no such region is left. The returned region
              is removed from both indexes, so it cannot be returned again.
              """
              # A single dict lookup covers both "unknown key" and "empty list"
              candidates = self.checksums.get(checksum)
              if not candidates:
                  return None
              next_id, next_name = candidates.pop(0)
              # Keep the name index in sync with the checksum index
              self.names[next_name].remove((next_id, checksum))
              return next_id

          def get_next_id_once_by_name(self, name):
              """Return the id of the next unused region with this name.

              Returns None when no such region is left. The returned region
              is removed from both indexes, so it cannot be returned again.
              """
              candidates = self.names.get(name)
              if not candidates:
                  return None
              next_id, next_checksum = candidates.pop(0)
              # Keep the checksum index in sync with the name index
              self.checksums[next_checksum].remove((next_id, name))
              return next_id

      def __init__(self, file_data, prev_file_data):
          """Compute the pairing between file_data and prev_file_data.

          Both arguments must provide iterate_regions(); file_data must also
          provide get_region(region_id). Regions of file_data are expected to
          be iterated with consecutive ids starting at 1 (asserted below).
          """
          # self.ids[region id] == (previous region id or None, modified flag)
          self.ids = [None] # add one to shift id from zero
          once_filter = self.FileRegionsDisposableGetter(prev_file_data)
          unmatched_region_ids = []
          for (ind, region) in enumerate(file_data.iterate_regions()):
              assert(ind + 1 == region.get_id())
              # Identify corresponding region in previous database (attempt by checksum)
              prev_id = once_filter.get_next_id_once_by_checksum(region.checksum)
              if prev_id is not None:
                  self.ids.append((prev_id, False))
              else:
                  unmatched_region_ids.append(region.get_id())
                  self.ids.append((None, True))
          # Identify corresponding region in previous database (attempt by name)
          for region_id in unmatched_region_ids:
              prev_id = once_filter.get_next_id_once_by_name(file_data.get_region(region_id).name)
              if prev_id is not None:
                  self.ids[region_id] = (prev_id, True)

      def get_prev_id(self, curr_id):
          """Return the id of the paired previous region, or None if unpaired."""
          return self.ids[curr_id][0]

      def is_matched(self, curr_id):
          """Return True when the region was paired with a previous region."""
          return self.ids[curr_id][0] is not None

      def is_modified(self, curr_id):
          """Return True unless the region was paired by equal checksum."""
          return self.ids[curr_id][1]
  def preprocess_path(path):
      """Normalize a path string and return it.

      Every run of backslashes is replaced by a single forward slash. The
      result is prefixed with './' unless it is absolute (per
      os.path.isabs) or already starts with './'. The normalized path is
      logged at INFO level before it is returned.
      """
      path = re.sub(r'''[\\]+''', "/", path)
      if not os.path.isabs(path) and not path.startswith('./'):
          path = './' + path
      logging.info("Processing: " + path)
      return path
  def report_bad_path(path):
      """Log, at ERROR level, that path was not found in the database records."""
      message = "Specified path '" + path + "' is invalid: not found in the database records."
      logging.error(message)
  def serialize_to_xml(data, root_name = None, digitCount = None):
      """Serialize data with mpp.internal.py2xml.Py2XML and return the result.

      root_name is forwarded to the serializer as objName; digitCount is
      forwarded to the Py2XML constructor, which defines its meaning.
      """
      return mpp.internal.py2xml.Py2XML(digitCount).parse(data, objName=root_name)
  def serialize_to_python(data, root_name = None):
      """Return the repr() of data, optionally wrapped under a root key.

      When root_name is None the result is repr(data). Otherwise the result
      has the form {'<root_name>': <repr of data>}; root_name is inserted
      verbatim between the single quotes, without escaping.
      """
      # repr() also works when data is a class object, unlike data.__repr__()
      body = repr(data)
      if root_name is None:
          return body
      return "{'" + root_name + "': " + body + "}"
  def serialize_to_txt(data, root_name = None):
      """Serialize data with mpp.internal.py2txt.Py2TXT and return the result.

      root_name is forwarded to the serializer as objName; parsing is
      started with indent = -1.
      """
      return mpp.internal.py2txt.Py2TXT().parse(data, objName=root_name, indent = -1)