# utils.py (2.7 KB)

  1. class FileRegionsDisposableGetter(object):
  2. def __init__(self, file_data):
  3. self.checksums = {}
  4. self.names = {}
  5. for each in file_data.iterate_regions():
  6. if each.get_checksum() not in self.checksums:
  7. self.checksums[each.get_checksum()] = []
  8. self.checksums[each.get_checksum()].append((each.get_id(), each.get_name()))
  9. if each.get_name() not in self.names:
  10. self.names[each.get_name()] = []
  11. self.names[each.get_name()].append((each.get_id(), each.get_checksum()))
  12. def get_next_id_once_by_checksum(self, checksum):
  13. if checksum not in self.checksums.keys():
  14. return None
  15. if len(self.checksums[checksum]) == 0:
  16. return None
  17. elem = self.checksums[checksum].pop(0)
  18. next_id = elem[0]
  19. next_name = elem[1]
  20. self.names[next_name].remove((next_id, checksum))
  21. return next_id
  22. def get_next_id_once_by_name(self, name):
  23. if name not in self.names.keys():
  24. return None
  25. if len(self.names[name]) == 0:
  26. return None
  27. elem = self.names[name].pop(0)
  28. next_id = elem[0]
  29. next_checksum = elem[1]
  30. self.checksums[next_checksum].remove((next_id, name))
  31. return next_id
  32. class FileRegionsMatcher(object):
  33. def __init__(self, file_data, prev_file_data):
  34. self.ids = [None] # add one to shift id from zero
  35. once_filter = FileRegionsDisposableGetter(prev_file_data)
  36. unmatched_region_ids = []
  37. for (ind, region) in enumerate(file_data.iterate_regions()):
  38. assert(ind + 1 == region.id)
  39. # Identify corresponding region in previous database (attempt by checksum)
  40. prev_id = once_filter.get_next_id_once_by_checksum(region.checksum)
  41. if prev_id != None:
  42. self.ids.append((prev_id, False))
  43. else:
  44. unmatched_region_ids.append(region.id)
  45. self.ids.append((None, True))
  46. # Identify corresponding region in previous database (attempt by name)
  47. for region_id in unmatched_region_ids:
  48. prev_id = once_filter.get_next_id_once_by_name(file_data.get_region(region_id).name)
  49. if prev_id != None:
  50. self.ids[region_id] = (prev_id, True)
  51. def get_prev_id(self, curr_id):
  52. return self.ids[curr_id][0]
  53. def is_matched(self, curr_id):
  54. return (self.ids[curr_id][0] != None)
  55. def is_modified(self, curr_id):
  56. return self.ids[curr_id][1]