  1. #
  2. # Metrix++, Copyright 2009-2019, Metrix++ Project
  3. # Link: https://github.com/metrixplusplus/metrixplusplus
  4. #
  5. # This file is a part of Metrix++ Tool.
  6. #
  7. import os.path
  8. import sys
  9. import mpp.internal.dbwrap
  10. import mpp.internal.api_impl
  11. class InterfaceNotImplemented(Exception):
  12. def __init__(self, obj):
  13. Exception.__init__(self, "Method '"
  14. + sys._getframe(1).f_code.co_name
  15. + "' has not been implemented for "
  16. + str(obj.__class__))
class IConfigurable(object):
    # Interface for plugins that can be configured from the command line.

    def configure(self, options):
        """Apply parsed *options*; must be overridden by the plugin."""
        raise InterfaceNotImplemented(self)

    def declare_configuration(self, optparser):
        """Register this plugin's options on *optparser*; must be overridden."""
        raise InterfaceNotImplemented(self)
class IRunable(object):
    # Interface for plugins that act as a runnable tool entry point.
    # NOTE: the name "IRunable" (sic) is part of the public API; keep as-is.

    def run(self, args):
        """Execute the tool with command-line *args*; must be overridden."""
        raise InterfaceNotImplemented(self)
class IParser(object):
    # Interface for source-code parser plugins.

    def process(self, parent, data, is_updated):
        """Parse *data*; must be overridden.

        is_updated presumably flags whether the file changed since the
        previous collection — confirm against callers.
        """
        raise InterfaceNotImplemented(self)
class ICode(object):
    # Marker interface: tags plugins that handle source code.
    pass
  30. class CallbackNotImplemented(Exception):
  31. def __init__(self, obj, callback_name):
  32. Exception.__init__(self, "Callback '"
  33. + callback_name
  34. + "' has not been implemented for "
  35. + str(obj.__class__))
  36. class Child(object):
  37. def notify(self, parent, callback_name, *args):
  38. if hasattr(self, callback_name) == False:
  39. raise CallbackNotImplemented(self, callback_name)
  40. self.__getattribute__(callback_name)(parent, *args)
  41. def subscribe_by_parents_name(self, parent_name, callback_name='callback'):
  42. self.get_plugin(parent_name).subscribe(self, callback_name)
  43. def subscribe_by_parents_names(self, parent_names, callback_name='callback'):
  44. for parent_name in parent_names:
  45. self.get_plugin(parent_name).subscribe(self, callback_name)
  46. def subscribe_by_parents_interface(self, interface, callback_name='callback'):
  47. for plugin in self._get_plugin_loader().iterate_plugins():
  48. if isinstance(plugin, interface):
  49. plugin.subscribe(self, callback_name)
  50. class Parent(object):
  51. def init_Parent(self):
  52. if hasattr(self, 'children') == False:
  53. self.children = []
  54. def subscribe(self, obj, callback_name):
  55. self.init_Parent()
  56. if (isinstance(obj, Child) == False):
  57. raise TypeError()
  58. self.children.append((obj,callback_name))
  59. def unsubscribe(self, obj, callback_name):
  60. self.init_Parent()
  61. self.children.remove((obj, callback_name))
  62. def notify_children(self, *args):
  63. self.init_Parent()
  64. for child in self.children:
  65. child[0].notify(self, child[1], *args)
  66. def iterate_children(self):
  67. self.init_Parent()
  68. for child in self.children:
  69. yield child
  70. ##############################################################################
  71. #
  72. #
  73. #
  74. ##############################################################################
  75. class Data(object):
  76. def __init__(self):
  77. self.data = {}
  78. def get_data(self, namespace, field):
  79. if namespace not in list(self.data.keys()):
  80. return None
  81. if field not in list(self.data[namespace].keys()):
  82. return None
  83. return self.data[namespace][field]
  84. def set_data(self, namespace, field, value):
  85. if namespace not in self.data:
  86. self.data[namespace] = {}
  87. self.data[namespace][field] = value
  88. def iterate_namespaces(self):
  89. for namespace in list(self.data.keys()):
  90. yield namespace
  91. def iterate_fields(self, namespace):
  92. for field in list(self.data[namespace].keys()):
  93. yield (field, self.data[namespace][field])
  94. def get_data_tree(self, namespaces=None):
  95. return self.data
  96. def __repr__(self):
  97. return object.__repr__(self) + " with data " + self.data.__repr__()
class LoadableData(Data):
    # Data lazily loaded from the database, one namespace at a time,
    # with dirty-tracking so only changed namespaces are written back.

    def __init__(self, loader, file_id, region_id):
        # region_id is None for file-level records, an id for region-level ones
        Data.__init__(self)
        self.loader = loader
        self.file_id = file_id
        self.region_id = region_id
        self.loaded_namespaces = []   # namespaces already fetched from the db
        self.changed_namespaces = []  # namespaces modified since load (need saving)

    def load_namespace(self, namespace):
        """Fetch every field of *namespace* for this record into memory."""
        namespace_obj = self.loader.get_namespace(namespace)
        if namespace_obj == None:
            # unknown namespace: nothing to load
            return
        regions_supported = namespace_obj.are_regions_supported()
        # skip namespaces whose region support does not match this record's
        # kind (file-level record vs region-level record)
        if ((self.region_id == None and regions_supported == True) or
                (self.region_id != None and regions_supported == False)):
            return
        row = self.loader.db.get_row(namespace, self.file_id, self.region_id)
        if row == None:
            return
        for column_name in list(row.keys()):
            try:
                packager = namespace_obj._get_field_packager(column_name)
            except mpp.internal.api_impl.PackagerError:
                # column without a registered packager (e.g. bookkeeping
                # column) — not part of the measured data
                continue
            if row[column_name] == None:
                continue
            Data.set_data(self, namespace, column_name, packager.unpack(row[column_name]))

    def set_data(self, namespace, field, value):
        # mark the namespace dirty so it is written back on save;
        # calls Data.set_data directly to bypass the lazy-load in get_data
        if namespace not in self.changed_namespaces:
            self.changed_namespaces.append(namespace)
        return Data.set_data(self, namespace, field, value)

    def get_data(self, namespace, field):
        # lazy-load the namespace on first access (attempted loads are
        # recorded too, so a missing row is not re-queried)
        if namespace not in self.loaded_namespaces:
            self.loaded_namespaces.append(namespace)
            self.load_namespace(namespace)
        return Data.get_data(self, namespace, field)

    def is_namespace_updated(self, namespace):
        """Return True if set_data was called for *namespace* on this record."""
        return namespace in self.changed_namespaces

    def is_namespace_loaded(self, namespace):
        """Return True if *namespace* has already been fetched (or attempted)."""
        return namespace in self.loaded_namespaces

    def get_data_tree(self, namespaces=None):
        """Load the given namespaces (all known ones by default) and return
        the raw {namespace: {field: value}} tree."""
        if namespaces == None:
            namespaces = self.loader.iterate_namespace_names()
        for each in namespaces:
            self.load_namespace(each)
        return Data.get_data_tree(self)
  144. class Region(LoadableData):
  145. class T(object):
  146. NONE = 0x00
  147. GLOBAL = 0x01
  148. CLASS = 0x02
  149. STRUCT = 0x04
  150. NAMESPACE = 0x08
  151. FUNCTION = 0x10
  152. INTERFACE = 0x20
  153. ANY = 0xFF
  154. def to_str(self, group):
  155. if group == self.NONE:
  156. return "none"
  157. if group == self.ANY:
  158. return "any"
  159. result = []
  160. if group & self.GLOBAL != 0:
  161. result.append("global")
  162. if group & self.CLASS != 0:
  163. result.append("class")
  164. if group & self.STRUCT != 0:
  165. result.append("struct")
  166. if group & self.NAMESPACE != 0:
  167. result.append("namespace")
  168. if group & self.FUNCTION != 0:
  169. result.append("function")
  170. if group & self.INTERFACE != 0:
  171. result.append("interface")
  172. assert(len(result) != 0)
  173. return ', '.join(result)
  174. def from_str(self, group):
  175. if group == "global":
  176. return self.GLOBAL
  177. elif group == "class":
  178. return self.CLASS
  179. elif group == "struct":
  180. return self.STRUCT
  181. elif group == "namespace":
  182. return self.NAMESPACE
  183. elif group == "function":
  184. return self.FUNCTION
  185. elif group == "interface":
  186. return self.INTERFACE
  187. elif group == "any":
  188. return self.ANY
  189. else:
  190. return None
  191. def __init__(self, loader, file_id, region_id, region_name, offset_begin, offset_end, line_begin, line_end, cursor_line, group, checksum):
  192. LoadableData.__init__(self, loader, file_id, region_id)
  193. self.name = region_name
  194. self.begin = offset_begin
  195. self.end = offset_end
  196. self.line_begin = line_begin
  197. self.line_end = line_end
  198. self.cursor = cursor_line
  199. self.group = group
  200. self.checksum = checksum
  201. self.children = []
  202. def get_id(self):
  203. return self.region_id
  204. def get_name(self):
  205. return self.name
  206. def get_offset_begin(self):
  207. return self.begin
  208. def get_offset_end(self):
  209. return self.end
  210. def get_line_begin(self):
  211. return self.line_begin
  212. def get_line_end(self):
  213. return self.line_end
  214. def get_cursor(self):
  215. return self.cursor
  216. def get_type(self):
  217. return self.group
  218. def get_checksum(self):
  219. return self.checksum
  220. def iterate_subregion_ids(self):
  221. return self.children
  222. def _register_subregion_id(self, child_id):
  223. self.children.append(child_id)
  224. class Marker(object):
  225. class T(object):
  226. NONE = 0x00
  227. COMMENT = 0x01
  228. STRING = 0x02
  229. PREPROCESSOR = 0x04
  230. CODE = 0x08
  231. ANY = 0xFF
  232. def to_str(self, group):
  233. if group == self.NONE:
  234. return "none"
  235. elif group == self.COMMENT:
  236. return "comment"
  237. elif group == self.STRING:
  238. return "string"
  239. elif group == self.PREPROCESSOR:
  240. return "preprocessor"
  241. elif group == self.CODE:
  242. return "code"
  243. else:
  244. assert(False)
  245. def __init__(self, offset_begin, offset_end, group):
  246. self.begin = offset_begin
  247. self.end = offset_end
  248. self.group = group
  249. def get_offset_begin(self):
  250. return self.begin
  251. def get_offset_end(self):
  252. return self.end
  253. def get_type(self):
  254. return self.group
class FileData(LoadableData):
    # File-level record: path, checksum and content of one file, plus the
    # regions and markers produced by code parsers. Regions and markers
    # are loaded from the database lazily; None means "not loaded yet".

    def __init__(self, loader, path, file_id, checksum, content):
        LoadableData.__init__(self, loader, file_id, None)
        self.path = path
        self.checksum = checksum
        self.content = content
        self.regions = None
        self.markers = None
        self.loader = loader
        self.loading_tmp = []  # stack of open region ids, used while appending

    def get_id(self):
        return self.file_id

    def get_path(self):
        return self.path

    def get_checksum(self):
        return self.checksum

    def get_content(self):
        return self.content

    def _internal_append_region(self, region):
        # here we apply some magic - we rely on special ordering of coming regions,
        # which is supported by code parsers
        # NOTE(review): the stack is popped until a region that still
        # encloses the new one is found; parsers appear to emit a
        # file-wide region first so the stack does not underrun — confirm
        prev_id = None
        while True:
            if len(self.loading_tmp) == 0:
                break
            prev_id = self.loading_tmp.pop()
            if self.get_region(prev_id).get_offset_end() > region.get_offset_begin():
                self.loading_tmp.append(prev_id) # return back
                break
        self.loading_tmp.append(region.get_id())
        if prev_id != None:
            # the enclosing region registers the new one as its subregion
            self.get_region(prev_id)._register_subregion_id(region.get_id())
        self.regions.append(region)

    def load_regions(self):
        # populate self.regions from the database on first use
        if self.regions == None:
            self.regions = []
            for each in self.loader.db.iterate_regions(self.get_id()):
                self._internal_append_region(Region(self.loader,
                                                    self.get_id(),
                                                    each.region_id,
                                                    each.name,
                                                    each.begin,
                                                    each.end,
                                                    each.line_begin,
                                                    each.line_end,
                                                    each.cursor,
                                                    each.group,
                                                    each.checksum))
                # region ids are expected to be sequential, starting from 1
                assert(len(self.regions) == each.region_id)

    def add_region(self, region_name, offset_begin, offset_end, line_begin, line_end, cursor_line, group, checksum):
        """Record a new region (id = position, 1-based) and persist it."""
        if self.regions == None:
            # # do not load regions and markers in time of collection
            # if region is added first by parser, set markers to empty list as well
            # because if there are no markers in a file, it forces loading of markers
            # during iterate_markers call
            self.regions = []
            self.markers = []
        new_id = len(self.regions) + 1
        self._internal_append_region(Region(self.loader, self.get_id(), new_id, region_name, offset_begin, offset_end, line_begin, line_end, cursor_line, group, checksum))
        self.loader.db.create_region(self.file_id, new_id, region_name, offset_begin, offset_end, line_begin, line_end, cursor_line, group, checksum)
        return new_id

    def get_region(self, region_id):
        # region ids are 1-based indexes into self.regions
        self.load_regions()
        return self.regions[region_id - 1]

    def iterate_regions(self, filter_group = Region.T.ANY, region_id = None):
        """Yield regions matching *filter_group*; when *region_id* is given,
        only its direct subregions are considered."""
        self.load_regions()
        if region_id == None:
            for each in self.regions:
                if each.group & filter_group:
                    yield each
        else:
            for sub_id in self.get_region(region_id).iterate_subregion_ids():
                each = self.get_region(sub_id)
                if each.group & filter_group:
                    yield each

    def are_regions_loaded(self):
        return self.regions != None

    def load_markers(self):
        if self.markers == None:
            # TODO add assert in case of an attempt to load data during collection
            assert(False) # TODO not used in post-processing tools for while, need to be fixed
            self.markers = []
            for each in self.loader.db.iterate_markers(self.get_id()):
                self.markers.append(Marker(each.begin, each.end, each.group))

    def add_marker(self, offset_begin, offset_end, group):
        if self.markers == None:
            # # do not load regions and markers in time of collection
            # if marker is added first by parser, set regions to empty list as well
            # because if there are no regions in a file, it forces loading of regions
            # during iterate_regions call
            self.regions = []
            self.markers = []
        self.markers.append(Marker(offset_begin, offset_end, group))
        # TODO drop collecting markers, it is faster to double parse
        # it is not the same with regions, it is faster to load regions
        # on iterative re-run
        #self.loader.db.create_marker(self.file_id, offset_begin, offset_end, group)

    def iterate_markers(self, filter_group = Marker.T.ANY,
                        region_id = None, exclude_children = True, merge = False):
        """Yield markers filtered by type; code markers are synthesized
        on the fly from the gaps between recorded non-code markers."""
        self.load_markers()
        # merged markers: adjacent spans are coalesced, types OR-ed together
        if merge == True:
            next_marker = None
            for marker in self.iterate_markers(filter_group, region_id, exclude_children, merge = False):
                if next_marker != None:
                    if next_marker.get_offset_end() == marker.get_offset_begin():
                        # sequential markers
                        next_marker = Marker(next_marker.get_offset_begin(),
                                             marker.get_offset_end(),
                                             next_marker.get_type() | marker.get_type())
                    else:
                        yield next_marker
                        next_marker = None
                if next_marker == None:
                    next_marker = Marker(marker.get_offset_begin(),
                                         marker.get_offset_end(),
                                         marker.get_type())
            if next_marker != None:
                yield next_marker
        # all markers per file
        elif region_id == None:
            next_code_marker_start = 0
            for marker in self.markers:
                # gap before this marker is code, if code was requested
                if Marker.T.CODE & filter_group and next_code_marker_start < marker.get_offset_begin():
                    yield Marker(next_code_marker_start, marker.get_offset_begin(), Marker.T.CODE)
                if marker.group & filter_group:
                    yield marker
                next_code_marker_start = marker.get_offset_end()
            # trailing code segment after the last marker
            if Marker.T.CODE & filter_group and next_code_marker_start < len(self.get_content()):
                yield Marker(next_code_marker_start, len(self.get_content()), Marker.T.CODE)
        # markers per region
        else:
            region = self.get_region(region_id)
            if region != None:
                # code parsers and database know about non-code markers
                # clients want to iterate code as markers as well
                # so, we embed code markers in run-time
                class CodeMarker(Marker):
                    pass
                # cache markers for all regions if it does not exist
                if hasattr(region, '_markers_list') == False:
                    # subroutine to populate _markers_list attribute
                    # _markers_list does include code markers
                    def cache_markers_list_rec(data, region_id, marker_start_ind, next_code_marker_start):
                        region = data.get_region(region_id)
                        region._markers_list = []
                        region._first_marker_ind = marker_start_ind
                        #next_code_marker_start = region.get_offset_begin()
                        for sub_id in region.iterate_subregion_ids():
                            subregion = data.get_region(sub_id)
                            # cache all markers before the subregion
                            while len(data.markers) > marker_start_ind and \
                                    subregion.get_offset_begin() > data.markers[marker_start_ind].get_offset_begin():
                                if next_code_marker_start < data.markers[marker_start_ind].get_offset_begin():
                                    # append code markers coming before non-code marker
                                    region._markers_list.append(CodeMarker(next_code_marker_start,
                                                                           data.markers[marker_start_ind].get_offset_begin(),
                                                                           Marker.T.CODE))
                                next_code_marker_start = data.markers[marker_start_ind].get_offset_end()
                                region._markers_list.append(marker_start_ind)
                                marker_start_ind += 1
                            # cache all code markers before the subregion but after the last marker
                            if next_code_marker_start < subregion.get_offset_begin():
                                region._markers_list.append(CodeMarker(next_code_marker_start,
                                                                       subregion.get_offset_begin(),
                                                                       Marker.T.CODE))
                            next_code_marker_start = subregion.get_offset_begin()
                            # here is the recursive call for all sub-regions
                            (marker_start_ind, next_code_marker_start) = cache_markers_list_rec(data,
                                                                                                sub_id,
                                                                                                marker_start_ind,
                                                                                                next_code_marker_start)
                        # cache all markers after the last subregion
                        while len(data.markers) > marker_start_ind and \
                                region.get_offset_end() > data.markers[marker_start_ind].get_offset_begin():
                            # append code markers coming before non-code marker
                            if next_code_marker_start < data.markers[marker_start_ind].get_offset_begin():
                                region._markers_list.append(CodeMarker(next_code_marker_start,
                                                                       data.markers[marker_start_ind].get_offset_begin(),
                                                                       Marker.T.CODE))
                            next_code_marker_start = data.markers[marker_start_ind].get_offset_end()
                            region._markers_list.append(marker_start_ind)
                            marker_start_ind += 1
                        # cache the last code segment after the last marker
                        if next_code_marker_start < region.get_offset_end():
                            region._markers_list.append(CodeMarker(next_code_marker_start,
                                                                   region.get_offset_end(),
                                                                   Marker.T.CODE))
                        next_code_marker_start = region.get_offset_end()
                        # return the starting point for the next call of this function
                        return (marker_start_ind, next_code_marker_start)
                    # append markers list to all regions recursively,
                    # starting from region 1 (the file-wide region)
                    (next_marker_pos, next_code_marker_start) = cache_markers_list_rec(self, 1, 0, 0)
                    assert(next_marker_pos == len(self.markers))
                # excluding subregions
                if exclude_children == True:
                    # _markers_list holds ints (indexes into self.markers)
                    # interleaved with synthesized CodeMarker objects
                    for marker_ind in region._markers_list:
                        if isinstance(marker_ind, int):
                            marker = self.markers[marker_ind]
                        else:
                            marker = marker_ind # CodeMarker
                        if marker.group & filter_group:
                            yield marker
                # including subregions
                else:
                    next_code_marker_start = region.get_offset_begin()
                    for marker in self.markers[region._first_marker_ind:]:
                        if marker.get_offset_begin() >= region.get_offset_end():
                            break
                        if region.get_offset_begin() > marker.get_offset_begin():
                            continue
                        if Marker.T.CODE & filter_group and next_code_marker_start < marker.get_offset_begin():
                            yield Marker(next_code_marker_start, marker.get_offset_begin(), Marker.T.CODE)
                        if marker.group & filter_group:
                            yield marker
                        next_code_marker_start = marker.get_offset_end()
                    if Marker.T.CODE & filter_group and next_code_marker_start < region.get_offset_end():
                        yield Marker(next_code_marker_start, region.get_offset_end(), Marker.T.CODE)

    def are_markers_loaded(self):
        return self.markers != None

    def __repr__(self):
        return Data.__repr__(self) + " and regions " + self.regions.__repr__()
  477. class AggregatedData(Data):
  478. def __init__(self, loader, path):
  479. Data.__init__(self)
  480. self.path = path
  481. self.loader = loader
  482. self.subdirs = None
  483. self.subfiles = None
  484. def get_subdirs(self):
  485. if self.subdirs != None:
  486. return self.subdirs
  487. self.subdirs = []
  488. if self.path != None:
  489. for subdir in self.loader.db.iterate_dircontent(self.path, include_subdirs = True, include_subfiles = False):
  490. self.subdirs.append(subdir)
  491. return self.subdirs
  492. def get_subfiles(self):
  493. if self.subfiles != None:
  494. return self.subfiles
  495. self.subfiles = []
  496. if self.path != None:
  497. for subfile in self.loader.db.iterate_dircontent(self.path, include_subdirs = False, include_subfiles = True):
  498. self.subfiles.append(subfile)
  499. return self.subfiles
  500. class SelectData(Data):
  501. def __init__(self, loader, path, file_id, region_id):
  502. Data.__init__(self)
  503. self.loader = loader
  504. self.path = path
  505. self.file_id = file_id
  506. self.region_id = region_id
  507. self.region = None
  508. def get_path(self):
  509. return self.path
  510. def get_region(self):
  511. if self.region == None and self.region_id != None:
  512. row = self.loader.db.get_region(self.file_id, self.region_id)
  513. if row != None:
  514. self.region = Region(self.loader,
  515. self.file_id,
  516. self.region_id,
  517. row.name,
  518. row.begin,
  519. row.end,
  520. row.line_begin,
  521. row.line_end,
  522. row.cursor,
  523. row.group,
  524. row.checksum)
  525. return self.region
  526. class DiffData(Data):
  527. def __init__(self, new_data, old_data):
  528. Data.__init__(self)
  529. self.new_data = new_data
  530. self.old_data = old_data
  531. def get_data(self, namespace, field):
  532. new_data = self.new_data.get_data(namespace, field)
  533. old_data = self.old_data.get_data(namespace, field)
  534. if new_data == None:
  535. return None
  536. if old_data == None:
  537. # non_zero fields has got zero value by default if missed
  538. # the data can be also unavailable,
  539. # because previous collection does not include that
  540. # but external tools (like limit.py) should warn about this,
  541. # using list of registered database properties
  542. old_data = 0
  543. return new_data - old_data
  544. ####################################
  545. # Loader
  546. ####################################
  547. class Namespace(object):
  548. class NamespaceError(Exception):
  549. def __init__(self, namespace, reason):
  550. Exception.__init__(self, "Namespace '"
  551. + namespace
  552. + "': '"
  553. + reason
  554. + "'")
  555. class FieldError(Exception):
  556. def __init__(self, field, reason):
  557. Exception.__init__(self, "Field '"
  558. + field
  559. + "': '"
  560. + reason
  561. + "'")
  562. def __init__(self, db_handle, name, support_regions = False, version='1.0'):
  563. if not isinstance(name, str):
  564. raise Namespace.NamespaceError(name, "name not a string")
  565. self.name = name
  566. self.support_regions = support_regions
  567. self.fields = {}
  568. self.db = db_handle
  569. if self.db.check_table(name) == False:
  570. self.db.create_table(name, support_regions, version)
  571. else:
  572. for column in self.db.iterate_columns(name):
  573. self.add_field(column.name,
  574. mpp.internal.api_impl.PackagerFactory().get_python_type(column.sql_type),
  575. non_zero=column.non_zero)
  576. def get_name(self):
  577. return self.name
  578. def are_regions_supported(self):
  579. return self.support_regions
  580. def add_field(self, field_name, python_type, non_zero=False):
  581. if not isinstance(field_name, str):
  582. raise Namespace.FieldError(field_name, "field_name not a string")
  583. packager = mpp.internal.api_impl.PackagerFactory().create(python_type, non_zero)
  584. if field_name in list(self.fields.keys()):
  585. raise Namespace.FieldError(field_name, "double used")
  586. self.fields[field_name] = packager
  587. if self.db.check_column(self.get_name(), field_name) == False:
  588. # - False if cloned
  589. # - True if created
  590. return self.db.create_column(self.name, field_name, packager.get_sql_type(), non_zero=non_zero)
  591. return None # if double request
  592. def iterate_field_names(self):
  593. for name in list(self.fields.keys()):
  594. yield name
  595. def check_field(self, field_name):
  596. try:
  597. self._get_field_packager(field_name)
  598. except mpp.internal.api_impl.PackagerError:
  599. return False
  600. return True
  601. def get_field_sql_type(self, field_name):
  602. try:
  603. return self._get_field_packager(field_name).get_sql_type()
  604. except mpp.internal.api_impl.PackagerError:
  605. raise Namespace.FieldError(field_name, 'does not exist')
  606. def get_field_python_type(self, field_name):
  607. try:
  608. return self._get_field_packager(field_name).get_python_type()
  609. except mpp.internal.api_impl.PackagerError:
  610. raise Namespace.FieldError(field_name, 'does not exist')
  611. def is_field_non_zero(self, field_name):
  612. try:
  613. return self._get_field_packager(field_name).is_non_zero()
  614. except mpp.internal.api_impl.PackagerError:
  615. raise Namespace.FieldError(field_name, 'does not exist')
  616. def _get_field_packager(self, field_name):
  617. if field_name in list(self.fields.keys()):
  618. return self.fields[field_name]
  619. else:
  620. raise mpp.internal.api_impl.PackagerError("unknown field " + field_name + " requested")
class Loader(object):
    # Main entry point to the metrics database: manages namespaces
    # (tables) and file data records.

    def __init__(self):
        self.namespaces = {}
        self.db = None
        self.last_file_data = None # for performance boost reasons

    def create_database(self, dbfile, previous_db = None):
        """Create a new database file, optionally cloning from previous_db.

        Returns True on success, False on any failure."""
        self.db = mpp.internal.dbwrap.Database()
        try:
            self.db.create(dbfile, clone_from=previous_db)
        except:
            # NOTE(review): bare except silently maps any failure to False
            return False
        return True

    def open_database(self, dbfile, read_only = True):
        """Open an existing database and register its tables as namespaces.

        Returns True on success, False if the file is missing or unreadable."""
        self.db = mpp.internal.dbwrap.Database()
        if os.path.exists(dbfile) == False:
            return False
        try:
            self.db.connect(dbfile, read_only=read_only)
        except:
            # NOTE(review): bare except silently maps any failure to False
            return False
        for table in self.db.iterate_tables():
            self.create_namespace(table.name, table.support_regions)
        return True

    def set_property(self, property_name, value):
        # properties are stored as strings; returns None when no db is open
        if self.db == None:
            return None
        return self.db.set_property(property_name, str(value))

    def get_property(self, property_name):
        # returns None when no db is open
        if self.db == None:
            return None
        return self.db.get_property(property_name)

    def iterate_properties(self):
        # returns None when no db is open
        if self.db == None:
            return None
        return self.db.iterate_properties()

    def create_namespace(self, name, support_regions = False, version='1.0'):
        """Create (or attach) the table *name*; raises NamespaceError on
        duplicates, returns None when no db is open."""
        if self.db == None:
            return None
        if name in list(self.namespaces.keys()):
            raise Namespace.NamespaceError(name, "double used")
        new_namespace = Namespace(self.db, str(name), support_regions, version)
        self.namespaces[name] = new_namespace
        return new_namespace

    def iterate_namespace_names(self):
        """Yield the names of all registered namespaces."""
        for name in list(self.namespaces.keys()):
            yield name

    def get_namespace(self, name):
        """Return the Namespace called *name*, or None if unknown."""
        if name in list(self.namespaces.keys()):
            return self.namespaces[name]
        else:
            return None

    def create_file_data(self, path, checksum, content):
        """Register a file in the db; returns (FileData, is_updated),
        or None when no db is open."""
        if self.db == None:
            return None
        (new_id, is_updated) = self.db.create_file(path, checksum)
        result = FileData(self, path, new_id, checksum, content)
        # cache the most recently touched file for fast repeated access
        self.last_file_data = result
        return (result, is_updated)

    def load_file_data(self, path):
        """Load a file record by path; returns FileData or None if absent.

        Loaded records carry no content (content=None)."""
        if self.db == None:
            return None
        if self.last_file_data != None and self.last_file_data.get_path() == path:
            return self.last_file_data
        data = self.db.get_file(path)
        if data == None:
            return None
        result = FileData(self, data.path, data.id, data.checksum, None)
        self.last_file_data = result
        return result

    class DataNotPackable(Exception):
        # Raised during save when a value cannot be serialized by the
        # packager registered for its field.
        def __init__(self, namespace, field, value, packager, extra_message):
            Exception.__init__(self, "Data '"
                + str(value)
                + "' of type "
                + str(value.__class__)
                + " referred by '"
                + namespace
                + "=>"
                + field
                + "' is not packable by registered packager '"
                + str(packager.__class__)
                + "': " + extra_message)
def save_file_data(self, file_data):
    """Persist all updated namespaces of *file_data* (and of its regions)
    into the database.

    Returns None immediately if no database is attached. Values are packed
    lazily: DataIterator yields (field, packed_value) pairs on demand while
    the database consumes the row.
    """
    if self.db == None:
        return None

    class DataIterator(object):
        # Adapter that packs each (field, value) pair of one namespace on
        # the fly; raises Loader.DataNotPackable on any packing problem.
        def iterate_packed_values(self, data, namespace, support_regions = False):
            for each in data.iterate_fields(namespace):
                space = self.loader.get_namespace(namespace)
                if space == None:
                    raise Loader.DataNotPackable(namespace, each[0], each[1], None, "The namespace has not been found")
                try:
                    packager = space._get_field_packager(each[0])
                except mpp.internal.api_impl.PackagerError:
                    raise Loader.DataNotPackable(namespace, each[0], each[1], None, "The field has not been found")
                # region-level data may only go into a region-aware namespace
                if space.support_regions != support_regions:
                    raise Loader.DataNotPackable(namespace, each[0], each[1], packager, "Incompatible support for regions")
                try:
                    packed_data = packager.pack(each[1])
                    if packed_data == None:
                        # packager declined the value: silently skip the field
                        continue
                except mpp.internal.api_impl.PackagerError:
                    raise Loader.DataNotPackable(namespace, each[0], each[1], packager, "Packager raised exception")
                yield (each[0], packed_data)

        def __init__(self, loader, data, namespace, support_regions = False):
            self.loader = loader
            self.iterator = self.iterate_packed_values(data, namespace, support_regions)

        def __iter__(self):
            return self.iterator

    # TODO can construct to add multiple rows at one sql query
    # to improve the performance
    for namespace in file_data.iterate_namespaces():
        # skip namespaces whose data did not change (iterative rescan)
        if file_data.is_namespace_updated(namespace) == False:
            continue
        self.db.add_row(namespace,
                        file_data.get_id(),
                        None,
                        DataIterator(self, file_data, namespace))

    if file_data.are_regions_loaded():
        for region in file_data.iterate_regions():
            for namespace in region.iterate_namespaces():
                if region.is_namespace_updated(namespace) == False:
                    continue
                self.db.add_row(namespace,
                                file_data.get_id(),
                                region.get_id(),
                                DataIterator(self, region, namespace, support_regions = True))
  748. def iterate_file_data(self, path = None, path_like_filter = "%"):
  749. if self.db == None:
  750. return None
  751. final_path_like = path_like_filter
  752. if path != None:
  753. if self.db.check_dir(path) == False and self.db.check_file(path) == False:
  754. return None
  755. final_path_like = path + path_like_filter
  756. class FileDataIterator(object):
  757. def iterate_file_data(self, loader, final_path_like):
  758. for data in loader.db.iterate_files(path_like=final_path_like):
  759. yield FileData(loader, data.path, data.id, data.checksum, None)
  760. def __init__(self, loader, final_path_like):
  761. self.iterator = self.iterate_file_data(loader, final_path_like)
  762. def __iter__(self):
  763. return self.iterator
  764. if self.db == None:
  765. return None
  766. return FileDataIterator(self, final_path_like)
  767. def load_aggregated_data(self, path = None, path_like_filter = "%", namespaces = None):
  768. if self.db == None:
  769. return None
  770. final_path_like = path_like_filter
  771. if path != None:
  772. if self.db.check_dir(path) == False and self.db.check_file(path) == False:
  773. return None
  774. final_path_like = path + path_like_filter
  775. if namespaces == None:
  776. namespaces = list(self.namespaces.keys())
  777. result = AggregatedData(self, path)
  778. for name in namespaces:
  779. namespace = self.get_namespace(name)
  780. data = self.db.aggregate_rows(name, path_like = final_path_like)
  781. for field in list(data.keys()):
  782. if namespace.get_field_python_type(field) == str:
  783. continue
  784. data[field]['nonzero'] = namespace.is_field_non_zero(field)
  785. distribution = self.db.count_rows(name, path_like = final_path_like, group_by_column = field)
  786. data[field]['distribution-bars'] = []
  787. for each in distribution:
  788. if each[0] == None:
  789. continue
  790. assert(float(data[field]['count'] != 0))
  791. data[field]['distribution-bars'].append({'metric': each[0],
  792. 'count': each[1],
  793. 'ratio': (float(each[1]) / float(data[field]['count']))})
  794. result.set_data(name, field, data[field])
  795. return result
  796. def load_selected_data(self, namespace, fields = None, path = None, path_like_filter = "%", filters = [],
  797. sort_by = None, limit_by = None):
  798. if self.db == None:
  799. return None
  800. final_path_like = path_like_filter
  801. if path != None:
  802. if self.db.check_dir(path) == False and self.db.check_file(path) == False:
  803. return None
  804. final_path_like = path + path_like_filter
  805. namespace_obj = self.get_namespace(namespace)
  806. if namespace_obj == None:
  807. return None
  808. class SelectDataIterator(object):
  809. def iterate_selected_values(self, loader, namespace_obj, final_path_like, fields, filters, sort_by, limit_by):
  810. for row in loader.db.select_rows(namespace_obj.get_name(), path_like=final_path_like, filters=filters,
  811. order_by=sort_by, limit_by=limit_by):
  812. region_id = None
  813. if namespace_obj.are_regions_supported() == True:
  814. region_id = row['region_id']
  815. data = SelectData(loader, row['path'], row['id'], region_id)
  816. field_names = fields
  817. if fields == None:
  818. field_names = namespace_obj.iterate_field_names()
  819. for field in field_names:
  820. data.set_data(namespace, field, row[field])
  821. yield data
  822. def __init__(self, loader, namespace_obj, final_path_like, fields, filters, sort_by, limit_by):
  823. self.iterator = self.iterate_selected_values(loader, namespace_obj, final_path_like, fields, filters, sort_by, limit_by)
  824. def __iter__(self):
  825. return self.iterator
  826. return SelectDataIterator(self, namespace_obj, final_path_like, fields, filters, sort_by, limit_by)
  827. class BasePlugin(object):
  828. def initialize(self):
  829. pass
  830. def terminate(self):
  831. pass
  832. def set_name(self, name):
  833. self.name = name
  834. def get_name(self):
  835. if hasattr(self, 'name') == False:
  836. return None
  837. return self.name
  838. def get_namespace(self):
  839. return self.get_name()
  840. def set_version(self, version):
  841. self.version = version
  842. def get_version(self):
  843. if hasattr(self, 'version') == False:
  844. return None
  845. return self.version
  846. def _set_plugin_loader(self, loader):
  847. self.plugin_loader = loader
  848. def _get_plugin_loader(self):
  849. if hasattr(self, 'plugin_loader') == False:
  850. return None
  851. return self.plugin_loader
  852. def get_plugin(self, plugin_name):
  853. return self._get_plugin_loader().get_plugin(plugin_name)
  854. def get_action(self):
  855. return self._get_plugin_loader().get_action()
  856. class Plugin(BasePlugin):
  857. class Field(object):
  858. def __init__(self, name, ftype, non_zero=False):
  859. self.name = name
  860. self.type = ftype
  861. self.non_zero = non_zero
  862. self._regions_supported = True
  863. class Property(object):
  864. def __init__(self, name, value):
  865. self.name = name
  866. self.value = value
  867. def initialize(self, namespace=None, support_regions=True, fields=[], properties=[]):
  868. super(Plugin, self).initialize()
  869. if hasattr(self, 'is_updated') == False:
  870. self.is_updated = False # original initialization
  871. db_loader = self.get_plugin('mpp.dbf').get_loader()
  872. if namespace == None:
  873. namespace = self.get_name()
  874. if (len(fields) != 0 or len(properties) != 0):
  875. prev_version = db_loader.set_property(self.get_name() + ":version", self.get_version())
  876. if str(prev_version) != str(self.get_version()):
  877. self.is_updated = True
  878. for prop in properties:
  879. assert(prop.name != 'version')
  880. prev_prop = db_loader.set_property(self.get_name() + ":" + prop.name, prop.value)
  881. if str(prev_prop) != str(prop.value):
  882. self.is_updated = True
  883. if len(fields) != 0:
  884. namespace_obj = db_loader.create_namespace(namespace,
  885. support_regions=support_regions,
  886. version=self.get_version())
  887. for field in fields:
  888. is_created = namespace_obj.add_field(field.name, field.type, non_zero=field.non_zero)
  889. field._regions_supported = support_regions
  890. assert(is_created != None)
  891. # if field is created (not cloned from the previous db),
  892. # mark the plug-in as updated in order to trigger full rescan
  893. self.is_updated = self.is_updated or is_created
class MetricPluginMixin(Parent):
    """Mix-in implementing pattern-counting metrics.

    A plug-in declares fields with declare_metric(); count_if_active() then
    scans file (or region) content with the registered regex pattern and a
    counter class, and stores the result under the plug-in's namespace.
    """

    class AliasError(Exception):
        # Raised when an alias has no registered pattern and no '*' fallback.
        def __init__(self, alias):
            Exception.__init__(self, "Unknown pattern alias: " + str(alias))

    class PlainCounter(object):
        # Default counter: number of non-overlapping regex matches inside
        # each marker's [offset_begin, offset_end) range.
        def __init__(self, namespace, field, plugin, alias, data, region):
            self.namespace = namespace
            self.field = field
            self.plugin = plugin
            self.alias = alias
            self.data = data
            self.region = region  # None when counting at file level
            self.result = 0

        def count(self, marker, pattern_to_search):
            self.result += len(pattern_to_search.findall(self.data.get_content(),
                                                         marker.get_offset_begin(),
                                                         marker.get_offset_end()))

        def get_result(self):
            return self.result

    class IterIncrementCounter(PlainCounter):
        # Adds increment(match) per match; subclasses override increment().
        def count(self, marker, pattern_to_search):
            self.marker = marker
            self.pattern_to_search = pattern_to_search
            for match in pattern_to_search.finditer(self.data.get_content(),
                                                    marker.get_offset_begin(),
                                                    marker.get_offset_end()):
                self.result += self.increment(match)

        def increment(self, match):
            return 1

    class IterAssignCounter(PlainCounter):
        # Replaces (not accumulates) the result per match via assign().
        def count(self, marker, pattern_to_search):
            self.marker = marker
            self.pattern_to_search = pattern_to_search
            for match in pattern_to_search.finditer(self.data.get_content(),
                                                    marker.get_offset_begin(),
                                                    marker.get_offset_end()):
                self.result = self.assign(match)

        def assign(self, match):
            return self.result

    class RankedCounter(PlainCounter):
        # Starts from the previously stored value (or 1) and multiplies it
        # by the rank of a source metric.
        def __init__(self, *args, **kwargs):
            super(MetricPluginMixin.RankedCounter, self).__init__(*args, **kwargs)
            self.result = self.region.get_data(self.namespace, self.field)
            if self.result == None:
                self.result = 1

        def get_result(self):
            # NOTE(review): self.rank_source and self.rank_ranges are not set
            # anywhere in this class — presumably assigned by the concrete
            # plug-in before use; verify against callers.
            sourced_metric = self.region.get_data(self.rank_source[0], self.rank_source[1])
            for (ind, range_pair) in enumerate(self.rank_ranges):
                # None bound means open-ended on that side
                if ((range_pair[0] == None or sourced_metric >= range_pair[0])
                    and
                    (range_pair[1] == None or sourced_metric <= range_pair[1])):
                    self.result = self.result * (ind + 1)
                    break
            return self.result

    def declare_metric(self, is_active, field,
                       pattern_to_search_or_map_of_patterns,
                       marker_type_mask=Marker.T.ANY,
                       region_type_mask=Region.T.ANY,
                       exclude_subregions=True,
                       merge_markers=False):
        """Register *field* with its pattern(s) and counting options.

        Accepts either a single pattern or a map alias->pattern; each value
        may also be a (pattern, counter_class) pair. Only stored when
        is_active is True.
        """
        if hasattr(self, '_fields') == False:
            self._fields = {}
        if isinstance(pattern_to_search_or_map_of_patterns, dict):
            map_of_patterns = pattern_to_search_or_map_of_patterns
        else:
            # single pattern becomes the wildcard alias
            map_of_patterns = {'*': pattern_to_search_or_map_of_patterns}
        # client may supply a pattern or a pair of pattern + counter class
        for key in list(map_of_patterns.keys()):
            if isinstance(map_of_patterns[key], tuple) == False:
                # if it is not a pair, create a pair using default counter class
                map_of_patterns[key] = (map_of_patterns[key],
                                        MetricPluginMixin.PlainCounter)
        if is_active == True:
            # tuple layout: (field, marker_mask, exclude_subregions,
            #                merge_markers, patterns_by_alias, region_mask)
            self._fields[field.name] = (field,
                                        marker_type_mask,
                                        exclude_subregions,
                                        merge_markers,
                                        map_of_patterns,
                                        region_type_mask)

    def is_active(self, metric_name = None):
        """With no argument: any metric declared? Otherwise: is *metric_name* declared?"""
        if metric_name == None:
            return (len(list(self._fields.keys())) > 0)
        return (metric_name in list(self._fields.keys()))

    def get_fields(self):
        """Return the declared Field descriptors."""
        result = []
        for key in list(self._fields.keys()):
            result.append(self._fields[key][0])
        return result

    def callback(self, parent, data, is_updated):
        # count if metric is enabled,
        # and (optimization for the case of iterative rescan:)
        # if file is updated or this plugin's settings are updated
        is_updated = is_updated or self.is_updated
        if is_updated == True:
            for field in self.get_fields():
                self.count_if_active(self.get_namespace(),
                                     field.name,
                                     data,
                                     alias=parent.get_name())
        # this mixin implements parent interface
        self.notify_children(data, is_updated)

    def count_if_active(self, namespace, field, data, alias='*'):
        """Count *field* in *data* using the pattern registered for *alias*.

        Falls back to the '*' alias; raises AliasError if neither exists.
        Writes results per region when the field supports regions, otherwise
        once at file level; zero results are skipped for non_zero fields.
        """
        if self.is_active(field) == False:
            return
        field_data = self._fields[field]
        if alias not in list(field_data[4].keys()):
            if '*' not in list(field_data[4].keys()):
                raise self.AliasError(alias)
            else:
                alias = '*'
        (pattern_to_search, counter_class) = field_data[4][alias]
        if field_data[0]._regions_supported == True:
            for region in data.iterate_regions(filter_group=field_data[5]):
                counter = counter_class(namespace, field, self, alias, data, region)
                if field_data[1] != Marker.T.NONE:
                    for marker in data.iterate_markers(
                                    filter_group = field_data[1],
                                    region_id = region.get_id(),
                                    exclude_children = field_data[2],
                                    merge=field_data[3]):
                        counter.count(marker, pattern_to_search)
                count = counter.get_result()
                if count != 0 or field_data[0].non_zero == False:
                    region.set_data(namespace, field, count)
        else:
            # file-level counting: single counter, no region
            counter = counter_class(namespace, field, self, alias, data, None)
            if field_data[1] != Marker.T.NONE:
                for marker in data.iterate_markers(
                                filter_group = field_data[1],
                                region_id = None,
                                exclude_children = field_data[2],
                                merge=field_data[3]):
                    counter.count(marker, pattern_to_search)
            count = counter.get_result()
            if count != 0 or field_data[0].non_zero == False:
                data.set_data(namespace, field, count)