#
# Metrix++, Copyright 2009-2013, Metrix++ Project
# Link: http://metrixplusplus.sourceforge.net
#
# This file is a part of Metrix++ Tool.
#
# Metrix++ is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3 of the License.
#
# Metrix++ is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Metrix++. If not, see <http://www.gnu.org/licenses/>.
#
import os.path
import sys

import mpp.internal.api_impl
import mpp.internal.dbwrap
  23. class InterfaceNotImplemented(Exception):
  24. def __init__(self, obj):
  25. Exception.__init__(self, "Method '"
  26. + sys._getframe(1).f_code.co_name
  27. + "' has not been implemented for "
  28. + str(obj.__class__))
  29. class IConfigurable(object):
  30. def configure(self, options):
  31. raise InterfaceNotImplemented(self)
  32. def declare_configuration(self, optparser):
  33. raise InterfaceNotImplemented(self)
  34. class IRunable(object):
  35. def run(self, args):
  36. raise InterfaceNotImplemented(self)
  37. class IParser(object):
  38. def process(self, parent, data, is_updated):
  39. raise InterfaceNotImplemented(self)
  40. class ICode(object):
  41. pass
  42. class CallbackNotImplemented(Exception):
  43. def __init__(self, obj, callback_name):
  44. Exception.__init__(self, "Callback '"
  45. + callback_name
  46. + "' has not been implemented for "
  47. + str(obj.__class__))
  48. class Child(object):
  49. def notify(self, parent, callback_name, *args):
  50. if hasattr(self, callback_name) == False:
  51. raise CallbackNotImplemented(self, callback_name)
  52. self.__getattribute__(callback_name)(parent, *args)
  53. def subscribe_by_parents_name(self, parent_name, callback_name='callback'):
  54. self.get_plugin(parent_name).subscribe(self, callback_name)
  55. def subscribe_by_parents_names(self, parent_names, callback_name='callback'):
  56. for parent_name in parent_names:
  57. self.get_plugin(parent_name).subscribe(self, callback_name)
  58. def subscribe_by_parents_interface(self, interface, callback_name='callback'):
  59. for plugin in self._get_plugin_loader().iterate_plugins():
  60. if isinstance(plugin, interface):
  61. plugin.subscribe(self, callback_name)
  62. class Parent(object):
  63. def init_Parent(self):
  64. if hasattr(self, 'children') == False:
  65. self.children = []
  66. def subscribe(self, obj, callback_name):
  67. self.init_Parent()
  68. if (isinstance(obj, Child) == False):
  69. raise TypeError()
  70. self.children.append((obj,callback_name))
  71. def unsubscribe(self, obj, callback_name):
  72. self.init_Parent()
  73. self.children.remove((obj, callback_name))
  74. def notify_children(self, *args):
  75. self.init_Parent()
  76. for child in self.children:
  77. child[0].notify(self, child[1], *args)
  78. def iterate_children(self):
  79. self.init_Parent()
  80. for child in self.children:
  81. yield child
##############################################################################
#
# Data model
#
##############################################################################
  87. class Data(object):
  88. def __init__(self):
  89. self.data = {}
  90. def get_data(self, namespace, field):
  91. if namespace not in self.data.keys():
  92. return None
  93. if field not in self.data[namespace].keys():
  94. return None
  95. return self.data[namespace][field]
  96. def set_data(self, namespace, field, value):
  97. if namespace not in self.data:
  98. self.data[namespace] = {}
  99. self.data[namespace][field] = value
  100. def iterate_namespaces(self):
  101. for namespace in self.data.keys():
  102. yield namespace
  103. def iterate_fields(self, namespace):
  104. for field in self.data[namespace].keys():
  105. yield (field, self.data[namespace][field])
  106. def get_data_tree(self, namespaces=None):
  107. return self.data
  108. def __repr__(self):
  109. return object.__repr__(self) + " with data " + self.data.__repr__()
  110. class LoadableData(Data):
  111. def __init__(self, loader, file_id, region_id):
  112. Data.__init__(self)
  113. self.loader = loader
  114. self.file_id = file_id
  115. self.region_id = region_id
  116. self.loaded_namespaces = []
  117. self.changed_namespaces = []
  118. def load_namespace(self, namespace):
  119. namespace_obj = self.loader.get_namespace(namespace)
  120. if namespace_obj == None:
  121. return
  122. regions_supported = namespace_obj.are_regions_supported()
  123. if ((self.region_id == None and regions_supported == True) or
  124. (self.region_id != None and regions_supported == False)):
  125. return
  126. row = self.loader.db.get_row(namespace, self.file_id, self.region_id)
  127. if row == None:
  128. return
  129. for column_name in row.keys():
  130. try:
  131. packager = namespace_obj._get_field_packager(column_name)
  132. except mpp.internal.api_impl.PackagerError:
  133. continue
  134. if row[column_name] == None:
  135. continue
  136. Data.set_data(self, namespace, column_name, packager.unpack(row[column_name]))
  137. def set_data(self, namespace, field, value):
  138. if namespace not in self.changed_namespaces:
  139. self.changed_namespaces.append(namespace)
  140. return Data.set_data(self, namespace, field, value)
  141. def get_data(self, namespace, field):
  142. if namespace not in self.loaded_namespaces:
  143. self.loaded_namespaces.append(namespace)
  144. self.load_namespace(namespace)
  145. return Data.get_data(self, namespace, field)
  146. def is_namespace_updated(self, namespace):
  147. return namespace in self.changed_namespaces
  148. def is_namespace_loaded(self, namespace):
  149. return namespace in self.loaded_namespaces
  150. def get_data_tree(self, namespaces=None):
  151. if namespaces == None:
  152. namespaces = self.loader.iterate_namespace_names()
  153. for each in namespaces:
  154. self.load_namespace(each)
  155. return Data.get_data_tree(self)
  156. class Region(LoadableData):
  157. class T(object):
  158. NONE = 0x00
  159. GLOBAL = 0x01
  160. CLASS = 0x02
  161. STRUCT = 0x04
  162. NAMESPACE = 0x08
  163. FUNCTION = 0x10
  164. INTERFACE = 0x20
  165. ANY = 0xFF
  166. def to_str(self, group):
  167. if group == self.NONE:
  168. return "none"
  169. elif group == self.GLOBAL:
  170. return "global"
  171. elif group == self.CLASS:
  172. return "class"
  173. elif group == self.STRUCT:
  174. return "struct"
  175. elif group == self.NAMESPACE:
  176. return "namespace"
  177. elif group == self.FUNCTION:
  178. return "function"
  179. elif group == self.INTERFACE:
  180. return "interface"
  181. else:
  182. assert(False)
  183. def __init__(self, loader, file_id, region_id, region_name, offset_begin, offset_end, line_begin, line_end, cursor_line, group, checksum):
  184. LoadableData.__init__(self, loader, file_id, region_id)
  185. self.name = region_name
  186. self.begin = offset_begin
  187. self.end = offset_end
  188. self.line_begin = line_begin
  189. self.line_end = line_end
  190. self.cursor = cursor_line
  191. self.group = group
  192. self.checksum = checksum
  193. self.children = []
  194. def get_id(self):
  195. return self.region_id
  196. def get_name(self):
  197. return self.name
  198. def get_offset_begin(self):
  199. return self.begin
  200. def get_offset_end(self):
  201. return self.end
  202. def get_line_begin(self):
  203. return self.line_begin
  204. def get_line_end(self):
  205. return self.line_end
  206. def get_cursor(self):
  207. return self.cursor
  208. def get_type(self):
  209. return self.group
  210. def get_checksum(self):
  211. return self.checksum
  212. def iterate_subregion_ids(self):
  213. return self.children
  214. def _register_subregion_id(self, child_id):
  215. self.children.append(child_id)
  216. class Marker(object):
  217. class T(object):
  218. NONE = 0x00
  219. COMMENT = 0x01
  220. STRING = 0x02
  221. PREPROCESSOR = 0x04
  222. CODE = 0x08
  223. ANY = 0xFF
  224. def to_str(self, group):
  225. if group == self.NONE:
  226. return "none"
  227. elif group == self.COMMENT:
  228. return "comment"
  229. elif group == self.STRING:
  230. return "string"
  231. elif group == self.PREPROCESSOR:
  232. return "preprocessor"
  233. elif group == self.CODE:
  234. return "code"
  235. else:
  236. assert(False)
  237. def __init__(self, offset_begin, offset_end, group):
  238. self.begin = offset_begin
  239. self.end = offset_end
  240. self.group = group
  241. def get_offset_begin(self):
  242. return self.begin
  243. def get_offset_end(self):
  244. return self.end
  245. def get_type(self):
  246. return self.group
  247. class FileData(LoadableData):
  248. def __init__(self, loader, path, file_id, checksum, content):
  249. LoadableData.__init__(self, loader, file_id, None)
  250. self.path = path
  251. self.checksum = checksum
  252. self.content = content
  253. self.regions = None
  254. self.markers = None
  255. self.loader = loader
  256. self.loading_tmp = []
  257. def get_id(self):
  258. return self.file_id
  259. def get_path(self):
  260. return self.path
  261. def get_checksum(self):
  262. return self.checksum
  263. def get_content(self):
  264. return self.content
  265. def _internal_append_region(self, region):
  266. # here we apply some magic - we rely on special ordering of coming regions,
  267. # which is supported by code parsers
  268. prev_id = None
  269. while True:
  270. if len(self.loading_tmp) == 0:
  271. break
  272. prev_id = self.loading_tmp.pop()
  273. if self.get_region(prev_id).get_offset_end() > region.get_offset_begin():
  274. self.loading_tmp.append(prev_id) # return back
  275. break
  276. self.loading_tmp.append(region.get_id())
  277. if prev_id != None:
  278. self.get_region(prev_id)._register_subregion_id(region.get_id())
  279. self.regions.append(region)
  280. def load_regions(self):
  281. if self.regions == None:
  282. self.regions = []
  283. for each in self.loader.db.iterate_regions(self.get_id()):
  284. self._internal_append_region(Region(self.loader,
  285. self.get_id(),
  286. each.region_id,
  287. each.name,
  288. each.begin,
  289. each.end,
  290. each.line_begin,
  291. each.line_end,
  292. each.cursor,
  293. each.group,
  294. each.checksum))
  295. assert(len(self.regions) == each.region_id)
  296. def add_region(self, region_name, offset_begin, offset_end, line_begin, line_end, cursor_line, group, checksum):
  297. if self.regions == None:
  298. # # do not load regions and markers in time of collection
  299. # if region is added first by parser, set markers to empty list as well
  300. # because if there are no markers in a file, it forces loading of markers
  301. # during iterate_markers call
  302. self.regions = []
  303. self.markers = []
  304. new_id = len(self.regions) + 1
  305. self._internal_append_region(Region(self.loader, self.get_id(), new_id, region_name, offset_begin, offset_end, line_begin, line_end, cursor_line, group, checksum))
  306. self.loader.db.create_region(self.file_id, new_id, region_name, offset_begin, offset_end, line_begin, line_end, cursor_line, group, checksum)
  307. return new_id
  308. def get_region(self, region_id):
  309. self.load_regions()
  310. return self.regions[region_id - 1]
  311. def iterate_regions(self, filter_group = Region.T.ANY):
  312. self.load_regions()
  313. for each in self.regions:
  314. if each.group & filter_group:
  315. yield each
  316. def are_regions_loaded(self):
  317. return self.regions != None
  318. def load_markers(self):
  319. if self.markers == None:
  320. # TODO add assert in case of an attempt to load data during collection
  321. assert(False) # TODO not used in post-processing tools for while, need to be fixed
  322. self.markers = []
  323. for each in self.loader.db.iterate_markers(self.get_id()):
  324. self.markers.append(Marker(each.begin, each.end, each.group))
  325. def add_marker(self, offset_begin, offset_end, group):
  326. if self.markers == None:
  327. # # do not load regions and markers in time of collection
  328. # if marker is added first by parser, set regions to empty list as well
  329. # because if there are no regions in a file, it forces loading of regions
  330. # during iterate_regions call
  331. self.regions = []
  332. self.markers = []
  333. self.markers.append(Marker(offset_begin, offset_end, group))
  334. # TODO drop collecting markers, it is faster to double parse
  335. # it is not the same with regions, it is faster to load regions
  336. # on iterative re-run
  337. #self.loader.db.create_marker(self.file_id, offset_begin, offset_end, group)
  338. def iterate_markers(self, filter_group = Marker.T.ANY,
  339. region_id = None, exclude_children = True, merge = False):
  340. self.load_markers()
  341. # merged markers
  342. if merge == True:
  343. next_marker = None
  344. for marker in self.iterate_markers(filter_group, region_id, exclude_children, merge = False):
  345. if next_marker != None:
  346. if next_marker.get_offset_end() == marker.get_offset_begin():
  347. # sequential markers
  348. next_marker = Marker(next_marker.get_offset_begin(),
  349. marker.get_offset_end(),
  350. next_marker.get_type() | marker.get_type())
  351. else:
  352. yield next_marker
  353. next_marker = None
  354. if next_marker == None:
  355. next_marker = Marker(marker.get_offset_begin(),
  356. marker.get_offset_end(),
  357. marker.get_type())
  358. if next_marker != None:
  359. yield next_marker
  360. # all markers per file
  361. elif region_id == None:
  362. next_code_marker_start = 0
  363. for marker in self.markers:
  364. if Marker.T.CODE & filter_group and next_code_marker_start < marker.get_offset_begin():
  365. yield Marker(next_code_marker_start, marker.get_offset_begin(), Marker.T.CODE)
  366. if marker.group & filter_group:
  367. yield marker
  368. next_code_marker_start = marker.get_offset_end()
  369. if Marker.T.CODE & filter_group and next_code_marker_start < len(self.get_content()):
  370. yield Marker(next_code_marker_start, len(self.get_content()), Marker.T.CODE)
  371. # markers per region
  372. else:
  373. region = self.get_region(region_id)
  374. if region != None:
  375. # code parsers and database know about non-code markers
  376. # clients want to iterate code as markers as well
  377. # so, we embed code markers in run-time
  378. class CodeMarker(Marker):
  379. pass
  380. # cache markers for all regions if it does not exist
  381. if hasattr(region, '_markers_list') == False:
  382. # subroutine to populate _markers_list attribute
  383. # _markers_list does include code markers
  384. def cache_markers_list_rec(data, region_id, marker_start_ind, next_code_marker_start):
  385. region = data.get_region(region_id)
  386. region._markers_list = []
  387. region._first_marker_ind = marker_start_ind
  388. #next_code_marker_start = region.get_offset_begin()
  389. for sub_id in region.iterate_subregion_ids():
  390. subregion = data.get_region(sub_id)
  391. # cache all markers before the subregion
  392. while len(data.markers) > marker_start_ind and \
  393. subregion.get_offset_begin() > data.markers[marker_start_ind].get_offset_begin():
  394. if next_code_marker_start < data.markers[marker_start_ind].get_offset_begin():
  395. # append code markers coming before non-code marker
  396. region._markers_list.append(CodeMarker(next_code_marker_start,
  397. data.markers[marker_start_ind].get_offset_begin(),
  398. Marker.T.CODE))
  399. next_code_marker_start = data.markers[marker_start_ind].get_offset_end()
  400. region._markers_list.append(marker_start_ind)
  401. marker_start_ind += 1
  402. # cache all code markers before the subregion but after the last marker
  403. if next_code_marker_start < subregion.get_offset_begin():
  404. region._markers_list.append(CodeMarker(next_code_marker_start,
  405. subregion.get_offset_begin(),
  406. Marker.T.CODE))
  407. next_code_marker_start = subregion.get_offset_begin()
  408. # here is the recursive call for all sub-regions
  409. (marker_start_ind, next_code_marker_start) = cache_markers_list_rec(data,
  410. sub_id,
  411. marker_start_ind,
  412. next_code_marker_start)
  413. # cache all markers after the last subregion
  414. while len(data.markers) > marker_start_ind and \
  415. region.get_offset_end() > data.markers[marker_start_ind].get_offset_begin():
  416. # append code markers coming before non-code marker
  417. if next_code_marker_start < data.markers[marker_start_ind].get_offset_begin():
  418. region._markers_list.append(CodeMarker(next_code_marker_start,
  419. data.markers[marker_start_ind].get_offset_begin(),
  420. Marker.T.CODE))
  421. next_code_marker_start = data.markers[marker_start_ind].get_offset_end()
  422. region._markers_list.append(marker_start_ind)
  423. marker_start_ind += 1
  424. # cache the last code segment after the last marker
  425. if next_code_marker_start < region.get_offset_end():
  426. region._markers_list.append(CodeMarker(next_code_marker_start,
  427. region.get_offset_end(),
  428. Marker.T.CODE))
  429. next_code_marker_start = region.get_offset_end()
  430. # return the starting point for the next call of this function
  431. return (marker_start_ind, next_code_marker_start)
  432. # append markers list to all regions recursively
  433. (next_marker_pos, next_code_marker_start) = cache_markers_list_rec(self, 1, 0, 0)
  434. assert(next_marker_pos == len(self.markers))
  435. # excluding subregions
  436. if exclude_children == True:
  437. for marker_ind in region._markers_list:
  438. if isinstance(marker_ind, int):
  439. marker = self.markers[marker_ind]
  440. else:
  441. marker = marker_ind # CodeMarker
  442. if marker.group & filter_group:
  443. yield marker
  444. # including subregions
  445. else:
  446. next_code_marker_start = region.get_offset_begin()
  447. for marker in self.markers[region._first_marker_ind:]:
  448. if marker.get_offset_begin() >= region.get_offset_end():
  449. break
  450. if region.get_offset_begin() > marker.get_offset_begin():
  451. continue
  452. if Marker.T.CODE & filter_group and next_code_marker_start < marker.get_offset_begin():
  453. yield Marker(next_code_marker_start, marker.get_offset_begin(), Marker.T.CODE)
  454. if marker.group & filter_group:
  455. yield marker
  456. next_code_marker_start = marker.get_offset_end()
  457. if Marker.T.CODE & filter_group and next_code_marker_start < region.get_offset_end():
  458. yield Marker(next_code_marker_start, region.get_offset_end(), Marker.T.CODE)
  459. def are_markers_loaded(self):
  460. return self.markers != None
  461. def __repr__(self):
  462. return Data.__repr__(self) + " and regions " + self.regions.__repr__()
  463. class AggregatedData(Data):
  464. def __init__(self, loader, path):
  465. Data.__init__(self)
  466. self.path = path
  467. self.loader = loader
  468. self.subdirs = None
  469. self.subfiles = None
  470. def get_subdirs(self):
  471. if self.subdirs != None:
  472. return self.subdirs
  473. self.subdirs = []
  474. if self.path != None:
  475. for subdir in self.loader.db.iterate_dircontent(self.path, include_subdirs = True, include_subfiles = False):
  476. self.subdirs.append(subdir)
  477. return self.subdirs
  478. def get_subfiles(self):
  479. if self.subfiles != None:
  480. return self.subfiles
  481. self.subfiles = []
  482. if self.path != None:
  483. for subfile in self.loader.db.iterate_dircontent(self.path, include_subdirs = False, include_subfiles = True):
  484. self.subfiles.append(subfile)
  485. return self.subfiles
  486. class SelectData(Data):
  487. def __init__(self, loader, path, file_id, region_id):
  488. Data.__init__(self)
  489. self.loader = loader
  490. self.path = path
  491. self.file_id = file_id
  492. self.region_id = region_id
  493. self.region = None
  494. def get_path(self):
  495. return self.path
  496. def get_region(self):
  497. if self.region == None and self.region_id != None:
  498. row = self.loader.db.get_region(self.file_id, self.region_id)
  499. if row != None:
  500. self.region = Region(self.loader,
  501. self.file_id,
  502. self.region_id,
  503. row.name,
  504. row.begin,
  505. row.end,
  506. row.line_begin,
  507. row.line_end,
  508. row.cursor,
  509. row.group,
  510. row.checksum)
  511. return self.region
  512. class DiffData(Data):
  513. def __init__(self, new_data, old_data):
  514. Data.__init__(self)
  515. self.new_data = new_data
  516. self.old_data = old_data
  517. def get_data(self, namespace, field):
  518. new_data = self.new_data.get_data(namespace, field)
  519. old_data = self.old_data.get_data(namespace, field)
  520. if new_data == None:
  521. return None
  522. if old_data == None:
  523. # non_zero fields has got zero value by default if missed
  524. # the data can be also unavailable,
  525. # because previous collection does not include that
  526. # but external tools (like limit.py) should warn about this,
  527. # using list of registered database properties
  528. old_data = 0
  529. return new_data - old_data
####################################
# Loader
####################################
  533. class Namespace(object):
  534. class NamespaceError(Exception):
  535. def __init__(self, namespace, reason):
  536. Exception.__init__(self, "Namespace '"
  537. + namespace
  538. + "': '"
  539. + reason
  540. + "'")
  541. class FieldError(Exception):
  542. def __init__(self, field, reason):
  543. Exception.__init__(self, "Field '"
  544. + field
  545. + "': '"
  546. + reason
  547. + "'")
  548. def __init__(self, db_handle, name, support_regions = False, version='1.0'):
  549. if not isinstance(name, str):
  550. raise Namespace.NamespaceError(name, "name not a string")
  551. self.name = name
  552. self.support_regions = support_regions
  553. self.fields = {}
  554. self.db = db_handle
  555. if self.db.check_table(name) == False:
  556. self.db.create_table(name, support_regions, version)
  557. else:
  558. for column in self.db.iterate_columns(name):
  559. self.add_field(column.name,
  560. mpp.internal.api_impl.PackagerFactory().get_python_type(column.sql_type),
  561. non_zero=column.non_zero)
  562. def get_name(self):
  563. return self.name
  564. def are_regions_supported(self):
  565. return self.support_regions
  566. def add_field(self, field_name, python_type, non_zero=False):
  567. if not isinstance(field_name, str):
  568. raise Namespace.FieldError(field_name, "field_name not a string")
  569. packager = mpp.internal.api_impl.PackagerFactory().create(python_type, non_zero)
  570. if field_name in self.fields.keys():
  571. raise Namespace.FieldError(field_name, "double used")
  572. self.fields[field_name] = packager
  573. if self.db.check_column(self.get_name(), field_name) == False:
  574. # - False if cloned
  575. # - True if created
  576. return self.db.create_column(self.name, field_name, packager.get_sql_type(), non_zero=non_zero)
  577. return None # if double request
  578. def iterate_field_names(self):
  579. for name in self.fields.keys():
  580. yield name
  581. def check_field(self, field_name):
  582. try:
  583. self._get_field_packager(field_name)
  584. except mpp.internal.api_impl.PackagerError:
  585. return False
  586. return True
  587. def get_field_sql_type(self, field_name):
  588. try:
  589. return self._get_field_packager(field_name).get_sql_type()
  590. except mpp.internal.api_impl.PackagerError:
  591. raise Namespace.FieldError(field_name, 'does not exist')
  592. def get_field_python_type(self, field_name):
  593. try:
  594. return self._get_field_packager(field_name).get_python_type()
  595. except mpp.internal.api_impl.PackagerError:
  596. raise Namespace.FieldError(field_name, 'does not exist')
  597. def is_field_non_zero(self, field_name):
  598. try:
  599. return self._get_field_packager(field_name).is_non_zero()
  600. except mpp.internal.api_impl.PackagerError:
  601. raise Namespace.FieldError(field_name, 'does not exist')
  602. def _get_field_packager(self, field_name):
  603. if field_name in self.fields.keys():
  604. return self.fields[field_name]
  605. else:
  606. raise mpp.internal.api_impl.PackagerError("unknown field " + field_name + " requested")
  607. class Loader(object):
  608. def __init__(self):
  609. self.namespaces = {}
  610. self.db = None
  611. self.last_file_data = None # for performance boost reasons
  612. def create_database(self, dbfile, previous_db = None):
  613. self.db = mpp.internal.dbwrap.Database()
  614. try:
  615. self.db.create(dbfile, clone_from=previous_db)
  616. except:
  617. return False
  618. return True
  619. def open_database(self, dbfile, read_only = True):
  620. self.db = mpp.internal.dbwrap.Database()
  621. if os.path.exists(dbfile) == False:
  622. return False
  623. try:
  624. self.db.connect(dbfile, read_only=read_only)
  625. except:
  626. return False
  627. for table in self.db.iterate_tables():
  628. self.create_namespace(table.name, table.support_regions)
  629. return True
  630. def set_property(self, property_name, value):
  631. if self.db == None:
  632. return None
  633. return self.db.set_property(property_name, str(value))
  634. def get_property(self, property_name):
  635. if self.db == None:
  636. return None
  637. return self.db.get_property(property_name)
  638. def iterate_properties(self):
  639. if self.db == None:
  640. return None
  641. return self.db.iterate_properties()
  642. def create_namespace(self, name, support_regions = False, version='1.0'):
  643. if self.db == None:
  644. return None
  645. if name in self.namespaces.keys():
  646. raise Namespace.NamespaceError(name, "double used")
  647. new_namespace = Namespace(self.db, name, support_regions, version)
  648. self.namespaces[name] = new_namespace
  649. return new_namespace
  650. def iterate_namespace_names(self):
  651. for name in self.namespaces.keys():
  652. yield name
  653. def get_namespace(self, name):
  654. if name in self.namespaces.keys():
  655. return self.namespaces[name]
  656. else:
  657. return None
  658. def create_file_data(self, path, checksum, content):
  659. if self.db == None:
  660. return None
  661. (new_id, is_updated) = self.db.create_file(path, checksum)
  662. result = FileData(self, path, new_id, checksum, content)
  663. self.last_file_data = result
  664. return (result, is_updated)
  665. def load_file_data(self, path):
  666. if self.db == None:
  667. return None
  668. if self.last_file_data != None and self.last_file_data.get_path() == path:
  669. return self.last_file_data
  670. data = self.db.get_file(path)
  671. if data == None:
  672. return None
  673. result = FileData(self, data.path, data.id, data.checksum, None)
  674. self.last_file_data = result
  675. return result
  676. class DataNotPackable(Exception):
  677. def __init__(self, namespace, field, value, packager, extra_message):
  678. Exception.__init__(self, "Data '"
  679. + str(value)
  680. + "' of type "
  681. + str(value.__class__)
  682. + " referred by '"
  683. + namespace
  684. + "=>"
  685. + field
  686. + "' is not packable by registered packager '"
  687. + str(packager.__class__)
  688. + "': " + extra_message)
def save_file_data(self, file_data):
    """Persist all updated namespaces of *file_data* (and of its regions)
    into the database. Returns None when no database is attached."""
    if self.db == None:
        return None

    # Lazily packs (field, value) pairs for one namespace; handed to the DB
    # layer as an iterable so rows are serialised on demand.
    class DataIterator(object):
        def iterate_packed_values(self, data, namespace, support_regions = False):
            for each in data.iterate_fields(namespace):
                # each[0] is the field name, each[1] the raw value
                space = self.loader.get_namespace(namespace)
                if space == None:
                    raise Loader.DataNotPackable(namespace, each[0], each[1], None, "The namespace has not been found")
                try:
                    packager = space._get_field_packager(each[0])
                except mpp.internal.api_impl.PackagerError:
                    raise Loader.DataNotPackable(namespace, each[0], each[1], None, "The field has not been found")
                # a region-level value must go to a region-aware namespace and vice versa
                if space.support_regions != support_regions:
                    raise Loader.DataNotPackable(namespace, each[0], each[1], packager, "Incompatible support for regions")
                try:
                    packed_data = packager.pack(each[1])
                    if packed_data == None:
                        # packager returning None means "skip this value"
                        continue
                except mpp.internal.api_impl.PackagerError:
                    raise Loader.DataNotPackable(namespace, each[0], each[1], packager, "Packager raised exception")
                yield (each[0], packed_data)
        def __init__(self, loader, data, namespace, support_regions = False):
            self.loader = loader
            self.iterator = self.iterate_packed_values(data, namespace, support_regions)
        def __iter__(self):
            return self.iterator

    # TODO can construct to add multiple rows at one sql query
    # to improve the performance
    # file-level rows (region id is None)
    for namespace in file_data.iterate_namespaces():
        if file_data.is_namespace_updated(namespace) == False:
            continue
        self.db.add_row(namespace,
                        file_data.get_id(),
                        None,
                        DataIterator(self, file_data, namespace))
    # region-level rows, only when regions were actually parsed/loaded
    if file_data.are_regions_loaded():
        for region in file_data.iterate_regions():
            for namespace in region.iterate_namespaces():
                if region.is_namespace_updated(namespace) == False:
                    continue
                self.db.add_row(namespace,
                                file_data.get_id(),
                                region.get_id(),
                                DataIterator(self, region, namespace, support_regions = True))
  734. def iterate_file_data(self, path = None, path_like_filter = "%"):
  735. if self.db == None:
  736. return None
  737. final_path_like = path_like_filter
  738. if path != None:
  739. if self.db.check_dir(path) == False and self.db.check_file(path) == False:
  740. return None
  741. final_path_like = path + path_like_filter
  742. class FileDataIterator(object):
  743. def iterate_file_data(self, loader, final_path_like):
  744. for data in loader.db.iterate_files(path_like=final_path_like):
  745. yield FileData(loader, data.path, data.id, data.checksum, None)
  746. def __init__(self, loader, final_path_like):
  747. self.iterator = self.iterate_file_data(loader, final_path_like)
  748. def __iter__(self):
  749. return self.iterator
  750. if self.db == None:
  751. return None
  752. return FileDataIterator(self, final_path_like)
  753. def load_aggregated_data(self, path = None, path_like_filter = "%", namespaces = None):
  754. if self.db == None:
  755. return None
  756. final_path_like = path_like_filter
  757. if path != None:
  758. if self.db.check_dir(path) == False and self.db.check_file(path) == False:
  759. return None
  760. final_path_like = path + path_like_filter
  761. if namespaces == None:
  762. namespaces = self.namespaces.keys()
  763. result = AggregatedData(self, path)
  764. for name in namespaces:
  765. namespace = self.get_namespace(name)
  766. data = self.db.aggregate_rows(name, path_like = final_path_like)
  767. for field in data.keys():
  768. if namespace.get_field_python_type(field) == str:
  769. continue
  770. data[field]['nonzero'] = namespace.is_field_non_zero(field)
  771. distribution = self.db.count_rows(name, path_like = final_path_like, group_by_column = field)
  772. data[field]['distribution-bars'] = []
  773. for each in distribution:
  774. if each[0] == None:
  775. continue
  776. assert(float(data[field]['count'] != 0))
  777. data[field]['distribution-bars'].append({'metric': each[0],
  778. 'count': each[1],
  779. 'ratio': (float(each[1]) / float(data[field]['count']))})
  780. result.set_data(name, field, data[field])
  781. return result
def load_selected_data(self, namespace, fields = None, path = None, path_like_filter = "%", filters = [],
                       sort_by = None, limit_by = None):
    """Select per-file (and per-region, when supported) metric rows.

    Returns an iterable of SelectData, or None when no database is
    attached, the path is unknown, or the namespace is not registered.
    NOTE(review): 'filters' has a mutable default list — safe here only
    because it is never mutated; confirm before changing.
    """
    if self.db == None:
        return None
    final_path_like = path_like_filter
    if path != None:
        # reject paths the database has never seen
        if self.db.check_dir(path) == False and self.db.check_file(path) == False:
            return None
        final_path_like = path + path_like_filter
    namespace_obj = self.get_namespace(namespace)
    if namespace_obj == None:
        return None

    # Lazy wrapper: rows are fetched and wrapped into SelectData on demand.
    class SelectDataIterator(object):
        def iterate_selected_values(self, loader, namespace_obj, final_path_like, fields, filters, sort_by, limit_by):
            for row in loader.db.select_rows(namespace_obj.get_name(), path_like=final_path_like, filters=filters,
                                             order_by=sort_by, limit_by=limit_by):
                region_id = None
                if namespace_obj.are_regions_supported() == True:
                    region_id = row['region_id']
                data = SelectData(loader, row['path'], row['id'], region_id)
                field_names = fields
                if fields == None:
                    # no explicit selection: copy every field of the namespace
                    field_names = namespace_obj.iterate_field_names()
                for field in field_names:
                    data.set_data(namespace, field, row[field])
                yield data
        def __init__(self, loader, namespace_obj, final_path_like, fields, filters, sort_by, limit_by):
            self.iterator = self.iterate_selected_values(loader, namespace_obj, final_path_like, fields, filters, sort_by, limit_by)
        def __iter__(self):
            return self.iterator

    return SelectDataIterator(self, namespace_obj, final_path_like, fields, filters, sort_by, limit_by)
  813. class BasePlugin(object):
  814. def initialize(self):
  815. pass
  816. def terminate(self):
  817. pass
  818. def set_name(self, name):
  819. self.name = name
  820. def get_name(self):
  821. if hasattr(self, 'name') == False:
  822. return None
  823. return self.name
  824. def get_namespace(self):
  825. return self.get_name()
  826. def set_version(self, version):
  827. self.version = version
  828. def get_version(self):
  829. if hasattr(self, 'version') == False:
  830. return None
  831. return self.version
  832. def _set_plugin_loader(self, loader):
  833. self.plugin_loader = loader
  834. def _get_plugin_loader(self):
  835. if hasattr(self, 'plugin_loader') == False:
  836. return None
  837. return self.plugin_loader
  838. def get_plugin(self, plugin_name):
  839. return self._get_plugin_loader().get_plugin(plugin_name)
  840. def get_action(self):
  841. return self._get_plugin_loader().get_action()
  842. class Plugin(BasePlugin):
  843. class Field(object):
  844. def __init__(self, name, ftype, non_zero=False):
  845. self.name = name
  846. self.type = ftype
  847. self.non_zero = non_zero
  848. self._regions_supported = True
  849. class Property(object):
  850. def __init__(self, name, value):
  851. self.name = name
  852. self.value = value
  853. def initialize(self, namespace=None, support_regions=True, fields=[], properties=[]):
  854. super(Plugin, self).initialize()
  855. if hasattr(self, 'is_updated') == False:
  856. self.is_updated = False # original initialization
  857. db_loader = self.get_plugin('mpp.dbf').get_loader()
  858. if namespace == None:
  859. namespace = self.get_name()
  860. if (len(fields) != 0 or len(properties) != 0):
  861. prev_version = db_loader.set_property(self.get_name() + ":version", self.get_version())
  862. if str(prev_version) != str(self.get_version()):
  863. self.is_updated = True
  864. for prop in properties:
  865. assert(prop.name != 'version')
  866. prev_prop = db_loader.set_property(self.get_name() + ":" + prop.name, prop.value)
  867. if str(prev_prop) != str(prop.value):
  868. self.is_updated = True
  869. if len(fields) != 0:
  870. namespace_obj = db_loader.create_namespace(namespace,
  871. support_regions=support_regions,
  872. version=self.get_version())
  873. for field in fields:
  874. is_created = namespace_obj.add_field(field.name, field.type, non_zero=field.non_zero)
  875. field._regions_supported = support_regions
  876. assert(is_created != None)
  877. # if field is created (not cloned from the previous db),
  878. # mark the plug-in as updated in order to trigger full rescan
  879. self.is_updated = self.is_updated or is_created
class MetricPluginMixin(Parent):
    # Mixin adding regex-driven metric counting to plugins; metrics are
    # registered via declare_metric and evaluated in count_if_active.
    class AliasError(Exception):
        # Raised when a pattern alias was never registered for a metric.
        def __init__(self, alias):
            Exception.__init__(self, "Unknown pattern alias: " + str(alias))
  884. class PlainCounter(object):
  885. def __init__(self, namespace, field, plugin, alias, data, region):
  886. self.namespace = namespace
  887. self.field = field
  888. self.plugin = plugin
  889. self.alias = alias
  890. self.data = data
  891. self.region = region
  892. self.result = 0
  893. def count(self, marker, pattern_to_search):
  894. self.result += len(pattern_to_search.findall(self.data.get_content(),
  895. marker.get_offset_begin(),
  896. marker.get_offset_end()))
  897. def get_result(self):
  898. return self.result
  899. class IterIncrementCounter(PlainCounter):
  900. def count(self, marker, pattern_to_search):
  901. self.marker = marker
  902. self.pattern_to_search = pattern_to_search
  903. for match in pattern_to_search.finditer(self.data.get_content(),
  904. marker.get_offset_begin(),
  905. marker.get_offset_end()):
  906. self.result += self.increment(match)
  907. def increment(self, match):
  908. return 1
  909. class IterAssignCounter(PlainCounter):
  910. def count(self, marker, pattern_to_search):
  911. self.marker = marker
  912. self.pattern_to_search = pattern_to_search
  913. for match in pattern_to_search.finditer(self.data.get_content(),
  914. marker.get_offset_begin(),
  915. marker.get_offset_end()):
  916. self.result = self.assign(match)
  917. def assign(self, match):
  918. return self.result
class RankedCounter(PlainCounter):
    # Produces a rank: the previously stored value multiplied by the 1-based
    # index of the first range in self.rank_ranges matching the source metric.
    def __init__(self, *args, **kwargs):
        super(MetricPluginMixin.RankedCounter, self).__init__(*args, **kwargs)
        # seed from the value stored on the region, defaulting to rank 1
        self.result = self.region.get_data(self.namespace, self.field)
        if self.result == None:
            self.result = 1
    def get_result(self):
        # NOTE(review): self.rank_source and self.rank_ranges are expected
        # to be attached externally before use — confirm against callers.
        sourced_metric = self.region.get_data(self.rank_source[0], self.rank_source[1])
        for (ind, range_pair) in enumerate(self.rank_ranges):
            # a None bound means the range is open-ended on that side
            if ((range_pair[0] == None or sourced_metric >= range_pair[0])
                and
                (range_pair[1] == None or sourced_metric <= range_pair[1])):
                self.result = self.result * (ind + 1)
                break
        return self.result
  934. def declare_metric(self, is_active, field,
  935. pattern_to_search_or_map_of_patterns,
  936. marker_type_mask=Marker.T.ANY,
  937. region_type_mask=Region.T.ANY,
  938. exclude_subregions=True,
  939. merge_markers=False):
  940. if hasattr(self, '_fields') == False:
  941. self._fields = {}
  942. if isinstance(pattern_to_search_or_map_of_patterns, dict):
  943. map_of_patterns = pattern_to_search_or_map_of_patterns
  944. else:
  945. map_of_patterns = {'*': pattern_to_search_or_map_of_patterns}
  946. # client may suply with pattern or pair of pattern + counter class
  947. for key in map_of_patterns.keys():
  948. if isinstance(map_of_patterns[key], tuple) == False:
  949. # if it is not a pair, create a pair using default counter class
  950. map_of_patterns[key] = (map_of_patterns[key],
  951. MetricPluginMixin.PlainCounter)
  952. if is_active == True:
  953. self._fields[field.name] = (field,
  954. marker_type_mask,
  955. exclude_subregions,
  956. merge_markers,
  957. map_of_patterns,
  958. region_type_mask)
  959. def is_active(self, metric_name = None):
  960. if metric_name == None:
  961. return (len(self._fields.keys()) > 0)
  962. return (metric_name in self._fields.keys())
  963. def get_fields(self):
  964. result = []
  965. for key in self._fields.keys():
  966. result.append(self._fields[key][0])
  967. return result
def callback(self, parent, data, is_updated):
    """Per-file hook: run all active counters when the file (or this
    plugin's configuration) changed, then propagate to child plugins."""
    # count if metric is enabled,
    # and (optimization for the case of iterative rescan:)
    # if file is updated or this plugin's settings are updated
    is_updated = is_updated or self.is_updated
    if is_updated == True:
        for field in self.get_fields():
            self.count_if_active(self.get_namespace(),
                                 field.name,
                                 data,
                                 alias=parent.get_name())
    # this mixin implements parent interface
    self.notify_children(data, is_updated)
def count_if_active(self, namespace, field, data, alias='*'):
    """Evaluate the counter configured for *field* against *data*.

    field_data tuple layout (filled by declare_metric):
    [0] Field object, [1] marker type mask, [2] exclude_subregions flag,
    [3] merge_markers flag, [4] alias -> (pattern, counter class) map,
    [5] region type mask.
    Raises AliasError when neither *alias* nor '*' has a pattern.
    """
    if self.is_active(field) == False:
        return
    field_data = self._fields[field]
    if alias not in field_data[4].keys():
        if '*' not in field_data[4].keys():
            raise self.AliasError(alias)
        else:
            # fall back to the wildcard pattern
            alias = '*'
    (pattern_to_search, counter_class) = field_data[4][alias]
    if field_data[0]._regions_supported == True:
        # region-level metric: one counter instance per matching region
        for region in data.iterate_regions(filter_group=field_data[5]):
            counter = counter_class(namespace, field, self, alias, data, region)
            if field_data[1] != Marker.T.NONE:
                for marker in data.iterate_markers(
                    filter_group = field_data[1],
                    region_id = region.get_id(),
                    exclude_children = field_data[2],
                    merge=field_data[3]):
                    counter.count(marker, pattern_to_search)
            count = counter.get_result()
            # non_zero fields suppress storing zero values
            if count != 0 or field_data[0].non_zero == False:
                region.set_data(namespace, field, count)
    else:
        # file-level metric: a single counter with no region attribution
        counter = counter_class(namespace, field, self, alias, data, None)
        if field_data[1] != Marker.T.NONE:
            for marker in data.iterate_markers(
                filter_group = field_data[1],
                region_id = None,
                exclude_children = field_data[2],
                merge=field_data[3]):
                counter.count(marker, pattern_to_search)
        count = counter.get_result()
        if count != 0 or field_data[0].non_zero == False:
            data.set_data(namespace, field, count)