api.py 49 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203
  1. #
  2. # Metrix++, Copyright 2009-2013, Metrix++ Project
  3. # Link: http://metrixplusplus.sourceforge.net
  4. #
  5. # This file is a part of Metrix++ Tool.
  6. #
  7. # Metrix++ is free software: you can redistribute it and/or modify
  8. # it under the terms of the GNU General Public License as published by
  9. # the Free Software Foundation, version 3 of the License.
  10. #
  11. # Metrix++ is distributed in the hope that it will be useful,
  12. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. # GNU General Public License for more details.
  15. #
  16. # You should have received a copy of the GNU General Public License
  17. # along with Metrix++. If not, see <http://www.gnu.org/licenses/>.
  18. #
  19. import os.path
  20. import sys
  21. import mpp.internal.dbwrap
  22. import mpp.internal.api_impl
  23. ##############################################################################
  24. #
  25. #
  26. #
  27. ##############################################################################
  28. class Data(object):
  29. def __init__(self):
  30. self.data = {}
  31. def get_data(self, namespace, field):
  32. if namespace not in self.data.keys():
  33. return None
  34. if field not in self.data[namespace].keys():
  35. return None
  36. return self.data[namespace][field]
  37. def set_data(self, namespace, field, value):
  38. if namespace not in self.data:
  39. self.data[namespace] = {}
  40. self.data[namespace][field] = value
  41. def iterate_namespaces(self):
  42. for namespace in self.data.keys():
  43. yield namespace
  44. def iterate_fields(self, namespace):
  45. for field in self.data[namespace].keys():
  46. yield (field, self.data[namespace][field])
  47. def get_data_tree(self, namespaces=None):
  48. return self.data
  49. def __repr__(self):
  50. return object.__repr__(self) + " with data " + self.data.__repr__()
  51. class LoadableData(Data):
  52. def __init__(self, loader, file_id, region_id):
  53. Data.__init__(self)
  54. self.loader = loader
  55. self.file_id = file_id
  56. self.region_id = region_id
  57. self.loaded_namespaces = []
  58. self.changed_namespaces = []
  59. def load_namespace(self, namespace):
  60. namespace_obj = self.loader.get_namespace(namespace)
  61. if namespace_obj == None:
  62. return
  63. regions_supported = namespace_obj.are_regions_supported()
  64. if ((self.region_id == None and regions_supported == True) or
  65. (self.region_id != None and regions_supported == False)):
  66. return
  67. row = self.loader.db.get_row(namespace, self.file_id, self.region_id)
  68. if row == None:
  69. return
  70. for column_name in row.keys():
  71. try:
  72. packager = namespace_obj._get_field_packager(column_name)
  73. except mpp.internal.api_impl.PackagerError:
  74. continue
  75. if row[column_name] == None:
  76. continue
  77. Data.set_data(self, namespace, column_name, packager.unpack(row[column_name]))
  78. def set_data(self, namespace, field, value):
  79. if namespace not in self.changed_namespaces:
  80. self.changed_namespaces.append(namespace)
  81. return Data.set_data(self, namespace, field, value)
  82. def get_data(self, namespace, field):
  83. if namespace not in self.loaded_namespaces:
  84. self.loaded_namespaces.append(namespace)
  85. self.load_namespace(namespace)
  86. return Data.get_data(self, namespace, field)
  87. def is_namespace_updated(self, namespace):
  88. return namespace in self.changed_namespaces
  89. def is_namespace_loaded(self, namespace):
  90. return namespace in self.loaded_namespaces
  91. def get_data_tree(self, namespaces=None):
  92. if namespaces == None:
  93. namespaces = self.loader.iterate_namespace_names()
  94. for each in namespaces:
  95. self.load_namespace(each)
  96. return Data.get_data_tree(self)
  97. class Region(LoadableData):
  98. class T(object):
  99. NONE = 0x00
  100. GLOBAL = 0x01
  101. CLASS = 0x02
  102. STRUCT = 0x04
  103. NAMESPACE = 0x08
  104. FUNCTION = 0x10
  105. INTERFACE = 0x20
  106. ANY = 0xFF
  107. def to_str(self, group):
  108. if group == self.NONE:
  109. return "none"
  110. elif group == self.GLOBAL:
  111. return "global"
  112. elif group == self.CLASS:
  113. return "class"
  114. elif group == self.STRUCT:
  115. return "struct"
  116. elif group == self.NAMESPACE:
  117. return "namespace"
  118. elif group == self.FUNCTION:
  119. return "function"
  120. elif group == self.INTERFACE:
  121. return "interface"
  122. else:
  123. assert(False)
  124. def __init__(self, loader, file_id, region_id, region_name, offset_begin, offset_end, line_begin, line_end, cursor_line, group, checksum):
  125. LoadableData.__init__(self, loader, file_id, region_id)
  126. self.name = region_name
  127. self.begin = offset_begin
  128. self.end = offset_end
  129. self.line_begin = line_begin
  130. self.line_end = line_end
  131. self.cursor = cursor_line
  132. self.group = group
  133. self.checksum = checksum
  134. self.children = []
  135. def get_id(self):
  136. return self.region_id
  137. def get_name(self):
  138. return self.name
  139. def get_offset_begin(self):
  140. return self.begin
  141. def get_offset_end(self):
  142. return self.end
  143. def get_line_begin(self):
  144. return self.line_begin
  145. def get_line_end(self):
  146. return self.line_end
  147. def get_cursor(self):
  148. return self.cursor
  149. def get_type(self):
  150. return self.group
  151. def get_checksum(self):
  152. return self.checksum
  153. def iterate_subregion_ids(self):
  154. return self.children
  155. def _register_subregion_id(self, child_id):
  156. self.children.append(child_id)
  157. class Marker(object):
  158. class T(object):
  159. NONE = 0x00
  160. COMMENT = 0x01
  161. STRING = 0x02
  162. PREPROCESSOR = 0x04
  163. CODE = 0x08
  164. ANY = 0xFF
  165. def to_str(self, group):
  166. if group == self.NONE:
  167. return "none"
  168. elif group == self.COMMENT:
  169. return "comment"
  170. elif group == self.STRING:
  171. return "string"
  172. elif group == self.PREPROCESSOR:
  173. return "preprocessor"
  174. elif group == self.CODE:
  175. return "code"
  176. else:
  177. assert(False)
  178. def __init__(self, offset_begin, offset_end, group):
  179. self.begin = offset_begin
  180. self.end = offset_end
  181. self.group = group
  182. def get_offset_begin(self):
  183. return self.begin
  184. def get_offset_end(self):
  185. return self.end
  186. def get_type(self):
  187. return self.group
class FileData(LoadableData):
    """All parsed data for one file: content, the region tree and markers.

    Regions and markers are either populated by a parser during collection
    (add_region/add_marker) or lazily loaded from the database during
    post-processing (load_regions/load_markers); None means 'not loaded yet'.
    """

    def __init__(self, loader, path, file_id, checksum, content):
        LoadableData.__init__(self, loader, file_id, None)
        self.path = path
        self.checksum = checksum
        self.content = content
        self.regions = None     # None until loaded/collected
        self.markers = None     # None until loaded/collected
        self.loader = loader
        self.loading_tmp = []   # stack of open region ids used while appending

    def get_id(self):
        return self.file_id

    def get_path(self):
        return self.path

    def get_checksum(self):
        return self.checksum

    def get_content(self):
        return self.content

    def _internal_append_region(self, region):
        # here we apply some magic - we rely on special ordering of coming regions,
        # which is supported by code parsers
        # (regions arrive sorted by offset, so an enclosing region is always
        # seen before its sub-regions)
        prev_id = None
        while True:
            if len(self.loading_tmp) == 0:
                break
            prev_id = self.loading_tmp.pop()
            if self.get_region(prev_id).get_offset_end() > region.get_offset_begin():
                self.loading_tmp.append(prev_id) # return back
                break
        self.loading_tmp.append(region.get_id())
        if prev_id != None:
            # prev_id is the innermost region still enclosing this one
            self.get_region(prev_id)._register_subregion_id(region.get_id())
        self.regions.append(region)

    def load_regions(self):
        """Load all regions of this file from the database (idempotent)."""
        if self.regions == None:
            self.regions = []
            for each in self.loader.db.iterate_regions(self.get_id()):
                self._internal_append_region(Region(self.loader,
                                                    self.get_id(),
                                                    each.region_id,
                                                    each.name,
                                                    each.begin,
                                                    each.end,
                                                    each.line_begin,
                                                    each.line_end,
                                                    each.cursor,
                                                    each.group,
                                                    each.checksum))
                # region ids are expected to be sequential, starting at 1
                assert(len(self.regions) == each.region_id)

    def add_region(self, region_name, offset_begin, offset_end, line_begin, line_end, cursor_line, group, checksum):
        """Record a new region (parser path) and return its 1-based id."""
        if self.regions == None:
            # # do not load regions and markers in time of collection
            # if region is added first by parser, set markers to empty list as well
            # because if there are no markers in a file, it forces loading of markers
            # during iterate_markers call
            self.regions = []
            self.markers = []
        new_id = len(self.regions) + 1
        self._internal_append_region(Region(self.loader, self.get_id(), new_id, region_name, offset_begin, offset_end, line_begin, line_end, cursor_line, group, checksum))
        self.loader.db.create_region(self.file_id, new_id, region_name, offset_begin, offset_end, line_begin, line_end, cursor_line, group, checksum)
        return new_id

    def get_region(self, region_id):
        """Return the region with the given 1-based id."""
        self.load_regions()
        return self.regions[region_id - 1]

    def iterate_regions(self, filter_group = Region.T.ANY):
        """Yield regions whose type matches the filter bit mask."""
        self.load_regions()
        for each in self.regions:
            if each.group & filter_group:
                yield each

    def are_regions_loaded(self):
        return self.regions != None

    def load_markers(self):
        """Load all markers of this file from the database (idempotent)."""
        if self.markers == None:
            # TODO add assert in case of an attempt to load data during collection
            assert(False) # TODO not used in post-processing tools for while, need to be fixed
            self.markers = []
            for each in self.loader.db.iterate_markers(self.get_id()):
                self.markers.append(Marker(each.begin, each.end, each.group))

    def add_marker(self, offset_begin, offset_end, group):
        """Record a new marker (parser path); markers must arrive in offset order."""
        if self.markers == None:
            # # do not load regions and markers in time of collection
            # if marker is added first by parser, set regions to empty list as well
            # because if there are no regions in a file, it forces loading of regions
            # during iterate_regions call
            self.regions = []
            self.markers = []
        self.markers.append(Marker(offset_begin, offset_end, group))
        # TODO drop collecting markers, it is faster to double parse
        # it is not the same with regions, it is faster to load regions
        # on iterative re-run
        #self.loader.db.create_marker(self.file_id, offset_begin, offset_end, group)

    def iterate_markers(self, filter_group = Marker.T.ANY,
                        region_id = None, exclude_children = True, merge = False):
        """Yield markers matching the filter, in offset order.

        Three modes:
        - merge=True: re-iterates with merge=False and fuses adjacent markers
          into single markers whose type is the OR of the merged types;
        - region_id=None: iterates all markers of the file, synthesizing
          CODE markers for the gaps between stored (non-code) markers;
        - region_id set: iterates markers belonging to that region, either
          excluding or including its sub-regions' markers.
        """
        self.load_markers()
        # merged markers
        if merge == True:
            next_marker = None
            for marker in self.iterate_markers(filter_group, region_id, exclude_children, merge = False):
                if next_marker != None:
                    if next_marker.get_offset_end() == marker.get_offset_begin():
                        # sequential markers
                        next_marker = Marker(next_marker.get_offset_begin(),
                                             marker.get_offset_end(),
                                             next_marker.get_type() | marker.get_type())
                    else:
                        yield next_marker
                        next_marker = None
                if next_marker == None:
                    next_marker = Marker(marker.get_offset_begin(),
                                         marker.get_offset_end(),
                                         marker.get_type())
            if next_marker != None:
                yield next_marker
        # all markers per file
        elif region_id == None:
            # everything between stored markers is code
            next_code_marker_start = 0
            for marker in self.markers:
                if Marker.T.CODE & filter_group and next_code_marker_start < marker.get_offset_begin():
                    yield Marker(next_code_marker_start, marker.get_offset_begin(), Marker.T.CODE)
                if marker.group & filter_group:
                    yield marker
                next_code_marker_start = marker.get_offset_end()
            if Marker.T.CODE & filter_group and next_code_marker_start < len(self.get_content()):
                yield Marker(next_code_marker_start, len(self.get_content()), Marker.T.CODE)
        # markers per region
        else:
            region = self.get_region(region_id)
            if region != None:
                # code parsers and database know about non-code markers
                # clients want to iterate code as markers as well
                # so, we embed code markers in run-time
                class CodeMarker(Marker):
                    pass
                # cache markers for all regions if it does not exist
                if hasattr(region, '_markers_list') == False:
                    # subroutine to populate _markers_list attribute
                    # _markers_list does include code markers
                    # stored markers are referenced by index into self.markers,
                    # synthesized code markers are stored as CodeMarker objects
                    def cache_markers_list_rec(data, region_id, marker_start_ind, next_code_marker_start):
                        region = data.get_region(region_id)
                        region._markers_list = []
                        region._first_marker_ind = marker_start_ind
                        #next_code_marker_start = region.get_offset_begin()
                        for sub_id in region.iterate_subregion_ids():
                            subregion = data.get_region(sub_id)
                            # cache all markers before the subregion
                            while len(data.markers) > marker_start_ind and \
                                subregion.get_offset_begin() > data.markers[marker_start_ind].get_offset_begin():
                                if next_code_marker_start < data.markers[marker_start_ind].get_offset_begin():
                                    # append code markers coming before non-code marker
                                    region._markers_list.append(CodeMarker(next_code_marker_start,
                                                                           data.markers[marker_start_ind].get_offset_begin(),
                                                                           Marker.T.CODE))
                                next_code_marker_start = data.markers[marker_start_ind].get_offset_end()
                                region._markers_list.append(marker_start_ind)
                                marker_start_ind += 1
                            # cache all code markers before the subregion but after the last marker
                            if next_code_marker_start < subregion.get_offset_begin():
                                region._markers_list.append(CodeMarker(next_code_marker_start,
                                                                       subregion.get_offset_begin(),
                                                                       Marker.T.CODE))
                            next_code_marker_start = subregion.get_offset_begin()
                            # here is the recursive call for all sub-regions
                            (marker_start_ind, next_code_marker_start) = cache_markers_list_rec(data,
                                                                                                sub_id,
                                                                                                marker_start_ind,
                                                                                                next_code_marker_start)
                        # cache all markers after the last subregion
                        while len(data.markers) > marker_start_ind and \
                            region.get_offset_end() > data.markers[marker_start_ind].get_offset_begin():
                            # append code markers coming before non-code marker
                            if next_code_marker_start < data.markers[marker_start_ind].get_offset_begin():
                                region._markers_list.append(CodeMarker(next_code_marker_start,
                                                                       data.markers[marker_start_ind].get_offset_begin(),
                                                                       Marker.T.CODE))
                            next_code_marker_start = data.markers[marker_start_ind].get_offset_end()
                            region._markers_list.append(marker_start_ind)
                            marker_start_ind += 1
                        # cache the last code segment after the last marker
                        if next_code_marker_start < region.get_offset_end():
                            region._markers_list.append(CodeMarker(next_code_marker_start,
                                                                   region.get_offset_end(),
                                                                   Marker.T.CODE))
                        next_code_marker_start = region.get_offset_end()
                        # return the starting point for the next call of this function
                        return (marker_start_ind, next_code_marker_start)
                    # append markers list to all regions recursively
                    # (region 1 is the root/global region of the file)
                    (next_marker_pos, next_code_marker_start) = cache_markers_list_rec(self, 1, 0, 0)
                    assert(next_marker_pos == len(self.markers))
                # excluding subregions
                if exclude_children == True:
                    for marker_ind in region._markers_list:
                        if isinstance(marker_ind, int):
                            marker = self.markers[marker_ind]
                        else:
                            marker = marker_ind # CodeMarker
                        if marker.group & filter_group:
                            yield marker
                # including subregions
                else:
                    next_code_marker_start = region.get_offset_begin()
                    for marker in self.markers[region._first_marker_ind:]:
                        if marker.get_offset_begin() >= region.get_offset_end():
                            break
                        if region.get_offset_begin() > marker.get_offset_begin():
                            continue
                        if Marker.T.CODE & filter_group and next_code_marker_start < marker.get_offset_begin():
                            yield Marker(next_code_marker_start, marker.get_offset_begin(), Marker.T.CODE)
                        if marker.group & filter_group:
                            yield marker
                        next_code_marker_start = marker.get_offset_end()
                    if Marker.T.CODE & filter_group and next_code_marker_start < region.get_offset_end():
                        yield Marker(next_code_marker_start, region.get_offset_end(), Marker.T.CODE)

    def are_markers_loaded(self):
        return self.markers != None

    def __repr__(self):
        return Data.__repr__(self) + " and regions " + self.regions.__repr__()
  404. class AggregatedData(Data):
  405. def __init__(self, loader, path):
  406. Data.__init__(self)
  407. self.path = path
  408. self.loader = loader
  409. self.subdirs = None
  410. self.subfiles = None
  411. def get_subdirs(self):
  412. if self.subdirs != None:
  413. return self.subdirs
  414. self.subdirs = []
  415. if self.path != None:
  416. for subdir in self.loader.db.iterate_dircontent(self.path, include_subdirs = True, include_subfiles = False):
  417. self.subdirs.append(subdir)
  418. return self.subdirs
  419. def get_subfiles(self):
  420. if self.subfiles != None:
  421. return self.subfiles
  422. self.subfiles = []
  423. if self.path != None:
  424. for subfile in self.loader.db.iterate_dircontent(self.path, include_subdirs = False, include_subfiles = True):
  425. self.subfiles.append(subfile)
  426. return self.subfiles
  427. class SelectData(Data):
  428. def __init__(self, loader, path, file_id, region_id):
  429. Data.__init__(self)
  430. self.loader = loader
  431. self.path = path
  432. self.file_id = file_id
  433. self.region_id = region_id
  434. self.region = None
  435. def get_path(self):
  436. return self.path
  437. def get_region(self):
  438. if self.region == None and self.region_id != None:
  439. row = self.loader.db.get_region(self.file_id, self.region_id)
  440. if row != None:
  441. self.region = Region(self.loader,
  442. self.file_id,
  443. self.region_id,
  444. row.name,
  445. row.begin,
  446. row.end,
  447. row.line_begin,
  448. row.line_end,
  449. row.cursor,
  450. row.group,
  451. row.checksum)
  452. return self.region
  453. class DiffData(Data):
  454. def __init__(self, new_data, old_data):
  455. Data.__init__(self)
  456. self.new_data = new_data
  457. self.old_data = old_data
  458. def get_data(self, namespace, field):
  459. new_data = self.new_data.get_data(namespace, field)
  460. old_data = self.old_data.get_data(namespace, field)
  461. if new_data == None:
  462. return None
  463. if old_data == None:
  464. # non_zero fields has got zero value by default if missed
  465. # the data can be also unavailable,
  466. # because previous collection does not include that
  467. # but external tools (like limit.py) should warn about this,
  468. # using list of registered database properties
  469. old_data = 0
  470. return new_data - old_data
  471. ####################################
  472. # Loader
  473. ####################################
  474. class Namespace(object):
  475. class NamespaceError(Exception):
  476. def __init__(self, namespace, reason):
  477. Exception.__init__(self, "Namespace '"
  478. + namespace
  479. + "': '"
  480. + reason
  481. + "'")
  482. class FieldError(Exception):
  483. def __init__(self, field, reason):
  484. Exception.__init__(self, "Field '"
  485. + field
  486. + "': '"
  487. + reason
  488. + "'")
  489. def __init__(self, db_handle, name, support_regions = False, version='1.0'):
  490. if not isinstance(name, str):
  491. raise Namespace.NamespaceError(name, "name not a string")
  492. self.name = name
  493. self.support_regions = support_regions
  494. self.fields = {}
  495. self.db = db_handle
  496. if self.db.check_table(name) == False:
  497. self.db.create_table(name, support_regions, version)
  498. else:
  499. for column in self.db.iterate_columns(name):
  500. self.add_field(column.name,
  501. mpp.internal.api_impl.PackagerFactory().get_python_type(column.sql_type),
  502. non_zero=column.non_zero)
  503. def get_name(self):
  504. return self.name
  505. def are_regions_supported(self):
  506. return self.support_regions
  507. def add_field(self, field_name, python_type, non_zero=False):
  508. if not isinstance(field_name, str):
  509. raise Namespace.FieldError(field_name, "field_name not a string")
  510. packager = mpp.internal.api_impl.PackagerFactory().create(python_type, non_zero)
  511. if field_name in self.fields.keys():
  512. raise Namespace.FieldError(field_name, "double used")
  513. self.fields[field_name] = packager
  514. if self.db.check_column(self.get_name(), field_name) == False:
  515. # - False if cloned
  516. # - True if created
  517. return self.db.create_column(self.name, field_name, packager.get_sql_type(), non_zero=non_zero)
  518. return None # if double request
  519. def iterate_field_names(self):
  520. for name in self.fields.keys():
  521. yield name
  522. def check_field(self, field_name):
  523. try:
  524. self._get_field_packager(field_name)
  525. except mpp.internal.api_impl.PackagerError:
  526. return False
  527. return True
  528. def get_field_sql_type(self, field_name):
  529. try:
  530. return self._get_field_packager(field_name).get_sql_type()
  531. except mpp.internal.api_impl.PackagerError:
  532. raise Namespace.FieldError(field_name, 'does not exist')
  533. def get_field_python_type(self, field_name):
  534. try:
  535. return self._get_field_packager(field_name).get_python_type()
  536. except mpp.internal.api_impl.PackagerError:
  537. raise Namespace.FieldError(field_name, 'does not exist')
  538. def is_field_non_zero(self, field_name):
  539. try:
  540. return self._get_field_packager(field_name).is_non_zero()
  541. except mpp.internal.api_impl.PackagerError:
  542. raise Namespace.FieldError(field_name, 'does not exist')
  543. def _get_field_packager(self, field_name):
  544. if field_name in self.fields.keys():
  545. return self.fields[field_name]
  546. else:
  547. raise mpp.internal.api_impl.PackagerError("unknown field " + field_name + " requested")
  548. class Loader(object):
  549. def __init__(self):
  550. self.namespaces = {}
  551. self.db = None
  552. self.last_file_data = None # for performance boost reasons
  553. def create_database(self, dbfile, previous_db = None):
  554. self.db = mpp.internal.dbwrap.Database()
  555. try:
  556. self.db.create(dbfile, clone_from=previous_db)
  557. except:
  558. return False
  559. return True
  560. def open_database(self, dbfile, read_only = True):
  561. self.db = mpp.internal.dbwrap.Database()
  562. if os.path.exists(dbfile) == False:
  563. return False
  564. try:
  565. self.db.connect(dbfile, read_only=read_only)
  566. except:
  567. return False
  568. for table in self.db.iterate_tables():
  569. self.create_namespace(table.name, table.support_regions)
  570. return True
  571. def set_property(self, property_name, value):
  572. if self.db == None:
  573. return None
  574. return self.db.set_property(property_name, str(value))
  575. def get_property(self, property_name):
  576. if self.db == None:
  577. return None
  578. return self.db.get_property(property_name)
  579. def iterate_properties(self):
  580. if self.db == None:
  581. return None
  582. return self.db.iterate_properties()
  583. def create_namespace(self, name, support_regions = False, version='1.0'):
  584. if self.db == None:
  585. return None
  586. if name in self.namespaces.keys():
  587. raise Namespace.NamespaceError(name, "double used")
  588. new_namespace = Namespace(self.db, name, support_regions, version)
  589. self.namespaces[name] = new_namespace
  590. return new_namespace
  591. def iterate_namespace_names(self):
  592. for name in self.namespaces.keys():
  593. yield name
  594. def get_namespace(self, name):
  595. if name in self.namespaces.keys():
  596. return self.namespaces[name]
  597. else:
  598. return None
  599. def create_file_data(self, path, checksum, content):
  600. if self.db == None:
  601. return None
  602. (new_id, is_updated) = self.db.create_file(path, checksum)
  603. result = FileData(self, path, new_id, checksum, content)
  604. self.last_file_data = result
  605. return (result, is_updated)
  606. def load_file_data(self, path):
  607. if self.db == None:
  608. return None
  609. if self.last_file_data != None and self.last_file_data.get_path() == path:
  610. return self.last_file_data
  611. data = self.db.get_file(path)
  612. if data == None:
  613. return None
  614. result = FileData(self, data.path, data.id, data.checksum, None)
  615. self.last_file_data = result
  616. return result
  617. class DataNotPackable(Exception):
  618. def __init__(self, namespace, field, value, packager, extra_message):
  619. Exception.__init__(self, "Data '"
  620. + str(value)
  621. + "' of type "
  622. + str(value.__class__)
  623. + " referred by '"
  624. + namespace
  625. + "=>"
  626. + field
  627. + "' is not packable by registered packager '"
  628. + str(packager.__class__)
  629. + "': " + extra_message)
    def save_file_data(self, file_data):
        """Flush the updated namespaces of file_data (and its regions) to the db.

        Only namespaces flagged as updated are written. Returns None if no
        database is open.
        """
        if self.db == None:
            return None

        class DataIterator(object):
            """Adapter yielding (field, packed_value) pairs for one namespace."""

            def iterate_packed_values(self, data, namespace, support_regions = False):
                # each is a (field, value) tuple from Data.iterate_fields
                for each in data.iterate_fields(namespace):
                    space = self.loader.get_namespace(namespace)
                    if space == None:
                        raise Loader.DataNotPackable(namespace, each[0], each[1], None, "The namespace has not been found")
                    try:
                        packager = space._get_field_packager(each[0])
                    except mpp.internal.api_impl.PackagerError:
                        raise Loader.DataNotPackable(namespace, each[0], each[1], None, "The field has not been found")
                    if space.support_regions != support_regions:
                        raise Loader.DataNotPackable(namespace, each[0], each[1], packager, "Incompatible support for regions")
                    try:
                        packed_data = packager.pack(each[1])
                        if packed_data == None:
                            continue
                    except mpp.internal.api_impl.PackagerError:
                        raise Loader.DataNotPackable(namespace, each[0], each[1], packager, "Packager raised exception")
                    yield (each[0], packed_data)

            def __init__(self, loader, data, namespace, support_regions = False):
                self.loader = loader
                self.iterator = self.iterate_packed_values(data, namespace, support_regions)

            def __iter__(self):
                return self.iterator

        # TODO can construct to add multiple rows at one sql query
        # to improve the performance
        # file-level rows (region id is None)
        for namespace in file_data.iterate_namespaces():
            if file_data.is_namespace_updated(namespace) == False:
                continue
            self.db.add_row(namespace,
                            file_data.get_id(),
                            None,
                            DataIterator(self, file_data, namespace))
        # region-level rows, only when regions were touched at all
        if file_data.are_regions_loaded():
            for region in file_data.iterate_regions():
                for namespace in region.iterate_namespaces():
                    if region.is_namespace_updated(namespace) == False:
                        continue
                    self.db.add_row(namespace,
                                    file_data.get_id(),
                                    region.get_id(),
                                    DataIterator(self, region, namespace, support_regions = True))
  675. def iterate_file_data(self, path = None, path_like_filter = "%"):
  676. if self.db == None:
  677. return None
  678. final_path_like = path_like_filter
  679. if path != None:
  680. if self.db.check_dir(path) == False and self.db.check_file(path) == False:
  681. return None
  682. final_path_like = path + path_like_filter
  683. class FileDataIterator(object):
  684. def iterate_file_data(self, loader, final_path_like):
  685. for data in loader.db.iterate_files(path_like=final_path_like):
  686. yield FileData(loader, data.path, data.id, data.checksum, None)
  687. def __init__(self, loader, final_path_like):
  688. self.iterator = self.iterate_file_data(loader, final_path_like)
  689. def __iter__(self):
  690. return self.iterator
  691. if self.db == None:
  692. return None
  693. return FileDataIterator(self, final_path_like)
  694. def load_aggregated_data(self, path = None, path_like_filter = "%", namespaces = None):
  695. if self.db == None:
  696. return None
  697. final_path_like = path_like_filter
  698. if path != None:
  699. if self.db.check_dir(path) == False and self.db.check_file(path) == False:
  700. return None
  701. final_path_like = path + path_like_filter
  702. if namespaces == None:
  703. namespaces = self.namespaces.keys()
  704. result = AggregatedData(self, path)
  705. for name in namespaces:
  706. namespace = self.get_namespace(name)
  707. data = self.db.aggregate_rows(name, path_like = final_path_like)
  708. for field in data.keys():
  709. if namespace.get_field_python_type(field) == str:
  710. continue
  711. data[field]['nonzero'] = namespace.is_field_non_zero(field)
  712. distribution = self.db.count_rows(name, path_like = final_path_like, group_by_column = field)
  713. data[field]['distribution-bars'] = []
  714. for each in distribution:
  715. if each[0] == None:
  716. continue
  717. assert(float(data[field]['count'] != 0))
  718. data[field]['distribution-bars'].append({'metric': each[0],
  719. 'count': each[1],
  720. 'ratio': (float(each[1]) / float(data[field]['count']))})
  721. result.set_data(name, field, data[field])
  722. return result
  723. def load_selected_data(self, namespace, fields = None, path = None, path_like_filter = "%", filters = [],
  724. sort_by = None, limit_by = None):
  725. if self.db == None:
  726. return None
  727. final_path_like = path_like_filter
  728. if path != None:
  729. if self.db.check_dir(path) == False and self.db.check_file(path) == False:
  730. return None
  731. final_path_like = path + path_like_filter
  732. namespace_obj = self.get_namespace(namespace)
  733. if namespace_obj == None:
  734. return None
  735. class SelectDataIterator(object):
  736. def iterate_selected_values(self, loader, namespace_obj, final_path_like, fields, filters, sort_by, limit_by):
  737. for row in loader.db.select_rows(namespace_obj.get_name(), path_like=final_path_like, filters=filters,
  738. order_by=sort_by, limit_by=limit_by):
  739. region_id = None
  740. if namespace_obj.are_regions_supported() == True:
  741. region_id = row['region_id']
  742. data = SelectData(loader, row['path'], row['id'], region_id)
  743. field_names = fields
  744. if fields == None:
  745. field_names = namespace_obj.iterate_field_names()
  746. for field in field_names:
  747. data.set_data(namespace, field, row[field])
  748. yield data
  749. def __init__(self, loader, namespace_obj, final_path_like, fields, filters, sort_by, limit_by):
  750. self.iterator = self.iterate_selected_values(loader, namespace_obj, final_path_like, fields, filters, sort_by, limit_by)
  751. def __iter__(self):
  752. return self.iterator
  753. return SelectDataIterator(self, namespace_obj, final_path_like, fields, filters, sort_by, limit_by)
  754. class BasePlugin(object):
  755. def initialize(self):
  756. pass
  757. def terminate(self):
  758. pass
  759. def set_name(self, name):
  760. self.name = name
  761. def get_name(self):
  762. if hasattr(self, 'name') == False:
  763. return None
  764. return self.name
  765. def set_version(self, version):
  766. self.version = version
  767. def get_version(self):
  768. if hasattr(self, 'version') == False:
  769. return None
  770. return self.version
  771. def _set_plugin_loader(self, loader):
  772. self.plugin_loader = loader
  773. def _get_plugin_loader(self):
  774. if hasattr(self, 'plugin_loader') == False:
  775. return None
  776. return self.plugin_loader
  777. def get_plugin(self, plugin_name):
  778. return self._get_plugin_loader().get_plugin(plugin_name)
  779. def get_action(self):
  780. return self._get_plugin_loader().get_action()
  781. class Plugin(BasePlugin):
  782. class Field(object):
  783. def __init__(self, name, ftype, non_zero=False):
  784. self.name = name
  785. self.type = ftype
  786. self.non_zero = non_zero
  787. class Property(object):
  788. def __init__(self, name, value):
  789. self.name = name
  790. self.value = value
  791. def initialize(self, namespace=None, support_regions=True, fields=[], properties=[]):
  792. super(Plugin, self).initialize()
  793. if hasattr(self, 'is_updated') == False:
  794. self.is_updated = False # original initialization
  795. db_loader = self.get_plugin('mpp.dbf').get_loader()
  796. if namespace == None:
  797. namespace = self.get_name()
  798. if (len(fields) != 0 or len(properties) != 0):
  799. prev_version = db_loader.set_property(self.get_name() + ":version", self.get_version())
  800. if str(prev_version) != str(self.get_version()):
  801. self.is_updated = True
  802. for prop in properties:
  803. assert(prop.name != 'version')
  804. prev_prop = db_loader.set_property(self.get_name() + ":" + prop.name, prop.value)
  805. if str(prev_prop) != str(prop.value):
  806. self.is_updated = True
  807. if len(fields) != 0:
  808. namespace_obj = db_loader.create_namespace(namespace,
  809. support_regions=support_regions,
  810. version=self.get_version())
  811. for field in fields:
  812. is_created = namespace_obj.add_field(field.name, field.type, non_zero=field.non_zero)
  813. assert(is_created != None)
  814. # if field is created (not cloned from the previous db),
  815. # mark the plug-in as updated in order to trigger full rescan
  816. self.is_updated = self.is_updated or is_created
class MetricPluginMixin(object):
    """Mixin implementing regex-driven metric counting over file regions.

    declare_metric() registers a field together with the patterns used to
    count it; each entry of self._fields is the tuple:
      [0] field (Plugin.Field)
      [1] marker type mask for data.iterate_markers()
      [2] exclude_subregions flag
      [3] merge_markers flag
      [4] map: alias -> (compiled pattern, counter class)
      [5] region type mask for data.iterate_regions()
    count_if_active() then walks regions/markers and stores the counts.
    """
    class AliasError(Exception):
        # raised when an alias has no pattern and there is no '*' fallback
        def __init__(self, alias):
            Exception.__init__(self, "Unknown pattern alias: " + str(alias))
    class PlainCounter(object):
        """Default counter: sums regex matches inside each marker's span."""
        def __init__(self, plugin, alias, data, region):
            self.plugin = plugin
            self.alias = alias
            self.data = data
            self.region = region
            self.result = 0
        def count(self, marker, pattern_to_search):
            # findall restricted to [offset_begin, offset_end) of the marker
            self.result += len(pattern_to_search.findall(self.data.get_content(),
                                                         marker.get_offset_begin(),
                                                         marker.get_offset_end()))
        def get_result(self):
            return self.result
    class IterIncrementCounter(PlainCounter):
        """Counter variant: adds increment(match) for every regex match."""
        def count(self, marker, pattern_to_search):
            self.marker = marker
            self.pattern_to_search = pattern_to_search
            for match in pattern_to_search.finditer(self.data.get_content(),
                                                    marker.get_offset_begin(),
                                                    marker.get_offset_end()):
                self.result += self.increment(match)
        def increment(self, match):
            # subclasses override to weight individual matches
            return 1
    class IterAssignCounter(PlainCounter):
        """Counter variant: replaces the result via assign(match) per match."""
        def count(self, marker, pattern_to_search):
            self.marker = marker
            self.pattern_to_search = pattern_to_search
            for match in pattern_to_search.finditer(self.data.get_content(),
                                                    marker.get_offset_begin(),
                                                    marker.get_offset_end()):
                self.result = self.assign(match)
        def assign(self, match):
            # subclasses override to derive a new result from the match
            return self.result
    def declare_metric(self, is_active, field,
                       pattern_to_search_or_map_of_patterns,
                       marker_type_mask=Marker.T.ANY,
                       region_type_mask=Region.T.ANY,
                       exclude_subregions=True,
                       merge_markers=False):
        """Register one metric field and its counting pattern(s).

        'pattern_to_search_or_map_of_patterns' is either a single pattern
        (stored under the '*' alias) or a dict alias -> pattern; each value
        may also be a (pattern, counter class) pair.
        The field is recorded only when is_active is True.
        """
        if hasattr(self, '_fields') == False:
            self._fields = {}
        if isinstance(pattern_to_search_or_map_of_patterns, dict):
            map_of_patterns = pattern_to_search_or_map_of_patterns
        else:
            map_of_patterns = {'*': pattern_to_search_or_map_of_patterns}
        # client may supply a pattern or a pair of pattern + counter class
        # NOTE(review): when a dict is passed, it is normalized IN PLACE, so
        # the caller's dict is mutated — confirm callers don't reuse it
        for key in map_of_patterns.keys():
            if isinstance(map_of_patterns[key], tuple) == False:
                # if it is not a pair, create a pair using default counter class
                map_of_patterns[key] = (map_of_patterns[key],
                                        MetricPluginMixin.PlainCounter)
        if is_active == True:
            self._fields[field.name] = (field,
                                        marker_type_mask,
                                        exclude_subregions,
                                        merge_markers,
                                        map_of_patterns,
                                        region_type_mask)
    def is_active(self, metric_name = None):
        """With a name: is that metric declared? Without: any metric active?"""
        if metric_name == None:
            return (len(self._fields.keys()) > 0)
        return (metric_name in self._fields.keys())
    def get_fields(self):
        """Return the Plugin.Field objects of all declared metrics."""
        result = []
        for key in self._fields.keys():
            result.append(self._fields[key][0])
        return result
    def callback(self, parent, data, is_updated):
        """Parent notification hook: count all metrics, then notify children."""
        # count if metric is enabled,
        # and (optimization for the case of iterative rescan:)
        # if file is updated or this plugin's settings are updated
        is_updated = is_updated or self.is_updated
        if is_updated == True:
            for field in self.get_fields():
                # the parent's name selects the pattern alias to apply
                self.count_if_active(field.name, data, alias=parent.get_name())
        # if parent, notify children
        if isinstance(self, Parent):
            self.notify_children(data, is_updated)
    def count_if_active(self, metric_name, data, namespace=None, alias='*'):
        """Count one metric over all matching regions of 'data' and store it.

        Falls back to the '*' pattern when 'alias' has no dedicated pattern;
        raises AliasError when neither exists. Zero counts are stored only
        for fields not declared non_zero.
        """
        if self.is_active(metric_name) == False:
            return
        if namespace == None:
            namespace = self.get_name()
        field_data = self._fields[metric_name]
        if alias not in field_data[4].keys():
            if '*' not in field_data[4].keys():
                raise self.AliasError(alias)
            else:
                alias = '*'
        (pattern_to_search, counter_class) = field_data[4][alias]
        for region in data.iterate_regions(filter_group=field_data[5]):
            # fresh counter per region; accumulate over the region's markers
            counter = counter_class(self, alias, data, region)
            for marker in data.iterate_markers(
                            filter_group = field_data[1],
                            region_id = region.get_id(),
                            exclude_children = field_data[2],
                            merge=field_data[3]):
                counter.count(marker, pattern_to_search)
            count = counter.get_result()
            if count != 0 or field_data[0].non_zero == False:
                region.set_data(namespace, metric_name, count)
  922. class InterfaceNotImplemented(Exception):
  923. def __init__(self, obj):
  924. Exception.__init__(self, "Method '"
  925. + sys._getframe(1).f_code.co_name
  926. + "' has not been implemented for "
  927. + str(obj.__class__))
  928. class IConfigurable(object):
  929. def configure(self, options):
  930. raise InterfaceNotImplemented(self)
  931. def declare_configuration(self, optparser):
  932. raise InterfaceNotImplemented(self)
  933. class IRunable(object):
  934. def run(self, args):
  935. raise InterfaceNotImplemented(self)
  936. class IParser(object):
  937. def process(self, parent, data, is_updated):
  938. raise InterfaceNotImplemented(self)
  939. class ICode(object):
  940. pass
  941. class CallbackNotImplemented(Exception):
  942. def __init__(self, obj, callback_name):
  943. Exception.__init__(self, "Callback '"
  944. + callback_name
  945. + "' has not been implemented for "
  946. + str(obj.__class__))
  947. class Child(object):
  948. def notify(self, parent, callback_name, *args):
  949. if hasattr(self, callback_name) == False:
  950. raise CallbackNotImplemented(self, callback_name)
  951. self.__getattribute__(callback_name)(parent, *args)
  952. def subscribe_by_parents_name(self, parent_name, callback_name='callback'):
  953. self.get_plugin(parent_name).subscribe(self, callback_name)
  954. def subscribe_by_parents_names(self, parent_names, callback_name='callback'):
  955. for parent_name in parent_names:
  956. self.get_plugin(parent_name).subscribe(self, callback_name)
  957. def subscribe_by_parents_interface(self, interface, callback_name='callback'):
  958. for plugin in self._get_plugin_loader().iterate_plugins():
  959. if isinstance(plugin, interface):
  960. plugin.subscribe(self, callback_name)
  961. class Parent(object):
  962. def init_Parent(self):
  963. if hasattr(self, 'children') == False:
  964. self.children = []
  965. def subscribe(self, obj, callback_name):
  966. self.init_Parent()
  967. if (isinstance(obj, Child) == False):
  968. raise TypeError()
  969. self.children.append((obj,callback_name))
  970. def unsubscribe(self, obj, callback_name):
  971. self.init_Parent()
  972. self.children.remove((obj, callback_name))
  973. def notify_children(self, *args):
  974. self.init_Parent()
  975. for child in self.children:
  976. child[0].notify(self, child[1], *args)
  977. def iterate_children(self):
  978. self.init_Parent()
  979. for child in self.children:
  980. yield child