view.py 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443
  1. #
  2. # Metrix++, Copyright 2009-2013, Metrix++ Project
  3. # Link: http://metrixplusplus.sourceforge.net
  4. #
  5. # This file is a part of Metrix++ Tool.
  6. #
  7. # Metrix++ is free software: you can redistribute it and/or modify
  8. # it under the terms of the GNU General Public License as published by
  9. # the Free Software Foundation, version 3 of the License.
  10. #
  11. # Metrix++ is distributed in the hope that it will be useful,
  12. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. # GNU General Public License for more details.
  15. #
  16. # You should have received a copy of the GNU General Public License
  17. # along with Metrix++. If not, see <http://www.gnu.org/licenses/>.
  18. #
  19. import mpp.api
  20. import mpp.utils
  21. import mpp.cout
  22. class Plugin(mpp.api.Plugin, mpp.api.IConfigurable, mpp.api.IRunable):
  23. def declare_configuration(self, parser):
  24. parser.add_option("--format", "--ft", default='txt', choices=['txt', 'xml', 'python'],
  25. help="Format of the output data. "
  26. "Possible values are 'xml', 'txt' or 'python' [default: %default]")
  27. parser.add_option("--nest-regions", "--nr", action="store_true", default=False,
  28. help="If the option is set (True), data for regions is exported in the form of a tree. "
  29. "Otherwise, all regions are exported in plain list. [default: %default]")
  30. parser.add_option("--max-distribution-rows", "--mdr", type=int, default=20,
  31. help="Maximum number of rows in distribution tables. "
  32. "If it is set to 0, the tool does not optimize the size of distribution tables [default: %default]")
  33. def configure(self, options):
  34. self.out_format = options.__dict__['format']
  35. self.nest_regions = options.__dict__['nest_regions']
  36. self.dist_columns = options.__dict__['max_distribution_rows']
  37. def run(self, args):
  38. loader_prev = self.get_plugin_loader().get_plugin('mpp.dbf').get_loader_prev()
  39. loader = self.get_plugin_loader().get_plugin('mpp.dbf').get_loader()
  40. paths = None
  41. if len(args) == 0:
  42. paths = [""]
  43. else:
  44. paths = args
  45. (result, exit_code) = export_to_str(self.out_format,
  46. paths,
  47. loader,
  48. loader_prev,
  49. self.nest_regions,
  50. self.dist_columns)
  51. print result
  52. return exit_code
  53. def export_to_str(out_format, paths, loader, loader_prev, nest_regions, dist_columns):
  54. exit_code = 0
  55. result = ""
  56. if out_format == 'xml':
  57. result += "<export>\n"
  58. elif out_format == 'python':
  59. result += "{'export': ["
  60. for (ind, path) in enumerate(paths):
  61. path = mpp.utils.preprocess_path(path)
  62. aggregated_data = loader.load_aggregated_data(path)
  63. aggregated_data_tree = {}
  64. subdirs = []
  65. subfiles = []
  66. if aggregated_data != None:
  67. aggregated_data_tree = aggregated_data.get_data_tree()
  68. subdirs = aggregated_data.get_subdirs()
  69. subfiles = aggregated_data.get_subfiles()
  70. else:
  71. mpp.utils.report_bad_path(path)
  72. exit_code += 1
  73. aggregated_data_prev = loader_prev.load_aggregated_data(path)
  74. if aggregated_data_prev != None:
  75. aggregated_data_tree = append_diff(aggregated_data_tree,
  76. aggregated_data_prev.get_data_tree())
  77. aggregated_data_tree = compress_dist(aggregated_data_tree, dist_columns)
  78. file_data = loader.load_file_data(path)
  79. file_data_tree = {}
  80. if file_data != None:
  81. file_data_tree = file_data.get_data_tree()
  82. file_data_prev = loader_prev.load_file_data(path)
  83. append_regions(file_data_tree, file_data, file_data_prev, nest_regions)
  84. data = {"info": {"path": path, "id": ind + 1},
  85. "aggregated-data": aggregated_data_tree,
  86. "file-data": file_data_tree,
  87. "subdirs": subdirs,
  88. "subfiles": subfiles}
  89. if out_format == 'txt':
  90. cout_txt(data, loader)
  91. elif out_format == 'xml':
  92. result += mpp.utils.serialize_to_xml(data, root_name = "data") + "\n"
  93. elif out_format == 'python':
  94. postfix = ""
  95. if ind < len(paths) - 1:
  96. postfix = ", "
  97. result += mpp.utils.serialize_to_python(data, root_name = "data") + postfix
  98. if out_format == 'xml':
  99. result += "</export>"
  100. elif out_format == 'python':
  101. result += "]}"
  102. return (result, exit_code)
  103. def append_regions(file_data_tree, file_data, file_data_prev, nest_regions):
  104. regions_matcher = None
  105. if file_data_prev != None:
  106. file_data_tree = append_diff(file_data_tree,
  107. file_data_prev.get_data_tree())
  108. regions_matcher = mpp.utils.FileRegionsMatcher(file_data, file_data_prev)
  109. if nest_regions == False:
  110. regions = []
  111. for region in file_data.iterate_regions():
  112. region_data_tree = region.get_data_tree()
  113. if regions_matcher != None and regions_matcher.is_matched(region.get_id()):
  114. region_data_prev = file_data_prev.get_region(regions_matcher.get_prev_id(region.get_id()))
  115. region_data_tree = append_diff(region_data_tree,
  116. region_data_prev.get_data_tree())
  117. regions.append({"info": {"name" : region.name,
  118. 'type' : file_data.get_region_types()().to_str(region.get_type()),
  119. "cursor" : region.cursor,
  120. 'line_begin': region.line_begin,
  121. 'line_end': region.line_end,
  122. 'offset_begin': region.begin,
  123. 'offset_end': region.end},
  124. "data": region_data_tree})
  125. file_data_tree['regions'] = regions
  126. else:
  127. def append_rec(region_id, file_data_tree, file_data, file_data_prev):
  128. region = file_data.get_region(region_id)
  129. region_data_tree = region.get_data_tree()
  130. if regions_matcher != None and regions_matcher.is_matched(region.get_id()):
  131. region_data_prev = file_data_prev.get_region(regions_matcher.get_prev_id(region.get_id()))
  132. region_data_tree = append_diff(region_data_tree,
  133. region_data_prev.get_data_tree())
  134. result = {"info": {"name" : region.name,
  135. 'type' : file_data.get_region_types()().to_str(region.get_type()),
  136. "cursor" : region.cursor,
  137. 'line_begin': region.line_begin,
  138. 'line_end': region.line_end,
  139. 'offset_begin': region.begin,
  140. 'offset_end': region.end},
  141. "data": region_data_tree,
  142. "subregions": []}
  143. for sub_id in file_data.get_region(region_id).iterate_subregion_ids():
  144. result['subregions'].append(append_rec(sub_id, file_data_tree, file_data, file_data_prev))
  145. return result
  146. file_data_tree['regions'] = []
  147. file_data_tree['regions'].append(append_rec(1, file_data_tree, file_data, file_data_prev))
  148. def append_diff(main_tree, prev_tree):
  149. assert(main_tree != None)
  150. assert(prev_tree != None)
  151. for name in main_tree.keys():
  152. if name not in prev_tree.keys():
  153. continue
  154. for field in main_tree[name].keys():
  155. if field not in prev_tree[name].keys():
  156. continue
  157. if isinstance(main_tree[name][field], dict) and isinstance(prev_tree[name][field], dict):
  158. diff = {}
  159. for key in main_tree[name][field].keys():
  160. if key not in prev_tree[name][field].keys():
  161. continue
  162. main_val = main_tree[name][field][key]
  163. prev_val = prev_tree[name][field][key]
  164. if main_val == None:
  165. main_val = 0
  166. if prev_val == None:
  167. prev_val = 0
  168. if isinstance(main_val, list) and isinstance(prev_val, list):
  169. main_tree[name][field][key] = append_diff_list(main_val, prev_val)
  170. else:
  171. diff[key] = main_val - prev_val
  172. main_tree[name][field]['__diff__'] = diff
  173. elif (not isinstance(main_tree[name][field], dict)) and (not isinstance(prev_tree[name][field], dict)):
  174. if '__diff__' not in main_tree[name]:
  175. main_tree[name]['__diff__'] = {}
  176. main_tree[name]['__diff__'][field] = main_tree[name][field] - prev_tree[name][field]
  177. return main_tree
  178. def append_diff_list(main_list, prev_list):
  179. merged_list = {}
  180. for bar in main_list:
  181. merged_list[bar['metric']] = {'count': bar['count'], '__diff__':0, 'ratio': bar['ratio']}
  182. for bar in prev_list:
  183. if bar['metric'] in merged_list.keys():
  184. merged_list[bar['metric']]['__diff__'] = \
  185. merged_list[bar['metric']]['count'] - bar['count']
  186. else:
  187. merged_list[bar['metric']] = {'count': 0, '__diff__':-bar['count'], 'ratio': 0}
  188. result = []
  189. for metric in sorted(merged_list.keys()):
  190. result.append({'metric':metric,
  191. 'count':merged_list[metric]['count'],
  192. 'ratio':merged_list[metric]['ratio'],
  193. '__diff__':merged_list[metric]['__diff__']})
  194. return result
  195. def compress_dist(data, columns):
  196. if columns == 0:
  197. return data
  198. for namespace in data.keys():
  199. for field in data[namespace].keys():
  200. metric_data = data[namespace][field]
  201. distr = metric_data['distribution-bars']
  202. columns = float(columns) # to trigger floating calculations
  203. if metric_data['count'] == 0:
  204. continue
  205. new_dist = []
  206. remaining_count = metric_data['count']
  207. next_consume = None
  208. next_bar = None
  209. max_count = 0
  210. min_count = 0xFFFFFFFF
  211. sum_ratio = 0
  212. for (ind, bar) in enumerate(distr):
  213. if next_bar == None:
  214. # start new bar
  215. next_bar = {'count': bar['count'],
  216. 'ratio': bar['ratio'],
  217. 'metric_s': bar['metric'],
  218. 'metric_f': bar['metric']}
  219. if '__diff__' in bar.keys():
  220. next_bar['__diff__'] = bar['__diff__']
  221. next_consume = int(round(remaining_count/ (columns - len(new_dist))))
  222. else:
  223. # merge to existing bar
  224. next_bar['count'] += bar['count']
  225. next_bar['ratio'] += bar['ratio']
  226. next_bar['metric_f'] = bar['metric']
  227. if '__diff__' in bar.keys():
  228. next_bar['__diff__'] += bar['__diff__']
  229. next_consume -= bar['count']
  230. if (next_consume <= 0 # consumed enough
  231. or (ind + 1) == len(distr)): # or the last bar
  232. # append to new distribution
  233. if isinstance(next_bar['metric_s'], float):
  234. next_bar['metric_s'] = "{0:.4f}".format(next_bar['metric_s'])
  235. next_bar['metric_f'] = "{0:.4f}".format(next_bar['metric_f'])
  236. else:
  237. next_bar['metric_s'] = str(next_bar['metric_s'])
  238. next_bar['metric_f'] = str(next_bar['metric_f'])
  239. if next_bar['metric_s'] == next_bar['metric_f']:
  240. next_bar['metric'] = next_bar['metric_s']
  241. else:
  242. next_bar['metric'] = next_bar['metric_s'] + "-" + next_bar['metric_f']
  243. del next_bar['metric_s']
  244. del next_bar['metric_f']
  245. new_dist.append(next_bar)
  246. sum_ratio += next_bar['ratio']
  247. if max_count < next_bar['count']:
  248. max_count = next_bar['count']
  249. if min_count > next_bar['count'] and next_bar['count'] != 0:
  250. min_count = next_bar['count']
  251. remaining_count -= next_bar['count']
  252. next_bar = None
  253. # check that consumed all
  254. assert((ind + 1) != len(distr) or remaining_count == 0)
  255. if (float(max_count - min_count) / metric_data['count'] < 0.05 and
  256. metric_data['count'] > 1 and
  257. len(new_dist) > 1):
  258. # trick here: if all bars are even in the new distribution
  259. # it is better to do linear compression instead
  260. new_dist = []
  261. step = int(round(float(metric_data['max'] - metric_data['min']) / columns))
  262. next_end_limit = metric_data['min']
  263. next_bar = None
  264. for (ind, bar) in enumerate(distr):
  265. if next_bar == None:
  266. # start new bar
  267. next_bar = {'count': bar['count'],
  268. 'ratio': bar['ratio'],
  269. 'metric_s': next_end_limit,
  270. 'metric_f': bar['metric']}
  271. if '__diff__' in bar.keys():
  272. next_bar['__diff__'] = bar['__diff__']
  273. next_end_limit += step
  274. else:
  275. # merge to existing bar
  276. next_bar['count'] += bar['count']
  277. next_bar['ratio'] += bar['ratio']
  278. next_bar['metric_f'] = bar['metric']
  279. if '__diff__' in bar.keys():
  280. next_bar['__diff__'] += bar['__diff__']
  281. if (next_bar['metric_f'] >= next_end_limit # consumed enough
  282. or (ind + 1) == len(distr)): # or the last bar
  283. if (ind + 1) != len(distr):
  284. next_bar['metric_f'] = next_end_limit
  285. # append to new distribution
  286. if isinstance(next_bar['metric_s'], float):
  287. next_bar['metric_s'] = "{0:.4f}".format(next_bar['metric_s'])
  288. next_bar['metric_f'] = "{0:.4f}".format(next_bar['metric_f'])
  289. else:
  290. next_bar['metric_s'] = str(next_bar['metric_s'])
  291. next_bar['metric_f'] = str(next_bar['metric_f'])
  292. next_bar['metric'] = next_bar['metric_s'] + "-" + next_bar['metric_f']
  293. del next_bar['metric_s']
  294. del next_bar['metric_f']
  295. new_dist.append(next_bar)
  296. next_bar = None
  297. data[namespace][field]['distribution-bars'] = new_dist
  298. return data
  299. def cout_txt_regions(path, regions, indent = 0):
  300. for region in regions:
  301. details = [
  302. ('Region name', region['info']['name']),
  303. ('Region type', region['info']['type']),
  304. ('Offsets', str(region['info']['offset_begin']) + "-" + str(region['info']['offset_end'])),
  305. ('Line numbers', str(region['info']['line_begin']) + "-" + str(region['info']['line_end']))
  306. ]
  307. for namespace in region['data'].keys():
  308. diff_data = {}
  309. if '__diff__' in region['data'][namespace].keys():
  310. diff_data = region['data'][namespace]['__diff__']
  311. for field in region['data'][namespace].keys():
  312. diff_str = ""
  313. if field == '__diff__':
  314. continue
  315. if field in diff_data.keys():
  316. diff_str = " [" + ("+" if diff_data[field] >= 0 else "") + str(diff_data[field]) + "]"
  317. details.append((namespace + ":" + field, str(region['data'][namespace][field]) + diff_str))
  318. mpp.cout.notify(path,
  319. region['info']['cursor'],
  320. mpp.cout.SEVERITY_INFO,
  321. "Metrics per '" + region['info']['name']+ "' region",
  322. details,
  323. indent=indent)
  324. if 'subregions' in region.keys():
  325. cout_txt_regions(path, region['subregions'], indent=indent+1)
  326. def cout_txt(data, loader):
  327. details = []
  328. for key in data['file-data'].keys():
  329. if key == 'regions':
  330. cout_txt_regions(data['info']['path'], data['file-data'][key])
  331. else:
  332. namespace = key
  333. diff_data = {}
  334. if '__diff__' in data['file-data'][namespace].keys():
  335. diff_data = data['file-data'][namespace]['__diff__']
  336. for field in data['file-data'][namespace].keys():
  337. diff_str = ""
  338. if field == '__diff__':
  339. continue
  340. if field in diff_data.keys():
  341. diff_str = " [" + ("+" if diff_data[field] >= 0 else "") + str(diff_data[field]) + "]"
  342. details.append((namespace + ":" + field, str(data['file-data'][namespace][field]) + diff_str))
  343. if len(details) > 0:
  344. mpp.cout.notify(data['info']['path'],
  345. 0,
  346. mpp.cout.SEVERITY_INFO,
  347. "Metrics per file",
  348. details)
  349. attr_map = {'count': 'Measured',
  350. 'total': 'Total',
  351. 'avg': 'Average',
  352. 'min': 'Minimum',
  353. 'max': 'Maximum'}
  354. for namespace in data['aggregated-data'].keys():
  355. for field in data['aggregated-data'][namespace].keys():
  356. details = []
  357. diff_data = {}
  358. if '__diff__' in data['aggregated-data'][namespace][field].keys():
  359. diff_data = data['aggregated-data'][namespace][field]['__diff__']
  360. for attr in data['aggregated-data'][namespace][field].keys():
  361. diff_str = ""
  362. if attr == 'distribution-bars' or attr == '__diff__' or attr == 'count':
  363. continue
  364. if attr in diff_data.keys():
  365. diff_str = " [" + ("+" if diff_data[attr] >= 0 else "") + str(diff_data[attr]) + "]"
  366. details.append((attr_map[attr], str(data['aggregated-data'][namespace][field][attr]) + diff_str))
  367. measured = data['aggregated-data'][namespace][field]['count']
  368. if 'count' in diff_data.keys():
  369. diff_str = ' [{0:{1}}]'.format(diff_data['count'], '+' if diff_data['count'] >= 0 else '')
  370. count_str_len = len(str(measured))
  371. elem_name = 'regions'
  372. if loader.get_namespace(namespace).are_regions_supported() == False:
  373. elem_name = 'files'
  374. details.append(('Distribution', str(measured) + diff_str + ' ' + elem_name + ' measured'))
  375. details.append((' Metric value', 'Ratio : R-sum : Number of ' + elem_name))
  376. sum_ratio = 0
  377. for bar in data['aggregated-data'][namespace][field]['distribution-bars']:
  378. sum_ratio += bar['ratio']
  379. diff_str = ""
  380. if '__diff__' in bar.keys():
  381. diff_str = ' [{0:{1}}]'.format(bar['__diff__'], '+' if bar['__diff__'] >= 0 else '')
  382. if isinstance(bar['metric'], float):
  383. metric_str = "{0:.4f}".format(bar['metric'])
  384. else:
  385. metric_str = str(bar['metric'])
  386. metric_str = (" " * (mpp.cout.DETAILS_OFFSET - len(metric_str) - 1)) + metric_str
  387. count_str = str(bar['count'])
  388. count_str = ((" " * (count_str_len - len(count_str))) + count_str + diff_str + "\t")
  389. details.append((metric_str,
  390. "{0:.3f}".format(bar['ratio']) + " : " + "{0:.3f}".format(sum_ratio) + " : " +
  391. count_str + ('|' * int(round(bar['ratio']*100)))))
  392. mpp.cout.notify(data['info']['path'],
  393. '', # no line number
  394. mpp.cout.SEVERITY_INFO,
  395. "Overall metrics for '" + namespace + ":" + field + "' metric",
  396. details)
  397. details = []
  398. for each in data['subdirs']:
  399. details.append(('Directory', each))
  400. for each in data['subfiles']:
  401. details.append(('File', each))
  402. if len(details) > 0:
  403. mpp.cout.notify(data['info']['path'],
  404. '', # no line number
  405. mpp.cout.SEVERITY_INFO,
  406. "Directory content:",
  407. details)