  1. #
  2. # Metrix++, Copyright 2009-2019, Metrix++ Project
  3. # Link: https://github.com/metrixplusplus/metrixplusplus
  4. #
  5. # This file is a part of Metrix++ Tool.
  6. #
  7. import mpp.api
  8. import re
  9. import os
  10. import sys
  11. import logging
  12. import time
  13. import binascii
  14. import fnmatch
  15. import multiprocessing.pool
  16. class Plugin(mpp.api.Plugin, mpp.api.Parent, mpp.api.IConfigurable, mpp.api.IRunable):
  17. def __init__(self):
  18. self.reader = DirectoryReader()
  19. self.include_rules = []
  20. self.exclude_rules = []
  21. self.exclude_files = []
  22. self.parsers = []
  23. super(Plugin, self).__init__()
  24. def declare_configuration(self, parser):
  25. parser.add_option("--std.general.proctime", "--sgpt", action="store_true", default=False,
  26. help="If the option is set (True), the tool measures processing time per file [default: %default]")
  27. parser.add_option("--std.general.procerrors", "--sgpe", action="store_true", default=False,
  28. help="If the option is set (True), the tool counts number of processing/parsing errors per file [default: %default]")
  29. parser.add_option("--std.general.size", "--sgs", action="store_true", default=False,
  30. help="If the option is set (True), the tool collects file size metric (in bytes) [default: %default]")
  31. parser.add_option("--include-files", "--if", default=r'.*',
  32. help="Adds a regular expression pattern to include files in processing (files have to match any rule to be included) [default: %default]")
  33. parser.add_option("--exclude-files", "--ef", default=r'^[.]',
  34. help="Adds a regular expression pattern to exclude files or directories from processing [default: %default]")
  35. parser.add_option("--non-recursively", "--nr", action="store_true", default=False,
  36. help="If the option is set (True), sub-directories are not processed [default: %default]")
  37. self.optparser = parser
  38. def configure(self, options):
  39. self.is_proctime_enabled = options.__dict__['std.general.proctime']
  40. self.is_procerrors_enabled = options.__dict__['std.general.procerrors']
  41. self.is_size_enabled = options.__dict__['std.general.size']
  42. try:
  43. self.add_include_rule(re.compile(options.__dict__['include_files']))
  44. except Exception as e:
  45. self.optparser.error("option --include-files: " + str(e))
  46. try:
  47. self.add_exclude_rule(re.compile(options.__dict__['exclude_files']))
  48. except Exception as e:
  49. self.optparser.error("option --exclude-files: " + str(e))
  50. self.non_recursively = options.__dict__['non_recursively']
  51. def initialize(self):
  52. fields = []
  53. if self.is_proctime_enabled == True:
  54. fields.append(self.Field('proctime', float))
  55. if self.is_procerrors_enabled == True:
  56. fields.append(self.Field('procerrors', int))
  57. if self.is_size_enabled == True:
  58. fields.append(self.Field('size', int))
  59. super(Plugin, self).initialize(namespace='std.general', support_regions=False, fields=fields)
  60. self.add_exclude_file(self.get_plugin('mpp.dbf').get_dbfile_path())
  61. self.add_exclude_file(self.get_plugin('mpp.dbf').get_dbfile_prev_path())
  62. def run(self, args):
  63. if len(args) == 0:
  64. return self.reader.run(self, "./")
  65. retcode = 0
  66. for directory in args:
  67. retcode += self.reader.run(self, directory)
  68. return retcode
  69. def register_parser(self, fnmatch_exp_list, parser):
  70. self.parsers.append((fnmatch_exp_list, parser))
  71. def get_parser(self, file_path):
  72. for parser in self.parsers:
  73. for fnmatch_exp in parser[0]:
  74. if fnmatch.fnmatch(file_path, fnmatch_exp):
  75. return parser[1]
  76. return None
  77. def add_include_rule(self, re_compiled_pattern):
  78. self.include_rules.append(re_compiled_pattern)
  79. def add_exclude_rule(self, re_compiled_pattern):
  80. self.exclude_rules.append(re_compiled_pattern)
  81. def add_exclude_file(self, file_path):
  82. if file_path == None:
  83. return
  84. self.exclude_files.append(file_path)
  85. def is_file_excluded(self, file_name):
  86. # only apply the include rules to files - skip directories
  87. if os.path.isfile(file_name):
  88. for each in self.include_rules:
  89. if re.match(each, os.path.basename(file_name)) != None:
  90. break;
  91. # file is excluded if no include rule matches
  92. else:
  93. return True
  94. # check exclude rules for both, files and directories
  95. for each in self.exclude_rules:
  96. if re.match(each, os.path.basename(file_name)) != None:
  97. return True
  98. # finally check if a file is excluded directly
  99. for each in self.exclude_files:
  100. if os.path.basename(each) == os.path.basename(file_name):
  101. if os.stat(each) == os.stat(file_name):
  102. return True
  103. return False
  104. class DirectoryReader():
  105. def run(self, plugin, directory):
  106. IS_TEST_MODE = False
  107. if 'METRIXPLUSPLUS_TEST_MODE' in list(os.environ.keys()):
  108. IS_TEST_MODE = True
  109. def run_per_file(plugin, fname, full_path):
  110. exit_code = 0
  111. norm_path = re.sub(r'''[\\]''', "/", full_path)
  112. if os.path.isabs(norm_path) == False and norm_path.startswith('./') == False:
  113. norm_path = './' + norm_path
  114. if plugin.is_file_excluded(norm_path) == False:
  115. if os.path.isdir(full_path):
  116. if plugin.non_recursively == False:
  117. exit_code += run_recursively(plugin, full_path)
  118. else:
  119. parser = plugin.get_parser(full_path)
  120. if parser == None:
  121. logging.info("Skipping: " + norm_path)
  122. else:
  123. logging.info("Processing: " + norm_path)
  124. ts = time.time()
  125. f = open(full_path, 'rU');
  126. text = f.read();
  127. # getting along with the different string handling of python 2 and 3
  128. if sys.version_info[0] < 3:
  129. text = text.decode('utf-8')
  130. f.close()
  131. checksum = binascii.crc32(text.encode('utf8')) & 0xffffffff # to match python 3
  132. db_loader = plugin.get_plugin('mpp.dbf').get_loader()
  133. (data, is_updated) = db_loader.create_file_data(norm_path, checksum, text)
  134. procerrors = parser.process(plugin, data, is_updated)
  135. if plugin.is_proctime_enabled == True:
  136. data.set_data('std.general', 'proctime',
  137. (time.time() - ts) if IS_TEST_MODE == False else 0.01)
  138. if plugin.is_procerrors_enabled == True and procerrors != None and procerrors != 0:
  139. data.set_data('std.general', 'procerrors', procerrors)
  140. if plugin.is_size_enabled == True:
  141. data.set_data('std.general', 'size', len(text))
  142. db_loader.save_file_data(data)
  143. #logging.debug("-" * 60)
  144. exit_code += procerrors
  145. else:
  146. logging.info("Excluding: " + norm_path)
  147. return exit_code
  148. #thread_pool = multiprocessing.pool.ThreadPool()
  149. #def mp_worker(args):
  150. # run_per_file(args[0], args[1], args[2])
  151. def run_recursively(plugin, directory):
  152. exit_code = 0
  153. #thread_pool.map(mp_worker,
  154. # [(plugin, f, os.path.join(subdir, f))
  155. # for subdir, dirs, files in os.walk(directory) for f in files])
  156. for fname in sorted(os.listdir(directory)):
  157. full_path = os.path.join(directory, fname)
  158. exit_code += run_per_file(plugin, fname, full_path)
  159. return exit_code
  160. if os.path.exists(directory) == False:
  161. logging.error("Skipping (does not exist): " + directory)
  162. return 1
  163. if os.path.isdir(directory):
  164. total_errors = run_recursively(plugin, directory)
  165. else:
  166. total_errors = run_per_file(plugin, os.path.basename(directory), directory)
  167. total_errors = total_errors # used, warnings are per file if not zero
  168. return 0 # ignore errors, collection is successful anyway