collect.py 8.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189
  1. #
  2. # Metrix++, Copyright 2009-2019, Metrix++ Project
  3. # Link: https://github.com/metrixplusplus/metrixplusplus
  4. #
  5. # This file is a part of Metrix++ Tool.
  6. #
  7. import mpp.api
  8. import re
  9. import os
  10. import logging
  11. import time
  12. import binascii
  13. import fnmatch
  14. import multiprocessing.pool
  15. class Plugin(mpp.api.Plugin, mpp.api.Parent, mpp.api.IConfigurable, mpp.api.IRunable):
  16. def __init__(self):
  17. self.reader = DirectoryReader()
  18. self.include_rules = []
  19. self.exclude_rules = []
  20. self.exclude_files = []
  21. self.parsers = []
  22. super(Plugin, self).__init__()
  23. def declare_configuration(self, parser):
  24. parser.add_option("--std.general.proctime", "--sgpt", action="store_true", default=False,
  25. help="If the option is set (True), the tool measures processing time per file [default: %default]")
  26. parser.add_option("--std.general.procerrors", "--sgpe", action="store_true", default=False,
  27. help="If the option is set (True), the tool counts number of processing/parsing errors per file [default: %default]")
  28. parser.add_option("--std.general.size", "--sgs", action="store_true", default=False,
  29. help="If the option is set (True), the tool collects file size metric (in bytes) [default: %default]")
  30. parser.add_option("--include-files", "--if", default=r'.*',
  31. help="Defines the regular expression pattern to include files in processing [default: %default]")
  32. parser.add_option("--exclude-files", "--ef", default=r'^[.]',
  33. help="Defines the regular expression pattern to exclude files from processing [default: %default]")
  34. parser.add_option("--non-recursively", "--nr", action="store_true", default=False,
  35. help="If the option is set (True), sub-directories are not processed [default: %default]")
  36. self.optparser = parser
  37. def configure(self, options):
  38. self.is_proctime_enabled = options.__dict__['std.general.proctime']
  39. self.is_procerrors_enabled = options.__dict__['std.general.procerrors']
  40. self.is_size_enabled = options.__dict__['std.general.size']
  41. try:
  42. self.add_include_rule(re.compile(options.__dict__['include_files']))
  43. except Exception as e:
  44. self.optparser.error("option --include-files: " + str(e))
  45. try:
  46. self.add_exclude_rule(re.compile(options.__dict__['exclude_files']))
  47. except Exception as e:
  48. self.optparser.error("option --exclude-files: " + str(e))
  49. self.non_recursively = options.__dict__['non_recursively']
  50. def initialize(self):
  51. fields = []
  52. if self.is_proctime_enabled == True:
  53. fields.append(self.Field('proctime', float))
  54. if self.is_procerrors_enabled == True:
  55. fields.append(self.Field('procerrors', int))
  56. if self.is_size_enabled == True:
  57. fields.append(self.Field('size', int))
  58. super(Plugin, self).initialize(namespace='std.general', support_regions=False, fields=fields)
  59. self.add_exclude_file(self.get_plugin('mpp.dbf').get_dbfile_path())
  60. self.add_exclude_file(self.get_plugin('mpp.dbf').get_dbfile_prev_path())
  61. def run(self, args):
  62. if len(args) == 0:
  63. return self.reader.run(self, "./")
  64. retcode = 0
  65. for directory in args:
  66. retcode += self.reader.run(self, directory)
  67. return retcode
  68. def register_parser(self, fnmatch_exp_list, parser):
  69. self.parsers.append((fnmatch_exp_list, parser))
  70. def get_parser(self, file_path):
  71. for parser in self.parsers:
  72. for fnmatch_exp in parser[0]:
  73. if fnmatch.fnmatch(file_path, fnmatch_exp):
  74. return parser[1]
  75. return None
  76. def add_include_rule(self, re_compiled_pattern):
  77. self.include_rules.append(re_compiled_pattern)
  78. def add_exclude_rule(self, re_compiled_pattern):
  79. self.exclude_rules.append(re_compiled_pattern)
  80. def add_exclude_file(self, file_path):
  81. if file_path == None:
  82. return
  83. self.exclude_files.append(file_path)
  84. def is_file_excluded(self, file_name):
  85. # only apply the include rules to files - skip directories
  86. if os.path.isfile(file_name):
  87. for each in self.include_rules:
  88. if re.match(each, os.path.basename(file_name)) != None:
  89. break;
  90. # file is excluded if no include rule matches
  91. else:
  92. return True
  93. # check exclude rules for both, files and directories
  94. for each in self.exclude_rules:
  95. if re.match(each, os.path.basename(file_name)) != None:
  96. return True
  97. # finally check if a file is excluded directly
  98. for each in self.exclude_files:
  99. if os.path.basename(each) == os.path.basename(file_name):
  100. if os.stat(each) == os.stat(file_name):
  101. return True
  102. return False
  103. class DirectoryReader():
  104. def run(self, plugin, directory):
  105. IS_TEST_MODE = False
  106. if 'METRIXPLUSPLUS_TEST_MODE' in list(os.environ.keys()):
  107. IS_TEST_MODE = True
  108. def run_per_file(plugin, fname, full_path):
  109. exit_code = 0
  110. norm_path = re.sub(r'''[\\]''', "/", full_path)
  111. if os.path.isabs(norm_path) == False and norm_path.startswith('./') == False:
  112. norm_path = './' + norm_path
  113. if plugin.is_file_excluded(norm_path) == False:
  114. if os.path.isdir(full_path):
  115. if plugin.non_recursively == False:
  116. exit_code += run_recursively(plugin, full_path)
  117. else:
  118. parser = plugin.get_parser(full_path)
  119. if parser == None:
  120. logging.info("Skipping: " + norm_path)
  121. else:
  122. logging.info("Processing: " + norm_path)
  123. ts = time.time()
  124. f = open(full_path, 'rU');
  125. text = f.read();
  126. f.close()
  127. checksum = binascii.crc32(text.encode('utf8')) & 0xffffffff # to match python 3
  128. db_loader = plugin.get_plugin('mpp.dbf').get_loader()
  129. (data, is_updated) = db_loader.create_file_data(norm_path, checksum, str(text))
  130. procerrors = parser.process(plugin, data, is_updated)
  131. if plugin.is_proctime_enabled == True:
  132. data.set_data('std.general', 'proctime',
  133. (time.time() - ts) if IS_TEST_MODE == False else 0.01)
  134. if plugin.is_procerrors_enabled == True and procerrors != None and procerrors != 0:
  135. data.set_data('std.general', 'procerrors', procerrors)
  136. if plugin.is_size_enabled == True:
  137. data.set_data('std.general', 'size', len(text))
  138. db_loader.save_file_data(data)
  139. #logging.debug("-" * 60)
  140. exit_code += procerrors
  141. else:
  142. logging.info("Excluding: " + norm_path)
  143. return exit_code
  144. #thread_pool = multiprocessing.pool.ThreadPool()
  145. #def mp_worker(args):
  146. # run_per_file(args[0], args[1], args[2])
  147. def run_recursively(plugin, directory):
  148. exit_code = 0
  149. #thread_pool.map(mp_worker,
  150. # [(plugin, f, os.path.join(subdir, f))
  151. # for subdir, dirs, files in os.walk(directory) for f in files])
  152. for fname in sorted(os.listdir(directory)):
  153. full_path = os.path.join(directory, fname)
  154. exit_code += run_per_file(plugin, fname, full_path)
  155. return exit_code
  156. if os.path.exists(directory) == False:
  157. logging.error("Skipping (does not exist): " + directory)
  158. return 1
  159. if os.path.isdir(directory):
  160. total_errors = run_recursively(plugin, directory)
  161. else:
  162. total_errors = run_per_file(plugin, os.path.basename(directory), directory)
  163. total_errors = total_errors # used, warnings are per file if not zero
  164. return 0 # ignore errors, collection is successful anyway