java.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281
  1. #
  2. # Metrix++, Copyright 2009-2013, Metrix++ Project
  3. # Link: http://metrixplusplus.sourceforge.net
  4. #
  5. # This file is a part of Metrix++ Tool.
  6. #
  7. # Metrix++ is free software: you can redistribute it and/or modify
  8. # it under the terms of the GNU General Public License as published by
  9. # the Free Software Foundation, version 3 of the License.
  10. #
  11. # Metrix++ is distributed in the hope that it will be useful,
  12. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. # GNU General Public License for more details.
  15. #
  16. # You should have received a copy of the GNU General Public License
  17. # along with Metrix++. If not, see <http://www.gnu.org/licenses/>.
  18. #
  19. import re
  20. import binascii
  21. import core.api
  22. import core.cout
  23. class Plugin(core.api.Plugin, core.api.Parent, core.api.IParser, core.api.IConfigurable, core.api.ICode):
  24. def declare_configuration(self, parser):
  25. parser.add_option("--std.code.java.files", default="*.java",
  26. help="Enumerates filename extensions to match Java files [default: %default]")
  27. def configure(self, options):
  28. self.files = options.__dict__['std.code.java.files'].split(',')
  29. self.files.sort() # sorted list goes to properties
  30. def initialize(self):
  31. core.api.Plugin.initialize(self, properties=[
  32. self.Property('files', ','.join(self.files))
  33. ])
  34. self.get_plugin_loader().register_parser(self.files, self)
  35. def process(self, parent, data, is_updated):
  36. is_updated = is_updated or self.is_updated
  37. count_mismatched_brackets = 0
  38. if is_updated == True:
  39. count_mismatched_brackets = JavaCodeParser().run(data)
  40. self.notify_children(data, is_updated)
  41. return count_mismatched_brackets
  42. class JavaCodeParser(object):
  43. regex_cpp = re.compile(r'''
  44. //(?=\n|\r\n|\r) # Match Java style comments (empty comment line)
  45. | //.*?(?=\n|\r\n|\r) # Match Java style comments
  46. # NOTE: end of line is NOT consumed
  47. # NOTE: it is slightly different in C++
  48. | /\*\*/ # Match C style comments (empty comment line)
  49. # NOTE: it is slightly different in C++
  50. | /\*.*?\*/ # Match C style comments
  51. # NOTE: it is slightly different in C++
  52. | \'(?:\\.|[^\\\'])*\' # Match quoted strings
  53. | "(?:\\.|[^\\"])*" # Match double quoted strings
  54. | (?P<fn_name>([@]?[_$a-zA-Z][_$a-zA-Z0-9]*))\s*[(] # Match function
  55. # NOTE: Matches attributes which are excluded later
  56. # NOTE: Java may include $ in the name
  57. # LIMITATION: if there are comments after function name
  58. # and before '(', it is not detected
  59. | ((?P<block_type>class|interface) # Match class or namespace
  60. (?P<block_name>(\s+[_$a-zA-Z][_$a-zA-Z0-9]*)))
  61. # NOTE: noname instances are impossible in Java
  62. # LIMITATION: if there are comments between keyword and name,
  63. # it is not detected
  64. | [{};] # Match block start/end and statement separator
  65. # NOTE: C++ parser includes processing of <> and :
  66. # to handle template definitions, it is easier in Java
  67. | ((?:\n|\r\n|\r)\s*(?:\n|\r\n|\r)) # Match double empty line
  68. ''',
  69. re.DOTALL | re.MULTILINE | re.VERBOSE
  70. )
  71. # \r\n goes before \r in order to consume right number of lines on Unix for Windows files
  72. regex_ln = re.compile(r'(\n)|(\r\n)|(\r)')
  73. def run(self, data):
  74. self.__init__() # Go to initial state if it is called twice
  75. return self.parse(data)
  76. def finalize_block(self, text, block, block_end):
  77. space_match = re.match('^\s*', text[block['start']:block_end], re.MULTILINE)
  78. block['start'] += space_match.end() # trim spaces at the beginning
  79. block['end'] = block_end
  80. start_pos = block['start']
  81. crc32 = 0
  82. for child in block['children']:
  83. # exclude children
  84. crc32 = binascii.crc32(text[start_pos:child['start']], crc32)
  85. start_pos = child['end']
  86. block['checksum'] = binascii.crc32(text[start_pos:block['end']], crc32) & 0xffffffff # to match python 3
  87. def add_lines_data(self, text, blocks):
  88. def add_lines_data_rec(self, text, blocks):
  89. for each in blocks:
  90. # add line begin
  91. self.total_current += len(self.regex_ln.findall(text, self.total_last_pos, each['start']))
  92. each['line_begin'] = self.total_current
  93. self.total_last_pos = each['start']
  94. # process enclosed
  95. add_lines_data_rec(self, text, each['children'])
  96. # add line end
  97. self.total_current += len(self.regex_ln.findall(text, self.total_last_pos, each['end']))
  98. each['line_end'] = self.total_current
  99. self.total_last_pos = each['end']
  100. self.total_last_pos = 0
  101. self.total_current = 1
  102. add_lines_data_rec(self, text, blocks)
  103. def add_regions(self, data, blocks):
  104. # Note: data.add_region() internals depend on special ordering of regions
  105. # in order to identify enclosed regions efficiently
  106. def add_regions_rec(self, data, blocks):
  107. def get_type_id(data, named_type):
  108. if named_type == "function":
  109. return data.get_region_types().FUNCTION
  110. elif named_type == "class":
  111. return data.get_region_types().CLASS
  112. elif named_type == "interface":
  113. return data.get_region_types().INTERFACE
  114. elif named_type == "__global__":
  115. return data.get_region_types().GLOBAL
  116. else:
  117. assert(False)
  118. for each in blocks:
  119. data.add_region(each['name'], each['start'], each['end'],
  120. each['line_begin'], each['line_end'], each['cursor'],
  121. get_type_id(data, each['type']), each['checksum'])
  122. add_regions_rec(self, data, each['children'])
  123. add_regions_rec(self, data, blocks)
  124. def parse(self, data):
  125. def reset_next_block(start):
  126. return {'name':'', 'start':start, 'cursor':0, 'type':''}
  127. count_mismatched_brackets = 0
  128. text = data.get_content()
  129. indent_current = 0;
  130. blocks = [{'name':'__global__', 'start':0, 'cursor':0, 'type':'__global__', 'indent_start':indent_current, 'children':[]}]
  131. curblk = 0
  132. next_block = reset_next_block(0)
  133. cursor_last_pos = 0
  134. cursor_current = 1
  135. for m in re.finditer(self.regex_cpp, text):
  136. # Comment
  137. if text[m.start()] == '/':
  138. data.add_marker(m.start(), m.end(), data.get_marker_types().COMMENT)
  139. # String
  140. elif text[m.start()] == '"' or text[m.start()] == '\'':
  141. data.add_marker(m.start() + 1, m.end() - 1, data.get_marker_types().STRING)
  142. # Statement end
  143. elif text[m.start()] == ';':
  144. # Reset next block name and start
  145. next_block['name'] = ""
  146. next_block['start'] = m.end() # potential region start
  147. # Double end line
  148. elif text[m.start()] == '\n' or text[m.start()] == '\r':
  149. # Reset next block start, if has not been named yet
  150. if next_block['name'] == "":
  151. next_block['start'] = m.end() # potential region start
  152. # Block start...
  153. elif text[m.start()] == '{':
  154. # shift indent right
  155. indent_current += 1
  156. # ... if name detected previously
  157. if next_block['name'] != '': # - Start of enclosed block
  158. blocks.append({'name':next_block['name'],
  159. 'start':next_block['start'],
  160. 'cursor':next_block['cursor'],
  161. 'type':next_block['type'],
  162. 'indent_start':indent_current,
  163. 'children':[]})
  164. next_block = reset_next_block(m.end())
  165. curblk += 1
  166. # ... reset next block start, otherwise
  167. else: # - unknown type of block start
  168. next_block['start'] = m.end() # potential region start
  169. # Block end...
  170. elif text[m.start()] == '}':
  171. # ... if indent level matches the start
  172. if blocks[curblk]['indent_start'] == indent_current:
  173. next_block = reset_next_block(m.end())
  174. if curblk == 0:
  175. core.cout.notify(data.get_path(),
  176. cursor_current + len(self.regex_ln.findall(text, cursor_last_pos, m.start())),
  177. core.cout.SEVERITY_WARNING,
  178. "Non-matching closing bracket '}' detected.")
  179. count_mismatched_brackets += 1
  180. continue
  181. self.finalize_block(text, blocks[curblk], m.end())
  182. assert(blocks[curblk]['type'] != '__global__')
  183. curblk -= 1
  184. assert(curblk >= 0)
  185. blocks[curblk]['children'].append(blocks.pop())
  186. # shift indent left
  187. indent_current -= 1
  188. if indent_current < 0:
  189. core.cout.notify(data.get_path(),
  190. cursor_current + len(self.regex_ln.findall(text, cursor_last_pos, m.start())),
  191. core.cout.SEVERITY_WARNING,
  192. "Non-matching closing bracket '}' detected.")
  193. count_mismatched_brackets += 1
  194. indent_current = 0
  195. # Potential class, interface
  196. elif m.group('block_type') != None:
  197. if next_block['name'] == "":
  198. # - 'name'
  199. next_block['name'] = m.group('block_name').strip()
  200. # - 'cursor'
  201. cursor_current += len(self.regex_ln.findall(text, cursor_last_pos, m.start('block_name')))
  202. cursor_last_pos = m.start('block_name')
  203. next_block['cursor'] = cursor_current
  204. # - 'type'
  205. next_block['type'] = m.group('block_type').strip()
  206. # - 'start' detected earlier
  207. # Potential function name detected...
  208. elif m.group('fn_name') != None:
  209. # ... if outside of a function
  210. # (do not detect functions enclosed directly in a function, i.e. without classes)
  211. # ... and other name before has not been matched
  212. if blocks[curblk]['type'] != 'function' and (next_block['name'] == "") and m.group('fn_name')[0] != '@':
  213. # - 'name'
  214. next_block['name'] = m.group('fn_name').strip()
  215. # - 'cursor'
  216. cursor_current += len(self.regex_ln.findall(text, cursor_last_pos, m.start('fn_name')))
  217. cursor_last_pos = m.start('fn_name')
  218. # NOTE: cursor could be collected together with line_begin, line_end,
  219. # but we keep it here separately for easier debugging of file parsing problems
  220. next_block['cursor'] = cursor_current
  221. # - 'type'
  222. next_block['type'] = 'function'
  223. # - 'start' detected earlier
  224. else:
  225. assert(len("Unknown match by regular expression") == 0)
  226. while indent_current > 0:
  227. # log all
  228. core.cout.notify(data.get_path(),
  229. cursor_current + len(self.regex_ln.findall(text, cursor_last_pos, len(text))),
  230. core.cout.SEVERITY_WARNING,
  231. "Non-matching opening bracket '{' detected.")
  232. count_mismatched_brackets += 1
  233. indent_current -= 1
  234. for (ind, each) in enumerate(blocks):
  235. each = each # used
  236. block = blocks[len(blocks) - 1 - ind]
  237. self.finalize_block(text, block, len(text))
  238. self.add_lines_data(text, blocks)
  239. self.add_regions(data, blocks)
  240. return count_mismatched_brackets