123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307 |
- import re
- import binascii
- import core.api
- import core.cout
class Plugin(core.api.Plugin, core.api.Parent, core.api.IParser, core.api.IConfigurable, core.api.ICode):
    """C/C++ code parser plugin.

    Exposes the ``--std.code.cpp.files`` option, registers itself with the
    plugin loader as the parser for the configured file patterns, and hands
    file data to ``CppCodeParser`` when processing is requested.
    """

    def declare_configuration(self, parser):
        """Add the option listing the file patterns handled by this plugin."""
        parser.add_option(
            "--std.code.cpp.files",
            default="*.c,*.h,*.cpp,*.hpp,*.cc,*.hh,*.cxx,*.hxx",
            help="Enumerates filename extensions to match C/C++ files [default: %default]")

    def configure(self, options):
        """Store the configured file patterns on ``self.files``, sorted."""
        # The option name contains dots, so it is read from the option dict
        # rather than through attribute syntax.
        raw_patterns = options.__dict__['std.code.cpp.files']
        self.files = sorted(raw_patterns.split(','))

    def initialize(self):
        """Publish the 'files' property and register this plugin as a parser."""
        files_property = self.Property('files', ','.join(self.files))
        core.api.Plugin.initialize(self, properties=[files_property])
        loader = self.get_plugin_loader()
        loader.register_parser(self.files, self)

    def process(self, parent, data, is_updated):
        """Parse ``data`` when an update is flagged, then notify child plugins.

        Returns the number of mismatched brackets reported by the parser, or
        0 when parsing was skipped.
        """
        is_updated = is_updated or self.is_updated
        # Parsing only happens when the combined flag compares equal to True.
        mismatched = CppCodeParser().run(data) if is_updated == True else 0
        self.notify_children(data, is_updated)
        return mismatched
-
class CppCodeParser(object):
    """Regex-driven scanner that splits C/C++ text into nested regions.

    One combined regular expression finds comments, string literals,
    preprocessor lines, function-like names, class/struct/namespace headers,
    braces and statement separators. ``parse`` walks the matches, reports
    comment/string/preprocessor markers to the ``data`` object and builds a
    tree of blocks (``__global__`` at the root). Each block receives a CRC32
    checksum computed over its own text with the text of child blocks
    excluded, plus begin/end line numbers, and is then registered on ``data``
    as a region.
    """

    # NOTE(review): in the "other operators" character class below, ``+-\\``
    # is interpreted by ``re`` as a range from '+' to '\' rather than as
    # separate literals, so the class is wider than the operator symbols it
    # lists. The pattern is intentionally left unchanged here - confirm
    # before tightening it.
    regex_cpp = re.compile(r'''
        /([\\](?:\n|\r\n|\r))*/(?=\n|\r\n|\r) # Match C++ style comments (empty comment line)
        | /([\\](?:\n|\r\n|\r))*/.*?[^\\](?=\n|\r\n|\r) # Match C++ style comments
        # NOTE: end of line is NOT consumed
        # NOTE: ([\\](?:\n|\r\n|\r))* for new line separators,
        # Need to support new line separators in expense of efficiency?
        | /\*\*/ # Match C style comments (empty comment line)
        | /([\\](?:\n|\r\n|\r))*\*.*?\*([\\](?:\n|\r\n|\r))*/ # Match C style comments
        | \'(?:\\.|[^\\\'])*\' # Match quoted strings
        | "(?:\\.|[^\\"])*" # Match double quoted strings
        | (((?<=\n|\r)|^)[ \t]*[#].*?[^\\](?=\n|\r\n|\r)) # Match preprocessor
        # NOTE: end of line is NOT consumed
        # NOTE: beginning of line is NOT consumed
        | (?P<fn_name>
        (operator( # Match C++ operator ...
        (\s+[_a-zA-Z][_a-zA-Z0-9]*(\s*\[\s*\])?) # - cast, new and delete operators
        | (\s*\[\s*\]) # - operator []
        | (\s*\(\s*\)) # - operator ()
        | (\s*[+-\\*/=<>!%&^|~,?.]{1,3}) # - other operators (from 1 to 3 symbols)
        ))
        | ([~]?[_a-zA-Z][_a-zA-Z0-9]*) # ... or function or constructor
        )\s*[(] # LIMITATION: if there are comments after function name
        # and before '(', it is not detected
        # LIMITATION: if there are comments within operator definition,
        # if may be not detected
        | ((?P<block_type>class|struct|namespace) # Match C++ class or struct
        (?P<block_name>((\s+[a-zA-Z_][a-zA-Z0-9_]*)|(?=\s*[{])))) # noname is supported, symbol '{' is not consumed
        # LIMITATION: if there are comments between keyword and name,
        # it is not detected
        | [<>{};:] # Match block start/end, brackets and statement separator
        | ((?:\n|\r\n|\r)\s*(?:\n|\r\n|\r)) # Match double empty line
        ''',
        re.DOTALL | re.MULTILINE | re.VERBOSE
    )

    # Matches a single line break in any of the three conventions.
    regex_ln = re.compile(r'(\n)|(\r\n)|(\r)')

    def run(self, data):
        """Re-run ``__init__`` on this instance and parse ``data``.

        Returns the number of mismatched brackets found by ``parse``.
        """
        self.__init__()
        return self.parse(data)

    @staticmethod
    def _crc32_update(chunk, crc):
        """Fold ``chunk`` into the running CRC32 value ``crc``.

        ``binascii.crc32`` requires a bytes-like object on Python 3, so text
        that is not already ``bytes`` is encoded as UTF-8 first. Byte strings
        (including Python 2 ``str``) are passed through unchanged.
        """
        if not isinstance(chunk, bytes):
            chunk = chunk.encode('utf-8')
        return binascii.crc32(chunk, crc)

    def finalize_block(self, text, block, block_end):
        """Close ``block`` at ``block_end`` and compute its checksum.

        The block start is moved past leading whitespace, ``block['end']`` is
        set, and ``block['checksum']`` becomes the CRC32 (as an unsigned
        32-bit value) of the block text with every child's
        ``[start, end)`` span skipped.
        """
        leading_space = re.match(r'^\s*', text[block['start']:block_end], re.MULTILINE)
        block['start'] += leading_space.end()
        block['end'] = block_end

        start_pos = block['start']
        crc32 = 0
        for child in block['children']:
            # Only the text between children contributes to the checksum.
            crc32 = self._crc32_update(text[start_pos:child['start']], crc32)
            start_pos = child['end']
        # Mask so the result is the same non-negative value on Python 2 and 3.
        block['checksum'] = self._crc32_update(text[start_pos:block['end']], crc32) & 0xffffffff

    def add_lines_data(self, text, blocks):
        """Set 1-based ``line_begin`` / ``line_end`` on every block, recursively.

        Line breaks are counted incrementally from the previously visited
        position; the running state is kept in ``self.total_last_pos`` and
        ``self.total_current``.
        """
        def add_lines_data_rec(self, text, blocks):
            for each in blocks:
                # Line on which the block starts.
                self.total_current += len(self.regex_ln.findall(text, self.total_last_pos, each['start']))
                each['line_begin'] = self.total_current
                self.total_last_pos = each['start']

                # Children lie inside the block, so they are visited before
                # the block's own end line is computed.
                add_lines_data_rec(self, text, each['children'])

                # Line on which the block ends.
                self.total_current += len(self.regex_ln.findall(text, self.total_last_pos, each['end']))
                each['line_end'] = self.total_current
                self.total_last_pos = each['end']

        self.total_last_pos = 0
        self.total_current = 1
        add_lines_data_rec(self, text, blocks)

    def add_regions(self, data, blocks):
        """Register every block (parents before their children) via ``data.add_region``."""
        def get_type_id(data, named_type):
            # Translate the parser's block type name into the region type
            # constant exposed by the data object.
            if named_type == "function":
                return data.get_region_types().FUNCTION
            elif named_type == "class":
                return data.get_region_types().CLASS
            elif named_type == "struct":
                return data.get_region_types().STRUCT
            elif named_type == "namespace":
                return data.get_region_types().NAMESPACE
            elif named_type == "__global__":
                return data.get_region_types().GLOBAL
            else:
                assert False, "Unknown block type"

        def add_regions_rec(self, data, blocks):
            for each in blocks:
                data.add_region(each['name'], each['start'], each['end'],
                                each['line_begin'], each['line_end'], each['cursor'],
                                get_type_id(data, each['type']), each['checksum'])
                add_regions_rec(self, data, each['children'])

        add_regions_rec(self, data, blocks)

    def parse(self, data):
        """Scan the content of ``data`` and record markers and regions on it.

        Comment, string and preprocessor matches are reported through
        ``data.add_marker``. Function, class, struct and namespace names that
        are followed by '{' open a new block; the matching '}' closes it.
        Unbalanced brackets are reported through ``core.cout.notify``.

        Returns the number of mismatched brackets detected.
        """

        def reset_next_block(start):
            # Description of the block that would open at the next '{'.
            return {'name': '', 'start': start, 'cursor': 0, 'type': '', 'confirmed': False}

        def count_newlines(begin, end):
            return len(self.regex_ln.findall(text, begin, end))

        def warn_mismatch(position, message):
            # The line number is derived from the last known cursor line plus
            # the line breaks between that point and ``position``.
            core.cout.notify(data.get_path(),
                             cursor_current + count_newlines(cursor_last_pos, position),
                             core.cout.SEVERITY_WARNING,
                             message)

        count_mismatched_brackets = 0

        text = data.get_content()
        indent_current = 0

        # Stack of currently open blocks; index 0 is always the global block.
        blocks = [{'name': '__global__', 'start': 0, 'cursor': 0, 'type': '__global__',
                   'indent_start': indent_current, 'children': []}]
        curblk = 0

        next_block = reset_next_block(0)

        cursor_last_pos = 0
        cursor_current = 1

        for m in self.regex_cpp.finditer(text):
            first = text[m.start()]

            # Comment
            if first == '/':
                data.add_marker(m.start(), m.end(), data.get_marker_types().COMMENT)

            # String literal: the marker excludes the surrounding quotes
            elif first == '"' or first == '\'':
                data.add_marker(m.start() + 1, m.end() - 1, data.get_marker_types().STRING)

            # Preprocessor line (may begin with indentation)
            elif first == ' ' or first == '\t' or first == '#':
                data.add_marker(m.start(), m.end(), data.get_marker_types().PREPROCESSOR)

            # Statement end: whatever name was pending did not open a block
            elif first == ';':
                next_block['name'] = ""
                next_block['start'] = m.end()

            # '>' seen before a class/struct name was confirmed drops the name
            elif first == '>':
                if not next_block['confirmed'] and (next_block['type'] == 'class' or next_block['type'] == 'struct'):
                    next_block['name'] = ""

            # ':' or '<' after a class/struct name confirms that name
            elif first == ':' or first == '<':
                if next_block['type'] == 'class' or next_block['type'] == 'struct':
                    next_block['confirmed'] = True

            # Double empty line: move the potential block start forward
            elif first == '\n' or first == '\r':
                if next_block['name'] == "":
                    next_block['start'] = m.end()

            # Opening bracket
            elif first == '{':
                indent_current += 1

                if next_block['name'] != '':
                    # A name was detected earlier: a named block starts here.
                    blocks.append({'name': next_block['name'],
                                   'start': next_block['start'],
                                   'cursor': next_block['cursor'],
                                   'type': next_block['type'],
                                   'indent_start': indent_current,
                                   'children': []})
                    next_block = reset_next_block(m.end())
                    curblk += 1
                else:
                    # Unnamed bracket: only move the potential start forward.
                    next_block['start'] = m.end()

            # Closing bracket
            elif first == '}':
                if blocks[curblk]['indent_start'] == indent_current:
                    next_block = reset_next_block(m.end())
                    if curblk == 0:
                        # Nothing but the global block is open.
                        warn_mismatch(m.start(), "Non-matching closing bracket '}' detected.")
                        count_mismatched_brackets += 1
                        continue

                    self.finalize_block(text, blocks[curblk], m.end())
                    assert blocks[curblk]['type'] != '__global__'

                    # Attach the finished block to its parent.
                    curblk -= 1
                    assert curblk >= 0
                    blocks[curblk]['children'].append(blocks.pop())

                indent_current -= 1
                if indent_current < 0:
                    warn_mismatch(m.start(), "Non-matching closing bracket '}' detected.")
                    count_mismatched_brackets += 1
                    indent_current = 0

            # class / struct / namespace header
            elif m.group('block_type') is not None:
                if next_block['name'] == "":
                    next_block['name'] = m.group('block_name').strip()
                    if next_block['name'] == "":
                        next_block['name'] = '__noname__'

                    cursor_current += count_newlines(cursor_last_pos, m.start('block_name'))
                    cursor_last_pos = m.start('block_name')
                    next_block['cursor'] = cursor_current

                    next_block['type'] = m.group('block_type').strip()

            # Function-like name followed by '('
            elif m.group('fn_name') is not None:
                # Ignored inside a function block and when a function name is
                # already pending.
                if blocks[curblk]['type'] != 'function' and (next_block['name'] == "" or next_block['type'] != 'function'):
                    next_block['name'] = m.group('fn_name').strip()

                    cursor_current += count_newlines(cursor_last_pos, m.start('fn_name'))
                    cursor_last_pos = m.start('fn_name')
                    next_block['cursor'] = cursor_current

                    next_block['type'] = 'function'

            else:
                assert False, "Unknown match by regular expression"

        # One warning per bracket that was opened but never closed.
        while indent_current > 0:
            warn_mismatch(len(text), "Non-matching opening bracket '{' detected.")
            count_mismatched_brackets += 1
            indent_current -= 1

        # Close everything still open, innermost first, at the end of the text.
        for block in reversed(blocks):
            self.finalize_block(text, block, len(text))

        self.add_lines_data(text, blocks)
        self.add_regions(data, blocks)

        return count_mismatched_brackets
-
|