Browse Source

Added support for code markers. Fixed parsers to start global region at 0 pos.

avkonst 11 years ago
parent
commit
3cde70a3d1

+ 96 - 24
mainline/core/db/loader.py

@@ -118,7 +118,7 @@ class FileRegionData(LoadableData):
         NAMESPACE = 0x08
         FUNCTION  = 0x10
         INTERFACE = 0x20
-        ANY       = 0xFFFFFFFF
+        ANY       = 0xFF
         
         def to_str(self, group):
             if group == self.NONE:
@@ -190,7 +190,9 @@ class Marker(object):
         COMMENT         = 0x01
         STRING          = 0x02
         PREPROCESSOR    = 0x04
+        CODE            = 0x08
         ALL_EXCEPT_CODE = 0x07
+        ANY             = 0xFF
 
         def to_str(self, group):
             if group == self.NONE:
@@ -201,6 +203,8 @@ class Marker(object):
                 return "string"
             elif group == self.PREPROCESSOR:
                 return "preprocessor"
+            elif group == self.CODE:
+                return "code"
             else:
                 assert(False)
         
@@ -307,6 +311,9 @@ class FileData(LoadableData):
         self.load_regions()
         return self.regions[region_id - 1]
     
+    def get_region_types(self):
+        return FileRegionData.T
+
     def iterate_regions(self, filter_group = FileRegionData.T.ANY):
         self.load_regions()
         for each in self.regions:
@@ -332,52 +339,117 @@ class FileData(LoadableData):
                          Marker.T.STRING | Marker.T.PREPROCESSOR,
                          region_id = None, exclude_children = True):
         self.load_markers()
+
         if region_id == None:
+            # TODO bug here - does not handle CODE markers
             for each in self.markers:
                 if each.group & filter_group:
                     yield each
+        
         else:
+            # per region
             region = self.get_region(region_id)
             if region != None:
-                if hasattr(region, 'markers_list') == False:
-                    def cache_markers_list_req(data, region_id, marker_start_pos):
+                
+                # code parsers and database know about non-code markers
+                # clients want to iterate code as markers as well
+                # so, we embed code markers in run-time
+                class CodeMarker(Marker):
+                    pass
+                
+                # cache markers for all regions if it does not exist
+                if hasattr(region, '_markers_list') == False:
+                    # subroutine to populate _markers_list attribute
+                    # _markers_list does include code markers
+                    def cache_markers_list_rec(data, region_id, marker_start_ind, next_code_marker_start):
                         region = data.get_region(region_id)
-                        region.markers_list = []
-                        region.first_marker_pos = marker_start_pos
+                        region._markers_list = []
+                        region._first_marker_ind = marker_start_ind
+                        #next_code_marker_start = region.get_offset_begin()
+                        
                         for sub_id in region.iterate_subregion_ids():
                             subregion = data.get_region(sub_id)
-                            while len(data.markers) > marker_start_pos and \
-                                subregion.get_offset_begin() > data.markers[marker_start_pos].get_offset_begin():
-                                    region.markers_list.append(marker_start_pos)
-                                    marker_start_pos += 1
-                            marker_start_pos = cache_markers_list_req(data, sub_id, marker_start_pos)
-                        while len(data.markers) > marker_start_pos and \
-                            region.get_offset_end() > data.markers[marker_start_pos].get_offset_begin():
-                                region.markers_list.append(marker_start_pos)
-                                marker_start_pos += 1
-                        return marker_start_pos
-                    next_marker_pos = cache_markers_list_req(self, 1, 0)
+                            # cache all markers before the subregion
+                            while len(data.markers) > marker_start_ind and \
+                                subregion.get_offset_begin() > data.markers[marker_start_ind].get_offset_begin():
+                                    if next_code_marker_start < data.markers[marker_start_ind].get_offset_begin():
+                                        # append code markers coming before non-code marker
+                                        region._markers_list.append(CodeMarker(next_code_marker_start,
+                                                                               data.markers[marker_start_ind].get_offset_begin(),
+                                                                               Marker.T.CODE))
+                                    next_code_marker_start = data.markers[marker_start_ind].get_offset_end()
+                                    region._markers_list.append(marker_start_ind)
+                                    marker_start_ind += 1
+                                    
+                            # cache all code markers before the subregion but after the last marker
+                            if next_code_marker_start < subregion.get_offset_begin():
+                                region._markers_list.append(CodeMarker(next_code_marker_start,
+                                                                       subregion.get_offset_begin(),
+                                                                       Marker.T.CODE))
+                            next_code_marker_start = subregion.get_offset_begin()
+                                
+                            # here is the recursive call for all sub-regions
+                            (marker_start_ind, next_code_marker_start) = cache_markers_list_rec(data,
+                                                                      sub_id,
+                                                                      marker_start_ind,
+                                                                      next_code_marker_start)
+                            
+                        # cache all markers after the last subregion
+                        while len(data.markers) > marker_start_ind and \
+                            region.get_offset_end() > data.markers[marker_start_ind].get_offset_begin():
+                                # append code markers coming before non-code marker
+                                if next_code_marker_start < data.markers[marker_start_ind].get_offset_begin():
+                                    region._markers_list.append(CodeMarker(next_code_marker_start,
+                                                                           data.markers[marker_start_ind].get_offset_begin(),
+                                                                           Marker.T.CODE))
+                                next_code_marker_start = data.markers[marker_start_ind].get_offset_end()
+                                region._markers_list.append(marker_start_ind)
+                                marker_start_ind += 1
+                        
+                        # cache the last code segment after the last marker
+                        if next_code_marker_start < region.get_offset_end():
+                            region._markers_list.append(CodeMarker(next_code_marker_start,
+                                                                   region.get_offset_end(),
+                                                                   Marker.T.CODE))
+                        next_code_marker_start = region.get_offset_end()
+                        
+                        # return the starting point for the next call of this function
+                        return (marker_start_ind, next_code_marker_start)
+                    
+                    # append markers list to all regions recursively
+                    (next_marker_pos, next_code_marker_start) = cache_markers_list_rec(self, 1, 0, 0)
                     assert(next_marker_pos == len(self.markers))
+                
+                # excluding subregions
                 if exclude_children == True:
-                    for marker_pos in region.markers_list:
-                        marker = self.markers[marker_pos]
+                    for marker_ind in region._markers_list:
+                        if isinstance(marker_ind, int):
+                            marker = self.markers[marker_ind]
+                        else:
+                            marker = marker_ind # CodeMarker
                         if marker.group & filter_group:
                             yield marker
-                elif len(self.markers) > region.first_marker_pos:
-                    for marker in self.markers[region.first_marker_pos:]:
+                            
+                            
+                # including subregions
+                else:
+                    next_code_marker_start = region.get_offset_begin() #TODO bug here global region does not start at 0
+                    for marker in self.markers[region._first_marker_ind:]:
                         if marker.get_offset_begin() >= region.get_offset_end():
                             break
                         if region.get_offset_begin() > marker.get_offset_begin():
                             continue
+                        if Marker.T.CODE & filter_group and next_code_marker_start < marker.get_offset_begin():
+                            yield Marker(next_code_marker_start, marker.get_offset_begin(), Marker.T.CODE)
                         if marker.group & filter_group:
                             yield marker
-                        
+                        next_code_marker_start = marker.get_offset_end()
+                    if Marker.T.CODE & filter_group and next_code_marker_start < region.get_offset_end():
+                        yield Marker(next_code_marker_start, region.get_offset_end(), Marker.T.CODE)
+
     def get_marker_types(self):
         return Marker.T
 
-    def get_region_types(self):
-        return FileRegionData.T
-
     def are_markers_loaded(self):
         return self.markers != None
 

+ 42 - 42
mainline/core/db/utils.py

@@ -17,55 +17,55 @@
 #    along with Metrix++.  If not, see <http://www.gnu.org/licenses/>.
 #
 
-class FileRegionsDisposableGetter(object):
-    
-    def __init__(self, file_data):
-        self.checksums = {}
-        self.names = {}
-        
-        for each in file_data.iterate_regions():
-            if each.get_checksum() not in self.checksums:
-                self.checksums[each.get_checksum()] = []
-            self.checksums[each.get_checksum()].append((each.get_id(), each.get_name())) 
-            
-            if each.get_name() not in self.names:
-                self.names[each.get_name()] = []
-            self.names[each.get_name()].append((each.get_id(), each.get_checksum())) 
-        
-    def get_next_id_once_by_checksum(self, checksum):
-        if checksum not in self.checksums.keys():
-            return None
-
-        if len(self.checksums[checksum]) == 0:
-            return None
-        
-        elem = self.checksums[checksum].pop(0)
-        next_id = elem[0]
-        next_name = elem[1]
-
-        self.names[next_name].remove((next_id, checksum))
-        return next_id
+class FileRegionsMatcher(object):
 
-    def get_next_id_once_by_name(self, name):
-        if name not in self.names.keys():
-            return None
+    class FileRegionsDisposableGetter(object):
         
-        if len(self.names[name]) == 0:
-            return None
-        
-        elem = self.names[name].pop(0)
-        next_id = elem[0]
-        next_checksum = elem[1]
-
-        self.checksums[next_checksum].remove((next_id, name))
-        return next_id
+        def __init__(self, file_data):
+            self.checksums = {}
+            self.names = {}
+            
+            for each in file_data.iterate_regions():
+                if each.get_checksum() not in self.checksums:
+                    self.checksums[each.get_checksum()] = []
+                self.checksums[each.get_checksum()].append((each.get_id(), each.get_name())) 
+                
+                if each.get_name() not in self.names:
+                    self.names[each.get_name()] = []
+                self.names[each.get_name()].append((each.get_id(), each.get_checksum())) 
+            
+        def get_next_id_once_by_checksum(self, checksum):
+            if checksum not in self.checksums.keys():
+                return None
     
-class FileRegionsMatcher(object):
+            if len(self.checksums[checksum]) == 0:
+                return None
+            
+            elem = self.checksums[checksum].pop(0)
+            next_id = elem[0]
+            next_name = elem[1]
+    
+            self.names[next_name].remove((next_id, checksum))
+            return next_id
+    
+        def get_next_id_once_by_name(self, name):
+            if name not in self.names.keys():
+                return None
+            
+            if len(self.names[name]) == 0:
+                return None
+            
+            elem = self.names[name].pop(0)
+            next_id = elem[0]
+            next_checksum = elem[1]
+    
+            self.checksums[next_checksum].remove((next_id, name))
+            return next_id
     
     def __init__(self, file_data, prev_file_data):
         self.ids = [None] # add one to shift id from zero
         
-        once_filter = FileRegionsDisposableGetter(prev_file_data)
+        once_filter = self.FileRegionsDisposableGetter(prev_file_data)
         unmatched_region_ids = []
         for (ind, region) in enumerate(file_data.iterate_regions()):
             assert(ind + 1 == region.get_id())

+ 31 - 5
mainline/doc/home.html

@@ -145,7 +145,13 @@
           </div>
           <div class="span9">
             <h5 class="text-right">Management of source code quality is possible.</h5>
-            <p class="text-right"><a href="https://sourceforge.net/projects/metrixplusplus/files/latest/download"><button type="button"class="btn btn-danger">Download</button></a> <button type="button"class="btn btn-warning">Donate</button></p>
+            <p class="text-right">
+                <a href="https://sourceforge.net/projects/metrixplusplus/files/latest/download"
+                    ><button type="button"class="btn btn-danger">Download</button></a>
+                <!--
+                <button type="button"class="btn btn-warning">Donate</button>
+                -->
+            </p>
           </div>
         </div>
       </div>
@@ -162,7 +168,11 @@
           <li><a href="#download_section"><i class="icon-chevron-right"></i> Download &amp; Install</a></li>
           <li><a href="#workflow_collect_section"><i class="icon-chevron-right"></i> Workflow: Collect data</a></li>
           <li><a href="#workflow_view_section"><i class="icon-chevron-right"></i> Workflow: View data</a></li>
+          <li><a href="#workflow_view_distributions_section"><i class="icon-hand-right"></i> &middot; distributions</a></li>
+          <li><a href="#workflow_view_details_section"><i class="icon-hand-right"></i> &middot; details</a></li>
           <li><a href="#workflow_limit_section"><i class="icon-chevron-right"></i> Workflow: Apply thresholds</a></li>
+          <li><a href="#workflow_limit_hotspots_section"><i class="icon-hand-right"></i> &middot; hotspots</a></li>
+          <li><a href="#workflow_limit_suppress_section"><i class="icon-hand-right"></i> &middot; suppressions</a></li>
           <li><a href="#extend_section"><i class="icon-chevron-right"></i> Create plugin</a></li>
           <li><a href="#contribute_section"><i class="icon-chevron-right"></i> Feedback &amp; Contribute</a></li>
         </ul>
@@ -187,7 +197,7 @@
             <li>Assisting on a <strong>per minute</strong> basis during code refactoring and code development, where coding and quality standards matter.</li>
           </ul>
 
-          <p>The workflow explained <a href="#workflow_collect_section">below</a> demonstrates basic application principles.</p>
+          <p>The workflow sections explain basic application principles.</p>
 
           <h3>Languages supported</h3>
           <p>The tool can parse C/C++, C# and Java source code files. The parser identifies certain regions in the code,
@@ -220,7 +230,7 @@ public:
     MyClass(): m_var(0) {
         char str[] = "unused string"
         
-        // nested region for better taste
+        // nested region for good measure
         struct MyStruct {};
     }
     
@@ -248,8 +258,8 @@ file: __global__: code
         function: MyClass: code
         function: MyClass: code, string
         function: MyClass: code
-            class: MyStruct: comment
-            class: MyStruct: code
+            struct: MyStruct: comment
+            struct: MyStruct: code
         function: MyClass: code
     class: MyClass: code
         function: set_max: comment
@@ -317,6 +327,16 @@ file: __global__: comment
                 <td><ul><li>Identification of highly complex code for review and refactoring.</li>
                     <li>Preventing complex functions (complexity is a cause of many defects and of expensive maintenance).</li></ul></td>
               </tr>
+              <tr>
+                <td>std.suppress</td>
+                <td>An option enables collection of Metrix++ suppressions and 2 metrics: 'std.suppress:count' and 
+                    'std.suppress.file:count'. The first is the number of suppressions per region.
+                    The second is the same but applies to file-scope metrics.</td>
+                <td><ul><li>Suppressing false-positives.</li>
+                    <li>Managing the number of suppressions. Usually there are no false-positives to suppress with the right metric,
+                        but there could be exceptions in specific cases. Managing suppressions is about managing exceptions.
+                        If there are many exceptional cases, maybe something is wrong with a metric or an application of a metric.</li></ul></td>
+              </tr>
               <tr class="info">
                 <td>std.general.procerrors</td>
                 <td>Number of errors detected by Metrix++ code parser.
@@ -378,6 +398,12 @@ file: __global__: comment
           <h2>Apply thresholds</h2>
           <p>...</p>
         </section>
+        <section id="workflow_limit_suppress_section">
+          <h3>Suppressions</h3>
+          <p>Metrix++ has a suppression capability. Suppressions are collected from comments in code
+             and used by post-processing tools, like 'limit'. This allows fine-grained control
+             over false-positive warnings, if there are any.</p>
+        </section>
 
         <section id="extend_section">
           <div class="page-header">

+ 4 - 2
mainline/ext/std/code/cpp.py

@@ -93,8 +93,10 @@ class CppCodeParser(object):
         return self.parse(data)
         
     def finalize_block(self, text, block, block_end):
-        space_match = re.match('^\s*', text[block['start']:block_end], re.MULTILINE)
-        block['start'] += space_match.end() # trim spaces at the beginning
+        if block['type'] != '__global__':
+            # do not trim spaces for __global__region
+            space_match = re.match('^\s*', text[block['start']:block_end], re.MULTILINE)
+            block['start'] += space_match.end() # trim spaces at the beginning
         block['end'] = block_end
 
         start_pos = block['start']

+ 4 - 2
mainline/ext/std/code/cs.py

@@ -107,8 +107,10 @@ class CsCodeParser(object):
         return self.parse(data)
         
     def finalize_block(self, text, block, block_end):
-        space_match = re.match('^\s*', text[block['start']:block_end], re.MULTILINE)
-        block['start'] += space_match.end() # trim spaces at the beginning
+        if block['type'] != '__global__':
+            # do not trim spaces for __global__region
+            space_match = re.match('^\s*', text[block['start']:block_end], re.MULTILINE)
+            block['start'] += space_match.end() # trim spaces at the beginning
         block['end'] = block_end
 
         start_pos = block['start']

+ 4 - 2
mainline/ext/std/code/java.py

@@ -87,8 +87,10 @@ class JavaCodeParser(object):
         return self.parse(data)
         
     def finalize_block(self, text, block, block_end):
-        space_match = re.match('^\s*', text[block['start']:block_end], re.MULTILINE)
-        block['start'] += space_match.end() # trim spaces at the beginning
+        if block['type'] != '__global__':
+            # do not trim spaces for __global__region
+            space_match = re.match('^\s*', text[block['start']:block_end], re.MULTILINE)
+            block['start'] += space_match.end() # trim spaces at the beginning
         block['end'] = block_end
 
         start_pos = block['start']

+ 2 - 2
mainline/ext/std/code/lines.py

@@ -67,8 +67,8 @@ class Plugin(core.api.Plugin, core.api.Child, core.api.IConfigurable):
         is_updated = is_updated or self.is_updated
         if is_updated == True:
             if self.is_active_code == True:
-                self.count_in_code(data,
-                                   data.get_marker_types().ALL_EXCEPT_CODE,
+                self.count_in_markers(data,
+                                   data.get_marker_types().CODE,
                                    'code')
             if self.is_active_preprocessor == True:
                 self.count_in_markers(data,

+ 19 - 3
mainline/ext/std/code/test.py

@@ -31,8 +31,24 @@ class Plugin(core.api.Plugin, core.api.Child):
     def callback(self, parent, data, is_updated):
 
         text = data.get_content()
+        text_comb = ""
         for region in data.iterate_regions():
-            logging.warn(region.get_name() + " " + region.get_cursor())
-            for marker in data.iterate_markers(region_id=region.get_id(), exclude_children = True):
+            logging.warn(region.get_name() + " " + str(region.get_cursor()))
+            for marker in data.iterate_markers(region_id=region.get_id(),
+                                               filter_group = data.get_marker_types().ANY,
+                                               exclude_children = True):
                 logging.warn("\tMarker: " + data.get_marker_types()().to_str(marker.get_type()) +
-                             " " + text[marker.get_offset_begin():marker.get_offset_end()])
+                             " " + str(marker.get_offset_begin()) + " " + str(marker.get_offset_end()) +
+                             " >>>" + text[marker.get_offset_begin():marker.get_offset_end()] + "<<<")
+                text_comb += text[marker.get_offset_begin():marker.get_offset_end()]
+        print "LENGTH:", len(text), len(text_comb)
+
+        text_comb = ""
+        for marker in data.iterate_markers(region_id=1,
+                                           filter_group = data.get_marker_types().ANY,
+                                           exclude_children = False):
+            logging.warn("\tMarker: " + data.get_marker_types()().to_str(marker.get_type()) +
+                         " " + str(marker.get_offset_begin()) + " " + str(marker.get_offset_end()) +
+                         " >>>" + text[marker.get_offset_begin():marker.get_offset_end()] + "<<<")
+            text_comb += text[marker.get_offset_begin():marker.get_offset_end()]
+        print "LENGTH:", len(text), len(text_comb)

+ 3 - 3
mainline/tests/general/test_basic.py

@@ -216,14 +216,14 @@ class Test(tests.common.TestCase):
                                           '--std.code.lines.total'])
         self.assertExec(runner.run())
 
-        runner = tests.common.ToolRunner('view', ['--format=txt'], prefix='txt')
-        self.assertExec(runner.run())
-
         runner = tests.common.ToolRunner('view',
                                          ['--nest-regions', '--format=txt'],
                                          prefix='nest_per_file',
                                          dirs_list=['./simple.cpp'])
         self.assertExec(runner.run())
 
+        runner = tests.common.ToolRunner('view', ['--format=txt'], prefix='txt')
+        self.assertExec(runner.run())
+
 if __name__ == '__main__':
     unittest.main()

+ 2 - 2
mainline/tests/general/test_basic/test_std_general_metrics_view_nest_per_file_stdout.gold.txt

@@ -15,10 +15,10 @@ data:
 .   .   .   .   .   cursor="0"
 .   .   .   .   .   name="__global__"
 .   .   .   .   .   offset_end="487"
-.   .   .   .   .   line_begin="3"
+.   .   .   .   .   line_begin="1"
 .   .   .   .   .   type="global"
 .   .   .   .   .   line_end="52"
-.   .   .   .   .   offset_begin="2"
+.   .   .   .   .   offset_begin="0"
 .   .   .   .   data: 
 .   .   .   .   subregions:
 .   .   .   .   

+ 2 - 2
mainline/tests/general/test_basic/test_std_lines_metrics_view_nest_per_file_stdout.gold.txt

@@ -15,10 +15,10 @@ data:
 .   .   .   .   .   cursor="0"
 .   .   .   .   .   name="__global__"
 .   .   .   .   .   offset_end="487"
-.   .   .   .   .   line_begin="3"
+.   .   .   .   .   line_begin="1"
 .   .   .   .   .   type="global"
 .   .   .   .   .   line_end="52"
-.   .   .   .   .   offset_begin="2"
+.   .   .   .   .   offset_begin="0"
 .   .   .   .   data:  
 .   .   .   .   .   std.code.lines: 
 .   .   .   .   .   .   code="0"

+ 1 - 1
mainline/tests/general/test_basic/test_view_format_view_nest_per_file_stdout.gold.txt

@@ -5,7 +5,7 @@
         <file-data>
             <regions>
                 <region>
-                    <info cursor="0" name="__global__" offset_end="639" line_begin="3" type="global" line_end="65" offset_begin="2" />
+                    <info cursor="0" name="__global__" offset_end="639" line_begin="1" type="global" line_end="65" offset_begin="0" />
                     <data />
                     <subregions>
                         <subregion>

+ 1 - 1
mainline/tests/general/test_basic/test_workflow_view_second_per_file_stdout.gold.txt

@@ -5,7 +5,7 @@
         <file-data>
             <regions>
                 <region>
-                    <info cursor="0" name="__global__" offset_end="639" line_begin="3" type="global" line_end="65" offset_begin="2" />
+                    <info cursor="0" name="__global__" offset_end="639" line_begin="1" type="global" line_end="65" offset_begin="0" />
                     <data />
                 </region>
                 <region>

+ 4 - 4
mainline/tests/general/test_std_code_cpp/test_parser_view_files_stdout.gold.txt

@@ -15,10 +15,10 @@ data:
 .   .   .   .   .   cursor="0"
 .   .   .   .   .   name="__global__"
 .   .   .   .   .   offset_end="163"
-.   .   .   .   .   line_begin="2"
+.   .   .   .   .   line_begin="1"
 .   .   .   .   .   type="global"
 .   .   .   .   .   line_end="20"
-.   .   .   .   .   offset_begin="1"
+.   .   .   .   .   offset_begin="0"
 .   .   .   .   data: 
 .   .   
 .   .   .   region:  
@@ -111,10 +111,10 @@ data:
 .   .   .   .   .   cursor="0"
 .   .   .   .   .   name="__global__"
 .   .   .   .   .   offset_end="2068"
-.   .   .   .   .   line_begin="2"
+.   .   .   .   .   line_begin="1"
 .   .   .   .   .   type="global"
 .   .   .   .   .   line_end="86"
-.   .   .   .   .   offset_begin="1"
+.   .   .   .   .   offset_begin="0"
 .   .   .   .   data: 
 .   .   
 .   .   .   region:  

+ 2 - 2
mainline/tests/general/test_std_code_java/test_parser_view_files_stdout.gold.txt

@@ -697,10 +697,10 @@ data:
 .   .   .   .   .   cursor="0"
 .   .   .   .   .   name="__global__"
 .   .   .   .   .   offset_end="133"
-.   .   .   .   .   line_begin="2"
+.   .   .   .   .   line_begin="1"
 .   .   .   .   .   type="global"
 .   .   .   .   .   line_end="10"
-.   .   .   .   .   offset_begin="1"
+.   .   .   .   .   offset_begin="0"
 .   .   .   .   data: 
 .   .   .   .   subregions:
 .   .   .   .   

+ 3 - 0
mainline/tools/debug.py

@@ -87,6 +87,9 @@ def dumphtml(args, loader):
                 result += ('<span style="color:#009900">')
             elif marker.get_type() == data.get_marker_types().PREPROCESSOR:
                 result += ('<span style="color:#990000">')
+            else:
+                # TODO add tests for debug tool
+                assert False, "Uknown marker type"
             result += (cgi.escape(text[marker.begin:marker.end]))
             result += ('</span>')
             last_pos = marker.end