
Newly developed version published.

avkonst 12 years ago
commit 2a0eddb455
39 files changed with 3110 additions and 0 deletions
  1. mainline/.project (+17 -0)
  2. mainline/.pydevproject (+11 -0)
  3. mainline/collect.py (+32 -0)
  4. mainline/core/__init__.py (+0 -0)
  5. mainline/core/api.py (+116 -0)
  6. mainline/core/cmdparser.py (+26 -0)
  7. mainline/core/db/__init__.py (+0 -0)
  8. mainline/core/db/loader.py (+776 -0)
  9. mainline/core/db/post.py (+36 -0)
  10. mainline/core/db/sqlite.py (+543 -0)
  11. mainline/core/db/utils.py (+76 -0)
  12. mainline/core/dir.py (+88 -0)
  13. mainline/core/export/__init__.py (+0 -0)
  14. mainline/core/export/convert.py (+20 -0)
  15. mainline/core/export/cout.py (+28 -0)
  16. mainline/core/export/utils/__init__.py (+0 -0)
  17. mainline/core/export/utils/py2txt.py (+120 -0)
  18. mainline/core/export/utils/py2xml.py (+123 -0)
  19. mainline/core/ext-priority/core.db.post.ini (+7 -0)
  20. mainline/core/ext-priority/core.dir.ini (+7 -0)
  21. mainline/core/ext-priority/core.log.ini (+7 -0)
  22. mainline/core/loader.py (+111 -0)
  23. mainline/core/log.py (+34 -0)
  24. mainline/core/warn.py (+106 -0)
  25. mainline/export.py (+168 -0)
  26. mainline/ext/std/__init__.py (+0 -0)
  27. mainline/ext/std/code/__init__.py (+0 -0)
  28. mainline/ext/std/code/api.py (+8 -0)
  29. mainline/ext/std/code/brackets.ini (+7 -0)
  30. mainline/ext/std/code/brackets.py (+27 -0)
  31. mainline/ext/std/code/complexity.ini (+7 -0)
  32. mainline/ext/std/code/complexity.py (+47 -0)
  33. mainline/ext/std/code/cpp.ini (+7 -0)
  34. mainline/ext/std/code/cpp.py (+290 -0)
  35. mainline/ext/std/code/dumper.ini (+7 -0)
  36. mainline/ext/std/code/dumper.py (+63 -0)
  37. mainline/ext/std/code/test.ini (+7 -0)
  38. mainline/ext/std/code/test.py (+30 -0)
  39. mainline/limit.py (+158 -0)

+ 17 - 0
mainline/.project

@@ -0,0 +1,17 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<projectDescription>
+	<name>plugins</name>
+	<comment></comment>
+	<projects>
+	</projects>
+	<buildSpec>
+		<buildCommand>
+			<name>org.python.pydev.PyDevBuilder</name>
+			<arguments>
+			</arguments>
+		</buildCommand>
+	</buildSpec>
+	<natures>
+		<nature>org.python.pydev.pythonNature</nature>
+	</natures>
+</projectDescription>

+ 11 - 0
mainline/.pydevproject

@@ -0,0 +1,11 @@
+<?xml version="1.0" encoding="UTF-8" standalone="no"?>
+<?eclipse-pydev version="1.0"?>
+
+<pydev_project>
+<pydev_property name="org.python.pydev.PYTHON_PROJECT_INTERPRETER">Default</pydev_property>
+<pydev_property name="org.python.pydev.PYTHON_PROJECT_VERSION">python 2.7</pydev_property>
+<pydev_pathproperty name="org.python.pydev.PROJECT_SOURCE_PATH">
+<path>/plugins</path>
+<path>/plugins/ext</path>
+</pydev_pathproperty>
+</pydev_project>

+ 32 - 0
mainline/collect.py

@@ -0,0 +1,32 @@
+'''
+Created on 26/06/2012
+
+@author: konstaa
+'''
+
+
+import logging
+import os.path
+import time
+
+import core.loader
+import core.log
+import core.cmdparser
+
+
+def main():
+    loader = core.loader.Loader()
+    parser = core.cmdparser.MultiOptionParser(usage="Usage: %prog [options] -- <path 1> ... <path N>")
+    args = loader.load(os.path.join(os.path.dirname(os.path.abspath(__file__)), 'ext'), parser)
+    logging.debug("Registered plugins:")
+    logging.debug(loader)
+    exit_code = loader.run(args)
+    loader.unload()
+    return exit_code
+            
+if __name__ == '__main__':
+    ts = time.time()
+    core.log.set_default_format()
+    exit_code = main()
+    logging.warning("Exit code: " + str(exit_code) + ". Time spent: " + str(round((time.time() - ts), 2)) + " seconds. Done")
+    exit(exit_code) # number of reported messages, errors are reported as non-handled exceptions

+ 0 - 0
mainline/core/__init__.py


+ 116 - 0
mainline/core/api.py

@@ -0,0 +1,116 @@
+'''
+Created on 25/07/2012
+
+@author: konstaa
+'''
+
+class Plugin(object):
+    
+    def initialize(self):
+        pass
+    
+    def terminate(self):
+        pass
+    
+    def set_name(self, name):
+        self.name = name
+
+    def get_name(self):
+        if hasattr(self, 'name') == False:
+            return None
+        return self.name
+
+    def set_plugin_loader(self, loader):
+        self.plugin_loader = loader
+
+    def get_plugin_loader(self):
+        if hasattr(self, 'plugin_loader') == False:
+            return None
+        return self.plugin_loader
+
+class InterfaceNotImplemented(Exception):
+    
+    def __init__(self, obj):
+        import sys
+        Exception.__init__(self, "Method '"
+                            + sys._getframe(1).f_code.co_name
+                            + "' has not been implemented for "
+                            + str(obj.__class__))
+
+class IConfigurable(object):
+    
+    def configure(self, options):
+        raise InterfaceNotImplemented(self)
+
+    def declare_configuration(self, optparser):
+        raise InterfaceNotImplemented(self)
+
+class IRunable(object):
+    def run(self, args):
+        raise InterfaceNotImplemented(self)
+    
+
+class CallbackNotImplemented(Exception):
+    
+    def __init__(self, obj, callback_name):
+        Exception.__init__(self, "Callback '"
+                           + callback_name
+                           + "' has not been implemented for "
+                           + str(obj.__class__))
+
+class Child(object):
+    
+    def notify(self, parent, callback_name, *args):
+        if hasattr(self, callback_name) == False:
+            raise CallbackNotImplemented(self, callback_name)
+        self.__getattribute__(callback_name)(parent, *args)
+
+class Parent(object):
+    
+    def init_Parent(self):
+        if hasattr(self, 'children') == False:
+            self.children = []
+            
+    def subscribe(self, obj, callback_name):
+        self.init_Parent()
+        if (isinstance(obj, Child) == False):
+            raise TypeError()
+        self.children.append((obj,callback_name))
+
+    def unsubscribe(self, obj, callback_name):
+        self.init_Parent()
+        self.children.remove((obj, callback_name))
+
+    def notify_children(self, *args):
+        self.init_Parent()
+        for child in self.children:
+            child[0].notify(self, child[1], *args)
+
+    def iterate_children(self):
+        self.init_Parent()
+        for child in self.children:
+            yield child
+
+class ExitError(Exception):
+    def __init__(self, plugin, reason):
+        Exception.__init__(self, "Plugin '"
+                           + plugin.get_name()
+                           + "' requested abnormal termination: "
+                           + reason)
+
+def subscribe_by_parents_name(parent_name, child, callback_name='callback'):
+    child.get_plugin_loader().get_plugin(parent_name).subscribe(child, callback_name)
+
+
+# interfaces for subscription
+class ICode(object):
+    pass
+
+def subscribe_by_parents_interface(interface, child, callback_name='callback'):
+    for plugin in child.get_plugin_loader().iterate_plugins():
+        if isinstance(plugin, interface):
+            plugin.subscribe(child, callback_name)
+
+
+
+

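The Parent/Child pair defined in api.py is a small publish/subscribe mechanism: a child registers a callback name with a parent, and notify_children() dispatches to that callback by attribute lookup, passing the parent plus any extra arguments. A minimal usage sketch (assuming the mainline directory is on sys.path so core.api is importable; Publisher, Subscriber and on_metric are hypothetical names):

    import core.api

    class Publisher(core.api.Parent):
        pass

    class Subscriber(core.api.Child):
        # callback signature is (parent, *args), as dispatched by Child.notify()
        def on_metric(self, parent, value):
            print("notified with " + str(value))

    publisher = Publisher()
    subscriber = Subscriber()
    publisher.subscribe(subscriber, 'on_metric')  # raises TypeError for non-Child objects
    publisher.notify_children(42)                 # calls subscriber.on_metric(publisher, 42)
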
+ 26 - 0
mainline/core/cmdparser.py

@@ -0,0 +1,26 @@
+'''
+Created on 8/02/2013
+
+@author: konstaa
+'''
+
+import optparse
+
+class MultiOptionParser(optparse.OptionParser):
+    
+    class MultipleOption(optparse.Option):
+        ACTIONS = optparse.Option.ACTIONS + ("multiopt",)
+        STORE_ACTIONS = optparse.Option.STORE_ACTIONS + ("multiopt",)
+        TYPED_ACTIONS = optparse.Option.TYPED_ACTIONS + ("multiopt",)
+        ALWAYS_TYPED_ACTIONS = optparse.Option.ALWAYS_TYPED_ACTIONS + ("multiopt",)
+    
+        def take_action(self, action, dest, opt, value, values, parser):
+            if action == "multiopt":
+                values.ensure_value(dest, []).append(value)
+            else:
+                optparse.Option.take_action(self, action, dest, opt, value, values, parser)
+
+    
+    def __init__(self, *args, **kwargs):
+        optparse.OptionParser.__init__(self, *args, option_class=self.MultipleOption, **kwargs)
+        

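A short usage sketch for the custom "multiopt" action registered by MultiOptionParser (assuming core.cmdparser is importable; the --path option is hypothetical): each occurrence of the option is appended to a list on the destination attribute instead of overwriting the previous value.

    import core.cmdparser

    parser = core.cmdparser.MultiOptionParser()
    parser.add_option("--path", action="multiopt", type="string", dest="paths")
    options, args = parser.parse_args(["--path", "src", "--path", "tests"])
    print(options.paths)  # ['src', 'tests']
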
+ 0 - 0
mainline/core/db/__init__.py


+ 776 - 0
mainline/core/db/loader.py

@@ -0,0 +1,776 @@
+'''
+Created on 25/07/2012
+
+@author: konstaa
+'''
+
+import logging
+import os.path
+
+import core.api
+import core.db.sqlite
+
+####################################
+# Data Interface
+####################################
+
+class Data(object):
+
+    def __init__(self):
+        self.data = {}
+
+    def get_data(self, namespace, field):
+        if namespace not in self.data.keys():
+            return None
+        if field not in self.data[namespace].keys():
+            return None
+        return self.data[namespace][field]
+
+    def set_data(self, namespace, field, value):
+        if namespace not in self.data:
+            self.data[namespace] = {}
+        self.data[namespace][field] = value
+    
+    def iterate_namespaces(self):
+        for namespace in self.data.keys():
+            yield namespace
+            
+    def iterate_fields(self, namespace):
+        for field in self.data[namespace].keys():
+            yield (field, self.data[namespace][field])
+
+    def get_data_tree(self, namespaces=None):
+        return self.data
+
+    def __repr__(self):
+        return object.__repr__(self) + " with data " + self.data.__repr__()
+
+
+class LoadableData(Data):
+    
+    def __init__(self, loader, file_id, region_id):
+        Data.__init__(self)
+        self.loader = loader
+        self.file_id = file_id
+        self.region_id = region_id
+        self.loaded_namespaces = []
+        self.changed_namespaces = []
+
+    def load_namespace(self, namespace):
+        try:
+            row = self.loader.db.get_row(namespace, self.file_id, self.region_id)
+        except Exception:
+            logging.debug("No data in the database for namespace: " + namespace)
+            return
+        if row == None:
+            return 
+        for column_name in row.keys():
+            packager = self.loader.get_namespace(namespace).get_field_packager(column_name)
+            if packager == None:
+                continue
+            Data.set_data(self, namespace, column_name, packager.unpack(row[column_name]))
+        
+    def set_data(self, namespace, field, value):
+        if namespace not in self.changed_namespaces:
+            self.changed_namespaces.append(namespace)
+        return Data.set_data(self, namespace, field, value)
+
+    def get_data(self, namespace, field):
+        if namespace not in self.loaded_namespaces:
+            self.loaded_namespaces.append(namespace)
+            self.load_namespace(namespace)
+        return Data.get_data(self, namespace, field)
+    
+    def is_namespace_updated(self, namespace):
+        return namespace in self.changed_namespaces
+
+    def is_namespace_loaded(self, namespace):
+        return namespace in self.loaded_namespaces
+
+    def get_data_tree(self, namespaces=None):
+        if namespaces == None:
+            namespaces = self.loader.iterate_namespace_names()
+        for each in namespaces:
+            self.load_namespace(each)
+        return Data.get_data_tree(self)
+    
+class FileRegionData(LoadableData):
+    class T(object):
+        NONE      = 0x00
+        GLOBAL    = 0x01
+        CLASS     = 0x02
+        STRUCT    = 0x04
+        NAMESPACE = 0x08
+        FUNCTION  = 0x10
+        ANY       = 0xFFFFFFFF
+        
+        def to_str(self, group):
+            if group == self.NONE:
+                return "none"
+            elif group == self.GLOBAL:
+                return "global"
+            elif group == self.CLASS:
+                return "class"
+            elif group == self.STRUCT:
+                return "struct"
+            elif group == self.NAMESPACE:
+                return "namespace"
+            elif group == self.FUNCTION:
+                return "function"
+            else:
+                assert(False)
+    
+    def __init__(self, loader, file_id, region_id, region_name, offset_begin, offset_end, line_begin, line_end, cursor_line, group, checksum):
+        LoadableData.__init__(self, loader, file_id, region_id)
+        self.name = region_name
+        self.begin = offset_begin
+        self.end = offset_end
+        self.line_begin = line_begin
+        self.line_end = line_end
+        self.cursor = cursor_line
+        self.group = group
+        self.checksum = checksum
+        
+        self.children = []
+    
+    def get_id(self):
+        return self.region_id
+
+    def get_name(self):
+        return self.name
+
+    def get_offset_begin(self):
+        return self.begin
+
+    def get_offset_end(self):
+        return self.end
+
+    def get_line_begin(self):
+        return self.line_begin
+
+    def get_line_end(self):
+        return self.line_end
+
+    def get_cursor(self):
+        return self.cursor
+
+    def get_type(self):
+        return self.group
+
+    def get_checksum(self):
+        return self.checksum
+    
+    def register_subregion_id(self, child_id):
+        self.children.append(child_id)
+
+    def iterate_subregion_ids(self):
+        return self.children
+
+class Marker(object):
+    class T(object):
+        NONE            = 0x00
+        COMMENT         = 0x01
+        STRING          = 0x02
+        PREPROCESSOR    = 0x04
+        ALL_EXCEPT_CODE = 0x07
+        
+    def __init__(self, offset_begin, offset_end, group):
+        self.begin = offset_begin
+        self.end = offset_end
+        self.group = group
+        
+    def get_offset_begin(self):
+        return self.begin
+
+    def get_offset_end(self):
+        return self.end
+
+    def get_type(self):
+        return self.group
+
+class FileData(LoadableData):
+    
+    def __init__(self, loader, path, file_id, checksum, content):
+        LoadableData.__init__(self, loader, file_id, None)
+        self.path = path
+        self.checksum = checksum
+        self.content = content
+        self.regions = None
+        self.markers = None
+        self.loader = loader
+        self.loading_tmp = []
+        
+    def get_id(self):
+        return self.file_id
+
+    def get_path(self):
+        return self.path
+
+    def get_checksum(self):
+        return self.checksum
+    
+    def get_content(self, exclude = Marker.T.NONE):
+        if exclude == Marker.T.NONE:
+            return self.content
+        
+        if exclude == (Marker.T.COMMENT | Marker.T.STRING | Marker.T.PREPROCESSOR):
+            # optimise frequent queries of this type
+            if hasattr(self, 'content_cache'):
+                return self.content_cache
+        
+        last_pos = 0
+        content = ""
+        for marker in self.iterate_markers(exclude):
+            content += self.content[last_pos:marker.begin]
+            content += " " * (marker.end - marker.begin)
+            last_pos = marker.end
+        content += self.content[last_pos:]
+
+        if exclude == (Marker.T.COMMENT | Marker.T.STRING | Marker.T.PREPROCESSOR):
+            self.content_cache = content
+        
+        assert(len(content) == len(self.content))
+        return content
+
+    def internal_append_region(self, region):
+        # here we apply some magic - we rely on special ordering of coming regions,
+        # which is supported by code parsers
+        prev_id = None
+        while True:
+            if len(self.loading_tmp) == 0:
+                break
+            prev_id = self.loading_tmp.pop()
+            if self.get_region(prev_id).get_offset_end() > region.get_offset_begin():
+                self.loading_tmp.append(prev_id) # return back
+                break
+        self.loading_tmp.append(region.get_id())
+        if prev_id != None:
+            self.get_region(prev_id).register_subregion_id(region.get_id())
+        self.regions.append(region)
+
+    def load_regions(self):
+        if self.regions == None:
+            self.regions = []
+            for each in self.loader.db.iterate_regions(self.get_id()):
+                self.internal_append_region(FileRegionData(self.loader,
+                                                   self.get_id(),
+                                                   each.region_id,
+                                                   each.name,
+                                                   each.begin,
+                                                   each.end,
+                                                   each.line_begin,
+                                                   each.line_end,
+                                                   each.cursor,
+                                                   each.group,
+                                                   each.checksum))
+                assert(len(self.regions) == each.region_id)
+        
+    def add_region(self, region_name, offset_begin, offset_end, line_begin, line_end, cursor_line, group, checksum):
+        if self.regions == None:
+            self.regions = [] # do not load in time of collection
+        new_id = len(self.regions) + 1
+        self.internal_append_region(FileRegionData(self.loader, self.get_id(), new_id, region_name, offset_begin, offset_end, line_begin, line_end, cursor_line, group, checksum))
+        self.loader.db.create_region(self.file_id, new_id, region_name, offset_begin, offset_end, line_begin, line_end, cursor_line, group, checksum)
+        return new_id
+        
+    def get_region(self, region_id):
+        self.load_regions()
+        return self.regions[region_id - 1]
+    
+    def iterate_regions(self, filter_group = FileRegionData.T.ANY):
+        self.load_regions()
+        for each in self.regions:
+            if each.group & filter_group:
+                yield each
+
+    def are_regions_loaded(self):
+        return self.regions != None
+
+    def load_markers(self):
+        if self.markers == None:
+            self.markers = []
+            for each in self.loader.db.iterate_markers(self.get_id()):
+                self.markers.append(Marker(each.begin, each.end, each.group))
+        
+    def add_marker(self, offset_begin, offset_end, group):
+        if self.markers == None:
+            self.markers = [] # do not load in time of collection
+        self.markers.append(Marker(offset_begin, offset_end, group))
+        self.loader.db.create_marker(self.file_id, offset_begin, offset_end, group)
+        
+    def iterate_markers(self, filter_group = Marker.T.COMMENT |
+                         Marker.T.STRING | Marker.T.PREPROCESSOR):
+        self.load_markers()
+        for each in self.markers:
+            if each.group & filter_group:
+                yield each
+    
+    def get_marker_types(self):
+        return Marker.T
+
+    def get_region_types(self):
+        return FileRegionData.T
+
+    def are_markers_loaded(self):
+        return self.markers != None
+
+    def __repr__(self):
+        return Data.__repr__(self) + " and regions " + self.regions.__repr__()
+
+class AggregatedData(Data):
+    
+    def __init__(self, loader, path):
+        Data.__init__(self)
+        self.path = path
+        self.loader = loader
+        self.subdirs = None
+        self.subfiles = None
+        
+    def get_subdirs(self):
+        if self.subdirs != None:
+            return self.subdirs
+        self.subdirs = []
+        if self.path != None:
+            for subdir in self.loader.db.iterate_dircontent(self.path, include_subdirs = True, include_subfiles = False):
+                self.subdirs.append(subdir)
+        return self.subdirs
+    
+    def get_subfiles(self):
+        if self.subfiles != None:
+            return self.subfiles
+        self.subfiles = []
+        if self.path != None:
+            for subfile in self.loader.db.iterate_dircontent(self.path, include_subdirs = False, include_subfiles = True):
+                self.subfiles.append(subfile)
+        return self.subfiles
+
+
+class SelectData(Data):
+
+    def __init__(self, loader, path, file_id, region_id):
+        Data.__init__(self)
+        self.loader = loader
+        self.path = path
+        self.file_id = file_id
+        self.region_id = region_id
+        self.region = None
+    
+    def get_path(self):
+        return self.path
+    
+    def get_region(self):
+        if self.region == None and self.region_id != None:
+            row = self.loader.db.get_region(self.file_id, self.region_id)
+            if row != None:
+                self.region = FileRegionData(self.loader,
+                                             self.file_id,
+                                             self.region_id,
+                                             row.name,
+                                             row.begin,
+                                             row.end,
+                                             row.line_begin,
+                                             row.line_end,
+                                             row.cursor,
+                                             row.group,
+                                             row.checksum)
+        return self.region
+
+
+class DiffData(Data):
+    
+    def __init__(self, new_data, old_data):
+        Data.__init__(self)
+        self.new_data = new_data
+        self.old_data = old_data
+    
+    def get_data(self, namespace, field):
+        new_data = self.new_data.get_data(namespace, field)
+        old_data = self.old_data.get_data(namespace, field)
+        if new_data == None or old_data == None:
+            return None
+        return new_data - old_data
+
+####################################
+# Packager Interface
+####################################
+
+class PackagerError(Exception):
+    def __init__(self):
+        Exception.__init__(self, "Failed to pack or unpack.")
+
+class PackagerFactory(object):
+
+    def create(self, python_type):
+        if python_type == None:
+            return PackagerFactory.SkipPackager()
+        if python_type == int:
+            return PackagerFactory.IntPackager()
+        if python_type == float:
+            return PackagerFactory.FloatPackager()
+        if python_type == str:
+            return PackagerFactory.StringPackager()
+        
+        class PackagerFactoryError(Exception):
+            def __init__(self, python_type):
+                Exception.__init__(self, "Python type '" + str(python_type) + "' is not supported by the factory.")
+        raise PackagerFactoryError(python_type)
+    
+    def get_python_type(self, sql_type):
+        if sql_type == "integer":
+            return int
+        if sql_type == "real":
+            return float
+        if sql_type == "text":
+            return str
+
+        class PackagerFactoryError(Exception):
+            def __init__(self, sql_type):
+                Exception.__init__(self, "SQL type '" + str(sql_type) + "' is not supported by the factory.")
+        raise PackagerFactoryError(sql_type)
+
+    class IPackager(object):
+        def pack(self, unpacked_data):
+            raise core.api.InterfaceNotImplemented(self)
+        def unpack(self, packed_data):
+            raise core.api.InterfaceNotImplemented(self)
+        def get_sql_type(self):
+            raise core.api.InterfaceNotImplemented(self)
+        def get_python_type(self):
+            raise core.api.InterfaceNotImplemented(self)
+        
+    class IntPackager(IPackager):
+        def pack(self, unpacked_data):
+            if not isinstance(unpacked_data, int):
+                raise PackagerError()
+            return str(unpacked_data)
+            
+        def unpack(self, packed_data): 
+            try:
+                return int(packed_data)
+            except ValueError:
+                raise PackagerError()
+    
+        def get_sql_type(self):
+            return "integer"
+        
+        def get_python_type(self):
+            return int
+    
+    class FloatPackager(IPackager):
+        def pack(self, unpacked_data):
+            if not isinstance(unpacked_data, float):
+                raise PackagerError()
+            return str(unpacked_data)
+            
+        def unpack(self, packed_data): 
+            try:
+                return float(packed_data)
+            except ValueError:
+                raise PackagerError()
+    
+        def get_sql_type(self):
+            return "real"
+
+        def get_python_type(self):
+            return float
+
+    class StringPackager(IPackager):
+        def pack(self, unpacked_data):
+            if not isinstance(unpacked_data, str):
+                raise PackagerError()
+            return str(unpacked_data)
+            
+        def unpack(self, packed_data): 
+            try:
+                return str(packed_data)
+            except ValueError:
+                raise PackagerError()
+    
+        def get_sql_type(self):
+            return "text"
+
+        def get_python_type(self):
+            return str
+    
+    class SkipPackager(IPackager):
+        def pack(self, unpacked_data):
+            return None
+            
+        def unpack(self, packed_data): 
+            return None
+    
+        def get_sql_type(self):
+            return None
+            
+        def get_python_type(self):
+            return None
+            
+####################################
+# Loader
+####################################
+
+class NamespaceError(Exception):
+    def __init__(self, namespace, reason):
+        Exception.__init__(self, "Namespace '"
+                        + namespace 
+                        + "': '"
+                        + reason
+                        + "'")
+
+class FieldError(Exception):
+    def __init__(self, field, reason):
+        Exception.__init__(self, "Field '"
+                    + field 
+                    + "': '"
+                    + reason
+                    + "'")
+
+class Namespace(object):
+    
+    def __init__(self, db_handle, name, support_regions = False):
+        if not isinstance(name, str):
+            raise NamespaceError(name, "name not a string")
+        self.name = name
+        self.support_regions = support_regions
+        self.fields = {}
+        self.db = db_handle
+        
+        if self.db.check_table(name) == False:        
+            self.db.create_table(name, support_regions)
+        else:
+            for column in self.db.iterate_columns(name):
+                self.add_field(column.name, PackagerFactory().get_python_type(column.sql_type))
+        
+    def get_name(self):
+        return self.name
+
+    def are_regions_supported(self):
+        return self.support_regions
+    
+    def add_field(self, field_name, python_type):
+        if not isinstance(field_name, str):
+            raise FieldError(field_name, "field_name not a string")
+        packager = PackagerFactory().create(python_type)
+        if field_name in self.fields.keys():
+            raise FieldError(field_name, "double used")
+        self.fields[field_name] = packager
+        
+        if self.db.check_column(self.get_name(), field_name) == False:        
+            self.db.create_column(self.name, field_name, packager.get_sql_type())
+    
+    def iterate_field_names(self):
+        for name in self.fields.keys():
+            yield name
+    
+    def get_field_packager(self, field_name):
+        if field_name in self.fields.keys():
+            return self.fields[field_name]
+        else:
+            return None
+        
+    def get_field_sql_type(self, field_name):
+        return self.get_field_packager(field_name).get_sql_type()
+
+    def get_field_python_type(self, field_name):
+        return self.get_field_packager(field_name).get_python_type()
+    
+class DataNotPackable(Exception):
+    def __init__(self, namespace, field, value, packager, extra_message):
+        Exception.__init__(self, "Data '"
+                           + str(value)
+                           + "' of type "
+                           + str(value.__class__) 
+                           + " referred by '"
+                           + namespace
+                           + "=>"
+                           + field
+                           + "' is not packable by registered packager '"
+                           + str(packager.__class__)
+                           + "': " + extra_message)
+
+class Loader(object):
+    
+    def __init__(self):
+        self.namespaces = {}
+        self.db = None
+        self.last_file_data = None # for performance boost reasons
+    
+    def create_database(self, dbfile, previous_db = None):
+        self.db = core.db.sqlite.Database()
+        if os.path.exists(dbfile):
+            logging.warn("Removing existing file: " + dbfile)
+            os.unlink(dbfile)
+        if previous_db != None and os.path.exists(previous_db) == False:
+            raise core.api.ExitError(self, "Database file '" + previous_db + "'  does not exist")
+
+        self.db.create(dbfile, clone_from=previous_db)
+        
+    def open_database(self, dbfile, read_only = True):
+        self.db = core.db.sqlite.Database()
+        if os.path.exists(dbfile) == False:
+            raise core.api.ExitError(self, "Database file '" + dbfile + "'  does not exist")
+        self.db.connect(dbfile, read_only=read_only)
+        
+        for table in self.db.iterate_tables():
+            self.create_namespace(table.name, table.support_regions)
+            
+    def create_namespace(self, name, support_regions = False):
+        if self.db == None:
+            return None
+        
+        if name in self.namespaces.keys():
+            raise NamespaceError(name, "double used")
+        new_namespace = Namespace(self.db, name, support_regions)
+        self.namespaces[name] = new_namespace
+        return new_namespace
+    
+    def iterate_namespace_names(self):
+        for name in self.namespaces.keys():
+            yield name
+
+    def get_namespace(self, name):
+        if name in self.namespaces.keys():
+            return self.namespaces[name]
+        else:
+            return None
+
+    def create_file_data(self, path, checksum, content):
+        if self.db == None:
+            return None
+
+        new_id = self.db.create_file(path, checksum)
+        result = FileData(self, path, new_id, checksum, content) 
+        self.last_file_data = result
+        return result
+
+    def load_file_data(self, path):
+        if self.db == None:
+            return None
+
+        if self.last_file_data != None and self.last_file_data.get_path() == path:
+            return self.last_file_data
+        
+        data = self.db.get_file(path)
+        if data == None:
+            return None
+        
+        result = FileData(self, data.path, data.id, data.checksum, None)
+        self.last_file_data = result
+        return result
+
+    def save_file_data(self, file_data):
+        if self.db == None:
+            return None
+
+        class DataIterator(object):
+
+            def iterate_packed_values(self, data, namespace, support_regions = False):
+                for each in data.iterate_fields(namespace):
+                    space = self.loader.get_namespace(namespace)
+                    if space == None:
+                        raise DataNotPackable(namespace, each[0], each[1], None, "The namespace has not been found")
+                    
+                    packager = space.get_field_packager(each[0])
+                    if packager == None:
+                        raise DataNotPackable(namespace, each[0], each[1], None, "The field has not been found")
+        
+                    if space.support_regions != support_regions:
+                        raise DataNotPackable(namespace, each[0], each[1], packager, "Incompatible support for regions")
+                    
+                    try:
+                        packed_data = packager.pack(each[1])
+                        if packed_data == None:
+                            continue
+                    except PackagerError:
+                        raise DataNotPackable(namespace, each[0], each[1], packager, "Packager raised exception")
+                    
+                    yield (each[0], packed_data)
+            
+            def __init__(self, loader, data, namespace, support_regions = False):
+                self.loader = loader
+                self.iterator = self.iterate_packed_values(data, namespace, support_regions)
+    
+            def __iter__(self):
+                return self.iterator
+        
+        for namespace in file_data.iterate_namespaces():
+            if file_data.is_namespace_updated(namespace) == False:
+                continue
+            self.db.add_row(namespace,
+                            file_data.get_id(),
+                            None,
+                            DataIterator(self, file_data, namespace))
+        
+        if file_data.are_regions_loaded():
+            for region in file_data.iterate_regions():
+                for namespace in region.iterate_namespaces():
+                    if region.is_namespace_updated(namespace) == False:
+                        continue
+                    self.db.add_row(namespace,
+                                    file_data.get_id(),
+                                    region.get_id(),
+                                    DataIterator(self, region, namespace, support_regions = True))
+
+    def iterate_file_data(self):
+        if self.db != None:
+            for data in self.db.iterate_files():
+                yield FileData(self, data.path, data.id, data.checksum, None)
+
+    def load_aggregated_data(self, path = None, path_like_filter = "%", namespaces = None):
+        if self.db == None:
+            return None
+
+        final_path_like = path_like_filter
+        if path != None:
+            if self.db.check_dir(path) == False and self.db.check_file(path) == False:
+                return None
+            final_path_like = path + path_like_filter
+        
+        if namespaces == None:
+            namespaces = self.namespaces.keys()
+        
+        result = AggregatedData(self, path)
+        for name in namespaces:
+            data = self.db.aggregate_rows(name, path_like = final_path_like)
+            for field in data.keys():
+                result.set_data(name, field, data[field])
+        
+        return result
+    
+    def load_selected_data(self, namespace, fields = None, path = None, path_like_filter = "%", filters = []):
+        if self.db == None:
+            return None
+
+        final_path_like = path_like_filter
+        if path != None:
+            if self.db.check_dir(path) == False and self.db.check_file(path) == False:
+                return None
+            final_path_like = path + path_like_filter
+        
+        namespace_obj = self.get_namespace(namespace)
+        if namespace_obj == None:
+            return None
+        
+        class SelectDataIterator(object):
+        
+            def iterate_selected_values(self, loader, namespace_obj, final_path_like, fields, filters):
+                for row in loader.db.select_rows(namespace_obj.get_name(), path_like=final_path_like, filters=filters):
+                    region_id = None
+                    if namespace_obj.are_regions_supported() == True:
+                        region_id = row['region_id']
+                    data = SelectData(loader, row['path'], row['id'], region_id)
+                    field_names = fields
+                    if fields == None:
+                        field_names = namespace_obj.iterate_field_names()
+                    for field in field_names:
+                        data.set_data(namespace, field, row[field])
+                    yield data
+            
+            def __init__(self, loader, namespace_obj, final_path_like, fields, filters):
+                self.iterator = self.iterate_selected_values(loader, namespace_obj, final_path_like, fields, filters)
+    
+            def __iter__(self):
+                return self.iterator
+
+        return SelectDataIterator(self, namespace_obj, final_path_like, fields, filters)
+    

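The classes above combine into a collect-and-save flow: a Loader owns the sqlite database, Namespace declares typed fields (columns), and FileData/LoadableData hold per-file values that are written back with save_file_data() and re-read lazily per namespace. A condensed sketch of that flow under Python 2.7 (which this project targets), assuming the mainline directory is on sys.path; the namespace and field names below are hypothetical:

    import core.db.loader

    loader = core.db.loader.Loader()
    loader.create_database('metrics.db')                  # fresh database, no previous revision
    namespace = loader.create_namespace('std.code.lines')
    namespace.add_field('total', int)                     # packed/unpacked by IntPackager

    file_data = loader.create_file_data('src/main.cpp', checksum=12345, content='int main(){}\n')
    file_data.set_data('std.code.lines', 'total', 3)
    loader.save_file_data(file_data)                      # writes only namespaces marked as updated

    reloaded = loader.load_file_data('src/main.cpp')      # values are loaded lazily, per namespace
    print(reloaded.get_data('std.code.lines', 'total'))   # 3
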
+ 36 - 0
mainline/core/db/post.py

@@ -0,0 +1,36 @@
+'''
+Created on 26/06/2012
+
+@author: konstaa
+'''
+
+import core.api
+
+import os.path
+import re
+
+class Plugin(core.api.Plugin, core.api.IConfigurable):
+    
+    def declare_configuration(self, parser):
+        parser.add_option("--general.db-file", default=r'./source-metrics.db',
+                         help="Primary database file to write (by the collector) and post-process (by other tools) [default: %default]")
+        parser.add_option("--general.db-file-prev", default=None,
+                         help="Database file with data collected for the past/previous revision [default: %default].")
+    
+    def configure(self, options):
+        self.dbfile = options.__dict__['general.db_file']
+        self.dbfile_prev = options.__dict__['general.db_file_prev']
+        
+    def initialize(self):
+        
+        self.get_plugin_loader().get_database_loader().create_database(self.dbfile, previous_db = self.dbfile_prev)    
+        
+        # do not process files dumped by this module
+        self.get_plugin_loader().get_plugin('core.dir').add_exclude_rule(re.compile(r'^' + os.path.basename(self.dbfile) + r'$'))
+        if self.dbfile_prev != None:
+            self.get_plugin_loader().get_plugin('core.dir').add_exclude_rule(re.compile(r'^' + os.path.basename(self.dbfile_prev) + r'$'))
+        
+        
+
+
+    

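configure() above reads the parsed values through options.__dict__ because optparse derives the destination name from the long option string by replacing '-' with '_' while keeping the dot, so "--general.db-file" is stored under the key 'general.db_file'. A small stand-alone illustration of that mapping, using optparse directly:

    import optparse

    parser = optparse.OptionParser()
    parser.add_option("--general.db-file", default='./source-metrics.db')
    options, args = parser.parse_args(["--general.db-file", "metrics.db"])
    print(options.__dict__['general.db_file'])  # metrics.db
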
+ 543 - 0
mainline/core/db/sqlite.py

@@ -0,0 +1,543 @@
+'''
+Created on 3/01/2013
+
+@author: konstaa
+'''
+
+import sqlite3
+import re
+import os.path
+import logging
+import itertools 
+import shutil
+
+class Database(object):
+    
+    last_used_id = 0
+    version = "1.0"
+    
+    class TableData(object):
+        def __init__(self, table_id, name, support_regions):
+            self.id = table_id
+            self.name = name
+            self.support_regions = support_regions
+    
+    class ColumnData(object):
+        def __init__(self, column_id, name, sql_type):
+            self.id = column_id
+            self.name = name
+            self.sql_type = sql_type
+
+    class FileData(object):
+        def __init__(self, file_id, path, checksum):
+            self.id = file_id
+            self.path = path
+            self.checksum = checksum
+
+    class RegionData(object):
+        def __init__(self, file_id, region_id, name, begin, end, line_begin, line_end, cursor, group, checksum):
+            self.file_id = file_id
+            self.region_id = region_id
+            self.name = name
+            self.begin = begin
+            self.end = end
+            self.line_begin = line_begin
+            self.line_end = line_end
+            self.cursor = cursor
+            self.group = group
+            self.checksum = checksum
+
+    class MarkerData(object):
+        def __init__(self, file_id, begin, end, group):
+            self.file_id = file_id
+            self.begin = begin
+            self.end = end
+            self.group = group
+
+    def __init__(self):
+        self.read_only = False
+        self.conn = None
+        self.dirs = None
+        self.is_cloned = False
+        
+        self.last_used_id += 1
+        self.id = self.last_used_id
+    
+    def __del__(self):
+        if self.conn != None:
+            if self.is_cloned == True:
+                logging.debug("Cleaning up database file")
+                self.InternalCleanUpUtils().clean_up_not_confirmed(self)
+            logging.debug("Committing database file")
+            self.conn.commit()
+    
+    class InternalCleanUpUtils(object):
+        
+        def clean_up_not_confirmed(self, db_loader):
+            sql = "SELECT * FROM __tables__ WHERE (confirmed = 0)"
+            db_loader.log(sql)
+            for table in db_loader.conn.execute(sql).fetchall():
+                sql = "DELETE FROM __columns__ WHERE table_id = '" + str(table['id']) + "'"
+                db_loader.log(sql)
+                db_loader.conn.execute(sql)
+                sql = "DELETE FROM __tables__ WHERE id = '" + str(table['id']) + "'"
+                db_loader.log(sql)
+                db_loader.conn.execute(sql)
+                sql = "DROP TABLE '" + table['name'] + "'"
+                db_loader.log(sql)
+                db_loader.conn.execute(sql)
+
+            sql = "SELECT __columns__.name AS column_name, __tables__.name AS table_name, __columns__.id AS column_id FROM __columns__, __tables__ WHERE (__columns__.confirmed = 0 AND __columns__.table_id = __tables__.id)"
+            db_loader.log(sql)
+            for column in db_loader.conn.execute(sql).fetchall():
+                logging.warn("New database file inherits useless column: '" + column['table_name'] + "'.'" + column['column_name'] + "'")
+                sql = "DELETE FROM __columns__ WHERE id = '" + str(column['column_id']) + "'"
+                db_loader.log(sql)
+                db_loader.conn.execute(sql)
+                sql = "UPDATE '" + column['table_name'] + "' SET '" + column['column_name'] + "' = NULL"
+                db_loader.log(sql)
+                db_loader.conn.execute(sql)
+            
+            self.clean_up_file(db_loader)
+
+        def clean_up_file(self, db_loader, file_id = None):
+            sql = "SELECT * FROM __tables__"
+            db_loader.log(sql)
+            for table in itertools.chain(db_loader.conn.execute(sql).fetchall(), [{'name':'__regions__'}, {'name':'__markers__'}]):
+                sql = ""
+                if file_id == None:
+                    sql = "DELETE FROM '" + table['name'] + "' WHERE file_id IN (SELECT __files__.id FROM __files__ WHERE __files__.confirmed = 0)"
+                else:
+                    sql = "DELETE FROM '" + table['name'] + "' WHERE (file_id = " + str(file_id) + ")"
+                db_loader.log(sql)
+                db_loader.conn.execute(sql)
+            
+    class InternalPathUtils(object):
+        
+        def iterate_heads(self, path):
+            dirs = []
+            head = os.path.dirname(path)
+            last_head = None # to process Windows drives
+            while (head != "" and last_head != head):
+                dirs.append(os.path.basename(head))
+                last_head = head
+                head = os.path.dirname(head)
+            dirs.reverse()
+            for each in dirs:
+                yield each
+                
+        def normalize_path(self, path):
+            if path == None:
+                return None
+            return re.sub(r'''[\\]''', "/", path)
+        
+        def update_dirs(self, db_loader, path = None):
+            if db_loader.dirs == None:
+                if path == None:
+                    db_loader.dirs = {} # initial construction
+                else:
+                    return # avoid useless cache updates 
+            elif path == None:
+                return # avoid multiple initial constructions
+            
+            path = self.normalize_path(path)
+            rows = None
+            if path == None:
+                sql = "SELECT * FROM __files__"
+                db_loader.log(sql)
+                rows = db_loader.conn.execute(sql).fetchall()
+            else:
+                rows = [{"path": path}]
+            for row in rows:
+                cur_head = db_loader.dirs
+                for dir_name in self.iterate_heads(row["path"]):
+                    if dir_name not in cur_head.keys():
+                        cur_head[dir_name] = {}
+                    cur_head = cur_head[dir_name]
+                cur_head[os.path.basename(row["path"])] = None
+
+
+    def create(self, file_name, clone_from = None):
+        if clone_from != None:
+            self.is_cloned = True
+            logging.debug("Cloning database file: " + clone_from)
+            shutil.copy2(clone_from, file_name)
+            logging.debug("Connecting database file: " + file_name)
+            self.conn = sqlite3.connect(file_name)
+            self.conn.row_factory = sqlite3.Row
+            self.read_only = False
+            
+            sql = "UPDATE __tables__ SET confirmed = 0"
+            self.log(sql)
+            self.conn.execute(sql)
+            sql = "UPDATE __columns__ SET confirmed = 0"
+            self.log(sql)
+            self.conn.execute(sql)
+            sql = "UPDATE __files__ SET confirmed = 0"
+            self.log(sql)
+            self.conn.execute(sql)
+                
+        else:
+            self.connect(file_name)
+        
+    def connect(self, file_name, read_only = False):
+        logging.debug("Connecting database file: " + file_name)
+        self.conn = sqlite3.connect(file_name)
+        self.conn.row_factory = sqlite3.Row
+        self.read_only = read_only
+        if self.read_only == False:
+            try:
+                sql = "CREATE TABLE __info__ (id integer NOT NULL PRIMARY KEY AUTOINCREMENT, property text NOT NULL, value text, UNIQUE (property) ON CONFLICT REPLACE)"
+                self.log(sql)
+                self.conn.execute(sql)
+                sql = "INSERT INTO __info__ (property, value) VALUES ('version', '" + self.version + "')"
+                self.log(sql)
+                self.conn.execute(sql)
+                sql = "CREATE TABLE __tables__ (id integer NOT NULL PRIMARY KEY, name text NOT NULL, support_regions integer NOT NULL, confirmed integer NOT NULL, UNIQUE (name))"
+                self.log(sql)
+                self.conn.execute(sql)
+                sql = "CREATE TABLE __columns__ (id integer NOT NULL PRIMARY KEY, name text NOT NULL, type text NOT NULL, table_id integer NOT_NULL, confirmed integer NOT NULL, UNIQUE (name, table_id))"
+                self.log(sql)
+                self.conn.execute(sql)
+                sql = "CREATE TABLE __files__ (id integer NOT NULL PRIMARY KEY AUTOINCREMENT, path text NOT NULL, checksum integer NOT NULL, confirmed integer NOT NULL, UNIQUE(path))"
+                self.log(sql)
+                self.conn.execute(sql)
+                sql = "CREATE TABLE __regions__ (file_id integer NOT NULL, region_id integer NOT NULL, name text NOT NULL, begin integer NOT NULL, end integer NOT NULL, line_begin integer NOT NULL, line_end integer NOT NULL, cursor integer NOT NULL, group_id integer NOT NULL, checksum integer NOT NULL, PRIMARY KEY (file_id, region_id))"
+                self.log(sql)
+                self.conn.execute(sql)
+                sql = "CREATE TABLE __markers__ (id integer NOT NULL PRIMARY KEY, file_id integer NOT NULL, begin integer NOT NULL, end integer NOT NULL, group_id integer NOT NULL)"
+                self.log(sql)
+                self.conn.execute(sql)
+            except sqlite3.OperationalError as e:
+                logging.debug("sqlite3.OperationalError: " + str(e))
+        
+    def create_table(self, table_name, support_regions = False):
+        assert(self.read_only == False)
+
+        sql = "SELECT * FROM __tables__ WHERE (name = '" + table_name + "'AND confirmed == 0)"
+        self.log(sql)
+        result = self.conn.execute(sql).fetchall()
+        if len(result) != 0:
+            sql = "UPDATE __tables__ SET confirmed = 1 WHERE (name = '" + table_name + "')"
+            self.log(sql)
+            self.conn.execute(sql)
+            return        
+        
+        sql = "CREATE TABLE '" + table_name + "' (file_id integer NOT NULL PRIMARY KEY)"
+        if support_regions == True:
+            sql = str("CREATE TABLE '" + table_name + "' (file_id integer NOT NULL, region_id integer NOT NULL, "
+                      + "PRIMARY KEY (file_id, region_id))")
+            
+        self.log(sql)
+        self.conn.execute(sql)
+        sql = "INSERT INTO __tables__ (name, support_regions, confirmed) VALUES ('" + table_name + "', '" + str(int(support_regions)) + "', 1)"
+        self.log(sql)
+        self.conn.execute(sql)
+        
+    def iterate_tables(self):
+        sql = "SELECT * FROM __tables__ WHERE (confirmed = 1)"
+        self.log(sql)
+        result = self.conn.execute(sql).fetchall()
+        for row in result:
+            yield self.TableData(int(row["id"]), str(row["name"]), bool(row["support_regions"]))
+            
+    def check_table(self, table_name):
+        sql = "SELECT * FROM __tables__ WHERE (name = '" + table_name + "' AND confirmed = 1)"
+        self.log(sql)
+        result = self.conn.execute(sql).fetchall()
+        if len(result) == 0:
+            return False
+        return True
+
+    def create_column(self, table_name, column_name, column_type):
+        assert(self.read_only == False)
+        if column_type == None:
+            logging.debug("Skipping column '" + column_name + "' creation for table '" + table_name + "'")
+            return
+        
+        sql = "SELECT id FROM __tables__ WHERE (name = '" + table_name + "')"
+        self.log(sql)
+        table_id = self.conn.execute(sql).next()['id']
+
+        sql = "SELECT * FROM __columns__ WHERE (table_id = '" + str(table_id) + "' AND name = '" + column_name + "' AND confirmed == 0)"
+        self.log(sql)
+        result = self.conn.execute(sql).fetchall()
+        if len(result) != 0:
+            sql = "UPDATE __columns__ SET confirmed = 1 WHERE (table_id = '" + str(table_id) + "' AND name = '" + column_name + "')"
+            self.log(sql)
+            self.conn.execute(sql)
+            return        
+        
+        sql = "ALTER TABLE '" + table_name + "' ADD COLUMN '" + column_name + "' " + column_type
+        self.log(sql)
+        self.conn.execute(sql)
+        sql = "SELECT id FROM __tables__ WHERE (name = '" + table_name + "')"
+        self.log(sql)
+        table_id = self.conn.execute(sql).next()['id']
+        sql = "INSERT INTO __columns__ (name, type, table_id, confirmed) VALUES ('" + column_name + "', '" + column_type + "', '" + str(table_id) + "', 1)"
+        self.log(sql)
+        self.conn.execute(sql)        
+
+    def iterate_columns(self, table_name):
+        sql = "SELECT id FROM __tables__ WHERE (name = '" + table_name + "')"
+        self.log(sql)
+        table_id = self.conn.execute(sql).next()['id']
+        sql = "SELECT * FROM __columns__ WHERE (table_id = '" + str(table_id) + "' AND confirmed = 1)"
+        self.log(sql)
+        result = self.conn.execute(sql).fetchall()
+        for row in result:
+            yield self.ColumnData(int(row["id"]), str(row["name"]), str(row["type"]))
+
+    def check_column(self, table_name, column_name):
+        sql = "SELECT id FROM __tables__ WHERE (name = '" + table_name + "')"
+        self.log(sql)
+        table_id = self.conn.execute(sql).next()['id']
+        sql = "SELECT * FROM __columns__ WHERE (table_id = '" + str(table_id) + "' AND name = '" + column_name + "' AND confirmed = 1)"
+        self.log(sql)
+        result = self.conn.execute(sql).fetchall()
+        if len(result) == 0:
+            return False
+        return True
+    
+    def create_file(self, path, checksum):
+        assert(self.read_only == False)
+        path = self.InternalPathUtils().normalize_path(path)
+
+        if self.is_cloned == True:
+            sql = "SELECT * FROM __files__ WHERE (path = '" + path + "')"
+            self.log(sql)
+            result = self.conn.execute(sql).fetchall()
+            if len(result) != 0:
+                if result[0]['checksum'] == checksum:
+                    old_id = result[0]['id']
+                    sql = "UPDATE __files__ SET confirmed = 1 WHERE (id = " + str(old_id) +")"
+                    self.log(sql)
+                    self.conn.execute(sql)
+                    return old_id
+                else:
+                    self.InternalCleanUpUtils().clean_up_file(self, result[0]['id'])
+        
+        sql = "INSERT OR REPLACE INTO __files__ (path, checksum, confirmed) VALUES (?, ?, 1)"
+        column_data = [path, checksum]
+        self.log(sql + " /with arguments: " + str(column_data))
+        cur = self.conn.cursor()
+        cur.execute(sql, column_data)
+        self.InternalPathUtils().update_dirs(self, path=path)
+        return cur.lastrowid
+    
+    def iterate_dircontent(self, path, include_subdirs = True, include_subfiles = True):
+        self.InternalPathUtils().update_dirs(self)
+        path = self.InternalPathUtils().normalize_path(path)
+        cur_head = self.dirs
+        valid = True
+        if path != "":
+            for head in self.InternalPathUtils().iterate_heads(path):
+                if head not in cur_head.keys():
+                    # non existing directory
+                    valid = False
+                else:
+                    cur_head = cur_head[head]
+            basename = os.path.basename(path)
+            if basename not in cur_head.keys() or cur_head[basename] == None:
+                # do not exist or points to the file
+                valid = False
+            else:
+                cur_head = cur_head[basename]
+        if valid == True:
+            for elem in cur_head.keys():
+                if include_subdirs == True and cur_head[elem] != None:
+                    yield elem
+                if include_subfiles == True and cur_head[elem] == None:
+                    yield elem
+
+    def check_file(self, path):
+        return self.get_file(path) != None
+
+    def check_dir(self, path):
+        for each in self.iterate_dircontent(path):
+            each = each # used
+            return True # there is at least one item
+        return False
+
+    def get_file(self, path):
+        path = self.InternalPathUtils().normalize_path(path)
+        result = self.select_rows("__files__", filters = [("path", "=", path), ("confirmed", "=", 1)])
+        if len(result) == 0:
+            return None
+        assert(len(result) == 1)
+        return self.FileData(result[0]['id'], result[0]['path'], result[0]['checksum'])
+
+    def iterate_files(self):
+        sql = "SELECT * FROM __files__ WHERE (confirmed = 1)"
+        self.log(sql)
+        for row in self.conn.execute(sql).fetchall(): 
+            yield self.FileData(row['id'], row['path'], row['checksum']) 
+
+    def create_region(self, file_id, region_id, name, begin, end, line_begin, line_end, cursor, group, checksum):
+        assert(self.read_only == False)
+        sql = "INSERT OR REPLACE INTO __regions__ (file_id, region_id, name, begin, end, line_begin, line_end, cursor, group_id, checksum) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"
+        column_data = [file_id, region_id, name, begin, end, line_begin, line_end, cursor, group, checksum]
+        self.log(sql + " /with arguments: " + str(column_data))
+        cur = self.conn.cursor()
+        cur.execute(sql, column_data)
+        return cur.lastrowid
+    
+    def get_region(self, file_id, region_id):
+        result = self.select_rows("__regions__", filters = [("file_id", "=", file_id), ("region_id", "=", region_id)])
+        if len(result) == 0:
+            return None
+        return self.RegionData(result[0]['file_id'],
+                               result[0]['region_id'],
+                               result[0]['name'],
+                               result[0]['begin'],
+                               result[0]['end'],
+                               result[0]['line_begin'],
+                               result[0]['line_end'],
+                               result[0]['cursor'],
+                               result[0]['group_id'],
+                               result[0]['checksum'])
+
+    def iterate_regions(self, file_id):
+        for each in self.select_rows("__regions__", filters = [("file_id", "=", file_id)]):
+            yield self.RegionData(each['file_id'],
+                                  each['region_id'],
+                                  each['name'],
+                                  each['begin'],
+                                  each['end'],
+                                  each['line_begin'],
+                                  each['line_end'],
+                                  each['cursor'],
+                                  each['group_id'],
+                                  each['checksum'])
+    
+    def create_marker(self, file_id, begin, end, group):
+        assert(self.read_only == False)
+        sql = "INSERT OR REPLACE INTO __markers__ (file_id, begin, end, group_id) VALUES (?, ?, ?, ?)"
+        column_data = [file_id, begin, end, group]
+        self.log(sql + " /with arguments: " + str(column_data))
+        cur = self.conn.cursor()
+        cur.execute(sql, column_data)
+        return cur.lastrowid
+    
+    def iterate_markers(self, file_id):
+        for each in self.select_rows("__markers__", filters = [("file_id", "=", file_id)]):
+            # __markers__ rows carry file_id, begin, end and group_id (see create_marker)
+            yield self.MarkerData(each['file_id'],
+                                  each['begin'],
+                                  each['end'],
+                                  each['group_id'])
+
+    def add_row(self, table_name, file_id, region_id, array_data):
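+        # array_data is a list of (column_name, value) pairs; if it is empty,
+        # nothing is written and None is returned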
+        assert(self.read_only == False)
+        column_names = "'file_id'"
+        column_values = "?"
+        column_data = [file_id]
+        if region_id != None:
+            column_names += ", 'region_id'"
+            column_values += ", ?"
+            column_data.append(region_id)
+        useful_data = 0
+        for each in array_data:
+            column_names +=  ", '" + each[0] + "'"
+            column_values += ", ?"
+            column_data.append(each[1])
+            useful_data += 1
+        if useful_data == 0:
+            return
+        sql = "INSERT OR REPLACE INTO '" + table_name + "' (" + column_names + ") VALUES (" + column_values + ")"
+        self.log(sql + " /with arguments: " + str(column_data))
+        cur = self.conn.cursor()
+        cur.execute(sql, column_data)
+        return cur.lastrowid
+
+    def select_rows(self, table_name, path_like = None, column_names = [], filters = []):
+        safe_column_names = []
+        for each in column_names:
+            safe_column_names.append("'" + each + "'")
+        return self.select_rows_unsafe(table_name, path_like = path_like, column_names = safe_column_names, filters = filters)
+
+    def select_rows_unsafe(self, table_name, path_like = None, column_names = [], filters = []):
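+        # Builds a SELECT statement: an optional INNER JOIN on __files__ when
+        # path_like is given, plus a WHERE clause assembled from
+        # (column, operator, value) filter tuples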
+        path_like = self.InternalPathUtils().normalize_path(path_like)
+        if self.conn == None:
+            return []
+
+        table_stmt = "'" + table_name + "'"
+
+        what_stmt = ", ".join(column_names)
+        if len(what_stmt) == 0:
+            what_stmt = "*"
+        elif path_like != None:
+            what_stmt += ", '__files__'.'path', '__files__'.'id'"
+        inner_stmt = ""
+        if path_like != None:
+            inner_stmt = " INNER JOIN '__files__' ON '__files__'.'id' = '" + table_name + "'.'file_id' "
+
+        where_stmt = " "
+        values = ()
+        if len(filters) != 0:
+            if filters[0][1] == 'IN':
+                where_stmt = " WHERE (`" + filters[0][0] + "` " + filters[0][1] + " " + filters[0][2]
+            else:    
+                where_stmt = " WHERE (`" + filters[0][0] + "` " + filters[0][1] + " ?"
+                values = (filters[0][2],)
+            for each in filters[1:]:
+                if each[1] == 'IN':
+                    where_stmt += " AND `" + each[0] + "` " + each[1] + " " + each[2]
+                else:
+                    where_stmt += " AND `" + each[0] + "` " + each[1] + " ?"
+                    values += (each[2], )
+            if path_like != None:
+                where_stmt += " AND '__files__'.'path' LIKE ?"
+                values += (path_like, )
+            where_stmt += ")"
+        elif path_like != None:
+            where_stmt = " WHERE '__files__'.'path' LIKE ?"
+            values += (path_like, )
+
+        sql = "SELECT " + what_stmt + " FROM " + table_stmt + inner_stmt + where_stmt
+        self.log(sql + " /with arguments: " + str(values))
+        return self.conn.execute(sql, values).fetchall()
+
+    def get_row(self, table_name, file_id, region_id):
+        selected = self.get_rows(table_name, file_id, region_id)
+        # ensures that there is at most one matching row in the database;
+        # if this assertion fires, the caller should use get_rows instead
+        assert(len(selected) == 0 or len(selected) == 1)
+        if len(selected) == 0:
+            return None
+        return selected[0]
+
+    def get_rows(self, table_name, file_id, region_id):
+        filters = [("file_id", '=', file_id)]
+        if region_id != None:
+            filters.append(("region_id", '=', region_id))
+        return self.select_rows(table_name, filters=filters)
+    
+    def aggregate_rows(self, table_name, path_like = None, column_names = None, filters = []):
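+        # Returns a dictionary of the form
+        # {column_name: {'max': ..., 'min': ..., 'avg': ..., 'total': ...}}
+        # computed over the matching rows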
+        
+        if column_names == None:
+            column_names = []
+            for column in self.iterate_columns(table_name):
+                column_names.append(column.name)
+                
+        if len(column_names) == 0:
+            # it is possible that a table does not have meaningful columns
+            return {} 
+        
+        total_column_names = []
+        for column_name in column_names:
+            for func in ['max', 'min', 'avg', 'total']:
+                total_column_names.append(func + "('" + table_name + "'.'" + column_name + "') AS " + "'" + column_name + "_" + func + "'")
+             
+        data = self.select_rows_unsafe(table_name, path_like = path_like, column_names = total_column_names, filters = filters)
+        assert(len(data) == 1)
+        result = {}
+        for column_name in column_names:
+            result[column_name] = {}
+            for func in ['max', 'min', 'avg', 'total']:
+                result[column_name][func] = data[0][column_name + "_" + func]
+        return result
+    
+    def log(self, sql):
+        #import traceback
+        #traceback.print_stack()
+        logging.debug("[" + str(self.id) + "] Executing query: " + sql)

+ 76 - 0
mainline/core/db/utils.py

@@ -0,0 +1,76 @@
+
+
+class FileRegionsDisposableGetter(object):
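+    # Indexes the regions of a file by checksum and by name, so that during
+    # matching every region id can be handed out ("disposed") at most once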
+    
+    def __init__(self, file_data):
+        self.checksums = {}
+        self.names = {}
+        
+        for each in file_data.iterate_regions():
+            if each.get_checksum() not in self.checksums:
+                self.checksums[each.get_checksum()] = []
+            self.checksums[each.get_checksum()].append((each.get_id(), each.get_name())) 
+            
+            if each.get_name() not in self.names:
+                self.names[each.get_name()] = []
+            self.names[each.get_name()].append((each.get_id(), each.get_checksum())) 
+        
+    def get_next_id_once_by_checksum(self, checksum):
+        if checksum not in self.checksums.keys():
+            return None
+
+        if len(self.checksums[checksum]) == 0:
+            return None
+        
+        elem = self.checksums[checksum].pop(0)
+        next_id = elem[0]
+        next_name = elem[1]
+
+        self.names[next_name].remove((next_id, checksum))
+        return next_id
+
+    def get_next_id_once_by_name(self, name):
+        if name not in self.names.keys():
+            return None
+        
+        if len(self.names[name]) == 0:
+            return None
+        
+        elem = self.names[name].pop(0)
+        next_id = elem[0]
+        next_checksum = elem[1]
+
+        self.checksums[next_checksum].remove((next_id, name))
+        return next_id
+    
+class FileRegionsMatcher(object):
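+    # Maps region ids of the current file to region ids of the previous snapshot:
+    # first by checksum (a match means the region is unmodified), then by name
+    # (a match means the region was modified)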
+    
+    def __init__(self, file_data, prev_file_data):
+        self.ids = [None] # region ids are 1-based, so keep a placeholder at index 0
+        
+        once_filter = FileRegionsDisposableGetter(prev_file_data)
+        unmatched_region_ids = []
+        for (ind, region) in enumerate(file_data.iterate_regions()):
+            assert(ind + 1 == region.id)
+            # Identify corresponding region in previous database (attempt by checksum)
+            prev_id = once_filter.get_next_id_once_by_checksum(region.checksum)
+            if prev_id != None:
+                self.ids.append((prev_id, False))
+            else:
+                unmatched_region_ids.append(region.id)
+                self.ids.append((None, True))
+                            
+        # Identify corresponding region in previous database (attempt by name)
+        for region_id in unmatched_region_ids: 
+            prev_id = once_filter.get_next_id_once_by_name(file_data.get_region(region_id).name)
+            if prev_id != None:
+                self.ids[region_id] = (prev_id, True)
+    
+    def get_prev_id(self, curr_id):
+        return self.ids[curr_id][0]
+
+    def is_matched(self, curr_id):
+        return (self.ids[curr_id][0] != None)
+
+    def is_modified(self, curr_id):
+        return self.ids[curr_id][1]

+ 88 - 0
mainline/core/dir.py

@@ -0,0 +1,88 @@
+'''
+Created on 26/06/2012
+
+@author: konstaa
+'''
+
+import core.api
+
+import re
+import os
+import logging
+import time
+import binascii
+
+class Plugin(core.api.Plugin, core.api.Parent, core.api.IConfigurable, core.api.IRunable):
+    
+    def __init__(self):
+        self.reader = DirectoryReader()
+        self.exclude_rules = []
+    
+    def declare_configuration(self, parser):
+        parser.add_option("--general.non-recursively", action="store_true", default=False,
+                         help="If the option is set (True), sub-directories are not processed [default: %default]")
+        parser.add_option("--general.exclude-files", default=r'^[.]',
+                         help="Defines the pattern to exclude files from processing [default: %default]")
+        parser.add_option("--general.proctime.on", action="store_true", default=False,
+                         help="If the option is set (True), the tool measures processing time per every file [default: %default]")
+    
+    def configure(self, options):
+        self.non_recursively = options.__dict__['general.non_recursively']
+        self.add_exclude_rule(re.compile(options.__dict__['general.exclude_files']))
+        self.is_proctime_enabled = options.__dict__['general.proctime.on']
+
+    def initialize(self):
+        if self.is_proctime_enabled == True:
+            namespace = self.get_plugin_loader().get_database_loader().create_namespace('general')
+            namespace.add_field('proctime', float)
+        
+    def run(self, args):
+        if len(args) == 0:
+            self.reader.run(self, "./")
+        for directory in args:
+            self.reader.run(self, directory)
+        
+    def add_exclude_rule(self, re_compiled_pattern):
+        self.exclude_rules.append(re_compiled_pattern)
+        
+    def is_file_excluded(self, file_name):
+        for each in self.exclude_rules:
+            if re.match(each, file_name) != None:
+                return True
+        return False 
+        
+class DirectoryReader():
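+    # Walks the given directory, reads every file which is not excluded,
+    # computes its CRC32 checksum and forwards the file data to child plugins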
+    
+    def run(self, plugin, directory):
+        
+        def run_recursively(plugin, directory):
+            for fname in os.listdir(directory):
+                full_path = os.path.join(directory, fname)
+                if plugin.is_file_excluded(fname) == False:
+                    if os.path.isdir(full_path):
+                        if plugin.non_recursively == False:
+                            run_recursively(plugin, full_path)
+                    else:
+                        logging.info("Processing: " + full_path)
+                        ts = time.time()
+                        
+                        f = open(full_path, 'r')
+                        text = f.read()
+                        f.close()
+                        checksum = binascii.crc32(text) & 0xffffffff # to match python 3
+
+                        data = plugin.get_plugin_loader().get_database_loader().create_file_data(full_path, checksum, text)
+                        plugin.notify_children(data)
+                        if plugin.is_proctime_enabled == True:
+                            data.set_data('general', 'proctime', time.time() - ts)
+                        plugin.get_plugin_loader().get_database_loader().save_file_data(data)
+                        logging.debug("-" * 60)
+                else:
+                    logging.info("Excluding: " + full_path)
+                    logging.debug("-" * 60)
+        
+        run_recursively(plugin, directory)
+    
+
+
+    

+ 0 - 0
mainline/core/export/__init__.py


+ 20 - 0
mainline/core/export/convert.py

@@ -0,0 +1,20 @@
+
+
+import core.export.utils.py2xml
+import core.export.utils.py2txt
+
+def to_xml(data, root_name = None):
+    serializer = core.export.utils.py2xml.Py2XML()
+    return serializer.parse(data, objName=root_name)
+
+def to_python(data, root_name = None):
+    prefix = ""
+    postfix = ""
+    if root_name != None:
+        prefix = "{'" + root_name + "': "
+        postfix = "}"
+    return prefix + data.__repr__() + postfix
+
+def to_txt(data, root_name = None):
+    serializer = core.export.utils.py2txt.Py2TXT()
+    return serializer.parse(data, objName=root_name, indent = -1)
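+
+# Example (informal): to_xml({'info': {'path': 'src'}}, root_name = 'data')
+# produces a nested <data> element with an <info path="src" /> child.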

+ 28 - 0
mainline/core/export/cout.py

@@ -0,0 +1,28 @@
+'''
+Created on 8/02/2013
+
+@author: konstaa
+'''
+
+
+SEVERITY_INFO    = 0x01
+SEVERITY_WARNING = 0x02
+SEVERITY_ERROR   = 0x03
+
+def cout(path, cursor, level, message, details):
+    notification = path + ":" + str(cursor) + ": "
+    if level == SEVERITY_INFO:
+        notification += "info: "
+    elif level == SEVERITY_WARNING:
+        notification += "warning: "
+    elif level == SEVERITY_ERROR:
+        notification += "error: "
+    else:
+        assert False, "Invalid message severity level specified"
+    notification += message + "\n"
+
+    DETAILS_OFFSET = 15
+    for each in details:
+        notification += "\t" + str(each[0]) + (" " * (DETAILS_OFFSET - len(each[0]))) + ": " + str(each[1]) + "\n"
+        
+    print notification

+ 0 - 0
mainline/core/export/utils/__init__.py


+ 120 - 0
mainline/core/export/utils/py2txt.py

@@ -0,0 +1,120 @@
+
+# Copied from http://code.activestate.com/recipes/577268-python-data-structure-to-xml-serialization/ and modified
+
+'''
+Py2TXT - Python to TXT serialization
+
+This code transforms a Python data structure into a TXT document
+
+Usage:
+    serializer = Py2TXT()
+    txt_string = serializer.parse( python_object )
+    print python_object
+    print txt_string
+'''
+
+INDENT_SPACE_SYMBOL = ".   " 
+
+class Py2TXT():
+
+    def __init__( self ):
+
+        self.data = "" # where we store the processed TXT string
+
+    def parse( self, pythonObj, objName=None, indent = 0 ):
+        '''
+        processes Python data structure into TXT string
+        needs objName if pythonObj is a List
+        '''
+        if pythonObj == None:
+            return "\n" + (INDENT_SPACE_SYMBOL * indent) + ""
+
+        if isinstance( pythonObj, dict ):
+            self.data = self._PyDict2TXT( pythonObj, objName, indent = indent + 1 )
+            
+        elif isinstance( pythonObj, list ):
+            # we need name for List object
+            self.data = self._PyList2TXT( pythonObj, objName, indent = indent + 1 )
+            
+        else:
+            self.data = "\n" + (INDENT_SPACE_SYMBOL * indent) + "%(n)s: %(o)s" % { 'n':objName, 'o':str( pythonObj ) }
+            
+        self.data = (INDENT_SPACE_SYMBOL * (indent + 1)) + "-" * 80 + self.data + "\n" + (INDENT_SPACE_SYMBOL * (indent + 1)) + "=" * 80 
+        return self.data
+
+    def _PyDict2TXT( self, pyDictObj, objName=None, indent = 0 ):
+        '''
+        process Python Dict objects
+        They can store TXT attributes and/or children
+        '''
+        tagStr = ""     # TXT string for this level
+        attributes = {} # attribute key/value pairs
+        attrStr = ""    # attribute string of this level
+        childStr = ""   # TXT string of this level's children
+
+        for k, v in pyDictObj.items():
+
+            if isinstance( v, dict ):
+                # child tags, with attributes
+                childStr += self._PyDict2TXT( v, k, indent = indent + 1 )
+
+            elif isinstance( v, list ):
+                # child tags, list of children
+                childStr += self._PyList2TXT( v, k, indent = indent + 1 )
+
+            else:
+                # tag could have many attributes, let's save until later
+                attributes.update( { k:v } )
+
+        if objName == None:
+            return childStr
+
+        # create TXT string for attributes
+        attrStr += ""
+        for k, v in attributes.items():
+            attrStr += "\n" + (INDENT_SPACE_SYMBOL * (indent + 1)) + "%s=\"%s\"" % ( k, v )
+
+        # let's assemble our tag string
+        if childStr == "":
+            tagStr += "\n" + (INDENT_SPACE_SYMBOL * indent) + "%(n)s: %(a)s" % { 'n':objName, 'a':attrStr }
+        else:
+            tagStr += "\n" + (INDENT_SPACE_SYMBOL * indent) + "%(n)s: %(a)s %(c)s" % { 'n':objName, 'a':attrStr, 'c':childStr }
+
+        return tagStr
+
+    def _PyList2TXT( self, pyListObj, objName=None, indent = 0 ):
+        '''
+        process Python List objects
+        They have no attributes, just children
+        Lists only hold Dicts or Strings
+        '''
+        tagStr = ""    # TXT string for this level
+        childStr = ""  # TXT string of children
+
+        for childObj in pyListObj:
+            
+            if isinstance( childObj, dict ):
+                # here's some Magic
+                # we're assuming that List parent has a plural name of child:
+                # eg, persons > person, so cut off last char
+                # name-wise, only really works for one level, however
+                # in practice, this is probably ok
+                childStr += "\n" + (INDENT_SPACE_SYMBOL * indent) + self._PyDict2TXT( childObj, objName[:-1], indent = indent + 1 )
+            elif isinstance( childObj, list ):
+                # here's some Magic
+                # we're assuming that List parent has a plural name of child:
+                # eg, persons > person, so cut off last char
+                # name-wise, only really works for one level, however
+                # in practice, this is probably ok
+                childStr += self._PyList2TXT( childObj, objName[:-1], indent = indent + 1 )
+            else:
+                childStr += "\n" + (INDENT_SPACE_SYMBOL * (indent + 1))
+                for string in childObj:
+                    childStr += string;
+
+        if objName == None:
+            return childStr
+
+        tagStr += "\n" + (INDENT_SPACE_SYMBOL * indent) + "%(n)s:%(c)s" % { 'n':objName, 'c':childStr }
+
+        return tagStr

+ 123 - 0
mainline/core/export/utils/py2xml.py

@@ -0,0 +1,123 @@
+
+# Copied from http://code.activestate.com/recipes/577268-python-data-structure-to-xml-serialization/
+# - indent feature and better formatting added
+# - fixed handling of lists in lists
+# - fixed root object name for dictionaries
+
+INDENT_SPACE_SYMBOL = "    " 
+
+'''
+Py2XML - Python to XML serialization
+
+This code transforms a Python data structure into an XML document
+
+Usage:
+    serializer = Py2XML()
+    xml_string = serializer.parse( python_object )
+    print python_object
+    print xml_string
+'''
+
+class Py2XML():
+
+    def __init__( self ):
+
+        self.data = "" # where we store the processed XML string
+
+    def parse( self, pythonObj, objName=None, indent = 0 ):
+        '''
+        processes Python data structure into XML string
+        needs objName if pythonObj is a List
+        '''
+        if pythonObj == None:
+            return "\n" + (INDENT_SPACE_SYMBOL * indent) + ""
+
+        if isinstance( pythonObj, dict ):
+            self.data = self._PyDict2XML( pythonObj, objName, indent=indent+1 )
+            
+        elif isinstance( pythonObj, list ):
+            # we need name for List object
+            self.data = self._PyList2XML( pythonObj, objName, indent=indent+1 )
+            
+        else:
+            self.data = "\n" + (INDENT_SPACE_SYMBOL * indent) + "<%(n)s>%(o)s</%(n)s>" % { 'n':objName, 'o':str( pythonObj ) }
+            
+        return self.data
+
+    def _PyDict2XML( self, pyDictObj, objName=None, indent = 0 ):
+        '''
+        process Python Dict objects
+        They can store XML attributes and/or children
+        '''
+        tagStr = ""     # XML string for this level
+        attributes = {} # attribute key/value pairs
+        attrStr = ""    # attribute string of this level
+        childStr = ""   # XML string of this level's children
+
+        for k, v in pyDictObj.items():
+
+            if isinstance( v, dict ):
+                # child tags, with attributes
+                childStr += self._PyDict2XML( v, k, indent=indent+1 )
+
+            elif isinstance( v, list ):
+                # child tags, list of children
+                childStr += self._PyList2XML( v, k, indent=indent+1 )
+
+            else:
+                # tag could have many attributes, let's save until later
+                attributes.update( { k:v } )
+
+        if objName == None:
+            return childStr
+
+        # create XML string for attributes
+        for k, v in attributes.items():
+            attrStr += " %s=\"%s\"" % ( k, v )
+
+        # let's assemble our tag string
+        if childStr == "":
+            tagStr += "\n" + (INDENT_SPACE_SYMBOL * indent) + "<%(n)s%(a)s />" % { 'n':objName, 'a':attrStr }
+        else:
+            tagStr += ("\n" + (INDENT_SPACE_SYMBOL * indent) + "<%(n)s%(a)s>%(c)s" + "\n" + (INDENT_SPACE_SYMBOL * indent) + "</%(n)s>") % { 'n':objName, 'a':attrStr, 'c':childStr }
+
+        return tagStr
+
+    def _PyList2XML( self, pyListObj, objName=None, indent = 0 ):
+        '''
+        process Python List objects
+        They have no attributes, just children
+        Lists only hold Dicts or Strings
+        '''
+        tagStr = ""    # XML string for this level
+        childStr = ""  # XML string of children
+
+        for childObj in pyListObj:
+            
+            if isinstance( childObj, dict ):
+                # here's some Magic
+                # we're assuming that List parent has a plural name of child:
+                # eg, persons > person, so cut off last char
+                # name-wise, only really works for one level, however
+                # in practice, this is probably ok
+                childStr += self._PyDict2XML( childObj, objName[:-1], indent=indent+1 )
+            elif isinstance( childObj, list ):
+                # here's some Magic
+                # we're assuming that List parent has a plural name of child:
+                # eg, persons > person, so cut off last char
+                # name-wise, only really works for one level, however
+                # in practice, this is probably ok
+                childStr += self._PyList2XML( childObj, objName[:-1], indent=indent+1 )
+                pass
+            else:
+                childStr += "\n" + (INDENT_SPACE_SYMBOL * (indent + 1)) + "<" + objName[:-1] + ">"
+                for string in childObj:
+                    childStr += string;
+                childStr += "</" + objName[:-1] + ">"
+                
+        if objName == None:
+            return childStr
+
+        tagStr += ("\n" + (INDENT_SPACE_SYMBOL * indent) + "<%(n)s>%(c)s" + "\n" + (INDENT_SPACE_SYMBOL * indent) + "</%(n)s>") % { 'n':objName, 'c':childStr }
+
+        return tagStr

+ 7 - 0
mainline/core/ext-priority/core.db.post.ini

@@ -0,0 +1,7 @@
+[Plugin]
+version: 1.0
+package: core.db
+module:  post
+class:   Plugin
+depends: None
+enabled: True

+ 7 - 0
mainline/core/ext-priority/core.dir.ini

@@ -0,0 +1,7 @@
+[Plugin]
+version: 1.0
+package: core
+module:  dir
+class:   Plugin
+depends: None
+enabled: True

+ 7 - 0
mainline/core/ext-priority/core.log.ini

@@ -0,0 +1,7 @@
+[Plugin]
+version: 1.0
+package: core
+module:  log
+class:   Plugin
+depends: None
+enabled: True

+ 111 - 0
mainline/core/loader.py

@@ -0,0 +1,111 @@
+'''
+Created on 10/08/2012
+
+@author: konstaa
+'''
+
+import core.api
+
+import os
+import core.db.loader
+
+class Loader(object):
+
+    def __init__(self):
+        self.plugins = []
+        self.hash    = {}
+        self.exit_code = 0
+        self.db = core.db.loader.Loader()
+        
+    def get_database_loader(self):
+        return self.db
+
+    def get_plugin(self, name):
+        return self.hash[name]['instance']
+    
+    def iterate_plugins(self, is_reversed = False):
+        if is_reversed == False:
+            for item in self.plugins:
+                yield item['instance']
+        else:
+            for item in reversed(self.plugins):
+                yield item['instance']
+            
+
+    def load(self, directory, optparser):
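+        # Discovers plugins from *.ini descriptors (the core ext-priority directory
+        # first, then the given directory), instantiates the enabled ones, lets them
+        # declare and consume command-line options, then initializes them in load order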
+        import sys
+        sys.path.append(directory)
+        
+        def load_recursively(manager, directory):
+            import ConfigParser
+            import re
+        
+            pattern = re.compile(r'.*[.]ini$', flags=re.IGNORECASE)
+        
+            dirList = os.listdir(directory)
+            for fname in dirList:
+                fname = os.path.join(directory, fname)
+                if os.path.isdir(fname):
+                    load_recursively(manager, fname)
+                elif re.match(pattern, fname):
+                    config = ConfigParser.ConfigParser()
+                    config.read(fname)
+                    item = {'package': config.get('Plugin', 'package'),
+                            'module': config.get('Plugin', 'module'),
+                            'class': config.get('Plugin', 'class'),
+                            'version': config.get('Plugin', 'version'),
+                            'depends': config.get('Plugin', 'depends'),
+                            'enabled': config.getboolean('Plugin', 'enabled')}
+                    if item['enabled']:
+                        manager.plugins.append(item)
+                        manager.hash[item['package'] + '.' + item['module']] = item
+
+        load_recursively(self, os.path.join(os.path.dirname(os.path.abspath(__file__)), 'ext-priority'))
+        load_recursively(self, directory)
+        # TODO check dependencies
+        for item in self.plugins:
+            plugin = __import__(item['package'], globals(), locals(), [item['module']], -1)
+            module_attr = plugin.__getattribute__(item['module'])
+            class_attr = module_attr.__getattribute__(item['class'])
+            item['instance'] = class_attr.__new__(class_attr)
+            item['instance'].__init__()
+            item['instance'].set_name(item['package'] + "." + item['module'])
+            item['instance'].set_plugin_loader(self)
+
+        for item in self.iterate_plugins():
+            if (isinstance(item, core.api.IConfigurable)):
+                item.declare_configuration(optparser)
+
+        (options, args) = optparser.parse_args()
+        for item in self.iterate_plugins():
+            if (isinstance(item, core.api.IConfigurable)):
+                item.configure(options)
+
+        for item in self.iterate_plugins():
+            item.initialize()
+            
+        return args
+
+    def unload(self):
+        for item in self.iterate_plugins(is_reversed = True):
+            item.terminate()
+
+    def run(self, args):
+        for item in self.iterate_plugins():
+            if (isinstance(item, core.api.IRunable)):
+                item.run(args)
+        return self.exit_code
+
+    def inc_exit_code(self):
+        self.exit_code += 1
+
+    def __repr__(self):
+        result = object.__repr__(self) + ' with loaded:'
+        for item in self.iterate_plugins():
+            result += '\n\t' + item.__repr__()
+            if isinstance(item, core.api.Parent):
+                result += ' with subscribed:'
+                for child in item.iterate_children():
+                    result += '\n\t\t' + child.__repr__()
+        return result
+

+ 34 - 0
mainline/core/log.py

@@ -0,0 +1,34 @@
+'''
+Created on 26/06/2012
+
+@author: konstaa
+'''
+
+import core.api
+import logging
+
+class Plugin(core.api.Plugin, core.api.IConfigurable):
+    
+    def declare_configuration(self, parser):
+        parser.add_option("--general.log-level", default=r'INFO', choices=['DEBUG','INFO','WARNING','ERROR'],
+                         help="Defines log level. Possible values are 'DEBUG','INFO','WARNING' or 'ERROR' [default: %default]")
+    
+    def configure(self, options):
+        if options.__dict__['general.log_level'] == 'ERROR':
+            log_level = logging.ERROR
+        elif options.__dict__['general.log_level'] == 'WARNING':
+            log_level = logging.WARNING
+        elif options.__dict__['general.log_level'] == 'INFO':
+            log_level = logging.INFO
+        elif options.__dict__['general.log_level'] == 'DEBUG':
+            log_level = logging.DEBUG
+        else:
+            raise AssertionError("Unhandled choice of log level")
+        
+        logging.getLogger().setLevel(log_level)
+        logging.warn("Logging enabled with " + options.__dict__['general.log_level'] + " level")
+
+
+
+def set_default_format():
+    logging.basicConfig(format="[LOG]: %(levelname)s:\t%(message)s", level=logging.WARN)

+ 106 - 0
mainline/core/warn.py

@@ -0,0 +1,106 @@
+'''
+Created on 26/06/2012
+
+@author: konstaa
+'''
+
+import re
+
+import core.api
+
+class Plugin(core.api.Plugin, core.api.IConfigurable):
+    
+    MODE_NEW     = 0x01
+    MODE_TREND   = 0x03
+    MODE_TOUCHED = 0x07
+    MODE_ALL     = 0x15
+    
+    
+    def declare_configuration(self, parser):
+        self.parser = parser
+        parser.add_option("--general.warn", default='all', choices=['new', 'trend', 'touched', 'all'],
+                         help="Defines the warnings mode. "
+                         "'off' - no warnings, 'new' - warnings for new regions only, "
+                         "'trend' - warnings for new regions and for bad trend of modified regions, "
+                         "'touched' - warnings for new regions and modified regions, "
+                         "'all' - all warnings active"
+                         "[default: %default]")
+
+        parser.add_option("--general.min-limit", action="multiopt",
+                          help='TBD')
+        parser.add_option("--general.max-limit", action="multiopt",
+                          help='TBD')
+        
+    def configure(self, options):
+        if options.__dict__['general.warn'] == 'new':
+            self.mode = self.MODE_NEW
+        elif options.__dict__['general.warn'] == 'trend':
+            self.mode = self.MODE_TREND
+        elif options.__dict__['general.warn'] == 'touched':
+            self.mode = self.MODE_TOUCHED
+        elif options.__dict__['general.warn'] == 'all':
+            self.mode = self.MODE_ALL
+
+        class Limit(object):
+            def __init__(self, limit_type, limit, namespace, field, db_filter):
+                self.type = limit_type
+                self.limit = limit
+                self.namespace = namespace
+                self.field = field
+                self.filter = db_filter
+                
+            def __repr__(self):
+                return "namespace '" + self.namespace + "', filter '" + str(self.filter) + "'"
+        
+        self.limits = []
+        pattern = re.compile(r'''([^:]+)[:]([^:]+)[:]([-+]?[0-9]+(?:[.][0-9]+)?)''')
+        if options.__dict__['general.max_limit'] != None:
+            for each in options.__dict__['general.max_limit']:
+                match = re.match(pattern, each)
+                if match == None:
+                    self.parser.error("Invalid format of the 'general.max-limit' option: " + each)
+                limit = Limit("max", match.group(3), match.group(1), match.group(2), (match.group(2), '>', float(match.group(3))))
+                self.limits.append(limit)
+        if options.__dict__['general.min_limit'] != None:
+            for each in options.__dict__['general.min_limit']:  
+                match = re.match(pattern, each)
+                if match == None:
+                    self.parser.error("Invalid format of the 'general.min-limit' option: " + each)
+                limit = Limit("min", match.group(3), match.group(1), match.group(2), (match.group(2), '<', float(match.group(3))))
+                self.limits.append(limit)
+                
+    def verify_namespaces(self, valid_namespaces):
+        valid = []
+        for each in valid_namespaces:
+            valid.append(each)
+        for each in self.limits:
+            if each.namespace not in valid:
+                self.parser.error("Invalid limit option (namespace does not exist): " + each.namespace)
+
+    def verify_fields(self, namespace, valid_fields):
+        valid = []
+        for each in valid_fields:
+            valid.append(each)
+        for each in self.limits:
+            if each.namespace == namespace:
+                if each.field not in valid:
+                    self.parser.error("Invalid limit option (field does not exist): " + each.namespace + ":" + each.field)
+                    
+    def iterate_limits(self):
+        for each in self.limits:
+            yield each   
+
+    def is_mode_matched(self, limit, value, diff, is_modified):
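+        # is_modified is None when there is no previous snapshot to compare with;
+        # in that case the limit is always applied. Otherwise the warnings mode decides:
+        # 'all' always applies, 'touched' applies to modified regions, and 'trend' applies
+        # only when a modified region is above a max limit and still growing, or below a
+        # min limit and still shrinking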
+        if is_modified == None:
+            return True
+        if self.mode == self.MODE_ALL:
+            return True 
+        if self.mode == self.MODE_TOUCHED and is_modified == True:
+            return True 
+        if self.mode == self.MODE_TREND and is_modified == True:
+            if limit < value and diff > 0:
+                return True
+            if limit > value and diff < 0:
+                return True
+        return False
+        

+ 168 - 0
mainline/export.py

@@ -0,0 +1,168 @@
+'''
+Created on 29/01/2013
+
+@author: konstaa
+'''
+
+
+
+import logging
+import time
+import re
+
+import core.log
+import core.db.loader
+import core.db.post
+import core.db.utils
+import core.cmdparser
+import core.export.convert
+
+def main():
+    
+    exit_code = 0
+    log_plugin = core.log.Plugin()
+    db_plugin = core.db.post.Plugin()
+
+    parser = core.cmdparser.MultiOptionParser(usage="Usage: %prog [options] -- <path 1> ... <path N>")
+    log_plugin.declare_configuration(parser)
+    db_plugin.declare_configuration(parser)
+    parser.add_option("--general.format", default='xml', choices=['txt', 'xml', 'python'], help="Format of the output data [default: %default]")
+    parser.add_option("--general.namespaces", default=None, help="Allows to enumerate namespaces of interest."
+                      " If not defined all namespaces available in database file will be processed."
+                      " Separate several namespaces by comma, for example 'general,std.code.complexity'"
+                      " [default: %default]")
+
+    (options, args) = parser.parse_args()
+    log_plugin.configure(options)
+    db_plugin.configure(options)
+    out_format = options.__dict__['general.format']
+    namespaces = None
+    if options.__dict__['general.namespaces'] != None:
+        namespaces = re.split(',', options.__dict__['general.namespaces'])
+
+    loader_prev = core.db.loader.Loader()
+    if db_plugin.dbfile_prev != None:
+        loader_prev.open_database(db_plugin.dbfile_prev)
+
+    loader = core.db.loader.Loader()
+    loader.open_database(db_plugin.dbfile)
+    
+    paths = None
+    if len(args) == 0:
+        paths = [""]
+    else:
+        paths = args
+        
+    if out_format == 'txt':
+        print "=" * 80 + "\n" + "Export" + "\n" + "_" * 80 + "\n"
+    elif out_format == 'xml':
+        print "<export>"
+    elif out_format == 'python':
+        print "{'export': ["
+
+    for (ind, path) in enumerate(paths):
+        logging.info("Processing: " + path)
+        
+        aggregated_data = loader.load_aggregated_data(path, namespaces=namespaces)
+        aggregated_data_tree = {}
+        subdirs = []
+        subfiles = []
+        if aggregated_data != None:
+            aggregated_data_tree = aggregated_data.get_data_tree(namespaces=namespaces)
+            subdirs = aggregated_data.get_subdirs()
+            subfiles = aggregated_data.get_subfiles()
+        else:
+            logging.error("Specified path '" + path + "' is invalid (not found in the database records)")
+            exit_code += 1
+        aggregated_data_prev = loader_prev.load_aggregated_data(path, namespaces=namespaces)
+        if aggregated_data_prev != None:
+            aggregated_data_tree = append_diff(aggregated_data_tree,
+                                           aggregated_data_prev.get_data_tree(namespaces=namespaces))
+        
+        file_data = loader.load_file_data(path)
+        file_data_tree = {}
+        if file_data != None:
+            file_data_tree = file_data.get_data_tree(namespaces=namespaces) 
+            file_data_prev = loader_prev.load_file_data(path)
+            regions_matcher = None
+            if file_data_prev != None:
+                file_data_tree = append_diff(file_data_tree,
+                                             file_data_prev.get_data_tree(namespaces=namespaces))
+                regions_matcher = core.db.utils.FileRegionsMatcher(file_data, file_data_prev)
+            
+            regions = []
+            for each in file_data.iterate_regions():
+                region_data_tree = each.get_data_tree(namespaces=namespaces)
+                if regions_matcher != None and regions_matcher.is_matched(each.id):
+                    region_data_prev = file_data_prev.get_region(regions_matcher.get_prev_id(each.id))
+                    region_data_tree = append_diff(region_data_tree,
+                                                   region_data_prev.get_data_tree(namespaces=namespaces))
+                regions.append({"info": {"name" : each.name,
+                                         'type' : file_data.get_region_types()().to_str(each.get_type()),
+                                         "cursor" : each.cursor,
+                                         'line_begin': each.line_begin,
+                                         'line_end': each.line_end,
+                                         'offset_begin': each.begin,
+                                         'offset_end': each.end},
+                                "data": region_data_tree})
+                
+            file_data_tree['regions'] = regions
+        
+        data = {"info": {"path": path, "id": ind + 1},
+                "aggregated-data": aggregated_data_tree,
+                "file-data": file_data_tree,
+                "subdirs": subdirs,
+                "subfiles": subfiles}
+
+        if out_format == 'txt':
+            print core.export.convert.to_txt(data, root_name = "data")
+        elif out_format == 'xml':
+            print core.export.convert.to_xml(data, root_name = "data")
+        elif out_format == 'python':
+            postfix = ""
+            if ind < len(paths) - 1:
+                postfix = ", "
+            print core.export.convert.to_python(data, root_name = "data") + postfix
+
+    if out_format == 'txt':
+        print "\n"
+    elif out_format == 'xml':
+        print "</export>"
+    elif out_format == 'python':
+        print "]}"
+
+
+    return exit_code
+
+def append_diff(main_tree, prev_tree):
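+    # For every namespace/field present in both trees, stores the numeric difference
+    # against the previous snapshot under a '__diff__' key of the current tree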
+    assert(main_tree != None)
+    assert(prev_tree != None)
+    
+    for name in main_tree.keys():
+        if name not in prev_tree.keys():
+            continue
+        for field in main_tree[name].keys():
+            if field not in prev_tree[name].keys():
+                continue
+            if isinstance(main_tree[name][field], dict) and isinstance(prev_tree[name][field], dict):
+                diff = {}
+                for key in main_tree[name][field].keys():
+                    if key not in prev_tree[name][field].keys():
+                        continue
+                    diff[key] = main_tree[name][field][key] - prev_tree[name][field][key]
+                main_tree[name][field]['__diff__'] = diff
+            elif (not isinstance(main_tree[name][field], dict)) and (not isinstance(prev_tree[name][field], dict)):
+                if '__diff__' not in main_tree[name]:
+                    main_tree[name]['__diff__'] = {}
+                main_tree[name]['__diff__'][field] = main_tree[name][field] - prev_tree[name][field]
+    return main_tree
+
+if __name__ == '__main__':
+    ts = time.time()
+    core.log.set_default_format()
+    exit_code = main()
+    logging.warning("Exit code: " + str(exit_code) + ". Time spent: " + str(round((time.time() - ts), 2)) + " seconds. Done")
+    exit(exit_code)
+    
+    
+  

+ 0 - 0
mainline/ext/std/__init__.py


+ 0 - 0
mainline/ext/std/code/__init__.py


+ 8 - 0
mainline/ext/std/code/api.py

@@ -0,0 +1,8 @@
+'''
+Created on 12/02/2013
+
+@author: konstaa
+'''
+
+class ICode(object):
+    pass

+ 7 - 0
mainline/ext/std/code/brackets.ini

@@ -0,0 +1,7 @@
+[Plugin]
+version: 1.0
+package: std.code
+module:  brackets
+class:   Plugin
+depends: None
+enabled: True

+ 27 - 0
mainline/ext/std/code/brackets.py

@@ -0,0 +1,27 @@
+'''
+Created on 26/06/2012
+
+@author: konstaa
+'''
+
+import core.api
+
+class Plugin(core.api.Plugin, core.api.Child, core.api.IConfigurable):
+    
+    def declare_configuration(self, parser):
+        parser.add_option("--std.code.brackets.on", action="store_true", default=False,
+                         help="If enabled, counts number of mismatched brackets '{}' [default: %default]")
+    
+    def configure(self, options):
+        self.is_active = options.__dict__['std.code.brackets.on']
+        
+    def initialize(self):
+        if self.is_active == True:
+            namespace = self.get_plugin_loader().get_database_loader().create_namespace(self.get_name())
+            namespace.add_field('curly', int)
+            core.api.subscribe_by_parents_name('std.code.cpp', self, 'callback_cpp')
+
+    def callback_cpp(self, parent, data):
+        if data.get_data(self.get_name(), 'curly') != None:
+            return # data is available from cloned database, skip collection
+        data.set_data(self.get_name(), 'curly', data.get_data(parent.get_name(), 'mismatched_brackets'))

+ 7 - 0
mainline/ext/std/code/complexity.ini

@@ -0,0 +1,7 @@
+[Plugin]
+version: 1.0
+package: std.code
+module:  complexity
+class:   Plugin
+depends: None
+enabled: True

+ 47 - 0
mainline/ext/std/code/complexity.py

@@ -0,0 +1,47 @@
+'''
+Created on 26/06/2012
+
+@author: konstaa
+'''
+
+import core.api
+
+import re
+
+class Plugin(core.api.Plugin, core.api.Child, core.api.IConfigurable):
+    
+    def declare_configuration(self, parser):
+        parser.add_option("--std.code.complexity.on", action="store_true", default=False,
+                         help="Enables processing of complexity metrics: cyclomatic by McCabe [default: %default]")
+    
+    def configure(self, options):
+        self.is_active = options.__dict__['std.code.complexity.on']
+        
+    def initialize(self):
+        if self.is_active == True:
+            namespace = self.get_plugin_loader().get_database_loader().create_namespace(self.get_name(), support_regions = True)
+            namespace.add_field('cyclomatic', int)
+            core.api.subscribe_by_parents_name('std.code.cpp', self, 'callback_cpp')
+
+    # cyclomatic complexity pattern
+    pattern = re.compile(r'''([^0-9A-Za-z_]((if)|(case)|(for)|(while))[^0-9A-Za-z_])|[&][&]|[|][|]|[?]''')
+
+    def callback_cpp(self, parent, data):
+        
+        text = None
+        for (ind, region) in enumerate(data.iterate_regions(filter_group=data.get_region_types().FUNCTION)):
+            # cyclomatic complexity
+            if ind == 0 and region.get_data(self.get_name(), 'cyclomatic') != None:
+                return # data is available in first from cloned database, skip collection
+            if text == None: # lazy loading for performance benefits
+                text = data.get_content(exclude = data.get_marker_types().ALL_EXCEPT_CODE)
+            
+            count = 0
+            start_pos = region.get_offset_begin()
+            for sub_id in region.iterate_subregion_ids():
+                # exclude sub regions, like enclosed classes
+                count += len(self.pattern.findall(text, start_pos, data.get_region(sub_id).get_offset_begin()))
+                start_pos = data.get_region(sub_id).get_offset_end()
+            count += len(self.pattern.findall(text, start_pos, region.get_offset_end()))
+            region.set_data(self.get_name(), 'cyclomatic', count)
+

+ 7 - 0
mainline/ext/std/code/cpp.ini

@@ -0,0 +1,7 @@
+[Plugin]
+version: 1.0
+package: std.code
+module:  cpp
+class:   Plugin
+depends: None
+enabled: True

+ 290 - 0
mainline/ext/std/code/cpp.py

@@ -0,0 +1,290 @@
+'''
+Created on 26/06/2012
+
+@author: konstaa
+'''
+
+import fnmatch
+import re
+import binascii
+import logging
+
+import core.api
+
+class Plugin(core.api.Plugin, core.api.Parent, core.api.Child, core.api.IConfigurable, core.api.ICode):
+    
+    def declare_configuration(self, parser):
+        parser.add_option("--std.code.cpp.files", default="*.c,*.cpp,*.h,*.hpp",
+                         help="Enumerates filename extensions to match C/C++ files [default: %default]")
+    
+    def configure(self, options):
+        self.files = options.__dict__['std.code.cpp.files'].split(',')
+        
+    def initialize(self):
+        core.api.subscribe_by_parents_name('core.dir', self)
+        
+        namespace = self.get_plugin_loader().get_database_loader().create_namespace(self.get_name())
+        namespace.add_field('files', int)
+        namespace.add_field('mismatched_brackets', None)
+    
+    def callback(self, parent, data):
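+        # Parses the file only if its name matches one of the configured wildcard
+        # patterns; data already present (for example, loaded from a cloned database)
+        # is forwarded to children without reparsing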
+        for ext in self.files:
+            if fnmatch.fnmatch(data.get_path(), ext):
+                if data.get_data(self.get_name(), 'files') != None:
+                    self.notify_children(data)
+                    return
+
+                count_mismatched_brackets = CppCodeParser().run(data)
+                data.set_data(self.get_name(), 'files', 1)
+                data.set_data(self.get_name(), 'mismatched_brackets', count_mismatched_brackets)
+                self.notify_children(data)
+                break
+            
+class CppCodeParser(object):
+    
+    regex_cpp = re.compile(r'''
+                   /([\\](?:\n|\r|\r\n))*/(?=\n|\r|\r\n)              # Match C++ style comments (empty comment line)
+                |  /([\\](?:\n|\r|\r\n))*/.*?[^\\](?=\n|\r|\r\n)      # Match C++ style comments
+                                                                      # NOTE: end of line is NOT consumed
+                                                                      # NOTE: ([\\](?:\n|\r|\r\n))* for new line separators,
+                                                                      # Need to support new line separators in expense of efficiency?
+                | /([\\](?:\n|\r|\r\n))*\*.*?\*([\\](?:\n|\r|\r\n))*/ # Match C style comments
+                | \'(?:\\.|[^\\\'])*\'                                # Match quoted strings
+                | "(?:\\.|[^\\"])*"                                   # Match double quoted strings
+                | (((?<=\n|\r)|^)[ \t]*[#].*?[^\\](?=\n|\r|\r\n))     # Match preprocessor
+                                                                      # NOTE: end of line is NOT consumed
+                                                                      # NOTE: beginning of line is NOT consumed
+                | (?P<fn_name>
+                      (operator(                                      # Match C++ operator ...
+                         (\s+[_a-zA-Z][_a-zA-Z0-9]*(\s*\[\s*\])?)     # - cast, new and delete operators
+                       | (\s*\[\s*\])                                 # - operator []
+                       | (\s*\(\s*\))                                 # - operator ()
+                       | (\s*[+-\\*/=<>!%&^|~,?.]{1,3})               # - other operators (from 1 to 3 symbols)
+                      ))                                               
+                    | ([~]?[_a-zA-Z][_a-zA-Z0-9]*)                    # ... or function or constructor
+                  )\s*[(]                                             # LIMITATION: if there are comments after function name
+                                                                      # and before '(', it is not detected
+                                                                      # LIMITATION: if there are comments within operator definition,
+                                                                      # it may not be detected
+                | ((?P<block_type>class|struct|namespace)             # Match C++ class or struct
+                    (?P<block_name>((\s+[a-zA-Z_][a-zA-Z0-9_]*)|(?=\s*[{])))) # noname is supported, symbol '{' is not consumed
+                                                                      # LIMITATION: if there are comments between keyword and name,
+                                                                      # it is not detected
+                | [<>{};:]                                            # Match block start/end, brackets and statement separator
+                | ((?:\n|\r|\r\n)\s*(?:\n|\r|\r\n))                   # Match double empty line
+            ''',
+            re.DOTALL | re.MULTILINE | re.VERBOSE
+        )
+
+    regex_ln = re.compile(r'(\n)|(\r)|(\r\n)')
+
+    def run(self, data):
+        self.__init__() # Go to initial state if it is called twice
+        return self.parse(data)
+        
+    def finalize_block(self, text, block, block_end):
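+        # Trims leading whitespace, records the block end and computes a CRC32
+        # checksum of the block's own text, excluding enclosed child blocks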
+        space_match = re.match('^\s*', text[block['start']:block_end], re.MULTILINE)
+        block['start'] += space_match.end() # trim spaces at the beginning
+        block['end'] = block_end
+
+        start_pos = block['start']
+        crc32 = 0
+        for child in block['children']:
+            # exclude children
+            crc32 = binascii.crc32(text[start_pos:child['start']], crc32)
+            start_pos = child['end']
+        block['checksum'] = binascii.crc32(text[start_pos:block['end']], crc32) & 0xffffffff # to match python 3
+        
+    def add_lines_data(self, text, blocks):
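+        # Translates the character offsets of each block (and its children) into
+        # line numbers by counting line breaks between consecutive offsets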
+        def add_lines_data_rec(self, text, blocks):
+            for each in blocks:
+                # add line begin
+                self.total_current += len(self.regex_ln.findall(text, self.total_last_pos, each['start']))
+                each['line_begin'] = self.total_current
+                self.total_last_pos = each['start']
+                # process enclosed
+                add_lines_data_rec(self, text, each['children'])
+                # add line end
+                self.total_current += len(self.regex_ln.findall(text, self.total_last_pos, each['end']))
+                each['line_end'] = self.total_current
+                self.total_last_pos = each['end']
+        self.total_last_pos = 0
+        self.total_current = 1
+        add_lines_data_rec(self, text, blocks)
+
+    def add_regions(self, data, blocks):
+        # Note: data.add_region() internals depend on special ordering of regions
+        # in order to identify enclosed regions efficiently
+        def add_regions_rec(self, data, blocks):
+            def get_type_id(data, named_type):
+                if named_type == "function":
+                    return data.get_region_types().FUNCTION
+                elif named_type == "class":
+                    return data.get_region_types().CLASS
+                elif named_type == "struct":
+                    return data.get_region_types().STRUCT
+                elif named_type == "namespace":
+                    return data.get_region_types().NAMESPACE
+                elif named_type == "__global__":
+                    return data.get_region_types().GLOBAL
+                else:
+                    assert(False)
+            for each in blocks:
+                data.add_region(each['name'], each['start'], each['end'],
+                                each['line_begin'], each['line_end'], each['cursor'],
+                                get_type_id(data, each['type']), each['checksum'])
+                add_regions_rec(self, data, each['children'])
+        add_regions_rec(self, data, blocks)
+        
+    def parse(self, data):
+        
+        def reset_next_block(start):
+            return {'name':'', 'start':start, 'cursor':0, 'type':'', 'confirmed':False}
+        
+        count_mismatched_brackets = 0
+        
+        text = data.get_content()
+        indent_current = 0;
+        
+        blocks = [{'name':'__global__', 'start':0, 'cursor':0, 'type':'__global__', 'indent_start':indent_current, 'children':[]}]
+        curblk = 0
+        
+        next_block = reset_next_block(0)
+        
+        cursor_last_pos = 0
+        cursor_current = 1
+        
+        for m in re.finditer(self.regex_cpp, text):
+            # Comment
+            if text[m.start()] == '/':
+                data.add_marker(m.start(), m.end(), data.get_marker_types().COMMENT)
+                if text[m.start():m.end()].startswith("//\n"):
+                    print text[m.start():m.end()]
+            
+            # String
+            elif text[m.start()] == '"' or text[m.start()] == '\'':
+                data.add_marker(m.start() + 1, m.end() - 1, data.get_marker_types().STRING)
+            
+            # Preprocessor (including internal comments)
+            elif text[m.start()] == ' ' or text[m.start()] == '\t' or text[m.start()] == '#':
+                data.add_marker(m.start(), m.end(), data.get_marker_types().PREPROCESSOR)
+
+            # Statement end
+            elif text[m.start()] == ';':
+                # Reset next block name and start
+                next_block['name'] = ""
+                next_block['start'] = m.end() # potential region start
+
+            # Template argument closing bracket
+            elif text[m.start()] == '>':
+                # Reset next block name and start (in order to skip class names in templates), if has not been confirmed before
+                if next_block['confirmed'] == False and (next_block['type'] == 'class' or next_block['type'] == 'struct'):
+                    next_block['name'] = ""
+                    next_block['start'] = m.end() # potential region start
+                    
+            # Template argument opening bracket or after class inheritance specification
+            elif text[m.start()] == ':' or text[m.start()] == '<':
+                # ... if it goes after a class definition
+                if next_block['type'] == 'class' or next_block['type'] == 'struct':
+                    next_block['confirmed'] = True
+
+            # Double end line
+            elif text[m.start()] == '\n' or text[m.start()] == '\r':
+                # Reset next block start, if has not been named yet
+                if next_block['name'] == "":
+                    next_block['start'] = m.end() # potential region start
+
+            # Block start...
+            elif text[m.start()] == '{':
+                # shift indent right
+                indent_current += 1
+                
+                # ... if name detected previously
+                if next_block['name'] != '': # - Start of enclosed block
+                    blocks.append({'name':next_block['name'],
+                                   'start':next_block['start'],
+                                   'cursor':next_block['cursor'],
+                                   'type':next_block['type'],
+                                   'indent_start':indent_current,
+                                   'children':[]})
+                    next_block = reset_next_block(m.end())
+                    curblk += 1
+                # ... reset next block start, otherwise
+                else: # - unknown type of block start
+                    next_block['start'] = m.end() # potential region start
+            
+            # Block end...
+            elif text[m.start()] == '}':
+                # ... if indent level matches the start
+                if blocks[curblk]['indent_start'] == indent_current:
+                    next_block = reset_next_block(m.end())
+                    if curblk == 0:
+                        print data.get_path()
+                        print cursor_current + len(self.regex_ln.findall(text, cursor_last_pos, m.start()))
+                        logging.warning("Non-matching closing bracket '}' detected")
+                        count_mismatched_brackets += 1
+                        continue
+                    
+                    self.finalize_block(text, blocks[curblk], m.end())
+                    assert(blocks[curblk]['type'] != '__global__')
+                    
+                    curblk -= 1
+                    assert(curblk >= 0)
+                    blocks[curblk]['children'].append(blocks.pop())
+
+                # shift indent left
+                indent_current -= 1
+                if indent_current < 0:
+                    logging.warning("Non-matching closing bracket '}' detected")
+                    count_mismatched_brackets += 1
+                    indent_current = 0
+
+            # Potential namespace, struct, class
+            elif text[m.start():m.end()].startswith(('class','struct','namespace')) == True \
+                and m.group('fn_name') == None: # function name can start with keyword, for example class_id_type()
+                if next_block['name'] == "":
+                    # - 'name'
+                    next_block['name'] = m.group('block_name').strip()
+                    if next_block['name'] == "":
+                        next_block['name'] = '__noname__'
+                    # - 'cursor'
+                    cursor_current += len(self.regex_ln.findall(text, cursor_last_pos, m.start('block_name')))
+                    cursor_last_pos = m.start('block_name')
+                    next_block['cursor'] = cursor_current
+                    # - 'type'
+                    next_block['type'] = m.group('block_type').strip()
+                    # - 'start' detected earlier
+
+            # Potential function name detected...
+            else:
+                # ... if outside of a function (enclosed functions are not detected, unless the enclosing block is a class)
+                if blocks[curblk]['type'] != 'function' and (next_block['name'] == "" or next_block['type'] != 'function'):
+                    # - 'name'
+                    next_block['name'] = m.group('fn_name').strip()
+                    # - 'cursor'
+                    cursor_current += len(self.regex_ln.findall(text, cursor_last_pos, m.start('fn_name')))
+                    cursor_last_pos = m.start('fn_name')
+                    # NOTE: cursor could be collected together with line_begin, line_end,
+                    # but we keep it here separately for easier debugging of file parsing problems
+                    next_block['cursor'] = cursor_current
+                    # - 'type'
+                    next_block['type'] = 'function'
+                    # - 'start' detected earlier
+
+        while indent_current > 0:
+            # report every remaining unmatched opening bracket
+            logging.warning("Non-matching opening bracket '{' detected")
+            count_mismatched_brackets += 1
+            indent_current -= 1
+
+        # Finalize the remaining blocks (the global one and any left unclosed), innermost first
+        for block in reversed(blocks):
+            self.finalize_block(text, block, len(text))
+
+        self.add_lines_data(text, blocks)
+        self.add_regions(data, blocks)
+        
+        return count_mismatched_brackets
+
+            

+ 7 - 0
mainline/ext/std/code/dumper.ini

@@ -0,0 +1,7 @@
+[Plugin]
+version: 1.0
+package: std.code
+module:  dumper
+class:   Plugin
+depends: None
+enabled: True

+ 63 - 0
mainline/ext/std/code/dumper.py

@@ -0,0 +1,63 @@
+'''
+Created on 26/06/2012
+
+@author: konstaa
+'''
+
+import core.api
+
+import re
+
+class Plugin(core.api.Plugin, core.api.Child, core.api.IConfigurable):
+    
+    POST_NAME = '.ss.std.code.dumper.html'
+    
+    def declare_configuration(self, parser):
+        parser.add_option("--std.code.dumper.on", action="store_true", default=False,
+                         help="If the option is set (True), HTML files are generated for every parsed file containing code (for troubleshooting purposes only) [default: %default]")
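+        # For example, assuming collect.py is the entry point which loads this plugin:
+        #   python collect.py --std.code.dumper.on -- <path>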
+    
+    def configure(self, options):
+        self.dump_html = options.__dict__['std.code.dumper.on']
+        
+    def initialize(self):
+        if self.dump_html == True:
+            core.api.subscribe_by_parents_interface(core.api.ICode, self)
+        
+        # do not process files dumped by a previous run of this module
+        self.get_plugin_loader().get_plugin('core.dir').add_exclude_rule(re.compile(r'.*' + Plugin.POST_NAME + r'$'))
+        
+    def callback(self, parent, data):
+        file_name = data.get_path()
+        text = data.get_content()
+        
+        import cgi
+        f = open(file_name + Plugin.POST_NAME, 'w')
+        f.write('<html><body><table><tr><td><pre>')
+        last_pos = 0
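+        # Left column: the source text with string, comment and preprocessor markers colour-coded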
+        for marker in data.iterate_markers():
+            f.write(cgi.escape(text[last_pos:marker.begin]))
+            if marker.get_type() == data.get_marker_types().STRING:
+                f.write('<span style="color:#0000FF">')
+            elif marker.get_type() == data.get_marker_types().COMMENT:
+                f.write('<span style="color:#009900">')
+            elif marker.get_type() == data.get_marker_types().PREPROCESSOR:
+                f.write('<span style="color:#990000">')
+            f.write(cgi.escape(text[marker.begin:marker.end]))
+            f.write('</span>')
+            last_pos = marker.end
+        f.write(cgi.escape(text[last_pos:]))
+        last_pos = 0
+        f.write('</pre></td><td><pre>')
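+        # Right column: the same source with function regions highlighted in alternating background colours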
+        styles = ['<span style="background-color:#ffff80">', '<span style="background-color:#ff80ff">']
+        for item in enumerate(data.iterate_regions(filter_group=data.get_region_types().FUNCTION)):
+            reg = item[1]
+            f.write(cgi.escape(text[last_pos:reg.get_offset_begin()]))
+            f.write(styles[item[0] % 2])
+            f.write('<a href="#line' + str(reg.get_cursor()) + '" id=line"' + str(reg.get_cursor()) + '"></a>')
+            f.write(cgi.escape(text[reg.get_offset_begin():reg.get_offset_end()]))
+            f.write('</span>')
+            last_pos = reg.get_offset_end()
+        f.write(cgi.escape(text[last_pos:]))
+        f.write('</pre></td></tr></table></body></html>')
+        f.close()
+

+ 7 - 0
mainline/ext/std/code/test.ini

@@ -0,0 +1,7 @@
+[Plugin]
+version: 1.0
+package: std.code
+module:  test
+class:   Plugin
+depends: None
+enabled: True

+ 30 - 0
mainline/ext/std/code/test.py

@@ -0,0 +1,30 @@
+'''
+Created on 26/06/2012
+
+@author: konstaa
+'''
+
+import core.api
+
+class Plugin(core.api.Plugin, core.api.Child, core.api.IConfigurable):
+    
+    def declare_configuration(self, parser):
+        parser.add_option("--std.code.test.on", action="store_true", default=False,
+                         help="Enables test plugin (for development purposes) [default: %default]")
+    
+    def configure(self, options):
+        self.is_active = options.__dict__['std.code.test.on']
+        
+    def initialize(self):
+        if self.is_active == True:
+            core.api.subscribe_by_parents_interface(core.api.ICode, self)
+
+    def callback(self, parent, data):
+        
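+        # Recursively print the region tree (type, name, cursor) starting from region id 1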
+        def print_rec(data, indent, region_id):
+            print ("   ." * indent) + str(data.get_region(region_id).get_type()) + " " + data.get_region(region_id).get_name() + " " + str(data.get_region(region_id).get_cursor())
+            for sub_id in data.get_region(region_id).iterate_subregion_ids():
+                print_rec(data, indent + 1, sub_id) 
+        
+        print_rec(data, 0, 1)
+        

+ 158 - 0
mainline/limit.py

@@ -0,0 +1,158 @@
+'''
+Created on 29/01/2013
+
+@author: konstaa
+'''
+
+
+import logging
+import time
+
+import core.log
+import core.db.loader
+import core.db.post
+import core.db.utils
+import core.export.cout
+import core.warn
+import core.cmdparser
+
+
+def main():
+    
+    exit_code = 0
+    log_plugin = core.log.Plugin()
+    db_plugin = core.db.post.Plugin()
+    warn_plugin = core.warn.Plugin()
+
+    parser = core.cmdparser.MultiOptionParser(usage="Usage: %prog [options] -- <path 1> ... <path N>")
+    log_plugin.declare_configuration(parser)
+    db_plugin.declare_configuration(parser)
+    warn_plugin.declare_configuration(parser)
+
+    (options, args) = parser.parse_args()
+    log_plugin.configure(options)
+    db_plugin.configure(options)
+    warn_plugin.configure(options)
+
+    loader_prev = core.db.loader.Loader()
+    if db_plugin.dbfile_prev != None:
+        loader_prev.open_database(db_plugin.dbfile_prev)
+
+    loader = core.db.loader.Loader()
+    loader.open_database(db_plugin.dbfile)
+    
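+    # Verify the warnings configuration against the namespaces and fields present in the loaded database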
+    warn_plugin.verify_namespaces(loader.iterate_namespace_names())
+    for each in loader.iterate_namespace_names():
+        warn_plugin.verify_fields(each, loader.get_namespace(each).iterate_field_names())
+    
+    paths = None
+    if len(args) == 0:
+        paths = [""]
+    else:
+        paths = args
+
+    # Try to optimise iterative change scans
+    modified_file_ids = None
+    if warn_plugin.mode != warn_plugin.MODE_ALL:
+        modified_file_ids = get_list_of_modified_files(loader, loader_prev)
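+        # None disables the optimisation (no previous data available or too many files changed)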
+        
+    for path in paths:
+        logging.info("Processing: " + path)
+        
+        for limit in warn_plugin.iterate_limits():
+            logging.info("Applying limit: " + str(limit))
+            filters = [limit.filter]
+            if modified_file_ids != None:
+                filters.append(('file_id', 'IN', modified_file_ids))
+            selected_data = loader.load_selected_data(limit.namespace,
+                                                   fields = [limit.field],
+                                                   path=path,
+                                                   filters = filters)
+            if selected_data == None:
+                logging.error("Specified path '" + path + "' is invalid (not found in the database records)")
+                exit_code += 1
+                continue
+            
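+            # Compare each selected region with the previous database (if provided) to detect modifications and compute the change trend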
+            for select_data in selected_data:
+                is_modified = None
+                diff = None
+                file_data = loader.load_file_data(select_data.get_path())
+                file_data_prev = loader_prev.load_file_data(select_data.get_path())
+                if file_data_prev != None:
+                    if file_data.get_checksum() == file_data_prev.get_checksum():
+                        diff = 0
+                        is_modified = False
+                    else:
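+                        # Checksums differ: match regions between the two file versions to compute a per-region diff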
+                        matcher = core.db.utils.FileRegionsMatcher(file_data, file_data_prev)
+                        prev_id = matcher.get_prev_id(select_data.get_region().get_id())
+                        if matcher.is_matched(select_data.get_region().get_id()):
+                            if matcher.is_modified(select_data.get_region().get_id()):
+                                is_modified = True
+                            else:
+                                is_modified = False
+                            diff = core.db.loader.DiffData(select_data,
+                                                           file_data_prev.get_region(prev_id)).get_data(limit.namespace, limit.field)
+
+                if warn_plugin.is_mode_matched(limit.limit, select_data.get_data(limit.namespace, limit.field), diff, is_modified):
+                    exit_code += 1
+                    region_cursor = 0
+                    region_name = ""
+                    if select_data.get_region() != None:
+                        region_cursor = select_data.get_region().cursor
+                        region_name = select_data.get_region().name
+                    report_limit_exceeded(select_data.get_path(),
+                                      region_cursor,
+                                      limit.namespace,
+                                      limit.field,
+                                      region_name,
+                                      select_data.get_data(limit.namespace, limit.field),
+                                      diff,
+                                      limit.limit,
+                                      is_modified)
+    return exit_code
+
+
+def get_list_of_modified_files(loader, loader_prev):
+    modified_file_ids = []
+    logging.info("Identifying changed files...")
+    
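+    # Map each file path from the previous database to its checksum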
+    old_files_map = {}
+    for each in loader_prev.iterate_file_data():
+        old_files_map[each.get_path()] = each.get_checksum()
+    if len(old_files_map) == 0:
+        return None
+    
+    for each in loader.iterate_file_data():
+        if len(modified_file_ids) > 1000: # If more than 1000 files changed, skip optimisation
+            modified_file_ids = None
+            break
+        if (each.get_path() not in old_files_map.keys()) or old_files_map[each.get_path()] != each.get_checksum():
+            # store ids as strings so they can be joined into the filter expression below
+            modified_file_ids.append(str(each.get_id()))
+            
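+    # Format the ids as a parenthesised list, e.g. "(1 , 2 , 3)", matching the 'IN' filter used in main()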
+    if modified_file_ids != None:
+        modified_file_ids = " , ".join(modified_file_ids)
+        modified_file_ids = "(" + modified_file_ids + ")"
+    old_files_map = None
+    
+    return modified_file_ids
+    
+
+def report_limit_exceeded(path, cursor, namespace, field, region_name, stat_level, trend_value, stat_limit, is_modified):
+    message = "Metric '" + namespace + "/" + field + "' for region '" + region_name + "' exceeds the limit."
+    details = [("Metric name", namespace + "/" + field),
+               ("Region name", region_name),
+               ("Metric value", stat_level),
+               ("Modified", is_modified),
+               ("Change trend", '{0:{1}}'.format(trend_value, '+' if trend_value else '')),
+               ("Limit", stat_limit)]
+    core.export.cout.cout(path, cursor, core.export.cout.SEVERITY_WARNING, message, details)
+
+if __name__ == '__main__':
+    ts = time.time()
+    core.log.set_default_format()
+    exit_code = main()
+    logging.warning("Exit code: " + str(exit_code) + ". Time spent: " + str(round((time.time() - ts), 2)) + " seconds. Done")
+    exit(exit_code)
+    
+    
+