#
# Metrix++, Copyright 2009-2013, Metrix++ Project
# Link: http://metrixplusplus.sourceforge.net
#
# This file is a part of Metrix++ Tool.
#
# Metrix++ is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3 of the License.
#
# Metrix++ is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Metrix++. If not, see <http://www.gnu.org/licenses/>.
#
import logging
import os.path
import core.api
import core.db.sqlite
####################################
# Data Interface
####################################
  26. class Data(object):
  27. def __init__(self):
  28. self.data = {}
  29. def get_data(self, namespace, field):
  30. if namespace not in self.data.keys():
  31. return None
  32. if field not in self.data[namespace].keys():
  33. return None
  34. return self.data[namespace][field]
  35. def set_data(self, namespace, field, value):
  36. if namespace not in self.data:
  37. self.data[namespace] = {}
  38. self.data[namespace][field] = value
  39. def iterate_namespaces(self):
  40. for namespace in self.data.keys():
  41. yield namespace
  42. def iterate_fields(self, namespace):
  43. for field in self.data[namespace].keys():
  44. yield (field, self.data[namespace][field])
  45. def get_data_tree(self, namespaces=None):
  46. return self.data
  47. def __repr__(self):
  48. return object.__repr__(self) + " with data " + self.data.__repr__()
  49. class LoadableData(Data):
  50. def __init__(self, loader, file_id, region_id):
  51. Data.__init__(self)
  52. self.loader = loader
  53. self.file_id = file_id
  54. self.region_id = region_id
  55. self.loaded_namespaces = []
  56. self.changed_namespaces = []
  57. def load_namespace(self, namespace):
  58. try:
  59. row = self.loader.db.get_row(namespace, self.file_id, self.region_id)
  60. except Exception:
  61. logging.debug("No data in the database for namespace: " + namespace)
  62. return
  63. if row == None:
  64. return
  65. for column_name in row.keys():
  66. packager = self.loader.get_namespace(namespace).get_field_packager(column_name)
  67. if packager == None:
  68. continue
  69. Data.set_data(self, namespace, column_name, packager.unpack(row[column_name]))
  70. def set_data(self, namespace, field, value):
  71. if namespace not in self.changed_namespaces:
  72. self.changed_namespaces.append(namespace)
  73. return Data.set_data(self, namespace, field, value)
  74. def get_data(self, namespace, field):
  75. if namespace not in self.loaded_namespaces:
  76. self.loaded_namespaces.append(namespace)
  77. self.load_namespace(namespace)
  78. return Data.get_data(self, namespace, field)
  79. def is_namespace_updated(self, namespace):
  80. return namespace in self.changed_namespaces
  81. def is_namespace_loaded(self, namespace):
  82. return namespace in self.loaded_namespaces
  83. def get_data_tree(self, namespaces=None):
  84. if namespaces == None:
  85. namespaces = self.loader.iterate_namespace_names()
  86. for each in namespaces:
  87. self.load_namespace(each)
  88. return Data.get_data_tree(self)
  89. class FileRegionData(LoadableData):
  90. class T(object):
  91. NONE = 0x00
  92. GLOBAL = 0x01
  93. CLASS = 0x02
  94. STRUCT = 0x04
  95. NAMESPACE = 0x08
  96. FUNCTION = 0x10
  97. INTERFACE = 0x20
  98. ANY = 0xFFFFFFFF
  99. def to_str(self, group):
  100. if group == self.NONE:
  101. return "none"
  102. elif group == self.GLOBAL:
  103. return "global"
  104. elif group == self.CLASS:
  105. return "class"
  106. elif group == self.STRUCT:
  107. return "struct"
  108. elif group == self.NAMESPACE:
  109. return "namespace"
  110. elif group == self.FUNCTION:
  111. return "function"
  112. elif group == self.INTERFACE:
  113. return "interface"
  114. else:
  115. assert(False)
  116. def __init__(self, loader, file_id, region_id, region_name, offset_begin, offset_end, line_begin, line_end, cursor_line, group, checksum):
  117. LoadableData.__init__(self, loader, file_id, region_id)
  118. self.name = region_name
  119. self.begin = offset_begin
  120. self.end = offset_end
  121. self.line_begin = line_begin
  122. self.line_end = line_end
  123. self.cursor = cursor_line
  124. self.group = group
  125. self.checksum = checksum
  126. self.children = []
  127. def get_id(self):
  128. return self.region_id
  129. def get_name(self):
  130. return self.name
  131. def get_offset_begin(self):
  132. return self.begin
  133. def get_offset_end(self):
  134. return self.end
  135. def get_line_begin(self):
  136. return self.line_begin
  137. def get_line_end(self):
  138. return self.line_end
  139. def get_cursor(self):
  140. return self.cursor
  141. def get_type(self):
  142. return self.group
  143. def get_checksum(self):
  144. return self.checksum
  145. def register_subregion_id(self, child_id):
  146. self.children.append(child_id)
  147. def iterate_subregion_ids(self):
  148. return self.children
  149. class Marker(object):
  150. class T(object):
  151. NONE = 0x00
  152. COMMENT = 0x01
  153. STRING = 0x02
  154. PREPROCESSOR = 0x04
  155. ALL_EXCEPT_CODE = 0x07
  156. def __init__(self, offset_begin, offset_end, group):
  157. self.begin = offset_begin
  158. self.end = offset_end
  159. self.group = group
  160. def get_offset_begin(self):
  161. return self.begin
  162. def get_offset_end(self):
  163. return self.end
  164. def get_type(self):
  165. return self.group
class FileData(LoadableData):
    # Per-file record: path, checksum and raw content, plus the file's regions
    # (nested code constructs) and markers (comment/string/preprocessor spans).
    # Regions and markers are loaded from the database lazily; during metric
    # collection they are appended incrementally instead.

    def __init__(self, loader, path, file_id, checksum, content):
        LoadableData.__init__(self, loader, file_id, None)
        self.path = path
        self.checksum = checksum
        self.content = content
        self.regions = None  # None means "not loaded yet"
        self.markers = None  # None means "not loaded yet"
        self.loader = loader
        self.loading_tmp = []  # stack of open region ids used by internal_append_region
    def get_id(self):
        return self.file_id
    def get_path(self):
        return self.path
    def get_checksum(self):
        return self.checksum
    def get_content(self, exclude = Marker.T.NONE):
        # Return the content with every excluded marker span blanked by spaces;
        # the length is preserved so byte offsets stay valid.
        if exclude == Marker.T.NONE:
            return self.content
        if exclude == (Marker.T.COMMENT | Marker.T.STRING | Marker.T.PREPROCESSOR):
            # optimise frequent queries of this type
            if hasattr(self, 'content_cache'):
                return self.content_cache
        last_pos = 0
        content = ""
        for marker in self.iterate_markers(exclude):
            content += self.content[last_pos:marker.begin]
            content += " " * (marker.end - marker.begin)
            last_pos = marker.end
        content += self.content[last_pos:]
        if exclude == (Marker.T.COMMENT | Marker.T.STRING | Marker.T.PREPROCESSOR):
            self.content_cache = content
        assert(len(content) == len(self.content))
        return content
    def internal_append_region(self, region):
        # here we apply some magic - we rely on special ordering of coming regions,
        # which is supported by code parsers
        # (loading_tmp holds ids of regions whose span is still open; the first
        # one whose end offset exceeds the new region's start is its parent)
        prev_id = None
        while True:
            if len(self.loading_tmp) == 0:
                break
            prev_id = self.loading_tmp.pop()
            if self.get_region(prev_id).get_offset_end() > region.get_offset_begin():
                self.loading_tmp.append(prev_id) # return back
                break
        self.loading_tmp.append(region.get_id())
        if prev_id != None:
            self.get_region(prev_id).register_subregion_id(region.get_id())
        self.regions.append(region)
    def load_regions(self):
        # Populate self.regions from the database on first demand.
        if self.regions == None:
            self.regions = []
            for each in self.loader.db.iterate_regions(self.get_id()):
                self.internal_append_region(FileRegionData(self.loader,
                                                           self.get_id(),
                                                           each.region_id,
                                                           each.name,
                                                           each.begin,
                                                           each.end,
                                                           each.line_begin,
                                                           each.line_end,
                                                           each.cursor,
                                                           each.group,
                                                           each.checksum))
                # region ids are expected to be 1-based and contiguous
                assert(len(self.regions) == each.region_id)
    def add_region(self, region_name, offset_begin, offset_end, line_begin, line_end, cursor_line, group, checksum):
        # Register a newly parsed region and persist it; returns the new id.
        if self.regions == None:
            self.regions = [] # do not load in time of collection
        new_id = len(self.regions) + 1
        self.internal_append_region(FileRegionData(self.loader, self.get_id(), new_id, region_name, offset_begin, offset_end, line_begin, line_end, cursor_line, group, checksum))
        self.loader.db.create_region(self.file_id, new_id, region_name, offset_begin, offset_end, line_begin, line_end, cursor_line, group, checksum)
        return new_id
    def get_region(self, region_id):
        # ids are 1-based, list is 0-based
        self.load_regions()
        return self.regions[region_id - 1]
    def iterate_regions(self, filter_group = FileRegionData.T.ANY):
        # Yield regions whose type matches any bit in filter_group.
        self.load_regions()
        for each in self.regions:
            if each.group & filter_group:
                yield each
    def are_regions_loaded(self):
        return self.regions != None
    def load_markers(self):
        # Populate self.markers from the database on first demand.
        if self.markers == None:
            self.markers = []
            for each in self.loader.db.iterate_markers(self.get_id()):
                self.markers.append(Marker(each.begin, each.end, each.group))
    def add_marker(self, offset_begin, offset_end, group):
        # Register a newly parsed marker and persist it.
        if self.markers == None:
            self.markers = [] # do not load in time of collection
        self.markers.append(Marker(offset_begin, offset_end, group))
        self.loader.db.create_marker(self.file_id, offset_begin, offset_end, group)
    def iterate_markers(self, filter_group = Marker.T.COMMENT |
                        Marker.T.STRING | Marker.T.PREPROCESSOR):
        # Yield markers whose type matches any bit in filter_group.
        self.load_markers()
        for each in self.markers:
            if each.group & filter_group:
                yield each
    def get_marker_types(self):
        return Marker.T
    def get_region_types(self):
        return FileRegionData.T
    def are_markers_loaded(self):
        return self.markers != None
    def __repr__(self):
        return Data.__repr__(self) + " and regions " + self.regions.__repr__()
  272. class AggregatedData(Data):
  273. def __init__(self, loader, path):
  274. Data.__init__(self)
  275. self.path = path
  276. self.loader = loader
  277. self.subdirs = None
  278. self.subfiles = None
  279. def get_subdirs(self):
  280. if self.subdirs != None:
  281. return self.subdirs
  282. self.subdirs = []
  283. if self.path != None:
  284. for subdir in self.loader.db.iterate_dircontent(self.path, include_subdirs = True, include_subfiles = False):
  285. self.subdirs.append(subdir)
  286. return self.subdirs
  287. def get_subfiles(self):
  288. if self.subfiles != None:
  289. return self.subfiles
  290. self.subfiles = []
  291. if self.path != None:
  292. for subfile in self.loader.db.iterate_dircontent(self.path, include_subdirs = False, include_subfiles = True):
  293. self.subfiles.append(subfile)
  294. return self.subfiles
  295. class SelectData(Data):
  296. def __init__(self, loader, path, file_id, region_id):
  297. Data.__init__(self)
  298. self.loader = loader
  299. self.path = path
  300. self.file_id = file_id
  301. self.region_id = region_id
  302. self.region = None
  303. def get_path(self):
  304. return self.path
  305. def get_region(self):
  306. if self.region == None and self.region_id != None:
  307. row = self.loader.db.get_region(self.file_id, self.region_id)
  308. if row != None:
  309. self.region = FileRegionData(self.loader,
  310. self.file_id,
  311. self.region_id,
  312. row.name,
  313. row.begin,
  314. row.end,
  315. row.line_begin,
  316. row.line_end,
  317. row.cursor,
  318. row.group,
  319. row.checksum)
  320. return self.region
  321. class DiffData(Data):
  322. def __init__(self, new_data, old_data):
  323. Data.__init__(self)
  324. self.new_data = new_data
  325. self.old_data = old_data
  326. def get_data(self, namespace, field):
  327. new_data = self.new_data.get_data(namespace, field)
  328. old_data = self.old_data.get_data(namespace, field)
  329. if new_data == None:
  330. return None
  331. if old_data == None:
  332. # non_zero fields has got zero value by default if missed
  333. # the data can be also unavailable,
  334. # because previous collection does not include that
  335. # but external tools (like limit.py) should warn about this,
  336. # using list of registered database properties
  337. old_data = 0
  338. return new_data - old_data
####################################
# Packager Interface
####################################
  342. class PackagerError(Exception):
  343. def __init__(self):
  344. Exception.__init__(self, "Failed to pack or unpack.")
  345. class PackagerFactory(object):
  346. def create(self, python_type, non_zero):
  347. if python_type == None:
  348. return PackagerFactory.SkipPackager()
  349. if python_type == int:
  350. if non_zero == False:
  351. return PackagerFactory.IntPackager()
  352. else:
  353. return PackagerFactory.IntNonZeroPackager()
  354. if python_type == float and non_zero == False:
  355. return PackagerFactory.FloatPackager()
  356. if python_type == str:
  357. return PackagerFactory.StringPackager()
  358. class PackagerFactoryError(Exception):
  359. def __init__(self, python_type):
  360. Exception.__init__(self, "Python type '" + str(python_type) + "' is not supported by the factory.")
  361. raise PackagerFactoryError(python_type)
  362. def get_python_type(self, sql_type):
  363. if sql_type == "integer":
  364. return int
  365. if sql_type == "real":
  366. return float
  367. if sql_type == "text":
  368. return str
  369. class PackagerFactoryError(Exception):
  370. def __init__(self, sql_type):
  371. Exception.__init__(self, "SQL type '" + str(sql_type) + "' is not supported by the factory.")
  372. raise PackagerFactoryError(sql_type)
  373. class IPackager(object):
  374. def pack(self, unpacked_data):
  375. raise core.api.InterfaceNotImplemented(self)
  376. def unpack(self, packed_data):
  377. raise core.api.InterfaceNotImplemented(self)
  378. def get_sql_type(self):
  379. raise core.api.InterfaceNotImplemented(self)
  380. def get_python_type(self):
  381. raise core.api.InterfaceNotImplemented(self)
  382. def is_non_zero(self):
  383. return False
  384. class IntPackager(IPackager):
  385. def pack(self, unpacked_data):
  386. if not isinstance(unpacked_data, int):
  387. raise PackagerError()
  388. return str(unpacked_data)
  389. def unpack(self, packed_data):
  390. try:
  391. return int(packed_data)
  392. except ValueError:
  393. raise PackagerError()
  394. def get_sql_type(self):
  395. return "integer"
  396. def get_python_type(self):
  397. return int
  398. class IntNonZeroPackager(IntPackager):
  399. def pack(self, unpacked_data):
  400. if unpacked_data == 0:
  401. raise PackagerError()
  402. return PackagerFactory.IntPackager.pack(self, unpacked_data)
  403. def is_non_zero(self):
  404. return True
  405. class FloatPackager(IPackager):
  406. def pack(self, unpacked_data):
  407. if not isinstance(unpacked_data, float):
  408. raise PackagerError()
  409. return str(unpacked_data)
  410. def unpack(self, packed_data):
  411. try:
  412. return float(packed_data)
  413. except ValueError:
  414. raise PackagerError()
  415. def get_sql_type(self):
  416. return "real"
  417. def get_python_type(self):
  418. return float
  419. class FloatNonZeroPackager(FloatPackager):
  420. def pack(self, unpacked_data):
  421. if unpacked_data == 0:
  422. raise PackagerError()
  423. return PackagerFactory.FloatPackager.pack(self, unpacked_data)
  424. def is_non_zero(self):
  425. return True
  426. class StringPackager(IPackager):
  427. def pack(self, unpacked_data):
  428. if not isinstance(unpacked_data, str):
  429. raise PackagerError()
  430. return str(unpacked_data)
  431. def unpack(self, packed_data):
  432. try:
  433. return str(packed_data)
  434. except ValueError:
  435. raise PackagerError()
  436. def get_sql_type(self):
  437. return "text"
  438. def get_python_type(self):
  439. return str
  440. class SkipPackager(IPackager):
  441. def pack(self, unpacked_data):
  442. return None
  443. def unpack(self, packed_data):
  444. return None
  445. def get_sql_type(self):
  446. return None
  447. def get_python_type(self):
  448. return None
####################################
# Loader
####################################
  452. class NamespaceError(Exception):
  453. def __init__(self, namespace, reason):
  454. Exception.__init__(self, "Namespace '"
  455. + namespace
  456. + "': '"
  457. + reason
  458. + "'")
  459. class FieldError(Exception):
  460. def __init__(self, field, reason):
  461. Exception.__init__(self, "Field '"
  462. + field
  463. + "': '"
  464. + reason
  465. + "'")
  466. class Namespace(object):
  467. def __init__(self, db_handle, name, support_regions = False):
  468. if not isinstance(name, str):
  469. raise NamespaceError(name, "name not a string")
  470. self.name = name
  471. self.support_regions = support_regions
  472. self.fields = {}
  473. self.db = db_handle
  474. if self.db.check_table(name) == False:
  475. self.db.create_table(name, support_regions)
  476. else:
  477. for column in self.db.iterate_columns(name):
  478. self.add_field(column.name, PackagerFactory().get_python_type(column.sql_type), non_zero=column.non_zero)
  479. def get_name(self):
  480. return self.name
  481. def are_regions_supported(self):
  482. return self.support_regions
  483. def add_field(self, field_name, python_type, non_zero=False):
  484. if not isinstance(field_name, str):
  485. raise FieldError(field_name, "field_name not a string")
  486. packager = PackagerFactory().create(python_type, non_zero)
  487. if field_name in self.fields.keys():
  488. raise FieldError(field_name, "double used")
  489. self.fields[field_name] = packager
  490. if self.db.check_column(self.get_name(), field_name) == False:
  491. self.db.create_column(self.name, field_name, packager.get_sql_type(), non_zero=non_zero)
  492. def iterate_field_names(self):
  493. for name in self.fields.keys():
  494. yield name
  495. def get_field_packager(self, field_name):
  496. if field_name in self.fields.keys():
  497. return self.fields[field_name]
  498. else:
  499. return None
  500. def get_field_sql_type(self, field_name):
  501. return self.get_field_packager(field_name).get_sql_type()
  502. def get_field_python_type(self, field_name):
  503. return self.get_field_packager(field_name).get_python_type()
  504. class DataNotPackable(Exception):
  505. def __init__(self, namespace, field, value, packager, extra_message):
  506. Exception.__init__(self, "Data '"
  507. + str(value)
  508. + "' of type "
  509. + str(value.__class__)
  510. + " referred by '"
  511. + namespace
  512. + "=>"
  513. + field
  514. + "' is not packable by registered packager '"
  515. + str(packager.__class__)
  516. + "': " + extra_message)
class Loader(object):
    # Facade over the metrics database: manages namespaces (one per table)
    # and reads/writes per-file and per-region metric data.

    def __init__(self):
        self.namespaces = {}
        self.db = None
        self.last_file_data = None # for performance boost reasons

    def create_database(self, dbfile, previous_db = None):
        # Create a fresh database file, optionally cloning from previous_db.
        # Any existing file at dbfile is removed first (with a warning).
        self.db = core.db.sqlite.Database()
        if os.path.exists(dbfile):
            logging.warn("Removing existing file: " + dbfile)
            os.unlink(dbfile)
        if previous_db != None and os.path.exists(previous_db) == False:
            raise core.api.ExitError(None, "Database file '" + previous_db + "' does not exist")
        self.db.create(dbfile, clone_from=previous_db)

    def open_database(self, dbfile, read_only = True):
        # Open an existing database and register a namespace for each table.
        self.db = core.db.sqlite.Database()
        if os.path.exists(dbfile) == False:
            raise core.api.ExitError(None, "Database file '" + dbfile + "' does not exist")
        self.db.connect(dbfile, read_only=read_only)
        for table in self.db.iterate_tables():
            self.create_namespace(table.name, table.support_regions)

    def set_property(self, property_name, value):
        # Returns None when no database is open.
        if self.db == None:
            return None
        return self.db.set_property(property_name, value)

    def get_property(self, property_name):
        # Returns None when no database is open.
        if self.db == None:
            return None
        return self.db.get_property(property_name)

    def iterate_properties(self):
        # Returns None when no database is open.
        if self.db == None:
            return None
        return self.db.iterate_properties()

    def create_namespace(self, name, support_regions = False):
        # Register a new namespace (table); raises NamespaceError on reuse.
        if self.db == None:
            return None
        if name in self.namespaces.keys():
            raise NamespaceError(name, "double used")
        new_namespace = Namespace(self.db, name, support_regions)
        self.namespaces[name] = new_namespace
        return new_namespace

    def iterate_namespace_names(self):
        for name in self.namespaces.keys():
            yield name

    def get_namespace(self, name):
        # Returns None when the namespace is unknown.
        if name in self.namespaces.keys():
            return self.namespaces[name]
        else:
            return None

    def create_file_data(self, path, checksum, content):
        # Create a FileData record backed by a new DB file entry; returns
        # (FileData, is_updated) or None when no database is open.
        if self.db == None:
            return None
        (new_id, is_updated) = self.db.create_file(path, checksum)
        result = FileData(self, path, new_id, checksum, content)
        self.last_file_data = result
        return (result, is_updated)

    def load_file_data(self, path):
        # Fetch the FileData for a path (content is not loaded); the most
        # recently used FileData is cached for repeated lookups.
        if self.db == None:
            return None
        if self.last_file_data != None and self.last_file_data.get_path() == path:
            return self.last_file_data
        data = self.db.get_file(path)
        if data == None:
            return None
        result = FileData(self, data.path, data.id, data.checksum, None)
        self.last_file_data = result
        return result

    def save_file_data(self, file_data):
        # Persist all changed namespaces of a FileData (file-level rows and,
        # when regions were touched, region-level rows).
        if self.db == None:
            return None

        class DataIterator(object):
            # Adapts a Data object's fields into packed (name, value) pairs
            # suitable for db.add_row; validates packagers on the way.
            def iterate_packed_values(self, data, namespace, support_regions = False):
                for each in data.iterate_fields(namespace):
                    space = self.loader.get_namespace(namespace)
                    if space == None:
                        raise DataNotPackable(namespace, each[0], each[1], None, "The namespace has not been found")
                    packager = space.get_field_packager(each[0])
                    if packager == None:
                        raise DataNotPackable(namespace, each[0], each[1], None, "The field has not been found")
                    if space.support_regions != support_regions:
                        raise DataNotPackable(namespace, each[0], each[1], packager, "Incompatible support for regions")
                    try:
                        packed_data = packager.pack(each[1])
                        # a None pack result means "skip this field"
                        if packed_data == None:
                            continue
                    except PackagerError:
                        raise DataNotPackable(namespace, each[0], each[1], packager, "Packager raised exception")
                    yield (each[0], packed_data)

            def __init__(self, loader, data, namespace, support_regions = False):
                self.loader = loader
                self.iterator = self.iterate_packed_values(data, namespace, support_regions)

            def __iter__(self):
                return self.iterator

        # file-level rows: only namespaces marked as updated are written
        for namespace in file_data.iterate_namespaces():
            if file_data.is_namespace_updated(namespace) == False:
                continue
            self.db.add_row(namespace,
                            file_data.get_id(),
                            None,
                            DataIterator(self, file_data, namespace))

        # region-level rows: skipped entirely when regions were never loaded
        if file_data.are_regions_loaded():
            for region in file_data.iterate_regions():
                for namespace in region.iterate_namespaces():
                    if region.is_namespace_updated(namespace) == False:
                        continue
                    self.db.add_row(namespace,
                                    file_data.get_id(),
                                    region.get_id(),
                                    DataIterator(self, region, namespace, support_regions = True))

    def iterate_file_data(self):
        # Yield a FileData (without content) for every file in the database.
        if self.db != None:
            for data in self.db.iterate_files():
                yield FileData(self, data.path, data.id, data.checksum, None)

    def load_aggregated_data(self, path = None, path_like_filter = "%", namespaces = None):
        # Aggregate rows over a path subtree; returns AggregatedData or None
        # when no database is open / the path is unknown.
        if self.db == None:
            return None
        final_path_like = path_like_filter
        if path != None:
            if self.db.check_dir(path) == False and self.db.check_file(path) == False:
                return None
            final_path_like = path + path_like_filter
        if namespaces == None:
            namespaces = self.namespaces.keys()
        result = AggregatedData(self, path)
        for name in namespaces:
            namespace = self.get_namespace(name)
            data = self.db.aggregate_rows(name, path_like = final_path_like)
            for field in data.keys():
                if namespace.get_field_packager(field).is_non_zero() == True:
                    # zero values are never stored for non-zero fields
                    # (packagers refuse them), so min/avg would be misleading
                    data[field]['min'] = None
                    data[field]['avg'] = None
                result.set_data(name, field, data[field])
        return result

    def load_selected_data(self, namespace, fields = None, path = None, path_like_filter = "%", filters = []):
        # Select rows of one namespace under a path subtree; returns an
        # iterable of SelectData, or None when unavailable.
        # NOTE(review): 'filters = []' is a mutable default argument; safe
        # only while callers never mutate the passed list — confirm.
        if self.db == None:
            return None
        # TODO implement restriction for non_zero fields
        final_path_like = path_like_filter
        if path != None:
            if self.db.check_dir(path) == False and self.db.check_file(path) == False:
                return None
            final_path_like = path + path_like_filter
        namespace_obj = self.get_namespace(namespace)
        if namespace_obj == None:
            return None

        class SelectDataIterator(object):
            # Lazily converts raw DB rows into populated SelectData objects.
            def iterate_selected_values(self, loader, namespace_obj, final_path_like, fields, filters):
                for row in loader.db.select_rows(namespace_obj.get_name(), path_like=final_path_like, filters=filters):
                    region_id = None
                    if namespace_obj.are_regions_supported() == True:
                        region_id = row['region_id']
                    data = SelectData(loader, row['path'], row['id'], region_id)
                    field_names = fields
                    if fields == None:
                        field_names = namespace_obj.iterate_field_names()
                    for field in field_names:
                        data.set_data(namespace, field, row[field])
                    yield data

            def __init__(self, loader, namespace_obj, final_path_like, fields, filters):
                self.iterator = self.iterate_selected_values(loader, namespace_obj, final_path_like, fields, filters)

            def __iter__(self):
                return self.iterator

        return SelectDataIterator(self, namespace_obj, final_path_like, fields, filters)