loader.py 40 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989
  1. #
  2. # Metrix++, Copyright 2009-2013, Metrix++ Project
  3. # Link: http://metrixplusplus.sourceforge.net
  4. #
  5. # This file is a part of Metrix++ Tool.
  6. #
  7. # Metrix++ is free software: you can redistribute it and/or modify
  8. # it under the terms of the GNU General Public License as published by
  9. # the Free Software Foundation, version 3 of the License.
  10. #
  11. # Metrix++ is distributed in the hope that it will be useful,
  12. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. # GNU General Public License for more details.
  15. #
  16. # You should have received a copy of the GNU General Public License
  17. # along with Metrix++. If not, see <http://www.gnu.org/licenses/>.
  18. #
  19. import logging
  20. import os.path
  21. import core.api
  22. import core.db.sqlite
  23. ####################################
  24. # Data Interface
  25. ####################################
  26. class Data(object):
  27. def __init__(self):
  28. self.data = {}
  29. def get_data(self, namespace, field):
  30. if namespace not in self.data.keys():
  31. return None
  32. if field not in self.data[namespace].keys():
  33. return None
  34. return self.data[namespace][field]
  35. def set_data(self, namespace, field, value):
  36. if namespace not in self.data:
  37. self.data[namespace] = {}
  38. self.data[namespace][field] = value
  39. def iterate_namespaces(self):
  40. for namespace in self.data.keys():
  41. yield namespace
  42. def iterate_fields(self, namespace):
  43. for field in self.data[namespace].keys():
  44. yield (field, self.data[namespace][field])
  45. def get_data_tree(self, namespaces=None):
  46. return self.data
  47. def __repr__(self):
  48. return object.__repr__(self) + " with data " + self.data.__repr__()
  49. class LoadableData(Data):
  50. def __init__(self, loader, file_id, region_id):
  51. Data.__init__(self)
  52. self.loader = loader
  53. self.file_id = file_id
  54. self.region_id = region_id
  55. self.loaded_namespaces = []
  56. self.changed_namespaces = []
  57. def load_namespace(self, namespace):
  58. try:
  59. row = self.loader.db.get_row(namespace, self.file_id, self.region_id)
  60. except Exception:
  61. logging.debug("No data in the database for namespace: " + namespace)
  62. return
  63. if row == None:
  64. return
  65. for column_name in row.keys():
  66. packager = self.loader.get_namespace(namespace).get_field_packager(column_name)
  67. if packager == None:
  68. continue
  69. if row[column_name] == None:
  70. continue
  71. Data.set_data(self, namespace, column_name, packager.unpack(row[column_name]))
  72. def set_data(self, namespace, field, value):
  73. if namespace not in self.changed_namespaces:
  74. self.changed_namespaces.append(namespace)
  75. return Data.set_data(self, namespace, field, value)
  76. def get_data(self, namespace, field):
  77. if namespace not in self.loaded_namespaces:
  78. self.loaded_namespaces.append(namespace)
  79. self.load_namespace(namespace)
  80. return Data.get_data(self, namespace, field)
  81. def is_namespace_updated(self, namespace):
  82. return namespace in self.changed_namespaces
  83. def is_namespace_loaded(self, namespace):
  84. return namespace in self.loaded_namespaces
  85. def get_data_tree(self, namespaces=None):
  86. if namespaces == None:
  87. namespaces = self.loader.iterate_namespace_names()
  88. for each in namespaces:
  89. self.load_namespace(each)
  90. return Data.get_data_tree(self)
  91. class FileRegionData(LoadableData):
  92. class T(object):
  93. NONE = 0x00
  94. GLOBAL = 0x01
  95. CLASS = 0x02
  96. STRUCT = 0x04
  97. NAMESPACE = 0x08
  98. FUNCTION = 0x10
  99. INTERFACE = 0x20
  100. ANY = 0xFF
  101. def to_str(self, group):
  102. if group == self.NONE:
  103. return "none"
  104. elif group == self.GLOBAL:
  105. return "global"
  106. elif group == self.CLASS:
  107. return "class"
  108. elif group == self.STRUCT:
  109. return "struct"
  110. elif group == self.NAMESPACE:
  111. return "namespace"
  112. elif group == self.FUNCTION:
  113. return "function"
  114. elif group == self.INTERFACE:
  115. return "interface"
  116. else:
  117. assert(False)
  118. def __init__(self, loader, file_id, region_id, region_name, offset_begin, offset_end, line_begin, line_end, cursor_line, group, checksum):
  119. LoadableData.__init__(self, loader, file_id, region_id)
  120. self.name = region_name
  121. self.begin = offset_begin
  122. self.end = offset_end
  123. self.line_begin = line_begin
  124. self.line_end = line_end
  125. self.cursor = cursor_line
  126. self.group = group
  127. self.checksum = checksum
  128. self.children = []
  129. def get_id(self):
  130. return self.region_id
  131. def get_name(self):
  132. return self.name
  133. def get_offset_begin(self):
  134. return self.begin
  135. def get_offset_end(self):
  136. return self.end
  137. def get_line_begin(self):
  138. return self.line_begin
  139. def get_line_end(self):
  140. return self.line_end
  141. def get_cursor(self):
  142. return self.cursor
  143. def get_type(self):
  144. return self.group
  145. def get_checksum(self):
  146. return self.checksum
  147. def register_subregion_id(self, child_id):
  148. self.children.append(child_id)
  149. def iterate_subregion_ids(self):
  150. return self.children
  151. from core.api import Marker
class FileData(LoadableData):
    """Data for one processed file: content, region tree and markers.

    Regions and markers are fetched from the database lazily; during
    collection they are appended via add_region()/add_marker() instead.
    """

    def __init__(self, loader, path, file_id, checksum, content):
        # region_id is None: the data belongs to the file itself
        LoadableData.__init__(self, loader, file_id, None)
        self.path = path
        self.checksum = checksum
        self.content = content
        self.regions = None   # None means "not loaded yet"
        self.markers = None   # None means "not loaded yet"
        self.loader = loader
        self.loading_tmp = [] # stack of region ids used while building the region tree

    def get_id(self):
        return self.file_id

    def get_path(self):
        return self.path

    def get_checksum(self):
        return self.checksum

    def get_content(self, exclude = Marker.T.NONE):
        """Return the file content with excluded marker kinds blanked out.

        Excluded ranges are replaced by spaces, so offsets and the total
        length are preserved (checked by the assert below).
        """
        if exclude == Marker.T.NONE:
            return self.content
        if exclude == (Marker.T.COMMENT | Marker.T.STRING | Marker.T.PREPROCESSOR):
            # optimise frequent queries of this type
            if hasattr(self, 'content_cache'):
                return self.content_cache
        last_pos = 0
        content = ""
        for marker in self.iterate_markers(exclude):
            content += self.content[last_pos:marker.begin]
            content += " " * (marker.end - marker.begin)
            last_pos = marker.end
        content += self.content[last_pos:]
        if exclude == (Marker.T.COMMENT | Marker.T.STRING | Marker.T.PREPROCESSOR):
            self.content_cache = content
        assert(len(content) == len(self.content))
        return content

    def internal_append_region(self, region):
        """Append a region and wire it into the parent/child tree.

        NOTE(review): relies on regions arriving ordered by offset
        (outer regions before the inner ones they contain).
        """
        # here we apply some magic - we rely on special ordering of coming regions,
        # which is supported by code parsers
        prev_id = None
        while True:
            if len(self.loading_tmp) == 0:
                break
            prev_id = self.loading_tmp.pop()
            # the region on top of the stack that still spans past the new
            # region's start is the new region's parent
            if self.get_region(prev_id).get_offset_end() > region.get_offset_begin():
                self.loading_tmp.append(prev_id) # return back
                break
        self.loading_tmp.append(region.get_id())
        if prev_id != None:
            self.get_region(prev_id).register_subregion_id(region.get_id())
        self.regions.append(region)

    def load_regions(self):
        """Load all regions of this file from the database (once)."""
        if self.regions == None:
            self.regions = []
            for each in self.loader.db.iterate_regions(self.get_id()):
                self.internal_append_region(FileRegionData(self.loader,
                                                           self.get_id(),
                                                           each.region_id,
                                                           each.name,
                                                           each.begin,
                                                           each.end,
                                                           each.line_begin,
                                                           each.line_end,
                                                           each.cursor,
                                                           each.group,
                                                           each.checksum))
                # region ids are expected to be sequential starting from 1
                assert(len(self.regions) == each.region_id)

    def add_region(self, region_name, offset_begin, offset_end, line_begin, line_end, cursor_line, group, checksum):
        """Register a newly parsed region; returns the new region id."""
        if self.regions == None:
            self.regions = [] # do not load in time of collection
        new_id = len(self.regions) + 1
        self.internal_append_region(FileRegionData(self.loader, self.get_id(), new_id, region_name, offset_begin, offset_end, line_begin, line_end, cursor_line, group, checksum))
        self.loader.db.create_region(self.file_id, new_id, region_name, offset_begin, offset_end, line_begin, line_end, cursor_line, group, checksum)
        return new_id

    def get_region(self, region_id):
        """Return the region by its 1-based id."""
        self.load_regions()
        return self.regions[region_id - 1]

    def get_region_types(self):
        return FileRegionData.T

    def iterate_regions(self, filter_group = FileRegionData.T.ANY):
        """Yield regions whose type matches the filter bit mask."""
        self.load_regions()
        for each in self.regions:
            if each.group & filter_group:
                yield each

    def are_regions_loaded(self):
        return self.regions != None

    def load_markers(self):
        """Load all markers of this file from the database (once)."""
        if self.markers == None:
            self.markers = []
            for each in self.loader.db.iterate_markers(self.get_id()):
                self.markers.append(Marker(each.begin, each.end, each.group))

    def add_marker(self, offset_begin, offset_end, group):
        """Register a newly parsed marker (comment/string/preprocessor range)."""
        if self.markers == None:
            self.markers = [] # do not load in time of collection
        self.markers.append(Marker(offset_begin, offset_end, group))
        self.loader.db.create_marker(self.file_id, offset_begin, offset_end, group)

    def iterate_markers(self, filter_group = Marker.T.COMMENT |
                         Marker.T.STRING | Marker.T.PREPROCESSOR,
                         region_id = None, exclude_children = True, merge = False):
        """Yield markers matching the filter.

        - merge=True: adjacent markers are merged into combined ranges.
        - region_id=None: iterate the whole file; otherwise only the region.
        - exclude_children: when iterating a region, skip sub-region markers.
        CODE markers are synthesized on the fly for gaps between stored markers.
        """
        self.load_markers()
        # merged markers
        if merge == True:
            next_marker = None
            for marker in self.iterate_markers(filter_group, region_id, exclude_children, merge = False):
                if next_marker != None:
                    if next_marker.get_offset_end() == marker.get_offset_begin():
                        # sequential markers
                        next_marker = Marker(next_marker.get_offset_begin(),
                                             marker.get_offset_end(),
                                             next_marker.get_type() | marker.get_type())
                    else:
                        yield next_marker
                        next_marker = None
                if next_marker == None:
                    next_marker = Marker(marker.get_offset_begin(),
                                         marker.get_offset_end(),
                                         marker.get_type())
            if next_marker != None:
                yield next_marker
        # all markers per file
        elif region_id == None:
            next_code_marker_start = 0
            for marker in self.markers:
                # emit the synthetic CODE gap before this marker, if requested
                if Marker.T.CODE & filter_group and next_code_marker_start < marker.get_offset_begin():
                    yield Marker(next_code_marker_start, marker.get_offset_begin(), Marker.T.CODE)
                if marker.group & filter_group:
                    yield marker
                next_code_marker_start = marker.get_offset_end()
            if Marker.T.CODE & filter_group and next_code_marker_start < len(self.get_content()):
                yield Marker(next_code_marker_start, len(self.get_content()), Marker.T.CODE)
        # markers per region
        else:
            region = self.get_region(region_id)
            if region != None:
                # code parsers and database know about non-code markers
                # clients want to iterate code as markers as well
                # so, we embed code markers in run-time
                class CodeMarker(Marker):
                    pass
                # cache markers for all regions if it does not exist
                if hasattr(region, '_markers_list') == False:
                    # subroutine to populate _markers_list attribute
                    # _markers_list does include code markers
                    def cache_markers_list_rec(data, region_id, marker_start_ind, next_code_marker_start):
                        region = data.get_region(region_id)
                        region._markers_list = []
                        region._first_marker_ind = marker_start_ind
                        #next_code_marker_start = region.get_offset_begin()
                        for sub_id in region.iterate_subregion_ids():
                            subregion = data.get_region(sub_id)
                            # cache all markers before the subregion
                            while len(data.markers) > marker_start_ind and \
                                subregion.get_offset_begin() > data.markers[marker_start_ind].get_offset_begin():
                                if next_code_marker_start < data.markers[marker_start_ind].get_offset_begin():
                                    # append code markers coming before non-code marker
                                    region._markers_list.append(CodeMarker(next_code_marker_start,
                                                                           data.markers[marker_start_ind].get_offset_begin(),
                                                                           Marker.T.CODE))
                                next_code_marker_start = data.markers[marker_start_ind].get_offset_end()
                                region._markers_list.append(marker_start_ind)
                                marker_start_ind += 1
                            # cache all code markers before the subregion but after the last marker
                            if next_code_marker_start < subregion.get_offset_begin():
                                region._markers_list.append(CodeMarker(next_code_marker_start,
                                                                       subregion.get_offset_begin(),
                                                                       Marker.T.CODE))
                            next_code_marker_start = subregion.get_offset_begin()
                            # here is the recursive call for all sub-regions
                            (marker_start_ind, next_code_marker_start) = cache_markers_list_rec(data,
                                                                                               sub_id,
                                                                                               marker_start_ind,
                                                                                               next_code_marker_start)
                        # cache all markers after the last subregion
                        while len(data.markers) > marker_start_ind and \
                            region.get_offset_end() > data.markers[marker_start_ind].get_offset_begin():
                            # append code markers coming before non-code marker
                            if next_code_marker_start < data.markers[marker_start_ind].get_offset_begin():
                                region._markers_list.append(CodeMarker(next_code_marker_start,
                                                                       data.markers[marker_start_ind].get_offset_begin(),
                                                                       Marker.T.CODE))
                            next_code_marker_start = data.markers[marker_start_ind].get_offset_end()
                            region._markers_list.append(marker_start_ind)
                            marker_start_ind += 1
                        # cache the last code segment after the last marker
                        if next_code_marker_start < region.get_offset_end():
                            region._markers_list.append(CodeMarker(next_code_marker_start,
                                                                   region.get_offset_end(),
                                                                   Marker.T.CODE))
                        next_code_marker_start = region.get_offset_end()
                        # return the starting point for the next call of this function
                        return (marker_start_ind, next_code_marker_start)
                    # append markers list to all regions recursively
                    (next_marker_pos, next_code_marker_start) = cache_markers_list_rec(self, 1, 0, 0)
                    assert(next_marker_pos == len(self.markers))
                # excluding subregions
                if exclude_children == True:
                    for marker_ind in region._markers_list:
                        # _markers_list mixes int indices (stored markers) and
                        # CodeMarker instances (synthetic code gaps)
                        if isinstance(marker_ind, int):
                            marker = self.markers[marker_ind]
                        else:
                            marker = marker_ind # CodeMarker
                        if marker.group & filter_group:
                            yield marker
                # including subregions
                else:
                    next_code_marker_start = region.get_offset_begin()
                    for marker in self.markers[region._first_marker_ind:]:
                        if marker.get_offset_begin() >= region.get_offset_end():
                            break
                        if region.get_offset_begin() > marker.get_offset_begin():
                            continue
                        if Marker.T.CODE & filter_group and next_code_marker_start < marker.get_offset_begin():
                            yield Marker(next_code_marker_start, marker.get_offset_begin(), Marker.T.CODE)
                        if marker.group & filter_group:
                            yield marker
                        next_code_marker_start = marker.get_offset_end()
                    if Marker.T.CODE & filter_group and next_code_marker_start < region.get_offset_end():
                        yield Marker(next_code_marker_start, region.get_offset_end(), Marker.T.CODE)

    def get_marker_types(self):
        return Marker.T

    def are_markers_loaded(self):
        return self.markers != None

    def __repr__(self):
        return Data.__repr__(self) + " and regions " + self.regions.__repr__()
  374. class AggregatedData(Data):
  375. def __init__(self, loader, path):
  376. Data.__init__(self)
  377. self.path = path
  378. self.loader = loader
  379. self.subdirs = None
  380. self.subfiles = None
  381. def get_subdirs(self):
  382. if self.subdirs != None:
  383. return self.subdirs
  384. self.subdirs = []
  385. if self.path != None:
  386. for subdir in self.loader.db.iterate_dircontent(self.path, include_subdirs = True, include_subfiles = False):
  387. self.subdirs.append(subdir)
  388. return self.subdirs
  389. def get_subfiles(self):
  390. if self.subfiles != None:
  391. return self.subfiles
  392. self.subfiles = []
  393. if self.path != None:
  394. for subfile in self.loader.db.iterate_dircontent(self.path, include_subdirs = False, include_subfiles = True):
  395. self.subfiles.append(subfile)
  396. return self.subfiles
  397. class SelectData(Data):
  398. def __init__(self, loader, path, file_id, region_id):
  399. Data.__init__(self)
  400. self.loader = loader
  401. self.path = path
  402. self.file_id = file_id
  403. self.region_id = region_id
  404. self.region = None
  405. def get_path(self):
  406. return self.path
  407. def get_region(self):
  408. if self.region == None and self.region_id != None:
  409. row = self.loader.db.get_region(self.file_id, self.region_id)
  410. if row != None:
  411. self.region = FileRegionData(self.loader,
  412. self.file_id,
  413. self.region_id,
  414. row.name,
  415. row.begin,
  416. row.end,
  417. row.line_begin,
  418. row.line_end,
  419. row.cursor,
  420. row.group,
  421. row.checksum)
  422. return self.region
  423. class DiffData(Data):
  424. def __init__(self, new_data, old_data):
  425. Data.__init__(self)
  426. self.new_data = new_data
  427. self.old_data = old_data
  428. def get_data(self, namespace, field):
  429. new_data = self.new_data.get_data(namespace, field)
  430. old_data = self.old_data.get_data(namespace, field)
  431. if new_data == None:
  432. return None
  433. if old_data == None:
  434. # non_zero fields has got zero value by default if missed
  435. # the data can be also unavailable,
  436. # because previous collection does not include that
  437. # but external tools (like limit.py) should warn about this,
  438. # using list of registered database properties
  439. old_data = 0
  440. return new_data - old_data
  441. ####################################
  442. # Packager Interface
  443. ####################################
  444. class PackagerError(Exception):
  445. def __init__(self):
  446. Exception.__init__(self, "Failed to pack or unpack.")
  447. class PackagerFactory(object):
  448. def create(self, python_type, non_zero):
  449. if python_type == None:
  450. return PackagerFactory.SkipPackager()
  451. if python_type == int:
  452. if non_zero == False:
  453. return PackagerFactory.IntPackager()
  454. else:
  455. return PackagerFactory.IntNonZeroPackager()
  456. if python_type == float and non_zero == False:
  457. return PackagerFactory.FloatPackager()
  458. if python_type == str:
  459. return PackagerFactory.StringPackager()
  460. class PackagerFactoryError(Exception):
  461. def __init__(self, python_type):
  462. Exception.__init__(self, "Python type '" + str(python_type) + "' is not supported by the factory.")
  463. raise PackagerFactoryError(python_type)
  464. def get_python_type(self, sql_type):
  465. if sql_type == "integer":
  466. return int
  467. if sql_type == "real":
  468. return float
  469. if sql_type == "text":
  470. return str
  471. class PackagerFactoryError(Exception):
  472. def __init__(self, sql_type):
  473. Exception.__init__(self, "SQL type '" + str(sql_type) + "' is not supported by the factory.")
  474. raise PackagerFactoryError(sql_type)
  475. class IPackager(object):
  476. def pack(self, unpacked_data):
  477. raise core.api.InterfaceNotImplemented(self)
  478. def unpack(self, packed_data):
  479. raise core.api.InterfaceNotImplemented(self)
  480. def get_sql_type(self):
  481. raise core.api.InterfaceNotImplemented(self)
  482. def get_python_type(self):
  483. raise core.api.InterfaceNotImplemented(self)
  484. def is_non_zero(self):
  485. return False
  486. class IntPackager(IPackager):
  487. def pack(self, unpacked_data):
  488. if not isinstance(unpacked_data, int):
  489. raise PackagerError()
  490. return str(unpacked_data)
  491. def unpack(self, packed_data):
  492. try:
  493. return int(packed_data)
  494. except ValueError:
  495. raise PackagerError()
  496. def get_sql_type(self):
  497. return "integer"
  498. def get_python_type(self):
  499. return int
  500. class IntNonZeroPackager(IntPackager):
  501. def pack(self, unpacked_data):
  502. if unpacked_data == 0:
  503. raise PackagerError()
  504. return PackagerFactory.IntPackager.pack(self, unpacked_data)
  505. def is_non_zero(self):
  506. return True
  507. class FloatPackager(IPackager):
  508. def pack(self, unpacked_data):
  509. if not isinstance(unpacked_data, float):
  510. raise PackagerError()
  511. return str(unpacked_data)
  512. def unpack(self, packed_data):
  513. try:
  514. return float(packed_data)
  515. except ValueError:
  516. raise PackagerError()
  517. def get_sql_type(self):
  518. return "real"
  519. def get_python_type(self):
  520. return float
  521. class FloatNonZeroPackager(FloatPackager):
  522. def pack(self, unpacked_data):
  523. if unpacked_data == 0:
  524. raise PackagerError()
  525. return PackagerFactory.FloatPackager.pack(self, unpacked_data)
  526. def is_non_zero(self):
  527. return True
  528. class StringPackager(IPackager):
  529. def pack(self, unpacked_data):
  530. if not isinstance(unpacked_data, str):
  531. raise PackagerError()
  532. return str(unpacked_data)
  533. def unpack(self, packed_data):
  534. try:
  535. return str(packed_data)
  536. except ValueError:
  537. raise PackagerError()
  538. def get_sql_type(self):
  539. return "text"
  540. def get_python_type(self):
  541. return str
  542. class SkipPackager(IPackager):
  543. def pack(self, unpacked_data):
  544. return None
  545. def unpack(self, packed_data):
  546. return None
  547. def get_sql_type(self):
  548. return None
  549. def get_python_type(self):
  550. return None
  551. ####################################
  552. # Loader
  553. ####################################
  554. class NamespaceError(Exception):
  555. def __init__(self, namespace, reason):
  556. Exception.__init__(self, "Namespace '"
  557. + namespace
  558. + "': '"
  559. + reason
  560. + "'")
  561. class FieldError(Exception):
  562. def __init__(self, field, reason):
  563. Exception.__init__(self, "Field '"
  564. + field
  565. + "': '"
  566. + reason
  567. + "'")
  568. class Namespace(object):
  569. def __init__(self, db_handle, name, support_regions = False, version='1.0'):
  570. if not isinstance(name, str):
  571. raise NamespaceError(name, "name not a string")
  572. self.name = name
  573. self.support_regions = support_regions
  574. self.fields = {}
  575. self.db = db_handle
  576. if self.db.check_table(name) == False:
  577. self.db.create_table(name, support_regions, version)
  578. else:
  579. for column in self.db.iterate_columns(name):
  580. self.add_field(column.name, PackagerFactory().get_python_type(column.sql_type), non_zero=column.non_zero)
  581. def get_name(self):
  582. return self.name
  583. def are_regions_supported(self):
  584. return self.support_regions
  585. def add_field(self, field_name, python_type, non_zero=False):
  586. if not isinstance(field_name, str):
  587. raise FieldError(field_name, "field_name not a string")
  588. packager = PackagerFactory().create(python_type, non_zero)
  589. if field_name in self.fields.keys():
  590. raise FieldError(field_name, "double used")
  591. self.fields[field_name] = packager
  592. if self.db.check_column(self.get_name(), field_name) == False:
  593. # - False if cloned
  594. # - True if created
  595. return self.db.create_column(self.name, field_name, packager.get_sql_type(), non_zero=non_zero)
  596. return None # if double request
  597. def iterate_field_names(self):
  598. for name in self.fields.keys():
  599. yield name
  600. def get_field_packager(self, field_name):
  601. if field_name in self.fields.keys():
  602. return self.fields[field_name]
  603. else:
  604. return None
  605. def get_field_sql_type(self, field_name):
  606. return self.get_field_packager(field_name).get_sql_type()
  607. def get_field_python_type(self, field_name):
  608. return self.get_field_packager(field_name).get_python_type()
  609. class DataNotPackable(Exception):
  610. def __init__(self, namespace, field, value, packager, extra_message):
  611. Exception.__init__(self, "Data '"
  612. + str(value)
  613. + "' of type "
  614. + str(value.__class__)
  615. + " referred by '"
  616. + namespace
  617. + "=>"
  618. + field
  619. + "' is not packable by registered packager '"
  620. + str(packager.__class__)
  621. + "': " + extra_message)
  622. class Loader(object):
  623. def __init__(self):
  624. self.namespaces = {}
  625. self.db = None
  626. self.last_file_data = None # for performance boost reasons
  627. def create_database(self, dbfile, previous_db = None):
  628. self.db = core.db.sqlite.Database()
  629. if os.path.exists(dbfile):
  630. logging.warn("Removing existing file: " + dbfile)
  631. # TODO can reuse existing db file to speed up the processing?
  632. # TODO add option to choose to remove or to overwrite?
  633. os.unlink(dbfile)
  634. if previous_db != None and os.path.exists(previous_db) == False:
  635. raise core.api.ExitError(None, "Database file '" + previous_db + "' does not exist")
  636. self.db.create(dbfile, clone_from=previous_db)
  637. def open_database(self, dbfile, read_only = True):
  638. self.db = core.db.sqlite.Database()
  639. if os.path.exists(dbfile) == False:
  640. raise core.api.ExitError(None, "Database file '" + dbfile + "' does not exist")
  641. self.db.connect(dbfile, read_only=read_only)
  642. for table in self.db.iterate_tables():
  643. self.create_namespace(table.name, table.support_regions)
  644. def set_property(self, property_name, value):
  645. if self.db == None:
  646. return None
  647. return self.db.set_property(property_name, value)
  648. def get_property(self, property_name):
  649. if self.db == None:
  650. return None
  651. return self.db.get_property(property_name)
  652. def iterate_properties(self):
  653. if self.db == None:
  654. return None
  655. return self.db.iterate_properties()
  656. def create_namespace(self, name, support_regions = False, version='1.0'):
  657. if self.db == None:
  658. return None
  659. if name in self.namespaces.keys():
  660. raise NamespaceError(name, "double used")
  661. new_namespace = Namespace(self.db, name, support_regions, version)
  662. self.namespaces[name] = new_namespace
  663. return new_namespace
  664. def iterate_namespace_names(self):
  665. for name in self.namespaces.keys():
  666. yield name
  667. def get_namespace(self, name):
  668. if name in self.namespaces.keys():
  669. return self.namespaces[name]
  670. else:
  671. return None
  672. def create_file_data(self, path, checksum, content):
  673. if self.db == None:
  674. return None
  675. (new_id, is_updated) = self.db.create_file(path, checksum)
  676. result = FileData(self, path, new_id, checksum, content)
  677. self.last_file_data = result
  678. return (result, is_updated)
  679. def load_file_data(self, path):
  680. if self.db == None:
  681. return None
  682. if self.last_file_data != None and self.last_file_data.get_path() == path:
  683. return self.last_file_data
  684. data = self.db.get_file(path)
  685. if data == None:
  686. return None
  687. result = FileData(self, data.path, data.id, data.checksum, None)
  688. self.last_file_data = result
  689. return result
    def save_file_data(self, file_data):
        """Write all updated namespaces of a file (and of its regions) to the database."""
        if self.db == None:
            return None
        class DataIterator(object):
            # adapter which yields (field, packed_value) pairs for db.add_row()
            def iterate_packed_values(self, data, namespace, support_regions = False):
                for each in data.iterate_fields(namespace):
                    space = self.loader.get_namespace(namespace)
                    if space == None:
                        raise DataNotPackable(namespace, each[0], each[1], None, "The namespace has not been found")
                    packager = space.get_field_packager(each[0])
                    if packager == None:
                        raise DataNotPackable(namespace, each[0], each[1], None, "The field has not been found")
                    if space.support_regions != support_regions:
                        raise DataNotPackable(namespace, each[0], each[1], packager, "Incompatible support for regions")
                    try:
                        packed_data = packager.pack(each[1])
                        # a None from the packager means "skip this field"
                        if packed_data == None:
                            continue
                    except PackagerError:
                        raise DataNotPackable(namespace, each[0], each[1], packager, "Packager raised exception")
                    yield (each[0], packed_data)
            def __init__(self, loader, data, namespace, support_regions = False):
                self.loader = loader
                self.iterator = self.iterate_packed_values(data, namespace, support_regions)
            def __iter__(self):
                return self.iterator
        # file-level data: store only the namespaces marked as changed
        for namespace in file_data.iterate_namespaces():
            if file_data.is_namespace_updated(namespace) == False:
                continue
            self.db.add_row(namespace,
                            file_data.get_id(),
                            None,
                            DataIterator(self, file_data, namespace))
        # region-level data: only when regions were touched during this run
        if file_data.are_regions_loaded():
            for region in file_data.iterate_regions():
                for namespace in region.iterate_namespaces():
                    if region.is_namespace_updated(namespace) == False:
                        continue
                    self.db.add_row(namespace,
                                    file_data.get_id(),
                                    region.get_id(),
                                    DataIterator(self, region, namespace, support_regions = True))
  732. def iterate_file_data(self, path = None, path_like_filter = "%"):
  733. if self.db == None:
  734. return None
  735. final_path_like = path_like_filter
  736. if path != None:
  737. if self.db.check_dir(path) == False and self.db.check_file(path) == False:
  738. return None
  739. final_path_like = path + path_like_filter
  740. class FileDataIterator(object):
  741. def iterate_file_data(self, loader, final_path_like):
  742. for data in loader.db.iterate_files(path_like=final_path_like):
  743. yield FileData(loader, data.path, data.id, data.checksum, None)
  744. def __init__(self, loader, final_path_like):
  745. self.iterator = self.iterate_file_data(loader, final_path_like)
  746. def __iter__(self):
  747. return self.iterator
  748. if self.db == None:
  749. return None
  750. return FileDataIterator(self, final_path_like)
  751. def load_aggregated_data(self, path = None, path_like_filter = "%", namespaces = None):
  752. if self.db == None:
  753. return None
  754. final_path_like = path_like_filter
  755. if path != None:
  756. if self.db.check_dir(path) == False and self.db.check_file(path) == False:
  757. return None
  758. final_path_like = path + path_like_filter
  759. if namespaces == None:
  760. namespaces = self.namespaces.keys()
  761. result = AggregatedData(self, path)
  762. for name in namespaces:
  763. namespace = self.get_namespace(name)
  764. data = self.db.aggregate_rows(name, path_like = final_path_like)
  765. for field in data.keys():
  766. if namespace.get_field_packager(field).get_python_type() == str:
  767. continue
  768. if namespace.get_field_packager(field).is_non_zero() == True:
  769. data[field]['min'] = None
  770. data[field]['avg'] = None
  771. distribution = self.db.count_rows(name, path_like = final_path_like, group_by_column = field)
  772. data[field]['distribution-bars'] = []
  773. for each in distribution:
  774. if each[0] == None:
  775. continue
  776. assert(float(data[field]['count'] != 0))
  777. data[field]['distribution-bars'].append({'metric': each[0],
  778. 'count': each[1],
  779. 'ratio': round((float(each[1]) / float(data[field]['count'])), 4)})
  780. result.set_data(name, field, data[field])
  781. return result
  782. def load_selected_data(self, namespace, fields = None, path = None, path_like_filter = "%", filters = [],
  783. sort_by = None, limit_by = None):
  784. if self.db == None:
  785. return None
  786. final_path_like = path_like_filter
  787. if path != None:
  788. if self.db.check_dir(path) == False and self.db.check_file(path) == False:
  789. return None
  790. final_path_like = path + path_like_filter
  791. namespace_obj = self.get_namespace(namespace)
  792. if namespace_obj == None:
  793. return None
  794. class SelectDataIterator(object):
  795. def iterate_selected_values(self, loader, namespace_obj, final_path_like, fields, filters, sort_by, limit_by):
  796. for row in loader.db.select_rows(namespace_obj.get_name(), path_like=final_path_like, filters=filters,
  797. order_by=sort_by, limit_by=limit_by):
  798. region_id = None
  799. if namespace_obj.are_regions_supported() == True:
  800. region_id = row['region_id']
  801. data = SelectData(loader, row['path'], row['id'], region_id)
  802. field_names = fields
  803. if fields == None:
  804. field_names = namespace_obj.iterate_field_names()
  805. for field in field_names:
  806. data.set_data(namespace, field, row[field])
  807. yield data
  808. def __init__(self, loader, namespace_obj, final_path_like, fields, filters, sort_by, limit_by):
  809. self.iterator = self.iterate_selected_values(loader, namespace_obj, final_path_like, fields, filters, sort_by, limit_by)
  810. def __iter__(self):
  811. return self.iterator
  812. return SelectDataIterator(self, namespace_obj, final_path_like, fields, filters, sort_by, limit_by)