#
# Metrix++, Copyright 2009-2013, Metrix++ Project
# Link: http://metrixplusplus.sourceforge.net
#
# This file is a part of Metrix++ Tool.
#
# Metrix++ is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, version 3 of the License.
#
# Metrix++ is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Metrix++. If not, see <http://www.gnu.org/licenses/>.
#

import logging
import os.path

import core.api
import core.db.sqlite

####################################
# Data Interface
####################################
class Data(object):

    def __init__(self):
        self.data = {}

    def get_data(self, namespace, field):
        if namespace not in self.data.keys():
            return None
        if field not in self.data[namespace].keys():
            return None
        return self.data[namespace][field]

    def set_data(self, namespace, field, value):
        if namespace not in self.data:
            self.data[namespace] = {}
        self.data[namespace][field] = value

    def iterate_namespaces(self):
        for namespace in self.data.keys():
            yield namespace

    def iterate_fields(self, namespace):
        for field in self.data[namespace].keys():
            yield (field, self.data[namespace][field])

    def get_data_tree(self, namespaces=None):
        return self.data

    def __repr__(self):
        return object.__repr__(self) + " with data " + self.data.__repr__()
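
# A minimal usage sketch (added for illustration; the namespace and field
# names below are hypothetical):
#
#   data = Data()
#   data.set_data('std.code.lines', 'total', 42)
#   data.get_data('std.code.lines', 'total')    # -> 42
#   data.get_data('std.code.lines', 'missing')  # -> None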

class LoadableData(Data):

    def __init__(self, loader, file_id, region_id):
        Data.__init__(self)
        self.loader = loader
        self.file_id = file_id
        self.region_id = region_id
        self.loaded_namespaces = []
        self.changed_namespaces = []

    def load_namespace(self, namespace):
        try:
            row = self.loader.db.get_row(namespace, self.file_id, self.region_id)
        except Exception:
            logging.debug("No data in the database for namespace: " + namespace)
            return
        if row == None:
            return
        for column_name in row.keys():
            packager = self.loader.get_namespace(namespace).get_field_packager(column_name)
            if packager == None:
                continue
            if row[column_name] == None:
                continue
            Data.set_data(self, namespace, column_name, packager.unpack(row[column_name]))

    def set_data(self, namespace, field, value):
        if namespace not in self.changed_namespaces:
            self.changed_namespaces.append(namespace)
        return Data.set_data(self, namespace, field, value)

    def get_data(self, namespace, field):
        if namespace not in self.loaded_namespaces:
            self.loaded_namespaces.append(namespace)
            self.load_namespace(namespace)
        return Data.get_data(self, namespace, field)

    def is_namespace_updated(self, namespace):
        return namespace in self.changed_namespaces

    def is_namespace_loaded(self, namespace):
        return namespace in self.loaded_namespaces

    def get_data_tree(self, namespaces=None):
        if namespaces == None:
            namespaces = self.loader.iterate_namespace_names()
        for each in namespaces:
            self.load_namespace(each)
        return Data.get_data_tree(self)

class FileRegionData(LoadableData):

    class T(object):
        NONE      = 0x00
        GLOBAL    = 0x01
        CLASS     = 0x02
        STRUCT    = 0x04
        NAMESPACE = 0x08
        FUNCTION  = 0x10
        INTERFACE = 0x20
        ANY       = 0xFF

        def to_str(self, group):
            if group == self.NONE:
                return "none"
            elif group == self.GLOBAL:
                return "global"
            elif group == self.CLASS:
                return "class"
            elif group == self.STRUCT:
                return "struct"
            elif group == self.NAMESPACE:
                return "namespace"
            elif group == self.FUNCTION:
                return "function"
            elif group == self.INTERFACE:
                return "interface"
            else:
                assert(False)

    def __init__(self, loader, file_id, region_id, region_name, offset_begin, offset_end, line_begin, line_end, cursor_line, group, checksum):
        LoadableData.__init__(self, loader, file_id, region_id)
        self.name = region_name
        self.begin = offset_begin
        self.end = offset_end
        self.line_begin = line_begin
        self.line_end = line_end
        self.cursor = cursor_line
        self.group = group
        self.checksum = checksum
        self.children = []

    def get_id(self):
        return self.region_id

    def get_name(self):
        return self.name

    def get_offset_begin(self):
        return self.begin

    def get_offset_end(self):
        return self.end

    def get_line_begin(self):
        return self.line_begin

    def get_line_end(self):
        return self.line_end

    def get_cursor(self):
        return self.cursor

    def get_type(self):
        return self.group

    def get_checksum(self):
        return self.checksum

    def register_subregion_id(self, child_id):
        self.children.append(child_id)

    def iterate_subregion_ids(self):
        return self.children
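
# Region types are bit flags, so filter groups can be combined with bitwise
# OR. A hedged sketch (assumes an already loaded FileData instance named
# file_data):
#
#   for region in file_data.iterate_regions(
#           filter_group = FileRegionData.T.CLASS | FileRegionData.T.STRUCT):
#       print(region.get_name(), region.get_line_begin(), region.get_line_end())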

class Marker(object):

    class T(object):
        NONE            = 0x00
        COMMENT         = 0x01
        STRING          = 0x02
        PREPROCESSOR    = 0x04
        CODE            = 0x08
        ALL_EXCEPT_CODE = 0x07
        ANY             = 0xFF

        def to_str(self, group):
            if group == self.NONE:
                return "none"
            elif group == self.COMMENT:
                return "comment"
            elif group == self.STRING:
                return "string"
            elif group == self.PREPROCESSOR:
                return "preprocessor"
            elif group == self.CODE:
                return "code"
            else:
                assert(False)

    def __init__(self, offset_begin, offset_end, group):
        self.begin = offset_begin
        self.end = offset_end
        self.group = group

    def get_offset_begin(self):
        return self.begin

    def get_offset_end(self):
        return self.end

    def get_type(self):
        return self.group
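
# Marker types are bit flags as well; ALL_EXCEPT_CODE (0x07) is simply the
# union of the non-code groups. Illustration only:
#
#   assert Marker.T.ALL_EXCEPT_CODE == (Marker.T.COMMENT |
#                                       Marker.T.STRING |
#                                       Marker.T.PREPROCESSOR)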

class FileData(LoadableData):

    def __init__(self, loader, path, file_id, checksum, content):
        LoadableData.__init__(self, loader, file_id, None)
        self.path = path
        self.checksum = checksum
        self.content = content
        self.regions = None
        self.markers = None
        self.loader = loader
        self.loading_tmp = []

    def get_id(self):
        return self.file_id

    def get_path(self):
        return self.path

    def get_checksum(self):
        return self.checksum

    def get_content(self, exclude = Marker.T.NONE):
        if exclude == Marker.T.NONE:
            return self.content
        if exclude == (Marker.T.COMMENT | Marker.T.STRING | Marker.T.PREPROCESSOR):
            # optimise frequent queries of this type
            if hasattr(self, 'content_cache'):
                return self.content_cache
        last_pos = 0
        content = ""
        for marker in self.iterate_markers(exclude):
            content += self.content[last_pos:marker.begin]
            content += " " * (marker.end - marker.begin)
            last_pos = marker.end
        content += self.content[last_pos:]
        if exclude == (Marker.T.COMMENT | Marker.T.STRING | Marker.T.PREPROCESSOR):
            self.content_cache = content
        assert(len(content) == len(self.content))
        return content
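
    # Note on get_content(exclude=...): excluded markers are blanked out with
    # spaces of equal length, so offsets into the returned string still match
    # offsets in the original content. A hedged sketch:
    #
    #   code_only = file_data.get_content(exclude = Marker.T.ALL_EXCEPT_CODE)
    #   assert len(code_only) == len(file_data.get_content())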

    def internal_append_region(self, region):
        # some magic is applied here: we rely on the special ordering of
        # incoming regions, which is guaranteed by the code parsers
        prev_id = None
        while True:
            if len(self.loading_tmp) == 0:
                break
            prev_id = self.loading_tmp.pop()
            if self.get_region(prev_id).get_offset_end() > region.get_offset_begin():
                self.loading_tmp.append(prev_id) # push the enclosing region back
                break
        self.loading_tmp.append(region.get_id())
        if prev_id != None:
            self.get_region(prev_id).register_subregion_id(region.get_id())
        self.regions.append(region)
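
    # Worked example of the ordering invariant above (hypothetical offsets):
    # regions arrive sorted by offset_begin with sequential ids, so
    # loading_tmp acts as a stack of currently open (enclosing) regions:
    #   region 1 [0..100] is pushed first;
    #   region 2 [10..40] nests in 1;
    #   region 3 [50..90] pops 2 (already closed) and nests in 1 as well.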

    def load_regions(self):
        if self.regions == None:
            self.regions = []
            for each in self.loader.db.iterate_regions(self.get_id()):
                self.internal_append_region(FileRegionData(self.loader,
                                                           self.get_id(),
                                                           each.region_id,
                                                           each.name,
                                                           each.begin,
                                                           each.end,
                                                           each.line_begin,
                                                           each.line_end,
                                                           each.cursor,
                                                           each.group,
                                                           each.checksum))
                assert(len(self.regions) == each.region_id)

    def add_region(self, region_name, offset_begin, offset_end, line_begin, line_end, cursor_line, group, checksum):
        if self.regions == None:
            self.regions = [] # do not load in time of collection
        new_id = len(self.regions) + 1
        self.internal_append_region(FileRegionData(self.loader, self.get_id(), new_id, region_name, offset_begin, offset_end, line_begin, line_end, cursor_line, group, checksum))
        self.loader.db.create_region(self.file_id, new_id, region_name, offset_begin, offset_end, line_begin, line_end, cursor_line, group, checksum)
        return new_id

    def get_region(self, region_id):
        self.load_regions()
        return self.regions[region_id - 1]

    def get_region_types(self):
        return FileRegionData.T

    def iterate_regions(self, filter_group = FileRegionData.T.ANY):
        self.load_regions()
        for each in self.regions:
            if each.group & filter_group:
                yield each

    def are_regions_loaded(self):
        return self.regions != None

    def load_markers(self):
        if self.markers == None:
            self.markers = []
            for each in self.loader.db.iterate_markers(self.get_id()):
                self.markers.append(Marker(each.begin, each.end, each.group))

    def add_marker(self, offset_begin, offset_end, group):
        if self.markers == None:
            self.markers = [] # do not load in time of collection
        self.markers.append(Marker(offset_begin, offset_end, group))
        self.loader.db.create_marker(self.file_id, offset_begin, offset_end, group)

    def iterate_markers(self, filter_group = Marker.T.COMMENT |
                         Marker.T.STRING | Marker.T.PREPROCESSOR,
                         region_id = None, exclude_children = True, merge = False):
        self.load_markers()

        # merged markers
        if merge == True:
            next_marker = None
            for marker in self.iterate_markers(filter_group, region_id, exclude_children, merge = False):
                if next_marker != None:
                    if next_marker.get_offset_end() == marker.get_offset_begin():
                        # sequential markers
                        next_marker = Marker(next_marker.get_offset_begin(),
                                             marker.get_offset_end(),
                                             next_marker.get_type() | marker.get_type())
                    else:
                        yield next_marker
                        next_marker = None
                if next_marker == None:
                    next_marker = Marker(marker.get_offset_begin(),
                                         marker.get_offset_end(),
                                         marker.get_type())
            if next_marker != None:
                yield next_marker

        # all markers per file
        elif region_id == None:
            next_code_marker_start = 0
            for marker in self.markers:
                if Marker.T.CODE & filter_group and next_code_marker_start < marker.get_offset_begin():
                    yield Marker(next_code_marker_start, marker.get_offset_begin(), Marker.T.CODE)
                if marker.group & filter_group:
                    yield marker
                next_code_marker_start = marker.get_offset_end()
            if Marker.T.CODE & filter_group and next_code_marker_start < len(self.get_content()):
                yield Marker(next_code_marker_start, len(self.get_content()), Marker.T.CODE)

        # markers per region
        else:
            region = self.get_region(region_id)
            if region != None:

                # code parsers and database know about non-code markers
                # clients want to iterate code as markers as well
                # so, we embed code markers in run-time
                class CodeMarker(Marker):
                    pass

                # cache markers for all regions if it does not exist
                if hasattr(region, '_markers_list') == False:

                    # subroutine to populate _markers_list attribute
                    # _markers_list does include code markers
                    def cache_markers_list_rec(data, region_id, marker_start_ind, next_code_marker_start):
                        region = data.get_region(region_id)
                        region._markers_list = []
                        region._first_marker_ind = marker_start_ind
                        #next_code_marker_start = region.get_offset_begin()

                        for sub_id in region.iterate_subregion_ids():
                            subregion = data.get_region(sub_id)
                            # cache all markers before the subregion
                            while len(data.markers) > marker_start_ind and \
                                subregion.get_offset_begin() > data.markers[marker_start_ind].get_offset_begin():
                                if next_code_marker_start < data.markers[marker_start_ind].get_offset_begin():
                                    # append code markers coming before non-code marker
                                    region._markers_list.append(CodeMarker(next_code_marker_start,
                                                                           data.markers[marker_start_ind].get_offset_begin(),
                                                                           Marker.T.CODE))
                                next_code_marker_start = data.markers[marker_start_ind].get_offset_end()
                                region._markers_list.append(marker_start_ind)
                                marker_start_ind += 1

                            # cache all code markers before the subregion but after the last marker
                            if next_code_marker_start < subregion.get_offset_begin():
                                region._markers_list.append(CodeMarker(next_code_marker_start,
                                                                       subregion.get_offset_begin(),
                                                                       Marker.T.CODE))
                            next_code_marker_start = subregion.get_offset_begin()

                            # here is the recursive call for all sub-regions
                            (marker_start_ind, next_code_marker_start) = cache_markers_list_rec(data,
                                                                                                sub_id,
                                                                                                marker_start_ind,
                                                                                                next_code_marker_start)

                        # cache all markers after the last subregion
                        while len(data.markers) > marker_start_ind and \
                            region.get_offset_end() > data.markers[marker_start_ind].get_offset_begin():
                            # append code markers coming before non-code marker
                            if next_code_marker_start < data.markers[marker_start_ind].get_offset_begin():
                                region._markers_list.append(CodeMarker(next_code_marker_start,
                                                                       data.markers[marker_start_ind].get_offset_begin(),
                                                                       Marker.T.CODE))
                            next_code_marker_start = data.markers[marker_start_ind].get_offset_end()
                            region._markers_list.append(marker_start_ind)
                            marker_start_ind += 1

                        # cache the last code segment after the last marker
                        if next_code_marker_start < region.get_offset_end():
                            region._markers_list.append(CodeMarker(next_code_marker_start,
                                                                   region.get_offset_end(),
                                                                   Marker.T.CODE))
                        next_code_marker_start = region.get_offset_end()

                        # return the starting point for the next call of this function
                        return (marker_start_ind, next_code_marker_start)

                    # append markers list to all regions recursively
                    (next_marker_pos, next_code_marker_start) = cache_markers_list_rec(self, 1, 0, 0)
                    assert(next_marker_pos == len(self.markers))

                # excluding subregions
                if exclude_children == True:
                    for marker_ind in region._markers_list:
                        if isinstance(marker_ind, int):
                            marker = self.markers[marker_ind]
                        else:
                            marker = marker_ind # CodeMarker
                        if marker.group & filter_group:
                            yield marker

                # including subregions
                else:
                    next_code_marker_start = region.get_offset_begin()
                    for marker in self.markers[region._first_marker_ind:]:
                        if marker.get_offset_begin() >= region.get_offset_end():
                            break
                        if region.get_offset_begin() > marker.get_offset_begin():
                            continue
                        if Marker.T.CODE & filter_group and next_code_marker_start < marker.get_offset_begin():
                            yield Marker(next_code_marker_start, marker.get_offset_begin(), Marker.T.CODE)
                        if marker.group & filter_group:
                            yield marker
                        next_code_marker_start = marker.get_offset_end()
                    if Marker.T.CODE & filter_group and next_code_marker_start < region.get_offset_end():
                        yield Marker(next_code_marker_start, region.get_offset_end(), Marker.T.CODE)

    def get_marker_types(self):
        return Marker.T

    def are_markers_loaded(self):
        return self.markers != None

    def __repr__(self):
        return Data.__repr__(self) + " and regions " + self.regions.__repr__()

class AggregatedData(Data):

    def __init__(self, loader, path):
        Data.__init__(self)
        self.path = path
        self.loader = loader
        self.subdirs = None
        self.subfiles = None

    def get_subdirs(self):
        if self.subdirs != None:
            return self.subdirs
        self.subdirs = []
        if self.path != None:
            for subdir in self.loader.db.iterate_dircontent(self.path, include_subdirs = True, include_subfiles = False):
                self.subdirs.append(subdir)
        return self.subdirs

    def get_subfiles(self):
        if self.subfiles != None:
            return self.subfiles
        self.subfiles = []
        if self.path != None:
            for subfile in self.loader.db.iterate_dircontent(self.path, include_subdirs = False, include_subfiles = True):
                self.subfiles.append(subfile)
        return self.subfiles

class SelectData(Data):

    def __init__(self, loader, path, file_id, region_id):
        Data.__init__(self)
        self.loader = loader
        self.path = path
        self.file_id = file_id
        self.region_id = region_id
        self.region = None

    def get_path(self):
        return self.path

    def get_region(self):
        if self.region == None and self.region_id != None:
            row = self.loader.db.get_region(self.file_id, self.region_id)
            if row != None:
                self.region = FileRegionData(self.loader,
                                             self.file_id,
                                             self.region_id,
                                             row.name,
                                             row.begin,
                                             row.end,
                                             row.line_begin,
                                             row.line_end,
                                             row.cursor,
                                             row.group,
                                             row.checksum)
        return self.region

class DiffData(Data):

    def __init__(self, new_data, old_data):
        Data.__init__(self)
        self.new_data = new_data
        self.old_data = old_data

    def get_data(self, namespace, field):
        new_data = self.new_data.get_data(namespace, field)
        old_data = self.old_data.get_data(namespace, field)
        if new_data == None:
            return None
        if old_data == None:
            # non-zero fields default to zero if the value is missing;
            # the data may also be unavailable because the previous
            # collection did not include it - external tools (like limit.py)
            # should warn about this, using the list of registered
            # database properties
            old_data = 0
        return new_data - old_data
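
# DiffData semantics in brief (a hedged sketch with hypothetical values):
#
#   new = Data(); new.set_data('std.code.lines', 'total', 120)
#   old = Data(); old.set_data('std.code.lines', 'total', 100)
#   DiffData(new, old).get_data('std.code.lines', 'total')  # -> 20
#   # a field missing in old_data is treated as 0, so the diff would be 120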

####################################
# Packager Interface
####################################

class PackagerError(Exception):
    def __init__(self):
        Exception.__init__(self, "Failed to pack or unpack.")

class PackagerFactory(object):

    def create(self, python_type, non_zero):
        if python_type == None:
            return PackagerFactory.SkipPackager()
        if python_type == int:
            if non_zero == False:
                return PackagerFactory.IntPackager()
            else:
                return PackagerFactory.IntNonZeroPackager()
        if python_type == float and non_zero == False:
            return PackagerFactory.FloatPackager()
        if python_type == str:
            return PackagerFactory.StringPackager()

        class PackagerFactoryError(Exception):
            def __init__(self, python_type):
                Exception.__init__(self, "Python type '" + str(python_type) + "' is not supported by the factory.")
        raise PackagerFactoryError(python_type)

    def get_python_type(self, sql_type):
        if sql_type == "integer":
            return int
        if sql_type == "real":
            return float
        if sql_type == "text":
            return str

        class PackagerFactoryError(Exception):
            def __init__(self, sql_type):
                Exception.__init__(self, "SQL type '" + str(sql_type) + "' is not supported by the factory.")
        raise PackagerFactoryError(sql_type)

    class IPackager(object):
        def pack(self, unpacked_data):
            raise core.api.InterfaceNotImplemented(self)
        def unpack(self, packed_data):
            raise core.api.InterfaceNotImplemented(self)
        def get_sql_type(self):
            raise core.api.InterfaceNotImplemented(self)
        def get_python_type(self):
            raise core.api.InterfaceNotImplemented(self)
        def is_non_zero(self):
            return False

    class IntPackager(IPackager):
        def pack(self, unpacked_data):
            if not isinstance(unpacked_data, int):
                raise PackagerError()
            return str(unpacked_data)

        def unpack(self, packed_data):
            try:
                return int(packed_data)
            except ValueError:
                raise PackagerError()

        def get_sql_type(self):
            return "integer"

        def get_python_type(self):
            return int

    class IntNonZeroPackager(IntPackager):
        def pack(self, unpacked_data):
            if unpacked_data == 0:
                raise PackagerError()
            return PackagerFactory.IntPackager.pack(self, unpacked_data)

        def is_non_zero(self):
            return True

    class FloatPackager(IPackager):
        def pack(self, unpacked_data):
            if not isinstance(unpacked_data, float):
                raise PackagerError()
            return str(unpacked_data)

        def unpack(self, packed_data):
            try:
                return float(packed_data)
            except ValueError:
                raise PackagerError()

        def get_sql_type(self):
            return "real"

        def get_python_type(self):
            return float

    class FloatNonZeroPackager(FloatPackager):
        def pack(self, unpacked_data):
            if unpacked_data == 0:
                raise PackagerError()
            return PackagerFactory.FloatPackager.pack(self, unpacked_data)

        def is_non_zero(self):
            return True

    class StringPackager(IPackager):
        def pack(self, unpacked_data):
            if not isinstance(unpacked_data, str):
                raise PackagerError()
            return str(unpacked_data)

        def unpack(self, packed_data):
            try:
                return str(packed_data)
            except ValueError:
                raise PackagerError()

        def get_sql_type(self):
            return "text"

        def get_python_type(self):
            return str

    class SkipPackager(IPackager):
        def pack(self, unpacked_data):
            return None

        def unpack(self, packed_data):
            return None

        def get_sql_type(self):
            return None

        def get_python_type(self):
            return None
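
# Packager round-trip sketch (illustrative values; PackagerFactory.create()
# is the intended entry point):
#
#   packager = PackagerFactory().create(int, non_zero = False)
#   packed = packager.pack(42)   # -> "42", stored with SQL type "integer"
#   packager.unpack(packed)      # -> 42
#   PackagerFactory().create(None, False).pack(42)  # SkipPackager -> None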

####################################
# Loader
####################################

class NamespaceError(Exception):
    def __init__(self, namespace, reason):
        Exception.__init__(self, "Namespace '"
                           + namespace
                           + "': '"
                           + reason
                           + "'")

class FieldError(Exception):
    def __init__(self, field, reason):
        Exception.__init__(self, "Field '"
                           + field
                           + "': '"
                           + reason
                           + "'")

class Namespace(object):

    def __init__(self, db_handle, name, support_regions = False, version='1.0'):
        if not isinstance(name, str):
            raise NamespaceError(name, "name not a string")
        self.name = name
        self.support_regions = support_regions
        self.fields = {}
        self.db = db_handle

        if self.db.check_table(name) == False:
            self.db.create_table(name, support_regions, version)
        else:
            for column in self.db.iterate_columns(name):
                self.add_field(column.name, PackagerFactory().get_python_type(column.sql_type), non_zero=column.non_zero)

    def get_name(self):
        return self.name

    def are_regions_supported(self):
        return self.support_regions

    def add_field(self, field_name, python_type, non_zero=False):
        if not isinstance(field_name, str):
            raise FieldError(field_name, "field_name not a string")
        packager = PackagerFactory().create(python_type, non_zero)
        if field_name in self.fields.keys():
            raise FieldError(field_name, "double used")
        self.fields[field_name] = packager
        if self.db.check_column(self.get_name(), field_name) == False:
            # - False if cloned
            # - True if created
            return self.db.create_column(self.name, field_name, packager.get_sql_type(), non_zero=non_zero)
        return None # if double request

    def iterate_field_names(self):
        for name in self.fields.keys():
            yield name

    def get_field_packager(self, field_name):
        if field_name in self.fields.keys():
            return self.fields[field_name]
        else:
            return None

    def get_field_sql_type(self, field_name):
        return self.get_field_packager(field_name).get_sql_type()

    def get_field_python_type(self, field_name):
        return self.get_field_packager(field_name).get_python_type()
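
# Namespace usage sketch (assumes an open database handle; the namespace and
# field names are hypothetical):
#
#   ns = Namespace(db_handle, 'std.code.lines', support_regions = True)
#   ns.add_field('total', int, non_zero = False)
#   ns.get_field_sql_type('total')  # -> "integer"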

class DataNotPackable(Exception):
    def __init__(self, namespace, field, value, packager, extra_message):
        Exception.__init__(self, "Data '"
                           + str(value)
                           + "' of type "
                           + str(value.__class__)
                           + " referred by '"
                           + namespace
                           + "=>"
                           + field
                           + "' is not packable by registered packager '"
                           + str(packager.__class__)
                           + "': " + extra_message)

class Loader(object):

    def __init__(self):
        self.namespaces = {}
        self.db = None
        self.last_file_data = None # for performance boost reasons

    def create_database(self, dbfile, previous_db = None):
        self.db = core.db.sqlite.Database()
        if os.path.exists(dbfile):
            logging.warn("Removing existing file: " + dbfile)
            # TODO can reuse existing db file to speed up the processing?
            # TODO add option to choose to remove or to overwrite?
            os.unlink(dbfile)
        if previous_db != None and os.path.exists(previous_db) == False:
            raise core.api.ExitError(None, "Database file '" + previous_db + "' does not exist")
        self.db.create(dbfile, clone_from=previous_db)

    def open_database(self, dbfile, read_only = True):
        self.db = core.db.sqlite.Database()
        if os.path.exists(dbfile) == False:
            raise core.api.ExitError(None, "Database file '" + dbfile + "' does not exist")
        self.db.connect(dbfile, read_only=read_only)
        for table in self.db.iterate_tables():
            self.create_namespace(table.name, table.support_regions)

    def set_property(self, property_name, value):
        if self.db == None:
            return None
        return self.db.set_property(property_name, value)

    def get_property(self, property_name):
        if self.db == None:
            return None
        return self.db.get_property(property_name)

    def iterate_properties(self):
        if self.db == None:
            return None
        return self.db.iterate_properties()

    def create_namespace(self, name, support_regions = False, version='1.0'):
        if self.db == None:
            return None
        if name in self.namespaces.keys():
            raise NamespaceError(name, "double used")
        new_namespace = Namespace(self.db, name, support_regions, version)
        self.namespaces[name] = new_namespace
        return new_namespace

    def iterate_namespace_names(self):
        for name in self.namespaces.keys():
            yield name

    def get_namespace(self, name):
        if name in self.namespaces.keys():
            return self.namespaces[name]
        else:
            return None

    def create_file_data(self, path, checksum, content):
        if self.db == None:
            return None
        (new_id, is_updated) = self.db.create_file(path, checksum)
        result = FileData(self, path, new_id, checksum, content)
        self.last_file_data = result
        return (result, is_updated)

    def load_file_data(self, path):
        if self.db == None:
            return None
        if self.last_file_data != None and self.last_file_data.get_path() == path:
            return self.last_file_data
        data = self.db.get_file(path)
        if data == None:
            return None
        result = FileData(self, data.path, data.id, data.checksum, None)
        self.last_file_data = result
        return result

    def save_file_data(self, file_data):
        if self.db == None:
            return None

        class DataIterator(object):

            def iterate_packed_values(self, data, namespace, support_regions = False):
                for each in data.iterate_fields(namespace):
                    space = self.loader.get_namespace(namespace)
                    if space == None:
                        raise DataNotPackable(namespace, each[0], each[1], None, "The namespace has not been found")
                    packager = space.get_field_packager(each[0])
                    if packager == None:
                        raise DataNotPackable(namespace, each[0], each[1], None, "The field has not been found")
                    if space.support_regions != support_regions:
                        raise DataNotPackable(namespace, each[0], each[1], packager, "Incompatible support for regions")
                    try:
                        packed_data = packager.pack(each[1])
                        if packed_data == None:
                            continue
                    except PackagerError:
                        raise DataNotPackable(namespace, each[0], each[1], packager, "Packager raised exception")
                    yield (each[0], packed_data)

            def __init__(self, loader, data, namespace, support_regions = False):
                self.loader = loader
                self.iterator = self.iterate_packed_values(data, namespace, support_regions)

            def __iter__(self):
                return self.iterator

        for namespace in file_data.iterate_namespaces():
            if file_data.is_namespace_updated(namespace) == False:
                continue
            self.db.add_row(namespace,
                            file_data.get_id(),
                            None,
                            DataIterator(self, file_data, namespace))

        if file_data.are_regions_loaded():
            for region in file_data.iterate_regions():
                for namespace in region.iterate_namespaces():
                    if region.is_namespace_updated(namespace) == False:
                        continue
                    self.db.add_row(namespace,
                                    file_data.get_id(),
                                    region.get_id(),
                                    DataIterator(self, region, namespace, support_regions = True))

    def iterate_file_data(self, path = None, path_like_filter = "%"):
        if self.db == None:
            return None
        final_path_like = path_like_filter
        if path != None:
            if self.db.check_dir(path) == False and self.db.check_file(path) == False:
                return None
            final_path_like = path + path_like_filter

        class FileDataIterator(object):
            def iterate_file_data(self, loader, final_path_like):
                for data in loader.db.iterate_files(path_like=final_path_like):
                    yield FileData(loader, data.path, data.id, data.checksum, None)

            def __init__(self, loader, final_path_like):
                self.iterator = self.iterate_file_data(loader, final_path_like)

            def __iter__(self):
                return self.iterator

        if self.db == None:
            return None
        return FileDataIterator(self, final_path_like)

    def load_aggregated_data(self, path = None, path_like_filter = "%", namespaces = None):
        if self.db == None:
            return None
        final_path_like = path_like_filter
        if path != None:
            if self.db.check_dir(path) == False and self.db.check_file(path) == False:
                return None
            final_path_like = path + path_like_filter
        if namespaces == None:
            namespaces = self.namespaces.keys()
        result = AggregatedData(self, path)
        for name in namespaces:
            namespace = self.get_namespace(name)
            data = self.db.aggregate_rows(name, path_like = final_path_like)
            for field in data.keys():
                if namespace.get_field_packager(field).get_python_type() == str:
                    continue
                if namespace.get_field_packager(field).is_non_zero() == True:
                    data[field]['min'] = None
                    data[field]['avg'] = None
                distribution = self.db.count_rows(name, path_like = final_path_like, group_by_column = field)
                data[field]['distribution-bars'] = []
                for each in distribution:
                    if each[0] == None:
                        continue
                    assert(float(data[field]['count']) != 0)
                    data[field]['distribution-bars'].append({'metric': each[0],
                                                             'count': each[1],
                                                             'ratio': round((float(each[1]) / float(data[field]['count'])), 4)})
                result.set_data(name, field, data[field])
        return result

    def load_selected_data(self, namespace, fields = None, path = None, path_like_filter = "%", filters = [],
                           sort_by = None, limit_by = None):
        if self.db == None:
            return None
        final_path_like = path_like_filter
        if path != None:
            if self.db.check_dir(path) == False and self.db.check_file(path) == False:
                return None
            final_path_like = path + path_like_filter
        namespace_obj = self.get_namespace(namespace)
        if namespace_obj == None:
            return None

        class SelectDataIterator(object):

            def iterate_selected_values(self, loader, namespace_obj, final_path_like, fields, filters, sort_by, limit_by):
                for row in loader.db.select_rows(namespace_obj.get_name(), path_like=final_path_like, filters=filters,
                                                 order_by=sort_by, limit_by=limit_by):
                    region_id = None
                    if namespace_obj.are_regions_supported() == True:
                        region_id = row['region_id']
                    data = SelectData(loader, row['path'], row['id'], region_id)
                    field_names = fields
                    if fields == None:
                        field_names = namespace_obj.iterate_field_names()
                    for field in field_names:
                        data.set_data(namespace, field, row[field])
                    yield data

            def __init__(self, loader, namespace_obj, final_path_like, fields, filters, sort_by, limit_by):
                self.iterator = self.iterate_selected_values(loader, namespace_obj, final_path_like, fields, filters, sort_by, limit_by)

            def __iter__(self):
                return self.iterator

        return SelectDataIterator(self, namespace_obj, final_path_like, fields, filters, sort_by, limit_by)
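
# End-to-end usage sketch (hedged; the database file name and the namespace
# are hypothetical, and core.db.sqlite must be importable):
#
#   loader = Loader()
#   loader.open_database('metrixpp.db', read_only = True)
#   for file_data in loader.iterate_file_data(path_like_filter = "%.cpp"):
#       print(file_data.get_path(),
#             file_data.get_data('std.code.lines', 'total'))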