#
# Metrix++, Copyright 2009-2019, Metrix++ Project
# Link: https://github.com/metrixplusplus/metrixplusplus
#
# This file is a part of Metrix++ Tool.
#
import sqlite3
import re
import os.path
import logging
import itertools
import shutil
import traceback
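
# Database is a thin wrapper around a single SQLite file that stores collected
# metrics: registered metric tables and columns (__tables__, __columns__),
# scanned files (__files__), source regions and markers (__regions__,
# __markers__), and key/value properties (__info__). The 'confirmed' flags let
# a database cloned from a previous run drop entries that are not re-confirmed
# during the next collection.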


class Database(object):

    last_used_id = 0
    version = "1.0"

    class TableData(object):
        def __init__(self, table_id, name, support_regions):
            self.id = table_id
            self.name = name
            self.support_regions = support_regions

    class ColumnData(object):
        def __init__(self, column_id, name, sql_type, non_zero):
            self.id = column_id
            self.name = name
            self.sql_type = sql_type
            self.non_zero = non_zero

    class TagData(object):
        def __init__(self, tag_id, name):
            self.id = tag_id
            self.name = name

    class PropertyData(object):
        def __init__(self, property_id, name, value):
            self.id = property_id
            self.name = name
            self.value = value

    class FileData(object):
        def __init__(self, file_id, path, checksum):
            self.id = file_id
            self.path = path
            self.checksum = checksum

    class RegionData(object):
        def __init__(self, file_id, region_id, name, begin, end, line_begin, line_end, cursor, group, checksum):
            self.file_id = file_id
            self.region_id = region_id
            self.name = name
            self.begin = begin
            self.end = end
            self.line_begin = line_begin
            self.line_end = line_end
            self.cursor = cursor
            self.group = group
            self.checksum = checksum

    class MarkerData(object):
        def __init__(self, file_id, begin, end, group):
            self.file_id = file_id
            self.begin = begin
            self.end = end
            self.group = group

    def __init__(self):
        self.read_only = False
        self.conn = None
        self.dirs = None
        self.is_cloned = False
        # bump the class-level counter so that every instance gets a unique id;
        # an instance-level 'self.last_used_id += 1' would only shadow the counter
        Database.last_used_id += 1
        self.id = Database.last_used_id
        self._tables_cache = None

    def __del__(self):
        if self.conn is not None:
            if self.is_cloned:
                logging.debug("Cleaning up database file")
                self.InternalCleanUpUtils().clean_up_not_confirmed(self)
            logging.debug("Committing database file")
            self.conn.commit()

    class InternalCleanUpUtils(object):

        def clean_up_not_confirmed(self, db_loader):
            sql = "DELETE FROM __info__ WHERE (confirmed = 0)"
            db_loader.log(sql)
            db_loader.conn.execute(sql)
            sql = "DELETE FROM __tags__ WHERE (confirmed = 0)"
            db_loader.log(sql)
            db_loader.conn.execute(sql)

            sql = "SELECT * FROM __tables__ WHERE (confirmed = 0)"
            db_loader.log(sql)
            for table in db_loader.conn.execute(sql).fetchall():
                sql = "DELETE FROM __columns__ WHERE table_id = '" + str(table['id']) + "'"
                db_loader.log(sql)
                db_loader.conn.execute(sql)
                sql = "DELETE FROM __tables__ WHERE id = '" + str(table['id']) + "'"
                db_loader.log(sql)
                db_loader.conn.execute(sql)
                db_loader._tables_cache = None  # reset cache when the list of tables is modified
                sql = "DROP TABLE '" + table['name'] + "'"
                db_loader.log(sql)
                db_loader.conn.execute(sql)

            sql = "SELECT __columns__.name AS column_name, __tables__.name AS table_name, __columns__.id AS column_id FROM __columns__, __tables__ WHERE (__columns__.confirmed = 0 AND __columns__.table_id = __tables__.id)"
            db_loader.log(sql)
            for column in db_loader.conn.execute(sql).fetchall():
                logging.info("New database file inherits useless column: '" + column['table_name'] + "'.'" + column['column_name'] + "'")
                sql = "DELETE FROM __columns__ WHERE id = '" + str(column['column_id']) + "'"
                db_loader.log(sql)
                db_loader.conn.execute(sql)
                sql = "UPDATE '" + column['table_name'] + "' SET '" + column['column_name'] + "' = NULL"
                # TODO bug here: collect with column, recollect without, recollect with again;
                # it will be impossible to create the column in the table
                db_loader.log(sql)
                db_loader.conn.execute(sql)

            self.clean_up_file(db_loader)

        def clean_up_file(self, db_loader, file_id=None):
            # this function is called for every updated file, so cache the table names
            if db_loader._tables_cache is None:
                db_loader._tables_cache = []
                sql = "SELECT * FROM __tables__"
                db_loader.log(sql)
                for table in db_loader.conn.execute(sql).fetchall():
                    db_loader._tables_cache.append(table['name'])
            for table_name in itertools.chain(db_loader._tables_cache, ['__regions__', '__markers__']):
                sql = ""
                if file_id is None:
                    sql = "DELETE FROM '" + table_name + "' WHERE file_id IN (SELECT __files__.id FROM __files__ WHERE __files__.confirmed = 0)"
                else:
                    sql = "DELETE FROM '" + table_name + "' WHERE (file_id = " + str(file_id) + ")"
                db_loader.log(sql)
                db_loader.conn.execute(sql)

    class InternalPathUtils(object):

        def iterate_heads(self, path):
            dirs = []
            head = os.path.dirname(path)
            last_head = None  # to process Windows drives
            while head != "" and last_head != head:
                dirs.append(os.path.basename(head))
                last_head = head
                head = os.path.dirname(head)
            dirs.reverse()
            for each in dirs:
                yield each
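
        # Example: for the path "a/b/c.txt", iterate_heads() yields "a" and
        # then "b", i.e. every parent directory from the root down, excluding
        # the file's own basename.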

        def normalize_path(self, path):
            if path is None:
                return None
            path = re.sub(r'''[\\]''', "/", path)
            if path.endswith('/'):
                return path[:-1]
            return path

        def update_dirs(self, db_loader, path=None):
            if db_loader.dirs is None:
                if path is None:
                    db_loader.dirs = {}  # initial construction
                else:
                    return  # avoid useless cache updates
            elif path is None:
                return  # avoid multiple initial constructions
            path = self.normalize_path(path)
            rows = None
            if path is None:
                sql = "SELECT * FROM __files__"
                db_loader.log(sql)
                rows = db_loader.conn.execute(sql).fetchall()
            else:
                rows = [{"path": path}]
            for row in rows:
                cur_head = db_loader.dirs
                for dir_name in self.iterate_heads(row["path"]):
                    if dir_name not in cur_head:
                        cur_head[dir_name] = {}
                    cur_head = cur_head[dir_name]
                cur_head[os.path.basename(row["path"])] = None
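
        # After update_dirs() has seen the path "a/b/c.txt", the directory
        # cache looks like {"a": {"b": {"c.txt": None}}}: directories map to
        # nested dicts and files map to None.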

    def create(self, file_name, clone_from=None):
        if clone_from is not None:
            self.is_cloned = True
            logging.debug("Cloning database file: " + clone_from)
            shutil.copy2(clone_from, file_name)
            logging.debug("Connecting database file: " + file_name)
            self.conn = sqlite3.connect(file_name, check_same_thread=False)
            self.conn.row_factory = sqlite3.Row
            self.read_only = False
            sql = "UPDATE __tables__ SET confirmed = 0"
            self.log(sql)
            self.conn.execute(sql)
            self._tables_cache = None  # reset cache when the list of tables is modified
            sql = "UPDATE __columns__ SET confirmed = 0"
            self.log(sql)
            self.conn.execute(sql)
            sql = "UPDATE __tags__ SET confirmed = 0"
            self.log(sql)
            self.conn.execute(sql)
            sql = "UPDATE __files__ SET confirmed = 0"
            self.log(sql)
            self.conn.execute(sql)
        else:
            self.connect(file_name)

    def connect(self, file_name, read_only=False):
        logging.debug("Connecting database file: " + file_name)
        self.conn = sqlite3.connect(file_name, check_same_thread=False)
        self.conn.row_factory = sqlite3.Row
        self.read_only = read_only
        if not self.read_only:
            try:
                sql = "CREATE TABLE __info__ (id integer NOT NULL PRIMARY KEY AUTOINCREMENT, property text NOT NULL, value text, confirmed integer NOT NULL, UNIQUE (property) ON CONFLICT REPLACE)"
                self.log(sql)
                self.conn.execute(sql)
                sql = "INSERT INTO __info__ (property, value, confirmed) VALUES ('version', '" + self.version + "', 1)"
                self.log(sql)
                self.conn.execute(sql)
                sql = "CREATE TABLE __tables__ (id integer NOT NULL PRIMARY KEY, name text NOT NULL, version text NOT NULL, support_regions integer NOT NULL, confirmed integer NOT NULL, UNIQUE (name))"
                self.log(sql)
                self.conn.execute(sql)
                self._tables_cache = None  # reset cache when the list of tables is modified
                sql = "CREATE TABLE __columns__ (id integer NOT NULL PRIMARY KEY, name text NOT NULL, type text NOT NULL, table_id integer NOT NULL, non_zero integer NOT NULL, confirmed integer NOT NULL, UNIQUE (name, table_id))"
                self.log(sql)
                self.conn.execute(sql)
                sql = "CREATE TABLE __tags__ (id integer NOT NULL PRIMARY KEY, name text NOT NULL UNIQUE, confirmed integer NOT NULL)"
                self.log(sql)
                self.conn.execute(sql)
                sql = "CREATE TABLE __files__ (id integer NOT NULL PRIMARY KEY AUTOINCREMENT, path text NOT NULL, checksum integer NOT NULL, tag1 integer, tag2 integer, tag3 integer, confirmed integer NOT NULL, UNIQUE(path))"
                self.log(sql)
                self.conn.execute(sql)
                sql = "CREATE TABLE __regions__ (file_id integer NOT NULL, region_id integer NOT NULL, name text NOT NULL, begin integer NOT NULL, end integer NOT NULL, line_begin integer NOT NULL, line_end integer NOT NULL, cursor integer NOT NULL, group_id integer NOT NULL, checksum integer NOT NULL, PRIMARY KEY (file_id, region_id))"
                self.log(sql)
                self.conn.execute(sql)
                sql = "CREATE TABLE __markers__ (id integer NOT NULL PRIMARY KEY, file_id integer NOT NULL, begin integer NOT NULL, end integer NOT NULL, group_id integer NOT NULL)"
                self.log(sql)
                self.conn.execute(sql)
            except sqlite3.OperationalError as e:
                logging.debug("sqlite3.OperationalError: " + str(e))
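
    # Note: connect() always attempts to create the schema; on an already
    # initialised database the first CREATE TABLE raises OperationalError,
    # which is swallowed above, so reconnecting to an existing file is safe.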

    def set_property(self, property_name, value):
        ret_val = None
        sql = "SELECT * FROM __info__ WHERE (property = '" + property_name + "')"
        self.log(sql)
        result = self.conn.execute(sql).fetchall()
        if len(result) != 0:
            ret_val = result[0]['value']
        sql = "INSERT INTO __info__ (property, value, confirmed) VALUES ('" + property_name + "', '" + value + "', 1)"
        self.log(sql)
        self.conn.execute(sql)
        return ret_val

    def get_property(self, property_name):
        ret_val = None
        sql = "SELECT * FROM __info__ WHERE (property = '" + property_name + "' AND confirmed = 1)"
        self.log(sql)
        result = self.conn.execute(sql).fetchall()
        if len(result) != 0:
            ret_val = result[0]['value']
        return ret_val

    def iterate_properties(self):
        sql = "SELECT * FROM __info__ WHERE (confirmed = 1)"
        self.log(sql)
        for each in self.conn.execute(sql).fetchall():
            yield self.PropertyData(each['id'], each['property'], each['value'])

    def create_table(self, table_name, support_regions=False, version='1.0'):
        assert not self.read_only
        self._tables_cache = None  # reset cache when the list of tables is modified
        sql = "SELECT * FROM __tables__ WHERE (name = '" + table_name + "' AND confirmed == 0)"
        self.log(sql)
        result = self.conn.execute(sql).fetchall()
        if len(result) != 0:
            if result[0]['version'] != version:
                # in case of a version change, drop the existing table data
                sql = "DELETE FROM __columns__ WHERE table_id = '" + str(result[0]['id']) + "'"
                self.log(sql)
                self.conn.execute(sql)
                sql = "DELETE FROM __tables__ WHERE id = '" + str(result[0]['id']) + "'"
                self.log(sql)
                self.conn.execute(sql)
                sql = "DROP TABLE '" + result[0]['name'] + "'"
                self.log(sql)
                self.conn.execute(sql)
            else:
                sql = "UPDATE __tables__ SET confirmed = 1 WHERE (name = '" + table_name + "')"
                self.log(sql)
                self.conn.execute(sql)
                return False
        sql = "CREATE TABLE '" + table_name + "' (file_id integer NOT NULL PRIMARY KEY)"
        if support_regions:
            sql = ("CREATE TABLE '" + table_name + "' (file_id integer NOT NULL, region_id integer NOT NULL, "
                   + "PRIMARY KEY (file_id, region_id))")
        self.log(sql)
        self.conn.execute(sql)
        sql = "INSERT INTO __tables__ (name, version, support_regions, confirmed) VALUES ('" + table_name + "', '" + version + "', '" + str(int(support_regions)) + "', 1)"
        self.log(sql)
        self.conn.execute(sql)
        return True
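
    # Typical write-side flow (table, column, and value names below are
    # hypothetical):
    #   db.create_table('example_metrics', support_regions=True)
    #   db.create_column('example_metrics', 'value', 'integer', non_zero=True)
    #   db.add_row('example_metrics', file_id, region_id, [('value', 42)])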

    def iterate_tables(self):
        sql = "SELECT * FROM __tables__ WHERE (confirmed = 1)"
        self.log(sql)
        result = self.conn.execute(sql).fetchall()
        for row in result:
            yield self.TableData(int(row["id"]), str(row["name"]), bool(row["support_regions"]))

    def check_table(self, table_name):
        sql = "SELECT * FROM __tables__ WHERE (name = '" + table_name + "' AND confirmed = 1)"
        self.log(sql)
        result = self.conn.execute(sql).fetchall()
        if len(result) == 0:
            return False
        return True

    def create_column(self, table_name, column_name, column_type, non_zero=False):
        assert not self.read_only
        if column_type is None:
            logging.debug("Skipping column '" + column_name + "' creation for table '" + table_name + "'")
            return
        sql = "SELECT id FROM __tables__ WHERE (name = '" + table_name + "')"
        self.log(sql)
        table_id = next(self.conn.execute(sql))['id']
        sql = "SELECT * FROM __columns__ WHERE (table_id = '" + str(table_id) + "' AND name = '" + column_name + "' AND confirmed == 0)"
        self.log(sql)
        result = self.conn.execute(sql).fetchall()
        if len(result) != 0:
            # major changes in columns should bump the table version,
            # which causes the table to be dropped when a database is reused
            assert result[0]['type'] == column_type
            assert result[0]['non_zero'] == non_zero
            sql = "UPDATE __columns__ SET confirmed = 1 WHERE (table_id = '" + str(table_id) + "' AND name = '" + column_name + "')"
            self.log(sql)
            self.conn.execute(sql)
            return False
        sql = "ALTER TABLE '" + table_name + "' ADD COLUMN '" + column_name + "' " + column_type
        self.log(sql)
        self.conn.execute(sql)
        sql = "SELECT id FROM __tables__ WHERE (name = '" + table_name + "')"
        self.log(sql)
        table_id = next(self.conn.execute(sql))['id']
        sql = "INSERT INTO __columns__ (name, type, table_id, non_zero, confirmed) VALUES ('" + column_name + "', '" + column_type + "', '" + str(table_id) + "', '" + str(int(non_zero)) + "', 1)"
        self.log(sql)
        self.conn.execute(sql)
        return True

    def iterate_columns(self, table_name):
        sql = "SELECT id FROM __tables__ WHERE (name = '" + table_name + "')"
        self.log(sql)
        table_id = next(self.conn.execute(sql))['id']
        sql = "SELECT * FROM __columns__ WHERE (table_id = '" + str(table_id) + "' AND confirmed = 1)"
        self.log(sql)
        result = self.conn.execute(sql).fetchall()
        for row in result:
            yield self.ColumnData(int(row["id"]), str(row["name"]), str(row["type"]), bool(row["non_zero"]))

    def check_column(self, table_name, column_name):
        sql = "SELECT id FROM __tables__ WHERE (name = '" + table_name + "')"
        self.log(sql)
        table_id = next(self.conn.execute(sql))['id']
        sql = "SELECT * FROM __columns__ WHERE (table_id = '" + str(table_id) + "' AND name = '" + column_name + "' AND confirmed = 1)"
        self.log(sql)
        result = self.conn.execute(sql).fetchall()
        if len(result) == 0:
            return False
        return True

    def create_tag(self, tag_name):
        assert not self.read_only
        sql = "SELECT * FROM __tags__ WHERE (name = '" + tag_name + "' AND confirmed == 0)"
        self.log(sql)
        result = self.conn.execute(sql).fetchall()
        if len(result) != 0:
            sql = "UPDATE __tags__ SET confirmed = 1 WHERE (name = '" + tag_name + "')"
            self.log(sql)
            self.conn.execute(sql)
            return
        sql = "INSERT INTO __tags__ (name, confirmed) VALUES ('" + tag_name + "', 1)"
        self.log(sql)
        self.conn.execute(sql)

    def iterate_tags(self):
        sql = "SELECT * FROM __tags__ WHERE (confirmed = 1)"
        self.log(sql)
        result = self.conn.execute(sql).fetchall()
        for row in result:
            yield self.TagData(int(row["id"]), str(row["name"]))

    def check_tag(self, tag_name):
        sql = "SELECT * FROM __tags__ WHERE (name = '" + tag_name + "' AND confirmed = 1)"
        self.log(sql)
        result = self.conn.execute(sql).fetchall()
        if len(result) == 0:
            return False
        return True

    # TODO: activate usage of tags
    def create_file(self, path, checksum):
        assert not self.read_only
        path = self.InternalPathUtils().normalize_path(path)
        if self.is_cloned:
            sql = "SELECT * FROM __files__ WHERE (path = '" + path + "')"
            self.log(sql)
            result = self.conn.execute(sql).fetchall()
            if len(result) != 0:
                if result[0]['checksum'] == checksum:
                    old_id = result[0]['id']
                    sql = "UPDATE __files__ SET confirmed = 1 WHERE (id = " + str(old_id) + ")"
                    self.log(sql)
                    self.conn.execute(sql)
                    return (old_id, False)
                else:
                    self.InternalCleanUpUtils().clean_up_file(self, result[0]['id'])
        sql = "INSERT OR REPLACE INTO __files__ (path, checksum, confirmed) VALUES (?, ?, 1)"
        column_data = [path, checksum]
        self.log(sql + " /with arguments: " + str(column_data))
        cur = self.conn.cursor()
        cur.execute(sql, column_data)
        self.InternalPathUtils().update_dirs(self, path=path)
        return (cur.lastrowid, True)
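
    # create_file() returns a (file_id, is_new) pair: is_new is False only
    # when a cloned database already holds this path with an unchanged
    # checksum, in which case the existing row is confirmed and its id reused.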

    def iterate_dircontent(self, path, include_subdirs=True, include_subfiles=True):
        self.InternalPathUtils().update_dirs(self)
        path = self.InternalPathUtils().normalize_path(path)
        cur_head = self.dirs
        valid = True
        if path != "":
            for head in self.InternalPathUtils().iterate_heads(path):
                if head not in cur_head:
                    # non-existing directory
                    valid = False
                else:
                    cur_head = cur_head[head]
            basename = os.path.basename(path)
            if basename not in cur_head or cur_head[basename] is None:
                # does not exist or points to a file
                valid = False
            else:
                cur_head = cur_head[basename]
        if valid:
            for elem in list(cur_head.keys()):
                if include_subdirs and cur_head[elem] is not None:
                    yield elem
                if include_subfiles and cur_head[elem] is None:
                    yield elem

    def check_file(self, path):
        return self.get_file(path) is not None

    def check_dir(self, path):
        for _ in self.iterate_dircontent(path):
            return True  # there is at least one item
        return False

    def get_file(self, path):
        path = self.InternalPathUtils().normalize_path(path)
        result = self.select_rows("__files__", filters=[("path", "=", path), ("confirmed", "=", 1)])
        if len(result) == 0:
            return None
        assert len(result) == 1
        return self.FileData(result[0]['id'], result[0]['path'], result[0]['checksum'])

    def iterate_files(self, path_like=None):
        for row in self.select_rows('__files__', path_like=path_like, filters=[('confirmed', '=', '1')]):
            yield self.FileData(row['id'], row['path'], row['checksum'])

    def create_region(self, file_id, region_id, name, begin, end, line_begin, line_end, cursor, group, checksum):
        assert not self.read_only
        sql = "INSERT OR REPLACE INTO __regions__ (file_id, region_id, name, begin, end, line_begin, line_end, cursor, group_id, checksum) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"
        column_data = [file_id, region_id, name, begin, end, line_begin, line_end, cursor, group, checksum]
        self.log(sql + " /with arguments: " + str(column_data))
        cur = self.conn.cursor()
        cur.execute(sql, column_data)
        return cur.lastrowid

    def get_region(self, file_id, region_id):
        result = self.select_rows("__regions__", filters=[("file_id", "=", file_id), ("region_id", "=", region_id)])
        if len(result) == 0:
            return None
        return self.RegionData(result[0]['file_id'],
                               result[0]['region_id'],
                               result[0]['name'],
                               result[0]['begin'],
                               result[0]['end'],
                               result[0]['line_begin'],
                               result[0]['line_end'],
                               result[0]['cursor'],
                               result[0]['group_id'],
                               result[0]['checksum'])

    def iterate_regions(self, file_id):
        for each in self.select_rows("__regions__", filters=[("file_id", "=", file_id)]):
            yield self.RegionData(each['file_id'],
                                  each['region_id'],
                                  each['name'],
                                  each['begin'],
                                  each['end'],
                                  each['line_begin'],
                                  each['line_end'],
                                  each['cursor'],
                                  each['group_id'],
                                  each['checksum'])

    def create_marker(self, file_id, begin, end, group):
        assert not self.read_only
        sql = "INSERT OR REPLACE INTO __markers__ (file_id, begin, end, group_id) VALUES (?, ?, ?, ?)"
        column_data = [file_id, begin, end, group]
        self.log(sql + " /with arguments: " + str(column_data))
        cur = self.conn.cursor()
        cur.execute(sql, column_data)
        return cur.lastrowid

    def iterate_markers(self, file_id):
        for each in self.select_rows("__markers__", filters=[("file_id", "=", file_id)]):
            yield self.MarkerData(each['file_id'],
                                  each['begin'],
                                  each['end'],
                                  each['group_id'])

    def add_row(self, table_name, file_id, region_id, array_data):
        assert not self.read_only
        column_names = "'file_id'"
        column_values = "?"
        column_data = [file_id]
        if region_id is not None:
            column_names += ", 'region_id'"
            column_values += ", ?"
            column_data.append(region_id)
        useful_data = 0
        for each in array_data:
            column_names += ", '" + each[0] + "'"
            column_values += ", ?"
            column_data.append(each[1])
            useful_data += 1
        if useful_data == 0:
            return
        sql = "INSERT OR REPLACE INTO '" + table_name + "' (" + column_names + ") VALUES (" + column_values + ")"
        self.log(sql + " /with arguments: " + str(column_data))
        cur = self.conn.cursor()
        cur.execute(sql, column_data)
        return cur.lastrowid
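
    # array_data is a sequence of (column_name, value) pairs; if it is empty,
    # add_row() returns without inserting, so empty measurements do not
    # produce table rows.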

    def select_rows(self, table_name, path_like=None, column_names=[], filters=[], order_by=None, limit_by=None):
        safe_column_names = []
        for each in column_names:
            safe_column_names.append("'" + each + "'")
        return self.select_rows_unsafe(table_name, path_like=path_like,
                                       column_names=safe_column_names, filters=filters,
                                       order_by=order_by, limit_by=limit_by)

    def select_rows_unsafe(self, table_name, path_like=None, column_names=[], filters=[],
                           group_by=None, order_by=None, limit_by=None):
        path_like = self.InternalPathUtils().normalize_path(path_like)
        if self.conn is None:
            return []

        table_stmt = "'" + table_name + "'"
        what_stmt = ", ".join(column_names)
        if len(what_stmt) == 0:
            what_stmt = "*"
        elif path_like is not None and table_name != '__files__' and group_by is None:
            what_stmt += ", '__files__'.'path', '__files__'.'id'"

        inner_stmt = ""
        if path_like is not None and table_name != '__files__':
            inner_stmt = " INNER JOIN '__files__' ON '__files__'.'id' = '" + table_name + "'.'file_id' "

        where_stmt = " "
        values = ()
        if len(filters) != 0:
            if filters[0][1] == 'IN':
                where_stmt = " WHERE (`" + filters[0][0] + "` " + filters[0][1] + " " + filters[0][2]
            else:
                where_stmt = " WHERE (`" + filters[0][0] + "` " + filters[0][1] + " ?"
                values = (filters[0][2],)
            for each in filters[1:]:
                if each[1] == 'IN':
                    where_stmt += " AND `" + each[0] + "` " + each[1] + " " + each[2]
                else:
                    where_stmt += " AND `" + each[0] + "` " + each[1] + " ?"
                    values += (each[2],)
            if path_like is not None:
                where_stmt += " AND '__files__'.'path' LIKE ?"
                values += (path_like,)
            where_stmt += ")"
        elif path_like is not None:
            where_stmt = " WHERE '__files__'.'path' LIKE ?"
            values += (path_like,)

        group_stmt = ""
        if group_by is not None:
            group_stmt = " GROUP BY (`" + group_by + "`)"

        order_stmt = ""
        if order_by is not None:
            if order_by.startswith("-"):
                order_stmt = " ORDER BY (`" + order_by[1:] + "`) DESC "
            else:
                order_stmt = " ORDER BY (`" + order_by + "`) "

        limit_stmt = ""
        if limit_by is not None:
            limit_stmt = " LIMIT " + str(limit_by)

        sql = "SELECT " + what_stmt + " FROM " + table_stmt + inner_stmt + where_stmt + group_stmt + order_stmt + limit_stmt
        self.log(sql + " /with arguments: " + str(values))
        return self.conn.execute(sql, values).fetchall()
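
    # Filters are (column, operator, value) triples joined with AND. Values
    # are bound as SQL parameters, except for the 'IN' operator, whose
    # right-hand side is embedded verbatim. A hypothetical query:
    #   db.select_rows('__regions__',
    #                  filters=[('file_id', '=', 1), ('line_end', '>', 100)])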

    def get_row(self, table_name, file_id, region_id):
        selected = self.get_rows(table_name, file_id, region_id)
        # ensure there is at most one matching row in the database;
        # if this assertion fires, the caller should use get_rows() instead
        assert len(selected) == 0 or len(selected) == 1
        if len(selected) == 0:
            return None
        return selected[0]

    def get_rows(self, table_name, file_id, region_id):
        filters = [("file_id", '=', file_id)]
        if region_id is not None:
            filters.append(("region_id", '=', region_id))
        return self.select_rows(table_name, filters=filters)

    def aggregate_rows(self, table_name, path_like=None, column_names=None, filters=[]):
        if column_names is None:
            column_names = []
            for column in self.iterate_columns(table_name):
                column_names.append(column.name)
        if len(column_names) == 0:
            # it is possible that a table has no meaningful columns
            return {}
        total_column_names = []
        for column_name in column_names:
            for func in ['max', 'min', 'avg', 'total', 'count']:
                total_column_names.append(func + "('" + table_name + "'.'" + column_name + "') AS " + "'" + column_name + "_" + func + "'")
        data = self.select_rows_unsafe(table_name, path_like=path_like, column_names=total_column_names, filters=filters)
        assert len(data) == 1
        result = {}
        for column_name in column_names:
            result[column_name] = {}
            for func in ['max', 'min', 'avg', 'total', 'count']:
                result[column_name][func] = data[0][column_name + "_" + func]
        return result

    def count_rows(self, table_name, path_like=None, group_by_column=None, filters=[]):
        count_column_names = None
        if group_by_column is not None:
            for column in self.iterate_columns(table_name):
                if group_by_column == column.name:
                    count_column_names = ["`" + group_by_column + "`", "COUNT(`" + group_by_column + "`)"]
                    break
        else:
            count_column_names = ["COUNT(*)"]
        if count_column_names is None:
            return []
        data = self.select_rows_unsafe(table_name, path_like=path_like, column_names=count_column_names,
                                       filters=filters, group_by=group_by_column)
        return data

    def log(self, sql):
        if logging.getLogger().getEffectiveLevel() <= logging.DEBUG:
            logging.debug("[" + str(self.id) + "] Executing query: " + sql)
            traceback.print_stack()
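

# A minimal usage sketch, not part of the original tool; the database path,
# table name, column name, and file data below are hypothetical. It exercises
# the typical write path: connect, declare a table and a column, register a
# file, store a row, and read it back.
if __name__ == "__main__":
    import tempfile

    db_path = os.path.join(tempfile.mkdtemp(), "example.db")
    db = Database()
    db.create(db_path)  # no clone_from, so this connects and creates the schema
    db.create_table("example_metrics", support_regions=False)
    db.create_column("example_metrics", "value", "integer")
    file_id, is_new = db.create_file("src/example.py", checksum=12345)
    db.add_row("example_metrics", file_id, None, [("value", 42)])
    for row in db.select_rows("example_metrics", filters=[("file_id", "=", file_id)]):
        print(dict(row))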