sqlite.py 24 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543
  1. '''
  2. Created on 3/01/2013
  3. @author: konstaa
  4. '''
  5. import sqlite3
  6. import re
  7. import os.path
  8. import logging
  9. import itertools
  10. import shutil
  11. class Database(object):
  12. last_used_id = 0
  13. version = "1.0"
  14. class TableData(object):
  15. def __init__(self, table_id, name, support_regions):
  16. self.id = table_id
  17. self.name = name
  18. self.support_regions = support_regions
  19. class ColumnData(object):
  20. def __init__(self, column_id, name, sql_type):
  21. self.id = column_id
  22. self.name = name
  23. self.sql_type = sql_type
  24. class FileData(object):
  25. def __init__(self, file_id, path, checksum):
  26. self.id = file_id
  27. self.path = path
  28. self.checksum = checksum
  29. class RegionData(object):
  30. def __init__(self, file_id, region_id, name, begin, end, line_begin, line_end, cursor, group, checksum):
  31. self.file_id = file_id
  32. self.region_id = region_id
  33. self.name = name
  34. self.begin = begin
  35. self.end = end
  36. self.line_begin = line_begin
  37. self.line_end = line_end
  38. self.cursor = cursor
  39. self.group = group
  40. self.checksum = checksum
  41. class MarkerData(object):
  42. def __init__(self, file_id, begin, end, group):
  43. self.file_id = file_id
  44. self.begin = begin
  45. self.end = end
  46. self.group = group
  47. def __init__(self):
  48. self.read_only = False
  49. self.conn = None
  50. self.dirs = None
  51. self.is_cloned = False
  52. self.last_used_id += 1
  53. self.id = self.last_used_id
  54. def __del__(self):
  55. if self.conn != None:
  56. if self.is_cloned == True:
  57. logging.debug("Cleaning up database file")
  58. self.InternalCleanUpUtils().clean_up_not_confirmed(self)
  59. logging.debug("Committing database file")
  60. self.conn.commit()
  61. class InternalCleanUpUtils(object):
  62. def clean_up_not_confirmed(self, db_loader):
  63. sql = "SELECT * FROM __tables__ WHERE (confirmed = 0)"
  64. db_loader.log(sql)
  65. for table in db_loader.conn.execute(sql).fetchall():
  66. sql = "DELETE FROM __columns__ WHERE table_id = '" + str(table['id']) + "'"
  67. db_loader.log(sql)
  68. db_loader.conn.execute(sql)
  69. sql = "DELETE FROM __tables__ WHERE id = '" + str(table['id']) + "'"
  70. db_loader.log(sql)
  71. db_loader.conn.execute(sql)
  72. sql = "DROP TABLE '" + table['name'] + "'"
  73. db_loader.log(sql)
  74. db_loader.conn.execute(sql)
  75. sql = "SELECT __columns__.name AS column_name, __tables__.name AS table_name, __columns__.id AS column_id FROM __columns__, __tables__ WHERE (__columns__.confirmed = 0 AND __columns__.table_id = __tables__.id)"
  76. db_loader.log(sql)
  77. for column in db_loader.conn.execute(sql).fetchall():
  78. logging.warn("New database file inherits useless column: '" + column['table_name'] + "'.'" + column['column_name'] + "'")
  79. sql = "DELETE FROM __columns__ WHERE id = '" + str(column['column_id']) + "'"
  80. db_loader.log(sql)
  81. db_loader.conn.execute(sql)
  82. sql = "UPDATE '" + column['table_name'] + "' SET '" + column['column_name'] + "' = NULL"
  83. db_loader.log(sql)
  84. db_loader.conn.execute(sql)
  85. self.clean_up_file(db_loader)
  86. def clean_up_file(self, db_loader, file_id = None):
  87. sql = "SELECT * FROM __tables__"
  88. db_loader.log(sql)
  89. for table in itertools.chain(db_loader.conn.execute(sql).fetchall(), [{'name':'__regions__'}, {'name':'__markers__'}]):
  90. sql = ""
  91. if file_id == None:
  92. sql = "DELETE FROM '" + table['name'] + "' WHERE file_id IN (SELECT __files__.id FROM __files__ WHERE __files__.confirmed = 0)"
  93. else:
  94. sql = "DELETE FROM '" + table['name'] + "' WHERE (file_id = " + str(file_id) + ")"
  95. db_loader.log(sql)
  96. db_loader.conn.execute(sql)
  97. class InternalPathUtils(object):
  98. def iterate_heads(self, path):
  99. dirs = []
  100. head = os.path.dirname(path)
  101. last_head = None # to process Windows drives
  102. while (head != "" and last_head != head):
  103. dirs.append(os.path.basename(head))
  104. last_head = head
  105. head = os.path.dirname(head)
  106. dirs.reverse()
  107. for each in dirs:
  108. yield each
  109. def normalize_path(self, path):
  110. if path == None:
  111. return None
  112. return re.sub(r'''[\\]''', "/", path)
  113. def update_dirs(self, db_loader, path = None):
  114. if db_loader.dirs == None:
  115. if path == None:
  116. db_loader.dirs = {} # initial construction
  117. else:
  118. return # avoid useless cache updates
  119. elif path == None:
  120. return # avoid multiple initial constructions
  121. path = self.normalize_path(path)
  122. rows = None
  123. if path == None:
  124. sql = "SELECT * FROM __files__"
  125. db_loader.log(sql)
  126. rows = db_loader.conn.execute(sql).fetchall()
  127. else:
  128. rows = [{"path": path}]
  129. for row in rows:
  130. cur_head = db_loader.dirs
  131. for dir_name in self.iterate_heads(row["path"]):
  132. if dir_name not in cur_head.keys():
  133. cur_head[dir_name] = {}
  134. cur_head = cur_head[dir_name]
  135. cur_head[os.path.basename(row["path"])] = None
  136. def create(self, file_name, clone_from = None):
  137. if clone_from != None:
  138. self.is_cloned = True
  139. logging.debug("Cloning database file: " + clone_from)
  140. shutil.copy2(clone_from, file_name)
  141. logging.debug("Connecting database file: " + file_name)
  142. self.conn = sqlite3.connect(file_name)
  143. self.conn.row_factory = sqlite3.Row
  144. self.read_only = False
  145. sql = "UPDATE __tables__ SET confirmed = 0"
  146. self.log(sql)
  147. self.conn.execute(sql)
  148. sql = "UPDATE __columns__ SET confirmed = 0"
  149. self.log(sql)
  150. self.conn.execute(sql)
  151. sql = "UPDATE __files__ SET confirmed = 0"
  152. self.log(sql)
  153. self.conn.execute(sql)
  154. else:
  155. self.connect(file_name)
  156. def connect(self, file_name, read_only = False):
  157. logging.debug("Connecting database file: " + file_name)
  158. self.conn = sqlite3.connect(file_name)
  159. self.conn.row_factory = sqlite3.Row
  160. self.read_only = read_only
  161. if self.read_only == False:
  162. try:
  163. sql = "CREATE TABLE __info__ (id integer NOT NULL PRIMARY KEY AUTOINCREMENT, property text NOT NULL, value text, UNIQUE (property) ON CONFLICT REPLACE)"
  164. self.log(sql)
  165. self.conn.execute(sql)
  166. sql = "INSERT INTO __info__ (property, value) VALUES ('version', '" + self.version + "')"
  167. self.log(sql)
  168. self.conn.execute(sql)
  169. sql = "CREATE TABLE __tables__ (id integer NOT NULL PRIMARY KEY, name text NOT NULL, support_regions integer NOT NULL, confirmed integer NOT NULL, UNIQUE (name))"
  170. self.log(sql)
  171. self.conn.execute(sql)
  172. sql = "CREATE TABLE __columns__ (id integer NOT NULL PRIMARY KEY, name text NOT NULL, type text NOT NULL, table_id integer NOT_NULL, confirmed integer NOT NULL, UNIQUE (name, table_id))"
  173. self.log(sql)
  174. self.conn.execute(sql)
  175. sql = "CREATE TABLE __files__ (id integer NOT NULL PRIMARY KEY AUTOINCREMENT, path text NOT NULL, checksum integer NOT NULL, confirmed integer NOT NULL, UNIQUE(path))"
  176. self.log(sql)
  177. self.conn.execute(sql)
  178. sql = "CREATE TABLE __regions__ (file_id integer NOT NULL, region_id integer NOT NULL, name text NOT NULL, begin integer NOT NULL, end integer NOT NULL, line_begin integer NOT NULL, line_end integer NOT NULL, cursor integer NOT NULL, group_id integer NOT NULL, checksum integer NOT NULL, PRIMARY KEY (file_id, region_id))"
  179. self.log(sql)
  180. self.conn.execute(sql)
  181. sql = "CREATE TABLE __markers__ (id integer NOT NULL PRIMARY KEY, file_id integer NOT NULL, begin integer NOT NULL, end integer NOT NULL, group_id integer NOT NULL)"
  182. self.log(sql)
  183. self.conn.execute(sql)
  184. except sqlite3.OperationalError as e:
  185. logging.debug("sqlite3.OperationalError: " + str(e))
  186. def create_table(self, table_name, support_regions = False):
  187. assert(self.read_only == False)
  188. sql = "SELECT * FROM __tables__ WHERE (name = '" + table_name + "'AND confirmed == 0)"
  189. self.log(sql)
  190. result = self.conn.execute(sql).fetchall()
  191. if len(result) != 0:
  192. sql = "UPDATE __tables__ SET confirmed = 1 WHERE (name = '" + table_name + "')"
  193. self.log(sql)
  194. self.conn.execute(sql)
  195. return
  196. sql = "CREATE TABLE '" + table_name + "' (file_id integer NOT NULL PRIMARY KEY)"
  197. if support_regions == True:
  198. sql = str("CREATE TABLE '" + table_name + "' (file_id integer NOT NULL, region_id integer NOT NULL, "
  199. + "PRIMARY KEY (file_id, region_id))")
  200. self.log(sql)
  201. self.conn.execute(sql)
  202. sql = "INSERT INTO __tables__ (name, support_regions, confirmed) VALUES ('" + table_name + "', '" + str(int(support_regions)) + "', 1)"
  203. self.log(sql)
  204. self.conn.execute(sql)
  205. def iterate_tables(self):
  206. sql = "SELECT * FROM __tables__ WHERE (confirmed = 1)"
  207. self.log(sql)
  208. result = self.conn.execute(sql).fetchall()
  209. for row in result:
  210. yield self.TableData(int(row["id"]), str(row["name"]), bool(row["support_regions"]))
  211. def check_table(self, table_name):
  212. sql = "SELECT * FROM __tables__ WHERE (name = '" + table_name + "' AND confirmed = 1)"
  213. self.log(sql)
  214. result = self.conn.execute(sql).fetchall()
  215. if len(result) == 0:
  216. return False
  217. return True
  218. def create_column(self, table_name, column_name, column_type):
  219. assert(self.read_only == False)
  220. if column_type == None:
  221. logging.debug("Skipping column '" + column_name + "' creation for table '" + table_name + "'")
  222. return
  223. sql = "SELECT id FROM __tables__ WHERE (name = '" + table_name + "')"
  224. self.log(sql)
  225. table_id = self.conn.execute(sql).next()['id']
  226. sql = "SELECT * FROM __columns__ WHERE (table_id = '" + str(table_id) + "' AND name = '" + column_name + "' AND confirmed == 0)"
  227. self.log(sql)
  228. result = self.conn.execute(sql).fetchall()
  229. if len(result) != 0:
  230. sql = "UPDATE __columns__ SET confirmed = 1 WHERE (table_id = '" + str(table_id) + "' AND name = '" + column_name + "')"
  231. self.log(sql)
  232. self.conn.execute(sql)
  233. return
  234. sql = "ALTER TABLE '" + table_name + "' ADD COLUMN '" + column_name + "' " + column_type
  235. self.log(sql)
  236. self.conn.execute(sql)
  237. sql = "SELECT id FROM __tables__ WHERE (name = '" + table_name + "')"
  238. self.log(sql)
  239. table_id = self.conn.execute(sql).next()['id']
  240. sql = "INSERT INTO __columns__ (name, type, table_id, confirmed) VALUES ('" + column_name + "', '" + column_type + "', '" + str(table_id) + "', 1)"
  241. self.log(sql)
  242. self.conn.execute(sql)
  243. def iterate_columns(self, table_name):
  244. sql = "SELECT id FROM __tables__ WHERE (name = '" + table_name + "')"
  245. self.log(sql)
  246. table_id = self.conn.execute(sql).next()['id']
  247. sql = "SELECT * FROM __columns__ WHERE (table_id = '" + str(table_id) + "' AND confirmed = 1)"
  248. self.log(sql)
  249. result = self.conn.execute(sql).fetchall()
  250. for row in result:
  251. yield self.ColumnData(int(row["id"]), str(row["name"]), str(row["type"]))
  252. def check_column(self, table_name, column_name):
  253. sql = "SELECT id FROM __tables__ WHERE (name = '" + table_name + "')"
  254. self.log(sql)
  255. table_id = self.conn.execute(sql).next()['id']
  256. sql = "SELECT * FROM __columns__ WHERE (table_id = '" + str(table_id) + "' AND name = '" + column_name + "' AND confirmed = 1)"
  257. self.log(sql)
  258. result = self.conn.execute(sql).fetchall()
  259. if len(result) == 0:
  260. return False
  261. return True
  262. def create_file(self, path, checksum):
  263. assert(self.read_only == False)
  264. path = self.InternalPathUtils().normalize_path(path)
  265. if self.is_cloned == True:
  266. sql = "SELECT * FROM __files__ WHERE (path = '" + path + "')"
  267. self.log(sql)
  268. result = self.conn.execute(sql).fetchall()
  269. if len(result) != 0:
  270. if result[0]['checksum'] == checksum:
  271. old_id = result[0]['id']
  272. sql = "UPDATE __files__ SET confirmed = 1 WHERE (id = " + str(old_id) +")"
  273. self.log(sql)
  274. self.conn.execute(sql)
  275. return old_id
  276. else:
  277. self.InternalCleanUpUtils().clean_up_file(self, result[0]['id'])
  278. sql = "INSERT OR REPLACE INTO __files__ (path, checksum, confirmed) VALUES (?, ?, 1)"
  279. column_data = [path, checksum]
  280. self.log(sql + " /with arguments: " + str(column_data))
  281. cur = self.conn.cursor()
  282. cur.execute(sql, column_data)
  283. self.InternalPathUtils().update_dirs(self, path=path)
  284. return cur.lastrowid
  285. def iterate_dircontent(self, path, include_subdirs = True, include_subfiles = True):
  286. self.InternalPathUtils().update_dirs(self)
  287. path = self.InternalPathUtils().normalize_path(path)
  288. cur_head = self.dirs
  289. valid = True
  290. if path != "":
  291. for head in self.InternalPathUtils().iterate_heads(path):
  292. if head not in cur_head.keys():
  293. # non existing directory
  294. valid = False
  295. else:
  296. cur_head = cur_head[head]
  297. basename = os.path.basename(path)
  298. if basename not in cur_head.keys() or cur_head[basename] == None:
  299. # do not exist or points to the file
  300. valid = False
  301. else:
  302. cur_head = cur_head[basename]
  303. if valid == True:
  304. for elem in cur_head.keys():
  305. if include_subdirs == True and cur_head[elem] != None:
  306. yield elem
  307. if include_subfiles == True and cur_head[elem] == None:
  308. yield elem
  309. def check_file(self, path):
  310. return self.get_file(path) != None
  311. def check_dir(self, path):
  312. for each in self.iterate_dircontent(path):
  313. each = each # used
  314. return True # there is at least one item
  315. return False
  316. def get_file(self, path):
  317. path = self.InternalPathUtils().normalize_path(path)
  318. result = self.select_rows("__files__", filters = [("path", "=", path), ("confirmed", "=", 1)])
  319. if len(result) == 0:
  320. return None
  321. assert(len(result) == 1)
  322. return self.FileData(result[0]['id'], result[0]['path'], result[0]['checksum'])
  323. def iterate_files(self):
  324. sql = "SELECT * FROM __files__ WHERE (confirmed = 1)"
  325. self.log(sql)
  326. for row in self.conn.execute(sql).fetchall():
  327. yield self.FileData(row['id'], row['path'], row['checksum'])
  328. def create_region(self, file_id, region_id, name, begin, end, line_begin, line_end, cursor, group, checksum):
  329. assert(self.read_only == False)
  330. sql = "INSERT OR REPLACE INTO __regions__ (file_id, region_id, name, begin, end, line_begin, line_end, cursor, group_id, checksum) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"
  331. column_data = [file_id, region_id, name, begin, end, line_begin, line_end, cursor, group, checksum]
  332. self.log(sql + " /with arguments: " + str(column_data))
  333. cur = self.conn.cursor()
  334. cur.execute(sql, column_data)
  335. return cur.lastrowid
  336. def get_region(self, file_id, region_id):
  337. result = self.select_rows("__regions__", filters = [("file_id", "=", file_id), ("region_id", "=", region_id)])
  338. if len(result) == 0:
  339. return None
  340. return self.RegionData(result[0]['file_id'],
  341. result[0]['region_id'],
  342. result[0]['name'],
  343. result[0]['begin'],
  344. result[0]['end'],
  345. result[0]['line_begin'],
  346. result[0]['line_end'],
  347. result[0]['cursor'],
  348. result[0]['group_id'],
  349. result[0]['checksum'])
  350. def iterate_regions(self, file_id):
  351. for each in self.select_rows("__regions__", filters = [("file_id", "=", file_id)]):
  352. yield self.RegionData(each['file_id'],
  353. each['region_id'],
  354. each['name'],
  355. each['begin'],
  356. each['end'],
  357. each['line_begin'],
  358. each['line_end'],
  359. each['cursor'],
  360. each['group_id'],
  361. each['checksum'])
  362. def create_marker(self, file_id, begin, end, group):
  363. assert(self.read_only == False)
  364. sql = "INSERT OR REPLACE INTO __markers__ (file_id, begin, end, group_id) VALUES (?, ?, ?, ?)"
  365. column_data = [file_id, begin, end, group]
  366. self.log(sql + " /with arguments: " + str(column_data))
  367. cur = self.conn.cursor()
  368. cur.execute(sql, column_data)
  369. return cur.lastrowid
  370. def iterate_markers(self, file_id):
  371. for each in self.select_rows("__markers__", filters = [("file_id", "=", file_id)]):
  372. yield self.MarkerData(each['file_id'],
  373. each['name'],
  374. each['begin'],
  375. each['group_id'])
  376. def add_row(self, table_name, file_id, region_id, array_data):
  377. assert(self.read_only == False)
  378. column_names = "'file_id'"
  379. column_values = "?"
  380. column_data = [file_id]
  381. if region_id != None:
  382. column_names += ", 'region_id'"
  383. column_values += ", ?"
  384. column_data.append(region_id)
  385. useful_data = 0
  386. for each in array_data:
  387. column_names += ", '" + each[0] + "'"
  388. column_values += ", ?"
  389. column_data.append(each[1])
  390. useful_data += 1
  391. if useful_data == 0:
  392. return
  393. sql = "INSERT OR REPLACE INTO '" + table_name + "' (" + column_names + ") VALUES (" + column_values + ")"
  394. self.log(sql + " /with arguments: " + str(column_data))
  395. cur = self.conn.cursor()
  396. cur.execute(sql, column_data)
  397. return cur.lastrowid
  398. def select_rows(self, table_name, path_like = None, column_names = [], filters = []):
  399. safe_column_names = []
  400. for each in column_names:
  401. safe_column_names.append("'" + each + "'")
  402. return self.select_rows_unsafe(table_name, path_like = path_like, column_names = safe_column_names, filters = filters)
  403. def select_rows_unsafe(self, table_name, path_like = None, column_names = [], filters = []):
  404. path_like = self.InternalPathUtils().normalize_path(path_like)
  405. if self.conn == None:
  406. return []
  407. table_stmt = "'" + table_name + "'"
  408. what_stmt = ", ".join(column_names)
  409. if len(what_stmt) == 0:
  410. what_stmt = "*"
  411. elif path_like != None:
  412. what_stmt += ", '__files__'.'path', '__files__'.'id'"
  413. inner_stmt = ""
  414. if path_like != None:
  415. inner_stmt = " INNER JOIN '__files__' ON '__files__'.'id' = '" + table_name + "'.'file_id' "
  416. where_stmt = " "
  417. values = ()
  418. if len(filters) != 0:
  419. if filters[0][1] == 'IN':
  420. where_stmt = " WHERE (`" + filters[0][0] + "` " + filters[0][1] + " " + filters[0][2]
  421. else:
  422. where_stmt = " WHERE (`" + filters[0][0] + "` " + filters[0][1] + " ?"
  423. values = (filters[0][2],)
  424. for each in filters[1:]:
  425. if each[1] == 'IN':
  426. where_stmt += " AND `" + each[0] + "` " + each[1] + " " + each[2]
  427. else:
  428. where_stmt += " AND `" + each[0] + "` " + each[1] + " ?"
  429. values += (each[2], )
  430. if path_like != None:
  431. where_stmt += " AND '__files__'.'path' LIKE ?"
  432. values += (path_like, )
  433. where_stmt += ")"
  434. else:
  435. where_stmt = " WHERE '__files__'.'path' LIKE ?"
  436. values += (path_like, )
  437. sql = "SELECT " + what_stmt + " FROM " + table_stmt + inner_stmt + where_stmt
  438. self.log(sql + " /with arguments: " + str(values))
  439. return self.conn.execute(sql, values).fetchall()
  440. def get_row(self, table_name, file_id, region_id):
  441. selected = self.get_rows(table_name, file_id, region_id)
  442. # assures that only one row in database
  443. # if assertion happens, caller's intention is not right, use get_rows instead
  444. assert(len(selected) == 0 or len(selected) == 1)
  445. if len(selected) == 0:
  446. return None
  447. return selected[0]
  448. def get_rows(self, table_name, file_id, region_id):
  449. filters = [("file_id", '=', file_id)]
  450. if region_id != None:
  451. filters.append(("region_id", '=', region_id))
  452. return self.select_rows(table_name, filters=filters)
  453. def aggregate_rows(self, table_name, path_like = None, column_names = None, filters = []):
  454. if column_names == None:
  455. column_names = []
  456. for column in self.iterate_columns(table_name):
  457. column_names.append(column.name)
  458. if len(column_names) == 0:
  459. # it is possible that a table does not have meanfull columns
  460. return {}
  461. total_column_names = []
  462. for column_name in column_names:
  463. for func in ['max', 'min', 'avg', 'total']:
  464. total_column_names.append(func + "('" + table_name + "'.'" + column_name + "') AS " + "'" + column_name + "_" + func + "'")
  465. data = self.select_rows_unsafe(table_name, path_like = path_like, column_names = total_column_names, filters = filters)
  466. assert(len(data) == 1)
  467. result = {}
  468. for column_name in column_names:
  469. result[column_name] = {}
  470. for func in ['max', 'min', 'avg', 'total']:
  471. result[column_name][func] = data[0][column_name + "_" + func]
  472. return result
  473. def log(self, sql):
  474. #import traceback
  475. #traceback.print_stack()
  476. logging.debug("[" + str(self.id) + "] Executing query: " + sql)