Mercurial > p > roundup > code
comparison roundup/hyperdb.py @ 4472:34dce76bb202
Multilink fixes and optimizations:
- Optimisation: Late evaluation of Multilinks (only in rdbms backends):
previously we materialized each multilink in a Node -- this created an
SQL query for each multilink (e.g. 'files' and 'messages' for each
line in the issue index display) -- even if the multilinks weren't
displayed. Now we compute multilinks only when they're accessed (and
keep them cached afterwards).
- Add a filter_iter similar to the existing filter call. This feature is
considered experimental. It is currently not used in the
web-interface but passes all tests for the filter call except sorting
by Multilinks (which isn't supported by SQL and isn't a sane concept
anyway). Using filter_iter instead of filter saves a *lot*
of SQL queries: filter returns only the IDs of Nodes in the database,
so the additional content of each Node has to be fetched in a separate
SQL call. The new filter_iter also returns the IDs of Nodes (one by one,
it's an iterator) but pre-seeds the cache with the content of each
Node. The information needed for seeding the cache is retrieved in the
same SQL query as the ids.
| author | Ralf Schlatterbeck <schlatterbeck@users.sourceforge.net> |
|---|---|
| date | Mon, 21 Mar 2011 20:44:39 +0000 |
| parents | 9b619dcb030a |
| children | 1613754d2646 |
comparison
equal
deleted
inserted
replaced
| 4471:4f353d71d716 | 4472:34dce76bb202 |
|---|---|
| 282 | 282 |
| 283 class Proptree(object): | 283 class Proptree(object): |
| 284 """ Simple tree data structure for optimizing searching of | 284 """ Simple tree data structure for optimizing searching of |
| 285 properties. Each node in the tree represents a roundup Class | 285 properties. Each node in the tree represents a roundup Class |
| 286 Property that has to be navigated for finding the given search | 286 Property that has to be navigated for finding the given search |
| 287 or sort properties. The sort_type attribute is used for | 287 or sort properties. The need_for attribute is used for |
| 288 distinguishing nodes in the tree used for sorting or searching: If | 288 distinguishing nodes in the tree used for sorting, searching or |
| 289 it is 0 for a node, that node is not used for sorting. If it is 1, | 289 retrieval: The attribute is a dictionary containing one or several |
| 290 it is used for both, sorting and searching. If it is 2 it is used | 290 of the values 'sort', 'search', 'retrieve'. |
| 291 for sorting only. | |
| 292 | 291 |
| 293 The Proptree is also used for transitively searching attributes for | 292 The Proptree is also used for transitively searching attributes for |
| 294 backends that do not support transitive search (e.g. anydbm). The | 293 backends that do not support transitive search (e.g. anydbm). The |
| 295 _val attribute with set_val is used for this. | 294 _val attribute with set_val is used for this. |
| 296 """ | 295 """ |
| 297 | 296 |
| 298 def __init__(self, db, cls, name, props, parent = None): | 297 def __init__(self, db, cls, name, props, parent=None, retr=False): |
| 299 self.db = db | 298 self.db = db |
| 300 self.name = name | 299 self.name = name |
| 301 self.props = props | 300 self.props = props |
| 302 self.parent = parent | 301 self.parent = parent |
| 303 self._val = None | 302 self._val = None |
| 306 self.classname = None | 305 self.classname = None |
| 307 self.uniqname = None | 306 self.uniqname = None |
| 308 self.children = [] | 307 self.children = [] |
| 309 self.sortattr = [] | 308 self.sortattr = [] |
| 310 self.propdict = {} | 309 self.propdict = {} |
| 311 self.sort_type = 0 | 310 self.need_for = {'search' : True} |
| 312 self.sort_direction = None | 311 self.sort_direction = None |
| 313 self.sort_ids = None | 312 self.sort_ids = None |
| 314 self.sort_ids_needed = False | 313 self.sort_ids_needed = False |
| 315 self.sort_result = None | 314 self.sort_result = None |
| 316 self.attr_sort_done = False | 315 self.attr_sort_done = False |
| 317 self.tree_sort_done = False | 316 self.tree_sort_done = False |
| 318 self.propclass = None | 317 self.propclass = None |
| 319 self.orderby = [] | 318 self.orderby = [] |
| 319 self.sql_idx = None # index of retrieved column in sql result | |
| 320 if parent: | 320 if parent: |
| 321 self.root = parent.root | 321 self.root = parent.root |
| 322 self.depth = parent.depth + 1 | 322 self.depth = parent.depth + 1 |
| 323 else: | 323 else: |
| 324 self.root = self | 324 self.root = self |
| 325 self.seqno = 1 | 325 self.seqno = 1 |
| 326 self.depth = 0 | 326 self.depth = 0 |
| 327 self.sort_type = 1 | 327 self.need_for['sort'] = True |
| 328 self.id = self.root.seqno | 328 self.id = self.root.seqno |
| 329 self.root.seqno += 1 | 329 self.root.seqno += 1 |
| 330 if self.cls: | 330 if self.cls: |
| 331 self.classname = self.cls.classname | 331 self.classname = self.cls.classname |
| 332 self.uniqname = '%s%s' % (self.cls.classname, self.id) | 332 self.uniqname = '%s%s' % (self.cls.classname, self.id) |
| 333 if not self.parent: | 333 if not self.parent: |
| 334 self.uniqname = self.cls.classname | 334 self.uniqname = self.cls.classname |
| 335 | 335 if retr: |
| 336 def append(self, name, sort_type = 0): | 336 self.append_retr_props() |
| 337 | |
| 338 def append(self, name, need_for='search', retr=False): | |
| 337 """Append a property to self.children. Will create a new | 339 """Append a property to self.children. Will create a new |
| 338 propclass for the child. | 340 propclass for the child. |
| 339 """ | 341 """ |
| 340 if name in self.propdict: | 342 if name in self.propdict: |
| 341 pt = self.propdict[name] | 343 pt = self.propdict[name] |
| 342 if sort_type and not pt.sort_type: | 344 pt.need_for[need_for] = True |
| 343 pt.sort_type = 1 | 345 if retr and isinstance(pt.propclass, Link): |
| 346 pt.append_retr_props() | |
| 344 return pt | 347 return pt |
| 345 propclass = self.props[name] | 348 propclass = self.props[name] |
| 346 cls = None | 349 cls = None |
| 347 props = None | 350 props = None |
| 348 if isinstance(propclass, (Link, Multilink)): | 351 if isinstance(propclass, (Link, Multilink)): |
| 349 cls = self.db.getclass(propclass.classname) | 352 cls = self.db.getclass(propclass.classname) |
| 350 props = cls.getprops() | 353 props = cls.getprops() |
| 351 child = self.__class__(self.db, cls, name, props, parent = self) | 354 child = self.__class__(self.db, cls, name, props, parent = self) |
| 352 child.sort_type = sort_type | 355 child.need_for = {need_for : True} |
| 353 child.propclass = propclass | 356 child.propclass = propclass |
| 354 self.children.append(child) | 357 self.children.append(child) |
| 355 self.propdict[name] = child | 358 self.propdict[name] = child |
| 359 if retr and isinstance(child.propclass, Link): | |
| 360 child.append_retr_props() | |
| 356 return child | 361 return child |
| 362 | |
| 363 def append_retr_props(self): | |
| 364 """Append properties for retrieval.""" | |
| 365 for name, prop in self.cls.getprops(protected=1).iteritems(): | |
| 366 if isinstance(prop, Multilink): | |
| 367 continue | |
| 368 self.append(name, need_for='retrieve') | |
| 357 | 369 |
| 358 def compute_sort_done(self, mlseen=False): | 370 def compute_sort_done(self, mlseen=False): |
| 359 """ Recursively check if attribute is needed for sorting | 371 """ Recursively check if attribute is needed for sorting |
| 360 (self.sort_type > 0) or all children have tree_sort_done set and | 372 ('sort' in self.need_for) or all children have tree_sort_done set and |
| 361 sort_ids_needed unset: set self.tree_sort_done if one of the conditions | 373 sort_ids_needed unset: set self.tree_sort_done if one of the conditions |
| 362 holds. Also remove sort_ids_needed recursively once having seen a | 374 holds. Also remove sort_ids_needed recursively once having seen a |
| 363 Multilink. | 375 Multilink. |
| 364 """ | 376 """ |
| 365 if isinstance (self.propclass, Multilink): | 377 if isinstance (self.propclass, Multilink): |
| 369 self.tree_sort_done = True | 381 self.tree_sort_done = True |
| 370 for p in self.children: | 382 for p in self.children: |
| 371 p.compute_sort_done(mlseen) | 383 p.compute_sort_done(mlseen) |
| 372 if not p.tree_sort_done: | 384 if not p.tree_sort_done: |
| 373 self.tree_sort_done = False | 385 self.tree_sort_done = False |
| 374 if not self.sort_type: | 386 if 'sort' not in self.need_for: |
| 375 self.tree_sort_done = True | 387 self.tree_sort_done = True |
| 376 if mlseen: | 388 if mlseen: |
| 377 self.tree_sort_done = False | 389 self.tree_sort_done = False |
| 378 | 390 |
| 379 def ancestors(self): | 391 def ancestors(self): |
| 387 Once all properties are non-transitive, the search generates a | 399 Once all properties are non-transitive, the search generates a |
| 388 simple _filter call which does the real work | 400 simple _filter call which does the real work |
| 389 """ | 401 """ |
| 390 filterspec = {} | 402 filterspec = {} |
| 391 for p in self.children: | 403 for p in self.children: |
| 392 if p.sort_type < 2: | 404 if 'search' in p.need_for: |
| 393 if p.children: | 405 if p.children: |
| 394 p.search(sort = False) | 406 p.search(sort = False) |
| 395 filterspec[p.name] = p.val | 407 filterspec[p.name] = p.val |
| 396 self.val = self.cls._filter(search_matches, filterspec, sort and self) | 408 self.val = self.cls._filter(search_matches, filterspec, sort and self) |
| 397 return self.val | 409 return self.val |
| 411 """ All children needed for sorting. If intermediate is True, | 423 """ All children needed for sorting. If intermediate is True, |
| 412 intermediate nodes (not being a sort attribute) are returned, | 424 intermediate nodes (not being a sort attribute) are returned, |
| 413 too. | 425 too. |
| 414 """ | 426 """ |
| 415 return [p for p in self.children | 427 return [p for p in self.children |
| 416 if p.sort_type > 0 and (intermediate or p.sort_direction)] | 428 if 'sort' in p.need_for and (intermediate or p.sort_direction)] |
| 417 | 429 |
| 418 def __iter__(self): | 430 def __iter__(self): |
| 419 """ Yield nodes in depth-first order -- visited nodes first """ | 431 """ Yield nodes in depth-first order -- visited nodes first """ |
| 420 for p in self.children: | 432 for p in self.children: |
| 421 yield p | 433 yield p |
| 532 dir_idx.append (idx) | 544 dir_idx.append (idx) |
| 533 directions.append (sa.sort_direction) | 545 directions.append (sa.sort_direction) |
| 534 curdir = sa.sort_direction | 546 curdir = sa.sort_direction |
| 535 idx += 1 | 547 idx += 1 |
| 536 sortattr.append (val) | 548 sortattr.append (val) |
| 537 #print >> sys.stderr, "\nsortattr", sortattr | |
| 538 sortattr = zip (*sortattr) | 549 sortattr = zip (*sortattr) |
| 539 for dir, i in reversed(zip(directions, dir_idx)): | 550 for dir, i in reversed(zip(directions, dir_idx)): |
| 540 rev = dir == '-' | 551 rev = dir == '-' |
| 541 sortattr = sorted (sortattr, key = lambda x:x[i:idx], reverse = rev) | 552 sortattr = sorted (sortattr, key = lambda x:x[i:idx], reverse = rev) |
| 542 idx = i | 553 idx = i |
| 1053 """For some backends this implements the non-transitive | 1064 """For some backends this implements the non-transitive |
| 1054 search, for more information see the filter method. | 1065 search, for more information see the filter method. |
| 1055 """ | 1066 """ |
| 1056 raise NotImplementedError | 1067 raise NotImplementedError |
| 1057 | 1068 |
| 1058 def _proptree(self, filterspec, sortattr=[]): | 1069 def _proptree(self, filterspec, sortattr=[], retr=False): |
| 1059 """Build a tree of all transitive properties in the given | 1070 """Build a tree of all transitive properties in the given |
| 1060 filterspec. | 1071 filterspec. |
| 1061 """ | 1072 If we retrieve (retr is True) linked items we don't follow |
| 1062 proptree = Proptree(self.db, self, '', self.getprops()) | 1073 across multilinks. We also don't follow if the searched value |
| 1074 can contain NULL values. | |
| 1075 """ | |
| 1076 proptree = Proptree(self.db, self, '', self.getprops(), retr=retr) | |
| 1063 for key, v in filterspec.iteritems(): | 1077 for key, v in filterspec.iteritems(): |
| 1064 keys = key.split('.') | 1078 keys = key.split('.') |
| 1065 p = proptree | 1079 p = proptree |
| 1080 mlseen = False | |
| 1066 for k in keys: | 1081 for k in keys: |
| 1067 p = p.append(k) | 1082 if isinstance (p.propclass, Multilink): |
| 1083 mlseen = True | |
| 1084 isnull = v == '-1' or v is None | |
| 1085 nullin = isinstance(v, type([])) and ('-1' in v or None in v) | |
| 1086 r = retr and not mlseen and not isnull and not nullin | |
| 1087 p = p.append(k, retr=r) | |
| 1068 p.val = v | 1088 p.val = v |
| 1069 multilinks = {} | 1089 multilinks = {} |
| 1070 for s in sortattr: | 1090 for s in sortattr: |
| 1071 keys = s[1].split('.') | 1091 keys = s[1].split('.') |
| 1072 p = proptree | 1092 p = proptree |
| 1093 mlseen = False | |
| 1073 for k in keys: | 1094 for k in keys: |
| 1074 p = p.append(k, sort_type = 2) | 1095 if isinstance (p.propclass, Multilink): |
| 1096 mlseen = True | |
| 1097 r = retr and not mlseen | |
| 1098 p = p.append(k, need_for='sort', retr=r) | |
| 1075 if isinstance (p.propclass, Multilink): | 1099 if isinstance (p.propclass, Multilink): |
| 1076 multilinks[p] = True | 1100 multilinks[p] = True |
| 1077 if p.cls: | 1101 if p.cls: |
| 1078 p = p.append(p.cls.orderprop(), sort_type = 2) | 1102 p = p.append(p.cls.orderprop(), need_for='sort') |
| 1079 if p.sort_direction: # if an orderprop is also specified explicitly | 1103 if p.sort_direction: # if an orderprop is also specified explicitly |
| 1080 continue | 1104 continue |
| 1081 p.sort_direction = s[0] | 1105 p.sort_direction = s[0] |
| 1082 proptree.sortattr.append (p) | 1106 proptree.sortattr.append (p) |
| 1083 for p in multilinks.iterkeys(): | 1107 for p in multilinks.iterkeys(): |
| 1156 | 1180 |
| 1157 Implementation note: | 1181 Implementation note: |
| 1158 This implements a non-optimized version of Transitive search | 1182 This implements a non-optimized version of Transitive search |
| 1159 using _filter implemented in a backend class. A more efficient | 1183 using _filter implemented in a backend class. A more efficient |
| 1160 version can be implemented in the individual backends -- e.g., | 1184 version can be implemented in the individual backends -- e.g., |
| 1161 an SQL backen will want to create a single SQL statement and | 1185 an SQL backend will want to create a single SQL statement and |
| 1162 override the filter method instead of implementing _filter. | 1186 override the filter method instead of implementing _filter. |
| 1163 """ | 1187 """ |
| 1164 sortattr = self._sortattr(sort = sort, group = group) | 1188 sortattr = self._sortattr(sort = sort, group = group) |
| 1165 proptree = self._proptree(filterspec, sortattr) | 1189 proptree = self._proptree(filterspec, sortattr) |
| 1166 proptree.search(search_matches) | 1190 proptree.search(search_matches) |
| 1167 return proptree.sort() | 1191 return proptree.sort() |
| 1192 | |
| 1193 # non-optimized filter_iter, a backend may choose to implement a | |
| 1194 # better version that provides a real iterator that pre-fills the | |
| 1195 # cache for each id returned. Note that the filter_iter doesn't | |
| 1196 # promise to correctly sort by multilink (which isn't sane to do | |
| 1197 # anyway). | |
| 1198 filter_iter = filter | |
| 1168 | 1199 |
| 1169 def count(self): | 1200 def count(self): |
| 1170 """Get the number of nodes in this class. | 1201 """Get the number of nodes in this class. |
| 1171 | 1202 |
| 1172 If the returned integer is 'numnodes', the ids of all the nodes | 1203 If the returned integer is 'numnodes', the ids of all the nodes |
