Mercurial > p > roundup > code
diff roundup/indexer.py @ 826:6d7a45c8464a
Added reindex command to roundup-admin.
Fixed reindex on first access.
Also fixed reindexing of entries that change.
| author | Richard Jones <richard@users.sourceforge.net> |
|---|---|
| date | Tue, 09 Jul 2002 04:19:09 +0000 |
| parents | 0779ea9f1f18 |
| children | 0a2c1f5e0e5a |
line wrap: on
line diff
```diff
--- a/roundup/indexer.py	Tue Jul 09 03:02:53 2002 +0000
+++ b/roundup/indexer.py	Tue Jul 09 04:19:09 2002 +0000
@@ -14,7 +14,7 @@
 # that promote freedom, but obviously am giving up any rights
 # to compel such.
 #
-#$Id: indexer.py,v 1.4 2002-07-09 03:02:52 richard Exp $
+#$Id: indexer.py,v 1.5 2002-07-09 04:19:09 richard Exp $
 '''
 This module provides an indexer class, RoundupIndexer, that stores text
 indices in a roundup instance. This class makes searching the content of
@@ -25,29 +25,35 @@
 class Indexer:
     ''' Indexes information from roundup's hyperdb to allow efficient
         searching.
+
+        Three structures are created by the indexer:
+          files   {identifier: (fileid, wordcount)}
+          words   {word: {fileid: count}}
+          fileids {fileid: identifier}
     '''
     def __init__(self, db_path):
-        indexdb_path = os.path.join(db_path, 'indexes')
-        self.indexdb = os.path.join(indexdb_path, 'index.db')
+        self.indexdb_path = os.path.join(db_path, 'indexes')
+        self.indexdb = os.path.join(self.indexdb_path, 'index.db')
         self.reindex = 0
         self.casesensitive = 0
         self.quiet = 9
 
         # see if we need to reindex because of a change in code
-        if (not os.path.exists(indexdb_path) or
-                not os.path.exists(os.path.join(indexdb_path, 'version'))):
+        if (not os.path.exists(self.indexdb_path) or
+                not os.path.exists(os.path.join(self.indexdb_path, 'version'))):
             # TODO: if the version file exists (in the future) we'll want to
             # check the value in it - for now the file itself is a flag
-            if os.path.exists(indexdb_path):
-                shutil.rmtree(indexdb_path)
-            os.makedirs(indexdb_path)
-            os.chmod(indexdb_path, 0775)
-            open(os.path.join(indexdb_path, 'version'), 'w').write('1\n')
+            self.force_reindex()
 
-            # we need to reindex
-            self.reindex = 1
-        else:
-            self.reindex = 0
+    def force_reindex(self):
+        '''Force a reindex condition
+        '''
+        if os.path.exists(self.indexdb_path):
+            shutil.rmtree(self.indexdb_path)
+        os.makedirs(self.indexdb_path)
+        os.chmod(self.indexdb_path, 0775)
+        open(os.path.join(self.indexdb_path, 'version'), 'w').write('1\n')
+        self.reindex = 1
 
     def should_reindex(self):
         '''Should we reindex?
@@ -61,16 +67,9 @@
         # make sure the index is loaded
         self.load_index()
 
-        # Is file eligible for (re)indexing?
+        # remove old entries for this identifier
         if self.files.has_key(identifier):
-            # Reindexing enabled, cleanup dicts
-            if self.reindex:
-                self.purge_entry(identifier, self.files, self.words)
-            else:
-                # DO NOT reindex this file
-                if self.quiet < 5:
-                    print "Not reindexing", identifier
-                return 0
+            self.purge_entry(identifier)
 
         # split into words
         words = self.splitter(text, mime_type)
@@ -281,19 +280,20 @@
         pickle_fh.write(zlib.compress(pickle_str))
         os.chmod(filename, 0664)
 
-    def purge_entry(self, fname, file_dct, word_dct):
+    def purge_entry(self, identifier):
         ''' Remove a file from file index and word index
         '''
-        try:  # The easy part, cleanup the file index
-            file_index = file_dct[fname]
-            del file_dct[fname]
-        except KeyError:
-            pass  # We'll assume we only encounter KeyError's
+        if not self.files.has_key(identifier):
+            return
+
+        file_index = self.files[identifier][0]
+        del self.files[identifier]
+        del self.fileids[file_index]
+
         # The much harder part, cleanup the word index
-        for word, occurs in word_dct.items():
+        for key, occurs in self.words.items():
             if occurs.has_key(file_index):
                 del occurs[file_index]
-                word_dct[word] = occurs
 
     def index_loaded(self):
         return (hasattr(self,'fileids') and hasattr(self,'files') and
@@ -301,6 +301,22 @@
 
 #
 #$Log: not supported by cvs2svn $
+#Revision 1.4 2002/07/09 03:02:52 richard
+#More indexer work:
+#- all String properties may now be indexed too. Currently there's a bit of
+#  "issue" specific code in the actual searching which needs to be
+#  addressed. In a nutshell:
+#  + pass 'indexme="yes"' as a String() property initialisation arg, eg:
+#    file = FileClass(db, "file", name=String(), type=String(),
+#      comment=String(indexme="yes"))
+#  + the comment will then be indexed and be searchable, with the results
+#    related back to the issue that the file is linked to
+#- as a result of this work, the FileClass has a default MIME type that may
+#  be overridden in a subclass, or by the use of a "type" property as is
+#  done in the default templates.
+#- the regeneration of the indexes (if necessary) is done once the schema is
+#  set up in the dbinit.
+#
 #Revision 1.3 2002/07/08 06:58:15 richard
 #cleaned up the indexer code:
 # - it splits more words out (much simpler, faster splitter)
```
