comparison roundup/backends/indexer_dbm.py @ 5470:e2baa4e6ed6d

handle words starting with unicode characters
author Christof Meerwald <cmeerw@cmeerw.org>
date Sat, 28 Jul 2018 22:39:24 +0100
parents 23b8e6067f7c
children 4c7662c86a36
Comparison legend (row types): equal, deleted, inserted, replaced
5469:115efa91f7a1 5470:e2baa4e6ed6d
187 if not wordlist: 187 if not wordlist:
188 segments = self.segments 188 segments = self.segments
189 else: 189 else:
190 segments = ['-','#'] 190 segments = ['-','#']
191 for word in wordlist: 191 for word in wordlist:
192 segments.append(word[0].upper()) 192 initchar = word[0].upper()
193 if initchar not in self.segments:
194 initchar = '_'
195 segments.append(initchar)
193 196
194 # Load the segments 197 # Load the segments
195 for segment in segments: 198 for segment in segments:
196 try: 199 try:
197 f = open(self.indexdb + segment, 'rb') 200 f = open(self.indexdb + segment, 'rb')
240 segdicts = {} # Need batch of empty dicts 243 segdicts = {} # Need batch of empty dicts
241 for segment in letters: 244 for segment in letters:
242 segdicts[segment] = {} 245 segdicts[segment] = {}
243 for word, entry in self.words.items(): # Split into segment dicts 246 for word, entry in self.words.items(): # Split into segment dicts
244 initchar = word[0].upper() 247 initchar = word[0].upper()
248 if initchar not in letters:
249 # if it's a unicode character, add it to the '_' segment
250 initchar = '_'
245 segdicts[initchar][word] = entry 251 segdicts[initchar][word] = entry
246 252
247 # save 253 # save
248 for initchar in letters: 254 for initchar in letters:
249 db = {'WORDS':segdicts[initchar], 'FILES':None, 'FILEIDS':None} 255 db = {'WORDS':segdicts[initchar], 'FILES':None, 'FILEIDS':None}

Roundup Issue Tracker: http://roundup-tracker.org/