Mercurial > p > roundup > code
comparison roundup/backends/indexer_dbm.py @ 5470:e2baa4e6ed6d
handle words starting with unicode characters
| author | Christof Meerwald <cmeerw@cmeerw.org> |
|---|---|
| date | Sat, 28 Jul 2018 22:39:24 +0100 |
| parents | 23b8e6067f7c |
| children | 4c7662c86a36 |
comparison
equal
deleted
inserted
replaced
| 5469:115efa91f7a1 | 5470:e2baa4e6ed6d |
|---|---|
| 187 if not wordlist: | 187 if not wordlist: |
| 188 segments = self.segments | 188 segments = self.segments |
| 189 else: | 189 else: |
| 190 segments = ['-','#'] | 190 segments = ['-','#'] |
| 191 for word in wordlist: | 191 for word in wordlist: |
| 192 segments.append(word[0].upper()) | 192 initchar = word[0].upper() |
| | 193 if initchar not in self.segments: |
| | 194 initchar = '_' |
| | 195 segments.append(initchar) |
| 193 | 196 |
| 194 # Load the segments | 197 # Load the segments |
| 195 for segment in segments: | 198 for segment in segments: |
| 196 try: | 199 try: |
| 197 f = open(self.indexdb + segment, 'rb') | 200 f = open(self.indexdb + segment, 'rb') |
| 240 segdicts = {} # Need batch of empty dicts | 243 segdicts = {} # Need batch of empty dicts |
| 241 for segment in letters: | 244 for segment in letters: |
| 242 segdicts[segment] = {} | 245 segdicts[segment] = {} |
| 243 for word, entry in self.words.items(): # Split into segment dicts | 246 for word, entry in self.words.items(): # Split into segment dicts |
| 244 initchar = word[0].upper() | 247 initchar = word[0].upper() |
| | 248 if initchar not in letters: |
| | 249 # if it's a unicode character, add it to the '_' segment |
| | 250 initchar = '_' |
| 245 segdicts[initchar][word] = entry | 251 segdicts[initchar][word] = entry |
| 246 | 252 |
| 247 # save | 253 # save |
| 248 for initchar in letters: | 254 for initchar in letters: |
| 249 db = {'WORDS':segdicts[initchar], 'FILES':None, 'FILEIDS':None} | 255 db = {'WORDS':segdicts[initchar], 'FILES':None, 'FILEIDS':None} |
