Mercurial > p > roundup > code
annotate roundup/cgi/TAL/XMLParser.py @ 5108:67fad01d2009
issue2550653: xapian search, stemming is not working
This is a partial fix for the issue. It does make stemming work
(so searching for silent will also return docs with silently in
them). However, to do this we need to lowercase the text so the
Porter stemmer will work. This means capitalization is not
preserved.
Tests in test/test_indexer for xapian backend all pass.
David Wolever (wolever) did the work.
| author | John Rouillard <rouilj@ieee.org> |
|---|---|
| date | Mon, 27 Jun 2016 22:10:45 -0400 |
| parents | 8c2402a78bb0 |
| children | 88dbacd11cd1 |
| rev | line source |
|---|---|
| 1049 | 1 ############################################################################## |
| 2 # | |
| 3 # Copyright (c) 2001, 2002 Zope Corporation and Contributors. | |
| 4 # All Rights Reserved. | |
|
2348
8c2402a78bb0
beginning getting ZPT up to date: TAL first
Richard Jones <richard@users.sourceforge.net>
parents:
2005
diff
changeset
|
5 # |
| 1049 | 6 # This software is subject to the provisions of the Zope Public License, |
| 7 # Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution. | |
| 8 # THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED | |
| 9 # WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | |
| 10 # WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS | |
| 11 # FOR A PARTICULAR PURPOSE | |
|
2348
8c2402a78bb0
beginning getting ZPT up to date: TAL first
Richard Jones <richard@users.sourceforge.net>
parents:
2005
diff
changeset
|
12 # |
| 1049 | 13 ############################################################################## |
|
2348
8c2402a78bb0
beginning getting ZPT up to date: TAL first
Richard Jones <richard@users.sourceforge.net>
parents:
2005
diff
changeset
|
14 # Modifications for Roundup: |
|
8c2402a78bb0
beginning getting ZPT up to date: TAL first
Richard Jones <richard@users.sourceforge.net>
parents:
2005
diff
changeset
|
15 # 1. commented out zLOG references |
|
8c2402a78bb0
beginning getting ZPT up to date: TAL first
Richard Jones <richard@users.sourceforge.net>
parents:
2005
diff
changeset
|
16 """ |
|
8c2402a78bb0
beginning getting ZPT up to date: TAL first
Richard Jones <richard@users.sourceforge.net>
parents:
2005
diff
changeset
|
17 Generic expat-based XML parser base class. |
|
8c2402a78bb0
beginning getting ZPT up to date: TAL first
Richard Jones <richard@users.sourceforge.net>
parents:
2005
diff
changeset
|
18 """ |
|
1071
c08b3820edd1
Adhering to ZPL
Richard Jones <richard@users.sourceforge.net>
parents:
1049
diff
changeset
|
19 |
|
2348
8c2402a78bb0
beginning getting ZPT up to date: TAL first
Richard Jones <richard@users.sourceforge.net>
parents:
2005
diff
changeset
|
20 #import zLOG |
| 1049 | 21 |
class XMLParser:
    """Generic expat-based XML parser base class.

    Subclasses define methods named after the expat handler attributes
    listed in ``handler_names`` (for example ``StartElementHandler``);
    every such method found on the instance is installed on the
    underlying expat parser when the instance is created.
    """

    # Set to a true value in a subclass to ask expat to report element
    # attributes as an ordered flat list instead of a dictionary.
    ordered_attributes = 0

    # Names of the expat parser attributes that are looked up on the
    # instance and, when present, installed as callbacks.
    handler_names = [
        "StartElementHandler",
        "EndElementHandler",
        "ProcessingInstructionHandler",
        "CharacterDataHandler",
        "UnparsedEntityDeclHandler",
        "NotationDeclHandler",
        "StartNamespaceDeclHandler",
        "EndNamespaceDeclHandler",
        "CommentHandler",
        "StartCdataSectionHandler",
        "EndCdataSectionHandler",
        "DefaultHandler",
        "DefaultHandlerExpand",
        "NotStandaloneHandler",
        "ExternalEntityRefHandler",
        "XmlDeclHandler",
        "StartDoctypeDeclHandler",
        "EndDoctypeDeclHandler",
        "ElementDeclHandler",
        "AttlistDeclHandler",
    ]

    def __init__(self, encoding=None):
        """Create the expat parser and install the available handlers.

        ``encoding`` is forwarded to ``createParser``; ``None`` leaves the
        choice of encoding to expat.
        """
        # Forward the encoding: it used to be accepted here but dropped.
        self.parser = p = self.createParser(encoding)
        if self.ordered_attributes:
            try:
                self.parser.ordered_attributes = self.ordered_attributes
            except AttributeError:
                # The parser cannot report ordered attributes; remember
                # that so handlers know which attribute format they get.
                # (zLOG reporting removed for Roundup.)
                self.ordered_attributes = 0
        for name in self.handler_names:
            method = getattr(self, name, None)
            if method is not None:
                try:
                    setattr(p, name, method)
                except AttributeError:
                    # Best effort: this expat build does not support the
                    # handler, so it is skipped.
                    # (zLOG reporting removed for Roundup.)
                    pass

    def createParser(self, encoding=None):
        """Return a new namespace-aware expat parser.

        A space is used as the namespace separator.  As a side effect the
        module-level name ``XMLParseError`` is bound to the ``ExpatError``
        class of whichever expat implementation was imported.
        """
        global XMLParseError
        try:
            from Products.ParsedXML.Expat import pyexpat
            XMLParseError = pyexpat.ExpatError
            return pyexpat.ParserCreate(encoding, ' ')
        except ImportError:
            from xml.parsers import expat
            XMLParseError = expat.ExpatError
            return expat.ParserCreate(encoding, ' ')

    def parseFile(self, filename):
        """Parse the XML document stored in the file ``filename``."""
        # Binary mode lets expat do the decoding itself (ParseFile needs
        # bytes on Python 3); the ``with`` block closes the file even
        # when parsing raises.
        with open(filename, "rb") as stream:
            self.parseStream(stream)

    def parseString(self, s):
        """Parse ``s`` as a complete XML document."""
        self.parser.Parse(s, 1)

    def parseURL(self, url):
        """Fetch ``url`` and parse the response as an XML document."""
        try:
            from urllib.request import urlopen  # Python 3
        except ImportError:
            from urllib import urlopen  # Python 2
        stream = urlopen(url)
        try:
            self.parseStream(stream)
        finally:
            stream.close()

    def parseStream(self, stream):
        """Parse a document read from ``stream``, an object with ``read``."""
        self.parser.ParseFile(stream)

    def parseFragment(self, s, end=0):
        """Feed the chunk ``s`` to the parser.

        Pass a true ``end`` with the final chunk to finish the document.
        """
        self.parser.Parse(s, end)
