Mercurial > p > roundup > code
annotate roundup/cgi/TAL/TALParser.py @ 5096:e74c3611b138
- issue2550636, issue2550909: Added support for Whoosh indexer.
Also adds new config.ini setting called indexer to select
indexer. See ``doc/upgrading.txt`` for details. Initial patch
done by David Wolever. Patch modified (see ticket or below for
changes), docs updated and committed.
I have an outstanding issue with test/test_indexer.py. I have to
comment out all imports and tests for indexers I don't have (i.e.
mysql, postgres) otherwise no tests run.
With that change made, dbm, sqlite (rdbms), xapian and whoosh indexes
are all passing the indexer tests.
Changes summary:
1) support native back ends dbm and rdbms. (original patch only fell
through to dbm)
2) Developed whoosh stopfilter to not index stopwords or words outside
the maxlength and minlength limits defined in index_common.py.
Required to pass the extremewords test_indexer test. Also I
removed a call to .lower on the input text as the tokenizer I chose
automatically does the lowercase.
3) Added support for max/min length to find. This was needed to pass
extremewords test.
4) Added back a call to save_index in add_text. This allowed all but
two tests to pass.
5) Fixed a call to:
results = searcher.search(query.Term("identifier", identifier))
which had an extra parameter that is an error under current whoosh.
6) Set limit=None in search call for find(), otherwise it only returns
10 items. This allowed it to pass the manyresults test.
Also due to changes in the roundup code removed the call in
indexer_whoosh to
from roundup.anypy.sets_ import set
since we use the python builtin set.
| author | John Rouillard <rouilj@ieee.org> |
|---|---|
| date | Sat, 25 Jun 2016 20:10:03 -0400 |
| parents | 8c2402a78bb0 |
| children | 12fe83f90f0d |
| rev | line source |
|---|---|
| 1049 | 1 ############################################################################## |
| 2 # | |
| 3 # Copyright (c) 2001, 2002 Zope Corporation and Contributors. | |
| 4 # All Rights Reserved. | |
|
2348
8c2402a78bb0
beginning getting ZPT up to date: TAL first
Richard Jones <richard@users.sourceforge.net>
parents:
2005
diff
changeset
|
5 # |
| 1049 | 6 # This software is subject to the provisions of the Zope Public License, |
| 7 # Version 2.0 (ZPL). A copy of the ZPL should accompany this distribution. | |
| 8 # THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED | |
| 9 # WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED | |
| 10 # WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS | |
|
2348
8c2402a78bb0
beginning getting ZPT up to date: TAL first
Richard Jones <richard@users.sourceforge.net>
parents:
2005
diff
changeset
|
11 # FOR A PARTICULAR PURPOSE. |
|
8c2402a78bb0
beginning getting ZPT up to date: TAL first
Richard Jones <richard@users.sourceforge.net>
parents:
2005
diff
changeset
|
12 # |
| 1049 | 13 ############################################################################## |
| 14 """ | |
|
2348
8c2402a78bb0
beginning getting ZPT up to date: TAL first
Richard Jones <richard@users.sourceforge.net>
parents:
2005
diff
changeset
|
15 Parse XML and compile to TALInterpreter intermediate code. |
|
8c2402a78bb0
beginning getting ZPT up to date: TAL first
Richard Jones <richard@users.sourceforge.net>
parents:
2005
diff
changeset
|
16 """ |
| 1049 | 17 |
| 18 from XMLParser import XMLParser | |
|
2348
8c2402a78bb0
beginning getting ZPT up to date: TAL first
Richard Jones <richard@users.sourceforge.net>
parents:
2005
diff
changeset
|
19 from TALDefs import XML_NS, ZOPE_I18N_NS, ZOPE_METAL_NS, ZOPE_TAL_NS |
| 1049 | 20 from TALGenerator import TALGenerator |
| 21 | |
class TALParser(XMLParser):
    """Parse XML and compile it to TALInterpreter intermediate code.

    The handler methods receive namespace-qualified names in the form
    "<uri> <localname>", map them back to "prefix:localname" using the
    namespace declarations seen so far, sort the attributes into TAL,
    METAL and i18n groups, and forward everything to the generator held
    in ``self.gen``.
    """

    # When true, StartElementHandler receives attrs as a flat list of
    # alternating names and values rather than as a dict.
    ordered_attributes = 1

    def __init__(self, gen=None):  # Override
        """Set up the parser.

        gen -- generator that receives the emitted events; a fresh
               TALGenerator is created when omitted.
        """
        XMLParser.__init__(self)
        if gen is None:
            gen = TALGenerator()
        self.gen = gen
        self.nsStack = []               # saved copies of nsDict, one per declaration
        self.nsDict = {XML_NS: 'xml'}   # namespace uri -> prefix currently in scope
        self.nsNew = []                 # (prefix, uri) pairs declared since the last start tag

    def getCode(self):
        """Return whatever the generator's getCode() returns."""
        return self.gen.getCode()

    def getWarnings(self):
        """Return the parser warnings; this parser never produces any."""
        return ()

    def StartNamespaceDeclHandler(self, prefix, uri):
        """Record a namespace declaration that is coming into scope."""
        # Save the current mapping so EndNamespaceDeclHandler can restore it.
        self.nsStack.append(self.nsDict.copy())
        self.nsDict[uri] = prefix
        self.nsNew.append((prefix, uri))

    def EndNamespaceDeclHandler(self, prefix):
        """Restore the namespace mapping saved by the matching start call."""
        self.nsDict = self.nsStack.pop()

    def StartElementHandler(self, name, attrs):
        """Emit a start tag for element `name` with attributes `attrs`."""
        if self.ordered_attributes:
            # attrs is a list of alternating names and values
            attrlist = list(zip(attrs[::2], attrs[1::2]))
        else:
            # attrs is a dict of {name: value}
            # sorted() works on both Python 2 lists and Python 3 dict views.
            attrlist = sorted(attrs.items())  # For definiteness
        name, attrlist, taldict, metaldict, i18ndict \
              = self.process_ns(name, attrlist)
        # Namespace declarations made for this element are emitted as
        # ordinary xmlns attributes ahead of the element's own attributes.
        attrlist = self.xmlnsattrs() + attrlist
        self.gen.emitStartElement(name, attrlist, taldict, metaldict, i18ndict)

    def process_ns(self, name, attrlist):
        """Resolve namespaces on an element name and its attributes.

        name     -- element name as received by StartElementHandler
        attrlist -- iterable of (key, value) attribute pairs

        Returns (name, fixedattrlist, taldict, metaldict, i18ndict):
        `name` is the prefixed element name, `fixedattrlist` holds
        (key, value) tuples extended with 'metal', 'tal' or 'i18n' when
        the attribute belongs to that namespace, and the three dicts map
        the local attribute name to its value for each namespace.
        """
        taldict = {}
        metaldict = {}
        i18ndict = {}
        fixedattrlist = []
        name, namebase, namens = self.fixname(name)
        for key, value in attrlist:
            key, keybase, keyns = self.fixname(key)
            ns = keyns or namens  # default to tag namespace
            item = key, value
            if ns == 'metal':
                metaldict[keybase] = value
                item = item + ("metal",)
            elif ns == 'tal':
                taldict[keybase] = value
                item = item + ("tal",)
            elif ns == 'i18n':
                # NOTE(review): this assert always fires, so the two lines
                # below only run under "python -O" -- looks like a leftover
                # development guard; confirm before removing.
                assert 0, "dealing with i18n: " + repr((keybase, value))
                i18ndict[keybase] = value
                item = item + ('i18n',)
            fixedattrlist.append(item)
        if namens in ('metal', 'tal', 'i18n'):
            # The element itself lives in one of the special namespaces.
            taldict['tal tag'] = namens
        return name, fixedattrlist, taldict, metaldict, i18ndict

    def xmlnsattrs(self):
        """Return and clear the pending namespace declarations.

        Each declaration becomes an (attribute-name, uri) tuple; the
        TAL, METAL and i18n namespaces get an extra "xmlns" marker as a
        third element.
        """
        newlist = []
        for prefix, uri in self.nsNew:
            if prefix:
                key = "xmlns:" + prefix
            else:
                key = "xmlns"
            if uri in (ZOPE_METAL_NS, ZOPE_TAL_NS, ZOPE_I18N_NS):
                item = (key, uri, "xmlns")
            else:
                item = (key, uri)
            newlist.append(item)
        self.nsNew = []
        return newlist

    def fixname(self, name):
        """Split a qualified name into (prefixed, localname, namespace-tag).

        A name of the form "<uri> <localname>" is mapped to
        "prefix:localname" (or just the local name when the prefix is
        empty) and tagged 'tal', 'metal', 'i18n' or 'x' according to its
        uri.  A name without a space is returned as (name, name, None).
        """
        if ' ' in name:
            uri, name = name.split(' ')
            prefix = self.nsDict[uri]
            prefixed = name
            if prefix:
                prefixed = "%s:%s" % (prefix, name)
            ns = 'x'
            if uri == ZOPE_TAL_NS:
                ns = 'tal'
            elif uri == ZOPE_METAL_NS:
                ns = 'metal'
            elif uri == ZOPE_I18N_NS:
                ns = 'i18n'
            return (prefixed, name, ns)
        return (name, name, None)

    def EndElementHandler(self, name):
        """Emit an end tag for element `name`."""
        name = self.fixname(name)[0]
        self.gen.emitEndElement(name)

    def DefaultHandler(self, text):
        """Pass text not handled by another callback through unchanged."""
        self.gen.emitRawText(text)
| 129 | |
def test():
    """Manual smoke test: compile one XML template and render it to stdout.

    The template path is the first command-line argument when one is
    given, otherwise "tests/input/test01.xml".
    """
    import sys
    parser = TALParser()
    cli_args = sys.argv[1:]
    path = cli_args[0] if cli_args else "tests/input/test01.xml"
    parser.parseFile(path)
    program, macros = parser.getCode()
    # Function-scope imports: these modules are loaded only when test() runs.
    from TALInterpreter import TALInterpreter
    from DummyEngine import DummyEngine
    TALInterpreter(program, macros, DummyEngine(macros), sys.stdout, wrap=0)()
| 142 | |
# Run the manual smoke test when this module is executed as a script.
if __name__ == "__main__":
    test()
