view roundup/cgi/TAL/XMLParser.py @ 6593:e70e2789bc2c

issue2551189 - increase text search maxlength This removes I think all the magic references to 25 and 30 (varchar size) and replaces them with references to maxlength or maxlength+5. I am not sure why the db column is 5 characters larger than the size of what should be the max size of a word, but I'll keep the buffer of 5 as making it 1/5 the size of maxlength makes less sense. Also added tests for fts search in templating which were missing. Added postgres, mysql and sqlite native indexing backends in which to test fts. Added fts test to native-fts as well to make sure it's working. I want to commit this now for CI. Todo: add test cases for the use of FTS in the csv output in actions.py. There is no test coverage of the match case there. change maxlength to a higher value (50) as requested in the ticket. Modify existing extremewords test cases to allow words > 25 and < 51 write code to migrate column sizes for mysql and postgresql to match maxlength I will roll this into the version 7 schema update that supports use of database fts support.
author John Rouillard <rouilj@ieee.org>
date Tue, 25 Jan 2022 13:22:00 -0500
parents 14a61eabcea8
children
line wrap: on
line source

##############################################################################
#
# Copyright (c) 2001, 2002 Zope Corporation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL).  A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################
# Modifications for Roundup:
# 1. commented out zLOG references
# 2. use roundup.anypy.urllib_
"""
Generic expat-based XML parser base class.
"""

#import zLOG

class XMLParser:

    ordered_attributes = 0

    handler_names = [
        "StartElementHandler",
        "EndElementHandler",
        "ProcessingInstructionHandler",
        "CharacterDataHandler",
        "UnparsedEntityDeclHandler",
        "NotationDeclHandler",
        "StartNamespaceDeclHandler",
        "EndNamespaceDeclHandler",
        "CommentHandler",
        "StartCdataSectionHandler",
        "EndCdataSectionHandler",
        "DefaultHandler",
        "DefaultHandlerExpand",
        "NotStandaloneHandler",
        "ExternalEntityRefHandler",
        "XmlDeclHandler",
        "StartDoctypeDeclHandler",
        "EndDoctypeDeclHandler",
        "ElementDeclHandler",
        "AttlistDeclHandler"
        ]

    def __init__(self, encoding=None):
        self.parser = p = self.createParser()
        # Make sure we don't get fed unicode strings in Python 2 as we
        # can't handle those
        if hasattr(self.parser, 'returns_unicode'):
            self.parser.returns_unicode = False
        if self.ordered_attributes:
            try:
                self.parser.ordered_attributes = self.ordered_attributes
            except AttributeError:
                #zLOG.LOG("TAL.XMLParser", zLOG.INFO, 
                #         "Can't set ordered_attributes")
                self.ordered_attributes = 0
        for name in self.handler_names:
            method = getattr(self, name, None)
            if method is not None:
                try:
                    setattr(p, name, method)
                except AttributeError:
                    #zLOG.LOG("TAL.XMLParser", zLOG.PROBLEM,
                    #         "Can't set expat handler %s" % name)
                    pass

    def createParser(self, encoding=None):
        global XMLParseError
        try:
            from Products.ParsedXML.Expat import pyexpat
            XMLParseError = pyexpat.ExpatError
            return pyexpat.ParserCreate(encoding, ' ')
        except ImportError:
            from xml.parsers import expat
            XMLParseError = expat.ExpatError
            return expat.ParserCreate(encoding, ' ')

    def parseFile(self, filename):
        self.parseStream(open(filename))

    def parseString(self, s):
        self.parser.Parse(s, 1)

    def parseURL(self, url):
        import roundup.anypy.urllib_
        self.parseStream(roundup.anypy.urllib_.urlopen(url))

    def parseStream(self, stream):
        self.parser.ParseFile(stream)

    def parseFragment(self, s, end=0):
        self.parser.Parse(s, end)

Roundup Issue Tracker: http://roundup-tracker.org/