view roundup/cgi/TAL/XMLParser.py @ 5305:e20f472fde7d

issue2550799: provide basic support for handling html only emails Initial implementation and testing with the dehtml html converter done. The use of beautifulsoup 4 is not tested. My test system breaks when running dehtml.py using beautiful soup. I don't get the failures when running under the test harness, but the text output is significantly different (different line breaks, number of newlines etc.) The tests for dehtml need to be generated for beautiful soup and the expected output changed. Since I have a wonky install of beautiful soup, I don't trust my output as the standard to test against. Also since beautiful soup is optional, the test harness needs to skip the beautifulsoup tests if import bs4 fails. Again something outside of my expertise. I deleted the work I had done to implement that. I could not get it working and wanted to get this feature in in some form.
author John Rouillard <rouilj@ieee.org>
date Fri, 13 Oct 2017 21:46:59 -0400
parents 8c2402a78bb0
children 88dbacd11cd1
line wrap: on
line source

##############################################################################
#
# Copyright (c) 2001, 2002 Zope Corporation and Contributors.
# All Rights Reserved.
#
# This software is subject to the provisions of the Zope Public License,
# Version 2.0 (ZPL).  A copy of the ZPL should accompany this distribution.
# THIS SOFTWARE IS PROVIDED "AS IS" AND ANY AND ALL EXPRESS OR IMPLIED
# WARRANTIES ARE DISCLAIMED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF TITLE, MERCHANTABILITY, AGAINST INFRINGEMENT, AND FITNESS
# FOR A PARTICULAR PURPOSE
#
##############################################################################
# Modifications for Roundup:
# 1. commented out zLOG references
"""
Generic expat-based XML parser base class.
"""

#import zLOG

class XMLParser:

    ordered_attributes = 0

    handler_names = [
        "StartElementHandler",
        "EndElementHandler",
        "ProcessingInstructionHandler",
        "CharacterDataHandler",
        "UnparsedEntityDeclHandler",
        "NotationDeclHandler",
        "StartNamespaceDeclHandler",
        "EndNamespaceDeclHandler",
        "CommentHandler",
        "StartCdataSectionHandler",
        "EndCdataSectionHandler",
        "DefaultHandler",
        "DefaultHandlerExpand",
        "NotStandaloneHandler",
        "ExternalEntityRefHandler",
        "XmlDeclHandler",
        "StartDoctypeDeclHandler",
        "EndDoctypeDeclHandler",
        "ElementDeclHandler",
        "AttlistDeclHandler"
        ]

    def __init__(self, encoding=None):
        self.parser = p = self.createParser()
        if self.ordered_attributes:
            try:
                self.parser.ordered_attributes = self.ordered_attributes
            except AttributeError:
                #zLOG.LOG("TAL.XMLParser", zLOG.INFO, 
                #         "Can't set ordered_attributes")
                self.ordered_attributes = 0
        for name in self.handler_names:
            method = getattr(self, name, None)
            if method is not None:
                try:
                    setattr(p, name, method)
                except AttributeError:
                    #zLOG.LOG("TAL.XMLParser", zLOG.PROBLEM,
                    #         "Can't set expat handler %s" % name)
                    pass

    def createParser(self, encoding=None):
        global XMLParseError
        try:
            from Products.ParsedXML.Expat import pyexpat
            XMLParseError = pyexpat.ExpatError
            return pyexpat.ParserCreate(encoding, ' ')
        except ImportError:
            from xml.parsers import expat
            XMLParseError = expat.ExpatError
            return expat.ParserCreate(encoding, ' ')

    def parseFile(self, filename):
        self.parseStream(open(filename))

    def parseString(self, s):
        self.parser.Parse(s, 1)

    def parseURL(self, url):
        import urllib
        self.parseStream(urllib.urlopen(url))

    def parseStream(self, stream):
        self.parser.ParseFile(stream)

    def parseFragment(self, s, end=0):
        self.parser.Parse(s, end)

Roundup Issue Tracker: http://roundup-tracker.org/