annotate roundup/dehtml.py @ 8249:d317a41dfcf3

test: make probe_ports a class method under python2.
author John Rouillard <rouilj@ieee.org>
date Wed, 01 Jan 2025 00:27:52 -0500
parents b68a1d8fd5d9
children 520075b29474
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
5305
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
1
5376
64b05e24dbd8 Python 3 preparation: convert print to a function.
Joseph Myers <jsm@polyomino.org.uk>
parents: 5305
diff changeset
2 from __future__ import print_function
7756
6079440ac023 chore(lint): doublequote strings, no yoda conitionals, sort imports...
John Rouillard <rouilj@ieee.org>
parents: 7228
diff changeset
3
6079440ac023 chore(lint): doublequote strings, no yoda conitionals, sort imports...
John Rouillard <rouilj@ieee.org>
parents: 7228
diff changeset
4 import sys
6079440ac023 chore(lint): doublequote strings, no yoda conitionals, sort imports...
John Rouillard <rouilj@ieee.org>
parents: 7228
diff changeset
5
5417
c749d6795bc2 Python 3 preparation: unichr.
Joseph Myers <jsm@polyomino.org.uk>
parents: 5416
diff changeset
6 from roundup.anypy.strings import u2s, uchr
5997
1700542408f3 flake8 cleanups dehtml.py
John Rouillard <rouilj@ieee.org>
parents: 5838
diff changeset
7
6110
af81e7a4302f don't get confused by python-future making Python 3 package names available under Python 2 (but only with Python 2 functionality)
Christof Meerwald <cmeerw@cmeerw.org>
parents: 5997
diff changeset
8 _pyver = sys.version_info[0]
5997
1700542408f3 flake8 cleanups dehtml.py
John Rouillard <rouilj@ieee.org>
parents: 5838
diff changeset
9
7228
07ce4e4110f5 flake8 fixes: whitespace, remove unused imports
John Rouillard <rouilj@ieee.org>
parents: 6669
diff changeset
10
5305
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
class dehtml:
    """Select an HTML-to-plain-text converter by name.

    After construction ``self.html2text`` is either ``None`` (when
    ``converter`` is ``"none"``) or a function that takes an HTML string
    and returns its text content.

    ``"beautifulsoup"`` uses bs4 when it can be imported.  Any other
    converter name, or a failed bs4 import, selects the built-in
    fallback based on the standard library HTMLParser.
    """

    def __init__(self, converter):
        if converter == "none":
            self.html2text = None
            return

        try:
            if converter == "beautifulsoup":
                # Not as well tested as dehtml.
                from bs4 import BeautifulSoup

                def html2text(html):
                    """Return the text of html using BeautifulSoup."""
                    soup = BeautifulSoup(html, "html.parser")

                    # kill all script and style elements
                    for script in soup(["script", "style"]):
                        script.extract()

                    return u2s(soup.get_text("\n", strip=True))

                self.html2text = html2text
            else:
                # Reuse the ImportError handler to reach the fallback
                # for every converter name other than beautifulsoup.
                raise ImportError
        except ImportError:
            # use the fallback below if beautiful soup is not installed.
            try:
                # Python 3+.
                from html.entities import name2codepoint
                from html.parser import HTMLParser
            except ImportError:
                # Python 2.
                from htmlentitydefs import name2codepoint
                from HTMLParser import HTMLParser

            class DumbHTMLParser(HTMLParser):
                """Collect text content, skipping script and style data."""

                # class attribute; replaced by an instance attribute
                # the first time text is appended.
                text = ""

                # internal state variable
                _skip_data = False
                _last_empty = False

                def handle_data(self, data):
                    """Append data unless it is skipped or a repeat blank."""
                    if self._skip_data:  # skip data in script or style block
                        return

                    if (data.strip() == ""):
                        # reduce multiple blank lines to 1
                        if (self._last_empty):
                            return
                        else:
                            self._last_empty = True
                    else:
                        self._last_empty = False

                    self.text = self.text + data

                def handle_starttag(self, tag, attrs):  # noqa: ARG002
                    """Start a new line for <p>; begin skipping for
                    <style> and <script>."""
                    if (tag == "p"):
                        self.text = self.text + "\n"
                    if (tag in ("style", "script")):
                        self._skip_data = True

                def handle_endtag(self, tag):
                    """Stop skipping data when <style>/<script> closes."""
                    if (tag in ("style", "script")):
                        self._skip_data = False

                def handle_entityref(self, name):
                    """Append the character for a named entity reference.

                    Only called when the parser is not converting
                    character references itself.
                    """
                    if self._skip_data:
                        return

                    codepoint = name2codepoint.get(name)
                    if codepoint is None:
                        # Unknown entity name: keep the reference text
                        # instead of failing the whole conversion with
                        # a KeyError.  The trailing ";" is assumed; the
                        # parser does not report whether it was present.
                        self.text = self.text + "&" + name + ";"
                        return

                    c = uchr(codepoint)
                    try:
                        self.text = self.text + c
                    except UnicodeError:
                        # print a space as a placeholder
                        # UnicodeError covers both encode and decode
                        # failures from implicit str/unicode coercion.
                        self.text = self.text + " "

            def html2text(html):
                """Return the text of html using DumbHTMLParser."""
                # convert_charrefs is only passed on Python 3; the
                # Python 2 branch constructs the parser without it.
                parser = DumbHTMLParser(
                    convert_charrefs=True) if _pyver == 3 else DumbHTMLParser()
                parser.feed(html)
                parser.close()
                return parser.text

            self.html2text = html2text
e20f472fde7d issue2550799: provide basic support for handling html only emails
John Rouillard <rouilj@ieee.org>
parents:
diff changeset
96
5997
1700542408f3 flake8 cleanups dehtml.py
John Rouillard <rouilj@ieee.org>
parents: 5838
diff changeset
97
7756
6079440ac023 chore(lint): doublequote strings, no yoda conitionals, sort imports...
John Rouillard <rouilj@ieee.org>
parents: 7228
diff changeset
if __name__ == "__main__":
    # Manual smoke test: run each converter over a sample document and
    # print the result.  The script/style contents must not show up in
    # the printed text.
    html = """
<body>
<script>
this must not be in output
</script>
<style>
p {display:block}
</style>
<div class="header"><h1>Roundup</h1>
<div id="searchbox" style="display: none">
<form class="search" action="../search.html" method="get">
<input type="text" name="q" size="18" />
<input type="submit" value="Search" />
<input type="hidden" name="check_keywords" value="yes" />
<input type="hidden" name="area" value="default" />
</form>
</div>
<script type="text/javascript">$('#searchbox').show(0);</script>
</div>
<ul class="current">
<li class="toctree-l1"><a class="reference internal" href="../index.html">Home</a></li>
<li class="toctree-l1"><a class="reference external" href="http://pypi.python.org/pypi/roundup">Download</a></li>
<li class="toctree-l1 current"><a class="reference internal" href="../docs.html">Docs</a><ul class="current">
<li class="toctree-l2"><a class="reference internal" href="features.html">Roundup Features</a></li>
<li class="toctree-l2 current"><a class="current reference internal" href="">Installing Roundup</a></li>
<li class="toctree-l2"><a class="reference internal" href="upgrading.html">Upgrading to newer versions of Roundup</a></li>
<li class="toctree-l2"><a class="reference internal" href="FAQ.html">Roundup FAQ</a></li>
<li class="toctree-l2"><a class="reference internal" href="user_guide.html">User Guide</a></li>
<li class="toctree-l2"><a class="reference internal" href="customizing.html">Customising Roundup</a></li>
<li class="toctree-l2"><a class="reference internal" href="admin_guide.html">Administration Guide</a></li>
</ul>
<div class="section" id="prerequisites">
<h2><a class="toc-backref" href="#id5">Prerequisites</a></h2>
<p>Roundup requires Python 2.5 or newer (but not Python 3) with a functioning
anydbm module. Download the latest version from <a class="reference external" href="http://www.python.org/">http://www.python.org/</a>.
It is highly recommended that users install the latest patch version
of python as these contain many fixes to serious bugs.</p>
<p>Some variants of Linux will need an additional &#8220;python dev&#8221; package
installed for Roundup installation to work. Debian and derivatives, are
known to require this.</p>
<p>If you&#8217;re on windows, you will either need to be using the ActiveState python
distribution (at <a class="reference external" href="http://www.activestate.com/Products/ActivePython/">http://www.activestate.com/Products/ActivePython/</a>), or you&#8217;ll
have to install the win32all package separately (get it from
<a class="reference external" href="http://starship.python.net/crew/mhammond/win32/">http://starship.python.net/crew/mhammond/win32/</a>).</p>
<script>
&lt; HELP &GT;
</script>
</div>
</body>
"""

    # Built-in HTMLParser based converter.
    fallback_convert = dehtml("dehtml").html2text
    if fallback_convert:
        print(fallback_convert(html))

    # BeautifulSoup converter (or the fallback when bs4 is missing).
    try:
        # trap error seen if N_TOKENS not defined when run.
        soup_convert = dehtml("beautifulsoup").html2text
        if soup_convert:
            print(soup_convert(html))
    except NameError as e:
        print("captured error %s" % e)

    # "none" must disable conversion entirely.
    disabled_convert = dehtml("none").html2text
    if not disabled_convert:
        print("PASS: dehtml(none) is returning None")
    else:
        print("FAIL: Error, dehtml(none) is returning a function")

Roundup Issue Tracker: http://roundup-tracker.org/