Mercurial > p > roundup > code
comparison roundup/dehtml.py @ 6110:af81e7a4302f
Don't get confused by python-future, which makes Python 3 package names importable under Python 2 (but provides only Python 2 functionality).
| author | Christof Meerwald <cmeerw@cmeerw.org> |
|---|---|
| date | Fri, 28 Feb 2020 08:48:51 +0000 |
| parents | 1700542408f3 |
| children | ef0975b4291b |
comparison
Legend (row types): equal, deleted, inserted, replaced
| 6109:b108c9fc7aea | 6110:af81e7a4302f |
|---|---|
| 1 | 1 |
| 2 from __future__ import print_function | 2 from __future__ import print_function |
| 3 from roundup.anypy.strings import u2s, uchr | 3 from roundup.anypy.strings import u2s, uchr |
| 4 | 4 |
| 5 import sys | |
| 6 _pyver = sys.version_info[0] | |
| 5 | 7 |
| 6 class dehtml: | 8 class dehtml: |
| 7 def __init__(self, converter): | 9 def __init__(self, converter): |
| 8 if converter == "none": | 10 if converter == "none": |
| 9 self.html2text = None | 11 self.html2text = None |
| 30 # use the fallback below if beautiful soup is not installed. | 32 # use the fallback below if beautiful soup is not installed. |
| 31 try: | 33 try: |
| 32 # Python 3+. | 34 # Python 3+. |
| 33 from html.parser import HTMLParser | 35 from html.parser import HTMLParser |
| 34 from html.entities import name2codepoint | 36 from html.entities import name2codepoint |
| 35 pyver = 3 | |
| 36 except ImportError: | 37 except ImportError: |
| 37 # Python 2. | 38 # Python 2. |
| 38 from HTMLParser import HTMLParser | 39 from HTMLParser import HTMLParser |
| 39 from htmlentitydefs import name2codepoint | 40 from htmlentitydefs import name2codepoint |
| 40 pyver = 2 | |
| 41 | 41 |
| 42 class DumbHTMLParser(HTMLParser): | 42 class DumbHTMLParser(HTMLParser): |
| 43 # class attribute | 43 # class attribute |
| 44 text = "" | 44 text = "" |
| 45 | 45 |
| 81 except UnicodeEncodeError: | 81 except UnicodeEncodeError: |
| 82 # print a space as a placeholder | 82 # print a space as a placeholder |
| 83 self.text = self.text + ' ' | 83 self.text = self.text + ' ' |
| 84 | 84 |
| 85 def html2text(html): | 85 def html2text(html): |
| 86 if pyver == 3: | 86 if _pyver == 3: |
| 87 parser = DumbHTMLParser(convert_charrefs=True) | 87 parser = DumbHTMLParser(convert_charrefs=True) |
| 88 else: | 88 else: |
| 89 parser = DumbHTMLParser() | 89 parser = DumbHTMLParser() |
| 90 parser.feed(html) | 90 parser.feed(html) |
| 91 parser.close() | 91 parser.close() |
