Mercurial > p > roundup > code
comparison roundup/cgi/templating.py @ 6104:a1fd9551d416
don't allow javascript URLs in markdown content
limit auto-linkification in markdown content to issue links to avoid interference with markdown link syntax
| author | Christof Meerwald <cmeerw@cmeerw.org> |
|---|---|
| date | Thu, 27 Feb 2020 21:18:56 +0000 |
| parents | af16c135fb98 |
| children | c177e7128dc9 |
comparison legend:
- equal
- deleted
- inserted
- replaced
| 6103:af16c135fb98 | 6104:a1fd9551d416 |
|---|---|
| 17 - have menu() methods accept filtering arguments | 17 - have menu() methods accept filtering arguments |
| 18 """ | 18 """ |
| 19 | 19 |
| 20 __docformat__ = 'restructuredtext' | 20 __docformat__ = 'restructuredtext' |
| 21 | 21 |
| 22 # List of schemes that are not rendered as links in rst and markdown. | |
| 23 _disable_url_schemes = [ 'javascript' ] | |
| 22 | 24 |
| 23 import base64, cgi, re, os.path, mimetypes, csv, string | 25 import base64, cgi, re, os.path, mimetypes, csv, string |
| 24 import calendar | 26 import calendar |
| 25 import textwrap | 27 import textwrap |
| 26 import time, hashlib | 28 import time, hashlib |
| 60 | 62 |
| 61 def _import_markdown2(): | 63 def _import_markdown2(): |
| 62 try: | 64 try: |
| 63 import markdown2, re | 65 import markdown2, re |
| 64 class Markdown(markdown2.Markdown): | 66 class Markdown(markdown2.Markdown): |
| 65 # don't restrict protocols in links | 67 # don't allow disabled protocols in links |
| 66 _safe_protocols = re.compile('', re.IGNORECASE) | 68 _safe_protocols = re.compile('(?!' + ':\|'.join([re.escape(s) for s in _disable_url_schemes]) + ':)', re.IGNORECASE) |
| 67 | 69 |
| 68 markdown = lambda s: Markdown(safe_mode='escape', extras={ 'fenced-code-blocks' : True }).convert(s) | 70 markdown = lambda s: Markdown(safe_mode='escape', extras={ 'fenced-code-blocks' : True }).convert(s) |
| 69 except ImportError: | 71 except ImportError: |
| 70 markdown = None | 72 markdown = None |
| 71 | 73 |
| 73 | 75 |
| 74 def _import_markdown(): | 76 def _import_markdown(): |
| 75 try: | 77 try: |
| 76 from markdown import markdown as markdown_impl | 78 from markdown import markdown as markdown_impl |
| 77 from markdown.extensions import Extension as MarkdownExtension | 79 from markdown.extensions import Extension as MarkdownExtension |
| 78 | 80 from markdown.treeprocessors import Treeprocessor |
| 79 # make sure any HTML tags get escaped | 81 |
| 80 class EscapeHtml(MarkdownExtension): | 82 class RestrictLinksProcessor(Treeprocessor): |
| 83 def run(self, root): | |
| 84 for el in root.iter('a'): | |
| 85 if 'href' in el.attrib: | |
| 86 url = el.attrib['href'].lstrip(' \r\n\t\x1a\0').lower() | |
| 87 for s in _disable_url_schemes: | |
| 88 if url.startswith(s + ':'): | |
| 89 el.attrib['href'] = '#' | |
| 90 | |
| 91 # make sure any HTML tags get escaped and some links restricted | |
| 92 class SafeHtml(MarkdownExtension): | |
| 81 def extendMarkdown(self, md, md_globals=None): | 93 def extendMarkdown(self, md, md_globals=None): |
| 82 if hasattr(md.preprocessors, 'deregister'): | 94 if hasattr(md.preprocessors, 'deregister'): |
| 83 md.preprocessors.deregister('html_block') | 95 md.preprocessors.deregister('html_block') |
| 84 else: | 96 else: |
| 85 del md.preprocessors['html_block'] | 97 del md.preprocessors['html_block'] |
| 86 if hasattr(md.inlinePatterns, 'deregister'): | 98 if hasattr(md.inlinePatterns, 'deregister'): |
| 87 md.inlinePatterns.deregister('html') | 99 md.inlinePatterns.deregister('html') |
| 88 else: | 100 else: |
| 89 del md.inlinePatterns['html'] | 101 del md.inlinePatterns['html'] |
| 90 | 102 |
| 91 markdown = lambda s: markdown_impl(s, extensions=[EscapeHtml(), 'fenced_code']) | 103 if hasattr(md.preprocessors, 'register'): |
| 104 md.treeprocessors.register(RestrictLinksProcessor(), 'restrict_links', 0) | |
| 105 else: | |
| 106 md.treeprocessors['restrict_links'] = RestrictLinksProcessor() | |
| 107 | |
| 108 markdown = lambda s: markdown_impl(s, extensions=[SafeHtml(), 'fenced_code']) | |
| 92 except ImportError: | 109 except ImportError: |
| 93 markdown = None | 110 markdown = None |
| 94 | 111 |
| 95 return markdown | 112 return markdown |
| 96 | 113 |
| 97 def _import_mistune(): | 114 def _import_mistune(): |
| 98 try: | 115 try: |
| 99 from mistune import markdown | 116 import mistune |
| 117 mistune._scheme_blacklist = [ s + ':' for s in _disable_url_schemes ] | |
| 118 markdown = mistune.markdown | |
| 100 except ImportError: | 119 except ImportError: |
| 101 markdown = None | 120 markdown = None |
| 102 | 121 |
| 103 return markdown | 122 return markdown |
| 104 | 123 |
| 1497 # disable rst directives that have security implications | 1516 # disable rst directives that have security implications |
| 1498 rst_defaults = {'file_insertion_enabled': 0, | 1517 rst_defaults = {'file_insertion_enabled': 0, |
| 1499 'raw_enabled': 0, | 1518 'raw_enabled': 0, |
| 1500 '_disable_config': 1} | 1519 '_disable_config': 1} |
| 1501 | 1520 |
| 1502 # List of schemes that are not rendered as links in rst. | |
| 1503 # Could also be used to disable links for other processors: | |
| 1504 # e.g. stext or markdown. If we can figure out how to do it. | |
| 1505 disable_schemes = [ 'javascript' ] | |
| 1506 valid_schemes = { } | 1521 valid_schemes = { } |
| 1507 | 1522 |
| 1508 def _hyper_repl(self, match): | 1523 def _hyper_repl(self, match): |
| 1509 if match.group('url'): | 1524 if match.group('url'): |
| 1510 return self._hyper_repl_url(match, '<a href="%s" rel="nofollow noopener">%s</a>%s') | 1525 return self._hyper_repl_url(match, '<a href="%s" rel="nofollow noopener">%s</a>%s') |
| 1568 else: | 1583 else: |
| 1569 # just return the matched text | 1584 # just return the matched text |
| 1570 return match.group(0) | 1585 return match.group(0) |
| 1571 | 1586 |
| 1572 def _hyper_repl_markdown(self, match): | 1587 def _hyper_repl_markdown(self, match): |
| 1573 if match.group('url'): | 1588 if match.group('id') and len(match.group('id')) < 10: |
| 1574 s = match.group('url') | |
| 1575 return '[%s](%s)'%(s, s) | |
| 1576 elif match.group('email'): | |
| 1577 s = match.group('email') | |
| 1578 return '[%s](mailto:%s)'%(s, s) | |
| 1579 elif len(match.group('id')) < 10: | |
| 1580 return self._hyper_repl_item(match,'[%(item)s](%(cls)s%(id)s)') | 1589 return self._hyper_repl_item(match,'[%(item)s](%(cls)s%(id)s)') |
| 1581 else: | 1590 else: |
| 1582 # just return the matched text | 1591 # just return the matched text |
| 1583 return match.group(0) | 1592 return match.group(0) |
| 1584 | 1593 |
| 1668 if hyperlink: | 1677 if hyperlink: |
| 1669 s = self.hyper_re.sub(self._hyper_repl_rst, s) | 1678 s = self.hyper_re.sub(self._hyper_repl_rst, s) |
| 1670 | 1679 |
| 1671 # disable javascript and possibly other url schemes from working | 1680 # disable javascript and possibly other url schemes from working |
| 1672 from docutils.utils.urischemes import schemes | 1681 from docutils.utils.urischemes import schemes |
| 1673 for sch in self.disable_schemes: | 1682 for sch in _disable_url_schemes: |
| 1674 # I catch KeyError but reraise if scheme didn't exist. | 1683 # I catch KeyError but reraise if scheme didn't exist. |
| 1675 # Safer to fail if a disabled scheme isn't found. It may | 1684 # Safer to fail if a disabled scheme isn't found. It may |
| 1676 # be a typo that keeps a bad scheme enabled. But this | 1685 # be a typo that keeps a bad scheme enabled. But this |
| 1677 # function can be called multiple times. On the first call | 1686 # function can be called multiple times. On the first call |
| 1678 # the key will be deleted. On the second call the schemes | 1687 # the key will be deleted. On the second call the schemes |
