comparison roundup/cgi/templating.py @ 6104:a1fd9551d416

don't allow javascript URLs in markdown content; limit auto-linkification in markdown content to issue links to avoid interference with markdown link syntax
author Christof Meerwald <cmeerw@cmeerw.org>
date Thu, 27 Feb 2020 21:18:56 +0000
parents af16c135fb98
children c177e7128dc9
comparison
equal deleted inserted replaced
6103:af16c135fb98 6104:a1fd9551d416
17 - have menu() methods accept filtering arguments 17 - have menu() methods accept filtering arguments
18 """ 18 """
19 19
20 __docformat__ = 'restructuredtext' 20 __docformat__ = 'restructuredtext'
21 21
22 # List of schemes that are not rendered as links in rst and markdown.
23 _disable_url_schemes = [ 'javascript' ]
22 24
23 import base64, cgi, re, os.path, mimetypes, csv, string 25 import base64, cgi, re, os.path, mimetypes, csv, string
24 import calendar 26 import calendar
25 import textwrap 27 import textwrap
26 import time, hashlib 28 import time, hashlib
60 62
61 def _import_markdown2(): 63 def _import_markdown2():
62 try: 64 try:
63 import markdown2, re 65 import markdown2, re
64 class Markdown(markdown2.Markdown): 66 class Markdown(markdown2.Markdown):
65 # don't restrict protocols in links 67 # don't allow disabled protocols in links
66 _safe_protocols = re.compile('', re.IGNORECASE) 68 _safe_protocols = re.compile('(?!' + ':|'.join([re.escape(s) for s in _disable_url_schemes]) + ':)', re.IGNORECASE)
67 69
68 markdown = lambda s: Markdown(safe_mode='escape', extras={ 'fenced-code-blocks' : True }).convert(s) 70 markdown = lambda s: Markdown(safe_mode='escape', extras={ 'fenced-code-blocks' : True }).convert(s)
69 except ImportError: 71 except ImportError:
70 markdown = None 72 markdown = None
71 73
73 75
74 def _import_markdown(): 76 def _import_markdown():
75 try: 77 try:
76 from markdown import markdown as markdown_impl 78 from markdown import markdown as markdown_impl
77 from markdown.extensions import Extension as MarkdownExtension 79 from markdown.extensions import Extension as MarkdownExtension
78 80 from markdown.treeprocessors import Treeprocessor
79 # make sure any HTML tags get escaped 81
80 class EscapeHtml(MarkdownExtension): 82 class RestrictLinksProcessor(Treeprocessor):
83 def run(self, root):
84 for el in root.iter('a'):
85 if 'href' in el.attrib:
86 url = el.attrib['href'].lstrip(' \r\n\t\x1a\0').lower()
87 for s in _disable_url_schemes:
88 if url.startswith(s + ':'):
89 el.attrib['href'] = '#'
90
91 # make sure any HTML tags get escaped and some links restricted
92 class SafeHtml(MarkdownExtension):
81 def extendMarkdown(self, md, md_globals=None): 93 def extendMarkdown(self, md, md_globals=None):
82 if hasattr(md.preprocessors, 'deregister'): 94 if hasattr(md.preprocessors, 'deregister'):
83 md.preprocessors.deregister('html_block') 95 md.preprocessors.deregister('html_block')
84 else: 96 else:
85 del md.preprocessors['html_block'] 97 del md.preprocessors['html_block']
86 if hasattr(md.inlinePatterns, 'deregister'): 98 if hasattr(md.inlinePatterns, 'deregister'):
87 md.inlinePatterns.deregister('html') 99 md.inlinePatterns.deregister('html')
88 else: 100 else:
89 del md.inlinePatterns['html'] 101 del md.inlinePatterns['html']
90 102
91 markdown = lambda s: markdown_impl(s, extensions=[EscapeHtml(), 'fenced_code']) 103 if hasattr(md.preprocessors, 'register'):
104 md.treeprocessors.register(RestrictLinksProcessor(), 'restrict_links', 0)
105 else:
106 md.treeprocessors['restrict_links'] = RestrictLinksProcessor()
107
108 markdown = lambda s: markdown_impl(s, extensions=[SafeHtml(), 'fenced_code'])
92 except ImportError: 109 except ImportError:
93 markdown = None 110 markdown = None
94 111
95 return markdown 112 return markdown
96 113
97 def _import_mistune(): 114 def _import_mistune():
98 try: 115 try:
99 from mistune import markdown 116 import mistune
117 mistune._scheme_blacklist = [ s + ':' for s in _disable_url_schemes ]
118 markdown = mistune.markdown
100 except ImportError: 119 except ImportError:
101 markdown = None 120 markdown = None
102 121
103 return markdown 122 return markdown
104 123
1497 # disable rst directives that have security implications 1516 # disable rst directives that have security implications
1498 rst_defaults = {'file_insertion_enabled': 0, 1517 rst_defaults = {'file_insertion_enabled': 0,
1499 'raw_enabled': 0, 1518 'raw_enabled': 0,
1500 '_disable_config': 1} 1519 '_disable_config': 1}
1501 1520
1502 # List of schemes that are not rendered as links in rst.
1503 # Could also be used to disable links for other processors:
1504 # e.g. stext or markdown. If we can figure out how to do it.
1505 disable_schemes = [ 'javascript' ]
1506 valid_schemes = { } 1521 valid_schemes = { }
1507 1522
1508 def _hyper_repl(self, match): 1523 def _hyper_repl(self, match):
1509 if match.group('url'): 1524 if match.group('url'):
1510 return self._hyper_repl_url(match, '<a href="%s" rel="nofollow noopener">%s</a>%s') 1525 return self._hyper_repl_url(match, '<a href="%s" rel="nofollow noopener">%s</a>%s')
1568 else: 1583 else:
1569 # just return the matched text 1584 # just return the matched text
1570 return match.group(0) 1585 return match.group(0)
1571 1586
1572 def _hyper_repl_markdown(self, match): 1587 def _hyper_repl_markdown(self, match):
1573 if match.group('url'): 1588 if match.group('id') and len(match.group('id')) < 10:
1574 s = match.group('url')
1575 return '[%s](%s)'%(s, s)
1576 elif match.group('email'):
1577 s = match.group('email')
1578 return '[%s](mailto:%s)'%(s, s)
1579 elif len(match.group('id')) < 10:
1580 return self._hyper_repl_item(match,'[%(item)s](%(cls)s%(id)s)') 1589 return self._hyper_repl_item(match,'[%(item)s](%(cls)s%(id)s)')
1581 else: 1590 else:
1582 # just return the matched text 1591 # just return the matched text
1583 return match.group(0) 1592 return match.group(0)
1584 1593
1668 if hyperlink: 1677 if hyperlink:
1669 s = self.hyper_re.sub(self._hyper_repl_rst, s) 1678 s = self.hyper_re.sub(self._hyper_repl_rst, s)
1670 1679
1671 # disable javascript and possibly other url schemes from working 1680 # disable javascript and possibly other url schemes from working
1672 from docutils.utils.urischemes import schemes 1681 from docutils.utils.urischemes import schemes
1673 for sch in self.disable_schemes: 1682 for sch in _disable_url_schemes:
1674 # I catch KeyError but reraise if scheme didn't exist. 1683 # I catch KeyError but reraise if scheme didn't exist.
1675 # Safer to fail if a disabled scheme isn't found. It may 1684 # Safer to fail if a disabled scheme isn't found. It may
1676 # be a typo that keeps a bad scheme enabled. But this 1685 # be a typo that keeps a bad scheme enabled. But this
1677 # function can be called multiple times. On the first call 1686 # function can be called multiple times. On the first call
1678 # the key will be deleted. On the second call the schemes 1687 # the key will be deleted. On the second call the schemes

Roundup Issue Tracker: http://roundup-tracker.org/