Mercurial > p > roundup > code

--- a/roundup/cgi/templating.py	Tue Feb 25 22:48:17 2020 -0500
+++ b/roundup/cgi/templating.py	Thu Feb 27 21:18:56 2020 +0000
@@ -19,6 +19,8 @@

 __docformat__ = 'restructuredtext'

+# List of schemes that are not rendered as links in rst and markdown.
+_disable_url_schemes = [ 'javascript' ]

 import base64, cgi, re, os.path, mimetypes, csv, string
 import calendar
@@ -62,8 +64,8 @@
     try:
         import markdown2, re
         class Markdown(markdown2.Markdown):
-            # don't restrict protocols in links
-            _safe_protocols = re.compile('', re.IGNORECASE)
+            # don't allow disabled protocols in links
+            _safe_protocols = re.compile('(?!' + ':|'.join([re.escape(s) for s in _disable_url_schemes]) + ':)', re.IGNORECASE)

         markdown = lambda s: Markdown(safe_mode='escape', extras={ 'fenced-code-blocks' : True }).convert(s)
     except ImportError:
@@ -75,9 +77,19 @@
     try:
         from markdown import markdown as markdown_impl
         from markdown.extensions import Extension as MarkdownExtension
-
-        # make sure any HTML tags get escaped
-        class EscapeHtml(MarkdownExtension):
+        from markdown.treeprocessors import Treeprocessor
+
+        class RestrictLinksProcessor(Treeprocessor):
+            # Point links using a disabled scheme at '#'. Like browsers, ignore leading control chars/spaces and any tab/CR/LF.
+            def run(self, root):
+                blocked = tuple(s + ':' for s in _disable_url_schemes)
+                for el in root.iter('a'):
+                    url = re.sub(r'^[\0-\x20]+|[\t\r\n]', '', el.attrib.get('href', ''))
+                    if url.lower().startswith(blocked):
+                        el.attrib['href'] = '#'
+
+        # make sure any HTML tags get escaped and some links restricted
+        class SafeHtml(MarkdownExtension):
             def extendMarkdown(self, md, md_globals=None):
                 if hasattr(md.preprocessors, 'deregister'):
                     md.preprocessors.deregister('html_block')
@@ -88,7 +100,12 @@
                 else:
                     del md.inlinePatterns['html']

-        markdown = lambda s: markdown_impl(s, extensions=[EscapeHtml(), 'fenced_code'])
+                if hasattr(md.preprocessors, 'register'):
+                    md.treeprocessors.register(RestrictLinksProcessor(), 'restrict_links', 0)
+                else:
+                    md.treeprocessors['restrict_links'] = RestrictLinksProcessor()
+
+        markdown = lambda s: markdown_impl(s, extensions=[SafeHtml(), 'fenced_code'])
     except ImportError:
         markdown = None

@@ -96,7 +113,9 @@

 def _import_mistune():
     try:
-        from mistune import markdown
+        import mistune
+        mistune._scheme_blacklist = [ s + ':' for s in _disable_url_schemes ]
+        markdown = mistune.markdown
     except ImportError:
         markdown = None

@@ -1499,10 +1518,6 @@
                     'raw_enabled': 0,
                     '_disable_config': 1}

-    # List of schemes that are not rendered as links in rst.
-    # Could also be used to disable links for other processors:
-    # e.g. stext or markdown. If we can figure out how to do it.
-    disable_schemes = [ 'javascript' ]
     valid_schemes = { }

     def _hyper_repl(self, match):
@@ -1570,13 +1585,7 @@
             return match.group(0)

     def _hyper_repl_markdown(self, match):
-        if match.group('url'):
-            s = match.group('url')
-            return '[%s](%s)'%(s, s)
-        elif match.group('email'):
-            s = match.group('email')
-            return '[%s](mailto:%s)'%(s, s)
-        elif len(match.group('id')) < 10:
+        if match.group('id') and len(match.group('id')) < 10:
             return self._hyper_repl_item(match,'[%(item)s](%(cls)s%(id)s)')
         else:
             # just return the matched text
@@ -1670,7 +1679,7 @@

         # disable javascript and possibly other url schemes from working
         from docutils.utils.urischemes import schemes
-        for sch in self.disable_schemes:
+        for sch in _disable_url_schemes:
             # I catch KeyError but reraise if scheme didn't exist.
             # Safer to fail if a disabled scheme isn't found. It may
             # be a typo that keeps a bad scheme enabled. But this
--- a/test/test_templating.py	Tue Feb 25 22:48:17 2020 -0500
+++ b/test/test_templating.py	Thu Feb 27 21:18:56 2020 +0000
@@ -421,8 +421,32 @@
 # common markdown test cases
 class MarkdownTests:
     def test_string_markdown(self):
-        p = StringHTMLProperty(self.client, 'test', '1', None, 'test', u2s(u'A string http://localhost with cmeerw@example.com <br> *embedded* \u00df'))
-        self.assertEqual(p.markdown().strip(), u2s(u'<p>A string <a href="http://localhost">http://localhost</a> with <a href="mailto:cmeerw@example.com">cmeerw@example.com</a> &lt;br&gt; <em>embedded</em> \u00df</p>'))
+        p = StringHTMLProperty(self.client, 'test', '1', None, 'test', u2s(u'A string with <br> *embedded* \u00df'))
+        self.assertEqual(p.markdown().strip(), u2s(u'<p>A string with &lt;br&gt; <em>embedded</em> \u00df</p>'))
+
+    def test_string_markdown_link(self):
+        p = StringHTMLProperty(self.client, 'test', '1', None, 'test', u2s(u'A link <http://localhost>'))
+        self.assertEqual(p.markdown().strip(), u2s(u'<p>A link <a href="http://localhost">http://localhost</a></p>'))
+
+    def test_string_markdown_email_link(self):
+        # markdown2 and markdown may entity-encode mailto links; unescape before comparing
+        try:
+            import html
+            html_unescape = html.unescape
+        except (ImportError, AttributeError):
+            from HTMLParser import HTMLParser
+            html_unescape = HTMLParser().unescape
+
+        p = StringHTMLProperty(self.client, 'test', '1', None, 'test', u2s(u'A link <cmeerw@example.com>'))
+        self.assertEqual(html_unescape(p.markdown().strip()), u2s(u'<p>A link <a href="mailto:cmeerw@example.com">cmeerw@example.com</a></p>'))
+
+    def test_string_markdown_javascript_link(self):
+        # neither an autolink nor an inline link may produce a "javascript:" href
+        autolink = StringHTMLProperty(self.client, 'test', '1', None, 'test', u2s(u'<javascript:alert(1)>'))
+        self.assertNotIn('href="javascript:', autolink.markdown())
+
+        inline = StringHTMLProperty(self.client, 'test', '1', None, 'test', u2s(u'[link](javascript:alert(1))'))
+        self.assertNotIn('href="javascript:', inline.markdown())

     def test_string_markdown_code_block(self):
         p = StringHTMLProperty(self.client, 'test', '1', None, 'test', u2s(u'embedded code block\n\n```\nline 1\nline 2\n```\n\nnew paragraph'))