changeset 7561:91725f12b239

Support markdown2 2.4.10 and 2.4.8 or earlier, and exclude 2.4.9. Handle these changes to markdown2: version 2.4.9 broke links like [issue1](issue1), so raise an error if it is used; version 2.4.10 changed how filtering of schemes is done, so adapt to the new method. Mail URLs in markdown are formatted [label](mailto:user@something.com). The markdown format wrapper uses the plain text formatter to turn issue1 and user@something.com into markdown formatted strings to be htmlized by the markdown formatters. However, when the plain text formatter saw (mailto:user@something.com) it made it (mailto:<user@something.com>). This is broken as the email address shouldn't have the angle brackets. By modifying the email pattern to include an optional mailto:, all three markdown formatters do the right thing and I don't end up with href="<user@something.com>" in the link.
author John Rouillard <rouilj@ieee.org>
date Sun, 23 Jul 2023 16:50:35 -0400
parents 5cadcaa13bed
children cd30fa0d6aec
files CHANGES.txt roundup/cgi/templating.py test/test_templating.py
diffstat 3 files changed, 132 insertions(+), 17 deletions(-) [+]
line wrap: on
line diff
--- a/CHANGES.txt	Sun Jul 23 16:11:23 2023 -0400
+++ b/CHANGES.txt	Sun Jul 23 16:50:35 2023 -0400
@@ -22,6 +22,9 @@
 - issue2551184 - improve i18n handling. Patch to test to make sure it
   uses the test tracker's locale files and not other locale
   files. (Marcus Priesch)
+- issue2551283 - fail if version 2.4.9 of markdown2 is used, it's
+  broken. Support version 2.4.10 with its new scheme filtering
+  method and 2.4.8 and earlier. (John Rouillard)
 
 Features:
 
--- a/roundup/cgi/templating.py	Sun Jul 23 16:11:23 2023 -0400
+++ b/roundup/cgi/templating.py	Sun Jul 23 16:50:35 2023 -0400
@@ -61,10 +61,49 @@
         import markdown2
         import re
 
-        class Markdown(markdown2.Markdown):
-            # don't allow disabled protocols in links
-            _safe_protocols = re.compile('(?!' + ':|'.join([
-                re.escape(s) for s in _disable_url_schemes])
+        # Note: version 2.4.9 does not work with Roundup as it breaks
+        # [issue1](issue1) formatted links.
+
+        # Versions 2.4.8 and 2.4.10 use different methods to filter
+        # allowed schemes. 2.4.8 uses a pre-compiled regexp while
+        # 2.4.10 uses a regexp string that it compiles.
+
+        markdown2_vi = markdown2.__version_info__
+        if  markdown2_vi > (2, 4, 9):
+            # Create the filtering regexp.
+            # Allowed default is same as what hyper_re supports.
+
+            # pathed_schemes are terminated with ://
+            pathed_schemes =  [ 'http', 'https', 'ftp', 'ftps' ]
+            # non_pathed are terminated with a :
+            non_pathed_schemes = [ "mailto" ]
+
+            for disabled in _disable_url_schemes:
+                try:
+                    pathed_schemes.remove(disabled)
+                except ValueError:  # if disabled not in list
+                    pass
+                try:
+                    non_pathed_schemes.remove(disabled)
+                except ValueError:
+                    pass
+
+            re_list = []
+            for scheme in pathed_schemes:
+                re_list.append(r'(?:%s)://' % scheme)
+            for scheme in non_pathed_schemes:
+                re_list.append(r'(?:%s):' % scheme)
+
+            enabled_schemes = r"|".join(re_list)
+            class Markdown(markdown2.Markdown):
+                _safe_protocols = enabled_schemes
+        elif markdown2_vi == (2, 4, 9):
+            raise RuntimeError("Unsupported version - markdown2 v2.4.9\n")
+        else:
+            class Markdown(markdown2.Markdown):
+                # don't allow disabled protocols in links
+                _safe_protocols = re.compile('(?!' + ':|'.join([
+                    re.escape(s) for s in _disable_url_schemes])
                                          + ':)', re.IGNORECASE)
 
         def _extras(config):
@@ -1639,7 +1678,7 @@
          (:[\d]{1,5})?                     # port
          (/[\w\-$.+!*(),;:@&=?/~\\#%]*)?   # path etc.
         )|
-        (?P<email>[-+=%/\w\.]+@[\w\.\-]+)|
+        (?P<email>(?:mailto:)?[-+=%/\w\.]+@[\w\.\-]+)|
         (?P<item>(?P<class>[A-Za-z_]+)(\s*)(?P<id>\d+)(?P<fragment>\#[^][\#%^{}"<>\s]+)?)
     )''', re.X | re.I)
     protocol_re = re.compile('^(ht|f)tp(s?)://', re.I)
--- a/test/test_templating.py	Sun Jul 23 16:11:23 2023 -0400
+++ b/test/test_templating.py	Sun Jul 23 16:50:35 2023 -0400
@@ -11,6 +11,8 @@
 import pytest
 from .pytest_patcher import mark_class
 
+from markdown2 import __version_info__ as md2__version_info__
+
 if ReStructuredText:
     skip_rst = lambda func, *args, **kwargs: func
 else:
@@ -774,8 +776,12 @@
         self.assertEqual(p.markdown().strip(), u2s(u'<p>A string with &lt;br&gt; <em>embedded</em> \u00df</p>'))
 
     def test_string_markdown_link(self):
-        p = StringHTMLProperty(self.client, 'test', '1', None, 'test', u2s(u'A link <http://localhost>'))
-        self.assertEqual(p.markdown().strip(), u2s(u'<p>A link <a href="http://localhost">http://localhost</a></p>'))
+        p = StringHTMLProperty(self.client, 'test', '1', None, 'test',
+                               u2s(u'A link <http://localhost>'))
+        m = p.markdown().strip()
+        m = self.mangleMarkdown2(m)
+
+        self.assertEqual( u2s(u'<p>A link <a href="http://localhost" rel="nofollow noopener">http://localhost</a></p>'), m)
 
     def test_string_markdown_link_item(self):
         """ The link formats for the different markdown engines changes.
@@ -783,29 +789,78 @@
             is different. So most tests check for a substring that indicates
             success rather than the entire returned string.
         """
-        p = StringHTMLProperty(self.client, 'test', '1', None, 'test', u2s(u'An issue1 link'))
+        p = StringHTMLProperty(self.client, 'test', '1', None, 'test',
+                               u2s(u'An issue1 link'))
         self.assertIn( u2s(u'href="issue1"'), p.markdown().strip())
         # just verify that plain linking is working
         self.assertIn( u2s(u'href="issue1"'), p.plain(hyperlink=1))
 
-        p = StringHTMLProperty(self.client, 'test', '1', None, 'test', u2s(u'An [issue1](issue1) link'))
+        p = StringHTMLProperty(self.client, 'test', '1', None, 'test',
+                               u2s(u'An [issue1](issue1) link'))
         self.assertIn( u2s(u'href="issue1"'), p.markdown().strip())
         # just verify that plain linking is working
         self.assertIn( u2s(u'href="issue1"'), p.plain(hyperlink=1))
 
-        p = StringHTMLProperty(self.client, 'test', '1', None, 'test', u2s(u'An [issue1](https://example.com/issue1) link'))
-        self.assertIn( u2s(u'href="https://example.com/issue1"'), p.markdown().strip())
+        p = StringHTMLProperty(
+            self.client, 'test', '1', None, 'test',
+            u2s(u'An [issue1](https://example.com/issue1) link'))
+        self.assertIn( u2s(u'href="https://example.com/issue1"'),
+                       p.markdown().strip())
+
+        p = StringHTMLProperty(self.client, 'test', '1', None, 'test',
+                               u2s(u'An [issu1](#example) link'))
+        self.assertIn( u2s(u'href="#example"'), p.markdown().strip())
+
+        p = StringHTMLProperty(self.client, 'test', '1', None, 'test',
+                               u2s(u'An [issu1](/example) link'))
+        self.assertIn( u2s(u'href="/example"'), p.markdown().strip())
 
-        p = StringHTMLProperty(self.client, 'test', '1', None, 'test', u2s(u'An [issue1] (https://example.com/issue1) link'))
+        p = StringHTMLProperty(self.client, 'test', '1', None, 'test',
+                               u2s(u'An [issu1](./example) link'))
+        self.assertIn( u2s(u'href="./example"'), p.markdown().strip())
+
+        p = StringHTMLProperty(self.client, 'test', '1', None, 'test',
+                               u2s(u'An [issu1](../example) link'))
+        self.assertIn( u2s(u'href="../example"'), p.markdown().strip())
+
+        p = StringHTMLProperty(
+            self.client, 'test', '1', None, 'test',
+            u2s(u'A [wuarchive_ftp](ftp://www.wustl.gov/file) link'))
+        self.assertIn( u2s(u'href="ftp://www.wustl.gov/file"'),
+                       p.markdown().strip())
+
+        p = StringHTMLProperty(
+            self.client, 'test', '1', None, 'test',
+            u2s(u'An [issue1] (https://example.com/issue1) link'))
         self.assertIn( u2s(u'href="issue1"'), p.markdown().strip())
         if type(self) == MistuneTestCase:
             # mistune makes the https url into a real link
-            self.assertIn( u2s(u'href="https://example.com/issue1"'), p.markdown().strip())
+            self.assertIn( u2s(u'href="https://example.com/issue1"'),
+                           p.markdown().strip())
         else:
             # the other two engines leave the parenthesized url as is.
-            self.assertIn( u2s(u' (https://example.com/issue1) link'), p.markdown().strip())
+            self.assertIn( u2s(u' (https://example.com/issue1) link'),
+                           p.markdown().strip())
 
-    def test_string_markdown_link(self):
+        p = StringHTMLProperty(self.client, 'test', '1', None, 'test',
+                               u2s(u'An [issu1](.../example) link'))
+        if (isinstance(self, Markdown2TestCase) and 
+           md2__version_info__ > (2, 4, 9)):
+            # markdown2 > 2.4.9 handles this differently
+            self.assertIn( u2s(u'href="#"'), p.markdown().strip())
+        else:
+            self.assertIn( u2s(u'href=".../example"'), p.markdown().strip())
+            
+        p = StringHTMLProperty(self.client, 'test', '1', None, 'test',
+                               u2s(u'A [phone](tel:0016175555555) link'))
+        if (isinstance(self, Markdown2TestCase) and
+           md2__version_info__ > (2, 4, 9)):
+            self.assertIn(u2s(u'href="#"'), p.markdown().strip())
+        else:
+            self.assertIn( u2s(u'href="tel:0016175555555"'),
+                           p.markdown().strip())
+
+    def test_string_email_markdown_link(self):
         # markdown2 and markdown escape the email address
         try:
             from html import unescape as html_unescape
@@ -813,12 +868,30 @@
             from HTMLParser import HTMLParser
             html_unescape = HTMLParser().unescape
 
-        p = StringHTMLProperty(self.client, 'test', '1', None, 'test', u2s(u'A link <cmeerw@example.com>'))
+        p = StringHTMLProperty(self.client, 'test', '1', None, 'test',
+                               u2s(u'A link <cmeerw@example.com>'))
         m = html_unescape(p.markdown().strip())
         m = self.mangleMarkdown2(m)
 
         self.assertEqual(m, u2s(u'<p>A link <a href="mailto:cmeerw@example.com">cmeerw@example.com</a></p>'))
 
+        p = StringHTMLProperty(
+            self.client, 'test', '1', None, 'test',
+            u2s(u'An bare email baduser@daemons.com link'))
+        m = self.mangleMarkdown2(html_unescape(p.markdown().strip()))
+        self.assertIn( u2s(u'href="mailto:baduser@daemons.com"'),
+                       m)
+        
+        p = StringHTMLProperty(
+            self.client, 'test', '1', None, 'test',
+            u2s(u'An [email_url](mailto:baduser@daemons.com) link'))
+        m = self.mangleMarkdown2(html_unescape(p.markdown().strip()))
+        
+        if isinstance(self, MistuneTestCase):
+            self.assertIn('<a href="mailto:baduser@daemons.com" rel="nofollow noopener">email_url</a>', m)            
+        else:
+            self.assertIn('<a href="mailto:baduser@daemons.com">email_url</a>', m)
+
     def test_string_markdown_javascript_link(self):
         # make sure we don't get a "javascript:" link
         p = StringHTMLProperty(self.client, 'test', '1', None, 'test', u2s(u'<javascript:alert(1)>'))
@@ -866,7 +939,7 @@
         if type(self) == MistuneTestCase:
             self.assertEqual(m, parser.normalize('<p>embedded code block &lt;pre&gt;</p>\n<pre><code class="lang-python">line 1\nline 2\n</code></pre>\n<p>new &lt;/pre&gt; paragraph</p>'))
         elif type(self) == MarkdownTestCase:
-            self.assertEqual(m, parser.normalize('<p>embedded code block &lt;pre&gt;</p>\n<pre><code class="language-python">line 1\nline 2\n</code></pre>\n<p>new &lt;/pre&gt; paragraph</p>'))
+            self.assertEqual(m.replace('class="python"','class="language-python"'), parser.normalize('<p>embedded code block &lt;pre&gt;</p>\n<pre><code class="language-python">line 1\nline 2\n</code></pre>\n<p>new &lt;/pre&gt; paragraph</p>'))
         else:
             expected_result = parser.normalize('<p>embedded code block &lt;pre&gt;</p>\n<div class="codehilite"><pre><span></span><code><span class="n">line</span> <span class="mi">1</span>\n<span class="n">line</span> <span class="mi">2</span>\n</code></pre></div>\n<p>new &lt;/pre&gt; paragraph</p>')
             self.assertEqual(m, expected_result)

Roundup Issue Tracker: http://roundup-tracker.org/