diff roundup/cgi/templating.py @ 6282:d30501bafdfb

issue2551098: markdown links missing rel="noreferrer nofollow" Links generated by all markdown backends are missing the noopener and nofollow relations that roundup's normal text -> html core adds to prevent security issues and link spam. Now rel="nofollow" is added to links generated by the markdown2 backend and rel="nofollow noopener" to links generated by the mistune and markdown backends. Markdown2 isn't as programmable as the other two backends, so I used its built-in nofollow support. This means that, with markdown2, a user who generates a link that opens in a new window can still manipulate the parent window.
author John Rouillard <rouilj@ieee.org>
date Sat, 31 Oct 2020 14:51:16 -0400
parents 6ed5152a92d0
children 3f7538316724
line wrap: on
line diff
--- a/roundup/cgi/templating.py	Thu Oct 29 23:43:05 2020 -0400
+++ b/roundup/cgi/templating.py	Sat Oct 31 14:51:16 2020 -0400
@@ -70,7 +70,7 @@
             _safe_protocols = re.compile('(?!' + ':|'.join([re.escape(s) for s in _disable_url_schemes]) + ':)', re.IGNORECASE)
 
         def _extras(config):
-            extras = { 'fenced-code-blocks' : {} }
+            extras = { 'fenced-code-blocks' : {}, 'nofollow': None }
             if config['MARKDOWN_BREAK_ON_NEWLINE']:
                 extras['break-on-newline'] = True
             return extras
@@ -96,7 +96,21 @@
                             if url.startswith(s + ':'):
                                 el.attrib['href'] = '#'
 
+        class LinkRendererWithRel(Treeprocessor):
+            ''' Rendering class that sets the rel="nofollow noopener"
+                for links. '''
+            rel_value = "nofollow noopener"
+
+            def run(self, root):
+                for el in root.iter('a'):
+                    if 'href' in el.attrib:
+                        url = el.get('href').lstrip(' \r\n\t\x1a\0').lower()
+                        if not url.startswith('http'):  # only add rel for absolute http url's
+                            continue
+                        el.set('rel', self.rel_value)
+
         # make sure any HTML tags get escaped and some links restricted
+        # and rel="nofollow noopener" are added to links
         class SafeHtml(MarkdownExtension):
             def extendMarkdown(self, md, md_globals=None):
                 if hasattr(md.preprocessors, 'deregister'):
@@ -112,7 +126,11 @@
                     md.treeprocessors.register(RestrictLinksProcessor(), 'restrict_links', 0)
                 else:
                     md.treeprocessors['restrict_links'] = RestrictLinksProcessor()
-
+                if hasattr(md.preprocessors, 'register'):
+                    md.treeprocessors.register(LinkRendererWithRel(), 'add_link_rel', 0)
+                else:
+                    md.treeprocessors['add_link_rel'] = LinkRendererWithRel()
+                
         def _extensions(config):
             extensions = [SafeHtml(), 'fenced_code']
             if config['MARKDOWN_BREAK_ON_NEWLINE']:
@@ -128,10 +146,44 @@
 def _import_mistune():
     try:
         import mistune
+        from mistune import Renderer, escape_link, escape
+
         mistune._scheme_blacklist = [ s + ':' for s in _disable_url_schemes ]
 
+        class LinkRendererWithRel(Renderer):
+            ''' Rendering class that sets the rel="nofollow noopener"
+                for links. '''
+
+            rel_value = "nofollow noopener"
+
+            def autolink(self, link, is_email=False):
+                ''' handle <url or email> style explicit links '''
+                text = link = escape_link(link)
+                if is_email:
+                    link = 'mailto:%s' % link
+                    return '<a href="%(href)s">%(text)s</a>' % { 'href': link, 'text': text }
+                return '<a href="%(href)s" rel="%(rel)s">%(href)s</a>' % {
+                    'rel': self.rel_value, 'href': escape_link(link)}
+
+            def link(self, link, title, content):
+                ''' handle [text](url "title") style links and Reference
+                    links '''
+
+                values = {
+                    'content': escape(content),
+                    'href': escape_link(link),
+                    'rel': self.rel_value,
+                    'title': escape(title) if title else '', 
+                }
+
+                if title:
+                    return '<a href="%(href)s" rel="%(rel)s" ' \
+                            'title="%(title)s">%(content)s</a>' % values
+
+                return '<a href="%(href)s" rel="%(rel)s">%(content)s</a>' % values
+
         def _options(config):
-            options = {}
+            options = {'renderer': LinkRendererWithRel(escape = True)}
             if config['MARKDOWN_BREAK_ON_NEWLINE']:
                 options['hard_wrap'] = True
             return options

Roundup Issue Tracker: http://roundup-tracker.org/