Mercurial > p > roundup > code
changeset 3991:13161539e5bd
improved URL matching
| author | Richard Jones <richard@users.sourceforge.net> |
|---|---|
| date | Mon, 18 Aug 2008 06:03:06 +0000 |
| parents | 0728808fdf5c |
| children | fe2af84a5ca5 |
| files | CHANGES.txt roundup/cgi/templating.py test/test_templating.py |
| diffstat | 3 files changed, 56 insertions(+), 5 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/CHANGES.txt Mon Aug 18 05:09:00 2008 +0000
+++ b/CHANGES.txt Mon Aug 18 06:03:06 2008 +0000
@@ -19,6 +19,7 @@
 - Prevent broken pipe errors in csv export (sf patch #1911449)
 - Session API and cleanup thanks anatoly t.
 - Make WSGI handler threadsafe (sf #1968027)
+- Improved URL matching RE (sf #2038858)
 
 2008-03-01 1.4.4
 
```
```diff
--- a/roundup/cgi/templating.py Mon Aug 18 05:09:00 2008 +0000
+++ b/roundup/cgi/templating.py Mon Aug 18 06:03:06 2008 +0000
@@ -1245,9 +1245,19 @@
         return self.is_edit_ok()
 
 class StringHTMLProperty(HTMLProperty):
-    hyper_re = re.compile(r'((?P<url>\w{3,6}://\S+)|'
-                          r'(?P<email>[-+=%/\w\.]+@[\w\.\-]+)|'
-                          r'(?P<item>(?P<class>[A-Za-z_]+)(\s*)(?P<id>\d+)))')
+    hyper_re = re.compile(r'''(
+        (?P<url>
+         ((ht|f)tp(s?)://|www\.)? # prefix
+         ([\w]+:\w+@)? # username/password
+         (([\w\-]+\.)+([\w]{2,5})) # hostname
+         (:[\d]{1,5})? # port
+         (/[\w\-$.+!*(),;:@&=?/~\\#%]*)? # path etc.
+        )|
+        (?P<email>[-+=%/\w\.]+@[\w\.\-]+)|
+        (?P<item>(?P<class>[A-Za-z_]+)(\s*)(?P<id>\d+))
+    )''', re.X | re.I)
+    protocol_re = re.compile('^(ht|f)tp(s?)://', re.I)
+
     def _hyper_repl_item(self,match,replacement):
         item = match.group('item')
         cls = match.group('class').lower()
@@ -1263,8 +1273,16 @@
 
     def _hyper_repl(self, match):
         if match.group('url'):
-            s = match.group('url')
-            return '<a href="%s">%s</a>'%(s, s)
+            u = s = match.group('url')
+            if not self.protocol_re.search(s):
+                u = 'http://' + s
+            # catch an escaped ">" at the end of the URL
+            if s.endswith('&gt;'):
+                u = s = s[:-4]
+                e = '&gt;'
+            else:
+                e = ''
+            return '<a href="%s">%s</a>%s'%(u, s, e)
         elif match.group('email'):
             s = match.group('email')
             return '<a href="mailto:%s">%s</a>'%(s, s)
```
```diff
--- a/test/test_templating.py Mon Aug 18 05:09:00 2008 +0000
+++ b/test/test_templating.py Mon Aug 18 06:03:06 2008 +0000
@@ -88,6 +88,38 @@
         cls = HTMLClass(self.client, "issue")
         cls["nosy"]
 
+    def test_url_match(self):
+        '''Test the URL regular expression in StringHTMLProperty.
+        '''
+        def t(s, **groups):
+            m = StringHTMLProperty.hyper_re.search(s)
+            self.assertNotEquals(m, None, '%r did not match'%s)
+            d = m.groupdict()
+            for g in groups:
+                self.assertEquals(d[g], groups[g], '%s %r != %r in %r'%(g, d[g],
+                    groups[g], s))
+
+        #t('123.321.123.321', 'url')
+        t('http://roundup.net/', url='http://roundup.net/')
+        t('<HTTP://roundup.net/>', url='HTTP://roundup.net/')
+        t('www.a.ex', url='www.a.ex')
+        t('http://a.ex', url='http://a.ex')
+        t('http://a.ex/?foo&bar=baz\\.@!$%()qwerty',
+            url='http://a.ex/?foo&bar=baz\\.@!$%()qwerty')
+        t('www.net', url='www.net')
+        t('richard@com.example', email='richard@com.example')
+        t('r@a.com', email='r@a.com')
+        t('i1', **{'class':'i', 'id':'1'})
+        t('item123', **{'class':'item', 'id':'123'})
+
+    def test_url_replace(self):
+        p = StringHTMLProperty(self.client, 'test', '1', None, 'test', '')
+        def t(s): return p.hyper_re.sub(p._hyper_repl, s)
+        ae = self.assertEquals
+        ae(t('http://roundup.net/'), '<a href="http://roundup.net/">http://roundup.net/</a>')
+        ae(t('<HTTP://roundup.net/>'), '<<a href="HTTP://roundup.net/">HTTP://roundup.net/</a>>')
+        ae(t('<www.roundup.net>'), '<<a href="http://www.roundup.net">www.roundup.net</a>>')
+
 '''
 class HTMLPermissions:
     def is_edit_ok(self):
```
