Skip to content

Commit 1ca712e

Browse files
committed
Fix requested changes
1 parent f50fe6c commit 1ca712e

File tree

2 files changed

+20
-44
lines changed

2 files changed

+20
-44
lines changed

telegram/utils/helpers.py

Lines changed: 8 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -17,24 +17,16 @@
1717
# You should have received a copy of the GNU Lesser Public License
1818
# along with this program. If not, see [http://www.gnu.org/licenses/].
1919
"""This module contains helper functions."""
20-
from html import escape
21-
2220
import re
23-
from collections import OrderedDict
2421
import signal
22+
from collections import OrderedDict
2523
from datetime import datetime
2624

27-
try:
28-
from urllib.parse import urlparse
29-
except ImportError:
30-
from urlparse import urlparse
31-
3225
try:
3326
from html import escape as escape_html # noqa: F401
3427
except ImportError:
3528
from cgi import escape as escape_html # noqa: F401
3629

37-
3830
# From https://stackoverflow.com/questions/2549939/get-signal-names-from-numbers-in-python
3931
_signames = {v: k
4032
for k, v in reversed(sorted(vars(signal).items()))
@@ -56,7 +48,6 @@ def _timestamp(dt_obj):
5648
# Python < 3.3 (incl 2.7)
5749
from time import mktime
5850

59-
6051
def _timestamp(dt_obj):
6152
return mktime(dt_obj.timetuple())
6253

@@ -107,7 +98,7 @@ def mention_html(user_id, name):
10798
:obj:`str`: The inline mention for the user as html.
10899
"""
109100
if isinstance(user_id, int):
110-
return '<a href="tg://user?id={}">{}</a>'.format(user_id, escape(name))
101+
return '<a href="tg://user?id={}">{}</a>'.format(user_id, escape_html(name))
111102

112103

113104
def mention_markdown(user_id, name):
@@ -154,23 +145,11 @@ def effective_message_type(entity):
154145
return None
155146

156147

157-
def _extract_urls_from_text(text):
158-
"""
159-
Returns a list of urls from a text string.
160-
URLs without a leading `http://` or `www.` won't be found.
161-
"""
162-
out = []
163-
for word in text.split(' '):
164-
thing = urlparse(word.strip())
165-
if thing.scheme:
166-
out.append(word)
167-
return out
168-
169-
170148
def extract_urls(message):
171149
"""
172150
Extracts all Hyperlinks that are contained in a message. This includes
173-
message entities and the media caption. Distinct links are returned in order of appearance.
151+
message entities and the media caption. Distinct links are returned in order of appearance,
152+
while links in the text take precedence over ones in the media caption.
174153
175154
Note: Exact duplicates are removed, but there may still be URLs that link
176155
to the same resource.
@@ -186,11 +165,12 @@ def extract_urls(message):
186165
types = [MessageEntity.URL, MessageEntity.TEXT_LINK]
187166
results = message.parse_entities(types=types)
188167
results.update(message.parse_caption_entities(types=types))
189-
all_urls = [v if k.type == MessageEntity.URL else k.url for k, v in results.items()]
168+
169+
all_urls = (v if k.type == MessageEntity.URL else k.url for k, v in results.items())
190170

191171
# Strip trailing slash from URL so we can compare them for equality
192-
stripped_urls = [x[:-1] if x[-1] == '/' else x for x in all_urls]
172+
stripped_urls = (x.rstrip('/') for x in all_urls)
193173

194-
# Remove exact duplicates, compliant with legacy python
174+
# Remove exact duplicates, in a way that is compliant with legacy python
195175
urls = OrderedDict({k: None for k in stripped_urls})
196176
return list(urls.keys())

tests/test_helpers.py

Lines changed: 12 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -16,11 +16,9 @@
1616
#
1717
# You should have received a copy of the GNU Lesser Public License
1818
# along with this program. If not, see [http://www.gnu.org/licenses/].
19-
from telegram import Update
20-
21-
from telegram import Message
2219
from telegram import MessageEntity
2320
from telegram import Sticker
21+
from telegram import Update
2422
from telegram import User
2523
from telegram.message import Message
2624
from telegram.utils import helpers
@@ -33,14 +31,6 @@ def test_escape_markdown(self):
3331

3432
assert expected_str == helpers.escape_markdown(test_str)
3533

36-
def test_extract_urls_from_text(self):
37-
urls = "http://google.com and http://github.com/ and " \
38-
"python-telegram-bot.readthedocs.io/en/latest/"
39-
result = helpers._extract_urls_from_text(urls)
40-
assert len(result) == 2
41-
assert result[0] == 'http://google.com'
42-
assert result[1] == 'http://github.com/'
43-
4434
def test_extract_urls_entities(self):
4535
test_entities = [{
4636
'length': 6, 'offset': 0, 'type': 'text_link',
@@ -65,17 +55,23 @@ def test_extract_urls_entities(self):
6555
assert (test_entities[2]['url'] == result[1])
6656

6757
def test_extract_urls_caption(self):
58+
test_entities = [{
59+
'length': 109, 'offset': 11, 'type': 'url'
60+
}]
6861
caption = "Taken from https://stackoverflow.com/questions/520031/whats" \
69-
"-the-cleanest-way-to-extract-urls-from-a-string-using-python"
62+
"-the-cleanest-way-to-extract-urls-from-a-string-using-python/"
7063
test_message = Message(message_id=1,
7164
from_user=None,
7265
date=None,
7366
chat=None,
74-
caption=caption)
75-
result = helpers.extract_urls(test_message)
67+
caption=caption,
68+
caption_entities=[MessageEntity(**e) for e in test_entities]
69+
)
70+
results = helpers.extract_urls(test_message)
7671

77-
assert result[0] == 'https://stackoverflow.com/questions/520031/whats-the-' \
78-
'cleanest-way-to-extract-urls-from-a-string-using-python'
72+
assert len(results) == 1
73+
assert results[0] == 'https://stackoverflow.com/questions/520031/whats-the-' \
74+
'cleanest-way-to-extract-urls-from-a-string-using-python'
7975

8076
def test_effective_message_type(self):
8177
test_message = Message(message_id=1,

0 commit comments

Comments
 (0)