Skip to content

Commit d6b51a6

Browse files
committed
Changed extract_urls helper according to suggestions
1 parent 66f8079 commit d6b51a6

File tree

1 file changed

+7
-8
lines changed

1 file changed

+7
-8
lines changed

telegram/utils/helpers.py

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -138,8 +138,7 @@ def _extract_urls_from_text(text):
138138
def extract_urls(message):
139139
"""
140140
Extracts all hyperlinks that are contained in a message. This includes
141-
message entities and the media caption. The links are returned in lexicographically
142-
ascending order.
141+
message entities and the media caption. Distinct links are returned in order of appearance.
143142
144143
Note: Exact duplicates are removed, but there may still be URLs that link
145144
to the same resource.
@@ -152,14 +151,14 @@ def extract_urls(message):
152151
"""
153152
from telegram import MessageEntity
154153

155-
results = message.parse_entities(types=[MessageEntity.URL, MessageEntity.TEXT_LINK])
154+
types = [MessageEntity.URL, MessageEntity.TEXT_LINK]
155+
results = message.parse_entities(types=types)
156+
results.update(message.parse_caption_entities(types=types))
156157
all_urls = [v if k.type == MessageEntity.URL else k.url for k, v in results.items()]
157158

158-
if message.caption:
159-
all_urls += _extract_urls_from_text(message.caption)
160-
161159
# Strip trailing slash from URL so we can compare them for equality
162160
stripped_urls = [x[:-1] if x[-1] == '/' else x for x in all_urls]
163161

164-
urls = set(stripped_urls)
165-
return sorted(list(urls))
162+
# Remove exact duplicates, compliant with legacy Python
163+
urls = OrderedDict({k: None for k in stripped_urls})
164+
return list(urls.keys())

0 commit comments

Comments
 (0)