@@ -138,8 +138,7 @@ def _extract_urls_from_text(text):
def extract_urls(message):
    """
    Extracts all Hyperlinks that are contained in a message. This includes
    message entities and the media caption. Distinct links are returned in
    order of appearance.

    Note: Exact duplicates are removed, but there may still be URLs that link
    to the same resource.

    Args:
        message: Object providing ``parse_entities(types=...)`` and
            ``parse_caption_entities(types=...)``, each returning a mapping of
            entity -> entity text (presumably a ``telegram.Message``; verify
            against callers).

    Returns:
        list: The distinct URLs with a single trailing slash removed. URLs
        found in the text come first, followed by URLs found in the caption.
    """
    from collections import OrderedDict

    from telegram import MessageEntity

    types = [MessageEntity.URL, MessageEntity.TEXT_LINK]

    all_urls = []
    # Text and caption entities are handled separately so that an entity from
    # one source can never overwrite an equal entity from the other.
    for parsed in (message.parse_entities(types=types),
                   message.parse_caption_entities(types=types)):
        # Plain dicts are unordered on legacy python, so restore the order of
        # appearance explicitly via the entity offset.
        ordered = sorted(parsed.items(), key=lambda item: item[0].offset)
        for entity, text in ordered:
            # URL entities carry the link in their text, TEXT_LINK entities
            # carry it in the ``url`` attribute.
            all_urls.append(text if entity.type == MessageEntity.URL else entity.url)

    # Strip trailing slash from URL so we can compare them for equality.
    # ``endswith`` (rather than indexing) is also safe for an empty string.
    stripped_urls = [x[:-1] if x.endswith('/') else x for x in all_urls]

    # Remove exact duplicates while keeping the first occurrence. ``fromkeys``
    # consumes the list in order; routing it through a dict comprehension
    # would lose the ordering on legacy python.
    return list(OrderedDict.fromkeys(stripped_urls))
0 commit comments