Skip to content

Commit b275031

Browse files
Eldinnietsnoam
authored and committed
Add Message caption html/markdown methods (python-telegram-bot#1013)
Closes python-telegram-bot#1010
1 parent a9a503b commit b275031

File tree

3 files changed

+119
-25
lines changed

3 files changed

+119
-25
lines changed

telegram/message.py

Lines changed: 72 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -19,12 +19,13 @@
1919
# along with this program. If not, see [http://www.gnu.org/licenses/].
2020
"""This module contains an object that represents a Telegram Message."""
2121
import sys
22+
from html import escape
2223

2324
from telegram import (Audio, Contact, Document, Chat, Location, PhotoSize, Sticker, TelegramObject,
2425
User, Video, Voice, Venue, MessageEntity, Game, Invoice, SuccessfulPayment,
2526
VideoNote)
2627
from telegram import ParseMode
27-
from telegram.utils.helpers import escape_html, escape_markdown, to_timestamp, from_timestamp
28+
from telegram.utils.helpers import escape_markdown, to_timestamp, from_timestamp
2829

2930
_UNDEFINED = object()
3031

@@ -865,17 +866,16 @@ def parse_caption_entities(self, types=None):
865866
for entity in self.caption_entities if entity.type in types
866867
}
867868

868-
def _text_html(self, urled=False):
869-
entities = self.parse_entities()
870-
message_text = self.text
869+
@staticmethod
870+
def _parse_html(message_text, entities, urled=False):
871871
if not sys.maxunicode == 0xffff:
872872
message_text = message_text.encode('utf-16-le')
873873

874874
html_text = ''
875875
last_offset = 0
876876

877877
for entity, text in sorted(entities.items(), key=(lambda item: item[0].offset)):
878-
text = escape_html(text)
878+
text = escape(text)
879879

880880
if entity.type == MessageEntity.TEXT_LINK:
881881
insert = '<a href="{}">{}</a>'.format(entity.url, text)
@@ -893,17 +893,17 @@ def _text_html(self, urled=False):
893893
insert = text
894894

895895
if sys.maxunicode == 0xffff:
896-
html_text += escape_html(message_text[last_offset:entity.offset]) + insert
896+
html_text += escape(message_text[last_offset:entity.offset]) + insert
897897
else:
898-
html_text += escape_html(message_text[last_offset * 2:entity.offset * 2]
899-
.decode('utf-16-le')) + insert
898+
html_text += escape(message_text[last_offset * 2:entity.offset * 2]
899+
.decode('utf-16-le')) + insert
900900

901901
last_offset = entity.offset + entity.length
902902

903903
if sys.maxunicode == 0xffff:
904-
html_text += escape_html(message_text[last_offset:])
904+
html_text += escape(message_text[last_offset:])
905905
else:
906-
html_text += escape_html(message_text[last_offset * 2:].decode('utf-16-le'))
906+
html_text += escape(message_text[last_offset * 2:].decode('utf-16-le'))
907907
return html_text
908908

909909
@property
@@ -917,7 +917,7 @@ def text_html(self):
917917
:obj:`str`: Message text with entities formatted as HTML.
918918
919919
"""
920-
return self._text_html(urled=False)
920+
return self._parse_html(self.text, self.parse_entities(), urled=False)
921921

922922
@property
923923
def text_html_urled(self):
@@ -930,11 +930,38 @@ def text_html_urled(self):
930930
:obj:`str`: Message text with entities formatted as HTML.
931931
932932
"""
933-
return self._text_html(urled=True)
933+
return self._parse_html(self.text, self.parse_entities(), urled=True)
934934

935-
def _text_markdown(self, urled=False):
936-
entities = self.parse_entities()
937-
message_text = self.text
935+
@property
936+
def caption_html(self):
937+
"""Creates an HTML-formatted string from the markup entities found in the message's
938+
caption.
939+
940+
Use this if you want to retrieve the message caption with the caption entities formatted as
941+
HTML in the same way the original message was formatted.
942+
943+
Returns:
944+
:obj:`str`: Message caption with caption entities formatted as HTML.
945+
946+
"""
947+
return self._parse_html(self.caption, self.parse_caption_entities(), urled=False)
948+
949+
@property
950+
def caption_html_urled(self):
951+
"""Creates an HTML-formatted string from the markup entities found in the message's
952+
caption.
953+
954+
Use this if you want to retrieve the message caption with the caption entities formatted as
955+
HTML. This also formats :attr:`telegram.MessageEntity.URL` as a hyperlink.
956+
957+
Returns:
958+
:obj:`str`: Message caption with caption entities formatted as HTML.
959+
960+
"""
961+
return self._parse_html(self.caption, self.parse_caption_entities(), urled=True)
962+
963+
@staticmethod
964+
def _parse_markdown(message_text, entities, urled=False):
938965
if not sys.maxunicode == 0xffff:
939966
message_text = message_text.encode('utf-16-le')
940967

@@ -983,7 +1010,7 @@ def text_markdown(self):
9831010
:obj:`str`: Message text with entities formatted as Markdown.
9841011
9851012
"""
986-
return self._text_markdown(urled=False)
1013+
return self._parse_markdown(self.text, self.parse_entities(), urled=False)
9871014

9881015
@property
9891016
def text_markdown_urled(self):
@@ -996,4 +1023,32 @@ def text_markdown_urled(self):
9961023
:obj:`str`: Message text with entities formatted as Markdown.
9971024
9981025
"""
999-
return self._text_markdown(urled=True)
1026+
return self._parse_markdown(self.text, self.parse_entities(), urled=True)
1027+
1028+
@property
1029+
def caption_markdown(self):
1030+
"""Creates a Markdown-formatted string from the markup entities found in the message's
1031+
caption.
1032+
1033+
Use this if you want to retrieve the message caption with the caption entities formatted as
1034+
Markdown in the same way the original message was formatted.
1035+
1036+
Returns:
1037+
:obj:`str`: Message caption with caption entities formatted as Markdown.
1038+
1039+
"""
1040+
return self._parse_markdown(self.caption, self.parse_caption_entities(), urled=False)
1041+
1042+
@property
1043+
def caption_markdown_urled(self):
1044+
"""Creates a Markdown-formatted string from the markup entities found in the message's
1045+
caption.
1046+
1047+
Use this if you want to retrieve the message caption with the caption entities formatted as
1048+
Markdown. This also formats :attr:`telegram.MessageEntity.URL` as a hyperlink.
1049+
1050+
Returns:
1051+
:obj:`str`: Message caption with caption entities formatted as Markdown.
1052+
1053+
"""
1054+
return self._parse_markdown(self.caption, self.parse_caption_entities(), urled=True)

telegram/utils/helpers.py

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -17,17 +17,12 @@
1717
# You should have received a copy of the GNU Lesser Public License
1818
# along with this program. If not, see [http://www.gnu.org/licenses/].
1919
"""This module contains helper functions."""
20+
from html import escape
2021

2122
import re
2223
import signal
2324
from datetime import datetime
2425

25-
try:
26-
from html import escape as escape_html # noqa: F401
27-
except ImportError:
28-
from cgi import escape as escape_html # noqa: F401
29-
30-
3126
# From https://stackoverflow.com/questions/2549939/get-signal-names-from-numbers-in-python
3227
_signames = {v: k
3328
for k, v in reversed(sorted(vars(signal).items()))
@@ -99,7 +94,7 @@ def mention_html(user_id, name):
9994
:obj:`str`: The inline mention for the user as html.
10095
"""
10196
if isinstance(user_id, int):
102-
return '<a href="tg://user?id={}">{}</a>'.format(user_id, escape_html(name))
97+
return '<a href="tg://user?id={}">{}</a>'.format(user_id, escape(name))
10398

10499

105100
def mention_markdown(user_id, name):

tests/test_message.py

Lines changed: 45 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,9 @@ class TestMessage(object):
117117
date=None,
118118
chat=None,
119119
text=test_text,
120-
entities=[MessageEntity(**e) for e in test_entities])
120+
entities=[MessageEntity(**e) for e in test_entities],
121+
caption=test_text,
122+
caption_entities=[MessageEntity(**e) for e in test_entities])
121123

122124
def test_all_posibilities_de_json_and_to_dict(self, bot, message_params):
123125
new = Message.de_json(message_params.to_dict(), bot)
@@ -206,6 +208,48 @@ def test_text_markdown_emoji(self):
206208
text=text, entities=[bold_entity])
207209
assert expected == message.text_markdown
208210

211+
def test_caption_html_simple(self):
212+
test_html_string = ('Test for &lt;<b>bold</b>, <i>ita_lic</i>, <code>code</code>, '
213+
'<a href="http://github.com/">links</a> and <pre>pre</pre>. '
214+
'http://google.com')
215+
caption_html = self.test_message.caption_html
216+
assert caption_html == test_html_string
217+
218+
def test_caption_html_urled(self):
219+
test_html_string = ('Test for &lt;<b>bold</b>, <i>ita_lic</i>, <code>code</code>, '
220+
'<a href="http://github.com/">links</a> and <pre>pre</pre>. '
221+
'<a href="http://google.com">http://google.com</a>')
222+
caption_html = self.test_message.caption_html_urled
223+
assert caption_html == test_html_string
224+
225+
def test_caption_markdown_simple(self):
226+
test_md_string = ('Test for <*bold*, _ita\_lic_, `code`, [links](http://github.com/) and '
227+
'```pre```. http://google.com')
228+
caption_markdown = self.test_message.caption_markdown
229+
assert caption_markdown == test_md_string
230+
231+
def test_caption_markdown_urled(self):
232+
test_md_string = ('Test for <*bold*, _ita\_lic_, `code`, [links](http://github.com/) and '
233+
'```pre```. [http://google.com](http://google.com)')
234+
caption_markdown = self.test_message.caption_markdown_urled
235+
assert caption_markdown == test_md_string
236+
237+
def test_caption_html_emoji(self):
238+
caption = b'\\U0001f469\\u200d\\U0001f469\\u200d ABC'.decode('unicode-escape')
239+
expected = b'\\U0001f469\\u200d\\U0001f469\\u200d <b>ABC</b>'.decode('unicode-escape')
240+
bold_entity = MessageEntity(type=MessageEntity.BOLD, offset=7, length=3)
241+
message = Message(1, self.from_user, self.date, self.chat,
242+
caption=caption, caption_entities=[bold_entity])
243+
assert expected == message.caption_html
244+
245+
def test_caption_markdown_emoji(self):
246+
caption = b'\\U0001f469\\u200d\\U0001f469\\u200d ABC'.decode('unicode-escape')
247+
expected = b'\\U0001f469\\u200d\\U0001f469\\u200d *ABC*'.decode('unicode-escape')
248+
bold_entity = MessageEntity(type=MessageEntity.BOLD, offset=7, length=3)
249+
message = Message(1, self.from_user, self.date, self.chat,
250+
caption=caption, caption_entities=[bold_entity])
251+
assert expected == message.caption_markdown
252+
209253
def test_parse_entities_url_emoji(self):
210254
url = b'http://github.com/?unicode=\\u2713\\U0001f469'.decode('unicode-escape')
211255
text = 'some url'

0 commit comments

Comments
 (0)