Skip to content
8 changes: 8 additions & 0 deletions Doc/library/textwrap.rst
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,14 @@ hyphenated words; only then will long words be broken if necessary, unless
.. versionadded:: 3.4


.. attribute:: text_len

(default: ``len``) A callable used to measure the width of a string. You can
provide a custom function, e.g. to count wide characters as two columns.

.. versionadded:: 3.11


.. index:: single: ...; placeholder

.. attribute:: placeholder
Expand Down
2 changes: 1 addition & 1 deletion Lib/idlelib/idle_test/test_calltip.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ def test_signature_wrap(self):
(width=70, initial_indent='', subsequent_indent='', expand_tabs=True,
replace_whitespace=True, fix_sentence_endings=False, break_long_words=True,
drop_whitespace=True, break_on_hyphens=True, tabsize=8, *, max_lines=None,
placeholder=' [...]')
placeholder=' [...]', text_len=<built-in function len>)
Object for wrapping/filling text. The public interface consists of
the wrap() and fill() methods; the other methods are just there for
subclasses to override in order to tweak the default behaviour.
Expand Down
68 changes: 68 additions & 0 deletions Lib/test/test_textwrap.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#

import unittest
import unicodedata

from textwrap import TextWrapper, wrap, fill, dedent, indent, shorten

Expand Down Expand Up @@ -1133,5 +1134,72 @@ def test_first_word_too_long_but_placeholder_fits(self):
self.check_shorten("Helloo", 5, "[...]")


class WideCharacterTestCase(BaseTestCase):
    """Tests for a custom text_len that counts wide characters as 2 columns.

    Each test runs the same input twice: first with the default length
    function, then with text_len=self.text_len, to show how the result
    changes once wide characters are taken into account.
    """

    def text_len(self, text):
        """Return the width of text, counting 'F'/'W' characters as 2.

        Characters whose unicodedata.east_asian_width() is 'F' (fullwidth)
        or 'W' (wide) contribute 2; every other character contributes 1.
        """
        return sum(
            2 if unicodedata.east_asian_width(c) in {'F', 'W'} else 1
            for c in text
        )

    def check_shorten(self, text, width, expect, **kwargs):
        """Call shorten(text, width, **kwargs) and compare it to expect."""
        result = shorten(text, width, **kwargs)
        self.check(result, expect)

    def test_wrap(self):
        text = "123 🔧"
        # Default len(): 5 characters fit in width 5.
        self.check_wrap(text, 5, ["123 🔧"])
        # Custom text_len: the emoji counts as 2, so the line is too wide.
        self.check_wrap(text, 5, ["123", "🔧"], text_len=self.text_len)

    def test_wrap_initial_indent(self):
        text = "12 12"
        self.check_wrap(text, 6, ["🔧12 12"], initial_indent="🔧")
        # The wide indent uses 2 of the 6 columns under the custom text_len.
        self.check_wrap(text, 6, ["🔧12", "12"], initial_indent="🔧",
                        text_len=self.text_len)

    def test_wrap_subsequent_indent(self):
        text = "12 12 12 12"
        self.check_wrap(text, 6, ["12 12", "🔧12 12"], subsequent_indent="🔧")
        self.check_wrap(text, 6, ["12 12", "🔧12", "🔧12"],
                        subsequent_indent="🔧", text_len=self.text_len)

    def test_shorten(self):
        text = "123 1234🔧"
        # Default len(): the text is exactly 9 characters and is kept whole.
        self.check_shorten(text, 9, "123 1234🔧")
        # Custom text_len: the text measures 10, so it must be truncated.
        self.check_shorten(text, 9, "123 [...]", text_len=self.text_len)

    def test_shorten_placeholder(self):
        text = "123 1 123"
        self.check_shorten(text, 7, "123 1 🔧", placeholder=" 🔧")
        # A wide placeholder leaves less room for the remaining text.
        self.check_shorten(text, 7, "123 🔧", placeholder=" 🔧",
                           text_len=self.text_len)


class CustomWidthTestCase(BaseTestCase):
    """Tests for wrapping with an arbitrary per-character width function."""

    # Widths assigned to specific characters by text_len(); any character
    # not listed here has width 1.
    _CHAR_WIDTHS = {
        'A': 4,
        'B': 2,
        'Q': 0,
    }

    def text_len(self, text):
        """Return the sum of the per-character widths of text."""
        widths = self._CHAR_WIDTHS
        return sum(widths.get(c, 1) for c in text)

    def test_zero_width_text_len(self):
        # 'Q' has width 0, so it does not use up any of the line width.
        text = "0QQ1234QQ56789"
        self.check_wrap(text, 6, ["0QQ1234QQ5", "6789"], text_len=self.text_len)

    def test_char_longer_than_width(self):
        # 'A' (width 4) is wider than the whole line (width 3); it is
        # expected to end up on a line of its own.
        text = "AA0123"
        self.check_wrap(text, 3, ["A", "A", "012", "3"], text_len=self.text_len)

    def test_next_char_overflow(self):
        # A second 'B' (width 2) would overflow a width-3 line that already
        # holds one 'B'.
        text = "BB0123"
        self.check_wrap(text, 3, ["B", "B0", "123"], text_len=self.text_len)


if __name__ == '__main__':
unittest.main()
48 changes: 36 additions & 12 deletions Lib/textwrap.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,8 @@ def __init__(self,
tabsize=8,
*,
max_lines=None,
placeholder=' [...]'):
placeholder=' [...]',
text_len=len):
self.width = width
self.initial_indent = initial_indent
self.subsequent_indent = subsequent_indent
Expand All @@ -135,6 +136,7 @@ def __init__(self,
self.tabsize = tabsize
self.max_lines = max_lines
self.placeholder = placeholder
self.text_len = text_len


# -- Private methods -----------------------------------------------
Expand Down Expand Up @@ -194,6 +196,25 @@ def _fix_sentence_endings(self, chunks):
else:
i += 1

def _find_width_index(self, text, width):
    """_find_width_index(text : string, width : int) -> int

    Return the index at which 'text' should be cut so that the leading
    part is no wider than 'width' as measured by self.text_len.  With a
    custom text_len this index is not necessarily equal to 'width'.

    If the very first character is already wider than 'width', 1 is
    returned so that the caller still consumes a character.  If no
    prefix of 'text' exceeds 'width', len(text) is returned.
    """
    # Fast path: when the first 'width' characters measure exactly
    # 'width' (always true for the default len on long enough text),
    # the index equals the width and no scan is needed.
    if self.text_len(text[:width]) == width:
        # With characters wider than one column, 'width' can exceed the
        # number of characters in 'text', so clamp to the string length.
        return min(width, len(text))
    # text_len is an arbitrary callable, so each growing prefix is
    # measured as a whole rather than summing per-character widths.
    for i in range(len(text)):
        if self.text_len(text[:i + 1]) > width:
            # text[:i] is the longest prefix that fits; never return 0
            # here, otherwise the caller would make no progress.
            return max(i, 1)
    # No prefix is too wide: the whole text fits.
    return len(text)

def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
"""_handle_long_word(chunks : [string],
cur_line : [string],
Expand All @@ -214,10 +235,11 @@ def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
if self.break_long_words and space_left > 0:
end = space_left
chunk = reversed_chunks[-1]
if self.break_on_hyphens and len(chunk) > space_left:
end = self._find_width_index(chunk, space_left)
if self.break_on_hyphens and self.text_len(chunk) > space_left:
# break after last hyphen, but only if there are
# non-hyphens before it
hyphen = chunk.rfind('-', 0, space_left)
hyphen = chunk.rfind('-', 0, end)
if hyphen > 0 and any(c != '-' for c in chunk[:hyphen]):
end = hyphen + 1
cur_line.append(chunk[:end])
Expand Down Expand Up @@ -256,7 +278,8 @@ def _wrap_chunks(self, chunks):
indent = self.subsequent_indent
else:
indent = self.initial_indent
if len(indent) + len(self.placeholder.lstrip()) > self.width:
if (self.text_len(indent) +
self.text_len(self.placeholder.lstrip()) > self.width):
raise ValueError("placeholder too large for max width")

# Arrange in reverse order so items can be efficiently popped
Expand All @@ -277,15 +300,15 @@ def _wrap_chunks(self, chunks):
indent = self.initial_indent

# Maximum width for this line.
width = self.width - len(indent)
width = self.width - self.text_len(indent)

# First chunk on line is whitespace -- drop it, unless this
# is the very beginning of the text (ie. no lines started yet).
if self.drop_whitespace and chunks[-1].strip() == '' and lines:
del chunks[-1]

while chunks:
l = len(chunks[-1])
l = self.text_len(chunks[-1])

# Can at least squeeze this chunk onto the current line.
if cur_len + l <= width:
Expand All @@ -298,13 +321,13 @@ def _wrap_chunks(self, chunks):

# The current line is full, and the next chunk is too big to
# fit on *any* line (not just this one).
if chunks and len(chunks[-1]) > width:
if chunks and self.text_len(chunks[-1]) > width:
self._handle_long_word(chunks, cur_line, cur_len, width)
cur_len = sum(map(len, cur_line))
cur_len = sum(map(self.text_len, cur_line))

# If the last chunk on this line is all whitespace, drop it.
if self.drop_whitespace and cur_line and cur_line[-1].strip() == '':
cur_len -= len(cur_line[-1])
cur_len -= self.text_len(cur_line[-1])
del cur_line[-1]

if cur_line:
Expand All @@ -320,16 +343,17 @@ def _wrap_chunks(self, chunks):
else:
while cur_line:
if (cur_line[-1].strip() and
cur_len + len(self.placeholder) <= width):
cur_len + self.text_len(self.placeholder) <= width):
cur_line.append(self.placeholder)
lines.append(indent + ''.join(cur_line))
break
cur_len -= len(cur_line[-1])
cur_len -= self.text_len(cur_line[-1])
del cur_line[-1]
else:
if lines:
prev_line = lines[-1].rstrip()
if (len(prev_line) + len(self.placeholder) <=
if (self.text_len(prev_line) +
self.text_len(self.placeholder) <=
self.width):
lines[-1] = prev_line + self.placeholder
break
Expand Down
Loading