1010__all__ = ['TextWrapper' , 'wrap' , 'fill' , 'dedent' , 'indent' , 'shorten' ]
1111
1212# Hardcode the recognized whitespace characters to the US-ASCII
13- # whitespace characters. The main reason for doing this is that in
14- # ISO-8859-1, 0xa0 is non-breaking whitespace, so in certain locales
15- # that character winds up in string.whitespace. Respecting
16- # string.whitespace in those cases would 1) make textwrap treat 0xa0 the
17- # same as any other whitespace char, which is clearly wrong (it's a
18- # *non-breaking* space), 2) possibly cause problems with Unicode,
19- # since 0xa0 is not in range(128).
13+ # whitespace characters. The main reason for doing this is that
14+ # some Unicode spaces (like \u00a0) are non-breaking whitespace.
2015_whitespace = '\t \n \x0b \x0c \r '
2116
2217class TextWrapper :
@@ -81,29 +76,34 @@ class TextWrapper:
8176 # (after stripping out empty strings).
8277 word_punct = r'[\w!"\'&.,?]'
8378 letter = r'[^\d\W]'
79+ whitespace = r'[%s]' % re .escape (_whitespace )
80+ nowhitespace = '[^' + whitespace [1 :]
8481 wordsep_re = re .compile (r'''
8582 ( # any whitespace
86- \ s+
83+ %(ws) s+
8784 | # em-dash between words
8885 (?<=%(wp)s) -{2,} (?=\w)
8986 | # word, possibly hyphenated
90- \S +? (?:
87+ %(nws)s +? (?:
9188 # hyphenated word
9289 -(?: (?<=%(lt)s{2}-) | (?<=%(lt)s-%(lt)s-))
9390 (?= %(lt)s -? %(lt)s)
9491 | # end of word
95- (?=\ s|\Z)
92+ (?=%(ws) s|\Z)
9693 | # em-dash
9794 (?<=%(wp)s) (?=-{2,}\w)
9895 )
99- )''' % {'wp' : word_punct , 'lt' : letter }, re .VERBOSE )
100- del word_punct , letter
96+ )''' % {'wp' : word_punct , 'lt' : letter ,
97+ 'ws' : whitespace , 'nws' : nowhitespace },
98+ re .VERBOSE )
99+ del word_punct , letter , nowhitespace
101100
102101 # This less funky little regex just splits on recognized spaces. E.g.
103102 # "Hello there -- you goof-ball, use the -b option!"
104103 # splits into
105104 # Hello/ /there/ /--/ /you/ /goof-ball,/ /use/ /the/ /-b/ /option!/
106- wordsep_simple_re = re .compile (r'(\s+)' )
105+ wordsep_simple_re = re .compile (r'(%s+)' % whitespace )
106+ del whitespace
107107
108108 # XXX this is not locale- or charset-aware -- string.lowercase
109109 # is US-ASCII only (and therefore English-only)
@@ -112,7 +112,6 @@ class TextWrapper:
112112 r'[\"\']?' # optional end-of-quote
113113 r'\Z' ) # end of chunk
114114
115-
116115 def __init__ (self ,
117116 width = 70 ,
118117 initial_indent = "" ,
0 commit comments