Mercurial > p > roundup > code
comparison roundup/token_r.py @ 7178:db06d4aeb978
unshadow stdlib token from roundup's token.
This bites me every now and again when running pytest and pdb. Some
submodules want to load the stdlib token module and end up getting roundup's
token module, and things break with N_TOKENS not defined, etc.
So rename token.py to token_r.py (token_r(oundup)... hey, naming things
is hard) and change code as needed.
| author | John Rouillard <rouilj@ieee.org> |
|---|---|
| date | Sun, 26 Feb 2023 12:00:35 -0500 |
| parents | |
| children | 07ce4e4110f5 |
comparison
equal
deleted
inserted
replaced
| 7177:d787f7282ea3 | 7178:db06d4aeb978 |
|---|---|
| 1 # | |
| 2 # Copyright (c) 2001 Richard Jones, richard@bofh.asn.au. | |
| 3 # This module is free software, and you may redistribute it and/or modify | |
| 4 # under the same terms as Python, so long as this copyright message and | |
| 5 # disclaimer are retained in their original form. | |
| 6 # | |
| 7 # This module is distributed in the hope that it will be useful, | |
| 8 # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 9 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | |
| 10 # | |
| 11 | |
| 12 """This module provides the tokeniser used by roundup-admin. | |
| 13 """ | |
| 14 __docformat__ = 'restructuredtext' | |
| 15 | |
| 16 | |
def token_split(s, whitespace=' \r\n\t', quotes='\'"',
                escaped={'r': '\r', 'n': '\n', 't': '\t'}):
    r'''Split the string ``s`` into a list of tokens.

    Tokens are separated by runs of characters from ``whitespace``. An
    occurrence of a character from ``quotes`` (``'`` or ``"`` by default)
    starts a quoted section in which whitespace is kept; the section ends
    at the next occurrence of the same quote character. Inside a quoted
    section the other quote character and the backslash are ordinary
    characters. The delimiting quote characters are not part of the token.

    Outside of quoted sections a backslash escapes the next character.
    ``\r``, ``\n`` and ``\t`` (the keys of ``escaped``) are converted to
    carriage-return, newline and tab. For any other character the
    backslash is dropped and the character itself is kept, which is how
    whitespace, quote characters and the backslash are escaped.

    Valid examples::

        hello world      (2 tokens: hello, world)
        "hello world"    (1 token: hello world)
        "Roch'e" Compaan (2 tokens: Roch'e Compaan)
        Roch\'e Compaan  (2 tokens: Roch'e Compaan)
        address="1 2 3"  (1 token: address=1 2 3)
        \\               (1 token: \)
        \n               (1 token: a newline)
        \o               (1 token: o)

    Invalid examples (``ValueError`` is raised)::

        "hello world     (no matching quote)
        Roch'e Compaan   (no matching quote)
        hello\           (backslash with nothing left to escape)

    :param s: the string to split.
    :param whitespace: characters that separate tokens.
    :param quotes: characters that open (and close) a quoted section.
    :param escaped: mapping from the character following a backslash to
        its replacement. It is only read, never modified.
    :returns: the list of tokens; empty if ``s`` holds only whitespace.
    :raises ValueError: if a quoted section is not closed or if ``s``
        ends with an unescaped backslash.
    '''
    NEWTOKEN = 'newtoken'   # between tokens, skipping whitespace
    TOKEN = 'token'         # inside an unquoted part of a token
    QUOTE = 'quote'         # inside a quoted section
    ESCAPE = 'escape'       # previous character was an escaping backslash

    tokens = []
    chars = []              # pieces of the token currently being built
    quotechar = ''
    state = NEWTOKEN

    for c in s:
        if state == ESCAPE:
            # An escape can only be entered from outside a quoted section
            # and always leaves us inside a token.
            # TODO: octal, hexdigit
            state = TOKEN
            if c in escaped:
                c = escaped[c]
            chars.append(c)
        elif state == QUOTE:
            if c == quotechar:
                # matching quote found; the token continues until
                # whitespace is seen
                state = TOKEN
            else:
                chars.append(c)
        elif state == NEWTOKEN:
            if c in quotes:
                state = QUOTE
                quotechar = c
            elif c in whitespace:
                pass
            elif c == '\\':
                state = ESCAPE
            else:
                state = TOKEN
                chars.append(c)
        else:
            # state == TOKEN
            if c in whitespace:
                # whitespace terminates the current token
                tokens.append(''.join(chars))
                chars = []
                state = NEWTOKEN
            elif c in quotes:
                # quoted section embedded in a token
                state = QUOTE
                quotechar = c
            elif c == '\\':
                state = ESCAPE
            else:
                chars.append(c)

    # end of string, finish off the current token
    if state == QUOTE:
        raise ValueError('no matching quote')
    if state == ESCAPE:
        # Previously the pending token was silently discarded here.
        raise ValueError('backslash at end of input')
    if state == TOKEN:
        tokens.append(''.join(chars))
    return tokens
| 115 | |
| 116 # vim: set filetype=python ts=4 sw=4 et si |
