Mercurial > p > roundup > code
comparison roundup/token.py @ 470:9f7320624bc2
Added better tokenising to roundup-admin - handles spaces and stuff.
Can use quoting or backslashes. See the roundup.token pydoc.
| author | Richard Jones <richard@users.sourceforge.net> |
|---|---|
| date | Mon, 31 Dec 2001 05:09:20 +0000 |
| parents | |
| children | a1a44636bace |
comparison
equal
deleted
inserted
replaced
| 469:d35e51360175 | 470:9f7320624bc2 |
|---|---|
| 1 # | |
| 2 # Copyright (c) 2001 Richard Jones. | |
| 3 # This module is free software, and you may redistribute it and/or modify | |
| 4 # under the same terms as Python, so long as this copyright message and | |
| 5 # disclaimer are retained in their original form. | |
| 6 # | |
| 7 # This module is distributed in the hope that it will be useful, | |
| 8 # but WITHOUT ANY WARRANTY; without even the implied warranty of | |
| 9 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. | |
| 10 # | |
| 11 # $Id: token.py,v 1.1 2001-12-31 05:09:20 richard Exp $ | |
| 12 # | |
| 13 | |
| 14 __doc__ = """ | |
| 15 This module provides the tokeniser used by roundup-admin. | |
| 16 """ | |
| 17 | |
def token_split(s, whitespace=' \r\n\t', quotes='\'"',
        escaped={'r':'\r', 'n':'\n', 't':'\t'}):
    r'''Split the string ``s`` into a list of tokens.

    Tokens are separated by runs of characters from ``whitespace``.

    An occurrence of one of the ``quotes`` characters starts a quoted
    section: whitespace is kept as part of the token until the matching
    quote char is found. A non-matching quote char inside a quoted
    section is kept literally. A quoted section may appear in the middle
    of a token (see the ``address=`` example), and an empty pair of
    quotes produces an empty token.

    Outside quotes a backslash escapes the next character, so whitespace,
    quote characters and the backslash itself can be included in a token.
    Characters that are keys of ``escaped`` are replaced by the mapped
    value (by default \r, \n and \t become carriage-return, newline and
    tab). For any other character the backslash is dropped and the
    character is kept. Inside quotes a backslash is an ordinary
    character.

    Valid:
      hello world      (2 tokens: hello, world)
      "hello world"    (1 token: hello world)
      "Roch'e" Compaan (2 tokens: Roch'e Compaan)
      Roch\'e Compaan  (2 tokens: Roch'e Compaan)
      address="1 2 3"  (1 token: address=1 2 3)
      \\               (1 token: \)
      \n               (1 token: a newline)
      \o               (1 token: o)

    Invalid (ValueError is raised):
      "hello world     (no matching quote)
      Roch'e Compaan   (no matching quote)
      a backslash as the very last character (nothing left to escape)

    The ``escaped`` default is a shared dict; it is only read here and
    must not be modified by callers.
    '''
    # states of the tokeniser
    NEWTOKEN = 'newtoken'   # between tokens, skipping whitespace
    TOKEN = 'token'         # inside an unquoted part of a token
    QUOTE = 'quote'         # inside a quoted section
    ESCAPE = 'escape'       # previous char was an (unquoted) backslash

    tokens = []
    token = ''
    quotechar = ''
    state = NEWTOKEN
    for c in s:
        if state == NEWTOKEN:
            # looking for the start of a new token
            if c in quotes:
                # token starts with a quoted section
                state = QUOTE
                quotechar = c
                continue
            elif c in whitespace:
                # skip whitespace between tokens
                continue
            elif c == '\\':
                # token starts with an escaped char
                state = ESCAPE
                continue
            # any other char starts a plain token and is added below
            state = TOKEN
        elif state == TOKEN:
            if c in whitespace:
                # whitespace terminates the current token
                tokens.append(token)
                token = ''
                state = NEWTOKEN
                continue
            elif c in quotes:
                # quoted section embedded in the current token
                state = QUOTE
                quotechar = c
                continue
            elif c == '\\':
                state = ESCAPE
                continue
        elif state == QUOTE and c == quotechar:
            # matching quote found; the token continues until whitespace
            state = TOKEN
            continue
        elif state == ESCAPE:
            # escapes are only entered from outside quotes, so we always
            # return to TOKEN afterwards
            # TODO: octal, hexdigit
            state = TOKEN
            if c in escaped:
                c = escaped[c]
        # just add this char to the token and move along
        token = token + c

    # end of string, finish off the current token
    if state == QUOTE:
        raise ValueError("unmatched quote")
    elif state == ESCAPE:
        # a trailing backslash used to make the pending token vanish
        # silently; report it instead
        raise ValueError("no escaped character")
    elif state == TOKEN:
        tokens.append(token)
    return tokens
| 113 | |
| 114 # | |
| 115 # $Log: not supported by cvs2svn $ | |
| 116 # | |
| 117 # | |
| 118 # vim: set filetype=python ts=4 sw=4 et si |
