changeset 7859:9a74dfeb8620

feat: allow escaped tokens inside quoted strings, including the quote characters themselves. Change the tokenizer so that: cmd with arg "string with embedded \" double quote" works; the same applies to single quotes. Mixed quotes can skip the \" or \' escaping. Also, quoted command args "can include \n newline, \t tab and \r return". Added a doc example and tests for the new feature.
author John Rouillard <rouilj@ieee.org>
date Sat, 06 Apr 2024 20:37:45 -0400
parents 376f70513242
children 8b31893f5930
files roundup/admin.py roundup/token_r.py test/test_token.py
diffstat 3 files changed, 33 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- a/roundup/admin.py	Wed Apr 03 16:53:13 2024 -0400
+++ b/roundup/admin.py	Sat Apr 06 20:37:45 2024 -0400
@@ -297,6 +297,7 @@
            address="1 2 3"  (1 token: address=1 2 3)
            \\\\               (1 token: \\)
            \\n\\r\\t           (1 token: a newline, carriage-return and tab)
+           f "test\\"q"      (2 tokens: f test"q)
 
 When multiple nodes are specified to the roundup get or roundup set
 commands, the specified properties are retrieved or set on all the listed
--- a/roundup/token_r.py	Wed Apr 03 16:53:13 2024 -0400
+++ b/roundup/token_r.py	Sat Apr 06 20:37:45 2024 -0400
@@ -96,6 +96,12 @@
                 oldstate = state
                 state = ESCAPE
                 continue
+        elif state == QUOTE and c == '\\':
+            # in a quoted token and found an escape sequence
+            pos = pos + 1
+            oldstate = state
+            state = ESCAPE
+            continue
         elif state == QUOTE and c == quotechar:
             # in a quoted token and found a matching quote char
             pos = pos + 1
--- a/test/test_token.py	Wed Apr 03 16:53:13 2024 -0400
+++ b/test/test_token.py	Sat Apr 06 20:37:45 2024 -0400
@@ -33,6 +33,28 @@
         l = token_split('address="1 2 3"')
         self.assertEqual(l, ['address=1 2 3'])
 
+    def testEmbedEscapeQuote(self):
+        l = token_split(r'"Roch\'e Compaan"')
+        self.assertEqual(l, ["Roch'e Compaan"])
+
+        l = token_split(r'"Roch\"e Compaan"')
+        self.assertEqual(l, ['Roch"e Compaan'])
+
+        l = token_split(r'sql "COLLATE = \"utf8mb4_unicode_ci\";"')
+        self.assertEqual(l, ["sql", 'COLLATE = "utf8mb4_unicode_ci";'])
+
+        l = token_split(r'''sql 'COLLATE = "utf8mb4_unicode_ci";' ''')
+        self.assertEqual(l, ["sql", 'COLLATE = "utf8mb4_unicode_ci";'])
+
+        l = token_split(r'''sql 'COLLATE = \"utf8mb4_unicode_ci\";' ''')
+        self.assertEqual(l, ["sql", 'COLLATE = "utf8mb4_unicode_ci";'])
+
+        l = token_split(r'''sql 'COLLATE = \'utf8mb4_unicode_ci\';' ''')
+        self.assertEqual(l, ["sql", "COLLATE = 'utf8mb4_unicode_ci';"])
+
+        l = token_split(r'''sql 'new\nline\rneed \ttab' ''')
+        self.assertEqual(l, ["sql", "new\nline\rneed \ttab"])
+
     def testEscaping(self):
         l = token_split('"Roch\'e" Compaan')
         self.assertEqual(l, ["Roch'e", "Compaan"])
@@ -42,6 +64,10 @@
         self.assertEqual(l, ['\\'])
         l = token_split(r'\n')
         self.assertEqual(l, ['\n'])
+        l = token_split(r'\r')
+        self.assertEqual(l, ['\r'])
+        l = token_split(r'\t')
+        self.assertEqual(l, ['\t'])
 
     def testBadQuote(self):
         self.assertRaises(ValueError, token_split, '"hello world')

Roundup Issue Tracker: http://roundup-tracker.org/