comparison test/test_token.py @ 7859:9a74dfeb8620

feat: can use escaped tokens inside quotes, including quotes. Change tokenizer to support: cmd with arg "string with embedded \" double quote"; this works for single quotes too. Mixed quotes can skip the \" or \' escaping. Also: quoted cmd args "can include \n newline, \t tab and \r return". Added a doc example and tests for the new feature.
author John Rouillard <rouilj@ieee.org>
date Sat, 06 Apr 2024 20:37:45 -0400
parents 6971c9249c6d
children
comparison
equal deleted inserted replaced
7858:376f70513242 7859:9a74dfeb8620
31 l = token_split(r'Roch\'e Compaan') 31 l = token_split(r'Roch\'e Compaan')
32 self.assertEqual(l, ["Roch'e", "Compaan"]) 32 self.assertEqual(l, ["Roch'e", "Compaan"])
33 l = token_split('address="1 2 3"') 33 l = token_split('address="1 2 3"')
34 self.assertEqual(l, ['address=1 2 3']) 34 self.assertEqual(l, ['address=1 2 3'])
35 35
36 def testEmbedEscapeQuote(self):
37 l = token_split(r'"Roch\'e Compaan"')
38 self.assertEqual(l, ["Roch'e Compaan"])
39
40 l = token_split(r'"Roch\"e Compaan"')
41 self.assertEqual(l, ['Roch"e Compaan'])
42
43 l = token_split(r'sql "COLLATE = \"utf8mb4_unicode_ci\";"')
44 self.assertEqual(l, ["sql", 'COLLATE = "utf8mb4_unicode_ci";'])
45
46 l = token_split(r'''sql 'COLLATE = "utf8mb4_unicode_ci";' ''')
47 self.assertEqual(l, ["sql", 'COLLATE = "utf8mb4_unicode_ci";'])
48
49 l = token_split(r'''sql 'COLLATE = \"utf8mb4_unicode_ci\";' ''')
50 self.assertEqual(l, ["sql", 'COLLATE = "utf8mb4_unicode_ci";'])
51
52 l = token_split(r'''sql 'COLLATE = \'utf8mb4_unicode_ci\';' ''')
53 self.assertEqual(l, ["sql", "COLLATE = 'utf8mb4_unicode_ci';"])
54
55 l = token_split(r'''sql 'new\nline\rneed \ttab' ''')
56 self.assertEqual(l, ["sql", "new\nline\rneed \ttab"])
57
36 def testEscaping(self): 58 def testEscaping(self):
37 l = token_split('"Roch\'e" Compaan') 59 l = token_split('"Roch\'e" Compaan')
38 self.assertEqual(l, ["Roch'e", "Compaan"]) 60 self.assertEqual(l, ["Roch'e", "Compaan"])
39 l = token_split(r'hello\ world') 61 l = token_split(r'hello\ world')
40 self.assertEqual(l, ['hello world']) 62 self.assertEqual(l, ['hello world'])
41 l = token_split(r'\\') 63 l = token_split(r'\\')
42 self.assertEqual(l, ['\\']) 64 self.assertEqual(l, ['\\'])
43 l = token_split(r'\n') 65 l = token_split(r'\n')
44 self.assertEqual(l, ['\n']) 66 self.assertEqual(l, ['\n'])
67 l = token_split(r'\r')
68 self.assertEqual(l, ['\r'])
69 l = token_split(r'\t')
70 self.assertEqual(l, ['\t'])
45 71
46 def testBadQuote(self): 72 def testBadQuote(self):
47 self.assertRaises(ValueError, token_split, '"hello world') 73 self.assertRaises(ValueError, token_split, '"hello world')
48 self.assertRaises(ValueError, token_split, "Roch'e Compaan") 74 self.assertRaises(ValueError, token_split, "Roch'e Compaan")
49 75

Roundup Issue Tracker: http://roundup-tracker.org/