Skip to content

Commit b51b102

Browse files
committed
py/lexer: Support nested f-strings.
Signed-off-by: Damien George <damien@micropython.org>
1 parent 891d05f commit b51b102

File tree

5 files changed

+49
-24
lines changed

5 files changed

+49
-24
lines changed

py/lexer.c

Lines changed: 30 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -148,25 +148,25 @@ static void next_char(mp_lexer_t *lex) {
148148
lex->chr0 = lex->chr1;
149149
lex->chr1 = lex->chr2;
150150

151-
// and add the next byte from either the fstring args or the reader
151+
// and add the next byte from either inject_chrs or the reader
152152
#if MICROPY_PY_FSTRINGS
153-
if (lex->fstring_args_idx) {
154-
// if there are saved chars, then we're currently injecting fstring args
155-
if (lex->fstring_args_idx < lex->fstring_args.len) {
156-
lex->chr2 = lex->fstring_args.buf[lex->fstring_args_idx++];
153+
if (lex->inject_chrs_idx) {
154+
// if there are saved chars, then we're currently injecting them
155+
if (lex->inject_chrs_idx < lex->inject_chrs.len) {
156+
lex->chr2 = lex->inject_chrs.buf[lex->inject_chrs_idx++];
157157
} else {
158-
// no more fstring arg bytes
158+
// no more characters to inject
159159
lex->chr2 = '\0';
160160
}
161161

162162
if (lex->chr0 == '\0') {
163-
// consumed all fstring data, restore saved input queue
163+
// consumed all injected characters, restore saved input queue
164164
lex->chr0 = lex->chr0_saved;
165165
lex->chr1 = lex->chr1_saved;
166166
lex->chr2 = lex->chr2_saved;
167-
// stop consuming fstring arg data
168-
vstr_reset(&lex->fstring_args);
169-
lex->fstring_args_idx = 0;
167+
// stop consuming injected characters
168+
vstr_reset(&lex->inject_chrs);
169+
lex->inject_chrs_idx = 0;
170170
}
171171
} else
172172
#endif
@@ -333,8 +333,7 @@ static void parse_string_literal(mp_lexer_t *lex, bool is_raw, bool is_fstring)
333333
#if MICROPY_PY_FSTRINGS
334334
if (is_fstring) {
335335
// assume there's going to be interpolation, so prep the injection data
336-
// fstring_args_idx==0 && len(fstring_args)>0 means we're extracting the args.
337-
// only when fstring_args_idx>0 will we consume the arg data
336+
// len(fstring_args)>0 means we're extracting the args.
338337
// lex->fstring_args is reset when finished, so at this point there are two cases:
339338
// - lex->fstring_args is empty: start of a new f-string
340339
// - lex->fstring_args is non-empty: concatenation of adjacent f-strings
@@ -560,19 +559,25 @@ static bool skip_whitespace(mp_lexer_t *lex, bool stop_at_newline) {
560559

561560
void mp_lexer_to_next(mp_lexer_t *lex) {
562561
#if MICROPY_PY_FSTRINGS
563-
if (lex->fstring_args.len && lex->fstring_args_idx == 0) {
562+
if (lex->fstring_args.len) {
564563
// moving onto the next token means the literal string is complete.
565564
// switch into injecting the format args.
566565
vstr_add_byte(&lex->fstring_args, ')');
567-
lex->chr0_saved = lex->chr0;
568-
lex->chr1_saved = lex->chr1;
569-
lex->chr2_saved = lex->chr2;
570-
lex->chr0 = lex->fstring_args.buf[0];
571-
lex->chr1 = lex->fstring_args.buf[1];
572-
lex->chr2 = lex->fstring_args.buf[2];
573-
// we've already extracted 3 chars, but setting this non-zero also
574-
// means we'll start consuming the fstring data
575-
lex->fstring_args_idx = 3;
566+
if (lex->inject_chrs_idx == 0) {
567+
// switch from stream to inject_chrs
568+
lex->chr0_saved = lex->chr0;
569+
lex->chr1_saved = lex->chr1;
570+
lex->chr2_saved = lex->chr2;
571+
} else {
572+
// already consuming from inject_chrs, rewind cached chars to insert new ones
573+
assert(lex->inject_chrs_idx >= 3);
574+
lex->inject_chrs_idx -= 3;
575+
}
576+
vstr_ins_strn(&lex->inject_chrs, lex->inject_chrs_idx, lex->fstring_args.buf, lex->fstring_args.len);
577+
vstr_reset(&lex->fstring_args);
578+
lex->chr0 = lex->inject_chrs.buf[lex->inject_chrs_idx++];
579+
lex->chr1 = lex->inject_chrs.buf[lex->inject_chrs_idx++];
580+
lex->chr2 = lex->inject_chrs.buf[lex->inject_chrs_idx++];
576581
}
577582
#endif
578583

@@ -857,8 +862,9 @@ mp_lexer_t *mp_lexer_new(qstr src_name, mp_reader_t reader) {
857862
lex->indent_level = m_new(uint16_t, lex->alloc_indent_level);
858863
vstr_init(&lex->vstr, 32);
859864
#if MICROPY_PY_FSTRINGS
865+
vstr_init(&lex->inject_chrs, 0);
866+
lex->inject_chrs_idx = 0;
860867
vstr_init(&lex->fstring_args, 0);
861-
lex->fstring_args_idx = 0;
862868
#endif
863869

864870
// store sentinel for first indentation level
@@ -915,6 +921,7 @@ void mp_lexer_free(mp_lexer_t *lex) {
915921
lex->reader.close(lex->reader.data);
916922
vstr_clear(&lex->vstr);
917923
#if MICROPY_PY_FSTRINGS
924+
vstr_clear(&lex->inject_chrs);
918925
vstr_clear(&lex->fstring_args);
919926
#endif
920927
m_del(uint16_t, lex->indent_level, lex->alloc_indent_level);

py/lexer.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -182,8 +182,9 @@ typedef struct _mp_lexer_t {
182182
mp_token_kind_t tok_kind; // token kind
183183
vstr_t vstr; // token data
184184
#if MICROPY_PY_FSTRINGS
185+
vstr_t inject_chrs; // characters currently being injected into the stream
186+
size_t inject_chrs_idx; // current index into inject_chrs
185187
vstr_t fstring_args; // extracted arguments to pass to .format()
186-
size_t fstring_args_idx; // how many bytes of fstring_args have been read
187188
#endif
188189
} mp_lexer_t;
189190

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# Test nesting of f-strings within f-strings.
2+
3+
x = 1
4+
5+
# 2-level nesting, with padding.
6+
print(f"a{f'b{x:2}c':>5}d")
7+
8+
# 4-level nesting using the different styles of quotes.
9+
print(f"""a{f'''b{f"c{f'd{x}e'}f"}g'''}h""")
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
# Test nesting of f-strings within f-strings.
2+
# These tests rely on Python 3.12+ to use the same quote style for nesting.
3+
4+
x = 1
5+
6+
# 8-level nesting using the same quote style.
7+
print(f"a{f"b{f"c{f"d{f"e{f"f{f"g{f"h{x}i"}j"}k"}l"}m"}n"}o"}p")
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
abcdefgh1ijklmnop

0 commit comments

Comments
 (0)