Skip to content

Commit cb2f994

Browse files
committed
Correct wrong index access
1 parent 51b6bd7 commit cb2f994

1 file changed

Lines changed: 123 additions & 70 deletions

File tree

crates/stdlib/src/json.rs

Lines changed: 123 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,22 @@ mod _json {
3737
count
3838
}
3939

40+
/// Check whether a character iterator begins with `pattern`.
///
/// The comparison is performed character by character, so callers can pass an
/// iterator that was positioned by *character* index (for example
/// `s.chars().skip(idx)`) without ever converting that index to a byte offset.
/// This avoids byte/char index mismatch issues with non-ASCII strings.
///
/// Returns `true` when every character of `pattern` is matched, in order, by
/// the next characters yielded from `chars`. An empty `pattern` always matches.
/// Returns `false` as soon as a character differs or `chars` runs out first.
#[inline]
fn starts_with_chars<I>(mut chars: I, pattern: &str) -> bool
where
    I: Iterator<Item = char>,
{
    // `all` short-circuits on the first mismatch; an exhausted `chars` yields
    // `None`, which never equals `Some(expected)`.
    pattern.chars().all(|expected| chars.next() == Some(expected))
}
55+
4056
#[pyattr(name = "make_scanner")]
4157
#[pyclass(name = "Scanner", traverse)]
4258
#[derive(Debug, PyPayload)]
@@ -202,6 +218,50 @@ mod _json {
202218
Some((ret, buf.len()))
203219
}
204220

221+
/// Parse a number from a character iterator.
222+
/// Returns (result, character_count) where character_count is the number of chars consumed.
223+
fn parse_number_from_chars<I>(&self, chars: I, vm: &VirtualMachine) -> Option<(PyResult, usize)>
224+
where
225+
I: Iterator<Item = char>,
226+
{
227+
let mut buf = String::new();
228+
let mut has_neg = false;
229+
let mut has_decimal = false;
230+
let mut has_exponent = false;
231+
let mut has_e_sign = false;
232+
233+
for c in chars {
234+
let i = buf.len();
235+
match c {
236+
'-' if i == 0 => has_neg = true,
237+
n if n.is_ascii_digit() => {}
238+
'.' if !has_decimal => has_decimal = true,
239+
'e' | 'E' if !has_exponent => has_exponent = true,
240+
'+' | '-' if !has_e_sign => has_e_sign = true,
241+
_ => break,
242+
}
243+
buf.push(c);
244+
}
245+
246+
let len = buf.len();
247+
if len == 0 || (len == 1 && has_neg) {
248+
return None;
249+
}
250+
251+
let ret = if has_decimal || has_exponent {
252+
if let Some(ref parse_float) = self.parse_float {
253+
parse_float.call((&buf,), vm)
254+
} else {
255+
Ok(vm.ctx.new_float(f64::from_str(&buf).unwrap()).into())
256+
}
257+
} else if let Some(ref parse_int) = self.parse_int {
258+
parse_int.call((&buf,), vm)
259+
} else {
260+
Ok(vm.new_pyobj(BigInt::from_str(&buf).unwrap()))
261+
};
262+
Some((ret, len))
263+
}
264+
205265
/// Parse a JSON object starting after the opening '{'.
206266
/// Returns (parsed_object, end_character_index).
207267
fn parse_object(
@@ -458,6 +518,7 @@ mod _json {
458518
}
459519

460520
/// Call scan_once and handle the result.
521+
/// Uses character iterators to avoid byte/char index mismatch with non-ASCII strings.
461522
fn call_scan_once(
462523
&self,
463524
scan_once: &PyObjectRef,
@@ -466,100 +527,92 @@ mod _json {
466527
memo: &mut HashMap<String, PyStrRef>,
467528
vm: &VirtualMachine,
468529
) -> PyResult<(PyObjectRef, usize)> {
469-
// First try to handle common cases directly in Rust
470530
let s = pystr.as_str();
471-
let mut chars = s.chars().skip(idx).peekable();
531+
let chars = s.chars().skip(idx).peekable();
472532

473-
let remaining = &s[idx..];
533+
let first_char = match chars.clone().next() {
534+
Some(c) => c,
535+
None => return Err(self.make_decode_error("Expecting value", pystr, idx, vm)),
536+
};
474537

475-
match chars.peek() {
476-
Some('"') => {
477-
// String - parse directly in Rust
538+
match first_char {
539+
'"' => {
540+
// String
478541
let (wtf8, end) = machinery::scanstring(pystr.as_wtf8(), idx + 1, self.strict)
479542
.map_err(|e| py_decode_error(e, pystr.clone(), vm))?;
480543
let py_str = vm.ctx.new_str(wtf8.to_string());
481-
return Ok((py_str.into(), end));
544+
Ok((py_str.into(), end))
482545
}
483-
Some('{') => {
484-
// Nested object - parse recursively in Rust
485-
return self.parse_object(pystr, idx + 1, scan_once, memo, vm);
486-
}
487-
Some('[') => {
488-
// Nested array - parse recursively in Rust
489-
return self.parse_array(pystr, idx + 1, scan_once, memo, vm);
546+
'{' => {
547+
// Object
548+
self.parse_object(pystr, idx + 1, scan_once, memo, vm)
490549
}
491-
Some('n') => {
492-
// null - parse directly in Rust
493-
if remaining.starts_with("null") {
494-
return Ok((vm.ctx.none(), idx + 4));
495-
}
550+
'[' => {
551+
// Array
552+
self.parse_array(pystr, idx + 1, scan_once, memo, vm)
496553
}
497-
Some('t') => {
498-
// true - parse directly in Rust
499-
if remaining.starts_with("true") {
500-
return Ok((vm.ctx.new_bool(true).into(), idx + 4));
501-
}
554+
'n' if starts_with_chars(chars.clone(), "null") => {
555+
// null
556+
Ok((vm.ctx.none(), idx + 4))
502557
}
503-
Some('f') => {
504-
// false - parse directly in Rust
505-
if remaining.starts_with("false") {
506-
return Ok((vm.ctx.new_bool(false).into(), idx + 5));
507-
}
558+
't' if starts_with_chars(chars.clone(), "true") => {
559+
// true
560+
Ok((vm.ctx.new_bool(true).into(), idx + 4))
508561
}
509-
Some(c) if c.is_ascii_digit() => {
510-
// Number starting with digit - parse directly in Rust
511-
if let Some((result, len)) = self.parse_number(remaining, vm) {
512-
return Ok((result?, idx + len));
513-
}
562+
'f' if starts_with_chars(chars.clone(), "false") => {
563+
// false
564+
Ok((vm.ctx.new_bool(false).into(), idx + 5))
514565
}
515-
Some('N') => {
516-
// NaN - parse directly in Rust
517-
if remaining.starts_with("NaN") {
518-
let result = self.parse_constant.call(("NaN",), vm)?;
519-
return Ok((result, idx + 3));
520-
}
566+
'N' if starts_with_chars(chars.clone(), "NaN") => {
567+
// NaN
568+
let result = self.parse_constant.call(("NaN",), vm)?;
569+
Ok((result, idx + 3))
521570
}
522-
Some('I') => {
523-
// Infinity - parse directly in Rust
524-
if remaining.starts_with("Infinity") {
525-
let result = self.parse_constant.call(("Infinity",), vm)?;
526-
return Ok((result, idx + 8));
527-
}
571+
'I' if starts_with_chars(chars.clone(), "Infinity") => {
572+
// Infinity
573+
let result = self.parse_constant.call(("Infinity",), vm)?;
574+
Ok((result, idx + 8))
528575
}
529-
Some('-') => {
576+
'-' => {
530577
// -Infinity or negative number
531-
if remaining.starts_with("-Infinity") {
578+
if starts_with_chars(chars.clone(), "-Infinity") {
532579
let result = self.parse_constant.call(("-Infinity",), vm)?;
533580
return Ok((result, idx + 9));
534581
}
535-
// Try parsing as negative number
536-
if let Some((result, len)) = self.parse_number(remaining, vm) {
582+
// Negative number - collect number characters
583+
if let Some((result, len)) = self.parse_number_from_chars(chars, vm) {
537584
return Ok((result?, idx + len));
538585
}
586+
Err(self.make_decode_error("Expecting value", pystr, idx, vm))
539587
}
540-
_ => {
541-
// fall through to call scan_once
542-
}
543-
}
544-
545-
// Fall back to scan_once for other value types
546-
let result = scan_once.call((pystr.clone(), idx as isize), vm);
547-
548-
match result {
549-
Ok(tuple) => {
550-
use crate::vm::builtins::PyTupleRef;
551-
let tuple: PyTupleRef = tuple.try_into_value(vm)?;
552-
if tuple.len() != 2 {
553-
return Err(vm.new_value_error("scan_once must return 2-tuple"));
588+
c if c.is_ascii_digit() => {
589+
// Positive number
590+
if let Some((result, len)) = self.parse_number_from_chars(chars, vm) {
591+
return Ok((result?, idx + len));
554592
}
555-
let value = tuple.as_slice()[0].clone();
556-
let end_idx: isize = tuple.as_slice()[1].try_to_value(vm)?;
557-
Ok((value, end_idx as usize))
558-
}
559-
Err(err) if err.fast_isinstance(vm.ctx.exceptions.stop_iteration) => {
560593
Err(self.make_decode_error("Expecting value", pystr, idx, vm))
561594
}
562-
Err(err) => Err(err),
595+
_ => {
596+
// Fall back to scan_once for unrecognized input
597+
let result = scan_once.call((pystr.clone(), idx as isize), vm);
598+
599+
match result {
600+
Ok(tuple) => {
601+
use crate::vm::builtins::PyTupleRef;
602+
let tuple: PyTupleRef = tuple.try_into_value(vm)?;
603+
if tuple.len() != 2 {
604+
return Err(vm.new_value_error("scan_once must return 2-tuple"));
605+
}
606+
let value = tuple.as_slice()[0].clone();
607+
let end_idx: isize = tuple.as_slice()[1].try_to_value(vm)?;
608+
Ok((value, end_idx as usize))
609+
}
610+
Err(err) if err.fast_isinstance(vm.ctx.exceptions.stop_iteration) => {
611+
Err(self.make_decode_error("Expecting value", pystr, idx, vm))
612+
}
613+
Err(err) => Err(err),
614+
}
615+
}
563616
}
564617
}
565618

0 commit comments

Comments
 (0)