Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion Lib/test/test_descr.py
Original file line number Diff line number Diff line change
Expand Up @@ -3120,7 +3120,6 @@ class sublist(list):
## pass
## os_helper.unlink(os_helper.TESTFN)

@unittest.expectedFailure # TODO: RUSTPYTHON
def test_keywords(self):
# Testing keyword args to basic type constructors ...
with self.assertRaisesRegex(TypeError, 'keyword argument'):
Expand Down
6 changes: 2 additions & 4 deletions Lib/test/test_str.py
Original file line number Diff line number Diff line change
Expand Up @@ -1074,7 +1074,7 @@ def test_issue18183(self):
'\U00100000'.ljust(3, '\U00010000')
'\U00100000'.rjust(3, '\U00010000')

@unittest.expectedFailure # TODO: RUSTPYTHON; ? +
@unittest.expectedFailure # TODO: RUSTPYTHON; '{0:08s}'.format('result') misalign — '0' fill treated as numeric zero-pad for str type
def test_format(self):
self.assertEqual(''.format(), '')
self.assertEqual('a'.format(), 'a')
Expand Down Expand Up @@ -1503,7 +1503,7 @@ def __format__(self, spec):
self.assertEqual('{:{f}}{g}{}'.format(1, 3, g='g', f=2), ' 1g3')
self.assertEqual('{f:{}}{}{g}'.format(2, 4, f=1, g='g'), ' 14g')

@unittest.expectedFailure # TODO: RUSTPYTHON; TypeError: %x format: an integer is required, not PseudoInt
@unittest.expectedFailure # TODO: RUSTPYTHON; %c error wording uses bare class name; CPython uses fully qualified module.qualname (e.g. test.test_str.PseudoFloat)
def test_formatting(self):
string_tests.StringLikeTest.test_formatting(self)
# Testing Unicode formatting strings...
Expand Down Expand Up @@ -1752,7 +1752,6 @@ def __str__(self):
'character buffers are decoded to unicode'
)

@unittest.expectedFailure # TODO: RUSTPYTHON; Pass various keyword argument combinations to the constructor.
def test_constructor_keyword_args(self):
"""Pass various keyword argument combinations to the constructor."""
# The object argument can be passed as a keyword.
Expand All @@ -1762,7 +1761,6 @@ def test_constructor_keyword_args(self):
self.assertEqual(str(b'foo', errors='strict'), 'foo') # not "b'foo'"
self.assertEqual(str(object=b'foo', errors='strict'), 'foo')

@unittest.expectedFailure # TODO: RUSTPYTHON; Check the constructor argument defaults.
def test_constructor_defaults(self):
"""Check the constructor argument defaults."""
# The object argument defaults to '' or b''.
Expand Down
23 changes: 15 additions & 8 deletions crates/common/src/format.rs
Original file line number Diff line number Diff line change
Expand Up @@ -873,14 +873,15 @@ impl FormatSpec {
{
self.validate_format(FormatType::String)?;
match self.format_type {
Some(FormatType::String) | None => self
.format_sign_and_align(s, "", FormatAlign::Left)
.map(|mut value| {
if let Some(precision) = self.precision {
value.truncate(precision);
}
value
}),
Some(FormatType::String) | None => {
// CPython parity: precision truncates BEFORE width pads.
// `'{:3.2s}'.format('abc')` -> 'ab ' (truncate to 'ab', pad to 3).
let truncated: String = match self.precision {
Some(p) => s.deref().chars().take(p).collect(),
None => s.deref().to_owned(),
};
self.format_sign_and_align(&truncated, "", FormatAlign::Left)
}
_ => {
let ch = char::from(self.format_type.as_ref().unwrap());
Err(FormatSpecError::UnknownFormatCode(ch, "str"))
Expand Down Expand Up @@ -1078,6 +1079,12 @@ impl CharLen for AsciiStr<'_> {
}
}

/// `CharLen` for owned strings: length is measured in `char`s rather than bytes.
impl CharLen for String {
    /// Returns how many `char`s the string holds.
    ///
    /// This walks the entire string, so the cost is linear in its length; the
    /// result can be smaller than `len()` when the text is not pure ASCII.
    fn char_len(&self) -> usize {
        let text: &str = self.as_str();
        text.chars().count()
    }
}

impl Deref for AsciiStr<'_> {
type Target = str;

Expand Down
36 changes: 29 additions & 7 deletions crates/vm/src/builtins/str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -411,8 +411,12 @@ impl Constructor for PyStr {
// result as-is so any str subclass type the user returned is preserved
// (matches unicode_new_impl which only invokes unicode_subtype_new when
// type != &PyUnicode_Type).
// CPython parity: `errors` without `encoding` also triggers decode
// mode (with default UTF-8). The fast-path repr only applies when
// BOTH `encoding` and `errors` are missing.
if cls.is(vm.ctx.types.str_type)
&& args.encoding.is_missing()
&& args.errors.is_missing()
&& let OptionalArg::Present(input) = &args.object
{
return Ok(input.str(vm)?.into());
Expand All @@ -425,13 +429,31 @@ impl Constructor for PyStr {
fn py_new(_cls: &Py<PyType>, args: Self::Args, vm: &VirtualMachine) -> PyResult<Self> {
match args.object {
OptionalArg::Present(input) => {
if let OptionalArg::Present(enc) = args.encoding {
let s = vm.state.codec_registry.decode_text(
input,
enc.as_str(),
args.errors.into_option(),
vm,
)?;
let encoding = args.encoding.into_option();
let errors = args.errors.into_option();
// CPython parity: presence of `encoding` OR `errors` triggers
// decode mode. When `errors` is given alone, the encoding
// defaults to UTF-8.
if encoding.is_some() || errors.is_some() {
// CPython rejects str / non-bytes-like input early with
// specific TypeError wording (unicode_new_impl).
if input.fast_isinstance(vm.ctx.types.str_type) {
return Err(vm.new_type_error("decoding str is not supported"));
}
if !input.fast_isinstance(vm.ctx.types.bytes_type)
&& !input.fast_isinstance(vm.ctx.types.bytearray_type)
&& crate::protocol::PyBuffer::try_from_borrowed_object(vm, &input).is_err()
{
return Err(vm.new_type_error(format!(
"decoding to str: need a bytes-like object, {} found",
input.class().name()
)));
}
let enc_str = encoding.as_ref().map(|e| e.as_str()).unwrap_or("utf-8");
let s = vm
.state
.codec_registry
.decode_text(input, enc_str, errors, vm)?;
Ok(Self::from(s.as_wtf8().to_owned()))
} else {
let s = input.str(vm)?;
Expand Down
78 changes: 60 additions & 18 deletions crates/vm/src/cformat.rs
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,13 @@ fn spec_format_bytes(
Ok(spec.format_number(&bigint).into_bytes())
}
obj => {
// CPython parity: `%d` / `%i` / `%u` accept any object
// with `__index__` (preferred) or `__int__`.
if let Some(int_result) = obj.try_index_opt(vm) {
let i = int_result?;
check_int_to_str_digits(i.as_bigint(), vm)?;
return Ok(spec.format_number(i.as_bigint()).into_bytes());
}
if let Some(method) = vm.get_method(obj.clone(), identifier!(vm, __int__)) {
let result = method?.call((), vm)?;
if let Some(i) = result.downcast_ref::<PyInt>() {
Expand All @@ -72,16 +79,21 @@ fn spec_format_bytes(
}
}
Err(vm.new_type_error(format!(
"%{} format: a number is required, not {}",
"%{} format: a real number is required, not {}",
spec.format_type.to_char(),
obj.class().name()
)))
}
})
}
_ => {
// CPython parity: `%x` / `%o` / `%X` accept any object with
// `__index__`, not just PyInt. Mirrors PyNumber_Index dispatch.
if let Some(i) = obj.downcast_ref::<PyInt>() {
Ok(spec.format_number(i.as_bigint()).into_bytes())
} else if let Some(int_result) = obj.try_index_opt(vm) {
let i = int_result?;
Ok(spec.format_number(i.as_bigint()).into_bytes())
} else {
Err(vm.new_type_error(format!(
"%{} format: an integer is required, not {}",
Expand All @@ -105,12 +117,8 @@ fn spec_format_bytes(
Ok(spec.format_float(value.into()).into_bytes())
}
CFormatType::Character(CCharacterType::Character) => {
if let Some(i) = obj.downcast_ref::<PyInt>() {
let ch = i
.try_to_primitive::<u8>(vm)
.map_err(|_| vm.new_overflow_error("%c arg not in range(256)"))?;
return Ok(spec.format_char(ch));
}
// CPython parity: bytes `%c` accepts a single byte or any object
// with `__index__` in range(256).
if let Some(b) = obj.downcast_ref::<PyBytes>() {
if b.len() == 1 {
return Ok(spec.format_char(b.as_bytes()[0]));
Expand All @@ -121,7 +129,20 @@ fn spec_format_bytes(
return Ok(spec.format_char(buf[0]));
}
}
Err(vm.new_type_error("%c requires an integer in range(256) or a single byte"))
let int = if let Some(i) = obj.downcast_ref::<PyInt>() {
i.to_owned()
} else if let Some(int_result) = obj.try_index_opt(vm) {
int_result?
} else {
return Err(vm.new_type_error(format!(
"%c requires an integer in range(256) or a single byte, not {}",
obj.class().name()
)));
};
let ch = int
.try_to_primitive::<u8>(vm)
.map_err(|_| vm.new_overflow_error("%c arg not in range(256)"))?;
Ok(spec.format_char(ch))
}
}
}
Expand Down Expand Up @@ -161,6 +182,13 @@ fn spec_format_string(
Ok(spec.format_number(&bigint).into())
}
obj => {
// CPython parity: `%d` / `%i` / `%u` accept any object
// with `__index__` (preferred) or `__int__`.
if let Some(int_result) = obj.try_index_opt(vm) {
let i = int_result?;
check_int_to_str_digits(i.as_bigint(), vm)?;
return Ok(spec.format_number(i.as_bigint()).into());
}
if let Some(method) = vm.get_method(obj.clone(), identifier!(vm, __int__)) {
let result = method?.call((), vm)?;
if let Some(i) = result.downcast_ref::<PyInt>() {
Expand All @@ -169,16 +197,21 @@ fn spec_format_string(
}
}
Err(vm.new_type_error(format!(
"%{} format: a number is required, not {}",
"%{} format: a real number is required, not {}",
spec.format_type.to_char(),
obj.class().name()
)))
}
})
}
_ => {
// CPython parity: `%x` / `%o` / `%X` accept any object with
// `__index__`, not just PyInt. Mirrors PyNumber_Index dispatch.
if let Some(i) = obj.downcast_ref::<PyInt>() {
Ok(spec.format_number(i.as_bigint()).into())
} else if let Some(int_result) = obj.try_index_opt(vm) {
let i = int_result?;
Ok(spec.format_number(i.as_bigint()).into())
} else {
Err(vm.new_type_error(format!(
"%{} format: an integer is required, not {}",
Expand All @@ -193,20 +226,29 @@ fn spec_format_string(
Ok(spec.format_float(value.into()).into())
}
CFormatType::Character(CCharacterType::Character) => {
if let Some(i) = obj.downcast_ref::<PyInt>() {
let ch = i
.as_bigint()
.to_u32()
.and_then(CodePoint::from_u32)
.ok_or_else(|| vm.new_overflow_error("%c arg not in range(0x110000)"))?;
return Ok(spec.format_char(ch));
}
// CPython parity: `%c` accepts a single-char str or any object with
// `__index__` (the latter via PyNumber_Index dispatch).
if let Some(s) = obj.downcast_ref::<PyStr>()
&& let Ok(ch) = s.as_wtf8().code_points().exactly_one()
{
return Ok(spec.format_char(ch));
}
Err(vm.new_type_error("%c requires int or char"))
let int = if let Some(i) = obj.downcast_ref::<PyInt>() {
i.to_owned()
} else if let Some(int_result) = obj.try_index_opt(vm) {
int_result?
} else {
return Err(vm.new_type_error(format!(
"%c requires an int or a unicode character, not {}",
obj.class().name()
)));
};
let ch = int
.as_bigint()
.to_u32()
.and_then(CodePoint::from_u32)
.ok_or_else(|| vm.new_overflow_error("%c arg not in range(0x110000)"))?;
Ok(spec.format_char(ch))
}
}
}
Expand Down
21 changes: 11 additions & 10 deletions crates/vm/src/format.rs
Original file line number Diff line number Diff line change
Expand Up @@ -198,11 +198,12 @@ pub(crate) fn format(
));
}
auto_argument_index += 1;
arguments
.args
.get(auto_argument_index - 1)
.cloned()
.ok_or_else(|| vm.new_index_error("tuple index out of range"))
let idx = auto_argument_index - 1;
arguments.args.get(idx).cloned().ok_or_else(|| {
vm.new_index_error(format!(
"Replacement index {idx} out of range for positional args tuple"
))
})
}
FieldType::Index(index) => {
if auto_argument_index != 0 {
Expand All @@ -211,11 +212,11 @@ pub(crate) fn format(
));
}
seen_index = true;
arguments
.args
.get(index)
.cloned()
.ok_or_else(|| vm.new_index_error("tuple index out of range"))
arguments.args.get(index).cloned().ok_or_else(|| {
vm.new_index_error(format!(
"Replacement index {index} out of range for positional args tuple"
))
})
}
FieldType::Keyword(keyword) => keyword
.as_str()
Expand Down
Loading