Skip to content

Commit 4457769

Browse files
committed
check surrogates
1 parent 27ab62d commit 4457769

File tree

3 files changed

+23
-5
lines changed

3 files changed

+23
-5
lines changed

Lib/test/test_builtin.py

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -2401,8 +2401,6 @@ def test_type_nokwargs(self):
24012401
with self.assertRaises(TypeError):
24022402
type('a', (), dict={})
24032403

2404-
# TODO: RUSTPYTHON
2405-
@unittest.expectedFailure
24062404
def test_type_name(self):
24072405
for name in 'A', '\xc4', '\U0001f40d', 'B.A', '42', '':
24082406
with self.subTest(name=name):
@@ -2452,8 +2450,6 @@ def test_type_qualname(self):
24522450
A.__qualname__ = b'B'
24532451
self.assertEqual(A.__qualname__, 'D.E')
24542452

2455-
# TODO: RUSTPYTHON
2456-
@unittest.expectedFailure
24572453
def test_type_doc(self):
24582454
for doc in 'x', '\xc4', '\U0001f40d', 'x\x00y', b'x', 42, None:
24592455
A = type('A', (), {'__doc__': doc})

crates/vm/src/builtins/str.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -441,7 +441,7 @@ impl PyStr {
441441
self.data.as_str()
442442
}
443443

444-
fn ensure_valid_utf8(&self, vm: &VirtualMachine) -> PyResult<()> {
444+
pub(crate) fn ensure_valid_utf8(&self, vm: &VirtualMachine) -> PyResult<()> {
445445
if self.is_utf8() {
446446
Ok(())
447447
} else {

crates/vm/src/builtins/type.rs

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1163,6 +1163,7 @@ impl PyType {
11631163
if name.as_bytes().contains(&0) {
11641164
return Err(vm.new_value_error("type name must not contain null characters"));
11651165
}
1166+
name.ensure_valid_utf8(vm)?;
11661167

11671168
// Use std::mem::replace to swap the new value in and get the old value out,
11681169
// then drop the old value after releasing the lock (similar to CPython's Py_SETREF)
@@ -1254,6 +1255,7 @@ impl Constructor for PyType {
12541255
if name.as_bytes().contains(&0) {
12551256
return Err(vm.new_value_error("type name must not contain null characters"));
12561257
}
1258+
name.ensure_valid_utf8(vm)?;
12571259

12581260
let (metatype, base, bases, base_is_type) = if bases.is_empty() {
12591261
let base = vm.ctx.types.object_type.to_owned();
@@ -1306,6 +1308,13 @@ impl Constructor for PyType {
13061308
});
13071309
let mut attributes = dict.to_attributes(vm);
13081310

1311+
// Check __doc__ for surrogates - raises UnicodeEncodeError during type creation
1312+
if let Some(doc) = attributes.get(identifier!(vm, __doc__))
1313+
&& let Some(doc_str) = doc.downcast_ref::<PyStr>()
1314+
{
1315+
doc_str.ensure_valid_utf8(vm)?;
1316+
}
1317+
13091318
if let Some(f) = attributes.get_mut(identifier!(vm, __init_subclass__))
13101319
&& f.class().is(vm.ctx.types.function_type)
13111320
{
@@ -1340,6 +1349,13 @@ impl Constructor for PyType {
13401349

13411350
let (heaptype_slots, add_dict): (Option<PyRef<PyTuple<PyStrRef>>>, bool) =
13421351
if let Some(x) = attributes.get(identifier!(vm, __slots__)) {
1352+
// Check if __slots__ is bytes - not allowed
1353+
if x.class().is(vm.ctx.types.bytes_type) {
1354+
return Err(vm.new_type_error(
1355+
"__slots__ items must be strings, not 'bytes'".to_owned(),
1356+
));
1357+
}
1358+
13431359
let slots = if x.class().is(vm.ctx.types.str_type) {
13441360
let x = unsafe { x.downcast_unchecked_ref::<PyStr>() };
13451361
PyTuple::new_ref_typed(vec![x.to_owned()], &vm.ctx)
@@ -1348,6 +1364,12 @@ impl Constructor for PyType {
13481364
let elements = {
13491365
let mut elements = Vec::new();
13501366
while let PyIterReturn::Return(element) = iter.next(vm)? {
1367+
// Check if any slot item is bytes
1368+
if element.class().is(vm.ctx.types.bytes_type) {
1369+
return Err(vm.new_type_error(
1370+
"__slots__ items must be strings, not 'bytes'".to_owned(),
1371+
));
1372+
}
13511373
elements.push(element);
13521374
}
13531375
elements

0 commit comments

Comments
 (0)