Skip to content

Commit 5f660a4

Browse files
committed
Support unicode array type.
1 parent 67ede59 commit 5f660a4

6 files changed

Lines changed: 60 additions & 74 deletions

File tree

Lib/test/test_array.py

Lines changed: 2 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
import array
1616
# from array import _array_reconstructor as array_reconstructor # XXX: RUSTPYTHON
1717

18-
# sizeof_wchar = array.array('u').itemsize # XXX: RUSTPYTHON
18+
sizeof_wchar = array.array('u').itemsize
1919

2020

2121
class ArraySubclass(array.array):
@@ -25,10 +25,7 @@ class ArraySubclassWithKwargs(array.array):
2525
def __init__(self, typecode, newarg=None):
2626
array.array.__init__(self)
2727

28-
# TODO: RUSTPYTHON
29-
# We did not support typecode u for unicode yet
30-
# typecodes = 'ubBhHiIlLfdqQ'
31-
typecodes = 'bBhHiIlLfdqQ'
28+
typecodes = 'ubBhHiIlLfdqQ'
3229

3330
class MiscTest(unittest.TestCase):
3431

@@ -1091,8 +1088,6 @@ def test_sizeof_without_buffer(self):
10911088
basesize = support.calcvobjsize('Pn2Pi')
10921089
support.check_sizeof(self, a, basesize)
10931090

1094-
# TODO: RUSTPYTHON
1095-
@unittest.expectedFailure
10961091
def test_initialize_with_unicode(self):
10971092
if self.typecode != 'u':
10981093
with self.assertRaises(TypeError) as cm:
@@ -1137,7 +1132,6 @@ class UnicodeTest(StringTest, unittest.TestCase):
11371132
minitemsize = 2
11381133

11391134
# TODO: RUSTPYTHON
1140-
@unittest.expectedFailure
11411135
def test_add(self):
11421136
super().test_add()
11431137

@@ -1146,128 +1140,78 @@ def test_add(self):
11461140
def test_buffer(self):
11471141
super().test_buffer()
11481142

1149-
# TODO: RUSTPYTHON
1150-
@unittest.expectedFailure
11511143
def test_buffer_info(self):
11521144
super().test_buffer_info()
11531145

1154-
# TODO: RUSTPYTHON
1155-
@unittest.expectedFailure
11561146
def test_byteswap(self):
11571147
super().test_byteswap()
11581148

1159-
# TODO: RUSTPYTHON
1160-
@unittest.expectedFailure
11611149
def test_cmp(self):
11621150
super().test_cmp()
11631151

1164-
# TODO: RUSTPYTHON
1165-
@unittest.expectedFailure
11661152
def test_constructor(self):
11671153
super().test_constructor()
11681154

1169-
# TODO: RUSTPYTHON
1170-
@unittest.expectedFailure
11711155
def test_constructor_with_iterable_argument(self):
11721156
super().test_constructor_with_iterable_argument()
11731157

1174-
# TODO: RUSTPYTHON
1175-
@unittest.expectedFailure
11761158
def test_copy(self):
11771159
super().test_copy()
11781160

1179-
# TODO: RUSTPYTHON
1180-
@unittest.expectedFailure
11811161
def test_count(self):
11821162
super().test_count()
11831163

1184-
# TODO: RUSTPYTHON
1185-
@unittest.expectedFailure
11861164
def test_coveritertraverse(self):
11871165
super().test_coveritertraverse()
11881166

1189-
# TODO: RUSTPYTHON
1190-
@unittest.expectedFailure
11911167
def test_deepcopy(self):
11921168
super().test_deepcopy()
11931169

1194-
# TODO: RUSTPYTHON
1195-
@unittest.expectedFailure
11961170
def test_delitem(self):
11971171
super().test_delitem()
11981172

1199-
# TODO: RUSTPYTHON
1200-
@unittest.expectedFailure
12011173
def test_exhausted_iterator(self):
12021174
super().test_exhausted_iterator()
12031175

1204-
# TODO: RUSTPYTHON
1205-
@unittest.expectedFailure
12061176
def test_extend(self):
12071177
super().test_extend()
12081178

1209-
# TODO: RUSTPYTHON
1210-
@unittest.expectedFailure
12111179
def test_extended_getslice(self):
12121180
super().test_extended_getslice()
12131181

1214-
# TODO: RUSTPYTHON
1215-
@unittest.expectedFailure
12161182
def test_extended_set_del_slice(self):
12171183
super().test_extended_set_del_slice()
12181184

1219-
# TODO: RUSTPYTHON
1220-
@unittest.expectedFailure
12211185
def test_fromarray(self):
12221186
super().test_fromarray()
12231187

1224-
# TODO: RUSTPYTHON
1225-
@unittest.expectedFailure
12261188
def test_getitem(self):
12271189
super().test_getitem()
12281190

1229-
# TODO: RUSTPYTHON
1230-
@unittest.expectedFailure
12311191
def test_getslice(self):
12321192
super().test_getslice()
12331193

1234-
# TODO: RUSTPYTHON
1235-
@unittest.expectedFailure
12361194
def test_iadd(self):
12371195
super().test_iadd()
12381196

1239-
# TODO: RUSTPYTHON
1240-
@unittest.expectedFailure
12411197
def test_imul(self):
12421198
super().test_imul()
12431199

1244-
# TODO: RUSTPYTHON
1245-
@unittest.expectedFailure
12461200
def test_index(self):
12471201
super().test_index()
12481202

1249-
# TODO: RUSTPYTHON
1250-
@unittest.expectedFailure
12511203
def test_insert(self):
12521204
super().test_insert()
12531205

1254-
# TODO: RUSTPYTHON
1255-
@unittest.expectedFailure
12561206
def test_len(self):
12571207
super().test_len()
12581208

1259-
# TODO: RUSTPYTHON
1260-
@unittest.expectedFailure
12611209
def test_mul(self):
12621210
super().test_mul()
12631211

1264-
# TODO: RUSTPYTHON
1265-
@unittest.expectedFailure
12661212
def test_pop(self):
12671213
super().test_pop()
12681214

1269-
# TODO: RUSTPYTHON
1270-
@unittest.expectedFailure
12711215
def test_remove(self):
12721216
super().test_remove()
12731217

@@ -1276,28 +1220,18 @@ def test_remove(self):
12761220
def test_repr(self):
12771221
super().test_repr()
12781222

1279-
# TODO: RUSTPYTHON
1280-
@unittest.expectedFailure
12811223
def test_reverse(self):
12821224
super().test_reverse()
12831225

1284-
# TODO: RUSTPYTHON
1285-
@unittest.expectedFailure
12861226
def test_setslice(self):
12871227
super().test_setslice()
12881228

1289-
# TODO: RUSTPYTHON
1290-
@unittest.expectedFailure
12911229
def test_str(self):
12921230
super().test_str()
12931231

1294-
# TODO: RUSTPYTHON
1295-
@unittest.expectedFailure
12961232
def test_tofrombytes(self):
12971233
super().test_tofrombytes()
12981234

1299-
# TODO: RUSTPYTHON
1300-
@unittest.expectedFailure
13011235
def test_tofromlist(self):
13021236
super().test_tofromlist()
13031237

vm/src/builtins/memory.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ type PyMemoryViewRef = PyRef<PyMemoryView>;
5555
impl PyMemoryView {
5656
fn parse_format(format: &str, vm: &VirtualMachine) -> PyResult<FormatSpec> {
5757
FormatSpec::parse(format)
58-
.map_err(|msg| vm.new_exception_msg(vm.ctx.types.memoryview_type.clone(), msg))
58+
.map_err(|msg| vm.new_struct_error(msg))
5959
}
6060

6161
pub fn from_buffer(

vm/src/builtins/pystr.rs

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -767,6 +767,12 @@ impl PyStr {
767767
self.value.py_join(iter)
768768
}
769769

770+
// FIXME: two traversals of str is expensive
/// Converts a byte offset within `r` into a character offset.
///
/// Returns the number of characters of `r` whose starting byte position is
/// strictly less than `byte_idx`. An offset at or past the end of `r` yields
/// the total character count.
#[inline]
fn _to_char_idx(r: &str, byte_idx: usize) -> usize {
    let mut chars_before = 0;
    for (start, _) in r.char_indices() {
        // Start offsets are increasing, so the first one at or past
        // `byte_idx` ends the scan.
        if start >= byte_idx {
            break;
        }
        chars_before += 1;
    }
    chars_before
}
775+
770776
#[inline]
771777
fn _find<F>(&self, args: FindArgs, find: F) -> Option<usize>
772778
where
@@ -778,25 +784,25 @@ impl PyStr {
778784

779785
#[pymethod]
780786
fn find(&self, args: FindArgs) -> isize {
781-
self._find(args, |r, s| r.find(s))
787+
self._find(args, |r, s| Some(Self::_to_char_idx(r, r.find(s)?)))
782788
.map_or(-1, |v| v as isize)
783789
}
784790

785791
#[pymethod]
786792
fn rfind(&self, args: FindArgs) -> isize {
787-
self._find(args, |r, s| r.rfind(s))
793+
self._find(args, |r, s| Some(Self::_to_char_idx(r, r.rfind(s)?)))
788794
.map_or(-1, |v| v as isize)
789795
}
790796

791797
#[pymethod]
792798
fn index(&self, args: FindArgs, vm: &VirtualMachine) -> PyResult<usize> {
793-
self._find(args, |r, s| r.find(s))
799+
self._find(args, |r, s| Some(Self::_to_char_idx(r, r.find(s)?)))
794800
.ok_or_else(|| vm.new_value_error("substring not found".to_owned()))
795801
}
796802

797803
#[pymethod]
798804
fn rindex(&self, args: FindArgs, vm: &VirtualMachine) -> PyResult<usize> {
799-
self._find(args, |r, s| r.rfind(s))
805+
self._find(args, |r, s| Some(Self::_to_char_idx(r, r.rfind(s)?)))
800806
.ok_or_else(|| vm.new_value_error("substring not found".to_owned()))
801807
}
802808

vm/src/exceptions.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -488,6 +488,7 @@ pub struct ExceptionZoo {
488488
pub indentation_error: PyTypeRef,
489489
pub tab_error: PyTypeRef,
490490
pub system_error: PyTypeRef,
491+
pub struct_error: PyTypeRef,
491492
pub type_error: PyTypeRef,
492493
pub value_error: PyTypeRef,
493494
pub unicode_error: PyTypeRef,
@@ -581,6 +582,7 @@ impl ExceptionZoo {
581582
let tab_error = create_exception_type("TabError", &indentation_error);
582583
let target_scope_error = create_exception_type("TargetScopeError", &syntax_error);
583584
let system_error = create_exception_type("SystemError", &exception_type);
585+
let struct_error = create_exception_type("StructError", &exception_type);
584586
let type_error = create_exception_type("TypeError", &exception_type);
585587
let value_error = create_exception_type("ValueError", &exception_type);
586588
let unicode_error = create_exception_type("UnicodeError", &value_error);
@@ -654,6 +656,7 @@ impl ExceptionZoo {
654656
indentation_error,
655657
tab_error,
656658
system_error,
659+
struct_error,
657660
type_error,
658661
value_error,
659662
unicode_error,

vm/src/stdlib/array.rs

Lines changed: 39 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -404,7 +404,8 @@ macro_rules! def_array_enum {
404404
def_array_enum!(
405405
(SignedByte, i8, 'b', "b"),
406406
(UnsignedByte, u8, 'B', "B"),
407-
// TODO: support unicode char
407+
// TODO: `repr` of unicode array is a special case
408+
(PyUnicode, PyUnicode, 'u', "u"),
408409
(SignedShort, raw::c_short, 'h', "h"),
409410
(UnsignedShort, raw::c_ushort, 'H', "H"),
410411
(SignedInt, raw::c_int, 'i', "i"),
@@ -417,6 +418,9 @@ def_array_enum!(
417418
(Double, f64, 'd', "d"),
418419
);
419420

421+
#[derive(Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Debug)]
422+
/// Element type registered for the `'u'` typecode of `array`; a newtype
/// wrapping one platform `libc::wchar_t` value.
pub struct PyUnicode(libc::wchar_t);
423+
420424
trait ArrayElement: Sized {
421425
fn try_into_from_object(vm: &VirtualMachine, obj: PyObjectRef) -> PyResult<Self>;
422426
fn byteswap(self) -> Self;
@@ -464,6 +468,40 @@ fn f64_try_into_from_object(vm: &VirtualMachine, obj: PyObjectRef) -> PyResult<f
464468
IntoPyFloat::try_from_object(vm, obj).map(|x| x.to_f64())
465469
}
466470

471+
impl ArrayElement for PyUnicode {
472+
fn try_into_from_object(vm: &VirtualMachine, obj: PyObjectRef) -> PyResult<Self> {
473+
let ch = PyStrRef::try_from_object(vm, obj)?
474+
.as_str()
475+
.chars()
476+
.nth(0)
477+
.expect("PY_UNICODE must not be empty");
478+
Ok(Self(ch as _))
479+
}
480+
481+
fn byteswap(self) -> Self {
482+
Self(self.0.swap_bytes())
483+
}
484+
}
485+
486+
impl From<PyUnicode> for char {
    /// Converts the stored `wchar_t` value into a `char`.
    ///
    /// Values that are not Unicode scalar values (surrogates, or anything
    /// above U+10FFFF) are mapped to U+FFFD REPLACEMENT CHARACTER.
    fn from(ch: PyUnicode) -> Self {
        // Not every `wchar_t` bit pattern is a valid `char`: `byteswap` alone
        // can produce surrogates or out-of-range values, so an unchecked
        // conversion here would be undefined behavior.
        char::from_u32(ch.0 as u32).unwrap_or(std::char::REPLACEMENT_CHARACTER)
    }
}
492+
493+
impl IntoPyObject for PyUnicode {
494+
fn into_pyobject(self, vm: &VirtualMachine) -> PyObjectRef {
495+
String::from(char::from(self)).into_pyobject(vm)
496+
}
497+
}
498+
499+
impl fmt::Display for PyUnicode {
500+
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
501+
write!(f, "{}", char::from(*self))
502+
}
503+
}
504+
467505
#[pyclass(module = "array", name = "array")]
468506
#[derive(Debug)]
469507
pub struct PyArray {

vm/src/vm.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -622,6 +622,11 @@ impl VirtualMachine {
622622
self.new_exception(exc_type, vec![self.ctx.new_str(msg)])
623623
}
624624

625+
/// Builds a new exception of the `struct_error` type from the exception zoo,
/// with `msg` as its message.
pub fn new_struct_error(&self, msg: String) -> PyBaseExceptionRef {
    self.new_exception_msg(self.ctx.exceptions.struct_error.clone(), msg)
}
629+
625630
pub fn new_lookup_error(&self, msg: String) -> PyBaseExceptionRef {
626631
let lookup_error = self.ctx.exceptions.lookup_error.clone();
627632
self.new_exception_msg(lookup_error, msg)

0 commit comments

Comments
 (0)