Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion Lib/test/test_str.py
Original file line number Diff line number Diff line change
Expand Up @@ -792,7 +792,6 @@ def test_isdecimal(self):
for ch in ['\U0001D7F6', '\U00011066', '\U000104A0']:
self.assertTrue(ch.isdecimal(), '{!a} is decimal.'.format(ch))

@unittest.expectedFailure # TODO: RUSTPYTHON; AssertionError: False != True
def test_isdigit(self):
super().test_isdigit()
self.checkequalnofix(True, '\u2460', 'isdigit')
Expand Down
9 changes: 5 additions & 4 deletions crates/sre_engine/src/string.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use icu_properties::props::{CanonicalCombiningClass, EnumeratedProperty};
use icu_properties::props::{EnumeratedProperty, GeneralCategory, GeneralCategoryGroup};
use rustpython_wtf8::Wtf8;

#[derive(Debug, Clone, Copy)]
Expand Down Expand Up @@ -444,9 +444,10 @@ pub(crate) const fn is_uni_linebreak(ch: u32) -> bool {
pub(crate) fn is_uni_alnum(ch: u32) -> bool {
// TODO: check with cpython
char::try_from(ch)
.map(|x| {
x.is_alphanumeric()
&& CanonicalCombiningClass::for_char(x) == CanonicalCombiningClass::NotReordered
.map(|c| {
GeneralCategoryGroup::Letter
.union(GeneralCategoryGroup::Number)
.contains(GeneralCategory::for_char(c))
})
.unwrap_or(false)
}
Expand Down
27 changes: 18 additions & 9 deletions crates/vm/src/builtins/str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,8 @@ use rustpython_common::{
};

use icu_properties::props::{
BidiClass, BinaryProperty, CanonicalCombiningClass, EnumeratedProperty, GeneralCategory,
XidContinue, XidStart,
BidiClass, BinaryProperty, EnumeratedProperty, GeneralCategory, GeneralCategoryGroup,
NumericType, XidContinue, XidStart,
};
use unicode_casing::CharExt;

Expand Down Expand Up @@ -949,23 +949,30 @@ impl PyStr {
fn isalnum(&self) -> bool {
!self.data.is_empty()
&& self.char_all(|c| {
c.is_alphanumeric()
&& CanonicalCombiningClass::for_char(c) == CanonicalCombiningClass::NotReordered
GeneralCategoryGroup::Letter
.union(GeneralCategoryGroup::Number)
.contains(GeneralCategory::for_char(c))
})
}

#[pymethod]
fn isnumeric(&self) -> bool {
!self.data.is_empty() && self.char_all(char::is_numeric)
!self.data.is_empty()
&& self.char_all(|c| {
[
NumericType::Decimal,
NumericType::Digit,
NumericType::Numeric,
]
.contains(&NumericType::for_char(c))
})
}

#[pymethod]
fn isdigit(&self) -> bool {
// python's isdigit also checks if exponents are digits, these are the unicode codepoints for exponents
!self.data.is_empty()
&& self.char_all(|c| {
c.is_ascii_digit()
|| matches!(c, '⁰' | '¹' | '²' | '³' | '⁴' | '⁵' | '⁶' | '⁷' | '⁸' | '⁹')
[NumericType::Digit, NumericType::Decimal].contains(&NumericType::for_char(c))
})
}

Expand Down Expand Up @@ -1064,7 +1071,9 @@ impl PyStr {

#[pymethod]
fn isalpha(&self) -> bool {
!self.data.is_empty() && self.char_all(char::is_alphabetic)
!self.data.is_empty()
&& self
.char_all(|c| GeneralCategoryGroup::Letter.contains(GeneralCategory::for_char(c)))
}

#[pymethod]
Expand Down
13 changes: 13 additions & 0 deletions extra_tests/snippets/builtin_str.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,16 +72,29 @@
assert "\u1c89".istitle()
# assert "DZ".title() == "Dz"
assert a.isalpha()
assert not "\u093f".isalpha()

# Combining characters differ slightly between Rust and Python
assert "\u006e".isalnum()
assert not "\u0303".isalnum()
assert not "\u006e\u0303".isalnum()
assert "\u00f1".isalnum()
assert not "\u0345".isalnum()
assert not "\u093f".isalnum()
for raw in range(0x0363, 0x036F):
assert not chr(raw).isalnum()

# isdigit is true for exponents
assert "⁰".isdigit()
assert "⁰".isnumeric()
assert not "½".isdigit()
assert "½".isnumeric()
assert not "Ⅻ".isdigit()
assert "Ⅻ".isnumeric()

# isnumeric is broader than Rust's
assert "\u3405".isnumeric()

s = "1 2 3"
assert s.split(" ", 1) == ["1", "2 3"]
assert s.rsplit(" ", 1) == ["1 2", "3"]
Expand Down
Loading