Fix swapcase() (RustPython#7788)

joshuamegnauth54 · web-flow · commit ef375bec260c · 2026-05-13T23:21:45.000+09:00
The tests for swapcase() were failing for two reasons. The first is '𐐧' casing which should be fixed with modern Unicode tables. The second failure is due to CPython's sigma override, which I implemented in PR RustPython#7717.
diff --git a/Lib/test/test_str.py b/Lib/test/test_str.py
@@ -972,7 +972,6 @@ def test_title(self):
         self.assertEqual('A\u03a3 \u1fa1xy'.title(), 'A\u03c2 \u1fa9xy')
         self.assertEqual('A\u03a3A'.title(), 'A\u03c3a')
 
-    @unittest.expectedFailure  # TODO: RUSTPYTHON; + 𐐧
     def test_swapcase(self):
         string_tests.StringLikeTest.test_swapcase(self)
         self.assertEqual('\U0001044F'.swapcase(), '\U00010427')
diff --git a/crates/vm/src/builtins/str.rs b/crates/vm/src/builtins/str.rs
@@ -11,10 +11,13 @@ use crate::{
     TryFromBorrowedObject, VirtualMachine,
     anystr::{self, AnyStr, AnyStrContainer, AnyStrWrapper, adjust_indices},
     atomic_func,
+    bytes_inner::{swapcase_ascii, title_ascii},
     cformat::cformat_string,
     class::PyClassImpl,
-    common::lock::LazyLock,
-    common::str::{PyKindStr, StrData, StrKind},
+    common::{
+        lock::LazyLock,
+        str::{PyKindStr, StrData, StrKind},
+    },
     convert::{IntoPyException, ToPyException, ToPyObject, ToPyResult},
     format::{format, format_map},
     function::{ArgIterable, ArgSize, FuncArgs, OptionalArg, OptionalOption, PyComparisonValue},
@@ -1078,7 +1081,7 @@ impl PyStr {
     fn title(&self) -> Wtf8Buf {
         match self.as_str_kind() {
             PyKindStr::Ascii(_) => unsafe {
-                Wtf8Buf::from_bytes_unchecked(crate::bytes_inner::title_ascii(self.as_bytes()))
+                Wtf8Buf::from_bytes_unchecked(title_ascii(self.as_bytes()))
             },
             PyKindStr::Utf8(s) => {
                 let mut out = VecFmtWriter(Vec::with_capacity(s.len()));
@@ -1102,19 +1105,29 @@ impl PyStr {
 
     #[pymethod]
     fn swapcase(&self) -> Wtf8Buf {
-        let mut swapped_str = Wtf8Buf::with_capacity(self.data.len());
-        for c_orig in self.as_wtf8().code_points() {
-            let c = c_orig.to_char_lossy();
-            // to_uppercase returns an iterator because case changes may be multiple bytes
-            if c.is_lowercase() {
-                swapped_str.extend(c.to_uppercase());
-            } else if c.is_uppercase() {
-                swapped_str.extend(c.to_lowercase());
-            } else {
-                swapped_str.push(c_orig);
+        match self.as_str_kind() {
+            PyKindStr::Ascii(s) => unsafe {
+                // SAFETY: ASCII is valid Unicode and swapcase_ascii does not produce non-ASCII.
+                Wtf8Buf::from_bytes_unchecked(swapcase_ascii(s.as_bytes()))
+            },
+            PyKindStr::Utf8(s) => {
+                let mut out = VecFmtWriter(Vec::with_capacity(s.len()));
+                swapcase_utf8(s, &mut out);
+                // SAFETY: `s` is valid UTF-8 and swapcase_utf8 only works on Unicode.
+                unsafe { Wtf8Buf::from_bytes_unchecked(out.0) }
+            }
+            PyKindStr::Wtf8(s) => {
+                let mut out = VecFmtWriter(Vec::with_capacity(s.len()));
+                for chunk in s.as_bytes().utf8_chunks() {
+                    swapcase_utf8(chunk.valid(), &mut out);
+                    out.0.extend(chunk.invalid());
+                }
+                // SAFETY:
+                // * `s` is valid WTF-8; surrogate bytes were appended without processing.
+                // * swapcase_utf8 produces valid UTF-8.
+                unsafe { Wtf8Buf::from_bytes_unchecked(out.0) }
             }
         }
-        swapped_str
     }
 
     #[pymethod]
@@ -1648,6 +1661,24 @@ fn handle_capital_sigma(s: &str, i: usize) -> char {
     if before && !after { 'ς' } else { 'σ' }
 }
 
+fn swapcase_utf8(s: &str, out: &mut VecFmtWriter) {
+    for (i, ch) in s.char_indices() {
+        if ch.is_uppercase() {
+            lowercase_or_sigma(ch, s, i, out);
+        } else if ch.is_lowercase() {
+            for ch in ch.to_uppercase() {
+                let mut buf = [0u8; 4];
+                let s = ch.encode_utf8(&mut buf);
+                out.0.extend(s.as_bytes());
+            }
+        } else {
+            let mut buf = [0u8; 4];
+            let s = ch.encode_utf8(&mut buf);
+            out.0.extend(s.as_bytes());
+        }
+    }
+}
+
 impl PyRef<PyStr> {
     #[must_use]
     pub fn is_empty(&self) -> bool {
diff --git a/crates/vm/src/bytes_inner.rs b/crates/vm/src/bytes_inner.rs
@@ -413,15 +413,7 @@ impl PyBytesInner {
     }
 
     pub fn swapcase(&self) -> Vec<u8> {
-        let mut new: Vec<u8> = Vec::with_capacity(self.elements.len());
-        for w in &self.elements {
-            match w {
-                b'A'..=b'Z' => new.push(w.to_ascii_lowercase()),
-                b'a'..=b'z' => new.push(w.to_ascii_uppercase()),
-                x => new.push(*x),
-            }
-        }
-        new
+        swapcase_ascii(self.as_bytes())
     }
 
     pub fn hex(
@@ -1238,3 +1230,10 @@ pub(crate) fn title_ascii(bytes: &[u8]) -> Vec<u8> {
     }
     out
 }
+
+pub(crate) fn swapcase_ascii(bytes: &[u8]) -> Vec<u8> {
+    bytes
+        .iter()
+        .map(|&b| if b.is_ascii_alphabetic() { b ^ 0x20 } else { b })
+        .collect()
+}