@@ -11,10 +11,13 @@ use crate::{
1111 TryFromBorrowedObject , VirtualMachine ,
1212 anystr:: { self , AnyStr , AnyStrContainer , AnyStrWrapper , adjust_indices} ,
1313 atomic_func,
14+ bytes_inner:: { swapcase_ascii, title_ascii} ,
1415 cformat:: cformat_string,
1516 class:: PyClassImpl ,
16- common:: lock:: LazyLock ,
17- common:: str:: { PyKindStr , StrData , StrKind } ,
17+ common:: {
18+ lock:: LazyLock ,
19+ str:: { PyKindStr , StrData , StrKind } ,
20+ } ,
1821 convert:: { IntoPyException , ToPyException , ToPyObject , ToPyResult } ,
1922 format:: { format, format_map} ,
2023 function:: { ArgIterable , ArgSize , FuncArgs , OptionalArg , OptionalOption , PyComparisonValue } ,
@@ -1078,7 +1081,7 @@ impl PyStr {
10781081 fn title ( & self ) -> Wtf8Buf {
10791082 match self . as_str_kind ( ) {
10801083 PyKindStr :: Ascii ( _) => unsafe {
1081- Wtf8Buf :: from_bytes_unchecked ( crate :: bytes_inner :: title_ascii ( self . as_bytes ( ) ) )
1084+ Wtf8Buf :: from_bytes_unchecked ( title_ascii ( self . as_bytes ( ) ) )
10821085 } ,
10831086 PyKindStr :: Utf8 ( s) => {
10841087 let mut out = VecFmtWriter ( Vec :: with_capacity ( s. len ( ) ) ) ;
@@ -1102,19 +1105,29 @@ impl PyStr {
11021105
11031106 #[ pymethod]
11041107 fn swapcase ( & self ) -> Wtf8Buf {
1105- let mut swapped_str = Wtf8Buf :: with_capacity ( self . data . len ( ) ) ;
1106- for c_orig in self . as_wtf8 ( ) . code_points ( ) {
1107- let c = c_orig. to_char_lossy ( ) ;
1108- // to_uppercase returns an iterator because case changes may be multiple bytes
1109- if c. is_lowercase ( ) {
1110- swapped_str. extend ( c. to_uppercase ( ) ) ;
1111- } else if c. is_uppercase ( ) {
1112- swapped_str. extend ( c. to_lowercase ( ) ) ;
1113- } else {
1114- swapped_str. push ( c_orig) ;
1108+ match self . as_str_kind ( ) {
1109+ PyKindStr :: Ascii ( s) => unsafe {
1110+ // SAFETY: ASCII is valid Unicode and swapcase_ascii does not produce non-ASCII.
1111+ Wtf8Buf :: from_bytes_unchecked ( swapcase_ascii ( s. as_bytes ( ) ) )
1112+ } ,
1113+ PyKindStr :: Utf8 ( s) => {
1114+ let mut out = VecFmtWriter ( Vec :: with_capacity ( s. len ( ) ) ) ;
1115+ swapcase_utf8 ( s, & mut out) ;
1116+ // SAFETY: `s` is valid UTF-8 and swapcase_utf8 only works on Unicode.
1117+ unsafe { Wtf8Buf :: from_bytes_unchecked ( out. 0 ) }
1118+ }
1119+ PyKindStr :: Wtf8 ( s) => {
1120+ let mut out = VecFmtWriter ( Vec :: with_capacity ( s. len ( ) ) ) ;
1121+ for chunk in s. as_bytes ( ) . utf8_chunks ( ) {
1122+ swapcase_utf8 ( chunk. valid ( ) , & mut out) ;
1123+ out. 0 . extend ( chunk. invalid ( ) ) ;
1124+ }
1125+ // SAFETY:
1126+ // * `s` is valid WTF-8; surrogate bytes were appended without processing.
1127+ // * swapcase_utf8 produces valid UTF-8.
1128+ unsafe { Wtf8Buf :: from_bytes_unchecked ( out. 0 ) }
11151129 }
11161130 }
1117- swapped_str
11181131 }
11191132
11201133 #[ pymethod]
@@ -1648,6 +1661,24 @@ fn handle_capital_sigma(s: &str, i: usize) -> char {
16481661 if before && !after { 'ς' } else { 'σ' }
16491662}
16501663
1664+ fn swapcase_utf8 ( s : & str , out : & mut VecFmtWriter ) {
1665+ for ( i, ch) in s. char_indices ( ) {
1666+ if ch. is_uppercase ( ) {
1667+ lowercase_or_sigma ( ch, s, i, out) ;
1668+ } else if ch. is_lowercase ( ) {
1669+ for ch in ch. to_uppercase ( ) {
1670+ let mut buf = [ 0u8 ; 4 ] ;
1671+ let s = ch. encode_utf8 ( & mut buf) ;
1672+ out. 0 . extend ( s. as_bytes ( ) ) ;
1673+ }
1674+ } else {
1675+ let mut buf = [ 0u8 ; 4 ] ;
1676+ let s = ch. encode_utf8 ( & mut buf) ;
1677+ out. 0 . extend ( s. as_bytes ( ) ) ;
1678+ }
1679+ }
1680+ }
1681+
16511682impl PyRef < PyStr > {
16521683 #[ must_use]
16531684 pub fn is_empty ( & self ) -> bool {
0 commit comments