Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
150 changes: 145 additions & 5 deletions crates/capi/src/unicodeobject.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
use crate::PyObject;
use crate::object::define_py_check;
use crate::pystate::with_vm;
use crate::{PyObject, pystate::with_vm};
use core::ffi::{CStr, c_char, c_int};
use core::ptr::NonNull;
use core::slice;
Expand Down Expand Up @@ -92,6 +91,103 @@ pub unsafe extern "C" fn PyUnicode_AsEncodedString(
})
}

#[unsafe(no_mangle)]
pub unsafe extern "C" fn PyUnicode_AsUTF8String(unicode: *mut PyObject) -> *mut PyObject {
with_vm(|vm| {
let unicode = unsafe { &*unicode }
.try_downcast_ref::<PyStr>(vm)?
.to_owned();
vm.state
.codec_registry
.encode_text(unicode, "utf-8", None, vm)
})
}

#[unsafe(no_mangle)]
pub unsafe extern "C" fn PyUnicode_DecodeFSDefaultAndSize(
s: *const c_char,
size: isize,
) -> *mut PyObject {
with_vm(|vm| {
let size: usize = size
.try_into()
.map_err(|_| vm.new_system_error("size must be non-negative"))?;

let bytes = if s.is_null() {
if size != 0 {
return Err(vm.new_system_error(
"PyUnicode_DecodeFSDefaultAndSize called with null data and non-zero size",
));
}
&[][..]
} else {
unsafe { slice::from_raw_parts(s.cast::<u8>(), size) }
};

vm.state.codec_registry.decode_text(
vm.ctx.new_bytes(bytes.to_vec()).into(),
vm.fs_encoding().as_str(),
Some(vm.fs_encode_errors().to_owned()),
vm,
)
})
}

#[unsafe(no_mangle)]
pub unsafe extern "C" fn PyUnicode_EncodeFSDefault(unicode: *mut PyObject) -> *mut PyObject {
with_vm(|vm| {
let unicode = unsafe { &*unicode }
.try_downcast_ref::<PyStr>(vm)?
.to_owned();
vm.state.codec_registry.encode_text(
unicode,
vm.fs_encoding().as_str(),
Some(vm.fs_encode_errors().to_owned()),
vm,
)
})
}

#[unsafe(no_mangle)]
pub unsafe extern "C" fn PyUnicode_FromEncodedObject(
obj: *mut PyObject,
encoding: *const c_char,
errors: *const c_char,
) -> *mut PyObject {
with_vm(|vm| {
let obj = unsafe { &*obj };

if obj.downcast_ref::<PyStr>().is_some() {
return Err(vm.new_type_error("decoding str is not supported".to_owned()));
}

let encoding = if encoding.is_null() {
"utf-8"
} else {
unsafe { CStr::from_ptr(encoding) }
.to_str()
.map_err(|_| vm.new_system_error("encoding must be valid UTF-8"))?
};
let errors = if errors.is_null() {
None
} else {
let errors = unsafe { CStr::from_ptr(errors) }
.to_str()
.map_err(|_| vm.new_system_error("errors must be valid UTF-8"))?;
Some(vm.ctx.new_utf8_str(errors))
};

obj.try_bytes_like(vm, |b| {
vm.state.codec_registry.decode_text(
vm.ctx.new_bytes(b.to_vec()).into(),
encoding,
errors,
vm,
)
})?
})
}

#[unsafe(no_mangle)]
pub unsafe extern "C" fn PyUnicode_InternInPlace(string: *mut *mut PyObject) {
with_vm(|vm| {
Expand Down Expand Up @@ -131,12 +227,17 @@ pub unsafe extern "C" fn PyUnicode_EqualToUTF8AndSize(

#[cfg(false)]
mod tests {
use std::ffi::{OsStr, OsString};

use pyo3::intern;
use pyo3::prelude::*;
use pyo3::types::PyString;
use pyo3::types::{PyBytes, PyString, PyStringMethods};

#[cfg(unix)]
use std::os::unix::ffi::OsStrExt;

#[test]
fn test_unicode() {
fn unicode() {
Python::attach(|py| {
let string = PyString::new(py, "Hello, World!");
assert!(string.is_instance_of::<PyString>());
Expand All @@ -146,9 +247,48 @@ mod tests {
}

#[test]
fn test_intern_str() {
fn intern_str() {
Python::attach(|py| {
let _string = intern!(py, "Hello, World!");
})
}

#[test]
fn encode_utf8_via_wrapper() {
Python::attach(|py| {
let s = PyString::new(py, "h\u{00E9}llo");
let encoded = s.encode_utf8().unwrap();
assert_eq!(encoded.as_bytes(), "h\u{00E9}llo".as_bytes());
})
}

#[test]
fn from_encoded_object_bytes() {
Python::attach(|py| {
let src = PyBytes::new(py, b"h\xC3\xA9llo");
let s = PyString::from_encoded_object(src.as_any(), None, None).unwrap();
assert_eq!(s.to_str().unwrap(), "h\u{00E9}llo");
})
}

#[cfg(unix)]
#[test]
fn fs_default_roundtrip_non_utf8_unix() {
Python::attach(|py| {
let original = OsStr::from_bytes(&[b'f', b'o', 0x80]);
let py_str = original.into_pyobject(py).unwrap();
let roundtrip: OsString = py_str.extract().unwrap();
assert_eq!(roundtrip.as_os_str().as_bytes(), original.as_bytes());
})
}

#[test]
fn fs_default_roundtrip_utf8() {
Python::attach(|py| {
let original = OsStr::new("hello.txt");
let py_str = original.into_pyobject(py).unwrap();
let roundtrip: OsString = py_str.extract().unwrap();
assert_eq!(roundtrip, original);
})
}
}
Loading