Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions tests/snippets/strings.py
Original file line number Diff line number Diff line change
Expand Up @@ -216,3 +216,10 @@ def try_mutate_str():
for s, b, e in zip(ss, bs, ['u8', 'U8', 'utf-8', 'UTF-8', 'utf_8']):
assert s.encode(e) == b
# assert s.encode(encoding=e) == b

# str.isprintable
assert "".isprintable()
assert " ".isprintable()
assert "abcdefg".isprintable()
assert not "abcdefg\n".isprintable()
assert "ʹ".isprintable()
1 change: 1 addition & 0 deletions vm/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ hexf = "0.1.0"
indexmap = "1.0.2"
crc = "^1.0.0"
bincode = "1.1.4"
unicode_categories = "0.1.1"


# TODO: release and publish to crates.io
Expand Down
26 changes: 26 additions & 0 deletions vm/src/obj/objstr.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
extern crate unicode_categories;
extern crate unicode_xid;

use std::fmt;
Expand Down Expand Up @@ -27,6 +28,8 @@ use super::objsequence::PySliceableSequence;
use super::objslice::PySlice;
use super::objtype::{self, PyClassRef};

use unicode_categories::UnicodeCategories;

/// str(object='') -> str
/// str(bytes_or_buffer[, encoding[, errors]]) -> str
///
Expand Down Expand Up @@ -519,6 +522,29 @@ impl PyString {
}
}

/// Return true if all characters in the string are printable or the string is empty,
/// false otherwise. Nonprintable characters are those characters defined in the
/// Unicode character database as `Other` or `Separator`,
/// excepting the ASCII space (0x20) which is considered printable.
///
/// All characters except those characters defined in the Unicode character
/// database as following categories are considered printable.
/// * Cc (Other, Control)
/// * Cf (Other, Format)
/// * Cs (Other, Surrogate)
/// * Co (Other, Private Use)
/// * Cn (Other, Not Assigned)
/// * Zl Separator, Line ('\u2028', LINE SEPARATOR)
/// * Zp Separator, Paragraph ('\u2029', PARAGRAPH SEPARATOR)
/// * Zs (Separator, Space) other than ASCII space('\x20').
#[pymethod]
fn isprintable(&self, _vm: &VirtualMachine) -> bool {
self.value.chars().all(|c| match c {
'\u{0020}' => true,
_ => !(c.is_other_control() | c.is_separator()),
})
}

// cpython's isspace ignores whitespace, including \t and \n, etc, unless the whole string is empty
// which is why isspace is using is_ascii_whitespace. Same for isupper & islower
#[pymethod]
Expand Down