Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
2c789a9
Adapt byte*.startswith
erlend-aasland Apr 2, 2024
132080c
Adapt byte*.endswith
erlend-aasland Apr 2, 2024
b7f04d7
Optimise
erlend-aasland Apr 2, 2024
57c9afc
Add NEWS
erlend-aasland Apr 3, 2024
1b89eb9
Adjust docstring
erlend-aasland Apr 3, 2024
fabe761
Docstring: Use 'bytes' or 'bytearray' instead of 'byte string'
erlend-aasland Apr 3, 2024
e9b59cc
Docstring: Remove unneeded body from bytearray.startswith; the param …
erlend-aasland Apr 3, 2024
655a6a8
Regen clinic
erlend-aasland Apr 3, 2024
e1a5acc
Adapt byte*.{*find,*index,count}
erlend-aasland Apr 3, 2024
f5fb40b
Pull in main
erlend-aasland Apr 3, 2024
2c54b79
Address Inada-san's review: endswith's first param is named 'suffix'
erlend-aasland Apr 3, 2024
ce24a95
Merge branch 'perf/bytes.starwith' into perf/bytes.find-and-friends
erlend-aasland Apr 3, 2024
a6981f3
WIP
erlend-aasland Apr 3, 2024
2443309
Address Inada-san's review: indent clinic input and consistently use …
erlend-aasland Apr 3, 2024
c188058
Merge branch 'perf/bytes.starwith' into perf/bytes.find-and-friends
erlend-aasland Apr 3, 2024
b3b8c8e
Pull in main
erlend-aasland Apr 3, 2024
3e9e733
Amend NEWS entry
erlend-aasland Apr 3, 2024
829696c
Pull in main
erlend-aasland Apr 3, 2024
096a232
Remove now unused STRINGLIB_parse_args_finds
erlend-aasland Apr 3, 2024
0749175
Pull in main
erlend-aasland Apr 10, 2024
f937faa
Remove now unneeded @text_signature's
erlend-aasland Apr 10, 2024
d671524
Apply suggestions from code review
erlend-aasland Apr 12, 2024
e749d3e
Pull in main
erlend-aasland Apr 12, 2024
c83c5d1
Regen clinic
erlend-aasland Apr 12, 2024
747ecf9
Update Objects/bytearrayobject.c
erlend-aasland Apr 12, 2024
0ad3919
Again
erlend-aasland Apr 12, 2024
f0cdf70
NEWS
erlend-aasland Apr 12, 2024
9bb65d2
Correct NEWS entry
erlend-aasland Apr 12, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 10 additions & 5 deletions Include/internal/pycore_bytes_methods.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,16 @@ extern void _Py_bytes_title(char *result, const char *s, Py_ssize_t len);
extern void _Py_bytes_capitalize(char *result, const char *s, Py_ssize_t len);
extern void _Py_bytes_swapcase(char *result, const char *s, Py_ssize_t len);

/* Substring-search helpers that operate on a raw (str, len) byte buffer and
   return a new Python object.
   NOTE(review): each helper is declared twice below, once taking a packed
   `PyObject *args` and once taking `sub, start, end` explicitly. These look
   like the before/after sides of a single signature change; confirm that only
   the `sub, start, end` set is kept in the header. */
extern PyObject *_Py_bytes_find(const char *str, Py_ssize_t len, PyObject *args);
extern PyObject *_Py_bytes_index(const char *str, Py_ssize_t len, PyObject *args);
extern PyObject *_Py_bytes_rfind(const char *str, Py_ssize_t len, PyObject *args);
extern PyObject *_Py_bytes_rindex(const char *str, Py_ssize_t len, PyObject *args);
extern PyObject *_Py_bytes_count(const char *str, Py_ssize_t len, PyObject *args);
extern PyObject *_Py_bytes_find(const char *str, Py_ssize_t len, PyObject *sub,
Py_ssize_t start, Py_ssize_t end);
extern PyObject *_Py_bytes_index(const char *str, Py_ssize_t len, PyObject *sub,
Py_ssize_t start, Py_ssize_t end);
extern PyObject *_Py_bytes_rfind(const char *str, Py_ssize_t len, PyObject *sub,
Py_ssize_t start, Py_ssize_t end);
extern PyObject *_Py_bytes_rindex(const char *str, Py_ssize_t len, PyObject *sub,
Py_ssize_t start, Py_ssize_t end);
extern PyObject *_Py_bytes_count(const char *str, Py_ssize_t len, PyObject *sub,
Py_ssize_t start, Py_ssize_t end);
extern int _Py_bytes_contains(const char *str, Py_ssize_t len, PyObject *arg);
extern PyObject *_Py_bytes_startswith(const char *str, Py_ssize_t len,
PyObject *subobj, Py_ssize_t start,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
Improve the performance of the following :class:`bytes` and
:class:`bytearray` methods by adapting them to the :c:macro:`METH_FASTCALL`
calling convention:

* :meth:`!count`
* :meth:`!find`
* :meth:`!index`
* :meth:`!rfind`
* :meth:`!rindex`
93 changes: 76 additions & 17 deletions Objects/bytearrayobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -1121,16 +1121,44 @@ bytearray_dealloc(PyByteArrayObject *self)
#include "stringlib/transmogrify.h"


/*[clinic input]
@text_signature "($self, sub[, start[, end]], /)"
bytearray.find

sub: object
start: slice_index(accept={int, NoneType}, c_default='0') = None
Optional start position. Default: start of the bytes.
end: slice_index(accept={int, NoneType}, c_default='PY_SSIZE_T_MAX') = None
Optional stop position. Default: end of the bytes.
/

Return the lowest index in B where subsection 'sub' is found, such that 'sub' is contained within B[start:end].

Return -1 on failure.
[clinic start generated code]*/

static PyObject *
bytearray_find(PyByteArrayObject *self, PyObject *args)
bytearray_find_impl(PyByteArrayObject *self, PyObject *sub, Py_ssize_t start,
Py_ssize_t end)
/*[clinic end generated code: output=413e1cab2ae87da0 input=793dfad803e2952f]*/
{
/* Thin wrapper: passes the bytearray's buffer pointer and size to the shared
   _Py_bytes_find() helper and returns its result unchanged.
   NOTE(review): this span carries both an `args`-based and a
   `sub, start, end`-based signature/return pair; they look like the
   before/after sides of one change. Confirm which pair is live. */
return _Py_bytes_find(PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self), args);
return _Py_bytes_find(PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
sub, start, end);
}

/*[clinic input]
bytearray.count = bytearray.find

Return the number of non-overlapping occurrences of subsection 'sub' in bytes B[start:end].
[clinic start generated code]*/

static PyObject *
bytearray_count(PyByteArrayObject *self, PyObject *args)
bytearray_count_impl(PyByteArrayObject *self, PyObject *sub,
Py_ssize_t start, Py_ssize_t end)
/*[clinic end generated code: output=a21ee2692e4f1233 input=4deb529db38deda8]*/
{
/* Thin wrapper: passes the bytearray's buffer pointer and size to the shared
   _Py_bytes_count() helper and returns its result unchanged.
   NOTE(review): this span carries both an `args`-based and a
   `sub, start, end`-based signature/return pair; they look like the
   before/after sides of one change. Confirm which pair is live. */
return _Py_bytes_count(PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self), args);
return _Py_bytes_count(PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
sub, start, end);
}

/*[clinic input]
Expand Down Expand Up @@ -1162,22 +1190,55 @@ bytearray_copy_impl(PyByteArrayObject *self)
PyByteArray_GET_SIZE(self));
}

/*[clinic input]
bytearray.index = bytearray.find

Return the lowest index in B where subsection 'sub' is found, such that 'sub' is contained within B[start:end].

Raise ValueError if the subsection is not found.
[clinic start generated code]*/

static PyObject *
bytearray_index(PyByteArrayObject *self, PyObject *args)
bytearray_index_impl(PyByteArrayObject *self, PyObject *sub,
Py_ssize_t start, Py_ssize_t end)
/*[clinic end generated code: output=067a1e78efc672a7 input=8cbaf6836dbd2a9a]*/
{
/* Thin wrapper: passes the bytearray's buffer pointer and size to the shared
   _Py_bytes_index() helper and returns its result unchanged.
   NOTE(review): this span carries both an `args`-based and a
   `sub, start, end`-based signature/return pair; they look like the
   before/after sides of one change. Confirm which pair is live. */
return _Py_bytes_index(PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self), args);
return _Py_bytes_index(PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
sub, start, end);
}

/*[clinic input]
bytearray.rfind = bytearray.find

Return the highest index in B where subsection 'sub' is found, such that 'sub' is contained within B[start:end].

Return -1 on failure.
[clinic start generated code]*/

static PyObject *
bytearray_rfind(PyByteArrayObject *self, PyObject *args)
bytearray_rfind_impl(PyByteArrayObject *self, PyObject *sub,
Py_ssize_t start, Py_ssize_t end)
/*[clinic end generated code: output=51bf886f932b283c input=eaa107468a158423]*/
{
/* Thin wrapper: passes the bytearray's buffer pointer and size to the shared
   _Py_bytes_rfind() helper and returns its result unchanged.
   NOTE(review): this span carries both an `args`-based and a
   `sub, start, end`-based signature/return pair; they look like the
   before/after sides of one change. Confirm which pair is live. */
return _Py_bytes_rfind(PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self), args);
return _Py_bytes_rfind(PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
sub, start, end);
}

/*[clinic input]
bytearray.rindex = bytearray.find

Return the highest index in B where subsection 'sub' is found, such that 'sub' is contained within B[start:end].

Raise ValueError if the subsection is not found.
[clinic start generated code]*/

static PyObject *
bytearray_rindex(PyByteArrayObject *self, PyObject *args)
bytearray_rindex_impl(PyByteArrayObject *self, PyObject *sub,
Py_ssize_t start, Py_ssize_t end)
/*[clinic end generated code: output=38e1cf66bafb08b9 input=81cf49d0af4d5bd0]*/
{
/* Thin wrapper: passes the bytearray's buffer pointer and size to the shared
   _Py_bytes_rindex() helper and returns its result unchanged.
   NOTE(review): this span carries both an `args`-based and a
   `sub, start, end`-based signature/return pair; they look like the
   before/after sides of one change. Confirm which pair is live. */
return _Py_bytes_rindex(PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self), args);
return _Py_bytes_rindex(PyByteArray_AS_STRING(self), PyByteArray_GET_SIZE(self),
sub, start, end);
}

static int
Expand Down Expand Up @@ -2236,17 +2297,15 @@ bytearray_methods[] = {
STRINGLIB_CENTER_METHODDEF
BYTEARRAY_CLEAR_METHODDEF
BYTEARRAY_COPY_METHODDEF
{"count", (PyCFunction)bytearray_count, METH_VARARGS,
_Py_count__doc__},
BYTEARRAY_COUNT_METHODDEF
BYTEARRAY_DECODE_METHODDEF
BYTEARRAY_ENDSWITH_METHODDEF
STRINGLIB_EXPANDTABS_METHODDEF
BYTEARRAY_EXTEND_METHODDEF
{"find", (PyCFunction)bytearray_find, METH_VARARGS,
_Py_find__doc__},
BYTEARRAY_FIND_METHODDEF
BYTEARRAY_FROMHEX_METHODDEF
BYTEARRAY_HEX_METHODDEF
{"index", (PyCFunction)bytearray_index, METH_VARARGS, _Py_index__doc__},
BYTEARRAY_INDEX_METHODDEF
BYTEARRAY_INSERT_METHODDEF
{"isalnum", stringlib_isalnum, METH_NOARGS,
_Py_isalnum__doc__},
Expand Down Expand Up @@ -2276,8 +2335,8 @@ bytearray_methods[] = {
BYTEARRAY_REMOVEPREFIX_METHODDEF
BYTEARRAY_REMOVESUFFIX_METHODDEF
BYTEARRAY_REVERSE_METHODDEF
{"rfind", (PyCFunction)bytearray_rfind, METH_VARARGS, _Py_rfind__doc__},
{"rindex", (PyCFunction)bytearray_rindex, METH_VARARGS, _Py_rindex__doc__},
BYTEARRAY_RFIND_METHODDEF
BYTEARRAY_RINDEX_METHODDEF
STRINGLIB_RJUST_METHODDEF
BYTEARRAY_RPARTITION_METHODDEF
BYTEARRAY_RSPLIT_METHODDEF
Expand Down
102 changes: 26 additions & 76 deletions Objects/bytes_methods.c
Original file line number Diff line number Diff line change
Expand Up @@ -453,31 +453,21 @@ stringlib_parse_args_finds().
*/

Py_LOCAL_INLINE(int)
parse_args_finds_byte(const char *function_name, PyObject *args,
PyObject **subobj, char *byte,
Py_ssize_t *start, Py_ssize_t *end)
parse_args_finds_byte(const char *function_name, PyObject **subobj, char *byte)
{
PyObject *tmp_subobj;
Py_ssize_t ival;

if(!stringlib_parse_args_finds(function_name, args, &tmp_subobj,
start, end))
return 0;

if (PyObject_CheckBuffer(tmp_subobj)) {
*subobj = tmp_subobj;
if (PyObject_CheckBuffer(*subobj)) {
return 1;
}

if (!_PyIndex_Check(tmp_subobj)) {
if (!_PyIndex_Check(*subobj)) {
PyErr_Format(PyExc_TypeError,
"argument should be integer or bytes-like object, "
"not '%.200s'",
Py_TYPE(tmp_subobj)->tp_name);
Py_TYPE(*subobj)->tp_name);
return 0;
}

ival = PyNumber_AsSsize_t(tmp_subobj, NULL);
Py_ssize_t ival = PyNumber_AsSsize_t(*subobj, NULL);
if (ival == -1 && PyErr_Occurred()) {
return 0;
}
Expand Down Expand Up @@ -508,19 +498,19 @@ parse_args_finds_byte(const char *function_name, PyObject *args,

Py_LOCAL_INLINE(Py_ssize_t)
find_internal(const char *str, Py_ssize_t len,
const char *function_name, PyObject *args, int dir)
const char *function_name, PyObject *subobj,
Py_ssize_t start, Py_ssize_t end,
int dir)
{
PyObject *subobj;
char byte;
Py_buffer subbuf;
const char *sub;
Py_ssize_t sub_len;
Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
Py_ssize_t res;

if (!parse_args_finds_byte(function_name, args,
&subobj, &byte, &start, &end))
if (!parse_args_finds_byte(function_name, &subobj, &byte)) {
return -2;
}

if (subobj) {
if (PyObject_GetBuffer(subobj, &subbuf, PyBUF_SIMPLE) != 0)
Expand Down Expand Up @@ -566,37 +556,21 @@ find_internal(const char *str, Py_ssize_t len,
return res;
}

PyDoc_STRVAR_shared(_Py_find__doc__,
"B.find(sub[, start[, end]]) -> int\n\
\n\
Return the lowest index in B where subsection sub is found,\n\
such that sub is contained within B[start,end]. Optional\n\
arguments start and end are interpreted as in slice notation.\n\
\n\
Return -1 on failure.");

/* Run find_internal() under the function name "find" with dir = +1 and wrap
   its Py_ssize_t result in a Python int via PyLong_FromSsize_t().
   A result of -2 is find_internal()'s error signal and is propagated as NULL;
   any other value (including -1) is returned to the caller as-is.
   NOTE(review): two signatures and two find_internal() calls appear below
   (`args` vs. `sub, start, end`); they look like the before/after sides of
   one change. Confirm which pair is live. */
PyObject *
_Py_bytes_find(const char *str, Py_ssize_t len, PyObject *args)
_Py_bytes_find(const char *str, Py_ssize_t len, PyObject *sub,
Py_ssize_t start, Py_ssize_t end)
{
Py_ssize_t result = find_internal(str, len, "find", args, +1);
Py_ssize_t result = find_internal(str, len, "find", sub, start, end, +1);
if (result == -2)
return NULL;
return PyLong_FromSsize_t(result);
}

PyDoc_STRVAR_shared(_Py_index__doc__,
"B.index(sub[, start[, end]]) -> int\n\
\n\
Return the lowest index in B where subsection sub is found,\n\
such that sub is contained within B[start,end]. Optional\n\
arguments start and end are interpreted as in slice notation.\n\
\n\
Raises ValueError when the subsection is not found.");

PyObject *
_Py_bytes_index(const char *str, Py_ssize_t len, PyObject *args)
_Py_bytes_index(const char *str, Py_ssize_t len, PyObject *sub,
Py_ssize_t start, Py_ssize_t end)
{
Py_ssize_t result = find_internal(str, len, "index", args, +1);
Py_ssize_t result = find_internal(str, len, "index", sub, start, end, +1);
if (result == -2)
return NULL;
if (result == -1) {
Expand All @@ -607,37 +581,21 @@ _Py_bytes_index(const char *str, Py_ssize_t len, PyObject *args)
return PyLong_FromSsize_t(result);
}

PyDoc_STRVAR_shared(_Py_rfind__doc__,
"B.rfind(sub[, start[, end]]) -> int\n\
\n\
Return the highest index in B where subsection sub is found,\n\
such that sub is contained within B[start,end]. Optional\n\
arguments start and end are interpreted as in slice notation.\n\
\n\
Return -1 on failure.");

/* Run find_internal() under the function name "rfind" with dir = -1 and wrap
   its Py_ssize_t result in a Python int via PyLong_FromSsize_t().
   A result of -2 is find_internal()'s error signal and is propagated as NULL;
   any other value (including -1) is returned to the caller as-is.
   NOTE(review): two signatures and two find_internal() calls appear below
   (`args` vs. `sub, start, end`); they look like the before/after sides of
   one change. Confirm which pair is live. */
PyObject *
_Py_bytes_rfind(const char *str, Py_ssize_t len, PyObject *args)
_Py_bytes_rfind(const char *str, Py_ssize_t len, PyObject *sub,
Py_ssize_t start, Py_ssize_t end)
{
Py_ssize_t result = find_internal(str, len, "rfind", args, -1);
Py_ssize_t result = find_internal(str, len, "rfind", sub, start, end, -1);
if (result == -2)
return NULL;
return PyLong_FromSsize_t(result);
}

PyDoc_STRVAR_shared(_Py_rindex__doc__,
"B.rindex(sub[, start[, end]]) -> int\n\
\n\
Return the highest index in B where subsection sub is found,\n\
such that sub is contained within B[start,end]. Optional\n\
arguments start and end are interpreted as in slice notation.\n\
\n\
Raise ValueError when the subsection is not found.");

PyObject *
_Py_bytes_rindex(const char *str, Py_ssize_t len, PyObject *args)
_Py_bytes_rindex(const char *str, Py_ssize_t len, PyObject *sub,
Py_ssize_t start, Py_ssize_t end)
{
Py_ssize_t result = find_internal(str, len, "rindex", args, -1);
Py_ssize_t result = find_internal(str, len, "rindex", sub, start, end, -1);
if (result == -2)
return NULL;
if (result == -1) {
Expand All @@ -648,28 +606,20 @@ _Py_bytes_rindex(const char *str, Py_ssize_t len, PyObject *args)
return PyLong_FromSsize_t(result);
}

PyDoc_STRVAR_shared(_Py_count__doc__,
"B.count(sub[, start[, end]]) -> int\n\
\n\
Return the number of non-overlapping occurrences of subsection sub in\n\
bytes B[start:end]. Optional arguments start and end are interpreted\n\
as in slice notation.");

PyObject *
_Py_bytes_count(const char *str, Py_ssize_t len, PyObject *args)
_Py_bytes_count(const char *str, Py_ssize_t len, PyObject *sub_obj,
Py_ssize_t start, Py_ssize_t end)
{
PyObject *sub_obj;
const char *sub;
Py_ssize_t sub_len;
char byte;
Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;

Py_buffer vsub;
PyObject *count_obj;

if (!parse_args_finds_byte("count", args,
&sub_obj, &byte, &start, &end))
if (!parse_args_finds_byte("count", &sub_obj, &byte)) {
return NULL;
}

if (sub_obj) {
if (PyObject_GetBuffer(sub_obj, &vsub, PyBUF_SIMPLE) != 0)
Expand Down
Loading