Skip to content

Commit a0867f7

Browse files
committed
Optimizations for bytes reallocation.
This uses up to 12.5% overallocation, similar to list_resize(). It could probably use more tweaks for odd allocation patterns (TBD). Also add an __alloc__() method, which returns the number of bytes actually allocated. P.S. I'm now convinced that we need something like "".join(); that will come later.
1 parent 5584245 commit a0867f7

2 files changed

Lines changed: 54 additions & 11 deletions

File tree

Include/bytesobject.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ extern "C" {
2121
/* Object layout */
2222
typedef struct {
2323
PyObject_VAR_HEAD
24+
Py_ssize_t ob_alloc; /* How many bytes allocated */
2425
char *ob_bytes;
2526
} PyBytesObject;
2627

Objects/bytesobject.c

Lines changed: 53 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
#define PY_SSIZE_T_CLEAN
66
#include "Python.h"
7+
#include "structmember.h"
78

89
/* Direct API functions */
910

@@ -25,7 +26,6 @@ PyBytes_FromStringAndSize(const char *bytes, Py_ssize_t size)
2526
if (new == NULL)
2627
return NULL;
2728

28-
new->ob_size = size;
2929
if (size == 0)
3030
new->ob_bytes = NULL;
3131
else {
@@ -37,6 +37,7 @@ PyBytes_FromStringAndSize(const char *bytes, Py_ssize_t size)
3737
if (bytes != NULL)
3838
memcpy(new->ob_bytes, bytes, size);
3939
}
40+
new->ob_size = new->ob_alloc = size;
4041

4142
return (PyObject *)new;
4243
}
@@ -63,19 +64,39 @@ int
6364
/*
 * Resize the buffer of a bytes object so that it holds `size` bytes.
 *
 * Policy (compares `size` with the current allocation, ob_alloc):
 *   - size <  alloc / 2        : major downsize, reallocate to exactly size
 *   - size <= alloc            : fits already, only ob_size is updated
 *   - size <= alloc + alloc/8  : moderate growth, over-allocate in the
 *                                spirit of list_resize()
 *   - otherwise                : major growth, reallocate to exactly size
 *
 * Returns 0 on success.  On allocation failure sets MemoryError and
 * returns -1; the object is left unchanged in that case.
 */
int
PyBytes_Resize(PyObject *self, Py_ssize_t size)
{
    PyBytesObject *obj;
    Py_ssize_t alloc;
    void *sval;

    /* Check the arguments before touching the object; reading ob_alloc
       in an initializer would dereference self ahead of these asserts. */
    assert(self != NULL);
    assert(PyBytes_Check(self));
    assert(size >= 0);

    obj = (PyBytesObject *)self;
    alloc = obj->ob_alloc;

    if (size < alloc / 2) {
        /* Major downsize; resize down to exact size */
        alloc = size;
    }
    else if (size <= alloc) {
        /* Within allocated size; quick exit */
        obj->ob_size = size;
        return 0;
    }
    else if (size - alloc <= (alloc >> 3)) {
        /* Moderate upsize (growth of at most 12.5%); overallocate
           similar to list_resize().  The test is the integer form of
           size <= alloc * 1.125: size > alloc holds here, so the
           subtraction cannot overflow and no floating point rounding
           is involved. */
        Py_ssize_t extra = (size >> 3) + (size < 9 ? 3 : 6);

        if (extra <= PY_SSIZE_T_MAX - size)
            alloc = size + extra;
        else
            alloc = size;   /* padding would overflow; use exact size */
    }
    else {
        /* Major upsize; resize up to exact size */
        alloc = size;
    }

    /* Keep the old pointer until the reallocation is known to succeed. */
    sval = PyMem_Realloc(obj->ob_bytes, alloc);
    if (sval == NULL) {
        PyErr_NoMemory();
        return -1;
    }

    obj->ob_bytes = sval;
    obj->ob_size = size;
    obj->ob_alloc = alloc;

    return 0;
}
@@ -133,7 +154,9 @@ bytes_iconcat(PyBytesObject *self, PyObject *other)
133154
size = mysize + osize;
134155
if (size < 0)
135156
return PyErr_NoMemory();
136-
if (PyBytes_Resize((PyObject *)self, size) < 0)
157+
if (size <= self->ob_alloc)
158+
self->ob_size = size;
159+
else if (PyBytes_Resize((PyObject *)self, size) < 0)
137160
return NULL;
138161
memcpy(self->ob_bytes + mysize, ((PyBytesObject *)other)->ob_bytes, osize);
139162
Py_INCREF(self);
@@ -178,7 +201,9 @@ bytes_irepeat(PyBytesObject *self, Py_ssize_t count)
178201
size = mysize * count;
179202
if (count != 0 && size / count != mysize)
180203
return PyErr_NoMemory();
181-
if (PyBytes_Resize((PyObject *)self, size) < 0)
204+
if (size <= self->ob_alloc)
205+
self->ob_size = size;
206+
else if (PyBytes_Resize((PyObject *)self, size) < 0)
182207
return NULL;
183208

184209
if (mysize == 1)
@@ -372,9 +397,11 @@ bytes_init(PyBytesObject *self, PyObject *args, PyObject *kwds)
372397
PyObject *it;
373398
PyObject *(*iternext)(PyObject *);
374399

375-
/* Empty previous contents (yes, do this first of all!) */
376-
if (PyBytes_Resize((PyObject *)self, 0) < 0)
377-
return -1;
400+
if (self->ob_size != 0) {
401+
/* Empty previous contents (yes, do this first of all!) */
402+
if (PyBytes_Resize((PyObject *)self, 0) < 0)
403+
return -1;
404+
}
378405

379406
/* Parse arguments */
380407
if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist,
@@ -410,7 +437,9 @@ bytes_init(PyBytesObject *self, PyObject *args, PyObject *kwds)
410437
}
411438
bytes = PyString_AS_STRING(encoded);
412439
size = PyString_GET_SIZE(encoded);
413-
if (PyBytes_Resize((PyObject *)self, size) < 0) {
440+
if (size <= self->ob_alloc)
441+
self->ob_size = size;
442+
else if (PyBytes_Resize((PyObject *)self, size) < 0) {
414443
Py_DECREF(encoded);
415444
return -1;
416445
}
@@ -492,8 +521,9 @@ bytes_init(PyBytesObject *self, PyObject *args, PyObject *kwds)
492521
}
493522

494523
/* Append the byte */
495-
/* XXX Speed this up */
496-
if (PyBytes_Resize((PyObject *)self, self->ob_size+1) < 0)
524+
if (self->ob_size < self->ob_alloc)
525+
self->ob_size++;
526+
else if (PyBytes_Resize((PyObject *)self, self->ob_size+1) < 0)
497527
goto error;
498528
self->ob_bytes[self->ob_size-1] = value;
499529
}
@@ -673,6 +703,17 @@ bytes_decode(PyObject *self, PyObject *args)
673703
return PyCodec_Decode(self, encoding, errors);
674704
}
675705

706+
PyDoc_STRVAR(alloc_doc,
707+
"B.__alloc__() -> int\n\
708+
\n\
709+
Returns the number of bytes actually allocated.");
710+
711+
static PyObject *
712+
bytes_alloc(PyBytesObject *self)
713+
{
714+
return PyInt_FromSsize_t(self->ob_alloc);
715+
}
716+
676717
static PySequenceMethods bytes_as_sequence = {
677718
(lenfunc)bytes_length, /*sq_length*/
678719
(binaryfunc)bytes_concat, /*sq_concat*/
@@ -704,7 +745,8 @@ static PyBufferProcs bytes_as_buffer = {
704745
static PyMethodDef
705746
bytes_methods[] = {
706747
{"decode", (PyCFunction)bytes_decode, METH_VARARGS, decode_doc},
707-
{NULL, NULL}
748+
{"__alloc__", (PyCFunction)bytes_alloc, METH_NOARGS, alloc_doc},
749+
{NULL}
708750
};
709751

710752
PyDoc_STRVAR(bytes_doc,

0 commit comments

Comments
 (0)