Skip to content
6 changes: 6 additions & 0 deletions Include/internal/pycore_unicodeobject.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,12 @@ struct _Py_unicode_runtime_ids {
};

/* Unicode-related state stored on the global runtime; the code reaches it
   as _PyRuntime.unicode_state. */
struct _Py_unicode_runtime_state {
/* Bookkeeping for the global interned-strings dict. */
struct {
/* Acquired by PyUnicode_InternInPlace() around the store into the
   interned dict. */
PyThread_type_lock lock;
/* Thread state read by get_interned_tstate() and torn down by
   clear_interned_tstate(); NULL when there is none. */
PyThreadState *tstate;
/* The actual interned dict is at
_PyRuntime.cached_objects.interned_strings. */
} interned;
/* Holds next_index and lock, both filled in by init_runtime(). */
struct _Py_unicode_runtime_ids ids;
};

Expand Down
82 changes: 70 additions & 12 deletions Objects/unicodeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -14533,13 +14533,12 @@ _PyUnicode_InitGlobalObjects(PyInterpreterState *interp)
return _PyStatus_OK();
}

// Initialize the global interned dict
/* Initialize the global interned dict. */
PyObject *interned = PyDict_New();
if (interned == NULL) {
PyErr_Clear();
return _PyStatus_ERR("failed to create interned dict");
}

set_interned_dict(interned);

/* Intern statically allocated string identifiers and deepfreeze strings.
Expand Down Expand Up @@ -14585,6 +14584,63 @@ _PyUnicode_InitTypes(PyInterpreterState *interp)
}


/* Return the thread state used to update the global interned dict on
   behalf of a non-main interpreter, creating it on first use.

   The thread state is created for the main interpreter and is remembered in
   _PyRuntime.unicode_state.interned.tstate, so later calls reuse it and
   clear_interned_tstate() can release it.

   Returns NULL, with any pending error cleared, if a new thread state could
   not be created.

   No locking is done here; the caller is expected to serialize access
   (PyUnicode_InternInPlace() holds the interned lock around the call). */
static PyThreadState *
get_interned_tstate(void)
{
    PyThreadState *tstate = _PyRuntime.unicode_state.interned.tstate;
    if (tstate != NULL) {
        /* Already created by an earlier call. */
        return tstate;
    }

    PyInterpreterState *main_interp = _PyInterpreterState_Main();
    /* We do not "bind" the thread state here. */
    tstate = _PyThreadState_New(main_interp);
    if (tstate == NULL) {
        PyErr_Clear();
        return NULL;
    }

    /* Publish the new thread state.  Without this store every call would
       allocate another thread state that nothing ever frees. */
    _PyRuntime.unicode_state.interned.tstate = tstate;
    return tstate;
}

/* Release the thread state recorded in
   _PyRuntime.unicode_state.interned.tstate, if there is one.

   The runtime field is reset to NULL before the thread state is cleared and
   deleted.  Does nothing when the field is already NULL. */
static void
clear_interned_tstate(void)
{
    PyThreadState *cached = _PyRuntime.unicode_state.interned.tstate;
    if (cached == NULL) {
        return;
    }

    _PyRuntime.unicode_state.interned.tstate = NULL;
    PyThreadState_Clear(cached);
    PyThreadState_Delete(cached);
}

/* Insert obj into the global interned dict (as both key and value) unless
   an equal key is already present, and return what PyDict_SetDefault()
   returned for it.

   When the calling interpreter is not the main one, the current thread state
   is temporarily swapped for the one from get_interned_tstate() while the
   dict is touched, then swapped back.

   Returns NULL if that thread state is unavailable or if the dict operation
   fails; in the latter case the error is cleared before returning. */
static inline PyObject *
store_interned(PyObject *obj)
{
    PyObject *dict = get_interned_dict();
    assert(dict != NULL);

    /* Non-NULL only if we switched away from the caller's thread state. */
    PyThreadState *saved = NULL;
    int in_main = _Py_IsMainInterpreter(_PyInterpreterState_GET());
    if (!in_main) {
        PyThreadState *main_ts = get_interned_tstate();
        if (main_ts == NULL) {
            return NULL;
        }
        saved = PyThreadState_Swap(main_ts);
        assert(saved != NULL);
    }

    PyObject *result = PyDict_SetDefault(dict, obj, obj);
    if (result == NULL) {
        /* Clear the error while the thread state that raised it is still
           the current one. */
        PyErr_Clear();
    }

    /* Restore the caller's thread state if we replaced it. */
    if (saved != NULL) {
        PyThreadState_Swap(saved);
    }
    return result;
}

void
PyUnicode_InternInPlace(PyObject **p)
{
Expand All @@ -14608,20 +14664,20 @@ PyUnicode_InternInPlace(PyObject **p)
return;
}

PyObject *interned = get_interned_dict();
assert(interned != NULL);

PyObject *t = PyDict_SetDefault(interned, s, s);
if (t == NULL) {
PyErr_Clear();
return;
}

PyThread_acquire_lock(_PyRuntime.unicode_state.interned.lock, WAIT_LOCK);
PyObject *t = store_interned(s);
PyThread_release_lock(_PyRuntime.unicode_state.interned.lock);
if (t != s) {
Py_SETREF(*p, Py_NewRef(t));
if (t != NULL) {
Py_SETREF(*p, Py_NewRef(t));
}
return;
}

/* Immortalize the object. */
// XXX Uncomment this once the PEP 683 implementation has landed.
//_Py_SetImmortal(s);

/* The two references in interned dict (key and value) are not counted by
refcnt. unicode_dealloc() and _PyUnicode_ClearInterned() take care of
this. */
Expand Down Expand Up @@ -14696,6 +14752,8 @@ _PyUnicode_ClearInterned(PyInterpreterState *interp)
PyDict_Clear(interned);
Py_DECREF(interned);
set_interned_dict(NULL);

clear_interned_tstate();
}


Expand Down
23 changes: 19 additions & 4 deletions Python/pystate.c
Original file line number Diff line number Diff line change
Expand Up @@ -356,7 +356,8 @@ _Py_COMP_DIAG_POP

static int
alloc_for_runtime(PyThread_type_lock *plock1, PyThread_type_lock *plock2,
PyThread_type_lock *plock3, PyThread_type_lock *plock4)
PyThread_type_lock *plock3, PyThread_type_lock *plock4,
PyThread_type_lock *plock5)
{
/* Force default allocator, since _PyRuntimeState_Fini() must
use the same allocator as this function. */
Expand Down Expand Up @@ -389,12 +390,22 @@ alloc_for_runtime(PyThread_type_lock *plock1, PyThread_type_lock *plock2,
return -1;
}

PyThread_type_lock lock5 = PyThread_allocate_lock();
/* Test the lock that was just allocated.  Checking lock4 again here would
   let a failed lock5 allocation slip through as a NULL lock. */
if (lock5 == NULL) {
    /* Undo the four earlier allocations before reporting failure. */
    PyThread_free_lock(lock1);
    PyThread_free_lock(lock2);
    PyThread_free_lock(lock3);
    PyThread_free_lock(lock4);
    return -1;
}

PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &old_alloc);

*plock1 = lock1;
*plock2 = lock2;
*plock3 = lock3;
*plock4 = lock4;
*plock5 = lock5;
return 0;
}

Expand All @@ -404,6 +415,7 @@ init_runtime(_PyRuntimeState *runtime,
_Py_AuditHookEntry *audit_hook_head,
Py_ssize_t unicode_next_index,
PyThread_type_lock unicode_ids_mutex,
PyThread_type_lock interned_mutex,
PyThread_type_lock interpreters_mutex,
PyThread_type_lock xidregistry_mutex,
PyThread_type_lock getargs_mutex)
Expand Down Expand Up @@ -435,6 +447,7 @@ init_runtime(_PyRuntimeState *runtime,

runtime->unicode_state.ids.next_index = unicode_next_index;
runtime->unicode_state.ids.lock = unicode_ids_mutex;
runtime->unicode_state.interned.lock = interned_mutex;

runtime->_initialized = 1;
}
Expand All @@ -452,8 +465,8 @@ _PyRuntimeState_Init(_PyRuntimeState *runtime)
// is called multiple times.
Py_ssize_t unicode_next_index = runtime->unicode_state.ids.next_index;

PyThread_type_lock lock1, lock2, lock3, lock4;
if (alloc_for_runtime(&lock1, &lock2, &lock3, &lock4) != 0) {
PyThread_type_lock lock1, lock2, lock3, lock4, lock5;
if (alloc_for_runtime(&lock1, &lock2, &lock3, &lock4, &lock5) != 0) {
return _PyStatus_NO_MEMORY();
}

Expand All @@ -474,7 +487,7 @@ _PyRuntimeState_Init(_PyRuntimeState *runtime)
}

init_runtime(runtime, open_code_hook, open_code_userdata, audit_hook_head,
unicode_next_index, lock1, lock2, lock3, lock4);
unicode_next_index, lock1, lock2, lock3, lock4, lock5);

return _PyStatus_OK();
}
Expand Down Expand Up @@ -530,6 +543,7 @@ _PyRuntimeState_ReInitThreads(_PyRuntimeState *runtime)
int reinit_interp = _PyThread_at_fork_reinit(&runtime->interpreters.mutex);
int reinit_xidregistry = _PyThread_at_fork_reinit(&runtime->xidregistry.mutex);
int reinit_unicode_ids = _PyThread_at_fork_reinit(&runtime->unicode_state.ids.lock);
int reinit_interned = _PyThread_at_fork_reinit(&runtime->unicode_state.interned.lock);
int reinit_getargs = _PyThread_at_fork_reinit(&runtime->getargs.mutex);

PyMem_SetAllocator(PYMEM_DOMAIN_RAW, &old_alloc);
Expand All @@ -542,6 +556,7 @@ _PyRuntimeState_ReInitThreads(_PyRuntimeState *runtime)
|| reinit_main_id < 0
|| reinit_xidregistry < 0
|| reinit_unicode_ids < 0
|| reinit_interned < 0
|| reinit_getargs < 0)
{
return _PyStatus_ERR("Failed to reinitialize runtime locks");
Expand Down