Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
24 commits
Select commit Hold shift + click to select a range
4c9f1aa
this reuses SOME interned strings, but not `utf-8` and friends
albertedwardson Oct 27, 2025
d287cc6
Update codeobject.c: - unnecessary check
albertedwardson Oct 27, 2025
d9eaaf6
correct error handling, refcount interned_dict
albertedwardson Oct 28, 2025
5a8b4ce
get interned strings from another dict with interned strings?
albertedwardson Dec 19, 2025
7ab67c4
[SKIP CI] cancelling previous workflow
albertedwardson Dec 19, 2025
4a9e55e
deadlock
albertedwardson Dec 19, 2025
bc861a6
fix usage of Py_BEGIN_CRITICAL_SECTION
albertedwardson Dec 19, 2025
45129f8
just guessing
albertedwardson Dec 19, 2025
485414c
ft build interns and immortalizes everything anyway
albertedwardson Dec 20, 2025
cc63fa2
initial tests
albertedwardson Dec 20, 2025
ad6af24
fix tests
albertedwardson Dec 21, 2025
903cc96
fix tests
albertedwardson Dec 21, 2025
9ec2e09
global cache first
albertedwardson Dec 21, 2025
a1655f1
reorganize tests
albertedwardson Dec 22, 2025
8863b2e
unnecessary, but pretty
albertedwardson Dec 22, 2025
1df49ee
move dicts of interned strings to appropriate section in header
albertedwardson Dec 22, 2025
98ac326
add notes in comments that this is copypaste
albertedwardson Dec 22, 2025
0c6d450
move import
albertedwardson Dec 22, 2025
26fe5d5
move dicts of interned strings to appropriate section in header
albertedwardson Dec 22, 2025
f10e201
reuse `get_interned_dict` from header, and do not refcount it
albertedwardson Dec 22, 2025
9cec5f2
add test, confuse myself even more
albertedwardson Dec 22, 2025
4d9f068
why
albertedwardson Dec 22, 2025
7e07279
:)
albertedwardson Dec 22, 2025
056e2c5
rerun workflow
albertedwardson Dec 23, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions Include/internal/pycore_unicodeobject.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ extern "C" {

#include "pycore_fileutils.h" // _Py_error_handler
#include "pycore_ucnhash.h" // _PyUnicode_Name_CAPI
#include "pycore_global_objects.h"// _Py_INTERP_CACHED_OBJECT


// Maximum code point of Unicode 6.0: 0x10ffff (1,114,111).
Expand Down Expand Up @@ -358,6 +359,19 @@ extern PyTypeObject _PyUnicodeASCIIIter_Type;

// All these are "ref-neutral", like the public PyUnicode_InternInPlace.

/* This hashtable holds statically allocated interned strings.
 * It is stored in the global _PyRuntime structure rather than in a
 * PyInterpreterState (compare get_interned_dict()).
 * See InternalDocs/string_interning.md for details.
 */
#define INTERNED_STRINGS _PyRuntime.cached_objects.interned_strings

/* Return the dictionary of interned strings cached on the interpreter
 * `interp`.
 * See InternalDocs/string_interning.md for details.
 */
static inline PyObject *get_interned_dict(PyInterpreterState *interp)
{
    PyObject *interned_dict = _Py_INTERP_CACHED_OBJECT(interp, interned_strings);
    return interned_dict;
}

// Explicit interning routines:
PyAPI_FUNC(void) _PyUnicode_InternMortal(PyInterpreterState *interp, PyObject **);
PyAPI_FUNC(void) _PyUnicode_InternImmortal(PyInterpreterState *interp, PyObject **);
Expand Down
23 changes: 23 additions & 0 deletions Lib/test/support/constants_helper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import re
from pathlib import Path

from typing import Iterable

from test.support.project_files_helper import iter_all_c_files


# Copied from 'Tools/build/generate_global_objects.py'.
def iter_global_strings() -> Iterable[str]:
    """Yield every global string named in the C sources.

    Each file produced by iter_all_c_files() is scanned line by line.  For
    every line, the identifier inside each ``_Py_ID(...)`` is yielded first,
    followed by the literal text inside each ``_Py_DECLARE_STR(name, "...")``.

    A string is yielded once per occurrence, so callers that need unique
    values must de-duplicate.  Literal text is yielded exactly as written in
    the C source; escape sequences are not decoded.
    """
    id_regex = re.compile(r"\b_Py_ID\((\w+)\)")
    str_regex = re.compile(r'\b_Py_DECLARE_STR\((?:\w+), "(.*?)"\)')
    for filename in iter_all_c_files():
        try:
            # Open directly rather than testing exists() first: a file that
            # disappears between the check and the open would otherwise
            # abort the whole scan with FileNotFoundError.
            infile_open = Path(filename).open(encoding="utf-8")
        except FileNotFoundError:
            # The file must have been a temporary file.
            continue
        with infile_open:
            for line in infile_open:
                for m in id_regex.finditer(line):
                    yield m.group(1)
                for m in str_regex.finditer(line):
                    yield m.group(1)
22 changes: 22 additions & 0 deletions Lib/test/support/project_files_helper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
from pathlib import Path

from typing import Iterable

# Repository root.  This module lives in Lib/test/support/, so the root is
# three directories above the one containing this file.
ROOT = Path(__file__).resolve().parents[3]


# Copied from 'Tools/build/generate_global_objects.py'.
def iter_all_c_files() -> Iterable[Path]:
    """Yield the path of every ``.c`` and ``.h`` file in the C source trees.

    The top-level directories listed below are walked recursively, in the
    order given, relative to ROOT.
    """
    source_dirs = ("Modules", "Objects", "Parser", "PC", "Programs", "Python")
    c_suffixes = (".c", ".h")
    for subdir in source_dirs:
        for dirpath, _dirnames, filenames in (ROOT / subdir).walk():
            yield from (
                dirpath / filename
                for filename in filenames
                if filename.endswith(c_suffixes)
            )
36 changes: 36 additions & 0 deletions Lib/test/test_code.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,7 @@
from test.support import (cpython_only,
check_impl_detail, requires_debug_ranges,
gc_collect, Py_GIL_DISABLED)
from test.support.constants_helper import iter_global_strings
from test.support.script_helper import assert_python_ok
from test.support import threading_helper, import_helper
from test.support.bytecode_helper import instructions_with_positions
Expand Down Expand Up @@ -1251,6 +1252,41 @@ class MyInt(int):
self.assertIsInstance(code.co_consts[1], Unhashable)
self.assertEqual(code.co_consts[2], code.co_consts[3])

@cpython_only
@unittest.skipIf(Py_GIL_DISABLED, "free-threaded build interns all string constants")
def test__Py_DECLARE_STR_is_interned(self):
    """Global strings used as code constants must come out interned."""
    # iter_global_strings() yields one entry per occurrence in the C
    # sources.  De-duplicate so each string is checked exactly once, and
    # sort so the subTest order is stable between runs.
    global_strings = sorted(set(iter_global_strings()))
    if not global_strings:
        # Without this guard the loop below would pass without checking
        # anything.
        self.skipTest("no global strings found in the C sources")
    for global_string in global_strings:
        with self.subTest(global_string=global_string):
            # eval() compiles the literal, so the resulting string is a
            # constant of a freshly created code object.
            self.assertIsInterned(eval(f"'{global_string}'"))

# Strings, one per line, that the tests below expect NOT to be interned
# automatically when they appear as code constants in the default build.
# NOTE(review): presumably chosen because they are not identifier-like ASCII
# text -- confirm against should_intern_string() in Objects/codeobject.c.
noninternable_by_default = textwrap.dedent(f'''
not-internable
not.internable
не_интернируемый
str with spaces
{chr(0x011111)}
{chr(0x9999)}
{chr(0x100)}
''')

@cpython_only
@unittest.skipIf(Py_GIL_DISABLED, "free-threaded build interns all string constants")
def test_non_internable_strings_not_interned(self):
    """Code constants listed in noninternable_by_default stay un-interned."""
    candidates = self.noninternable_by_default.strip().splitlines()
    for noninternable in candidates:
        with self.subTest(noninternable=noninternable):
            # Compile the string as a literal so it becomes a code constant.
            literal_source = f"'{noninternable}'"
            constant = eval(literal_source)
            self.assertIsNotInterned(constant)

@cpython_only
@unittest.skipIf(Py_GIL_DISABLED, "free-threaded build interns all string constants")
def test_explicitly_interned_strings(self):
    """A string interned via sys.intern() is reused for equal code constants.

    Each string from noninternable_by_default is interned explicitly; an
    equal literal compiled afterwards must then be the very same object.
    """
    for noninternable in self.noninternable_by_default.strip().splitlines():
        # Every check lives inside the subTest so a failure for one string
        # does not prevent the remaining strings from being checked.
        with self.subTest(noninternable=noninternable):
            self.assertIsNotInterned(noninternable)
            # Use the object returned by sys.intern(): it is the canonical
            # interned string, which is not guaranteed to be the object
            # that was passed in.
            interned = sys.intern(noninternable)
            self.assertIsInterned(interned)
            interned_from_code = eval(f"'{noninternable}'")
            self.assertIsInterned(interned_from_code)
            self.assertIs(interned, interned_from_code)

class CodeWeakRefTest(unittest.TestCase):

Expand Down
36 changes: 23 additions & 13 deletions Objects/codeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ PyCode_ClearWatcher(int watcher_id)

/* Cast `op` to PyCodeObject *, asserting first that it passes PyCode_Check(). */
#define _PyCodeObject_CAST(op) (assert(PyCode_Check(op)), (PyCodeObject *)(op))

static int
static inline int
should_intern_string(PyObject *o)
{
#ifdef Py_GIL_DISABLED
Expand Down Expand Up @@ -196,6 +196,8 @@ intern_strings(PyObject *tuple)
return 0;
}

/* Record that the constants tuple was changed: store 1 through `modified`
 * unless it is NULL (the flag is optional).
 *
 * Wrapped in do { } while (0) so the macro behaves as a single statement;
 * a bare `if` would capture the `else` of an enclosing if/else.  The
 * argument is parenthesized so any pointer expression can be passed.
 */
#define _constants_tuple_modified(modified) \
    do { \
        if ((modified) != NULL) { \
            *(modified) = 1; \
        } \
    } while (0)

/* Intern constants. In the default build, this interns selected string
constants. In the free-threaded build, this also interns non-string
constants. */
Expand All @@ -206,14 +208,28 @@ intern_constants(PyObject *tuple, int *modified)
for (Py_ssize_t i = PyTuple_GET_SIZE(tuple); --i >= 0; ) {
PyObject *v = PyTuple_GET_ITEM(tuple, i);
if (PyUnicode_CheckExact(v)) {
if (PyUnicode_CHECK_INTERNED(v) != 0) {
continue;
}
#if !defined(Py_GIL_DISABLED)
PyObject *interned = _Py_hashtable_get(INTERNED_STRINGS, v);
if (interned == NULL) {
interned = PyDict_GetItemWithError(get_interned_dict(interp), v);
if (PyErr_Occurred()) return -1;
}
if (interned != NULL && interned != v) {
Py_INCREF(interned);
PyTuple_SET_ITEM(tuple, i, interned);
Py_DECREF(v);
_constants_tuple_modified(modified);
} else
#endif
if (should_intern_string(v)) {
PyObject *w = v;
_PyUnicode_InternMortal(interp, &v);
if (w != v) {
PyTuple_SET_ITEM(tuple, i, v);
if (modified) {
*modified = 1;
}
_constants_tuple_modified(modified);
}
}
}
Expand Down Expand Up @@ -242,9 +258,7 @@ intern_constants(PyObject *tuple, int *modified)

PyTuple_SET_ITEM(tuple, i, v);
Py_DECREF(w);
if (modified) {
*modified = 1;
}
_constants_tuple_modified(modified);
}
Py_DECREF(tmp);
}
Expand Down Expand Up @@ -273,9 +287,7 @@ intern_constants(PyObject *tuple, int *modified)
}
PyTuple_SET_ITEM(tuple, i, v);
Py_DECREF(slice);
if (modified) {
*modified = 1;
}
_constants_tuple_modified(modified);
}
Py_DECREF(tmp);
}
Expand All @@ -293,9 +305,7 @@ intern_constants(PyObject *tuple, int *modified)
else if (interned != v) {
PyTuple_SET_ITEM(tuple, i, interned);
Py_SETREF(v, interned);
if (modified) {
*modified = 1;
}
_constants_tuple_modified(modified);
}
}
#endif
Expand Down
12 changes: 0 additions & 12 deletions Objects/unicodeobject.c
Original file line number Diff line number Diff line change
Expand Up @@ -207,18 +207,6 @@ _PyUnicode_GetEmpty(void)
return &_Py_STR(empty);
}

/* Return the dictionary of interned strings cached on the interpreter
 * `interp`.
 * See InternalDocs/string_interning.md for details.
 */
static inline PyObject *get_interned_dict(PyInterpreterState *interp)
{
    PyObject *interned_dict = _Py_INTERP_CACHED_OBJECT(interp, interned_strings);
    return interned_dict;
}

/* This hashtable holds statically allocated interned strings.
 * It is stored in the global _PyRuntime structure rather than in a
 * PyInterpreterState (compare get_interned_dict()).
 * See InternalDocs/string_interning.md for details.
 */
#define INTERNED_STRINGS _PyRuntime.cached_objects.interned_strings

/* Get number of all interned strings for the current interpreter. */
Py_ssize_t
Expand Down
Loading