@@ -198,6 +198,11 @@ extern "C" {
198198# define OVERALLOCATE_FACTOR 4
199199#endif
200200
201+ /* bpo-40521: Interned strings are shared by all interpreters. */
202+ #ifndef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS
203+ # define INTERNED_STRINGS
204+ #endif
205+
201206/* This dictionary holds all interned unicode strings. Note that references
202207 to strings in this dictionary are *not* counted in the string's ob_refcnt.
203208 When the interned string reaches a refcnt of 0 the string deallocation
@@ -206,7 +211,9 @@ extern "C" {
206211 Another way to look at this is to say that the actual reference
207212 count of a string is: s->ob_refcnt + (s->state ? 2 : 0)
208213*/
214+ #ifdef INTERNED_STRINGS
209215static PyObject * interned = NULL ;
216+ #endif
210217
211218/* The empty Unicode object is shared to improve performance. */
212219static PyObject * unicode_empty = NULL ;
@@ -281,9 +288,16 @@ unicode_decode_utf8(const char *s, Py_ssize_t size,
281288/* List of static strings. */
282289static _Py_Identifier * static_strings = NULL ;
283290
291+ /* bpo-40521: Latin1 singletons are shared by all interpreters. */
292+ #ifndef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS
293+ # define LATIN1_SINGLETONS
294+ #endif
295+
296+ #ifdef LATIN1_SINGLETONS
284297/* Single character Unicode strings in the Latin-1 range are being
285298 shared as well. */
286299static PyObject * unicode_latin1 [256 ] = {NULL };
300+ #endif
287301
288302/* Fast detection of the most frequent whitespace characters */
289303const unsigned char _Py_ascii_whitespace [] = {
@@ -662,6 +676,7 @@ unicode_result_ready(PyObject *unicode)
662676 return unicode_empty ;
663677 }
664678
679+ #ifdef LATIN1_SINGLETONS
665680 if (length == 1 ) {
666681 const void * data = PyUnicode_DATA (unicode );
667682 int kind = PyUnicode_KIND (unicode );
@@ -683,6 +698,7 @@ unicode_result_ready(PyObject *unicode)
683698 }
684699 }
685700 }
701+ #endif
686702
687703 assert (_PyUnicode_CheckConsistency (unicode , 1 ));
688704 return unicode ;
@@ -1913,10 +1929,12 @@ unicode_dealloc(PyObject *unicode)
19131929 case SSTATE_INTERNED_MORTAL :
19141930 /* revive dead object temporarily for DelItem */
19151931 Py_SET_REFCNT (unicode , 3 );
1932+ #ifdef INTERNED_STRINGS
19161933 if (PyDict_DelItem (interned , unicode ) != 0 ) {
19171934 _PyErr_WriteUnraisableMsg ("deletion of interned string failed" ,
19181935 NULL );
19191936 }
1937+ #endif
19201938 break ;
19211939
19221940 case SSTATE_INTERNED_IMMORTAL :
@@ -1944,15 +1962,18 @@ unicode_dealloc(PyObject *unicode)
19441962static int
19451963unicode_is_singleton (PyObject * unicode )
19461964{
1947- PyASCIIObject * ascii = (PyASCIIObject * )unicode ;
1948- if (unicode == unicode_empty )
1965+ if (unicode == unicode_empty ) {
19491966 return 1 ;
1967+ }
1968+ #ifdef LATIN1_SINGLETONS
1969+ PyASCIIObject * ascii = (PyASCIIObject * )unicode ;
19501970 if (ascii -> state .kind != PyUnicode_WCHAR_KIND && ascii -> length == 1 )
19511971 {
19521972 Py_UCS4 ch = PyUnicode_READ_CHAR (unicode , 0 );
19531973 if (ch < 256 && unicode_latin1 [ch ] == unicode )
19541974 return 1 ;
19551975 }
1976+ #endif
19561977 return 0 ;
19571978}
19581979#endif
@@ -2094,16 +2115,28 @@ unicode_write_cstr(PyObject *unicode, Py_ssize_t index,
20942115static PyObject *
20952116get_latin1_char (unsigned char ch )
20962117{
2097- PyObject * unicode = unicode_latin1 [ch ];
2118+ PyObject * unicode ;
2119+
2120+ #ifdef LATIN1_SINGLETONS
2121+ unicode = unicode_latin1 [ch ];
2122+ if (unicode ) {
2123+ Py_INCREF (unicode );
2124+ return unicode ;
2125+ }
2126+ #endif
2127+
2128+ unicode = PyUnicode_New (1 , ch );
20982129 if (!unicode ) {
2099- unicode = PyUnicode_New (1 , ch );
2100- if (!unicode )
2101- return NULL ;
2102- PyUnicode_1BYTE_DATA (unicode )[0 ] = ch ;
2103- assert (_PyUnicode_CheckConsistency (unicode , 1 ));
2104- unicode_latin1 [ch ] = unicode ;
2130+ return NULL ;
21052131 }
2132+
2133+ PyUnicode_1BYTE_DATA (unicode )[0 ] = ch ;
2134+ assert (_PyUnicode_CheckConsistency (unicode , 1 ));
2135+
2136+ #ifdef LATIN1_SINGLETONS
21062137 Py_INCREF (unicode );
2138+ unicode_latin1 [ch ] = unicode ;
2139+ #endif
21072140 return unicode ;
21082141}
21092142
@@ -11270,7 +11303,6 @@ int
1127011303_PyUnicode_EqualToASCIIId (PyObject * left , _Py_Identifier * right )
1127111304{
1127211305 PyObject * right_uni ;
11273- Py_hash_t hash ;
1127411306
1127511307 assert (_PyUnicode_CHECK (left ));
1127611308 assert (right -> string );
@@ -11302,10 +11334,12 @@ _PyUnicode_EqualToASCIIId(PyObject *left, _Py_Identifier *right)
1130211334 if (PyUnicode_CHECK_INTERNED (left ))
1130311335 return 0 ;
1130411336
11337+ #ifdef INTERNED_STRINGS
1130511338 assert (_PyUnicode_HASH (right_uni ) != -1 );
11306- hash = _PyUnicode_HASH (left );
11339+ Py_hash_t hash = _PyUnicode_HASH (left );
1130711340 if (hash != -1 && hash != _PyUnicode_HASH (right_uni ))
1130811341 return 0 ;
11342+ #endif
1130911343
1131011344 return unicode_compare_eq (left , right_uni );
1131111345}
@@ -15487,43 +15521,55 @@ void
1548715521PyUnicode_InternInPlace (PyObject * * p )
1548815522{
1548915523 PyObject * s = * p ;
15490- PyObject * t ;
1549115524#ifdef Py_DEBUG
1549215525 assert (s != NULL );
1549315526 assert (_PyUnicode_CHECK (s ));
1549415527#else
15495- if (s == NULL || !PyUnicode_Check (s ))
15528+ if (s == NULL || !PyUnicode_Check (s )) {
1549615529 return ;
15530+ }
1549715531#endif
15532+
1549815533 /* If it's a subclass, we don't really know what putting
1549915534 it in the interned dict might do. */
15500- if (!PyUnicode_CheckExact (s ))
15535+ if (!PyUnicode_CheckExact (s )) {
1550115536 return ;
15502- if (PyUnicode_CHECK_INTERNED (s ))
15537+ }
15538+
15539+ if (PyUnicode_CHECK_INTERNED (s )) {
1550315540 return ;
15541+ }
15542+
15543+ #ifdef INTERNED_STRINGS
1550415544 if (interned == NULL ) {
1550515545 interned = PyDict_New ();
1550615546 if (interned == NULL ) {
1550715547 PyErr_Clear (); /* Don't leave an exception */
1550815548 return ;
1550915549 }
1551015550 }
15551+
15552+ PyObject * t ;
1551115553 Py_ALLOW_RECURSION
1551215554 t = PyDict_SetDefault (interned , s , s );
1551315555 Py_END_ALLOW_RECURSION
15556+
1551415557 if (t == NULL) {
1551515558 PyErr_Clear ();
1551615559 return ;
1551715560 }
15561+
1551815562 if (t != s ) {
1551915563 Py_INCREF (t );
1552015564 Py_SETREF (* p , t );
1552115565 return ;
1552215566 }
15567+
1552315568 /* The two references in interned are not counted by refcnt.
1552415569 The deallocator will take care of this */
1552515570 Py_SET_REFCNT (s , Py_REFCNT (s ) - 2 );
1552615571 _PyUnicode_STATE (s ).interned = SSTATE_INTERNED_MORTAL ;
15572+ #endif
1552715573}
1552815574
1552915575void
@@ -16109,9 +16155,11 @@ _PyUnicode_Fini(PyThreadState *tstate)
1610916155
1611016156 Py_CLEAR (unicode_empty );
1611116157
16158+ #ifdef LATIN1_SINGLETONS
1611216159 for (Py_ssize_t i = 0 ; i < 256 ; i ++ ) {
1611316160 Py_CLEAR (unicode_latin1 [i ]);
1611416161 }
16162+ #endif
1611516163 _PyUnicode_ClearStaticStrings ();
1611616164 }
1611716165
0 commit comments