@@ -11,6 +11,7 @@ Copyright (c) Corporation for National Research Initiatives.
1111#include "Python.h"
1212#include "pycore_call.h" // _PyObject_CallNoArgs()
1313#include "pycore_interp.h" // PyInterpreterState.codec_search_path
14+ #include "pycore_lock.h" // PyMutex
1415#include "pycore_pyerrors.h" // _PyErr_FormatNote()
1516#include "pycore_pystate.h" // _PyInterpreterState_GET()
1617#include "pycore_ucnhash.h" // _PyUnicode_Name_CAPI
@@ -19,24 +20,10 @@ const char *Py_hexdigits = "0123456789abcdef";
1920
2021/* --- Codec Registry ----------------------------------------------------- */
2122
22- /* Import the standard encodings package which will register the first
23- codec search function.
24-
25- This is done in a lazy way so that the Unicode implementation does
26- not downgrade startup time of scripts not needing it.
27-
28- ImportErrors are silently ignored by this function. Only one try is
29- made.
30-
31- */
32-
33- static int _PyCodecRegistry_Init (void ); /* Forward */
34-
3523int PyCodec_Register (PyObject * search_function )
3624{
3725 PyInterpreterState * interp = _PyInterpreterState_GET ();
38- if (interp -> codec_search_path == NULL && _PyCodecRegistry_Init ())
39- goto onError ;
26+ assert (interp -> codecs .initialized );
4027 if (search_function == NULL ) {
4128 PyErr_BadArgument ();
4229 goto onError ;
@@ -45,7 +32,14 @@ int PyCodec_Register(PyObject *search_function)
4532 PyErr_SetString (PyExc_TypeError , "argument must be callable" );
4633 goto onError ;
4734 }
48- return PyList_Append (interp -> codec_search_path , search_function );
35+ #ifdef Py_GIL_DISABLED
36+ PyMutex_Lock (& interp -> codecs .search_path_mutex );
37+ #endif
38+ int ret = PyList_Append (interp -> codecs .search_path , search_function );
39+ #ifdef Py_GIL_DISABLED
40+ PyMutex_Unlock (& interp -> codecs .search_path_mutex );
41+ #endif
42+ return ret ;
4943
5044 onError :
5145 return -1 ;
5549PyCodec_Unregister (PyObject * search_function )
5650{
5751 PyInterpreterState * interp = _PyInterpreterState_GET ();
58- PyObject * codec_search_path = interp -> codec_search_path ;
59- /* Do nothing if codec_search_path is not created yet or was cleared. */
60- if ( codec_search_path == NULL ) {
52+ if ( interp -> codecs . initialized != 1 ) {
53+ /* Do nothing if codecs state was cleared (only possible during
54+ interpreter shutdown). */
6155 return 0 ;
6256 }
6357
58+ PyObject * codec_search_path = interp -> codecs .search_path ;
6459 assert (PyList_CheckExact (codec_search_path ));
65- Py_ssize_t n = PyList_GET_SIZE (codec_search_path );
66- for (Py_ssize_t i = 0 ; i < n ; i ++ ) {
67- PyObject * item = PyList_GET_ITEM (codec_search_path , i );
60+ for (Py_ssize_t i = 0 ; i < PyList_GET_SIZE (codec_search_path ); i ++ ) {
61+ #ifdef Py_GIL_DISABLED
62+ PyMutex_Lock (& interp -> codecs .search_path_mutex );
63+ #endif
64+ PyObject * item = PyList_GetItemRef (codec_search_path , i );
65+ int ret = 1 ;
6866 if (item == search_function ) {
69- if (interp -> codec_search_cache != NULL ) {
70- assert (PyDict_CheckExact (interp -> codec_search_cache ));
71- PyDict_Clear (interp -> codec_search_cache );
72- }
73- return PyList_SetSlice (codec_search_path , i , i + 1 , NULL );
67+ // We hold a reference to the item, so its destructor can't run
68+ // while we hold search_path_mutex.
69+ ret = PyList_SetSlice (codec_search_path , i , i + 1 , NULL );
70+ }
71+ #ifdef Py_GIL_DISABLED
72+ PyMutex_Unlock (& interp -> codecs .search_path_mutex );
73+ #endif
74+ Py_DECREF (item );
75+ if (ret != 1 ) {
76+ assert (interp -> codecs .search_cache != NULL );
77+ assert (PyDict_CheckExact (interp -> codecs .search_cache ));
78+ PyDict_Clear (interp -> codecs .search_cache );
79+ return ret ;
7480 }
7581 }
7682 return 0 ;
@@ -132,9 +138,7 @@ PyObject *_PyCodec_Lookup(const char *encoding)
132138 }
133139
134140 PyInterpreterState * interp = _PyInterpreterState_GET ();
135- if (interp -> codec_search_path == NULL && _PyCodecRegistry_Init ()) {
136- return NULL ;
137- }
141+ assert (interp -> codecs .initialized );
138142
139143 /* Convert the encoding to a normalized Python string: all
140144 characters are converted to lower case, spaces and hyphens are
@@ -147,7 +151,7 @@ PyObject *_PyCodec_Lookup(const char *encoding)
147151
148152 /* First, try to lookup the name in the registry dictionary */
149153 PyObject * result ;
150- if (PyDict_GetItemRef (interp -> codec_search_cache , v , & result ) < 0 ) {
154+ if (PyDict_GetItemRef (interp -> codecs . search_cache , v , & result ) < 0 ) {
151155 goto onError ;
152156 }
153157 if (result != NULL ) {
@@ -156,7 +160,7 @@ PyObject *_PyCodec_Lookup(const char *encoding)
156160 }
157161
158162 /* Next, scan the search functions in order of registration */
159- const Py_ssize_t len = PyList_Size (interp -> codec_search_path );
163+ const Py_ssize_t len = PyList_Size (interp -> codecs . search_path );
160164 if (len < 0 )
161165 goto onError ;
162166 if (len == 0 ) {
@@ -170,14 +174,15 @@ PyObject *_PyCodec_Lookup(const char *encoding)
170174 for (i = 0 ; i < len ; i ++ ) {
171175 PyObject * func ;
172176
173- func = PyList_GetItem (interp -> codec_search_path , i );
177+ func = PyList_GetItemRef (interp -> codecs . search_path , i );
174178 if (func == NULL )
175179 goto onError ;
176180 result = PyObject_CallOneArg (func , v );
181+ Py_DECREF (func );
177182 if (result == NULL )
178183 goto onError ;
179184 if (result == Py_None ) {
180- Py_DECREF (result );
185+ Py_CLEAR (result );
181186 continue ;
182187 }
183188 if (!PyTuple_Check (result ) || PyTuple_GET_SIZE (result ) != 4 ) {
@@ -188,15 +193,15 @@ PyObject *_PyCodec_Lookup(const char *encoding)
188193 }
189194 break ;
190195 }
191- if (i == len ) {
196+ if (result == NULL ) {
192197 /* XXX Perhaps we should cache misses too ? */
193198 PyErr_Format (PyExc_LookupError ,
194199 "unknown encoding: %s" , encoding );
195200 goto onError ;
196201 }
197202
198203 /* Cache and return the result */
199- if (PyDict_SetItem (interp -> codec_search_cache , v , result ) < 0 ) {
204+ if (PyDict_SetItem (interp -> codecs . search_cache , v , result ) < 0 ) {
200205 Py_DECREF (result );
201206 goto onError ;
202207 }
@@ -600,13 +605,12 @@ PyObject *_PyCodec_DecodeText(PyObject *object,
600605int PyCodec_RegisterError (const char * name , PyObject * error )
601606{
602607 PyInterpreterState * interp = _PyInterpreterState_GET ();
603- if (interp -> codec_search_path == NULL && _PyCodecRegistry_Init ())
604- return -1 ;
608+ assert (interp -> codecs .initialized );
605609 if (!PyCallable_Check (error )) {
606610 PyErr_SetString (PyExc_TypeError , "handler must be callable" );
607611 return -1 ;
608612 }
609- return PyDict_SetItemString (interp -> codec_error_registry ,
613+ return PyDict_SetItemString (interp -> codecs . error_registry ,
610614 name , error );
611615}
612616
@@ -616,13 +620,12 @@ int PyCodec_RegisterError(const char *name, PyObject *error)
616620PyObject * PyCodec_LookupError (const char * name )
617621{
618622 PyInterpreterState * interp = _PyInterpreterState_GET ();
619- if (interp -> codec_search_path == NULL && _PyCodecRegistry_Init ())
620- return NULL ;
623+ assert (interp -> codecs .initialized );
621624
622625 if (name == NULL )
623626 name = "strict" ;
624627 PyObject * handler ;
625- if (PyDict_GetItemStringRef (interp -> codec_error_registry , name , & handler ) < 0 ) {
628+ if (PyDict_GetItemStringRef (interp -> codecs . error_registry , name , & handler ) < 0 ) {
626629 return NULL ;
627630 }
628631 if (handler == NULL ) {
@@ -1375,7 +1378,8 @@ static PyObject *surrogateescape_errors(PyObject *self, PyObject *exc)
13751378 return PyCodec_SurrogateEscapeErrors (exc );
13761379}
13771380
1378- static int _PyCodecRegistry_Init (void )
1381+ PyStatus
1382+ _PyCodec_InitRegistry (PyInterpreterState * interp )
13791383{
13801384 static struct {
13811385 const char * name ;
@@ -1463,45 +1467,51 @@ static int _PyCodecRegistry_Init(void)
14631467 }
14641468 };
14651469
1466- PyInterpreterState * interp = _PyInterpreterState_GET ();
1467- PyObject * mod ;
1468-
1469- if (interp -> codec_search_path != NULL )
1470- return 0 ;
1471-
1472- interp -> codec_search_path = PyList_New (0 );
1473- if (interp -> codec_search_path == NULL ) {
1474- return -1 ;
1470+ assert (interp -> codecs .initialized == 0 );
1471+ interp -> codecs .search_path = PyList_New (0 );
1472+ if (interp -> codecs .search_path == NULL ) {
1473+ return PyStatus_NoMemory ();
14751474 }
1476-
1477- interp -> codec_search_cache = PyDict_New ();
1478- if (interp -> codec_search_cache == NULL ) {
1479- return -1 ;
1475+ interp -> codecs .search_cache = PyDict_New ();
1476+ if (interp -> codecs .search_cache == NULL ) {
1477+ return PyStatus_NoMemory ();
14801478 }
1481-
1482- interp -> codec_error_registry = PyDict_New ();
1483- if (interp -> codec_error_registry == NULL ) {
1484- return -1 ;
1479+ interp -> codecs .error_registry = PyDict_New ();
1480+ if (interp -> codecs .error_registry == NULL ) {
1481+ return PyStatus_NoMemory ();
14851482 }
1486-
14871483 for (size_t i = 0 ; i < Py_ARRAY_LENGTH (methods ); ++ i ) {
14881484 PyObject * func = PyCFunction_NewEx (& methods [i ].def , NULL , NULL );
1489- if (! func ) {
1490- return -1 ;
1485+ if (func == NULL ) {
1486+ return PyStatus_NoMemory () ;
14911487 }
14921488
1493- int res = PyCodec_RegisterError (methods [i ].name , func );
1489+ int res = PyDict_SetItemString (interp -> codecs .error_registry ,
1490+ methods [i ].name , func );
14941491 Py_DECREF (func );
1495- if (res ) {
1496- return -1 ;
1492+ if (res < 0 ) {
1493+ return PyStatus_Error ( "Failed to insert into codec error registry" ) ;
14971494 }
14981495 }
14991496
1500- mod = PyImport_ImportModule ("encodings" );
1497+ interp -> codecs .initialized = 1 ;
1498+
1499+ // Importing `encodings' will call back into this module to register codec
1500+ // search functions, so this is done after everything else is initialized.
1501+ PyObject * mod = PyImport_ImportModule ("encodings" );
15011502 if (mod == NULL ) {
1502- return -1 ;
1503+ return PyStatus_Error ( "Failed to import encodings module" ) ;
15031504 }
15041505 Py_DECREF (mod );
1505- interp -> codecs_initialized = 1 ;
1506- return 0 ;
1506+
1507+ return PyStatus_Ok ();
1508+ }
1509+
1510+ void
1511+ _PyCodec_Fini (PyInterpreterState * interp )
1512+ {
1513+ Py_CLEAR (interp -> codecs .search_path );
1514+ Py_CLEAR (interp -> codecs .search_cache );
1515+ Py_CLEAR (interp -> codecs .error_registry );
1516+ interp -> codecs .initialized = 0 ;
15071517}
0 commit comments