@@ -5051,32 +5051,22 @@ _PyUnicode_EncodeUTF32(PyObject *str,
50515051 const char * errors ,
50525052 int byteorder )
50535053{
5054- int kind ;
5055- void * data ;
5054+ enum PyUnicode_Kind kind ;
5055+ const void * data ;
50565056 Py_ssize_t len ;
50575057 PyObject * v ;
5058- unsigned char * p ;
5059- Py_ssize_t nsize , i ;
5060- /* Offsets from p for storing byte pairs in the right order. */
5058+ PY_UINT32_T * out ;
50615059#if PY_LITTLE_ENDIAN
5062- int iorder [] = { 0 , 1 , 2 , 3 } ;
5060+ int native_ordering = byteorder <= 0 ;
50635061#else
5064- int iorder [] = { 3 , 2 , 1 , 0 } ;
5062+ int native_ordering = byteorder >= 0 ;
50655063#endif
50665064 const char * encoding ;
5065+ Py_ssize_t nsize , pos ;
50675066 PyObject * errorHandler = NULL ;
50685067 PyObject * exc = NULL ;
50695068 PyObject * rep = NULL ;
50705069
5071- #define STORECHAR (CH ) \
5072- do { \
5073- p[iorder[3]] = ((CH) >> 24) & 0xff; \
5074- p[iorder[2]] = ((CH) >> 16) & 0xff; \
5075- p[iorder[1]] = ((CH) >> 8) & 0xff; \
5076- p[iorder[0]] = (CH) & 0xff; \
5077- p += 4; \
5078- } while(0)
5079-
50805070 if (!PyUnicode_Check (str )) {
50815071 PyErr_BadArgument ();
50825072 return NULL ;
@@ -5087,67 +5077,61 @@ _PyUnicode_EncodeUTF32(PyObject *str,
50875077 data = PyUnicode_DATA (str );
50885078 len = PyUnicode_GET_LENGTH (str );
50895079
5090- nsize = len + (byteorder == 0 );
5091- if (nsize > PY_SSIZE_T_MAX / 4 )
5080+ if (len > PY_SSIZE_T_MAX / 4 - (byteorder == 0 ))
50925081 return PyErr_NoMemory ();
5082+ nsize = len + (byteorder == 0 );
50935083 v = PyBytes_FromStringAndSize (NULL , nsize * 4 );
50945084 if (v == NULL )
50955085 return NULL ;
50965086
5097- p = (unsigned char * )PyBytes_AS_STRING (v );
5087+ /* output buffer is 4-bytes aligned */
5088+ assert (_Py_IS_ALIGNED (PyBytes_AS_STRING (v ), 4 ));
5089+ out = (PY_UINT32_T * )PyBytes_AS_STRING (v );
50985090 if (byteorder == 0 )
5099- STORECHAR ( 0xFEFF ) ;
5091+ * out ++ = 0xFEFF ;
51005092 if (len == 0 )
5101- return v ;
5093+ goto done ;
51025094
5103- if (byteorder == -1 ) {
5104- /* force LE */
5105- iorder [0 ] = 0 ;
5106- iorder [1 ] = 1 ;
5107- iorder [2 ] = 2 ;
5108- iorder [3 ] = 3 ;
5095+ if (byteorder == -1 )
51095096 encoding = "utf-32-le" ;
5110- }
5111- else if (byteorder == 1 ) {
5112- /* force BE */
5113- iorder [0 ] = 3 ;
5114- iorder [1 ] = 2 ;
5115- iorder [2 ] = 1 ;
5116- iorder [3 ] = 0 ;
5097+ else if (byteorder == 1 )
51175098 encoding = "utf-32-be" ;
5118- }
51195099 else
51205100 encoding = "utf-32" ;
51215101
51225102 if (kind == PyUnicode_1BYTE_KIND ) {
5123- for (i = 0 ; i < len ; i ++ )
5124- STORECHAR (PyUnicode_READ (kind , data , i ));
5125- return v ;
5103+ ucs1lib_utf32_encode ((const Py_UCS1 * )data , len , & out , native_ordering );
5104+ goto done ;
51265105 }
51275106
5128- for (i = 0 ; i < len ;) {
5107+ pos = 0 ;
5108+ while (pos < len ) {
51295109 Py_ssize_t repsize , moreunits ;
5130- Py_UCS4 ch = PyUnicode_READ (kind , data , i );
5131- i ++ ;
5132- assert (ch <= MAX_UNICODE );
5133- if (!Py_UNICODE_IS_SURROGATE (ch )) {
5134- STORECHAR (ch );
5135- continue ;
5110+
5111+ if (kind == PyUnicode_2BYTE_KIND ) {
5112+ pos += ucs2lib_utf32_encode ((const Py_UCS2 * )data + pos , len - pos ,
5113+ & out , native_ordering );
51365114 }
5115+ else {
5116+ assert (kind == PyUnicode_4BYTE_KIND );
5117+ pos += ucs4lib_utf32_encode ((const Py_UCS4 * )data + pos , len - pos ,
5118+ & out , native_ordering );
5119+ }
5120+ if (pos == len )
5121+ break ;
51375122
51385123 rep = unicode_encode_call_errorhandler (
51395124 errors , & errorHandler ,
51405125 encoding , "surrogates not allowed" ,
5141- str , & exc , i - 1 , i , & i );
5142-
5126+ str , & exc , pos , pos + 1 , & pos );
51435127 if (!rep )
51445128 goto error ;
51455129
51465130 if (PyBytes_Check (rep )) {
51475131 repsize = PyBytes_GET_SIZE (rep );
51485132 if (repsize & 3 ) {
51495133 raise_encode_exception (& exc , encoding ,
5150- str , i - 1 , i ,
5134+ str , pos - 1 , pos ,
51515135 "surrogates not allowed" );
51525136 goto error ;
51535137 }
@@ -5160,15 +5144,15 @@ _PyUnicode_EncodeUTF32(PyObject *str,
51605144 moreunits = repsize = PyUnicode_GET_LENGTH (rep );
51615145 if (!PyUnicode_IS_ASCII (rep )) {
51625146 raise_encode_exception (& exc , encoding ,
5163- str , i - 1 , i ,
5147+ str , pos - 1 , pos ,
51645148 "surrogates not allowed" );
51655149 goto error ;
51665150 }
51675151 }
51685152
51695153 /* four bytes are reserved for each surrogate */
51705154 if (moreunits > 1 ) {
5171- Py_ssize_t outpos = p - (unsigned char * ) PyBytes_AS_STRING (v );
5155+ Py_ssize_t outpos = out - (PY_UINT32_T * ) PyBytes_AS_STRING (v );
51725156 Py_ssize_t morebytes = 4 * (moreunits - 1 );
51735157 if (PyBytes_GET_SIZE (v ) > PY_SSIZE_T_MAX - morebytes ) {
51745158 /* integer overflow */
@@ -5177,20 +5161,16 @@ _PyUnicode_EncodeUTF32(PyObject *str,
51775161 }
51785162 if (_PyBytes_Resize (& v , PyBytes_GET_SIZE (v ) + morebytes ) < 0 )
51795163 goto error ;
5180- p = (unsigned char * ) PyBytes_AS_STRING (v ) + outpos ;
5164+ out = (PY_UINT32_T * ) PyBytes_AS_STRING (v ) + outpos ;
51815165 }
51825166
51835167 if (PyBytes_Check (rep )) {
5184- Py_MEMCPY (p , PyBytes_AS_STRING (rep ), repsize );
5185- p += repsize ;
5168+ Py_MEMCPY (out , PyBytes_AS_STRING (rep ), repsize );
5169+ out += moreunits ;
51865170 } else /* rep is unicode */ {
5187- const Py_UCS1 * repdata ;
51885171 assert (PyUnicode_KIND (rep ) == PyUnicode_1BYTE_KIND );
5189- repdata = PyUnicode_1BYTE_DATA (rep );
5190- while (repsize -- ) {
5191- Py_UCS4 ch = * repdata ++ ;
5192- STORECHAR (ch );
5193- }
5172+ ucs1lib_utf32_encode (PyUnicode_1BYTE_DATA (rep ), repsize ,
5173+ & out , native_ordering );
51945174 }
51955175
51965176 Py_CLEAR (rep );
@@ -5199,19 +5179,19 @@ _PyUnicode_EncodeUTF32(PyObject *str,
51995179 /* Cut back to size actually needed. This is necessary for, for example,
52005180 encoding of a string containing isolated surrogates and the 'ignore'
52015181 handler is used. */
5202- nsize = p - (unsigned char * ) PyBytes_AS_STRING (v );
5182+ nsize = ( unsigned char * ) out - (unsigned char * ) PyBytes_AS_STRING (v );
52035183 if (nsize != PyBytes_GET_SIZE (v ))
52045184 _PyBytes_Resize (& v , nsize );
52055185 Py_XDECREF (errorHandler );
52065186 Py_XDECREF (exc );
5187+ done :
52075188 return v ;
52085189 error :
52095190 Py_XDECREF (rep );
52105191 Py_XDECREF (errorHandler );
52115192 Py_XDECREF (exc );
52125193 Py_XDECREF (v );
52135194 return NULL ;
5214- #undef STORECHAR
52155195}
52165196
52175197PyObject *
0 commit comments