@@ -1068,61 +1068,85 @@ bytes_dealloc(PyObject *op)
10681068 the string is UTF-8 encoded and should be re-encoded in the
10691069 specified encoding. */
10701070
1071+ static char *
1072+ _PyBytes_DecodeEscapeRecode (const char * * s , const char * end ,
1073+ const char * errors , const char * recode_encoding ,
1074+ _PyBytesWriter * writer , char * p )
1075+ {
1076+ PyObject * u , * w ;
1077+ const char * t ;
1078+
1079+ t = * s ;
1080+ /* Decode non-ASCII bytes as UTF-8. */
1081+ while (t < end && (* t & 0x80 ))
1082+ t ++ ;
1083+ u = PyUnicode_DecodeUTF8 (* s , t - * s , errors );
1084+ if (u == NULL )
1085+ return NULL ;
1086+
1087+ /* Recode them in target encoding. */
1088+ w = PyUnicode_AsEncodedString (u , recode_encoding , errors );
1089+ Py_DECREF (u );
1090+ if (w == NULL )
1091+ return NULL ;
1092+ assert (PyBytes_Check (w ));
1093+
1094+ /* Append bytes to output buffer. */
1095+ writer -> min_size -- ; /* subtract 1 preallocated byte */
1096+ p = _PyBytesWriter_WriteBytes (writer , p ,
1097+ PyBytes_AS_STRING (w ),
1098+ PyBytes_GET_SIZE (w ));
1099+ Py_DECREF (w );
1100+ if (p == NULL )
1101+ return NULL ;
1102+
1103+ * s = t ;
1104+ return p ;
1105+ }
1106+
10711107PyObject * PyBytes_DecodeEscape (const char * s ,
10721108 Py_ssize_t len ,
10731109 const char * errors ,
10741110 Py_ssize_t unicode ,
10751111 const char * recode_encoding )
10761112{
10771113 int c ;
1078- char * p , * buf ;
1114+ char * p ;
10791115 const char * end ;
1080- PyObject * v ;
1081- Py_ssize_t newlen = recode_encoding ? 4 * len :len ;
1082- v = PyBytes_FromStringAndSize ((char * )NULL , newlen );
1083- if (v == NULL )
1116+ _PyBytesWriter writer ;
1117+
1118+ _PyBytesWriter_Init (& writer );
1119+
1120+ p = _PyBytesWriter_Alloc (& writer , len );
1121+ if (p == NULL )
10841122 return NULL ;
1085- p = buf = PyBytes_AsString (v );
1123+ writer .overallocate = 1 ;
1124+
10861125 end = s + len ;
10871126 while (s < end ) {
10881127 if (* s != '\\' ) {
10891128 non_esc :
1090- if (recode_encoding && (* s & 0x80 )) {
1091- PyObject * u , * w ;
1092- char * r ;
1093- const char * t ;
1094- Py_ssize_t rn ;
1095- t = s ;
1096- /* Decode non-ASCII bytes as UTF-8. */
1097- while (t < end && (* t & 0x80 )) t ++ ;
1098- u = PyUnicode_DecodeUTF8 (s , t - s , errors );
1099- if (!u ) goto failed ;
1100-
1101- /* Recode them in target encoding. */
1102- w = PyUnicode_AsEncodedString (
1103- u , recode_encoding , errors );
1104- Py_DECREF (u );
1105- if (!w ) goto failed ;
1106-
1107- /* Append bytes to output buffer. */
1108- assert (PyBytes_Check (w ));
1109- r = PyBytes_AS_STRING (w );
1110- rn = PyBytes_GET_SIZE (w );
1111- Py_MEMCPY (p , r , rn );
1112- p += rn ;
1113- Py_DECREF (w );
1114- s = t ;
1115- } else {
1129+ if (!(recode_encoding && (* s & 0x80 ))) {
11161130 * p ++ = * s ++ ;
11171131 }
1132+ else {
1133+ /* non-ASCII character and need to recode */
1134+ p = _PyBytes_DecodeEscapeRecode (& s , end ,
1135+ errors , recode_encoding ,
1136+ & writer , p );
1137+ if (p == NULL )
1138+ goto failed ;
1139+ }
11181140 continue ;
11191141 }
1142+
11201143 s ++ ;
1121- if (s == end ) {
1144+ if (s == end ) {
11221145 PyErr_SetString (PyExc_ValueError ,
11231146 "Trailing \\ in string" );
11241147 goto failed ;
11251148 }
1149+
11261150 switch (* s ++ ) {
11271151 /* XXX This assumes ASCII! */
11281152 case '\n' : break ;
@@ -1147,28 +1171,18 @@ PyObject *PyBytes_DecodeEscape(const char *s,
11471171 * p ++ = c ;
11481172 break ;
11491173 case 'x' :
1150- if (s + 1 < end && Py_ISXDIGIT (s [0 ]) && Py_ISXDIGIT (s [1 ])) {
1151- unsigned int x = 0 ;
1152- c = Py_CHARMASK (* s );
1153- s ++ ;
1154- if (Py_ISDIGIT (c ))
1155- x = c - '0' ;
1156- else if (Py_ISLOWER (c ))
1157- x = 10 + c - 'a' ;
1158- else
1159- x = 10 + c - 'A' ;
1160- x = x << 4 ;
1161- c = Py_CHARMASK (* s );
1162- s ++ ;
1163- if (Py_ISDIGIT (c ))
1164- x += c - '0' ;
1165- else if (Py_ISLOWER (c ))
1166- x += 10 + c - 'a' ;
1167- else
1168- x += 10 + c - 'A' ;
1169- * p ++ = x ;
1170- break ;
1174+ if (s + 1 < end ) {
1175+ int digit1 , digit2 ;
1176+ digit1 = _PyLong_DigitValue [Py_CHARMASK (s [0 ])];
1177+ digit2 = _PyLong_DigitValue [Py_CHARMASK (s [1 ])];
1178+ if (digit1 < 16 && digit2 < 16 ) {
1179+ * p ++ = (unsigned char )((digit1 << 4 ) + digit2 );
1180+ s += 2 ;
1181+ break ;
1182+ }
11711183 }
1184+ /* invalid hexadecimal digits */
1185+
11721186 if (!errors || strcmp (errors , "strict" ) == 0 ) {
11731187 PyErr_Format (PyExc_ValueError ,
11741188 "invalid \\x escape at position %d" ,
@@ -1190,18 +1204,19 @@ PyObject *PyBytes_DecodeEscape(const char *s,
11901204 if (s < end && Py_ISXDIGIT (s [0 ]))
11911205 s ++ ; /* and a hexdigit */
11921206 break ;
1207+
11931208 default :
11941209 * p ++ = '\\' ;
11951210 s -- ;
11961211 goto non_esc ; /* an arbitrary number of unescaped
11971212 UTF-8 bytes may follow. */
11981213 }
11991214 }
1200- if ( p - buf < newlen )
1201- _PyBytes_Resize ( & v , p - buf );
1202- return v ;
1215+
1216+ return _PyBytesWriter_Finish ( & writer , p );
1217+
12031218 failed :
1204- Py_DECREF ( v );
1219+ _PyBytesWriter_Dealloc ( & writer );
12051220 return NULL ;
12061221}
12071222
0 commit comments