Skip to content

Commit e6abb48

Browse files
committed
unicodeobject.c: Add MAX_MAXCHAR() macro to (micro-)optimize the computation
of the second argument of PyUnicode_New(). * Also create the align_maxchar() function * Optimize fix_decimal_and_space_to_ascii(): don't compute the maximum character when ch <= 127 (it is ASCII)
1 parent 438106b commit e6abb48

1 file changed

Lines changed: 49 additions & 48 deletions

File tree

Objects/unicodeobject.c

Lines changed: 49 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,11 @@ extern "C" {
111111
#define _PyUnicode_DATA_ANY(op) \
112112
(((PyUnicodeObject*)(op))->data.any)
113113

114+
/* Compute the maximum of two characters: use it when you are computing the
   second argument of PyUnicode_New().

   This has to be an exact maximum rather than a bitwise OR.  OR-ing two
   valid code points can give a value larger than both of them (for example
   0xF0000 | 0x100000 == 0x1F0000), which is above U+10FFFF, the last
   Unicode code point, and is therefore not a usable maximum character.

   Each argument may be evaluated twice: do not pass expressions that have
   side effects. */
#define MAX_MAXCHAR(maxchar1, maxchar2) \
    (((maxchar1) > (maxchar2)) ? (maxchar1) : (maxchar2))
118+
114119
#undef PyUnicode_READY
115120
#define PyUnicode_READY(op) \
116121
(assert(_PyUnicode_CHECK(op)), \
@@ -1867,6 +1872,19 @@ kind_maxchar_limit(unsigned int kind)
18671872
}
18681873
}
18691874

1875+
Py_LOCAL_INLINE(Py_UCS4)
1876+
align_maxchar(Py_UCS4 maxchar)
1877+
{
1878+
if (maxchar <= 127)
1879+
return 127;
1880+
else if (maxchar <= 255)
1881+
return 255;
1882+
else if (maxchar <= 65535)
1883+
return 65535;
1884+
else
1885+
return MAX_UNICODE;
1886+
}
1887+
18701888
static PyObject*
18711889
_PyUnicode_FromUCS1(const unsigned char* u, Py_ssize_t size)
18721890
{
@@ -2439,7 +2457,7 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
24392457
case 'c':
24402458
{
24412459
Py_UCS4 ordinal = va_arg(count, int);
2442-
maxchar = Py_MAX(maxchar, ordinal);
2460+
maxchar = MAX_MAXCHAR(maxchar, ordinal);
24432461
n++;
24442462
break;
24452463
}
@@ -2535,7 +2553,7 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
25352553
/* since PyUnicode_DecodeUTF8 returns already flexible
25362554
unicode objects, there is no need to call ready on them */
25372555
argmaxchar = PyUnicode_MAX_CHAR_VALUE(str);
2538-
maxchar = Py_MAX(maxchar, argmaxchar);
2556+
maxchar = MAX_MAXCHAR(maxchar, argmaxchar);
25392557
n += PyUnicode_GET_LENGTH(str);
25402558
/* Remember the str and switch to the next slot */
25412559
*callresult++ = str;
@@ -2548,7 +2566,7 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
25482566
if (PyUnicode_READY(obj) == -1)
25492567
goto fail;
25502568
argmaxchar = PyUnicode_MAX_CHAR_VALUE(obj);
2551-
maxchar = Py_MAX(maxchar, argmaxchar);
2569+
maxchar = MAX_MAXCHAR(maxchar, argmaxchar);
25522570
n += PyUnicode_GET_LENGTH(obj);
25532571
break;
25542572
}
@@ -2563,7 +2581,7 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
25632581
if (PyUnicode_READY(obj) == -1)
25642582
goto fail;
25652583
argmaxchar = PyUnicode_MAX_CHAR_VALUE(obj);
2566-
maxchar = Py_MAX(maxchar, argmaxchar);
2584+
maxchar = MAX_MAXCHAR(maxchar, argmaxchar);
25672585
n += PyUnicode_GET_LENGTH(obj);
25682586
*callresult++ = NULL;
25692587
}
@@ -2576,7 +2594,7 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
25762594
goto fail;
25772595
}
25782596
argmaxchar = PyUnicode_MAX_CHAR_VALUE(str_obj);
2579-
maxchar = Py_MAX(maxchar, argmaxchar);
2597+
maxchar = MAX_MAXCHAR(maxchar, argmaxchar);
25802598
n += PyUnicode_GET_LENGTH(str_obj);
25812599
*callresult++ = str_obj;
25822600
}
@@ -2595,7 +2613,7 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
25952613
goto fail;
25962614
}
25972615
argmaxchar = PyUnicode_MAX_CHAR_VALUE(str);
2598-
maxchar = Py_MAX(maxchar, argmaxchar);
2616+
maxchar = MAX_MAXCHAR(maxchar, argmaxchar);
25992617
n += PyUnicode_GET_LENGTH(str);
26002618
/* Remember the str and switch to the next slot */
26012619
*callresult++ = str;
@@ -2614,7 +2632,7 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
26142632
goto fail;
26152633
}
26162634
argmaxchar = PyUnicode_MAX_CHAR_VALUE(repr);
2617-
maxchar = Py_MAX(maxchar, argmaxchar);
2635+
maxchar = MAX_MAXCHAR(maxchar, argmaxchar);
26182636
n += PyUnicode_GET_LENGTH(repr);
26192637
/* Remember the repr and switch to the next slot */
26202638
*callresult++ = repr;
@@ -2633,7 +2651,7 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
26332651
goto fail;
26342652
}
26352653
argmaxchar = PyUnicode_MAX_CHAR_VALUE(ascii);
2636-
maxchar = Py_MAX(maxchar, argmaxchar);
2654+
maxchar = MAX_MAXCHAR(maxchar, argmaxchar);
26372655
n += PyUnicode_GET_LENGTH(ascii);
26382656
/* Remember the repr and switch to the next slot */
26392657
*callresult++ = ascii;
@@ -5563,14 +5581,14 @@ PyUnicode_DecodeUTF16Stateful(const char *s,
55635581
maxch = (Py_UCS2)(block & 0xFFFF);
55645582
#if SIZEOF_LONG == 8
55655583
ch = (Py_UCS2)((block >> 16) & 0xFFFF);
5566-
maxch = Py_MAX(maxch, ch);
5584+
maxch = MAX_MAXCHAR(maxch, ch);
55675585
ch = (Py_UCS2)((block >> 32) & 0xFFFF);
5568-
maxch = Py_MAX(maxch, ch);
5586+
maxch = MAX_MAXCHAR(maxch, ch);
55695587
ch = (Py_UCS2)(block >> 48);
5570-
maxch = Py_MAX(maxch, ch);
5588+
maxch = MAX_MAXCHAR(maxch, ch);
55715589
#else
55725590
ch = (Py_UCS2)(block >> 16);
5573-
maxch = Py_MAX(maxch, ch);
5591+
maxch = MAX_MAXCHAR(maxch, ch);
55745592
#endif
55755593
if (maxch > PyUnicode_MAX_CHAR_VALUE(unicode)) {
55765594
if (unicode_widen(&unicode, maxch) < 0)
@@ -8987,7 +9005,7 @@ fix_decimal_and_space_to_ascii(PyObject *self)
89879005
const Py_ssize_t len = PyUnicode_GET_LENGTH(self);
89889006
const int kind = PyUnicode_KIND(self);
89899007
void *data = PyUnicode_DATA(self);
8990-
Py_UCS4 maxchar = 0, ch, fixed;
9008+
Py_UCS4 maxchar = 127, ch, fixed;
89919009
int modified = 0;
89929010
Py_ssize_t i;
89939011

@@ -9004,15 +9022,12 @@ fix_decimal_and_space_to_ascii(PyObject *self)
90049022
}
90059023
if (fixed != 0) {
90069024
modified = 1;
9007-
if (fixed > maxchar)
9008-
maxchar = fixed;
9025+
maxchar = MAX_MAXCHAR(maxchar, fixed);
90099026
PyUnicode_WRITE(kind, data, i, fixed);
90109027
}
9011-
else if (ch > maxchar)
9012-
maxchar = ch;
9028+
else
9029+
maxchar = MAX_MAXCHAR(maxchar, ch);
90139030
}
9014-
else if (ch > maxchar)
9015-
maxchar = ch;
90169031
}
90179032

90189033
return (modified) ? maxchar : 0;
@@ -9052,7 +9067,7 @@ PyUnicode_TransformDecimalToASCII(Py_UNICODE *s,
90529067
int decimal = Py_UNICODE_TODECIMAL(ch);
90539068
if (decimal >= 0)
90549069
ch = '0' + decimal;
9055-
maxchar = Py_MAX(maxchar, ch);
9070+
maxchar = MAX_MAXCHAR(maxchar, ch);
90569071
}
90579072
}
90589073

@@ -9293,8 +9308,8 @@ _PyUnicode_InsertThousandsGrouping(
92939308
if (unicode == NULL) {
92949309
*maxchar = 127;
92959310
if (len != n_digits) {
9296-
*maxchar = Py_MAX(*maxchar,
9297-
PyUnicode_MAX_CHAR_VALUE(thousands_sep));
9311+
*maxchar = MAX_MAXCHAR(*maxchar,
9312+
PyUnicode_MAX_CHAR_VALUE(thousands_sep));
92989313
}
92999314
}
93009315
return len;
@@ -9591,14 +9606,7 @@ fixup(PyObject *self,
95919606
return u;
95929607
}
95939608

9594-
if (maxchar_new <= 127)
9595-
maxchar_new = 127;
9596-
else if (maxchar_new <= 255)
9597-
maxchar_new = 255;
9598-
else if (maxchar_new <= 65535)
9599-
maxchar_new = 65535;
9600-
else
9601-
maxchar_new = MAX_UNICODE;
9609+
maxchar_new = align_maxchar(maxchar_new);
96029610

96039611
if (maxchar_new == maxchar_old)
96049612
return u;
@@ -9695,16 +9703,14 @@ do_capitalize(int kind, void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4 *ma
96959703
c = PyUnicode_READ(kind, data, 0);
96969704
n_res = _PyUnicode_ToUpperFull(c, mapped);
96979705
for (j = 0; j < n_res; j++) {
9698-
if (mapped[j] > *maxchar)
9699-
*maxchar = mapped[j];
9706+
*maxchar = MAX_MAXCHAR(*maxchar, mapped[j]);
97009707
res[k++] = mapped[j];
97019708
}
97029709
for (i = 1; i < length; i++) {
97039710
c = PyUnicode_READ(kind, data, i);
97049711
n_res = lower_ucs4(kind, data, length, i, c, mapped);
97059712
for (j = 0; j < n_res; j++) {
9706-
if (mapped[j] > *maxchar)
9707-
*maxchar = mapped[j];
9713+
*maxchar = MAX_MAXCHAR(*maxchar, mapped[j]);
97089714
res[k++] = mapped[j];
97099715
}
97109716
}
@@ -9729,8 +9735,7 @@ do_swapcase(int kind, void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4 *maxc
97299735
mapped[0] = c;
97309736
}
97319737
for (j = 0; j < n_res; j++) {
9732-
if (mapped[j] > *maxchar)
9733-
*maxchar = mapped[j];
9738+
*maxchar = MAX_MAXCHAR(*maxchar, mapped[j]);
97349739
res[k++] = mapped[j];
97359740
}
97369741
}
@@ -9751,8 +9756,7 @@ do_upper_or_lower(int kind, void *data, Py_ssize_t length, Py_UCS4 *res,
97519756
else
97529757
n_res = _PyUnicode_ToUpperFull(c, mapped);
97539758
for (j = 0; j < n_res; j++) {
9754-
if (mapped[j] > *maxchar)
9755-
*maxchar = mapped[j];
9759+
*maxchar = MAX_MAXCHAR(*maxchar, mapped[j]);
97569760
res[k++] = mapped[j];
97579761
}
97589762
}
@@ -9781,8 +9785,7 @@ do_casefold(int kind, void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4 *maxc
97819785
Py_UCS4 mapped[3];
97829786
int j, n_res = _PyUnicode_ToFoldedFull(c, mapped);
97839787
for (j = 0; j < n_res; j++) {
9784-
if (mapped[j] > *maxchar)
9785-
*maxchar = mapped[j];
9788+
*maxchar = MAX_MAXCHAR(*maxchar, mapped[j]);
97869789
res[k++] = mapped[j];
97879790
}
97889791
}
@@ -9807,8 +9810,7 @@ do_title(int kind, void *data, Py_ssize_t length, Py_UCS4 *res, Py_UCS4 *maxchar
98079810
n_res = _PyUnicode_ToTitleFull(c, mapped);
98089811

98099812
for (j = 0; j < n_res; j++) {
9810-
if (mapped[j] > *maxchar)
9811-
*maxchar = mapped[j];
9813+
*maxchar = MAX_MAXCHAR(*maxchar, mapped[j]);
98129814
res[k++] = mapped[j];
98139815
}
98149816

@@ -9965,7 +9967,7 @@ PyUnicode_Join(PyObject *separator, PyObject *seq)
99659967
goto onError;
99669968
sz += PyUnicode_GET_LENGTH(item);
99679969
item_maxchar = PyUnicode_MAX_CHAR_VALUE(item);
9968-
maxchar = Py_MAX(maxchar, item_maxchar);
9970+
maxchar = MAX_MAXCHAR(maxchar, item_maxchar);
99699971
if (i != 0)
99709972
sz += seplen;
99719973
if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
@@ -10127,8 +10129,7 @@ pad(PyObject *self,
1012710129
return NULL;
1012810130
}
1012910131
maxchar = PyUnicode_MAX_CHAR_VALUE(self);
10130-
if (fill > maxchar)
10131-
maxchar = fill;
10132+
maxchar = MAX_MAXCHAR(maxchar, fill);
1013210133
u = PyUnicode_New(left + _PyUnicode_LENGTH(self) + right, maxchar);
1013310134
if (!u)
1013410135
return NULL;
@@ -10442,7 +10443,7 @@ replace(PyObject *self, PyObject *str1,
1044210443
/* Replacing str1 with str2 may cause a maxchar reduction in the
1044310444
result string. */
1044410445
mayshrink = (maxchar_str2 < maxchar);
10445-
maxchar = Py_MAX(maxchar, maxchar_str2);
10446+
maxchar = MAX_MAXCHAR(maxchar, maxchar_str2);
1044610447

1044710448
if (len1 == len2) {
1044810449
/* same length */
@@ -11027,7 +11028,7 @@ PyUnicode_Concat(PyObject *left, PyObject *right)
1102711028

1102811029
maxchar = PyUnicode_MAX_CHAR_VALUE(u);
1102911030
maxchar2 = PyUnicode_MAX_CHAR_VALUE(v);
11030-
maxchar = Py_MAX(maxchar, maxchar2);
11031+
maxchar = MAX_MAXCHAR(maxchar, maxchar2);
1103111032

1103211033
/* Concat the two Unicode strings */
1103311034
w = PyUnicode_New(new_len, maxchar);
@@ -11114,7 +11115,7 @@ PyUnicode_Append(PyObject **p_left, PyObject *right)
1111411115
else {
1111511116
maxchar = PyUnicode_MAX_CHAR_VALUE(left);
1111611117
maxchar2 = PyUnicode_MAX_CHAR_VALUE(right);
11117-
maxchar = Py_MAX(maxchar, maxchar2);
11118+
maxchar = MAX_MAXCHAR(maxchar, maxchar2);
1111811119

1111911120
/* Concat the two Unicode strings */
1112011121
res = PyUnicode_New(new_len, maxchar);

0 commit comments

Comments
 (0)