@@ -54,7 +54,36 @@ static _locale_t current_locale = NULL;
5454
/* Pack two type codes into one integer: t1 is shifted left by four bits and OR-ed with t2. */
#define TYPE_PAIR(t1, t2) \
	(((t1) << 4) | (t2))
5656
57- static const unsigned char tolower_map [256 ] = {
#ifdef __SSE2__
#define HAVE_BLOCKCONV

/*
 * Common code for SSE2 accelerated character case conversion.
 *
 * The helpers below expand to plain declarations/statements and communicate
 * through fixed local names (blconv_start_minus_1, blconv_end_plus_1,
 * blconv_delta, blconv_operand, blconv_gt, blconv_lt, blconv_mingle,
 * blconv_add, blconv_result).  Consequently:
 *   - BLOCKCONV_INIT_RANGE must be in scope before BLOCKCONV_LOAD,
 *   - BLOCKCONV_LOAD must be in scope before BLOCKCONV_FOUND / BLOCKCONV_STORE,
 *   - BLOCKCONV_INIT_DELTA must be in scope before BLOCKCONV_STORE,
 *   - each declaring macro may be used at most once per scope.
 *
 * The range test uses signed byte comparisons, so bytes >= 0x80 compare as
 * negative values and never match a range made of ASCII characters.
 */

/* Declare the (exclusive) bounds used to detect bytes in [start, end]. */
#define BLOCKCONV_INIT_RANGE(start, end) \
	const __m128i blconv_start_minus_1 = _mm_set1_epi8((start) - 1); \
	const __m128i blconv_end_plus_1 = _mm_set1_epi8((end) + 1);

/* Number of bytes handled by one LOAD/STORE pair. */
#define BLOCKCONV_STRIDE (sizeof(__m128i))

/* Declare the value added to every byte that falls inside the range. */
#define BLOCKCONV_INIT_DELTA(delta) \
	const __m128i blconv_delta = _mm_set1_epi8(delta);

/*
 * Load BLOCKCONV_STRIDE bytes from `input` (no alignment requirement) and
 * build blconv_mingle: 0xff in every byte lane that lies inside the range,
 * 0x00 elsewhere.  The load is read-only, hence the const-qualified cast.
 */
#define BLOCKCONV_LOAD(input) \
	__m128i blconv_operand = _mm_loadu_si128((const __m128i *)(input)); \
	__m128i blconv_gt = _mm_cmpgt_epi8(blconv_operand, blconv_start_minus_1); \
	__m128i blconv_lt = _mm_cmplt_epi8(blconv_operand, blconv_end_plus_1); \
	__m128i blconv_mingle = _mm_and_si128(blconv_gt, blconv_lt);

/* Non-zero when at least one byte of the loaded block is inside the range. */
#define BLOCKCONV_FOUND() _mm_movemask_epi8(blconv_mingle)

/*
 * Add the delta to the in-range bytes of the loaded block (other bytes get
 * +0) and store the BLOCKCONV_STRIDE result bytes to `dest` (unaligned).
 */
#define BLOCKCONV_STORE(dest) \
	__m128i blconv_add = _mm_and_si128(blconv_mingle, blconv_delta); \
	__m128i blconv_result = _mm_add_epi8(blconv_operand, blconv_add); \
	_mm_storeu_si128((__m128i *)(dest), blconv_result);

#endif /* __SSE2__ */
85+
86+ ZEND_API const unsigned char zend_tolower_map [256 ] = {
58870x00 ,0x01 ,0x02 ,0x03 ,0x04 ,0x05 ,0x06 ,0x07 ,0x08 ,0x09 ,0x0a ,0x0b ,0x0c ,0x0d ,0x0e ,0x0f ,
59880x10 ,0x11 ,0x12 ,0x13 ,0x14 ,0x15 ,0x16 ,0x17 ,0x18 ,0x19 ,0x1a ,0x1b ,0x1c ,0x1d ,0x1e ,0x1f ,
60890x20 ,0x21 ,0x22 ,0x23 ,0x24 ,0x25 ,0x26 ,0x27 ,0x28 ,0x29 ,0x2a ,0x2b ,0x2c ,0x2d ,0x2e ,0x2f ,
@@ -73,7 +102,25 @@ static const unsigned char tolower_map[256] = {
731020xf0 ,0xf1 ,0xf2 ,0xf3 ,0xf4 ,0xf5 ,0xf6 ,0xf7 ,0xf8 ,0xf9 ,0xfa ,0xfb ,0xfc ,0xfd ,0xfe ,0xff
74103};
75104
76- #define zend_tolower_ascii (c ) (tolower_map[(unsigned char)(c)])
105+ ZEND_API const unsigned char zend_toupper_map [256 ] = {
106+ 0x00 ,0x01 ,0x02 ,0x03 ,0x04 ,0x05 ,0x06 ,0x07 ,0x08 ,0x09 ,0x0a ,0x0b ,0x0c ,0x0d ,0x0e ,0x0f ,
107+ 0x10 ,0x11 ,0x12 ,0x13 ,0x14 ,0x15 ,0x16 ,0x17 ,0x18 ,0x19 ,0x1a ,0x1b ,0x1c ,0x1d ,0x1e ,0x1f ,
108+ 0x20 ,0x21 ,0x22 ,0x23 ,0x24 ,0x25 ,0x26 ,0x27 ,0x28 ,0x29 ,0x2a ,0x2b ,0x2c ,0x2d ,0x2e ,0x2f ,
109+ 0x30 ,0x31 ,0x32 ,0x33 ,0x34 ,0x35 ,0x36 ,0x37 ,0x38 ,0x39 ,0x3a ,0x3b ,0x3c ,0x3d ,0x3e ,0x3f ,
110+ 0x40 ,0x41 ,0x42 ,0x43 ,0x44 ,0x45 ,0x46 ,0x47 ,0x48 ,0x49 ,0x4a ,0x4b ,0x4c ,0x4d ,0x4e ,0x4f ,
111+ 0x50 ,0x51 ,0x52 ,0x53 ,0x54 ,0x55 ,0x56 ,0x57 ,0x58 ,0x59 ,0x5a ,0x5b ,0x5c ,0x5d ,0x5e ,0x5f ,
112+ 0x60 ,0x41 ,0x42 ,0x43 ,0x44 ,0x45 ,0x46 ,0x47 ,0x48 ,0x49 ,0x4a ,0x4b ,0x4c ,0x4d ,0x4e ,0x4f ,
113+ 0x50 ,0x51 ,0x52 ,0x53 ,0x54 ,0x55 ,0x56 ,0x57 ,0x58 ,0x59 ,0x5a ,0x7b ,0x7c ,0x7d ,0x7e ,0x7f ,
114+ 0x80 ,0x81 ,0x82 ,0x83 ,0x84 ,0x85 ,0x86 ,0x87 ,0x88 ,0x89 ,0x8a ,0x8b ,0x8c ,0x8d ,0x8e ,0x8f ,
115+ 0x90 ,0x91 ,0x92 ,0x93 ,0x94 ,0x95 ,0x96 ,0x97 ,0x98 ,0x99 ,0x9a ,0x9b ,0x9c ,0x9d ,0x9e ,0x9f ,
116+ 0xa0 ,0xa1 ,0xa2 ,0xa3 ,0xa4 ,0xa5 ,0xa6 ,0xa7 ,0xa8 ,0xa9 ,0xaa ,0xab ,0xac ,0xad ,0xae ,0xaf ,
117+ 0xb0 ,0xb1 ,0xb2 ,0xb3 ,0xb4 ,0xb5 ,0xb6 ,0xb7 ,0xb8 ,0xb9 ,0xba ,0xbb ,0xbc ,0xbd ,0xbe ,0xbf ,
118+ 0xc0 ,0xc1 ,0xc2 ,0xc3 ,0xc4 ,0xc5 ,0xc6 ,0xc7 ,0xc8 ,0xc9 ,0xca ,0xcb ,0xcc ,0xcd ,0xce ,0xcf ,
119+ 0xd0 ,0xd1 ,0xd2 ,0xd3 ,0xd4 ,0xd5 ,0xd6 ,0xd7 ,0xd8 ,0xd9 ,0xda ,0xdb ,0xdc ,0xdd ,0xde ,0xdf ,
120+ 0xe0 ,0xe1 ,0xe2 ,0xe3 ,0xe4 ,0xe5 ,0xe6 ,0xe7 ,0xe8 ,0xe9 ,0xea ,0xeb ,0xec ,0xed ,0xee ,0xef ,
121+ 0xf0 ,0xf1 ,0xf2 ,0xf3 ,0xf4 ,0xf5 ,0xf6 ,0xf7 ,0xf8 ,0xf9 ,0xfa ,0xfb ,0xfc ,0xfd ,0xfe ,0xff
122+ };
123+
77124
78125/**
79126 * Functions using locale lowercase:
@@ -2665,22 +2712,16 @@ static zend_always_inline void zend_str_tolower_impl(char *dest, const char *str
26652712 unsigned char * p = (unsigned char * )str ;
26662713 unsigned char * q = (unsigned char * )dest ;
26672714 unsigned char * end = p + length ;
2668- #ifdef __SSE2__
2669- if (length >= 16 ) {
2670- const __m128i _A = _mm_set1_epi8 ('A' - 1 );
2671- const __m128i Z_ = _mm_set1_epi8 ('Z' + 1 );
2672- const __m128i delta = _mm_set1_epi8 ('a' - 'A' );
2715+ #ifdef HAVE_BLOCKCONV
2716+ if (length >= BLOCKCONV_STRIDE ) {
2717+ BLOCKCONV_INIT_RANGE ('A' , 'Z' );
2718+ BLOCKCONV_INIT_DELTA ('a' - 'A' );
26732719 do {
2674- __m128i op = _mm_loadu_si128 ((__m128i * )p );
2675- __m128i gt = _mm_cmpgt_epi8 (op , _A );
2676- __m128i lt = _mm_cmplt_epi8 (op , Z_ );
2677- __m128i mingle = _mm_and_si128 (gt , lt );
2678- __m128i add = _mm_and_si128 (mingle , delta );
2679- __m128i lower = _mm_add_epi8 (op , add );
2680- _mm_storeu_si128 ((__m128i * )q , lower );
2681- p += 16 ;
2682- q += 16 ;
2683- } while (p + 16 <= end );
2720+ BLOCKCONV_LOAD (p );
2721+ BLOCKCONV_STORE (q );
2722+ p += BLOCKCONV_STRIDE ;
2723+ q += BLOCKCONV_STRIDE ;
2724+ } while (p + BLOCKCONV_STRIDE <= end );
26842725 }
26852726#endif
26862727 while (p < end ) {
@@ -2689,6 +2730,28 @@ static zend_always_inline void zend_str_tolower_impl(char *dest, const char *str
26892730}
26902731/* }}} */
26912732
2733+ static zend_always_inline void zend_str_toupper_impl (char * dest , const char * str , size_t length ) /* {{{ */ {
2734+ unsigned char * p = (unsigned char * )str ;
2735+ unsigned char * q = (unsigned char * )dest ;
2736+ unsigned char * end = p + length ;
2737+ #ifdef HAVE_BLOCKCONV
2738+ if (length >= BLOCKCONV_STRIDE ) {
2739+ BLOCKCONV_INIT_RANGE ('a' , 'z' );
2740+ BLOCKCONV_INIT_DELTA ('A' - 'a' );
2741+ do {
2742+ BLOCKCONV_LOAD (p );
2743+ BLOCKCONV_STORE (q );
2744+ p += BLOCKCONV_STRIDE ;
2745+ q += BLOCKCONV_STRIDE ;
2746+ } while (p + BLOCKCONV_STRIDE <= end );
2747+ }
2748+ #endif
2749+ while (p < end ) {
2750+ * q ++ = zend_toupper_ascii (* p ++ );
2751+ }
2752+ }
2753+ /* }}} */
2754+
26922755ZEND_API char * ZEND_FASTCALL zend_str_tolower_copy (char * dest , const char * source , size_t length ) /* {{{ */
26932756{
26942757 zend_str_tolower_impl (dest , source , length );
@@ -2697,18 +2760,39 @@ ZEND_API char* ZEND_FASTCALL zend_str_tolower_copy(char *dest, const char *sourc
26972760}
26982761/* }}} */
26992762
2763+ ZEND_API char * ZEND_FASTCALL zend_str_toupper_copy (char * dest , const char * source , size_t length ) /* {{{ */
2764+ {
2765+ zend_str_toupper_impl (dest , source , length );
2766+ dest [length ] = '\0' ;
2767+ return dest ;
2768+ }
2769+ /* }}} */
2770+
27002771ZEND_API char * ZEND_FASTCALL zend_str_tolower_dup (const char * source , size_t length ) /* {{{ */
27012772{
27022773 return zend_str_tolower_copy ((char * )emalloc (length + 1 ), source , length );
27032774}
27042775/* }}} */
27052776
2777+ ZEND_API char * ZEND_FASTCALL zend_str_toupper_dup (const char * source , size_t length ) /* {{{ */
2778+ {
2779+ return zend_str_toupper_copy ((char * )emalloc (length + 1 ), source , length );
2780+ }
2781+ /* }}} */
2782+
27062783ZEND_API void ZEND_FASTCALL zend_str_tolower (char * str , size_t length ) /* {{{ */
27072784{
27082785 zend_str_tolower_impl (str , (const char * )str , length );
27092786}
27102787/* }}} */
27112788
2789+ ZEND_API void ZEND_FASTCALL zend_str_toupper (char * str , size_t length ) /* {{{ */
2790+ {
2791+ zend_str_toupper_impl (str , (const char * )str , length );
2792+ }
2793+ /* }}} */
2794+
2795+
27122796ZEND_API char * ZEND_FASTCALL zend_str_tolower_dup_ex (const char * source , size_t length ) /* {{{ */
27132797{
27142798 const unsigned char * p = (const unsigned char * )source ;
@@ -2733,38 +2817,57 @@ ZEND_API char* ZEND_FASTCALL zend_str_tolower_dup_ex(const char *source, size_t
27332817}
27342818/* }}} */
27352819
2820+ ZEND_API char * ZEND_FASTCALL zend_str_toupper_dup_ex (const char * source , size_t length ) /* {{{ */
2821+ {
2822+ const unsigned char * p = (const unsigned char * )source ;
2823+ const unsigned char * end = p + length ;
2824+
2825+ while (p < end ) {
2826+ if (* p != zend_toupper_ascii (* p )) {
2827+ char * res = (char * )emalloc (length + 1 );
2828+ unsigned char * r ;
2829+
2830+ if (p != (const unsigned char * )source ) {
2831+ memcpy (res , source , p - (const unsigned char * )source );
2832+ }
2833+ r = (unsigned char * )p + (res - source );
2834+ zend_str_toupper_impl ((char * )r , (const char * )p , end - p );
2835+ res [length ] = '\0' ;
2836+ return res ;
2837+ }
2838+ p ++ ;
2839+ }
2840+ return NULL ;
2841+ }
2842+ /* }}} */
2843+
27362844ZEND_API zend_string * ZEND_FASTCALL zend_string_tolower_ex (zend_string * str , bool persistent ) /* {{{ */
27372845{
27382846 size_t length = ZSTR_LEN (str );
27392847 unsigned char * p = (unsigned char * ) ZSTR_VAL (str );
27402848 unsigned char * end = p + length ;
27412849
2742- #ifdef __SSE2__
2743- while (p + 16 <= end ) {
2744- const __m128i _A = _mm_set1_epi8 ('A' - 1 );
2745- const __m128i Z_ = _mm_set1_epi8 ('Z' + 1 );
2746- __m128i op = _mm_loadu_si128 ((__m128i * )p );
2747- __m128i gt = _mm_cmpgt_epi8 (op , _A );
2748- __m128i lt = _mm_cmplt_epi8 (op , Z_ );
2749- __m128i mingle = _mm_and_si128 (gt , lt );
2750- if (_mm_movemask_epi8 (mingle )) {
2850+ #ifdef HAVE_BLOCKCONV
2851+ BLOCKCONV_INIT_RANGE ('A' , 'Z' );
2852+ while (p + BLOCKCONV_STRIDE <= end ) {
2853+ BLOCKCONV_LOAD (p );
2854+ if (BLOCKCONV_FOUND ()) {
27512855 zend_string * res = zend_string_alloc (length , persistent );
27522856 memcpy (ZSTR_VAL (res ), ZSTR_VAL (str ), p - (unsigned char * ) ZSTR_VAL (str ));
27532857 unsigned char * q = p + (ZSTR_VAL (res ) - ZSTR_VAL (str ));
27542858
27552859 /* Lowercase the chunk we already compared. */
2756- const __m128i delta = _mm_set1_epi8 ('a' - 'A' );
2757- __m128i add = _mm_and_si128 (mingle , delta );
2758- __m128i lower = _mm_add_epi8 (op , add );
2759- _mm_storeu_si128 ((__m128i * ) q , lower );
2860+ BLOCKCONV_INIT_DELTA ('a' - 'A' );
2861+ BLOCKCONV_STORE (q );
27602862
27612863 /* Lowercase the rest of the string. */
2762- p += 16 ; q += 16 ;
2864+ p += BLOCKCONV_STRIDE ;
2865+ q += BLOCKCONV_STRIDE ;
27632866 zend_str_tolower_impl ((char * ) q , (const char * ) p , end - p );
27642867 ZSTR_VAL (res )[length ] = '\0' ;
27652868 return res ;
27662869 }
2767- p += 16 ;
2870+ p += BLOCKCONV_STRIDE ;
27682871 }
27692872#endif
27702873
@@ -2787,6 +2890,55 @@ ZEND_API zend_string* ZEND_FASTCALL zend_string_tolower_ex(zend_string *str, boo
27872890}
27882891/* }}} */
27892892
2893+ ZEND_API zend_string * ZEND_FASTCALL zend_string_toupper_ex (zend_string * str , bool persistent ) /* {{{ */
2894+ {
2895+ size_t length = ZSTR_LEN (str );
2896+ unsigned char * p = (unsigned char * ) ZSTR_VAL (str );
2897+ unsigned char * end = p + length ;
2898+
2899+ #ifdef HAVE_BLOCKCONV
2900+ BLOCKCONV_INIT_RANGE ('a' , 'z' );
2901+ while (p + BLOCKCONV_STRIDE <= end ) {
2902+ BLOCKCONV_LOAD (p );
2903+ if (BLOCKCONV_FOUND ()) {
2904+ zend_string * res = zend_string_alloc (length , persistent );
2905+ memcpy (ZSTR_VAL (res ), ZSTR_VAL (str ), p - (unsigned char * ) ZSTR_VAL (str ));
2906+ unsigned char * q = p + (ZSTR_VAL (res ) - ZSTR_VAL (str ));
2907+
2908+ /* Uppercase the chunk we already compared. */
2909+ BLOCKCONV_INIT_DELTA ('A' - 'a' );
2910+ BLOCKCONV_STORE (q );
2911+
2912+ /* Uppercase the rest of the string. */
2913+ p += BLOCKCONV_STRIDE ;
2914+ q += BLOCKCONV_STRIDE ;
2915+ zend_str_toupper_impl ((char * ) q , (const char * ) p , end - p );
2916+ ZSTR_VAL (res )[length ] = '\0' ;
2917+ return res ;
2918+ }
2919+ p += BLOCKCONV_STRIDE ;
2920+ }
2921+ #endif
2922+
2923+ while (p < end ) {
2924+ if (* p != zend_toupper_ascii (* p )) {
2925+ zend_string * res = zend_string_alloc (length , persistent );
2926+ memcpy (ZSTR_VAL (res ), ZSTR_VAL (str ), p - (unsigned char * ) ZSTR_VAL (str ));
2927+
2928+ unsigned char * q = p + (ZSTR_VAL (res ) - ZSTR_VAL (str ));
2929+ while (p < end ) {
2930+ * q ++ = zend_toupper_ascii (* p ++ );
2931+ }
2932+ ZSTR_VAL (res )[length ] = '\0' ;
2933+ return res ;
2934+ }
2935+ p ++ ;
2936+ }
2937+
2938+ return zend_string_copy (str );
2939+ }
2940+ /* }}} */
2941+
27902942ZEND_API int ZEND_FASTCALL zend_binary_strcmp (const char * s1 , size_t len1 , const char * s2 , size_t len2 ) /* {{{ */
27912943{
27922944 int retval ;
0 commit comments