@@ -2464,8 +2464,8 @@ ZEND_API void zend_update_current_locale(void) /* {{{ */
24642464#endif
24652465
24662466static zend_always_inline void zend_str_tolower_impl (char * dest , const char * str , size_t length ) /* {{{ */ {
2467- register unsigned char * p = (unsigned char * )str ;
2468- register unsigned char * q = (unsigned char * )dest ;
2467+ unsigned char * p = (unsigned char * )str ;
2468+ unsigned char * q = (unsigned char * )dest ;
24692469 unsigned char * end = p + length ;
24702470#ifdef __SSE2__
24712471 if (length >= 16 ) {
@@ -2537,23 +2537,54 @@ ZEND_API char* ZEND_FASTCALL zend_str_tolower_dup_ex(const char *source, size_t
25372537
25382538ZEND_API zend_string * ZEND_FASTCALL zend_string_tolower_ex (zend_string * str , int persistent ) /* {{{ */
25392539{
2540- register unsigned char * p = (unsigned char * )ZSTR_VAL (str );
2541- register unsigned char * end = p + ZSTR_LEN (str );
2540+ size_t length = ZSTR_LEN (str );
2541+ unsigned char * p = (unsigned char * ) ZSTR_VAL (str );
2542+ unsigned char * end = p + length ;
2543+
2544+ #ifdef __SSE2__
2545+ while (p + 16 <= end ) {
2546+ const __m128i _A = _mm_set1_epi8 ('A' - 1 );
2547+ const __m128i Z_ = _mm_set1_epi8 ('Z' + 1 );
2548+ __m128i op = _mm_loadu_si128 ((__m128i * )p );
2549+ __m128i gt = _mm_cmpgt_epi8 (op , _A );
2550+ __m128i lt = _mm_cmplt_epi8 (op , Z_ );
2551+ __m128i mingle = _mm_and_si128 (gt , lt );
2552+ if (_mm_movemask_epi8 (mingle )) {
2553+ zend_string * res = zend_string_alloc (length , persistent );
2554+ memcpy (ZSTR_VAL (res ), ZSTR_VAL (str ), p - (unsigned char * ) ZSTR_VAL (str ));
2555+ unsigned char * q = p + (ZSTR_VAL (res ) - ZSTR_VAL (str ));
2556+
2557+ /* Lowercase the chunk we already compared. */
2558+ const __m128i delta = _mm_set1_epi8 ('a' - 'A' );
2559+ __m128i add = _mm_and_si128 (mingle , delta );
2560+ __m128i lower = _mm_add_epi8 (op , add );
2561+ _mm_storeu_si128 ((__m128i * ) q , lower );
2562+
2563+ /* Lowercase the rest of the string. */
2564+ p += 16 ; q += 16 ;
2565+ zend_str_tolower_impl ((char * ) q , (const char * ) p , end - p );
2566+ ZSTR_VAL (res )[length ] = '\0' ;
2567+ return res ;
2568+ }
2569+ p += 16 ;
2570+ }
2571+ #endif
2572+
25422573 while (p < end ) {
25432574 if (* p != zend_tolower_ascii (* p )) {
2544- zend_string * res = zend_string_alloc (ZSTR_LEN ( str ) , persistent );
2545- register unsigned char * r ;
2575+ zend_string * res = zend_string_alloc (length , persistent );
2576+ memcpy ( ZSTR_VAL ( res ), ZSTR_VAL ( str ), p - ( unsigned char * ) ZSTR_VAL ( str )) ;
25462577
2547- if (p != (unsigned char * )ZSTR_VAL (str )) {
2548- memcpy (ZSTR_VAL (res ), ZSTR_VAL (str ), p - (unsigned char * )ZSTR_VAL (str ));
2578+ unsigned char * q = p + (ZSTR_VAL (res ) - ZSTR_VAL (str ));
2579+ while (p < end ) {
2580+ * q ++ = zend_tolower_ascii (* p ++ );
25492581 }
2550- r = p + (ZSTR_VAL (res ) - ZSTR_VAL (str ));
2551- zend_str_tolower_impl ((char * )r , (const char * )p , end - p );
2552- ZSTR_VAL (res )[ZSTR_LEN (res )] = '\0' ;
2582+ ZSTR_VAL (res )[length ] = '\0' ;
25532583 return res ;
25542584 }
25552585 p ++ ;
25562586 }
2587+
25572588 return zend_string_copy (str );
25582589}
25592590/* }}} */
0 commit comments