1919#include <ctype.h>
2020#include <sys/types.h>
2121
22+ #ifdef __SSE2__
23+ #include <emmintrin.h>
24+ #endif
25+
2226#include "php.h"
2327
2428#include "url.h"
@@ -444,10 +448,7 @@ static int php_htoi(char *s)
444448
445449static unsigned char hexchars [] = "0123456789ABCDEF" ; /* uppercase hex digits, indexed by a nibble value (hexchars[c >> 4], hexchars[c & 15]) when emitting %XX escapes */
446450
447- /* {{{ php_url_encode
448- */
449- PHPAPI zend_string * php_url_encode (char const * s , size_t len )
450- {
/*
 * php_url_encode_impl: common encoder called by php_url_encode() (raw = 0) and
 * php_raw_url_encode() (raw = 1).
 *
 *   s, len  input bytes and their count
 *   raw     0: ' ' is written as '+' and '~' is percent-encoded
 *           1: ' ' is percent-encoded and '~' is copied through
 *
 * In both modes A-Z, a-z, 0-9, '-', '.' and '_' are copied unchanged; every other
 * byte becomes '%' followed by two uppercase hex digits taken from hexchars.
 *
 * This view is a patch: the lines between hunks (initialisation of from/end, the
 * tail of the scalar loop, and the return) are not visible here.
 */
451+ static zend_always_inline zend_string * php_url_encode_impl (const char * s , size_t len , zend_bool raw ) /* {{{ */ {
451452 register unsigned char c ;
452453 unsigned char * to ;
453454 unsigned char const * from , * end ;
@@ -458,15 +459,76 @@ PHPAPI zend_string *php_url_encode(char const *s, size_t len)
458459 start = zend_string_safe_alloc (3 , len , 0 , 0 ); /* asks for 3 * len: an escaped byte takes three output bytes ("%XX") -- assumes the (n, m, l, persistent) argument order, TODO confirm */
459460 to = (unsigned char * )ZSTR_VAL (start ); /* output cursor */
460461
462+ #ifdef __SSE2__
463+ while (from + 16 < end ) { /* strict '<': a final block of exactly 16 bytes is left to the scalar loop below */
464+ __m128i mask ;
465+ uint32_t bits ;
466+ const __m128i _A = _mm_set1_epi8 ('A' - 1 ); /* exclusive range bounds: the tests below use strict cmpgt / cmplt */
467+ const __m128i Z_ = _mm_set1_epi8 ('Z' + 1 );
468+ const __m128i _a = _mm_set1_epi8 ('a' - 1 );
469+ const __m128i z_ = _mm_set1_epi8 ('z' + 1 );
470+ const __m128i _zero = _mm_set1_epi8 ('0' - 1 );
471+ const __m128i nine_ = _mm_set1_epi8 ('9' + 1 );
472+ const __m128i dot = _mm_set1_epi8 ('.' );
473+ const __m128i minus = _mm_set1_epi8 ('-' );
474+ const __m128i under = _mm_set1_epi8 ('_' );
475+
476+ __m128i in = _mm_loadu_si128 ((__m128i * )from ); /* unaligned load of the next 16 input bytes */
477+
478+ __m128i gt = _mm_cmpgt_epi8 (in , _A ); /* epi8 compares are signed: bytes >= 0x80 are negative, fail every range test and so get percent-encoded */
479+ __m128i lt = _mm_cmplt_epi8 (in , Z_ );
480+ mask = _mm_and_si128 (lt , gt ); /* upper */
481+ gt = _mm_cmpgt_epi8 (in , _a );
482+ lt = _mm_cmplt_epi8 (in , z_ );
483+ mask = _mm_or_si128 (mask , _mm_and_si128 (lt , gt )); /* lower */
484+ gt = _mm_cmpgt_epi8 (in , _zero );
485+ lt = _mm_cmplt_epi8 (in , nine_ );
486+ mask = _mm_or_si128 (mask , _mm_and_si128 (lt , gt )); /* number */
487+ mask = _mm_or_si128 (mask , _mm_cmpeq_epi8 (in , dot ));
488+ mask = _mm_or_si128 (mask , _mm_cmpeq_epi8 (in , minus ));
489+ mask = _mm_or_si128 (mask , _mm_cmpeq_epi8 (in , under )); /* mask lanes are now all-ones for bytes that are copied unchanged */
490+
491+ if (!raw ) { /* non-raw mode: a space is emitted as '+' */
492+ const __m128i blank = _mm_set1_epi8 (' ' );
493+ __m128i eq = _mm_cmpeq_epi8 (in , blank );
494+ if (_mm_movemask_epi8 (eq )) { /* at least one space in this block */
495+ in = _mm_add_epi8 (in , _mm_and_si128 (eq , _mm_set1_epi8 ('+' - ' ' ))); /* adds ('+' - ' ') only in the lanes holding ' ', turning them into '+' */
496+ mask = _mm_or_si128 (mask , eq ); /* the rewritten lanes are then copied as-is */
497+ }
498+ }
499+ if (raw ) { /* raw mode: '~' is also copied unchanged */
500+ const __m128i wavy = _mm_set1_epi8 ('~' );
501+ mask = _mm_or_si128 (mask , _mm_cmpeq_epi8 (in , wavy ));
502+ }
503+ if (((bits = _mm_movemask_epi8 (mask )) & 0xffff ) == 0xffff ) { /* one bit per byte; all 16 set means nothing in the block needs escaping */
504+ _mm_storeu_si128 ((__m128i * )to , in );
505+ to += 16 ;
506+ } else {
507+ int i ;
508+ unsigned char xmm [16 ];
509+ _mm_storeu_si128 ((__m128i * )xmm , in ); /* spill the (possibly space-rewritten) block so it can be walked byte by byte */
510+ for (i = 0 ; i < sizeof (xmm ); i ++ ) { /* NOTE(review): int compared with size_t -- harmless for 0..15, but a signed/unsigned comparison */
511+ if ((bits & (0x1 << i ))) { /* bit i set: byte i is copied unchanged */
512+ * to ++ = xmm [i ];
513+ } else {
514+ * to ++ = '%' ;
515+ * to ++ = hexchars [xmm [i ] >> 4 ]; /* high nibble */
516+ * to ++ = hexchars [xmm [i ] & 0xf ]; /* low nibble */
517+ }
518+ }
519+ }
520+ from += 16 ;
521+ }
522+ #endif
461523 while (from < end ) { /* scalar path: whatever the SSE2 loop left over, or the whole input when __SSE2__ is not defined */
462524 c = * from ++ ;
463525
464- if (c == ' ' ) {
526+ if (! raw && c == ' ' ) {
465527 * to ++ = '+' ;
466528 } else if ((c < '0' && c != '-' && c != '.' ) ||
467- (c < 'A' && c > '9' ) ||
468- (c > 'Z' && c < 'a' && c != '_' ) ||
469- ( c > 'z' )) {
529+ (c < 'A' && c > '9' ) ||
530+ (c > 'Z' && c < 'a' && c != '_' ) ||
531+ ( c > 'z' && (! raw || c != '~' ) )) { /* c is unsigned, so this also catches bytes >= 0x80; '~' is exempt only in raw mode */
470532 to [0 ] = '%' ;
471533 to [1 ] = hexchars [c >> 4 ];
472534 to [2 ] = hexchars [c & 15 ];
@@ -483,6 +545,14 @@ PHPAPI zend_string *php_url_encode(char const *s, size_t len)
483545}
484546/* }}} */
485547
548+ /* {{{ php_url_encode
 * Encodes the len bytes at s by delegating to php_url_encode_impl() with raw = 0:
 * a space is written as '+', '~' is percent-encoded, and letters, digits,
 * '-', '.' and '_' are copied unchanged. Returns the zend_string produced by
 * php_url_encode_impl().
549+ */
550+ PHPAPI zend_string * php_url_encode (char const * s , size_t len )
551+ {
552+ return php_url_encode_impl (s , len , 0 ); /* 0 selects the non-raw rules */
553+ }
554+ /* }}} */
555+
486556/* {{{ proto string urlencode(string str)
487557 URL-encodes string */
488558PHP_FUNCTION (urlencode )
@@ -545,29 +615,7 @@ PHPAPI size_t php_url_decode(char *str, size_t len)
545615 */
546616PHPAPI zend_string * php_raw_url_encode (char const * s , size_t len )
547617{
/* The '-' lines below are the hand-written loop this patch removes; the single '+' line replaces it. */
548- register size_t x , y ;
549- zend_string * str ;
550- char * ret ;
551-
552- str = zend_string_safe_alloc (3 , len , 0 , 0 );
553- ret = ZSTR_VAL (str );
554- for (x = 0 , y = 0 ; len -- ; x ++ , y ++ ) {
555- char c = s [x ];
556-
557- ret [y ] = c ;
558- if ((c < '0' && c != '-' && c != '.' ) ||
559- (c < 'A' && c > '9' ) ||
560- (c > 'Z' && c < 'a' && c != '_' ) ||
561- (c > 'z' && c != '~' )) {
562- ret [y ++ ] = '%' ;
563- ret [y ++ ] = hexchars [(unsigned char ) c >> 4 ];
564- ret [y ] = hexchars [(unsigned char ) c & 15 ];
565- }
566- }
567- ret [y ] = '\0' ;
568- str = zend_string_truncate (str , y , 0 );
569-
570- return str ;
618+ return php_url_encode_impl (s , len , 1 ); /* raw = 1: no ' ' -> '+' rewrite, and '~' is copied unchanged */
571619}
572620/* }}} */
573621
0 commit comments