@@ -125,31 +125,40 @@ PHP_MINFO_FUNCTION(tokenizer)
125125}
126126/* }}} */
127127
/*
 * make_str: build the zend_string that holds one token's text.
 *
 * text             - start of the token bytes (cast to char * for the Zend calls)
 * leng             - number of bytes in the token
 * interned_strings - optional table of strings already created by this
 *                    function; when NULL no lookup or insertion is done
 *
 * Three cases, in order:
 *   1. leng == 1: the result comes from ZSTR_CHAR(text[0])
 *      (presumably the engine's shared one-character string - confirm).
 *   2. interned_strings != NULL: an existing entry with the same bytes is
 *      returned through zend_string_copy(); otherwise a new string is
 *      created, stored in the table, and returned.
 *   3. otherwise: a new string is created with zend_string_init().
 */
128- static inline zend_string * make_str (unsigned char * text , size_t leng ) {
128+ static zend_string * make_str (unsigned char * text , size_t leng , HashTable * interned_strings ) {
129129 if (leng == 1 ) {
130130 return ZSTR_CHAR (text [0 ]);
131+ } else if (interned_strings ) {
/* Look the raw bytes up first so repeated token texts reuse one zend_string. */
132+ zend_string * interned_str = zend_hash_str_find_ptr (interned_strings , (char * ) text , leng );
133+ if (interned_str ) {
134+ return zend_string_copy (interned_str );
135+ }
/* Not seen yet: create the string and register it. The same zend_string is
 * passed as both the key and the stored pointer value. */
136+ interned_str = zend_string_init ((char * ) text , leng , 0 );
137+ zend_hash_add_new_ptr (interned_strings , interned_str , interned_str );
138+ return interned_str ;
131139 } else {
/* No table supplied: always create a new string. */
132140 return zend_string_init ((char * ) text , leng , 0 );
133141 }
134142}
135143
/*
 * add_token: build one token value and append it to the array in return_value.
 *
 * return_value     - zval holding the result array (accessed via Z_ARRVAL_P)
 * token_type       - numeric token id
 * text, leng       - token bytes and their length
 * lineno           - line number stored with the token
 * as_object        - non-zero: emit an object of class php_token_ce
 * interned_strings - forwarded unchanged to make_str(); may be NULL
 *
 * Shape of the appended value:
 *   - as_object:          object with property slots 0..3 set to
 *                         token_type, text, lineno and the offset of text
 *                         from LANG_SCNG(yy_start)
 *   - token_type >= 256:  array [token_type, text, lineno]
 *   - otherwise:          the text as a plain string
 *     (presumably ids below 256 are single-character tokens - confirm)
 */
136- static void add_token (zval * return_value , int token_type ,
137- unsigned char * text , size_t leng , int lineno , zend_bool as_object ) {
144+ static void add_token (
145+ zval * return_value , int token_type , unsigned char * text , size_t leng , int lineno ,
146+ zend_bool as_object , HashTable * interned_strings ) {
138147 zval token ;
139148 if (as_object ) {
/* Object form: fill the property slots by index. */
140149 zend_object * obj = zend_objects_new (php_token_ce );
141150 ZVAL_OBJ (& token , obj );
142151 ZVAL_LONG (OBJ_PROP_NUM (obj , 0 ), token_type );
143- ZVAL_STR (OBJ_PROP_NUM (obj , 1 ), make_str (text , leng ));
152+ ZVAL_STR (OBJ_PROP_NUM (obj , 1 ), make_str (text , leng , interned_strings ));
144153 ZVAL_LONG (OBJ_PROP_NUM (obj , 2 ), lineno );
/* Slot 3 holds the pointer difference between text and LANG_SCNG(yy_start). */
145154 ZVAL_LONG (OBJ_PROP_NUM (obj , 3 ), text - LANG_SCNG (yy_start ));
146155 } else if (token_type >= 256 ) {
/* Array form: [token_type, text, lineno]. */
147156 array_init (& token );
148157 add_next_index_long (& token , token_type );
149- add_next_index_str (& token , make_str (text , leng ));
158+ add_next_index_str (& token , make_str (text , leng , interned_strings ));
150159 add_next_index_long (& token , lineno );
151160 } else {
/* Plain-string form: only the token text is stored. */
152- ZVAL_STR (& token , make_str (text , leng ));
161+ ZVAL_STR (& token , make_str (text , leng , interned_strings ));
153162 }
/* Append the finished token to the result array. */
154163 zend_hash_next_index_insert_new (Z_ARRVAL_P (return_value ), & token );
155164}
@@ -162,6 +171,7 @@ static zend_bool tokenize(zval *return_value, zend_string *source, zend_bool as_
162171 int token_type ;
163172 int token_line = 1 ;
164173 int need_tokens = -1 ; /* for __halt_compiler lexing. -1 = disabled */
174+ HashTable interned_strings ;
165175
166176 ZVAL_STR_COPY (& source_zval , source );
167177 zend_save_lexical_state (& original_lex_state );
@@ -172,10 +182,12 @@ static zend_bool tokenize(zval *return_value, zend_string *source, zend_bool as_
172182 }
173183
174184 LANG_SCNG (yy_state ) = yycINITIAL ;
185+ zend_hash_init (& interned_strings , 0 , NULL , NULL , 0 );
175186 array_init (return_value );
176187
177188 while ((token_type = lex_scan (& token , NULL ))) {
178- add_token (return_value , token_type , zendtext , zendleng , token_line , as_object );
189+ add_token (return_value , token_type , zendtext , zendleng , token_line , as_object ,
190+ & interned_strings );
179191
180192 if (Z_TYPE (token ) != IS_UNDEF ) {
181193 zval_ptr_dtor_nogc (& token );
@@ -191,7 +203,8 @@ static zend_bool tokenize(zval *return_value, zend_string *source, zend_bool as_
191203 /* fetch the rest into a T_INLINE_HTML */
192204 if (zendcursor != zendlimit ) {
193205 add_token (return_value , T_INLINE_HTML ,
194- zendcursor , zendlimit - zendcursor , token_line , as_object );
206+ zendcursor , zendlimit - zendcursor , token_line , as_object ,
207+ & interned_strings );
195208 }
196209 break ;
197210 }
@@ -209,6 +222,7 @@ static zend_bool tokenize(zval *return_value, zend_string *source, zend_bool as_
209222
210223 zval_ptr_dtor_str (& source_zval );
211224 zend_restore_lexical_state (& original_lex_state );
225+ zend_hash_destroy (& interned_strings );
212226
213227 return 1 ;
214228}
@@ -234,7 +248,7 @@ void on_event(zend_php_scanner_event event, int token, int line, void *context)
234248 token = T_OPEN_TAG_WITH_ECHO ;
235249 }
236250 add_token (ctx -> tokens , token ,
237- LANG_SCNG (yy_text ), LANG_SCNG (yy_leng ), line , ctx -> as_object );
251+ LANG_SCNG (yy_text ), LANG_SCNG (yy_leng ), line , ctx -> as_object , NULL );
238252 break ;
239253 case ON_FEEDBACK :
240254 tokens_ht = Z_ARRVAL_P (ctx -> tokens );
@@ -249,7 +263,8 @@ void on_event(zend_php_scanner_event event, int token, int line, void *context)
249263 case ON_STOP :
250264 if (LANG_SCNG (yy_cursor ) != LANG_SCNG (yy_limit )) {
251265 add_token (ctx -> tokens , T_INLINE_HTML , LANG_SCNG (yy_cursor ),
252- LANG_SCNG (yy_limit ) - LANG_SCNG (yy_cursor ), CG (zend_lineno ), ctx -> as_object );
266+ LANG_SCNG (yy_limit ) - LANG_SCNG (yy_cursor ), CG (zend_lineno ),
267+ ctx -> as_object , NULL );
253268 }
254269 break ;
255270 }
0 commit comments