@@ -76,6 +76,126 @@ zend_module_entry tokenizer_module_entry = {
7676ZEND_GET_MODULE (tokenizer )
7777#endif
7878
79+ static zval * php_token_get_id (zval * obj ) {
80+ zval * id = OBJ_PROP_NUM (Z_OBJ_P (obj ), 0 );
81+ if (Z_ISUNDEF_P (id )) {
82+ zend_throw_error (NULL ,
83+ "Typed property PhpToken::$id must not be accessed before initialization" );
84+ return NULL ;
85+ }
86+
87+ ZVAL_DEREF (id );
88+ ZEND_ASSERT (Z_TYPE_P (id ) == IS_LONG );
89+ return id ;
90+ }
91+
92+ static zval * php_token_get_text (zval * obj ) {
93+ zval * text = OBJ_PROP_NUM (Z_OBJ_P (obj ), 1 );
94+ if (Z_ISUNDEF_P (text )) {
95+ zend_throw_error (NULL ,
96+ "Typed property PhpToken::$text must not be accessed before initialization" );
97+ return NULL ;
98+ }
99+
100+ ZVAL_DEREF (text );
101+ ZEND_ASSERT (Z_TYPE_P (text ) == IS_STRING );
102+ return text ;
103+ }
104+
105+ PHP_METHOD (PhpToken , is )
106+ {
107+ zval * kind ;
108+
109+ ZEND_PARSE_PARAMETERS_START (1 , 1 )
110+ Z_PARAM_ZVAL (kind )
111+ ZEND_PARSE_PARAMETERS_END ();
112+
113+ if (Z_TYPE_P (kind ) == IS_LONG ) {
114+ zval * id_zval = php_token_get_id (ZEND_THIS );
115+ if (!id_zval ) {
116+ RETURN_THROWS ();
117+ }
118+
119+ RETURN_BOOL (Z_LVAL_P (id_zval ) == Z_LVAL_P (kind ));
120+ } else if (Z_TYPE_P (kind ) == IS_STRING ) {
121+ zval * text_zval = php_token_get_text (ZEND_THIS );
122+ if (!text_zval ) {
123+ RETURN_THROWS ();
124+ }
125+
126+ RETURN_BOOL (zend_string_equals (Z_STR_P (text_zval ), Z_STR_P (kind )));
127+ } else if (Z_TYPE_P (kind ) == IS_ARRAY ) {
128+ zval * id_zval = NULL , * text_zval = NULL , * entry ;
129+ ZEND_HASH_FOREACH_VAL (Z_ARRVAL_P (kind ), entry ) {
130+ ZVAL_DEREF (entry );
131+ if (Z_TYPE_P (entry ) == IS_LONG ) {
132+ if (!id_zval ) {
133+ id_zval = php_token_get_id (ZEND_THIS );
134+ if (!id_zval ) {
135+ RETURN_THROWS ();
136+ }
137+ }
138+ if (Z_LVAL_P (id_zval ) == Z_LVAL_P (entry )) {
139+ RETURN_TRUE ;
140+ }
141+ } else if (Z_TYPE_P (entry ) == IS_STRING ) {
142+ if (!text_zval ) {
143+ text_zval = php_token_get_text (ZEND_THIS );
144+ if (!text_zval ) {
145+ RETURN_THROWS ();
146+ }
147+ }
148+ if (zend_string_equals (Z_STR_P (text_zval ), Z_STR_P (entry ))) {
149+ RETURN_TRUE ;
150+ }
151+ } else {
152+ zend_type_error ("Kind array must have elements of type int or string" );
153+ RETURN_THROWS ();
154+ }
155+ } ZEND_HASH_FOREACH_END ();
156+ RETURN_FALSE ;
157+ } else {
158+ zend_type_error ("Kind must be of type int, string or array" );
159+ RETURN_THROWS ();
160+ }
161+ }
162+
163+ PHP_METHOD (PhpToken , isIgnorable )
164+ {
165+ ZEND_PARSE_PARAMETERS_NONE ();
166+
167+ zval * id_zval = php_token_get_id (ZEND_THIS );
168+ if (!id_zval ) {
169+ RETURN_THROWS ();
170+ }
171+
172+ zend_long id = Z_LVAL_P (id_zval );
173+ RETURN_BOOL (id == T_WHITESPACE || id == T_COMMENT || id == T_DOC_COMMENT || id == T_OPEN_TAG );
174+ }
175+
176+ PHP_METHOD (PhpToken , getTokenName )
177+ {
178+ ZEND_PARSE_PARAMETERS_NONE ();
179+
180+ zval * id_zval = php_token_get_id (ZEND_THIS );
181+ if (!id_zval ) {
182+ RETURN_THROWS ();
183+ }
184+
185+ if (Z_LVAL_P (id_zval ) < 256 ) {
186+ RETURN_INTERNED_STR (ZSTR_CHAR (Z_LVAL_P (id_zval )));
187+ } else {
188+ RETURN_STRING (get_token_type_name (Z_LVAL_P (id_zval )));
189+ }
190+ }
191+
192+ static const zend_function_entry php_token_methods [] = {
193+ PHP_ME (PhpToken , is , arginfo_class_PhpToken_is , ZEND_ACC_PUBLIC )
194+ PHP_ME (PhpToken , isIgnorable , arginfo_class_PhpToken_isIgnorable , ZEND_ACC_PUBLIC )
195+ PHP_ME (PhpToken , getTokenName , arginfo_class_PhpToken_getTokenName , ZEND_ACC_PUBLIC )
196+ PHP_FE_END
197+ };
198+
79199/* {{{ PHP_MINIT_FUNCTION
80200 */
81201PHP_MINIT_FUNCTION (tokenizer )
@@ -88,7 +208,7 @@ PHP_MINIT_FUNCTION(tokenizer)
88208 tokenizer_register_constants (INIT_FUNC_ARGS_PASSTHRU );
89209 tokenizer_token_get_all_register_constants (INIT_FUNC_ARGS_PASSTHRU );
90210
91- INIT_CLASS_ENTRY (ce , "PhpToken" , NULL );
211+ INIT_CLASS_ENTRY (ce , "PhpToken" , php_token_methods );
92212 php_token_ce = zend_register_internal_class (& ce );
93213
94214 name = zend_string_init ("id" , sizeof ("id" ) - 1 , 1 );
@@ -125,40 +245,31 @@ PHP_MINFO_FUNCTION(tokenizer)
125245}
126246/* }}} */
127247
128- static zend_string * make_str (unsigned char * text , size_t leng , HashTable * interned_strings ) {
248+ static inline zend_string * make_str (unsigned char * text , size_t leng ) {
129249 if (leng == 1 ) {
130250 return ZSTR_CHAR (text [0 ]);
131- } else if (interned_strings ) {
132- zend_string * interned_str = zend_hash_str_find_ptr (interned_strings , (char * ) text , leng );
133- if (interned_str ) {
134- return zend_string_copy (interned_str );
135- }
136- interned_str = zend_string_init ((char * ) text , leng , 0 );
137- zend_hash_add_new_ptr (interned_strings , interned_str , interned_str );
138- return interned_str ;
139251 } else {
140252 return zend_string_init ((char * ) text , leng , 0 );
141253 }
142254}
143255
144- static void add_token (
145- zval * return_value , int token_type , unsigned char * text , size_t leng , int lineno ,
146- zend_bool as_object , HashTable * interned_strings ) {
256+ static void add_token (zval * return_value , int token_type ,
257+ unsigned char * text , size_t leng , int lineno , zend_bool as_object ) {
147258 zval token ;
148259 if (as_object ) {
149260 zend_object * obj = zend_objects_new (php_token_ce );
150261 ZVAL_OBJ (& token , obj );
151262 ZVAL_LONG (OBJ_PROP_NUM (obj , 0 ), token_type );
152- ZVAL_STR (OBJ_PROP_NUM (obj , 1 ), make_str (text , leng , interned_strings ));
263+ ZVAL_STR (OBJ_PROP_NUM (obj , 1 ), make_str (text , leng ));
153264 ZVAL_LONG (OBJ_PROP_NUM (obj , 2 ), lineno );
154265 ZVAL_LONG (OBJ_PROP_NUM (obj , 3 ), text - LANG_SCNG (yy_start ));
155266 } else if (token_type >= 256 ) {
156267 array_init (& token );
157268 add_next_index_long (& token , token_type );
158- add_next_index_str (& token , make_str (text , leng , interned_strings ));
269+ add_next_index_str (& token , make_str (text , leng ));
159270 add_next_index_long (& token , lineno );
160271 } else {
161- ZVAL_STR (& token , make_str (text , leng , interned_strings ));
272+ ZVAL_STR (& token , make_str (text , leng ));
162273 }
163274 zend_hash_next_index_insert_new (Z_ARRVAL_P (return_value ), & token );
164275}
@@ -171,7 +282,6 @@ static zend_bool tokenize(zval *return_value, zend_string *source, zend_bool as_
171282 int token_type ;
172283 int token_line = 1 ;
173284 int need_tokens = -1 ; /* for __halt_compiler lexing. -1 = disabled */
174- HashTable interned_strings ;
175285
176286 ZVAL_STR_COPY (& source_zval , source );
177287 zend_save_lexical_state (& original_lex_state );
@@ -182,12 +292,10 @@ static zend_bool tokenize(zval *return_value, zend_string *source, zend_bool as_
182292 }
183293
184294 LANG_SCNG (yy_state ) = yycINITIAL ;
185- zend_hash_init (& interned_strings , 0 , NULL , NULL , 0 );
186295 array_init (return_value );
187296
188297 while ((token_type = lex_scan (& token , NULL ))) {
189- add_token (return_value , token_type , zendtext , zendleng , token_line , as_object ,
190- & interned_strings );
298+ add_token (return_value , token_type , zendtext , zendleng , token_line , as_object );
191299
192300 if (Z_TYPE (token ) != IS_UNDEF ) {
193301 zval_ptr_dtor_nogc (& token );
@@ -203,8 +311,7 @@ static zend_bool tokenize(zval *return_value, zend_string *source, zend_bool as_
203311 /* fetch the rest into a T_INLINE_HTML */
204312 if (zendcursor != zendlimit ) {
205313 add_token (return_value , T_INLINE_HTML ,
206- zendcursor , zendlimit - zendcursor , token_line , as_object ,
207- & interned_strings );
314+ zendcursor , zendlimit - zendcursor , token_line , as_object );
208315 }
209316 break ;
210317 }
@@ -222,7 +329,6 @@ static zend_bool tokenize(zval *return_value, zend_string *source, zend_bool as_
222329
223330 zval_ptr_dtor_str (& source_zval );
224331 zend_restore_lexical_state (& original_lex_state );
225- zend_hash_destroy (& interned_strings );
226332
227333 return 1 ;
228334}
@@ -248,7 +354,7 @@ void on_event(zend_php_scanner_event event, int token, int line, void *context)
248354 token = T_OPEN_TAG_WITH_ECHO ;
249355 }
250356 add_token (ctx -> tokens , token ,
251- LANG_SCNG (yy_text ), LANG_SCNG (yy_leng ), line , ctx -> as_object , NULL );
357+ LANG_SCNG (yy_text ), LANG_SCNG (yy_leng ), line , ctx -> as_object );
252358 break ;
253359 case ON_FEEDBACK :
254360 tokens_ht = Z_ARRVAL_P (ctx -> tokens );
@@ -263,8 +369,7 @@ void on_event(zend_php_scanner_event event, int token, int line, void *context)
263369 case ON_STOP :
264370 if (LANG_SCNG (yy_cursor ) != LANG_SCNG (yy_limit )) {
265371 add_token (ctx -> tokens , T_INLINE_HTML , LANG_SCNG (yy_cursor ),
266- LANG_SCNG (yy_limit ) - LANG_SCNG (yy_cursor ), CG (zend_lineno ),
267- ctx -> as_object , NULL );
372+ LANG_SCNG (yy_limit ) - LANG_SCNG (yy_cursor ), CG (zend_lineno ), ctx -> as_object );
268373 }
269374 break ;
270375 }
0 commit comments