@@ -229,6 +229,12 @@ ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_output_handler, 0, 0, 2)
229229 ZEND_ARG_INFO (0 , status )
230230ZEND_END_ARG_INFO ()
231231
232+ ZEND_BEGIN_ARG_INFO_EX (arginfo_mb_str_split , 0 , 0 , 1 )
233+ ZEND_ARG_INFO (0 , str )
234+ ZEND_ARG_INFO (0 , split_length )
235+ ZEND_ARG_INFO (0 , encoding )
236+ ZEND_END_ARG_INFO ()
237+
232238ZEND_BEGIN_ARG_INFO_EX (arginfo_mb_strlen , 0 , 0 , 1 )
233239 ZEND_ARG_INFO (0 , str )
234240 ZEND_ARG_INFO (0 , encoding )
@@ -526,6 +532,7 @@ static const zend_function_entry mbstring_functions[] = {
526532 PHP_FE (mb_parse_str , arginfo_mb_parse_str )
527533 PHP_FE (mb_output_handler , arginfo_mb_output_handler )
528534 PHP_FE (mb_preferred_mime_name , arginfo_mb_preferred_mime_name )
535+ PHP_FE (mb_str_split , arginfo_mb_str_split )
529536 PHP_FE (mb_strlen , arginfo_mb_strlen )
530537 PHP_FE (mb_strpos , arginfo_mb_strpos )
531538 PHP_FE (mb_strrpos , arginfo_mb_strrpos )
@@ -2273,6 +2280,169 @@ PHP_FUNCTION(mb_output_handler)
22732280}
22742281/* }}} */
22752282
/* {{{ proto array mb_str_split(string str [, int split_length [, string encoding]])
   Convert a multibyte string to an array. If split_length is specified,
   break the string down into chunks, each split_length characters long. */
2286+
2287+ /* structure to pass split params to the callback */
2288+ struct mbfl_split_params {
2289+ zval * return_value ; /* php function return value structure pointer */
2290+ mbfl_string * result_string ; /* string to store result chunk */
2291+ size_t mb_chunk_length ; /* actual chunk length in chars */
2292+ size_t split_length ; /* split length in chars */
2293+ mbfl_convert_filter * next_filter ; /* widechar to encoding converter */
2294+ };
2295+
2296+ /* callback function to fill split array */
2297+ static int mbfl_split_output (int c , void * data )
2298+ {
2299+ struct mbfl_split_params * params = (struct mbfl_split_params * )data ; /* cast passed data */
2300+
2301+ (* params -> next_filter -> filter_function )(c , params -> next_filter ); /* decoder filter */
2302+
2303+ if (params -> split_length == ++ params -> mb_chunk_length ) { /* if current chunk size reached defined chunk size or last char reached */
2304+ mbfl_convert_filter_flush (params -> next_filter );/* concatenate separate decoded chars to the solid string */
2305+ mbfl_memory_device * device = (mbfl_memory_device * )params -> next_filter -> data ; /* chars container */
2306+ mbfl_string * chunk = params -> result_string ;
2307+ mbfl_memory_device_result (device , chunk ); /* make chunk */
2308+ add_next_index_stringl (params -> return_value , (const char * )chunk -> val , chunk -> len ); /* add chunk to the array */
2309+ efree (chunk -> val );
2310+ params -> mb_chunk_length = 0 ; /* reset mb_chunk size */
2311+ }
2312+ return 0 ;
2313+ }
2314+
2315+ PHP_FUNCTION (mb_str_split )
2316+ {
2317+ zend_string * str , * encoding = NULL ;
2318+ size_t mb_len , chunks , chunk_len ;
2319+ const char * p , * last ; /* pointer for the string cursor and last string char */
2320+ mbfl_string string , result_string ;
2321+ const mbfl_encoding * mbfl_encoding ;
2322+ zend_long split_length = 1 ;
2323+
2324+ ZEND_PARSE_PARAMETERS_START (1 , 3 )
2325+ Z_PARAM_STR (str )
2326+ Z_PARAM_OPTIONAL
2327+ Z_PARAM_LONG (split_length )
2328+ Z_PARAM_STR (encoding )
2329+ ZEND_PARSE_PARAMETERS_END ();
2330+
2331+ if (split_length <= 0 ) {
2332+ php_error_docref (NULL , E_WARNING , "The length of each segment must be greater than zero" );
2333+ RETURN_FALSE ;
2334+ }
2335+
2336+ /* fill mbfl_string structure */
2337+ string .val = (unsigned char * ) ZSTR_VAL (str );
2338+ string .len = ZSTR_LEN (str );
2339+ string .no_language = MBSTRG (language );
2340+ string .encoding = php_mb_get_encoding (encoding );
2341+ if (!string .encoding ) {
2342+ RETURN_FALSE ;
2343+ }
2344+
2345+ p = ZSTR_VAL (str ); /* string cursor pointer */
2346+ last = ZSTR_VAL (str ) + ZSTR_LEN (str ); /* last string char pointer */
2347+
2348+ mbfl_encoding = string .encoding ;
2349+
2350+ /* first scenario: 1,2,4-bytes fixed width encodings (head part) */
2351+ if (mbfl_encoding -> flag & MBFL_ENCTYPE_SBCS ) { /* 1 byte */
2352+ mb_len = string .len ;
2353+ chunk_len = (size_t )split_length ; /* chunk length in bytes */
2354+ } else if (mbfl_encoding -> flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE )) { /* 2 bytes */
2355+ mb_len = string .len / 2 ;
2356+ chunk_len = split_length * 2 ;
2357+ } else if (mbfl_encoding -> flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE )) { /* 4 bytes */
2358+ mb_len = string .len / 4 ;
2359+ chunk_len = split_length * 4 ;
2360+ } else if (mbfl_encoding -> mblen_table != NULL ) {
2361+ /* second scenario: variable width encodings with length table */
2362+ char unsigned const * mbtab = mbfl_encoding -> mblen_table ;
2363+
2364+ /* assume that we have 1-bytes characters */
2365+ array_init_size (return_value , (string .len + split_length ) / split_length ); /* round up */
2366+
2367+ while (p < last ) { /* split cycle work until the cursor has reached the last byte */
2368+ char const * chunk_p = p ; /* chunk first byte pointer */
2369+ chunk_len = 0 ; /* chunk length in bytes */
2370+ for (zend_long char_count = 0 ; char_count < split_length && p < last ; ++ char_count ) {
2371+ char unsigned const m = mbtab [* (const unsigned char * )p ]; /* single character length table */
2372+ chunk_len += m ;
2373+ p += m ;
2374+ }
2375+ if (p >= last ) chunk_len -= p - last ; /* check if chunk is in bounds */
2376+ add_next_index_stringl (return_value , chunk_p , chunk_len );
2377+ }
2378+ return ;
2379+ } else {
2380+ /* third scenario: other multibyte encodings */
2381+ mbfl_convert_filter * filter , * decoder ;
2382+
2383+ /* assume that we have 1-bytes characters */
2384+ array_init_size (return_value , (string .len + split_length ) / split_length ); /* round up */
2385+
2386+ /* decoder filter to decode wchar to encoding */
2387+ mbfl_memory_device device ;
2388+ mbfl_memory_device_init (& device , split_length + 1 , 0 );
2389+
2390+ decoder = mbfl_convert_filter_new (
2391+ & mbfl_encoding_wchar ,
2392+ string .encoding ,
2393+ mbfl_memory_device_output ,
2394+ NULL ,
2395+ & device );
2396+ /* if something wrong with the decoded */
2397+ if (decoder == NULL ) {
2398+ RETURN_FALSE ;
2399+ }
2400+
2401+ /* wchar filter */
2402+ mbfl_string_init (& result_string ); /* mbfl_string to store chunk in the callback */
2403+ struct mbfl_split_params params = { /* init callback function params structure */
2404+ .return_value = return_value ,
2405+ .result_string = & result_string ,
2406+ .mb_chunk_length = 0 ,
2407+ .split_length = (size_t )split_length ,
2408+ .next_filter = decoder ,
2409+ };
2410+
2411+ filter = mbfl_convert_filter_new (
2412+ string .encoding ,
2413+ & mbfl_encoding_wchar ,
2414+ mbfl_split_output ,
2415+ NULL ,
2416+ & params );
2417+ /* if something wrong with the filter */
2418+ if (filter == NULL ){
2419+ mbfl_convert_filter_delete (decoder ); /* this will free allocated memory for the decoded */
2420+ RETURN_FALSE ;
2421+ }
2422+
2423+ while (p < last - 1 ) { /* cycle each byte except last with callback function */
2424+ (* filter -> filter_function )(* p ++ , filter );
2425+ }
2426+ params .mb_chunk_length = split_length - 1 ; /* force to finish current chunk */
2427+ (* filter -> filter_function )(* p ++ , filter ); /*process last char */
2428+
2429+ mbfl_convert_filter_delete (decoder );
2430+ mbfl_convert_filter_delete (filter );
2431+ return ;
2432+ }
2433+
2434+ /* first scenario: 1,2,4-bytes fixed width encodings (tail part) */
2435+ chunks = (mb_len + split_length - 1 ) / split_length ; /* (round up idiom) */
2436+ array_init_size (return_value , chunks );
2437+ if (chunks != 0 ) {
2438+ for (zend_long i = 0 ; i < chunks - 1 ; p += chunk_len , ++ i ) {
2439+ add_next_index_stringl (return_value , p , chunk_len );
2440+ }
2441+ add_next_index_stringl (return_value , p , last - p );
2442+ }
2443+ }
2444+ /* }}} */
2445+
22762446/* {{{ proto int mb_strlen(string str [, string encoding])
22772447 Get character numbers of a string */
22782448PHP_FUNCTION (mb_strlen )
0 commit comments