@@ -404,6 +404,228 @@ public function testAcceptsJsonPath()
404404 $ this ->assertSame ('red ' , $ result [0 ]['color ' ]);
405405 }
406406
/**
 * Runs a bracket-notation JSONPath whose double-quoted name contains escape
 * sequences against the shared Unicode fixture document and checks the
 * selected values.
 *
 * @param string $jsonPath JSONPath expression to evaluate
 * @param array  $expected values find() must return, in order
 *
 * @dataProvider provideUnicodeEscapeSequencesProvider
 */
public function testUnicodeEscapeSequences(string $jsonPath, array $expected)
{
    $this->assertSame($expected, self::getUnicodeDocumentCrawler()->find($jsonPath));
}
414+
/**
 * Datasets for testUnicodeEscapeSequences(): a JSONPath using a double-quoted
 * name selector, and the values it is expected to select.
 *
 * The paths are single-quoted PHP strings on purpose: sequences such as \u00e9,
 * \t or \n must reach the JSONPath parser as literal backslash escapes instead
 * of being interpreted by PHP.
 *
 * @return array<string, array{0: string, 1: array}>
 */
public static function provideUnicodeEscapeSequencesProvider(): array
{
    return [
        'latin small e with acute' => [
            '$["caf\u00e9"]',
            ['coffee'],
        ],
        'CJK characters' => [
            '$["\u65e5\u672c"]',
            ['Japan'],
        ],
        // "Müller" only exists as a user name in the fixture, never as a root key.
        'name that is not a root key' => [
            '$["M\u00fcller"]',
            [],
        ],
        'emoji written as a surrogate pair' => [
            '$["emoji\ud83d\ude00"]',
            ['smiley'],
        ],
        'tab escape' => [
            '$["tab\there"]',
            ['with tab'],
        ],
        'newline escape' => [
            '$["new\nline"]',
            ['with newline'],
        ],
        'escaped double quote' => [
            '$["quote\"here"]',
            ['with quote'],
        ],
        // Four backslashes in PHP source yield two in the path, i.e. one escaped backslash.
        'escaped backslash' => [
            '$["backslash\\\\here"]',
            ['with backslash'],
        ],
        'apostrophe inside double quotes' => [
            '$["apostrophe\'here"]',
            ['with apostrophe'],
        ],
        'control character escape' => [
            '$["control\u0001char"]',
            ['with control char'],
        ],
        'escape used for a plain ASCII letter' => [
            '$["\u0063af\u00e9"]',
            ['coffee'],
        ],
    ];
}
468+
/**
 * Runs a bracket-notation JSONPath whose name selector is delimited by single
 * quotes against the shared Unicode fixture document and checks the selected
 * values.
 *
 * @param string $jsonPath JSONPath expression to evaluate
 * @param array  $expected values find() must return, in order
 *
 * @dataProvider provideSingleQuotedStringProvider
 */
public function testSingleQuotedStrings(string $jsonPath, array $expected)
{
    $this->assertSame($expected, self::getUnicodeDocumentCrawler()->find($jsonPath));
}
476+
/**
 * Datasets for testSingleQuotedStrings(): a JSONPath using a single-quoted
 * name selector, and the values it is expected to select.
 *
 * Every \' below is PHP escaping for the apostrophe that delimits the JSONPath
 * name; only the last dataset sends an escaped apostrophe to the parser.
 *
 * @return array<string, array{0: string, 1: array}>
 */
public static function provideSingleQuotedStringProvider(): array
{
    return [
        'unicode escape' => [
            '$[\'caf\u00e9\']',
            ['coffee'],
        ],
        'CJK unicode escapes' => [
            '$[\'\u65e5\u672c\']',
            ['Japan'],
        ],
        'unescaped double quote' => [
            '$[\'quote"here\']',
            ['with quote'],
        ],
        // The path received by the parser is: $['apostrophe\'here']
        'escaped apostrophe' => [
            '$[\'apostrophe\\\'here\']',
            ['with apostrophe'],
        ],
    ];
}
498+
/**
 * Evaluates a filter expression that compares a user's name with a string
 * literal and checks how many users match and, when any match, the country
 * of the first one.
 *
 * @param string $jsonPath        JSONPath filter expression to evaluate
 * @param int    $expectedCount   number of results find() must return
 * @param string $expectedCountry 'country' of the first result; not checked when no result is expected
 *
 * @dataProvider provideFilterWithUnicodeProvider
 */
public function testFilterWithUnicodeStrings(string $jsonPath, int $expectedCount, string $expectedCountry)
{
    $result = self::getUnicodeDocumentCrawler()->find($jsonPath);

    $this->assertCount($expectedCount, $result);

    // With zero expected matches there is no first element to inspect.
    if ($expectedCount > 0) {
        $this->assertSame($expectedCountry, $result[0]['country']);
    }
}
512+
/**
 * Datasets for testFilterWithUnicodeStrings(): a filter expression, the
 * number of expected matches and the country of the first match.
 *
 * @return array<string, array{0: string, 1: int, 2: string}>
 */
public static function provideFilterWithUnicodeProvider(): array
{
    return [
        'latin escape in literal' => [
            '$.users[?(@.name == "caf\u00e9")]',
            1,
            'France',
        ],
        'CJK escapes in literal' => [
            '$.users[?(@.name == "\u65e5\u672c\u592a\u90ce")]',
            1,
            'Japan',
        ],
        'trailing escape in literal' => [
            '$.users[?(@.name == "Jos\u00e9")]',
            1,
            'Spain',
        ],
        'plain ASCII literal' => [
            '$.users[?(@.name == "John")]',
            1,
            'USA',
        ],
        // No match expected, so the country is a placeholder that is never asserted.
        'no matching user' => [
            '$.users[?(@.name == "NonExistent\u0020Name")]',
            0,
            '',
        ],
    ];
}
543+
/**
 * Checks that a path containing a malformed \u escape still yields an array
 * from find(). Only the result type is asserted, not its content.
 *
 * @param string $jsonPath JSONPath expression containing a malformed \u escape
 *
 * @dataProvider provideInvalidUnicodeSequenceProvider
 */
public function testInvalidUnicodeSequencesAreProcessedAsLiterals(string $jsonPath)
{
    $this->assertIsArray(self::getUnicodeDocumentCrawler()->find($jsonPath), 'invalid unicode sequence should be treated as literal and not throw');
}
551+
/**
 * Datasets for testInvalidUnicodeSequencesAreProcessedAsLiterals(): paths
 * whose \u escape is not followed by four hexadecimal digits.
 *
 * @return array<string, array{0: string}>
 */
public static function provideInvalidUnicodeSequenceProvider(): array
{
    return [
        'non-hexadecimal digits' => [
            '$["test\uZZZZ"]',
        ],
        'only three digits' => [
            '$["test\u123"]',
        ],
        'no digits at all' => [
            '$["test\u"]',
        ],
    ];
}
566+
/**
 * Evaluates multi-segment paths whose names are written entirely with \u
 * escapes against a nested document keyed by Japanese text and emoji.
 *
 * @param string $jsonPath JSONPath expression to evaluate
 * @param array  $expected values find() must return, in order
 *
 * @dataProvider provideComplexUnicodePath
 */
public function testComplexUnicodePaths(string $jsonPath, array $expected)
{
    $complexJson = [
        'データ' => [
            'ユーザー' => [
                ['名前' => 'テスト', 'ID' => 1],
                ['名前' => 'サンプル', 'ID' => 2],
            ],
        ],
        'special🔑' => [
            'value💎' => 'treasure',
        ],
    ];

    // Fail at the encoding step rather than handing `false` to the crawler.
    $crawler = new JsonCrawler(json_encode($complexJson, \JSON_THROW_ON_ERROR));

    $this->assertSame($expected, $crawler->find($jsonPath));
}
588+
/**
 * Datasets for testComplexUnicodePaths(): a path made of escaped names and
 * the values it is expected to select from that test's nested document.
 *
 * @return array<string, array{0: string, 1: array}>
 */
public static function provideComplexUnicodePath(): array
{
    return [
        'nested names with an index' => [
            '$["\u30c7\u30fc\u30bf"]["\u30e6\u30fc\u30b6\u30fc"][0]["\u540d\u524d"]',
            ['テスト'],
        ],
        'emoji names as surrogate pairs' => [
            '$["special\ud83d\udd11"]["value\ud83d\udc8e"]',
            ['treasure'],
        ],
        'nested names with a wildcard' => [
            '$["\u30c7\u30fc\u30bf"]["\u30e6\u30fc\u30b6\u30fc"][*]["\u540d\u524d"]',
            ['テスト', 'サンプル'],
        ],
    ];
}
606+
/**
 * Checks that a name made only of characters outside the Basic Multilingual
 * Plane can be selected when each character is written as a \uXXXX\uXXXX
 * surrogate pair.
 */
public function testSurrogatePairHandling()
{
    $json = ['𝒽𝑒𝓁𝓁𝑜' => 'mathematical script hello'];
    // Fail at the encoding step rather than handing `false` to the crawler.
    $crawler = new JsonCrawler(json_encode($json, \JSON_THROW_ON_ERROR));

    // mathematical script "hello" requires surrogate pairs for each character
    $result = $crawler->find('$["\ud835\udcbd\ud835\udc52\ud835\udcc1\ud835\udcc1\ud835\udc5c"]');
    $this->assertSame(['mathematical script hello'], $result);
}
616+
/**
 * Checks that a name containing one kind of quote can be selected without
 * escaping when the selector is delimited by the other kind.
 */
public function testMixedQuoteTypes()
{
    $json = ['key"with"quotes' => 'value1', "key'with'apostrophes" => 'value2'];
    // Fail at the encoding step rather than handing `false` to the crawler.
    $crawler = new JsonCrawler(json_encode($json, \JSON_THROW_ON_ERROR));

    // Double quotes inside a single-quoted selector.
    $result = $crawler->find('$[\'key"with"quotes\']');
    $this->assertSame(['value1'], $result);

    // Apostrophes inside a double-quoted selector.
    $result = $crawler->find('$["key\'with\'apostrophes"]');
    $this->assertSame(['value2'], $result);
}
628+
407629 private static function getBookstoreCrawler (): JsonCrawler
408630 {
409631 return new JsonCrawler (<<<JSON
@@ -453,4 +675,28 @@ private static function getSimpleCollectionCrawler(): JsonCrawler
453675{"a": [3, 5, 1, 2, 4, 6]}
454676JSON );
455677 }
678+
/**
 * Builds the fixture document shared by the Unicode and escape-sequence
 * tests: root keys containing non-ASCII text, an emoji, whitespace control
 * characters, quotes and a backslash, plus a "users" list used by the
 * filter tests.
 *
 * @return JsonCrawler crawler over the JSON encoding of the fixture
 */
private static function getUnicodeDocumentCrawler(): JsonCrawler
{
    $json = [
        'café' => 'coffee',
        '日本' => 'Japan',
        'emoji😀' => 'smiley',
        // Written with an explicit escape so the TAB stays visible in source
        // and cannot be turned into a space by an editor.
        "tab\there" => 'with tab',
        "new\nline" => 'with newline',
        'quote"here' => 'with quote',
        'backslash\\here' => 'with backslash',
        'apostrophe\'here' => 'with apostrophe',
        "control\x01char" => 'with control char',
        'users' => [
            ['name' => 'café', 'country' => 'France'],
            ['name' => '日本太郎', 'country' => 'Japan'],
            ['name' => 'John', 'country' => 'USA'],
            ['name' => 'Müller', 'country' => 'Germany'],
            ['name' => 'José', 'country' => 'Spain'],
        ],
    ];

    // Fail at the encoding step rather than handing `false` to the crawler.
    return new JsonCrawler(json_encode($json, \JSON_THROW_ON_ERROR));
}
456702}
0 commit comments