@@ -404,6 +404,260 @@ public function testAcceptsJsonPath()
404404 $ this ->assertSame ('red ' , $ result [0 ]['color ' ]);
405405 }
406406
/**
 * Evaluates $jsonPath against the Unicode fixture document and requires the
 * matches to be strictly identical (same values, types and order) to $expected.
 *
 * @dataProvider provideUnicodeEscapeSequencesProvider
 */
public function testUnicodeEscapeSequences(string $jsonPath, array $expected)
{
    $matches = self::getUnicodeDocumentCrawler()->find($jsonPath);

    $this->assertSame($expected, $matches);
}
414+
/**
 * Datasets for testUnicodeEscapeSequences(), each shaped as
 * [JSONPath expression, expected matches].
 *
 * Every expression is a single-quoted PHP literal, so sequences such as
 * \uXXXX, \t, \n and \" are NOT interpreted by PHP and reach the expression
 * verbatim; only \\ and \' are collapsed by PHP itself.
 */
public static function provideUnicodeEscapeSequencesProvider(): array
{
    $datasets = [];

    // Names spelled with \uXXXX escapes.
    $datasets[] = ['$["caf\u00e9"]', ['coffee']];
    $datasets[] = ['$["\u65e5\u672c"]', ['Japan']];
    // This name is expected to select nothing.
    $datasets[] = ['$["M\u00fcller"]', []];

    // \ud83d\ude00 is a UTF-16 surrogate pair.
    $datasets[] = ['$["emoji\ud83d\ude00"]', ['smiley']];

    // Single-character escapes.
    $datasets[] = ['$["tab\there"]', ['with tab']];
    $datasets[] = ['$["new\nline"]', ['with newline']];
    $datasets[] = ['$["quote\"here"]', ['with quote']];
    // PHP turns the four backslashes into two inside the expression.
    $datasets[] = ['$["backslash\\\\here"]', ['with backslash']];
    // PHP turns \' into a bare apostrophe inside the expression.
    $datasets[] = ['$["apostrophe\'here"]', ['with apostrophe']];

    // Escaped control character.
    $datasets[] = ['$["control\u0001char"]', ['with control char']];

    // A plain ASCII letter given as an escape (\u0063 is "c").
    $datasets[] = ['$["\u0063af\u00e9"]', ['coffee']];

    return $datasets;
}
468+
/**
 * Evaluates $jsonPath against the Unicode fixture document and requires the
 * matches to be strictly identical to $expected.
 *
 * @dataProvider provideSingleQuotedStringProvider
 */
public function testSingleQuotedStrings(string $jsonPath, array $expected)
{
    $crawler = self::getUnicodeDocumentCrawler();
    $matches = $crawler->find($jsonPath);

    $this->assertSame($expected, $matches);
}
476+
/**
 * Datasets for testSingleQuotedStrings(), each shaped as
 * [JSONPath expression, expected matches].
 *
 * The expressions are double-quoted PHP literals wrapping a single-quoted
 * JSONPath name. PHP therefore collapses each \\ into one backslash and each
 * \" into a bare double quote before the expression is used.
 */
public static function provideSingleQuotedStringProvider(): array
{
    return [
        // Names spelled with \uXXXX escapes.
        ["$['caf\\u00e9']", ['coffee']],
        ["$['\\u65e5\\u672c']", ['Japan']],
        // Bare double quote inside a single-quoted name.
        ["$['quote\"here']", ['with quote']],
        // This name is expected to select nothing.
        ["$['M\\u00fcller']", []],

        // \ud83d\ude00 is a UTF-16 surrogate pair.
        ["$['emoji\\ud83d\\ude00']", ['smiley']],

        // Single-character escapes; the expression contains backslash + character.
        ["$['tab\\there']", ['with tab']],
        ["$['quote\\\"here']", ['with quote']],
        ["$['backslash\\\\here']", ['with backslash']],
        ["$['apostrophe\\'here']", ['with apostrophe']],

        // Escaped control character.
        ["$['control\\u0001char']", ['with control char']],

        // A plain ASCII letter given as an escape (\u0063 is "c").
        ["$['\\u0063af\\u00e9']", ['coffee']],
    ];
}
530+
/**
 * Runs a filter expression against the Unicode fixture document, checks the
 * number of matches and, when at least one match is expected, the "country"
 * entry of the first match.
 *
 * $expectedCountry is ignored for datasets whose $expectedCount is not positive.
 *
 * @dataProvider provideFilterWithUnicodeProvider
 */
public function testFilterWithUnicodeStrings(string $jsonPath, int $expectedCount, string $expectedCountry)
{
    $matches = self::getUnicodeDocumentCrawler()->find($jsonPath);

    $this->assertCount($expectedCount, $matches);

    // Nothing to inspect when no match is expected.
    if ($expectedCount <= 0) {
        return;
    }

    $firstMatch = $matches[0];
    $this->assertSame($expectedCountry, $firstMatch['country']);
}
544+
/**
 * Datasets for testFilterWithUnicodeStrings(), each shaped as
 * [filter expression, expected number of matches, country of the first match].
 *
 * The expressions are single-quoted PHP literals, so the \uXXXX sequences are
 * passed through verbatim. The country is an empty string where no match is
 * expected.
 */
public static function provideFilterWithUnicodeProvider(): array
{
    $datasets = [];

    $datasets[] = ['$.users[?(@.name == "caf\u00e9")]', 1, 'France'];
    $datasets[] = ['$.users[?(@.name == "\u65e5\u672c\u592a\u90ce")]', 1, 'Japan'];
    $datasets[] = ['$.users[?(@.name == "Jos\u00e9")]', 1, 'Spain'];
    // Plain ASCII comparison without any escape.
    $datasets[] = ['$.users[?(@.name == "John")]', 1, 'USA'];
    // \u0020 is a space; this dataset expects zero matches.
    $datasets[] = ['$.users[?(@.name == "NonExistent\u0020Name")]', 0, ''];

    return $datasets;
}
575+
/**
 * Checks that a path containing a malformed \u escape can still be evaluated:
 * find() must return an array. The content of that array is not inspected.
 *
 * @dataProvider provideInvalidUnicodeSequenceProvider
 */
public function testInvalidUnicodeSequencesAreProcessedAsLiterals(string $jsonPath)
{
    $result = self::getUnicodeDocumentCrawler()->find($jsonPath);

    $this->assertIsArray(
        $result,
        'invalid unicode sequence should be treated as literal and not throw'
    );
}
583+
/**
 * Datasets for testInvalidUnicodeSequencesAreProcessedAsLiterals(): one
 * JSONPath expression per dataset, each carrying a malformed \u escape.
 *
 * Single-quoted PHP literals keep the backslash sequences verbatim.
 */
public static function provideInvalidUnicodeSequenceProvider(): array
{
    return [
        // Non-hexadecimal digits after \u.
        ['$["test\uZZZZ"]'],
        // Only three hexadecimal digits after \u.
        ['$["test\u123"]'],
        // No digits at all after \u.
        ['$["test\u"]'],
    ];
}
598+
/**
 * Evaluates $jsonPath against a nested document whose keys are Japanese text
 * or contain emoji, and requires the matches to be strictly identical to
 * $expected.
 *
 * @dataProvider provideComplexUnicodePath
 */
public function testComplexUnicodePaths(string $jsonPath, array $expected)
{
    $users = [
        ['名前' => 'テスト', 'ID' => 1],
        ['名前' => 'サンプル', 'ID' => 2],
    ];

    $document = [
        'データ' => ['ユーザー' => $users],
        'special🔑' => ['value💎' => 'treasure'],
    ];

    $crawler = new JsonCrawler(json_encode($document));
    $matches = $crawler->find($jsonPath);

    $this->assertSame($expected, $matches);
}
620+
/**
 * Datasets for testComplexUnicodePaths(), each shaped as
 * [JSONPath expression, expected matches].
 *
 * The expressions are single-quoted PHP literals, so every \uXXXX sequence is
 * passed through verbatim.
 */
public static function provideComplexUnicodePath(): array
{
    $datasets = [];

    // Chained escaped names followed by an index.
    $datasets[] = [
        '$["\u30c7\u30fc\u30bf"]["\u30e6\u30fc\u30b6\u30fc"][0]["\u540d\u524d"]',
        ['テスト'],
    ];

    // Names ending in a UTF-16 surrogate pair.
    $datasets[] = [
        '$["special\ud83d\udd11"]["value\ud83d\udc8e"]',
        ['treasure'],
    ];

    // Same chain as the first dataset, with a wildcard instead of the index.
    $datasets[] = [
        '$["\u30c7\u30fc\u30bf"]["\u30e6\u30fc\u30b6\u30fc"][*]["\u540d\u524d"]',
        ['テスト', 'サンプル'],
    ];

    return $datasets;
}
638+
/**
 * Looks up a key made only of characters that each need a UTF-16 surrogate
 * pair, using the escaped form of that key in the path.
 */
public function testSurrogatePairHandling()
{
    $document = json_encode(['𝒽𝑒𝓁𝓁𝑜' => 'mathematical script hello']);
    $crawler = new JsonCrawler($document);

    // Mathematical script "hello": five characters, one surrogate pair each.
    $escapedPath = '$["\ud835\udcbd\ud835\udc52\ud835\udcc1\ud835\udcc1\ud835\udc5c"]';

    $this->assertSame(['mathematical script hello'], $crawler->find($escapedPath));
}
648+
/**
 * Checks that a name may contain the quote character that is NOT used as its
 * delimiter: double quotes inside a single-quoted name, and apostrophes inside
 * a double-quoted name.
 */
public function testMixedQuoteTypes()
{
    $crawler = new JsonCrawler(json_encode([
        'key"with"quotes' => 'value1',
        "key'with'apostrophes" => 'value2',
    ]));

    // Path => expected matches, asserted in this order.
    $expectations = [
        '$[\'key"with"quotes\']' => ['value1'],
        '$["key\'with\'apostrophes"]' => ['value2'],
    ];

    foreach ($expectations as $path => $expected) {
        $this->assertSame($expected, $crawler->find($path));
    }
}
660+
407661 private static function getBookstoreCrawler (): JsonCrawler
408662 {
409663 return new JsonCrawler (<<<JSON
@@ -453,4 +707,28 @@ private static function getSimpleCollectionCrawler(): JsonCrawler
453707{"a": [3, 5, 1, 2, 4, 6]}
454708JSON );
455709 }
710+
/**
 * Builds a crawler over the fixture document used by the Unicode and
 * escape-sequence tests.
 *
 * The top-level keys contain accented, CJK and emoji characters as well as a
 * tab, a newline, both quote characters, a backslash and a control character.
 * The "users" list holds name/country pairs for the filter tests.
 *
 * @throws \JsonException if the fixture cannot be encoded
 */
private static function getUnicodeDocumentCrawler(): JsonCrawler
{
    $document = [
        'café' => 'coffee',
        '日本' => 'Japan',
        'emoji😀' => 'smiley',
        // Explicit escape instead of a raw tab character inside the literal:
        // a raw tab is invisible and easily turned into spaces by editors and
        // formatters, which would silently break the "\t" lookups.
        "tab\there" => 'with tab',
        "new\nline" => 'with newline',
        'quote"here' => 'with quote',
        // Single backslash in the actual key.
        'backslash\\here' => 'with backslash',
        'apostrophe\'here' => 'with apostrophe',
        "control\x01char" => 'with control char',
        'users' => [
            ['name' => 'café', 'country' => 'France'],
            ['name' => '日本太郎', 'country' => 'Japan'],
            ['name' => 'John', 'country' => 'USA'],
            ['name' => 'Müller', 'country' => 'Germany'],
            ['name' => 'José', 'country' => 'Spain'],
        ],
    ];

    // Fail loudly instead of handing `false` to the crawler if encoding breaks.
    return new JsonCrawler(json_encode($document, \JSON_THROW_ON_ERROR));
}
456734}
0 commit comments