|
| 1 | +<?php |
| 2 | + |
| 3 | +/* |
| 4 | + * This file is part of the Symfony package. |
| 5 | + * |
| 6 | + * (c) Fabien Potencier <fabien@symfony.com> |
| 7 | + * |
| 8 | + * For the full copyright and license information, please view the LICENSE |
| 9 | + * file that was distributed with this source code. |
| 10 | + */ |
| 11 | + |
| 12 | +namespace Symfony\Component\DomCrawler\Tests; |
| 13 | + |
| 14 | +use PHPUnit\Framework\Attributes\DataProvider; |
| 15 | +use PHPUnit\Framework\Attributes\RequiresPhp; |
| 16 | +use Symfony\Component\DomCrawler\Crawler; |
| 17 | + |
/**
 * Crawler test case for documents that start with the HTML5 doctype.
 *
 * Every crawler obtained through createCrawler() is built with `true` as the
 * fourth Crawler constructor argument.
 * NOTE(review): presumably that flag selects the HTML5 parser - confirm
 * against Crawler::__construct().
 */
#[RequiresPhp('>=8.4')]
class NativeHtml5ParserCrawlerTest extends AbstractCrawlerTestCase
{
    /**
     * Returns the doctype prepended to the HTML fixtures built in this class.
     */
    public static function getDoctype(): string
    {
        return '<!DOCTYPE html>';
    }

    /**
     * A Crawler is traversable and iterating it yields only DOMElement instances.
     */
    public function testIteration()
    {
        $crawler = $this->createTestCrawler()->filterXPath('//li');

        $this->assertInstanceOf(\Traversable::class, $crawler);
        $this->assertContainsOnlyInstancesOf(\DOMElement::class, iterator_to_array($crawler), 'Iterating a Crawler gives DOMElement instances');
    }

    /**
     * html() returns the inner markup of the first node, throws on an empty
     * node list, and returns the provided default instead of throwing.
     */
    public function testHtml()
    {
        $this->assertSame('<img alt="Bar">', $this->createTestCrawler()->filterXPath('//a[5]')->html());

        // Whitespace between tags is collapsed so that the comparison does not depend on fixture formatting.
        $formHtml = $this->createTestCrawler()->filterXPath('//form[@id="FooFormId"]')->html();
        $this->assertSame('<input type="text" value="TextValue" name="TextName"><input type="submit" value="FooValue" name="FooName" id="FooId"><input type="button" value="BarValue" name="BarName" id="BarId"><button value="ButtonValue" name="ButtonName" id="ButtonId"><input type="submit" value="FooBarValue" name="FooBarName" form="FooFormId"><input type="text" value="FooTextValue" name="FooTextName" form="FooFormId"><input type="image" alt="ImageAlt" form="FooFormId"></button>', trim(preg_replace('~>\s+<~', '><', $formHtml)));

        // expectException() is not used here because a further assertion must run after the failing call.
        try {
            $this->createTestCrawler()->filterXPath('//ol')->html();
            $this->fail('->html() throws an \InvalidArgumentException if the node list is empty');
        } catch (\InvalidArgumentException) {
            $this->assertTrue(true, '->html() throws an \InvalidArgumentException if the node list is empty');
        }

        $this->assertSame('my value', $this->createTestCrawler(null)->filterXPath('//ol')->html('my value'));
    }

    /**
     * Checks the number of nodes matched by absolute, relative, union and
     * nested XPath expressions evaluated from the <body> node.
     */
    public function testFilterXpathComplexQueries()
    {
        $crawler = $this->createTestCrawler()->filterXPath('//body');

        $this->assertCount(0, $crawler->filterXPath('/input'));
        $this->assertCount(0, $crawler->filterXPath('/body'));
        $this->assertCount(1, $crawler->filterXPath('./body'));
        $this->assertCount(1, $crawler->filterXPath('.//body'));
        $this->assertCount(6, $crawler->filterXPath('.//input'));
        $this->assertCount(7, $crawler->filterXPath('//form')->filterXPath('//button | //input'));
        $this->assertCount(1, $crawler->filterXPath('body'));
        $this->assertCount(8, $crawler->filterXPath('//button | //input'));
        $this->assertCount(1, $crawler->filterXPath('//body'));
        $this->assertCount(1, $crawler->filterXPath('descendant-or-self::body'));
        $this->assertCount(1, $crawler->filterXPath('//div[@id="parent"]')->filterXPath('./div'), 'A child selection finds only the current div');
        $this->assertCount(3, $crawler->filterXPath('//div[@id="parent"]')->filterXPath('descendant::div'), 'A descendant selector matches the current div and its child');
        $this->assertCount(3, $crawler->filterXPath('//div[@id="parent"]')->filterXPath('//div'), 'A descendant selector matches the current div and its child');
        $this->assertCount(5, $crawler->filterXPath('(//a | //div)//img'));
        $this->assertCount(7, $crawler->filterXPath('((//a | //div)//img | //ul)'));
        $this->assertCount(7, $crawler->filterXPath('( ( //a | //div )//img | //ul )'));
        $this->assertCount(1, $crawler->filterXPath("//a[./@href][((./@id = 'Klausi|Claudiu' or normalize-space(string(.)) = 'Klausi|Claudiu' or ./@title = 'Klausi|Claudiu' or ./@rel = 'Klausi|Claudiu') or .//img[./@alt = 'Klausi|Claudiu'])]"));
    }

    /**
     * add() accepts an HTML5 string in which a <p> is nested inside an <h1>.
     */
    public function testAddHtml5()
    {
        // Ensure a bug specific to the DOM extension is fixed (see https://github.com/symfony/symfony/issues/28596)
        $crawler = $this->createCrawler();
        $crawler->add(static::getDoctype().'<html><body><h1><p>Foo</p></h1></body></html>');

        $this->assertSame('Foo', $crawler->filterXPath('//h1')->text(), '->add() adds nodes from a string');
    }

    /**
     * addHtmlContent() still finds the heading when the document is preceded
     * by a BOM, comments and/or whitespace.
     */
    #[DataProvider('validHtml5Provider')]
    public function testHtml5ParserParseContentStartingWithValidHeading(string $content)
    {
        $crawler = $this->createCrawler();
        $crawler->addHtmlContent($content);

        $this->assertSame(
            'Foo',
            $crawler->filterXPath('//h1')->text(),
            '->addHtmlContent() parses valid HTML with comment before doctype'
        );
    }

    /**
     * Provides the same HTML5 document prefixed with various leading content.
     *
     * @return iterable<string, array{string}>
     */
    public static function validHtml5Provider(): iterable
    {
        $html = static::getDoctype().'<html><body><h1><p>Foo</p></h1></body></html>';
        // UTF-8 byte order mark.
        $bom = "\xEF\xBB\xBF";

        yield 'BOM first' => [$bom.$html];
        yield 'Single comment' => ['<!-- comment -->'.$html];
        yield 'Multiline comment' => ["<!-- \n multiline comment \n -->".$html];
        yield 'Several comments' => ['<!--c--> <!--cc-->'.$html];
        yield 'Whitespaces' => [' '.$html];
        yield 'All together' => [$bom.' <!--c-->'.$html];
    }

    /**
     * The crawler built by this class and the one built by the parent
     * factory must extract a different <h1> text from the same markup.
     */
    public function testHtml5ParserNotSameAsNativeParserForSpecificHtml()
    {
        // HTML that triggers a bug specific to the DOM extension (see https://github.com/symfony/symfony/issues/28596)
        $html = static::getDoctype().'<html><body><h1><p>Foo</p></h1></body></html>';

        $html5Crawler = $this->createCrawler();
        $html5Crawler->add($html);

        // NOTE(review): relies on parent::createCrawler() configuring the Crawler
        // differently from the override below - confirm against AbstractCrawlerTestCase.
        $nativeCrawler = parent::createCrawler();
        $nativeCrawler->add($html);

        $this->assertNotSame($nativeCrawler->filterXPath('//h1')->text(), $html5Crawler->filterXPath('//h1')->text(), 'Native parser and Html5 parser must be different');
    }

    /**
     * Builds the Crawler under test, always passing `true` as the fourth
     * constructor argument.
     */
    protected function createCrawler($node = null, ?string $uri = null, ?string $baseHref = null)
    {
        return new Crawler($node, $uri, $baseHref, true);
    }
}
0 commit comments