Skip to content

Commit 0fb5a25

Browse files
bug #62671 [DomCrawler] Fixing dealing with invalid charset (ThomasLandauer)
This PR was merged into the 7.4 branch.

Discussion
----------

[DomCrawler] Fixing dealing with invalid charset

| Q             | A
| ------------- | ---
| Branch?       | 7.4
| Bug fix?      | yes
| New feature?  | no
| Deprecations? | no
| Issues        | Fix #62625
| License       | MIT

Wrapping `Dom\HTMLDocument::createFromString()` into a `try...catch`.

Commits
-------

d241c56 Fixing #62625
2 parents 0af6151 + d241c56 commit 0fb5a25

File tree

2 files changed

+14
-2
lines changed

2 files changed

+14
-2
lines changed

src/Symfony/Component/DomCrawler/Crawler.php

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1110,7 +1110,11 @@ private function parseXhtml(string $htmlContent, string $charset = 'UTF-8'): \DO
11101110

11111111
$internalErrors = libxml_use_internal_errors(true);
11121112

1113-
$document = \Dom\HTMLDocument::createFromString($htmlContent, \Dom\HTML_NO_DEFAULT_NS, $charset);
1113+
try {
1114+
$document = \Dom\HTMLDocument::createFromString($htmlContent, \Dom\HTML_NO_DEFAULT_NS, $charset);
1115+
} catch (\ValueError) {
1116+
$document = \Dom\HTMLDocument::createFromString($htmlContent, \Dom\HTML_NO_DEFAULT_NS);
1117+
}
11141118

11151119
libxml_use_internal_errors($internalErrors);
11161120

src/Symfony/Component/DomCrawler/Tests/CrawlerTest.php

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1335,7 +1335,7 @@ public function testAddXmlContentWithErrors()
13351335
#[Group('legacy')]
13361336
public function testHtml5ParserNotSameAsNativeParserForSpecificHtml()
13371337
{
1338-
// Html who create a bug specific to the DOM extension (see https://github.com/symfony/symfony/issues/28596)
1338+
// HTML that creates a bug specific to the DOM extension (see https://github.com/symfony/symfony/issues/28596)
13391339
$html = '<!DOCTYPE html><html><body><h1><p>Foo</p></h1></body></html>';
13401340

13411341
$html5Crawler = new Crawler(null, null, null, true);
@@ -1396,6 +1396,14 @@ public function testAlpineJs()
13961396
$this->assertCount(3, $crawler->filterXPath('//div'));
13971397
}
13981398

1399+
public function testInvalidCharset()
1400+
{
1401+
$email = "Content-Type: text/html; charset=foobar\n\n<!DOCTYPE html><html><body>One Two Three</body></html>";
1402+
$crawler = $this->createCrawler($email);
1403+
// Not really needed anymore, since the test would already have crashed with: ValueError: Dom\HTMLDocument::createFromString(): Argument #3 ($overrideEncoding) must be a valid document encoding
1404+
$this->assertSame('Content-Type: text/html; charset=foobar One Two Three', $crawler->text());
1405+
}
1406+
13991407
protected function createTestCrawler($uri = null)
14001408
{
14011409
$html = $this->getDoctype().'

0 commit comments

Comments
 (0)