Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion src/Symfony/Component/DomCrawler/Crawler.php
Original file line number Diff line number Diff line change
Expand Up @@ -1110,7 +1110,11 @@ private function parseXhtml(string $htmlContent, string $charset = 'UTF-8'): \DO

$internalErrors = libxml_use_internal_errors(true);

$document = \Dom\HTMLDocument::createFromString($htmlContent, \Dom\HTML_NO_DEFAULT_NS, $charset);
try {
$document = \Dom\HTMLDocument::createFromString($htmlContent, \Dom\HTML_NO_DEFAULT_NS, $charset);
} catch (\ValueError) {
$document = \Dom\HTMLDocument::createFromString($htmlContent, \Dom\HTML_NO_DEFAULT_NS);
}

libxml_use_internal_errors($internalErrors);

Expand Down
10 changes: 9 additions & 1 deletion src/Symfony/Component/DomCrawler/Tests/CrawlerTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -1335,7 +1335,7 @@ public function testAddXmlContentWithErrors()
#[Group('legacy')]
public function testHtml5ParserNotSameAsNativeParserForSpecificHtml()
{
// Html who create a bug specific to the DOM extension (see https://github.com/symfony/symfony/issues/28596)
// HTML that creates a bug specific to the DOM extension (see https://github.com/symfony/symfony/issues/28596)
$html = '<!DOCTYPE html><html><body><h1><p>Foo</p></h1></body></html>';

$html5Crawler = new Crawler(null, null, null, true);
Expand Down Expand Up @@ -1396,6 +1396,14 @@ public function testAlpineJs()
$this->assertCount(3, $crawler->filterXPath('//div'));
}

public function testInvalidCharset()
{
    // Input whose leading pseudo-header advertises "foobar", which is not a valid document encoding.
    $content = "Content-Type: text/html; charset=foobar\n\n<!DOCTYPE html><html><body>One Two Three</body></html>";

    // Merely getting this far is the main check: without a fallback for unknown charsets, parsing fails with
    // "ValueError: Dom\HTMLDocument::createFromString(): Argument #3 ($overrideEncoding) must be a valid document encoding".
    $text = $this->createCrawler($content)->text();

    // The header line is not interpreted; it ends up as ordinary text in front of the body content.
    $this->assertSame('Content-Type: text/html; charset=foobar One Two Three', $text);
}

protected function createTestCrawler($uri = null)
{
$html = $this->getDoctype().'
Expand Down