Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions UPGRADE-8.0.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,11 @@ AssetMapper

* Remove `ImportMapConfigReader::splitPackageNameAndFilePath()`, use `ImportMapEntry::splitPackageNameAndFilePath()` instead

BrowserKit
----------

* Remove `AbstractBrowser::useHtml5Parser()`; the native HTML5 parser is used unconditionally

Cache
-----

Expand Down Expand Up @@ -117,6 +122,11 @@ DoctrineBridge
* Remove support for auto-mapping Doctrine entities to controller arguments; use explicit mapping instead
* Make `ProxyCacheWarmer` class `final`

DomCrawler
----------

* Remove argument `$useHtml5Parser` of `Crawler`'s constructor; the native HTML5 parser is used unconditionally

ExpressionLanguage
------------------

Expand Down
22 changes: 1 addition & 21 deletions src/Symfony/Component/BrowserKit/AbstractBrowser.php
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,6 @@ abstract class AbstractBrowser
/** @psalm-var TResponse */
protected object $response;
protected Crawler $crawler;
/** @deprecated since Symfony 7.4, to be removed in Symfony 8 */
protected bool $useHtml5Parser = true;
protected bool $insulated = false;
protected ?string $redirect;
protected bool $followRedirects = true;
Expand Down Expand Up @@ -202,24 +200,6 @@ public function getCrawler(): Crawler
return $this->crawler ?? throw new BadMethodCallException(\sprintf('The "request()" method must be called before "%s()".', __METHOD__));
}

/**
 * Stores the flag that is forwarded to the Crawler constructor as its
 * HTML5-parser switch when this browser builds a crawler from a response.
 *
 * @deprecated since Symfony 7.4, Symfony 8 will unconditionally use the native HTML5 parser
 *
 * @return $this
 */
public function useHtml5Parser(bool $useHtml5Parser): static
{
    // The deprecation notice is only raised when running on PHP 8.4 or newer.
    $runsOnPhp84OrNewer = 80400 <= \PHP_VERSION_ID;

    if ($runsOnPhp84OrNewer) {
        trigger_deprecation('symfony/browser-kit', '7.4', 'Method "%s()" is deprecated. Symfony 8 will unconditionally use the native HTML5 parser.', __METHOD__);
    }

    $this->useHtml5Parser = $useHtml5Parser;

    return $this;
}

/**
* Returns the current BrowserKit Response instance.
*/
Expand Down Expand Up @@ -507,7 +487,7 @@ protected function createCrawlerFromContent(string $uri, string $content, string
return null;
}

$crawler = new Crawler(null, $uri, null, $this->useHtml5Parser);
$crawler = new Crawler(null, $uri, null);
$crawler->addContent($content, $type);

return $crawler;
Expand Down
5 changes: 5 additions & 0 deletions src/Symfony/Component/BrowserKit/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
CHANGELOG
=========

8.0
---

* Remove `AbstractBrowser::useHtml5Parser()`; the native HTML5 parser is used unconditionally

7.4
---

Expand Down
5 changes: 5 additions & 0 deletions src/Symfony/Component/DomCrawler/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,11 @@
CHANGELOG
=========

8.0
---

* Remove argument `$useHtml5Parser` of `Crawler`'s constructor; the native HTML5 parser is used unconditionally

7.4
---

Expand Down
93 changes: 1 addition & 92 deletions src/Symfony/Component/DomCrawler/Crawler.php
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@

namespace Symfony\Component\DomCrawler;

use Masterminds\HTML5;
use Symfony\Component\CssSelector\CssSelectorConverter;

/**
Expand Down Expand Up @@ -53,23 +52,15 @@ class Crawler implements \Countable, \IteratorAggregate
*/
private bool $isHtml = true;

private ?HTML5 $html5Parser = null;

/**
* @param \DOMNodeList|\DOMNode|\DOMNode[]|string|null $node A Node to use as the base for the crawling
*/
public function __construct(
\DOMNodeList|\DOMNode|array|string|null $node = null,
protected ?string $uri = null,
?string $baseHref = null,
private bool $useHtml5Parser = true,
) {
if (\PHP_VERSION_ID >= 80400 && !$useHtml5Parser) {
trigger_deprecation('symfony/dom-crawler', '7.4', 'Disabling HTML5 parsing is deprecated. Symfony 8 will unconditionally use the native HTML5 parser.');
}

$this->baseHref = $baseHref ?: $uri;
$this->html5Parser = \PHP_VERSION_ID < 80400 && $useHtml5Parser ? new HTML5(['disable_html_ns' => true]) : null;
$this->cachedNamespaces = new \ArrayObject();

$this->add($node);
Expand Down Expand Up @@ -175,7 +166,7 @@ public function addContent(string $content, ?string $type = null): void
*/
public function addHtmlContent(string $content, string $charset = 'UTF-8'): void
{
$dom = $this->parseHtmlString($content, $charset);
$dom = $this->parseXhtml($content, $charset);
$this->addDocument($dom);

$base = $this->filterRelativeXPath('descendant-or-self::base')->extract(['href']);
Expand Down Expand Up @@ -609,10 +600,6 @@ public function html(?string $default = null): string
$node = $this->getNode(0);
$owner = $node->ownerDocument;

if ($this->html5Parser && '<!DOCTYPE html>' === $owner->saveXML($owner->childNodes[0])) {
$owner = $this->html5Parser;
}

$html = '';
foreach ($node->childNodes as $child) {
$html .= $owner->saveHTML($child);
Expand All @@ -630,10 +617,6 @@ public function outerHtml(): string
$node = $this->getNode(0);
$owner = $node->ownerDocument;

if ($this->html5Parser && '<!DOCTYPE html>' === $owner->saveXML($owner->childNodes[0])) {
$owner = $this->html5Parser;
}

return $owner->saveHTML($node);
}

Expand Down Expand Up @@ -1064,48 +1047,8 @@ protected function sibling(\DOMNode $node, string $siblingDir = 'nextSibling'):
return $nodes;
}

/**
 * Parses the given markup with the configured HTML5 parser.
 *
 * When the requested charset is not supported, the content is first
 * converted to HTML entities and then parsed as UTF-8.
 */
private function parseHtml5(string $htmlContent, string $charset = 'UTF-8'): \DOMDocument
{
    [$markup, $encoding] = $this->supportsEncoding($charset)
        ? [$htmlContent, $charset]
        : [$this->convertToHtmlEntities($htmlContent, $charset), 'UTF-8'];

    return $this->html5Parser->parse($markup, ['encoding' => $encoding]);
}

/**
 * Tells whether mbstring can convert to the given encoding.
 *
 * The check converts an empty string from UTF-8 to $encoding: a supported
 * encoding yields an empty string back, anything else is reported as
 * unsupported.
 *
 * @param string $encoding The encoding name to probe
 *
 * @return bool True when the conversion of an empty string succeeds
 */
private function supportsEncoding(string $encoding): bool
{
    try {
        // "@" silences the warning raised for unknown encodings on runtimes
        // that warn instead of throwing.
        return '' === @mb_convert_encoding('', $encoding, 'UTF-8');
    } catch (\Throwable) {
        // The exception itself carries no information we need: any failure
        // simply means the encoding cannot be used.
        return false;
    }
}

private function parseXhtml(string $htmlContent, string $charset = 'UTF-8'): \DOMDocument
{
if (\PHP_VERSION_ID < 80400 || !$this->useHtml5Parser) {
if ('UTF-8' === $charset && preg_match('//u', $htmlContent)) {
$htmlContent = '<?xml encoding="UTF-8">'.$htmlContent;
} else {
$htmlContent = $this->convertToHtmlEntities($htmlContent, $charset);
}

$internalErrors = libxml_use_internal_errors(true);

$dom = new \DOMDocument('1.0', $charset);
$dom->validateOnParse = true;

if ('' !== trim($htmlContent)) {
@$dom->loadHTML($htmlContent);
}

libxml_use_internal_errors($internalErrors);

return $dom;
}

$document = @\Dom\HTMLDocument::createFromString($htmlContent, \Dom\HTML_NO_DEFAULT_NS, $charset);
$htmlContent = $document->saveXml();
$charset = $document->inputEncoding;
Expand Down Expand Up @@ -1202,7 +1145,6 @@ private function createSubCrawler(\DOMNodeList|\DOMNode|array|string|null $nodes
$crawler->document = $this->document;
$crawler->namespaces = $this->namespaces;
$crawler->cachedNamespaces = $this->cachedNamespaces;
$crawler->html5Parser = $this->html5Parser;

return $crawler;
}
Expand All @@ -1219,39 +1161,6 @@ private function createCssSelectorConverter(): CssSelectorConverter
return new CssSelectorConverter($this->isHtml);
}

/**
 * Turns an HTML string into a DOMDocument.
 *
 * Content accepted by canParseHtml5String() goes through parseHtml5();
 * everything else is handed to parseXhtml().
 */
private function parseHtmlString(string $content, string $charset): \DOMDocument
{
    return $this->canParseHtml5String($content)
        ? $this->parseHtml5($content, $charset)
        : $this->parseXhtml($content, $charset);
}

/**
 * Tells whether the content should be parsed with the HTML5 parser.
 *
 * That is the case when a parser instance is set, the content contains a
 * case-insensitive "<!doctype html>" and whatever precedes that doctype is
 * either empty or a valid HTML5 heading.
 */
private function canParseHtml5String(string $content): bool
{
    // Without a parser instance the doctype is not even looked up.
    $doctypeOffset = $this->html5Parser ? stripos($content, '<!doctype html>') : false;

    if (false === $doctypeOffset) {
        return false;
    }

    $prefix = substr($content, 0, $doctypeOffset);
    if ('' === $prefix) {
        return true;
    }

    return $this->isValidHtml5Heading($prefix);
}

/**
 * Checks that the text found before the doctype consists only of an
 * optional byte-order mark, whitespace and HTML comments.
 */
private function isValidHtml5Heading(string $heading): bool
{
    $matchCount = preg_match('/^\x{FEFF}?\s*(<!--[^>]*?-->\s*)*$/u', $heading);

    return 1 === $matchCount;
}

private function normalizeWhitespace(string $string): string
{
return trim(preg_replace("/(?:[ \n\r\t\x0C]{2,}+|[\n\r\t\x0C])/", ' ', $string), " \n\r\t\x0C");
Expand Down
18 changes: 0 additions & 18 deletions src/Symfony/Component/DomCrawler/Tests/CrawlerTestCase.php
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,6 @@
namespace Symfony\Component\DomCrawler\Tests;

use PHPUnit\Framework\Attributes\DataProvider;
use PHPUnit\Framework\Attributes\Group;
use PHPUnit\Framework\Attributes\IgnoreDeprecations;
use PHPUnit\Framework\Attributes\RequiresPhpExtension;
use PHPUnit\Framework\Error\Notice;
use PHPUnit\Framework\TestCase;
Expand Down Expand Up @@ -1329,22 +1327,6 @@ public function testAddXmlContentWithErrors()
libxml_use_internal_errors($internalErrors);
}

#[IgnoreDeprecations]
#[Group('legacy')]
public function testHtml5ParserNotSameAsNativeParserForSpecificHtml()
{
    // This markup triggers a bug specific to the DOM extension (see https://github.com/symfony/symfony/issues/28596)
    $markup = '<!DOCTYPE html><html><body><h1><p>Foo</p></h1></body></html>';

    $crawlerWithHtml5Parser = new Crawler(null, null, null, true);
    $crawlerWithHtml5Parser->add($markup);

    $crawlerWithNativeParser = new Crawler(null, null, null, false);
    $crawlerWithNativeParser->add($markup);

    // Read the <h1> text from each crawler; the two parsers are expected to disagree.
    $nativeHeadingText = $crawlerWithNativeParser->filterXPath('//h1')->text();
    $html5HeadingText = $crawlerWithHtml5Parser->filterXPath('//h1')->text();

    $this->assertNotEquals($nativeHeadingText, $html5HeadingText, 'Native parser and Html5 parser must be different');
}

public function testAddHtml5()
{
// Ensure a bug specific to the DOM extension is fixed (see https://github.com/symfony/symfony/issues/28596)
Expand Down

This file was deleted.

Loading
Loading