Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
222 changes: 222 additions & 0 deletions src/Symfony/Component/DomCrawler/AbstractUriElement.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,222 @@
<?php

/*
* This file is part of the Symfony package.
*
* (c) Fabien Potencier <fabien@symfony.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/

namespace Symfony\Component\DomCrawler;

/**
* Any HTML element that can link to a URI.
*
* @author Fabien Potencier <fabien@symfony.com>
*
* @api
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

All @api tags must be removed, we don't use them anymore.

*/
abstract class AbstractUriElement implements UriElementInterface
{
/**
* @var \DOMElement The wrapped DOM element; expected to be assigned by setNode() implementations
*/
protected $node;

/**
* @var string|null The upper-cased method to use for the link, or null when an empty method was given
*/
protected $method;

/**
* @var string The URI of the page where the link is embedded (or the base href)
*/
protected $currentUri;

/**
 * Creates the element and stores the context needed to resolve its URI.
 *
 * @param \DOMElement $node       A \DOMElement instance; it is handed to setNode()
 * @param string      $currentUri The URI of the page where the link is embedded (or the base href)
 * @param string|null $method     The method to use for the link (GET by default); it is
 *                                stored upper-cased, or as null when an empty value is given
 *
 * @throws \InvalidArgumentException if the current URI does not start with "http" or "file"
 * @throws \LogicException           if setNode() rejects the given node
 */
public function __construct(\DOMElement $node, $currentUri, $method = 'GET')
{
    // Only the first four characters are inspected, case-insensitively.
    $scheme = strtolower(substr($currentUri, 0, 4));

    if (!in_array($scheme, array('http', 'file'), true)) {
        throw new \InvalidArgumentException(sprintf('Current URI must be an absolute URL ("%s").', $currentUri));
    }

    $this->setNode($node);
    $this->method = $method ? strtoupper($method) : null;
    $this->currentUri = $currentUri;
}

/**
* Gets the node associated with this link.
*
* @return \DOMElement The DOM element held in the $node property
*/
public function getNode()
{
return $this->node;
}

/**
* Gets the method associated with this link.
*
* @return string|null The method as stored by the constructor (upper-cased), or null when an empty method was given
*/
public function getMethod()
{
return $this->method;
}

/**
 * Gets the URI associated with this link.
 *
 * The raw URI returned by getRawUri() is trimmed and, unless it already
 * carries a scheme, resolved against the current URI.
 *
 * @return string The URI
 */
public function getUri()
{
    $uri = trim($this->getRawUri());

    // absolute URL?
    if (null !== parse_url($uri, PHP_URL_SCHEME)) {
        return $uri;
    }

    // empty URI; compared strictly so that the relative reference "0" is still resolved
    if ('' === $uri) {
        return $this->currentUri;
    }

    // an anchor: replaces any anchor of the current URI
    if ('#' === $uri[0]) {
        return $this->cleanupAnchor($this->currentUri).$uri;
    }

    $baseUri = $this->cleanupUri($this->currentUri);

    // a query string: replaces any query string and anchor of the current URI
    if ('?' === $uri[0]) {
        return $baseUri.$uri;
    }

    // absolute URL with relative schema: keep only what precedes the first "//"
    if (0 === strpos($uri, '//')) {
        return preg_replace('#^([^/]*)//.*$#', '$1', $baseUri).$uri;
    }

    // reduce the base to everything up to (not including) the first "/" after "//"
    $baseUri = preg_replace('#^(.*?//[^/]*)(?:\/.*)?$#', '$1', $baseUri);

    // absolute path
    if ('/' === $uri[0]) {
        return $baseUri.$uri;
    }

    // relative path
    // parse_url() returns null when the remainder has no path component (and
    // false when it cannot be parsed); both are normalized to an empty path.
    $path = (string) parse_url(substr($this->currentUri, strlen($baseUri)), PHP_URL_PATH);

    // keep the directory part of the current path, i.e. what precedes its last "/"
    $lastSlash = strrpos($path, '/');
    $directory = false === $lastSlash ? '' : substr($path, 0, $lastSlash);

    $path = $this->canonicalizePath($directory.'/'.$uri);

    return $baseUri.('' === $path || '/' !== $path[0] ? '/' : '').$path;
}

/**
* Returns raw URI data.
*
* getUri() trims this value and resolves it against the current URI.
*
* @return string The unresolved URI of the element
*/
abstract protected function getRawUri();

/**
 * Returns the canonicalized URI path (see RFC 3986, section 5.2.4).
 *
 * "." segments are dropped and ".." segments remove the segment that
 * precedes them. An empty path and "/" are returned unchanged.
 *
 * @param string $path URI path
 *
 * @return string The path without "." and ".." segments
 */
protected function canonicalizePath($path)
{
    if ('' === $path || '/' === $path) {
        return $path;
    }

    $segments = explode('/', $path);

    // A trailing "." or ".." segment designates a directory, so the result
    // must end with a slash. Only complete dot segments qualify: a name that
    // merely ends with a dot (e.g. "file.") is left untouched.
    $lastSegment = end($segments);
    if ('.' === $lastSegment || '..' === $lastSegment) {
        $segments[] = '';
    }

    $output = array();

    foreach ($segments as $segment) {
        if ('..' === $segment) {
            array_pop($output);
        } elseif ('.' !== $segment) {
            $output[] = $segment;
        }
    }

    return implode('/', $output);
}

/**
* Sets current \DOMElement instance.
*
* Called by the constructor; implementations are expected to validate the
* node and store it in the $node property.
*
* @param \DOMElement $node A \DOMElement instance
*
* @throws \LogicException If the given node is not an element supported by the implementing class
*/
abstract protected function setNode(\DOMElement $node);

/**
* Removes the query string and the anchor from the given uri.
*
* The anchor is stripped first, then the query string.
*
* @param string $uri The uri to clean
*
* @return string The uri without its "#..." and "?..." parts
*/
private function cleanupUri($uri)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

cleanUpUri() seems more correct to me.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

and same below

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

These functions were not introduced by this PR, do you think it's still relevant I rename them here?

{
return $this->cleanupQuery($this->cleanupAnchor($uri));
}

/**
 * Strips the query string from the uri.
 *
 * Everything from the first "?" onwards is dropped; a uri without "?" is
 * returned unchanged.
 *
 * @param string $uri
 *
 * @return string
 */
private function cleanupQuery($uri)
{
    $queryStart = strpos($uri, '?');

    return false === $queryStart ? $uri : substr($uri, 0, $queryStart);
}

/**
 * Strips the anchor from the uri.
 *
 * Everything from the first "#" onwards is dropped; a uri without "#" is
 * returned unchanged.
 *
 * @param string $uri
 *
 * @return string
 */
private function cleanupAnchor($uri)
{
    $anchorStart = strpos($uri, '#');

    if (false === $anchorStart) {
        return $uri;
    }

    return substr($uri, 0, $anchorStart);
}
}
6 changes: 6 additions & 0 deletions src/Symfony/Component/DomCrawler/CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,12 @@
CHANGELOG
=========

2.7.0
-----

* All the URI parsing logic has been abstracted into the `AbstractUriElement` class. The `Link` class is now a child of `AbstractUriElement`, which implements the new `UriElementInterface` describing the common `getNode`, `getMethod` and `getUri` methods.
* Added an `Image` class to crawl images and parse their `src` attribute, and `selectImage`, `image`, `images` methods in `Crawler`, the image version of the equivalent `link` methods.

2.5.0
-----

Expand Down
53 changes: 53 additions & 0 deletions src/Symfony/Component/DomCrawler/Crawler.php
Original file line number Diff line number Diff line change
Expand Up @@ -691,6 +691,22 @@ public function selectLink($value)
return $this->filterRelativeXPath($xpath);
}

/**
 * Selects images by alt value.
 *
 * An img element matches when its whitespace-normalized alt attribute
 * contains the given value.
 *
 * @param string $value The image alt
 *
 * @return Crawler A new instance of Crawler with the filtered list of nodes
 */
public function selectImage($value)
{
    $altLiteral = static::xpathLiteral($value);

    return $this->filterRelativeXPath(
        sprintf('descendant-or-self::img[contains(normalize-space(string(@alt)), %s)]', $altLiteral)
    );
}

/**
* Selects a button by name or alt value for images.
*
Expand Down Expand Up @@ -749,6 +765,43 @@ public function links()
return $links;
}

/**
 * Builds an Image object from the first node in the list.
 *
 * The image is created with the crawler's base href as its current URI.
 *
 * @return Image An Image instance
 *
 * @throws \InvalidArgumentException If the current node list is empty
 */
public function image()
{
    if (0 === count($this)) {
        throw new \InvalidArgumentException('The current node list is empty.');
    }

    return new Image($this->getNode(0), $this->baseHref);
}

/**
 * Builds an Image object for every node in the list.
 *
 * Each image is created with the crawler's base href as its current URI.
 *
 * @return Image[] An array of Image instances, in iteration order
 */
public function images()
{
    $result = array();

    foreach ($this as $element) {
        array_push($result, new Image($element, $this->baseHref));
    }

    return $result;
}

/**
* Returns a Form object for the first node in the list.
*
Expand Down
39 changes: 39 additions & 0 deletions src/Symfony/Component/DomCrawler/Image.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
<?php

/*
* This file is part of the Symfony package.
*
* (c) Fabien Potencier <fabien@symfony.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/

namespace Symfony\Component\DomCrawler;

/**
 * Image represents an HTML image (an HTML img tag).
 */
class Image extends AbstractUriElement
{
    /**
     * Creates an image; no method argument is passed on, so the parent default applies.
     *
     * @param \DOMElement $node       The img element to wrap
     * @param string      $currentUri The URI the image source is resolved against
     */
    public function __construct(\DOMElement $node, $currentUri)
    {
        parent::__construct($node, $currentUri);
    }

    /**
     * Returns the value of the node's "src" attribute.
     *
     * @return string
     */
    protected function getRawUri()
    {
        return $this->node->getAttribute('src');
    }

    /**
     * Stores the node after checking that it is an img element.
     *
     * @param \DOMElement $node A \DOMElement instance
     *
     * @throws \LogicException If the given node is not an img element
     */
    protected function setNode(\DOMElement $node)
    {
        if ('img' === $node->nodeName) {
            $this->node = $node;

            return;
        }

        throw new \LogicException(sprintf('Unable to visualize a "%s" tag.', $node->nodeName));
    }
}
Loading