Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
157 changes: 157 additions & 0 deletions src/Symfony/Component/Serializer/Normalizer/DataUriNormalizer.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
<?php

/*
* This file is part of the Symfony package.
*
* (c) Fabien Potencier <fabien@symfony.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/

namespace Symfony\Component\Serializer\Normalizer;

use Symfony\Component\HttpFoundation\File\File;
use Symfony\Component\HttpFoundation\File\MimeType\MimeTypeGuesser;
use Symfony\Component\HttpFoundation\File\MimeType\MimeTypeGuesserInterface;
use Symfony\Component\Serializer\Exception\InvalidArgumentException;
use Symfony\Component\Serializer\Exception\UnexpectedValueException;

/**
* Normalizes an {@see \SplFileInfo} object to a data URI.
* Denormalizes a data URI to a {@see \SplFileObject} object.
*
* @author Kévin Dunglas <dunglas@gmail.com>
*/
class DataUriNormalizer implements NormalizerInterface, DenormalizerInterface
{
/**
* @var MimeTypeGuesserInterface
*/
private $mimeTypeGuesser;

public function __construct(MimeTypeGuesserInterface $mimeTypeGuesser = null)
{
if (null === $mimeTypeGuesser && class_exists('Symfony\Component\HttpFoundation\File\MimeType\MimeTypeGuesser')) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The class_exists() call is useless (if the class is missing, the MimeTypeGuesserInterface is missing too).

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is not useless in the following case: new DataUriNormalizer() (without argument).

The constructor argument is optional, so the existence of the interface will not be checked and you cannot know if MimeTypeGuesser exists or no.

$mimeTypeGuesser = MimeTypeGuesser::getInstance();
}

$this->mimeTypeGuesser = $mimeTypeGuesser;
}

/**
* {@inheritdoc}
*/
public function normalize($object, $format = null, array $context = array())
{
if (!$object instanceof \SplFileInfo) {
throw new InvalidArgumentException('The object must be an instance of "\SplFileInfo".');
}

$mimeType = $this->getMimeType($object);
$splFileObject = $this->extractSplFileObject($object);

$data = '';

$splFileObject->rewind();
while (!$splFileObject->eof()) {
$data .= $splFileObject->fgets();
}

if ('text' === explode('/', $mimeType, 2)[0]) {
return sprintf('data:%s,%s', $mimeType, rawurlencode($data));
}

return sprintf('data:%s;base64,%s', $mimeType, base64_encode($data));
}

/**
* {@inheritdoc}
*/
public function supportsNormalization($data, $format = null)
{
return $data instanceof \SplFileInfo;
}

/**
* {@inheritdoc}
*
* Regex adapted from Brian Grinstead code.
*
* @see https://gist.github.com/bgrins/6194623
*
* @throws InvalidArgumentException
* @throws UnexpectedValueException
*/
public function denormalize($data, $class, $format = null, array $context = array())
{
if (!preg_match('/^data:([a-z0-9]+\/[a-z0-9]+(;[a-z0-9\-]+\=[a-z0-9\-]+)?)?(;base64)?,[a-z0-9\!\$\&\\\'\,\(\)\*\+\,\;\=\-\.\_\~\:\@\/\?\%\s]*\s*$/i', $data)) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

^data:([a-z0-9]+\/[a-z0-9]+(;[a-z0-9\-]+\=[a-z0-9\-]+)?)?(;base64)?,
[a-z0-9\!\$\&\\\'\,\(\)\*\+\,\;\=\-\.\_\~\:\@\/\?\%\s]*\s*$

...could be made posessive/greedy for better performance...

^data:([a-z0-9]++\/[a-z0-9]++(;[a-z0-9\-]++\=[a-z0-9\-]++)?)?(;base64)?,
[a-z0-9\!\$\&\\\'\,\(\)\*\+\,\;\=\-\.\_\~\:\@\/\?\%\s]*+\s*+$

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On the other hand, is matching the string all the way to the end really necessary (with a large list of allowed characters)?

I mean we're not validating if the actual data corresponds with the encoding specification, the user could still (for example) use the base64 option but not send base64 encoded data.

We're taking the correctness of the data (more or less) for granted -- so why allow the regex engine to even bother scanning it?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There is two main reasons:

  • this is very security sensitive (in my former implementation, it was allowing to access to /etc/shadow for instance and, being not a security expert, I prefer to be conservative - what about specials chars...)
  • the PHP stream wrapper can generate an error if the data is invalid, the regex prevent it

However 👍 to make the regex as fast as possible

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Uh-oh, i can't imagine how the /etc/shadow file could get exposed since the
prefix is predetermined, can you please give me some documentation?

On 20:14, Thu, Feb 4, 2016 Kévin Dunglas notifications@github.com wrote:

In src/Symfony/Component/Serializer/Normalizer/DataUriNormalizer.php
#16164 (comment):

  •    return $data instanceof \SplFileInfo;
    
  • }
  • /**
  • \* {@inheritdoc}
    
  • *
    
  • \* Regex adapted from Brian Grinstead code.
    
  • *
    
  • \* @see https://gist.github.com/bgrins/6194623
    
  • *
    
  • \* @throws InvalidArgumentException
    
  • \* @throws UnexpectedValueException
    
  • */
    
  • public function denormalize($data, $class, $format = null, array $context = array())
  • {
  •    if (!preg_match('/^data:([a-z0-9]+\/[a-z0-9]+(;[a-z0-9-]+\=[a-z0-9-]+)?)?(;base64)?,[a-z0-9!\$&\\'\,()*+\,\;\=-._~:\@\/\?\%\s]_\s_$/i', $data)) {
    

There is two main reasons:

  • this is very security sensitive (in my former implementation, it was
    allowing to access to /etc/shadow for instance and, being not a security
    expert, I prefer to be conservative - what about specials chars...)
  • the PHP stream wrapper can generate an error if the data is invalid,
    the regex prevent it

However [image: 👍] to make the regex as fast as possible


Reply to this email directly or view it on GitHub
https://github.com/symfony/symfony/pull/16164/files#r51913006.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@dunglas the original RegEx does not match application/octet-stream because the second part of the mime type is not allowed to contain a dash.

Which means the DataUriNormalizer can't match a data uri that it produced (when no mime type info was available).

Also there's an issue in the normalizer logic, the Symfony mime type guesser tries to fetch the path of the file and assert that the file exists. This does not work with php://memory / php://temp or data: type files.

I could submit a PR with the aforementioned speedups as well as some logic to make the mime type guessing part optional (ex.: supply a mime type via $context).

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm checking out this list and apparently literal dots and underscores are also (sometimes) used in mime-type names (especially in the second part) and dashes are (sometimes) used in the first part.

So the RegEx needs to allow for those too.

throw new UnexpectedValueException('The provided "data:" URI is not valid.');
}

try {
switch ($class) {
case 'Symfony\Component\HttpFoundation\File\File':
return new File($data, false);

case 'SplFileObject':
case 'SplFileInfo':
return new \SplFileObject($data);
}
} catch (\RuntimeException $exception) {
throw new UnexpectedValueException($exception->getMessage(), $exception->getCode(), $exception);
}

throw new InvalidArgumentException(sprintf('The class parameter "%s" is not supported. It must be one of "SplFileInfo", "SplFileObject" or "Symfony\Component\HttpFoundation\File\File".', $class));
}

/**
* {@inheritdoc}
*/
public function supportsDenormalization($data, $type, $format = null)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not applying the regex to the data here too?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why not but will it be easy to debug that this validator is skipped regardless of the $type parameter just because the data coming from the user doesn't match a regex? IMO throwing an exception in denormalize() as we do right now is more intuitive.

{
$supportedTypes = array(
\SplFileInfo::class => true,
\SplFileObject::class => true,
'Symfony\Component\HttpFoundation\File\File' => true,
);

return isset($supportedTypes[$type]);
}

/**
* Gets the mime type of the object. Defaults to application/octet-stream.
*
* @param \SplFileInfo $object
*
* @return string
*/
private function getMimeType(\SplFileInfo $object)
{
if ($object instanceof File) {
return $object->getMimeType();
}

if ($this->mimeTypeGuesser && $mimeType = $this->mimeTypeGuesser->guess($object->getPathname())) {
return $mimeType;
}

return 'application/octet-stream';
}

/**
* Returns the \SplFileObject instance associated with the given \SplFileInfo instance.
*
* @param \SplFileInfo $object
*
* @return \SplFileObject
*/
private function extractSplFileObject(\SplFileInfo $object)
{
if ($object instanceof \SplFileObject) {
return $object;
}

return $object->openFile();
}
}
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
1 change: 1 addition & 0 deletions src/Symfony/Component/Serializer/Tests/Fixtures/test.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Kévin Dunglas
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ public function testInterface()
{
$this->assertInstanceOf('Symfony\Component\Serializer\Normalizer\NormalizerInterface', $this->normalizer);
$this->assertInstanceOf('Symfony\Component\Serializer\Normalizer\DenormalizerInterface', $this->normalizer);
$this->assertInstanceOf('Symfony\Component\Serializer\SerializerAwareInterface', $this->normalizer);
}

public function testSerialize()
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
<?php

/*
* This file is part of the Symfony package.
*
* (c) Fabien Potencier <fabien@symfony.com>
*
* For the full copyright and license information, please view the LICENSE
* file that was distributed with this source code.
*/

namespace Symfony\Component\Serializer\Tests\Normalizer;

use Symfony\Component\HttpFoundation\File\File;
use Symfony\Component\Serializer\Normalizer\DataUriNormalizer;

/**
* @author Kévin Dunglas <dunglas@gmail.com>
*/
class DataUriNormalizerTest extends \PHPUnit_Framework_TestCase
{
const TEST_GIF_DATA = '';
const TEST_TXT_DATA = 'data:text/plain,K%C3%A9vin%20Dunglas%0A';
const TEST_TXT_CONTENT = "Kévin Dunglas\n";

/**
* @var DataUriNormalizer
*/
private $normalizer;

public function setUp()
{
$this->normalizer = new DataUriNormalizer();
}

public function testInterface()
{
$this->assertInstanceOf('Symfony\Component\Serializer\Normalizer\NormalizerInterface', $this->normalizer);
$this->assertInstanceOf('Symfony\Component\Serializer\Normalizer\DenormalizerInterface', $this->normalizer);
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test looks useless to me.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I agree with you that it's not very useful but it guaranties that nobody remove the implements or an interface.

}

public function testSupportNormalization()
{
$this->assertFalse($this->normalizer->supportsNormalization(new \stdClass()));
$this->assertTrue($this->normalizer->supportsNormalization(new \SplFileObject('data:,Hello%2C%20World!')));
}

public function testNormalizeHttpFoundationFile()
{
$file = new File(__DIR__.'/../Fixtures/test.gif');

$this->assertSame(self::TEST_GIF_DATA, $this->normalizer->normalize($file));
}

public function testNormalizeSplFileInfo()
{
$file = new \SplFileInfo(__DIR__.'/../Fixtures/test.gif');

$this->assertSame(self::TEST_GIF_DATA, $this->normalizer->normalize($file));
}

public function testNormalizeText()
{
$file = new \SplFileObject(__DIR__.'/../Fixtures/test.txt');

$data = $this->normalizer->normalize($file);

$this->assertSame(self::TEST_TXT_DATA, $data);
$this->assertSame(self::TEST_TXT_CONTENT, file_get_contents($data));
}

public function testSupportsDenormalization()
{
$this->assertFalse($this->normalizer->supportsDenormalization('foo', 'Bar'));
$this->assertTrue($this->normalizer->supportsDenormalization(self::TEST_GIF_DATA, 'SplFileInfo'));
$this->assertTrue($this->normalizer->supportsDenormalization(self::TEST_GIF_DATA, 'SplFileObject'));
$this->assertTrue($this->normalizer->supportsDenormalization(self::TEST_TXT_DATA, 'Symfony\Component\HttpFoundation\File\File'));
}

public function testDenormalizeSplFileInfo()
{
$file = $this->normalizer->denormalize(self::TEST_TXT_DATA, 'SplFileInfo');

$this->assertInstanceOf('SplFileInfo', $file);
$this->assertSame(file_get_contents(self::TEST_TXT_DATA), $this->getContent($file));
}

public function testDenormalizeSplFileObject()
{
$file = $this->normalizer->denormalize(self::TEST_TXT_DATA, 'SplFileObject');

$this->assertInstanceOf('SplFileObject', $file);
$this->assertEquals(file_get_contents(self::TEST_TXT_DATA), $this->getContent($file));
}

public function testDenormalizeHttpFoundationFile()
{
$file = $this->normalizer->denormalize(self::TEST_GIF_DATA, 'Symfony\Component\HttpFoundation\File\File');

$this->assertInstanceOf('Symfony\Component\HttpFoundation\File\File', $file);
$this->assertSame(file_get_contents(self::TEST_GIF_DATA), $this->getContent($file->openFile()));
}

/**
* @expectedException \Symfony\Component\Serializer\Exception\UnexpectedValueException
* @expectedExceptionMessage The provided "data:" URI is not valid.
*/
public function testGiveNotAccessToLocalFiles()
{
$this->normalizer->denormalize('/etc/shadow', 'SplFileObject');
}

/**
* @expectedException \Symfony\Component\Serializer\Exception\UnexpectedValueException
* @dataProvider invalidUriProvider
*/
public function testInvalidData($uri)
{
$this->normalizer->denormalize($uri, 'SplFileObject');
}

public function invalidUriProvider()
{
return array(
array('dataxbase64'),
array('data:HelloWorld'),
array('data:text/html;charset=,%3Ch1%3EHello!%3C%2Fh1%3E'),
array('data:text/html;charset,%3Ch1%3EHello!%3C%2Fh1%3E'),
array('data:base64,iVBORw0KGgoAAAANSUhEUgAAABAAAAAQAQMAAAAlPW0iAAAABlBMVEUAAAD///+l2Z/dAAAAM0lEQVR4nGP4/5/h/1+G/58ZDrAz3D/McH8yw83NDDeNGe4Ug9C9zwz3gVLMDA/A6P9/AFGGFyjOXZtQAAAAAElFTkSuQmCC'),
array(''),
array('http://wikipedia.org'),
array('base64'),
array('iVBORw0KGgoAAAANSUhEUgAAABAAAAAQAQMAAAAlPW0iAAAABlBMVEUAAAD///+l2Z/dAAAAM0lEQVR4nGP4/5/h/1+G/58ZDrAz3D/McH8yw83NDDeNGe4Ug9C9zwz3gVLMDA/A6P9/AFGGFyjOXZtQAAAAAElFTkSuQmCC'),
array(' '),
array(' '),
);
}

/**
* @dataProvider validUriProvider
*/
public function testValidData($uri)
{
$this->assertInstanceOf('SplFileObject', $this->normalizer->denormalize($uri, 'SplFileObject'));
}

public function validUriProvider()
{
$data = array(
array(''),
array(''),
array(' '),
array('data:,Hello%2C%20World!'),
array('data:text/html,%3Ch1%3EHello%2C%20World!%3C%2Fh1%3E'),
array('data:,A%20brief%20note'),
array('data:text/html;charset=US-ASCII,%3Ch1%3EHello!%3C%2Fh1%3E'),
);

if (!defined('HHVM_VERSION')) {
// See https://github.com/facebook/hhvm/issues/6354
$data[] = array('data:text/plain;charset=utf-8;base64,SGVsbG8gV29ybGQh');
}

return $data;
}

private function getContent(\SplFileObject $file)
{
$buffer = '';
while (!$file->eof()) {
$buffer .= $file->fgets();
}

return $buffer;
}
}
4 changes: 3 additions & 1 deletion src/Symfony/Component/Serializer/composer.json
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
"symfony/yaml": "~2.8|~3.0",
"symfony/config": "~2.8|~3.0",
"symfony/property-access": "~2.8|~3.0",
"symfony/http-foundation": "~2.8|~3.0",
"doctrine/annotations": "~1.0",
"doctrine/cache": "~1.0"
},
Expand All @@ -31,7 +32,8 @@
"doctrine/cache": "For using the default cached annotation reader and metadata cache.",
"symfony/yaml": "For using the default YAML mapping loader.",
"symfony/config": "For using the XML mapping loader.",
"symfony/property-access": "For using the ObjectNormalizer."
"symfony/property-access": "For using the ObjectNormalizer.",
"symfony/http-foundation": "To use the DataUriNormalizer."
},
"autoload": {
"psr-4": { "Symfony\\Component\\Serializer\\": "" }
Expand Down