Skip to content
Closed
Show file tree
Hide file tree
Changes from 16 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
f8d7aec
Got initial API added
SammyK Feb 13, 2015
26e4ed2
Got mostly working. I think Hex is broken. Need to add int.
SammyK Feb 13, 2015
3c5fcac
Updated to use zend_string but getting memory leaks. Doh!
SammyK Feb 20, 2015
e96b077
Got random_bytes() working again.
SammyK Feb 20, 2015
aa0ca69
Fix random_int() checking size of wrong var.
SammyK Feb 20, 2015
b32e0d0
Got random_int() seemingly working thanks to @ircmaxell
SammyK Feb 20, 2015
2c659ed
Make maximum argument to random_int() optional with default to INT_MAX.
SammyK Feb 20, 2015
a1e6229
Remove random_hex(). *sadface*
SammyK Feb 20, 2015
bbc9198
Detect presence of /dev/arandom
lt Feb 21, 2015
7a99db6
Tidy up `php_random_bytes` and add /dev/arandom
lt Feb 21, 2015
3d413ad
Ensure random_int() uses a uniform distribution
lt Feb 22, 2015
513d5c9
Allow full integer range from random_int()
lt Feb 24, 2015
77f99cc
Use arc4random where present
lt Feb 24, 2015
7ef5754
Merge pull request #1 from lt/rand-bytes
SammyK Feb 24, 2015
766ce0c
Add tests
SammyK Feb 24, 2015
99e36d6
Fix wording in error message. Add check for max value.
SammyK Feb 24, 2015
c6fc391
Fix return types on error. Avoid a warning on BSD systems.
SammyK Feb 25, 2015
ab02b7b
Add fd caching
lt Mar 4, 2015
fd0570b
Merge remote-tracking branch 'leigh/rand-bytes' into rand-bytes
SammyK Mar 13, 2015
7ae4917
Fixes based on PR feedback
lt Mar 27, 2015
a67e42f
Changes based on feedback
lt Apr 9, 2015
f8a6d38
Normalized the return value for errors & updated tests.
SammyK Apr 10, 2015
2990341
Fix merge conflicts
SammyK Apr 10, 2015
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 9 additions & 7 deletions Zend/Zend.m4
Original file line number Diff line number Diff line change
Expand Up @@ -425,11 +425,13 @@ if test -r "/dev/urandom" && test -c "/dev/urandom"; then
AC_MSG_RESULT(yes)
else
AC_MSG_RESULT(no)
AC_MSG_CHECKING(whether /dev/arandom exists)
if test -r "/dev/arandom" && test -c "/dev/arandom"; then
AC_DEFINE([HAVE_DEV_ARANDOM], 1, [Define if the target system has /dev/arandom device])
AC_MSG_RESULT(yes)
else
AC_MSG_RESULT(no)
fi
fi

AC_MSG_CHECKING(whether /dev/arandom exists)
if test -r "/dev/arandom" && test -c "/dev/arandom"; then
AC_DEFINE([HAVE_DEV_ARANDOM], 1, [Define if the target system has /dev/arandom device])
AC_MSG_RESULT(yes)
else
AC_MSG_RESULT(no)
fi

13 changes: 13 additions & 0 deletions ext/standard/basic_functions.c
Original file line number Diff line number Diff line change
Expand Up @@ -1897,6 +1897,16 @@ ZEND_END_ARG_INFO()
ZEND_BEGIN_ARG_INFO(arginfo_mt_getrandmax, 0)
ZEND_END_ARG_INFO()
/* }}} */
/* {{{ random.c */
/* random_bytes(int length): the length is parsed with "l" and is mandatory,
 * so exactly one argument is required. */
ZEND_BEGIN_ARG_INFO_EX(arginfo_random_bytes, 0, 0, 1)
	ZEND_ARG_INFO(0, length)
ZEND_END_ARG_INFO()

/* random_int([int min, int max]): both arguments are parsed with "|ll",
 * so none is required at the arginfo level. */
ZEND_BEGIN_ARG_INFO_EX(arginfo_random_int, 0, 0, 0)
	ZEND_ARG_INFO(0, min)
	ZEND_ARG_INFO(0, max)
ZEND_END_ARG_INFO()
/* }}} */
/* {{{ sha1.c */
ZEND_BEGIN_ARG_INFO_EX(arginfo_sha1, 0, 0, 1)
ZEND_ARG_INFO(0, str)
Expand Down Expand Up @@ -2820,6 +2830,9 @@ const zend_function_entry basic_functions[] = { /* {{{ */
PHP_FE(mt_srand, arginfo_mt_srand)
PHP_FE(mt_getrandmax, arginfo_mt_getrandmax)

PHP_FE(random_bytes, arginfo_random_bytes)
PHP_FE(random_int, arginfo_random_int)

#if HAVE_GETSERVBYNAME
PHP_FE(getservbyname, arginfo_getservbyname)
#endif
Expand Down
8 changes: 7 additions & 1 deletion ext/standard/config.m4
Original file line number Diff line number Diff line change
Expand Up @@ -592,6 +592,11 @@ dnl Check for atomic operation API availability in Solaris
dnl
AC_CHECK_HEADERS([atomic.h])

dnl
dnl Check whether arc4random_buf() is declared (BSD systems).
dnl The result is exposed as HAVE_DECL_ARC4RANDOM_BUF, which random.c tests.
dnl
AC_CHECK_DECLS([arc4random_buf])

dnl
dnl Setup extension sources
dnl
Expand All @@ -605,7 +610,8 @@ PHP_NEW_EXTENSION(standard, array.c base64.c basic_functions.c browscap.c crc32.
incomplete_class.c url_scanner_ex.c ftp_fopen_wrapper.c \
http_fopen_wrapper.c php_fopen_wrapper.c credits.c css.c \
var_unserializer.c ftok.c sha1.c user_filters.c uuencode.c \
filters.c proc_open.c streamsfuncs.c http.c password.c,,,
filters.c proc_open.c streamsfuncs.c http.c password.c \
random.c,,,
-DZEND_ENABLE_STATIC_TSRMLS_CACHE=1)

PHP_ADD_MAKEFILE_FRAGMENT
Expand Down
2 changes: 1 addition & 1 deletion ext/standard/config.w32
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ EXTENSION("standard", "array.c base64.c basic_functions.c browscap.c \
url_scanner_ex.c ftp_fopen_wrapper.c http_fopen_wrapper.c \
php_fopen_wrapper.c credits.c css.c var_unserializer.c ftok.c sha1.c \
user_filters.c uuencode.c filters.c proc_open.c password.c \
streamsfuncs.c http.c flock_compat.c", false /* never shared */,
streamsfuncs.c http.c flock_compat.c random.c", false /* never shared */,
'/DZEND_ENABLE_STATIC_TSRMLS_CACHE=1');
PHP_INSTALL_HEADERS("", "ext/standard");
if (PHP_MBREGEX != "no") {
Expand Down
33 changes: 33 additions & 0 deletions ext/standard/php_random.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
/*
+----------------------------------------------------------------------+
| PHP Version 7 |
+----------------------------------------------------------------------+
| Copyright (c) 1997-2015 The PHP Group |
+----------------------------------------------------------------------+
| This source file is subject to version 3.01 of the PHP license, |
| that is bundled with this package in the file LICENSE, and is |
| available through the world-wide-web at the following url: |
| http://www.php.net/license/3_01.txt |
| If you did not receive a copy of the PHP license and are unable to |
| obtain it through the world-wide-web, please send a note to |
| license@php.net so we can mail you a copy immediately. |
+----------------------------------------------------------------------+
| Authors: Sammy Kaye Powers <me@sammyk.me> |
+----------------------------------------------------------------------+
*/

/* $Id$ */

#ifndef PHP_RANDOM_H
#define PHP_RANDOM_H

/* Userland functions implemented in ext/standard/random.c */
PHP_FUNCTION(random_bytes); /* string random_bytes(int length) */
PHP_FUNCTION(random_int); /* int random_int(int min, int max) */
#endif /* PHP_RANDOM_H */

/*
* Local variables:
* tab-width: 4
* c-basic-offset: 4
* End:
*/
1 change: 1 addition & 0 deletions ext/standard/php_standard.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@
#include "php_ftok.h"
#include "php_type.h"
#include "php_password.h"
#include "php_random.h"

#define phpext_standard_ptr basic_functions_module_ptr
PHP_MINIT_FUNCTION(standard_filters);
Expand Down
170 changes: 170 additions & 0 deletions ext/standard/random.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
/*
+----------------------------------------------------------------------+
| PHP Version 7 |
+----------------------------------------------------------------------+
| Copyright (c) 1997-2015 The PHP Group |
+----------------------------------------------------------------------+
| This source file is subject to version 3.01 of the PHP license, |
| that is bundled with this package in the file LICENSE, and is |
| available through the world-wide-web at the following url: |
| http://www.php.net/license/3_01.txt |
| If you did not receive a copy of the PHP license and are unable to |
| obtain it through the world-wide-web, please send a note to |
| license@php.net so we can mail you a copy immediately. |
+----------------------------------------------------------------------+
| Authors: Sammy Kaye Powers <me@sammyk.me> |
+----------------------------------------------------------------------+
*/

/* $Id$ */

#include <stdlib.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <math.h>

#include "php.h"

#if PHP_WIN32
# include "win32/winutil.h"
#endif

/*
 * Fill the buffer `bytes` with `size` bytes from the platform's random source.
 *
 * Source selection, in order:
 *   - Windows:                    php_win32_get_random_bytes()
 *   - arc4random_buf() declared:  arc4random_buf()
 *   - otherwise:                  /dev/arandom if detected at configure time,
 *                                 else /dev/urandom if detected
 *
 * Returns SUCCESS once the whole buffer has been filled. On any failure an
 * E_WARNING is raised and FAILURE is returned; the buffer contents must then
 * be treated as unusable.
 */
static int php_random_bytes(void *bytes, size_t size)
{
#if PHP_WIN32
	/* Defer to CryptGenRandom on Windows */
	if (php_win32_get_random_bytes(bytes, size) == FAILURE) {
		php_error_docref(NULL, E_WARNING, "Could not gather sufficient random data");
		return FAILURE;
	}
#elif HAVE_DECL_ARC4RANDOM_BUF
	arc4random_buf(bytes, size);
#else
	int fd = -1;
	size_t read_bytes = 0;

#if HAVE_DEV_ARANDOM
	fd = open("/dev/arandom", O_RDONLY);
#elif HAVE_DEV_URANDOM
	fd = open("/dev/urandom", O_RDONLY);
#endif
	/* Also reached when neither device was detected at configure time */
	if (fd < 0) {
		php_error_docref(NULL, E_WARNING, "Cannot open source device");
		return FAILURE;
	}

	while (read_bytes < size) {
		/* read() returns ssize_t; the counter is scoped to the loop so the
		 * other platform branches do not carry an unused variable. */
		ssize_t n = read(fd, (unsigned char *) bytes + read_bytes, size - read_bytes);

		/* Stop on error (< 0) and on end-of-file (0): a zero-length read
		 * makes no progress and would otherwise spin forever. */
		if (n <= 0) {
			break;
		}
		read_bytes += (size_t) n;
	}

	close(fd);
	if (read_bytes < size) {
		php_error_docref(NULL, E_WARNING, "Could not gather sufficient random data");
		return FAILURE;
	}
#endif

	return SUCCESS;
}

/* {{{ proto string random_bytes(int length)
Return an arbitrary length of pseudo-random bytes as binary string.
Returns FALSE (with an E_WARNING) when length is less than 1 or when the
random source cannot supply the requested amount of data. */
PHP_FUNCTION(random_bytes)
{
	zend_long size;
	zend_string *bytes;

	if (zend_parse_parameters(ZEND_NUM_ARGS(), "l", &size) == FAILURE) {
		return;
	}

	if (size < 1) {
		php_error_docref(NULL, E_WARNING, "Length must be greater than 0");
		RETURN_FALSE;
	}

	bytes = zend_string_alloc(size, 0);

	if (php_random_bytes(bytes->val, size) == FAILURE) {
		/* php_random_bytes() has already raised the warning; report the
		 * failure as FALSE, consistent with the length check above. */
		zend_string_release(bytes);
		RETURN_FALSE;
	}

	/* Terminate the buffer, as is done for every zend_string payload */
	bytes->val[size] = '\0';

	RETURN_STR(bytes);
}
/* }}} */

/* {{{ proto int random_int(int min, int max)
Return an arbitrary pseudo-random integer */
PHP_FUNCTION(random_int)
{
zend_long min = ZEND_LONG_MIN;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@SammyK Since both parameters are required, we don't need these defaults any more.

zend_long max = ZEND_LONG_MAX;
zend_ulong limit;
zend_ulong umax;
zend_ulong result;

if (ZEND_NUM_ARGS() == 1) {
php_error_docref(NULL, E_WARNING, "A minimum and maximum value are expected, only minimum given");
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

since you already have LONG_MIN and LONG_MAX, so if only minimum value min is given. then simply assume it means min to LONG_MAX?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@laruence: I thought about the same thing but then I realize that reading:
random_int(10);
might be understood as 10 being the max.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hmm, that could be a doc issue.. but it's not a big deal :)

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oops - this is a dingleberry left over from when min and max were optional args. They are both required in the current spec that's being voted on. I'll remove this check. :)

RETURN_FALSE;
}

if (zend_parse_parameters(ZEND_NUM_ARGS(), "|ll", &min, &max) == FAILURE) {
return;
}

if (min >= max) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would change >= to be >.
Reason: If I need a number between a and b, I expect that it could possibly return either a or b, if the range is inclusive (note that it isn't clear in the RFC).
In the case a == b, I would expect that this method returns a or b.
If that isn't particularly useful with fixed value being passed to the function, it is in the case of variables.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, if min == max, then there's nothing random about the answer (it's min). Hence there's no reason for the function to return a valid value (in fact, returning a value could mask bugs where you thought it was random but it wasn't).

php_error_docref(NULL, E_WARNING, "Minimum value must be less than the maximum value");
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

maybe something like: 1st arg must be less than 2nd arg is more obvious for user?

RETURN_FALSE;
}

umax = max - min;
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We need to make sure the min & max values weren't the same.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You're already checking this on line 126.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh snap. Not. Enough. Coffee! Oh wait, I'm drinking decaf for some reason today. :/


if (php_random_bytes(&result, sizeof(result)) == FAILURE) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the principal difference of this move from casting some garbage into integer? Could someone point to a theory behind this?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What do you mean? How else would you do it?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah, that was actually my question :) Maybe I'm too fixed on LCG, so just making a stab on finding some system. In a LCG one would have a seed and a kind of formula. Here, we read some sequence of bits which are then inclined to be an integer.

Probably right, at the end those bits are glued together. At the end, the integer is in the exact range of how much bits was requested. But just wondering, no further shuffling, endianness difference, etc., just taking them as is? Are there some tests on the quality of the outcome? That's basically what I was asking.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@weltling There is no need. If the input to the algorithm is a sequence of independent, uniformly-distributed random bytes then the its output is guaranteed to be independent, uniformly-distributed over the requested range and have the same "kind" of randomness as the input. In this case, since we are using trusted sources of crypto-secure pseudo-random bytes, the output is a crypto-secure pseudo-random integer.

Try thinking of it this way. Say you need random numbers in the range 0 to 13 and all you have as a source of randomness is a friend with a 20-sided die. The only way (that I know) to get the numbers you need without bias or skewing the distribution is to ask your friend to keep rolling the die until it gives a number in the desired range.

Now, say you need numbers in the range 0 to 8. You can map two disjoint subsets of the die's 0 to 19 range to the desired range and this improves the algorithm's efficiency.

I believe this is exactly analogous to how this algorithm is supposed to work. (I am making no comment about the correctness of the implementation.)

It's an old algorithm we can trust.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@weltling There's no need for any extra work. If an integer occupies 64 bits of memory, and we use a random source to set every one of those bits, the result is a random integer with the same quality as the random source.

Endianness does not matter since every byte is independently random. If the bytes are ordered AB they are equally random to being ordered BA.

There are tools that can test the quality of the random output, but you'll literally be testing the underlying source. We're putting our faith in the Linux/Windows/BSD APIs here. If it turns out that these sources are in fact low quality, then civilisation itself will collapse :)

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@tom--

I believe this is exactly analogous to how this algorithm is supposed to work.

Indeed. Find the ceiling under RAND_MAX where upper_bound % ceiling == 0, and discard all values greater than that ceiling.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@lt the world is concurrent enough to not to break because of whichever virtual RNG :) But ok, so the presumption of innocence is applied to the OS randomness sources.

@tom-- yeah, maybe also an improvement could be to ask the friend to throw more than one die at once. Possibly it could reduce the whole circles count. However not sure how reliable it would be with this method (i mean how many uniform random data one can get at once), for LCG i can say it could be done with something like AVX/SSE vectorization capabilities. Here it's probably only to be spotted empirically.

Thanks for the answers, guys.

return;
}

// Special case where no modulus is required
if (umax == ZEND_ULONG_MAX) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm pretty sure this is dead code - an ULONG_MAX is always higher than LONG_MAX, and therefore even if you have umax = ZEND_LONG_MAX - 0, it can never be as high ...

Also, for some reason random_int(0, PHP_INT_MAX) results in umax = -1 ...

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@narfbg I don't think it's dead code. If you take the absolute minimum value, (unsigned) umax = (signed) PHP_INT_MAX - (signed) PHP_INT_MIN (full negative). Which should be the same as ZEND_ULONG_MAX.

Not sure how umax is equal to -1 under those conditions either. It's unsigned...

RETURN_LONG((zend_long)result);
}

// Increment the max so the range is inclusive of max
umax++;

// Powers of two are not biased
if (umax & ~umax != umax) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

use braces make it more readable

// Ceiling under which ZEND_LONG_MAX % max == 0
limit = ZEND_ULONG_MAX - (ZEND_ULONG_MAX % umax) - 1;

// Discard numbers over the limit to avoid modulo bias
while (result > limit) {
if (php_random_bytes(&result, sizeof(result)) == FAILURE) {
return;
}
}
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So the random_int() function:

  • has a non-deterministic execution time (actually, it's quite random)
  • that doesn't have any limit
  • so the function isn't guaranteed to return at all, ever.

Naturally I understand that the likelihood of these characteristics causing problems in PHP applications is small but I think reviewers should be aware of it.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let bw = sizeof(zend_ulong) * 8. ZEND_ULONG_MAX % umax will take its largest value for umax = 2 ** (bw - 1), in which case limit = 2 ** (bw - 1) - 1. This means that all results with top bit 1 will be discarded here. For every result the probability for a set top bit is 1/2. As such the worst case probability that this loop has not stopped after generating n random numbers is 1 / 2**n.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@nikic That looks correct.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why can't we just use the RAND_RANGE macro (or something similar) for this? It seems to me that fighting against the modulo problem is a wasted effort.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@datibbaw Because RAND_RANGE is highly biased. It's not an option for crypto-quality randomness. I'm not aware of algorithms for this that don't use a form of rejection sampling.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It seems to me that fighting against the modulo problem is a wasted effort.

Get out! :)

In rand() or mt_rand() I completely agree, it would be wasted effort. In a function that is advertised as crypto-quality it's important.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, that's fair enough; I shall read more into this, thanks :)

}

RETURN_LONG((zend_long)((result % umax) + min));
}
/* }}} */

/*
* Local variables:
* tab-width: 4
* c-basic-offset: 4
* End:
* vim600: sw=4 ts=4 fdm=marker
* vim<600: sw=4 ts=4
*/
14 changes: 14 additions & 0 deletions ext/standard/tests/random/random_bytes.phpt
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
--TEST--
Test normal operation of random_bytes()
--FILE--
<?php
//-=-=-=-

// 16 raw bytes hex-encode to 32 characters
var_dump(strlen(bin2hex(random_bytes(16))));

// The return value is a string
var_dump(is_string(random_bytes(10)));

?>
--EXPECT--
int(32)
bool(true)
17 changes: 17 additions & 0 deletions ext/standard/tests/random/random_bytes_error.phpt
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
--TEST--
Test error operation of random_bytes()
--FILE--
<?php
//-=-=-=-

// Missing length argument: parameter-parsing warning, NULL result
var_dump(random_bytes());

// Length below 1: warning, FALSE result
var_dump(random_bytes(-1));

?>
--EXPECTF--
Warning: random_bytes() expects exactly 1 parameter, 0 given in %s on line %d
NULL

Warning: random_bytes(): Length must be greater than 0 in %s on line %d
bool(false)
21 changes: 21 additions & 0 deletions ext/standard/tests/random/random_int.phpt
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
--TEST--
Test normal operation of random_int()
--FILE--
<?php
//-=-=-=-

// No arguments: the default range is used
var_dump(is_int(random_int()));

// Explicit range returns an integer
var_dump(is_int(random_int(10, 100)));

// The result lies within the inclusive bounds
$x = random_int(10, 100);
var_dump($x >= 10 && $x <= 100);

// An entirely negative range yields a negative result
var_dump(random_int(-1000, -1) < 0);

?>
--EXPECT--
bool(true)
bool(true)
bool(true)
bool(true)
17 changes: 17 additions & 0 deletions ext/standard/tests/random/random_int_error.phpt
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
--TEST--
Test error operation of random_int()
--FILE--
<?php
//-=-=-=-

// Only one argument given: warning, FALSE result
var_dump(random_int(10));

// Minimum greater than maximum: warning, FALSE result
var_dump(random_int(10, 0));

?>
--EXPECTF--
Warning: random_int(): A minimum and maximum value are expected, only minimum given in %s on line %d
bool(false)

Warning: random_int(): Minimum value must be less than the maximum value in %s on line %d
bool(false)