-
Notifications
You must be signed in to change notification settings - Fork 8k
[WIP] Easy User-land CSPRNG #1119
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 17 commits
f8d7aec
26e4ed2
3c5fcac
e96b077
aa0ca69
b32e0d0
2c659ed
a1e6229
bbc9198
7a99db6
3d413ad
513d5c9
77f99cc
7ef5754
766ce0c
99e36d6
c6fc391
ab02b7b
fd0570b
7ae4917
a67e42f
f8a6d38
2990341
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,33 @@ | ||
| /* | ||
| +----------------------------------------------------------------------+ | ||
| | PHP Version 7 | | ||
| +----------------------------------------------------------------------+ | ||
| | Copyright (c) 1997-2015 The PHP Group | | ||
| +----------------------------------------------------------------------+ | ||
| | This source file is subject to version 3.01 of the PHP license, | | ||
| | that is bundled with this package in the file LICENSE, and is | | ||
| | available through the world-wide-web at the following url: | | ||
| | http://www.php.net/license/3_01.txt | | ||
| | If you did not receive a copy of the PHP license and are unable to | | ||
| | obtain it through the world-wide-web, please send a note to | | ||
| | license@php.net so we can mail you a copy immediately. | | ||
| +----------------------------------------------------------------------+ | ||
| | Authors: Sammy Kaye Powers <me@sammyk.me> | | ||
| +----------------------------------------------------------------------+ | ||
| */ | ||
|
|
||
| /* $Id$ */ | ||
|
|
||
| #ifndef PHP_RANDOM_H | ||
| #define PHP_RANDOM_H | ||
|
|
||
| PHP_FUNCTION(random_bytes); | ||
| PHP_FUNCTION(random_int); | ||
| #endif | ||
|
|
||
| /* | ||
| * Local variables: | ||
| * tab-width: 4 | ||
| * c-basic-offset: 4 | ||
| * End: | ||
| */ |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,169 @@ | ||
| /* | ||
| +----------------------------------------------------------------------+ | ||
| | PHP Version 7 | | ||
| +----------------------------------------------------------------------+ | ||
| | Copyright (c) 1997-2015 The PHP Group | | ||
| +----------------------------------------------------------------------+ | ||
| | This source file is subject to version 3.01 of the PHP license, | | ||
| | that is bundled with this package in the file LICENSE, and is | | ||
| | available through the world-wide-web at the following url: | | ||
| | http://www.php.net/license/3_01.txt | | ||
| | If you did not receive a copy of the PHP license and are unable to | | ||
| | obtain it through the world-wide-web, please send a note to | | ||
| | license@php.net so we can mail you a copy immediately. | | ||
| +----------------------------------------------------------------------+ | ||
| | Authors: Sammy Kaye Powers <me@sammyk.me> | | ||
| +----------------------------------------------------------------------+ | ||
| */ | ||
|
|
||
| /* $Id$ */ | ||
|
|
||
| #include <stdlib.h> | ||
| #include <sys/stat.h> | ||
| #include <fcntl.h> | ||
| #include <math.h> | ||
|
|
||
| #include "php.h" | ||
|
|
||
| #if PHP_WIN32 | ||
| # include "win32/winutil.h" | ||
| #endif | ||
|
|
||
| static int php_random_bytes(void *bytes, size_t size) | ||
| { | ||
| #if PHP_WIN32 | ||
| /* Defer to CryptGenRandom on Windows */ | ||
| if (php_win32_get_random_bytes(bytes, size) == FAILURE) { | ||
| php_error_docref(NULL, E_WARNING, "Could not gather sufficient random data"); | ||
| return FAILURE; | ||
| } | ||
| #else | ||
| #if HAVE_DECL_ARC4RANDOM_BUF | ||
| arc4random_buf(bytes, size); | ||
| #else | ||
| int fd = -1; | ||
| size_t read_bytes = 0; | ||
| #if HAVE_DEV_ARANDOM | ||
| fd = open("/dev/arandom", O_RDONLY); | ||
| #else | ||
| #if HAVE_DEV_URANDOM | ||
| fd = open("/dev/urandom", O_RDONLY); | ||
| #endif // URANDOM | ||
| #endif // ARANDOM | ||
| if (fd < 0) { | ||
| php_error_docref(NULL, E_WARNING, "Cannot open source device"); | ||
| return FAILURE; | ||
| } | ||
|
|
||
| size_t n = 0; | ||
| while (read_bytes < size) { | ||
| n = read(fd, bytes + read_bytes, size - read_bytes); | ||
| if (n < 0) { | ||
| break; | ||
| } | ||
| read_bytes += n; | ||
| } | ||
|
|
||
| close(fd); | ||
| if (read_bytes < size) { | ||
| php_error_docref(NULL, E_WARNING, "Could not gather sufficient random data"); | ||
| return FAILURE; | ||
| } | ||
| #endif // !ARC4RANDOM_BUF | ||
| #endif // !WIN32 | ||
|
|
||
| return SUCCESS; | ||
| } | ||
|
|
||
| /* {{{ proto string random_bytes(int length) | ||
| Return an arbitrary length of pseudo-random bytes as binary string */ | ||
| PHP_FUNCTION(random_bytes) | ||
| { | ||
| zend_long size; | ||
| zend_string *bytes; | ||
|
|
||
| if (zend_parse_parameters(ZEND_NUM_ARGS(), "l", &size) == FAILURE) { | ||
| return; | ||
| } | ||
|
|
||
| if (size < 1) { | ||
| php_error_docref(NULL, E_WARNING, "Length must be greater than 0"); | ||
| RETURN_FALSE; | ||
| } | ||
|
|
||
| bytes = zend_string_alloc(size, 0); | ||
|
|
||
| if (php_random_bytes(bytes->val, size) == FAILURE) { | ||
| zend_string_release(bytes); | ||
| RETURN_FALSE; | ||
| } | ||
|
|
||
| bytes->val[size] = '\0'; | ||
|
|
||
| RETURN_STR(bytes); | ||
| } | ||
| /* }}} */ | ||
|
|
||
| /* {{{ proto int random_int(int min, int max) | ||
| Return an arbitrary pseudo-random integer */ | ||
| PHP_FUNCTION(random_int) | ||
| { | ||
| zend_long min = ZEND_LONG_MIN; | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @SammyK Since both parameters are required, we don't need these defaults any more. |
||
| zend_long max = ZEND_LONG_MAX; | ||
| zend_ulong limit; | ||
| zend_ulong umax; | ||
| zend_ulong result; | ||
|
|
||
| if (ZEND_NUM_ARGS() == 1) { | ||
| php_error_docref(NULL, E_WARNING, "A minimum and maximum value are expected, only minimum given"); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. since you already have LONG_MIN and LONG_MAX, so if only minimum value min is given. then simply assume it means min to LONG_MAX?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @laruence: I thought about the same thing but then I realize that reading:
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. hmm, that could be a doc issue.. but it's not a big deal :)
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oops - this is a dingleberry left over from when |
||
| RETURN_FALSE; | ||
| } | ||
|
|
||
| if (zend_parse_parameters(ZEND_NUM_ARGS(), "|ll", &min, &max) == FAILURE) { | ||
| return; | ||
| } | ||
|
|
||
| if (min >= max) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would change
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Well, if |
||
| php_error_docref(NULL, E_WARNING, "Minimum value must be less than the maximum value"); | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. maybe something like: 1st arg must be less than 2nd arg is more obvious for user? |
||
| RETURN_FALSE; | ||
| } | ||
|
|
||
| umax = max - min; | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. We need to make sure the
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. You're already checking this on line 126.
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Oh snap. Not. Enough. Coffee! Oh wait, I'm drinking decaf for some reason today. :/ |
||
|
|
||
| if (php_random_bytes(&result, sizeof(result)) == FAILURE) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What is the principal difference of this move from casting some garbage into integer? Could someone point to a theory behind this?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What do you mean? How else would you do it?
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yeah, that was actually my question :) Maybe I'm too fixed on LCG, so just making a stab on finding some system. In a LCG one would have a seed and a kind of formula. Here, we read some sequence of bits which are then inclined to be an integer. Probably right, at the end those bits are glued together. At the end, the integer is in the exact range of how much bits was requested. But just wondering, no further shuffling, endianness difference, etc., just taking them as is? Are there some tests on the quality of the outcome? That's basically what I was asking. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @weltling There is no need. If the input to the algorithm is a sequence of independent, uniformly-distributed random bytes then the its output is guaranteed to be independent, uniformly-distributed over the requested range and have the same "kind" of randomness as the input. In this case, since we are using trusted sources of crypto-secure pseudo-random bytes, the output is a crypto-secure pseudo-random integer. Try thinking of it this way. Say you need random numbers in the range 0 to 13 and all you have as a source of randomness is a friend with a 20-sided die. The only way (that I know) to get the numbers you need without bias or skewing the distribution is to ask your friend to keep rolling the die until it gives a number in the desired range. Now, say you need numbers in the range 0 to 8. You can map two disjoint subsets of the die's 0 to 19 range to the desired range and this improves the algorithm's efficiency. I believe this is exactly analogous to how this algorithm is supposed to work. (I am making no comment about the correctness of the implementation.) It's an old algorithm we can trust.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @weltling There's no need for any extra work. If an integer occupies 64 bits of memory, and we use a random source to set every one of those bits, the result is a random integer with the same quality as the random source. Endianness does not matter since every byte is independently random. If the bytes are ordered AB they are equally random to being ordered BA. There are tools that can test the quality of the random output, but you'll literally be testing the underlying source. We're putting our faith in the Linux/Windows/BSD APIs here. If it turns out that these sources are in fact low quality, then civilisation itself will collapse :)
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Indeed. Find the ceiling under RAND_MAX where
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @lt the world is concurrent enough to not to break because of whichever virtual RNG :) But ok, so the presumption of innocence is applied to the OS randomness sources. @tom-- yeah, maybe also an improvement could be to ask the friend to throw more than one die at once. Possibly it could reduce the whole circles count. However not sure how reliable it would be with this method (i mean how many uniform random data one can get at once), for LCG i can say it could be done with something like AVX/SSE vectorization capabilities. Here it's probably only to be spotted empirically. Thanks for the answers, guys. |
||
| RETURN_FALSE; | ||
| } | ||
|
|
||
| // Special case where no modulus is required | ||
| if (umax == ZEND_ULONG_MAX) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm pretty sure this is dead code - an ULONG_MAX is always higher than LONG_MAX, and therefore even if you have Also, for some reason
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @narfbg I don't think it's dead code. If you take the absolute minimum value, (unsigned) umax = (signed) PHP_INT_MAX - (signed) PHP_INT_MIN (full negative). Which should be the same as ZEND_ULONG_MAX. Not sure how |
||
| RETURN_LONG((zend_long)result); | ||
| } | ||
|
|
||
| // Increment the max so the range is inclusive of max | ||
| umax++; | ||
|
|
||
| // Powers of two are not biased | ||
| if (umax & ~umax != umax) { | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. use braces make it more readable |
||
| // Ceiling under which ZEND_LONG_MAX % max == 0 | ||
| limit = ZEND_ULONG_MAX - (ZEND_ULONG_MAX % umax) - 1; | ||
|
|
||
| // Discard numbers over the limit to avoid modulo bias | ||
| while (result > limit) { | ||
| if (php_random_bytes(&result, sizeof(result)) == FAILURE) { | ||
| return; | ||
| } | ||
| } | ||
|
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. So the
Naturally I understand that the likelihood of these characteristics causing problems in PHP applications is small but I think reviewers should be aware of it.
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Let There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @nikic That looks correct.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why can't we just use the
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @datibbaw Because RAND_RANGE is highly biased. It's not an option for crypto-quality randomness. I'm not aware of algorithms for this that don't use a form of rejection sampling.
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more.
Get out! :) In
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Well, that's fair enough; I shall read more into this, thanks :) |
||
| } | ||
|
|
||
| RETURN_LONG((zend_long)((result % umax) + min)); | ||
| } | ||
| /* }}} */ | ||
|
|
||
| /* | ||
| * Local variables: | ||
| * tab-width: 4 | ||
| * c-basic-offset: 4 | ||
| * End: | ||
| * vim600: sw=4 ts=4 fdm=marker | ||
| * vim<600: sw=4 ts=4 | ||
| */ | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,14 @@ | ||
| --TEST-- | ||
| Test normal operation of random_bytes() | ||
| --FILE-- | ||
| <?php | ||
| //-=-=-=- | ||
|
|
||
| var_dump(strlen(bin2hex(random_bytes(16)))); | ||
|
|
||
| var_dump(is_string(random_bytes(10))); | ||
|
|
||
| ?> | ||
| --EXPECT-- | ||
| int(32) | ||
| bool(true) |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,17 @@ | ||
| --TEST-- | ||
| Test error operation of random_bytes() | ||
| --FILE-- | ||
| <?php | ||
| //-=-=-=- | ||
|
|
||
| var_dump(random_bytes()); | ||
|
|
||
| var_dump(random_bytes(-1)); | ||
|
|
||
| ?> | ||
| --EXPECTF-- | ||
| Warning: random_bytes() expects exactly 1 parameter, 0 given in %s on line %d | ||
| NULL | ||
|
|
||
| Warning: random_bytes(): Length must be greater than 0 in %s on line %d | ||
| bool(false) |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,21 @@ | ||
| --TEST-- | ||
| Test normal operation of random_int() | ||
| --FILE-- | ||
| <?php | ||
| //-=-=-=- | ||
|
|
||
| var_dump(is_int(random_int())); | ||
|
|
||
| var_dump(is_int(random_int(10, 100))); | ||
|
|
||
| $x = random_int(10, 100); | ||
| var_dump($x >= 10 && $x <= 100); | ||
|
|
||
| var_dump(random_int(-1000, -1) < 0); | ||
|
|
||
| ?> | ||
| --EXPECT-- | ||
| bool(true) | ||
| bool(true) | ||
| bool(true) | ||
| bool(true) |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,17 @@ | ||
| --TEST-- | ||
| Test error operation of random_int() | ||
| --FILE-- | ||
| <?php | ||
| //-=-=-=- | ||
|
|
||
| var_dump(random_int(10)); | ||
|
|
||
| var_dump(random_int(10, 0)); | ||
|
|
||
| ?> | ||
| --EXPECTF-- | ||
| Warning: random_int(): A minimum and maximum value are expected, only minimum given in %s on line %d | ||
| bool(false) | ||
|
|
||
| Warning: random_int(): Minimum value must be less than the maximum value in %s on line %d | ||
| bool(false) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nmust be signed. It can beintorssize_t, but it needs to be signed. Otherwise theif (n < 0)condition can literally never happen.