[git commit] shuf: add a TODO, code shrink
Denys Vlasenko
vda.linux at googlemail.com
Tue Sep 7 20:51:42 UTC 2021
commit: https://git.busybox.net/busybox/commit/?id=6a9b3f7acfaa7365515f1eb70427d5ddd687c162
branch: https://git.busybox.net/busybox/commit/?id=refs/heads/master
function old new delta
shuf_main 501 500 -1
Signed-off-by: Denys Vlasenko <vda.linux at googlemail.com>
---
coreutils/shuf.c | 24 ++++++++++++++----------
1 file changed, 14 insertions(+), 10 deletions(-)
diff --git a/coreutils/shuf.c b/coreutils/shuf.c
index 50483a25e..3def3d80f 100644
--- a/coreutils/shuf.c
+++ b/coreutils/shuf.c
@@ -44,21 +44,25 @@
*/
static void shuffle_lines(char **lines, unsigned numlines, unsigned outlines)
{
- unsigned i;
- unsigned r;
- char *tmp;
-
srand(monotonic_us());
- for (i = numlines - 1; outlines > 0; i--, outlines--) {
- r = rand();
+ while (outlines != 0) {
+ char *tmp;
+ unsigned r = rand();
/* RAND_MAX can be as small as 32767 */
- if (i > RAND_MAX)
+ if (numlines > RAND_MAX)
r ^= rand() << 15;
- r %= i + 1;
- tmp = lines[i];
- lines[i] = lines[r];
+ r %= numlines;
+//TODO: the above method is seriously non-uniform when numlines is very large.
+//For example, with numlines of 0xf0000000,
+//values of (r % numlines) in [0, 0x0fffffff] range
+//are more likely: e.g. r=1 and r=0xf0000001 both map to 1,
+//whereas only one value, r=0xefffffff, maps to 0xefffffff.
+ numlines--;
+ tmp = lines[numlines];
+ lines[numlines] = lines[r];
lines[r] = tmp;
+ outlines--;
}
}
More information about the busybox-cvs
mailing list