[git commit] shuf: add a TODO, code shrink

Denys Vlasenko vda.linux at googlemail.com
Tue Sep 7 20:51:42 UTC 2021


commit: https://git.busybox.net/busybox/commit/?id=6a9b3f7acfaa7365515f1eb70427d5ddd687c162
branch: https://git.busybox.net/busybox/commit/?id=refs/heads/master

function                                             old     new   delta
shuf_main                                            501     500      -1

Signed-off-by: Denys Vlasenko <vda.linux at googlemail.com>
---
 coreutils/shuf.c | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/coreutils/shuf.c b/coreutils/shuf.c
index 50483a25e..3def3d80f 100644
--- a/coreutils/shuf.c
+++ b/coreutils/shuf.c
@@ -44,21 +44,25 @@
  */
 static void shuffle_lines(char **lines, unsigned numlines, unsigned outlines)
 {
-	unsigned i;
-	unsigned r;
-	char *tmp;
-
 	srand(monotonic_us());
 
-	for (i = numlines - 1; outlines > 0; i--, outlines--) {
-		r = rand();
+	while (outlines != 0) {
+		char *tmp;
+		unsigned r = rand();
 		/* RAND_MAX can be as small as 32767 */
-		if (i > RAND_MAX)
+		if (numlines > RAND_MAX)
 			r ^= rand() << 15;
-		r %= i + 1;
-		tmp = lines[i];
-		lines[i] = lines[r];
+		r %= numlines;
+//TODO: the above method is seriously non-uniform when numlines is very large.
+//For example, with numlines of   0xf0000000,
+//values of (r % numlines) in [0, 0x0fffffff] range
+//are more likely: e.g. r=1 and r=0xf0000001 both map to 1,
+//whereas only one value, r=0xefffffff, maps to 0xefffffff.
+		numlines--;
+		tmp = lines[numlines];
+		lines[numlines] = lines[r];
 		lines[r] = tmp;
+		outlines--;
 	}
 }
 


More information about the busybox-cvs mailing list