/* vi: set sw=4 ts=4: */
/*
 * shuf: Write a random permutation of the input lines to standard output.
 *
 * Copyright (C) 2014 by Bartosz Golaszewski <bartekgola@gmail.com>
 *
 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
 */
//config:config SHUF
//config: bool "shuf (5.4 kb)"
//config: default y
//config: help
//config: Generate random permutations

//applet:IF_SHUF(APPLET_NOEXEC(shuf, shuf, BB_DIR_USR_BIN, BB_SUID_DROP, shuf))

//kbuild:lib-$(CONFIG_SHUF) += shuf.o

//usage:#define shuf_trivial_usage
//usage: "[-e|-i L-H] [-n NUM] [-o FILE] [-z] [FILE|ARG...]"
//usage:#define shuf_full_usage "\n\n"
//usage: "Randomly permute lines\n"
//usage: "\n -e Treat ARGs as lines"
//usage: "\n -i L-H Treat numbers L-H as lines"
//usage: "\n -n NUM Output at most NUM lines"
//usage: "\n -o FILE Write to FILE, not standard output"
//usage: "\n -z NUL terminated output"

29#include "libbb.h"
30
/* This is a NOEXEC applet. Be very careful! */

/*
 * Option bit flags tested against the value returned by getopt32().
 * The bit positions follow the order of the option letters in OPT_STR
 * (presumably the getopt32() convention - confirm in libbb), so the
 * two definitions must be kept in sync.
 */
enum {
	OPT_e = (1 << 0),	/* -e: treat ARGs as lines */
	OPT_i = (1 << 1),	/* -i L-H: treat numbers L-H as lines */
	OPT_n = (1 << 2),	/* -n NUM: output at most NUM lines */
	OPT_o = (1 << 3),	/* -o FILE: write to FILE */
	OPT_z = (1 << 4),	/* -z: NUL terminated output */
};
#define OPT_STR "ei:n:o:z"

39
/*
 * Use the Fisher-Yates shuffle algorithm on an array of lines.
 * If the required number of output lines is less than the total
 * we can stop shuffling early.
 *
 * lines:    array of numlines entries, permuted in place
 * numlines: number of entries in lines[]
 * outlines: how many entries the caller intends to output
 *
 * The loop walks from the last slot downwards and performs exactly
 * 'outlines' swaps, so on return the final 'outlines' slots of lines[]
 * hold the random selection. The random generator is re-seeded from
 * monotonic_us() on every call.
 */
static void shuffle_lines(char **lines, unsigned numlines, unsigned outlines)
{
	unsigned i;

	/*
	 * Never select more entries than exist: otherwise 'i' would wrap
	 * past zero and "r %= i + 1" would divide by zero.
	 */
	if (outlines > numlines)
		outlines = numlines;

	srand(monotonic_us());

	for (i = numlines - 1; outlines > 0; i--, outlines--) {
		unsigned r;
		char *tmp;

		r = rand();
		/* RAND_MAX can be as small as 32767 */
		if (i > RAND_MAX) {
			/*
			 * Shift as unsigned: left-shifting an int so that the
			 * result does not fit is undefined behavior.
			 */
			r ^= (unsigned)rand() << 15;
		}
		/* pick a slot in [0, i] and swap it into position i */
		r %= i + 1;
		tmp = lines[i];
		lines[i] = lines[r];
		lines[r] = tmp;
	}
}
64
65int shuf_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
66int shuf_main(int argc, char **argv)
67{
68 unsigned opts;
69 char *opt_i_str, *opt_n_str, *opt_o_str;
70 unsigned i;
71 char **lines;
Ron Yorston8817e282021-08-07 09:41:49 +010072 unsigned numlines, outlines;
Denys Vlasenko2cdcb102014-03-05 18:56:20 +010073 char eol;
74
Denys Vlasenko22542ec2017-08-08 21:55:02 +020075 opts = getopt32(argv, "^"
76 OPT_STR
77 "\0" "e--i:i--e"/* mutually exclusive */,
78 &opt_i_str, &opt_n_str, &opt_o_str
79 );
Denys Vlasenko2cdcb102014-03-05 18:56:20 +010080
81 argc -= optind;
82 argv += optind;
83
84 /* Prepare lines for shuffling - either: */
85 if (opts & OPT_e) {
86 /* make lines from command-line arguments */
Denys Vlasenko2cdcb102014-03-05 18:56:20 +010087 numlines = argc;
88 lines = argv;
89 } else
90 if (opts & OPT_i) {
91 /* create a range of numbers */
92 char *dash;
93 unsigned lo, hi;
94
95 dash = strchr(opt_i_str, '-');
96 if (!dash) {
97 bb_error_msg_and_die("bad range '%s'", opt_i_str);
98 }
99 *dash = '\0';
100 lo = xatou(opt_i_str);
101 hi = xatou(dash + 1);
Maninder Singh7db312a2015-06-01 10:40:09 +0000102 *dash = '-';
Denys Vlasenko2cdcb102014-03-05 18:56:20 +0100103 if (hi < lo) {
104 bb_error_msg_and_die("bad range '%s'", opt_i_str);
105 }
106
107 numlines = (hi+1) - lo;
108 lines = xmalloc(numlines * sizeof(lines[0]));
109 for (i = 0; i < numlines; i++) {
Denys Vlasenko102f0d02014-03-07 14:32:39 +0100110 lines[i] = (char*)(uintptr_t)lo;
Denys Vlasenko2cdcb102014-03-05 18:56:20 +0100111 lo++;
112 }
113 } else {
114 /* default - read lines from stdin or the input file */
115 FILE *fp;
116
117 if (argc > 1)
118 bb_show_usage();
119
120 fp = xfopen_stdin(argv[0] ? argv[0] : "-");
121 lines = NULL;
122 numlines = 0;
123 for (;;) {
124 char *line = xmalloc_fgetline(fp);
125 if (!line)
126 break;
127 lines = xrealloc_vector(lines, 6, numlines);
128 lines[numlines++] = line;
129 }
130 fclose_if_not_stdin(fp);
131 }
132
Ron Yorston8817e282021-08-07 09:41:49 +0100133 outlines = numlines;
134 if (opts & OPT_n) {
135 outlines = xatou(opt_n_str);
136 if (outlines > numlines)
137 outlines = numlines;
138 }
139
140 shuffle_lines(lines, numlines, outlines);
Denys Vlasenko2cdcb102014-03-05 18:56:20 +0100141
142 if (opts & OPT_o)
143 xmove_fd(xopen(opt_o_str, O_WRONLY|O_CREAT|O_TRUNC), STDOUT_FILENO);
144
Denys Vlasenko2cdcb102014-03-05 18:56:20 +0100145 eol = '\n';
146 if (opts & OPT_z)
147 eol = '\0';
148
Ron Yorston8817e282021-08-07 09:41:49 +0100149 for (i = numlines - outlines; i < numlines; i++) {
Denys Vlasenko102f0d02014-03-07 14:32:39 +0100150 if (opts & OPT_i)
151 printf("%u%c", (unsigned)(uintptr_t)lines[i], eol);
152 else
153 printf("%s%c", lines[i], eol);
Denys Vlasenko2cdcb102014-03-05 18:56:20 +0100154 }
155
156 fflush_stdout_and_exit(EXIT_SUCCESS);
157}