blob: e566dc1bc8292619b0c8cb537a6e9ae69e5ef98b [file] [log] [blame]
/* vi: set sw=4 ts=4: */
/*
 * uniq implementation for busybox
 *
 * Copyright (C) 2005 Manuel Novoa III <mjn3@codepoet.org>
 *
 * Licensed under the GPL v2 or later, see the file LICENSE in this tarball.
 */

/* BB_AUDIT SUSv3 compliant */
/* http://www.opengroup.org/onlinepubs/007904975/utilities/uniq.html */
12
Denis Vlasenkob6adbf12007-05-26 19:00:18 +000013#include "libbb.h"
John Beppuabb47722000-01-06 00:48:21 +000014
Denis Vlasenko9b49a5e2007-10-11 10:05:36 +000015int uniq_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
Denis Vlasenkoa60f84e2008-07-05 09:18:54 +000016int uniq_main(int argc UNUSED_PARAM, char **argv)
John Beppuabb47722000-01-06 00:48:21 +000017{
Denys Vlasenko0aec9ff2009-08-30 19:32:45 +020018 const char *input_filename;
Denis Vlasenko96b99b82008-05-03 07:21:27 +000019 unsigned skip_fields, skip_chars, max_chars;
Denis Vlasenko4caa09a2007-03-31 10:19:11 +000020 unsigned opt;
Denys Vlasenko0aec9ff2009-08-30 19:32:45 +020021 char *cur_line;
22 const char *cur_compare;
John Beppuabb47722000-01-06 00:48:21 +000023
Denis Vlasenko4caa09a2007-03-31 10:19:11 +000024 enum {
25 OPT_c = 0x1,
Denys Vlasenko0aec9ff2009-08-30 19:32:45 +020026 OPT_d = 0x2, /* print only dups */
27 OPT_u = 0x4, /* print only uniq */
Denis Vlasenko4caa09a2007-03-31 10:19:11 +000028 OPT_f = 0x8,
29 OPT_s = 0x10,
Denis Vlasenko96b99b82008-05-03 07:21:27 +000030 OPT_w = 0x20,
Denis Vlasenko4caa09a2007-03-31 10:19:11 +000031 };
Manuel Novoa III cad53642003-03-19 09:13:01 +000032
Denis Vlasenko4caa09a2007-03-31 10:19:11 +000033 skip_fields = skip_chars = 0;
Denis Vlasenko1a6adbd2009-03-09 16:43:28 +000034 max_chars = INT_MAX;
Matt Kraaie0bcce02000-09-27 02:29:39 +000035
Denis Vlasenko96b99b82008-05-03 07:21:27 +000036 opt_complementary = "f+:s+:w+";
37 opt = getopt32(argv, "cduf:s:w:", &skip_fields, &skip_chars, &max_chars);
Denis Vlasenko4caa09a2007-03-31 10:19:11 +000038 argv += optind;
39
Denys Vlasenko44f174e2009-08-31 05:15:21 +020040 input_filename = argv[0];
41 if (input_filename) {
42 const char *output;
Manuel Novoa III cad53642003-03-19 09:13:01 +000043
Denys Vlasenko44f174e2009-08-31 05:15:21 +020044 if (input_filename[0] != '-' || input_filename[1]) {
45 close(STDIN_FILENO); /* == 0 */
46 xopen(input_filename, O_RDONLY); /* fd will be 0 */
47 }
48 output = argv[1];
49 if (output) {
50 if (argv[2])
51 bb_show_usage();
52 if (output[0] != '-' || output[1]) {
53 // Won't work with "uniq - FILE" and closed stdin:
54 //close(STDOUT_FILENO);
55 //xopen3(output, O_WRONLY | O_CREAT | O_TRUNC, 0666);
56 xmove_fd(xopen3(output, O_WRONLY | O_CREAT | O_TRUNC, 0666), STDOUT_FILENO);
57 }
58 }
Manuel Novoa III 415f6c92005-09-08 06:02:49 +000059 }
Matt Kraaie0bcce02000-09-27 02:29:39 +000060
Denys Vlasenko0aec9ff2009-08-30 19:32:45 +020061 cur_compare = cur_line = NULL; /* prime the pump */
Manuel Novoa III cad53642003-03-19 09:13:01 +000062
Manuel Novoa III 84b93f72005-09-15 08:06:42 +000063 do {
Denys Vlasenko0aec9ff2009-08-30 19:32:45 +020064 unsigned i;
65 unsigned long dups;
66 char *old_line;
67 const char *old_compare;
68
69 old_line = cur_line;
70 old_compare = cur_compare;
Manuel Novoa III 84b93f72005-09-15 08:06:42 +000071 dups = 0;
72
73 /* gnu uniq ignores newlines */
Denys Vlasenko1249dbb2009-08-30 19:35:06 +020074 while ((cur_line = xmalloc_fgetline(stdin)) != NULL) {
Denys Vlasenko0aec9ff2009-08-30 19:32:45 +020075 cur_compare = cur_line;
Denis Vlasenkof0ed3762006-10-26 23:21:47 +000076 for (i = skip_fields; i; i--) {
Denys Vlasenko0aec9ff2009-08-30 19:32:45 +020077 cur_compare = skip_whitespace(cur_compare);
78 cur_compare = skip_non_whitespace(cur_compare);
Manuel Novoa III 84b93f72005-09-15 08:06:42 +000079 }
Denys Vlasenko0aec9ff2009-08-30 19:32:45 +020080 for (i = skip_chars; *cur_compare && i; i--) {
81 ++cur_compare;
Manuel Novoa III 415f6c92005-09-08 06:02:49 +000082 }
Manuel Novoa III 84b93f72005-09-15 08:06:42 +000083
Denys Vlasenko0aec9ff2009-08-30 19:32:45 +020084 if (!old_line || strncmp(old_compare, cur_compare, max_chars)) {
Manuel Novoa III 84b93f72005-09-15 08:06:42 +000085 break;
Manuel Novoa III cad53642003-03-19 09:13:01 +000086 }
Manuel Novoa III 84b93f72005-09-15 08:06:42 +000087
Denys Vlasenkoa9c9bf52009-08-30 19:34:09 +020088 free(cur_line);
Denys Vlasenko0aec9ff2009-08-30 19:32:45 +020089 ++dups; /* testing for overflow seems excessive */
Manuel Novoa III 84b93f72005-09-15 08:06:42 +000090 }
91
Denys Vlasenko0aec9ff2009-08-30 19:32:45 +020092 if (old_line) {
93 if (!(opt & (OPT_d << !!dups))) { /* (if dups, opt & OPT_u) */
Denys Vlasenko44f174e2009-08-31 05:15:21 +020094 if (opt & OPT_c) {
95 /* %7lu matches GNU coreutils 6.9 */
96 printf("%7lu ", dups + 1);
97 }
Denys Vlasenko1249dbb2009-08-30 19:35:06 +020098 printf("%s\n", old_line);
Manuel Novoa III cad53642003-03-19 09:13:01 +000099 }
Denys Vlasenko0aec9ff2009-08-30 19:32:45 +0200100 free(old_line);
Manuel Novoa III cad53642003-03-19 09:13:01 +0000101 }
Denys Vlasenko0aec9ff2009-08-30 19:32:45 +0200102 } while (cur_line);
Manuel Novoa III cad53642003-03-19 09:13:01 +0000103
Denys Vlasenko1249dbb2009-08-30 19:35:06 +0200104 die_if_ferror(stdin, input_filename);
Manuel Novoa III cad53642003-03-19 09:13:01 +0000105
Denis Vlasenkof0ed3762006-10-26 23:21:47 +0000106 fflush_stdout_and_exit(EXIT_SUCCESS);
John Beppuabb47722000-01-06 00:48:21 +0000107}