blob: c5e1de6f576e28f026628367ea2de9f4f52c10d1 [file] [log] [blame]
Denis Vlasenko3952f202007-08-13 14:10:24 +00001/* expand - convert tabs to spaces
2 * unexpand - convert spaces to tabs
3 *
4 * Copyright (C) 89, 91, 1995-2006 Free Software Foundation, Inc.
5 *
Denys Vlasenko0ef64bd2010-08-16 20:14:46 +02006 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
Denis Vlasenko3952f202007-08-13 14:10:24 +00007 *
8 * David MacKenzie <djm@gnu.ai.mit.edu>
9 *
10 * Options for expand:
Denys Vlasenkobbc7bee2017-01-21 02:49:58 +010011 * -t num --tabs NUM Convert tabs to num spaces (default 8 spaces).
Denis Vlasenko3952f202007-08-13 14:10:24 +000012 * -i --initial Only convert initial tabs on each line to spaces.
13 *
14 * Options for unexpand:
15 * -a --all Convert all blanks, instead of just initial blanks.
16 * -f --first-only Convert only leading sequences of blanks (default).
Denys Vlasenkobbc7bee2017-01-21 02:49:58 +010017 * -t num --tabs NUM Have tabs num characters apart instead of 8.
Denis Vlasenko3952f202007-08-13 14:10:24 +000018 *
19 * Busybox version (C) 2007 by Tito Ragusa <farmatito@tiscali.it>
20 *
 * Caveat: these versions of expand and unexpand don't accept tab lists.
22 */
Denys Vlasenkoaf3f4202016-11-23 14:46:56 +010023//config:config EXPAND
Denys Vlasenkob097a842018-12-28 03:20:17 +010024//config: bool "expand (5.1 kb)"
Denys Vlasenkoaf3f4202016-11-23 14:46:56 +010025//config: default y
26//config: help
Denys Vlasenko72089cf2017-07-21 09:50:55 +020027//config: By default, convert all tabs to spaces.
Denys Vlasenkoaf3f4202016-11-23 14:46:56 +010028//config:
Denys Vlasenkoaf3f4202016-11-23 14:46:56 +010029//config:config UNEXPAND
Denys Vlasenkob097a842018-12-28 03:20:17 +010030//config: bool "unexpand (5.3 kb)"
Denys Vlasenkoaf3f4202016-11-23 14:46:56 +010031//config: default y
32//config: help
Denys Vlasenko72089cf2017-07-21 09:50:55 +020033//config: By default, convert only leading sequences of blanks to tabs.
Denys Vlasenkoaf3f4202016-11-23 14:46:56 +010034
35//applet:IF_EXPAND(APPLET(expand, BB_DIR_USR_BIN, BB_SUID_DROP))
Denys Vlasenko205d48e2017-01-29 14:57:33 +010036// APPLET_ODDNAME:name main location suid_type help
Denys Vlasenkoaf3f4202016-11-23 14:46:56 +010037//applet:IF_UNEXPAND(APPLET_ODDNAME(unexpand, expand, BB_DIR_USR_BIN, BB_SUID_DROP, unexpand))
38
39//kbuild:lib-$(CONFIG_EXPAND) += expand.o
40//kbuild:lib-$(CONFIG_UNEXPAND) += expand.o
Pere Orga34425382011-03-31 14:43:25 +020041
42//usage:#define expand_trivial_usage
43//usage: "[-i] [-t N] [FILE]..."
44//usage:#define expand_full_usage "\n\n"
45//usage: "Convert tabs to spaces, writing to stdout\n"
Pere Orga34425382011-03-31 14:43:25 +020046//usage: "\n -i Don't convert tabs after non blanks"
47//usage: "\n -t Tabstops every N chars"
Pere Orga34425382011-03-31 14:43:25 +020048
49//usage:#define unexpand_trivial_usage
50//usage: "[-fa][-t N] [FILE]..."
51//usage:#define unexpand_full_usage "\n\n"
52//usage: "Convert spaces to tabs, writing to stdout\n"
Pere Orga34425382011-03-31 14:43:25 +020053//usage: "\n -a Convert all blanks"
54//usage: "\n -f Convert only leading blanks"
55//usage: "\n -t N Tabstops every N chars"
Pere Orga34425382011-03-31 14:43:25 +020056
Denis Vlasenko3952f202007-08-13 14:10:24 +000057#include "libbb.h"
Tomas Heinrichd2b1ba62010-01-04 16:21:31 +010058#include "unicode.h"
Denis Vlasenko3952f202007-08-13 14:10:24 +000059
/*
 * Bits of the option mask used by both applets.
 * The option strings in expand_main() are "it:" for expand and "ft:a"
 * for unexpand, so bit 0 is shared: it is tested as OPT_INITIAL for
 * expand's -i and also for unexpand's -f.
 */
enum {
	OPT_INITIAL = (1 << 0), /* expand -i (unexpand: -f) */
	OPT_TABS    = (1 << 1), /* -t N */
	OPT_ALL     = (1 << 2), /* unexpand -a */
};
65
Denys Vlasenko92549252019-06-08 13:04:44 +020066//FIXME: does not work properly with input containing NULs
Denys Vlasenko48eebc82019-06-09 09:16:03 +020067//coreutils 8.30 preserves NULs but treats them as chars of width zero:
68//AB<nul><tab>C will expand <tab> to 6 spaces, not 5.
Denys Vlasenko92549252019-06-08 13:04:44 +020069
#if ENABLE_EXPAND
/*
 * Read FILE line by line and write it to stdout with tabs replaced by
 * spaces, padding each tab out to the next multiple of tab_size.
 *
 * file     - input stream
 * tab_size - distance between tab stops; used as a divisor
 * opt      - option mask; with OPT_INITIAL set, conversion stops at the
 *            first character that is not a space or tab and the rest of
 *            the line is written unchanged
 *
 * Each line is handled as a NUL-terminated string, so scanning stops at
 * the first NUL byte in the line. A NUL-aware variant would need the
 * line length from the reader, e.g. xmalloc_fgets_str_len().
 */
static void expand(FILE *file, unsigned tab_size, unsigned opt)
{
	char *line;

	while ((line = xmalloc_fgets(file)) != NULL) {
		char *seg = line; /* start of the text not yet written */
		char *cur;

		for (cur = line; *cur != '\0'; cur++) {
			unsigned char c = *cur;
			unsigned width;

			if ((opt & OPT_INITIAL) && !isblank(c)) {
				/* not space or tab: leave the rest as is */
				break;
			}
			if (c != '\t')
				continue;

			/* Cut the line at the tab so that seg is a string */
			*cur = '\0';
# if ENABLE_UNICODE_SUPPORT
			width = unicode_strwidth(seg);
# else
			width = cur - seg;
# endif
			/*
			 * seg starts right after the previous tab (or at the
			 * line start), so its width alone determines how far
			 * away the next tab stop is.
			 */
			printf("%s%*s", seg, tab_size - (width % tab_size), "");
			seg = cur + 1;
		}
		/* Whatever follows the last converted tab */
		fputs_stdout(seg);
		free(line);
	}
}
#endif
131
#if ENABLE_UNEXPAND
/*
 * Read FILE line by line and write it to stdout with runs of blanks
 * replaced by tabs where they cross a tab stop.
 *
 * file     - input stream
 * tab_size - distance between tab stops; used as a divisor
 * opt      - option mask; without OPT_ALL, the remainder of the line is
 *            written unchanged as soon as a blank run that does not
 *            start the line has been processed
 */
static void unexpand(FILE *file, unsigned tab_size, unsigned opt)
{
	for (;;) {
		char *line = xmalloc_fgets(file);
		char *cur;
		unsigned column;

		if (!line)
			break;

		cur = line;
		column = 0;
		while (*cur != '\0') {
			unsigned pad = 0; /* spaces still to be written */
			unsigned tabs;
			unsigned word_len;
			unsigned word_width;

			/* Swallow a run of spaces, remembering its length */
			for (; *cur == ' '; cur++)
				pad++;
			column += pad;

			if (*cur == '\t') {
				/* An input tab moves to the next tab stop */
				column += tab_size - (column % tab_size);
				cur++;
				continue;
			}

			/* Emit one tab per tab stop crossed by the blanks */
			tabs = column / tab_size;
			if (tabs != 0) {
				column %= tab_size;
				pad = column;
				do {
					putchar('\t');
				} while (--tabs != 0);
			}

			if (cur != line && !(opt & OPT_ALL)) {
				/* Leftover spaces, then the rest verbatim */
				printf("%*s%s", pad, "", cur);
				break;
			}

			/* Leftover spaces, then text up to the next blank */
			word_len = strcspn(cur, "\t ");
			printf("%*s%.*s", pad, "", word_len, cur);
# if ENABLE_UNICODE_SUPPORT
			{
				/* Temporarily terminate the word to measure it */
				char saved = cur[word_len];
				cur[word_len] = '\0';
				word_width = unicode_strwidth(cur);
				cur[word_len] = saved;
			}
# else
			word_width = word_len;
# endif
			cur += word_len;
			column = (column + word_width) % tab_size;
		}
		free(line);
	}
}
#endif
186
Denis Vlasenko9b49a5e2007-10-11 10:05:36 +0000187int expand_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
Denis Vlasenkoa60f84e2008-07-05 09:18:54 +0000188int expand_main(int argc UNUSED_PARAM, char **argv)
Denis Vlasenko3952f202007-08-13 14:10:24 +0000189{
190 /* Default 8 spaces for 1 tab */
191 const char *opt_t = "8";
192 FILE *file;
193 unsigned tab_size;
194 unsigned opt;
195 int exit_status = EXIT_SUCCESS;
196
Denys Vlasenko28055022010-01-04 20:49:58 +0100197 init_unicode();
Denis Vlasenko3952f202007-08-13 14:10:24 +0000198
199 if (ENABLE_EXPAND && (!ENABLE_UNEXPAND || applet_name[0] == 'e')) {
Denys Vlasenko036585a2017-08-08 16:38:18 +0200200 opt = getopt32long(argv, "it:",
201 "initial\0" No_argument "i"
202 "tabs\0" Required_argument "t"
203 , &opt_t
204 );
Denis Vlasenko62a90cd2008-03-17 09:07:36 +0000205 } else {
Denys Vlasenko22542ec2017-08-08 21:55:02 +0200206 opt = getopt32long(argv, "^"
207 "ft:a"
208 "\0"
209 "ta" /* -t NUM sets -a */,
Mark Edgarb2ab9202020-05-30 19:05:46 +0200210 "first-only\0" No_argument "f"
Denys Vlasenko036585a2017-08-08 16:38:18 +0200211 "tabs\0" Required_argument "t"
212 "all\0" No_argument "a"
213 , &opt_t
214 );
Mark Edgarb2ab9202020-05-30 19:05:46 +0200215 /* -t implies -a, but an explicit -f overrides */
216 if (opt & OPT_INITIAL) opt &= ~OPT_ALL;
Denis Vlasenko3952f202007-08-13 14:10:24 +0000217 }
218 tab_size = xatou_range(opt_t, 1, UINT_MAX);
219
220 argv += optind;
221
Denis Vlasenko3952f202007-08-13 14:10:24 +0000222 if (!*argv) {
223 *--argv = (char*)bb_msg_standard_input;
Denis Vlasenko3952f202007-08-13 14:10:24 +0000224 }
Denis Vlasenko3952f202007-08-13 14:10:24 +0000225 do {
Denis Vlasenko62a90cd2008-03-17 09:07:36 +0000226 file = fopen_or_warn_stdin(*argv);
227 if (!file) {
228 exit_status = EXIT_FAILURE;
229 continue;
Denis Vlasenko3952f202007-08-13 14:10:24 +0000230 }
231
232 if (ENABLE_EXPAND && (!ENABLE_UNEXPAND || applet_name[0] == 'e'))
Denis Vlasenko5e34ff22009-04-21 11:09:40 +0000233 IF_EXPAND(expand(file, tab_size, opt));
Denis Vlasenko62a90cd2008-03-17 09:07:36 +0000234 else
Denis Vlasenko5e34ff22009-04-21 11:09:40 +0000235 IF_UNEXPAND(unexpand(file, tab_size, opt));
Denis Vlasenko3952f202007-08-13 14:10:24 +0000236
237 /* Check and close the file */
Denis Vlasenko62a90cd2008-03-17 09:07:36 +0000238 if (fclose_if_not_stdin(file)) {
Denis Vlasenko0c97c9d2007-10-01 11:58:38 +0000239 bb_simple_perror_msg(*argv);
Denis Vlasenko3952f202007-08-13 14:10:24 +0000240 exit_status = EXIT_FAILURE;
241 }
242 /* If stdin also clear EOF */
Denis Vlasenko6a2f7f42007-08-16 10:35:17 +0000243 if (file == stdin)
Denis Vlasenko3952f202007-08-13 14:10:24 +0000244 clearerr(file);
245 } while (*++argv);
246
247 /* Now close stdin also */
248 /* (if we didn't read from it, it's a no-op) */
249 if (fclose(stdin))
James Byrne69374872019-07-02 11:35:03 +0200250 bb_simple_perror_msg_and_die(bb_msg_standard_input);
Denis Vlasenko3952f202007-08-13 14:10:24 +0000251
252 fflush_stdout_and_exit(exit_status);
253}