/* expand - convert tabs to spaces
 * unexpand - convert spaces to tabs
 *
 * Copyright (C) 89, 91, 1995-2006 Free Software Foundation, Inc.
 *
 * Licensed under GPLv2 or later, see file LICENSE in this source tree.
 *
 * David MacKenzie <djm@gnu.ai.mit.edu>
 *
 * Options for expand:
 * -t num  --tabs NUM      Convert tabs to num spaces (default 8 spaces).
 * -i      --initial       Only convert initial tabs on each line to spaces.
 *
 * Options for unexpand:
 * -a      --all           Convert all blanks, instead of just initial blanks.
 * -f      --first-only    Convert only leading sequences of blanks (default).
 * -t num  --tabs NUM      Have tabs num characters apart instead of 8.
 *
 * Busybox version (C) 2007 by Tito Ragusa <farmatito@tiscali.it>
 *
 * Caveat: these versions of expand and unexpand don't accept tab lists.
 */
Denys Vlasenko | af3f420 | 2016-11-23 14:46:56 +0100 | [diff] [blame] | 23 | //config:config EXPAND |
Denys Vlasenko | b097a84 | 2018-12-28 03:20:17 +0100 | [diff] [blame] | 24 | //config: bool "expand (5.1 kb)" |
Denys Vlasenko | af3f420 | 2016-11-23 14:46:56 +0100 | [diff] [blame] | 25 | //config: default y |
| 26 | //config: help |
Denys Vlasenko | 72089cf | 2017-07-21 09:50:55 +0200 | [diff] [blame] | 27 | //config: By default, convert all tabs to spaces. |
Denys Vlasenko | af3f420 | 2016-11-23 14:46:56 +0100 | [diff] [blame] | 28 | //config: |
Denys Vlasenko | af3f420 | 2016-11-23 14:46:56 +0100 | [diff] [blame] | 29 | //config:config UNEXPAND |
Denys Vlasenko | b097a84 | 2018-12-28 03:20:17 +0100 | [diff] [blame] | 30 | //config: bool "unexpand (5.3 kb)" |
Denys Vlasenko | af3f420 | 2016-11-23 14:46:56 +0100 | [diff] [blame] | 31 | //config: default y |
| 32 | //config: help |
Denys Vlasenko | 72089cf | 2017-07-21 09:50:55 +0200 | [diff] [blame] | 33 | //config: By default, convert only leading sequences of blanks to tabs. |
Denys Vlasenko | af3f420 | 2016-11-23 14:46:56 +0100 | [diff] [blame] | 34 | |
| 35 | //applet:IF_EXPAND(APPLET(expand, BB_DIR_USR_BIN, BB_SUID_DROP)) |
Denys Vlasenko | 205d48e | 2017-01-29 14:57:33 +0100 | [diff] [blame] | 36 | // APPLET_ODDNAME:name main location suid_type help |
Denys Vlasenko | af3f420 | 2016-11-23 14:46:56 +0100 | [diff] [blame] | 37 | //applet:IF_UNEXPAND(APPLET_ODDNAME(unexpand, expand, BB_DIR_USR_BIN, BB_SUID_DROP, unexpand)) |
| 38 | |
| 39 | //kbuild:lib-$(CONFIG_EXPAND) += expand.o |
| 40 | //kbuild:lib-$(CONFIG_UNEXPAND) += expand.o |
Pere Orga | 3442538 | 2011-03-31 14:43:25 +0200 | [diff] [blame] | 41 | |
| 42 | //usage:#define expand_trivial_usage |
| 43 | //usage: "[-i] [-t N] [FILE]..." |
| 44 | //usage:#define expand_full_usage "\n\n" |
| 45 | //usage: "Convert tabs to spaces, writing to stdout\n" |
Pere Orga | 3442538 | 2011-03-31 14:43:25 +0200 | [diff] [blame] | 46 | //usage: "\n -i Don't convert tabs after non blanks" |
| 47 | //usage: "\n -t Tabstops every N chars" |
Pere Orga | 3442538 | 2011-03-31 14:43:25 +0200 | [diff] [blame] | 48 | |
| 49 | //usage:#define unexpand_trivial_usage |
| 50 | //usage: "[-fa][-t N] [FILE]..." |
| 51 | //usage:#define unexpand_full_usage "\n\n" |
| 52 | //usage: "Convert spaces to tabs, writing to stdout\n" |
Pere Orga | 3442538 | 2011-03-31 14:43:25 +0200 | [diff] [blame] | 53 | //usage: "\n -a Convert all blanks" |
| 54 | //usage: "\n -f Convert only leading blanks" |
| 55 | //usage: "\n -t N Tabstops every N chars" |
Pere Orga | 3442538 | 2011-03-31 14:43:25 +0200 | [diff] [blame] | 56 | |
Denis Vlasenko | 3952f20 | 2007-08-13 14:10:24 +0000 | [diff] [blame] | 57 | #include "libbb.h" |
Tomas Heinrich | d2b1ba6 | 2010-01-04 16:21:31 +0100 | [diff] [blame] | 58 | #include "unicode.h" |
Denis Vlasenko | 3952f20 | 2007-08-13 14:10:24 +0000 | [diff] [blame] | 59 | |
/*
 * Option bits tested against the mask returned by getopt32long().
 * The bit positions are expected to line up with the option strings
 * used in expand_main(): "it:" for expand and "ft:a" for unexpand.
 */
enum {
	OPT_INITIAL = 1 << 0,	/* expand -i; unexpand reuses this bit for -f */
	OPT_TABS    = 1 << 1,	/* -t N */
	OPT_ALL     = 1 << 2,	/* unexpand -a */
};
| 65 | |
//FIXME: input containing NUL bytes is not handled properly.
//coreutils 8.30 preserves NULs but treats them as chars of width zero:
//AB<nul><tab>C will expand <tab> to 6 spaces, not 5.
Denys Vlasenko | 9254925 | 2019-06-08 13:04:44 +0200 | [diff] [blame] | 69 | |
#if ENABLE_EXPAND
/*
 * Copy FILE to stdout one line at a time, replacing each tab with the
 * number of spaces needed to reach the next multiple of tab_size.
 *
 * file:     stream to read lines from.
 * tab_size: distance between tab stops; used as a divisor, so it must be
 *           nonzero (expand_main() passes a value of at least 1).
 * opt:      option bits; with OPT_INITIAL set, conversion stops at the
 *           first character of a line that is not a space or tab and the
 *           rest of that line is copied unchanged.
 *
 * Limitation: each line is handled as a NUL-terminated string, so
 * scanning and output stop at the first NUL byte.  A NUL-preserving
 * variant built on xmalloc_fgets_str_len() was prototyped upstream but
 * left out (about +90 bytes of code, not tested much).
 */
static void expand(FILE *file, unsigned tab_size, unsigned opt)
{
	char *line;

	while ((line = xmalloc_fgets(file)) != NULL) {
		/* Start of the text that has not been written out yet */
		char *seg = line;
		char *ptr;

		for (ptr = line; *ptr != '\0'; ptr++) {
			unsigned char c = *ptr;
			unsigned pad;

			if ((opt & OPT_INITIAL) && !isblank(c)) {
				/* not space or tab: leave the rest of the line alone */
				break;
			}
			if (c != '\t')
				continue;

			/*
			 * Terminate the pending segment at the tab so that its
			 * display width can be measured.  Every earlier tab was
			 * padded up to a tab stop, so this width modulo tab_size
			 * is also the current column modulo tab_size.
			 */
			*ptr = '\0';
# if ENABLE_UNICODE_SUPPORT
			pad = unicode_strwidth(seg);
# else
			pad = ptr - seg;
# endif
			pad = tab_size - (pad % tab_size);

			/*
			 * Emit the padding one space at a time rather than via
			 * printf("%*s"): the '*' width must be an int, and pad can
			 * exceed INT_MAX because -t accepts values up to UINT_MAX.
			 */
			fputs_stdout(seg);
			while (pad--)
				putchar(' ');

			seg = ptr + 1;
		}
		/* Whatever follows the last converted tab goes out verbatim */
		fputs_stdout(seg);
		free(line);
	}
}
#endif
| 131 | |
#if ENABLE_UNEXPAND
/*
 * Copy FILE to stdout one line at a time, replacing runs of blanks that
 * reach a tab stop with tab characters.
 *
 * file:     stream to read lines from.
 * tab_size: distance between tab stops; used as a divisor, so it must be
 *           nonzero (expand_main() passes a value of at least 1).
 * opt:      option bits; without OPT_ALL, the remainder of a line is
 *           copied verbatim the first time pending blanks have been
 *           flushed at a position other than the very start of the line.
 *
 * "column" counts the columns covered since the last tab stop that was
 * written out, including blanks that have been read but not yet emitted.
 */
static void unexpand(FILE *file, unsigned tab_size, unsigned opt)
{
	char *line;

	while ((line = xmalloc_fgets(file)) != NULL) {
		char *ptr = line;
		unsigned column = 0;
		unsigned n;

		while (*ptr) {
			/* Spaces read in this pass and not yet written */
			unsigned len = 0;

			while (*ptr == ' ') {
				ptr++;
				len++;
			}
			column += len;
			if (*ptr == '\t') {
				/* A tab swallows the pending spaces up to the next stop */
				column += tab_size - (column % tab_size);
				ptr++;
				continue;
			}

			/* Non-blank reached: write whole tab stops as tabs... */
			n = column / tab_size;
			if (n) {
				/* ...and keep only the leftover columns as spaces */
				len = column = column % tab_size;
				while (n--)
					putchar('\t');
			}

			if (!(opt & OPT_ALL) && ptr != line) {
				/* The '*' width must be an int, hence the cast */
				printf("%*s%s", (int)len, "", ptr);
				break;
			}
			/* Copy the word up to the next blank and account for its width */
			n = strcspn(ptr, "\t ");
			printf("%*s%.*s", (int)len, "", (int)n, ptr);
# if ENABLE_UNICODE_SUPPORT
			{
				/* Temporarily terminate the word to measure its display width */
				char c = ptr[n];
				ptr[n] = '\0';
				len = unicode_strwidth(ptr);
				ptr[n] = c;
			}
# else
			len = n;
# endif
			ptr += n;
			column = (column + len) % tab_size;
		}
		/*
		 * If the line ended right after a tab (possible only when the
		 * last line has no trailing newline), whole tab stops are still
		 * pending; write them instead of dropping them.  On every other
		 * exit path column is already below tab_size, so this is a no-op.
		 */
		for (n = column / tab_size; n != 0; n--)
			putchar('\t');
		free(line);
	}
}
#endif
| 186 | |
Denis Vlasenko | 9b49a5e | 2007-10-11 10:05:36 +0000 | [diff] [blame] | 187 | int expand_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; |
Denis Vlasenko | a60f84e | 2008-07-05 09:18:54 +0000 | [diff] [blame] | 188 | int expand_main(int argc UNUSED_PARAM, char **argv) |
Denis Vlasenko | 3952f20 | 2007-08-13 14:10:24 +0000 | [diff] [blame] | 189 | { |
| 190 | /* Default 8 spaces for 1 tab */ |
| 191 | const char *opt_t = "8"; |
| 192 | FILE *file; |
| 193 | unsigned tab_size; |
| 194 | unsigned opt; |
| 195 | int exit_status = EXIT_SUCCESS; |
| 196 | |
Denys Vlasenko | 2805502 | 2010-01-04 20:49:58 +0100 | [diff] [blame] | 197 | init_unicode(); |
Denis Vlasenko | 3952f20 | 2007-08-13 14:10:24 +0000 | [diff] [blame] | 198 | |
| 199 | if (ENABLE_EXPAND && (!ENABLE_UNEXPAND || applet_name[0] == 'e')) { |
Denys Vlasenko | 036585a | 2017-08-08 16:38:18 +0200 | [diff] [blame] | 200 | opt = getopt32long(argv, "it:", |
| 201 | "initial\0" No_argument "i" |
| 202 | "tabs\0" Required_argument "t" |
| 203 | , &opt_t |
| 204 | ); |
Denis Vlasenko | 62a90cd | 2008-03-17 09:07:36 +0000 | [diff] [blame] | 205 | } else { |
Denys Vlasenko | 22542ec | 2017-08-08 21:55:02 +0200 | [diff] [blame] | 206 | opt = getopt32long(argv, "^" |
| 207 | "ft:a" |
| 208 | "\0" |
| 209 | "ta" /* -t NUM sets -a */, |
Mark Edgar | b2ab920 | 2020-05-30 19:05:46 +0200 | [diff] [blame] | 210 | "first-only\0" No_argument "f" |
Denys Vlasenko | 036585a | 2017-08-08 16:38:18 +0200 | [diff] [blame] | 211 | "tabs\0" Required_argument "t" |
| 212 | "all\0" No_argument "a" |
| 213 | , &opt_t |
| 214 | ); |
Mark Edgar | b2ab920 | 2020-05-30 19:05:46 +0200 | [diff] [blame] | 215 | /* -t implies -a, but an explicit -f overrides */ |
| 216 | if (opt & OPT_INITIAL) opt &= ~OPT_ALL; |
Denis Vlasenko | 3952f20 | 2007-08-13 14:10:24 +0000 | [diff] [blame] | 217 | } |
| 218 | tab_size = xatou_range(opt_t, 1, UINT_MAX); |
| 219 | |
| 220 | argv += optind; |
| 221 | |
Denis Vlasenko | 3952f20 | 2007-08-13 14:10:24 +0000 | [diff] [blame] | 222 | if (!*argv) { |
| 223 | *--argv = (char*)bb_msg_standard_input; |
Denis Vlasenko | 3952f20 | 2007-08-13 14:10:24 +0000 | [diff] [blame] | 224 | } |
Denis Vlasenko | 3952f20 | 2007-08-13 14:10:24 +0000 | [diff] [blame] | 225 | do { |
Denis Vlasenko | 62a90cd | 2008-03-17 09:07:36 +0000 | [diff] [blame] | 226 | file = fopen_or_warn_stdin(*argv); |
| 227 | if (!file) { |
| 228 | exit_status = EXIT_FAILURE; |
| 229 | continue; |
Denis Vlasenko | 3952f20 | 2007-08-13 14:10:24 +0000 | [diff] [blame] | 230 | } |
| 231 | |
| 232 | if (ENABLE_EXPAND && (!ENABLE_UNEXPAND || applet_name[0] == 'e')) |
Denis Vlasenko | 5e34ff2 | 2009-04-21 11:09:40 +0000 | [diff] [blame] | 233 | IF_EXPAND(expand(file, tab_size, opt)); |
Denis Vlasenko | 62a90cd | 2008-03-17 09:07:36 +0000 | [diff] [blame] | 234 | else |
Denis Vlasenko | 5e34ff2 | 2009-04-21 11:09:40 +0000 | [diff] [blame] | 235 | IF_UNEXPAND(unexpand(file, tab_size, opt)); |
Denis Vlasenko | 3952f20 | 2007-08-13 14:10:24 +0000 | [diff] [blame] | 236 | |
| 237 | /* Check and close the file */ |
Denis Vlasenko | 62a90cd | 2008-03-17 09:07:36 +0000 | [diff] [blame] | 238 | if (fclose_if_not_stdin(file)) { |
Denis Vlasenko | 0c97c9d | 2007-10-01 11:58:38 +0000 | [diff] [blame] | 239 | bb_simple_perror_msg(*argv); |
Denis Vlasenko | 3952f20 | 2007-08-13 14:10:24 +0000 | [diff] [blame] | 240 | exit_status = EXIT_FAILURE; |
| 241 | } |
| 242 | /* If stdin also clear EOF */ |
Denis Vlasenko | 6a2f7f4 | 2007-08-16 10:35:17 +0000 | [diff] [blame] | 243 | if (file == stdin) |
Denis Vlasenko | 3952f20 | 2007-08-13 14:10:24 +0000 | [diff] [blame] | 244 | clearerr(file); |
| 245 | } while (*++argv); |
| 246 | |
| 247 | /* Now close stdin also */ |
| 248 | /* (if we didn't read from it, it's a no-op) */ |
| 249 | if (fclose(stdin)) |
James Byrne | 6937487 | 2019-07-02 11:35:03 +0200 | [diff] [blame] | 250 | bb_simple_perror_msg_and_die(bb_msg_standard_input); |
Denis Vlasenko | 3952f20 | 2007-08-13 14:10:24 +0000 | [diff] [blame] | 251 | |
| 252 | fflush_stdout_and_exit(exit_status); |
| 253 | } |