Eric Andersen | aad1a88 | 2001-03-16 22:47:14 +0000 | [diff] [blame] | 1 | /* vi: set sw=4 ts=4: */ |
| 2 | /* |
| 3 | * Utility routines. |
| 4 | * |
Manuel Novoa III | cad5364 | 2003-03-19 09:13:01 +0000 | [diff] [blame] | 5 | * Copyright (C) Manuel Novoa III <mjn3@codepoet.org> |
Glenn L McGrath | b4a1baa | 2003-01-13 22:09:50 +0000 | [diff] [blame] | 6 | * and Vladimir Oleynik <dzo@simtreas.ru> |
Eric Andersen | aad1a88 | 2001-03-16 22:47:14 +0000 | [diff] [blame] | 7 | * |
Denys Vlasenko | 0ef64bd | 2010-08-16 20:14:46 +0200 | [diff] [blame] | 8 | * Licensed under GPLv2 or later, see file LICENSE in this source tree. |
Eric Andersen | aad1a88 | 2001-03-16 22:47:14 +0000 | [diff] [blame] | 9 | */ |
Eric Andersen | aad1a88 | 2001-03-16 22:47:14 +0000 | [diff] [blame] | 10 | #include "libbb.h" |
| 11 | |
/* Set to 0 to compile out recognition of \xHH escapes. */
#define WANT_HEX_ESCAPES 1

/* Usual "this only works for ascii compatible encodings" disclaimer. */
#undef _tolower
#define _tolower(X) ((X)|((char) 0x20))

/*
 * Decode one backslash escape sequence.
 *
 * On entry *ptr points at the character following the backslash.
 * Recognized forms:
 *   - octal: up to three octal digits, value capped at UCHAR_MAX
 *            (a digit that would overflow is left unconsumed);
 *   - hex (if WANT_HEX_ESCAPES): 'x' followed by one or two hex digits;
 *   - single letters: a b e f n r t v and the backslash itself.
 *
 * Returns the decoded character and advances *ptr past what was consumed.
 * For "\x" followed by a non-hex character, for an unrecognized letter,
 * and for a NUL right after the backslash, returns '\\' and leaves *ptr
 * unchanged.
 */
char FAST_FUNC bb_process_escape_sequence(const char **ptr)
{
	/* bash builtin "echo -e '\ec'" interprets \e as ESC,
	 * but coreutils "/bin/echo -e '\ec'" did not.
	 * Update: coreutils added support for \e on 28 Oct 2009.
	 *
	 * Layout: the first half lists the escape letters (NUL-terminated),
	 * the second half lists the matching translations. The slot paired
	 * with the terminating NUL is '\\', the "no match" result. */
	static const char charmap[] ALIGN1 = {
		'a',  'b',  'e', 'f',  'n',  'r',  't',  'v',  '\\', '\0',
		'\a', '\b', 27,  '\f', '\n', '\r', '\t', '\v', '\\', '\\',
	};
	enum { CHARMAP_HALF = sizeof(charmap) / 2 };

	const char *s = *ptr;
	unsigned radix = 8;
	unsigned value = 0;
	unsigned count = 0;	/* slots used so far; 'x' occupies one */

	if (WANT_HEX_ESCAPES && *s == 'x') {
		s++;
		radix = 16;
		count = 1;
	}

	/* bash requires leading 0 in octal escapes:
	 * \02 works, \2 does not (prints \ and 2).
	 * We treat \2 as a valid octal escape sequence. */
	while (count < 3) {
		unsigned digit = (unsigned char)*s - '0';
		unsigned next;

#if WANT_HEX_ESCAPES
		if (digit > 9) {
			/* Not a decimal digit: try 'A'-'F' / 'a'-'f'.
			 * Chars that fold below 'a' (such as '@') must not
			 * be accepted, so force them out of range. */
			unsigned char lc = (unsigned char)_tolower(*s);

			if (lc >= 'a')
				digit = lc - 'a' + 10u;
			else
				digit = radix;
		}
#endif
		if (digit >= radix) {
			/* \x<bad_char>: return '\', leave ptr pointing to x */
			if (WANT_HEX_ESCAPES && radix == 16 && --count == 0)
				return '\\';
			break;
		}

		next = value * radix + digit;
		if (next > UCHAR_MAX)
			break;	/* would not fit in a byte: stop before this digit */

		value = next;
		s++;
		count++;
	}

	if (count == 0) {
		/* Not an octal or hex escape sequence.
		 * Is it a one-letter one? */
		unsigned i = 0;

		while (charmap[i] != *s && charmap[++i] != '\0')
			continue;
		/* i indexes the matched letter, or the terminating NUL.
		 * \NUL and an unrecognized sequence such as \z yield '\'
		 * and consume nothing. */
		if (charmap[i] != '\0')
			s++;
		value = charmap[i + CHARMAP_HALF];
	}

	*ptr = s;

	return (char) value;
}
Denys Vlasenko | 5360059 | 2010-10-23 21:06:06 +0200 | [diff] [blame] | 104 | |
| 105 | char* FAST_FUNC strcpy_and_process_escape_sequences(char *dst, const char *src) |
| 106 | { |
| 107 | while (1) { |
| 108 | char c, c1; |
| 109 | c = c1 = *src++; |
| 110 | if (c1 == '\\') |
| 111 | c1 = bb_process_escape_sequence(&src); |
| 112 | *dst = c1; |
| 113 | if (c == '\0') |
| 114 | return dst; |
| 115 | dst++; |
| 116 | } |
| 117 | } |