Tarun Kundu | 567116b | 2024-08-15 16:22:58 -0700 | [diff] [blame] | 1 | |
| 2 | #include <sys/types.h> |
| 3 | #include <ctype.h> |
| 4 | #include <stdlib.h> |
| 5 | #include "unescape_c_string.h" |
| 6 | |
/*
 * Character-classification helpers.  Each one is defined here only when
 * the headers included above have not already supplied a macro of that name.
 */

/* Nonzero when c is one of the octal digits '0'..'7'. */
#ifndef isoctal
# define isoctal(c) \
	(('0' <= (c)) && ((c) <= '7'))
#endif

/*
 * Numeric value of the hexadecimal digit c ('0'..'9', 'a'..'f', 'A'..'F').
 * The result is only meaningful when c really is a hex digit, and c is
 * evaluated more than once, so pass an expression without side effects.
 */
#ifndef digittoint
# define digittoint(c) \
	((('0' <= (c)) && ((c) <= '9')) ? ((c) - '0') : (tolower(c) - 'a' + 10))
#endif
| 13 | |
/*
 * States of the escape decoder.  The value is kept in a plain int by the
 * callers, which start decoding from 0, i.e. C_STRING_STATE_GROUND.
 */
typedef enum C_STRING_STATE {
	C_STRING_STATE_GROUND = 0,	/* not inside an escape sequence */
	C_STRING_STATE_START,		/* a backslash has just been read */
	C_STRING_STATE_OCTAL2,		/* one octal digit read, a second may follow */
	C_STRING_STATE_OCTAL3,		/* two octal digits read, a third may follow */
	C_STRING_STATE_HEX1,		/* "\x" read, first hex digit may follow */
	C_STRING_STATE_HEX2		/* one hex digit read, a second may follow */
} C_STRING_STATE;
| 22 | |
| 23 | int |
| 24 | unescape_c_string(char *cp, char c, int *astate, int flag) |
| 25 | { |
| 26 | if (flag & C_STRING_FLAG_END) { |
| 27 | switch (*astate) { |
| 28 | case C_STRING_STATE_OCTAL2: |
| 29 | case C_STRING_STATE_OCTAL3: |
| 30 | case C_STRING_STATE_HEX1: |
| 31 | case C_STRING_STATE_HEX2: |
| 32 | *astate = C_STRING_STATE_GROUND; |
| 33 | return C_STRING_RESULT_VALID; |
| 34 | case C_STRING_STATE_GROUND: |
| 35 | return C_STRING_RESULT_NOCHAR; |
| 36 | default: |
| 37 | return C_STRING_RESULT_SYNBAD; |
| 38 | } |
| 39 | } |
| 40 | |
| 41 | switch (*astate) { |
| 42 | case C_STRING_STATE_GROUND: |
| 43 | *cp = 0; |
| 44 | if (c == '\\') { |
| 45 | *astate = C_STRING_STATE_START; |
| 46 | return C_STRING_RESULT_PENDING; |
| 47 | } |
| 48 | *cp = c; |
| 49 | return C_STRING_RESULT_VALID; |
| 50 | |
| 51 | case C_STRING_STATE_START: |
| 52 | *cp = 0; |
| 53 | switch (c) { |
| 54 | case '\\': |
| 55 | *cp = c; |
| 56 | *astate = C_STRING_STATE_GROUND; |
| 57 | return C_STRING_RESULT_VALID; |
| 58 | |
| 59 | case '0': case '1': case '2': case '3': |
| 60 | case '4': case '5': case '6': case '7': |
| 61 | *cp = (c - '0'); |
| 62 | *astate = C_STRING_STATE_OCTAL2; |
| 63 | return C_STRING_RESULT_PENDING; |
| 64 | |
| 65 | case 'x': |
| 66 | *astate = C_STRING_STATE_HEX1; |
| 67 | return C_STRING_RESULT_PENDING; |
| 68 | |
| 69 | case 'n': |
| 70 | *cp = '\n'; |
| 71 | *astate = C_STRING_STATE_GROUND; |
| 72 | return C_STRING_RESULT_VALID; |
| 73 | case 'r': |
| 74 | *cp = '\r'; |
| 75 | *astate = C_STRING_STATE_GROUND; |
| 76 | return C_STRING_RESULT_VALID; |
| 77 | case 'b': |
| 78 | *cp = '\b'; |
| 79 | *astate = C_STRING_STATE_GROUND; |
| 80 | return C_STRING_RESULT_VALID; |
| 81 | case 'a': |
| 82 | *cp = '\a'; |
| 83 | *astate = C_STRING_STATE_GROUND; |
| 84 | return C_STRING_RESULT_VALID; |
| 85 | case 'v': |
| 86 | *cp = '\v'; |
| 87 | *astate = C_STRING_STATE_GROUND; |
| 88 | return C_STRING_RESULT_VALID; |
| 89 | case 't': |
| 90 | *cp = '\t'; |
| 91 | *astate = C_STRING_STATE_GROUND; |
| 92 | return C_STRING_RESULT_VALID; |
| 93 | case 'f': |
| 94 | *cp = '\f'; |
| 95 | *astate = C_STRING_STATE_GROUND; |
| 96 | return C_STRING_RESULT_VALID; |
| 97 | case 's': |
| 98 | *cp = ' '; |
| 99 | *astate = C_STRING_STATE_GROUND; |
| 100 | return C_STRING_RESULT_VALID; |
| 101 | case 'E': |
| 102 | *cp = '\033'; |
| 103 | *astate = C_STRING_STATE_GROUND; |
| 104 | return C_STRING_RESULT_VALID; |
| 105 | |
| 106 | case '\n': |
| 107 | case '$': |
| 108 | *astate = C_STRING_STATE_GROUND; |
| 109 | return C_STRING_RESULT_NOCHAR; |
| 110 | } |
| 111 | *astate = C_STRING_STATE_GROUND; |
| 112 | return C_STRING_RESULT_SYNBAD; |
| 113 | |
| 114 | case C_STRING_STATE_OCTAL2: |
| 115 | if (isoctal((int) (unsigned char) c)) { |
| 116 | *cp = (*cp << 3) + (c - '0'); |
| 117 | *astate = C_STRING_STATE_OCTAL3; |
| 118 | return C_STRING_RESULT_PENDING; |
| 119 | } |
| 120 | *astate = C_STRING_STATE_GROUND; |
| 121 | return C_STRING_RESULT_VALIDPUSH; |
| 122 | |
| 123 | case C_STRING_STATE_OCTAL3: |
| 124 | *astate = C_STRING_STATE_GROUND; |
| 125 | if (isoctal((int) (unsigned char) c)) { |
| 126 | *cp = (*cp << 3) + (c - '0'); |
| 127 | return C_STRING_RESULT_VALID; |
| 128 | } |
| 129 | return C_STRING_RESULT_VALIDPUSH; |
| 130 | |
| 131 | case C_STRING_STATE_HEX1: |
| 132 | if (isxdigit((int) (unsigned char) c)) { |
| 133 | *cp = digittoint((int) (unsigned char) c); |
| 134 | *astate = C_STRING_STATE_HEX2; |
| 135 | return C_STRING_RESULT_PENDING; |
| 136 | } |
| 137 | *astate = C_STRING_STATE_GROUND; |
| 138 | return C_STRING_RESULT_VALIDPUSH; |
| 139 | |
| 140 | case C_STRING_STATE_HEX2: |
| 141 | *astate = C_STRING_STATE_GROUND; |
| 142 | if (isxdigit((int) (unsigned char) c)) { |
| 143 | *cp = (*cp << 4) + digittoint((int) (unsigned char) c); |
| 144 | return C_STRING_RESULT_VALID; |
| 145 | } |
| 146 | return C_STRING_RESULT_VALIDPUSH; |
| 147 | |
| 148 | default: |
| 149 | *astate = C_STRING_STATE_GROUND; |
| 150 | return C_STRING_RESULT_SYNBAD; |
| 151 | } |
| 152 | } |
| 153 | |
| 154 | int |
| 155 | str_unescape_c_string(char *dst, const char *src) |
| 156 | { |
| 157 | char c; |
| 158 | char *start = dst; |
| 159 | int state = 0; |
| 160 | |
| 161 | while ((c = *src++)) { |
| 162 | again: |
| 163 | switch (unescape_c_string(dst, c, &state, 0)) { |
| 164 | case C_STRING_RESULT_VALID: |
| 165 | dst++; |
| 166 | break; |
| 167 | case C_STRING_RESULT_VALIDPUSH: |
| 168 | dst++; |
| 169 | goto again; |
| 170 | case C_STRING_RESULT_PENDING: |
| 171 | case C_STRING_RESULT_NOCHAR: |
| 172 | break; |
| 173 | default: |
| 174 | *dst = 0; |
| 175 | return -1; |
| 176 | } |
| 177 | } |
| 178 | if (unescape_c_string(dst, c, &state, C_STRING_FLAG_END) == C_STRING_RESULT_VALID) { |
| 179 | dst++; |
| 180 | } |
| 181 | *dst = 0; |
| 182 | return dst - start; |
| 183 | } |
| 184 | |
| 185 | ssize_t |
| 186 | strn_unescape_c_string(char *dst, const char *src, size_t sz) |
| 187 | { |
| 188 | char c, p; |
| 189 | char *start = dst, *end = dst + sz - 1; |
| 190 | int state = 0; |
| 191 | |
| 192 | if (sz > 0) { |
| 193 | *end = 0; |
| 194 | } |
| 195 | while ((c = *src++)) { |
| 196 | again: |
| 197 | switch (unescape_c_string(&p, c, &state, 0)) { |
| 198 | case C_STRING_RESULT_VALID: |
| 199 | if (dst < end) { |
| 200 | *dst = p; |
| 201 | } |
| 202 | dst++; |
| 203 | break; |
| 204 | case C_STRING_RESULT_VALIDPUSH: |
| 205 | if (dst < end) { |
| 206 | *dst = p; |
| 207 | } |
| 208 | dst++; |
| 209 | goto again; |
| 210 | case 0: |
| 211 | case C_STRING_RESULT_NOCHAR: |
| 212 | break; |
| 213 | default: |
| 214 | if (dst <= end) { |
| 215 | *dst = 0; |
| 216 | } |
| 217 | return -1; |
| 218 | } |
| 219 | } |
| 220 | if (unescape_c_string(&p, c, &state, C_STRING_FLAG_END) == C_STRING_RESULT_VALID) { |
| 221 | if (dst < end) { |
| 222 | *dst = p; |
| 223 | } |
| 224 | dst++; |
| 225 | } |
| 226 | if (dst <= end) { |
| 227 | *dst = 0; |
| 228 | } |
| 229 | return dst - start; |
| 230 | } |