/*
 *------------------------------------------------------------------
 * Copyright (c) 2006-2016 Cisco and/or its affiliates.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at:
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
| 16 | |
| 17 | /* Break up a delimited string into a vector of substrings */ |
| 18 | |
| 19 | #include <stdio.h> |
| 20 | #include <vppinfra/clib.h> |
| 21 | #include <vppinfra/vec.h> |
| 22 | #include <vppinfra/hash.h> |
| 23 | #include <stdarg.h> |
| 24 | |
| 25 | /* |
| 26 | * #define UNIT_TESTS 1 |
| 27 | * #define MATCH_TRACE 1 |
| 28 | */ |
| 29 | |
/*
 * delsvec
 * Break up an input string into a vector of [null-terminated] u8 *'s.
 *
 * Each delimiter character supplied in fmt results in a string in the
 * output vector, unless the delimiters occur back-to-back in the input,
 * in which case no empty string is emitted. When matched, a space
 * character in the delimiter string consumes an arbitrary run of
 * whitespace (space, tab, newline). A '*' at the end of fmt captures
 * the rest of the input; a '*' followed by a delimiter captures
 * everything up to the last occurrence of that delimiter. A fmt that
 * does not end in '*' never matches. See the unit tests at the end of
 * this file for a set of examples.
 *
 * Returns a u8 **, or NULL if the input fails to match. It is assumed
 * that both input and fmt are C strings, not necessarily vectors.
 *
 * Output strings are both vectors and proper C strings.
 */
| 45 | |
/*
 * Stack of recycled, zero-length string vectors. delsvec() pops from
 * the tail; delsvec_recycle_this_string() pushes onto it.
 */
static u8 **string_cache;
/*
 * At most one recycled, zero-length result vector, reused by the next
 * delsvec() call.
 */
static u8 **svec_cache;
| 48 | |
| 49 | void delsvec_recycle_this_string (u8 *s) |
| 50 | { |
| 51 | if (s) { |
| 52 | _vec_len (s) = 0; |
| 53 | vec_add1(string_cache, s); |
| 54 | } |
| 55 | } |
| 56 | |
| 57 | void delsvec_recycle_this_svec (u8 **svec) |
| 58 | { |
| 59 | if (svec) { |
| 60 | if (svec_cache) { |
| 61 | vec_free (svec_cache); |
| 62 | } |
| 63 | _vec_len (svec) = 0; |
| 64 | svec_cache = svec; |
| 65 | } |
| 66 | } |
| 67 | |
/* Return the vector length of a, as reported by vec_len(). */
int pvl (char *a)
{
    int length = vec_len(a);

    return length;
}
| 72 | |
| 73 | u8 **delsvec(void *input_arg, char *fmt) |
| 74 | { |
| 75 | u8 **rv = 0; |
| 76 | int input_index=0; |
| 77 | u8 *this; |
| 78 | int dirflag=0; |
| 79 | int i; |
| 80 | u8 *input = input_arg; |
| 81 | |
| 82 | if (svec_cache) { |
| 83 | rv = svec_cache; |
| 84 | svec_cache = 0; |
| 85 | } |
| 86 | |
| 87 | while (fmt) { |
| 88 | dirflag=0; |
| 89 | if (vec_len (string_cache) > 0) { |
| 90 | this = string_cache [vec_len(string_cache)-1]; |
| 91 | _vec_len (string_cache) = vec_len (string_cache) - 1; |
| 92 | } else |
| 93 | this = 0; |
| 94 | /* |
| 95 | * '*' means one of two things: match the rest of the input, |
| 96 | * or match as many characters as possible |
| 97 | */ |
| 98 | if (fmt[0] == '*') { |
| 99 | fmt++; |
| 100 | dirflag=1; |
| 101 | /* |
| 102 | * no more format: eat rest of string... |
| 103 | */ |
| 104 | if (!fmt[0]) { |
| 105 | for (;input[input_index]; input_index++) |
| 106 | vec_add1(this, input[input_index]); |
| 107 | if (vec_len(this)) { |
| 108 | vec_add1(this, 0); |
| 109 | #ifdef MATCH_TRACE |
| 110 | printf("final star-match adds: '%s'\n", this); |
| 111 | #endif |
| 112 | vec_add1(rv, this); |
| 113 | } else { |
| 114 | vec_add1(string_cache, this); |
| 115 | } |
| 116 | |
| 117 | return(rv); |
| 118 | } |
| 119 | } |
| 120 | /* |
| 121 | * Left-to-right scan, adding chars until next delimiter char |
| 122 | * appears. |
| 123 | */ |
| 124 | if (!dirflag) { |
| 125 | while (input[input_index]) { |
| 126 | if (input[input_index] == fmt[0]) { |
| 127 | /* If we just (exact) matched a whitespace delimiter */ |
| 128 | if (fmt[0] == ' '){ |
| 129 | /* scan forward eating whitespace */ |
| 130 | while (input[input_index] == ' ' || |
| 131 | input[input_index] == '\t' || |
| 132 | input[input_index] == '\n') |
| 133 | input_index++; |
| 134 | input_index--; |
| 135 | } |
| 136 | goto found; |
| 137 | } |
| 138 | /* If we're looking for whitespace */ |
| 139 | if (fmt[0] == ' ') { |
| 140 | /* and we have whitespace */ |
| 141 | if (input[input_index] == ' ' || |
| 142 | input[input_index] == '\t' || |
| 143 | input[input_index] == '\n') { |
| 144 | /* scan forward eating whitespace */ |
| 145 | while (input[input_index] == ' ' || |
| 146 | input[input_index] == '\t' || |
| 147 | input[input_index] == '\n') { |
| 148 | input_index++; |
| 149 | } |
| 150 | input_index--; |
| 151 | goto found; |
| 152 | } |
| 153 | } |
| 154 | /* Not a delimiter, save it */ |
| 155 | vec_add1(this, input[input_index]); |
| 156 | input_index++; |
| 157 | } |
| 158 | /* |
| 159 | * Fell off the wagon, clean up and bail out |
| 160 | */ |
| 161 | bail: |
| 162 | |
| 163 | #ifdef MATCH_TRACE |
| 164 | printf("failed, fmt[0] = '%c', input[%d]='%s'\n", |
| 165 | fmt[0], input_index, &input[input_index]); |
| 166 | #endif |
| 167 | delsvec_recycle_this_string(this); |
| 168 | for (i = 0; i < vec_len(rv); i++) |
| 169 | delsvec_recycle_this_string(rv[i]); |
| 170 | delsvec_recycle_this_svec(rv); |
| 171 | return(0); |
| 172 | |
| 173 | found: |
| 174 | /* |
| 175 | * Delimiter matched |
| 176 | */ |
| 177 | input_index++; |
| 178 | fmt++; |
| 179 | /* |
| 180 | * If we actually accumulated non-delimiter characters, |
| 181 | * add them to the result vector |
| 182 | */ |
| 183 | if (vec_len(this)) { |
| 184 | vec_add1(this, 0); |
| 185 | #ifdef MATCH_TRACE |
| 186 | printf("match: add '%s'\n", this); |
| 187 | #endif |
| 188 | vec_add1(rv, this); |
| 189 | } else { |
| 190 | vec_add1(string_cache, this); |
| 191 | } |
| 192 | } else { |
| 193 | /* |
| 194 | * right-to-left scan, '*' not at |
| 195 | * the end of the delimiter string |
| 196 | */ |
| 197 | i = input_index; |
| 198 | while (input[++i]) |
| 199 | ; /* scan forward */ |
| 200 | i--; |
| 201 | while (i > input_index) { |
| 202 | if (input[i] == fmt[0]) |
| 203 | goto found2; |
| 204 | |
| 205 | if (fmt[0] == ' ' || fmt[0] == '\t' || |
| 206 | fmt[0] == '\n') { |
| 207 | if (input[i] == ' ' || |
| 208 | input[i] == '\t' || |
| 209 | input[i] == '\n') |
| 210 | goto found2; |
| 211 | } |
| 212 | i--; |
| 213 | } |
| 214 | goto bail; |
| 215 | |
| 216 | found2: |
| 217 | for (; input_index < i; input_index++) { |
| 218 | vec_add1(this, input[input_index]); |
| 219 | } |
| 220 | input_index++; |
| 221 | fmt++; |
| 222 | vec_add1(this, 0); |
| 223 | #ifdef MATCH_TRACE |
| 224 | printf("inner '*' match: add '%s'\n", this); |
| 225 | #endif |
| 226 | vec_add1(rv, this); |
| 227 | } |
| 228 | } |
| 229 | return (rv); |
| 230 | } |
| 231 | |
| 232 | #ifdef UNIT_TESTS |
| 233 | |
/* One unit-test case: an input line and the delimiter string to apply. */
typedef struct utest_ {
    char *string;   /* passed to delsvec() as the input */
    char *fmt;      /* passed to delsvec() as the delimiter string */
} utest_t;
| 238 | |
| 239 | utest_t tests[] = { |
| 240 | #ifdef NOTDEF |
| 241 | {"Dec 7 08:56", |
| 242 | " :*"}, |
| 243 | {"Dec 17 08:56", |
| 244 | " :*"}, |
| 245 | {"Dec 7 08:56:41.239 install/inst_repl 0/9/CPU0 t1 [40989] File List:Successfully blobbified file list. Took 1 milliseconds", |
| 246 | " ::. / // [] *"}, |
| 247 | {"RP/0/9/CPU0:Dec 7 08:55:28.550 : sam_server[291]: SAM backs up digest list to memory file", |
| 248 | "///: ::. : []: *"}, |
| 249 | /* Expected to fail */ |
| 250 | {"Dec 7 08:56:41.239 install/inst_repl 0/9/CPU0 t1 [40989] File List:Successfully blobbified file list. Took 1 milliseconds", |
| 251 | "///: ::. : : *"}, |
| 252 | /* Expected to fail */ |
| 253 | {"RP/0/9/CPU0:Dec 7 08:55:28.550 : sam_server[291]: SAM backs up digest list to memory file", |
| 254 | " ::. / // [] *"}, |
| 255 | {"THIS that and + theother", "*+ *"}, |
| 256 | {"Dec 12 15:33:07.103 ifmgr/errors 0/RP0/CPU0 3# t2 Failed to open IM connection: No such file or directory", " ::. / // *"}, |
| 257 | {"Dec 16 21:43:47.328 ifmgr/bulk 0/3/CPU0 t8 Bulk DPC async download complete. Partitions 1, node_count 1, total_out 0, out_offset 0, out_expected 0: No error"," ::. / // *"}, |
| 258 | {"t:0x53034bd6 CPU:00 PROCESS :PROCCREATE_NAME", |
| 259 | ": : :*"}, |
| 260 | {" pid:1", " *"}, |
| 261 | {"t:0x53034cbb CPU:00 THREAD :THCREATE pid:1 tid:1", |
| 262 | ": : : pid: tid:*"}, |
| 263 | {"t:0x5303f950 CPU:00 COMM :REC_PULSE scoid:0x40000003 pid:364659", |
| 264 | ": : : *"}, |
| 265 | {"/hfr-base-3.3.85/lib/libttyconnection.dll 0xfc000000 0x0000306c 0xfc027000 0x000001c8 1", |
| 266 | " *"}, |
| 267 | {"Feb 28 02:38:26.123 seqtrace 0/1/CPU0 t8 :msg_receive:ifmgr/t8:IMC_MSG_MTU_UPDATE:ppp_ma/t1", |
| 268 | " ::. // ::::*"}, |
| 269 | |
| 270 | {"Feb 28 02:38:26.123 seqtrace 0/1/CPU0 t8 :msg_send_event:call:ifmgr/t8:124/0:cdp/t1", |
| 271 | " ::. // :msg_send_event::::*"}, |
| 272 | |
| 273 | {"Feb 28 02:38:26.125 seqtrace 0/1/CPU0 t1 :msg_receive_event:cdp/t1:124/0", |
| 274 | " ::. // :msg_receive_event::*"} |
| 275 | {"t:0x645dd86d CPU:00 USREVENT:EVENT:100, d0:0x00000002 d1:0x00000000", |
| 276 | ": : USREVENT:EVENT:, d0: *"} |
| 277 | {"t:0x5303f950 CPU:00 COMM :REC_PULSE scoid:0x40000003 pid:364659", |
| 278 | ": : : *"}, |
| 279 | {"t:0x2ccf9f5a CPU:00 INT_ENTR:0x80000000 (-2147483648) IP:0x002d8b18", |
| 280 | ": : INT_ENTR: IP:*"} |
| 281 | {"t:0xd473951c CPU:00 KER_EXIT:SCHED_GET/88 ret_val:2 sched_priority:10", |
| 282 | ": : KER_EXIT:SCHED_GET : sched_priority:*"} |
| 283 | {"t:0x00000123 CPU:01 SYSTEM :FUNC_ENTER thisfn:0x40e62048 call_site:0x00000000", |
| 284 | ": : SYSTEM :FUNC_ thisfn: *"}, |
| 285 | {"t:0x5af8de95 CPU:00 INT_HANDLER_ENTR:0x0000004d (77) PID:8200 IP:0x00000000 AREA:0x0bf9b290", ": : INT_HANDLER_*"}, |
| 286 | #endif |
| 287 | {"t:0x6d1ff92f CPU:00 CONTROL: BUFFER sequence = 1053, num_events = 714", |
| 288 | ": : CONTROL*"}, |
| 289 | {"t:0x6d1ff92f CPU:00 CONTROL :TIME msb:0x0000003c lsb(offset):0x6d1ff921", |
| 290 | ": : CONTROL*"}, |
| 291 | }; |
| 292 | |
| 293 | int main (int argc, char **argv) |
| 294 | { |
| 295 | int i, j; |
| 296 | u8 **svec; |
| 297 | |
| 298 | for (j = 0; j < ARRAY_LEN(tests); j++) { |
| 299 | printf ("input string: '%s'\n", tests[j].string); |
| 300 | printf ("delimiter arg: '%s'\n", tests[j].fmt); |
| 301 | printf ("parse trace:\n"); |
| 302 | svec = delsvec(tests[j].string, tests[j].fmt); |
| 303 | if (!svec) { |
| 304 | printf("index %d failed\n", j); |
| 305 | continue; |
| 306 | } |
| 307 | printf("%d substring vectors\n", vec_len(svec)); |
| 308 | for (i = 0; i < vec_len(svec); i++) { |
| 309 | printf("[%d]: '%s'\n", i, svec[i]); |
| 310 | } |
| 311 | printf ("-------------------\n"); |
| 312 | } |
| 313 | exit(0); |
| 314 | } |
| 315 | #endif |