Florin Coras | 7e43005 | 2023-08-26 12:59:11 -0700 | [diff] [blame] | 1 | /* |
Dave Barach | 52642c3 | 2016-02-11 19:28:19 -0500 | [diff] [blame] | 2 | *------------------------------------------------------------------ |
| 3 | * Copyright (c) 2006-2016 Cisco and/or its affiliates. |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at: |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
| 16 | |
| 17 | /* Break up a delimited string into a vector of substrings */ |
| 18 | |
| 19 | #include <stdio.h> |
| 20 | #include <vppinfra/clib.h> |
| 21 | #include <vppinfra/vec.h> |
| 22 | #include <vppinfra/hash.h> |
| 23 | #include <stdarg.h> |
| 24 | |
| 25 | /* |
| 26 | * #define UNIT_TESTS 1 |
Florin Coras | 7e43005 | 2023-08-26 12:59:11 -0700 | [diff] [blame] | 27 | * #define MATCH_TRACE 1 |
Dave Barach | 52642c3 | 2016-02-11 19:28:19 -0500 | [diff] [blame] | 28 | */ |
| 29 | |
| 30 | /* |
| 31 | * delsvec |
| 32 | * break up an input string into a vector of [null-terminated] u8 *'s |
Florin Coras | 7e43005 | 2023-08-26 12:59:11 -0700 | [diff] [blame] | 33 | * |
Dave Barach | 52642c3 | 2016-02-11 19:28:19 -0500 | [diff] [blame] | 34 | * Each supplied delimiter character results in a string in the output |
| 35 | * vector, unless the delimiters occur back-to-back. When matched, |
| 36 | * a whitespace character in the delimiter consumes an arbitrary |
| 37 | * run of whitespace. See the unit tests at the end of this file |
| 38 | * for a set of examples. |
| 39 | * |
| 40 | * Returns a u8 **, or NULL if the input fails to match. It is assumed |
| 41 | * that both input and fmt are C strings, not necessarily vectors. |
| 42 | * |
| 43 | * Output strings are both vectors and proper C strings. |
| 44 | */ |
| 45 | |
/*
 * Stack of recycled string vectors. delsvec_recycle_this_string pushes
 * onto it; delsvec pops its working string from it when it is non-empty.
 */
static u8 **string_cache;
/*
 * At most one recycled result vector. delsvec_recycle_this_svec stores it
 * here with its length reset to 0; the next delsvec call takes it over.
 */
static u8 **svec_cache;
| 48 | |
Florin Coras | 7e43005 | 2023-08-26 12:59:11 -0700 | [diff] [blame] | 49 | void |
| 50 | delsvec_recycle_this_string (u8 *s) |
Dave Barach | 52642c3 | 2016-02-11 19:28:19 -0500 | [diff] [blame] | 51 | { |
Florin Coras | 7e43005 | 2023-08-26 12:59:11 -0700 | [diff] [blame] | 52 | if (s) |
| 53 | { |
| 54 | vec_set_len (s, 0); |
| 55 | vec_add1 (string_cache, s); |
Dave Barach | 52642c3 | 2016-02-11 19:28:19 -0500 | [diff] [blame] | 56 | } |
| 57 | } |
| 58 | |
Florin Coras | 7e43005 | 2023-08-26 12:59:11 -0700 | [diff] [blame] | 59 | void |
| 60 | delsvec_recycle_this_svec (u8 **svec) |
Dave Barach | 52642c3 | 2016-02-11 19:28:19 -0500 | [diff] [blame] | 61 | { |
Florin Coras | 7e43005 | 2023-08-26 12:59:11 -0700 | [diff] [blame] | 62 | if (svec) |
| 63 | { |
| 64 | if (svec_cache) |
| 65 | { |
| 66 | vec_free (svec_cache); |
| 67 | } |
| 68 | vec_set_len (svec, 0); |
| 69 | svec_cache = svec; |
Dave Barach | 52642c3 | 2016-02-11 19:28:19 -0500 | [diff] [blame] | 70 | } |
| 71 | } |
| 72 | |
/*
 * Return vec_len (a) as a plain int.
 * NOTE(review): looks like a helper meant to be called from a debugger;
 * nothing in the visible part of this file calls it.
 */
int
pvl (char *a)
{
  int len = vec_len (a);

  return len;
}
| 78 | |
Florin Coras | 7e43005 | 2023-08-26 12:59:11 -0700 | [diff] [blame] | 79 | u8 ** |
| 80 | delsvec (void *input_arg, char *fmt) |
Dave Barach | 52642c3 | 2016-02-11 19:28:19 -0500 | [diff] [blame] | 81 | { |
Florin Coras | 7e43005 | 2023-08-26 12:59:11 -0700 | [diff] [blame] | 82 | u8 **rv = 0; |
| 83 | int input_index = 0; |
| 84 | u8 *this; |
| 85 | int dirflag = 0; |
| 86 | int i; |
| 87 | u8 *input = input_arg; |
Dave Barach | 52642c3 | 2016-02-11 19:28:19 -0500 | [diff] [blame] | 88 | |
Florin Coras | 7e43005 | 2023-08-26 12:59:11 -0700 | [diff] [blame] | 89 | if (svec_cache) |
| 90 | { |
| 91 | rv = svec_cache; |
| 92 | svec_cache = 0; |
Dave Barach | 52642c3 | 2016-02-11 19:28:19 -0500 | [diff] [blame] | 93 | } |
| 94 | |
Florin Coras | 7e43005 | 2023-08-26 12:59:11 -0700 | [diff] [blame] | 95 | while (fmt) |
| 96 | { |
| 97 | dirflag = 0; |
| 98 | if (vec_len (string_cache) > 0) |
| 99 | { |
| 100 | this = string_cache[vec_len (string_cache) - 1]; |
| 101 | vec_set_len (string_cache, vec_len (string_cache) - 1); |
| 102 | } |
| 103 | else |
| 104 | this = 0; |
| 105 | /* |
| 106 | * '*' means one of two things: match the rest of the input, |
| 107 | * or match as many characters as possible |
| 108 | */ |
| 109 | if (fmt[0] == '*') |
| 110 | { |
| 111 | fmt++; |
| 112 | dirflag = 1; |
| 113 | /* |
| 114 | * no more format: eat rest of string... |
| 115 | */ |
| 116 | if (!fmt[0]) |
| 117 | { |
| 118 | for (; input[input_index]; input_index++) |
| 119 | vec_add1 (this, input[input_index]); |
| 120 | if (vec_len (this)) |
| 121 | { |
| 122 | vec_add1 (this, 0); |
Dave Barach | 52642c3 | 2016-02-11 19:28:19 -0500 | [diff] [blame] | 123 | #ifdef MATCH_TRACE |
Florin Coras | 7e43005 | 2023-08-26 12:59:11 -0700 | [diff] [blame] | 124 | printf ("final star-match adds: '%s'\n", this); |
Dave Barach | 52642c3 | 2016-02-11 19:28:19 -0500 | [diff] [blame] | 125 | #endif |
Florin Coras | 7e43005 | 2023-08-26 12:59:11 -0700 | [diff] [blame] | 126 | vec_add1 (rv, this); |
| 127 | } |
| 128 | else |
| 129 | { |
| 130 | vec_add1 (string_cache, this); |
| 131 | } |
Dave Barach | 52642c3 | 2016-02-11 19:28:19 -0500 | [diff] [blame] | 132 | |
Florin Coras | 7e43005 | 2023-08-26 12:59:11 -0700 | [diff] [blame] | 133 | return (rv); |
| 134 | } |
| 135 | } |
| 136 | /* |
| 137 | * Left-to-right scan, adding chars until next delimiter char |
| 138 | * appears. |
| 139 | */ |
| 140 | if (!dirflag) |
| 141 | { |
| 142 | while (input[input_index]) |
| 143 | { |
| 144 | if (input[input_index] == fmt[0]) |
| 145 | { |
| 146 | /* If we just (exact) matched a whitespace delimiter */ |
| 147 | if (fmt[0] == ' ') |
| 148 | { |
| 149 | /* scan forward eating whitespace */ |
| 150 | while (input[input_index] == ' ' || |
| 151 | input[input_index] == '\t' || |
| 152 | input[input_index] == '\n') |
| 153 | input_index++; |
| 154 | input_index--; |
| 155 | } |
| 156 | goto found; |
| 157 | } |
| 158 | /* If we're looking for whitespace */ |
| 159 | if (fmt[0] == ' ') |
| 160 | { |
| 161 | /* and we have whitespace */ |
| 162 | if (input[input_index] == ' ' || |
| 163 | input[input_index] == '\t' || input[input_index] == '\n') |
| 164 | { |
| 165 | /* scan forward eating whitespace */ |
| 166 | while (input[input_index] == ' ' || |
| 167 | input[input_index] == '\t' || |
| 168 | input[input_index] == '\n') |
| 169 | { |
| 170 | input_index++; |
| 171 | } |
| 172 | input_index--; |
| 173 | goto found; |
| 174 | } |
| 175 | } |
| 176 | /* Not a delimiter, save it */ |
| 177 | vec_add1 (this, input[input_index]); |
| 178 | input_index++; |
| 179 | } |
| 180 | /* |
| 181 | * Fell off the wagon, clean up and bail out |
| 182 | */ |
| 183 | bail: |
Dave Barach | 52642c3 | 2016-02-11 19:28:19 -0500 | [diff] [blame] | 184 | |
| 185 | #ifdef MATCH_TRACE |
Florin Coras | 7e43005 | 2023-08-26 12:59:11 -0700 | [diff] [blame] | 186 | printf ("failed, fmt[0] = '%c', input[%d]='%s'\n", fmt[0], |
| 187 | input_index, &input[input_index]); |
Dave Barach | 52642c3 | 2016-02-11 19:28:19 -0500 | [diff] [blame] | 188 | #endif |
Florin Coras | 7e43005 | 2023-08-26 12:59:11 -0700 | [diff] [blame] | 189 | delsvec_recycle_this_string (this); |
| 190 | for (i = 0; i < vec_len (rv); i++) |
| 191 | delsvec_recycle_this_string (rv[i]); |
| 192 | delsvec_recycle_this_svec (rv); |
| 193 | return (0); |
Dave Barach | 52642c3 | 2016-02-11 19:28:19 -0500 | [diff] [blame] | 194 | |
Florin Coras | 7e43005 | 2023-08-26 12:59:11 -0700 | [diff] [blame] | 195 | found: |
| 196 | /* |
| 197 | * Delimiter matched |
| 198 | */ |
| 199 | input_index++; |
| 200 | fmt++; |
| 201 | /* |
| 202 | * If we actually accumulated non-delimiter characters, |
| 203 | * add them to the result vector |
| 204 | */ |
| 205 | if (vec_len (this)) |
| 206 | { |
| 207 | vec_add1 (this, 0); |
Dave Barach | 52642c3 | 2016-02-11 19:28:19 -0500 | [diff] [blame] | 208 | #ifdef MATCH_TRACE |
Florin Coras | 7e43005 | 2023-08-26 12:59:11 -0700 | [diff] [blame] | 209 | printf ("match: add '%s'\n", this); |
Dave Barach | 52642c3 | 2016-02-11 19:28:19 -0500 | [diff] [blame] | 210 | #endif |
Florin Coras | 7e43005 | 2023-08-26 12:59:11 -0700 | [diff] [blame] | 211 | vec_add1 (rv, this); |
| 212 | } |
| 213 | else |
| 214 | { |
| 215 | vec_add1 (string_cache, this); |
| 216 | } |
| 217 | } |
| 218 | else |
| 219 | { |
| 220 | /* |
| 221 | * right-to-left scan, '*' not at |
| 222 | * the end of the delimiter string |
| 223 | */ |
| 224 | i = input_index; |
| 225 | while (input[++i]) |
| 226 | ; /* scan forward */ |
| 227 | i--; |
| 228 | while (i > input_index) |
| 229 | { |
| 230 | if (input[i] == fmt[0]) |
| 231 | goto found2; |
| 232 | |
| 233 | if (fmt[0] == ' ' || fmt[0] == '\t' || fmt[0] == '\n') |
| 234 | { |
| 235 | if (input[i] == ' ' || input[i] == '\t' || input[i] == '\n') |
| 236 | goto found2; |
| 237 | } |
| 238 | i--; |
| 239 | } |
| 240 | goto bail; |
| 241 | |
| 242 | found2: |
| 243 | for (; input_index < i; input_index++) |
| 244 | { |
| 245 | vec_add1 (this, input[input_index]); |
| 246 | } |
| 247 | input_index++; |
| 248 | fmt++; |
| 249 | vec_add1 (this, 0); |
| 250 | #ifdef MATCH_TRACE |
| 251 | printf ("inner '*' match: add '%s'\n", this); |
| 252 | #endif |
| 253 | vec_add1 (rv, this); |
| 254 | } |
Dave Barach | 52642c3 | 2016-02-11 19:28:19 -0500 | [diff] [blame] | 255 | } |
Florin Coras | 7e43005 | 2023-08-26 12:59:11 -0700 | [diff] [blame] | 256 | return (rv); |
Dave Barach | 52642c3 | 2016-02-11 19:28:19 -0500 | [diff] [blame] | 257 | } |
| 258 | |
| 259 | #ifdef UNIT_TESTS |
| 260 | |
/* One unit-test case: an input line and the delimiter string to split it */
struct utest_
{
  char *string; /* input handed to delsvec */
  char *fmt;	/* delimiter string handed to delsvec */
};
typedef struct utest_ utest_t;
| 266 | |
| 267 | utest_t tests[] = { |
| 268 | #ifdef NOTDEF |
Florin Coras | 7e43005 | 2023-08-26 12:59:11 -0700 | [diff] [blame] | 269 | { "Dec 7 08:56", " :*" }, |
| 270 | { "Dec 17 08:56", " :*" }, |
| 271 | { "Dec 7 08:56:41.239 install/inst_repl 0/9/CPU0 t1 [40989] File " |
| 272 | "List:Successfully blobbified file list. Took 1 milliseconds", |
| 273 | " ::. / // [] *" }, |
| 274 | { "RP/0/9/CPU0:Dec 7 08:55:28.550 : sam_server[291]: SAM backs up digest " |
| 275 | "list to memory file", |
| 276 | "///: ::. : []: *" }, |
| 277 | /* Expected to fail */ |
| 278 | { "Dec 7 08:56:41.239 install/inst_repl 0/9/CPU0 t1 [40989] File " |
| 279 | "List:Successfully blobbified file list. Took 1 milliseconds", |
| 280 | "///: ::. : : *" }, |
| 281 | /* Expected to fail */ |
| 282 | { "RP/0/9/CPU0:Dec 7 08:55:28.550 : sam_server[291]: SAM backs up digest " |
| 283 | "list to memory file", |
| 284 | " ::. / // [] *" }, |
| 285 | { "THIS that and + theother", "*+ *" }, |
| 286 | { "Dec 12 15:33:07.103 ifmgr/errors 0/RP0/CPU0 3# t2 Failed to open IM " |
| 287 | "connection: No such file or directory", |
| 288 | " ::. / // *" }, |
| 289 | { "Dec 16 21:43:47.328 ifmgr/bulk 0/3/CPU0 t8 Bulk DPC async download " |
| 290 | "complete. Partitions 1, node_count 1, total_out 0, out_offset 0, " |
| 291 | "out_expected 0: No error", |
| 292 | " ::. / // *" }, |
| 293 | { "t:0x53034bd6 CPU:00 PROCESS :PROCCREATE_NAME", ": : :*" }, |
| 294 | { " pid:1", " *" }, |
| 295 | { "t:0x53034cbb CPU:00 THREAD :THCREATE pid:1 tid:1", |
| 296 | ": : : pid: tid:*" }, |
| 297 | { "t:0x5303f950 CPU:00 COMM :REC_PULSE scoid:0x40000003 pid:364659", |
| 298 | ": : : *" }, |
| 299 | { "/hfr-base-3.3.85/lib/libttyconnection.dll 0xfc000000 0x0000306c " |
| 300 | "0xfc027000 0x000001c8 1", |
| 301 | " *" }, |
| 302 | { "Feb 28 02:38:26.123 seqtrace 0/1/CPU0 t8 " |
| 303 | ":msg_receive:ifmgr/t8:IMC_MSG_MTU_UPDATE:ppp_ma/t1", |
| 304 | " ::. // ::::*" }, |
Dave Barach | 52642c3 | 2016-02-11 19:28:19 -0500 | [diff] [blame] | 305 | |
Florin Coras | 7e43005 | 2023-08-26 12:59:11 -0700 | [diff] [blame] | 306 | { "Feb 28 02:38:26.123 seqtrace 0/1/CPU0 t8 " |
| 307 | ":msg_send_event:call:ifmgr/t8:124/0:cdp/t1", |
| 308 | " ::. // :msg_send_event::::*" }, |
Dave Barach | 52642c3 | 2016-02-11 19:28:19 -0500 | [diff] [blame] | 309 | |
Florin Coras | 7e43005 | 2023-08-26 12:59:11 -0700 | [diff] [blame] | 310 | { "Feb 28 02:38:26.125 seqtrace 0/1/CPU0 t1 " |
| 311 | ":msg_receive_event:cdp/t1:124/0", |
| 312 | " ::. // :msg_receive_event::*" } { |
| 313 | "t:0x645dd86d CPU:00 USREVENT:EVENT:100, d0:0x00000002 d1:0x00000000", |
| 314 | ": : USREVENT:EVENT:, d0: *" } { |
| 315 | "t:0x5303f950 CPU:00 COMM :REC_PULSE scoid:0x40000003 pid:364659", |
| 316 | ": : : *" }, |
| 317 | { "t:0x2ccf9f5a CPU:00 INT_ENTR:0x80000000 (-2147483648) " |
| 318 | "IP:0x002d8b18", |
| 319 | ": : INT_ENTR: IP:*" } { |
| 320 | "t:0xd473951c CPU:00 KER_EXIT:SCHED_GET/88 ret_val:2 sched_priority:10", |
| 321 | ": : KER_EXIT:SCHED_GET : sched_priority:*" } { |
| 322 | "t:0x00000123 CPU:01 SYSTEM :FUNC_ENTER thisfn:0x40e62048 " |
| 323 | "call_site:0x00000000", |
| 324 | ": : SYSTEM :FUNC_ thisfn: *" }, |
| 325 | { "t:0x5af8de95 CPU:00 INT_HANDLER_ENTR:0x0000004d (77) PID:8200 " |
| 326 | "IP:0x00000000 AREA:0x0bf9b290", |
| 327 | ": : INT_HANDLER_*" }, |
Dave Barach | 52642c3 | 2016-02-11 19:28:19 -0500 | [diff] [blame] | 328 | #endif |
Florin Coras | 7e43005 | 2023-08-26 12:59:11 -0700 | [diff] [blame] | 329 | { "t:0x6d1ff92f CPU:00 CONTROL: BUFFER sequence = 1053, num_events = 714", |
| 330 | ": : CONTROL*" }, |
| 331 | { "t:0x6d1ff92f CPU:00 CONTROL :TIME msb:0x0000003c lsb(offset):0x6d1ff921", |
| 332 | ": : CONTROL*" }, |
Dave Barach | 52642c3 | 2016-02-11 19:28:19 -0500 | [diff] [blame] | 333 | }; |
| 334 | |
Florin Coras | 7e43005 | 2023-08-26 12:59:11 -0700 | [diff] [blame] | 335 | int |
| 336 | main (int argc, char **argv) |
Dave Barach | 52642c3 | 2016-02-11 19:28:19 -0500 | [diff] [blame] | 337 | { |
Florin Coras | 7e43005 | 2023-08-26 12:59:11 -0700 | [diff] [blame] | 338 | int i, j; |
| 339 | u8 **svec; |
Dave Barach | 52642c3 | 2016-02-11 19:28:19 -0500 | [diff] [blame] | 340 | |
Florin Coras | 7e43005 | 2023-08-26 12:59:11 -0700 | [diff] [blame] | 341 | for (j = 0; j < ARRAY_LEN (tests); j++) |
| 342 | { |
| 343 | printf ("input string: '%s'\n", tests[j].string); |
| 344 | printf ("delimiter arg: '%s'\n", tests[j].fmt); |
| 345 | printf ("parse trace:\n"); |
| 346 | svec = delsvec (tests[j].string, tests[j].fmt); |
| 347 | if (!svec) |
| 348 | { |
| 349 | printf ("index %d failed\n", j); |
| 350 | continue; |
| 351 | } |
| 352 | printf ("%d substring vectors\n", vec_len (svec)); |
| 353 | for (i = 0; i < vec_len (svec); i++) |
| 354 | { |
| 355 | printf ("[%d]: '%s'\n", i, svec[i]); |
| 356 | } |
| 357 | printf ("-------------------\n"); |
Dave Barach | 52642c3 | 2016-02-11 19:28:19 -0500 | [diff] [blame] | 358 | } |
Florin Coras | 7e43005 | 2023-08-26 12:59:11 -0700 | [diff] [blame] | 359 | exit (0); |
Dave Barach | 52642c3 | 2016-02-11 19:28:19 -0500 | [diff] [blame] | 360 | } |
| 361 | #endif |