Wolfgang Denk | a5ecbe6 | 2013-03-23 23:50:31 +0000 | [diff] [blame^] | 1 | /* |
| 2 | * Copyright (c) 2004-2005 Sergey Lyubka <valenok@gmail.com> |
| 3 | * All rights reserved |
| 4 | * |
| 5 | * "THE BEER-WARE LICENSE" (Revision 42): |
| 6 | * Sergey Lyubka wrote this file. As long as you retain this notice you |
| 7 | * can do whatever you want with this stuff. If we meet some day, and you think |
| 8 | * this stuff is worth it, you can buy me a beer in return. |
| 9 | */ |
| 10 | |
| 11 | /* |
| 12 | * Downloaded Sat Nov 5 17:43:06 CET 2011 at |
| 13 | * http://slre.sourceforge.net/1.0/slre.c |
| 14 | */ |
| 15 | |
| 16 | #ifdef SLRE_TEST |
| 17 | #include <stdio.h> |
| 18 | #include <assert.h> |
| 19 | #include <ctype.h> |
| 20 | #include <stdlib.h> |
| 21 | #include <string.h> |
| 22 | #else |
| 23 | #include <common.h> |
| 24 | #include <linux/ctype.h> |
| 25 | #endif /* SLRE_TEST */ |
| 26 | |
| 27 | #include <errno.h> |
| 28 | |
| 29 | #include <slre.h> |
| 30 | |
| 31 | enum {END, BRANCH, ANY, EXACT, ANYOF, ANYBUT, OPEN, CLOSE, BOL, EOL, |
| 32 | STAR, PLUS, STARQ, PLUSQ, QUEST, SPACE, NONSPACE, DIGIT}; |
| 33 | |
| 34 | #ifdef SLRE_TEST |
| 35 | static struct { |
| 36 | const char *name; |
| 37 | int narg; |
| 38 | const char *flags; |
| 39 | } opcodes[] = { |
| 40 | {"END", 0, ""}, /* End of code block or program */ |
| 41 | {"BRANCH", 2, "oo"}, /* Alternative operator, "|" */ |
| 42 | {"ANY", 0, ""}, /* Match any character, "." */ |
| 43 | {"EXACT", 2, "d"}, /* Match exact string */ |
| 44 | {"ANYOF", 2, "D"}, /* Match any from set, "[]" */ |
| 45 | {"ANYBUT", 2, "D"}, /* Match any but from set, "[^]"*/ |
| 46 | {"OPEN ", 1, "i"}, /* Capture start, "(" */ |
| 47 | {"CLOSE", 1, "i"}, /* Capture end, ")" */ |
| 48 | {"BOL", 0, ""}, /* Beginning of string, "^" */ |
| 49 | {"EOL", 0, ""}, /* End of string, "$" */ |
| 50 | {"STAR", 1, "o"}, /* Match zero or more times "*" */ |
| 51 | {"PLUS", 1, "o"}, /* Match one or more times, "+" */ |
| 52 | {"STARQ", 1, "o"}, /* Non-greedy STAR, "*?" */ |
| 53 | {"PLUSQ", 1, "o"}, /* Non-greedy PLUS, "+?" */ |
| 54 | {"QUEST", 1, "o"}, /* Match zero or one time, "?" */ |
| 55 | {"SPACE", 0, ""}, /* Match whitespace, "\s" */ |
| 56 | {"NONSPACE", 0, ""}, /* Match non-space, "\S" */ |
| 57 | {"DIGIT", 0, ""} /* Match digit, "\d" */ |
| 58 | }; |
| 59 | #endif /* SLRE_TEST */ |
| 60 | |
| 61 | /* |
| 62 | * Commands and operands are all unsigned char (1 byte long). All code offsets |
| 63 | * are relative to current address, and positive (always point forward). Data |
| 64 | * offsets are absolute. Commands with operands: |
| 65 | * |
| 66 | * BRANCH offset1 offset2 |
| 67 | * Try to match the code block that follows the BRANCH instruction |
| 68 | * (code block ends with END). If no match, try to match code block that |
| 69 | * starts at offset1. If either of these match, jump to offset2. |
| 70 | * |
| 71 | * EXACT data_offset data_length |
| 72 | * Try to match exact string. String is recorded in data section from |
| 73 | * data_offset, and has length data_length. |
| 74 | * |
| 75 | * OPEN capture_number |
| 76 | * CLOSE capture_number |
| 77 | * If the user have passed 'struct cap' array for captures, OPEN |
| 78 | * records the beginning of the matched substring (cap->ptr), CLOSE |
| 79 | * sets the length (cap->len) for respective capture_number. |
| 80 | * |
| 81 | * STAR code_offset |
| 82 | * PLUS code_offset |
| 83 | * QUEST code_offset |
| 84 | * *, +, ?, respectively. Try to gobble as much as possible from the |
| 85 | * matched buffer, until code block that follows these instructions |
| 86 | * matches. When the longest possible string is matched, |
| 87 | * jump to code_offset |
| 88 | * |
| 89 | * STARQ, PLUSQ are non-greedy versions of STAR and PLUS. |
| 90 | */ |
| 91 | |
| 92 | static const char *meta_chars = "|.^$*+?()[\\"; |
| 93 | |
| 94 | #ifdef SLRE_TEST |
| 95 | |
| 96 | static void |
| 97 | print_character_set(FILE *fp, const unsigned char *p, int len) |
| 98 | { |
| 99 | int i; |
| 100 | |
| 101 | for (i = 0; i < len; i++) { |
| 102 | if (i > 0) |
| 103 | (void) fputc(',', fp); |
| 104 | if (p[i] == 0) { |
| 105 | i++; |
| 106 | if (p[i] == 0) |
| 107 | (void) fprintf(fp, "\\x%02x", p[i]); |
| 108 | else |
| 109 | (void) fprintf(fp, "%s", opcodes[p[i]].name); |
| 110 | } else if (isprint(p[i])) { |
| 111 | (void) fputc(p[i], fp); |
| 112 | } else { |
| 113 | (void) fprintf(fp, "\\x%02x", p[i]); |
| 114 | } |
| 115 | } |
| 116 | } |
| 117 | |
| 118 | void |
| 119 | slre_dump(const struct slre *r, FILE *fp) |
| 120 | { |
| 121 | int i, j, ch, op, pc; |
| 122 | |
| 123 | for (pc = 0; pc < r->code_size; pc++) { |
| 124 | |
| 125 | op = r->code[pc]; |
| 126 | (void) fprintf(fp, "%3d %s ", pc, opcodes[op].name); |
| 127 | |
| 128 | for (i = 0; opcodes[op].flags[i] != '\0'; i++) |
| 129 | switch (opcodes[op].flags[i]) { |
| 130 | case 'i': |
| 131 | (void) fprintf(fp, "%d ", r->code[pc + 1]); |
| 132 | pc++; |
| 133 | break; |
| 134 | case 'o': |
| 135 | (void) fprintf(fp, "%d ", |
| 136 | pc + r->code[pc + 1] - i); |
| 137 | pc++; |
| 138 | break; |
| 139 | case 'D': |
| 140 | print_character_set(fp, r->data + |
| 141 | r->code[pc + 1], r->code[pc + 2]); |
| 142 | pc += 2; |
| 143 | break; |
| 144 | case 'd': |
| 145 | (void) fputc('"', fp); |
| 146 | for (j = 0; j < r->code[pc + 2]; j++) { |
| 147 | ch = r->data[r->code[pc + 1] + j]; |
| 148 | if (isprint(ch)) { |
| 149 | (void) fputc(ch, fp); |
| 150 | } else { |
| 151 | (void) fprintf(fp, |
| 152 | "\\x%02x", ch); |
| 153 | } |
| 154 | } |
| 155 | (void) fputc('"', fp); |
| 156 | pc += 2; |
| 157 | break; |
| 158 | } |
| 159 | |
| 160 | (void) fputc('\n', fp); |
| 161 | } |
| 162 | } |
| 163 | #endif /* SLRE_TEST */ |
| 164 | |
| 165 | static void |
| 166 | set_jump_offset(struct slre *r, int pc, int offset) |
| 167 | { |
| 168 | assert(offset < r->code_size); |
| 169 | |
| 170 | if (r->code_size - offset > 0xff) |
| 171 | r->err_str = "Jump offset is too big"; |
| 172 | else |
| 173 | r->code[pc] = (unsigned char) (r->code_size - offset); |
| 174 | } |
| 175 | |
| 176 | static void |
| 177 | emit(struct slre *r, int code) |
| 178 | { |
| 179 | if (r->code_size >= (int) (sizeof(r->code) / sizeof(r->code[0]))) |
| 180 | r->err_str = "RE is too long (code overflow)"; |
| 181 | else |
| 182 | r->code[r->code_size++] = (unsigned char) code; |
| 183 | } |
| 184 | |
| 185 | static void |
| 186 | store_char_in_data(struct slre *r, int ch) |
| 187 | { |
| 188 | if (r->data_size >= (int) sizeof(r->data)) |
| 189 | r->err_str = "RE is too long (data overflow)"; |
| 190 | else |
| 191 | r->data[r->data_size++] = ch; |
| 192 | } |
| 193 | |
| 194 | static void |
| 195 | exact(struct slre *r, const char **re) |
| 196 | { |
| 197 | int old_data_size = r->data_size; |
| 198 | |
| 199 | while (**re != '\0' && (strchr(meta_chars, **re)) == NULL) |
| 200 | store_char_in_data(r, *(*re)++); |
| 201 | |
| 202 | emit(r, EXACT); |
| 203 | emit(r, old_data_size); |
| 204 | emit(r, r->data_size - old_data_size); |
| 205 | } |
| 206 | |
| 207 | static int |
| 208 | get_escape_char(const char **re) |
| 209 | { |
| 210 | int res; |
| 211 | |
| 212 | switch (*(*re)++) { |
| 213 | case 'n': |
| 214 | res = '\n'; |
| 215 | break; |
| 216 | case 'r': |
| 217 | res = '\r'; |
| 218 | break; |
| 219 | case 't': |
| 220 | res = '\t'; |
| 221 | break; |
| 222 | case '0': |
| 223 | res = 0; |
| 224 | break; |
| 225 | case 'S': |
| 226 | res = NONSPACE << 8; |
| 227 | break; |
| 228 | case 's': |
| 229 | res = SPACE << 8; |
| 230 | break; |
| 231 | case 'd': |
| 232 | res = DIGIT << 8; |
| 233 | break; |
| 234 | default: |
| 235 | res = (*re)[-1]; |
| 236 | break; |
| 237 | } |
| 238 | |
| 239 | return res; |
| 240 | } |
| 241 | |
| 242 | static void |
| 243 | anyof(struct slre *r, const char **re) |
| 244 | { |
| 245 | int esc, old_data_size = r->data_size, op = ANYOF; |
| 246 | |
| 247 | if (**re == '^') { |
| 248 | op = ANYBUT; |
| 249 | (*re)++; |
| 250 | } |
| 251 | |
| 252 | while (**re != '\0') |
| 253 | |
| 254 | switch (*(*re)++) { |
| 255 | case ']': |
| 256 | emit(r, op); |
| 257 | emit(r, old_data_size); |
| 258 | emit(r, r->data_size - old_data_size); |
| 259 | return; |
| 260 | /* NOTREACHED */ |
| 261 | break; |
| 262 | case '\\': |
| 263 | esc = get_escape_char(re); |
| 264 | if ((esc & 0xff) == 0) { |
| 265 | store_char_in_data(r, 0); |
| 266 | store_char_in_data(r, esc >> 8); |
| 267 | } else { |
| 268 | store_char_in_data(r, esc); |
| 269 | } |
| 270 | break; |
| 271 | default: |
| 272 | store_char_in_data(r, (*re)[-1]); |
| 273 | break; |
| 274 | } |
| 275 | |
| 276 | r->err_str = "No closing ']' bracket"; |
| 277 | } |
| 278 | |
| 279 | static void |
| 280 | relocate(struct slre *r, int begin, int shift) |
| 281 | { |
| 282 | emit(r, END); |
| 283 | memmove(r->code + begin + shift, r->code + begin, r->code_size - begin); |
| 284 | r->code_size += shift; |
| 285 | } |
| 286 | |
| 287 | static void |
| 288 | quantifier(struct slre *r, int prev, int op) |
| 289 | { |
| 290 | if (r->code[prev] == EXACT && r->code[prev + 2] > 1) { |
| 291 | r->code[prev + 2]--; |
| 292 | emit(r, EXACT); |
| 293 | emit(r, r->code[prev + 1] + r->code[prev + 2]); |
| 294 | emit(r, 1); |
| 295 | prev = r->code_size - 3; |
| 296 | } |
| 297 | relocate(r, prev, 2); |
| 298 | r->code[prev] = op; |
| 299 | set_jump_offset(r, prev + 1, prev); |
| 300 | } |
| 301 | |
| 302 | static void |
| 303 | exact_one_char(struct slre *r, int ch) |
| 304 | { |
| 305 | emit(r, EXACT); |
| 306 | emit(r, r->data_size); |
| 307 | emit(r, 1); |
| 308 | store_char_in_data(r, ch); |
| 309 | } |
| 310 | |
| 311 | static void |
| 312 | fixup_branch(struct slre *r, int fixup) |
| 313 | { |
| 314 | if (fixup > 0) { |
| 315 | emit(r, END); |
| 316 | set_jump_offset(r, fixup, fixup - 2); |
| 317 | } |
| 318 | } |
| 319 | |
| 320 | static void |
| 321 | compile(struct slre *r, const char **re) |
| 322 | { |
| 323 | int op, esc, branch_start, last_op, fixup, cap_no, level; |
| 324 | |
| 325 | fixup = 0; |
| 326 | level = r->num_caps; |
| 327 | branch_start = last_op = r->code_size; |
| 328 | |
| 329 | for (;;) |
| 330 | switch (*(*re)++) { |
| 331 | case '\0': |
| 332 | (*re)--; |
| 333 | return; |
| 334 | /* NOTREACHED */ |
| 335 | break; |
| 336 | case '^': |
| 337 | emit(r, BOL); |
| 338 | break; |
| 339 | case '$': |
| 340 | emit(r, EOL); |
| 341 | break; |
| 342 | case '.': |
| 343 | last_op = r->code_size; |
| 344 | emit(r, ANY); |
| 345 | break; |
| 346 | case '[': |
| 347 | last_op = r->code_size; |
| 348 | anyof(r, re); |
| 349 | break; |
| 350 | case '\\': |
| 351 | last_op = r->code_size; |
| 352 | esc = get_escape_char(re); |
| 353 | if (esc & 0xff00) |
| 354 | emit(r, esc >> 8); |
| 355 | else |
| 356 | exact_one_char(r, esc); |
| 357 | break; |
| 358 | case '(': |
| 359 | last_op = r->code_size; |
| 360 | cap_no = ++r->num_caps; |
| 361 | emit(r, OPEN); |
| 362 | emit(r, cap_no); |
| 363 | |
| 364 | compile(r, re); |
| 365 | if (*(*re)++ != ')') { |
| 366 | r->err_str = "No closing bracket"; |
| 367 | return; |
| 368 | } |
| 369 | |
| 370 | emit(r, CLOSE); |
| 371 | emit(r, cap_no); |
| 372 | break; |
| 373 | case ')': |
| 374 | (*re)--; |
| 375 | fixup_branch(r, fixup); |
| 376 | if (level == 0) { |
| 377 | r->err_str = "Unbalanced brackets"; |
| 378 | return; |
| 379 | } |
| 380 | return; |
| 381 | /* NOTREACHED */ |
| 382 | break; |
| 383 | case '+': |
| 384 | case '*': |
| 385 | op = (*re)[-1] == '*' ? STAR : PLUS; |
| 386 | if (**re == '?') { |
| 387 | (*re)++; |
| 388 | op = op == STAR ? STARQ : PLUSQ; |
| 389 | } |
| 390 | quantifier(r, last_op, op); |
| 391 | break; |
| 392 | case '?': |
| 393 | quantifier(r, last_op, QUEST); |
| 394 | break; |
| 395 | case '|': |
| 396 | fixup_branch(r, fixup); |
| 397 | relocate(r, branch_start, 3); |
| 398 | r->code[branch_start] = BRANCH; |
| 399 | set_jump_offset(r, branch_start + 1, branch_start); |
| 400 | fixup = branch_start + 2; |
| 401 | r->code[fixup] = 0xff; |
| 402 | break; |
| 403 | default: |
| 404 | (*re)--; |
| 405 | last_op = r->code_size; |
| 406 | exact(r, re); |
| 407 | break; |
| 408 | } |
| 409 | } |
| 410 | |
| 411 | int |
| 412 | slre_compile(struct slre *r, const char *re) |
| 413 | { |
| 414 | r->err_str = NULL; |
| 415 | r->code_size = r->data_size = r->num_caps = r->anchored = 0; |
| 416 | |
| 417 | if (*re == '^') |
| 418 | r->anchored++; |
| 419 | |
| 420 | emit(r, OPEN); /* This will capture what matches full RE */ |
| 421 | emit(r, 0); |
| 422 | |
| 423 | while (*re != '\0') |
| 424 | compile(r, &re); |
| 425 | |
| 426 | if (r->code[2] == BRANCH) |
| 427 | fixup_branch(r, 4); |
| 428 | |
| 429 | emit(r, CLOSE); |
| 430 | emit(r, 0); |
| 431 | emit(r, END); |
| 432 | |
| 433 | return (r->err_str == NULL ? 1 : 0); |
| 434 | } |
| 435 | |
| 436 | static int match(const struct slre *, int, |
| 437 | const char *, int, int *, struct cap *); |
| 438 | |
| 439 | static void |
| 440 | loop_greedy(const struct slre *r, int pc, const char *s, int len, int *ofs) |
| 441 | { |
| 442 | int saved_offset, matched_offset; |
| 443 | |
| 444 | saved_offset = matched_offset = *ofs; |
| 445 | |
| 446 | while (match(r, pc + 2, s, len, ofs, NULL)) { |
| 447 | saved_offset = *ofs; |
| 448 | if (match(r, pc + r->code[pc + 1], s, len, ofs, NULL)) |
| 449 | matched_offset = saved_offset; |
| 450 | *ofs = saved_offset; |
| 451 | } |
| 452 | |
| 453 | *ofs = matched_offset; |
| 454 | } |
| 455 | |
| 456 | static void |
| 457 | loop_non_greedy(const struct slre *r, int pc, const char *s, int len, int *ofs) |
| 458 | { |
| 459 | int saved_offset = *ofs; |
| 460 | |
| 461 | while (match(r, pc + 2, s, len, ofs, NULL)) { |
| 462 | saved_offset = *ofs; |
| 463 | if (match(r, pc + r->code[pc + 1], s, len, ofs, NULL)) |
| 464 | break; |
| 465 | } |
| 466 | |
| 467 | *ofs = saved_offset; |
| 468 | } |
| 469 | |
| 470 | static int |
| 471 | is_any_of(const unsigned char *p, int len, const char *s, int *ofs) |
| 472 | { |
| 473 | int i, ch; |
| 474 | |
| 475 | ch = s[*ofs]; |
| 476 | |
| 477 | for (i = 0; i < len; i++) |
| 478 | if (p[i] == ch) { |
| 479 | (*ofs)++; |
| 480 | return 1; |
| 481 | } |
| 482 | |
| 483 | return 0; |
| 484 | } |
| 485 | |
| 486 | static int |
| 487 | is_any_but(const unsigned char *p, int len, const char *s, int *ofs) |
| 488 | { |
| 489 | int i, ch; |
| 490 | |
| 491 | ch = s[*ofs]; |
| 492 | |
| 493 | for (i = 0; i < len; i++) { |
| 494 | if (p[i] == ch) |
| 495 | return 0; |
| 496 | } |
| 497 | |
| 498 | (*ofs)++; |
| 499 | return 1; |
| 500 | } |
| 501 | |
| 502 | static int |
| 503 | match(const struct slre *r, int pc, const char *s, int len, |
| 504 | int *ofs, struct cap *caps) |
| 505 | { |
| 506 | int n, saved_offset, res = 1; |
| 507 | |
| 508 | while (res && r->code[pc] != END) { |
| 509 | |
| 510 | assert(pc < r->code_size); |
| 511 | assert(pc < (int) (sizeof(r->code) / sizeof(r->code[0]))); |
| 512 | |
| 513 | switch (r->code[pc]) { |
| 514 | case BRANCH: |
| 515 | saved_offset = *ofs; |
| 516 | res = match(r, pc + 3, s, len, ofs, caps); |
| 517 | if (res == 0) { |
| 518 | *ofs = saved_offset; |
| 519 | res = match(r, pc + r->code[pc + 1], |
| 520 | s, len, ofs, caps); |
| 521 | } |
| 522 | pc += r->code[pc + 2]; |
| 523 | break; |
| 524 | case EXACT: |
| 525 | res = 0; |
| 526 | n = r->code[pc + 2]; /* String length */ |
| 527 | if (n <= len - *ofs && !memcmp(s + *ofs, r->data + |
| 528 | r->code[pc + 1], n)) { |
| 529 | (*ofs) += n; |
| 530 | res = 1; |
| 531 | } |
| 532 | pc += 3; |
| 533 | break; |
| 534 | case QUEST: |
| 535 | res = 1; |
| 536 | saved_offset = *ofs; |
| 537 | if (!match(r, pc + 2, s, len, ofs, caps)) |
| 538 | *ofs = saved_offset; |
| 539 | pc += r->code[pc + 1]; |
| 540 | break; |
| 541 | case STAR: |
| 542 | res = 1; |
| 543 | loop_greedy(r, pc, s, len, ofs); |
| 544 | pc += r->code[pc + 1]; |
| 545 | break; |
| 546 | case STARQ: |
| 547 | res = 1; |
| 548 | loop_non_greedy(r, pc, s, len, ofs); |
| 549 | pc += r->code[pc + 1]; |
| 550 | break; |
| 551 | case PLUS: |
| 552 | res = match(r, pc + 2, s, len, ofs, caps); |
| 553 | if (res == 0) |
| 554 | break; |
| 555 | |
| 556 | loop_greedy(r, pc, s, len, ofs); |
| 557 | pc += r->code[pc + 1]; |
| 558 | break; |
| 559 | case PLUSQ: |
| 560 | res = match(r, pc + 2, s, len, ofs, caps); |
| 561 | if (res == 0) |
| 562 | break; |
| 563 | |
| 564 | loop_non_greedy(r, pc, s, len, ofs); |
| 565 | pc += r->code[pc + 1]; |
| 566 | break; |
| 567 | case SPACE: |
| 568 | res = 0; |
| 569 | if (*ofs < len && isspace(((unsigned char *)s)[*ofs])) { |
| 570 | (*ofs)++; |
| 571 | res = 1; |
| 572 | } |
| 573 | pc++; |
| 574 | break; |
| 575 | case NONSPACE: |
| 576 | res = 0; |
| 577 | if (*ofs < len && |
| 578 | !isspace(((unsigned char *)s)[*ofs])) { |
| 579 | (*ofs)++; |
| 580 | res = 1; |
| 581 | } |
| 582 | pc++; |
| 583 | break; |
| 584 | case DIGIT: |
| 585 | res = 0; |
| 586 | if (*ofs < len && isdigit(((unsigned char *)s)[*ofs])) { |
| 587 | (*ofs)++; |
| 588 | res = 1; |
| 589 | } |
| 590 | pc++; |
| 591 | break; |
| 592 | case ANY: |
| 593 | res = 0; |
| 594 | if (*ofs < len) { |
| 595 | (*ofs)++; |
| 596 | res = 1; |
| 597 | } |
| 598 | pc++; |
| 599 | break; |
| 600 | case ANYOF: |
| 601 | res = 0; |
| 602 | if (*ofs < len) |
| 603 | res = is_any_of(r->data + r->code[pc + 1], |
| 604 | r->code[pc + 2], s, ofs); |
| 605 | pc += 3; |
| 606 | break; |
| 607 | case ANYBUT: |
| 608 | res = 0; |
| 609 | if (*ofs < len) |
| 610 | res = is_any_but(r->data + r->code[pc + 1], |
| 611 | r->code[pc + 2], s, ofs); |
| 612 | pc += 3; |
| 613 | break; |
| 614 | case BOL: |
| 615 | res = *ofs == 0 ? 1 : 0; |
| 616 | pc++; |
| 617 | break; |
| 618 | case EOL: |
| 619 | res = *ofs == len ? 1 : 0; |
| 620 | pc++; |
| 621 | break; |
| 622 | case OPEN: |
| 623 | if (caps != NULL) |
| 624 | caps[r->code[pc + 1]].ptr = s + *ofs; |
| 625 | pc += 2; |
| 626 | break; |
| 627 | case CLOSE: |
| 628 | if (caps != NULL) |
| 629 | caps[r->code[pc + 1]].len = (s + *ofs) - |
| 630 | caps[r->code[pc + 1]].ptr; |
| 631 | pc += 2; |
| 632 | break; |
| 633 | case END: |
| 634 | pc++; |
| 635 | break; |
| 636 | default: |
| 637 | printf("unknown cmd (%d) at %d\n", r->code[pc], pc); |
| 638 | assert(0); |
| 639 | break; |
| 640 | } |
| 641 | } |
| 642 | |
| 643 | return res; |
| 644 | } |
| 645 | |
| 646 | int |
| 647 | slre_match(const struct slre *r, const char *buf, int len, |
| 648 | struct cap *caps) |
| 649 | { |
| 650 | int i, ofs = 0, res = 0; |
| 651 | |
| 652 | if (r->anchored) { |
| 653 | res = match(r, 0, buf, len, &ofs, caps); |
| 654 | } else { |
| 655 | for (i = 0; i < len && res == 0; i++) { |
| 656 | ofs = i; |
| 657 | res = match(r, 0, buf, len, &ofs, caps); |
| 658 | } |
| 659 | } |
| 660 | |
| 661 | return res; |
| 662 | } |
| 663 | |
| 664 | #ifdef SLRE_TEST |
| 665 | #define N_CAPS 5 |
| 666 | |
| 667 | int main(int argc, char *argv[]) |
| 668 | { |
| 669 | struct slre slre; |
| 670 | struct cap caps[N_CAPS]; |
| 671 | unsigned char data[1 * 1024 * 1024]; |
| 672 | FILE *fp; |
| 673 | int i, res, len; |
| 674 | |
| 675 | if (argc < 2) { |
| 676 | fprintf(stderr, "Usage: %s 'slre' <file>\n", argv[0]); |
| 677 | return 1; |
| 678 | } |
| 679 | |
| 680 | fp = fopen(argv[2], "rb"); |
| 681 | if (fp == NULL) { |
| 682 | fprintf(stderr, "Error: cannot open %s:%s\n", |
| 683 | argv[2], strerror(errno)); |
| 684 | return 1; |
| 685 | } |
| 686 | |
| 687 | if (!slre_compile(&slre, argv[1])) { |
| 688 | fprintf(stderr, "Error compiling slre: %s\n", slre.err_str); |
| 689 | return 1; |
| 690 | } |
| 691 | |
| 692 | slre_dump(&slre, stderr); |
| 693 | |
| 694 | while (fgets(data, sizeof(data), fp) != NULL) { |
| 695 | len = strlen(data); |
| 696 | |
| 697 | if ((len > 0) && (data[len-1] == '\n')) { |
| 698 | data[len-1] = '\0'; |
| 699 | --len; |
| 700 | } |
| 701 | |
| 702 | printf("Data = \"%s\"\n", data); |
| 703 | |
| 704 | (void) memset(caps, 0, sizeof(caps)); |
| 705 | |
| 706 | res = 0; |
| 707 | |
| 708 | res = slre_match(&slre, data, len, caps); |
| 709 | printf("Result [%d]: %d\n", i, res); |
| 710 | |
| 711 | for (i = 0; i < N_CAPS; i++) { |
| 712 | if (caps[i].len > 0) { |
| 713 | printf("Substring %d: len=%d [%.*s]\n", i, |
| 714 | caps[i].len, |
| 715 | caps[i].len, caps[i].ptr); |
| 716 | } |
| 717 | } |
| 718 | printf("----------------------------------------------------\n"); |
| 719 | } |
| 720 | (void) fclose(fp); |
| 721 | |
| 722 | return 0; |
| 723 | } |
| 724 | #endif /* SLRE_TEST */ |