|
61 | 61 |
|
62 | 62 | /// Node used to build Aho-Corasick search trie |
63 | 63 | typedef struct { |
64 | | - char c; |
| 64 | + unsigned char c; |
65 | 65 | unsigned char type; |
66 | 66 | int len; |
67 | 67 |
|
@@ -490,25 +490,29 @@ int options[] = { |
/// Input strings for the search tests.
///
/// The second dimension of results[][] has one entry per string here, in the
/// same order.  The last entry contains a non-ASCII character so that bytes
/// above 127 are exercised (assumes the source file is stored in a multi-byte
/// encoding such as UTF-8 -- TODO confirm).
char * haystack[] = {
	"footbally",
	"ufootbally",
	"footbal",
	"föot"
};
495 | 496 |
|
496 | 497 |
|
/// Expected search output, indexed as results[option][haystack].
///
/// Each string is the list of matches, one per line.  The first dimension
/// presumably follows the order of options[] (defined elsewhere in the file
/// -- verify); the second dimension follows haystack[].
char * results[3][4] = {
	{
		"foot\notb\nfootball\nball\nally\n",
		"ufo\nfoot\notb\nfootball\nball\nally\n",
		"foot\notb\n",
		"föot\n"
	},
	{
		"foot\nfootball\n",
		"ufo\notb\nally\n",
		"foot\n",
		"föot\n"
	},
	{
		"football\n",
		"ufo\notb\nally\n",
		"foot\n",
		"föot\n"
	}
};
514 | 518 |
|
@@ -559,6 +563,7 @@ void Test_ac_search(CuTest * tc) { |
559 | 563 | ac_insert(a, "ally", 39); |
560 | 564 | ac_insert(a, "ufo", 38); |
561 | 565 | ac_insert(a, "otb", 37); |
| 566 | + ac_insert(a, "föot", 69); |
562 | 567 |
|
563 | 568 | F(i, (int) (sizeof(options) / sizeof(options[0]))) { |
564 | 569 | // Prepare AC trie with new options |
@@ -586,6 +591,99 @@ void Test_ac_search(CuTest * tc) { |
586 | 591 | #endif |
587 | 592 |
|
588 | 593 |
|
| 594 | +/// Monitor one character at a time for matches |
| 595 | +size_t ac_step(size_t s, ac * a, int options, unsigned char c, size_t * len, unsigned char * type) { |
| 596 | + *len = -1; |
| 597 | + *type = '\0'; |
| 598 | + |
| 599 | + // Check for path that allows us to match next character |
| 600 | + while (s && a->node[s].child[c] == 0) { |
| 601 | + s = a->node[s].ac_fail; |
| 602 | + } |
| 603 | + |
| 604 | + // Accept next character |
| 605 | + s = a->node[s].child[c]; |
| 606 | + |
| 607 | + // Do we have a match |
| 608 | + size_t temp_s = s; |
| 609 | + |
| 610 | + while (temp_s) { |
| 611 | + if (a->node[temp_s].type) { |
| 612 | + // This is a match |
| 613 | + if (*len != -1) { |
| 614 | + if (options & AC_LONGEST) { |
| 615 | + // Is this longer than the current match? |
| 616 | + if (*len == a->node[temp_s].len) { |
| 617 | + // Update existing match |
| 618 | + *len = a->node[temp_s].len; |
| 619 | + *type = a->node[temp_s].type; |
| 620 | + } else { |
| 621 | + // Ignore this match |
| 622 | + } |
| 623 | + } else { |
| 624 | + // Ignore this match |
| 625 | + } |
| 626 | + } else { |
| 627 | + *len = a->node[temp_s].len; |
| 628 | + *type = a->node[temp_s].type; |
| 629 | + } |
| 630 | + } |
| 631 | + |
| 632 | + temp_s = a->node[temp_s].ac_fail; |
| 633 | + } |
| 634 | + |
| 635 | + return s; |
| 636 | +} |
| 637 | + |
| 638 | + |
#ifdef TEST

/// Expected `type` reported by ac_step() after each byte of "foobar".
/// One row per entry of options[] (three rows are provided; options[] is
/// defined elsewhere in the file -- verify it has three entries).
unsigned char step_result[3][6] = {
	{ 0, 0, 42, 0, 0, 44 },
	{ 0, 0, 42, 0, 0, 44 },
	{ 0, 0, 42, 0, 0, 44 }
};

/// Feed "foobar" through ac_step() one byte at a time and check the match
/// type and length reported after every byte, for each entry of options[].
void Test_ac_step(CuTest * tc) {
	ac * a = ac_new(0);

	ac_insert(a, "foo", 42);
	ac_insert(a, "bar", 43);
	ac_insert(a, "foobar", 44);

	// Declared as an array, not a pointer, so that sizeof gives the string
	// size.  sizeof on a `char *` would give the pointer size and run the
	// loop past the end of both the text and step_result[i].
	const char text[] = "foobar";
	const int text_len = (int) (sizeof(text) - 1);	// exclude the trailing NUL

	F(i, (int) (sizeof(options) / sizeof(options[0]))) {
		ac_prepare(a, options[i]);
		size_t s = 0;
		size_t len = 0;
		unsigned char type = 0;

		F(j, text_len) {
			s = ac_step(s, a, options[i], (unsigned char) text[j], &len, &type);
			CuAssertIntEquals(tc, step_result[i][j], type);

			switch (type) {
				case 42:
				case 43:
					// "foo" and "bar" are both three bytes long
					CuAssertIntEquals(tc, 3, (int) len);
					break;

				case 44:
					// "foobar"
					CuAssertIntEquals(tc, 6, (int) len);
					break;

				default:
					// No match reported for this byte
					break;
			}
		}
	}

	ac_free(a);
}

#endif
| 685 | + |
| 686 | + |
589 | 687 | static void trie_node_to_graphviz(ac * a, size_t s, FILE * out) { |
590 | 688 | trie_node * n = &a->node[s]; |
591 | 689 |
|
@@ -616,4 +714,3 @@ void ac_to_graphviz(ac * a, FILE * out) { |
616 | 714 |
|
617 | 715 | fprintf(out, "}\n"); |
618 | 716 | } |
619 | | - |
|
0 commit comments