2013-05-04 20:35:10Morris

[UVA][2D String matching][KMP、AC自動機] 11019 - Matrix Matcher

Problem H
Matrix Matcher
Input: Standard Input

Output: Standard Output

 

Given an N * M matrix, your task is to find the number of occurences of an X * Y pattern.

 

Input

The first line contains a single integer t(t ≤ 15), the number of test cases.

 

For each case, the first line contains two integers N and M (N, M ≤ 1000). The next N lines contain M characters each.

 

The next line contains two integers X and Y (X, Y ≤ 100). The next X lines contain Y characters each. 

 

Output

For each case, output a single integer in its own line, the number of occurrences.

 

Sample Input                               Output for Sample Input

2
1 1
x
1 1
y
3 3
abc
bcd
cde
2 2
bc
cd

0

2

 


Problem Setter: Rujia Liu, EPS

Special Thanks: Wenbin Tang

 

Warming: The judge input file size is about 7 MB. So please make sure that you use a fast IO function (eg. scanf()) to read input.


做法來源:http://www.csie.ntnu.edu.tw/~u91029/StringMatching.html#5

演算法筆記!2D String Matching: Baker-Bird Algorithm

步驟 1.
把T橫切成一條一條,
把P橫切成一條一條,
然後每一條T都執行Multi-Pattern String Matching,例如Aho-Corasick Algorithm。
如果第a條P,從T[i,j]開始出現,那麼就進行紀錄:M[i,j] = a。
M是一個跟T一樣大的陣列。

步驟 2.
把M直切成一條一條,
每一條M都執行String Matching,例如Morris-Pratt Algorithm。
看看是否出現1234567...n這個字串(剛剛P共有n條)。

要點
當P有某兩條相同時,則要小心處理,把這兩條的編號設為相同。
AC 自動機不用跑匹配的後綴,因為必然是沒有的,所以在這裡的 AC 自動機寫法比較特別。

#include <stdio.h>
#include <string.h>
#include <queue>
#include <algorithm>
using namespace std;
char T[1005][1005], P[105][105];
char M[1005][1005];
//<AC automation>
struct Node {
    Node *next[26], *fail;
    int matched, label;
    Node() {
        fail = NULL;
        matched = -1;
        label = -1;
        memset(next, 0, sizeof(next));
    }
};
int insertTrie(const char str[], Node *root, int label) {
    static Node *p;
    static int i, idx;
    p = root;
    for(i = 0; str[i]; i++) {
        idx = str[i]-'a';
        if(p->next[idx] == NULL)
            p->next[idx] = new Node();
        p = p->next[idx];
    }
    if(p->label == -1)
        p->label = label;
    return p->label;
}
void buildACautomation(Node *root) {
    queue<Node*> Q;
    Node *nd, *p;
    root->fail = NULL;
    Q.push(root);
    while(!Q.empty()) {
        nd = Q.front(), Q.pop();
        for(int i = 0; i < 26; i++) {
            if(nd->next[i] == NULL)
                continue;
            Q.push(nd->next[i]);
            p = nd->fail;
            while(p != NULL && p->next[i] == NULL)
                p = p->fail;
            if(p == NULL)
                nd->next[i]->fail = root;
            else
                nd->next[i]->fail = p->next[i];
        }
    }
}
void quertACautomaiton(const char* str, Node *root, int row) {
    static Node *p, *q;
    static int i, idx;
    p = root;
    for(i = 0; str[i]; i++) {
        idx = str[i]-'a';
        while(p != root && p->next[idx] == NULL)
            p = p->fail;
        p = p->next[idx];
        p = (p == NULL) ? root : p;
        q = p;
        M[row][i] = -1;
        if(q != root && q->label >= 0)
            M[row][i] = q->label;
    }
}
void freeTrie(Node *root) {
    queue<Node*> Q;
    Node *nd;
    while(!Q.empty()) {
        nd = Q.front(), Q.pop();
        for(int i = 0; i < 26; i++)
            if(nd->next[i] != NULL)
                Q.push(nd->next[i]);
        delete nd;
    }
}
//</AC automation>
int kmpTable[105];
void KMPtable(char *P, int len) {
    int i, j;
    kmpTable[0] = -1, i = 1, j = -1;
    while(i < len) {
        while(j >= 0 && P[j+1] != P[i])
            j = kmpTable[j];
        if(P[j+1] == P[i])
            j++;
        kmpTable[i++] = j;
    }
}
int KMPMatching(char T[], char P[], int tlen, int plen) {
    int i, j;
    int matchCnt = 0;
    for(i = 0, j = -1; i < tlen; i++) {
        while(j >= 0 && P[j+1] != T[i])
            j = kmpTable[j];
        if(P[j+1] == T[i])  j++;
        if(j == plen-1) {
            matchCnt++;
            j = kmpTable[j];
        }
    }
    return matchCnt;
}
int main() {
    int testcase;
    int n, m, x, y;
    int i, j, k;
    scanf("%d", &testcase);
    char pattern[105], str[1005];
    while(testcase--) {
        Node *root = new Node();
        scanf("%d %d", &n, &m);
        for(i = 0; i < n; i++)
            scanf("%s", T[i]);
        scanf("%d %d", &x, &y);
        for(i = 0; i < x; i++) {
            scanf("%s", P[i]);
            pattern[i] = insertTrie(P[i], root, i);
        }
        buildACautomation(root);
        for(i = 0; i < n; i++)
            quertACautomaiton(T[i], root, i);
        /*for(i = 0; i < x; i++)
            printf("%d xx\n", pattern[i]);
        for(i = 0; i < n; i++, puts(""))
            for(j = 0; j < m; j++)
                printf("%3d ", M[i][j]);*/
        KMPtable(pattern, x);
        int ret = 0;
        for(i = 0; i < m; i++) {
            for(j = 0; j < n; j++)
                str[j] = M[j][i];
            ret += KMPMatching(str, pattern, n, x);
        }
        printf("%d\n", ret);
        freeTrie(root);
    }
    return 0;
}
/*
3
1 1
x
1 1
y
3 3
abc
bcd
cde
2 2
bc
cd
3 3
aaa
aaa
aaa
2 2
aa
aa
*/