[UVA][2D String matching][KMP、AC自動機] 11019 - Matrix Matcher
Problem H
Matrix Matcher
Input: Standard Input
Output: Standard Output
Given an N * M matrix, your task is to find the number of occurences of an X * Y pattern.
Input
The first line contains a single integer t(t ≤ 15), the number of test cases.
For each case, the first line contains two integers N and M (N, M ≤ 1000). The next N lines contain M characters each.
The next line contains two integers X and Y (X, Y ≤ 100). The next X lines contain Y characters each.
Output
For each case, output a single integer in its own line, the number of occurrences.
Sample Input Output for Sample Input
2 1 1 x 1 1 y 3 3 abc bcd cde 2 2 bc cd |
0 2
|
Problem Setter: Rujia Liu, EPS
Special Thanks: Wenbin Tang
Warming: The judge input file size is about 7 MB. So please make sure that you use a fast IO function (eg. scanf()) to read input.
做法來源:http://www.csie.ntnu.edu.tw/~u91029/StringMatching.html#5
演算法筆記!2D String Matching: Baker-Bird Algorithm
步驟 1. 把T橫切成一條一條, 把P橫切成一條一條, 然後每一條T都執行Multi-Pattern String Matching,例如Aho-Corasick Algorithm。 如果第a條P,從T[i,j]開始出現,那麼就進行紀錄:M[i,j] = a。 M是一個跟T一樣大的陣列。 步驟 2. 把M直切成一條一條, 每一條M都執行String Matching,例如Morris-Pratt Algorithm。 看看是否出現1234567...n這個字串(剛剛P共有n條)。 要點 當P有某兩條相同時,則要小心處理,把這兩條的編號設為相同。
AC 自動機不用跑匹配的後綴,因為必然是沒有的,所以在這裡的 AC 自動機寫法比較特別。
#include <stdio.h>
#include <string.h>
#include <queue>
#include <algorithm>
using namespace std;
char T[1005][1005], P[105][105];
char M[1005][1005];
//<AC automation>
struct Node {
Node *next[26], *fail;
int matched, label;
Node() {
fail = NULL;
matched = -1;
label = -1;
memset(next, 0, sizeof(next));
}
};
int insertTrie(const char str[], Node *root, int label) {
static Node *p;
static int i, idx;
p = root;
for(i = 0; str[i]; i++) {
idx = str[i]-'a';
if(p->next[idx] == NULL)
p->next[idx] = new Node();
p = p->next[idx];
}
if(p->label == -1)
p->label = label;
return p->label;
}
void buildACautomation(Node *root) {
queue<Node*> Q;
Node *nd, *p;
root->fail = NULL;
Q.push(root);
while(!Q.empty()) {
nd = Q.front(), Q.pop();
for(int i = 0; i < 26; i++) {
if(nd->next[i] == NULL)
continue;
Q.push(nd->next[i]);
p = nd->fail;
while(p != NULL && p->next[i] == NULL)
p = p->fail;
if(p == NULL)
nd->next[i]->fail = root;
else
nd->next[i]->fail = p->next[i];
}
}
}
void quertACautomaiton(const char* str, Node *root, int row) {
static Node *p, *q;
static int i, idx;
p = root;
for(i = 0; str[i]; i++) {
idx = str[i]-'a';
while(p != root && p->next[idx] == NULL)
p = p->fail;
p = p->next[idx];
p = (p == NULL) ? root : p;
q = p;
M[row][i] = -1;
if(q != root && q->label >= 0)
M[row][i] = q->label;
}
}
void freeTrie(Node *root) {
queue<Node*> Q;
Node *nd;
while(!Q.empty()) {
nd = Q.front(), Q.pop();
for(int i = 0; i < 26; i++)
if(nd->next[i] != NULL)
Q.push(nd->next[i]);
delete nd;
}
}
//</AC automation>
int kmpTable[105];
void KMPtable(char *P, int len) {
int i, j;
kmpTable[0] = -1, i = 1, j = -1;
while(i < len) {
while(j >= 0 && P[j+1] != P[i])
j = kmpTable[j];
if(P[j+1] == P[i])
j++;
kmpTable[i++] = j;
}
}
int KMPMatching(char T[], char P[], int tlen, int plen) {
int i, j;
int matchCnt = 0;
for(i = 0, j = -1; i < tlen; i++) {
while(j >= 0 && P[j+1] != T[i])
j = kmpTable[j];
if(P[j+1] == T[i]) j++;
if(j == plen-1) {
matchCnt++;
j = kmpTable[j];
}
}
return matchCnt;
}
int main() {
int testcase;
int n, m, x, y;
int i, j, k;
scanf("%d", &testcase);
char pattern[105], str[1005];
while(testcase--) {
Node *root = new Node();
scanf("%d %d", &n, &m);
for(i = 0; i < n; i++)
scanf("%s", T[i]);
scanf("%d %d", &x, &y);
for(i = 0; i < x; i++) {
scanf("%s", P[i]);
pattern[i] = insertTrie(P[i], root, i);
}
buildACautomation(root);
for(i = 0; i < n; i++)
quertACautomaiton(T[i], root, i);
/*for(i = 0; i < x; i++)
printf("%d xx\n", pattern[i]);
for(i = 0; i < n; i++, puts(""))
for(j = 0; j < m; j++)
printf("%3d ", M[i][j]);*/
KMPtable(pattern, x);
int ret = 0;
for(i = 0; i < m; i++) {
for(j = 0; j < n; j++)
str[j] = M[j][i];
ret += KMPMatching(str, pattern, n, x);
}
printf("%d\n", ret);
freeTrie(root);
}
return 0;
}
/*
3
1 1
x
1 1
y
3 3
abc
bcd
cde
2 2
bc
cd
3 3
aaa
aaa
aaa
2 2
aa
aa
*/