写在前面
这篇文章的主体是在没网的悲惨状况下完成的。
前置知识:Trie 树,DFA,KMP 字符串匹配算法。
请务必深刻理解!
引入
给定一个模式串 \(s\),\(n\) 个匹配串 \(t_1\sim t_n\),求在模式串中各文本串分别出现的次数。
\(1\le |s|\le 10^6\),\(\sum |t_i|\le 10^6\)。
若 \(n = 1\),可以使用 KMP 算法在 \(O(|s| + |t|)\) 的时空复杂度内求解。
AC 自动机可以认为是 KMP 算法在 Trie 树上的应用,与 KMP 算法在失配时应用已匹配部分的 \(\operatorname{border}\) 进行跳转类似,AC 自动机在失配时会根据失配指针跳转到 Trie 树上代表已匹配部分的 \(\operatorname{border}\) 的节点,从而加速匹配。
定义
\(s[i:j]\):字符串 \(s\) 的子串 \(s_i\cdots s_j\)。
真前/后缀:字符串 \(s\) 的真前缀定义为满足不等于它本身的 \(s\) 的前缀。同理就有了真后缀的定义:满足不等于它本身的 \(s\) 的后缀。
\(\operatorname{border}\):字符串 \(s\) 的 \(\operatorname{border}\) 定义为,满足既是 \(s\) 的真前缀,又是 \(s\) 的真后缀的最长的字符串 \(t\)。
如 \(\texttt{aabaa}\) 的 \(\operatorname{border}\) 为 \(\texttt{aa}\)。
原理
例题
P3808 【模板】AC 自动机(简单版)
//知识点:ACAM
/*
By:Luckyblock
*/
#include <algorithm>
#include <cctype>
#include <cstdio>
#include <cstring>
#include <queue>
#define LL long long
const int kN = 5e6 + 10;
//=============================================================
int n;
char s[kN];
//=============================================================
inline int read() {
int f = 1, w = 0;
char ch = getchar();
for (; !isdigit(ch); ch = getchar())
if (ch == '-') f = -1;
for (; isdigit(ch); ch = getchar()) w = (w << 3) + (w << 1) + (ch ^ '0');
return f * w;
}
void Chkmax(int &fir, int sec) {
if (sec > fir) fir = sec;
}
void Chkmin(int &fir, int sec) {
if (sec < fir) fir = sec;
}
struct ACAM {
int node_num, tr[kN][26], cnt[kN], fail[kN];
void Insert(char *s_) {
int now = 0, lth = strlen(s_ + 1);
for (int i = 1; i <= lth; ++ i) {
if (! tr[now][s_[i] - 'a']) tr[now][s_[i] - 'a'] = ++ node_num;
now = tr[now][s_[i] - 'a'];
}
cnt[now] ++;
}
void Build() {
std::queue <int> q;
for (int i = 0; i < 26; ++ i) {
if (tr[0][i]) q.push(tr[0][i]);
}
while (! q.empty()) {
int u_ = q.front(); q.pop();
for (int i = 0; i < 26; ++ i) {
if (tr[u_][i]) {
fail[tr[u_][i]] = tr[fail[u_]][i];
q.push(tr[u_][i]);
} else {
tr[u_][i] = tr[fail[u_]][i];
}
}
}
}
int Query(char *s_) {
int now = 0, ret = 0, lth = strlen(s_ + 1);
for (int i = 1; i <= lth; ++ i) {
now = tr[now][s_[i] - 'a'];
for (int j = now; j && cnt[j] != -1; j = fail[j]) {
ret += cnt[j];
cnt[j] = -1;
}
}
return ret;
}
} acam;
//=============================================================
int main() {
n = read();
for (int i = 1; i <= n; ++ i) {
scanf("%s", s + 1);
acam.Insert(s);
}
acam.Build();
scanf("%s", s + 1);
printf("%d\n", acam.Query(s));
return 0;
}
P3796 【模板】AC 自动机(加强版)
//知识点:ACAM
/*
By:Luckyblock
*/
#include <algorithm>
#include <cctype>
#include <cstdio>
#include <cstring>
#include <queue>
#define LL long long
const int kN = 150 + 1;
const int kT = 1e6 + 10;
const int kNN = 2e4 + 10;
//=============================================================
int n;
char s[kN][71], t[kT];
//=============================================================
inline int read() {
int f = 1, w = 0;
char ch = getchar();
for (; !isdigit(ch); ch = getchar())
if (ch == '-') f = -1;
for (; isdigit(ch); ch = getchar()) w = (w << 3) + (w << 1) + (ch ^ '0');
return f * w;
}
void Chkmax(int &fir, int sec) {
if (sec > fir) fir = sec;
}
void Chkmin(int &fir, int sec) {
if (sec < fir) fir = sec;
}
struct ACAM {
int node_num, tr[kNN][26], cnt[kNN], fail[kNN];
void Init() {
node_num = 0;
memset(tr, 0, sizeof (tr));
memset(cnt, 0, sizeof (cnt));
memset(fail, 0, sizeof (fail));
}
void Insert(int id_, char *s_) {
int now = 0, lth = strlen(s_ + 1);
for (int i = 1; i <= lth; ++ i) {
if (! tr[now][s_[i] - 'a']) tr[now][s_[i] - 'a'] = ++ node_num;
now = tr[now][s_[i] - 'a'];
}
cnt[now] = id_;
}
void Build() {
std::queue <int> q;
for (int i = 0; i < 26; ++ i) {
if (tr[0][i]) q.push(tr[0][i]);
}
while (! q.empty()) {
int u_ = q.front(); q.pop();
for (int i = 0; i < 26; ++ i) {
if (tr[u_][i]) {
fail[tr[u_][i]] = tr[fail[u_]][i];
q.push(tr[u_][i]);
} else {
tr[u_][i] = tr[fail[u_]][i];
}
}
}
}
void Query(char *s_) {
int now = 0, lth = strlen(s_ + 1);
int ans = 0, sum[kN] = {0};
for (int i = 1; i <= lth; ++ i) {
now = tr[now][s_[i] - 'a'];
for (int j = now; j; j = fail[j]) {
sum[cnt[j]] ++;
}
}
for (int i = 1; i <= n; ++ i) Chkmax(ans, sum[i]);
printf("%d\n", ans);
for (int i = 1; i <= n; ++ i) {
if (sum[i] == ans) {
printf("%s\n", s[i] + 1);
}
}
}
} acam;
void Init() {
acam.Init();
}
//=============================================================
int main() {
while (true) {
n = read();
if (! n) break;
Init();
for (int i = 1; i <= n; ++ i) {
scanf("%s", s[i] + 1);
acam.Insert(i, s[i]);
}
acam.Build();
scanf("%s", t + 1);
acam.Query(t);
}
return 0;
}