CSU1632Repeated Substrings(后缀数组/最长公共前缀)

题意就是求一个字符串的重复出现(出现次数>=2)的不同子串的个数。

标准解法是后缀数组、最长公共前缀的应用,对于样例aabaab,先将所有后缀排序:

aab

3    aabaab

1    ab

2    abaab

0    b

1    baab

每个后缀前面数字代表这个后缀与它之前的后缀(rank比它小1)的最长公共前缀的长度:然而就可以这样理解这个最长公共前缀LCP、aabaab与aab的最长公共前缀是3,那说明子串a、aa、aab都至少出现的两次,那么这就是后缀aab重复出现的子串个数

然后我们考虑后缀ab与后缀aabaab的最长公共前缀=1,这时由于LCP(ab, aabaab) <= LCP(aabaab, aab),所以就说明ab与aabaab的所有公共前缀(只有LCP(ab, aabaab) = 1个)之前都已经计算过了,所以我们直接跳过

之后我们考虑后缀abaab与后缀ab的最长公共前缀LCP(abaab, ab) > LCP(ab, aabaab),同时,由于LCP(ab, aabaab) != 0,所以考虑abaab与ab的两个公共前缀(重复出现的子串)a、ab时,已经有了LCP(ab, aabaab) = 1个公共前缀(重复出现的子串)a已经计算过了,所以这时新的重复出现的子串的个数为LCP(abaab, ab) - LCP(ab, aabaab) = 1

最后总结起来就是:

    if(height[i] <= height[i - 1]) continue;

    if(height[i] > height[i - 1] ) ans += height[i] - height[i - 1]

这就是最后的答案。

 #include <map>
#include <set>
#include <stack>
#include <queue>
#include <cmath>
#include <ctime>
#include <vector>
#include <cstdio>
#include <cctype>
#include <cstring>
#include <cstdlib>
#include <iostream>
#include <algorithm>
using namespace std;
#define INF 0x3f3f3f3f
#define inf (-((LL)1<<40))
#define lson k<<1, L, (L + R)>>1
#define rson k<<1|1, ((L + R)>>1) + 1, R
#define mem0(a) memset(a,0,sizeof(a))
#define mem1(a) memset(a,-1,sizeof(a))
#define mem(a, b) memset(a, b, sizeof(a))
#define FIN freopen("in.txt", "r", stdin)
#define FOUT freopen("out.txt", "w", stdout)
#define rep(i, a, b) for(int i = a; i <= b; i ++)
#define dec(i, a, b) for(int i = a; i >= b; i --) template<class T> T CMP_MIN(T a, T b) { return a < b; }
template<class T> T CMP_MAX(T a, T b) { return a > b; }
template<class T> T MAX(T a, T b) { return a > b ? a : b; }
template<class T> T MIN(T a, T b) { return a < b ? a : b; }
template<class T> T GCD(T a, T b) { return b ? GCD(b, a%b) : a; }
template<class T> T LCM(T a, T b) { return a / GCD(a,b) * b; } //typedef __int64 LL;
typedef long long LL;
const int MAXN = ;
const int MAXM = ;
const double eps = 1e-;
LL MOD = ; struct SufArray {
char s[MAXN];
int sa[MAXN], t[MAXN], t2[MAXN], c[MAXN], n, m;
int rnk[MAXN], height[MAXN];
int mi[MAXN][], idxK[MAXN]; void init() {
mem0(s);
mem0(height);
}
void read_str() {
gets(s);
m = ;
n = strlen(s);
s[n++] = ' ';
}
void build_sa() {
int *x = t, *y = t2;
rep (i, , m - ) c[i] = ;
rep (i, , n - ) c[x[i] = s[i]] ++;
rep (i, , m - ) c[i] += c[i - ];
dec (i, n - , ) sa[--c[x[i]]] = i;
for(int k = ; k <= n; k <<= ) {
int p = ;
rep (i, n - k, n - ) y[p++] = i;
rep (i, , n - ) if(sa[i] >= k) y[p++] = sa[i] - k;
rep (i, , m - ) c[i] = ;
rep (i, , n - ) c[x[y[i]]] ++;
rep (i, , m - ) c[i] += c[i - ];
dec (i, n - , ) sa[--c[x[y[i]]]] = y[i];
swap(x, y);
p = ;
x[sa[]] = ;
rep (i, , n - ) {
x[sa[i]] = y[sa[i - ]] == y[sa[i]] && y[sa[i - ] + k] == y[sa[i] + k] ? p - : p++;
}
if(p >= n) break;
m = p;
}
}
void get_height() {
int k = ;
rep (i, , n - ) rnk[sa[i]] = i;
rep (i, , n - ) {
if(k) k --;
int j = sa[rnk[i] - ];
while(s[i + k] == s[j + k]) k ++;
height[rnk[i]] = k;
}
}
void rmq_init(int *a, int n) {
rep (i, , n - ) mi[i][] = a[i];
for(int j = ; ( << j) <= n; j ++) {
for(int i = ; i + (<<j) - < n; i ++) {
mi[i][j] = min(mi[i][j - ], mi[i + ( << (j - ))][j - ]);
}
}
rep (len, , n) {
idxK[len] = ;
while(( << (idxK[len] + )) <= len) idxK[len] ++;
}
}
int rmq_min(int l, int r) {
int len = r - l + , k = idxK[len];
return min(mi[l][k], mi[r - ( << k) + ][k]);
}
void lcp_init() {
get_height();
rmq_init(height, n);
}
int get_lcp(int a, int b) {
if(a == b) return n - a - ;
return rmq_min(min(rnk[a], rnk[b]) + , max(rnk[a], rnk[b]));
}
void solve() {
get_height();
LL ans = , pre = ;
rep (i, , n - ) {
if(height[i] > pre) ans += height[i] - pre;
pre = height[i];
}
cout << ans << endl;
}
}; int T;
SufArray sa; int main()
{
while(~scanf("%d%*c", &T)) while(T--){
sa.init();
sa.read_str();
sa.build_sa();
sa.solve();
}
return ;
}
/**************************************************************
Problem: 1632
User: csust_Rush
Language: C++
Result: Accepted
Time:880 ms
Memory:13192 kb
****************************************************************/
上一篇:POJ 2774 后缀数组:查找最长公共子


下一篇:js中返回上一页失效的解决办法