POJ - 3415 Common Substrings（后缀数组）

2023-07-24 22:52:40

A substring of a string T is defined as:

$T(i, k) = T_i T_{i+1} \ldots T_{i+k-1},\quad 1 \le i \le i+k-1 \le |T|.$

Given two strings A, B and one integer K, we define S, a set of triples (i, j, k):

$S = \{(i, j, k) \mid k \ge K,\ A(i, k) = B(j, k)\}.$

You are to give the value of |S| for specific A, B and K.

Input

The input file contains several blocks of data. For each block, the first line contains one integer K, followed by two lines containing strings A and B, respectively. The input file is ended by K=0.

1 ≤ |A|, |B| ≤ 10⁵
1 ≤ K ≤ min{|A|, |B|}
Characters of A and B are all Latin letters.

Output

For each case, output an integer |S|.

Sample Input

2

aababaa

abaabaa

1

xx

xx

0

Sample Output

22

5

题意

求 A、B 两个字符串中长度不小于 K 的公共子串的个数（出现位置不同的算作不同的子串，即统计三元组 (i, j, k) 的个数）。

思路：

这题不是很好理解。

设第一个字符串为a，第二个为b

首先我们知道，枚举所有a的后缀，枚举所有b的后缀，对每一对后缀，如果它们的 lcp ≥ k，就把 lcp-k+1 加入答案，最后的总和就是答案。

但是这个算法复杂度太高了，所以我们需要优化一下。

优化的方法就是使用单调栈。

在后缀数组中，排名为 i 和 j（i < j）的两个后缀的 lcp，就是 height[i+1] 到 height[j] 之间的最小值。

对于后缀数组中每一个属于b的后缀，我们都可以用均摊 O(1) 的时间计算出，这个后缀与排在它之前的所有属于a的后缀的 lcp 贡献之和是多少。

然后反过来，对每一个属于a的后缀，求它与排在它之前的所有属于b的后缀的 lcp 贡献之和。将这两个和加起来就是答案了。

具体来说，用一个 cnt 记录前面的后缀对当前位置的贡献之和。如果当前的 height[i] 不大于单调栈的栈顶，说明对于之后的后缀来说，栈顶所代表的那些后缀的 lcp 已经不可能超过 height[i] 了，所以要把栈顶弹出，并从 cnt 中减去多算的部分：栈顶的个数 ×（栈顶的 lcp − height[i]）。

其他的地方我已经做了详细的注释，请直接查看代码。

#include<iostream>

#include<algorithm>

#include<vector>

#include<stack>

#include<queue>

#include<map>

#include<set>

#include<cstdio>

#include<cstring>

#include<cmath>

#include<ctime>

// Debug helpers: print "name = value" / "name[index] = value" to stderr.
#define fuck(x) cerr<<#x<<" = "<<x<<endl;
#define debug(a, x) cerr<<#a<<"["<<x<<"] = "<<a[x]<<endl;

// Child indices of a node `t` in a heap-style layout.
// NOTE(review): presumably left over from a segment-tree template; unused in the visible code.
#define ls (t<<1)
#define rs ((t<<1)|1)

using namespace std;

typedef long long ll;
typedef unsigned long long ull;

// Capacity of every per-position array. main() stores A, one separator
// character and B in a single buffer, and |A|, |B| <= 1e5, so at least
// 2e5 + 2 slots are required (the suffix array also has an entry for the
// empty suffix at index len).
const int maxn = 200086;
// NOTE(review): maxm, Inf, mod and eps are not referenced by the visible code;
// their values are conventional placeholders — confirm before relying on them.
const int maxm = 200086;
// "Infinity" for int; its negation is used as the rank of an out-of-range position.
const int inf = 0x3f3f3f3f;
const ll Inf = 0x3f3f3f3f3f3f3f3fLL;
const int mod = 1000000007;
const double eps = 1e-8;
const double pi = acos(-1.0);

// Text buffer. main() fills it with A, then a '$' separator, then B.
char s[maxn];

// Length of the string currently held in s.
int len;

// Rank[i]: sort key of the suffix starting at position i (refined every doubling round).
int Rank[maxn];

// sa[r]: start position of the suffix that currently sits at sorted position r.
int sa[maxn];

// Prefix length that Rank[] currently describes; compare_sa reads it as the
// offset of the secondary key.
int tlen;

// Scratch array used while the next round of ranks is being computed.
int tmp[maxn];

/**
 * Ordering predicate used while building the suffix array.
 *
 * Compares the suffixes starting at i and j by the key pair
 * (Rank[p], Rank[p + tlen]).
 *
 * @param i start position of the first suffix
 * @param j start position of the second suffix
 * @return true when suffix i sorts strictly before suffix j under the current keys
 */
bool compare_sa(int i, int j) {
    // Primary key: the rank computed in the previous round.
    if (Rank[i] < Rank[j]) { return true; }
    if (Rank[i] > Rank[j]) { return false; }

    // Secondary key: rank of the position tlen further on. When that position
    // lies past the end of the string the key is -inf, because with equal
    // prefixes the shorter string is the lexicographically smaller one.
    int second_i = -inf;
    if (i + tlen <= len) { second_i = Rank[i + tlen]; }

    int second_j = -inf;
    if (j + tlen <= len) { second_j = Rank[j + tlen]; }

    return second_i < second_j;
}

/**
 * Builds the suffix array of s[0..len) by prefix doubling.
 *
 * Reads:  s, len.
 * Writes: sa[0..len], Rank[0..len]; uses tlen and tmp as scratch.
 *
 * The empty suffix (position len) takes part in the sort, so sa has len + 1
 * entries. Each round sorts by the key pair used in compare_sa and then
 * rebuilds Rank from the sorted order.
 */
void construct_sa() {
    // Initial rank is the character code; the empty suffix gets -inf so that
    // it sorts before everything else.
    for (int i = 0; i <= len; i++) {
        sa[i] = i;
        Rank[i] = i < len ? s[i] : -inf;
    }

    // Each round doubles the prefix length that Rank[] distinguishes.
    for (tlen = 1; tlen <= len; tlen *= 2) {
        sort(sa, sa + len + 1, compare_sa);

        // Recompute ranks into tmp: the smallest suffix gets rank 0, and every
        // following suffix gets the previous rank plus one if it is strictly
        // greater than its predecessor, otherwise the same rank.
        tmp[sa[0]] = 0;
        for (int i = 1; i <= len; i++) {
            tmp[sa[i]] = tmp[sa[i - 1]] + (compare_sa(sa[i - 1], sa[i]) ? 1 : 0);
        }

        // Publish the new ranks only after all of them are known, because
        // compare_sa reads Rank[] during the loop above.
        for (int i = 0; i <= len; i++) {
            Rank[i] = tmp[i];
        }
    }
}

// height[r]: length of the longest common prefix of the suffixes at sorted
// positions r - 1 and r (height[0] is 0).
int height[maxn];

/**
 * Fills height[] from s, sa and Rank.
 *
 * Must run after construct_sa(), which leaves Rank[i] equal to the sorted
 * position of suffix i. Suffixes are visited in text order so the match
 * length h can be carried over from one suffix to the next, dropping by at
 * most one each step.
 */
void construct_lcp() {
    int h = 0;
    height[0] = 0;

    for (int i = 0; i < len; i++) {  // i is the start position of a suffix in s
        // Suffix that sits immediately before suffix i in sorted order.
        // Rank[i] >= 1 here because the empty suffix occupies sorted position 0.
        int j = sa[Rank[i] - 1];

        // Dropping the first character shortens the previous match by at most one.
        if (h > 0) h--;

        // Extend the match character by character.
        for (; j + h < len && i + h < len; h++) {
            if (s[j + h] != s[i + h]) break;
        }

        height[Rank[i]] = h;
    }
}

// Sparse table over height[]: st[j][i] is the minimum of height[j .. j + 2^i - 1].
// 20 levels cover 2^19 > maxn positions.
int st[maxn][20];

/**
 * Builds the sparse table st from height[1..len].
 *
 * Must run after construct_lcp(). Level 0 copies height[]; level i combines
 * two level-(i - 1) windows of length l / 2 each, where l = 2^i.
 */
void rmq_init() {
    for (int i = 1; i <= len; i++) {
        st[i][0] = height[i];
    }

    int l = 2;  // window length of the level being built
    for (int i = 1; l <= len; i++) {
        // The bound only requires the second half-window to start inside
        // [1, len]. Entries whose full window runs past len may therefore mix
        // in cells beyond len, but ask_min never reads such an entry for a
        // query with j <= len.
        for (int j = 1; j + l / 2 <= len; j++) {
            st[j][i] = min(st[j][i - 1], st[j + l / 2][i - 1]);
        }
        l <<= 1;
    }
}

/**
 * Range-minimum query on height[] using the sparse table st.
 *
 * Requires rmq_init() to have been called and 1 <= i <= j <= len.
 *
 * @param i first index of the range (inclusive)
 * @param j last index of the range (inclusive)
 * @return min(height[i], ..., height[j])
 */
int ask_min(int i, int j) {
    // Largest k with 2^k <= (j - i + 1). Computed with integers so the result
    // does not depend on floating-point rounding of log().
    int span = j - i + 1;
    int k = 0;
    while ((1 << (k + 1)) <= span) {
        ++k;
    }

    // Two windows of length 2^k, one anchored at each end, cover the whole range.
    return min(st[i][k], st[j - (1 << k) + 1][k]);
}

int lcp(int a, int b)//此处参数是,原字符串下标

{

    a = Rank[a], b = Rank[b];

    if (a > b)

        swap(a, b);

    return ask_min(a + , b);

}

// la: length of the first input string A; in the joined buffer the separator
// sits at index la. lb is declared alongside it but is not assigned in the
// visible code.
int la,lb;

// One entry of the monotonic stack used in main().
struct node{
    int lcp;  // height value this entry stands for
    ll num;   // how many contributing suffixes have been merged into this entry
}sta[maxn];

// Number of entries on the stack; entries live in sta[1..top].
int top=0;

int main() {

//    ios::sync_with_stdio(false);

//    freopen("in.txt", "r", stdin);

    int k;

    while (scanf("%d", &k) != EOF && k) {

        scanf("%s",s);

        la=strlen(s);

        s[la]='$';

        scanf("%s",s+la+);

        len=strlen(s);

        construct_sa();

        construct_lcp();

        ll cnt,ans,num;

        cnt=ans=num=;

        for(int i=;i<=len;i++){

            if(height[i]<k){

                top = cnt =; //height 小于k的时候，显然之前的贡献对于后面的后缀都是没有用的了。

            }else{

                num=;//用来记录之前和它相同的height数.

                //实际上，这里的相同,并不是真正的相同，而是如果在某个height1之后出现了一个height2比height1小，

                // 那么height2之后，就认为height1和height2相等

                if(sa[i-]<la){

                    cnt+=height[i]-k+;

                    num++;

                }while(top&&sta[top].lcp>=height[i]){

                    cnt-=sta[top].num*(sta[top].lcp-height[i]);//去除多余的贡献

                    num+=sta[top--].num;

                }if(sa[i]>la){

                    ans+=cnt;

                }sta[++top]={height[i],num};

            }

        }

        cnt=num=top=;

        for(int i=;i<=len;i++){

            if(height[i]<k){

                top = cnt =;

            }else{

                num=;

                if(sa[i-]>la){

                    cnt+=height[i]-k+;

                    num++;

                }while(top&&sta[top].lcp>=height[i]){

                    cnt-=sta[top].num*(sta[top].lcp-height[i]);

                    num+=sta[top--].num;

                }if(sa[i]<la){

                    ans+=cnt;

                }sta[++top]={height[i],num};

            }

        }

        printf("%lld\n",ans);

    }

    return ;

}

码农公寓

相关文章