Substrings

SP8222

You are given a string S which consists of 250000 lowercase latin letters at most. We define F(x) as the maximal number of times that some string with length x appears in S. For example for string ‘ababa’ F(3) will be 2 because there is a string ‘aba’ that occurs twice. Your task is to output F(i) for every i so that 1<=i<=|S|.
Input
String S consists of at most 250000 lowercase latin letters.
Output
Output |S| lines. On the i-th line output F(i).
Example
Input:
ababa
Output:
3
2
2
1
1

题意
有字符串SSS,S250000|S|\leq 250000∣S∣≤250000。我们将 F(x)F(x)F(x)定义为某些长度xxx的字符串在SSS中出现的最大次数。例如字符串ababaf,因为有一个字符串aba出现两次。你的任务是输出F(x)F(x)F(x)每一个xxx,以使1<=x<=S1<=x<=|S|1<=x<=∣S∣。
题解
因为对于S的任意子串,都是由初始状态出发走到某个状态u得到的(子串对应的状态唯一)。而SAM中的endpos标记了该子串出现的所有位置,因此为了获得所有子串的数量,我们首先要求出endpos集合的大小,采用记忆化搜索。
定义
EndPosSize[u]为状态u的endpos的大小。
初始化
每添加一个字符,当前的状态的EndPos=1。其他的为0。
即当前状态至少存在一个endpos,就是当前添加的位置。
转移方程
EndPosSize[u]=vSonOf(u)EndPos[v]EndPosSize[u]=\sum\limits_{v\in{SonOf(u)}}{EndPos[v]}EndPosSize[u]=v∈SonOf(u)∑​EndPos[v]
因为在后缀连接树中,子节点是对父节点的一种不重复的划分,因此父节点的endpos大小就是其所有子节点endpos大小的和。
这样我们就求出了每个状态的endpos的大小。而根据后缀连接树的性质,每个状态的字符串集合种的字符串都是连续的,且长度区间为[len(link(u))+1...len(u)][len(link(u))+1...len(u)][len(link(u))+1...len(u)],其中u为当前状态。
至此,endpos为每个状态的大小,又有了每个状态的对应的长度,直接更新取最大即可。当由于后缀连接树很可能不是平衡的,因此区间更新所有状态的时间复杂度为O(S2)O(|S|^2)O(∣S∣2),会超时。可以采用线段树优化区间更新,时间复杂度为O(SlogS)O(|S|log|S|)O(∣S∣log∣S∣)。
AC代码

#include<iostream>
#include<string>
#include<cstring>
#include<algorithm>
#include<vector>
#include<cmath>
#include<map>
using namespace std;
const int MAXN = 250005;
int n;
int EndPosSize[2 * MAXN];
char S[MAXN];
//后缀自动机
struct SAM {
	int size, last;
	struct Node {
		int len, link;
		int next[26];
		void clear() {
			len = link = 0;
			memset(next, 0, sizeof(next));
		}
	} node[MAXN * 2];
	void init() {
		for (int i = 0; i < size; i++) {
			node[i].clear();
		}
		node[0].link = -1;
		size = 1;
		last = 0;
	}
	void insert(char x) {
		int ch = x - 'a';
		int cur = size++;
		EndPosSize[cur] = 1;
		node[cur].len = node[last].len + 1;
		int p = last;
		while (p != -1 && !node[p].next[ch]) {
			node[p].next[ch] = cur;
			p = node[p].link;
		}
		if (p == -1) {
			node[cur].link = 0;
		}
		else {
			int q = node[p].next[ch];
			if (node[p].len + 1 == node[q].len) {
				node[cur].link = q;
			}
			else {
				int clone = size++;
				node[clone] = node[q];
				node[clone].len = node[p].len + 1;
				while (p != -1 && node[p].next[ch] == q) {
					node[p].next[ch] = clone;
					p = node[p].link;
				}
				node[q].link = node[cur].link = clone;
			}
		}
		last = cur;
	}
}sam;
//后缀连接树建图
vector<int> G[2 * MAXN];
//保存F(x)
int Ans[MAXN];
//记忆化搜索
int DFS(int Cur) {
	//加上所有子节点
	for (int i = 0; i < G[Cur].size(); ++i) {
		int& Next = G[Cur][i];
		EndPosSize[Cur] += DFS(Next);
	}
	return EndPosSize[Cur];
}
//线段树
class SegmentTree{
	int
		Data[MAXN * 4],
		LazyTag[MAXN * 4];
public:
	void PushUp(const int& root) {
		this->Data[root] = this->Data[root << 1] + this->Data[root << 1 | 1];
	}
	void PushDown(int root, int len) {
		if (!this->LazyTag[root]) {
			return;
		}
		const int
			&& LeftSon = root << 1,
			&& RightSon = root << 1 | 1;
		this->LazyTag[LeftSon] = max(this->LazyTag[LeftSon], this->LazyTag[root]);
		this->LazyTag[RightSon] = max(this->LazyTag[RightSon], this->LazyTag[root]);
		this->Data[LeftSon] = max(this->Data[LeftSon], this->LazyTag[root]);
		this->Data[RightSon] = max(this->Data[RightSon], this->LazyTag[root]);
		this->LazyTag[root] = 0;
	}
	void UpGrade(const int& left, const int& right, int leftcur, int rightcur, int root,const int& val) {
		if (leftcur >= left && rightcur <= right) {
			this->Data[root] = max(this->Data[root], val);
			this->LazyTag[root] = max(this->LazyTag[root], val);
			return;
		}
		this->PushDown(root, rightcur - leftcur + 1);
		const int&& mid = (leftcur + rightcur) >> 1;
		if (mid >= left) {
			this->UpGrade(left, right, leftcur, mid, root << 1, val);
		}
		if (mid + 1 <= right) {
			this->UpGrade(left, right, mid + 1, rightcur, root << 1 | 1, val);
		}
		this->PushUp(root);
	}
	int Query(const int& left, const int& right, int leftcur, int rightcur, int root) {
		if (leftcur >= left && rightcur <= right) {
			return this->Data[root];
		}
		int&& Ans = 0;
		const int&& mid = (leftcur + rightcur) >> 1;
		this->PushDown(root, rightcur - leftcur + 1);
		if (mid >= left) {
			Ans += this->Query(left, right, leftcur, mid, root << 1);
		}
		if (mid + 1 <= right) {
			Ans += this->Query(left, right, mid + 1, rightcur, root << 1 | 1);
		}
		return Ans;
	}
}Tree;
int main() {
	scanf("%s", S);
	int&& n = strlen(S);
	sam.init();
	for (int i = 0; i < n; ++i) {
		sam.insert(S[i]);
	}
	//建树
	for (int i = 1; i < sam.size; ++i) {
		if (sam.node[i].link != -1) {
			G[sam.node[i].link].push_back(i);
		}
	}
	//求EndPosSize
	DFS(0);
	//答案更新
	for (int i = 1; i < sam.size; ++i) {
		int&& Left = sam.node[sam.node[i].link].len + 1;
		Left = max(1, Left);
		Tree.UpGrade(Left, sam.node[i].len, 1, n, 1, EndPosSize[i]);
	}
	for (int i = 1; i <= n; ++i) {
		printf("%d\n", Tree.Query(i, i, 1, n, 1));
	}
	return 0;
}
SubstringsSubstrings ORZZROORZZRO 发布了44 篇原创文章 · 获赞 47 · 访问量 2万+ 私信 关注
上一篇:linux – 如何将OPAM中安装的库安装到OCaml?


下一篇:与Python相比,在Haskell中调用c函数