题意:给定一个字符串,q次询问,每次询问给定一个数k,查询原串的所有不同子串中字典序第k小的子串在原串中的开始和结束位置,若有多个答案则输出最小的开始位置,不存在输出0 0
后缀自动机经典问题,所以我用后缀数组
预处理sum数组记录不同字符串的个数,即sum[i] = len - sa[i] + 1 -height[i] + sum[i-1] (len为原串长度)
对于每个k 若k > sum[n] 则输出0 0 ,即k大于不同子串的总数
否则,二分sum数组找到第k小子串所在的sa数组,即找到相应子串所位于的后缀,但是该串的开始位置不一定是最小的,所以顺着sa数组要往后找是否还有更小的答案
至于为什么只要往后,因为当前找到子串的一定是该串在sa数组中第一次,之后若还出现,height数组会将其覆盖,所以要往后找
代码很好理解,详见代码
#include <bits/stdc++.h> using namespace std; typedef long long ll; const int maxn = 100010; int n; int sa[maxn], x[maxn], c[maxn], y[maxn], height[maxn]; char s[maxn]; ll sum[maxn]; void SA() //O(nlogn) { int m = 128; for (int i = 0; i <= m; i++) c[i] = 0; for (int i = 1; i <= n; i++) c[x[i] = s[i]]++; for (int i = 1; i <= m; i++) c[i] += c[i - 1]; for (int i = n; i >= 1; i--) sa[c[x[i]]--] = i; for (int k = 1; k <= n; k <<= 1) { int p = 0; for (int i = 0; i <= m; i++) y[i] = 0; for (int i = n - k + 1; i <= n; i++) y[++p] = i; for (int i = 1; i <= n; i++) if (sa[i] > k) y[++p] = sa[i] - k; for (int i = 0; i <= m; i++) c[i] = 0; for (int i = 1; i <= n; i++) c[x[y[i]]]++; for (int i = 1; i <= m; i++) c[i] += c[i - 1]; for (int i = n; i >= 1; i--) sa[c[x[y[i]]]--] = y[i]; swap(x, y); x[sa[1]] = 1; p = 1; for (int i = 2; i <= n; ++i) x[sa[i]] = (y[sa[i]] == y[sa[i - 1]] && y[sa[i] + k] == y[sa[i - 1] + k]) ? p : ++p; if (p >= n) break; m = p; } } void get_height() { int k = 0; //for (int i=1; i<=n; ++i) rk[sa[i]]=i; x数组即为rank数组 for (int i = 1; i <= n; ++i) { if (x[i] == 1) continue; //第一名height为0 if (k) --k; //h[i]>=h[i-1]-1; int j = sa[x[i] - 1]; while (j + k <= n && i + k <= n && s[i + k] == s[j + k]) ++k; height[x[i]] = k; } } int main() { int q; while (~scanf("%s%d", s + 1, &q)) { n = strlen(s + 1); SA(); get_height(); height[1] = 0; for (int i = 1; i <= n; i++) { sum[i] = n - sa[i] + 1 - height[i] + sum[i - 1]; } ll v, l = 0, r = 0; while (q--) { scanf("%lld", &v); v = (l ^ r ^ v) + 1; if (v > sum[n]) l = r = 0; else { int pos = lower_bound(sum + 1, sum + 1 + n, v) - sum; v -= sum[pos - 1]; l = sa[pos]; r = sa[pos] + v + height[pos] - 1; int len = r - l + 1; pos++; while (height[pos] >= len) { if (l > sa[pos]) { l = sa[pos]; r = l + len - 1; } pos++; } } printf("%lld %lld\n", l, r); } } return 0; }