http://www.51nod.com/onlineJudge/questionCode.html#!problemId=1244
模板题。。。
杜教筛和基于质因子分解的筛法都写了一下模板。
杜教筛
用杜教筛求积性函数\(f(n)\)的前缀和\(S(n)=\sum\limits_{i=1}^nf(i)\),需要构造一个\(g(n)\)使得\(\sum\limits_{d|n}f(d)g\left(\frac nd\right)\)和\(\sum\limits_{i=1}^ng(i)\)都可以快速求出。因为我们有公式:
\]
对于\(\mu(n)\)的前缀和,很明显\(g(n)=1\)。这样的话:
\]
\]
用Hash表存储S的值
#include<cmath>
#include<cstdio>
#include<cstring>
#include<algorithm>
using namespace std;
typedef long long ll;
const ll N = 1E10;
const int UP = 3981071;
int mu[UP + 3], prime[UP + 3], num = 0, sum[UP + 3];
bool notp[UP + 3];
void Euler_shai() {
sum[1] = 1;
for (int i = 2; i <= UP; ++i) {
if (!notp[i]) {
prime[++num] = i;
mu[i] = -1;
}
for (int j = 1, pro; j <= num && (pro = prime[j] * i) <= UP; ++j) {
notp[pro] = true;
if (i % prime[j] == 0) {
mu[pro] = 0;
break;
} else
mu[pro] = -mu[i];
}
sum[i] = sum[i - 1] + mu[i];
}
}
struct HashTable {
static const int p = 1000007;
ll val[p], ref[p];
HashTable() {memset(ref, -1, sizeof(ref));}
void add(ll pos, ll nu) {
int tmp = (int) (pos % p);
while (ref[tmp] != -1) {
if (ref[tmp] == pos) return;
++tmp; if (tmp == p) tmp = 0;
}
ref[tmp] = pos;
val[tmp] = nu;
}
ll query(ll pos) {
int tmp = (int) (pos % p);
while (ref[tmp] != pos) {++tmp; if (tmp == p) tmp = 0;}
return val[tmp];
}
} HT;
ll Sum(ll x) {
return x <= UP ? sum[x] : HT.query(x);
}
void DJ_shai(ll n) {
for (ll i = n, y; i >= 1; i = n / (y + 1)) {
y = n / i;
if (y <= UP) continue;
ll ret = 0;
for (ll j = 2, l, pre = 1; j <= y; ++j) {
l = y / j;
j = y / l;
ret += Sum(l) * (j - pre);
pre = j;
}
HT.add(y, 1ll - ret);
}
}
int main() {
Euler_shai();
ll a, b;
scanf("%lld%lld", &a, &b);
DJ_shai(b);
DJ_shai(a - 1);
printf("%lld\n", Sum(b) - Sum(a - 1));
return 0;
}
基于质因子分解的筛法
基于质因子分解的筛法细节比较多(貌似被称作洲哥筛?)。
\]
设小于等于\(\sqrt n\)的质数从小到大排列为\(p_1,p_2\dots p_m\)。
设\(g(i,j)\)表示\([1,j]\)内与前i个质数互质的数的个数。
转移:\(g(i,j)=g(i-1,j)-g\left(i-1,\left\lfloor\frac j{p_i}\right\rfloor\right)\)
\(g(m,j)-1\)即为\([1,j]\)内大于\(\sqrt n\)的质数个数,它的相反数就是\(\sum\limits_{\sqrt n<p\leq j且p为质数}\mu(p)\)
枚举小于\(\sqrt n\)的所有数的\(\mu\),并和上面等式右边的括号内的数相乘求和。
这样剩下的就是大于等于\(\sqrt n\)的满足条件的\(\mu\)值,这些\(\mu\)值也要乘上括号内的数,不过这些括号内的数都是1,所以大于等于\(\sqrt n\)的满足条件的\(\mu\)值就可以统一计算了。
把\(p_1,p_2\dots p_m\)翻转,变成从大到小。
再设\(f(i,j)\)表示用前i个质数构成质因子的数中在\([1,j]\)内的数的\(\mu\)值和。
转移:\(f(i,j)=f(i-1,j)+\mu(p_i)f\left(i-1,\left\lfloor\frac j{p_i}\right\rfloor\right)\)
\(f(m,n)-\sum\limits_{1\leq i<\sqrt n}\mu(i)\)就是统一计算出来的和。
因为下取整只有\(O(\sqrt n)\)种取值,枚举小于等于\(\sqrt n\)的质数,质数个数大概是\(\frac{\sqrt n}{\log\sqrt n}\),所以时间复杂度是\(O\left(\frac n{\log n}\right)\)。
加上一些优化就可以达到\(O\left(\frac{n^{\frac 34}}{\log n}\right)\)。
这只是筛最简单的\(\mu\)的前缀和,更一般的积性函数的前缀和求法以及优化到\(O\left(\frac{n^{\frac 34}}{\log n}\right)\)的方法详见2016年候选人论文《积性函数求和的几种方法》,这里实在说不下了。
#include<cmath>
#include<cstdio>
#include<cstring>
#include<algorithm>
using namespace std;
typedef long long ll;
const int N = 1E10;
const int UP = 1E5;
bool notp[UP + 3];
int prime[UP + 3], sum_p[UP + 3], sum_mu[UP + 3], mu[UP + 3], pre[UP * 2 + 3], num = 0;
ll G[UP * 2 + 3], F[UP * 2 + 3], J[UP * 2 + 3];
void Euler_shai(int n) {
mu[1] = sum_mu[1] = 1;
for (int i = 2; i <= n; ++i) {
if (!notp[i]) {
prime[++num] = i;
mu[i] = -1;
sum_p[i] = sum_p[i - 1] + 1;
} else
sum_p[i] = sum_p[i - 1];
for (int j = 1, pro; j <= num && (pro = i * prime[j]) <= n; ++j) {
notp[pro] = true;
if (i % prime[j] == 0) break;
else mu[pro] = -mu[i];
}
sum_mu[i] = sum_mu[i - 1] + mu[i];
}
}
struct HashTable {
static const int ppp = 2333333;
ll ref[ppp]; int val[ppp];
void clr() {memset(ref, -1, sizeof(ref)); ref[0] = val[0] = 0;}
void add(ll pos, int nu) {
int tmp = pos % ppp;
while (ref[tmp] != -1) {++tmp; if (tmp == ppp) tmp = 0;}
ref[tmp] = pos; val[tmp] = nu;
}
int query(ll pos) {
int tmp = pos % ppp;
while (ref[tmp] != pos) {++tmp; if (tmp == ppp) tmp = 0;}
return val[tmp];
}
} HT;
ll ZY_shai(ll n) {
int cnt = 0, sqf = floor(sqrt(n)), sqc = ceil(sqrt(n));
while (prime[num] > sqf) --num;
HT.clr();
for (ll i = n, y; i >= 1; i = n / (y + 1)) {
J[++cnt] = (y = n / i);
HT.add(y, cnt);
G[cnt] = y;
pre[cnt] = 0;
}
ll pp, delta;
for (int i = 1, p = prime[i]; i <= num; p = prime[++i]) {
pp = 1ll * p * p;
for (int j = cnt; j >= 1 && J[j] >= pp; --j) {
int id = HT.query(J[j] / p);
delta = max(G[id] - (i - 1 - pre[id]), 1ll);
G[j] -= delta;
pre[j] = i;
}
}
for (int j = cnt; j >= 1; --j)
G[j] = max(G[j] - (num - pre[j]), 1ll);
ll ans = 0;
for (int i = 1; i < sqc; ++i)
ans += (2ll - G[HT.query(n / i)]) * mu[i];
ll prep = 0;
for (int j = 1; j <= cnt; ++j) F[j] = 1;
for (int i = num, p = prime[i]; i >= 1; p = prime[--i]) {
pp = 1ll * p * p;
for (int j = cnt; j >= 1 && J[j] >= pp; --j) {
if (J[j] < prep * prep) {
if (J[j] > prep) F[j] = 1 - (sum_p[min(J[j], 1ll * sqf)] - sum_p[prep - 1]);
else F[j] = 1;
}
int id = HT.query(J[j] / p);
if (J[id] < prep * prep) {
if (J[id] >= prep) delta = 1 - (sum_p[min(J[id], 1ll * sqf)] - sum_p[prep - 1]);
else delta = 1;
} else
delta = F[id];
F[j] -= delta;
}
prep = p;
}
return ans + F[cnt] - sum_mu[sqc - 1];
}
int main() {
ll a, b;
scanf("%lld%lld", &a, &b);
Euler_shai((int) sqrt(b));
b = ZY_shai(b);
a = ZY_shai(a - 1);
printf("%lld\n", b - a);
return 0;
}
总结
杜教筛比质因子分解筛法要快。质因子分解筛法可以筛更加一般的积性函数,比杜教筛无脑,但细节巨多,代码量大(容易写残)。
总算写完了