显然我们要拆位, 因为每位都独立, 然后问题就变成能用dp求的东西,然后用矩阵快速幂优化一下。
注意mod为1的情况。
#include <iostream>
#include <utility>

typedef unsigned long long ull;

// 4x4 matrix of residues. The modulus is passed explicitly to every operation
// instead of being read from a mutable global.
// NOTE(review): products are formed in 64 bits, so the modulus is assumed to be
// below 2^32 — confirm against the problem constraints.
struct Matrix {
    static const int DIM = 4;
    ull a[DIM][DIM];

    // Zero matrix.
    Matrix() {
        for (int r = 0; r < DIM; ++r)
            for (int c = 0; c < DIM; ++c)
                a[r][c] = 0;
    }

    // Matrix with ones on the diagonal. The ones are not reduced by the
    // modulus here; every later multiply/fold applies the reduction.
    static Matrix identity() {
        Matrix id;
        for (int d = 0; d < DIM; ++d) id.a[d][d] = 1;
        return id;
    }

    // Returns (*this) * rhs with every entry reduced modulo `mod`.
    Matrix times(const Matrix& rhs, ull mod) const {
        Matrix out;
        for (int r = 0; r < DIM; ++r)
            for (int c = 0; c < DIM; ++c)
                for (int t = 0; t < DIM; ++t)
                    out.a[r][c] = (out.a[r][c] + a[r][t] * rhs.a[t][c]) % mod;
        return out;
    }

    // Returns (*this)^exponent modulo `mod` by binary exponentiation.
    Matrix power(ull exponent, ull mod) const {
        Matrix result = identity();
        Matrix base = *this;
        while (exponent != 0) {
            if (exponent & 1ull) result = result.times(base, mod);
            base = base.times(base, mod);
            exponent >>= 1;
        }
        return result;
    }
};

// Counts, modulo `mod`, the 0/1 strings of length n split by whether they
// contain two adjacent 1s.
//
// State index = (last symbol << 1) | (adjacent "11" already seen):
//   0: last 0, not seen   1: last 0, seen
//   2: last 1, not seen   3: last 1, seen
// Appending 0 keeps the flag and moves to "last 0"; appending 1 moves to
// "last 1" and raises the flag when the previous symbol was also 1.
//
// Returns {count without an adjacent pair, count with an adjacent pair}.
std::pair<ull, ull> per_bit_counts(ull n, ull mod) {
    // Length 0: only the empty string, which has no pair. Handling it here also
    // keeps the unsigned `n - 1` below from wrapping around.
    if (n == 0) return std::make_pair(1 % mod, 0ull);

    Matrix step;
    step.a[0][0] = 1; step.a[0][2] = 1;                    // -> last 0, not seen
    step.a[1][1] = 1; step.a[1][3] = 1;                    // -> last 0, seen
    step.a[2][0] = 1;                                      // -> last 1, not seen
    step.a[3][1] = 1; step.a[3][2] = 1; step.a[3][3] = 1;  // -> last 1, seen

    // Strings of length 1: "0" (state 0) and "1" (state 2), one each.
    const ull start[Matrix::DIM] = {1, 0, 1, 0};
    const Matrix walk = step.power(n - 1, mod);

    ull clean = 0;   // even states: no adjacent pair
    ull paired = 0;  // odd states: adjacent pair present
    for (int row = 0; row < Matrix::DIM; ++row) {
        ull& bucket = (row & 1) ? paired : clean;
        for (int col = 0; col < Matrix::DIM; ++col)
            bucket = (bucket + walk.a[row][col] * start[col]) % mod;
    }
    return std::make_pair(clean, paired);
}

// Combines the per-bit counts over the low `l` bits of `k`: a set bit
// contributes the "pair present" count, a clear bit the "no pair" count.
// The result is 0 when `k` has a set bit at position >= l.
// NOTE(review): this looks like Codeforces 551D; the statement is not part of
// this file — confirm. Assumes l <= 64 (the width of `k`); a larger shift count
// would be undefined.
ull count_arrays(ull n, ull k, ull l, ull mod) {
    // Bits of k at or above position l can never be produced. (For l == 64
    // there are no such bits, and shifting by 64 must be avoided.)
    if (l < 64 && (k >> l) != 0) return 0;

    const std::pair<ull, ull> ways = per_bit_counts(n, mod);
    ull ans = 1;
    for (ull bit = 0; bit < l; ++bit) {
        // Unsigned shifts only: the original's `1ll << 63` shifted into the
        // sign bit of a signed value.
        const bool is_set = ((k >> bit) & 1ull) != 0;
        ans = ans * (is_set ? ways.second : ways.first) % mod;
    }
    // Final reduction matters when l == 0 and mod == 1 (ans is still 1).
    return ans % mod;
}

// Reads n, k, l, m from standard input and prints the count modulo m.
int main() {
    ull n = 0, k = 0, l = 0, m = 0;
    // A failed read or a zero modulus would make `% m` a division by zero.
    if (!(std::cin >> n >> k >> l >> m) || m == 0) return 1;

    std::cout << count_arrays(n, k, l, m) << "\n";
    return 0;
}