// 1. Standard coefficient-splitting FFT (arbitrary-modulus polynomial multiplication), 8 DFTs
//#pragma GCC optimize(2)
//#pragma GCC optimize(3)
//#pragma GCC optimize(4)
//#pragma GCC optimize("unroll-loops")
//#pragma comment(linker, "/stack:200000000")
//#pragma GCC optimize("Ofast,no-stack-protector")
//#pragma GCC target("sse,sse2,sse3,ssse3,sse4,popcnt,abm,mmx,avx,tune=native")
#include<bits/stdc++.h>
//#include <bits/extc++.h>
#define fi first
#define se second
#define db double
#define mp make_pair
#define pb push_back
#define mt make_tuple
//#define pi acos(-1.0)
#define ll long long
#define vi vector<int>
#define mod 1000000007
#define ld long double
//#define C 0.5772156649
#define ls l,m,rt<<1
#define rs m+1,r,rt<<1|1
#define sqr(x) ((x)*(x))
#define pll pair<ll,ll>
#define pil pair<int,ll>
#define pli pair<ll,int>
#define pii pair<int,int>
#define ull unsigned long long
#define bpc __builtin_popcount
#define base 1000000000000000000ll
#define fin freopen("a.txt","r",stdin)
#define fout freopen("a.txt","w",stdout)
#define fio ios::sync_with_stdio(false);cin.tie(0)
#define mr mt19937 rng(chrono::steady_clock::now().time_since_epoch().count())
// Greatest common divisor by the Euclidean algorithm, written iteratively.
// Returns a unchanged when b is 0.
inline ll gcd(ll a,ll b){
    while(b){
        const ll rem=a%b;
        a=b;
        b=rem;
    }
    return a;
}
// In-place a -= b; when the difference is negative, mod is added back once.
inline void sub(ll &a,ll b){
    const ll diff=a-b;
    a=(diff<0)?diff+mod:diff;
}
// In-place a += b; when the sum reaches mod, mod is subtracted once.
inline void add(ll &a,ll b){
    const ll sum=a+b;
    a=(sum>=mod)?sum-mod:sum;
}
// Returns a reference to the larger argument; on a tie the second one is returned.
template<typename T>
inline T const& MAX(T const &a,T const &b){
    if(a>b)return a;
    return b;
}
// Returns a reference to the smaller argument; on a tie the second one is returned.
template<typename T>
inline T const& MIN(T const &a,T const &b){
    if(a<b)return a;
    return b;
}
// Fast exponentiation: returns a^b modulo the file-level constant `mod`.
// The base is first reduced into [0, mod) so that the products below cannot
// overflow even when the caller passes a large or negative a.
// A non-positive exponent yields 1 (the original looped forever on b < 0,
// because an arithmetic right shift never brings a negative value to 0).
inline ll qp(ll a,ll b){
    ll ans=1;
    a%=mod;
    if(a<0)a+=mod;
    while(b>0){
        if(b&1)ans=ans*a%mod;
        a=a*a%mod;
        b>>=1;
    }
    return ans;
}
// Fast exponentiation: returns a^b modulo c.
// Preconditions: c > 0 and (c-1)^2 fits in a signed 64-bit integer.
// - The base is reduced into [0, c) first, so large or negative a is safe.
// - The accumulator starts at 1 % c, so the result is 0 when c == 1.
// - A non-positive exponent yields 1 % c (the original looped forever on b < 0).
inline long long qp(long long a,long long b,long long c){
    long long ans=1%c;
    a%=c;
    if(a<0)a+=c;
    while(b>0){
        if(b&1)ans=ans*a%c;
        a=a*a%c;
        b>>=1;
    }
    return ans;
}
using namespace std;
//using namespace __gnu_pbds;
// pi at full long double precision. The original `acos(-1)` took an int
// argument, selected the double overload, and so stored only a
// double-precision value in the long double constant used for the twiddles.
const ld pi=std::acos(-1.0L);
// ba, eps, INF, maxn and inf are not referenced by the code shown in this program.
const ull ba=233;
const db eps=1e-5;
const ll INF=0x3f3f3f3f3f3f3f3f;
// N is the capacity of the global work arrays (rev, A, B, C and the cd buffers).
const int N=400000+10,maxn=2000000+10,inf=0x3f3f3f3f;
struct cd{
ld x,y;
cd(ld _x=0.0,ld _y=0.0):x(_x),y(_y){}
cd operator +(const cd &b)const{
return cd(x+b.x,y+b.y);
}
cd operator -(const cd &b)const{
return cd(x-b.x,y-b.y);
}
cd operator *(const cd &b)const{
return cd(x*b.x - y*b.y,x*b.y + y*b.x);
}
cd operator /(const db &b)const{
return cd(x/b,y/b);
}
}a[N],b[N],c[N],d[N];
// rev: bit-reversal table filled by getrev() and read by fft().
// A, B: input coefficient arrays read by mtt(); C: result array written by mtt().
int rev[N],A[N],B[N],C[N];
// Fills rev[0 .. 2^bit - 1] with the bit-reversal permutation on `bit` bits:
// rev[i] is i with its lowest `bit` bits written in reverse order.
// Precondition: 2^bit must not exceed the size of rev.
// bit <= 0 is handled explicitly: the table then has the single entry
// rev[0] = 0. The original evaluated `<< (bit-1)` with a negative shift
// count in that case, which is undefined behaviour.
void getrev(int bit)
{
    rev[0]=0;
    if(bit<=0)return;
    const int n=1<<bit;
    const int top=1<<(bit-1); // value contributed by the lowest bit of i after reversal
    for(int i=1;i<n;i++)
        rev[i]=(rev[i>>1]>>1) | ((i&1)?top:0);
}
// In-place iterative radix-2 FFT over a[0..n-1].
//   a   : data buffer, overwritten with the transform
//   n   : transform length; rev[0..n-1] must already hold the matching
//         bit-reversal table (see getrev)
//   dft : 1 uses the twiddle angle +pi/step, -1 uses -pi/step and additionally
//         divides every element by n at the end
void fft(cd *a,int n,int dft)
{
    // Reorder the input into bit-reversed order; each pair is swapped once.
    for(int i=0;i<n;i++)
    {
        const int r=rev[i];
        if(i<r)swap(a[i],a[r]);
    }
    // Butterfly passes: `half` is the distance between the two inputs of a butterfly.
    for(int half=1;half<n;half*=2)
    {
        const cd root(cos(dft*pi/half),sin(dft*pi/half));
        const int span=half*2;
        for(int start=0;start<n;start+=span)
        {
            cd w(1,0); // running power of root
            for(int off=0;off<half;off++)
            {
                cd &lo=a[start+off];
                cd &hi=a[start+off+half];
                const cd u=lo;
                const cd t=w*hi;
                lo=u+t;
                hi=u-t;
                w=w*root;
            }
        }
    }
    if(dft!=-1)return;
    for(int i=0;i<n;i++)a[i]=a[i]/n;
}
void mtt(int n,int m,int p) {
int sz=0;
while((1<<sz)<=n+m)sz++;getrev(sz);
int len=1<<sz;
for(int i=0;i<len;i++)
{
int t1=A[i]%p,t2=B[i]%p;
a[i]=cd(t1>>15,0),b[i]=cd(t1&0x7fff,0);
c[i]=cd(t2>>15,0),d[i]=cd(t2&0x7fff,0);
}
fft(a,len,1);fft(b,len,1);fft(c,len,1);fft(d,len,1);
for(int i=0;i<len;i++)
{
cd aa=a[i],bb=b[i],cc=c[i],dd=d[i];
a[i]=aa*cc;b[i]=bb*cc;c[i]=aa*dd;d[i]=bb*dd;
}
fft(a,len,-1);fft(b,len,-1);fft(c,len,-1);fft(d,len,-1);
for(int i=0;i<len;i++)
{
ll aa=(ll)(a[i].x+0.5),bb=(ll)(b[i].x+0.5);
ll cc=(ll)(c[i].x+0.5),dd=(ll)(d[i].x+0.5);
aa=(aa%p+p)%p;bb=(bb%p+p)%p;cc=(cc%p+p)%p;dd=(dd%p+p)%p;
C[i]=((((aa<<15)%p)<<15)%p+(bb<<15)%p+(cc<<15)%p+dd)%p;
}
}
int main()
{
int n,m,p;scanf("%d%d%d",&n,&m,&p);
for(int i=0;i<=n;i++)scanf("%d",&A[i]);
for(int i=0;i<=m;i++)scanf("%d",&B[i]);
mtt(n,m,p);
for(int i=0;i<=n+m;i++)printf("%d ",C[i]);
return 0;
}
/********************
********************/
// 2. Optimized coefficient-splitting FFT (complex-packing trick), 4 DFTs
//#pragma GCC optimize(2)
//#pragma GCC optimize(3)
//#pragma GCC optimize(4)
//#pragma GCC optimize("unroll-loops")
//#pragma comment(linker, "/stack:200000000")
//#pragma GCC optimize("Ofast,no-stack-protector")
//#pragma GCC target("sse,sse2,sse3,ssse3,sse4,popcnt,abm,mmx,avx,tune=native")
#include<bits/stdc++.h>
//#include <bits/extc++.h>
#define fi first
#define se second
#define db double
#define mp make_pair
#define pb push_back
#define mt make_tuple
//#define pi acos(-1.0)
#define ll long long
#define vi vector<int>
#define mod 1000000007
#define ld long double
//#define C 0.5772156649
#define ls l,m,rt<<1
#define rs m+1,r,rt<<1|1
#define sqr(x) ((x)*(x))
#define pll pair<ll,ll>
#define pil pair<int,ll>
#define pli pair<ll,int>
#define pii pair<int,int>
#define ull unsigned long long
#define bpc __builtin_popcount
#define base 1000000000000000000ll
#define fin freopen("a.txt","r",stdin)
#define fout freopen("a.txt","w",stdout)
#define fio ios::sync_with_stdio(false);cin.tie(0)
#define mr mt19937 rng(chrono::steady_clock::now().time_since_epoch().count())
// Greatest common divisor by the Euclidean algorithm, written iteratively.
// Returns a unchanged when b is 0.
inline ll gcd(ll a,ll b){
    while(b){
        const ll rem=a%b;
        a=b;
        b=rem;
    }
    return a;
}
// In-place a -= b; when the difference is negative, mod is added back once.
inline void sub(ll &a,ll b){
    const ll diff=a-b;
    a=(diff<0)?diff+mod:diff;
}
// In-place a += b; when the sum reaches mod, mod is subtracted once.
inline void add(ll &a,ll b){
    const ll sum=a+b;
    a=(sum>=mod)?sum-mod:sum;
}
// Returns a reference to the larger argument; on a tie the second one is returned.
template<typename T>
inline T const& MAX(T const &a,T const &b){
    if(a>b)return a;
    return b;
}
// Returns a reference to the smaller argument; on a tie the second one is returned.
template<typename T>
inline T const& MIN(T const &a,T const &b){
    if(a<b)return a;
    return b;
}
// Fast exponentiation: returns a^b modulo the file-level constant `mod`.
// The base is first reduced into [0, mod) so that the products below cannot
// overflow even when the caller passes a large or negative a.
// A non-positive exponent yields 1 (the original looped forever on b < 0,
// because an arithmetic right shift never brings a negative value to 0).
inline ll qp(ll a,ll b){
    ll ans=1;
    a%=mod;
    if(a<0)a+=mod;
    while(b>0){
        if(b&1)ans=ans*a%mod;
        a=a*a%mod;
        b>>=1;
    }
    return ans;
}
// Fast exponentiation: returns a^b modulo c.
// Preconditions: c > 0 and (c-1)^2 fits in a signed 64-bit integer.
// - The base is reduced into [0, c) first, so large or negative a is safe.
// - The accumulator starts at 1 % c, so the result is 0 when c == 1.
// - A non-positive exponent yields 1 % c (the original looped forever on b < 0).
inline long long qp(long long a,long long b,long long c){
    long long ans=1%c;
    a%=c;
    if(a<0)a+=c;
    while(b>0){
        if(b&1)ans=ans*a%c;
        a=a*a%c;
        b>>=1;
    }
    return ans;
}
using namespace std;
//using namespace __gnu_pbds;
// ba, eps, INF, maxn and inf are not referenced by the code shown in this program.
const ull ba=233;
const db eps=1e-5;
// pi at full long double precision. The original `acos(-1)` took an int
// argument, selected the double overload, and so stored only a
// double-precision value in the long double constant used for the twiddles.
const ld pi=std::acos(-1.0L);
const ll INF=0x3f3f3f3f3f3f3f3f;
// N is the capacity of the global work arrays (rev, A, B, C and the cd buffers).
const int N=270000+10,maxn=2000000+10,inf=0x3f3f3f3f;
struct cd{
ld x,y;
cd(ld _x=0.0,ld _y=0.0):x(_x),y(_y){}
cd operator +(const cd &b)const{
return cd(x+b.x,y+b.y);
}
cd operator -(const cd &b)const{
return cd(x-b.x,y-b.y);
}
cd operator *(const cd &b)const{
return cd(x*b.x - y*b.y,x*b.y + y*b.x);
}
cd operator /(const db &b)const{
return cd(x/b,y/b);
}
}a[N],b[N],dfta[N],dftb[N],dftc[N],dftd[N];
// Complex conjugate: same real part, negated imaginary part.
cd conj(cd a){
    const ld re=a.x;
    const ld im=-a.y;
    return cd(re,im);
}
// rev: bit-reversal table filled by getrev() and read by fft().
// A, B: input coefficient arrays read by mtt(); C: result array written by mtt().
int rev[N],A[N],B[N],C[N];
// Fills rev[0 .. 2^bit - 1] with the bit-reversal permutation on `bit` bits:
// rev[i] is i with its lowest `bit` bits written in reverse order.
// Precondition: 2^bit must not exceed the size of rev.
// bit <= 0 is handled explicitly: the table then has the single entry
// rev[0] = 0. The original evaluated `<< (bit-1)` with a negative shift
// count in that case, which is undefined behaviour.
void getrev(int bit)
{
    rev[0]=0;
    if(bit<=0)return;
    const int n=1<<bit;
    const int top=1<<(bit-1); // value contributed by the lowest bit of i after reversal
    for(int i=1;i<n;i++)
        rev[i]=(rev[i>>1]>>1) | ((i&1)?top:0);
}
// In-place iterative radix-2 FFT over a[0..n-1].
//   a   : data buffer, overwritten with the transform
//   n   : transform length; rev[0..n-1] must already hold the matching
//         bit-reversal table (see getrev)
//   dft : 1 uses the twiddle angle +pi/step, -1 uses -pi/step and additionally
//         divides every element by n at the end
void fft(cd *a,int n,int dft)
{
    // Reorder the input into bit-reversed order; each pair is swapped once.
    for(int i=0;i<n;i++)
    {
        const int r=rev[i];
        if(i<r)swap(a[i],a[r]);
    }
    // Butterfly passes: `half` is the distance between the two inputs of a butterfly.
    for(int half=1;half<n;half*=2)
    {
        const cd root(cos(dft*pi/half),sin(dft*pi/half));
        const int span=half*2;
        for(int start=0;start<n;start+=span)
        {
            cd w(1,0); // running power of root
            for(int off=0;off<half;off++)
            {
                cd &lo=a[start+off];
                cd &hi=a[start+off+half];
                const cd u=lo;
                const cd t=w*hi;
                lo=u+t;
                hi=u-t;
                w=w*root;
            }
        }
    }
    if(dft!=-1)return;
    for(int i=0;i<n;i++)a[i]=a[i]/n;
}
void mtt(int n,int m,int p) {
int sz=0;
while((1<<sz)<=n+m)sz++;getrev(sz);
int len=1<<sz;
for(int i=0;i<len;i++)
{
int x=(i>n?0:A[i]%p),y=(i>m?0:B[i]%p);
a[i]=cd(x&0x7fff,x>>15);
b[i]=cd(y&0x7fff,y>>15);
}
fft(a,len,1);fft(b,len,1);
for(int i=0;i<len;i++)
{
int j=(len-i)&(len-1);
cd aa,bb,cc,dd;
aa = (a[i] + conj(a[j])) * cd(0.5, 0);
bb = (a[i] - conj(a[j])) * cd(0, -0.5);
cc = (b[i] + conj(b[j])) * cd(0.5, 0);
dd = (b[i] - conj(b[j])) * cd(0, -0.5);
dfta[j] = aa * cc;dftb[j] = aa * dd;
dftc[j] = bb * cc;dftd[j] = bb * dd;
}
for(int i=0;i<len;i++)
{
a[i] = dfta[i] + dftb[i] * cd(0, 1);
b[i] = dftc[i] + dftd[i] * cd(0, 1);
}
fft(a,len,1);fft(b,len,1);
for(int i=0;i<len;i++)
{
int da = (ll)(a[i].x / len + 0.5) % p;
int bb = (ll)(a[i].y / len + 0.5) % p;
int dc = (ll)(b[i].x / len + 0.5) % p;
int dd = (ll)(b[i].y / len + 0.5) % p;
C[i] = (da + ((ll)(bb + dc) << 15) + ((ll)dd << 30)) % p;
C[i] = (C[i]+p)%p;
}
}
int main()
{
int n,m,p;scanf("%d%d%d",&n,&m,&p);
for(int i=0;i<=n;i++)scanf("%d",&A[i]);
for(int i=0;i<=m;i++)scanf("%d",&B[i]);
mtt(n,m,p);
for(int i=0;i<=n+m;i++)printf("%d ",C[i]);
return 0;
}
/********************
********************/