我删掉最后6行(S0=P的六行),两种情况,最后给出的结果(FP数组)不太一样?
--------------------------------------------------------------------------------------------------------------------------------------
attributes(global) SUBROUTINE CALPTENSOR_KERNEL(IM,ITYP,XP,NAPDEV,IA0, &
NPRT,mxKVOIS,KVOIS,INDI,mvl, &
cra011,cra021,cra031, &
cra012,cra022,cra032, &
cra013,cra023,cra033, &
CSI, KT, FPOTR, FPOTB, &
DEN,FP, S00)
!*** PURPOSE: to begine calculate the forces on atoms, and also the virial tensor
use gpu, only:mp_BLOCKSIZE
implicit none
!--- DUMMY VARIABLES
integer, value, intent(in)::IM
real(KINDDF), value, intent(in)::cra011,cra021,cra031,cra012,cra022,cra032,cra013,cra023,cra033
integer, device, intent(in)::ITYP(IM)
real(KINDDF), device, intent(in)::XP(3,IM)
integer, value, intent(in)::NAPDEV,IA0,NPRT,mxKVOIS
integer, device, intent(in)::KVOIS(NAPDEV)
integer, device, intent(in)::INDI(mxKVOIS,NAPDEV)
integer(1), device::mvl(mxKVOIS,NAPDEV)
real(KINDDF),value, intent(in)::CSI
integer, value, intent(in)::KT
real(KINDDF), device, intent(in)::FPOTR(KT,*)
real(KINDDF), device, intent(in)::FPOTB(KT,*)
real(KINDDF), device, intent(in):EN(IM)
!Output:
real(KINDDF), device, intent(out)::FP(3,NAPDEV)
real(KINDDF), device, intent(out)::S00(6,*)
!Local variables
integer::J,K,KK, IW, IIW, KTABij, KTABji, ITYPI, ITYPJ
real(KINDDF)::FORTOT,FPX,FPY,FPZ, SK, DK, R2, R, DENKI, DENKJ
real(KINDDF)::SEPX, SEPY, SEPZ, POSX, POSY, POSZ
real(KINDDF):1, P2, P3, P4, P5, P6
integer::IC,IC0, ITB, IBG, NTB, NTG, ITG
! ---
! --- Note: blockdim%y = blockdim%z = griddim%y = griddim%y = 1, which means that
! --- the range of threadidx%y, threadidx%z, blockidx%y, blockidx%z is [1,1]
ITB = threadidx%x ! ID of thread in one block
IBG = blockidx%x ! ID of block in one grid
NTB = blockdim%x !$$--- size of Block
NTG = NTB*griddim%x ! number of threads in one grid
ITG = (IBG-1)*NTB+ITB !$$ -- the id of thread in one grid
do IC=ITG, NPRT, NTG
!$$IC -- the id of the atom on the device
!$$ NOTE: IC is not the id of the same atom in the whole box
!$$POS-- position of the atom
!$$ NOTE: XP(I) is the position of Ith atom in the whole box
IC0 = IC+IA0
POSX = XP(1,IC0)
POSY = XP(2,IC0)
POSZ = XP(3,IC0)
ITYPI = ITYP(IC0)
DENKI = DEN(IC0)
!$$-- start calculation of electron density
IIW = KVOIS(IC)
FPX = 0.0d0
FPY = 0.0d0
FPZ = 0.0d0
P1= 0.0d0
P2= 0.0d0
P3= 0.0d0
P4= 0.0d0
P5= 0.0d0
P6= 0.0d0
DO IW=1, IIW, 1
!$$--- NOTE: the particles index of neighbore-list is the index of particle in the whole box
J=INDI(IW,IC)
!$$--- To calculate the seperation between particle IC and its IWth neighbore
SEPX = POSX - XP(1,J)
SEPY = POSY - XP(2,J)
SEPZ = POSZ - XP(3,J)
if(mvl(IW,IC)>0)then
if(iand( mvl(IW,IC), 1)==1)then
SEPX = SEPX - cra011
SEPY = SEPY - cra021
SEPZ = SEPZ - cra031
end if
if(iand( mvl(IW,IC), 2)==2)then
SEPX = SEPX + cra011
SEPY = SEPY + cra021
SEPZ = SEPZ + cra031
end if
if(iand( mvl(IW,IC), 4)==4)then
SEPX = SEPX - cra012
SEPY = SEPY - cra022
SEPZ = SEPZ - cra032
end if
if(iand( mvl(IW,IC), 8)==8)then
SEPX = SEPX + cra012
SEPY = SEPY + cra022
SEPZ = SEPZ + cra032
end if
if(iand( mvl(IW,IC), 16)==16)then
SEPX = SEPX - cra013
SEPY = SEPY - cra023
SEPZ = SEPZ - cra033
end if
if(iand( mvl(IW,IC), 32)==32)then
SEPX = SEPX + cra013
SEPY = SEPY + cra023
SEPZ = SEPZ + cra033
end if
end if
R2 = SEPX*SEPX+SEPY*SEPY+SEPZ*SEPZ
!$$--- To calculate electron density on atom I
ITYPJ=ITYP(J)
if(R2 < dcm_dtr2(ITYPI,ITYPJ)) then
KTABij = dcm_KPAIR(ITYPI,ITYPJ)
KTABji = dcm_KPAIR(ITYPJ,ITYPI)
R = DSQRT(R2)
SK= DSQRT(R)*CSI
KK = SK
DK = SK-KK
DENKJ = DEN(J)
FORTOT= (FPOTR(KTABij, KK) + DK*(FPOTR(KTABij, KK+1) - FPOTR(KTABij, KK)))/R2 + &
((FPOTB(KTABji, KK) + DK*(FPOTB(KTABji, KK+1) - FPOTB(KTABji, KK)))*DENKJ + &
(FPOTB(KTABij, KK) + DK*(FPOTB(KTABij, KK+1) - FPOTB(KTABij, KK)))*DENKI)/R
! = -{∂φ/∂r[ij]+(∂F/∂ρ[i])*(∂ρ[i]/∂r[ij])+(∂F/∂ρ[j])*(∂ρ[j]/∂r[ij])}/r[ij]
FPX = FPX + FORTOT*SEPX
FPY = FPY + FORTOT*SEPY
FPZ = FPZ + FORTOT*SEPZ
FORTOT = FORTOT*5.0d-1
P1 = P1+SEPX*SEPX*FORTOT
P2 = P2+SEPY*SEPY*FORTOT
P3 = P3+SEPZ*SEPZ*FORTOT
P4 = P4+SEPY*SEPZ*FORTOT
P5 = P5+SEPZ*SEPX*FORTOT
P6 = P6+SEPX*SEPY*FORTOT
end if
ENDDO
FP(1,IC) = FPX
FP(2,IC) = FPY
FP(3,IC) = FPZ
S00(1,IC) = P1
S00(2,IC) = P2
S00(3,IC) = P3
S00(4,IC) = P4
S00(5,IC) = P5
S00(6,IC) = P6
END DO
RETURN
END SUBROUTINE CALPTENSOR_KERNEL
--------------------------------------------------------------------------------------------------------------------------------------
attributes(global) SUBROUTINE CALPTENSOR_KERNEL(IM,ITYP,XP,NAPDEV,IA0, &
NPRT,mxKVOIS,KVOIS,INDI,mvl, &
cra011,cra021,cra031, &
cra012,cra022,cra032, &
cra013,cra023,cra033, &
CSI, KT, FPOTR, FPOTB, &
DEN,FP, S00)
!*** PURPOSE: to begine calculate the forces on atoms, and also the virial tensor
use gpu, only:mp_BLOCKSIZE
implicit none
!--- DUMMY VARIABLES
integer, value, intent(in)::IM
real(KINDDF), value, intent(in)::cra011,cra021,cra031,cra012,cra022,cra032,cra013,cra023,cra033
integer, device, intent(in)::ITYP(IM)
real(KINDDF), device, intent(in)::XP(3,IM)
integer, value, intent(in)::NAPDEV,IA0,NPRT,mxKVOIS
integer, device, intent(in)::KVOIS(NAPDEV)
integer, device, intent(in)::INDI(mxKVOIS,NAPDEV)
integer(1), device::mvl(mxKVOIS,NAPDEV)
real(KINDDF),value, intent(in)::CSI
integer, value, intent(in)::KT
real(KINDDF), device, intent(in)::FPOTR(KT,*)
real(KINDDF), device, intent(in)::FPOTB(KT,*)
real(KINDDF), device, intent(in):EN(IM)
!Output:
real(KINDDF), device, intent(out)::FP(3,NAPDEV)
real(KINDDF), device, intent(out)::S00(6,*)
!Local variables
integer::J,K,KK, IW, IIW, KTABij, KTABji, ITYPI, ITYPJ
real(KINDDF)::FORTOT,FPX,FPY,FPZ, SK, DK, R2, R, DENKI, DENKJ
real(KINDDF)::SEPX, SEPY, SEPZ, POSX, POSY, POSZ
real(KINDDF):1, P2, P3, P4, P5, P6
integer::IC,IC0, ITB, IBG, NTB, NTG, ITG
! ---
! --- Note: blockdim%y = blockdim%z = griddim%y = griddim%y = 1, which means that
! --- the range of threadidx%y, threadidx%z, blockidx%y, blockidx%z is [1,1]
ITB = threadidx%x ! ID of thread in one block
IBG = blockidx%x ! ID of block in one grid
NTB = blockdim%x !$$--- size of Block
NTG = NTB*griddim%x ! number of threads in one grid
ITG = (IBG-1)*NTB+ITB !$$ -- the id of thread in one grid
do IC=ITG, NPRT, NTG
!$$IC -- the id of the atom on the device
!$$ NOTE: IC is not the id of the same atom in the whole box
!$$POS-- position of the atom
!$$ NOTE: XP(I) is the position of Ith atom in the whole box
IC0 = IC+IA0
POSX = XP(1,IC0)
POSY = XP(2,IC0)
POSZ = XP(3,IC0)
ITYPI = ITYP(IC0)
DENKI = DEN(IC0)
!$$-- start calculation of electron density
IIW = KVOIS(IC)
FPX = 0.0d0
FPY = 0.0d0
FPZ = 0.0d0
P1= 0.0d0
P2= 0.0d0
P3= 0.0d0
P4= 0.0d0
P5= 0.0d0
P6= 0.0d0
DO IW=1, IIW, 1
!$$--- NOTE: the particles index of neighbore-list is the index of particle in the whole box
J=INDI(IW,IC)
!$$--- To calculate the seperation between particle IC and its IWth neighbore
SEPX = POSX - XP(1,J)
SEPY = POSY - XP(2,J)
SEPZ = POSZ - XP(3,J)
if(mvl(IW,IC)>0)then
if(iand( mvl(IW,IC), 1)==1)then
SEPX = SEPX - cra011
SEPY = SEPY - cra021
SEPZ = SEPZ - cra031
end if
if(iand( mvl(IW,IC), 2)==2)then
SEPX = SEPX + cra011
SEPY = SEPY + cra021
SEPZ = SEPZ + cra031
end if
if(iand( mvl(IW,IC), 4)==4)then
SEPX = SEPX - cra012
SEPY = SEPY - cra022
SEPZ = SEPZ - cra032
end if
if(iand( mvl(IW,IC), 8)==8)then
SEPX = SEPX + cra012
SEPY = SEPY + cra022
SEPZ = SEPZ + cra032
end if
if(iand( mvl(IW,IC), 16)==16)then
SEPX = SEPX - cra013
SEPY = SEPY - cra023
SEPZ = SEPZ - cra033
end if
if(iand( mvl(IW,IC), 32)==32)then
SEPX = SEPX + cra013
SEPY = SEPY + cra023
SEPZ = SEPZ + cra033
end if
end if
R2 = SEPX*SEPX+SEPY*SEPY+SEPZ*SEPZ
!$$--- To calculate electron density on atom I
ITYPJ=ITYP(J)
if(R2 < dcm_dtr2(ITYPI,ITYPJ)) then
KTABij = dcm_KPAIR(ITYPI,ITYPJ)
KTABji = dcm_KPAIR(ITYPJ,ITYPI)
R = DSQRT(R2)
SK= DSQRT(R)*CSI
KK = SK
DK = SK-KK
DENKJ = DEN(J)
FORTOT= (FPOTR(KTABij, KK) + DK*(FPOTR(KTABij, KK+1) - FPOTR(KTABij, KK)))/R2 + &
((FPOTB(KTABji, KK) + DK*(FPOTB(KTABji, KK+1) - FPOTB(KTABji, KK)))*DENKJ + &
(FPOTB(KTABij, KK) + DK*(FPOTB(KTABij, KK+1) - FPOTB(KTABij, KK)))*DENKI)/R
! = -{∂φ/∂r[ij]+(∂F/∂ρ[i])*(∂ρ[i]/∂r[ij])+(∂F/∂ρ[j])*(∂ρ[j]/∂r[ij])}/r[ij]
FPX = FPX + FORTOT*SEPX
FPY = FPY + FORTOT*SEPY
FPZ = FPZ + FORTOT*SEPZ
FORTOT = FORTOT*5.0d-1
P1 = P1+SEPX*SEPX*FORTOT
P2 = P2+SEPY*SEPY*FORTOT
P3 = P3+SEPZ*SEPZ*FORTOT
P4 = P4+SEPY*SEPZ*FORTOT
P5 = P5+SEPZ*SEPX*FORTOT
P6 = P6+SEPX*SEPY*FORTOT
end if
ENDDO
FP(1,IC) = FPX
FP(2,IC) = FPY
FP(3,IC) = FPZ
S00(1,IC) = P1
S00(2,IC) = P2
S00(3,IC) = P3
S00(4,IC) = P4
S00(5,IC) = P5
S00(6,IC) = P6
END DO
RETURN
END SUBROUTINE CALPTENSOR_KERNEL
原文发布时间为:2017-4-7 08:09:10
原文由:bqfuscu 发布,版权归属于原作者
本文来自云栖社区合作伙伴NVIDIA,了解相关信息可以关注NVIDIA官方网站