#include "pppm_cg.h"
#include <mpi.h>
#include <cmath>
#include <cstring>
#include "atom.h"
#include "gridcomm.h"
#include "domain.h"
#include "error.h"
#include "force.h"
#include "neighbor.h"
#include "memory.h"
#include "math_const.h"
#include "remap.h"
using namespace LAMMPS_NS;
using namespace MathConst;
#define OFFSET 16384
#define SMALLQ 0.00001
enum{REVERSE_RHO};
enum{FORWARD_IK,FORWARD_AD,FORWARD_IK_PERATOM,FORWARD_AD_PERATOM};
#ifdef FFT_SINGLE
#define ZEROF 0.0f
#else
#define ZEROF 0.0
#endif
PPPMCG::PPPMCG(LAMMPS *lmp) : PPPM(lmp),
is_charged(NULL)
{
num_charged = -1;
group_group_enable = 1;
}
void PPPMCG::settings(int narg, char **arg)
{
if ((narg < 1) || (narg > 2))
error->all(FLERR,"Illegal kspace_style pppm/cg command");
PPPM::settings(narg,arg);
if (narg == 2) smallq = fabs(force->numeric(FLERR,arg[1]));
else smallq = SMALLQ;
}
PPPMCG::~PPPMCG()
{
memory->destroy(is_charged);
}
void PPPMCG::compute(int eflag, int vflag)
{
ev_init(eflag,vflag);
if (evflag_atom && !peratom_allocate_flag) {
allocate_peratom();
cg_peratom->ghost_notify();
cg_peratom->setup();
}
if (atom->natoms != natoms_original) {
qsum_qsq();
natoms_original = atom->natoms;
}
if (qsqsum == 0.0) return;
if (triclinic == 0) boxlo = domain->boxlo;
else {
boxlo = domain->boxlo_lamda;
domain->x2lamda(atom->nlocal);
}
if (atom->nmax > nmax) {
memory->destroy(part2grid);
memory->destroy(is_charged);
nmax = atom->nmax;
memory->create(part2grid,nmax,3,"pppm:part2grid");
memory->create(is_charged,nmax,"pppm/cg:is_charged");
}
if (num_charged < 0) {
bigint charged_all, charged_num;
double charged_frac, charged_fmax, charged_fmin;
num_charged=0;
for (int i=0; i < atom->nlocal; ++i)
if (fabs(atom->q[i]) > smallq)
++num_charged;
if (atom->nlocal > 0)
charged_frac = static_cast<double>(num_charged) * 100.0
/ static_cast<double>(atom->nlocal);
else
charged_frac = 0.0;
MPI_Reduce(&charged_frac,&charged_fmax,1,MPI_DOUBLE,MPI_MAX,0,world);
MPI_Reduce(&charged_frac,&charged_fmin,1,MPI_DOUBLE,MPI_MIN,0,world);
charged_num = num_charged;
MPI_Reduce(&charged_num,&charged_all,1,MPI_LMP_BIGINT,MPI_SUM,0,world);
charged_frac = static_cast<double>(charged_all) * 100.0
/ static_cast<double>(atom->natoms);
if (me == 0) {
if (screen)
fprintf(screen,
" PPPM/cg optimization cutoff: %g\n"
" Total charged atoms: %.1f%%\n"
" Min/max charged atoms/proc: %.1f%% %.1f%%\n",
smallq,charged_frac,charged_fmin,charged_fmax);
if (logfile)
fprintf(logfile,
" PPPM/cg optimization cutoff: %g\n"
" Total charged atoms: %.1f%%\n"
" Min/max charged atoms/proc: %.1f%% %.1f%%\n",
smallq,charged_frac,charged_fmin,charged_fmax);
}
}
if (neighbor->ago == 0) {
num_charged = 0;
for (int i = 0; i < atom->nlocal; ++i) {
if (fabs(atom->q[i]) > smallq) {
is_charged[num_charged] = i;
++num_charged;
}
}
}
particle_map();
make_rho();
cg->reverse_comm(this,REVERSE_RHO);
brick2fft();
poisson();
if (differentiation_flag == 1) cg->forward_comm(this,FORWARD_AD);
else cg->forward_comm(this,FORWARD_IK);
if (evflag_atom) {
if (differentiation_flag == 1 && vflag_atom)
cg_peratom->forward_comm(this,FORWARD_AD_PERATOM);
else if (differentiation_flag == 0)
cg_peratom->forward_comm(this,FORWARD_IK_PERATOM);
}
fieldforce();
if (evflag_atom) fieldforce_peratom();
const double qscale = qqrd2e * scale;
if (eflag_global) {
double energy_all;
MPI_Allreduce(&energy,&energy_all,1,MPI_DOUBLE,MPI_SUM,world);
energy = energy_all;
energy *= 0.5*volume;
energy -= g_ewald*qsqsum/MY_PIS +
MY_PI2*qsum*qsum / (g_ewald*g_ewald*volume);
energy *= qscale;
}
if (vflag_global) {
double virial_all[6];
MPI_Allreduce(virial,virial_all,6,MPI_DOUBLE,MPI_SUM,world);
for (int i = 0; i < 6; i++) virial[i] = 0.5*qscale*volume*virial_all[i];
}
if (evflag_atom) {
const double * const q = atom->q;
if (eflag_atom) {
for (int j = 0; j < num_charged; j++) {
const int i = is_charged[j];
eatom[i] *= 0.5;
eatom[i] -= g_ewald*q[i]*q[i]/MY_PIS + MY_PI2*q[i]*qsum /
(g_ewald*g_ewald*volume);
eatom[i] *= qscale;
}
}
if (vflag_atom) {
for (int n = 0; n < num_charged; n++) {
const int i = is_charged[n];
for (int j = 0; j < 6; j++) vatom[i][j] *= 0.5*qscale;
}
}
}
if (slabflag == 1) slabcorr();
if (triclinic) domain->lamda2x(atom->nlocal);
}
void PPPMCG::particle_map()
{
int nx,ny,nz;
double **x = atom->x;
if (!std::isfinite(boxlo[0]) || !std::isfinite(boxlo[1]) || !std::isfinite(boxlo[2]))
error->one(FLERR,"Non-numeric box dimensions - simulation unstable");
int flag = 0;
for (int j = 0; j < num_charged; j++) {
int i = is_charged[j];
nx = static_cast<int> ((x[i][0]-boxlo[0])*delxinv+shift) - OFFSET;
ny = static_cast<int> ((x[i][1]-boxlo[1])*delyinv+shift) - OFFSET;
nz = static_cast<int> ((x[i][2]-boxlo[2])*delzinv+shift) - OFFSET;
part2grid[i][0] = nx;
part2grid[i][1] = ny;
part2grid[i][2] = nz;
if (nx+nlower < nxlo_out || nx+nupper > nxhi_out ||
ny+nlower < nylo_out || ny+nupper > nyhi_out ||
nz+nlower < nzlo_out || nz+nupper > nzhi_out)
flag = 1;
}
if (flag) error->one(FLERR,"Out of range atoms - cannot compute PPPM");
}
void PPPMCG::make_rho()
{
int i,l,m,n,nx,ny,nz,mx,my,mz;
FFT_SCALAR dx,dy,dz,x0,y0,z0;
memset(&(density_brick[nzlo_out][nylo_out][nxlo_out]),0,
ngrid*sizeof(FFT_SCALAR));
double *q = atom->q;
double **x = atom->x;
for (int j = 0; j < num_charged; j++) {
i = is_charged[j];
nx = part2grid[i][0];
ny = part2grid[i][1];
nz = part2grid[i][2];
dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
compute_rho1d(dx,dy,dz);
z0 = delvolinv * q[i];
for (n = nlower; n <= nupper; n++) {
mz = n+nz;
y0 = z0*rho1d[2][n];
for (m = nlower; m <= nupper; m++) {
my = m+ny;
x0 = y0*rho1d[1][m];
for (l = nlower; l <= nupper; l++) {
mx = l+nx;
density_brick[mz][my][mx] += x0*rho1d[0][l];
}
}
}
}
}
void PPPMCG::fieldforce_ik()
{
int i,l,m,n,nx,ny,nz,mx,my,mz;
FFT_SCALAR dx,dy,dz,x0,y0,z0;
FFT_SCALAR ekx,eky,ekz;
double *q = atom->q;
double **x = atom->x;
double **f = atom->f;
for (int j = 0; j < num_charged; j++) {
i = is_charged[j];
nx = part2grid[i][0];
ny = part2grid[i][1];
nz = part2grid[i][2];
dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
compute_rho1d(dx,dy,dz);
ekx = eky = ekz = ZEROF;
for (n = nlower; n <= nupper; n++) {
mz = n+nz;
z0 = rho1d[2][n];
for (m = nlower; m <= nupper; m++) {
my = m+ny;
y0 = z0*rho1d[1][m];
for (l = nlower; l <= nupper; l++) {
mx = l+nx;
x0 = y0*rho1d[0][l];
ekx -= x0*vdx_brick[mz][my][mx];
eky -= x0*vdy_brick[mz][my][mx];
ekz -= x0*vdz_brick[mz][my][mx];
}
}
}
const double qfactor = qqrd2e * scale * q[i];
f[i][0] += qfactor*ekx;
f[i][1] += qfactor*eky;
if (slabflag != 2) f[i][2] += qfactor*ekz;
}
}
void PPPMCG::fieldforce_ad()
{
int i,l,m,n,nx,ny,nz,mx,my,mz;
FFT_SCALAR dx,dy,dz;
FFT_SCALAR ekx,eky,ekz;
double s1,s2,s3;
double sf = 0.0;
double *prd;
if (triclinic == 0) prd = domain->prd;
else prd = domain->prd_lamda;
double xprd = prd[0];
double yprd = prd[1];
double zprd = prd[2];
double hx_inv = nx_pppm/xprd;
double hy_inv = ny_pppm/yprd;
double hz_inv = nz_pppm/zprd;
double *q = atom->q;
double **x = atom->x;
double **f = atom->f;
for (int j = 0; j < num_charged; j++) {
i = is_charged[j];
nx = part2grid[i][0];
ny = part2grid[i][1];
nz = part2grid[i][2];
dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
compute_rho1d(dx,dy,dz);
compute_drho1d(dx,dy,dz);
ekx = eky = ekz = ZEROF;
for (n = nlower; n <= nupper; n++) {
mz = n+nz;
for (m = nlower; m <= nupper; m++) {
my = m+ny;
for (l = nlower; l <= nupper; l++) {
mx = l+nx;
ekx += drho1d[0][l]*rho1d[1][m]*rho1d[2][n]*u_brick[mz][my][mx];
eky += rho1d[0][l]*drho1d[1][m]*rho1d[2][n]*u_brick[mz][my][mx];
ekz += rho1d[0][l]*rho1d[1][m]*drho1d[2][n]*u_brick[mz][my][mx];
}
}
}
ekx *= hx_inv;
eky *= hy_inv;
ekz *= hz_inv;
const double qfactor = qqrd2e * scale;
s1 = x[i][0]*hx_inv;
s2 = x[i][1]*hy_inv;
s3 = x[i][2]*hz_inv;
sf = sf_coeff[0]*sin(2*MY_PI*s1);
sf += sf_coeff[1]*sin(4*MY_PI*s1);
sf *= 2*q[i]*q[i];
f[i][0] += qfactor*(ekx*q[i] - sf);
sf = sf_coeff[2]*sin(2*MY_PI*s2);
sf += sf_coeff[3]*sin(4*MY_PI*s2);
sf *= 2*q[i]*q[i];
f[i][1] += qfactor*(eky*q[i] - sf);
sf = sf_coeff[4]*sin(2*MY_PI*s3);
sf += sf_coeff[5]*sin(4*MY_PI*s3);
sf *= 2*q[i]*q[i];
if (slabflag != 2) f[i][2] += qfactor*(ekz*q[i] - sf);
}
}
void PPPMCG::fieldforce_peratom()
{
int i,l,m,n,nx,ny,nz,mx,my,mz;
FFT_SCALAR dx,dy,dz,x0,y0,z0;
FFT_SCALAR u,v0,v1,v2,v3,v4,v5;
double *q = atom->q;
double **x = atom->x;
for (int j = 0; j < num_charged; j++) {
i = is_charged[j];
nx = part2grid[i][0];
ny = part2grid[i][1];
nz = part2grid[i][2];
dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
compute_rho1d(dx,dy,dz);
u = v0 = v1 = v2 = v3 = v4 = v5 = ZEROF;
for (n = nlower; n <= nupper; n++) {
mz = n+nz;
z0 = rho1d[2][n];
for (m = nlower; m <= nupper; m++) {
my = m+ny;
y0 = z0*rho1d[1][m];
for (l = nlower; l <= nupper; l++) {
mx = l+nx;
x0 = y0*rho1d[0][l];
if (eflag_atom) u += x0*u_brick[mz][my][mx];
if (vflag_atom) {
v0 += x0*v0_brick[mz][my][mx];
v1 += x0*v1_brick[mz][my][mx];
v2 += x0*v2_brick[mz][my][mx];
v3 += x0*v3_brick[mz][my][mx];
v4 += x0*v4_brick[mz][my][mx];
v5 += x0*v5_brick[mz][my][mx];
}
}
}
}
if (eflag_atom) eatom[i] += q[i]*u;
if (vflag_atom) {
vatom[i][0] += q[i]*v0;
vatom[i][1] += q[i]*v1;
vatom[i][2] += q[i]*v2;
vatom[i][3] += q[i]*v3;
vatom[i][4] += q[i]*v4;
vatom[i][5] += q[i]*v5;
}
}
}
void PPPMCG::slabcorr()
{
int i,j;
const double * const q = atom->q;
const double * const * const x = atom->x;
const double zprd = domain->zprd;
double dipole = 0.0;
for (j = 0; j < num_charged; j++) {
i = is_charged[j];
dipole += q[i]*x[i][2];
}
double dipole_all;
MPI_Allreduce(&dipole,&dipole_all,1,MPI_DOUBLE,MPI_SUM,world);
double dipole_r2 = 0.0;
if (eflag_atom || fabs(qsum) > SMALLQ) {
for (j = 0; j < num_charged; j++) {
i = is_charged[j];
dipole_r2 += q[i]*x[i][2]*x[i][2];
}
double tmp;
MPI_Allreduce(&dipole_r2,&tmp,1,MPI_DOUBLE,MPI_SUM,world);
dipole_r2 = tmp;
}
const double e_slabcorr = MY_2PI*(dipole_all*dipole_all -
qsum*dipole_r2 - qsum*qsum*zprd*zprd/12.0)/volume;
const double qscale = qqrd2e * scale;
if (eflag_global) energy += qscale * e_slabcorr;
if (eflag_atom) {
const double efact = qscale * MY_2PI/volume;
for (j = 0; j < num_charged; j++) {
i = is_charged[j];
eatom[i] += efact * q[i]*(x[i][2]*dipole_all - 0.5*(dipole_r2 +
qsum*x[i][2]*x[i][2]) - qsum*zprd*zprd/12.0);
}
}
const double ffact = qscale * (-MY_4PI/volume);
double * const * const f = atom->f;
for (j = 0; j < num_charged; j++) {
i = is_charged[j];
f[i][2] += ffact * q[i]*(dipole_all - qsum*x[i][2]);
}
}
void PPPMCG::make_rho_groups(int groupbit_A, int groupbit_B, int BA_flag)
{
int i,l,m,n,nx,ny,nz,mx,my,mz;
FFT_SCALAR dx,dy,dz,x0,y0,z0;
memset(&(density_A_brick[nzlo_out][nylo_out][nxlo_out]),0,
ngrid*sizeof(FFT_SCALAR));
memset(&(density_B_brick[nzlo_out][nylo_out][nxlo_out]),0,
ngrid*sizeof(FFT_SCALAR));
const double * const q = atom->q;
const double * const * const x = atom->x;
const int * const mask = atom->mask;
for (int j = 0; j < num_charged; j++) {
i = is_charged[j];
if ((mask[i] & groupbit_A) && (mask[i] & groupbit_B))
if (BA_flag) continue;
if ((mask[i] & groupbit_A) || (mask[i] & groupbit_B)) {
nx = part2grid[i][0];
ny = part2grid[i][1];
nz = part2grid[i][2];
dx = nx+shiftone - (x[i][0]-boxlo[0])*delxinv;
dy = ny+shiftone - (x[i][1]-boxlo[1])*delyinv;
dz = nz+shiftone - (x[i][2]-boxlo[2])*delzinv;
compute_rho1d(dx,dy,dz);
z0 = delvolinv * q[i];
for (n = nlower; n <= nupper; n++) {
mz = n+nz;
y0 = z0*rho1d[2][n];
for (m = nlower; m <= nupper; m++) {
my = m+ny;
x0 = y0*rho1d[1][m];
for (l = nlower; l <= nupper; l++) {
mx = l+nx;
if (mask[i] & groupbit_A)
density_A_brick[mz][my][mx] += x0*rho1d[0][l];
if (mask[i] & groupbit_B)
density_B_brick[mz][my][mx] += x0*rho1d[0][l];
}
}
}
}
}
}
double PPPMCG::memory_usage()
{
double bytes = PPPM::memory_usage();
bytes += nmax * sizeof(int);
return bytes;
}