107 group_group_enable = 1;
117 MPI_Comm_rank(world,&
me);
118 MPI_Comm_size(world,&
nprocs);
153 memory->create(
acons,8,7,
"pppm:acons");
154 acons[1][0] = 2.0 / 3.0;
155 acons[2][0] = 1.0 / 50.0;
156 acons[2][1] = 5.0 / 294.0;
157 acons[3][0] = 1.0 / 588.0;
158 acons[3][1] = 7.0 / 1440.0;
159 acons[3][2] = 21.0 / 3872.0;
160 acons[4][0] = 1.0 / 4320.0;
161 acons[4][1] = 3.0 / 1936.0;
162 acons[4][2] = 7601.0 / 2271360.0;
163 acons[4][3] = 143.0 / 28800.0;
164 acons[5][0] = 1.0 / 23232.0;
165 acons[5][1] = 7601.0 / 13628160.0;
166 acons[5][2] = 143.0 / 69120.0;
167 acons[5][3] = 517231.0 / 106536960.0;
168 acons[5][4] = 106640677.0 / 11737571328.0;
169 acons[6][0] = 691.0 / 68140800.0;
170 acons[6][1] = 13.0 / 57600.0;
171 acons[6][2] = 47021.0 / 35512320.0;
172 acons[6][3] = 9694607.0 / 2095994880.0;
173 acons[6][4] = 733191589.0 / 59609088000.0;
174 acons[6][5] = 326190917.0 / 11700633600.0;
175 acons[7][0] = 1.0 / 345600.0;
176 acons[7][1] = 3617.0 / 35512320.0;
177 acons[7][2] = 745739.0 / 838397952.0;
178 acons[7][3] = 56399353.0 / 12773376000.0;
179 acons[7][4] = 25091609.0 / 1560084480.0;
180 acons[7][5] = 1755948832039.0 / 36229939200000.0;
181 acons[7][6] = 4887769399.0 / 37838389248.0;
233 if (
me == 0) utils::logmesg(lmp,
"PPPM initialization ...\n");
238 error->all(FLERR,fmt::format(
"PPPM order cannot be < 2 or > {}",
MAXORDER));
250 qqrd2e = force->qqrd2e;
252 natoms_original = atom->natoms;
254 if (accuracy_absolute >= 0.0) accuracy = accuracy_absolute;
255 else accuracy = accuracy_relative * two_charge_force;
263 Grid3d *gctmp =
nullptr;
266 while (order >= minorder) {
267 if (iteration &&
me == 0)
268 error->warning(FLERR,
"Reducing PPPM order b/c stencil extends "
269 "beyond nearest neighbor processor");
276 gctmp =
new Grid3d(lmp, world, nx_pppm, ny_pppm, nz_pppm,
281 gctmp->setup_comm(tmp1, tmp2);
282 if (gctmp->ghost_adjacent())
break;
289 if (order < minorder) error->all(FLERR,
"PPPM order < minimum allowed order");
290 if (!overlap_allowed && !gctmp->ghost_adjacent())
291 error->all(FLERR,
"PPPM grid stencil extends "
292 "beyond nearest neighbor processor");
293 if (gctmp)
delete gctmp;
302 int ngrid_max,nfft_both_max;
303 MPI_Allreduce(&
ngrid,&ngrid_max,1,MPI_INT,MPI_MAX,world);
304 MPI_Allreduce(&
nfft_both,&nfft_both_max,1,MPI_INT,MPI_MAX,world);
307 std::string mesg = fmt::format(
" G vector (1/distance) = {:.8g}\n",g_ewald);
308 mesg += fmt::format(
" grid = {} {} {}\n",nx_pppm,ny_pppm,nz_pppm);
309 mesg += fmt::format(
" stencil order = {}\n",order);
310 mesg += fmt::format(
" estimated absolute RMS force accuracy = {:.8g}\n",
312 mesg += fmt::format(
" estimated relative force accuracy = {:.8g}\n",
313 estimated_accuracy/two_charge_force);
315 mesg += fmt::format(
" 3d grid and FFT values/proc = {} {}\n",
316 ngrid_max,nfft_both_max);
317 utils::logmesg(lmp,mesg);
332 if (comm->me == 0) utils::logmesg(lmp,
"Ewald initialization ...\n");
336 if (domain->dimension == 2)
337 error->all(FLERR,
"Cannot use Ewald with 2d simulation");
338 if (!atom->q_flag) error->all(FLERR,
"Kspace style requires atom attribute q");
339 if (slabflag == 0 && domain->nonperiodic > 0)
340 error->all(FLERR,
"Cannot use non-periodic boundaries with Ewald");
355 qqrd2e = force->qqrd2e;
357 natoms_original = atom->natoms;
361 accuracy = accuracy_relative;
362 g_ewald = sqrt(-2.0 * log(accuracy)) / (
cutoff*
hdnnp->cflength);
380 if (slabflag == 0 && domain->nonperiodic > 0)
381 error->all(FLERR,
"Cannot use non-periodic boundaries with PPPM");
383 if (domain->xperiodic != 1 || domain->yperiodic != 1 ||
384 domain->boundary[2][0] != 1 || domain->boundary[2][1] != 1)
385 error->all(FLERR,
"Incorrect boundaries with slab PPPM");
400 double xprd = prd[0] *
hdnnp->cflength;
401 double yprd = prd[1] *
hdnnp->cflength;
402 double zprd = prd[2] *
hdnnp->cflength;
403 volume = xprd * yprd * zprd;
411 double unitkx = (MY_2PI/xprd);
412 double unitky = (MY_2PI/yprd);
413 double unitkz = (MY_2PI/zprd);
420 per = i - nx_pppm*(2*i/nx_pppm);
425 per = i - ny_pppm*(2*i/ny_pppm);
430 per = i - nz_pppm*(2*i/nz_pppm);
470 double const xprd = domain->xprd *
hdnnp->cflength;
471 double const yprd = domain->yprd *
hdnnp->cflength;
472 double const zprd = domain->zprd *
hdnnp->cflength;
473 volume = xprd * yprd * zprd;
477 unitk[0] = 2.0*MY_PI/xprd;
478 unitk[1] = 2.0*MY_PI/yprd;
479 unitk[2] = 2.0*MY_PI/zprd;
507 if (
kmax > kmax_old) {
514 memory->create(
ek,
nmax,3,
"ewald:ek");
543 double preu = 4.0*M_PI/
volume;
546 etasq = 1.0 / (g_ewald*g_ewald);
552 for (m = 1; m <=
kmax; m++) {
584 for (k = 1; k <=
kxmax; k++) {
585 for (l = 1; l <=
kymax; l++) {
607 for (l = 1; l <=
kymax; l++) {
608 for (m = 1; m <=
kzmax; m++) {
630 for (k = 1; k <=
kxmax; k++) {
631 for (m = 1; m <=
kzmax; m++) {
653 for (k = 1; k <=
kxmax; k++) {
654 for (l = 1; l <=
kymax; l++) {
655 for (m = 1; m <=
kzmax; m++) {
696 double sqk,clpm,slpm;
698 double **x = atom->x;
699 int nlocal = atom->nlocal;
703 cflength =
hdnnp->cflength;
707 for (ic = 0; ic < 3; ic++) {
710 for (i = 0; i < nlocal; i++) {
713 cs[1][ic][i] = cos(
unitk[ic]*x[i][ic]*cflength);
714 sn[1][ic][i] = sin(
unitk[ic]*x[i][ic]*cflength);
715 cs[-1][ic][i] =
cs[1][ic][i];
716 sn[-1][ic][i] = -
sn[1][ic][i];
724 for (m = 2; m <=
kmax; m++) {
725 for (ic = 0; ic < 3; ic++) {
728 for (i = 0; i < nlocal; i++) {
729 cs[m][ic][i] =
cs[m-1][ic][i]*
cs[1][ic][i] -
730 sn[m-1][ic][i]*
sn[1][ic][i];
731 sn[m][ic][i] =
sn[m-1][ic][i]*
cs[1][ic][i] +
732 cs[m-1][ic][i]*
sn[1][ic][i];
733 cs[-m][ic][i] =
cs[m][ic][i];
734 sn[-m][ic][i] = -
sn[m][ic][i];
745 for (k = 1; k <=
kxmax; k++) {
746 for (l = 1; l <=
kymax; l++) {
749 for (i = 0; i < nlocal; i++) {
754 for (i = 0; i < nlocal; i++) {
765 for (l = 1; l <=
kymax; l++) {
766 for (m = 1; m <=
kzmax; m++) {
769 for (i = 0; i < nlocal; i++) {
774 for (i = 0; i < nlocal; i++) {
785 for (k = 1; k <=
kxmax; k++) {
786 for (m = 1; m <=
kzmax; m++) {
789 for (i = 0; i < nlocal; i++) {
794 for (i = 0; i < nlocal; i++) {
805 for (k = 1; k <=
kxmax; k++) {
806 for (l = 1; l <=
kymax; l++) {
807 for (m = 1; m <=
kzmax; m++) {
811 for (i = 0; i < nlocal; i++) {
812 clpm =
cs[l][1][i]*
cs[m][2][i] -
sn[l][1][i]*
sn[m][2][i];
813 slpm =
sn[l][1][i]*
cs[m][2][i] +
cs[l][1][i]*
sn[m][2][i];
818 for (i = 0; i < nlocal; i++) {
819 clpm =
cs[l][1][i]*
cs[m][2][i] +
sn[l][1][i]*
sn[m][2][i];
820 slpm = -
sn[l][1][i]*
cs[m][2][i] +
cs[l][1][i]*
sn[m][2][i];
825 for (i = 0; i < nlocal; i++) {
826 clpm =
cs[l][1][i]*
cs[m][2][i] +
sn[l][1][i]*
sn[m][2][i];
827 slpm =
sn[l][1][i]*
cs[m][2][i] -
cs[l][1][i]*
sn[m][2][i];
832 for (i = 0; i < nlocal; i++) {
833 clpm =
cs[l][1][i]*
cs[m][2][i] -
sn[l][1][i]*
sn[m][2][i];
834 slpm = -
sn[l][1][i]*
cs[m][2][i] -
cs[l][1][i]*
sn[m][2][i];
862 std::cout << energy <<
'\n';
865 std::cout << energy <<
'\n';
878 const double qscale = qqrd2e * scale;
882 MPI_Allreduce(&energy, &energy_all, 1, MPI_DOUBLE, MPI_SUM, world);
886 energy -= g_ewald * qsqsum / MY_PIS +
887 MY_PI2 * qsum * qsum / (g_ewald * g_ewald *
volume);
891 std::cout <<
"here" <<
'\n';
893 std::cout << MY_PIS <<
'\n';
894 std::cout << MY_PI2 <<
'\n';
990 int nlocal = atom->nlocal;
991 int *tag = atom->tag;
996 for (
int k = 0; k <
kcount; k++)
999 double sf_real_loc = 0.0;
1000 double sf_im_loc = 0.0;
1004 for (i = 0; i < nlocal; i++)
1006 double const qi = gsl_vector_get(v,tag[i]-1);
1007 sf_real_loc += qi *
sfexp_rl[k][i];
1011 MPI_Allreduce(&(sf_real_loc),&(
sf_real[k]),1,MPI_DOUBLE,MPI_SUM,world);
1012 MPI_Allreduce(&(sf_im_loc),&(
sf_im[k]),1,MPI_DOUBLE,MPI_SUM,world);
1069 order_allocated = order;
1070 memory->create(
gf_b,order,
"pppm:gf_b");
1071 memory->create2d_offset(
rho1d,3,-order/2,order/2,
"pppm:rho1d");
1072 memory->create2d_offset(
drho1d,3,-order/2,order/2,
"pppm:drho1d");
1073 memory->create2d_offset(
rho_coeff,order,(1-order)/2,order/2,
"pppm:rho_coeff");
1074 memory->create2d_offset(
drho_coeff,order,(1-order)/2,order/2,
1084 fft1 =
new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm,
1087 0,0,&tmp,collective_flag);
1089 fft2 =
new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm,
1092 0,0,&tmp,collective_flag);
1094 remap =
new Remap(lmp,world,
1097 1,0,0,FFT_PRECISION,collective_flag);
1102 gc =
new Grid3d(lmp,world,nx_pppm,ny_pppm,nz_pppm,
1162 memory->destroy(
work1);
1163 memory->destroy(
work2);
1164 memory->destroy(
vg);
1170 memory->destroy(
gf_b);
1172 memory->destroy2d_offset(
rho1d,-order_allocated/2);
1173 memory->destroy2d_offset(
drho1d,-order_allocated/2);
1174 memory->destroy2d_offset(
rho_coeff,(1-order_allocated)/2);
1175 memory->destroy2d_offset(
drho_coeff,(1-order_allocated)/2);
1207 memory->destroy(
sf_im);
1495 if (comm->layout != Comm::LAYOUT_TILED) {
1496 nxlo_in =
static_cast<int> (comm->xsplit[comm->myloc[0]] * nx_pppm);
1497 nxhi_in =
static_cast<int> (comm->xsplit[comm->myloc[0]+1] * nx_pppm) - 1;
1499 nylo_in =
static_cast<int> (comm->ysplit[comm->myloc[1]] * ny_pppm);
1500 nyhi_in =
static_cast<int> (comm->ysplit[comm->myloc[1]+1] * ny_pppm) - 1;
1502 nzlo_in =
static_cast<int> (comm->zsplit[comm->myloc[2]] * nz_pppm);
1503 nzhi_in =
static_cast<int> (comm->zsplit[comm->myloc[2]+1] * nz_pppm) - 1;
1506 nxlo_in =
static_cast<int> (comm->mysplit[0][0] * nx_pppm);
1507 nxhi_in =
static_cast<int> (comm->mysplit[0][1] * nx_pppm) - 1;
1509 nylo_in =
static_cast<int> (comm->mysplit[1][0] * ny_pppm);
1510 nyhi_in =
static_cast<int> (comm->mysplit[1][1] * ny_pppm) - 1;
1512 nzlo_in =
static_cast<int> (comm->mysplit[2][0] * nz_pppm);
1513 nzhi_in =
static_cast<int> (comm->mysplit[2][1] * nz_pppm) - 1;
1541 double *prd,*sublo,*subhi;
1545 boxlo = domain->boxlo;
1546 sublo = domain->sublo;
1547 subhi = domain->subhi;
1554 sublo[0] = sublo[0] *
hdnnp->cflength;
1555 sublo[1] = sublo[1] *
hdnnp->cflength;
1556 sublo[2] = sublo[2] *
hdnnp->cflength;
1558 subhi[0] = subhi[0] *
hdnnp->cflength;
1559 subhi[1] = subhi[1] *
hdnnp->cflength;
1560 subhi[2] = subhi[2] *
hdnnp->cflength;
1562 double xprd = prd[0] *
hdnnp->cflength;
1563 double yprd = prd[1] *
hdnnp->cflength;
1564 double zprd = prd[2] *
hdnnp->cflength;
1566 double dist[3] = {0.0,0.0,0.0};
1567 double cuthalf = 0.5*neighbor->skin *
hdnnp->cflength;
1568 dist[0] = dist[1] = dist[2] = cuthalf;
1573 nlo =
static_cast<int> ((sublo[0]-dist[0]-
boxlo[0]) *
1575 nhi =
static_cast<int> ((subhi[0]+dist[0]-
boxlo[0]) *
1580 nlo =
static_cast<int> ((sublo[1]-dist[1]-
boxlo[1]) *
1582 nhi =
static_cast<int> ((subhi[1]+dist[1]-
boxlo[1]) *
1587 nlo =
static_cast<int> ((sublo[2]-dist[2]-
boxlo[2]) *
1589 nhi =
static_cast<int> ((subhi[2]+dist[2]-
boxlo[2]) *
1604 int npey_fft,npez_fft;
1610 int me_y =
me % npey_fft;
1611 int me_z =
me / npey_fft;
1616 nyhi_fft = (me_y+1)*ny_pppm/npey_fft - 1;
1618 nzhi_fft = (me_z+1)*nz_pppm/npez_fft - 1;
1729 memory->create2d_offset(a,order,-order,order,
"pppm:a");
1731 for (k = -order; k <= order; k++)
1732 for (l = 0; l < order; l++)
1736 for (j = 1; j < order; j++) {
1737 for (k = -j; k <= j; k += 2) {
1739 for (l = 0; l < j; l++) {
1740 a[l+1][k] = (a[l][k+1]-a[l][k-1]) / (l+1);
1742 s += powf(0.5,(
float) l+1) *
1743 (a[l][k-1] + powf(-1.0,(
float) l) * a[l][k+1]) / (l+1);
1745 s += pow(0.5,(
double) l+1) *
1746 (a[l][k-1] + pow(-1.0,(
double) l) * a[l][k+1]) / (l+1);
1754 for (k = -(order-1); k < order; k += 2) {
1755 for (l = 0; l < order; l++)
1757 for (l = 1; l < order; l++)
1762 memory->destroy2d_offset(a,-order);
1768 const double *
const prd = domain->prd;
1770 const double xprd = prd[0] *
hdnnp->cflength;
1771 const double yprd = prd[1] *
hdnnp->cflength;
1772 const double zprd = prd[2] *
hdnnp->cflength;
1774 const double unitkx = (MY_2PI/xprd);
1775 const double unitky = (MY_2PI/yprd);
1776 const double unitkz = (MY_2PI/zprd);
1779 double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
1780 double sum1,dot1,dot2;
1781 double numerator,denominator;
1784 int k,l,m,n,nx,ny,nz,kper,lper,mper;
1786 const int nbx =
static_cast<int> ((g_ewald*xprd/(MY_PI*nx_pppm)) *
1788 const int nby =
static_cast<int> ((g_ewald*yprd/(MY_PI*ny_pppm)) *
1790 const int nbz =
static_cast<int> ((g_ewald*zprd/(MY_PI*nz_pppm)) *
1792 const int twoorder = 2*order;
1796 mper = m - nz_pppm*(2*m/nz_pppm);
1797 snz = square(sin(0.5*unitkz*mper*zprd/nz_pppm));
1800 lper = l - ny_pppm*(2*l/ny_pppm);
1801 sny = square(sin(0.5*unitky*lper*yprd/ny_pppm));
1804 kper = k - nx_pppm*(2*k/nx_pppm);
1805 snx = square(sin(0.5*unitkx*kper*xprd/nx_pppm));
1807 sqk = square(unitkx*kper) + square(unitky*lper) + square(unitkz*mper);
1810 numerator = 12.5663706/sqk;
1811 denominator =
gf_denom(snx,sny,snz);
1814 for (nx = -nbx; nx <= nbx; nx++) {
1815 qx = unitkx*(kper+nx_pppm*nx);
1816 sx = exp(-0.25*square(qx/g_ewald));
1817 argx = 0.5*qx*xprd/nx_pppm;
1818 wx = powsinxx(argx,twoorder);
1820 for (ny = -nby; ny <= nby; ny++) {
1821 qy = unitky*(lper+ny_pppm*ny);
1822 sy = exp(-0.25*square(qy/g_ewald));
1823 argy = 0.5*qy*yprd/ny_pppm;
1824 wy = powsinxx(argy,twoorder);
1826 for (nz = -nbz; nz <= nbz; nz++) {
1827 qz = unitkz*(mper+nz_pppm*nz);
1828 sz = exp(-0.25*square(qz/g_ewald));
1829 argz = 0.5*qz*zprd/nz_pppm;
1830 wz = powsinxx(argz,twoorder);
1832 dot1 = unitkx*kper*qx + unitky*lper*qy + unitkz*mper*qz;
1833 dot2 = qx*qx+qy*qy+qz*qz;
1834 sum1 += (dot1/dot2) * sx*sy*sz * wx*wy*wz;
1838 greensfn[n++] = numerator*sum1/denominator;
1851 double xprd = domain->xprd;
1852 double yprd = domain->yprd;
1853 double zprd = domain->zprd;
1854 bigint natoms = atom->natoms;
1855 if (natoms == 0) natoms = 1;
1858 double q2_over_sqrt = q2 / sqrt(natoms*
cutoff*xprd*yprd*zprd);
1859 double df_rspace = 2.0 * q2_over_sqrt * exp(-g_ewald*g_ewald*
cutoff*
cutoff);
1860 double df_table = estimate_table_accuracy(q2_over_sqrt,df_rspace);
1861 double estimated_accuracy = sqrt(df_kspace*df_kspace + df_rspace*df_rspace +
1864 return estimated_accuracy;