n2p2 - A neural network potential package
Loading...
Searching...
No Matches
kspace_hdnnp.cpp
Go to the documentation of this file.
1/* ----------------------------------------------------------------------
2 LAMMPS - Large-scale Atomic/Molecular Massively Parallel Simulator
3 https://lammps.sandia.gov/, Sandia National Laboratories
4 Steve Plimpton, sjplimp@sandia.gov
5
6 Copyright (2003) Sandia Corporation. Under the terms of Contract
7 DE-AC04-94AL85000 with Sandia Corporation, the U.S. Government retains
8 certain rights in this software. This software is distributed under
9 the GNU General Public License.
10
11 See the README file in the top-level LAMMPS directory.
12------------------------------------------------------------------------- */
13
14#include "kspace_hdnnp.h"
15
16#include "angle.h"
17#include "atom.h"
18#include "bond.h"
19#include "comm.h"
20#include "domain.h"
21#include "error.h"
22#include "fft3d_wrap.h"
23#include "force.h"
24#include "grid3d.h"
25#include "math_const.h"
26#include "math_special.h"
27#include "memory.h"
28#include "neighbor.h"
29#include "pair.h"
30#include "remap_wrap.h"
31#include "fix_hdnnp.h"
32#include "pair_hdnnp_4g.h"
33
34#include <cmath>
35#include <cstring>
36#include <iostream>
37
38using namespace LAMMPS_NS;
39using namespace MathConst;
40using namespace MathSpecial;
41using namespace std;
42
43#define MAXORDER 7
44#define OFFSET 16384
45#define LARGE 10000.0
46#define SMALL 0.00001
47#define EPS_HOC 1.0e-7
48
51
52#ifdef FFT_SINGLE
53#define ZEROF 0.0f
54#define ONEF 1.0f
55#else
56#define ZEROF 0.0
57#define ONEF 1.0
58#endif
59
60/* ---------------------------------------------------------------------- */
61
62KSpaceHDNNP::KSpaceHDNNP(LAMMPS *lmp) : KSpace(lmp),
63 kxvecs(nullptr), kyvecs(nullptr), kzvecs(nullptr), kcoeff(nullptr), ug(nullptr), eg(nullptr), vg(nullptr),
64 ek(nullptr), sfexp_rl(nullptr), sfexp_im(nullptr), sf_real(nullptr), sf_im(nullptr),
65 sfexp_rl_all(nullptr),sfexp_im_all(nullptr),
66 cs(nullptr), sn(nullptr), factors(nullptr),
67 density_brick(nullptr), vdx_brick(nullptr),
68 vdy_brick(nullptr), vdz_brick(nullptr),u_brick(nullptr), v0_brick(nullptr), v1_brick(nullptr),
69 v2_brick(nullptr), v3_brick(nullptr),v4_brick(nullptr), v5_brick(nullptr), greensfn(nullptr),
70 fkx(nullptr), fky(nullptr), fkz(nullptr), density_fft(nullptr), work1(nullptr),
71 work2(nullptr), gf_b(nullptr), rho1d(nullptr),
72 rho_coeff(nullptr), drho1d(nullptr), drho_coeff(nullptr),
73 sf_precoeff1(nullptr), sf_precoeff2(nullptr), sf_precoeff3(nullptr),
74 sf_precoeff4(nullptr), sf_precoeff5(nullptr), sf_precoeff6(nullptr),
75 acons(nullptr), fft1(nullptr), fft2(nullptr), remap(nullptr), gc(nullptr),
76 gc_buf1(nullptr), gc_buf2(nullptr), part2grid(nullptr), boxlo(nullptr)
77{
78 hdnnp = nullptr;
79 hdnnp = (PairHDNNP4G *) force->pair_match("^hdnnp/4g",0);
80
81 ewaldflag = pppmflag = 0; // TODO check
82
83 eflag_global = 1; // calculate global energy
84
86 kewaldflag = 0;
87 kmax_created = 0;
88
89 kmax = 0;
90 kxvecs = kyvecs = kzvecs = nullptr;
91 kcoeff = nullptr;
92 ug = nullptr;
93 eg = vg = nullptr;
94 ek = nullptr;
95
96 sfexp_rl = sfexp_im = nullptr;
97 sf_im = sf_real = nullptr;
98 sfexp_rl_all = sfexp_im_all = nullptr;
99 cs = sn = nullptr;
100
101 kcount = 0;
102
105 //group_allocate_flag = 0;
106
107 group_group_enable = 1;
108 triclinic = domain->triclinic;
109
110 //TODO: add a flag-check for these initializations
111 nfactors = 3;
112 factors = new int[nfactors];
113 factors[0] = 2;
114 factors[1] = 3;
115 factors[2] = 5;
116
117 MPI_Comm_rank(world,&me);
118 MPI_Comm_size(world,&nprocs);
119
120 nfft_both = 0;
124
126 vx_brick = vy_brick = vz_brick = nullptr;
127 density_fft = nullptr;
128 u_brick = nullptr;
130 greensfn = nullptr;
131 work1 = work2 = nullptr;
132 vg = nullptr;
133 fkx = fky = fkz = nullptr;
134
137
138 gf_b = nullptr;
139 rho1d = rho_coeff = drho1d = drho_coeff = nullptr;
140
141 fft1 = fft2 = nullptr;
142 remap = nullptr;
143 gc = nullptr;
144 gc_buf1 = gc_buf2 = nullptr;
145
146 nmax = 0;
147 part2grid = nullptr;
148
149 // define acons coefficients for estimation of kspace errors
150 // see JCP 109, pg 7698 for derivation of coefficients
151 // higher order coefficients may be computed if needed
152
153 memory->create(acons,8,7,"pppm:acons");
154 acons[1][0] = 2.0 / 3.0;
155 acons[2][0] = 1.0 / 50.0;
156 acons[2][1] = 5.0 / 294.0;
157 acons[3][0] = 1.0 / 588.0;
158 acons[3][1] = 7.0 / 1440.0;
159 acons[3][2] = 21.0 / 3872.0;
160 acons[4][0] = 1.0 / 4320.0;
161 acons[4][1] = 3.0 / 1936.0;
162 acons[4][2] = 7601.0 / 2271360.0;
163 acons[4][3] = 143.0 / 28800.0;
164 acons[5][0] = 1.0 / 23232.0;
165 acons[5][1] = 7601.0 / 13628160.0;
166 acons[5][2] = 143.0 / 69120.0;
167 acons[5][3] = 517231.0 / 106536960.0;
168 acons[5][4] = 106640677.0 / 11737571328.0;
169 acons[6][0] = 691.0 / 68140800.0;
170 acons[6][1] = 13.0 / 57600.0;
171 acons[6][2] = 47021.0 / 35512320.0;
172 acons[6][3] = 9694607.0 / 2095994880.0;
173 acons[6][4] = 733191589.0 / 59609088000.0;
174 acons[6][5] = 326190917.0 / 11700633600.0;
175 acons[7][0] = 1.0 / 345600.0;
176 acons[7][1] = 3617.0 / 35512320.0;
177 acons[7][2] = 745739.0 / 838397952.0;
178 acons[7][3] = 56399353.0 / 12773376000.0;
179 acons[7][4] = 25091609.0 / 1560084480.0;
180 acons[7][5] = 1755948832039.0 / 36229939200000.0;
181 acons[7][6] = 4887769399.0 / 37838389248.0;
182}
183
184/* ---------------------------------------------------------------------- */
185
186void KSpaceHDNNP::settings(int narg, char **arg)
187{
188 if (narg < 2) error->all(FLERR,"Illegal kspace_style hdnnp command"); // we have two params, not one (style + FP)
189
190 if (strcmp(arg[0],"pppm") == 0) pppmflag = 1;
191 else if (strcmp(arg[0],"ewald") == 0) ewaldflag = 1;
192 else error->all(FLERR,"Illegal kspace_style hdnnp command");
193
194 accuracy_relative = fabs(utils::numeric(FLERR,arg[1],false,lmp));
195}
196
197/* ----------------------------------------------------------------------
198 free all memory
199------------------------------------------------------------------------- */
200
202{
203 if (pppmflag)
204 {
205 deallocate();
206 if (copymode) return;
207 delete [] factors;
208 //if (peratom_allocate_flag) deallocate_peratom();
209 //if (group_allocate_flag) deallocate_groups();
210 memory->destroy(part2grid);
211 memory->destroy(acons);
212 }
213 else if (ewaldflag)
214 {
215 deallocate();
216 //if (group_allocate_flag) deallocate_groups();
217 memory->destroy(ek);
218 //memory->destroy(acons);
219 memory->destroy3d_offset(cs,-kmax_created);
220 memory->destroy3d_offset(sn,-kmax_created);
221 }
222}
223
224/* ----------------------------------------------------------------------
225 called once before run
226------------------------------------------------------------------------- */
227
229{
230 // Make initializations based on the selected k-space style
231 if (pppmflag)
232 {
233 if (me == 0) utils::logmesg(lmp,"PPPM initialization ...\n");
234
235 // TODO: add other error handlings in LAMMPS
236
237 if (order < 2 || order > MAXORDER)
238 error->all(FLERR,fmt::format("PPPM order cannot be < 2 or > {}",MAXORDER));
239
240 // compute two charge force (?)
241 two_charge();
242
243 triclinic = domain->triclinic;
244
245 // extract short-range Coulombic cutoff from pair style
246 cutoff = hdnnp->maxCutoffRadius;
247
248 // compute qsum & qsqsum and warn if not charge-neutral
249 scale = 1.0;
250 qqrd2e = force->qqrd2e;
251 qsum_qsq();
252 natoms_original = atom->natoms;
253
254 if (accuracy_absolute >= 0.0) accuracy = accuracy_absolute;
255 else accuracy = accuracy_relative * two_charge_force;
256
257 // free all arrays previously allocated
258 deallocate();
259
260 //if (peratom_allocate_flag) deallocate_peratom();
261 //if (group_allocate_flag) deallocate_groups();
262
263 Grid3d *gctmp = nullptr;
264 int iteration = 0;
265
266 while (order >= minorder) {
267 if (iteration && me == 0)
268 error->warning(FLERR, "Reducing PPPM order b/c stencil extends "
269 "beyond nearest neighbor processor");
270
271 //compute_gf_denom(); //TODO: stagger ?
274 // overlap not allowed
275
276 gctmp = new Grid3d(lmp, world, nx_pppm, ny_pppm, nz_pppm,
279
280 int tmp1, tmp2;
281 gctmp->setup_comm(tmp1, tmp2);
282 if (gctmp->ghost_adjacent()) break;
283 delete gctmp;
284
285 order--;
286 iteration++;
287 }
288
289 if (order < minorder) error->all(FLERR,"PPPM order < minimum allowed order");
290 if (!overlap_allowed && !gctmp->ghost_adjacent())
291 error->all(FLERR,"PPPM grid stencil extends "
292 "beyond nearest neighbor processor");
293 if (gctmp) delete gctmp;
294
295 // adjust g_ewald TODO(kspace_modify)
296 if (!gewaldflag) adjust_gewald();
297
298 // calculate the final accuracy
299 double estimated_accuracy = final_accuracy();
300
301 // print stats
302 int ngrid_max,nfft_both_max;
303 MPI_Allreduce(&ngrid,&ngrid_max,1,MPI_INT,MPI_MAX,world);
304 MPI_Allreduce(&nfft_both,&nfft_both_max,1,MPI_INT,MPI_MAX,world);
305
306 if (me == 0) {
307 std::string mesg = fmt::format(" G vector (1/distance) = {:.8g}\n",g_ewald);
308 mesg += fmt::format(" grid = {} {} {}\n",nx_pppm,ny_pppm,nz_pppm);
309 mesg += fmt::format(" stencil order = {}\n",order);
310 mesg += fmt::format(" estimated absolute RMS force accuracy = {:.8g}\n",
311 estimated_accuracy);
312 mesg += fmt::format(" estimated relative force accuracy = {:.8g}\n",
313 estimated_accuracy/two_charge_force);
314 mesg += " using " LMP_FFT_PREC " precision " LMP_FFT_LIB "\n";
315 mesg += fmt::format(" 3d grid and FFT values/proc = {} {}\n",
316 ngrid_max,nfft_both_max);
317 utils::logmesg(lmp,mesg);
318 }
319
320 // allocate K-space dependent memory
321 // don't invoke allocate peratom() or group(), will be allocated when needed
322 allocate();
323
324 // pre-compute Green's function denomiator expansion
325 // pre-compute 1d charge distribution coefficients
326 // (differentiation_flag == 0)
329 }
330 else if (ewaldflag)
331 {
332 if (comm->me == 0) utils::logmesg(lmp,"Ewald initialization ...\n");
333
334 // error check
335 //triclinic_check();
336 if (domain->dimension == 2)
337 error->all(FLERR,"Cannot use Ewald with 2d simulation");
338 if (!atom->q_flag) error->all(FLERR,"Kspace style requires atom attribute q");
339 if (slabflag == 0 && domain->nonperiodic > 0)
340 error->all(FLERR,"Cannot use non-periodic boundaries with Ewald");
341
342 //TODO: no slab yet
343
344 // compute two charge force TODO:check
345 two_charge();
346
347 // extract short-range Coulombic cutoff from pair style
348 triclinic = domain->triclinic;
349
350 // extract short-range Coulombic cutoff from pair style
351 cutoff = hdnnp->maxCutoffRadius;
352
353 // compute qsum & qsqsum and warn if not charge-neutral TODO:check
354 scale = 1.0;
355 qqrd2e = force->qqrd2e;
356 qsum_qsq();
357 natoms_original = atom->natoms;
358
359 // Via the method in RuNNer (umgekehrt)
360 // LAMMPS uses a different methodology to calculate g_ewald
361 accuracy = accuracy_relative;
362 g_ewald = sqrt(-2.0 * log(accuracy)) / (cutoff*hdnnp->cflength);
363
364 // setup Ewald coefficients
365 setup();
366 }
367}
368
369/* ----------------------------------------------------------------------
370 adjust coeffs, called initially and whenever volume has changed
371------------------------------------------------------------------------- */
372
374{
375 if (pppmflag)
376 {
377 // TODO: trcilinic & slab
378 // perform some checks to avoid illegal boundaries with read_data
379
380 if (slabflag == 0 && domain->nonperiodic > 0)
381 error->all(FLERR,"Cannot use non-periodic boundaries with PPPM");
382 if (slabflag) {
383 if (domain->xperiodic != 1 || domain->yperiodic != 1 ||
384 domain->boundary[2][0] != 1 || domain->boundary[2][1] != 1)
385 error->all(FLERR,"Incorrect boundaries with slab PPPM");
386 }
387
388 int i,j,k,n;
389 double *prd;
390
391 // volume-dependent factors
392 // adjust z dimension for 2d slab PPPM
393 // z dimension for 3d PPPM is zprd since slab_volfactor = 1.0
394
395 //triclinic = 0 TODO:check
396
397 // WARNING: Immediately convert to HDNNP units!
398 // volume-dependent factors
399 prd = domain->prd;
400 double xprd = prd[0] * hdnnp->cflength;
401 double yprd = prd[1] * hdnnp->cflength;
402 double zprd = prd[2] * hdnnp->cflength;
403 volume = xprd * yprd * zprd;
404
405 delxinv = nx_pppm/xprd;
406 delyinv = ny_pppm/yprd;
407 delzinv = nz_pppm/zprd;
408
410
411 double unitkx = (MY_2PI/xprd);
412 double unitky = (MY_2PI/yprd);
413 double unitkz = (MY_2PI/zprd);
414
415 // fkx,fky,fkz for my FFT grid pts
416
417 double per;
418
419 for (i = nxlo_fft; i <= nxhi_fft; i++) {
420 per = i - nx_pppm*(2*i/nx_pppm);
421 fkx[i] = unitkx*per;
422 }
423
424 for (i = nylo_fft; i <= nyhi_fft; i++) {
425 per = i - ny_pppm*(2*i/ny_pppm);
426 fky[i] = unitky*per;
427 }
428
429 for (i = nzlo_fft; i <= nzhi_fft; i++) {
430 per = i - nz_pppm*(2*i/nz_pppm);
431 fkz[i] = unitkz*per;
432 }
433
434 // virial coefficients TODO
435
436 /*double sqk,vterm;
437
438 n = 0;
439 for (k = nzlo_fft; k <= nzhi_fft; k++) {
440 for (j = nylo_fft; j <= nyhi_fft; j++) {
441 for (i = nxlo_fft; i <= nxhi_fft; i++) {
442 sqk = fkx[i]*fkx[i] + fky[j]*fky[j] + fkz[k]*fkz[k];
443 if (sqk == 0.0) {
444 vg[n][0] = 0.0;
445 vg[n][1] = 0.0;
446 vg[n][2] = 0.0;
447 vg[n][3] = 0.0;
448 vg[n][4] = 0.0;
449 vg[n][5] = 0.0;
450 } else {
451 vterm = -2.0 * (1.0/sqk + 0.25/(g_ewald*g_ewald));
452 vg[n][0] = 1.0 + vterm*fkx[i]*fkx[i];
453 vg[n][1] = 1.0 + vterm*fky[j]*fky[j];
454 vg[n][2] = 1.0 + vterm*fkz[k]*fkz[k];
455 vg[n][3] = vterm*fkx[i]*fky[j];
456 vg[n][4] = vterm*fkx[i]*fkz[k];
457 vg[n][5] = vterm*fky[j]*fkz[k];
458 }
459 n++;
460 }
461 }
462 }*/
463
464 compute_gf_ik(); // diff option 'ik' is selected
465 }
466 else if (ewaldflag)
467 {
468 // WARNING: Immediately convert to HDNNP units!
469 // volume-dependent factors
470 double const xprd = domain->xprd * hdnnp->cflength;
471 double const yprd = domain->yprd * hdnnp->cflength;
472 double const zprd = domain->zprd * hdnnp->cflength;
473 volume = xprd * yprd * zprd;
474
475 //TODO: slab feature
476
477 unitk[0] = 2.0*MY_PI/xprd;
478 unitk[1] = 2.0*MY_PI/yprd;
479 unitk[2] = 2.0*MY_PI/zprd;
480
481 // Get k-space resolution via the method in RuNNer
482 ewald_recip_cutoff = sqrt(-2.0 * log(accuracy)) * g_ewald;
483 //ewald_real_cutoff = sqrt(-2.0 * log(accuracy)) / g_ewald;
484 ewald_real_cutoff = cutoff * hdnnp->cflength; // we skip the calculation
485
486 //std::cout << "recip cut:" << ewald_recip_cutoff << '\n';
487 //std::cout << "real cut: " << ewald_real_cutoff << '\n';
488 //std::cout << "eta: " << 1 / g_ewald << '\n';
489
490 int kmax_old = kmax;
491 kxmax = kymax = kzmax = 1;
492
493 ewald_pbc(ewald_recip_cutoff); // get kxmax, kymax and kzmax
495
496 kmax = MAX(kxmax,kymax);
497 kmax = MAX(kmax,kzmax);
498 kmax3d = 4*kmax*kmax*kmax + 6*kmax*kmax + 3*kmax;
499
500 // size change ?
504
505 //allocate();
506 // if size has grown, reallocate k-dependent and nlocal-dependent arrays
507 if (kmax > kmax_old) {
508 deallocate();
509 allocate();
510 memory->destroy(ek);
511 memory->destroy3d_offset(cs,-kmax_created);
512 memory->destroy3d_offset(sn,-kmax_created);
513 nmax = atom->nmax;
514 memory->create(ek,nmax,3,"ewald:ek");
515 memory->create3d_offset(cs,-kmax,kmax,3,nmax,"ewald:cs");
516 memory->create3d_offset(sn,-kmax,kmax,3,nmax,"ewald:sn");
518 }
519
520 // pre-compute Ewald coefficients and structure factor arrays
521 ewald_coeffs();
522 //ewald_sfexp();
523 }
524}
525
526// compute RMS accuracy for a dimension TODO: do we need this ?
527double KSpaceHDNNP::rms(int km, double prd, bigint natoms, double q2)
528{
529
530 if (natoms == 0) natoms = 1; // avoid division by zero
531 double value = 2.0*q2*g_ewald/prd *
532 sqrt(1.0/(MY_PI*km*natoms)) *
533 exp(-MY_PI*MY_PI*km*km/(g_ewald*g_ewald*prd*prd));
534
535 return value;
536}
537
538// Calculate Ewald coefficients
540{
541 int k,l,m;
542 double sqk,vterm;
543 double preu = 4.0*M_PI/volume;
544 double etasq;
545
546 etasq = 1.0 / (g_ewald*g_ewald); // LAMMPS truncation (RuNNer truncations has been removed)
547
548 kcount = 0;
549
550 // (k,0,0), (0,l,0), (0,0,m)
551
552 for (m = 1; m <= kmax; m++) {
553 sqk = (m*unitk[0]) * (m*unitk[0]);
554 if (sqk <= gsqmx) {
555 //fprintf(stderr, "sqk 1x = %24.16E, m %d\n", sqrt(sqk), m);
556 kxvecs[kcount] = m;
557 kyvecs[kcount] = 0;
558 kzvecs[kcount] = 0;
559 kcoeff[kcount] = preu*exp(-0.5*sqk*etasq)/sqk;
560 kcount++;
561 }
562 sqk = (m*unitk[1]) * (m*unitk[1]);
563 if (sqk <= gsqmx) {
564 //fprintf(stderr, "sqk 1y = %24.16E, m %d\n", sqrt(sqk), m);
565 kxvecs[kcount] = 0;
566 kyvecs[kcount] = m;
567 kzvecs[kcount] = 0;
568 kcoeff[kcount] = preu*exp(-0.5*sqk*etasq)/sqk;
569 kcount++;
570 }
571 sqk = (m*unitk[2]) * (m*unitk[2]);
572 if (sqk <= gsqmx) {
573 //fprintf(stderr, "sqk 1z = %24.16E, m %d\n", sqrt(sqk), m);
574 kxvecs[kcount] = 0;
575 kyvecs[kcount] = 0;
576 kzvecs[kcount] = m;
577 kcoeff[kcount] = preu*exp(-0.5*sqk*etasq)/sqk;
578 kcount++;
579 }
580 }
581
582 // 1 = (k,l,0), 2 = (k,-l,0)
583
584 for (k = 1; k <= kxmax; k++) {
585 for (l = 1; l <= kymax; l++) {
586 sqk = (unitk[0]*k) * (unitk[0]*k) + (unitk[1]*l) * (unitk[1]*l);
587 if (sqk <= gsqmx) {
588 //fprintf(stderr, "sqk 2 = %24.16E, k %d l %d\n", sqrt(sqk), k, l);
589 kxvecs[kcount] = k;
590 kyvecs[kcount] = l;
591 kzvecs[kcount] = 0;
592 kcoeff[kcount] = preu*exp(-0.5*sqk*etasq)/sqk;
593 kcount++;
594
595 //fprintf(stderr, "sqk 2 = %24.16E, k %d -l %d\n", sqrt(sqk), k, l);
596 kxvecs[kcount] = k;
597 kyvecs[kcount] = -l;
598 kzvecs[kcount] = 0;
599 kcoeff[kcount] = preu*exp(-0.5*sqk*etasq)/sqk;
600 kcount++;;
601 }
602 }
603 }
604
605 // 1 = (0,l,m), 2 = (0,l,-m)
606
607 for (l = 1; l <= kymax; l++) {
608 for (m = 1; m <= kzmax; m++) {
609 sqk = (unitk[1]*l) * (unitk[1]*l) + (unitk[2]*m) * (unitk[2]*m);
610 if (sqk <= gsqmx) {
611 //fprintf(stderr, "sqk 3 = %24.16E, l %d m %d\n", sqrt(sqk), l, m);
612 kxvecs[kcount] = 0;
613 kyvecs[kcount] = l;
614 kzvecs[kcount] = m;
615 kcoeff[kcount] = preu*exp(-0.5*sqk*etasq)/sqk;
616 kcount++;
617
618 //fprintf(stderr, "sqk 3 = %24.16E, l %d -m %d\n", sqrt(sqk), l, m);
619 kxvecs[kcount] = 0;
620 kyvecs[kcount] = l;
621 kzvecs[kcount] = -m;
622 kcoeff[kcount] = preu*exp(-0.5*sqk*etasq)/sqk;
623 kcount++;
624 }
625 }
626 }
627
628 // 1 = (k,0,m), 2 = (k,0,-m)
629
630 for (k = 1; k <= kxmax; k++) {
631 for (m = 1; m <= kzmax; m++) {
632 sqk = (unitk[0]*k) * (unitk[0]*k) + (unitk[2]*m) * (unitk[2]*m);
633 if (sqk <= gsqmx) {
634 //fprintf(stderr, "sqk 4 = %24.16E, k %d m %d\n", sqrt(sqk), k, m);
635 kxvecs[kcount] = k;
636 kyvecs[kcount] = 0;
637 kzvecs[kcount] = m;
638 kcoeff[kcount] = preu*exp(-0.5*sqk*etasq)/sqk;
639 kcount++;
640
641 //fprintf(stderr, "sqk 4 = %24.16E, k %d -m %d\n", sqrt(sqk), k, m);
642 kxvecs[kcount] = k;
643 kyvecs[kcount] = 0;
644 kzvecs[kcount] = -m;
645 kcoeff[kcount] = preu*exp(-0.5*sqk*etasq)/sqk;
646 kcount++;
647 }
648 }
649 }
650
651 // 1 = (k,l,m), 2 = (k,-l,m), 3 = (k,l,-m), 4 = (k,-l,-m)
652
653 for (k = 1; k <= kxmax; k++) {
654 for (l = 1; l <= kymax; l++) {
655 for (m = 1; m <= kzmax; m++) {
656 sqk = (unitk[0]*k) * (unitk[0]*k) + (unitk[1]*l) * (unitk[1]*l) +
657 (unitk[2]*m) * (unitk[2]*m);
658 if (sqk <= gsqmx) {
659 //fprintf(stderr, "sqk 5 = %24.16E, k %d l %d m %d\n", sqrt(sqk), k, l, m);
660 kxvecs[kcount] = k;
661 kyvecs[kcount] = l;
662 kzvecs[kcount] = m;
663 kcoeff[kcount] = preu*exp(-0.5*sqk*etasq)/sqk;
664 kcount++;
665
666 //fprintf(stderr, "sqk 5 = %24.16E, k %d -l %d m %d\n", sqrt(sqk), k, l, m);
667 kxvecs[kcount] = k;
668 kyvecs[kcount] = -l;
669 kzvecs[kcount] = m;
670 kcoeff[kcount] = preu*exp(-0.5*sqk*etasq)/sqk;
671 kcount++;
672
673 //fprintf(stderr, "sqk 5 = %24.16E, k %d l %d -m %d\n", sqrt(sqk), k, l, m);
674 kxvecs[kcount] = k;
675 kyvecs[kcount] = l;
676 kzvecs[kcount] = -m;
677 kcoeff[kcount] = preu*exp(-0.5*sqk*etasq)/sqk;
678 kcount++;
679
680 //fprintf(stderr, "sqk 5 = %24.16E, k %d -l %d -m %d\n", sqrt(sqk), k, l, m);
681 kxvecs[kcount] = k;
682 kyvecs[kcount] = -l;
683 kzvecs[kcount] = -m;
684 kcoeff[kcount] = preu*exp(-0.5*sqk*etasq)/sqk;
685 kcount++;
686 }
687 }
688 }
689 }
690}
691
692// Calculate Ewald structure factors
694{
695 int i,k,l,m,n,ic;
696 double sqk,clpm,slpm;
697
698 double **x = atom->x;
699 int nlocal = atom->nlocal;
700 double cflength;
701
702 n = 0;
703 cflength = hdnnp->cflength; // RuNNer truncation (conversion required)
704
705 // (k,0,0), (0,l,0), (0,0,m)
706
707 for (ic = 0; ic < 3; ic++) {
708 sqk = unitk[ic]*unitk[ic];
709 if (sqk <= gsqmx) {
710 for (i = 0; i < nlocal; i++) {
711 cs[0][ic][i] = 1.0;
712 sn[0][ic][i] = 0.0;
713 cs[1][ic][i] = cos(unitk[ic]*x[i][ic]*cflength);
714 sn[1][ic][i] = sin(unitk[ic]*x[i][ic]*cflength);
715 cs[-1][ic][i] = cs[1][ic][i];
716 sn[-1][ic][i] = -sn[1][ic][i];
717 sfexp_rl[n][i] = cs[1][ic][i];
718 sfexp_im[n][i] = sn[1][ic][i];
719 }
720 n++;
721 }
722 }
723
724 for (m = 2; m <= kmax; m++) {
725 for (ic = 0; ic < 3; ic++) {
726 sqk = m*unitk[ic] * m*unitk[ic];
727 if (sqk <= gsqmx) {
728 for (i = 0; i < nlocal; i++) {
729 cs[m][ic][i] = cs[m-1][ic][i]*cs[1][ic][i] -
730 sn[m-1][ic][i]*sn[1][ic][i];
731 sn[m][ic][i] = sn[m-1][ic][i]*cs[1][ic][i] +
732 cs[m-1][ic][i]*sn[1][ic][i];
733 cs[-m][ic][i] = cs[m][ic][i];
734 sn[-m][ic][i] = -sn[m][ic][i];
735 sfexp_rl[n][i] = cs[m][ic][i];
736 sfexp_im[n][i] = sn[m][ic][i];
737 }
738 n++;
739 }
740 }
741 }
742
743 // 1 = (k,l,0), 2 = (k,-l,0)
744
745 for (k = 1; k <= kxmax; k++) {
746 for (l = 1; l <= kymax; l++) {
747 sqk = (k*unitk[0] * k*unitk[0]) + (l*unitk[1] * l*unitk[1]);
748 if (sqk <= gsqmx) {
749 for (i = 0; i < nlocal; i++) {
750 sfexp_rl[n][i] = (cs[k][0][i]*cs[l][1][i] - sn[k][0][i]*sn[l][1][i]);
751 sfexp_im[n][i] = (sn[k][0][i]*cs[l][1][i] + cs[k][0][i]*sn[l][1][i]);
752 }
753 n++;
754 for (i = 0; i < nlocal; i++) {
755 sfexp_rl[n][i] = (cs[k][0][i]*cs[l][1][i] + sn[k][0][i]*sn[l][1][i]);
756 sfexp_im[n][i] = (sn[k][0][i]*cs[l][1][i] - cs[k][0][i]*sn[l][1][i]);
757 }
758 n++;
759 }
760 }
761 }
762
763 // 1 = (0,l,m), 2 = (0,l,-m)
764
765 for (l = 1; l <= kymax; l++) {
766 for (m = 1; m <= kzmax; m++) {
767 sqk = (l*unitk[1] * l*unitk[1]) + (m*unitk[2] * m*unitk[2]);
768 if (sqk <= gsqmx) {
769 for (i = 0; i < nlocal; i++) {
770 sfexp_rl[n][i] = (cs[l][1][i]*cs[m][2][i] - sn[l][1][i]*sn[m][2][i]);
771 sfexp_im[n][i] = (sn[l][1][i]*cs[m][2][i] + cs[l][1][i]*sn[m][2][i]);
772 }
773 n++;
774 for (i = 0; i < nlocal; i++) {
775 sfexp_rl[n][i] = (cs[l][1][i]*cs[m][2][i] + sn[l][1][i]*sn[m][2][i]);
776 sfexp_im[n][i] = (sn[l][1][i]*cs[m][2][i] - cs[l][1][i]*sn[m][2][i]);
777 }
778 n++;
779 }
780 }
781 }
782
783 // 1 = (k,0,m), 2 = (k,0,-m)
784
785 for (k = 1; k <= kxmax; k++) {
786 for (m = 1; m <= kzmax; m++) {
787 sqk = (k*unitk[0] * k*unitk[0]) + (m*unitk[2] * m*unitk[2]);
788 if (sqk <= gsqmx) {
789 for (i = 0; i < nlocal; i++) {
790 sfexp_rl[n][i] = (cs[k][0][i]*cs[m][2][i] - sn[k][0][i]*sn[m][2][i]);
791 sfexp_im[n][i] = (sn[k][0][i]*cs[m][2][i] + cs[k][0][i]*sn[m][2][i]);
792 }
793 n++;
794 for (i = 0; i < nlocal; i++) {
795 sfexp_rl[n][i] = (cs[k][0][i]*cs[m][2][i] + sn[k][0][i]*sn[m][2][i]);
796 sfexp_im[n][i] = (sn[k][0][i]*cs[m][2][i] - cs[k][0][i]*sn[m][2][i]);
797 }
798 n++;
799 }
800 }
801 }
802
803 // 1 = (k,l,m), 2 = (k,-l,m), 3 = (k,l,-m), 4 = (k,-l,-m)
804
805 for (k = 1; k <= kxmax; k++) {
806 for (l = 1; l <= kymax; l++) {
807 for (m = 1; m <= kzmax; m++) {
808 sqk = (k*unitk[0] * k*unitk[0]) + (l*unitk[1] * l*unitk[1]) +
809 (m*unitk[2] * m*unitk[2]);
810 if (sqk <= gsqmx) {
811 for (i = 0; i < nlocal; i++) {
812 clpm = cs[l][1][i]*cs[m][2][i] - sn[l][1][i]*sn[m][2][i];
813 slpm = sn[l][1][i]*cs[m][2][i] + cs[l][1][i]*sn[m][2][i];
814 sfexp_rl[n][i] = (cs[k][0][i]*clpm - sn[k][0][i]*slpm);
815 sfexp_im[n][i] = (sn[k][0][i]*clpm + cs[k][0][i]*slpm);
816 }
817 n++;
818 for (i = 0; i < nlocal; i++) {
819 clpm = cs[l][1][i]*cs[m][2][i] + sn[l][1][i]*sn[m][2][i];
820 slpm = -sn[l][1][i]*cs[m][2][i] + cs[l][1][i]*sn[m][2][i];
821 sfexp_rl[n][i] = (cs[k][0][i]*clpm - sn[k][0][i]*slpm);
822 sfexp_im[n][i] = (sn[k][0][i]*clpm + cs[k][0][i]*slpm);
823 }
824 n++;
825 for (i = 0; i < nlocal; i++) {
826 clpm = cs[l][1][i]*cs[m][2][i] + sn[l][1][i]*sn[m][2][i];
827 slpm = sn[l][1][i]*cs[m][2][i] - cs[l][1][i]*sn[m][2][i];
828 sfexp_rl[n][i] = (cs[k][0][i]*clpm - sn[k][0][i]*slpm);
829 sfexp_im[n][i] = (sn[k][0][i]*clpm + cs[k][0][i]*slpm);
830 }
831 n++;
832 for (i = 0; i < nlocal; i++) {
833 clpm = cs[l][1][i]*cs[m][2][i] - sn[l][1][i]*sn[m][2][i];
834 slpm = -sn[l][1][i]*cs[m][2][i] - cs[l][1][i]*sn[m][2][i];
835 sfexp_rl[n][i] = (cs[k][0][i]*clpm - sn[k][0][i]*slpm);
836 sfexp_im[n][i] = (sn[k][0][i]*clpm + cs[k][0][i]*slpm);
837 }
838 n++;
839 }
840 }
841 }
842 }
843
844}
845
846// Compute E_QEQ (recip) in PPPM TODO:WIP
848{
849
850 // all procs communicate density values from their ghost cells
851 // to fully sum contribution in their 3d bricks
852 // remap from 3d decomposition to FFT decomposition
853
854 gc->reverse_comm(Grid3d::KSPACE,this,REVERSE_RHO,1,sizeof(FFT_SCALAR),
856 brick2fft();
857
858 // compute potential V(r) on my FFT grid and
859 // portion of e_long on this proc's FFT grid
860 // return potentials 3d brick decomposition
861
862 std::cout << energy << '\n';
863 poisson();
864
865 std::cout << energy << '\n';
866 exit(0);
867
868 // all procs communicate E-field values
869 // to fill ghost cells surrounding their 3d bricks
870 // TODO check
871 // differentiation_flag == 0
872
873 gc->forward_comm(Grid3d::KSPACE,this,FORWARD_IK,3,sizeof(FFT_SCALAR),
875
876 // sum global energy across procs and add in volume-dependent term
877
878 const double qscale = qqrd2e * scale;
879
880 if (eflag_global) {
881 double energy_all;
882 MPI_Allreduce(&energy, &energy_all, 1, MPI_DOUBLE, MPI_SUM, world);
883 energy = energy_all;
884
885 energy *= 0.5 * volume;
886 energy -= g_ewald * qsqsum / MY_PIS +
887 MY_PI2 * qsum * qsum / (g_ewald * g_ewald * volume);
888 energy *= qscale;
889 }
890
891 std::cout << "here" << '\n';
892 //std::cout << qscale << '\n';
893 std::cout << MY_PIS << '\n';
894 std::cout << MY_PI2 << '\n';
895 //std::cout << qsum << '\n';
896 //std::cout << g_ewald << '\n';
897 //std::cout << energy << '\n';
898 exit(0);
899
900 return energy;
901}
902
903// Returns dE_qeq/dQ_i in PPPM for a given local atom i (inx)
905{
906 int i,l,m,n,nx,ny,nz,mx,my,mz;
907 FFT_SCALAR dx,dy,dz,x0,y0,z0;
908 FFT_SCALAR dEdQx,dEdQy,dEdQz;
909
910 // loop over my charges, interpolate dEdQ from nearby grid points
911 // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
912 // (dx,dy,dz) = distance to "lower left" grid pt
913 // (mx,my,mz) = global coords of moving stencil pt
914 // ek = 3 components of E-field on particle
915
916 double *q = atom->q;
917 double **x = atom->x;
918 double **f = atom->f;
919
920 double dEdQ;
921
922 int nlocal = atom->nlocal;
923
924 nx = part2grid[inx][0];
925 ny = part2grid[inx][1];
926 nz = part2grid[inx][2];
927 dx = nx + shiftone - (x[inx][0] - boxlo[0]) * delxinv;
928 dy = ny + shiftone - (x[inx][1] - boxlo[1]) * delyinv;
929 dz = nz + shiftone - (x[inx][2] - boxlo[2]) * delzinv;
930
931 compute_rho1d(dx, dy, dz);
932
933 dEdQx = dEdQy = dEdQz = ZEROF;
934 for (n = nlower; n <= nupper; n++) {
935 mz = n + nz;
936 z0 = rho1d[2][n];
937 for (m = nlower; m <= nupper; m++) {
938 my = m + ny;
939 y0 = z0 * rho1d[1][m];
940 for (l = nlower; l <= nupper; l++) {
941 mx = l + nx;
942 x0 = y0 * rho1d[0][l];
943 dEdQx -= x0 * vx_brick[mz][my][mx];
944 dEdQy -= x0 * vy_brick[mz][my][mx];
945 dEdQz -= x0 * vz_brick[mz][my][mx];
946 }
947 }
948 }
949
950 dEdQ = sqrt(dEdQx*dEdQx + dEdQy*dEdQy + dEdQz*dEdQz);
951
952 std::cout << dEdQ << '\n';
953
954
955 /*for (i = 0; i < nlocal; i++) {
956 nx = part2grid[i][0];
957 ny = part2grid[i][1];
958 nz = part2grid[i][2];
959 dx = nx + shiftone - (x[i][0] - boxlo[0]) * delxinv;
960 dy = ny + shiftone - (x[i][1] - boxlo[1]) * delyinv;
961 dz = nz + shiftone - (x[i][2] - boxlo[2]) * delzinv;
962
963 compute_rho1d(dx, dy, dz);
964
965 dEdQx = dEdQy = dEdQz = ZEROF;
966 for (n = nlower; n <= nupper; n++) {
967 mz = n + nz;
968 z0 = rho1d[2][n];
969 for (m = nlower; m <= nupper; m++) {
970 my = m + ny;
971 y0 = z0 * rho1d[1][m];
972 for (l = nlower; l <= nupper; l++) {
973 mx = l + nx;
974 x0 = y0 * rho1d[0][l];
975 dEdQx -= x0 * vx_brick[mz][my][mx];
976 dEdQy -= x0 * vy_brick[mz][my][mx];
977 dEdQz -= x0 * vz_brick[mz][my][mx];
978 }
979 }
980 }
981 }*/
982
983 return dEdQ;
984}
985
986// Compute E_QEQ (recip) in Ewald
987double KSpaceHDNNP::compute_ewald_eqeq(const gsl_vector *v)
988{
989 int i;
990 int nlocal = atom->nlocal;
991 int *tag = atom->tag;
992 double E_recip;
993
994
995 E_recip = 0.0;
996 for (int k = 0; k < kcount; k++) // over k-space
997 {
998 //fprintf(stderr, "kcoeff[%d] = %24.16E\n", k, hdnnp->kcoeff[k]);
999 double sf_real_loc = 0.0;
1000 double sf_im_loc = 0.0;
1001 sf_real[k] = 0.0;
1002 sf_im[k] = 0.0;
1003 // TODO: this loop over all atoms can be replaced by a MPIallreduce ?
1004 for (i = 0; i < nlocal; i++)
1005 {
1006 double const qi = gsl_vector_get(v,tag[i]-1);
1007 sf_real_loc += qi * sfexp_rl[k][i];
1008 sf_im_loc += qi * sfexp_im[k][i];
1009 }
1010 //fprintf(stderr, "sfexp %d : %24.16E\n", k, hdnnp->kcoeff[k] * (pow(sf_real,2) + pow(sf_im,2)));
1011 MPI_Allreduce(&(sf_real_loc),&(sf_real[k]),1,MPI_DOUBLE,MPI_SUM,world);
1012 MPI_Allreduce(&(sf_im_loc),&(sf_im[k]),1,MPI_DOUBLE,MPI_SUM,world);
1013 E_recip += kcoeff[k] * (pow(sf_real[k],2) + pow(sf_im[k],2));
1014 }
1015
1016 return E_recip;
1017}
1018
1019// TODO: this is called after the force->pair->compute calculations in verlet.cpp
1020// therefore we cannot make use of it as it is
1021void KSpaceHDNNP::compute(int eflag, int vflag)
1022{
1023 // Carry out k-space computations based on the selected method
1024 if (pppmflag)
1025 {
1026
1027 }
1028 else if (ewaldflag)
1029 {
1030
1031 }
1032}
1033
1035{
1036 // Make allocations based on the selected K-space method
1037 if (pppmflag)
1038 {
1039 memory->create3d_offset(density_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
1040 nxlo_out,nxhi_out,"pppm:density_brick");
1041
1042 memory->create(density_fft,nfft_both,"pppm:density_fft");
1043 memory->create(greensfn,nfft_both,"pppm:greensfn");
1044 memory->create(work1,2*nfft_both,"pppm:work1");
1045 memory->create(work2,2*nfft_both,"pppm:work2");
1046 //memory->create(vg,nfft_both,6,"pppm:vg"); // TODO virial
1047
1048 // triclinic = 0 TODO: triclinic systems ?
1049 memory->create1d_offset(fkx,nxlo_fft,nxhi_fft,"pppm:fkx");
1050 memory->create1d_offset(fky,nylo_fft,nyhi_fft,"pppm:fky");
1051 memory->create1d_offset(fkz,nzlo_fft,nzhi_fft,"pppm:fkz");
1052
1053 // differentiation_flag = 0
1054 /*memory->create3d_offset(vdx_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
1055 nxlo_out,nxhi_out,"pppm:vdx_brick");
1056 memory->create3d_offset(vdy_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
1057 nxlo_out,nxhi_out,"pppm:vdy_brick");
1058 memory->create3d_offset(vdz_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
1059 nxlo_out,nxhi_out,"pppm:vdz_brick");*/
1060
1061 memory->create3d_offset(vx_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
1062 nxlo_out,nxhi_out,"pppm:vx_brick");
1063 memory->create3d_offset(vy_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
1064 nxlo_out,nxhi_out,"pppm:vy_brick");
1065 memory->create3d_offset(vz_brick,nzlo_out,nzhi_out,nylo_out,nyhi_out,
1066 nxlo_out,nxhi_out,"pppm:vz_brick");
1067
1068 // summation coeffs
1069 order_allocated = order;
1070 memory->create(gf_b,order,"pppm:gf_b");
1071 memory->create2d_offset(rho1d,3,-order/2,order/2,"pppm:rho1d");
1072 memory->create2d_offset(drho1d,3,-order/2,order/2,"pppm:drho1d");
1073 memory->create2d_offset(rho_coeff,order,(1-order)/2,order/2,"pppm:rho_coeff");
1074 memory->create2d_offset(drho_coeff,order,(1-order)/2,order/2,
1075 "pppm:drho_coeff");
1076
1077 // create 2 FFTs and a Remap
1078 // 1st FFT keeps data in FFT decomposition
1079 // 2nd FFT returns data in 3d brick decomposition
1080 // remap takes data from 3d brick to FFT decomposition
1081
1082 int tmp;
1083
1084 fft1 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm,
1087 0,0,&tmp,collective_flag);
1088
1089 fft2 = new FFT3d(lmp,world,nx_pppm,ny_pppm,nz_pppm,
1092 0,0,&tmp,collective_flag);
1093
1094 remap = new Remap(lmp,world,
1097 1,0,0,FFT_PRECISION,collective_flag);
1098
1099 // create ghost grid object for rho and electric field communication
1100 // also create 2 bufs for ghost grid cell comm, passed to GridComm methods
1101
1102 gc = new Grid3d(lmp,world,nx_pppm,ny_pppm,nz_pppm,
1105
1106 gc->setup_comm(ngc_buf1,ngc_buf2);
1107 npergrid = 3;
1108
1109 memory->create(gc_buf1,npergrid*ngc_buf1,"pppm:gc_buf1");
1110 memory->create(gc_buf2,npergrid*ngc_buf2,"pppm:gc_buf2");
1111 }
1112 else if (ewaldflag)
1113 {
1114
1115 //kxvecs = new int[kmax3d];
1116 //kyvecs = new int[kmax3d];
1117 //kzvecs = new int[kmax3d];
1118 //kcoeff = new int[kmax3d];
1119
1120 //sf_real = new double[kmax3d];
1121 //sf_im = new double[kmax3d];
1122
1123 //for(int i = 0; i < kmax3d; ++i){
1124 // sfexp_rl[i] = new int[nloc];
1125 // sfexp_im[i] = new int[nloc];
1126 //}
1127
1128 memory->create(kxvecs,kmax3d,"ewald:kxvecs");
1129 memory->create(kyvecs,kmax3d,"ewald:kyvecs");
1130 memory->create(kzvecs,kmax3d,"ewald:kzvecs");
1131 memory->create(kcoeff,kmax3d,"ewald:kcoeff");
1132
1133 memory->create(sfexp_rl,kmax3d,atom->natoms,"ewald:sfexp_rl");
1134 memory->create(sfexp_im,kmax3d,atom->natoms,"ewald:sfexp_im");
1135 memory->create(sf_real,kmax3d,"ewald:sf_rl");
1136 memory->create(sf_im,kmax3d,"ewald:sf_im");
1137
1138 //memory->create(eg,kmax3d,3,"ewald:eg");
1139 //memory->create(vg,kmax3d,6,"ewald:vg"); // TODO: might be required for pressure
1140
1141 }
1142}
1143
1145{
1146 // Make deallocations based on the selected K-space method
1147 if (pppmflag)
1148 {
1149 memory->destroy3d_offset(density_brick,nzlo_out,nylo_out,nxlo_out);
1150
1151 // differentiation_flag = 0
1152 /*memory->destroy3d_offset(vdx_brick,nzlo_out,nylo_out,nxlo_out);
1153 memory->destroy3d_offset(vdy_brick,nzlo_out,nylo_out,nxlo_out);
1154 memory->destroy3d_offset(vdz_brick,nzlo_out,nylo_out,nxlo_out);*/
1155
1156 memory->destroy3d_offset(vx_brick,nzlo_out,nylo_out,nxlo_out);
1157 memory->destroy3d_offset(vy_brick,nzlo_out,nylo_out,nxlo_out);
1158 memory->destroy3d_offset(vz_brick,nzlo_out,nylo_out,nxlo_out);
1159
1160 memory->destroy(density_fft);
1161 memory->destroy(greensfn);
1162 memory->destroy(work1);
1163 memory->destroy(work2);
1164 memory->destroy(vg);
1165
1166 memory->destroy1d_offset(fkx,nxlo_fft);
1167 memory->destroy1d_offset(fky,nylo_fft);
1168 memory->destroy1d_offset(fkz,nzlo_fft);
1169
1170 memory->destroy(gf_b);
1171
1172 memory->destroy2d_offset(rho1d,-order_allocated/2);
1173 memory->destroy2d_offset(drho1d,-order_allocated/2);
1174 memory->destroy2d_offset(rho_coeff,(1-order_allocated)/2);
1175 memory->destroy2d_offset(drho_coeff,(1-order_allocated)/2);
1176
1177 delete fft1;
1178 delete fft2;
1179 delete remap;
1180 delete gc;
1181 memory->destroy(gc_buf1);
1182 memory->destroy(gc_buf2);
1183 }
1184 else if (ewaldflag)
1185 {
1186
1187 //delete [] kxvecs;
1188 //delete [] kyvecs;
1189 //delete [] kzvecs;
1190 //delete [] kcoeff;
1191 //delete [] sf_real;
1192 //delete [] sf_im;
1193
1194 //for(int i = 0; i < kmax3d; ++i) {
1195 // delete [] sfexp_rl[i];
1196 // delete [] sfexp_im[i];
1197 //}
1198 //delete [] sfexp_rl;
1199 //delete [] sfexp_im;
1200
1201 memory->destroy(kxvecs);
1202 memory->destroy(kyvecs);
1203 memory->destroy(kzvecs);
1204 memory->destroy(kcoeff);
1205
1206 memory->destroy(sf_real);
1207 memory->destroy(sf_im);
1208 memory->destroy(sfexp_rl);
1209 memory->destroy(sfexp_im);
1210
1211 //memory->destroy(ek);
1212 //memory->destroy3d_offset(cs,-kmax_created);
1213 //memory->destroy3d_offset(sn,-kmax_created);
1214
1215 //memory->destroy(eg);
1216 //memory->destroy(vg);
1217 }
1218}
1219
1220// Maps atoms to corresponding grid points
1222{
1223 int nx,ny,nz;
1224
1225 double **x = atom->x;
1226 int nlocal = atom->nlocal;
1227
1228 int flag = 0;
1229
1230 // if atom count has changed, update qsum and qsqsum
1231
1232 if (atom->natoms != natoms_original) {
1233 qsum_qsq();
1234 natoms_original = atom->natoms;
1235 }
1236
1237 // return if there are no charges
1238
1239 if (qsqsum == 0.0) return;
1240
1241 boxlo = domain->boxlo; //triclinic = 0
1242
1243 if (!std::isfinite(boxlo[0]) || !std::isfinite(boxlo[1]) || !std::isfinite(boxlo[2]))
1244 error->one(FLERR,"Non-numeric box dimensions - simulation unstable");
1245
1246 if (atom->nmax > nmax) {
1247 memory->destroy(part2grid);
1248 nmax = atom->nmax;
1249 memory->create(part2grid,nmax,3,"kspacehdnnp:part2grid");
1250 }
1251
1252 for (int i = 0; i < nlocal; i++) {
1253
1254 // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
1255 // current particle coord can be outside global and local box
1256 // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1
1257
1258 nx = static_cast<int> ((x[i][0]-boxlo[0])*delxinv+shift) - OFFSET;
1259 ny = static_cast<int> ((x[i][1]-boxlo[1])*delyinv+shift) - OFFSET;
1260 nz = static_cast<int> ((x[i][2]-boxlo[2])*delzinv+shift) - OFFSET;
1261
1262
1263 // check that entire stencil around nx,ny,nz will fit in my 3d brick
1264
1268 flag = 1;
1269 }
1270 if (flag) error->one(FLERR,"Out of range atoms - cannot compute PPPM");
1271}
1272
1273void KSpaceHDNNP::make_rho_qeq(const gsl_vector *v)
1274{
1275 int l,m,n,nx,ny,nz,mx,my,mz;
1276 FFT_SCALAR dx,dy,dz,x0,y0,z0;
1277
1278 int *tag = atom->tag;
1279
1280 // clear 3d density array
1281
1282 memset(&(density_brick[nzlo_out][nylo_out][nxlo_out]),0,
1283 ngrid*sizeof(FFT_SCALAR));
1284
1285 // loop over my charges, add their contribution to nearby grid points
1286 // (nx,ny,nz) = global coords of grid pt to "lower left" of charge
1287 // (dx,dy,dz) = distance to "lower left" grid pt
1288 // (mx,my,mz) = global coords of moving stencil pt
1289
1290 double **x = atom->x;
1291 int nlocal = atom->nlocal;
1292
1293 for (int i = 0; i < nlocal; i++) {
1294 //TODO:conversion ?
1295 nx = part2grid[i][0];
1296 ny = part2grid[i][1];
1297 nz = part2grid[i][2];
1298 dx = (nx+shiftone - (x[i][0]-boxlo[0])*delxinv) * hdnnp->cflength;
1299 dy = (ny+shiftone - (x[i][1]-boxlo[1])*delyinv) * hdnnp->cflength;
1300 dz = (nz+shiftone - (x[i][2]-boxlo[2])*delzinv) * hdnnp->cflength;
1301
1302 compute_rho1d(dx,dy,dz);
1303
1304 double const qi = gsl_vector_get(v, tag[i]-1);
1305
1306 //z0 = delvolinv * q[i];
1307 z0 = delvolinv * qi;
1308 for (n = nlower; n <= nupper; n++) {
1309 mz = n+nz;
1310 y0 = z0*rho1d[2][n];
1311 for (m = nlower; m <= nupper; m++) {
1312 my = m+ny;
1313 x0 = y0*rho1d[1][m];
1314 for (l = nlower; l <= nupper; l++) {
1315 mx = l+nx;
1316 density_brick[mz][my][mx] += x0*rho1d[0][l];
1317 }
1318 }
1319 }
1320 }
1321}
1322
1323void KSpaceHDNNP::compute_rho1d(const FFT_SCALAR &dx, const FFT_SCALAR &dy, const FFT_SCALAR &dz)
1324{
1325 int k,l;
1326 FFT_SCALAR r1,r2,r3;
1327
1328 for (k = (1-order)/2; k <= order/2; k++) {
1329 r1 = r2 = r3 = ZEROF;
1330 std::cout << r1 << '\n';
1331 std::cout << r2 << '\n';
1332 for (l = order-1; l >= 0; l--) {
1333 std::cout << l << '\n';
1334 r1 = rho_coeff[l][k] + r1*dx;
1335 std::cout << "aaau" << '\n';
1336 exit(0);
1337 r2 = rho_coeff[l][k] + r2*dy;
1338 r3 = rho_coeff[l][k] + r3*dz;
1339 }
1340 rho1d[0][k] = r1;
1341 rho1d[1][k] = r2;
1342 rho1d[2][k] = r3;
1343 }
1344}
1345
1346// Poission solver in PPPM
1348{
1349 int i,j,k,n;
1350 double eng;
1351
1352 // transform charge density (r -> k)
1353
1354 n = 0;
1355 for (i = 0; i < nfft; i++) {
1356 work1[n++] = density_fft[i];
1357 work1[n++] = ZEROF;
1358 }
1359
1360 fft1->compute(work1,work1,1);
1361
1362 // global energy and virial contribution
1363
1364 double scaleinv = 1.0/(nx_pppm*ny_pppm*nz_pppm);
1365 double s2 = scaleinv*scaleinv;
1366
1367 if (eflag_global || vflag_global) {
1368 if (vflag_global) {
1369 n = 0;
1370 for (i = 0; i < nfft; i++) {
1371 eng = s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]);
1372 for (j = 0; j < 6; j++) virial[j] += eng*vg[i][j];
1373 if (eflag_global) energy += eng;
1374 n += 2;
1375 }
1376 } else {
1377 n = 0;
1378 for (i = 0; i < nfft; i++) {
1379 //std::cout << "Green" << greensfn[i] << '\n';
1380 //std::cout << "S2" << s2 << '\n';
1381 //std::cout << "Par" << (work1[n]*work1[n] + work1[n+1]*work1[n+1]) << '\n';
1382 energy += s2 * greensfn[i] * (work1[n]*work1[n] + work1[n+1]*work1[n+1]);
1383 n += 2;
1384 }
1385 }
1386 }
1387
1388
1389 // scale by 1/total-grid-pts to get rho(k)
1390 // multiply by Green's function to get V(k)
1391
1392 n = 0;
1393 for (i = 0; i < nfft; i++) {
1394 work1[n++] *= scaleinv * greensfn[i];
1395 work1[n++] *= scaleinv * greensfn[i];
1396 }
1397
1398 // extra FFTs for per-atom energy/virial
1399 //if (evflag_atom) poisson_peratom(); // TODO: do we need ?
1400
1401 // compute V(r) in each of 3 dims by transformimg V(k)
1402 // FFT leaves data in 3d brick decomposition
1403 // copy it into inner portion of vx,vy,vz arrays
1404
1405 // x direction
1406
1407 n = 0;
1408 for (k = nzlo_fft; k <= nzhi_fft; k++)
1409 for (j = nylo_fft; j <= nyhi_fft; j++)
1410 for (i = nxlo_fft; i <= nxhi_fft; i++) {
1411 work2[n] = work1[n+1];
1412 work2[n+1] = work1[n];
1413 n += 2;
1414 }
1415
1416 fft2->compute(work2,work2,-1);
1417
1418 n = 0;
1419 for (k = nzlo_in; k <= nzhi_in; k++)
1420 for (j = nylo_in; j <= nyhi_in; j++)
1421 for (i = nxlo_in; i <= nxhi_in; i++) {
1422 vx_brick[k][j][i] = work2[n];
1423 n += 2;
1424 }
1425
1426 // y direction
1427
1428 n = 0;
1429 for (k = nzlo_fft; k <= nzhi_fft; k++)
1430 for (j = nylo_fft; j <= nyhi_fft; j++)
1431 for (i = nxlo_fft; i <= nxhi_fft; i++) {
1432 work2[n] = work1[n+1];
1433 work2[n+1] = work1[n];
1434 n += 2;
1435 }
1436
1437 fft2->compute(work2,work2,-1);
1438
1439 n = 0;
1440 for (k = nzlo_in; k <= nzhi_in; k++)
1441 for (j = nylo_in; j <= nyhi_in; j++)
1442 for (i = nxlo_in; i <= nxhi_in; i++) {
1443 vy_brick[k][j][i] = work2[n];
1444 n += 2;
1445 }
1446
1447 // z direction gradient
1448
1449 n = 0;
1450 for (k = nzlo_fft; k <= nzhi_fft; k++)
1451 for (j = nylo_fft; j <= nyhi_fft; j++)
1452 for (i = nxlo_fft; i <= nxhi_fft; i++) {
1453 work2[n] = work1[n+1];
1454 work2[n+1] = work1[n];
1455 n += 2;
1456 }
1457
1458 fft2->compute(work2,work2,-1);
1459
1460 n = 0;
1461 for (k = nzlo_in; k <= nzhi_in; k++)
1462 for (j = nylo_in; j <= nyhi_in; j++)
1463 for (i = nxlo_in; i <= nxhi_in; i++) {
1464 vz_brick[k][j][i] = work2[n];
1465 n += 2;
1466 }
1467}
1468
1469//remap density from 3d brick decomposition to FFT decomposition
1471{
1472 int n,ix,iy,iz;
1473
1474 // copy grabs inner portion of density from 3d brick
1475 // remap could be done as pre-stage of FFT,
1476 // but this works optimally on only double values, not complex values
1477
1478 n = 0;
1479 for (iz = nzlo_in; iz <= nzhi_in; iz++)
1480 for (iy = nylo_in; iy <= nyhi_in; iy++)
1481 for (ix = nxlo_in; ix <= nxhi_in; ix++)
1482 density_fft[n++] = density_brick[iz][iy][ix];
1483
1485}
1486
1488{
1489 // global indices of PPPM grid range from 0 to N-1
1490 // nlo_in,nhi_in = lower/upper limits of the 3d sub-brick of
1491 // global PPPM grid that I own without ghost cells
1492 // for slab PPPM, assign z grid as if it were not extended
1493 // both non-tiled and tiled proc layouts use 0-1 fractional sumdomain info
1494
1495 if (comm->layout != Comm::LAYOUT_TILED) {
1496 nxlo_in = static_cast<int> (comm->xsplit[comm->myloc[0]] * nx_pppm);
1497 nxhi_in = static_cast<int> (comm->xsplit[comm->myloc[0]+1] * nx_pppm) - 1;
1498
1499 nylo_in = static_cast<int> (comm->ysplit[comm->myloc[1]] * ny_pppm);
1500 nyhi_in = static_cast<int> (comm->ysplit[comm->myloc[1]+1] * ny_pppm) - 1;
1501
1502 nzlo_in = static_cast<int> (comm->zsplit[comm->myloc[2]] * nz_pppm);
1503 nzhi_in = static_cast<int> (comm->zsplit[comm->myloc[2]+1] * nz_pppm) - 1;
1504
1505 } else {
1506 nxlo_in = static_cast<int> (comm->mysplit[0][0] * nx_pppm);
1507 nxhi_in = static_cast<int> (comm->mysplit[0][1] * nx_pppm) - 1;
1508
1509 nylo_in = static_cast<int> (comm->mysplit[1][0] * ny_pppm);
1510 nyhi_in = static_cast<int> (comm->mysplit[1][1] * ny_pppm) - 1;
1511
1512 nzlo_in = static_cast<int> (comm->mysplit[2][0] * nz_pppm);
1513 nzhi_in = static_cast<int> (comm->mysplit[2][1] * nz_pppm) - 1;
1514 }
1515
1516
1517 // nlower,nupper = stencil size for mapping particles to PPPM grid
1518 //TODO: conversion ?
1519 nlower = -(order-1)/2;
1520 nupper = order/2;
1521
1522 // shift values for particle <-> grid mapping
1523 // add/subtract OFFSET to avoid int(-0.75) = 0 when want it to be -1
1524 //TODO: conversion ?
1525 if (order % 2) shift = OFFSET + 0.5;
1526 else shift = OFFSET;
1527 if (order % 2) shiftone = 0.0;
1528 else shiftone = 0.5;
1529
1530 // nlo_out,nhi_out = lower/upper limits of the 3d sub-brick of
1531 // global PPPM grid that my particles can contribute charge to
1532 // effectively nlo_in,nhi_in + ghost cells
1533 // nlo,nhi = global coords of grid pt to "lower left" of smallest/largest
1534 // position a particle in my box can be at
1535 // dist[3] = particle position bound = subbox + skin/2.0 + qdist
1536 // qdist = offset due to TIP4P fictitious charge
1537 // convert to triclinic if necessary
1538 // nlo_out,nhi_out = nlo,nhi + stencil size for particle mapping
1539 // for slab PPPM, assign z grid as if it were not extended
1540
1541 double *prd,*sublo,*subhi;
1542
1543 // triclinic = 0, no slab
1544 prd = domain->prd;
1545 boxlo = domain->boxlo;
1546 sublo = domain->sublo;
1547 subhi = domain->subhi;
1548
1549 // Unit conversions for n2p2
1550 boxlo[0] = boxlo[0] * hdnnp->cflength;
1551 boxlo[1] = boxlo[1] * hdnnp->cflength;
1552 boxlo[2] = boxlo[2] * hdnnp->cflength;
1553
1554 sublo[0] = sublo[0] * hdnnp->cflength;
1555 sublo[1] = sublo[1] * hdnnp->cflength;
1556 sublo[2] = sublo[2] * hdnnp->cflength;
1557
1558 subhi[0] = subhi[0] * hdnnp->cflength;
1559 subhi[1] = subhi[1] * hdnnp->cflength;
1560 subhi[2] = subhi[2] * hdnnp->cflength;
1561
1562 double xprd = prd[0] * hdnnp->cflength;
1563 double yprd = prd[1] * hdnnp->cflength;
1564 double zprd = prd[2] * hdnnp->cflength;
1565
1566 double dist[3] = {0.0,0.0,0.0};
1567 double cuthalf = 0.5*neighbor->skin * hdnnp->cflength;
1568 dist[0] = dist[1] = dist[2] = cuthalf;
1569
1570 int nlo,nhi;
1571 nlo = nhi = 0;
1572
1573 nlo = static_cast<int> ((sublo[0]-dist[0]-boxlo[0]) *
1574 nx_pppm/xprd + shift) - OFFSET;
1575 nhi = static_cast<int> ((subhi[0]+dist[0]-boxlo[0]) *
1576 nx_pppm/xprd + shift) - OFFSET;
1577 nxlo_out = nlo + nlower;
1578 nxhi_out = nhi + nupper;
1579
1580 nlo = static_cast<int> ((sublo[1]-dist[1]-boxlo[1]) *
1581 ny_pppm/yprd + shift) - OFFSET;
1582 nhi = static_cast<int> ((subhi[1]+dist[1]-boxlo[1]) *
1583 ny_pppm/yprd + shift) - OFFSET;
1584 nylo_out = nlo + nlower;
1585 nyhi_out = nhi + nupper;
1586
1587 nlo = static_cast<int> ((sublo[2]-dist[2]-boxlo[2]) *
1588 nz_pppm/zprd + shift) - OFFSET;
1589 nhi = static_cast<int> ((subhi[2]+dist[2]-boxlo[2]) *
1590 nz_pppm/zprd + shift) - OFFSET;
1591 nzlo_out = nlo + nlower;
1592 nzhi_out = nhi + nupper;
1593
1594 // x-pencil decomposition of FFT mesh
1595 // global indices range from 0 to N-1
1596 // each proc owns entire x-dimension, clumps of columns in y,z dimensions
1597 // npey_fft,npez_fft = # of procs in y,z dims
1598 // if nprocs is small enough, proc can own 1 or more entire xy planes,
1599 // else proc owns 2d sub-blocks of yz plane
1600 // me_y,me_z = which proc (0-npe_fft-1) I am in y,z dimensions
1601 // nlo_fft,nhi_fft = lower/upper limit of the section
1602 // of the global FFT mesh that I own in x-pencil decomposition
1603
1604 int npey_fft,npez_fft;
1605 if (nz_pppm >= nprocs) {
1606 npey_fft = 1;
1607 npez_fft = nprocs;
1608 } else procs2grid2d(nprocs,ny_pppm,nz_pppm,&npey_fft,&npez_fft);
1609
1610 int me_y = me % npey_fft;
1611 int me_z = me / npey_fft;
1612
1613 nxlo_fft = 0;
1614 nxhi_fft = nx_pppm - 1;
1615 nylo_fft = me_y*ny_pppm/npey_fft;
1616 nyhi_fft = (me_y+1)*ny_pppm/npey_fft - 1;
1617 nzlo_fft = me_z*nz_pppm/npez_fft;
1618 nzhi_fft = (me_z+1)*nz_pppm/npez_fft - 1;
1619
1620 // ngrid = count of PPPM grid pts owned by this proc, including ghosts
1621
1623 (nzhi_out-nzlo_out+1);
1624
1625 // count of FFT grids pts owned by this proc, without ghosts
1626 // nfft = FFT points in x-pencil FFT decomposition on this proc
1627 // nfft_brick = FFT points in 3d brick-decomposition on this proc
1628 // nfft_both = greater of 2 values
1629
1630 nfft = (nxhi_fft-nxlo_fft+1) * (nyhi_fft-nylo_fft+1) *
1631 (nzhi_fft-nzlo_fft+1);
1632 int nfft_brick = (nxhi_in-nxlo_in+1) * (nyhi_in-nylo_in+1) *
1633 (nzhi_in-nzlo_in+1);
1634 nfft_both = MAX(nfft,nfft_brick);
1635}
1636
1637/* ----------------------------------------------------------------------
1638 set global size of PPPM grid = nx,ny,nz_pppm
1639 used for charge accumulation, FFTs, and electric field interpolation
1640------------------------------------------------------------------------- */
1642{
1643 // use xprd,yprd,zprd (even if triclinic, and then scale later)
1644 // adjust z dimension for 2d slab PPPM
1645 // 3d PPPM just uses zprd since slab_volfactor = 1.0
1646
1647 double xprd = domain->xprd * hdnnp->cflength;
1648 double yprd = domain->yprd * hdnnp->cflength;
1649 double zprd = domain->zprd * hdnnp->cflength;
1650 //double zprd_slab = zprd*slab_volfactor;
1651
1652 // make initial g_ewald estimate
1653 // based on desired accuracy and real space cutoff
1654 // fluid-occupied volume used to estimate real-space error
1655 // zprd used rather than zprd_slab
1656
1657 double h;
1658 bigint natoms = atom->natoms;
1659
1660 // TODO: check this, we can also use 'kspace_modify' to pick gewald
1661 if (!gewaldflag) {
1662 if (accuracy <= 0.0)
1663 error->all(FLERR, "KSpace accuracy must be > 0");
1664 if (q2 == 0.0)
1665 error->all(FLERR, "Must use kspace_modify gewald for uncharged system");
1666 g_ewald = accuracy * sqrt(natoms * cutoff * xprd * yprd * zprd) / (2.0 * q2);
1667 if (g_ewald >= 1.0) g_ewald = (1.35 - 0.15 * log(accuracy)) / cutoff;
1668 else g_ewald = sqrt(-log(g_ewald)) / cutoff;
1669 }
1670
1671 // set optimal nx_pppm,ny_pppm,nz_pppm based on order and accuracy
1672 // nz_pppm uses extended zprd_slab instead of zprd
1673 // reduce it until accuracy target is met
1674
1675 if (!gridflag) { // gridflag = 0 if there is no kspace_modify command
1676
1677 // differentiation_flag = 0 & stagger_flag = 0 TODO
1678 double err;
1679 h_x = h_y = h_z = 1.0/g_ewald;
1680
1681 nx_pppm = static_cast<int> (xprd/h_x) + 1;
1682 ny_pppm = static_cast<int> (yprd/h_y) + 1;
1683 nz_pppm = static_cast<int> (zprd/h_z) + 1;
1684
1685 err = estimate_ik_error(h_x,xprd,natoms);
1686 while (err > accuracy) {
1687 err = estimate_ik_error(h_x,xprd,natoms);
1688 nx_pppm++;
1689 h_x = xprd/nx_pppm;
1690 }
1691
1692 err = estimate_ik_error(h_y,yprd,natoms);
1693 while (err > accuracy) {
1694 err = estimate_ik_error(h_y,yprd,natoms);
1695 ny_pppm++;
1696 h_y = yprd/ny_pppm;
1697 }
1698
1699 err = estimate_ik_error(h_z,zprd,natoms);
1700 while (err > accuracy) {
1701 err = estimate_ik_error(h_z,zprd,natoms);
1702 nz_pppm++;
1703 h_z = zprd/nz_pppm;
1704 }
1705 }
1706
1707 // boost grid size until it is factorable
1708
1709 while (!factorable(nx_pppm)) nx_pppm++;
1710 while (!factorable(ny_pppm)) ny_pppm++;
1711 while (!factorable(nz_pppm)) nz_pppm++;
1712
1713 // triclinic = 0
1714 h_x = xprd/nx_pppm;
1715 h_y = yprd/ny_pppm;
1716 h_z = zprd/nz_pppm;
1717
1718
1719 if (nx_pppm >= OFFSET || ny_pppm >= OFFSET || nz_pppm >= OFFSET)
1720 error->all(FLERR,"PPPM grid is too large");
1721}
1722
1724{
1725 int j,k,l,m;
1726 FFT_SCALAR s;
1727
1728 FFT_SCALAR **a;
1729 memory->create2d_offset(a,order,-order,order,"pppm:a");
1730
1731 for (k = -order; k <= order; k++)
1732 for (l = 0; l < order; l++)
1733 a[l][k] = 0.0;
1734
1735 a[0][0] = 1.0;
1736 for (j = 1; j < order; j++) {
1737 for (k = -j; k <= j; k += 2) {
1738 s = 0.0;
1739 for (l = 0; l < j; l++) {
1740 a[l+1][k] = (a[l][k+1]-a[l][k-1]) / (l+1);
1741#ifdef FFT_SINGLE
1742 s += powf(0.5,(float) l+1) *
1743 (a[l][k-1] + powf(-1.0,(float) l) * a[l][k+1]) / (l+1);
1744#else
1745 s += pow(0.5,(double) l+1) *
1746 (a[l][k-1] + pow(-1.0,(double) l) * a[l][k+1]) / (l+1);
1747#endif
1748 }
1749 a[0][k] = s;
1750 }
1751 }
1752
1753 m = (1-order)/2;
1754 for (k = -(order-1); k < order; k += 2) {
1755 for (l = 0; l < order; l++)
1756 rho_coeff[l][m] = a[l][k];
1757 for (l = 1; l < order; l++)
1758 drho_coeff[l-1][m] = l*a[l][k];
1759 m++;
1760 }
1761
1762 memory->destroy2d_offset(a,-order);
1763}
1764
1765// pre-compute modified (Hockney-Eastwood) Coulomb Green's function
1767{
1768 const double * const prd = domain->prd;
1769 //TODO: conversion ?
1770 const double xprd = prd[0] * hdnnp->cflength;
1771 const double yprd = prd[1] * hdnnp->cflength;
1772 const double zprd = prd[2] * hdnnp->cflength;
1773
1774 const double unitkx = (MY_2PI/xprd);
1775 const double unitky = (MY_2PI/yprd);
1776 const double unitkz = (MY_2PI/zprd);
1777
1778 double snx,sny,snz;
1779 double argx,argy,argz,wx,wy,wz,sx,sy,sz,qx,qy,qz;
1780 double sum1,dot1,dot2;
1781 double numerator,denominator;
1782 double sqk;
1783
1784 int k,l,m,n,nx,ny,nz,kper,lper,mper;
1785
1786 const int nbx = static_cast<int> ((g_ewald*xprd/(MY_PI*nx_pppm)) *
1787 pow(-log(EPS_HOC),0.25));
1788 const int nby = static_cast<int> ((g_ewald*yprd/(MY_PI*ny_pppm)) *
1789 pow(-log(EPS_HOC),0.25));
1790 const int nbz = static_cast<int> ((g_ewald*zprd/(MY_PI*nz_pppm)) *
1791 pow(-log(EPS_HOC),0.25));
1792 const int twoorder = 2*order;
1793
1794 n = 0;
1795 for (m = nzlo_fft; m <= nzhi_fft; m++) {
1796 mper = m - nz_pppm*(2*m/nz_pppm);
1797 snz = square(sin(0.5*unitkz*mper*zprd/nz_pppm));
1798
1799 for (l = nylo_fft; l <= nyhi_fft; l++) {
1800 lper = l - ny_pppm*(2*l/ny_pppm);
1801 sny = square(sin(0.5*unitky*lper*yprd/ny_pppm));
1802
1803 for (k = nxlo_fft; k <= nxhi_fft; k++) {
1804 kper = k - nx_pppm*(2*k/nx_pppm);
1805 snx = square(sin(0.5*unitkx*kper*xprd/nx_pppm));
1806
1807 sqk = square(unitkx*kper) + square(unitky*lper) + square(unitkz*mper);
1808
1809 if (sqk != 0.0) {
1810 numerator = 12.5663706/sqk;
1811 denominator = gf_denom(snx,sny,snz);
1812 sum1 = 0.0;
1813
1814 for (nx = -nbx; nx <= nbx; nx++) {
1815 qx = unitkx*(kper+nx_pppm*nx);
1816 sx = exp(-0.25*square(qx/g_ewald));
1817 argx = 0.5*qx*xprd/nx_pppm;
1818 wx = powsinxx(argx,twoorder);
1819
1820 for (ny = -nby; ny <= nby; ny++) {
1821 qy = unitky*(lper+ny_pppm*ny);
1822 sy = exp(-0.25*square(qy/g_ewald));
1823 argy = 0.5*qy*yprd/ny_pppm;
1824 wy = powsinxx(argy,twoorder);
1825
1826 for (nz = -nbz; nz <= nbz; nz++) {
1827 qz = unitkz*(mper+nz_pppm*nz);
1828 sz = exp(-0.25*square(qz/g_ewald));
1829 argz = 0.5*qz*zprd/nz_pppm;
1830 wz = powsinxx(argz,twoorder);
1831
1832 dot1 = unitkx*kper*qx + unitky*lper*qy + unitkz*mper*qz;
1833 dot2 = qx*qx+qy*qy+qz*qz;
1834 sum1 += (dot1/dot2) * sx*sy*sz * wx*wy*wz;
1835 }
1836 }
1837 }
1838 greensfn[n++] = numerator*sum1/denominator;
1839 } else greensfn[n++] = 0.0;
1840 }
1841 }
1842 }
1843}
1844
1845/* ----------------------------------------------------------------------
1846 calculate the final estimate of the accuracy
1847------------------------------------------------------------------------- */
1848
1850{
1851 double xprd = domain->xprd;
1852 double yprd = domain->yprd;
1853 double zprd = domain->zprd;
1854 bigint natoms = atom->natoms;
1855 if (natoms == 0) natoms = 1; // avoid division by zero
1856
1857 double df_kspace = compute_df_kspace();
1858 double q2_over_sqrt = q2 / sqrt(natoms*cutoff*xprd*yprd*zprd);
1859 double df_rspace = 2.0 * q2_over_sqrt * exp(-g_ewald*g_ewald*cutoff*cutoff);
1860 double df_table = estimate_table_accuracy(q2_over_sqrt,df_rspace);
1861 double estimated_accuracy = sqrt(df_kspace*df_kspace + df_rspace*df_rspace +
1862 df_table*df_table);
1863
1864 return estimated_accuracy;
1865}
1866
1867/* ----------------------------------------------------------------------
1868 compute estimated kspace force error
1869------------------------------------------------------------------------- */
1870
1872{
1873 double xprd = domain->xprd;
1874 double yprd = domain->yprd;
1875 double zprd = domain->zprd;
1876 bigint natoms = atom->natoms;
1877 double df_kspace = 0.0;
1878
1879 //differentiation_flag = 0
1880 double lprx = estimate_ik_error(h_x,xprd,natoms);
1881 double lpry = estimate_ik_error(h_y,yprd,natoms);
1882 double lprz = estimate_ik_error(h_z,zprd,natoms);
1883 df_kspace = sqrt(lprx*lprx + lpry*lpry + lprz*lprz) / sqrt(3.0);
1884
1885 return df_kspace;
1886}
1887
1888/* ----------------------------------------------------------------------
1889 estimate kspace force error for ik method
1890------------------------------------------------------------------------- */
1891
1892double KSpaceHDNNP::estimate_ik_error(double h, double prd, bigint natoms)
1893{
1894 double sum = 0.0;
1895 if (natoms == 0) return 0.0;
1896 for (int m = 0; m < order; m++)
1897 sum += acons[order][m] * pow(h*g_ewald,2.0*m);
1898 double value = q2 * pow(h*g_ewald,(double)order) *
1899 sqrt(g_ewald*prd*sqrt(MY_2PI)*sum/natoms) / (prd*prd);
1900
1901 return value;
1902}
1903
1904void KSpaceHDNNP::procs2grid2d(int nprocs, int nx, int ny, int *px, int *py)
1905{
1906 // loop thru all possible factorizations of nprocs
1907 // surf = surface area of largest proc sub-domain
1908 // innermost if test minimizes surface area and surface/volume ratio
1909
1910 int bestsurf = 2 * (nx + ny);
1911 int bestboxx = 0;
1912 int bestboxy = 0;
1913
1914 int boxx,boxy,surf,ipx,ipy;
1915
1916 ipx = 1;
1917 while (ipx <= nprocs) {
1918 if (nprocs % ipx == 0) {
1919 ipy = nprocs/ipx;
1920 boxx = nx/ipx;
1921 if (nx % ipx) boxx++;
1922 boxy = ny/ipy;
1923 if (ny % ipy) boxy++;
1924 surf = boxx + boxy;
1925 if (surf < bestsurf ||
1926 (surf == bestsurf && boxx*boxy > bestboxx*bestboxy)) {
1927 bestsurf = surf;
1928 bestboxx = boxx;
1929 bestboxy = boxy;
1930 *px = ipx;
1931 *py = ipy;
1932 }
1933 }
1934 ipx++;
1935 }
1936}
1937
1938// pre-compute Green's function denominator expansion coeffs, Gamma(2n)
1940{
1941 int k,l,m;
1942
1943 for (l = 1; l < order; l++) gf_b[l] = 0.0;
1944 gf_b[0] = 1.0;
1945
1946 for (m = 1; m < order; m++) {
1947 for (l = m; l > 0; l--)
1948 gf_b[l] = 4.0 * (gf_b[l]*(l-m)*(l-m-0.5)-gf_b[l-1]*(l-m-1)*(l-m-1));
1949 gf_b[0] = 4.0 * (gf_b[0]*(l-m)*(l-m-0.5));
1950 }
1951
1952 bigint ifact = 1;
1953 for (k = 1; k < 2*order; k++) ifact *= k;
1954 double gaminv = 1.0/ifact;
1955 for (l = 0; l < order; l++) gf_b[l] *= gaminv;
1956}
1957
1959{
1960 int i;
1961
1962 while (n > 1) {
1963 for (i = 0; i < nfactors; i++) {
1964 if (n % factors[i] == 0) {
1965 n /= factors[i];
1966 break;
1967 }
1968 }
1969 if (i == nfactors) return 0;
1970 }
1971
1972 return 1;
1973}
1974
1975/* ----------------------------------------------------------------------
1976 adjust the g_ewald parameter to near its optimal value
1977 using a Newton-Raphson solver
1978------------------------------------------------------------------------- */
1979
1981{
1982 double dx;
1983
1984 for (int i = 0; i < LARGE; i++) {
1985 dx = newton_raphson_f() / derivf();
1986 g_ewald -= dx;
1987 if (fabs(newton_raphson_f()) < SMALL) return;
1988 }
1989 error->all(FLERR, "Could not compute g_ewald");
1990}
1991
1992/* ----------------------------------------------------------------------
1993 calculate f(x) using Newton-Raphson solver
1994------------------------------------------------------------------------- */
1995
1997{
1998 double xprd = domain->xprd;
1999 double yprd = domain->yprd;
2000 double zprd = domain->zprd;
2001 bigint natoms = atom->natoms;
2002
2003 double df_rspace = 2.0*q2*exp(-g_ewald*g_ewald*cutoff*cutoff) /
2004 sqrt(natoms*cutoff*xprd*yprd*zprd);
2005
2006 double df_kspace = compute_df_kspace();
2007
2008 return df_rspace - df_kspace;
2009}
2010
2011/* ----------------------------------------------------------------------
2012 calculate numerical derivative f'(x) using forward difference
2013 [f(x + h) - f(x)] / h
2014------------------------------------------------------------------------- */
2015
2017{
2018 double h = 0.000001; //Derivative step-size
2019 double df,f1,f2,g_ewald_old;
2020
2021 f1 = newton_raphson_f();
2022 g_ewald_old = g_ewald;
2023 g_ewald += h;
2024 f2 = newton_raphson_f();
2025 g_ewald = g_ewald_old;
2026 df = (f2 - f1)/h;
2027
2028 return df;
2029}
2030
2031// Calculate Ewald eta param (original method in RuNNer - JACKSON_CATLOW)
2033{
2034 ewald_eta = 1.0 / sqrt(2.0 * M_PI);
2035
2036 //TODO: check
2037 if (mflag == 0) ewald_eta *= pow(volume * volume / atom->natoms, 1.0 / 6.0); // regular Ewald eta
2038 else ewald_eta *= pow(volume, 1.0 / 3.0); // matrix approach
2039
2040}
2041
2042// Calculate Ewald eta param (efficient method - KOLAFA_PARRAM)
2044{
2045 // Ratio of computing times for one real space and k space iteration.
2046 double TrOverTk = 3.676; // TODO: should it be hardcoded ?
2047
2048 // Unit Conversion (in KOLAFA-PERRAM method precision has unit of a force) TODO:check
2049 double fourPiEps = 1.0;
2050
2051 /*precision *= convEnergy / convLength;
2052 ewaldMaxCharge *= convCharge;
2053 fourPiEps = pow(convCharge, 2) / (convLength * convEnergy);*/
2054
2055 // Initial approximation
2056 double eta0 = pow(1 / TrOverTk * pow(volume, 2) / pow(2 * M_PI, 3),1.0 / 6.0);
2057
2058 // Selfconsistent calculation of eta
2059 ewald_eta = eta0;
2060 s_ewald = 0.0; // TODO: check
2061 double relError = 1.0;
2062 while (relError > 0.01)
2063 {
2064 // Calculates S
2065 double y = accuracy * sqrt(ewald_eta / sqrt(2)) * fourPiEps;
2066 y /= 2 * sqrt(atom->natoms * 1.0 / volume) * pow(ewald_max_charge, 2);
2067
2068 double relYError = 1.0;
2069 if (s_ewald <= 0.0)
2070 s_ewald = 0.5;
2071 double step = s_ewald;
2072 while (abs(step) / s_ewald > 0.01 || relYError > 0.01)
2073 {
2074 step = 2 * s_ewald / (4 * pow(s_ewald,2) + 1);
2075 step *= 1 - sqrt(s_ewald) * y / exp(-pow(s_ewald,2));
2076 if (s_ewald <= -step)
2077 {
2078 s_ewald /= 2;
2079 step = 1.0;
2080 }
2081 else
2082 s_ewald += step;
2083 relYError = (exp(-pow(s_ewald,2)) / sqrt(s_ewald) - y) / y;
2084 }
2085
2086 double newEta = eta0 * pow((1 + 1 / (2 * pow(s_ewald, 2))), 1.0 / 6.0);
2087 relError = abs(newEta - ewald_eta) / ewald_eta;
2088 ewald_eta = newEta;
2089 }
2090
2092}
2093
2094// Generate k-space grid in Ewald Sum (RuNNer)
2095void KSpaceHDNNP::ewald_pbc(double rcut)
2096{
2097 double proja = fabs(unitk[0]);
2098 double projb = fabs(unitk[1]);
2099 double projc = fabs(unitk[2]);
2100 kxmax = 0;
2101 kymax = 0;
2102 kzmax = 0;
2103 while (kxmax * proja <= rcut) kxmax++;
2104 while (kymax * projb <= rcut) kymax++;
2105 while (kzmax * projc <= rcut) kzmax++;
2106
2107 return;
2108}
2109
2110
2111
FFT_SCALAR *** vx_brick
KSpaceHDNNP(class LAMMPS *)
FFT_SCALAR ** rho_coeff
FFT_SCALAR *** vdy_brick
class PairHDNNP4G * hdnnp
virtual void compute_gf_ik()
FFT_SCALAR *** vz_brick
virtual void set_grid_global()
FFT_SCALAR *** v5_brick
virtual void compute_gf_denom()
FFT_SCALAR *** v1_brick
friend class PairHDNNP4G
void compute_rho1d(const FFT_SCALAR &, const FFT_SCALAR &, const FFT_SCALAR &)
FFT_SCALAR *** v4_brick
virtual void settings(int, char **)
double estimate_ik_error(double, double, bigint)
FFT_SCALAR *** vdz_brick
FFT_SCALAR *** v0_brick
void procs2grid2d(int, int, int, int *, int *)
double gf_denom(const double &x, const double &y, const double &z) const
FFT_SCALAR *** density_brick
FFT_SCALAR *** vy_brick
virtual void particle_map()
FFT_SCALAR *** vdx_brick
virtual void compute(int, int)
FFT_SCALAR *** v2_brick
double rms(int, double, bigint, double)
FFT_SCALAR ** drho_coeff
double compute_ewald_eqeq(const gsl_vector *)
FFT_SCALAR *** v3_brick
FFT_SCALAR *** u_brick
virtual double newton_raphson_f()
void make_rho_qeq(const gsl_vector *)
#define SMALL
#define OFFSET
@ FORWARD_AD
@ FORWARD_IK
@ FORWARD_AD_PERATOM
@ FORWARD_IK_PERATOM
#define MAXORDER
#define LARGE
#define ZEROF
@ REVERSE_RHO
#define EPS_HOC
#define LMP_FFT_PREC
#define MPI_FFT_SCALAR
#define LMP_FFT_LIB
double FFT_SCALAR