n2p2 - A neural network potential package
Dataset.cpp
Go to the documentation of this file.
1// n2p2 - A neural network potential package
2// Copyright (C) 2018 Andreas Singraber (University of Vienna)
3//
4// This program is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// This program is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12// GNU General Public License for more details.
13//
14// You should have received a copy of the GNU General Public License
15// along with this program. If not, see <https://www.gnu.org/licenses/>.
16
17#include "Dataset.h"
18#include "SymFnc.h"
19#include "mpi-extra.h"
20#include "utility.h"
21#include <algorithm> // std::max, std::find, std::find_if, std::sort, std::fill
22#include <cmath> // sqrt, fabs
23#include <cstdint> // int64_t
24#include <cstdlib> // atoi
25#include <cstdio> // fprintf, fopen, fclose, remove
26#include <iostream> // std::ios::binary
27#include <fstream> // std::ifstream, std::ofstream
28#include <limits> // std::numeric_limits
29#include <stdexcept> // std::runtime_error
30#include <gsl/gsl_histogram.h>
31#include <gsl/gsl_rng.h>
32
33using namespace std;
34using namespace nnp;
35
36Dataset::Dataset() : Mode(),
37 myRank (0 ),
38 numProcs (0 ),
39 numStructures(0 ),
40 myName ("" ),
41 rng (NULL),
42 rngGlobal (NULL)
43{
44}
45
47{
48 if (rng != NULL) gsl_rng_free(rng);
49 if (rngGlobal != NULL) gsl_rng_free(rngGlobal);
50}
51
53{
54 MPI_Comm tmpComm;
55 MPI_Comm_dup(MPI_COMM_WORLD, &tmpComm);
56 setupMPI(&tmpComm);
57 MPI_Comm_free(&tmpComm);
58
59 return;
60}
61
62void Dataset::setupMPI(MPI_Comm* communicator)
63{
64 log << "\n";
65 log << "*** SETUP: MPI **************************"
66 "**************************************\n";
67 log << "\n";
68
69 int bufferSize = 0;
70 char line[MPI_MAX_PROCESSOR_NAME];
71 MPI_Status ms;
72
73 MPI_Comm_dup(*communicator, &comm);
74 MPI_Comm_rank(comm, &myRank);
75 MPI_Comm_size(comm, &numProcs);
76 MPI_Get_processor_name(line, &bufferSize);
77 myName = line;
78
79 log << strpr("Number of processors: %d\n", numProcs);
80 log << strpr("Process %d of %d (rank %d): %s\n",
81 myRank + 1,
83 myRank,
84 myName.c_str());
85 for (int i = 1; i < numProcs; ++i)
86 {
87 if (myRank == 0)
88 {
89 MPI_Recv(&bufferSize, 1, MPI_INT, i, 0, comm, &ms);
90 MPI_Recv(line, bufferSize, MPI_CHAR, i, 0, comm, &ms);
91 log << strpr("Process %d of %d (rank %d): %s\n",
92 i + 1,
94 i,
95 line);
96 }
97 else if (myRank == i)
98 {
99 MPI_Send(&bufferSize, 1, MPI_INT, 0, 0, comm);
100 MPI_Send(line, bufferSize, MPI_CHAR, 0, 0, comm);
101 }
102 }
103
104 log << "*****************************************"
105 "**************************************\n";
106
107 return;
108}
109
111{
112 log << "\n";
113 log << "*** SETUP: RANDOM NUMBER GENERATOR ******"
114 "**************************************\n";
115 log << "\n";
116
117 // Get random seed from settings file.
118 unsigned long seed = atoi(settings["random_seed"].c_str());
119 unsigned long seedGlobal = 0;
120
121 if (myRank == 0)
122 {
123 log << strpr("Random number generator seed: %d\n", seed);
124 if (seed == 0)
125 {
126 log << "WARNING: Seed set to 0. This is a special value for the "
127 "gsl_rng_set() routine (see GSL docs).\n";
128 }
129 // Initialize personal RNG of process 0 with the given seed.
130 rng = gsl_rng_alloc(gsl_rng_mt19937);
131 gsl_rng_set(rng, seed);
132 log << strpr("Seed for rank %d: %lu\n", 0, seed);
133 for (int i = 1; i < numProcs; ++i)
134 {
135 // Get new seeds for all remaining processes with the RNG.
136 seed = gsl_rng_get(rng);
137 log << strpr("Seed for rank %d: %lu\n", i, seed);
138 MPI_Send(&seed, 1, MPI_UNSIGNED_LONG, i, 0, comm);
139 }
140 // Set seed for global RNG.
141 seedGlobal = gsl_rng_get(rng);
142 }
143 else
144 {
145 // Receive seed for personal RNG.
146 MPI_Status ms;
147 MPI_Recv(&seed, 1, MPI_UNSIGNED_LONG, 0, 0, comm, &ms);
148 log << strpr("Seed for rank %d: %lu\n", myRank, seed);
149 rng = gsl_rng_alloc(gsl_rng_taus);
150 gsl_rng_set(rng, seed);
151 }
152 // Rank 0 broadcasts global seed.
153 MPI_Bcast(&seedGlobal, 1, MPI_UNSIGNED_LONG, 0, comm);
154 log << strpr("Seed for global RNG: %lu\n", seedGlobal);
155 // All processes initialize global RNG.
156 rngGlobal = gsl_rng_alloc(gsl_rng_taus);
157 gsl_rng_set(rngGlobal, seedGlobal);
158
159 log << "*****************************************"
160 "**************************************\n";
161
162 return;
163}
164
165int Dataset::calculateBufferSize(Structure const& structure) const
166{
167 int bs = 0; // Send buffer size.
168 int is = 0; // int size.
169 int i64s = 0; // int64_t size.
170 int ss = 0; // size_t size.
171 int ds = 0; // double size.
172 int cs = 0; // char size.
173 Structure const& s = structure; // Shortcut for structure.
174
175 MPI_Pack_size(1, MPI_INT , comm, &is );
176 MPI_Pack_size(1, MPI_INT64_T, comm, &i64s);
177 MPI_Pack_size(1, MPI_SIZE_T , comm, &ss );
178 MPI_Pack_size(1, MPI_DOUBLE , comm, &ds );
179 MPI_Pack_size(1, MPI_CHAR , comm, &cs );
180
181 // Structure
182 bs += 5 * cs + 4 * ss + 4 * is + 5 * ds;
183 // Structure.comment
184 bs += ss;
185 bs += (s.comment.length() + 1) * cs;
186 // Structure.box
187 bs += 9 * ds;
188 // Structure.invbox
189 bs += 9 * ds;
190 // Structure.numAtomsPerElement
191 bs += ss;
192 bs += s.numAtomsPerElement.size() * ss;
193 // Structure.atoms
194 bs += ss;
195 bs += s.atoms.size() * (4 * cs + 6 * ss + i64s + 4 * ds + 3 * 3 * ds);
196 for (vector<Atom>::const_iterator it = s.atoms.begin();
197 it != s.atoms.end(); ++it)
198 {
199 // Atom.neighborsUnique
200 bs += ss;
201 bs += it->neighborsUnique.size() * ss;
202 // Atom.numNeighborsPerElement
203 bs += ss;
204 bs += it->numNeighborsPerElement.size() * ss;
205 // Atom.numSymmetryFunctionDerivatives
206 bs += ss;
207 bs += it->numSymmetryFunctionDerivatives.size() * ss;
208#ifndef N2P2_NO_SF_CACHE
209 // Atom.cacheSizePerElement
210 bs += ss;
211 bs += it->cacheSizePerElement.size() * ss;
212#endif
213 // Atom.G
214 bs += ss;
215 bs += it->G.size() * ds;
216 // Atom.dEdG
217 bs += ss;
218 bs += it->dEdG.size() * ds;
219 // Atom.dQdG
220 bs += ss;
221 bs += it->dQdG.size() * ds;
222#ifdef N2P2_FULL_SFD_MEMORY
223 // Atom.dGdxia
224 bs += ss;
225 bs += it->dGdxia.size() * ds;
226#endif
227 // Atom.dGdr
228 bs += ss;
229 bs += it->dGdr.size() * 3 * ds;
230 // Atom.neighbors
231 bs += ss;
232 for (vector<Atom::Neighbor>::const_iterator it2 =
233 it->neighbors.begin(); it2 != it->neighbors.end(); ++it2)
234 {
235 // Neighbor
236 bs += 2 * ss + i64s + ds + 3 * ds;
237#ifndef N2P2_NO_SF_CACHE
238 // Neighbor.cache
239 bs += ss;
240 bs += it2->cache.size() * ds;
241#endif
242 // Neighbor.dGdr
243 bs += ss;
244 bs += it2->dGdr.size() * 3 * ds;
245 }
246 }
247
248 return bs;
249}
250
251int Dataset::sendStructure(Structure const& structure, int dest) const
252{
253 unsigned char* buf = 0; // Send buffer.
254 int bs = 0; // Send buffer size.
255 int p = 0; // Send buffer position.
256 int ts = 0; // Size for temporary stuff.
257 Structure const& s = structure; // Shortcut for structure.
258
259 bs = calculateBufferSize(s);
260 buf = new unsigned char[bs];
261
262 // Structure
263 MPI_Pack(&(s.isPeriodic ), 1, MPI_CHAR , buf, bs, &p, comm);
264 MPI_Pack(&(s.isTriclinic ), 1, MPI_CHAR , buf, bs, &p, comm);
265 MPI_Pack(&(s.hasNeighborList ), 1, MPI_CHAR , buf, bs, &p, comm);
266 MPI_Pack(&(s.hasSymmetryFunctions ), 1, MPI_CHAR , buf, bs, &p, comm);
267 MPI_Pack(&(s.hasSymmetryFunctionDerivatives), 1, MPI_CHAR , buf, bs, &p, comm);
268 MPI_Pack(&(s.index ), 1, MPI_SIZE_T, buf, bs, &p, comm);
269 MPI_Pack(&(s.numAtoms ), 1, MPI_SIZE_T, buf, bs, &p, comm);
270 MPI_Pack(&(s.numElements ), 1, MPI_SIZE_T, buf, bs, &p, comm);
271 MPI_Pack(&(s.numElementsPresent ), 1, MPI_SIZE_T, buf, bs, &p, comm);
272 MPI_Pack(&(s.pbc ), 3, MPI_INT , buf, bs, &p, comm);
273 MPI_Pack(&(s.energy ), 1, MPI_DOUBLE, buf, bs, &p, comm);
274 MPI_Pack(&(s.energyRef ), 1, MPI_DOUBLE, buf, bs, &p, comm);
275 MPI_Pack(&(s.charge ), 1, MPI_DOUBLE, buf, bs, &p, comm);
276 MPI_Pack(&(s.chargeRef ), 1, MPI_DOUBLE, buf, bs, &p, comm);
277 MPI_Pack(&(s.volume ), 1, MPI_DOUBLE, buf, bs, &p, comm);
278 MPI_Pack(&(s.sampleType ), 1, MPI_INT , buf, bs, &p, comm);
279
280 // Strucuture.comment
281 ts = s.comment.length() + 1;
282 MPI_Pack(&ts, 1, MPI_SIZE_T, buf, bs, &p, comm);
283 MPI_Pack(s.comment.c_str(), ts, MPI_CHAR, buf, bs, &p, comm);
284
285 // Structure.box
286 for (size_t i = 0; i < 3; ++i)
287 {
288 MPI_Pack(s.box[i].r, 3, MPI_DOUBLE, buf, bs, &p, comm);
289 }
290
291 // Structure.invbox
292 for (size_t i = 0; i < 3; ++i)
293 {
294 MPI_Pack(s.invbox[i].r, 3, MPI_DOUBLE, buf, bs, &p, comm);
295 }
296
297 // Structure.numAtomsPerElement
298 ts = s.numAtomsPerElement.size();
299 MPI_Pack(&ts, 1, MPI_SIZE_T, buf, bs, &p, comm);
300 if (ts > 0)
301 {
302 MPI_Pack(&(s.numAtomsPerElement.front()), ts, MPI_SIZE_T, buf, bs, &p, comm);
303 }
304
305 // Structure.atoms
306 ts = s.atoms.size();
307 MPI_Pack(&ts, 1, MPI_SIZE_T, buf, bs, &p, comm);
308 if (ts > 0)
309 {
310 for (vector<Atom>::const_iterator it = s.atoms.begin();
311 it != s.atoms.end(); ++it)
312 {
313 // Atom
314 MPI_Pack(&(it->hasNeighborList ), 1, MPI_CHAR, buf, bs, &p, comm);
315 MPI_Pack(&(it->hasSymmetryFunctions ), 1, MPI_CHAR, buf, bs, &p, comm);
316 MPI_Pack(&(it->hasSymmetryFunctionDerivatives), 1, MPI_CHAR, buf, bs, &p, comm);
317 MPI_Pack(&(it->useChargeNeuron ), 1, MPI_CHAR, buf, bs, &p, comm);
318 MPI_Pack(&(it->index ), 1, MPI_SIZE_T, buf, bs, &p, comm);
319 MPI_Pack(&(it->indexStructure ), 1, MPI_SIZE_T, buf, bs, &p, comm);
320 MPI_Pack(&(it->tag ), 1, MPI_INT64_T, buf, bs, &p, comm);
321 MPI_Pack(&(it->element ), 1, MPI_SIZE_T, buf, bs, &p, comm);
322 MPI_Pack(&(it->numNeighbors ), 1, MPI_SIZE_T, buf, bs, &p, comm);
323 MPI_Pack(&(it->numNeighborsUnique ), 1, MPI_SIZE_T, buf, bs, &p, comm);
324 MPI_Pack(&(it->numSymmetryFunctions ), 1, MPI_SIZE_T, buf, bs, &p, comm);
325 MPI_Pack(&(it->energy ), 1, MPI_DOUBLE, buf, bs, &p, comm);
326 MPI_Pack(&(it->chi ), 1, MPI_DOUBLE, buf, bs, &p, comm);
327 MPI_Pack(&(it->charge ), 1, MPI_DOUBLE, buf, bs, &p, comm);
328 MPI_Pack(&(it->chargeRef ), 1, MPI_DOUBLE, buf, bs, &p, comm);
329 MPI_Pack(&(it->r.r ), 3, MPI_DOUBLE, buf, bs, &p, comm);
330 MPI_Pack(&(it->f.r ), 3, MPI_DOUBLE, buf, bs, &p, comm);
331 MPI_Pack(&(it->fRef.r ), 3, MPI_DOUBLE, buf, bs, &p, comm);
332
333 // Atom.neighborsUnique
334 size_t ts2 = it->neighborsUnique.size();
335 MPI_Pack(&ts2, 1, MPI_SIZE_T, buf, bs, &p, comm);
336 if (ts2 > 0)
337 {
338 MPI_Pack(&(it->neighborsUnique.front()), ts2, MPI_SIZE_T, buf, bs, &p, comm);
339 }
340
341 // Atom.numNeighborsPerElement
342 ts2 = it->numNeighborsPerElement.size();
343 MPI_Pack(&ts2, 1, MPI_SIZE_T, buf, bs, &p, comm);
344 if (ts2 > 0)
345 {
346 MPI_Pack(&(it->numNeighborsPerElement.front()), ts2, MPI_SIZE_T, buf, bs, &p, comm);
347 }
348
349 // Atom.numSymmetryFunctionDerivatives
350 ts2 = it->numSymmetryFunctionDerivatives.size();
351 MPI_Pack(&ts2, 1, MPI_SIZE_T, buf, bs, &p, comm);
352 if (ts2 > 0)
353 {
354 MPI_Pack(&(it->numSymmetryFunctionDerivatives.front()), ts2, MPI_SIZE_T, buf, bs, &p, comm);
355 }
356
357#ifndef N2P2_NO_SF_CACHE
358 // Atom.cacheSizePerElement
359 ts2 = it->cacheSizePerElement.size();
360 MPI_Pack(&ts2, 1, MPI_SIZE_T, buf, bs, &p, comm);
361 if (ts2 > 0)
362 {
363 MPI_Pack(&(it->cacheSizePerElement.front()), ts2, MPI_SIZE_T, buf, bs, &p, comm);
364 }
365#endif
366
367 // Atom.G
368 ts2 = it->G.size();
369 MPI_Pack(&ts2, 1, MPI_SIZE_T, buf, bs, &p, comm);
370 if (ts2 > 0)
371 {
372 MPI_Pack(&(it->G.front()), ts2, MPI_DOUBLE, buf, bs, &p, comm);
373 }
374
375 // Atom.dEdG
376 ts2 = it->dEdG.size();
377 MPI_Pack(&ts2, 1, MPI_SIZE_T, buf, bs, &p, comm);
378 if (ts2 > 0)
379 {
380 MPI_Pack(&(it->dEdG.front()), ts2, MPI_DOUBLE, buf, bs, &p, comm);
381 }
382
383 // Atom.dQdG
384 ts2 = it->dQdG.size();
385 MPI_Pack(&ts2, 1, MPI_SIZE_T, buf, bs, &p, comm);
386 if (ts2 > 0)
387 {
388 MPI_Pack(&(it->dQdG.front()), ts2, MPI_DOUBLE, buf, bs, &p, comm);
389 }
390
391#ifdef N2P2_FULL_SFD_MEMORY
392 // Atom.dGdxia
393 ts2 = it->dGdxia.size();
394 MPI_Pack(&ts2, 1, MPI_SIZE_T, buf, bs, &p, comm);
395 if (ts2 > 0)
396 {
397 MPI_Pack(&(it->dGdxia.front()), ts2, MPI_DOUBLE, buf, bs, &p, comm);
398 }
399#endif
400
401 // Atom.dGdr
402 ts2 = it->dGdr.size();
403 MPI_Pack(&ts2, 1, MPI_SIZE_T, buf, bs, &p, comm);
404 if (ts2 > 0)
405 {
406 for (vector<Vec3D>::const_iterator it2 = it->dGdr.begin();
407 it2 != it->dGdr.end(); ++it2)
408 {
409 MPI_Pack(it2->r, 3, MPI_DOUBLE, buf, bs, &p, comm);
410 }
411 }
412
413 // Atom.neighbors
414 ts2 = it->neighbors.size();
415 MPI_Pack(&ts2, 1, MPI_SIZE_T, buf, bs, &p, comm);
416 if (ts2 > 0)
417 {
418 for (vector<Atom::Neighbor>::const_iterator it2 =
419 it->neighbors.begin(); it2 != it->neighbors.end(); ++it2)
420 {
421 // Neighbor
422 MPI_Pack(&(it2->index ), 1, MPI_SIZE_T , buf, bs, &p, comm);
423 MPI_Pack(&(it2->tag ), 1, MPI_INT64_T, buf, bs, &p, comm);
424 MPI_Pack(&(it2->element ), 1, MPI_SIZE_T , buf, bs, &p, comm);
425 MPI_Pack(&(it2->d ), 1, MPI_DOUBLE , buf, bs, &p, comm);
426 MPI_Pack( it2->dr.r , 3, MPI_DOUBLE , buf, bs, &p, comm);
427
428 size_t ts3 = 0;
429#ifndef N2P2_NO_SF_CACHE
430 // Neighbor.cache
431 ts3 = it2->cache.size();
432 MPI_Pack(&ts3, 1, MPI_SIZE_T, buf, bs, &p, comm);
433 if (ts3 > 0)
434 {
435 MPI_Pack(&(it2->cache.front()), ts3, MPI_DOUBLE, buf, bs, &p, comm);
436 }
437#endif
438
439 // Neighbor.dGdr
440 ts3 = it2->dGdr.size();
441 MPI_Pack(&ts3, 1, MPI_SIZE_T, buf, bs, &p, comm);
442 if (ts3 > 0)
443 {
444 for (vector<Vec3D>::const_iterator it3 =
445 it2->dGdr.begin(); it3 != it2->dGdr.end(); ++it3)
446 {
447 MPI_Pack(it3->r, 3, MPI_DOUBLE, buf, bs, &p, comm);
448 }
449 }
450 }
451 }
452 }
453 }
454
455 MPI_Send(&bs, 1, MPI_INT, dest, 0, comm);
456 MPI_Send(buf, bs, MPI_PACKED, dest, 0, comm);
457
458 delete[] buf;
459
460 return bs;
461}
462
463int Dataset::recvStructure(Structure* const structure, int src)
464{
465 unsigned char* buf = 0; // Receive buffer.
466 int bs = 0; // Receive buffer size.
467 int p = 0; // Receive buffer position.
468 int ts = 0; // Size for temporary stuff.
469 Structure* const s = structure; // Shortcut for structure.
470 MPI_Status ms;
471
472 // Receive buffer size and extract source.
473 MPI_Recv(&bs, 1, MPI_INT, src, 0, comm, &ms);
474 src = ms.MPI_SOURCE;
475
476 buf = new unsigned char[bs];
477
478 MPI_Recv(buf, bs, MPI_PACKED, src, 0, comm, &ms);
479
480 // Structure
481 MPI_Unpack(buf, bs, &p, &(s->isPeriodic ), 1, MPI_CHAR , comm);
482 MPI_Unpack(buf, bs, &p, &(s->isTriclinic ), 1, MPI_CHAR , comm);
483 MPI_Unpack(buf, bs, &p, &(s->hasNeighborList ), 1, MPI_CHAR , comm);
484 MPI_Unpack(buf, bs, &p, &(s->hasSymmetryFunctions ), 1, MPI_CHAR , comm);
485 MPI_Unpack(buf, bs, &p, &(s->hasSymmetryFunctionDerivatives), 1, MPI_CHAR , comm);
486 MPI_Unpack(buf, bs, &p, &(s->index ), 1, MPI_SIZE_T, comm);
487 MPI_Unpack(buf, bs, &p, &(s->numAtoms ), 1, MPI_SIZE_T, comm);
488 MPI_Unpack(buf, bs, &p, &(s->numElements ), 1, MPI_SIZE_T, comm);
489 MPI_Unpack(buf, bs, &p, &(s->numElementsPresent ), 1, MPI_SIZE_T, comm);
490 MPI_Unpack(buf, bs, &p, &(s->pbc ), 3, MPI_INT , comm);
491 MPI_Unpack(buf, bs, &p, &(s->energy ), 1, MPI_DOUBLE, comm);
492 MPI_Unpack(buf, bs, &p, &(s->energyRef ), 1, MPI_DOUBLE, comm);
493 MPI_Unpack(buf, bs, &p, &(s->charge ), 1, MPI_DOUBLE, comm);
494 MPI_Unpack(buf, bs, &p, &(s->chargeRef ), 1, MPI_DOUBLE, comm);
495 MPI_Unpack(buf, bs, &p, &(s->volume ), 1, MPI_DOUBLE, comm);
496 MPI_Unpack(buf, bs, &p, &(s->sampleType ), 1, MPI_INT , comm);
497
498 // Strucuture.comment
499 ts = 0;
500 MPI_Unpack(buf, bs, &p, &ts, 1, MPI_SIZE_T, comm);
501 char* comment = new char[ts];
502 MPI_Unpack(buf, bs, &p, comment, ts, MPI_CHAR, comm);
503 s->comment = comment;
504 delete[] comment;
505
506 // Structure.box
507 for (size_t i = 0; i < 3; ++i)
508 {
509 MPI_Unpack(buf, bs, &p, s->box[i].r, 3, MPI_DOUBLE, comm);
510 }
511
512 // Structure.invbox
513 for (size_t i = 0; i < 3; ++i)
514 {
515 MPI_Unpack(buf, bs, &p, s->invbox[i].r, 3, MPI_DOUBLE, comm);
516 }
517
518 // Structure.numAtomsPerElement
519 ts = 0;
520 MPI_Unpack(buf, bs, &p, &ts, 1, MPI_SIZE_T, comm);
521 if (ts > 0)
522 {
523 s->numAtomsPerElement.clear();
524 s->numAtomsPerElement.resize(ts, 0);
525 MPI_Unpack(buf, bs, &p, &(s->numAtomsPerElement.front()), ts, MPI_SIZE_T, comm);
526 }
527
528 // Structure.atoms
529 ts = 0;
530 MPI_Unpack(buf, bs, &p, &ts, 1, MPI_SIZE_T, comm);
531 if (ts > 0)
532 {
533 s->atoms.clear();
534 s->atoms.resize(ts);
535 for (vector<Atom>::iterator it = s->atoms.begin();
536 it != s->atoms.end(); ++it)
537 {
538 // Atom
539 MPI_Unpack(buf, bs, &p, &(it->hasNeighborList ), 1, MPI_CHAR, comm);
540 MPI_Unpack(buf, bs, &p, &(it->hasSymmetryFunctions ), 1, MPI_CHAR, comm);
541 MPI_Unpack(buf, bs, &p, &(it->hasSymmetryFunctionDerivatives), 1, MPI_CHAR, comm);
542 MPI_Unpack(buf, bs, &p, &(it->useChargeNeuron ), 1, MPI_CHAR, comm);
543 MPI_Unpack(buf, bs, &p, &(it->index ), 1, MPI_SIZE_T, comm);
544 MPI_Unpack(buf, bs, &p, &(it->indexStructure ), 1, MPI_SIZE_T, comm);
545 MPI_Unpack(buf, bs, &p, &(it->tag ), 1, MPI_INT64_T, comm);
546 MPI_Unpack(buf, bs, &p, &(it->element ), 1, MPI_SIZE_T, comm);
547 MPI_Unpack(buf, bs, &p, &(it->numNeighbors ), 1, MPI_SIZE_T, comm);
548 MPI_Unpack(buf, bs, &p, &(it->numNeighborsUnique ), 1, MPI_SIZE_T, comm);
549 MPI_Unpack(buf, bs, &p, &(it->numSymmetryFunctions ), 1, MPI_SIZE_T, comm);
550 MPI_Unpack(buf, bs, &p, &(it->energy ), 1, MPI_DOUBLE, comm);
551 MPI_Unpack(buf, bs, &p, &(it->chi ), 1, MPI_DOUBLE, comm);
552 MPI_Unpack(buf, bs, &p, &(it->charge ), 1, MPI_DOUBLE, comm);
553 MPI_Unpack(buf, bs, &p, &(it->chargeRef ), 1, MPI_DOUBLE, comm);
554 MPI_Unpack(buf, bs, &p, &(it->r.r ), 3, MPI_DOUBLE, comm);
555 MPI_Unpack(buf, bs, &p, &(it->f.r ), 3, MPI_DOUBLE, comm);
556 MPI_Unpack(buf, bs, &p, &(it->fRef.r ), 3, MPI_DOUBLE, comm);
557
558 // Atom.neighborsUnique
559 size_t ts2 = 0;
560 MPI_Unpack(buf, bs, &p, &ts2, 1, MPI_SIZE_T, comm);
561 if (ts2 > 0)
562 {
563 it->neighborsUnique.clear();
564 it->neighborsUnique.resize(ts2, 0);
565 MPI_Unpack(buf, bs, &p, &(it->neighborsUnique.front()), ts2, MPI_SIZE_T, comm);
566 }
567
568 // Atom.numNeighborsPerElement
569 ts2 = 0;
570 MPI_Unpack(buf, bs, &p, &ts2, 1, MPI_SIZE_T, comm);
571 if (ts2 > 0)
572 {
573 it->numNeighborsPerElement.clear();
574 it->numNeighborsPerElement.resize(ts2, 0);
575 MPI_Unpack(buf, bs, &p, &(it->numNeighborsPerElement.front()), ts2, MPI_SIZE_T, comm);
576 }
577
578 // Atom.numSymmetryFunctionDerivatives
579 ts2 = 0;
580 MPI_Unpack(buf, bs, &p, &ts2, 1, MPI_SIZE_T, comm);
581 if (ts2 > 0)
582 {
583 it->numSymmetryFunctionDerivatives.clear();
584 it->numSymmetryFunctionDerivatives.resize(ts2, 0);
585 MPI_Unpack(buf, bs, &p, &(it->numSymmetryFunctionDerivatives.front()), ts2, MPI_SIZE_T, comm);
586 }
587
588#ifndef N2P2_NO_SF_CACHE
589 // Atom.cacheSizePerElement
590 ts2 = 0;
591 MPI_Unpack(buf, bs, &p, &ts2, 1, MPI_SIZE_T, comm);
592 if (ts2 > 0)
593 {
594 it->cacheSizePerElement.clear();
595 it->cacheSizePerElement.resize(ts2, 0);
596 MPI_Unpack(buf, bs, &p, &(it->cacheSizePerElement.front()), ts2, MPI_SIZE_T, comm);
597 }
598#endif
599
600 // Atom.G
601 ts2 = 0;
602 MPI_Unpack(buf, bs, &p, &ts2, 1, MPI_SIZE_T, comm);
603 if (ts2 > 0)
604 {
605 it->G.clear();
606 it->G.resize(ts2, 0.0);
607 MPI_Unpack(buf, bs, &p, &(it->G.front()), ts2, MPI_DOUBLE, comm);
608 }
609
610 // Atom.dEdG
611 ts2 = 0;
612 MPI_Unpack(buf, bs, &p, &ts2, 1, MPI_SIZE_T, comm);
613 if (ts2 > 0)
614 {
615 it->dEdG.clear();
616 it->dEdG.resize(ts2, 0.0);
617 MPI_Unpack(buf, bs, &p, &(it->dEdG.front()), ts2, MPI_DOUBLE, comm);
618 }
619
620 // Atom.dQdG
621 ts2 = 0;
622 MPI_Unpack(buf, bs, &p, &ts2, 1, MPI_SIZE_T, comm);
623 if (ts2 > 0)
624 {
625 it->dQdG.clear();
626 it->dQdG.resize(ts2, 0.0);
627 MPI_Unpack(buf, bs, &p, &(it->dQdG.front()), ts2, MPI_DOUBLE, comm);
628 }
629
630#ifdef N2P2_FULL_SFD_MEMORY
631 // Atom.dGdxia
632 ts2 = 0;
633 MPI_Unpack(buf, bs, &p, &ts2, 1, MPI_SIZE_T, comm);
634 if (ts2 > 0)
635 {
636 it->dGdxia.clear();
637 it->dGdxia.resize(ts2, 0.0);
638 MPI_Unpack(buf, bs, &p, &(it->dGdxia.front()), ts2, MPI_DOUBLE, comm);
639 }
640#endif
641
642 // Atom.dGdr
643 ts2 = 0;
644 MPI_Unpack(buf, bs, &p, &ts2, 1, MPI_SIZE_T, comm);
645 if (ts2 > 0)
646 {
647 it->dGdr.clear();
648 it->dGdr.resize(ts2);
649 for (vector<Vec3D>::iterator it2 = it->dGdr.begin();
650 it2 != it->dGdr.end(); ++it2)
651 {
652 MPI_Unpack(buf, bs, &p, it2->r, 3, MPI_DOUBLE, comm);
653 }
654 }
655
656 // Atom.neighbors
657 ts2 = 0;
658 MPI_Unpack(buf, bs, &p, &ts2, 1, MPI_SIZE_T, comm);
659 if (ts2 > 0)
660 {
661 it->neighbors.clear();
662 it->neighbors.resize(ts2);
663 for (vector<Atom::Neighbor>::iterator it2 =
664 it->neighbors.begin(); it2 != it->neighbors.end(); ++it2)
665 {
666 // Neighbor
667 MPI_Unpack(buf, bs, &p, &(it2->index ), 1, MPI_SIZE_T , comm);
668 MPI_Unpack(buf, bs, &p, &(it2->tag ), 1, MPI_INT64_T, comm);
669 MPI_Unpack(buf, bs, &p, &(it2->element ), 1, MPI_SIZE_T , comm);
670 MPI_Unpack(buf, bs, &p, &(it2->d ), 1, MPI_DOUBLE , comm);
671 MPI_Unpack(buf, bs, &p, it2->dr.r , 3, MPI_DOUBLE , comm);
672
673 size_t ts3 = 0;
674#ifndef N2P2_NO_SF_CACHE
675 // Neighbor.cache
676 ts3 = 0;
677 MPI_Unpack(buf, bs, &p, &ts3, 1, MPI_SIZE_T, comm);
678 if (ts3 > 0)
679 {
680 it2->cache.clear();
681 it2->cache.resize(ts3, 0.0);
682 MPI_Unpack(buf, bs, &p, &(it2->cache.front()), ts3, MPI_DOUBLE, comm);
683 }
684#endif
685
686 // Neighbor.dGdr
687 ts3 = 0;
688 MPI_Unpack(buf, bs, &p, &ts3, 1, MPI_SIZE_T, comm);
689 if (ts3 > 0)
690 {
691 it2->dGdr.clear();
692 it2->dGdr.resize(ts3);
693 for (vector<Vec3D>::iterator it3 = it2->dGdr.begin();
694 it3 != it2->dGdr.end(); ++it3)
695 {
696 MPI_Unpack(buf, bs, &p, it3->r, 3, MPI_DOUBLE, comm);
697 }
698 }
699 }
700 }
701 }
702 }
703
704 delete[] buf;
705
706 return bs;
707}
708
709size_t Dataset::getNumStructures(ifstream& dataFile)
710{
711 size_t count = 0;
712 string line;
713 vector<string> splitLine;
714
715 while (getline(dataFile, line))
716 {
717 splitLine = split(reduce(line));
718 if (!splitLine.empty() && splitLine.at(0) == "begin") count++;
719 }
720
721 return count;
722}
723
725 bool excludeRank0,
726 string const& fileName)
727{
728 log << "\n";
729 log << "*** STRUCTURE DISTRIBUTION **************"
730 "**************************************\n";
731 log << "\n";
732
733 ifstream dataFile;
734 vector<size_t> numStructuresPerProc;
735
736 if (excludeRank0)
737 {
738 log << "No structures will be distributed to rank 0.\n";
739 if (numProcs == 1)
740 {
741 throw runtime_error("ERROR: Can not distribute structures, "
742 "at least 2 MPI tasks are required.\n");
743 }
744 }
745 size_t numReceivers = numProcs;
746 if (excludeRank0) numReceivers--;
747
748 if (myRank == 0)
749 {
750 log << strpr("Reading configurations from data file: %s.\n",
751 fileName.c_str());
752 dataFile.open(fileName.c_str());
754 log << strpr("Total number of structures: %zu\n", numStructures);
755 dataFile.clear();
756 dataFile.seekg(0);
757 }
758 MPI_Bcast(&numStructures, 1, MPI_SIZE_T, 0, comm);
759 if (numStructures < numReceivers)
760 {
761 throw runtime_error("ERROR: More receiving processors than "
762 "structures.\n");
763 }
764
765 numStructuresPerProc.resize(numProcs, 0);
766 if (myRank == 0)
767 {
768 size_t quotient = numStructures / numReceivers;
769 size_t remainder = numStructures % numReceivers;
770 for (size_t i = 0; i < (size_t) numProcs; i++)
771 {
772 if (i != 0 || (!excludeRank0))
773 {
774 numStructuresPerProc.at(i) = quotient;
775 if (remainder > 0 && i > 0 && i <= remainder)
776 {
777 numStructuresPerProc.at(i)++;
778 }
779 }
780 }
781 if (remainder == 0)
782 {
783 log << strpr("Number of structures per processor: %d\n", quotient);
784 }
785 else
786 {
787 log << strpr("Number of structures per"
788 " processor: %d (%d) or %d (%d)\n",
789 quotient,
790 numReceivers - remainder,
791 quotient + 1,
792 remainder);
793 }
794 }
795 MPI_Bcast(&(numStructuresPerProc.front()), numProcs, MPI_SIZE_T, 0, comm);
796
797 int bytesTransferred = 0;
798 size_t numMyStructures = numStructuresPerProc.at(myRank);
799 if (myRank == 0)
800 {
801 size_t countStructures = 0;
802 vector<size_t> countStructuresPerProc;
803
804 countStructuresPerProc.resize(numProcs, 0);
805
806 if (randomize)
807 {
808 while (countStructures < numStructures)
809 {
810 int proc = gsl_rng_uniform_int(rng, numProcs);
811 if (countStructuresPerProc.at(proc)
812 < numStructuresPerProc.at(proc))
813 {
814 if (proc == 0)
815 {
816 structures.push_back(Structure());
817 structures.back().setElementMap(elementMap);
818 structures.back().index = countStructures;
819 structures.back().readFromFile(dataFile);
821 }
822 else
823 {
824 Structure tmpStructure;
825 tmpStructure.setElementMap(elementMap);
826 tmpStructure.index = countStructures;
827 tmpStructure.readFromFile(dataFile);
828 removeEnergyOffset(tmpStructure);
829 bytesTransferred += sendStructure(tmpStructure, proc);
830 }
831 countStructuresPerProc.at(proc)++;
832 countStructures++;
833 }
834 }
835 }
836 else
837 {
838 for (int proc = 0; proc < numProcs; ++proc)
839 {
840 for (size_t i = 0; i < numStructuresPerProc.at(proc); ++i)
841 {
842 if (proc == 0)
843 {
844 structures.push_back(Structure());
845 structures.back().setElementMap(elementMap);
846 structures.back().index = countStructures;
847 structures.back().readFromFile(dataFile);
849 }
850 else
851 {
852 Structure tmpStructure;
853 tmpStructure.setElementMap(elementMap);
854 tmpStructure.index = countStructures;
855 tmpStructure.readFromFile(dataFile);
856 removeEnergyOffset(tmpStructure);
857 bytesTransferred += sendStructure(tmpStructure, proc);
858 }
859 countStructuresPerProc.at(proc)++;
860 countStructures++;
861 }
862 }
863 }
864 dataFile.close();
865 }
866 else
867 {
868 for (size_t i = 0; i < numMyStructures; i++)
869 {
870 structures.push_back(Structure());
871 structures.back().setElementMap(elementMap);
872 bytesTransferred += recvStructure(&(structures.back()), 0);
873 }
874 }
875
876 log << strpr("Distributed %zu structures,"
877 " %d bytes (%.2f MiB) transferred.\n",
879 bytesTransferred,
880 bytesTransferred / 1024. / 1024.);
881 log << strpr("Number of local structures: %zu\n", structures.size());
882 log << "*****************************************"
883 "**************************************\n";
884
885 return bytesTransferred;
886}
887
888size_t Dataset::prepareNumericForces(Structure& original, double delta)
889{
890 // Be sure that structure memory is cleared.
891 vector<Structure>().swap(structures);
892
893 // Send original to all processors.
894 for (int p = 1; p < numProcs; ++p)
895 {
896 if (myRank == 0) sendStructure(original, p);
897 else if (myRank == p) recvStructure(&original, 0);
898 }
899
900 vector<size_t> numStructuresPerProc(numProcs, 0);
901 // Number of structures that need to be calculated:
902 // - Original structure +
903 // - Two times for each force component.
904 numStructures = 1 + 6 * original.numAtoms;
905 size_t quotient = numStructures / numProcs;
906 size_t remainder = numStructures % numProcs;
907 for (size_t i = 0; i < (size_t) numProcs; i++)
908 {
909 numStructuresPerProc.at(i) = quotient;
910 if (remainder > 0 && i < remainder) numStructuresPerProc.at(i)++;
911 }
912
913 // Rank 0 needs an unaltered version.
914 if (myRank == 0)
915 {
916 structures.push_back(original);
917 structures.back().setElementMap(elementMap);
918 structures.back().comment = "original";
919 }
920 // Now make as many copies as each processor needs.
921 size_t count = 0;
922 size_t iAtom = 0;
923 size_t ixyz = 0;
924 int sign = 0;
925 for (int p = 0; p < numProcs; ++p)
926 {
927 size_t istart = 0;
928 if (p == 0) istart = 1;
929 for (size_t i = istart; i < numStructuresPerProc.at(p); ++i)
930 {
931 iAtom = count / 6;
932 ixyz = count % 3;
933 sign = 2 * ((count % 6) / 3) - 1;
934 if (p == myRank)
935 {
936 structures.push_back(original);
937 Structure& s = structures.back();
939 // Write identifiers to comment field.
940 s.comment = strpr("%zu %zu %zu %d", count, iAtom, ixyz, sign);
941 // Modify atom position.
942 s.atoms.at(iAtom).r[ixyz] += sign * delta;
943 if (s.isPeriodic) s.remap(s.atoms.at(iAtom));
944 }
945 count++;
946 }
947 }
948
949 return numStructures;
950}
951
953{
954 for (vector<Structure>::iterator it = structures.begin();
955 it != structures.end(); ++it)
956 {
957 it->toNormalizedUnits(meanEnergy, convEnergy, convLength, convCharge);
958 }
959
960 return;
961}
962
964{
965 for (vector<Structure>::iterator it = structures.begin();
966 it != structures.end(); ++it)
967 {
968 it->toPhysicalUnits(meanEnergy, convEnergy, convLength, convCharge);
969 }
970
971 return;
972}
973
975{
976 for (vector<Element>::iterator it = elements.begin();
977 it != elements.end(); ++it)
978 {
979 // If no atoms of this element exist on this proc, create empty
980 // statistics.
981 if (it->statistics.data.size() == 0)
982 {
983 log << strpr("WARNING: No statistics for element %zu (%2s) found, "
984 "process %d has no corresponding atoms, creating "
985 "empty statistics.\n",
986 it->getIndex(),
987 it->getSymbol().c_str(),
988 myRank);
989 }
990 for (size_t i = 0; i < it->numSymmetryFunctions(); ++i)
991 {
992 SymFncStatistics::Container& c = it->statistics.data[i];
993 MPI_Allreduce(MPI_IN_PLACE, &(c.count), 1, MPI_SIZE_T, MPI_SUM, comm);
994 MPI_Allreduce(MPI_IN_PLACE, &(c.min ), 1, MPI_DOUBLE, MPI_MIN, comm);
995 MPI_Allreduce(MPI_IN_PLACE, &(c.max ), 1, MPI_DOUBLE, MPI_MAX, comm);
996 MPI_Allreduce(MPI_IN_PLACE, &(c.sum ), 1, MPI_DOUBLE, MPI_SUM, comm);
997 MPI_Allreduce(MPI_IN_PLACE, &(c.sum2 ), 1, MPI_DOUBLE, MPI_SUM, comm);
998 }
999 }
1000
1001 return;
1002}
1003
1004void Dataset::writeSymmetryFunctionScaling(string const& fileName)
1005{
1006 log << "\n";
1007 log << "*** SYMMETRY FUNCTION SCALING ***********"
1008 "**************************************\n";
1009 log << "\n";
1010
1011 if (myRank == 0)
1012 {
1013 log << strpr("Writing symmetry function scaling file: %s.\n",
1014 fileName.c_str());
1015 ofstream sFile;
1016 sFile.open(fileName.c_str());
1017
1018 // File header.
1019 vector<string> title;
1020 vector<string> colName;
1021 vector<string> colInfo;
1022 vector<size_t> colSize;
1023 title.push_back("Symmetry function scaling data.");
1024 colSize.push_back(10);
1025 colName.push_back("e_index");
1026 colInfo.push_back("Element index.");
1027 colSize.push_back(10);
1028 colName.push_back("sf_index");
1029 colInfo.push_back("Symmetry function index.");
1030 colSize.push_back(24);
1031 colName.push_back("sf_min");
1032 colInfo.push_back("Symmetry function minimum.");
1033 colSize.push_back(24);
1034 colName.push_back("sf_max");
1035 colInfo.push_back("Symmetry function maximum.");
1036 colSize.push_back(24);
1037 colName.push_back("sf_mean");
1038 colInfo.push_back("Symmetry function mean.");
1039 colSize.push_back(24);
1040 colName.push_back("sf_sigma");
1041 colInfo.push_back("Symmetry function sigma.");
1042 appendLinesToFile(sFile,
1043 createFileHeader(title, colSize, colName, colInfo));
1044
1045 log << "\n";
1046 log << "Abbreviations:\n";
1047 log << "--------------\n";
1048 log << "ind ...... Symmetry function index.\n";
1049 log << "min ...... Minimum symmetry function value.\n";
1050 log << "max ...... Maximum symmetry function value.\n";
1051 log << "mean ..... Mean symmetry function value.\n";
1052 log << "sigma .... Standard deviation of symmetry function values.\n";
1053 log << "spread ... (max - min) / sigma.\n";
1054 log << "\n";
1055 for (vector<Element>::const_iterator it = elements.begin();
1056 it != elements.end(); ++it)
1057 {
1058 log << strpr("Scaling data for symmetry functions element %2s :\n",
1059 it->getSymbol().c_str());
1060 log << "-----------------------------------------"
1061 "--------------------------------------\n";
1062 log << " ind min max mean sigma spread\n";
1063 log << "-----------------------------------------"
1064 "--------------------------------------\n";
1065 for (size_t i = 0; i < it->numSymmetryFunctions(); ++i)
1066 {
1068 = it->statistics.data.at(i);
1069 size_t n = c.count;
1070 double const mean = c.sum / n;
1071 double const sigma = sqrt((c.sum2 - c.sum * c.sum / n)
1072 / (n - 1));
1073 double const spread = (c.max - c.min) / sigma;
1074 sFile << strpr("%10d %10d %24.16E %24.16E %24.16E %24.16E\n",
1075 it->getIndex() + 1,
1076 i + 1,
1077 c.min,
1078 c.max,
1079 mean,
1080 sigma);
1081 log << strpr("%4zu %9.2E %9.2E %9.2E %9.2E %9.2E\n",
1082 i + 1,
1083 c.min,
1084 c.max,
1085 mean,
1086 sigma,
1087 spread);
1088 }
1089 log << "-----------------------------------------"
1090 "--------------------------------------\n";
1091 }
1092 // Finally decided to remove this legacy line...
1093 //sFile << strpr("%f %f\n", 0.0, 0.0);
1094 sFile.close();
1095 }
1096
1097 log << "*****************************************"
1098 "**************************************\n";
1099
1100 return;
1101}
1102
1104 string fileNameFormat)
1105{
1106 log << "\n";
1107 log << "*** SYMMETRY FUNCTION HISTOGRAMS ********"
1108 "**************************************\n";
1109 log << "\n";
1110
1111 // Initialize histograms.
1112 numBins--;
1113 vector<vector<gsl_histogram*> > histograms;
1114 for (vector<Element>::const_iterator it = elements.begin();
1115 it != elements.end(); ++it)
1116 {
1117 histograms.push_back(vector<gsl_histogram*>());
1118 for (size_t i = 0; i < it->numSymmetryFunctions(); ++i)
1119 {
1120 double l = safeFind(it->statistics.data, i).min;
1121 double h = safeFind(it->statistics.data, i).max;
1122 if (l < h)
1123 {
1124 // Add an extra bin at the end to cover complete range.
1125 h += (h - l) / numBins;
1126 histograms.back().push_back(gsl_histogram_alloc(numBins + 1));
1127 gsl_histogram_set_ranges_uniform(histograms.back().back(),
1128 l,
1129 h);
1130 }
1131 else
1132 {
1133 // Use nullptr so signalize non-existing histogram.
1134 histograms.back().push_back(nullptr);
1135 log << strpr("WARNING: Symmetry function min equals max, "
1136 "ommitting histogram (Element %2s SF %4zu "
1137 "(line %4zu).\n",
1138 it->getSymbol().c_str(),
1139 i,
1140 it->getSymmetryFunction(i).getLineNumber() + 1);
1141 }
1142 }
1143 }
1144
1145 // Increment histograms with symmetry function values.
1146 for (vector<Structure>::const_iterator it = structures.begin();
1147 it != structures.end(); ++it)
1148 {
1149 for (vector<Atom>::const_iterator it2 = it->atoms.begin();
1150 it2 != it->atoms.end(); ++it2)
1151 {
1152 size_t e = it2->element;
1153 vector<gsl_histogram*>& h = histograms.at(e);
1154 for (size_t s = 0; s < it2->G.size(); ++s)
1155 {
1156 if (h.at(s) == nullptr) continue;
1157 gsl_histogram_increment(h.at(s), it2->G.at(s));
1158 }
1159 }
1160 }
1161
1162 // Collect histograms from all processes.
1163 for (vector<vector<gsl_histogram*> >::const_iterator it =
1164 histograms.begin(); it != histograms.end(); ++it)
1165 {
1166 for (vector<gsl_histogram*>::const_iterator it2 = it->begin();
1167 it2 != it->end(); ++it2)
1168 {
1169 if ((*it2) == nullptr) continue;
1170 MPI_Allreduce(MPI_IN_PLACE, (*it2)->bin, numBins + 1, MPI_DOUBLE, MPI_SUM, comm);
1171 }
1172 }
1173
1174 // Write histogram to file.
1175 if (myRank == 0)
1176 {
1177 log << strpr("Writing histograms with %zu bins.\n", numBins + 1);
1178 for (size_t e = 0; e < elements.size(); ++e)
1179 {
1180 for (size_t s = 0; s < elements.at(e).numSymmetryFunctions(); ++s)
1181 {
1182 gsl_histogram*& h = histograms.at(e).at(s);
1183 if (h == nullptr) continue;
1184 FILE* fp = 0;
1185 string fileName = strpr(fileNameFormat.c_str(),
1187 s + 1);
1188 fp = fopen(fileName.c_str(), "w");
1189 if (fp == 0)
1190 {
1191 throw runtime_error(strpr("ERROR: Could not open file:"
1192 " %s.\n", fileName.c_str()));
1193 }
1194 vector<string> info = elements.at(e).infoSymmetryFunction(s);
1195 for (vector<string>::const_iterator it = info.begin();
1196 it != info.end(); ++it)
1197 {
1198 fprintf(fp, "#SFINFO %s\n", it->c_str());
1199 }
1201 = elements.at(e).statistics.data.at(s);
1202 size_t n = c.count;
1203 fprintf(fp, "#SFINFO min %15.8E\n", c.min);
1204 fprintf(fp, "#SFINFO max %15.8E\n", c.max);
1205 fprintf(fp, "#SFINFO mean %15.8E\n", c.sum / n);
1206 fprintf(fp, "#SFINFO sigma %15.8E\n",
1207 sqrt(1.0 / (n - 1) * (c.sum2 - c.sum * c.sum / n)));
1208
1209 // File header.
1210 vector<string> title;
1211 vector<string> colName;
1212 vector<string> colInfo;
1213 vector<size_t> colSize;
1214 title.push_back("Symmetry function histogram.");
1215 colSize.push_back(16);
1216 colName.push_back("symfunc_l");
1217 colInfo.push_back("Symmetry function value, left bin limit.");
1218 colSize.push_back(16);
1219 colName.push_back("symfunc_r");
1220 colInfo.push_back("Symmetry function value, right bin limit.");
1221 colSize.push_back(16);
1222 colName.push_back("hist");
1223 colInfo.push_back("Histogram count.");
1225 createFileHeader(title,
1226 colSize,
1227 colName,
1228 colInfo));
1229
1230 gsl_histogram_fprintf(fp, h, "%16.8E", "%16.8E");
1231 fflush(fp);
1232 fclose(fp);
1233 fp = 0;
1234 }
1235 }
1236 }
1237
1238 for (vector<vector<gsl_histogram*> >::const_iterator it =
1239 histograms.begin(); it != histograms.end(); ++it)
1240 {
1241 for (vector<gsl_histogram*>::const_iterator it2 = it->begin();
1242 it2 != it->end(); ++it2)
1243 {
1244 if ((*it2) == nullptr) continue;
1245 gsl_histogram_free(*it2);
1246 }
1247 }
1248
1249 log << "*****************************************"
1250 "**************************************\n";
1251
1252 return;
1253}
1254
1256{
1257 log << "\n";
1258 log << "*** SYMMETRY FUNCTION FILE **************"
1259 "**************************************\n";
1260 log << "\n";
1261
1262 // Create empty file.
1263 log << strpr("Writing symmetry functions to file: %s\n", fileName.c_str());
1264 if (myRank == 0)
1265 {
1266 ofstream file;
1267 file.open(fileName.c_str());
1268 file.close();
1269 }
1270 MPI_Barrier(comm);
1271
1272 // Prepare structure iterator.
1273 vector<Structure>::const_iterator it = structures.begin();
1274 // Loop over all structures (on each proc the local structures are stored
1275 // with increasing index).
1276 for (size_t i = 0; i < numStructures; ++i)
1277 {
1278 // If this proc holds structure with matching index,
1279 // open file and write symmetry functions.
1280 if (it != structures.end() && i == it->index)
1281 {
1282 ofstream file;
1283 file.open(fileName.c_str(), ios_base::app);
1284 file << strpr("%6zu\n", it->numAtoms);
1285 // Loop over atoms.
1286 for (vector<Atom>::const_iterator it2 = it->atoms.begin();
1287 it2 != it->atoms.end(); ++it2)
1288 {
1289 // Loop over symmetry functions.
1290 file << strpr("%3zu ", elementMap.atomicNumber(it2->element));
1291 for (vector<double>::const_iterator it3 = it2->G.begin();
1292 it3 != it2->G.end(); ++it3)
1293 {
1294 file << strpr(" %14.10f", *it3);
1295 }
1296 file << '\n';
1297 }
1298 // There is no charge NN, so first and last entry is zero.
1299 double energy = 0.0;
1300 if (normalize) energy = physicalEnergy(*it);
1301 else energy = it->energyRef;
1302 energy += getEnergyOffset(*it);
1303 energy /= it->numAtoms;
1304 file << strpr(" %19.10f %19.10f %19.10f %19.10f\n",
1305 0.0, energy, energy, 0.0);
1306 file.flush();
1307 file.close();
1308 // Iterate to next structure.
1309 ++it;
1310 }
1311 MPI_Barrier(comm);
1312 }
1313
1314 log << "*****************************************"
1315 "**************************************\n";
1316
1317 return;
1318}
1319
1320size_t Dataset::writeNeighborHistogram(string const& fileNameHisto,
1321 string const& fileNameStructure)
1322{
1323 log << "\n";
1324 log << "*** NEIGHBOR STATISTICS/HISTOGRAM *******"
1325 "**************************************\n";
1326 log << "\n";
1327
1328 ofstream statFile;
1329 string fileNameLocal = strpr("%s.%04d", fileNameStructure.c_str(), myRank);
1330 statFile.open(fileNameLocal.c_str());
1331 if (myRank == 0)
1332 {
1333 // File header.
1334 vector<string> title;
1335 vector<string> colName;
1336 vector<string> colInfo;
1337 vector<size_t> colSize;
1338 title.push_back("Neighbor statistics for each structure.");
1339 colSize.push_back(10);
1340 colName.push_back("struct");
1341 colInfo.push_back("Index of structure (starting with 1).");
1342 colSize.push_back(10);
1343 colName.push_back("natoms");
1344 colInfo.push_back("Number of atoms in structure.");
1345 colSize.push_back(10);
1346 colName.push_back("min");
1347 colInfo.push_back("Minimum number of neighbors over all atoms.");
1348 colSize.push_back(10);
1349 colName.push_back("max");
1350 colInfo.push_back("Maximum number of neighbors over all atoms.");
1351 colSize.push_back(16);
1352 colName.push_back("mean");
1353 colInfo.push_back("Mean number of neighbors over all atoms.");
1354 appendLinesToFile(statFile,
1355 createFileHeader(title, colSize, colName, colInfo));
1356 }
1357
1358 // Compute per-structure statistics and prepare histogram boundaries.
1359 size_t numAtomsGlobal = 0;
1360 size_t minNeighborsGlobal = numeric_limits<size_t>::max();
1361 size_t maxNeighborsGlobal = 0;
1362 double meanNeighborsGlobal = 0.0;
1363 for (auto const& s : structures)
1364 {
1365 size_t minNeighbors = numeric_limits<size_t>::max();
1366 size_t maxNeighbors = 0;
1367 double meanNeighbors = 0.0;
1368 for (auto const& a : s.atoms)
1369 {
1370 size_t const n = a.numNeighbors;
1371 minNeighbors = min(minNeighbors, n);
1372 maxNeighbors = max(maxNeighbors, n);
1373 meanNeighbors += n;
1374 }
1375 numAtomsGlobal += s.numAtoms;
1376 minNeighborsGlobal = min(minNeighborsGlobal, minNeighbors);
1377 maxNeighborsGlobal = max(maxNeighborsGlobal, maxNeighbors);
1378 meanNeighborsGlobal += meanNeighbors;
1379 statFile << strpr("%10zu %10zu %10zu %10zu %16.8E\n",
1380 s.index + 1,
1381 s.numAtoms,
1383 maxNeighbors,
1384 meanNeighbors / s.numAtoms);
1385 }
1386 statFile.flush();
1387 statFile.close();
1388 MPI_Allreduce(MPI_IN_PLACE, &numAtomsGlobal , 1, MPI_SIZE_T, MPI_SUM, comm);
1389 MPI_Allreduce(MPI_IN_PLACE, &minNeighborsGlobal , 1, MPI_SIZE_T, MPI_MIN, comm);
1390 MPI_Allreduce(MPI_IN_PLACE, &maxNeighborsGlobal , 1, MPI_SIZE_T, MPI_MAX, comm);
1391 MPI_Allreduce(MPI_IN_PLACE, &meanNeighborsGlobal, 1, MPI_DOUBLE, MPI_SUM, comm);
1392 log << strpr("Minimum number of neighbors: %d\n", minNeighborsGlobal);
1393 log << strpr("Mean number of neighbors: %.1f\n",
1394 meanNeighborsGlobal / numAtomsGlobal);
1395 log << strpr("Maximum number of neighbors: %d\n", maxNeighborsGlobal);
1396
1397 // Calculate neighbor histogram.
1398 gsl_histogram* histNeighbors = NULL;
1399 histNeighbors = gsl_histogram_alloc(maxNeighborsGlobal + 1);
1400 gsl_histogram_set_ranges_uniform(histNeighbors,
1401 -0.5,
1402 maxNeighborsGlobal + 0.5);
1403 for (vector<Structure>::const_iterator it = structures.begin();
1404 it != structures.end(); ++it)
1405 {
1406 for (vector<Atom>::const_iterator it2 = it->atoms.begin();
1407 it2 != it->atoms.end(); ++it2)
1408 {
1409 gsl_histogram_increment(histNeighbors, it2->numNeighbors);
1410 }
1411 }
1412 MPI_Allreduce(MPI_IN_PLACE, histNeighbors->bin, maxNeighborsGlobal + 1, MPI_DOUBLE, MPI_SUM, comm);
1413
1414 // Write histogram to file.
1415 if (myRank == 0)
1416 {
1417 log << strpr("Neighbor histogram file: %s.\n", fileNameHisto.c_str());
1418 FILE* fp = 0;
1419 fp = fopen(fileNameHisto.c_str(), "w");
1420 if (fp == 0)
1421 {
1422 throw runtime_error(strpr("ERROR: Could not open file: %s.\n",
1423 fileNameHisto.c_str()));
1424 }
1425
1426 // File header.
1427 vector<string> title;
1428 vector<string> colName;
1429 vector<string> colInfo;
1430 vector<size_t> colSize;
1431 title.push_back("Neighbor count histogram.");
1432 colSize.push_back(9);
1433 colName.push_back("neigh_l");
1434 colInfo.push_back("Number of neighbors, left bin limit.");
1435 colSize.push_back(9);
1436 colName.push_back("neigh_r");
1437 colInfo.push_back("Number of neighbors, right bin limit.");
1438 colSize.push_back(16);
1439 colName.push_back("hist");
1440 colInfo.push_back("Histogram count.");
1442 createFileHeader(title, colSize, colName, colInfo));
1443
1444 gsl_histogram_fprintf(fp, histNeighbors, "%9.1f", "%16.8E");
1445 fflush(fp);
1446 fclose(fp);
1447 fp = 0;
1448 }
1449
1450 MPI_Barrier(comm);
1451 if (myRank == 0)
1452 {
1453 log << strpr("Combining per-structure neighbor statistics file: %s.\n",
1454 fileNameStructure.c_str());
1455 combineFiles(fileNameStructure);
1456 }
1457
1458 gsl_histogram_free(histNeighbors);
1459
1460 log << "*****************************************"
1461 "**************************************\n";
1462
1463 return maxNeighborsGlobal;
1464}
1465
1467{
1468 log << "\n";
1469 log << "*** NEIGHBOR LIST ***********************"
1470 "**************************************\n";
1471 log << "\n";
1472
1473 log << "Sorting neighbor lists according to element and distance.\n";
1474
1475 for (vector<Structure>::iterator it = structures.begin();
1476 it != structures.end(); ++it)
1477 {
1478 for (vector<Atom>::iterator it2 = it->atoms.begin();
1479 it2 != it->atoms.end(); ++it2)
1480 {
1481 sort(it2->neighbors.begin(), it2->neighbors.end());
1482 }
1483 }
1484
1485 log << "*****************************************"
1486 "**************************************\n";
1487
1488 return;
1489}
1490void Dataset::writeNeighborLists(string const& fileName)
1491{
1492 log << "\n";
1493 log << "*** NEIGHBOR LIST ***********************"
1494 "**************************************\n";
1495 log << "\n";
1496
1497 string fileNameLocal = strpr("%s.%04d", fileName.c_str(), myRank);
1498 ofstream fileLocal;
1499 fileLocal.open(fileNameLocal.c_str());
1500
1501 for (vector<Structure>::const_iterator it = structures.begin();
1502 it != structures.end(); ++it)
1503 {
1504 fileLocal << strpr("%zu\n", it->numAtoms);
1505 for (vector<Atom>::const_iterator it2 = it->atoms.begin();
1506 it2 != it->atoms.end(); ++it2)
1507 {
1508 fileLocal << strpr("%zu", elementMap.atomicNumber(it2->element));
1509 for (size_t i = 0; i < numElements; ++i)
1510 {
1511 fileLocal << strpr(" %zu", it2->numNeighborsPerElement.at(i));
1512 }
1513 for (vector<Atom::Neighbor>::const_iterator it3
1514 = it2->neighbors.begin(); it3 != it2->neighbors.end(); ++it3)
1515 {
1516 fileLocal << strpr(" %zu", it3->index);
1517 }
1518 fileLocal << '\n';
1519 }
1520 }
1521
1522 fileLocal.flush();
1523 fileLocal.close();
1524 MPI_Barrier(comm);
1525
1526 log << strpr("Writing neighbor lists to file: %s.\n", fileName.c_str());
1527
1528 if (myRank == 0) combineFiles(fileName);
1529
1530 log << "*****************************************"
1531 "**************************************\n";
1532
1533 return;
1534}
1535
1537 vector<vector<size_t> > neighCutoff,
1538 bool derivatives,
1539 string const& fileNamePrefix)
1540{
1541 log << "\n";
1542 log << "*** ATOMIC ENVIRONMENT ******************"
1543 "**************************************\n";
1544 log << "\n";
1545
1546 string const fileNamePrefixG = strpr("%s.G" , fileNamePrefix.c_str());
1547 string const fileNamePrefixdGdx = strpr("%s.dGdx", fileNamePrefix.c_str());
1548 string const fileNamePrefixdGdy = strpr("%s.dGdy", fileNamePrefix.c_str());
1549 string const fileNamePrefixdGdz = strpr("%s.dGdz", fileNamePrefix.c_str());
1550
1551 string const fileNameLocalG = strpr("%s.%04d",
1552 fileNamePrefixG.c_str(), myRank);
1553 string const fileNameLocaldGdx = strpr("%s.%04d",
1554 fileNamePrefixdGdx.c_str(), myRank);
1555 string const fileNameLocaldGdy = strpr("%s.%04d",
1556 fileNamePrefixdGdy.c_str(), myRank);
1557 string const fileNameLocaldGdz = strpr("%s.%04d",
1558 fileNamePrefixdGdz.c_str(), myRank);
1559
1560 ofstream fileLocalG;
1561 ofstream fileLocaldGdx;
1562 ofstream fileLocaldGdy;
1563 ofstream fileLocaldGdz;
1564
1565 fileLocalG.open(fileNameLocalG.c_str());
1566 if (derivatives)
1567 {
1568 fileLocaldGdx.open(fileNameLocaldGdx.c_str());
1569 fileLocaldGdy.open(fileNameLocaldGdy.c_str());
1570 fileLocaldGdz.open(fileNameLocaldGdz.c_str());
1571 }
1572
1573 log << "Preparing symmetry functions for atomic environment file(s).\n";
1574 for (size_t i = 0; i < numElements; ++i)
1575 {
1576 for (size_t j = 0; j < numElements; ++j)
1577 {
1578 log << strpr("Maximum number of %2s neighbors for central %2s "
1579 "atoms: %zu\n",
1580 elementMap[j].c_str(),
1581 elementMap[i].c_str(),
1582 neighCutoff.at(i).at(j));
1583 }
1584 }
1585
1586 vector<size_t> neighCount(numElements, 0);
1587 for (vector<Structure>::const_iterator its = structures.begin();
1588 its != structures.end(); ++its)
1589 {
1590 for (vector<Atom>::const_iterator ita = its->atoms.begin();
1591 ita != its->atoms.end(); ++ita)
1592 {
1593 size_t const ea = ita->element;
1594 for (size_t i = 0; i < numElements; ++i)
1595 {
1596 if (ita->numNeighborsPerElement.at(i)
1597 < neighCutoff.at(ea).at(i))
1598 {
1599 throw runtime_error(strpr(
1600 "ERROR: Not enough neighbor atoms, cannot create "
1601 "atomic environment file. Reduce neighbor cutoff for "
1602 "element %2s.\n", elementMap[i].c_str()).c_str());
1603 }
1604 }
1605 fileLocalG << strpr("%2s", elementMap[ita->element].c_str());
1606 fileLocaldGdx << strpr("%2s", elementMap[ita->element].c_str());
1607 fileLocaldGdy << strpr("%2s", elementMap[ita->element].c_str());
1608 fileLocaldGdz << strpr("%2s", elementMap[ita->element].c_str());
1609 // Write atom's own symmetry functions (and derivatives).
1610 for (vector<double>::const_iterator it = ita->G.begin();
1611 it != ita->G.end(); ++it)
1612 {
1613 fileLocalG << strpr(" %16.8E", (*it));
1614 }
1615 if (derivatives)
1616 {
1617 for (vector<Vec3D>::const_iterator it = ita->dGdr.begin();
1618 it != ita->dGdr.end(); ++it)
1619 {
1620 fileLocaldGdx << strpr(" %16.8E", (*it)[0]);
1621 fileLocaldGdy << strpr(" %16.8E", (*it)[1]);
1622 fileLocaldGdz << strpr(" %16.8E", (*it)[2]);
1623 }
1624 }
1625 // Write symmetry functions of neighbors
1626 for (vector<Atom::Neighbor>::const_iterator itn
1627 = ita->neighbors.begin(); itn != ita->neighbors.end(); ++itn)
1628 {
1629 size_t const i = itn->index;
1630 size_t const en = itn->element;
1631 if (neighCount.at(en) < neighCutoff.at(ea).at(en))
1632 {
1633 // Look up symmetry function at Atom instance of neighbor.
1634 Atom const& a = its->atoms.at(i);
1635 for (vector<double>::const_iterator it = a.G.begin();
1636 it != a.G.end(); ++it)
1637 {
1638 fileLocalG << strpr(" %16.8E", (*it));
1639 }
1640 // Log derivatives directly from Neighbor instance.
1641 if (derivatives)
1642 {
1643 // Find atom in neighbor list of neighbor atom.
1644 vector<Atom::Neighbor>::const_iterator itan = find_if(
1645 a.neighbors.begin(), a.neighbors.end(),
1646 [&ita](Atom::Neighbor const& n)
1647 {
1648 return n.index == ita->index;
1649 });
1650 for (vector<Vec3D>::const_iterator it
1651 = itan->dGdr.begin();
1652 it != itan->dGdr.end(); ++it)
1653 {
1654 fileLocaldGdx << strpr(" %16.8E", (*it)[0]);
1655 fileLocaldGdy << strpr(" %16.8E", (*it)[1]);
1656 fileLocaldGdz << strpr(" %16.8E", (*it)[2]);
1657 }
1658 }
1659 neighCount.at(en)++;
1660 }
1661 }
1662 fileLocalG << '\n';
1663 if (derivatives)
1664 {
1665 fileLocaldGdx << '\n';
1666 fileLocaldGdy << '\n';
1667 fileLocaldGdz << '\n';
1668 }
1669 // Reset neighbor counter.
1670 fill(neighCount.begin(), neighCount.end(), 0);
1671 }
1672 }
1673
1674 fileLocalG.flush();
1675 fileLocalG.close();
1676 if (derivatives)
1677 {
1678 fileLocaldGdx.flush();
1679 fileLocaldGdx.close();
1680 fileLocaldGdy.flush();
1681 fileLocaldGdy.close();
1682 fileLocaldGdz.flush();
1683 fileLocaldGdz.close();
1684 }
1685 MPI_Barrier(comm);
1686
1687 if (myRank == 0)
1688 {
1689 log << strpr("Combining atomic environment file: %s.\n",
1690 fileNamePrefixG.c_str());
1691 combineFiles(fileNamePrefixG);
1692 if (derivatives)
1693 {
1694 log << strpr("Combining atomic environment file: %s.\n",
1695 fileNamePrefixdGdx.c_str());
1696 combineFiles(fileNamePrefixdGdx);
1697 log << strpr("Combining atomic environment file: %s.\n",
1698 fileNamePrefixdGdy.c_str());
1699 combineFiles(fileNamePrefixdGdy);
1700 log << strpr("Combining atomic environment file: %s.\n",
1701 fileNamePrefixdGdz.c_str());
1702 combineFiles(fileNamePrefixdGdz);
1703 }
1704 }
1705
1706 log << "*****************************************"
1707 "**************************************\n";
1708
1709 return;
1710}
1711
1712void Dataset::collectError(string const& property,
1713 map<string, double>& error,
1714 size_t& count) const
1715{
1716 if (property == "energy")
1717 {
1718 MPI_Allreduce(MPI_IN_PLACE, &count , 1, MPI_SIZE_T, MPI_SUM, comm);
1719 MPI_Allreduce(MPI_IN_PLACE, &(error.at("RMSEpa")), 1, MPI_DOUBLE, MPI_SUM, comm);
1720 MPI_Allreduce(MPI_IN_PLACE, &(error.at("RMSE")) , 1, MPI_DOUBLE, MPI_SUM, comm);
1721 MPI_Allreduce(MPI_IN_PLACE, &(error.at("MAEpa")) , 1, MPI_DOUBLE, MPI_SUM, comm);
1722 MPI_Allreduce(MPI_IN_PLACE, &(error.at("MAE")) , 1, MPI_DOUBLE, MPI_SUM, comm);
1723 error.at("RMSEpa") = sqrt(error.at("RMSEpa") / count);
1724 error.at("RMSE") = sqrt(error.at("RMSE") / count);
1725 error.at("MAEpa") = error.at("MAEpa") / count;
1726 error.at("MAE") = error.at("MAE") / count;
1727 }
1728 else if (property == "force" || property == "charge")
1729 {
1730 MPI_Allreduce(MPI_IN_PLACE, &count , 1, MPI_SIZE_T, MPI_SUM, comm);
1731 MPI_Allreduce(MPI_IN_PLACE, &(error.at("RMSE")), 1, MPI_DOUBLE, MPI_SUM, comm);
1732 MPI_Allreduce(MPI_IN_PLACE, &(error.at("MAE")) , 1, MPI_DOUBLE, MPI_SUM, comm);
1733 error.at("RMSE") = sqrt(error.at("RMSE") / count);
1734 error.at("MAE") = error.at("MAE") / count;
1735 }
1736 else
1737 {
1738 throw runtime_error("ERROR: Unknown property for error collection.\n");
1739 }
1740
1741 return;
1742}
1743
1744void Dataset::combineFiles(string filePrefix) const
1745{
1746 ofstream combinedFile(filePrefix.c_str(), ios::binary);
1747 if (!combinedFile.is_open())
1748 {
1749 throw runtime_error(strpr("ERROR: Could not open file: %s.\n",
1750 filePrefix.c_str()));
1751 }
1752 for (int i = 0; i < numProcs; ++i)
1753 {
1754 string procFileName = strpr("%s.%04d", filePrefix.c_str(), i);
1755 ifstream procFile(procFileName.c_str(), ios::binary);
1756 if (!procFile.is_open())
1757 {
1758 throw runtime_error(strpr("ERROR: Could not open file: %s.\n",
1759 procFileName.c_str()));
1760 }
1761 // If file is empty, do not get rdbuf, otherwise combined file will be
1762 // closed!
1763 if (procFile.peek() != ifstream::traits_type::eof())
1764 {
1765 combinedFile << procFile.rdbuf();
1766 }
1767 procFile.close();
1768 remove(procFileName.c_str());
1769 }
1770 combinedFile.close();
1771
1772 return;
1773}
std::size_t numStructures
Total number of structures in dataset.
Definition: Dataset.h:203
void writeSymmetryFunctionScaling(std::string const &fileName="scaling.data")
Write symmetry function scaling values to file.
Definition: Dataset.cpp:1004
void collectSymmetryFunctionStatistics()
Collect symmetry function statistics from all processors.
Definition: Dataset.cpp:974
gsl_rng * rng
GSL random number generator (different seed for each MPI process).
Definition: Dataset.h:209
int distributeStructures(bool randomize, bool excludeRank0=false, std::string const &fileName="input.data")
Read data file and distribute structures among processors.
Definition: Dataset.cpp:724
int recvStructure(Structure *structure, int src)
Receive one structure from source process.
Definition: Dataset.cpp:463
void sortNeighborLists()
Sort all neighbor lists according to element and distance.
Definition: Dataset.cpp:1466
~Dataset()
Destructor.
Definition: Dataset.cpp:46
void writeAtomicEnvironmentFile(std::vector< std::vector< std::size_t > > neighCutoff, bool derivatives, std::string const &fileNamePrefix="atomic-env")
Write atomic environment file.
Definition: Dataset.cpp:1536
void writeNeighborLists(std::string const &fileName="neighbor-list.data")
Write neighbor list file.
Definition: Dataset.cpp:1490
int calculateBufferSize(Structure const &structure) const
Calculate buffer size required to communicate structure via MPI.
Definition: Dataset.cpp:165
MPI_Comm comm
Global MPI communicator.
Definition: Dataset.h:207
void setupMPI()
Initialize MPI with MPI_COMM_WORLD.
Definition: Dataset.cpp:52
int numProcs
Total number of MPI processors.
Definition: Dataset.h:201
std::vector< Structure > structures
All structures in this dataset.
Definition: Dataset.h:195
void writeSymmetryFunctionFile(std::string fileName="function.data")
Write symmetry function legacy file ("function.data").
Definition: Dataset.cpp:1255
std::size_t prepareNumericForces(Structure &original, double delta)
Prepare numeric force check for a single structure.
Definition: Dataset.cpp:888
void toPhysicalUnits()
Switch all structures to physical units.
Definition: Dataset.cpp:963
void writeSymmetryFunctionHistograms(std::size_t numBins, std::string fileNameFormat="sf.%03zu.%04zu.histo")
Calculate and write symmetry function histograms.
Definition: Dataset.cpp:1103
std::size_t writeNeighborHistogram(std::string const &fileNameHisto="neighbors.histo", std::string const &fileNameStructure="neighbors.out")
Calculate and write neighbor histogram and per-structure statistics.
Definition: Dataset.cpp:1320
void setupRandomNumberGenerator()
Initialize random number generator.
Definition: Dataset.cpp:110
int sendStructure(Structure const &structure, int dest) const
Send one structure to destination process.
Definition: Dataset.cpp:251
std::string myName
My processor name.
Definition: Dataset.h:205
void combineFiles(std::string filePrefix) const
Combine individual MPI proc files to one.
Definition: Dataset.cpp:1744
int myRank
My process ID.
Definition: Dataset.h:199
gsl_rng * rngGlobal
Global GSL random number generator (equal seed for each MPI process).
Definition: Dataset.h:211
std::size_t getNumStructures(std::ifstream &dataFile)
Get number of structures in data file.
Definition: Dataset.cpp:709
void collectError(std::string const &property, std::map< std::string, double > &error, std::size_t &count) const
Collect error metrics of a property over all MPI procs.
Definition: Dataset.cpp:1712
void toNormalizedUnits()
Switch all structures to normalized units.
Definition: Dataset.cpp:952
std::size_t atomicNumber(std::size_t index) const
Get atomic number from element index.
Definition: ElementMap.h:145
Base class for all NNP applications.
Definition: Mode.h:87
double physicalEnergy(Structure const &structure, bool ref=true) const
Undo normalization for a given energy of structure.
Definition: Mode.cpp:2122
bool normalize
Definition: Mode.h:629
double convEnergy
Definition: Mode.h:637
ElementMap elementMap
Global element map, populated by setupElementMap().
Definition: Mode.h:591
double convLength
Definition: Mode.h:638
double meanEnergy
Definition: Mode.h:636
std::vector< Element > elements
Definition: Mode.h:646
std::size_t numElements
Definition: Mode.h:631
settings::Settings settings
Definition: Mode.h:642
double convCharge
Definition: Mode.h:639
void removeEnergyOffset(Structure &structure, bool ref=true)
Remove atomic energy offsets from reference energy.
Definition: Mode.cpp:2037
Log log
Global log file.
Definition: Mode.h:593
std::vector< std::size_t > minNeighbors
Definition: Mode.h:632
double getEnergyOffset(Structure const &structure) const
Get atomic energy offset for given structure.
Definition: Mode.cpp:2056
#define MPI_SIZE_T
Definition: mpi-extra.h:22
Definition: Atom.h:29
string strpr(const char *format,...)
String version of printf function.
Definition: utility.cpp:90
vector< string > createFileHeader(vector< string > const &title, vector< size_t > const &colSize, vector< string > const &colName, vector< string > const &colInfo, char const &commentChar)
Definition: utility.cpp:111
vector< string > split(string const &input, char delimiter)
Split string at each delimiter.
Definition: utility.cpp:33
string reduce(string const &line, string const &whitespace, string const &fill)
Replace multiple whitespaces with fill.
Definition: utility.cpp:60
void appendLinesToFile(ofstream &file, vector< string > const lines)
Append multiple lines of strings to open file stream.
Definition: utility.cpp:225
V const & safeFind(std::map< K, V > const &stdMap, typename std::map< K, V >::key_type const &key)
Safely access map entry.
Definition: utility.h:144
size_t p
Definition: nnp-cutoff.cpp:33
Struct to store information on neighbor atoms.
Definition: Atom.h:36
Storage for a single atom.
Definition: Atom.h:33
std::vector< Neighbor > neighbors
Neighbor array (maximum number defined in macros.h.
Definition: Atom.h:170
std::vector< double > G
Symmetry function values.
Definition: Atom.h:146
Storage for one atomic configuration.
Definition: Structure.h:39
Vec3D invbox[3]
Inverse simulation box vectors.
Definition: Structure.h:114
Vec3D box[3]
Simulation box vectors.
Definition: Structure.h:112
bool isTriclinic
If the simulation box is triclinic.
Definition: Structure.h:63
std::vector< std::size_t > numAtomsPerElement
Number of atoms of each element in this structure.
Definition: Structure.h:120
std::string comment
Structure comment.
Definition: Structure.h:110
bool isPeriodic
If periodic boundary conditions apply.
Definition: Structure.h:61
double charge
Charge determined by neural network potential.
Definition: Structure.h:96
void setElementMap(ElementMap const &elementMap)
Set element map of structure.
Definition: Structure.cpp:71
std::size_t index
Index number of this structure.
Definition: Structure.h:73
void remap()
Translate all atoms back into box if outside.
Definition: Structure.cpp:1214
double chargeRef
Reference charge.
Definition: Structure.h:98
SampleType sampleType
Sample type (training or test set).
Definition: Structure.h:108
bool hasSymmetryFunctionDerivatives
If symmetry function derivatives are saved for each atom.
Definition: Structure.h:71
double energy
Potential energy determined by neural network.
Definition: Structure.h:83
void readFromFile(std::string const fileName="input.data")
Read configuration from file.
Definition: Structure.cpp:97
double energyRef
Reference potential energy.
Definition: Structure.h:85
int pbc[3]
Number of PBC images necessary in each direction for max cut-off.
Definition: Structure.h:81
double volume
Simulation box volume.
Definition: Structure.h:100
std::size_t numAtoms
Total number of atoms present in this structure.
Definition: Structure.h:75
std::size_t numElements
Global number of elements (all structures).
Definition: Structure.h:77
std::vector< Atom > atoms
Vector of all atoms in this structure.
Definition: Structure.h:122
std::size_t numElementsPresent
Number of elements present in this structure.
Definition: Structure.h:79
bool hasNeighborList
If the neighbor list has been calculated.
Definition: Structure.h:65
bool hasSymmetryFunctions
If symmetry function values are saved for each atom.
Definition: Structure.h:69
Struct containing statistics gathered during symmetry function calculation.
double min
Minimum symmetry function value encountered.
double sum2
Sum of squared symmetry function values (to compute sigma).
double sum
Sum of symmetry function values (to compute mean).
double max
Maximum symmetry function value encountered.
std::size_t count
Counts total number of symmetry function evaluations.
double r[3]
cartesian coordinates.
Definition: Vec3D.h:32