30#include <gsl/gsl_histogram.h>
31#include <gsl/gsl_rng.h>
36Dataset::Dataset() :
Mode(),
48 if (
rng != NULL) gsl_rng_free(
rng);
55 MPI_Comm_dup(MPI_COMM_WORLD, &tmpComm);
57 MPI_Comm_free(&tmpComm);
65 log <<
"*** SETUP: MPI **************************"
66 "**************************************\n";
70 char line[MPI_MAX_PROCESSOR_NAME];
73 MPI_Comm_dup(*communicator, &
comm);
76 MPI_Get_processor_name(line, &bufferSize);
80 log <<
strpr(
"Process %d of %d (rank %d): %s\n",
89 MPI_Recv(&bufferSize, 1, MPI_INT, i, 0,
comm, &ms);
90 MPI_Recv(line, bufferSize, MPI_CHAR, i, 0,
comm, &ms);
91 log <<
strpr(
"Process %d of %d (rank %d): %s\n",
99 MPI_Send(&bufferSize, 1, MPI_INT, 0, 0,
comm);
100 MPI_Send(line, bufferSize, MPI_CHAR, 0, 0,
comm);
104 log <<
"*****************************************"
105 "**************************************\n";
113 log <<
"*** SETUP: RANDOM NUMBER GENERATOR ******"
114 "**************************************\n";
118 unsigned long seed = atoi(
settings[
"random_seed"].c_str());
119 unsigned long seedGlobal = 0;
123 log <<
strpr(
"Random number generator seed: %d\n", seed);
126 log <<
"WARNING: Seed set to 0. This is a special value for the "
127 "gsl_rng_set() routine (see GSL docs).\n";
130 rng = gsl_rng_alloc(gsl_rng_mt19937);
131 gsl_rng_set(
rng, seed);
132 log <<
strpr(
"Seed for rank %d: %lu\n", 0, seed);
136 seed = gsl_rng_get(
rng);
137 log <<
strpr(
"Seed for rank %d: %lu\n", i, seed);
138 MPI_Send(&seed, 1, MPI_UNSIGNED_LONG, i, 0,
comm);
141 seedGlobal = gsl_rng_get(
rng);
147 MPI_Recv(&seed, 1, MPI_UNSIGNED_LONG, 0, 0,
comm, &ms);
149 rng = gsl_rng_alloc(gsl_rng_taus);
150 gsl_rng_set(
rng, seed);
153 MPI_Bcast(&seedGlobal, 1, MPI_UNSIGNED_LONG, 0,
comm);
154 log <<
strpr(
"Seed for global RNG: %lu\n", seedGlobal);
159 log <<
"*****************************************"
160 "**************************************\n";
175 MPI_Pack_size(1, MPI_INT ,
comm, &is );
176 MPI_Pack_size(1, MPI_INT64_T,
comm, &i64s);
178 MPI_Pack_size(1, MPI_DOUBLE ,
comm, &ds );
179 MPI_Pack_size(1, MPI_CHAR ,
comm, &cs );
182 bs += 5 * cs + 4 * ss + 4 * is + 5 * ds;
185 bs += (s.
comment.length() + 1) * cs;
195 bs += s.
atoms.size() * (4 * cs + 6 * ss + i64s + 3 * ds + 3 * 3 * ds);
196 for (vector<Atom>::const_iterator it = s.
atoms.begin();
197 it != s.
atoms.end(); ++it)
201 bs += it->neighborsUnique.size() * ss;
204 bs += it->numNeighborsPerElement.size() * ss;
207 bs += it->numSymmetryFunctionDerivatives.size() * ss;
208#ifndef N2P2_NO_SF_CACHE
211 bs += it->cacheSizePerElement.size() * ss;
215 bs += it->G.size() * ds;
218 bs += it->dEdG.size() * ds;
221 bs += it->dQdG.size() * ds;
222#ifdef N2P2_FULL_SFD_MEMORY
225 bs += it->dGdxia.size() * ds;
229 bs += it->dGdr.size() * 3 * ds;
232 for (vector<Atom::Neighbor>::const_iterator it2 =
233 it->neighbors.begin(); it2 != it->neighbors.end(); ++it2)
236 bs += 2 * ss + i64s + ds + 3 * ds;
237#ifndef N2P2_NO_SF_CACHE
240 bs += it2->cache.size() * ds;
244 bs += it2->dGdr.size() * 3 * ds;
253 unsigned char* buf = 0;
260 buf =
new unsigned char[bs];
272 MPI_Pack(&(s.
pbc ), 3, MPI_INT , buf, bs, &
p,
comm);
273 MPI_Pack(&(s.
energy ), 1, MPI_DOUBLE, buf, bs, &
p,
comm);
275 MPI_Pack(&(s.
charge ), 1, MPI_DOUBLE, buf, bs, &
p,
comm);
277 MPI_Pack(&(s.
volume ), 1, MPI_DOUBLE, buf, bs, &
p,
comm);
283 MPI_Pack(s.
comment.c_str(), ts, MPI_CHAR, buf, bs, &
p,
comm);
286 for (
size_t i = 0; i < 3; ++i)
288 MPI_Pack(s.
box[i].
r, 3, MPI_DOUBLE, buf, bs, &
p,
comm);
292 for (
size_t i = 0; i < 3; ++i)
294 MPI_Pack(s.
invbox[i].
r, 3, MPI_DOUBLE, buf, bs, &
p,
comm);
310 for (vector<Atom>::const_iterator it = s.
atoms.begin();
311 it != s.
atoms.end(); ++it)
314 MPI_Pack(&(it->hasNeighborList ), 1, MPI_CHAR , buf, bs, &
p,
comm);
315 MPI_Pack(&(it->hasSymmetryFunctions ), 1, MPI_CHAR , buf, bs, &
p,
comm);
316 MPI_Pack(&(it->hasSymmetryFunctionDerivatives), 1, MPI_CHAR , buf, bs, &
p,
comm);
317 MPI_Pack(&(it->useChargeNeuron ), 1, MPI_CHAR , buf, bs, &
p,
comm);
320 MPI_Pack(&(it->tag ), 1, MPI_INT64_T, buf, bs, &
p,
comm);
323 MPI_Pack(&(it->numNeighborsUnique ), 1,
MPI_SIZE_T , buf, bs, &
p,
comm);
324 MPI_Pack(&(it->numSymmetryFunctions ), 1,
MPI_SIZE_T , buf, bs, &
p,
comm);
325 MPI_Pack(&(it->energy ), 1, MPI_DOUBLE , buf, bs, &
p,
comm);
326 MPI_Pack(&(it->charge ), 1, MPI_DOUBLE , buf, bs, &
p,
comm);
327 MPI_Pack(&(it->chargeRef ), 1, MPI_DOUBLE , buf, bs, &
p,
comm);
328 MPI_Pack(&(it->r.r ), 3, MPI_DOUBLE , buf, bs, &
p,
comm);
329 MPI_Pack(&(it->f.r ), 3, MPI_DOUBLE , buf, bs, &
p,
comm);
330 MPI_Pack(&(it->fRef.r ), 3, MPI_DOUBLE , buf, bs, &
p,
comm);
333 size_t ts2 = it->neighborsUnique.size();
337 MPI_Pack(&(it->neighborsUnique.front()), ts2,
MPI_SIZE_T, buf, bs, &
p,
comm);
341 ts2 = it->numNeighborsPerElement.size();
345 MPI_Pack(&(it->numNeighborsPerElement.front()), ts2,
MPI_SIZE_T, buf, bs, &
p,
comm);
349 ts2 = it->numSymmetryFunctionDerivatives.size();
353 MPI_Pack(&(it->numSymmetryFunctionDerivatives.front()), ts2,
MPI_SIZE_T, buf, bs, &
p,
comm);
356#ifndef N2P2_NO_SF_CACHE
358 ts2 = it->cacheSizePerElement.size();
362 MPI_Pack(&(it->cacheSizePerElement.front()), ts2,
MPI_SIZE_T, buf, bs, &
p,
comm);
371 MPI_Pack(&(it->G.front()), ts2, MPI_DOUBLE, buf, bs, &
p,
comm);
375 ts2 = it->dEdG.size();
379 MPI_Pack(&(it->dEdG.front()), ts2, MPI_DOUBLE, buf, bs, &
p,
comm);
383 ts2 = it->dQdG.size();
387 MPI_Pack(&(it->dQdG.front()), ts2, MPI_DOUBLE, buf, bs, &
p,
comm);
390#ifdef N2P2_FULL_SFD_MEMORY
392 ts2 = it->dGdxia.size();
396 MPI_Pack(&(it->dGdxia.front()), ts2, MPI_DOUBLE, buf, bs, &
p,
comm);
401 ts2 = it->dGdr.size();
405 for (vector<Vec3D>::const_iterator it2 = it->dGdr.begin();
406 it2 != it->dGdr.end(); ++it2)
408 MPI_Pack(it2->r, 3, MPI_DOUBLE, buf, bs, &
p,
comm);
413 ts2 = it->neighbors.size();
417 for (vector<Atom::Neighbor>::const_iterator it2 =
418 it->neighbors.begin(); it2 != it->neighbors.end(); ++it2)
422 MPI_Pack(&(it2->tag ), 1, MPI_INT64_T, buf, bs, &
p,
comm);
424 MPI_Pack(&(it2->d ), 1, MPI_DOUBLE , buf, bs, &
p,
comm);
425 MPI_Pack( it2->dr.r , 3, MPI_DOUBLE , buf, bs, &
p,
comm);
428#ifndef N2P2_NO_SF_CACHE
430 ts3 = it2->cache.size();
434 MPI_Pack(&(it2->cache.front()), ts3, MPI_DOUBLE, buf, bs, &
p,
comm);
439 ts3 = it2->dGdr.size();
443 for (vector<Vec3D>::const_iterator it3 =
444 it2->dGdr.begin(); it3 != it2->dGdr.end(); ++it3)
446 MPI_Pack(it3->r, 3, MPI_DOUBLE, buf, bs, &
p,
comm);
454 MPI_Send(&bs, 1, MPI_INT, dest, 0,
comm);
455 MPI_Send(buf, bs, MPI_PACKED, dest, 0,
comm);
464 unsigned char* buf = 0;
472 MPI_Recv(&bs, 1, MPI_INT, src, 0,
comm, &ms);
475 buf =
new unsigned char[bs];
477 MPI_Recv(buf, bs, MPI_PACKED, src, 0,
comm, &ms);
489 MPI_Unpack(buf, bs, &
p, &(s->
pbc ), 3, MPI_INT ,
comm);
490 MPI_Unpack(buf, bs, &
p, &(s->
energy ), 1, MPI_DOUBLE,
comm);
492 MPI_Unpack(buf, bs, &
p, &(s->
charge ), 1, MPI_DOUBLE,
comm);
494 MPI_Unpack(buf, bs, &
p, &(s->
volume ), 1, MPI_DOUBLE,
comm);
500 char* comment =
new char[ts];
501 MPI_Unpack(buf, bs, &
p, comment, ts, MPI_CHAR,
comm);
506 for (
size_t i = 0; i < 3; ++i)
508 MPI_Unpack(buf, bs, &
p, s->
box[i].
r, 3, MPI_DOUBLE,
comm);
512 for (
size_t i = 0; i < 3; ++i)
514 MPI_Unpack(buf, bs, &
p, s->
invbox[i].
r, 3, MPI_DOUBLE,
comm);
534 for (vector<Atom>::iterator it = s->
atoms.begin();
535 it != s->
atoms.end(); ++it)
538 MPI_Unpack(buf, bs, &
p, &(it->hasNeighborList ), 1, MPI_CHAR ,
comm);
539 MPI_Unpack(buf, bs, &
p, &(it->hasSymmetryFunctions ), 1, MPI_CHAR ,
comm);
540 MPI_Unpack(buf, bs, &
p, &(it->hasSymmetryFunctionDerivatives), 1, MPI_CHAR ,
comm);
541 MPI_Unpack(buf, bs, &
p, &(it->useChargeNeuron ), 1, MPI_CHAR ,
comm);
544 MPI_Unpack(buf, bs, &
p, &(it->tag ), 1, MPI_INT64_T,
comm);
547 MPI_Unpack(buf, bs, &
p, &(it->numNeighborsUnique ), 1,
MPI_SIZE_T ,
comm);
548 MPI_Unpack(buf, bs, &
p, &(it->numSymmetryFunctions ), 1,
MPI_SIZE_T ,
comm);
549 MPI_Unpack(buf, bs, &
p, &(it->energy ), 1, MPI_DOUBLE ,
comm);
550 MPI_Unpack(buf, bs, &
p, &(it->charge ), 1, MPI_DOUBLE ,
comm);
551 MPI_Unpack(buf, bs, &
p, &(it->chargeRef ), 1, MPI_DOUBLE ,
comm);
552 MPI_Unpack(buf, bs, &
p, &(it->r.r ), 3, MPI_DOUBLE ,
comm);
553 MPI_Unpack(buf, bs, &
p, &(it->f.r ), 3, MPI_DOUBLE ,
comm);
554 MPI_Unpack(buf, bs, &
p, &(it->fRef.r ), 3, MPI_DOUBLE ,
comm);
561 it->neighborsUnique.clear();
562 it->neighborsUnique.resize(ts2, 0);
563 MPI_Unpack(buf, bs, &
p, &(it->neighborsUnique.front()), ts2,
MPI_SIZE_T,
comm);
571 it->numNeighborsPerElement.clear();
572 it->numNeighborsPerElement.resize(ts2, 0);
573 MPI_Unpack(buf, bs, &
p, &(it->numNeighborsPerElement.front()), ts2,
MPI_SIZE_T,
comm);
581 it->numSymmetryFunctionDerivatives.clear();
582 it->numSymmetryFunctionDerivatives.resize(ts2, 0);
583 MPI_Unpack(buf, bs, &
p, &(it->numSymmetryFunctionDerivatives.front()), ts2,
MPI_SIZE_T,
comm);
586#ifndef N2P2_NO_SF_CACHE
592 it->cacheSizePerElement.clear();
593 it->cacheSizePerElement.resize(ts2, 0);
594 MPI_Unpack(buf, bs, &
p, &(it->cacheSizePerElement.front()), ts2,
MPI_SIZE_T,
comm);
604 it->G.resize(ts2, 0.0);
605 MPI_Unpack(buf, bs, &
p, &(it->G.front()), ts2, MPI_DOUBLE,
comm);
614 it->dEdG.resize(ts2, 0.0);
615 MPI_Unpack(buf, bs, &
p, &(it->dEdG.front()), ts2, MPI_DOUBLE,
comm);
624 it->dQdG.resize(ts2, 0.0);
625 MPI_Unpack(buf, bs, &
p, &(it->dQdG.front()), ts2, MPI_DOUBLE,
comm);
628#ifdef N2P2_FULL_SFD_MEMORY
635 it->dGdxia.resize(ts2, 0.0);
636 MPI_Unpack(buf, bs, &
p, &(it->dGdxia.front()), ts2, MPI_DOUBLE,
comm);
646 it->dGdr.resize(ts2);
647 for (vector<Vec3D>::iterator it2 = it->dGdr.begin();
648 it2 != it->dGdr.end(); ++it2)
650 MPI_Unpack(buf, bs, &
p, it2->r, 3, MPI_DOUBLE,
comm);
659 it->neighbors.clear();
660 it->neighbors.resize(ts2);
661 for (vector<Atom::Neighbor>::iterator it2 =
662 it->neighbors.begin(); it2 != it->neighbors.end(); ++it2)
666 MPI_Unpack(buf, bs, &
p, &(it2->tag ), 1, MPI_INT64_T,
comm);
668 MPI_Unpack(buf, bs, &
p, &(it2->d ), 1, MPI_DOUBLE ,
comm);
669 MPI_Unpack(buf, bs, &
p, it2->dr.r , 3, MPI_DOUBLE ,
comm);
672#ifndef N2P2_NO_SF_CACHE
679 it2->cache.resize(ts3, 0.0);
680 MPI_Unpack(buf, bs, &
p, &(it2->cache.front()), ts3, MPI_DOUBLE,
comm);
690 it2->dGdr.resize(ts3);
691 for (vector<Vec3D>::iterator it3 = it2->dGdr.begin();
692 it3 != it2->dGdr.end(); ++it3)
694 MPI_Unpack(buf, bs, &
p, it3->r, 3, MPI_DOUBLE,
comm);
711 vector<string> splitLine;
713 while (getline(dataFile, line))
716 if (splitLine.at(0) ==
"begin") count++;
724 string const& fileName)
727 log <<
"*** STRUCTURE DISTRIBUTION **************"
728 "**************************************\n";
732 vector<size_t> numStructuresPerProc;
736 log <<
"No structures will be distributed to rank 0.\n";
739 throw runtime_error(
"ERROR: Can not distribute structures, "
740 "at least 2 MPI tasks are required.\n");
744 if (excludeRank0) numReceivers--;
748 log <<
strpr(
"Reading configurations from data file: %s.\n",
750 dataFile.open(fileName.c_str());
759 throw runtime_error(
"ERROR: More receiving processors than "
763 numStructuresPerProc.resize(
numProcs, 0);
768 for (
size_t i = 0; i < (size_t)
numProcs; i++)
770 if (i != 0 || (!excludeRank0))
772 numStructuresPerProc.at(i) = quotient;
773 if (remainder > 0 && i > 0 && i <= remainder)
775 numStructuresPerProc.at(i)++;
781 log <<
strpr(
"Number of structures per processor: %d\n", quotient);
785 log <<
strpr(
"Number of structures per"
786 " processor: %d (%d) or %d (%d)\n",
788 numReceivers - remainder,
795 int bytesTransferred = 0;
796 size_t numMyStructures = numStructuresPerProc.at(
myRank);
799 size_t countStructures = 0;
800 vector<size_t> countStructuresPerProc;
802 countStructuresPerProc.resize(
numProcs, 0);
809 if (countStructuresPerProc.at(proc)
810 < numStructuresPerProc.at(proc))
824 tmpStructure.
index = countStructures;
829 countStructuresPerProc.at(proc)++;
836 for (
int proc = 0; proc <
numProcs; ++proc)
838 for (
size_t i = 0; i < numStructuresPerProc.at(proc); ++i)
852 tmpStructure.
index = countStructures;
857 countStructuresPerProc.at(proc)++;
866 for (
size_t i = 0; i < numMyStructures; i++)
874 log <<
strpr(
"Distributed %zu structures,"
875 " %d bytes (%.2f MiB) transferred.\n",
878 bytesTransferred / 1024. / 1024.);
880 log <<
"*****************************************"
881 "**************************************\n";
883 return bytesTransferred;
898 vector<size_t> numStructuresPerProc(
numProcs, 0);
905 for (
size_t i = 0; i < (size_t)
numProcs; i++)
907 numStructuresPerProc.at(i) = quotient;
908 if (remainder > 0 && i < remainder) numStructuresPerProc.at(i)++;
926 if (
p == 0) istart = 1;
927 for (
size_t i = istart; i < numStructuresPerProc.at(
p); ++i)
931 sign = 2 * ((count % 6) / 3) - 1;
938 s.
comment =
strpr(
"%zu %zu %zu %d", count, iAtom, ixyz, sign);
940 s.
atoms.at(iAtom).r[ixyz] += sign * delta;
952 for (vector<Structure>::iterator it =
structures.begin();
963 for (vector<Structure>::iterator it =
structures.begin();
974 for (vector<Element>::iterator it =
elements.begin();
979 if (it->statistics.data.size() == 0)
981 log <<
strpr(
"WARNING: No statistics for element %zu (%2s) found, "
982 "process %d has no corresponding atoms, creating "
983 "empty statistics.\n",
985 it->getSymbol().c_str(),
988 for (
size_t i = 0; i < it->numSymmetryFunctions(); ++i)
992 MPI_Allreduce(MPI_IN_PLACE, &(c.
min ), 1, MPI_DOUBLE, MPI_MIN,
comm);
993 MPI_Allreduce(MPI_IN_PLACE, &(c.
max ), 1, MPI_DOUBLE, MPI_MAX,
comm);
994 MPI_Allreduce(MPI_IN_PLACE, &(c.
sum ), 1, MPI_DOUBLE, MPI_SUM,
comm);
995 MPI_Allreduce(MPI_IN_PLACE, &(c.
sum2 ), 1, MPI_DOUBLE, MPI_SUM,
comm);
1005 log <<
"*** SYMMETRY FUNCTION SCALING ***********"
1006 "**************************************\n";
1011 log <<
strpr(
"Writing symmetry function scaling file: %s.\n",
1014 sFile.open(fileName.c_str());
1017 vector<string> title;
1018 vector<string> colName;
1019 vector<string> colInfo;
1020 vector<size_t> colSize;
1021 title.push_back(
"Symmetry function scaling data.");
1022 colSize.push_back(10);
1023 colName.push_back(
"e_index");
1024 colInfo.push_back(
"Element index.");
1025 colSize.push_back(10);
1026 colName.push_back(
"sf_index");
1027 colInfo.push_back(
"Symmetry function index.");
1028 colSize.push_back(24);
1029 colName.push_back(
"sf_min");
1030 colInfo.push_back(
"Symmetry function minimum.");
1031 colSize.push_back(24);
1032 colName.push_back(
"sf_max");
1033 colInfo.push_back(
"Symmetry function maximum.");
1034 colSize.push_back(24);
1035 colName.push_back(
"sf_mean");
1036 colInfo.push_back(
"Symmetry function mean.");
1037 colSize.push_back(24);
1038 colName.push_back(
"sf_sigma");
1039 colInfo.push_back(
"Symmetry function sigma.");
1044 log <<
"Abbreviations:\n";
1045 log <<
"--------------\n";
1046 log <<
"ind ...... Symmetry function index.\n";
1047 log <<
"min ...... Minimum symmetry function value.\n";
1048 log <<
"max ...... Maximum symmetry function value.\n";
1049 log <<
"mean ..... Mean symmetry function value.\n";
1050 log <<
"sigma .... Standard deviation of symmetry function values.\n";
1051 log <<
"spread ... (max - min) / sigma.\n";
1053 for (vector<Element>::const_iterator it =
elements.begin();
1056 log <<
strpr(
"Scaling data for symmetry functions element %2s :\n",
1057 it->getSymbol().c_str());
1058 log <<
"-----------------------------------------"
1059 "--------------------------------------\n";
1060 log <<
" ind min max mean sigma spread\n";
1061 log <<
"-----------------------------------------"
1062 "--------------------------------------\n";
1063 for (
size_t i = 0; i < it->numSymmetryFunctions(); ++i)
1066 = it->statistics.data.at(i);
1068 double const mean = c.
sum / n;
1069 double const sigma = sqrt((c.
sum2 - c.
sum * c.
sum / n)
1071 double const spread = (c.
max - c.
min) / sigma;
1072 sFile <<
strpr(
"%10d %10d %24.16E %24.16E %24.16E %24.16E\n",
1079 log <<
strpr(
"%4zu %9.2E %9.2E %9.2E %9.2E %9.2E\n",
1087 log <<
"-----------------------------------------"
1088 "--------------------------------------\n";
1095 log <<
"*****************************************"
1096 "**************************************\n";
1102 string fileNameFormat)
1105 log <<
"*** SYMMETRY FUNCTION HISTOGRAMS ********"
1106 "**************************************\n";
1111 vector<vector<gsl_histogram*> > histograms;
1112 for (vector<Element>::const_iterator it =
elements.begin();
1115 histograms.push_back(vector<gsl_histogram*>());
1116 for (
size_t i = 0; i < it->numSymmetryFunctions(); ++i)
1118 double l =
safeFind(it->statistics.data, i).min;
1119 double h =
safeFind(it->statistics.data, i).max;
1123 h += (h - l) / numBins;
1124 histograms.back().push_back(gsl_histogram_alloc(numBins + 1));
1125 gsl_histogram_set_ranges_uniform(histograms.back().back(),
1132 histograms.back().push_back(
nullptr);
1133 log <<
strpr(
"WARNING: Symmetry function min equals max, "
1134 "ommitting histogram (Element %2s SF %4zu "
1136 it->getSymbol().c_str(),
1138 it->getSymmetryFunction(i).getLineNumber() + 1);
1144 for (vector<Structure>::const_iterator it =
structures.begin();
1147 for (vector<Atom>::const_iterator it2 = it->atoms.begin();
1148 it2 != it->atoms.end(); ++it2)
1150 size_t e = it2->element;
1151 vector<gsl_histogram*>& h = histograms.at(e);
1152 for (
size_t s = 0; s < it2->G.size(); ++s)
1154 if (h.at(s) ==
nullptr)
continue;
1155 gsl_histogram_increment(h.at(s), it2->G.at(s));
1161 for (vector<vector<gsl_histogram*> >::const_iterator it =
1162 histograms.begin(); it != histograms.end(); ++it)
1164 for (vector<gsl_histogram*>::const_iterator it2 = it->begin();
1165 it2 != it->end(); ++it2)
1167 if ((*it2) ==
nullptr)
continue;
1168 MPI_Allreduce(MPI_IN_PLACE, (*it2)->bin, numBins + 1, MPI_DOUBLE, MPI_SUM,
comm);
1175 log <<
strpr(
"Writing histograms with %zu bins.\n", numBins + 1);
1176 for (
size_t e = 0; e <
elements.size(); ++e)
1178 for (
size_t s = 0; s <
elements.at(e).numSymmetryFunctions(); ++s)
1180 gsl_histogram*& h = histograms.at(e).at(s);
1181 if (h ==
nullptr)
continue;
1183 string fileName =
strpr(fileNameFormat.c_str(),
1186 fp = fopen(fileName.c_str(),
"w");
1189 throw runtime_error(
strpr(
"ERROR: Could not open file:"
1190 " %s.\n", fileName.c_str()));
1192 vector<string> info =
elements.at(e).infoSymmetryFunction(s);
1193 for (vector<string>::const_iterator it = info.begin();
1194 it != info.end(); ++it)
1196 fprintf(fp,
"#SFINFO %s\n", it->c_str());
1199 =
elements.at(e).statistics.data.at(s);
1201 fprintf(fp,
"#SFINFO min %15.8E\n", c.
min);
1202 fprintf(fp,
"#SFINFO max %15.8E\n", c.
max);
1203 fprintf(fp,
"#SFINFO mean %15.8E\n", c.
sum / n);
1204 fprintf(fp,
"#SFINFO sigma %15.8E\n",
1205 sqrt(1.0 / (n - 1) * (c.
sum2 - c.
sum * c.
sum / n)));
1208 vector<string> title;
1209 vector<string> colName;
1210 vector<string> colInfo;
1211 vector<size_t> colSize;
1212 title.push_back(
"Symmetry function histogram.");
1213 colSize.push_back(16);
1214 colName.push_back(
"symfunc_l");
1215 colInfo.push_back(
"Symmetry function value, left bin limit.");
1216 colSize.push_back(16);
1217 colName.push_back(
"symfunc_r");
1218 colInfo.push_back(
"Symmetry function value, right bin limit.");
1219 colSize.push_back(16);
1220 colName.push_back(
"hist");
1221 colInfo.push_back(
"Histogram count.");
1228 gsl_histogram_fprintf(fp, h,
"%16.8E",
"%16.8E");
1236 for (vector<vector<gsl_histogram*> >::const_iterator it =
1237 histograms.begin(); it != histograms.end(); ++it)
1239 for (vector<gsl_histogram*>::const_iterator it2 = it->begin();
1240 it2 != it->end(); ++it2)
1242 if ((*it2) ==
nullptr)
continue;
1243 gsl_histogram_free(*it2);
1247 log <<
"*****************************************"
1248 "**************************************\n";
1256 log <<
"*** SYMMETRY FUNCTION FILE **************"
1257 "**************************************\n";
1261 log <<
strpr(
"Writing symmetry functions to file: %s\n", fileName.c_str());
1265 file.open(fileName.c_str());
1271 vector<Structure>::const_iterator it =
structures.begin();
1278 if (it !=
structures.end() && i == it->index)
1281 file.open(fileName.c_str(), ios_base::app);
1282 file <<
strpr(
"%6zu\n", it->numAtoms);
1284 for (vector<Atom>::const_iterator it2 = it->atoms.begin();
1285 it2 != it->atoms.end(); ++it2)
1289 for (vector<double>::const_iterator it3 = it2->G.begin();
1290 it3 != it2->G.end(); ++it3)
1292 file <<
strpr(
" %14.10f", *it3);
1297 double energy = 0.0;
1299 else energy = it->energyRef;
1301 energy /= it->numAtoms;
1302 file <<
strpr(
" %19.10f %19.10f %19.10f %19.10f\n",
1303 0.0, energy, energy, 0.0);
1312 log <<
"*****************************************"
1313 "**************************************\n";
1319 string const& fileNameStructure)
1322 log <<
"*** NEIGHBOR STATISTICS/HISTOGRAM *******"
1323 "**************************************\n";
1327 string fileNameLocal =
strpr(
"%s.%04d", fileNameStructure.c_str(),
myRank);
1328 statFile.open(fileNameLocal.c_str());
1332 vector<string> title;
1333 vector<string> colName;
1334 vector<string> colInfo;
1335 vector<size_t> colSize;
1336 title.push_back(
"Neighbor statistics for each structure.");
1337 colSize.push_back(10);
1338 colName.push_back(
"struct");
1339 colInfo.push_back(
"Index of structure (starting with 1).");
1340 colSize.push_back(10);
1341 colName.push_back(
"natoms");
1342 colInfo.push_back(
"Number of atoms in structure.");
1343 colSize.push_back(10);
1344 colName.push_back(
"min");
1345 colInfo.push_back(
"Minimum number of neighbors over all atoms.");
1346 colSize.push_back(10);
1347 colName.push_back(
"max");
1348 colInfo.push_back(
"Maximum number of neighbors over all atoms.");
1349 colSize.push_back(16);
1350 colName.push_back(
"mean");
1351 colInfo.push_back(
"Mean number of neighbors over all atoms.");
1357 size_t numAtomsGlobal = 0;
1358 size_t minNeighborsGlobal = numeric_limits<size_t>::max();
1359 size_t maxNeighborsGlobal = 0;
1360 double meanNeighborsGlobal = 0.0;
1364 size_t maxNeighbors = 0;
1365 double meanNeighbors = 0.0;
1366 for (
auto const& a : s.atoms)
1368 size_t const n = a.numNeighbors;
1370 maxNeighbors = max(maxNeighbors, n);
1373 numAtomsGlobal += s.numAtoms;
1374 minNeighborsGlobal = min(minNeighborsGlobal,
minNeighbors);
1375 maxNeighborsGlobal = max(maxNeighborsGlobal, maxNeighbors);
1376 meanNeighborsGlobal += meanNeighbors;
1377 statFile <<
strpr(
"%10zu %10zu %10zu %10zu %16.8E\n",
1382 meanNeighbors / s.numAtoms);
1386 MPI_Allreduce(MPI_IN_PLACE, &numAtomsGlobal , 1,
MPI_SIZE_T, MPI_SUM,
comm);
1387 MPI_Allreduce(MPI_IN_PLACE, &minNeighborsGlobal , 1,
MPI_SIZE_T, MPI_MIN,
comm);
1388 MPI_Allreduce(MPI_IN_PLACE, &maxNeighborsGlobal , 1,
MPI_SIZE_T, MPI_MAX,
comm);
1389 MPI_Allreduce(MPI_IN_PLACE, &meanNeighborsGlobal, 1, MPI_DOUBLE, MPI_SUM,
comm);
1390 log <<
strpr(
"Minimum number of neighbors: %d\n", minNeighborsGlobal);
1391 log <<
strpr(
"Mean number of neighbors: %.1f\n",
1392 meanNeighborsGlobal / numAtomsGlobal);
1393 log <<
strpr(
"Maximum number of neighbors: %d\n", maxNeighborsGlobal);
1396 gsl_histogram* histNeighbors = NULL;
1397 histNeighbors = gsl_histogram_alloc(maxNeighborsGlobal + 1);
1398 gsl_histogram_set_ranges_uniform(histNeighbors,
1400 maxNeighborsGlobal + 0.5);
1401 for (vector<Structure>::const_iterator it =
structures.begin();
1404 for (vector<Atom>::const_iterator it2 = it->atoms.begin();
1405 it2 != it->atoms.end(); ++it2)
1407 gsl_histogram_increment(histNeighbors, it2->numNeighbors);
1410 MPI_Allreduce(MPI_IN_PLACE, histNeighbors->bin, maxNeighborsGlobal + 1, MPI_DOUBLE, MPI_SUM,
comm);
1415 log <<
strpr(
"Neighbor histogram file: %s.\n", fileNameHisto.c_str());
1417 fp = fopen(fileNameHisto.c_str(),
"w");
1420 throw runtime_error(
strpr(
"ERROR: Could not open file: %s.\n",
1421 fileNameHisto.c_str()));
1425 vector<string> title;
1426 vector<string> colName;
1427 vector<string> colInfo;
1428 vector<size_t> colSize;
1429 title.push_back(
"Neighbor count histogram.");
1430 colSize.push_back(9);
1431 colName.push_back(
"neigh_l");
1432 colInfo.push_back(
"Number of neighbors, left bin limit.");
1433 colSize.push_back(9);
1434 colName.push_back(
"neigh_r");
1435 colInfo.push_back(
"Number of neighbors, right bin limit.");
1436 colSize.push_back(16);
1437 colName.push_back(
"hist");
1438 colInfo.push_back(
"Histogram count.");
1442 gsl_histogram_fprintf(fp, histNeighbors,
"%9.1f",
"%16.8E");
1451 log <<
strpr(
"Combining per-structure neighbor statistics file: %s.\n",
1452 fileNameStructure.c_str());
1456 gsl_histogram_free(histNeighbors);
1458 log <<
"*****************************************"
1459 "**************************************\n";
1461 return maxNeighborsGlobal;
1467 log <<
"*** NEIGHBOR LIST ***********************"
1468 "**************************************\n";
1471 log <<
"Sorting neighbor lists according to element and distance.\n";
1473 for (vector<Structure>::iterator it =
structures.begin();
1476 for (vector<Atom>::iterator it2 = it->atoms.begin();
1477 it2 != it->atoms.end(); ++it2)
1479 sort(it2->neighbors.begin(), it2->neighbors.end());
1483 log <<
"*****************************************"
1484 "**************************************\n";
1491 log <<
"*** NEIGHBOR LIST ***********************"
1492 "**************************************\n";
1495 string fileNameLocal =
strpr(
"%s.%04d", fileName.c_str(),
myRank);
1497 fileLocal.open(fileNameLocal.c_str());
1499 for (vector<Structure>::const_iterator it =
structures.begin();
1502 fileLocal <<
strpr(
"%zu\n", it->numAtoms);
1503 for (vector<Atom>::const_iterator it2 = it->atoms.begin();
1504 it2 != it->atoms.end(); ++it2)
1509 fileLocal <<
strpr(
" %zu", it2->numNeighborsPerElement.at(i));
1511 for (vector<Atom::Neighbor>::const_iterator it3
1512 = it2->neighbors.begin(); it3 != it2->neighbors.end(); ++it3)
1514 fileLocal <<
strpr(
" %zu", it3->index);
1524 log <<
strpr(
"Writing neighbor lists to file: %s.\n", fileName.c_str());
1528 log <<
"*****************************************"
1529 "**************************************\n";
1535 vector<vector<size_t> > neighCutoff,
1537 string const& fileNamePrefix)
1540 log <<
"*** ATOMIC ENVIRONMENT ******************"
1541 "**************************************\n";
1544 string const fileNamePrefixG =
strpr(
"%s.G" , fileNamePrefix.c_str());
1545 string const fileNamePrefixdGdx =
strpr(
"%s.dGdx", fileNamePrefix.c_str());
1546 string const fileNamePrefixdGdy =
strpr(
"%s.dGdy", fileNamePrefix.c_str());
1547 string const fileNamePrefixdGdz =
strpr(
"%s.dGdz", fileNamePrefix.c_str());
1549 string const fileNameLocalG =
strpr(
"%s.%04d",
1550 fileNamePrefixG.c_str(),
myRank);
1551 string const fileNameLocaldGdx =
strpr(
"%s.%04d",
1552 fileNamePrefixdGdx.c_str(),
myRank);
1553 string const fileNameLocaldGdy =
strpr(
"%s.%04d",
1554 fileNamePrefixdGdy.c_str(),
myRank);
1555 string const fileNameLocaldGdz =
strpr(
"%s.%04d",
1556 fileNamePrefixdGdz.c_str(),
myRank);
1558 ofstream fileLocalG;
1559 ofstream fileLocaldGdx;
1560 ofstream fileLocaldGdy;
1561 ofstream fileLocaldGdz;
1563 fileLocalG.open(fileNameLocalG.c_str());
1566 fileLocaldGdx.open(fileNameLocaldGdx.c_str());
1567 fileLocaldGdy.open(fileNameLocaldGdy.c_str());
1568 fileLocaldGdz.open(fileNameLocaldGdz.c_str());
1571 log <<
"Preparing symmetry functions for atomic environment file(s).\n";
1576 log <<
strpr(
"Maximum number of %2s neighbors for central %2s "
1580 neighCutoff.at(i).at(j));
1585 for (vector<Structure>::const_iterator its =
structures.begin();
1588 for (vector<Atom>::const_iterator ita = its->atoms.begin();
1589 ita != its->atoms.end(); ++ita)
1591 size_t const ea = ita->element;
1594 if (ita->numNeighborsPerElement.at(i)
1595 < neighCutoff.at(ea).at(i))
1597 throw runtime_error(
strpr(
1598 "ERROR: Not enough neighbor atoms, cannot create "
1599 "atomic environment file. Reduce neighbor cutoff for "
1600 "element %2s.\n",
elementMap[i].c_str()).c_str());
1608 for (vector<double>::const_iterator it = ita->G.begin();
1609 it != ita->G.end(); ++it)
1611 fileLocalG <<
strpr(
" %16.8E", (*it));
1615 for (vector<Vec3D>::const_iterator it = ita->dGdr.begin();
1616 it != ita->dGdr.end(); ++it)
1618 fileLocaldGdx <<
strpr(
" %16.8E", (*it)[0]);
1619 fileLocaldGdy <<
strpr(
" %16.8E", (*it)[1]);
1620 fileLocaldGdz <<
strpr(
" %16.8E", (*it)[2]);
1624 for (vector<Atom::Neighbor>::const_iterator itn
1625 = ita->neighbors.begin(); itn != ita->neighbors.end(); ++itn)
1627 size_t const i = itn->index;
1628 size_t const en = itn->element;
1629 if (neighCount.at(en) < neighCutoff.at(ea).at(en))
1632 Atom const& a = its->atoms.at(i);
1633 for (vector<double>::const_iterator it = a.
G.begin();
1634 it != a.
G.end(); ++it)
1636 fileLocalG <<
strpr(
" %16.8E", (*it));
1642 vector<Atom::Neighbor>::const_iterator itan = find_if(
1646 return n.index == ita->index;
1648 for (vector<Vec3D>::const_iterator it
1649 = itan->dGdr.begin();
1650 it != itan->dGdr.end(); ++it)
1652 fileLocaldGdx <<
strpr(
" %16.8E", (*it)[0]);
1653 fileLocaldGdy <<
strpr(
" %16.8E", (*it)[1]);
1654 fileLocaldGdz <<
strpr(
" %16.8E", (*it)[2]);
1657 neighCount.at(en)++;
1663 fileLocaldGdx <<
'\n';
1664 fileLocaldGdy <<
'\n';
1665 fileLocaldGdz <<
'\n';
1668 fill(neighCount.begin(), neighCount.end(), 0);
1676 fileLocaldGdx.flush();
1677 fileLocaldGdx.close();
1678 fileLocaldGdy.flush();
1679 fileLocaldGdy.close();
1680 fileLocaldGdz.flush();
1681 fileLocaldGdz.close();
1687 log <<
strpr(
"Combining atomic environment file: %s.\n",
1688 fileNamePrefixG.c_str());
1692 log <<
strpr(
"Combining atomic environment file: %s.\n",
1693 fileNamePrefixdGdx.c_str());
1695 log <<
strpr(
"Combining atomic environment file: %s.\n",
1696 fileNamePrefixdGdy.c_str());
1698 log <<
strpr(
"Combining atomic environment file: %s.\n",
1699 fileNamePrefixdGdz.c_str());
1704 log <<
"*****************************************"
1705 "**************************************\n";
1711 map<string, double>& error,
1712 size_t& count)
const
1714 if (property ==
"energy")
1716 MPI_Allreduce(MPI_IN_PLACE, &count , 1,
MPI_SIZE_T, MPI_SUM,
comm);
1717 MPI_Allreduce(MPI_IN_PLACE, &(error.at(
"RMSEpa")), 1, MPI_DOUBLE, MPI_SUM,
comm);
1718 MPI_Allreduce(MPI_IN_PLACE, &(error.at(
"RMSE")) , 1, MPI_DOUBLE, MPI_SUM,
comm);
1719 MPI_Allreduce(MPI_IN_PLACE, &(error.at(
"MAEpa")) , 1, MPI_DOUBLE, MPI_SUM,
comm);
1720 MPI_Allreduce(MPI_IN_PLACE, &(error.at(
"MAE")) , 1, MPI_DOUBLE, MPI_SUM,
comm);
1721 error.at(
"RMSEpa") = sqrt(error.at(
"RMSEpa") / count);
1722 error.at(
"RMSE") = sqrt(error.at(
"RMSE") / count);
1723 error.at(
"MAEpa") = error.at(
"MAEpa") / count;
1724 error.at(
"MAE") = error.at(
"MAE") / count;
1726 else if (property ==
"force" || property ==
"charge")
1728 MPI_Allreduce(MPI_IN_PLACE, &count , 1,
MPI_SIZE_T, MPI_SUM,
comm);
1729 MPI_Allreduce(MPI_IN_PLACE, &(error.at(
"RMSE")), 1, MPI_DOUBLE, MPI_SUM,
comm);
1730 MPI_Allreduce(MPI_IN_PLACE, &(error.at(
"MAE")) , 1, MPI_DOUBLE, MPI_SUM,
comm);
1731 error.at(
"RMSE") = sqrt(error.at(
"RMSE") / count);
1732 error.at(
"MAE") = error.at(
"MAE") / count;
1736 throw runtime_error(
"ERROR: Unknown property for error collection.\n");
1744 ofstream combinedFile(filePrefix.c_str(), ios::binary);
1745 if (!combinedFile.is_open())
1747 throw runtime_error(
strpr(
"ERROR: Could not open file: %s.\n",
1748 filePrefix.c_str()));
1752 string procFileName =
strpr(
"%s.%04d", filePrefix.c_str(), i);
1753 ifstream procFile(procFileName.c_str(), ios::binary);
1754 if (!procFile.is_open())
1756 throw runtime_error(
strpr(
"ERROR: Could not open file: %s.\n",
1757 procFileName.c_str()));
1761 if (procFile.peek() != ifstream::traits_type::eof())
1763 combinedFile << procFile.rdbuf();
1766 remove(procFileName.c_str());
1768 combinedFile.close();
std::size_t numStructures
Total number of structures in dataset.
void writeSymmetryFunctionScaling(std::string const &fileName="scaling.data")
Write symmetry function scaling values to file.
void collectSymmetryFunctionStatistics()
Collect symmetry function statistics from all processors.
gsl_rng * rng
GSL random number generator (different seed for each MPI process).
int distributeStructures(bool randomize, bool excludeRank0=false, std::string const &fileName="input.data")
Read data file and distribute structures among processors.
int recvStructure(Structure *structure, int src)
Receive one structure from source process.
void sortNeighborLists()
Sort all neighbor lists according to element and distance.
void writeAtomicEnvironmentFile(std::vector< std::vector< std::size_t > > neighCutoff, bool derivatives, std::string const &fileNamePrefix="atomic-env")
Write atomic environment file.
void writeNeighborLists(std::string const &fileName="neighbor-list.data")
Write neighbor list file.
int calculateBufferSize(Structure const &structure) const
Calculate buffer size required to communicate structure via MPI.
MPI_Comm comm
Global MPI communicator.
void setupMPI()
Initialize MPI with MPI_COMM_WORLD.
int numProcs
Total number of MPI processors.
std::vector< Structure > structures
All structures in this dataset.
void writeSymmetryFunctionFile(std::string fileName="function.data")
Write symmetry function legacy file ("function.data").
std::size_t prepareNumericForces(Structure &original, double delta)
Prepare numeric force check for a single structure.
void toPhysicalUnits()
Switch all structures to physical units.
void writeSymmetryFunctionHistograms(std::size_t numBins, std::string fileNameFormat="sf.%03zu.%04zu.histo")
Calculate and write symmetry function histograms.
std::size_t writeNeighborHistogram(std::string const &fileNameHisto="neighbors.histo", std::string const &fileNameStructure="neighbors.out")
Calculate and write neighbor histogram and per-structure statistics.
void setupRandomNumberGenerator()
Initialize random number generator.
int sendStructure(Structure const &structure, int dest) const
Send one structure to destination process.
std::string myName
My processor name.
void combineFiles(std::string filePrefix) const
Combine individual MPI proc files to one.
gsl_rng * rngGlobal
Global GSL random number generator (equal seed for each MPI process).
std::size_t getNumStructures(std::ifstream &dataFile)
Get number of structures in data file.
void collectError(std::string const &property, std::map< std::string, double > &error, std::size_t &count) const
Collect error metrics of a property over all MPI procs.
void toNormalizedUnits()
Switch all structures to normalized units.
std::size_t atomicNumber(std::size_t index) const
Get atomic number from element index.
Base class for all NNP applications.
double physicalEnergy(Structure const &structure, bool ref=true) const
Undo normalization for a given energy of structure.
ElementMap elementMap
Global element map, populated by setupElementMap().
std::vector< Element > elements
void removeEnergyOffset(Structure &structure, bool ref=true)
Remove atomic energy offsets from reference energy.
std::vector< std::size_t > minNeighbors
double getEnergyOffset(Structure const &structure) const
Get atomic energy offset for given structure.
string strpr(const char *format,...)
String version of printf function.
vector< string > createFileHeader(vector< string > const &title, vector< size_t > const &colSize, vector< string > const &colName, vector< string > const &colInfo, char const &commentChar)
vector< string > split(string const &input, char delimiter)
Split string at each delimiter.
string reduce(string const &line, string const &whitespace, string const &fill)
Replace multiple whitespaces with fill.
void appendLinesToFile(ofstream &file, vector< string > const lines)
Append multiple lines of strings to open file stream.
V const & safeFind(std::map< K, V > const &stdMap, typename std::map< K, V >::key_type const &key)
Safely access map entry.
Struct to store information on neighbor atoms.
Storage for a single atom.
std::vector< Neighbor > neighbors
Neighbor array (maximum number defined in macros.h.
std::vector< double > G
Symmetry function values.
Storage for one atomic configuration.
Vec3D invbox[3]
Inverse simulation box vectors.
Vec3D box[3]
Simulation box vectors.
bool isTriclinic
If the simulation box is triclinic.
std::vector< std::size_t > numAtomsPerElement
Number of atoms of each element in this structure.
std::string comment
Structure comment.
bool isPeriodic
If periodic boundary conditions apply.
double charge
Charge determined by neural network potential.
void setElementMap(ElementMap const &elementMap)
Set element map of structure.
std::size_t index
Index number of this structure.
void remap()
Translate all atoms back into box if outside.
double chargeRef
Reference charge.
SampleType sampleType
Sample type (training or test set).
bool hasSymmetryFunctionDerivatives
If symmetry function derivatives are saved for each atom.
double energy
Potential energy determined by neural network.
void readFromFile(std::string const fileName="input.data")
Read configuration from file.
double energyRef
Reference potential energy.
int pbc[3]
Number of PBC images necessary in each direction.
double volume
Simulation box volume.
std::size_t numAtoms
Total number of atoms present in this structure.
std::size_t numElements
Global number of elements (all structures).
std::vector< Atom > atoms
Vector of all atoms in this structure.
std::size_t numElementsPresent
Number of elements present in this structure.
bool hasNeighborList
If the neighbor list has been calculated.
bool hasSymmetryFunctions
If symmetry function values are saved for each atom.
Struct containing statistics gathered during symmetry function calculation.
double min
Minimum symmetry function value encountered.
double sum2
Sum of squared symmetry function values (to compute sigma).
double sum
Sum of symmetry function values (to compute mean).
double max
Maximum symmetry function value encountered.
std::size_t count
Counts total number of symmetry function evaluations.
double r[3]
cartesian coordinates.