30#include <gsl/gsl_histogram.h>
31#include <gsl/gsl_rng.h>
36Dataset::Dataset() :
Mode(),
48 if (
rng != NULL) gsl_rng_free(
rng);
55 MPI_Comm_dup(MPI_COMM_WORLD, &tmpComm);
57 MPI_Comm_free(&tmpComm);
65 log <<
"*** SETUP: MPI **************************"
66 "**************************************\n";
70 char line[MPI_MAX_PROCESSOR_NAME];
73 MPI_Comm_dup(*communicator, &
comm);
76 MPI_Get_processor_name(line, &bufferSize);
80 log <<
strpr(
"Process %d of %d (rank %d): %s\n",
89 MPI_Recv(&bufferSize, 1, MPI_INT, i, 0,
comm, &ms);
90 MPI_Recv(line, bufferSize, MPI_CHAR, i, 0,
comm, &ms);
91 log <<
strpr(
"Process %d of %d (rank %d): %s\n",
99 MPI_Send(&bufferSize, 1, MPI_INT, 0, 0,
comm);
100 MPI_Send(line, bufferSize, MPI_CHAR, 0, 0,
comm);
104 log <<
"*****************************************"
105 "**************************************\n";
113 log <<
"*** SETUP: RANDOM NUMBER GENERATOR ******"
114 "**************************************\n";
118 unsigned long seed = atoi(
settings[
"random_seed"].c_str());
119 unsigned long seedGlobal = 0;
123 log <<
strpr(
"Random number generator seed: %d\n", seed);
126 log <<
"WARNING: Seed set to 0. This is a special value for the "
127 "gsl_rng_set() routine (see GSL docs).\n";
130 rng = gsl_rng_alloc(gsl_rng_mt19937);
131 gsl_rng_set(
rng, seed);
132 log <<
strpr(
"Seed for rank %d: %lu\n", 0, seed);
136 seed = gsl_rng_get(
rng);
137 log <<
strpr(
"Seed for rank %d: %lu\n", i, seed);
138 MPI_Send(&seed, 1, MPI_UNSIGNED_LONG, i, 0,
comm);
141 seedGlobal = gsl_rng_get(
rng);
147 MPI_Recv(&seed, 1, MPI_UNSIGNED_LONG, 0, 0,
comm, &ms);
149 rng = gsl_rng_alloc(gsl_rng_taus);
150 gsl_rng_set(
rng, seed);
153 MPI_Bcast(&seedGlobal, 1, MPI_UNSIGNED_LONG, 0,
comm);
154 log <<
strpr(
"Seed for global RNG: %lu\n", seedGlobal);
159 log <<
"*****************************************"
160 "**************************************\n";
175 MPI_Pack_size(1, MPI_INT ,
comm, &is );
176 MPI_Pack_size(1, MPI_INT64_T,
comm, &i64s);
178 MPI_Pack_size(1, MPI_DOUBLE ,
comm, &ds );
179 MPI_Pack_size(1, MPI_CHAR ,
comm, &cs );
182 bs += 5 * cs + 4 * ss + 4 * is + 5 * ds;
185 bs += (s.
comment.length() + 1) * cs;
195 bs += s.
atoms.size() * (4 * cs + 6 * ss + i64s + 4 * ds + 3 * 3 * ds);
196 for (vector<Atom>::const_iterator it = s.
atoms.begin();
197 it != s.
atoms.end(); ++it)
201 bs += it->neighborsUnique.size() * ss;
204 bs += it->numNeighborsPerElement.size() * ss;
207 bs += it->numSymmetryFunctionDerivatives.size() * ss;
208#ifndef N2P2_NO_SF_CACHE
211 bs += it->cacheSizePerElement.size() * ss;
215 bs += it->G.size() * ds;
218 bs += it->dEdG.size() * ds;
221 bs += it->dQdG.size() * ds;
222#ifdef N2P2_FULL_SFD_MEMORY
225 bs += it->dGdxia.size() * ds;
229 bs += it->dGdr.size() * 3 * ds;
232 for (vector<Atom::Neighbor>::const_iterator it2 =
233 it->neighbors.begin(); it2 != it->neighbors.end(); ++it2)
236 bs += 2 * ss + i64s + ds + 3 * ds;
237#ifndef N2P2_NO_SF_CACHE
240 bs += it2->cache.size() * ds;
244 bs += it2->dGdr.size() * 3 * ds;
253 unsigned char* buf = 0;
260 buf =
new unsigned char[bs];
272 MPI_Pack(&(s.
pbc ), 3, MPI_INT , buf, bs, &
p,
comm);
273 MPI_Pack(&(s.
energy ), 1, MPI_DOUBLE, buf, bs, &
p,
comm);
275 MPI_Pack(&(s.
charge ), 1, MPI_DOUBLE, buf, bs, &
p,
comm);
277 MPI_Pack(&(s.
volume ), 1, MPI_DOUBLE, buf, bs, &
p,
comm);
283 MPI_Pack(s.
comment.c_str(), ts, MPI_CHAR, buf, bs, &
p,
comm);
286 for (
size_t i = 0; i < 3; ++i)
288 MPI_Pack(s.
box[i].
r, 3, MPI_DOUBLE, buf, bs, &
p,
comm);
292 for (
size_t i = 0; i < 3; ++i)
294 MPI_Pack(s.
invbox[i].
r, 3, MPI_DOUBLE, buf, bs, &
p,
comm);
310 for (vector<Atom>::const_iterator it = s.
atoms.begin();
311 it != s.
atoms.end(); ++it)
314 MPI_Pack(&(it->hasNeighborList ), 1, MPI_CHAR, buf, bs, &
p,
comm);
315 MPI_Pack(&(it->hasSymmetryFunctions ), 1, MPI_CHAR, buf, bs, &
p,
comm);
316 MPI_Pack(&(it->hasSymmetryFunctionDerivatives), 1, MPI_CHAR, buf, bs, &
p,
comm);
317 MPI_Pack(&(it->useChargeNeuron ), 1, MPI_CHAR, buf, bs, &
p,
comm);
320 MPI_Pack(&(it->tag ), 1, MPI_INT64_T, buf, bs, &
p,
comm);
323 MPI_Pack(&(it->numNeighborsUnique ), 1,
MPI_SIZE_T, buf, bs, &
p,
comm);
324 MPI_Pack(&(it->numSymmetryFunctions ), 1,
MPI_SIZE_T, buf, bs, &
p,
comm);
325 MPI_Pack(&(it->energy ), 1, MPI_DOUBLE, buf, bs, &
p,
comm);
326 MPI_Pack(&(it->chi ), 1, MPI_DOUBLE, buf, bs, &
p,
comm);
327 MPI_Pack(&(it->charge ), 1, MPI_DOUBLE, buf, bs, &
p,
comm);
328 MPI_Pack(&(it->chargeRef ), 1, MPI_DOUBLE, buf, bs, &
p,
comm);
329 MPI_Pack(&(it->r.r ), 3, MPI_DOUBLE, buf, bs, &
p,
comm);
330 MPI_Pack(&(it->f.r ), 3, MPI_DOUBLE, buf, bs, &
p,
comm);
331 MPI_Pack(&(it->fRef.r ), 3, MPI_DOUBLE, buf, bs, &
p,
comm);
334 size_t ts2 = it->neighborsUnique.size();
338 MPI_Pack(&(it->neighborsUnique.front()), ts2,
MPI_SIZE_T, buf, bs, &
p,
comm);
342 ts2 = it->numNeighborsPerElement.size();
346 MPI_Pack(&(it->numNeighborsPerElement.front()), ts2,
MPI_SIZE_T, buf, bs, &
p,
comm);
350 ts2 = it->numSymmetryFunctionDerivatives.size();
354 MPI_Pack(&(it->numSymmetryFunctionDerivatives.front()), ts2,
MPI_SIZE_T, buf, bs, &
p,
comm);
357#ifndef N2P2_NO_SF_CACHE
359 ts2 = it->cacheSizePerElement.size();
363 MPI_Pack(&(it->cacheSizePerElement.front()), ts2,
MPI_SIZE_T, buf, bs, &
p,
comm);
372 MPI_Pack(&(it->G.front()), ts2, MPI_DOUBLE, buf, bs, &
p,
comm);
376 ts2 = it->dEdG.size();
380 MPI_Pack(&(it->dEdG.front()), ts2, MPI_DOUBLE, buf, bs, &
p,
comm);
384 ts2 = it->dQdG.size();
388 MPI_Pack(&(it->dQdG.front()), ts2, MPI_DOUBLE, buf, bs, &
p,
comm);
391#ifdef N2P2_FULL_SFD_MEMORY
393 ts2 = it->dGdxia.size();
397 MPI_Pack(&(it->dGdxia.front()), ts2, MPI_DOUBLE, buf, bs, &
p,
comm);
402 ts2 = it->dGdr.size();
406 for (vector<Vec3D>::const_iterator it2 = it->dGdr.begin();
407 it2 != it->dGdr.end(); ++it2)
409 MPI_Pack(it2->r, 3, MPI_DOUBLE, buf, bs, &
p,
comm);
414 ts2 = it->neighbors.size();
418 for (vector<Atom::Neighbor>::const_iterator it2 =
419 it->neighbors.begin(); it2 != it->neighbors.end(); ++it2)
423 MPI_Pack(&(it2->tag ), 1, MPI_INT64_T, buf, bs, &
p,
comm);
425 MPI_Pack(&(it2->d ), 1, MPI_DOUBLE , buf, bs, &
p,
comm);
426 MPI_Pack( it2->dr.r , 3, MPI_DOUBLE , buf, bs, &
p,
comm);
429#ifndef N2P2_NO_SF_CACHE
431 ts3 = it2->cache.size();
435 MPI_Pack(&(it2->cache.front()), ts3, MPI_DOUBLE, buf, bs, &
p,
comm);
440 ts3 = it2->dGdr.size();
444 for (vector<Vec3D>::const_iterator it3 =
445 it2->dGdr.begin(); it3 != it2->dGdr.end(); ++it3)
447 MPI_Pack(it3->r, 3, MPI_DOUBLE, buf, bs, &
p,
comm);
455 MPI_Send(&bs, 1, MPI_INT, dest, 0,
comm);
456 MPI_Send(buf, bs, MPI_PACKED, dest, 0,
comm);
465 unsigned char* buf = 0;
473 MPI_Recv(&bs, 1, MPI_INT, src, 0,
comm, &ms);
476 buf =
new unsigned char[bs];
478 MPI_Recv(buf, bs, MPI_PACKED, src, 0,
comm, &ms);
490 MPI_Unpack(buf, bs, &
p, &(s->
pbc ), 3, MPI_INT ,
comm);
491 MPI_Unpack(buf, bs, &
p, &(s->
energy ), 1, MPI_DOUBLE,
comm);
493 MPI_Unpack(buf, bs, &
p, &(s->
charge ), 1, MPI_DOUBLE,
comm);
495 MPI_Unpack(buf, bs, &
p, &(s->
volume ), 1, MPI_DOUBLE,
comm);
501 char* comment =
new char[ts];
502 MPI_Unpack(buf, bs, &
p, comment, ts, MPI_CHAR,
comm);
507 for (
size_t i = 0; i < 3; ++i)
509 MPI_Unpack(buf, bs, &
p, s->
box[i].
r, 3, MPI_DOUBLE,
comm);
513 for (
size_t i = 0; i < 3; ++i)
515 MPI_Unpack(buf, bs, &
p, s->
invbox[i].
r, 3, MPI_DOUBLE,
comm);
535 for (vector<Atom>::iterator it = s->
atoms.begin();
536 it != s->
atoms.end(); ++it)
539 MPI_Unpack(buf, bs, &
p, &(it->hasNeighborList ), 1, MPI_CHAR,
comm);
540 MPI_Unpack(buf, bs, &
p, &(it->hasSymmetryFunctions ), 1, MPI_CHAR,
comm);
541 MPI_Unpack(buf, bs, &
p, &(it->hasSymmetryFunctionDerivatives), 1, MPI_CHAR,
comm);
542 MPI_Unpack(buf, bs, &
p, &(it->useChargeNeuron ), 1, MPI_CHAR,
comm);
545 MPI_Unpack(buf, bs, &
p, &(it->tag ), 1, MPI_INT64_T,
comm);
548 MPI_Unpack(buf, bs, &
p, &(it->numNeighborsUnique ), 1,
MPI_SIZE_T,
comm);
549 MPI_Unpack(buf, bs, &
p, &(it->numSymmetryFunctions ), 1,
MPI_SIZE_T,
comm);
550 MPI_Unpack(buf, bs, &
p, &(it->energy ), 1, MPI_DOUBLE,
comm);
551 MPI_Unpack(buf, bs, &
p, &(it->chi ), 1, MPI_DOUBLE,
comm);
552 MPI_Unpack(buf, bs, &
p, &(it->charge ), 1, MPI_DOUBLE,
comm);
553 MPI_Unpack(buf, bs, &
p, &(it->chargeRef ), 1, MPI_DOUBLE,
comm);
554 MPI_Unpack(buf, bs, &
p, &(it->r.r ), 3, MPI_DOUBLE,
comm);
555 MPI_Unpack(buf, bs, &
p, &(it->f.r ), 3, MPI_DOUBLE,
comm);
556 MPI_Unpack(buf, bs, &
p, &(it->fRef.r ), 3, MPI_DOUBLE,
comm);
563 it->neighborsUnique.clear();
564 it->neighborsUnique.resize(ts2, 0);
565 MPI_Unpack(buf, bs, &
p, &(it->neighborsUnique.front()), ts2,
MPI_SIZE_T,
comm);
573 it->numNeighborsPerElement.clear();
574 it->numNeighborsPerElement.resize(ts2, 0);
575 MPI_Unpack(buf, bs, &
p, &(it->numNeighborsPerElement.front()), ts2,
MPI_SIZE_T,
comm);
583 it->numSymmetryFunctionDerivatives.clear();
584 it->numSymmetryFunctionDerivatives.resize(ts2, 0);
585 MPI_Unpack(buf, bs, &
p, &(it->numSymmetryFunctionDerivatives.front()), ts2,
MPI_SIZE_T,
comm);
588#ifndef N2P2_NO_SF_CACHE
594 it->cacheSizePerElement.clear();
595 it->cacheSizePerElement.resize(ts2, 0);
596 MPI_Unpack(buf, bs, &
p, &(it->cacheSizePerElement.front()), ts2,
MPI_SIZE_T,
comm);
606 it->G.resize(ts2, 0.0);
607 MPI_Unpack(buf, bs, &
p, &(it->G.front()), ts2, MPI_DOUBLE,
comm);
616 it->dEdG.resize(ts2, 0.0);
617 MPI_Unpack(buf, bs, &
p, &(it->dEdG.front()), ts2, MPI_DOUBLE,
comm);
626 it->dQdG.resize(ts2, 0.0);
627 MPI_Unpack(buf, bs, &
p, &(it->dQdG.front()), ts2, MPI_DOUBLE,
comm);
630#ifdef N2P2_FULL_SFD_MEMORY
637 it->dGdxia.resize(ts2, 0.0);
638 MPI_Unpack(buf, bs, &
p, &(it->dGdxia.front()), ts2, MPI_DOUBLE,
comm);
648 it->dGdr.resize(ts2);
649 for (vector<Vec3D>::iterator it2 = it->dGdr.begin();
650 it2 != it->dGdr.end(); ++it2)
652 MPI_Unpack(buf, bs, &
p, it2->r, 3, MPI_DOUBLE,
comm);
661 it->neighbors.clear();
662 it->neighbors.resize(ts2);
663 for (vector<Atom::Neighbor>::iterator it2 =
664 it->neighbors.begin(); it2 != it->neighbors.end(); ++it2)
668 MPI_Unpack(buf, bs, &
p, &(it2->tag ), 1, MPI_INT64_T,
comm);
670 MPI_Unpack(buf, bs, &
p, &(it2->d ), 1, MPI_DOUBLE ,
comm);
671 MPI_Unpack(buf, bs, &
p, it2->dr.r , 3, MPI_DOUBLE ,
comm);
674#ifndef N2P2_NO_SF_CACHE
681 it2->cache.resize(ts3, 0.0);
682 MPI_Unpack(buf, bs, &
p, &(it2->cache.front()), ts3, MPI_DOUBLE,
comm);
692 it2->dGdr.resize(ts3);
693 for (vector<Vec3D>::iterator it3 = it2->dGdr.begin();
694 it3 != it2->dGdr.end(); ++it3)
696 MPI_Unpack(buf, bs, &
p, it3->r, 3, MPI_DOUBLE,
comm);
713 vector<string> splitLine;
715 while (getline(dataFile, line))
718 if (!splitLine.empty() && splitLine.at(0) ==
"begin") count++;
726 string const& fileName)
729 log <<
"*** STRUCTURE DISTRIBUTION **************"
730 "**************************************\n";
734 vector<size_t> numStructuresPerProc;
738 log <<
"No structures will be distributed to rank 0.\n";
741 throw runtime_error(
"ERROR: Can not distribute structures, "
742 "at least 2 MPI tasks are required.\n");
746 if (excludeRank0) numReceivers--;
750 log <<
strpr(
"Reading configurations from data file: %s.\n",
752 dataFile.open(fileName.c_str());
761 throw runtime_error(
"ERROR: More receiving processors than "
765 numStructuresPerProc.resize(
numProcs, 0);
770 for (
size_t i = 0; i < (size_t)
numProcs; i++)
772 if (i != 0 || (!excludeRank0))
774 numStructuresPerProc.at(i) = quotient;
775 if (remainder > 0 && i > 0 && i <= remainder)
777 numStructuresPerProc.at(i)++;
783 log <<
strpr(
"Number of structures per processor: %d\n", quotient);
787 log <<
strpr(
"Number of structures per"
788 " processor: %d (%d) or %d (%d)\n",
790 numReceivers - remainder,
797 int bytesTransferred = 0;
798 size_t numMyStructures = numStructuresPerProc.at(
myRank);
801 size_t countStructures = 0;
802 vector<size_t> countStructuresPerProc;
804 countStructuresPerProc.resize(
numProcs, 0);
811 if (countStructuresPerProc.at(proc)
812 < numStructuresPerProc.at(proc))
826 tmpStructure.
index = countStructures;
831 countStructuresPerProc.at(proc)++;
838 for (
int proc = 0; proc <
numProcs; ++proc)
840 for (
size_t i = 0; i < numStructuresPerProc.at(proc); ++i)
854 tmpStructure.
index = countStructures;
859 countStructuresPerProc.at(proc)++;
868 for (
size_t i = 0; i < numMyStructures; i++)
876 log <<
strpr(
"Distributed %zu structures,"
877 " %d bytes (%.2f MiB) transferred.\n",
880 bytesTransferred / 1024. / 1024.);
882 log <<
"*****************************************"
883 "**************************************\n";
885 return bytesTransferred;
900 vector<size_t> numStructuresPerProc(
numProcs, 0);
907 for (
size_t i = 0; i < (size_t)
numProcs; i++)
909 numStructuresPerProc.at(i) = quotient;
910 if (remainder > 0 && i < remainder) numStructuresPerProc.at(i)++;
928 if (
p == 0) istart = 1;
929 for (
size_t i = istart; i < numStructuresPerProc.at(
p); ++i)
933 sign = 2 * ((count % 6) / 3) - 1;
940 s.
comment =
strpr(
"%zu %zu %zu %d", count, iAtom, ixyz, sign);
942 s.
atoms.at(iAtom).r[ixyz] += sign * delta;
954 for (vector<Structure>::iterator it =
structures.begin();
965 for (vector<Structure>::iterator it =
structures.begin();
976 for (vector<Element>::iterator it =
elements.begin();
981 if (it->statistics.data.size() == 0)
983 log <<
strpr(
"WARNING: No statistics for element %zu (%2s) found, "
984 "process %d has no corresponding atoms, creating "
985 "empty statistics.\n",
987 it->getSymbol().c_str(),
990 for (
size_t i = 0; i < it->numSymmetryFunctions(); ++i)
994 MPI_Allreduce(MPI_IN_PLACE, &(c.
min ), 1, MPI_DOUBLE, MPI_MIN,
comm);
995 MPI_Allreduce(MPI_IN_PLACE, &(c.
max ), 1, MPI_DOUBLE, MPI_MAX,
comm);
996 MPI_Allreduce(MPI_IN_PLACE, &(c.
sum ), 1, MPI_DOUBLE, MPI_SUM,
comm);
997 MPI_Allreduce(MPI_IN_PLACE, &(c.
sum2 ), 1, MPI_DOUBLE, MPI_SUM,
comm);
1007 log <<
"*** SYMMETRY FUNCTION SCALING ***********"
1008 "**************************************\n";
1013 log <<
strpr(
"Writing symmetry function scaling file: %s.\n",
1016 sFile.open(fileName.c_str());
1019 vector<string> title;
1020 vector<string> colName;
1021 vector<string> colInfo;
1022 vector<size_t> colSize;
1023 title.push_back(
"Symmetry function scaling data.");
1024 colSize.push_back(10);
1025 colName.push_back(
"e_index");
1026 colInfo.push_back(
"Element index.");
1027 colSize.push_back(10);
1028 colName.push_back(
"sf_index");
1029 colInfo.push_back(
"Symmetry function index.");
1030 colSize.push_back(24);
1031 colName.push_back(
"sf_min");
1032 colInfo.push_back(
"Symmetry function minimum.");
1033 colSize.push_back(24);
1034 colName.push_back(
"sf_max");
1035 colInfo.push_back(
"Symmetry function maximum.");
1036 colSize.push_back(24);
1037 colName.push_back(
"sf_mean");
1038 colInfo.push_back(
"Symmetry function mean.");
1039 colSize.push_back(24);
1040 colName.push_back(
"sf_sigma");
1041 colInfo.push_back(
"Symmetry function sigma.");
1046 log <<
"Abbreviations:\n";
1047 log <<
"--------------\n";
1048 log <<
"ind ...... Symmetry function index.\n";
1049 log <<
"min ...... Minimum symmetry function value.\n";
1050 log <<
"max ...... Maximum symmetry function value.\n";
1051 log <<
"mean ..... Mean symmetry function value.\n";
1052 log <<
"sigma .... Standard deviation of symmetry function values.\n";
1053 log <<
"spread ... (max - min) / sigma.\n";
1055 for (vector<Element>::const_iterator it =
elements.begin();
1058 log <<
strpr(
"Scaling data for symmetry functions element %2s :\n",
1059 it->getSymbol().c_str());
1060 log <<
"-----------------------------------------"
1061 "--------------------------------------\n";
1062 log <<
" ind min max mean sigma spread\n";
1063 log <<
"-----------------------------------------"
1064 "--------------------------------------\n";
1065 for (
size_t i = 0; i < it->numSymmetryFunctions(); ++i)
1068 = it->statistics.data.at(i);
1070 double const mean = c.
sum / n;
1071 double const sigma = sqrt((c.
sum2 - c.
sum * c.
sum / n)
1073 double const spread = (c.
max - c.
min) / sigma;
1074 sFile <<
strpr(
"%10d %10d %24.16E %24.16E %24.16E %24.16E\n",
1081 log <<
strpr(
"%4zu %9.2E %9.2E %9.2E %9.2E %9.2E\n",
1089 log <<
"-----------------------------------------"
1090 "--------------------------------------\n";
1097 log <<
"*****************************************"
1098 "**************************************\n";
1104 string fileNameFormat)
1107 log <<
"*** SYMMETRY FUNCTION HISTOGRAMS ********"
1108 "**************************************\n";
1113 vector<vector<gsl_histogram*> > histograms;
1114 for (vector<Element>::const_iterator it =
elements.begin();
1117 histograms.push_back(vector<gsl_histogram*>());
1118 for (
size_t i = 0; i < it->numSymmetryFunctions(); ++i)
1120 double l =
safeFind(it->statistics.data, i).min;
1121 double h =
safeFind(it->statistics.data, i).max;
1125 h += (h - l) / numBins;
1126 histograms.back().push_back(gsl_histogram_alloc(numBins + 1));
1127 gsl_histogram_set_ranges_uniform(histograms.back().back(),
1134 histograms.back().push_back(
nullptr);
1135 log <<
strpr(
"WARNING: Symmetry function min equals max, "
1136 "ommitting histogram (Element %2s SF %4zu "
1138 it->getSymbol().c_str(),
1140 it->getSymmetryFunction(i).getLineNumber() + 1);
1146 for (vector<Structure>::const_iterator it =
structures.begin();
1149 for (vector<Atom>::const_iterator it2 = it->atoms.begin();
1150 it2 != it->atoms.end(); ++it2)
1152 size_t e = it2->element;
1153 vector<gsl_histogram*>& h = histograms.at(e);
1154 for (
size_t s = 0; s < it2->G.size(); ++s)
1156 if (h.at(s) ==
nullptr)
continue;
1157 gsl_histogram_increment(h.at(s), it2->G.at(s));
1163 for (vector<vector<gsl_histogram*> >::const_iterator it =
1164 histograms.begin(); it != histograms.end(); ++it)
1166 for (vector<gsl_histogram*>::const_iterator it2 = it->begin();
1167 it2 != it->end(); ++it2)
1169 if ((*it2) ==
nullptr)
continue;
1170 MPI_Allreduce(MPI_IN_PLACE, (*it2)->bin, numBins + 1, MPI_DOUBLE, MPI_SUM,
comm);
1177 log <<
strpr(
"Writing histograms with %zu bins.\n", numBins + 1);
1178 for (
size_t e = 0; e <
elements.size(); ++e)
1180 for (
size_t s = 0; s <
elements.at(e).numSymmetryFunctions(); ++s)
1182 gsl_histogram*& h = histograms.at(e).at(s);
1183 if (h ==
nullptr)
continue;
1185 string fileName =
strpr(fileNameFormat.c_str(),
1188 fp = fopen(fileName.c_str(),
"w");
1191 throw runtime_error(
strpr(
"ERROR: Could not open file:"
1192 " %s.\n", fileName.c_str()));
1194 vector<string> info =
elements.at(e).infoSymmetryFunction(s);
1195 for (vector<string>::const_iterator it = info.begin();
1196 it != info.end(); ++it)
1198 fprintf(fp,
"#SFINFO %s\n", it->c_str());
1201 =
elements.at(e).statistics.data.at(s);
1203 fprintf(fp,
"#SFINFO min %15.8E\n", c.
min);
1204 fprintf(fp,
"#SFINFO max %15.8E\n", c.
max);
1205 fprintf(fp,
"#SFINFO mean %15.8E\n", c.
sum / n);
1206 fprintf(fp,
"#SFINFO sigma %15.8E\n",
1207 sqrt(1.0 / (n - 1) * (c.
sum2 - c.
sum * c.
sum / n)));
1210 vector<string> title;
1211 vector<string> colName;
1212 vector<string> colInfo;
1213 vector<size_t> colSize;
1214 title.push_back(
"Symmetry function histogram.");
1215 colSize.push_back(16);
1216 colName.push_back(
"symfunc_l");
1217 colInfo.push_back(
"Symmetry function value, left bin limit.");
1218 colSize.push_back(16);
1219 colName.push_back(
"symfunc_r");
1220 colInfo.push_back(
"Symmetry function value, right bin limit.");
1221 colSize.push_back(16);
1222 colName.push_back(
"hist");
1223 colInfo.push_back(
"Histogram count.");
1230 gsl_histogram_fprintf(fp, h,
"%16.8E",
"%16.8E");
1238 for (vector<vector<gsl_histogram*> >::const_iterator it =
1239 histograms.begin(); it != histograms.end(); ++it)
1241 for (vector<gsl_histogram*>::const_iterator it2 = it->begin();
1242 it2 != it->end(); ++it2)
1244 if ((*it2) ==
nullptr)
continue;
1245 gsl_histogram_free(*it2);
1249 log <<
"*****************************************"
1250 "**************************************\n";
1258 log <<
"*** SYMMETRY FUNCTION FILE **************"
1259 "**************************************\n";
1263 log <<
strpr(
"Writing symmetry functions to file: %s\n", fileName.c_str());
1267 file.open(fileName.c_str());
1273 vector<Structure>::const_iterator it =
structures.begin();
1280 if (it !=
structures.end() && i == it->index)
1283 file.open(fileName.c_str(), ios_base::app);
1284 file <<
strpr(
"%6zu\n", it->numAtoms);
1286 for (vector<Atom>::const_iterator it2 = it->atoms.begin();
1287 it2 != it->atoms.end(); ++it2)
1291 for (vector<double>::const_iterator it3 = it2->G.begin();
1292 it3 != it2->G.end(); ++it3)
1294 file <<
strpr(
" %14.10f", *it3);
1299 double energy = 0.0;
1301 else energy = it->energyRef;
1303 energy /= it->numAtoms;
1304 file <<
strpr(
" %19.10f %19.10f %19.10f %19.10f\n",
1305 0.0, energy, energy, 0.0);
1314 log <<
"*****************************************"
1315 "**************************************\n";
1321 string const& fileNameStructure)
1324 log <<
"*** NEIGHBOR STATISTICS/HISTOGRAM *******"
1325 "**************************************\n";
1329 string fileNameLocal =
strpr(
"%s.%04d", fileNameStructure.c_str(),
myRank);
1330 statFile.open(fileNameLocal.c_str());
1334 vector<string> title;
1335 vector<string> colName;
1336 vector<string> colInfo;
1337 vector<size_t> colSize;
1338 title.push_back(
"Neighbor statistics for each structure.");
1339 colSize.push_back(10);
1340 colName.push_back(
"struct");
1341 colInfo.push_back(
"Index of structure (starting with 1).");
1342 colSize.push_back(10);
1343 colName.push_back(
"natoms");
1344 colInfo.push_back(
"Number of atoms in structure.");
1345 colSize.push_back(10);
1346 colName.push_back(
"min");
1347 colInfo.push_back(
"Minimum number of neighbors over all atoms.");
1348 colSize.push_back(10);
1349 colName.push_back(
"max");
1350 colInfo.push_back(
"Maximum number of neighbors over all atoms.");
1351 colSize.push_back(16);
1352 colName.push_back(
"mean");
1353 colInfo.push_back(
"Mean number of neighbors over all atoms.");
1359 size_t numAtomsGlobal = 0;
1360 size_t minNeighborsGlobal = numeric_limits<size_t>::max();
1361 size_t maxNeighborsGlobal = 0;
1362 double meanNeighborsGlobal = 0.0;
1366 size_t maxNeighbors = 0;
1367 double meanNeighbors = 0.0;
1368 for (
auto const& a : s.atoms)
1370 size_t const n = a.numNeighbors;
1372 maxNeighbors = max(maxNeighbors, n);
1375 numAtomsGlobal += s.numAtoms;
1376 minNeighborsGlobal = min(minNeighborsGlobal,
minNeighbors);
1377 maxNeighborsGlobal = max(maxNeighborsGlobal, maxNeighbors);
1378 meanNeighborsGlobal += meanNeighbors;
1379 statFile <<
strpr(
"%10zu %10zu %10zu %10zu %16.8E\n",
1384 meanNeighbors / s.numAtoms);
1388 MPI_Allreduce(MPI_IN_PLACE, &numAtomsGlobal , 1,
MPI_SIZE_T, MPI_SUM,
comm);
1389 MPI_Allreduce(MPI_IN_PLACE, &minNeighborsGlobal , 1,
MPI_SIZE_T, MPI_MIN,
comm);
1390 MPI_Allreduce(MPI_IN_PLACE, &maxNeighborsGlobal , 1,
MPI_SIZE_T, MPI_MAX,
comm);
1391 MPI_Allreduce(MPI_IN_PLACE, &meanNeighborsGlobal, 1, MPI_DOUBLE, MPI_SUM,
comm);
1392 log <<
strpr(
"Minimum number of neighbors: %d\n", minNeighborsGlobal);
1393 log <<
strpr(
"Mean number of neighbors: %.1f\n",
1394 meanNeighborsGlobal / numAtomsGlobal);
1395 log <<
strpr(
"Maximum number of neighbors: %d\n", maxNeighborsGlobal);
1398 gsl_histogram* histNeighbors = NULL;
1399 histNeighbors = gsl_histogram_alloc(maxNeighborsGlobal + 1);
1400 gsl_histogram_set_ranges_uniform(histNeighbors,
1402 maxNeighborsGlobal + 0.5);
1403 for (vector<Structure>::const_iterator it =
structures.begin();
1406 for (vector<Atom>::const_iterator it2 = it->atoms.begin();
1407 it2 != it->atoms.end(); ++it2)
1409 gsl_histogram_increment(histNeighbors, it2->numNeighbors);
1412 MPI_Allreduce(MPI_IN_PLACE, histNeighbors->bin, maxNeighborsGlobal + 1, MPI_DOUBLE, MPI_SUM,
comm);
1417 log <<
strpr(
"Neighbor histogram file: %s.\n", fileNameHisto.c_str());
1419 fp = fopen(fileNameHisto.c_str(),
"w");
1422 throw runtime_error(
strpr(
"ERROR: Could not open file: %s.\n",
1423 fileNameHisto.c_str()));
1427 vector<string> title;
1428 vector<string> colName;
1429 vector<string> colInfo;
1430 vector<size_t> colSize;
1431 title.push_back(
"Neighbor count histogram.");
1432 colSize.push_back(9);
1433 colName.push_back(
"neigh_l");
1434 colInfo.push_back(
"Number of neighbors, left bin limit.");
1435 colSize.push_back(9);
1436 colName.push_back(
"neigh_r");
1437 colInfo.push_back(
"Number of neighbors, right bin limit.");
1438 colSize.push_back(16);
1439 colName.push_back(
"hist");
1440 colInfo.push_back(
"Histogram count.");
1444 gsl_histogram_fprintf(fp, histNeighbors,
"%9.1f",
"%16.8E");
1453 log <<
strpr(
"Combining per-structure neighbor statistics file: %s.\n",
1454 fileNameStructure.c_str());
1458 gsl_histogram_free(histNeighbors);
1460 log <<
"*****************************************"
1461 "**************************************\n";
1463 return maxNeighborsGlobal;
1469 log <<
"*** NEIGHBOR LIST ***********************"
1470 "**************************************\n";
1473 log <<
"Sorting neighbor lists according to element and distance.\n";
1475 for (vector<Structure>::iterator it =
structures.begin();
1478 for (vector<Atom>::iterator it2 = it->atoms.begin();
1479 it2 != it->atoms.end(); ++it2)
1481 sort(it2->neighbors.begin(), it2->neighbors.end());
1485 log <<
"*****************************************"
1486 "**************************************\n";
1493 log <<
"*** NEIGHBOR LIST ***********************"
1494 "**************************************\n";
1497 string fileNameLocal =
strpr(
"%s.%04d", fileName.c_str(),
myRank);
1499 fileLocal.open(fileNameLocal.c_str());
1501 for (vector<Structure>::const_iterator it =
structures.begin();
1504 fileLocal <<
strpr(
"%zu\n", it->numAtoms);
1505 for (vector<Atom>::const_iterator it2 = it->atoms.begin();
1506 it2 != it->atoms.end(); ++it2)
1511 fileLocal <<
strpr(
" %zu", it2->numNeighborsPerElement.at(i));
1513 for (vector<Atom::Neighbor>::const_iterator it3
1514 = it2->neighbors.begin(); it3 != it2->neighbors.end(); ++it3)
1516 fileLocal <<
strpr(
" %zu", it3->index);
1526 log <<
strpr(
"Writing neighbor lists to file: %s.\n", fileName.c_str());
1530 log <<
"*****************************************"
1531 "**************************************\n";
1537 vector<vector<size_t> > neighCutoff,
1539 string const& fileNamePrefix)
1542 log <<
"*** ATOMIC ENVIRONMENT ******************"
1543 "**************************************\n";
1546 string const fileNamePrefixG =
strpr(
"%s.G" , fileNamePrefix.c_str());
1547 string const fileNamePrefixdGdx =
strpr(
"%s.dGdx", fileNamePrefix.c_str());
1548 string const fileNamePrefixdGdy =
strpr(
"%s.dGdy", fileNamePrefix.c_str());
1549 string const fileNamePrefixdGdz =
strpr(
"%s.dGdz", fileNamePrefix.c_str());
1551 string const fileNameLocalG =
strpr(
"%s.%04d",
1552 fileNamePrefixG.c_str(),
myRank);
1553 string const fileNameLocaldGdx =
strpr(
"%s.%04d",
1554 fileNamePrefixdGdx.c_str(),
myRank);
1555 string const fileNameLocaldGdy =
strpr(
"%s.%04d",
1556 fileNamePrefixdGdy.c_str(),
myRank);
1557 string const fileNameLocaldGdz =
strpr(
"%s.%04d",
1558 fileNamePrefixdGdz.c_str(),
myRank);
1560 ofstream fileLocalG;
1561 ofstream fileLocaldGdx;
1562 ofstream fileLocaldGdy;
1563 ofstream fileLocaldGdz;
1565 fileLocalG.open(fileNameLocalG.c_str());
1568 fileLocaldGdx.open(fileNameLocaldGdx.c_str());
1569 fileLocaldGdy.open(fileNameLocaldGdy.c_str());
1570 fileLocaldGdz.open(fileNameLocaldGdz.c_str());
1573 log <<
"Preparing symmetry functions for atomic environment file(s).\n";
1578 log <<
strpr(
"Maximum number of %2s neighbors for central %2s "
1582 neighCutoff.at(i).at(j));
1587 for (vector<Structure>::const_iterator its =
structures.begin();
1590 for (vector<Atom>::const_iterator ita = its->atoms.begin();
1591 ita != its->atoms.end(); ++ita)
1593 size_t const ea = ita->element;
1596 if (ita->numNeighborsPerElement.at(i)
1597 < neighCutoff.at(ea).at(i))
1599 throw runtime_error(
strpr(
1600 "ERROR: Not enough neighbor atoms, cannot create "
1601 "atomic environment file. Reduce neighbor cutoff for "
1602 "element %2s.\n",
elementMap[i].c_str()).c_str());
1610 for (vector<double>::const_iterator it = ita->G.begin();
1611 it != ita->G.end(); ++it)
1613 fileLocalG <<
strpr(
" %16.8E", (*it));
1617 for (vector<Vec3D>::const_iterator it = ita->dGdr.begin();
1618 it != ita->dGdr.end(); ++it)
1620 fileLocaldGdx <<
strpr(
" %16.8E", (*it)[0]);
1621 fileLocaldGdy <<
strpr(
" %16.8E", (*it)[1]);
1622 fileLocaldGdz <<
strpr(
" %16.8E", (*it)[2]);
1626 for (vector<Atom::Neighbor>::const_iterator itn
1627 = ita->neighbors.begin(); itn != ita->neighbors.end(); ++itn)
1629 size_t const i = itn->index;
1630 size_t const en = itn->element;
1631 if (neighCount.at(en) < neighCutoff.at(ea).at(en))
1634 Atom const& a = its->atoms.at(i);
1635 for (vector<double>::const_iterator it = a.
G.begin();
1636 it != a.
G.end(); ++it)
1638 fileLocalG <<
strpr(
" %16.8E", (*it));
1644 vector<Atom::Neighbor>::const_iterator itan = find_if(
1648 return n.index == ita->index;
1650 for (vector<Vec3D>::const_iterator it
1651 = itan->dGdr.begin();
1652 it != itan->dGdr.end(); ++it)
1654 fileLocaldGdx <<
strpr(
" %16.8E", (*it)[0]);
1655 fileLocaldGdy <<
strpr(
" %16.8E", (*it)[1]);
1656 fileLocaldGdz <<
strpr(
" %16.8E", (*it)[2]);
1659 neighCount.at(en)++;
1665 fileLocaldGdx <<
'\n';
1666 fileLocaldGdy <<
'\n';
1667 fileLocaldGdz <<
'\n';
1670 fill(neighCount.begin(), neighCount.end(), 0);
1678 fileLocaldGdx.flush();
1679 fileLocaldGdx.close();
1680 fileLocaldGdy.flush();
1681 fileLocaldGdy.close();
1682 fileLocaldGdz.flush();
1683 fileLocaldGdz.close();
1689 log <<
strpr(
"Combining atomic environment file: %s.\n",
1690 fileNamePrefixG.c_str());
1694 log <<
strpr(
"Combining atomic environment file: %s.\n",
1695 fileNamePrefixdGdx.c_str());
1697 log <<
strpr(
"Combining atomic environment file: %s.\n",
1698 fileNamePrefixdGdy.c_str());
1700 log <<
strpr(
"Combining atomic environment file: %s.\n",
1701 fileNamePrefixdGdz.c_str());
1706 log <<
"*****************************************"
1707 "**************************************\n";
1713 map<string, double>& error,
1714 size_t& count)
const
1716 if (property ==
"energy")
1718 MPI_Allreduce(MPI_IN_PLACE, &count , 1,
MPI_SIZE_T, MPI_SUM,
comm);
1719 MPI_Allreduce(MPI_IN_PLACE, &(error.at(
"RMSEpa")), 1, MPI_DOUBLE, MPI_SUM,
comm);
1720 MPI_Allreduce(MPI_IN_PLACE, &(error.at(
"RMSE")) , 1, MPI_DOUBLE, MPI_SUM,
comm);
1721 MPI_Allreduce(MPI_IN_PLACE, &(error.at(
"MAEpa")) , 1, MPI_DOUBLE, MPI_SUM,
comm);
1722 MPI_Allreduce(MPI_IN_PLACE, &(error.at(
"MAE")) , 1, MPI_DOUBLE, MPI_SUM,
comm);
1723 error.at(
"RMSEpa") = sqrt(error.at(
"RMSEpa") / count);
1724 error.at(
"RMSE") = sqrt(error.at(
"RMSE") / count);
1725 error.at(
"MAEpa") = error.at(
"MAEpa") / count;
1726 error.at(
"MAE") = error.at(
"MAE") / count;
1728 else if (property ==
"force" || property ==
"charge")
1730 MPI_Allreduce(MPI_IN_PLACE, &count , 1,
MPI_SIZE_T, MPI_SUM,
comm);
1731 MPI_Allreduce(MPI_IN_PLACE, &(error.at(
"RMSE")), 1, MPI_DOUBLE, MPI_SUM,
comm);
1732 MPI_Allreduce(MPI_IN_PLACE, &(error.at(
"MAE")) , 1, MPI_DOUBLE, MPI_SUM,
comm);
1733 error.at(
"RMSE") = sqrt(error.at(
"RMSE") / count);
1734 error.at(
"MAE") = error.at(
"MAE") / count;
1738 throw runtime_error(
"ERROR: Unknown property for error collection.\n");
1746 ofstream combinedFile(filePrefix.c_str(), ios::binary);
1747 if (!combinedFile.is_open())
1749 throw runtime_error(
strpr(
"ERROR: Could not open file: %s.\n",
1750 filePrefix.c_str()));
1754 string procFileName =
strpr(
"%s.%04d", filePrefix.c_str(), i);
1755 ifstream procFile(procFileName.c_str(), ios::binary);
1756 if (!procFile.is_open())
1758 throw runtime_error(
strpr(
"ERROR: Could not open file: %s.\n",
1759 procFileName.c_str()));
1763 if (procFile.peek() != ifstream::traits_type::eof())
1765 combinedFile << procFile.rdbuf();
1768 remove(procFileName.c_str());
1770 combinedFile.close();
std::size_t numStructures
Total number of structures in dataset.
void writeSymmetryFunctionScaling(std::string const &fileName="scaling.data")
Write symmetry function scaling values to file.
void collectSymmetryFunctionStatistics()
Collect symmetry function statistics from all processors.
gsl_rng * rng
GSL random number generator (different seed for each MPI process).
int distributeStructures(bool randomize, bool excludeRank0=false, std::string const &fileName="input.data")
Read data file and distribute structures among processors.
int recvStructure(Structure *structure, int src)
Receive one structure from source process.
void sortNeighborLists()
Sort all neighbor lists according to element and distance.
void writeAtomicEnvironmentFile(std::vector< std::vector< std::size_t > > neighCutoff, bool derivatives, std::string const &fileNamePrefix="atomic-env")
Write atomic environment file.
void writeNeighborLists(std::string const &fileName="neighbor-list.data")
Write neighbor list file.
int calculateBufferSize(Structure const &structure) const
Calculate buffer size required to communicate structure via MPI.
MPI_Comm comm
Global MPI communicator.
void setupMPI()
Initialize MPI with MPI_COMM_WORLD.
int numProcs
Total number of MPI processors.
std::vector< Structure > structures
All structures in this dataset.
void writeSymmetryFunctionFile(std::string fileName="function.data")
Write symmetry function legacy file ("function.data").
std::size_t prepareNumericForces(Structure &original, double delta)
Prepare numeric force check for a single structure.
void toPhysicalUnits()
Switch all structures to physical units.
void writeSymmetryFunctionHistograms(std::size_t numBins, std::string fileNameFormat="sf.%03zu.%04zu.histo")
Calculate and write symmetry function histograms.
std::size_t writeNeighborHistogram(std::string const &fileNameHisto="neighbors.histo", std::string const &fileNameStructure="neighbors.out")
Calculate and write neighbor histogram and per-structure statistics.
void setupRandomNumberGenerator()
Initialize random number generator.
int sendStructure(Structure const &structure, int dest) const
Send one structure to destination process.
std::string myName
My processor name.
void combineFiles(std::string filePrefix) const
Combine individual MPI proc files to one.
gsl_rng * rngGlobal
Global GSL random number generator (equal seed for each MPI process).
std::size_t getNumStructures(std::ifstream &dataFile)
Get number of structures in data file.
void collectError(std::string const &property, std::map< std::string, double > &error, std::size_t &count) const
Collect error metrics of a property over all MPI procs.
void toNormalizedUnits()
Switch all structures to normalized units.
std::size_t atomicNumber(std::size_t index) const
Get atomic number from element index.
Base class for all NNP applications.
double physicalEnergy(Structure const &structure, bool ref=true) const
Undo normalization for a given energy of structure.
ElementMap elementMap
Global element map, populated by setupElementMap().
std::vector< Element > elements
settings::Settings settings
void removeEnergyOffset(Structure &structure, bool ref=true)
Remove atomic energy offsets from reference energy.
std::vector< std::size_t > minNeighbors
double getEnergyOffset(Structure const &structure) const
Get atomic energy offset for given structure.
string strpr(const char *format,...)
String version of printf function.
vector< string > createFileHeader(vector< string > const &title, vector< size_t > const &colSize, vector< string > const &colName, vector< string > const &colInfo, char const &commentChar)
vector< string > split(string const &input, char delimiter)
Split string at each delimiter.
string reduce(string const &line, string const &whitespace, string const &fill)
Replace multiple whitespaces with fill.
void appendLinesToFile(ofstream &file, vector< string > const lines)
Append multiple lines of strings to open file stream.
V const & safeFind(std::map< K, V > const &stdMap, typename std::map< K, V >::key_type const &key)
Safely access map entry.
Struct to store information on neighbor atoms.
Storage for a single atom.
std::vector< Neighbor > neighbors
Neighbor array (maximum number defined in macros.h.
std::vector< double > G
Symmetry function values.
Storage for one atomic configuration.
Vec3D invbox[3]
Inverse simulation box vectors.
Vec3D box[3]
Simulation box vectors.
bool isTriclinic
If the simulation box is triclinic.
std::vector< std::size_t > numAtomsPerElement
Number of atoms of each element in this structure.
std::string comment
Structure comment.
bool isPeriodic
If periodic boundary conditions apply.
double charge
Charge determined by neural network potential.
void setElementMap(ElementMap const &elementMap)
Set element map of structure.
std::size_t index
Index number of this structure.
void remap()
Translate all atoms back into box if outside.
double chargeRef
Reference charge.
SampleType sampleType
Sample type (training or test set).
bool hasSymmetryFunctionDerivatives
If symmetry function derivatives are saved for each atom.
double energy
Potential energy determined by neural network.
void readFromFile(std::string const fileName="input.data")
Read configuration from file.
double energyRef
Reference potential energy.
int pbc[3]
Number of PBC images necessary in each direction for max cut-off.
double volume
Simulation box volume.
std::size_t numAtoms
Total number of atoms present in this structure.
std::size_t numElements
Global number of elements (all structures).
std::vector< Atom > atoms
Vector of all atoms in this structure.
std::size_t numElementsPresent
Number of elements present in this structure.
bool hasNeighborList
If the neighbor list has been calculated.
bool hasSymmetryFunctions
If symmetry function values are saved for each atom.
Struct containing statistics gathered during symmetry function calculation.
double min
Minimum symmetry function value encountered.
double sum2
Sum of squared symmetry function values (to compute sigma).
double sum
Sum of symmetry function values (to compute mean).
double max
Maximum symmetry function value encountered.
std::size_t count
Counts total number of symmetry function evaluations.
double r[3]
cartesian coordinates.