33int main(
int argc,
char* argv[])
36 bool useForces =
false;
37 bool normalize =
false;
40 size_t countEnergy = 0;
41 size_t countForces = 0;
42 map<string, double> errorEnergy;
43 map<string, double> errorForces;
47 ofstream fileOutputData;
50 errorEnergy[
"RMSEpa"] = 0.0;
51 errorEnergy[
"RMSE"] = 0.0;
52 errorEnergy[
"MAEpa"] = 0.0;
53 errorEnergy[
"MAE"] = 0.0;
54 errorForces[
"RMSE"] = 0.0;
55 errorForces[
"MAE"] = 0.0;
59 cout <<
"USAGE: " << argv[0] <<
" <shuffle>\n"
60 <<
" <shuffle> ... Randomly distribute structures to MPI"
61 " processes (0/1 = no/yes).\n"
62 <<
" Execute in directory with these NNP files present:\n"
63 <<
" - input.data (structure file)\n"
64 <<
" - input.nn (NNP settings)\n"
65 <<
" - scaling.data (symmetry function scaling data)\n"
66 <<
" - \"weights.%%03d.data\" (weights files)\n";
70 shuffle = (bool)atoi(argv[1]);
72 MPI_Init(&argc, &argv);
73 MPI_Comm_size(MPI_COMM_WORLD, &numProcs);
74 MPI_Comm_rank(MPI_COMM_WORLD, &myRank);
78 myLog.open(
strpr(
"nnp-dataset.log.%04d", myRank).c_str());
93 dataset.
log <<
"*** DATA SET PREDICTION *****************"
94 "**************************************\n";
100 dataset.
log <<
"Energies and forces are predicted.\n";
104 dataset.
log <<
"Only energies are predicted.\n";
110 vector<size_t> count(numElements, 0);
111 vector<vector<double> > sensMean;
112 vector<vector<double> > sensMax;
113 sensMean.resize(numElements);
114 sensMax.resize(numElements);
115 for (
size_t i = 0; i < numElements; ++i)
117 sensMean.at(i).resize(numSymmetryFunctions.at(i), 0.0);
118 sensMax.at(i).resize(numSymmetryFunctions.at(i), 0.0);
122 fileName =
strpr(
"energy.comp.%04d", myRank);
123 fileEnergy.open(fileName.c_str());
128 vector<string> title;
129 vector<string> colName;
130 vector<string> colInfo;
131 vector<size_t> colSize;
132 title.push_back(
"Energy comparison.");
133 colSize.push_back(10);
134 colName.push_back(
"index");
135 colInfo.push_back(
"Structure index.");
136 colSize.push_back(10);
137 colName.push_back(
"N");
138 colInfo.push_back(
"Number of atoms in structure.");
139 colSize.push_back(16);
140 colName.push_back(
"Eref_phys");
141 colInfo.push_back(
"Reference potential energy (physical units, "
142 "atomic energy offsets added).");
143 colSize.push_back(16);
144 colName.push_back(
"Ennp_phys");
145 colInfo.push_back(
"NNP potential energy (physical units, "
146 "atomic energy offsets added).");
149 colSize.push_back(16);
150 colName.push_back(
"Eref_int");
151 colInfo.push_back(
"Reference potential energy (internal units).");
152 colSize.push_back(16);
153 colName.push_back(
"Ennp_int");
154 colInfo.push_back(
"NNP potential energy (internal units).");
161 fileName =
strpr(
"forces.comp.%04d", myRank);
162 fileForces.open(fileName.c_str());
167 vector<string> title;
168 vector<string> colName;
169 vector<string> colInfo;
170 vector<size_t> colSize;
171 title.push_back(
"Force comparison.");
172 colSize.push_back(10);
173 colName.push_back(
"index_s");
174 colInfo.push_back(
"Structure index.");
175 colSize.push_back(10);
176 colName.push_back(
"index_a");
177 colInfo.push_back(
"Atom index (x, y, z components in consecutive "
179 colSize.push_back(16);
180 colName.push_back(
"Fref_phys");
181 colInfo.push_back(
"Reference force (physical units).");
182 colSize.push_back(16);
183 colName.push_back(
"Fnnp_phys");
184 colInfo.push_back(
"NNP force (physical units).");
187 colSize.push_back(16);
188 colName.push_back(
"Fref_int");
189 colInfo.push_back(
"Reference force (internal units).");
190 colSize.push_back(16);
191 colName.push_back(
"Fnnp_int");
192 colInfo.push_back(
"NNP force (internal units).");
203 fileName =
strpr(
"output.data.%04d", myRank);
204 fileOutputData.open(fileName.c_str());
206 for (vector<Structure>::iterator it = dataset.
structures.begin();
214 for (vector<Atom>::iterator it2 = it->atoms.begin();
215 it2 != it->atoms.end(); ++it2)
218 size_t const& e = it2->element;
220 for (
size_t i = 0; i < numSymmetryFunctions.at(e); ++i)
222 double const& s = it2->dEdG.at(i);
223 sensMean.at(e).at(i) += s * s;
224 sensMax.at(e).at(i) = max(sensMax.at(e).at(i), abs(s));
229 it2->numNeighborsUnique = 0;
230 it2->neighborsUnique.clear();
231 vector<size_t>(it2->neighborsUnique).swap(it2->neighborsUnique);
233 it2->numNeighborsPerElement.clear();
234 vector<size_t>(it2->numNeighborsPerElement).swap(
235 it2->numNeighborsPerElement);
238 vector<double>(it2->G).swap(it2->G);
241 vector<double>(it2->dEdG).swap(it2->dEdG);
243#ifdef N2P2_FULL_SFD_MEMORY
245 vector<double>(it2->dGdxia).swap(it2->dGdxia);
249 vector<Vec3D>(it2->dGdr).swap(it2->dGdr);
251 it2->numNeighbors = 0;
252 it2->neighbors.clear();
253 vector<Atom::Neighbor>(it2->neighbors).swap(it2->neighbors);
255 it2->hasNeighborList =
false;
256 it2->hasSymmetryFunctions =
false;
257 it2->hasSymmetryFunctionDerivatives =
false;
259 it->hasNeighborList =
false;
260 it->hasSymmetryFunctions =
false;
261 it->hasSymmetryFunctionDerivatives =
false;
262 it->updateError(
"energy", errorEnergy, countEnergy);
263 fileEnergy <<
strpr(
"%10zu %10zu", it->index, it->numAtoms);
266 fileEnergy <<
strpr(
" %16.8E %16.8E %16.8E %16.8E\n",
276 fileEnergy <<
strpr(
" %16.8E %16.8E\n",
282 it->updateError(
"force", errorForces, countForces);
283 for (vector<Atom>::const_iterator it2 = it->atoms.begin();
284 it2 != it->atoms.end(); ++it2)
286 for (
size_t i = 0; i < 3; ++i)
288 fileForces <<
strpr(
"%10zu %10zu",
295 dataset.
physical(
"force", it2->fRef[i]),
296 dataset.
physical(
"force", it2->f[i]));
298 fileForces <<
strpr(
" %16.8E %16.8E\n",
312 it->writeToFile(&fileOutputData,
false);
316 if (useForces) fileForces.close();
317 fileOutputData.close();
318 MPI_Barrier(MPI_COMM_WORLD);
322 fileName =
"energy.comp";
326 fileName =
"forces.comp";
329 fileName =
"output.data";
333 dataset.
collectError(
"energy", errorEnergy, countEnergy);
334 if (useForces) dataset.
collectError(
"force", errorForces, countForces);
340 dataset.
log <<
"Energy and force comparison in files:\n";
341 dataset.
log <<
" - energy.comp\n";
342 dataset.
log <<
" - forces.comp\n";
346 dataset.
log <<
"Energy comparison in file:\n";
347 dataset.
log <<
" - energy.comp\n";
349 dataset.
log <<
"Predicted data set in \"output.data\"\n";
351 dataset.
log <<
"Error metrics for energies and forces:\n";
352 dataset.
log <<
"-----------------------------------------"
353 "-----------------------------------------"
354 "--------------------------------------\n";
355 dataset.
log <<
" physical units ";
358 dataset.
log <<
" | internal units ";
361 dataset.
log <<
strpr(
" %13s %13s %13s %13s",
362 "RMSEpa",
"RMSE",
"MAEpa",
"MAE");
365 dataset.
log <<
strpr(
" | %13s %13s %13s %13s",
366 "RMSEpa",
"RMSE",
"MAEpa",
"MAE");
369 dataset.
log <<
"ENERGY";
373 " %13.5E %13.5E %13.5E %13.5E |",
374 dataset.
physical(
"energy", errorEnergy.at(
"RMSEpa")),
375 dataset.
physical(
"energy", errorEnergy.at(
"RMSE")),
376 dataset.
physical(
"energy", errorEnergy.at(
"MAEpa")),
377 dataset.
physical(
"energy", errorEnergy.at(
"MAE")));
379 dataset.
log <<
strpr(
" %13.5E %13.5E %13.5E %13.5E\n",
380 errorEnergy.at(
"RMSEpa"),
381 errorEnergy.at(
"RMSE"),
382 errorEnergy.at(
"MAEpa"),
383 errorEnergy.at(
"MAE"));
386 dataset.
log <<
"FORCES";
390 " %13s %13.5E %13s %13.5E |",
"",
391 dataset.
physical(
"force", errorForces.at(
"RMSE")),
"",
392 dataset.
physical(
"force", errorForces.at(
"MAE")));
394 dataset.
log <<
strpr(
" %13s %13.5E %13s %13.5E\n",
"",
395 errorForces.at(
"RMSE"),
"",
396 errorForces.at(
"MAE"));
398 dataset.
log <<
"-----------------------------------------"
399 "-----------------------------------------"
400 "--------------------------------------\n";
401 dataset.
log <<
"*****************************************"
402 "**************************************\n";
405 dataset.
log <<
"*** SENSITIVITY ANALYSIS ****************"
406 "**************************************\n";
412 dataset.
log <<
"Writing sensitivity analysis data to files:\n";
413 MPI_Reduce(MPI_IN_PLACE, &(count.front()), numElements,
MPI_SIZE_T, MPI_SUM, 0, MPI_COMM_WORLD);
414 for (
size_t i = 0; i < numElements; ++i)
416 size_t const& n = numSymmetryFunctions.at(i);
417 double sensMeanSum = 0.0;
418 double sensMaxSum = 0.0;
419 MPI_Reduce(MPI_IN_PLACE, &(sensMean.at(i).front()), n, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
420 MPI_Reduce(MPI_IN_PLACE, &(sensMax.at(i).front() ), n, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
421 for (
size_t j = 0; j < numSymmetryFunctions.at(i); ++j)
423 sensMean.at(i).at(j) = sqrt(sensMean.at(i).at(j)
425 sensMeanSum += sensMean.at(i).at(j);
426 sensMaxSum += sensMax.at(i).at(j);
429 string sensFileName =
strpr(
"sensitivity.%03d.out",
431 dataset.
log <<
strpr(
" - %s\n", sensFileName.c_str());
432 sensFile.open(sensFileName.c_str());
435 vector<string> title;
436 vector<string> colName;
437 vector<string> colInfo;
438 vector<size_t> colSize;
439 title.push_back(
strpr(
"Sensitivity analysis for element %2s.",
441 colSize.push_back(10);
442 colName.push_back(
"index");
443 colInfo.push_back(
"Symmetry function index.");
444 colSize.push_back(16);
445 colName.push_back(
"sens_msa_norm");
446 colInfo.push_back(
"Mean square average sensitivity (normalized, "
448 colSize.push_back(16);
449 colName.push_back(
"sens_max_norm");
450 colInfo.push_back(
"Maximum sensitivity (normalized, sum = 100%).");
451 colSize.push_back(16);
452 colName.push_back(
"sens_msa_phys");
453 colInfo.push_back(
"Mean square average sensitivity (physical "
455 colSize.push_back(16);
456 colName.push_back(
"sens_max_phys");
457 colInfo.push_back(
"Maximum sensitivity (physical energy units).");
460 colSize.push_back(16);
461 colName.push_back(
"sens_msa_int");
462 colInfo.push_back(
"Mean square average sensitivity (internal "
464 colSize.push_back(16);
465 colName.push_back(
"sens_max_int");
466 colInfo.push_back(
"Maximum sensitivity (internal units).");
474 for (
size_t j = 0; j < numSymmetryFunctions.at(i); ++j)
476 sensFile <<
strpr(
"%10d", j + 1);
477 sensFile <<
strpr(
" %16.8E %16.8E",
478 sensMean.at(i).at(j) / sensMeanSum * 100.0,
479 sensMax.at(i).at(j) / sensMaxSum * 100.0);
482 sensFile <<
strpr(
" %16.8E %16.8E",
484 sensMean.at(i).at(j)),
486 sensMax.at(i).at(j)));
488 sensFile <<
strpr(
" %16.8E %16.8E\n",
489 sensMean.at(i).at(j),
490 sensMax.at(i).at(j));
497 MPI_Reduce(&(count.front()), &(count.front()), numElements,
MPI_SIZE_T, MPI_SUM, 0, MPI_COMM_WORLD);
498 for (
size_t i = 0; i < numElements; ++i)
500 size_t const& n = numSymmetryFunctions.at(i);
501 MPI_Reduce(&(sensMean.at(i).front()), &(sensMean.at(i).front()), n, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
502 MPI_Reduce(&(sensMax.at(i).front() ), &(sensMax.at(i).front() ), n, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
506 dataset.
log <<
"*****************************************"
507 "**************************************\n";
Collect and process large data sets.
int distributeStructures(bool randomize, bool excludeRank0=false, std::string const &fileName="input.data")
Read data file and distribute structures among processors.
void setupMPI()
Initialize MPI with MPI_COMM_WORLD.
std::vector< Structure > structures
All structures in this dataset.
void setupRandomNumberGenerator()
Initialize random number generator.
void combineFiles(std::string filePrefix) const
Combine individual MPI proc files to one.
void collectError(std::string const &property, std::map< std::string, double > &error, std::size_t &count) const
Collect error metrics of a property over all MPI procs.
void toNormalizedUnits()
Switch all structures to normalized units.
std::size_t atomicNumber(std::size_t index) const
Get atomic number from element index.
void registerStreamPointer(std::ofstream *const &streamPointer)
Register new C++ ofstream pointer.
bool writeToStdout
Turn on/off output to stdout.
double physicalEnergy(Structure const &structure, bool ref=true) const
Undo normalization for a given energy of structure.
ElementMap elementMap
Global element map, populated by setupElementMap().
void addEnergyOffset(Structure &structure, bool ref=true)
Add atomic energy offsets to reference energy.
void initialize()
Write welcome message with version information.
std::size_t getNumElements() const
Getter for Mode::numElements.
double getConvLength() const
Getter for Mode::convLength.
void setupGeneric(std::string const &nnpDir="", bool skipNormalize=false, bool initialHardness=false)
Combine multiple setup routines and provide a basic NNP setup.
virtual void setupNeuralNetworkWeights(std::map< std::string, std::string > fileNameFormats=std::map< std::string, std::string >())
Set up neural network weights from files with given name format.
double physical(std::string const &property, double value) const
Undo normalization for a given property.
bool useNormalization() const
Check if normalization is enabled.
double getConvEnergy() const
Getter for Mode::convEnergy.
virtual void setupSymmetryFunctionScaling(std::string const &fileName="scaling.data")
Set up symmetry function scaling from file.
std::vector< std::size_t > getNumSymmetryFunctions() const
Get number of symmetry functions per element.
bool settingsKeywordExists(std::string const &keyword) const
Check if keyword was found in settings file.
double getMeanEnergy() const
Getter for Mode::meanEnergy.
double getEnergyWithOffset(Structure const &structure, bool ref=true) const
Add atomic energy offsets and return energy.
void evaluateNNP(Structure &structure, bool useForces=true, bool useDEdG=true)
Evaluate neural network potential (includes total energy, optionally forces and in some cases charges...
void setupSymmetryFunctionStatistics(bool collectStatistics, bool collectExtrapolationWarnings, bool writeExtrapolationWarnings, bool stopOnExtrapolationWarnings)
Set up symmetry function statistics collection.
void loadSettingsFile(std::string const &fileName="input.nn")
Open settings file and load all keywords into memory.
double getConvCharge() const
Getter for Mode::convCharge.
double getEnergyOffset(Structure const &structure) const
Get atomic energy offset for given structure.
string strpr(const char *format,...)
String version of printf function.
vector< string > createFileHeader(vector< string > const &title, vector< size_t > const &colSize, vector< string > const &colName, vector< string > const &colInfo, char const &commentChar)
void appendLinesToFile(ofstream &file, vector< string > const lines)
Append multiple lines of strings to open file stream.
int main(int argc, char *argv[])