n2p2 - A neural network potential package
nnp-dataset.cpp
Go to the documentation of this file.
1// n2p2 - A neural network potential package
2// Copyright (C) 2018 Andreas Singraber (University of Vienna)
3//
4// This program is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// This program is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12// GNU General Public License for more details.
13//
14// You should have received a copy of the GNU General Public License
15// along with this program. If not, see <https://www.gnu.org/licenses/>.
16
17#include "Dataset.h"
18#include "mpi-extra.h"
19#include "utility.h"
20#include <mpi.h>
21#include <algorithm>
22#include <cmath>
23#include <cstdlib>
24#include <fstream>
25#include <iostream>
26#include <map>
27#include <string>
28#include <vector>
29
30using namespace std;
31using namespace nnp;
32
33int main(int argc, char* argv[])
34{
35 bool shuffle = false;
36 bool useForces = false;
37 bool normalize = false;
38 int numProcs = 0;
39 int myRank = 0;
40 size_t countEnergy = 0;
41 size_t countForces = 0;
42 map<string, double> errorEnergy;
43 map<string, double> errorForces;
44 string fileName;
45 ofstream fileEnergy;
46 ofstream fileForces;
47 ofstream fileOutputData;
48 ofstream myLog;
49
50 errorEnergy["RMSEpa"] = 0.0;
51 errorEnergy["RMSE"] = 0.0;
52 errorEnergy["MAEpa"] = 0.0;
53 errorEnergy["MAE"] = 0.0;
54 errorForces["RMSE"] = 0.0;
55 errorForces["MAE"] = 0.0;
56
57 if (argc != 2)
58 {
59 cout << "USAGE: " << argv[0] << " <shuffle>\n"
60 << " <shuffle> ... Randomly distribute structures to MPI"
61 " processes (0/1 = no/yes).\n"
62 << " Execute in directory with these NNP files present:\n"
63 << " - input.data (structure file)\n"
64 << " - input.nn (NNP settings)\n"
65 << " - scaling.data (symmetry function scaling data)\n"
66 << " - \"weights.%%03d.data\" (weights files)\n";
67 return 1;
68 }
69
70 shuffle = (bool)atoi(argv[1]);
71
72 MPI_Init(&argc, &argv);
73 MPI_Comm_size(MPI_COMM_WORLD, &numProcs);
74 MPI_Comm_rank(MPI_COMM_WORLD, &myRank);
75
76 Dataset dataset;
77 if (myRank != 0) dataset.log.writeToStdout = false;
78 myLog.open(strpr("nnp-dataset.log.%04d", myRank).c_str());
79 dataset.log.registerStreamPointer(&myLog);
80 dataset.setupMPI();
81 dataset.initialize();
82 dataset.loadSettingsFile();
83 dataset.setupGeneric();
84 normalize = dataset.useNormalization();
86 dataset.setupSymmetryFunctionStatistics(false, false, true, false);
88 if (shuffle) dataset.setupRandomNumberGenerator();
89 dataset.distributeStructures(shuffle);
90 if (normalize) dataset.toNormalizedUnits();
91
92 dataset.log << "\n";
93 dataset.log << "*** DATA SET PREDICTION *****************"
94 "**************************************\n";
95 dataset.log << "\n";
96
97 useForces = dataset.settingsKeywordExists("use_short_forces");
98 if (useForces)
99 {
100 dataset.log << "Energies and forces are predicted.\n";
101 }
102 else
103 {
104 dataset.log << "Only energies are predicted.\n";
105 }
106
107 // Set up sensitivity vectors.
108 size_t numElements = dataset.getNumElements();
109 vector<size_t> numSymmetryFunctions = dataset.getNumSymmetryFunctions();
110 vector<size_t> count(numElements, 0);
111 vector<vector<double> > sensMean;
112 vector<vector<double> > sensMax;
113 sensMean.resize(numElements);
114 sensMax.resize(numElements);
115 for (size_t i = 0; i < numElements; ++i)
116 {
117 sensMean.at(i).resize(numSymmetryFunctions.at(i), 0.0);
118 sensMax.at(i).resize(numSymmetryFunctions.at(i), 0.0);
119 }
120
121 // Set up error files for energy and forces RMSEs.
122 fileName = strpr("energy.comp.%04d", myRank);
123 fileEnergy.open(fileName.c_str());
124
125 // File header.
126 if (myRank == 0)
127 {
128 vector<string> title;
129 vector<string> colName;
130 vector<string> colInfo;
131 vector<size_t> colSize;
132 title.push_back("Energy comparison.");
133 colSize.push_back(10);
134 colName.push_back("index");
135 colInfo.push_back("Structure index.");
136 colSize.push_back(10);
137 colName.push_back("N");
138 colInfo.push_back("Number of atoms in structure.");
139 colSize.push_back(16);
140 colName.push_back("Eref_phys");
141 colInfo.push_back("Reference potential energy (physical units, "
142 "atomic energy offsets added).");
143 colSize.push_back(16);
144 colName.push_back("Ennp_phys");
145 colInfo.push_back("NNP potential energy (physical units, "
146 "atomic energy offsets added).");
147 if (normalize)
148 {
149 colSize.push_back(16);
150 colName.push_back("Eref_int");
151 colInfo.push_back("Reference potential energy (internal units).");
152 colSize.push_back(16);
153 colName.push_back("Ennp_int");
154 colInfo.push_back("NNP potential energy (internal units).");
155 }
156 appendLinesToFile(fileEnergy,
157 createFileHeader(title, colSize, colName, colInfo));
158 }
159 if (useForces)
160 {
161 fileName = strpr("forces.comp.%04d", myRank);
162 fileForces.open(fileName.c_str());
163
164 // File header.
165 if (myRank == 0)
166 {
167 vector<string> title;
168 vector<string> colName;
169 vector<string> colInfo;
170 vector<size_t> colSize;
171 title.push_back("Force comparison.");
172 colSize.push_back(10);
173 colName.push_back("index_s");
174 colInfo.push_back("Structure index.");
175 colSize.push_back(10);
176 colName.push_back("index_a");
177 colInfo.push_back("Atom index (x, y, z components in consecutive "
178 "lines).");
179 colSize.push_back(16);
180 colName.push_back("Fref_phys");
181 colInfo.push_back("Reference force (physical units).");
182 colSize.push_back(16);
183 colName.push_back("Fnnp_phys");
184 colInfo.push_back("NNP force (physical units).");
185 if (normalize)
186 {
187 colSize.push_back(16);
188 colName.push_back("Fref_int");
189 colInfo.push_back("Reference force (internal units).");
190 colSize.push_back(16);
191 colName.push_back("Fnnp_int");
192 colInfo.push_back("NNP force (internal units).");
193 }
194 appendLinesToFile(fileForces,
195 createFileHeader(title,
196 colSize,
197 colName,
198 colInfo));
199 }
200 }
201
202 // Open output.data file.
203 fileName = strpr("output.data.%04d", myRank);
204 fileOutputData.open(fileName.c_str());
205
206 for (vector<Structure>::iterator it = dataset.structures.begin();
207 it != dataset.structures.end(); ++it)
208 {
209 // Set derivatives argument to true in any case to fill dEdG vectors
210 // in atom storage.
211 dataset.evaluateNNP((*it), useForces, true);
212
213 // Loop over atoms, collect sensitivity data and clear memory.
214 for (vector<Atom>::iterator it2 = it->atoms.begin();
215 it2 != it->atoms.end(); ++it2)
216 {
217 // Collect sensitivity data.
218 size_t const& e = it2->element;
219 count.at(e)++;
220 for (size_t i = 0; i < numSymmetryFunctions.at(e); ++i)
221 {
222 double const& s = it2->dEdG.at(i);
223 sensMean.at(e).at(i) += s * s;
224 sensMax.at(e).at(i) = max(sensMax.at(e).at(i), abs(s));
225 }
226 // Clear unnecessary memory (neighbor list and others), energies
227 // and forces are still stored. Don't use these structures after
228 // these operations unless you know what you do!
229 it2->numNeighborsUnique = 0;
230 it2->neighborsUnique.clear();
231 vector<size_t>(it2->neighborsUnique).swap(it2->neighborsUnique);
232
233 it2->numNeighborsPerElement.clear();
234 vector<size_t>(it2->numNeighborsPerElement).swap(
235 it2->numNeighborsPerElement);
236
237 it2->G.clear();
238 vector<double>(it2->G).swap(it2->G);
239
240 it2->dEdG.clear();
241 vector<double>(it2->dEdG).swap(it2->dEdG);
242
243#ifdef N2P2_FULL_SFD_MEMORY
244 it2->dGdxia.clear();
245 vector<double>(it2->dGdxia).swap(it2->dGdxia);
246#endif
247
248 it2->dGdr.clear();
249 vector<Vec3D>(it2->dGdr).swap(it2->dGdr);
250
251 it2->numNeighbors = 0;
252 it2->neighbors.clear();
253 vector<Atom::Neighbor>(it2->neighbors).swap(it2->neighbors);
254
255 it2->hasNeighborList = false;
256 it2->hasSymmetryFunctions = false;
257 it2->hasSymmetryFunctionDerivatives = false;
258 }
259 it->hasNeighborList = false;
260 it->hasSymmetryFunctions = false;
261 it->hasSymmetryFunctionDerivatives = false;
262 it->updateError("energy", errorEnergy, countEnergy);
263 fileEnergy << strpr("%10zu %10zu", it->index, it->numAtoms);
264 if (normalize)
265 {
266 fileEnergy << strpr(" %16.8E %16.8E %16.8E %16.8E\n",
267 dataset.physicalEnergy(*it, true)
268 + dataset.getEnergyOffset(*it),
269 dataset.physicalEnergy(*it, false)
270 + dataset.getEnergyOffset(*it),
271 it->energyRef,
272 it->energy);
273 }
274 else
275 {
276 fileEnergy << strpr(" %16.8E %16.8E\n",
277 dataset.getEnergyWithOffset(*it, true),
278 dataset.getEnergyWithOffset(*it, false));
279 }
280 if (useForces)
281 {
282 it->updateError("force", errorForces, countForces);
283 for (vector<Atom>::const_iterator it2 = it->atoms.begin();
284 it2 != it->atoms.end(); ++it2)
285 {
286 for (size_t i = 0; i < 3; ++i)
287 {
288 fileForces << strpr("%10zu %10zu",
289 it2->indexStructure,
290 it2->index);
291 if (normalize)
292 {
293 fileForces << strpr(
294 " %16.8E %16.8E",
295 dataset.physical("force", it2->fRef[i]),
296 dataset.physical("force", it2->f[i]));
297 }
298 fileForces << strpr(" %16.8E %16.8E\n",
299 it2->fRef[i],
300 it2->f[i]);
301 }
302 }
303 }
304 if (normalize)
305 {
306 it->toPhysicalUnits(dataset.getMeanEnergy(),
307 dataset.getConvEnergy(),
308 dataset.getConvLength(),
309 dataset.getConvCharge());
310 }
311 dataset.addEnergyOffset(*it, false);
312 it->writeToFile(&fileOutputData, false);
313 }
314
315 fileEnergy.close();
316 if (useForces) fileForces.close();
317 fileOutputData.close();
318 MPI_Barrier(MPI_COMM_WORLD);
319
320 if (myRank == 0)
321 {
322 fileName = "energy.comp";
323 dataset.combineFiles(fileName);
324 if (useForces)
325 {
326 fileName = "forces.comp";
327 dataset.combineFiles(fileName);
328 }
329 fileName = "output.data";
330 dataset.combineFiles(fileName);
331 }
332
333 dataset.collectError("energy", errorEnergy, countEnergy);
334 if (useForces) dataset.collectError("force", errorForces, countForces);
335
336 if (myRank == 0)
337 {
338 if (useForces)
339 {
340 dataset.log << "Energy and force comparison in files:\n";
341 dataset.log << " - energy.comp\n";
342 dataset.log << " - forces.comp\n";
343 }
344 else
345 {
346 dataset.log << "Energy comparison in file:\n";
347 dataset.log << " - energy.comp\n";
348 }
349 dataset.log << "Predicted data set in \"output.data\"\n";
350 }
351 dataset.log << "Error metrics for energies and forces:\n";
352 dataset.log << "-----------------------------------------"
353 "-----------------------------------------"
354 "--------------------------------------\n";
355 dataset.log << " physical units ";
356 if (normalize)
357 {
358 dataset.log << " | internal units ";
359 }
360 dataset.log << "\n";
361 dataset.log << strpr(" %13s %13s %13s %13s",
362 "RMSEpa", "RMSE", "MAEpa", "MAE");
363 if (normalize)
364 {
365 dataset.log << strpr(" | %13s %13s %13s %13s",
366 "RMSEpa", "RMSE", "MAEpa", "MAE");
367 }
368 dataset.log << "\n";
369 dataset.log << "ENERGY";
370 if (normalize)
371 {
372 dataset.log << strpr(
373 " %13.5E %13.5E %13.5E %13.5E |",
374 dataset.physical("energy", errorEnergy.at("RMSEpa")),
375 dataset.physical("energy", errorEnergy.at("RMSE")),
376 dataset.physical("energy", errorEnergy.at("MAEpa")),
377 dataset.physical("energy", errorEnergy.at("MAE")));
378 }
379 dataset.log << strpr(" %13.5E %13.5E %13.5E %13.5E\n",
380 errorEnergy.at("RMSEpa"),
381 errorEnergy.at("RMSE"),
382 errorEnergy.at("MAEpa"),
383 errorEnergy.at("MAE"));
384 if (useForces)
385 {
386 dataset.log << "FORCES";
387 if (normalize)
388 {
389 dataset.log << strpr(
390 " %13s %13.5E %13s %13.5E |", "",
391 dataset.physical("force", errorForces.at("RMSE")), "",
392 dataset.physical("force", errorForces.at("MAE")));
393 }
394 dataset.log << strpr(" %13s %13.5E %13s %13.5E\n", "",
395 errorForces.at("RMSE"), "",
396 errorForces.at("MAE"));
397 }
398 dataset.log << "-----------------------------------------"
399 "-----------------------------------------"
400 "--------------------------------------\n";
401 dataset.log << "*****************************************"
402 "**************************************\n";
403
404 dataset.log << "\n";
405 dataset.log << "*** SENSITIVITY ANALYSIS ****************"
406 "**************************************\n";
407 dataset.log << "\n";
408
409 // Combine sensititvity data from all procs.
410 if (myRank == 0)
411 {
412 dataset.log << "Writing sensitivity analysis data to files:\n";
413 MPI_Reduce(MPI_IN_PLACE, &(count.front()), numElements, MPI_SIZE_T, MPI_SUM, 0, MPI_COMM_WORLD);
414 for (size_t i = 0; i < numElements; ++i)
415 {
416 size_t const& n = numSymmetryFunctions.at(i);
417 double sensMeanSum = 0.0;
418 double sensMaxSum = 0.0;
419 MPI_Reduce(MPI_IN_PLACE, &(sensMean.at(i).front()), n, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
420 MPI_Reduce(MPI_IN_PLACE, &(sensMax.at(i).front() ), n, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
421 for (size_t j = 0; j < numSymmetryFunctions.at(i); ++j)
422 {
423 sensMean.at(i).at(j) = sqrt(sensMean.at(i).at(j)
424 / count.at(i));
425 sensMeanSum += sensMean.at(i).at(j);
426 sensMaxSum += sensMax.at(i).at(j);
427 }
428 ofstream sensFile;
429 string sensFileName = strpr("sensitivity.%03d.out",
430 dataset.elementMap.atomicNumber(i));
431 dataset.log << strpr(" - %s\n", sensFileName.c_str());
432 sensFile.open(sensFileName.c_str());
433
434 // File header.
435 vector<string> title;
436 vector<string> colName;
437 vector<string> colInfo;
438 vector<size_t> colSize;
439 title.push_back(strpr("Sensitivity analysis for element %2s.",
440 dataset.elementMap[i].c_str()));
441 colSize.push_back(10);
442 colName.push_back("index");
443 colInfo.push_back("Symmetry function index.");
444 colSize.push_back(16);
445 colName.push_back("sens_msa_norm");
446 colInfo.push_back("Mean square average sensitivity (normalized, "
447 "sum = 100%).");
448 colSize.push_back(16);
449 colName.push_back("sens_max_norm");
450 colInfo.push_back("Maximum sensitivity (normalized, sum = 100%).");
451 colSize.push_back(16);
452 colName.push_back("sens_msa_phys");
453 colInfo.push_back("Mean square average sensitivity (physical "
454 "energy units).");
455 colSize.push_back(16);
456 colName.push_back("sens_max_phys");
457 colInfo.push_back("Maximum sensitivity (physical energy units).");
458 if (normalize)
459 {
460 colSize.push_back(16);
461 colName.push_back("sens_msa_int");
462 colInfo.push_back("Mean square average sensitivity (internal "
463 "units).");
464 colSize.push_back(16);
465 colName.push_back("sens_max_int");
466 colInfo.push_back("Maximum sensitivity (internal units).");
467 }
468 appendLinesToFile(sensFile,
469 createFileHeader(title,
470 colSize,
471 colName,
472 colInfo));
473
474 for (size_t j = 0; j < numSymmetryFunctions.at(i); ++j)
475 {
476 sensFile << strpr("%10d", j + 1);
477 sensFile << strpr(" %16.8E %16.8E",
478 sensMean.at(i).at(j) / sensMeanSum * 100.0,
479 sensMax.at(i).at(j) / sensMaxSum * 100.0);
480 if (normalize)
481 {
482 sensFile << strpr(" %16.8E %16.8E",
483 dataset.physical("energy",
484 sensMean.at(i).at(j)),
485 dataset.physical("energy",
486 sensMax.at(i).at(j)));
487 }
488 sensFile << strpr(" %16.8E %16.8E\n",
489 sensMean.at(i).at(j),
490 sensMax.at(i).at(j));
491 }
492 sensFile.close();
493 }
494 }
495 else
496 {
497 MPI_Reduce(&(count.front()), &(count.front()), numElements, MPI_SIZE_T, MPI_SUM, 0, MPI_COMM_WORLD);
498 for (size_t i = 0; i < numElements; ++i)
499 {
500 size_t const& n = numSymmetryFunctions.at(i);
501 MPI_Reduce(&(sensMean.at(i).front()), &(sensMean.at(i).front()), n, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
502 MPI_Reduce(&(sensMax.at(i).front() ), &(sensMax.at(i).front() ), n, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
503 }
504 }
505
506 dataset.log << "*****************************************"
507 "**************************************\n";
508
509 myLog.close();
510
511 MPI_Finalize();
512
513 return 0;
514}
Collect and process large data sets.
Definition: Dataset.h:35
int distributeStructures(bool randomize, bool excludeRank0=false, std::string const &fileName="input.data")
Read data file and distribute structures among processors.
Definition: Dataset.cpp:724
void setupMPI()
Initialize MPI with MPI_COMM_WORLD.
Definition: Dataset.cpp:52
std::vector< Structure > structures
All structures in this dataset.
Definition: Dataset.h:195
void setupRandomNumberGenerator()
Initialize random number generator.
Definition: Dataset.cpp:110
void combineFiles(std::string filePrefix) const
Combine individual MPI proc files to one.
Definition: Dataset.cpp:1744
void collectError(std::string const &property, std::map< std::string, double > &error, std::size_t &count) const
Collect error metrics of a property over all MPI procs.
Definition: Dataset.cpp:1712
void toNormalizedUnits()
Switch all structures to normalized units.
Definition: Dataset.cpp:952
std::size_t atomicNumber(std::size_t index) const
Get atomic number from element index.
Definition: ElementMap.h:145
void registerStreamPointer(std::ofstream *const &streamPointer)
Register new C++ ofstream pointer.
Definition: Log.cpp:91
bool writeToStdout
Turn on/off output to stdout.
Definition: Log.h:85
double physicalEnergy(Structure const &structure, bool ref=true) const
Undo normalization for a given energy of structure.
Definition: Mode.cpp:2122
ElementMap elementMap
Global element map, populated by setupElementMap().
Definition: Mode.h:591
void addEnergyOffset(Structure &structure, bool ref=true)
Add atomic energy offsets to reference energy.
Definition: Mode.cpp:2018
void initialize()
Write welcome message with version information.
Definition: Mode.cpp:55
std::size_t getNumElements() const
Getter for Mode::numElements.
Definition: Mode.h:698
double getConvLength() const
Getter for Mode::convLength.
Definition: Mode.h:683
void setupGeneric(std::string const &nnpDir="", bool skipNormalize=false, bool initialHardness=false)
Combine multiple setup routines and provide a basic NNP setup.
Definition: Mode.cpp:212
virtual void setupNeuralNetworkWeights(std::map< std::string, std::string > fileNameFormats=std::map< std::string, std::string >())
Set up neural network weights from files with given name format.
Definition: Mode.cpp:1445
double physical(std::string const &property, double value) const
Undo normalization for a given property.
Definition: Mode.cpp:2110
bool useNormalization() const
Check if normalization is enabled.
Definition: Mode.h:703
double getConvEnergy() const
Getter for Mode::convEnergy.
Definition: Mode.h:678
virtual void setupSymmetryFunctionScaling(std::string const &fileName="scaling.data")
Set up symmetry function scaling from file.
Definition: Mode.cpp:712
std::vector< std::size_t > getNumSymmetryFunctions() const
Get number of symmetry functions per element.
Definition: Mode.cpp:2180
bool settingsKeywordExists(std::string const &keyword) const
Check if keyword was found in settings file.
Definition: Mode.cpp:2193
Log log
Global log file.
Definition: Mode.h:593
double getMeanEnergy() const
Getter for Mode::meanEnergy.
Definition: Mode.h:673
double getEnergyWithOffset(Structure const &structure, bool ref=true) const
Add atomic energy offsets and return energy.
Definition: Mode.cpp:2069
void evaluateNNP(Structure &structure, bool useForces=true, bool useDEdG=true)
Evaluate neural network potential (includes total energy, optionally forces and in some cases charges...
Definition: Mode.cpp:1967
void setupSymmetryFunctionStatistics(bool collectStatistics, bool collectExtrapolationWarnings, bool writeExtrapolationWarnings, bool stopOnExtrapolationWarnings)
Set up symmetry function statistics collection.
Definition: Mode.cpp:1103
void loadSettingsFile(std::string const &fileName="input.nn")
Open settings file and load all keywords into memory.
Definition: Mode.cpp:161
double getConvCharge() const
Getter for Mode::convCharge.
Definition: Mode.h:688
double getEnergyOffset(Structure const &structure) const
Get atomic energy offset for given structure.
Definition: Mode.cpp:2056
#define MPI_SIZE_T
Definition: mpi-extra.h:22
Definition: Atom.h:29
string strpr(const char *format,...)
String version of printf function.
Definition: utility.cpp:90
vector< string > createFileHeader(vector< string > const &title, vector< size_t > const &colSize, vector< string > const &colName, vector< string > const &colInfo, char const &commentChar)
Definition: utility.cpp:111
void appendLinesToFile(ofstream &file, vector< string > const lines)
Append multiple lines of strings to open file stream.
Definition: utility.cpp:225
int main(int argc, char *argv[])
Definition: nnp-dataset.cpp:33