n2p2 - A neural network potential package
nnp-comp2.cpp
Go to the documentation of this file.
1// n2p2 - A neural network potential package
2// Copyright (C) 2018 Andreas Singraber (University of Vienna)
3//
4// This program is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// This program is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12// GNU General Public License for more details.
13//
14// You should have received a copy of the GNU General Public License
15// along with this program. If not, see <https://www.gnu.org/licenses/>.
16
17#include "Dataset.h"
18#include "Log.h"
19#include "utility.h"
20#include <algorithm>
21#include <cmath>
22#include <cstddef>
23#include <iostream>
24#include <fstream>
25#include <stdexcept>
26#include <string>
27
28using namespace std;
29using namespace nnp;
30
31int main(int argc, char* argv[])
32{
33 string mode;
34 int numProcs = 0;
35 int numProcsNNP = 0;
36 int numProcsData = 0;
37 int myRank = 0;
38 int myRankNNP = 0;
39 int myRankData = 0;
40 int myNNP = 0;
41 int myData = 0;
42 int numWorkers = 0;
43 size_t thresholdEW = 0;
44 double thresholdEnergy = 0.0;
45 double thresholdForce = 0.0;
46 string elements;
47 ofstream myLog;
48
49 if (argc < 2)
50 {
51 cout << "USAGE: " << argv[0] << " <mode> <t_ew> <t_en <t_f> "
52 "<elem1 <elem2 ...>>\n"
53 << " <mode> ... Either compare 2 NNPs (compare) or apply"
54 " threshold to existing comparison data set (select).\n"
55 << " If <mode> is \"select\":\n"
56 << " <t_ew> ... Extrapolation warning threshold.\n"
57 << " <t_en> ... Energy per atom threshold.\n"
58 << " <t_f> .... Force threshold.\n"
59 << " <elem1 <elem2 ...>> ... Element strings in data set "
60 "(e.g. H O).\n"
61 << " Execute in directory with the data set file\n"
62 << " - input.data\n"
63 << " and 2 subdirectories\n"
64 << " - nnp-data-1\n"
65 << " - nnp-data-2\n"
66 << " each containing these NNP files:\n"
67 << " - input.nn (NNP settings)\n"
68 << " - scaling.data (symmetry function scaling data)\n"
69 << " - \"weights.%%03d.data\" (weights files)\n";
70 return 1;
71 }
72
73 mode = argv[1];
74 if (mode == "compare")
75 {
76 }
77 else if (mode == "select")
78 {
79 if (argc < 6)
80 {
81 throw runtime_error("ERROR: Wrong number of arguments.\n");
82 }
83 thresholdEW = (size_t)atoi(argv[2]);
84 thresholdEnergy = atof(argv[3]);
85 thresholdForce = atof(argv[4]);
86 size_t numElements = argc - 5;
87 elements += argv[5];
88 for (size_t i = 6; i < numElements + 5; ++i)
89 {
90 elements += " ";
91 elements += argv[i];
92 }
93 }
94 else
95 {
96 throw runtime_error("ERROR: Unknown mode selected.\n");
97 }
98
99 MPI_Init(&argc, &argv);
100 MPI_Comm_size(MPI_COMM_WORLD, &numProcs);
101 MPI_Comm_rank(MPI_COMM_WORLD, &myRank);
102
103 if ((mode == "compare") && (numProcs % 2 != 0))
104 {
105 throw runtime_error("ERROR: Please start with an even number of MPI"
106 " processes.\n");
107 }
108 else if ((mode == "select") && (numProcs != 1))
109 {
110 throw runtime_error("ERROR: Please start with a single MPI "
111 "processes.\n");
112 }
113
114 if (mode == "compare")
115 {
116 numWorkers = numProcs / 2;
117 MPI_Comm commNNP;
118 myNNP = myRank / numWorkers;
119 MPI_Comm_split(MPI_COMM_WORLD, myNNP, myRank, &commNNP);
120 MPI_Comm_size(commNNP, &numProcsNNP);
121 MPI_Comm_rank(commNNP, &myRankNNP);
122
123 MPI_Comm commData;
124 myData = myRankNNP % numWorkers;
125 MPI_Comm_split(MPI_COMM_WORLD, myData, myRank, &commData);
126 MPI_Comm_size(commData, &numProcsData);
127 MPI_Comm_rank(commData, &myRankData);
128
129 Dataset dataset;
130 if (myRank != 0) dataset.log.writeToStdout = false;
131 myLog.open(strpr("nnp-comp2.log.%1d.%04d",
132 myNNP + 1,
133 myRankNNP).c_str());
134 dataset.log.registerStreamPointer(&myLog);
135
136 string dirNNP = strpr("nnp-data-%1d", myNNP + 1);
137
138 dataset.log << "\n";
139 dataset.log << "*** 2-NNP COMPARISON INITIALIZATION *****"
140 "**************************************\n";
141 dataset.log << "\n";
142 dataset.log << strpr("Number of workers per NNP: %4d\n", numWorkers);
143 dataset.log << strpr("NNP Id : %4d\n", myNNP);
144 dataset.log << strpr("Data Id : %4d\n", myData);
145 dataset.log << "My NNP directory : " + dirNNP + "\n";
146 dataset.log << "\n";
147 dataset.log << "Global Communicator:\n";
148 dataset.log << strpr("- numProcs: %4d\n", numProcs);
149 dataset.log << strpr("- myRank : %4d\n", myRank);
150 dataset.log << "\n";
151 dataset.log << "NNP Communicator:\n";
152 dataset.log << strpr("- numProcs: %4d\n", numProcsNNP);
153 dataset.log << strpr("- myRank : %4d\n", myRankNNP);
154 dataset.log << "\n";
155 dataset.log << "Data Communicator:\n";
156 dataset.log << strpr("- numProcs: %4d\n", numProcsData);
157 dataset.log << strpr("- myRank : %4d\n", myRankData);
158 dataset.log << "\n";
159 dataset.log << "Starting NNP initialization...\n";
160 dataset.log << "*****************************************"
161 "**************************************\n";
162
163 dataset.setupMPI(&commNNP);
164 dataset.initialize();
165 dataset.loadSettingsFile(dirNNP + "/input.nn");
166 dataset.setupGeneric();
167 bool normalize = dataset.useNormalization();
168 bool useForces = dataset.settingsKeywordExists("use_short_forces");
169 dataset.setupSymmetryFunctionScaling(dirNNP + "/scaling.data");
170 dataset.setupSymmetryFunctionStatistics(false, true, false, false);
171 dataset.setupNeuralNetworkWeights(dirNNP + "/weights.%03d.data");
172 dataset.distributeStructures(false);
173 if (normalize) dataset.toNormalizedUnits();
174
175 dataset.log << "\n";
176 dataset.log << "*** 2-NNP COMPARISON ********************"
177 "**************************************\n";
178 dataset.log << "\n";
179 if (useForces)
180 {
181 dataset.log << "Calculating energies and forces for dataset.\n";
182 }
183 else
184 {
185 dataset.log << "Calculating energies for dataset.\n";
186 }
187
188 for (vector<Structure>::iterator it = dataset.structures.begin();
189 it != dataset.structures.end(); ++it)
190 {
191 it->calculateNeighborList(dataset.getMaxCutoffRadius());
192#ifdef N2P2_NO_SF_GROUPS
193 dataset.calculateSymmetryFunctions((*it), useForces);
194#else
195 dataset.calculateSymmetryFunctionGroups((*it), useForces);
196#endif
197 dataset.calculateAtomicNeuralNetworks((*it), useForces);
198 dataset.calculateEnergy((*it));
199 if (useForces) dataset.calculateForces((*it));
200 if (normalize) dataset.convertToPhysicalUnits((*it));
201 dataset.addEnergyOffset((*it), true);
202 dataset.addEnergyOffset((*it), false);
203 // Store number of extrapolation warnings in unused
204 // "numElementsPresent" field.
205 it->clearNeighborList();
206 it->numElementsPresent = dataset.getNumExtrapolationWarnings();
208 }
209
210 // Receive data from other NNP worker (rank 1 in data comm) and
211 // subtract results.
212 if (myRankData == 0)
213 {
214 ofstream myComparisonData;
215 myComparisonData.open(strpr("comp.data.%04d", myRankNNP).c_str());
216 ofstream myDiff;
217 myDiff.open(strpr("diff.out.%04d", myRankNNP).c_str());
218 if (myRankNNP == 0)
219 {
220 // File header.
221 vector<string> title;
222 vector<string> colName;
223 vector<string> colInfo;
224 vector<size_t> colSize;
225 title.push_back("2-NNP EW, energy and force comparison.");
226 colSize.push_back(7);
227 colName.push_back("index");
228 colInfo.push_back("Structure index.");
229 colSize.push_back(10);
230 colName.push_back("nAtoms");
231 colInfo.push_back("Number of atoms in structure.");
232 colSize.push_back(10);
233 colName.push_back("EW_NNP1");
234 colInfo.push_back("Extrapolation warnings issued by NNP 1.");
235 colSize.push_back(10);
236 colName.push_back("EW_NNP2");
237 colInfo.push_back("Extrapolation warnings issued by NNP 2.");
238 colSize.push_back(16);
239 colName.push_back("E_NNP1");
240 colInfo.push_back("Energy prediction of NNP 1.");
241 colSize.push_back(16);
242 colName.push_back("E_NNP2");
243 colInfo.push_back("Energy prediction of NNP 2.");
244 colSize.push_back(16);
245 colName.push_back("FDiffMean");
246 colInfo.push_back("Mean absolute force difference.");
247 colSize.push_back(16);
248 colName.push_back("FDiffMax");
249 colInfo.push_back("Maximum absolute force difference.");
250 appendLinesToFile(myDiff,
251 createFileHeader(title,
252 colSize,
253 colName,
254 colInfo));
255 }
256
257 for (vector<Structure>::iterator it = dataset.structures.begin();
258 it != dataset.structures.end(); ++it)
259 {
260 vector<double> buffer;
261 size_t sizeBuffer = 2;
262 if (useForces) sizeBuffer += 3 * it->numAtoms;
263 buffer.resize(sizeBuffer, 0.0);
264 MPI_Recv(&(buffer.front()), buffer.size(), MPI_DOUBLE, 1, 0, commData, MPI_STATUS_IGNORE);
265 size_t count = 0;
266 double eNNP2 = buffer.at(count++);
267 size_t ewNNP2 = (size_t)buffer.at(count++);
268 double meanForceDiff = 0.0;
269 double maxForceDiff = 0.0;
270 it->energyRef = it->energy - eNNP2;
271 if (useForces)
272 {
273 for (vector<Atom>::iterator it2 = it->atoms.begin();
274 it2 != it->atoms.end(); ++it2)
275 {
276 for (size_t i = 0; i < 3; ++i)
277 {
278 it2->fRef[i] = it2->f[i] - buffer.at(count++);
279 double const fAbsDiff = fabs(it2->fRef[i]);
280 meanForceDiff += fAbsDiff;
281 maxForceDiff = max(maxForceDiff, fAbsDiff);
282 }
283 }
284 meanForceDiff /= 3 * it->numAtoms;
285 }
286 // Add EW sum to comment line.
287 myDiff << strpr("%7zu %10zu %10zu %10zu %16.8E %16.8E "
288 "%16.8E %16.8E\n",
289 it->index,
290 it->numAtoms,
291 it->numElementsPresent,
292 ewNNP2,
293 it->energy,
294 eNNP2,
295 meanForceDiff,
296 maxForceDiff);
297 it->numElementsPresent += ewNNP2;
298 it->comment = strpr("EWSUM %zu ", it->numElementsPresent)
299 + it->comment;
300 it->writeToFile(&myComparisonData);
301 }
302 myComparisonData.close();
303 myDiff.close();
304 }
305 // Send data to other NNP worker (rank 0 in data comm).
306 else if (myRankData == 1)
307 {
308 for (vector<Structure>::const_iterator it = dataset.structures.begin();
309 it != dataset.structures.end(); ++it)
310 {
311 double ew = (double)it->numElementsPresent;
312 vector<double> buffer;
313 buffer.push_back(it->energy);
314 buffer.push_back(ew);
315 if (useForces)
316 {
317 for (vector<Atom>::const_iterator it2 = it->atoms.begin();
318 it2 != it->atoms.end(); ++it2)
319 {
320 buffer.push_back(it2->f[0]);
321 buffer.push_back(it2->f[1]);
322 buffer.push_back(it2->f[2]);
323 }
324 }
325 MPI_Send(&(buffer.front()), buffer.size(), MPI_DOUBLE, 0, 0, commData);
326 }
327 }
328
329 MPI_Barrier(MPI_COMM_WORLD);
330 if (myRank == 0)
331 {
332 dataset.log << "Difference structures written to \"comp.data\".\n";
333 dataset.combineFiles("comp.data");
334 dataset.log << "Difference data collected in \"diff.out\".\n";
335 dataset.combineFiles("diff.out");
336 }
337
338 MPI_Comm_free(&commNNP);
339 MPI_Comm_free(&commData);
340
341 dataset.log << "*****************************************"
342 "**************************************\n";
343 }
344 else if (mode == "select")
345 {
346 ofstream logFile;
347 logFile.open("nnp-comp2.log");
348 Log log;
350 log << "\n";
351 log << "*** 2-NNP COMPARISON SELECTION **********"
352 "**************************************\n";
353 log << "\n";
354 log << strpr("Element string: %s\n", elements.c_str());
355 log << strpr("Threshold extrapolation warnings : %d\n",
356 thresholdEW);
357 log << strpr("Threshold energy difference per atom: %g\n",
358 thresholdEnergy);
359 log << strpr("Threshold force difference : %g\n",
360 thresholdForce);
361 log << "*****************************************"
362 "**************************************\n";
363
364 ofstream selectFile;
365 selectFile.open("select.out");
366 // File header.
367 vector<string> title;
368 vector<string> colName;
369 vector<string> colInfo;
370 vector<size_t> colSize;
371 title.push_back("Selection results based on 2-NNP comparison.");
372 colSize.push_back(7);
373 colName.push_back("index");
374 colInfo.push_back("Structure index.");
375 colSize.push_back(5);
376 colName.push_back("fAny");
377 colInfo.push_back("Whether structure was selected.");
378 colSize.push_back(5);
379 colName.push_back("fEW");
380 colInfo.push_back("Structure selected due to extrapolation warning.");
381 colSize.push_back(5);
382 colName.push_back("fE");
383 colInfo.push_back("Structure selected due to energy difference.");
384 colSize.push_back(5);
385 colName.push_back("fF");
386 colInfo.push_back("Structure selected due to force difference.");
387 colSize.push_back(10);
388 colName.push_back("nEW");
389 colInfo.push_back("Number of extrapolation warnings (both NNPs "
390 "combined).");
391 colSize.push_back(10);
392 colName.push_back("nAtoms");
393 colInfo.push_back("Number of atoms in structure.");
394 colSize.push_back(16);
395 colName.push_back("Ediff");
396 colInfo.push_back("Energy difference per atom.");
397 colSize.push_back(10);
398 colName.push_back("nForce");
399 colInfo.push_back("Number of forces exceeding threshold.");
400 colSize.push_back(16);
401 colName.push_back("FDiffMean");
402 colInfo.push_back("Mean absolute force difference.");
403 colSize.push_back(16);
404 colName.push_back("FDiffMax");
405 colInfo.push_back("Maximum absolute force difference.");
406 appendLinesToFile(selectFile,
407 createFileHeader(title, colSize, colName, colInfo));
408
409 ElementMap elementMap;
410 elementMap.registerElements(elements);
411
412 Structure structure;
413 structure.setElementMap(elementMap);
414
415 ifstream comparisonData;
416 comparisonData.open("comp.data");
417
418 ofstream selectionData;
419 selectionData.open("comp-selection.data");
420
421 size_t countEWHit = 0;
422 size_t countEnergyHit = 0;
423 size_t countForceHit = 0;
424 size_t countStructures = 0;
425 size_t countSelected = 0;
426 while (comparisonData.peek() != EOF)
427 {
428 bool flagEW = false;
429 bool flagEnergy = false;
430 bool flagForce = false;
431 size_t countForce = 0;
432 double meanForceDiff = 0.0;
433 double maxForceDiff = 0.0;
434 structure.readFromFile(comparisonData);
435 structure.index = countStructures++;
436 Structure const& s = structure;
437 size_t countEW = (size_t)atoi(split(s.comment).at(1).c_str());
438 if (thresholdEW > 0 && countEW >= thresholdEW)
439 {
440 flagEW = true;
441 countEWHit++;
442 }
443 if (thresholdEnergy > 0 &&
444 fabs(s.energyRef) / s.numAtoms > thresholdEnergy)
445 {
446 flagEnergy = true;
447 countEnergyHit++;
448 }
449 if (thresholdForce > 0)
450 {
451 for (vector<Atom>::const_iterator it = s.atoms.begin();
452 it != s.atoms.end(); ++it)
453 {
454 for (size_t i = 0; i < 3; ++i)
455 {
456 double const fAbsDiff = fabs(it->fRef[i]);
457 maxForceDiff = max(maxForceDiff, fAbsDiff);
458 meanForceDiff += fAbsDiff;
459 if (fAbsDiff > thresholdForce)
460 {
461 flagForce = true;
462 countForce++;
463 }
464 }
465 }
466 meanForceDiff /= 3 * s.numAtoms;
467 if (flagForce) countForceHit++;
468 }
469 if (flagEW || flagEnergy || flagForce)
470 {
471 s.writeToFile(&selectionData);
472 countSelected++;
473 log << strpr("Structure %7zu selected.\n", structure.index);
474 }
475 selectFile << strpr("%7zu %5d %5d %5d %5d %10zu %10zu %16.8E "
476 "%10zu %16.8E %16.8E\n",
477 structure.index,
478 (int)(flagEW || flagEnergy || flagForce),
479 (int)flagEW,
480 (int)flagEnergy,
481 (int)flagForce,
482 countEW,
483 structure.numAtoms,
484 structure.energyRef / structure.numAtoms,
485 countForce,
486 meanForceDiff,
487 maxForceDiff);
488 structure.reset();
489 }
490 selectFile.close();
491
492 log << "*****************************************"
493 "**************************************\n";
494 log << strpr("Total number of structures : %7zu\n", countStructures);
495 log << strpr("Number of selected structures: %7zu (%6.2f %)\n",
496 countSelected, countSelected * 100.0 / countStructures);
497 log << strpr("EW threshold exceeded : %7zu (%6.2f %)\n",
498 countEWHit, countEWHit * 100.0 / countStructures);
499 log << strpr("Energy threshold exceeded : %7zu (%6.2f %)\n",
500 countEnergyHit, countEnergyHit * 100.0 / countStructures);
501 log << strpr("Force threshold exceeded : %7zu (%6.2f %)\n",
502 countForceHit, countForceHit * 100.0 / countStructures);
503 log << "Selected structures written to \"comp-selection.data\".\n";
504 log << "*****************************************"
505 "**************************************\n";
506 }
507
508 MPI_Finalize();
509
510 return 0;
511}
Collect and process large data sets.
Definition: Dataset.h:35
int distributeStructures(bool randomize, bool excludeRank0=false, std::string const &fileName="input.data")
Read data file and distribute structures among processors.
Definition: Dataset.cpp:724
void setupMPI()
Initialize MPI with MPI_COMM_WORLD.
Definition: Dataset.cpp:52
std::vector< Structure > structures
All structures in this dataset.
Definition: Dataset.h:195
void combineFiles(std::string filePrefix) const
Combine individual MPI proc files to one.
Definition: Dataset.cpp:1744
void toNormalizedUnits()
Switch all structures to normalized units.
Definition: Dataset.cpp:952
Contains element map.
Definition: ElementMap.h:30
std::size_t registerElements(std::string const &elementLine)
Extract all elements and store in element map.
Definition: ElementMap.cpp:36
Logging class for library output.
Definition: Log.h:34
void registerStreamPointer(std::ofstream *const &streamPointer)
Register new C++ ofstream pointer.
Definition: Log.cpp:91
bool writeToStdout
Turn on/off output to stdout.
Definition: Log.h:85
void addEnergyOffset(Structure &structure, bool ref=true)
Add atomic energy offsets to reference energy.
Definition: Mode.cpp:2018
void initialize()
Write welcome message with version information.
Definition: Mode.cpp:55
void setupGeneric(std::string const &nnpDir="", bool skipNormalize=false, bool initialHardness=false)
Combine multiple setup routines and provide a basic NNP setup.
Definition: Mode.cpp:212
virtual void setupNeuralNetworkWeights(std::map< std::string, std::string > fileNameFormats=std::map< std::string, std::string >())
Set up neural network weights from files with given name format.
Definition: Mode.cpp:1445
void calculateAtomicNeuralNetworks(Structure &structure, bool const derivatives, std::string id="")
Calculate atomic neural networks for all atoms in given structure.
Definition: Mode.cpp:1642
double getMaxCutoffRadius() const
Getter for Mode::maxCutoffRadius.
Definition: Mode.h:693
bool useNormalization() const
Check if normalization is enabled.
Definition: Mode.h:703
void calculateEnergy(Structure &structure) const
Calculate potential energy for a given structure.
Definition: Mode.cpp:1803
void calculateSymmetryFunctionGroups(Structure &structure, bool const derivatives)
Calculate all symmetry function groups for all atoms in given structure.
Definition: Mode.cpp:1561
virtual void setupSymmetryFunctionScaling(std::string const &fileName="scaling.data")
Set up symmetry function scaling from file.
Definition: Mode.cpp:712
std::size_t getNumExtrapolationWarnings() const
Count total number of extrapolation warnings encountered for all elements and symmetry functions.
Definition: Mode.cpp:2166
bool settingsKeywordExists(std::string const &keyword) const
Check if keyword was found in settings file.
Definition: Mode.cpp:2193
Log log
Global log file.
Definition: Mode.h:593
void resetExtrapolationWarnings()
Erase all extrapolation warnings and reset counters.
Definition: Mode.cpp:2155
void convertToPhysicalUnits(Structure &structure) const
Convert one structure to physical units.
Definition: Mode.cpp:2142
void calculateSymmetryFunctions(Structure &structure, bool const derivatives)
Calculate all symmetry functions for all atoms in given structure.
Definition: Mode.cpp:1480
void calculateForces(Structure &structure) const
Calculate forces for all atoms in given structure.
Definition: Mode.cpp:1849
void setupSymmetryFunctionStatistics(bool collectStatistics, bool collectExtrapolationWarnings, bool writeExtrapolationWarnings, bool stopOnExtrapolationWarnings)
Set up symmetry function statistics collection.
Definition: Mode.cpp:1103
void loadSettingsFile(std::string const &fileName="input.nn")
Open settings file and load all keywords into memory.
Definition: Mode.cpp:161
Definition: Atom.h:29
string strpr(const char *format,...)
String version of printf function.
Definition: utility.cpp:90
vector< string > createFileHeader(vector< string > const &title, vector< size_t > const &colSize, vector< string > const &colName, vector< string > const &colInfo, char const &commentChar)
Definition: utility.cpp:111
vector< string > split(string const &input, char delimiter)
Split string at each delimiter.
Definition: utility.cpp:33
void appendLinesToFile(ofstream &file, vector< string > const lines)
Append multiple lines of strings to open file stream.
Definition: utility.cpp:225
int main(int argc, char *argv[])
Definition: nnp-comp2.cpp:31
ofstream logFile
Definition: nnp-cutoff.cpp:29
Storage for one atomic configuration.
Definition: Structure.h:39
void writeToFile(std::string const fileName="output.data", bool const ref=true, bool const append=false) const
Write configuration to file.
Definition: Structure.cpp:1449
std::string comment
Structure comment.
Definition: Structure.h:110
void setElementMap(ElementMap const &elementMap)
Set element map of structure.
Definition: Structure.cpp:71
std::size_t index
Index number of this structure.
Definition: Structure.h:73
void readFromFile(std::string const fileName="input.data")
Read configuration from file.
Definition: Structure.cpp:97
double energyRef
Reference potential energy.
Definition: Structure.h:85
void reset()
Reset everything but elementMap.
Definition: Structure.cpp:1319
std::size_t numAtoms
Total number of atoms present in this structure.
Definition: Structure.h:75
std::vector< Atom > atoms
Vector of all atoms in this structure.
Definition: Structure.h:122