34{
35 bool shuffle = false;
36 bool useForces = false;
37 bool normalize = false;
38 int numProcs = 0;
39 int myRank = 0;
40 size_t countEnergy = 0;
41 size_t countForces = 0;
42 map<string, double> errorEnergy;
43 map<string, double> errorForces;
44 string fileName;
45 ofstream fileEnergy;
46 ofstream fileForces;
47 ofstream fileOutputData;
48 ofstream myLog;
49
50 errorEnergy["RMSEpa"] = 0.0;
51 errorEnergy["RMSE"] = 0.0;
52 errorEnergy["MAEpa"] = 0.0;
53 errorEnergy["MAE"] = 0.0;
54 errorForces["RMSE"] = 0.0;
55 errorForces["MAE"] = 0.0;
56
57 if (argc != 2)
58 {
59 cout << "USAGE: " << argv[0] << " <shuffle>\n"
60 << " <shuffle> ... Randomly distribute structures to MPI"
61 " processes (0/1 = no/yes).\n"
62 << " Execute in directory with these NNP files present:\n"
63 << " - input.data (structure file)\n"
64 << " - input.nn (NNP settings)\n"
65 << " - scaling.data (symmetry function scaling data)\n"
66 << " - \"weights.%%03d.data\" (weights files)\n";
67 return 1;
68 }
69
70 shuffle = (bool)atoi(argv[1]);
71
72 MPI_Init(&argc, &argv);
73 MPI_Comm_size(MPI_COMM_WORLD, &numProcs);
74 MPI_Comm_rank(MPI_COMM_WORLD, &myRank);
75
78 myLog.open(
strpr(
"nnp-dataset.log.%04d", myRank).c_str());
91
93 dataset.
log <<
"*** DATA SET PREDICTION *****************"
94 "**************************************\n";
96
98 if (useForces)
99 {
100 dataset.
log <<
"Energies and forces are predicted.\n";
101 }
102 else
103 {
104 dataset.
log <<
"Only energies are predicted.\n";
105 }
106
107
110 vector<size_t> count(numElements, 0);
111 vector<vector<double> > sensMean;
112 vector<vector<double> > sensMax;
113 sensMean.resize(numElements);
114 sensMax.resize(numElements);
115 for (size_t i = 0; i < numElements; ++i)
116 {
117 sensMean.at(i).resize(numSymmetryFunctions.at(i), 0.0);
118 sensMax.at(i).resize(numSymmetryFunctions.at(i), 0.0);
119 }
120
121
122 fileName =
strpr(
"energy.comp.%04d", myRank);
123 fileEnergy.open(fileName.c_str());
124
125
126 if (myRank == 0)
127 {
128 vector<string> title;
129 vector<string> colName;
130 vector<string> colInfo;
131 vector<size_t> colSize;
132 title.push_back("Energy comparison.");
133 colSize.push_back(10);
134 colName.push_back("index");
135 colInfo.push_back("Structure index.");
136 colSize.push_back(10);
137 colName.push_back("N");
138 colInfo.push_back("Number of atoms in structure.");
139 colSize.push_back(16);
140 colName.push_back("Eref_phys");
141 colInfo.push_back("Reference potential energy (physical units, "
142 "atomic energy offsets added).");
143 colSize.push_back(16);
144 colName.push_back("Ennp_phys");
145 colInfo.push_back("NNP potential energy (physical units, "
146 "atomic energy offsets added).");
147 if (normalize)
148 {
149 colSize.push_back(16);
150 colName.push_back("Eref_int");
151 colInfo.push_back("Reference potential energy (internal units).");
152 colSize.push_back(16);
153 colName.push_back("Ennp_int");
154 colInfo.push_back("NNP potential energy (internal units).");
155 }
158 }
159 if (useForces)
160 {
161 fileName =
strpr(
"forces.comp.%04d", myRank);
162 fileForces.open(fileName.c_str());
163
164
165 if (myRank == 0)
166 {
167 vector<string> title;
168 vector<string> colName;
169 vector<string> colInfo;
170 vector<size_t> colSize;
171 title.push_back("Force comparison.");
172 colSize.push_back(10);
173 colName.push_back("index_s");
174 colInfo.push_back("Structure index.");
175 colSize.push_back(10);
176 colName.push_back("index_a");
177 colInfo.push_back("Atom index (x, y, z components in consecutive "
178 "lines).");
179 colSize.push_back(16);
180 colName.push_back("Fref_phys");
181 colInfo.push_back("Reference force (physical units).");
182 colSize.push_back(16);
183 colName.push_back("Fnnp_phys");
184 colInfo.push_back("NNP force (physical units).");
185 if (normalize)
186 {
187 colSize.push_back(16);
188 colName.push_back("Fref_int");
189 colInfo.push_back("Reference force (internal units).");
190 colSize.push_back(16);
191 colName.push_back("Fnnp_int");
192 colInfo.push_back("NNP force (internal units).");
193 }
196 colSize,
197 colName,
198 colInfo));
199 }
200 }
201
202
203 fileName =
strpr(
"output.data.%04d", myRank);
204 fileOutputData.open(fileName.c_str());
205
206 for (vector<Structure>::iterator it = dataset.
structures.begin();
208 {
209
210
212
213
214 for (vector<Atom>::iterator it2 = it->atoms.begin();
215 it2 != it->atoms.end(); ++it2)
216 {
217
218 size_t const& e = it2->element;
219 count.at(e)++;
220 for (size_t i = 0; i < numSymmetryFunctions.at(e); ++i)
221 {
222 double const& s = it2->dEdG.at(i);
223 sensMean.at(e).at(i) += s * s;
224 sensMax.at(e).at(i) = max(sensMax.at(e).at(i), abs(s));
225 }
226
227
228
229 it2->numNeighborsUnique = 0;
230 it2->neighborsUnique.clear();
231 vector<size_t>(it2->neighborsUnique).swap(it2->neighborsUnique);
232
233 it2->numNeighborsPerElement.clear();
234 vector<size_t>(it2->numNeighborsPerElement).swap(
235 it2->numNeighborsPerElement);
236
237 it2->G.clear();
238 vector<double>(it2->G).swap(it2->G);
239
240 it2->dEdG.clear();
241 vector<double>(it2->dEdG).swap(it2->dEdG);
242
243#ifdef N2P2_FULL_SFD_MEMORY
244 it2->dGdxia.clear();
245 vector<double>(it2->dGdxia).swap(it2->dGdxia);
246#endif
247
248 it2->dGdr.clear();
249 vector<Vec3D>(it2->dGdr).swap(it2->dGdr);
250
251 it2->numNeighbors = 0;
252 it2->neighbors.clear();
253 vector<Atom::Neighbor>(it2->neighbors).swap(it2->neighbors);
254
255 it2->hasNeighborList = false;
256 it2->hasSymmetryFunctions = false;
257 it2->hasSymmetryFunctionDerivatives = false;
258 }
259 it->hasNeighborList = false;
260 it->hasSymmetryFunctions = false;
261 it->hasSymmetryFunctionDerivatives = false;
262 it->updateError("energy", errorEnergy, countEnergy);
263 fileEnergy <<
strpr(
"%10zu %10zu", it->index, it->numAtoms);
264 if (normalize)
265 {
266 fileEnergy <<
strpr(
" %16.8E %16.8E %16.8E %16.8E\n",
271 it->energyRef,
272 it->energy);
273 }
274 else
275 {
276 fileEnergy <<
strpr(
" %16.8E %16.8E\n",
279 }
280 if (useForces)
281 {
282 it->updateError("force", errorForces, countForces);
283 for (vector<Atom>::const_iterator it2 = it->atoms.begin();
284 it2 != it->atoms.end(); ++it2)
285 {
286 for (size_t i = 0; i < 3; ++i)
287 {
288 fileForces <<
strpr(
"%10zu %10zu",
289 it2->indexStructure,
290 it2->index);
291 if (normalize)
292 {
294 " %16.8E %16.8E",
295 dataset.
physical(
"force", it2->fRef[i]),
296 dataset.
physical(
"force", it2->f[i]));
297 }
298 fileForces <<
strpr(
" %16.8E %16.8E\n",
299 it2->fRef[i],
300 it2->f[i]);
301 }
302 }
303 }
304 if (normalize)
305 {
310 }
312 it->writeToFile(&fileOutputData, false);
313 }
314
315 fileEnergy.close();
316 if (useForces) fileForces.close();
317 fileOutputData.close();
318 MPI_Barrier(MPI_COMM_WORLD);
319
320 if (myRank == 0)
321 {
322 fileName = "energy.comp";
324 if (useForces)
325 {
326 fileName = "forces.comp";
328 }
329 fileName = "output.data";
331 }
332
333 dataset.
collectError(
"energy", errorEnergy, countEnergy);
334 if (useForces) dataset.
collectError(
"force", errorForces, countForces);
335
336 if (myRank == 0)
337 {
338 if (useForces)
339 {
340 dataset.
log <<
"Energy and force comparison in files:\n";
341 dataset.
log <<
" - energy.comp\n";
342 dataset.
log <<
" - forces.comp\n";
343 }
344 else
345 {
346 dataset.
log <<
"Energy comparison in file:\n";
347 dataset.
log <<
" - energy.comp\n";
348 }
349 dataset.
log <<
"Predicted data set in \"output.data\"\n";
350 }
351 dataset.
log <<
"Error metrics for energies and forces:\n";
352 dataset.
log <<
"-----------------------------------------"
353 "-----------------------------------------"
354 "--------------------------------------\n";
355 dataset.
log <<
" physical units ";
356 if (normalize)
357 {
358 dataset.
log <<
" | internal units ";
359 }
361 dataset.
log <<
strpr(
" %13s %13s %13s %13s",
362 "RMSEpa", "RMSE", "MAEpa", "MAE");
363 if (normalize)
364 {
365 dataset.
log <<
strpr(
" | %13s %13s %13s %13s",
366 "RMSEpa", "RMSE", "MAEpa", "MAE");
367 }
369 dataset.
log <<
"ENERGY";
370 if (normalize)
371 {
373 " %13.5E %13.5E %13.5E %13.5E |",
374 dataset.
physical(
"energy", errorEnergy.at(
"RMSEpa")),
375 dataset.
physical(
"energy", errorEnergy.at(
"RMSE")),
376 dataset.
physical(
"energy", errorEnergy.at(
"MAEpa")),
377 dataset.
physical(
"energy", errorEnergy.at(
"MAE")));
378 }
379 dataset.
log <<
strpr(
" %13.5E %13.5E %13.5E %13.5E\n",
380 errorEnergy.at("RMSEpa"),
381 errorEnergy.at("RMSE"),
382 errorEnergy.at("MAEpa"),
383 errorEnergy.at("MAE"));
384 if (useForces)
385 {
386 dataset.
log <<
"FORCES";
387 if (normalize)
388 {
390 " %13s %13.5E %13s %13.5E |", "",
391 dataset.
physical(
"force", errorForces.at(
"RMSE")),
"",
392 dataset.
physical(
"force", errorForces.at(
"MAE")));
393 }
394 dataset.
log <<
strpr(
" %13s %13.5E %13s %13.5E\n",
"",
395 errorForces.at("RMSE"), "",
396 errorForces.at("MAE"));
397 }
398 dataset.
log <<
"-----------------------------------------"
399 "-----------------------------------------"
400 "--------------------------------------\n";
401 dataset.
log <<
"*****************************************"
402 "**************************************\n";
403
405 dataset.
log <<
"*** SENSITIVITY ANALYSIS ****************"
406 "**************************************\n";
408
409
410 if (myRank == 0)
411 {
412 dataset.
log <<
"Writing sensitivity analysis data to files:\n";
413 MPI_Reduce(MPI_IN_PLACE, &(count.front()), numElements,
MPI_SIZE_T, MPI_SUM, 0, MPI_COMM_WORLD);
414 for (size_t i = 0; i < numElements; ++i)
415 {
416 size_t const& n = numSymmetryFunctions.at(i);
417 double sensMeanSum = 0.0;
418 double sensMaxSum = 0.0;
419 MPI_Reduce(MPI_IN_PLACE, &(sensMean.at(i).front()), n, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
420 MPI_Reduce(MPI_IN_PLACE, &(sensMax.at(i).front() ), n, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
421 for (size_t j = 0; j < numSymmetryFunctions.at(i); ++j)
422 {
423 sensMean.at(i).at(j) = sqrt(sensMean.at(i).at(j)
424 / count.at(i));
425 sensMeanSum += sensMean.at(i).at(j);
426 sensMaxSum += sensMax.at(i).at(j);
427 }
428 ofstream sensFile;
429 string sensFileName =
strpr(
"sensitivity.%03d.out",
431 dataset.
log <<
strpr(
" - %s\n", sensFileName.c_str());
432 sensFile.open(sensFileName.c_str());
433
434
435 vector<string> title;
436 vector<string> colName;
437 vector<string> colInfo;
438 vector<size_t> colSize;
439 title.push_back(
strpr(
"Sensitivity analysis for element %2s.",
441 colSize.push_back(10);
442 colName.push_back("index");
443 colInfo.push_back("Symmetry function index.");
444 colSize.push_back(16);
445 colName.push_back("sens_msa_norm");
446 colInfo.push_back("Mean square average sensitivity (normalized, "
447 "sum = 100%).");
448 colSize.push_back(16);
449 colName.push_back("sens_max_norm");
450 colInfo.push_back("Maximum sensitivity (normalized, sum = 100%).");
451 colSize.push_back(16);
452 colName.push_back("sens_msa_phys");
453 colInfo.push_back("Mean square average sensitivity (physical "
454 "energy units).");
455 colSize.push_back(16);
456 colName.push_back("sens_max_phys");
457 colInfo.push_back("Maximum sensitivity (physical energy units).");
458 if (normalize)
459 {
460 colSize.push_back(16);
461 colName.push_back("sens_msa_int");
462 colInfo.push_back("Mean square average sensitivity (internal "
463 "units).");
464 colSize.push_back(16);
465 colName.push_back("sens_max_int");
466 colInfo.push_back("Maximum sensitivity (internal units).");
467 }
470 colSize,
471 colName,
472 colInfo));
473
474 for (size_t j = 0; j < numSymmetryFunctions.at(i); ++j)
475 {
476 sensFile <<
strpr(
"%10d", j + 1);
477 sensFile <<
strpr(
" %16.8E %16.8E",
478 sensMean.at(i).at(j) / sensMeanSum * 100.0,
479 sensMax.at(i).at(j) / sensMaxSum * 100.0);
480 if (normalize)
481 {
482 sensFile <<
strpr(
" %16.8E %16.8E",
484 sensMean.at(i).at(j)),
486 sensMax.at(i).at(j)));
487 }
488 sensFile <<
strpr(
" %16.8E %16.8E\n",
489 sensMean.at(i).at(j),
490 sensMax.at(i).at(j));
491 }
492 sensFile.close();
493 }
494 }
495 else
496 {
497 MPI_Reduce(&(count.front()), &(count.front()), numElements,
MPI_SIZE_T, MPI_SUM, 0, MPI_COMM_WORLD);
498 for (size_t i = 0; i < numElements; ++i)
499 {
500 size_t const& n = numSymmetryFunctions.at(i);
501 MPI_Reduce(&(sensMean.at(i).front()), &(sensMean.at(i).front()), n, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
502 MPI_Reduce(&(sensMax.at(i).front() ), &(sensMax.at(i).front() ), n, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
503 }
504 }
505
506 dataset.
log <<
"*****************************************"
507 "**************************************\n";
508
509 myLog.close();
510
511 MPI_Finalize();
512
513 return 0;
514}
Collect and process large data sets.
int distributeStructures(bool randomize, bool excludeRank0=false, std::string const &fileName="input.data")
Read data file and distribute structures among processors.
void setupMPI()
Initialize MPI with MPI_COMM_WORLD.
std::vector< Structure > structures
All structures in this dataset.
void setupRandomNumberGenerator()
Initialize random number generator.
void combineFiles(std::string filePrefix) const
Combine individual MPI proc files to one.
void collectError(std::string const &property, std::map< std::string, double > &error, std::size_t &count) const
Collect error metrics of a property over all MPI procs.
void toNormalizedUnits()
Switch all structures to normalized units.
std::size_t atomicNumber(std::size_t index) const
Get atomic number from element index.
void registerStreamPointer(std::ofstream *const &streamPointer)
Register new C++ ofstream pointer.
bool writeToStdout
Turn on/off output to stdout.
double physicalEnergy(Structure const &structure, bool ref=true) const
Undo normalization for a given energy of structure.
ElementMap elementMap
Global element map, populated by setupElementMap().
void addEnergyOffset(Structure &structure, bool ref=true)
Add atomic energy offsets to reference energy.
void initialize()
Write welcome message with version information.
std::size_t getNumElements() const
Getter for Mode::numElements.
double getConvLength() const
Getter for Mode::convLength.
void setupGeneric(std::string const &nnpDir="", bool skipNormalize=false, bool initialHardness=false)
Combine multiple setup routines and provide a basic NNP setup.
virtual void setupNeuralNetworkWeights(std::map< std::string, std::string > fileNameFormats=std::map< std::string, std::string >())
Set up neural network weights from files with given name format.
double physical(std::string const &property, double value) const
Undo normalization for a given property.
bool useNormalization() const
Check if normalization is enabled.
double getConvEnergy() const
Getter for Mode::convEnergy.
virtual void setupSymmetryFunctionScaling(std::string const &fileName="scaling.data")
Set up symmetry function scaling from file.
std::vector< std::size_t > getNumSymmetryFunctions() const
Get number of symmetry functions per element.
bool settingsKeywordExists(std::string const &keyword) const
Check if keyword was found in settings file.
double getMeanEnergy() const
Getter for Mode::meanEnergy.
double getEnergyWithOffset(Structure const &structure, bool ref=true) const
Add atomic energy offsets and return energy.
void evaluateNNP(Structure &structure, bool useForces=true, bool useDEdG=true)
Evaluate neural network potential (includes total energy, optionally forces and in some cases charges...
void setupSymmetryFunctionStatistics(bool collectStatistics, bool collectExtrapolationWarnings, bool writeExtrapolationWarnings, bool stopOnExtrapolationWarnings)
Set up symmetry function statistics collection.
void loadSettingsFile(std::string const &fileName="input.nn")
Open settings file and load all keywords into memory.
double getConvCharge() const
Getter for Mode::convCharge.
double getEnergyOffset(Structure const &structure) const
Get atomic energy offset for given structure.
string strpr(const char *format,...)
String version of printf function.
vector< string > createFileHeader(vector< string > const &title, vector< size_t > const &colSize, vector< string > const &colName, vector< string > const &colInfo, char const &commentChar)
void appendLinesToFile(ofstream &file, vector< string > const lines)
Append multiple lines of strings to open file stream.