34{
35 bool shuffle = false;
36 bool useForces = false;
37 bool normalize = false;
38 int numProcs = 0;
39 int myRank = 0;
40 size_t countEnergy = 0;
41 size_t countForces = 0;
42 map<string, double> errorEnergy;
43 map<string, double> errorForces;
44 string fileName;
45 ofstream fileEnergy;
46 ofstream fileForces;
47 ofstream fileOutputData;
48 ofstream myLog;
49
50 errorEnergy["RMSEpa"] = 0.0;
51 errorEnergy["RMSE"] = 0.0;
52 errorEnergy["MAEpa"] = 0.0;
53 errorEnergy["MAE"] = 0.0;
54 errorForces["RMSE"] = 0.0;
55 errorForces["MAE"] = 0.0;
56
57 if (argc != 2)
58 {
59 cout << "USAGE: " << argv[0] << " <shuffle>\n"
60 << " <shuffle> ... Randomly distribute structures to MPI"
61 " processes (0/1 = no/yes).\n"
62 << " Execute in directory with these NNP files present:\n"
63 << " - input.data (structure file)\n"
64 << " - input.nn (NNP settings)\n"
65 << " - scaling.data (symmetry function scaling data)\n"
66 << " - \"weights.%%03d.data\" (weights files)\n";
67 return 1;
68 }
69
70 shuffle = (bool)atoi(argv[1]);
71
72 MPI_Init(&argc, &argv);
73 MPI_Comm_size(MPI_COMM_WORLD, &numProcs);
74 MPI_Comm_rank(MPI_COMM_WORLD, &myRank);
75
78 myLog.open(
strpr(
"nnp-dataset.log.%04d", myRank).c_str());
91
93 dataset.
log <<
"*** DATA SET PREDICTION *****************"
94 "**************************************\n";
96
98 if (useForces)
99 {
100 dataset.
log <<
"Energies and forces are predicted.\n";
101 }
102 else
103 {
104 dataset.
log <<
"Only energies are predicted.\n";
105 }
106
107
110 vector<size_t> count(numElements, 0);
111 vector<vector<double> > sensMean;
112 vector<vector<double> > sensMax;
113 sensMean.resize(numElements);
114 sensMax.resize(numElements);
115 for (size_t i = 0; i < numElements; ++i)
116 {
117 sensMean.at(i).resize(numSymmetryFunctions.at(i), 0.0);
118 sensMax.at(i).resize(numSymmetryFunctions.at(i), 0.0);
119 }
120
121
122 fileName =
strpr(
"energy.comp.%04d", myRank);
123 fileEnergy.open(fileName.c_str());
124
125
126 if (myRank == 0)
127 {
128 vector<string> title;
129 vector<string> colName;
130 vector<string> colInfo;
131 vector<size_t> colSize;
132 title.push_back("Energy comparison.");
133 colSize.push_back(10);
134 colName.push_back("index");
135 colInfo.push_back("Structure index.");
136 colSize.push_back(10);
137 colName.push_back("N");
138 colInfo.push_back("Number of atoms in structure.");
139 colSize.push_back(16);
140 colName.push_back("Eref_phys");
141 colInfo.push_back("Reference potential energy (physical units, "
142 "atomic energy offsets added).");
143 colSize.push_back(16);
144 colName.push_back("Ennp_phys");
145 colInfo.push_back("NNP potential energy (physical units, "
146 "atomic energy offsets added).");
147 if (normalize)
148 {
149 colSize.push_back(16);
150 colName.push_back("Eref_int");
151 colInfo.push_back("Reference potential energy (internal units).");
152 colSize.push_back(16);
153 colName.push_back("Ennp_int");
154 colInfo.push_back("NNP potential energy (internal units).");
155 }
158 }
159 if (useForces)
160 {
161 fileName =
strpr(
"forces.comp.%04d", myRank);
162 fileForces.open(fileName.c_str());
163
164
165 if (myRank == 0)
166 {
167 vector<string> title;
168 vector<string> colName;
169 vector<string> colInfo;
170 vector<size_t> colSize;
171 title.push_back("Force comparison.");
172 colSize.push_back(10);
173 colName.push_back("index_s");
174 colInfo.push_back("Structure index.");
175 colSize.push_back(10);
176 colName.push_back("index_a");
177 colInfo.push_back("Atom index (x, y, z components in consecutive "
178 "lines).");
179 colSize.push_back(16);
180 colName.push_back("Fref_phys");
181 colInfo.push_back("Reference force (physical units).");
182 colSize.push_back(16);
183 colName.push_back("Fnnp_phys");
184 colInfo.push_back("NNP force (physical units).");
185 if (normalize)
186 {
187 colSize.push_back(16);
188 colName.push_back("Fref_int");
189 colInfo.push_back("Reference force (internal units).");
190 colSize.push_back(16);
191 colName.push_back("Fnnp_int");
192 colInfo.push_back("NNP force (internal units).");
193 }
196 colSize,
197 colName,
198 colInfo));
199 }
200 }
201
202
203 fileName =
strpr(
"output.data.%04d", myRank);
204 fileOutputData.open(fileName.c_str());
205
206 for (vector<Structure>::iterator it = dataset.
structures.begin();
208 {
210#ifdef N2P2_NO_SF_GROUPS
212#else
214#endif
215
216 for (vector<Atom>::iterator it2 = it->atoms.begin();
217 it2 != it->atoms.end(); ++it2)
218 {
219 size_t const& e = it2->element;
220 it2->dEdG.resize(numSymmetryFunctions.at(e), 0.0);
221 }
222
223
227
228 for (vector<Atom>::iterator it2 = it->atoms.begin();
229 it2 != it->atoms.end(); ++it2)
230 {
231
232 size_t const& e = it2->element;
233 count.at(e)++;
234 for (size_t i = 0; i < numSymmetryFunctions.at(e); ++i)
235 {
236 double const& s = it2->dEdG.at(i);
237 sensMean.at(e).at(i) += s * s;
238 sensMax.at(e).at(i) = max(sensMax.at(e).at(i), abs(s));
239 }
240
241
242
243 it2->numNeighborsUnique = 0;
244 it2->neighborsUnique.clear();
245 vector<size_t>(it2->neighborsUnique).swap(it2->neighborsUnique);
246
247 it2->numNeighborsPerElement.clear();
248 vector<size_t>(it2->numNeighborsPerElement).swap(
249 it2->numNeighborsPerElement);
250
251 it2->G.clear();
252 vector<double>(it2->G).swap(it2->G);
253
254 it2->dEdG.clear();
255 vector<double>(it2->dEdG).swap(it2->dEdG);
256
257#ifdef N2P2_FULL_SFD_MEMORY
258 it2->dGdxia.clear();
259 vector<double>(it2->dGdxia).swap(it2->dGdxia);
260#endif
261
262 it2->dGdr.clear();
263 vector<Vec3D>(it2->dGdr).swap(it2->dGdr);
264
265 it2->numNeighbors = 0;
266 it2->neighbors.clear();
267 vector<Atom::Neighbor>(it2->neighbors).swap(it2->neighbors);
268
269 it2->hasNeighborList = false;
270 it2->hasSymmetryFunctions = false;
271 it2->hasSymmetryFunctionDerivatives = false;
272 }
273 it->hasNeighborList = false;
274 it->hasSymmetryFunctions = false;
275 it->hasSymmetryFunctionDerivatives = false;
276 it->updateError("energy", errorEnergy, countEnergy);
277 fileEnergy <<
strpr(
"%10zu %10zu", it->index, it->numAtoms);
278 if (normalize)
279 {
280 fileEnergy <<
strpr(
" %16.8E %16.8E %16.8E %16.8E\n",
285 it->energyRef,
286 it->energy);
287 }
288 else
289 {
290 fileEnergy <<
strpr(
" %16.8E %16.8E\n",
293 }
294 if (useForces)
295 {
296 it->updateError("force", errorForces, countForces);
297 for (vector<Atom>::const_iterator it2 = it->atoms.begin();
298 it2 != it->atoms.end(); ++it2)
299 {
300 for (size_t i = 0; i < 3; ++i)
301 {
302 fileForces <<
strpr(
"%10zu %10zu",
303 it2->indexStructure,
304 it2->index);
305 if (normalize)
306 {
308 " %16.8E %16.8E",
309 dataset.
physical(
"force", it2->fRef[i]),
310 dataset.
physical(
"force", it2->f[i]));
311 }
312 fileForces <<
strpr(
" %16.8E %16.8E\n",
313 it2->fRef[i],
314 it2->f[i]);
315 }
316 }
317 }
318 if (normalize)
319 {
323 }
325 it->writeToFile(&fileOutputData, false);
326 }
327
328 fileEnergy.close();
329 if (useForces) fileForces.close();
330 fileOutputData.close();
331 MPI_Barrier(MPI_COMM_WORLD);
332
333 if (myRank == 0)
334 {
335 fileName = "energy.comp";
337 if (useForces)
338 {
339 fileName = "forces.comp";
341 }
342 fileName = "output.data";
344 }
345
346 dataset.
collectError(
"energy", errorEnergy, countEnergy);
347 if (useForces) dataset.
collectError(
"force", errorForces, countForces);
348
349 if (myRank == 0)
350 {
351 if (useForces)
352 {
353 dataset.
log <<
"Energy and force comparison in files:\n";
354 dataset.
log <<
" - energy.comp\n";
355 dataset.
log <<
" - forces.comp\n";
356 }
357 else
358 {
359 dataset.
log <<
"Energy comparison in file:\n";
360 dataset.
log <<
" - energy.comp\n";
361 }
362 dataset.
log <<
"Predicted data set in \"output.data\"\n";
363 }
364 dataset.
log <<
"Error metrics for energies and forces:\n";
365 dataset.
log <<
"-----------------------------------------"
366 "-----------------------------------------"
367 "--------------------------------------\n";
368 dataset.
log <<
" physical units ";
369 if (normalize)
370 {
371 dataset.
log <<
" | internal units ";
372 }
374 dataset.
log <<
strpr(
" %13s %13s %13s %13s",
375 "RMSEpa", "RMSE", "MAEpa", "MAE");
376 if (normalize)
377 {
378 dataset.
log <<
strpr(
" | %13s %13s %13s %13s",
379 "RMSEpa", "RMSE", "MAEpa", "MAE");
380 }
382 dataset.
log <<
"ENERGY";
383 if (normalize)
384 {
386 " %13.5E %13.5E %13.5E %13.5E |",
387 dataset.
physical(
"energy", errorEnergy.at(
"RMSEpa")),
388 dataset.
physical(
"energy", errorEnergy.at(
"RMSE")),
389 dataset.
physical(
"energy", errorEnergy.at(
"MAEpa")),
390 dataset.
physical(
"energy", errorEnergy.at(
"MAE")));
391 }
392 dataset.
log <<
strpr(
" %13.5E %13.5E %13.5E %13.5E\n",
393 errorEnergy.at("RMSEpa"),
394 errorEnergy.at("RMSE"),
395 errorEnergy.at("MAEpa"),
396 errorEnergy.at("MAE"));
397 if (useForces)
398 {
399 dataset.
log <<
"FORCES";
400 if (normalize)
401 {
403 " %13s %13.5E %13s %13.5E |", "",
404 dataset.
physical(
"force", errorForces.at(
"RMSE")),
"",
405 dataset.
physical(
"force", errorForces.at(
"MAE")));
406 }
407 dataset.
log <<
strpr(
" %13s %13.5E %13s %13.5E\n",
"",
408 errorForces.at("RMSE"), "",
409 errorForces.at("MAE"));
410 }
411 dataset.
log <<
"-----------------------------------------"
412 "-----------------------------------------"
413 "--------------------------------------\n";
414 dataset.
log <<
"*****************************************"
415 "**************************************\n";
416
418 dataset.
log <<
"*** SENSITIVITY ANALYSIS ****************"
419 "**************************************\n";
421
422
423 if (myRank == 0)
424 {
425 dataset.
log <<
"Writing sensitivity analysis data to files:\n";
426 MPI_Reduce(MPI_IN_PLACE, &(count.front()), numElements,
MPI_SIZE_T, MPI_SUM, 0, MPI_COMM_WORLD);
427 for (size_t i = 0; i < numElements; ++i)
428 {
429 size_t const& n = numSymmetryFunctions.at(i);
430 double sensMeanSum = 0.0;
431 double sensMaxSum = 0.0;
432 MPI_Reduce(MPI_IN_PLACE, &(sensMean.at(i).front()), n, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
433 MPI_Reduce(MPI_IN_PLACE, &(sensMax.at(i).front() ), n, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
434 for (size_t j = 0; j < numSymmetryFunctions.at(i); ++j)
435 {
436 sensMean.at(i).at(j) = sqrt(sensMean.at(i).at(j)
437 / count.at(i));
438 sensMeanSum += sensMean.at(i).at(j);
439 sensMaxSum += sensMax.at(i).at(j);
440 }
441 ofstream sensFile;
442 string sensFileName =
strpr(
"sensitivity.%03d.out",
444 dataset.
log <<
strpr(
" - %s\n", sensFileName.c_str());
445 sensFile.open(sensFileName.c_str());
446
447
448 vector<string> title;
449 vector<string> colName;
450 vector<string> colInfo;
451 vector<size_t> colSize;
452 title.push_back(
strpr(
"Sensitivity analysis for element %2s.",
454 colSize.push_back(10);
455 colName.push_back("index");
456 colInfo.push_back("Symmetry function index.");
457 colSize.push_back(16);
458 colName.push_back("sens_msa_norm");
459 colInfo.push_back("Mean square average sensitivity (normalized, "
460 "sum = 100%).");
461 colSize.push_back(16);
462 colName.push_back("sens_max_norm");
463 colInfo.push_back("Maximum sensitivity (normalized, sum = 100%).");
464 colSize.push_back(16);
465 colName.push_back("sens_msa_phys");
466 colInfo.push_back("Mean square average sensitivity (physical "
467 "energy units).");
468 colSize.push_back(16);
469 colName.push_back("sens_max_phys");
470 colInfo.push_back("Maximum sensitivity (physical energy units).");
471 if (normalize)
472 {
473 colSize.push_back(16);
474 colName.push_back("sens_msa_int");
475 colInfo.push_back("Mean square average sensitivity (internal "
476 "units).");
477 colSize.push_back(16);
478 colName.push_back("sens_max_int");
479 colInfo.push_back("Maximum sensitivity (internal units).");
480 }
483 colSize,
484 colName,
485 colInfo));
486
487 for (size_t j = 0; j < numSymmetryFunctions.at(i); ++j)
488 {
489 sensFile <<
strpr(
"%10d", j + 1);
490 sensFile <<
strpr(
" %16.8E %16.8E",
491 sensMean.at(i).at(j) / sensMeanSum * 100.0,
492 sensMax.at(i).at(j) / sensMaxSum * 100.0);
493 if (normalize)
494 {
495 sensFile <<
strpr(
" %16.8E %16.8E",
497 sensMean.at(i).at(j)),
499 sensMax.at(i).at(j)));
500 }
501 sensFile <<
strpr(
" %16.8E %16.8E\n",
502 sensMean.at(i).at(j),
503 sensMax.at(i).at(j));
504 }
505 sensFile.close();
506 }
507 }
508 else
509 {
510 MPI_Reduce(&(count.front()), &(count.front()), numElements,
MPI_SIZE_T, MPI_SUM, 0, MPI_COMM_WORLD);
511 for (size_t i = 0; i < numElements; ++i)
512 {
513 size_t const& n = numSymmetryFunctions.at(i);
514 MPI_Reduce(&(sensMean.at(i).front()), &(sensMean.at(i).front()), n, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);
515 MPI_Reduce(&(sensMax.at(i).front() ), &(sensMax.at(i).front() ), n, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);
516 }
517 }
518
519 dataset.
log <<
"*****************************************"
520 "**************************************\n";
521
522 myLog.close();
523
524 MPI_Finalize();
525
526 return 0;
527}
Collect and process large data sets.
int distributeStructures(bool randomize, bool excludeRank0=false, std::string const &fileName="input.data")
Read data file and distribute structures among processors.
void setupMPI()
Initialize MPI with MPI_COMM_WORLD.
std::vector< Structure > structures
All structures in this dataset.
void setupRandomNumberGenerator()
Initialize random number generator.
void combineFiles(std::string filePrefix) const
Combine individual MPI proc files to one.
void collectError(std::string const &property, std::map< std::string, double > &error, std::size_t &count) const
Collect error metrics of a property over all MPI procs.
void toNormalizedUnits()
Switch all structures to normalized units.
std::size_t atomicNumber(std::size_t index) const
Get atomic number from element index.
void registerStreamPointer(std::ofstream *const &streamPointer)
Register new C++ ofstream pointer.
bool writeToStdout
Turn on/off output to stdout.
double physicalEnergy(Structure const &structure, bool ref=true) const
Undo normalization for a given energy of structure.
ElementMap elementMap
Global element map, populated by setupElementMap().
void addEnergyOffset(Structure &structure, bool ref=true)
Add atomic energy offsets to reference energy.
void initialize()
Write welcome message with version information.
std::size_t getNumElements() const
Getter for Mode::numElements.
double getConvLength() const
Getter for Mode::convLength.
double physical(std::string const &property, double value) const
Undo normalization for a given property.
double getMaxCutoffRadius() const
Getter for Mode::maxCutoffRadius.
void calculateAtomicNeuralNetworks(Structure &structure, bool const derivatives)
Calculate a single atomic neural network for a given atom and nn type.
bool useNormalization() const
Check if normalization is enabled.
virtual void setupNeuralNetworkWeights(std::string const &fileNameFormatShort="weights.%03zu.data", std::string const &fileNameFormatCharge="weightse.%03zu.data")
Set up neural network weights from files.
void calculateEnergy(Structure &structure) const
Calculate potential energy for a given structure.
void calculateSymmetryFunctionGroups(Structure &structure, bool const derivatives)
Calculate all symmetry function groups for all atoms in given structure.
double getConvEnergy() const
Getter for Mode::convEnergy.
virtual void setupSymmetryFunctionScaling(std::string const &fileName="scaling.data")
Set up symmetry function scaling from file.
std::vector< std::size_t > getNumSymmetryFunctions() const
Get number of symmetry functions per element.
bool settingsKeywordExists(std::string const &keyword) const
Check if keyword was found in settings file.
double getMeanEnergy() const
Getter for Mode::meanEnergy.
double getEnergyWithOffset(Structure const &structure, bool ref=true) const
Add atomic energy offsets and return energy.
void setupGeneric(bool skipNormalize=false)
Combine multiple setup routines and provide a basic NNP setup.
void calculateSymmetryFunctions(Structure &structure, bool const derivatives)
Calculate all symmetry functions for all atoms in given structure.
void calculateForces(Structure &structure) const
Calculate forces for all atoms in given structure.
void setupSymmetryFunctionStatistics(bool collectStatistics, bool collectExtrapolationWarnings, bool writeExtrapolationWarnings, bool stopOnExtrapolationWarnings)
Set up symmetry function statistics collection.
void loadSettingsFile(std::string const &fileName="input.nn")
Open settings file and load all keywords into memory.
double getEnergyOffset(Structure const &structure) const
Get atomic energy offset for given structure.
string strpr(const char *format,...)
String version of printf function.
vector< string > createFileHeader(vector< string > const &title, vector< size_t > const &colSize, vector< string > const &colName, vector< string > const &colInfo, char const &commentChar)
void appendLinesToFile(ofstream &file, vector< string > const lines)
Append multiple lines of strings to open file stream.