n2p2 - A neural network potential package
Training.cpp
1// n2p2 - A neural network potential package
2// Copyright (C) 2018 Andreas Singraber (University of Vienna)
3//
4// This program is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// This program is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12// GNU General Public License for more details.
13//
14// You should have received a copy of the GNU General Public License
15// along with this program. If not, see <https://www.gnu.org/licenses/>.
16
17#include "Training.h"
18#include "Eigen/src/Core/Matrix.h"
19#include "GradientDescent.h"
20#include "KalmanFilter.h"
21#include "NeuralNetwork.h"
22#include "utility.h"
23#include "mpi-extra.h"
24#ifdef _OPENMP
25#include <omp.h>
26#endif
27#include <algorithm> // std::sort, std::fill, std::find
28#include <cmath> // fabs
29#include <cstdlib> // atoi
30#include <gsl/gsl_rng.h>
31#include <limits> // std::numeric_limits
32#include <stdexcept> // std::runtime_error, std::range_error
33#include <utility> // std::piecewise_construct, std::forward_as_tuple
34
35using namespace std;
36using namespace nnp;
37
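// Overview: Training drives the fitting procedure of nnp-train. The member
// functions below roughly follow the order in which the application calls
// them: setStage() -> selectSets() -> writeSetsToFiles() ->
// initializeWeights() -> setupTraining() -> calculateNeighborLists(),
// followed by the per-epoch routines (calculateErrorEpoch(),
// writeWeightsEpoch(), printEpoch(), ...).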
38Training::Training() : Dataset(),
39 updaterType (UT_GD ),
40 parallelMode (PM_TRAIN_RK0 ),
41 jacobianMode (JM_SUM ),
42 updateStrategy (US_COMBINED ),
43 hasUpdaters (false ),
44 hasStructures (false ),
45 useForces (false ),
46 repeatedEnergyUpdates (false ),
47 freeMemory (false ),
48 writeTrainingLog (false ),
49 stage (0 ),
50 numUpdaters (0 ),
51 numEpochs (0 ),
52 epoch (0 ),
53 writeWeightsEvery (0 ),
54 writeWeightsAlways (0 ),
55 writeNeuronStatisticsEvery (0 ),
56 writeNeuronStatisticsAlways(0 ),
57 numWeights (0 ),
58 forceWeight (0.0 ),
59 trainingLogFileName ("train-log.out")
60{
61 sw["setup"].start();
62}
63
64Training::~Training()
65{
66 for (vector<Updater*>::iterator it = updaters.begin();
67 it != updaters.end(); ++it)
68 {
69 if (updaterType == UT_GD)
70 {
71 delete dynamic_cast<GradientDescent*>(*it);
72 }
73 else if (updaterType == UT_KF)
74 {
75 delete dynamic_cast<KalmanFilter*>(*it);
76 }
77 }
78
79 if (trainingLog.is_open()) trainingLog.close();
80}
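// Each Updater pointer is downcast to its concrete type before deletion,
// which avoids relying on Updater having a virtual destructor; if it does
// declare one, a plain "delete *it;" would behave identically.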
81
82void Training::selectSets()
83{
84 log << "\n";
85 log << "*** DEFINE TRAINING/TEST SETS ***********"
86 "**************************************\n";
87 log << "\n";
88
89 vector<string> pCheck = {"force", "charge"};
90 bool check = false;
91 for (auto k : pk)
92 {
93 check |= (find(pCheck.begin(), pCheck.end(), k) != pCheck.end());
94 }
95 vector<size_t> numAtomsPerElement(numElements, 0);
96
97 double testSetFraction = atof(settings["test_fraction"].c_str());
98 log << strpr("Desired test set ratio : %f\n", testSetFraction);
99 if (structures.size() > 0) hasStructures = true;
100 else hasStructures = false;
101
102 string k;
103 for (size_t i = 0; i < structures.size(); ++i)
104 {
105 Structure& s = structures.at(i);
106        // Only select set if not already determined.
107        if (s.sampleType == Structure::ST_UNKNOWN)
108        {
109            double const r = gsl_rng_uniform(rng);
110            if (r < testSetFraction) s.sampleType = Structure::ST_TEST;
111            else                     s.sampleType = Structure::ST_TRAINING;
112        }
113        if (s.sampleType == Structure::ST_TEST)
114        {
115 size_t const& na = s.numAtoms;
116 k = "energy"; if (p.exists(k)) p[k].numTestPatterns++;
117 k = "force"; if (p.exists(k)) p[k].numTestPatterns += 3 * na;
118            if (nnpType == NNPType::HDNNP_4G)
119            {
120 k = "charge"; if (p.exists(k)) p[k].numTestPatterns++;
121 }
122 else
123 {
124 k = "charge"; if (p.exists(k)) p[k].numTestPatterns += na;
125 }
126 }
127        else if (s.sampleType == Structure::ST_TRAINING)
128        {
129 for (size_t j = 0; j < numElements; ++j)
130 {
131 numAtomsPerElement.at(j) += s.numAtomsPerElement.at(j);
132 }
133
134 k = "energy";
135 if (p.exists(k))
136 {
137 p[k].numTrainPatterns++;
138 p[k].updateCandidates.push_back(UpdateCandidate());
139 p[k].updateCandidates.back().s = i;
140 }
141 k = "force";
142 if (p.exists(k))
143 {
144 p[k].numTrainPatterns += 3 * s.numAtoms;
145
146                if (nnpType == NNPType::HDNNP_4G)
147                {
148 p[k].updateCandidates.push_back(UpdateCandidate());
149 p[k].updateCandidates.back().s = i;
150 }
151 for (auto &ai : s.atoms)
152 {
153 for (size_t j = 0; j < 3; ++j)
154 {
155                        if (nnpType != NNPType::HDNNP_4G)
156                        {
157 p[k].updateCandidates.push_back(UpdateCandidate());
158 p[k].updateCandidates.back().s = i;
159 }
160 p[k].updateCandidates.back().subCandidates
161 .push_back(SubCandidate());
162 p[k].updateCandidates.back().subCandidates
163 .back().a = ai.index;
164 p[k].updateCandidates.back().subCandidates
165 .back().c = j;
166 }
167 }
168 }
169 k = "charge";
170 if (p.exists(k))
171 {
172                if (nnpType == NNPType::HDNNP_4G)
173                {
174 p[k].numTrainPatterns++;
175 p[k].updateCandidates.push_back(UpdateCandidate());
176 p[k].updateCandidates.back().s = i;
177 }
178 else
179 {
180 p[k].numTrainPatterns += s.numAtoms;
181 for (vector<Atom>::const_iterator it = s.atoms.begin();
182 it != s.atoms.end(); ++it)
183 {
184 p[k].updateCandidates.push_back(UpdateCandidate());
185 p[k].updateCandidates.back().s = i;
186 p[k].updateCandidates.back().subCandidates
187 .push_back(SubCandidate());
188 p[k].updateCandidates.back().subCandidates.back().a
189 = it->index;
190 }
191 }
192
193 }
194 }
195 else
196 {
197 log << strpr("WARNING: Structure %zu not assigned to either "
198 "training or test set.\n", s.index);
199 }
200 }
201 for (size_t i = 0; i < numElements; ++i)
202 {
203 if (hasStructures && (numAtomsPerElement.at(i) == 0))
204 {
205            log << strpr("WARNING: Process %d has no atoms of element "
206                         "%zu (%2s).\n",
207 myRank,
208 i,
209 elementMap[i].c_str());
210 }
211 }
212 for (auto k : pk)
213 {
214 MPI_Allreduce(MPI_IN_PLACE, &(p[k].numTrainPatterns), 1, MPI_SIZE_T, MPI_SUM, comm);
215 MPI_Allreduce(MPI_IN_PLACE, &(p[k].numTestPatterns) , 1, MPI_SIZE_T, MPI_SUM, comm);
216 double sum = p[k].numTrainPatterns + p[k].numTestPatterns;
217 log << "Training/test split of data set for property \"" + k + "\":\n";
218 log << strpr("- Total patterns : %.0f\n", sum);
219        log << strpr("- Training patterns : %zu\n", p[k].numTrainPatterns);
220        log << strpr("- Test     patterns : %zu\n", p[k].numTestPatterns);
221 log << strpr("- Test set fraction : %f\n", p[k].numTestPatterns / sum);
222 }
223
224 log << "*****************************************"
225 "**************************************\n";
226
227 return;
228}
229
231{
232 log << "\n";
233 log << "*** WRITE TRAINING/TEST SETS ************"
234 "**************************************\n";
235 log << "\n";
236
237 string fileName = strpr("train.data.%04d", myRank);
238 ofstream fileTrain;
239 fileTrain.open(fileName.c_str());
240 if (!fileTrain.is_open())
241 {
242        throw runtime_error(strpr("ERROR: Could not open file %s\n",
243 fileName.c_str()));
244 }
245 fileName = strpr("test.data.%04d", myRank);
246 ofstream fileTest;
247 fileTest.open(fileName.c_str());
248 if (!fileTest.is_open())
249 {
250        throw runtime_error(strpr("ERROR: Could not open file %s\n",
251 fileName.c_str()));
252 }
253 for (vector<Structure>::iterator it = structures.begin();
254 it != structures.end(); ++it)
255 {
256 // Energy offsets are already subtracted at this point.
257 // Here, we quickly add them again to provide consistent data sets.
258 addEnergyOffset(*it);
259 if (it->sampleType == Structure::ST_TRAINING)
260 {
261 it->writeToFile(&fileTrain);
262 }
263 else if (it->sampleType == Structure::ST_TEST)
264 {
265 it->writeToFile(&fileTest);
266 }
267 // Subtract energy offsets again.
268        removeEnergyOffset(*it);
269    }
270 fileTrain.flush();
271 fileTrain.close();
272 fileTest.flush();
273 fileTest.close();
274 MPI_Barrier(comm);
275 if (myRank == 0)
276 {
277 log << "Writing training/test set to files:\n";
278 log << " - train.data\n";
279 log << " - test.data\n";
280 fileName = "train.data";
281 combineFiles(fileName);
282 fileName = "test.data";
283 combineFiles(fileName);
284 }
285
286 log << "*****************************************"
287 "**************************************\n";
288
289 return;
290}
291
292void Training::initializeWeights()
293{
294 log << "\n";
295 log << "*** WEIGHT INITIALIZATION ***************"
296 "**************************************\n";
297 log << "\n";
298
299 if (settings.keywordExists("nguyen_widrow_weights_short") &&
300 settings.keywordExists("precondition_weights"))
301 {
302        throw runtime_error("ERROR: Nguyen-Widrow and preconditioning weight"
303                            " initializations are incompatible.\n");
304 }
305
306 // Training stage 2 requires electrostatics NN weights from file.
307 if ((nnpType == NNPType::HDNNP_4G && stage == 2) ||
308 (nnpType == NNPType::HDNNP_Q && stage == 2))
309 {
310 log << "Setting up " + nns.at("elec").name + " neural networks:\n";
311 readNeuralNetworkWeights("elec", nns.at("elec").weightFileFormat);
312 }
313
314 // Currently trained NN weights may be read from file or randomized.
315 NNSetup const& nn = nns.at(nnId);
316 log << "Setting up " + nn.name + " neural networks:\n";
317 if (settings.keywordExists("use_old_weights" + nn.keywordSuffix2))
318 {
319 log << "Reading old weights from files.\n";
320        readNeuralNetworkWeights(nnId, nn.weightFileFormat);
321    }
322    else randomizeNeuralNetworkWeights(nnId);
323
324 log << "*****************************************"
325 "**************************************\n";
326
327 return;
328}
329
330void Training::initializeWeightsMemory(UpdateStrategy updateStrategy)
331{
332 this->updateStrategy = updateStrategy;
333    numWeights = 0;
334    if (updateStrategy == US_COMBINED)
335    {
336 log << strpr("Combined updater for all elements selected: "
337 "UpdateStrategy::US_COMBINED (%d)\n", updateStrategy);
338 numUpdaters = 1;
339 log << strpr("Number of weight updaters : %zu\n", numUpdaters);
340 for (size_t i = 0; i < numElements; ++i)
341 {
342 weightsOffset.push_back(numWeights);
343 numWeights += elements.at(i).neuralNetworks.at(nnId)
344 .getNumConnections();
345            // sqrt of the element's hardness is included in the weights vector
346            if (nnpType == NNPType::HDNNP_4G && stage == 1) numWeights++;
347 }
348 weights.resize(numUpdaters);
349 weights.at(0).resize(numWeights, 0.0);
350        numWeightsPerUpdater.push_back(numWeights);
351        log << strpr("Total fit parameters : %zu\n", numWeights);
352 }
353 else if (updateStrategy == US_ELEMENT)
354 {
355 log << strpr("Separate updaters for elements selected: "
356 "UpdateStrategy::US_ELEMENT (%d)\n", updateStrategy);
357        numUpdaters = numElements;
358        log << strpr("Number of weight updaters : %zu\n", numUpdaters);
359 weights.resize(numUpdaters);
360 for (size_t i = 0; i < numUpdaters; ++i)
361 {
362 size_t n = elements.at(i).neuralNetworks.at(nnId)
363 .getNumConnections();
364 weights.at(i).resize(n, 0.0);
365 numWeightsPerUpdater.push_back(n);
366 log << strpr("Fit parameters for element %2s: %zu\n",
367 elements.at(i).getSymbol().c_str(),
368 n);
369 }
370 }
371 else
372 {
373 throw runtime_error("ERROR: Unknown update strategy.\n");
374 }
375
376 return;
377}
378
379void Training::setStage(size_t stage)
380{
381 this->stage = stage;
382
383 // NNP of type HDNNP_2G trains <properties> -> <network>:
384 // "energy" (optionally: "force") -> "short"
385    if (nnpType == NNPType::HDNNP_2G)
386    {
387 pk.push_back("energy");
388 if (settings.keywordExists("use_short_forces")) pk.push_back("force");
389 nnId = "short";
390 }
391 // NNP of type HDNNP_4G or HDNNP_Q trains <properties> -> <network>:
392 // * stage 1: "charge" -> "elec"
393 // * stage 2: "energy" (optionally: "force") -> "short"
394 else if (nnpType == NNPType::HDNNP_4G ||
395             nnpType == NNPType::HDNNP_Q)
396    {
397 if (stage == 1)
398 {
399 pk.push_back("charge");
400 nnId = "elec";
401 }
402 else if (stage == 2)
403 {
404 pk.push_back("energy");
405 if (settings.keywordExists("use_short_forces"))
406 {
407 pk.push_back("force");
408 }
409 nnId = "short";
410 }
411 else
412 {
413 throw runtime_error(strpr("ERROR: No or incorrect training stage "
414 "specified: %zu (must be 1 or 2).\n",
415 stage));
416 }
417 }
418
419 // Initialize all training properties which will be used.
420 auto initP = [this](string key) {p.emplace(piecewise_construct,
421 forward_as_tuple(key),
422 forward_as_tuple(key));
423 };
424 for (auto k : pk) initP(k);
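    // The emplace with piecewise_construct above builds the map key and the
    // Property value in place from the same key string, so no temporary
    // Property object needs to be copied into the map.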
425
426 return;
427}
428
429void Training::dataSetNormalization()
430{
431 log << "\n";
432 log << "*** DATA SET NORMALIZATION **************"
433 "**************************************\n";
434 log << "\n";
435
436 log << "Computing statistics from reference data and initial "
437 "prediction...\n";
438 log << "\n";
439
440 bool useForcesLocal = settings.keywordExists("use_short_forces");
441
442    if ((nnpType == NNPType::HDNNP_4G || nnpType == NNPType::HDNNP_Q) &&
443        stage == 1)
444 {
445 throw runtime_error("ERROR: Normalization of charges not yet "
446                            "implemented.\n");
447 }
448 writeWeights("short", "weights.%03zu.norm");
449    if ((nnpType == NNPType::HDNNP_4G || nnpType == NNPType::HDNNP_Q) &&
450        stage == 2)
451 {
452 writeWeights("elec", "weightse.%03zu.norm");
453 }
454
455 ofstream fileEvsV;
456 fileEvsV.open(strpr("evsv.dat.%04d", myRank).c_str());
457 if (myRank == 0)
458 {
459 // File header.
460 vector<string> title;
461 vector<string> colName;
462 vector<string> colInfo;
463 vector<size_t> colSize;
464 title.push_back("Energy vs. volume comparison.");
465 colSize.push_back(16);
466 colName.push_back("V_atom");
467 colInfo.push_back("Volume per atom.");
468 colSize.push_back(16);
469 colName.push_back("Eref_atom");
470 colInfo.push_back("Reference energy per atom.");
471 colSize.push_back(10);
472 colName.push_back("N");
473 colInfo.push_back("Number of atoms.");
474 colSize.push_back(16);
475 colName.push_back("V");
476 colInfo.push_back("Volume of structure.");
477 colSize.push_back(16);
478 colName.push_back("Eref");
479 colInfo.push_back("Reference energy of structure.");
480 colSize.push_back(16);
481 colName.push_back("Eref_offset");
482 colInfo.push_back("Reference energy of structure (including offset).");
483 appendLinesToFile(fileEvsV,
484 createFileHeader(title, colSize, colName, colInfo));
485 }
486
487 size_t numAtomsTotal = 0;
488 size_t numStructures = 0;
489 double meanEnergyPerAtomRef = 0.0;
490 double meanEnergyPerAtomNnp = 0.0;
491 double sigmaEnergyPerAtomRef = 0.0;
492 double sigmaEnergyPerAtomNnp = 0.0;
493 double meanForceRef = 0.0;
494 double meanForceNnp = 0.0;
495 double sigmaForceRef = 0.0;
496 double sigmaForceNnp = 0.0;
497 for (auto& s : structures)
498 {
499 // File output for evsv.dat.
500 fileEvsV << strpr("%16.8E %16.8E %10zu %16.8E %16.8E %16.8E\n",
501 s.volume / s.numAtoms,
502 s.energyRef / s.numAtoms,
503 s.numAtoms,
504 s.volume,
505 s.energyRef,
506 getEnergyWithOffset(s, true));
507 s.calculateNeighborList(maxCutoffRadius);
508 // TODO: handle 4G case
509#ifdef N2P2_NO_SF_GROUPS
510        calculateSymmetryFunctions(s, useForcesLocal);
511#else
512        calculateSymmetryFunctionGroups(s, useForcesLocal);
513#endif
514        calculateAtomicNeuralNetworks(s, useForcesLocal);
515        calculateEnergy(s);
516 if (useForcesLocal) calculateForces(s);
517 s.clearNeighborList();
518
519        numStructures++;
520        numAtomsTotal += s.numAtoms;
521 meanEnergyPerAtomRef += s.energyRef / s.numAtoms;
522 meanEnergyPerAtomNnp += s.energy / s.numAtoms;
523 for (auto& a : s.atoms)
524 {
525 meanForceRef += a.fRef[0] + a.fRef[1] + a.fRef[2];
526 meanForceNnp += a.f [0] + a.f [1] + a.f [2];
527 }
528 }
529
530 fileEvsV.flush();
531 fileEvsV.close();
532 MPI_Barrier(MPI_COMM_WORLD);
533 log << "Writing energy/atom vs. volume/atom data to \"evsv.dat\".\n";
534 if (myRank == 0) combineFiles("evsv.dat");
535 MPI_Allreduce(MPI_IN_PLACE, &numStructures , 1, MPI_SIZE_T, MPI_SUM, MPI_COMM_WORLD);
536 MPI_Allreduce(MPI_IN_PLACE, &numAtomsTotal , 1, MPI_SIZE_T, MPI_SUM, MPI_COMM_WORLD);
537 MPI_Allreduce(MPI_IN_PLACE, &meanEnergyPerAtomRef, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
538 MPI_Allreduce(MPI_IN_PLACE, &meanEnergyPerAtomNnp, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
539 MPI_Allreduce(MPI_IN_PLACE, &meanForceRef , 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
540 MPI_Allreduce(MPI_IN_PLACE, &meanForceNnp , 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
541 meanEnergyPerAtomRef /= numStructures;
542 meanEnergyPerAtomNnp /= numStructures;
543 meanForceRef /= 3 * numAtomsTotal;
544 meanForceNnp /= 3 * numAtomsTotal;
545 for (auto const& s : structures)
546 {
547 double ediffRef = s.energyRef / s.numAtoms - meanEnergyPerAtomRef;
548 double ediffNnp = s.energy / s.numAtoms - meanEnergyPerAtomNnp;
549 sigmaEnergyPerAtomRef += ediffRef * ediffRef;
550 sigmaEnergyPerAtomNnp += ediffNnp * ediffNnp;
551 for (auto const& a : s.atoms)
552 {
553 double fdiffRef = a.fRef[0] - meanForceRef;
554 double fdiffNnp = a.f [0] - meanForceNnp;
555 sigmaForceRef += fdiffRef * fdiffRef;
556 sigmaForceNnp += fdiffNnp * fdiffNnp;
557 fdiffRef = a.fRef[1] - meanForceRef;
558 fdiffNnp = a.f [1] - meanForceNnp;
559 sigmaForceRef += fdiffRef * fdiffRef;
560 sigmaForceNnp += fdiffNnp * fdiffNnp;
561 fdiffRef = a.fRef[2] - meanForceRef;
562 fdiffNnp = a.f [2] - meanForceNnp;
563 sigmaForceRef += fdiffRef * fdiffRef;
564 sigmaForceNnp += fdiffNnp * fdiffNnp;
565 }
566 }
567 MPI_Allreduce(MPI_IN_PLACE, &sigmaEnergyPerAtomRef, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
568 MPI_Allreduce(MPI_IN_PLACE, &sigmaEnergyPerAtomNnp, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
569 MPI_Allreduce(MPI_IN_PLACE, &sigmaForceRef , 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
570 MPI_Allreduce(MPI_IN_PLACE, &sigmaForceNnp , 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
571 sigmaEnergyPerAtomRef = sqrt(sigmaEnergyPerAtomRef / (numStructures - 1));
572 sigmaEnergyPerAtomNnp = sqrt(sigmaEnergyPerAtomNnp / (numStructures - 1));
573 sigmaForceRef = sqrt(sigmaForceRef / (3 * numAtomsTotal - 1));
574 sigmaForceNnp = sqrt(sigmaForceNnp / (3 * numAtomsTotal - 1));
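    // These are the usual unbiased sample estimators: squared deviations are
    // accumulated locally, reduced over all MPI ranks above, and divided by
    // N - 1 with N = numStructures for energies and N = 3 * numAtomsTotal
    // for the force components.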
575 log << "\n";
576 log << strpr("Total number of structures : %zu\n", numStructures);
577 log << strpr("Total number of atoms : %zu\n", numAtomsTotal);
578 log << "----------------------------------\n";
579 log << "Reference data statistics:\n";
580 log << "----------------------------------\n";
581 log << strpr("Mean/sigma energy per atom : %16.8E / %16.8E\n",
582 meanEnergyPerAtomRef,
583 sigmaEnergyPerAtomRef);
584 log << strpr("Mean/sigma force : %16.8E / %16.8E\n",
585 meanForceRef,
586 sigmaForceRef);
587 log << "----------------------------------\n";
588 log << "Initial NNP prediction statistics:\n";
589 log << "----------------------------------\n";
590 log << strpr("Mean/sigma energy per atom : %16.8E / %16.8E\n",
591 meanEnergyPerAtomNnp,
592 sigmaEnergyPerAtomNnp);
593 log << strpr("Mean/sigma force : %16.8E / %16.8E\n",
594 meanForceNnp,
595 sigmaForceNnp);
596 log << "----------------------------------\n";
597 // Now set conversion quantities of Mode class.
598 if (settings["normalize_data_set"] == "stats-only")
599 {
600 log << "Data set statistics computation completed, now make up for \n";
601 log << "initially skipped normalization setup...\n";
602 log << "\n";
603 setupNormalization(false);
604 }
605 else if (settings["normalize_data_set"] == "ref")
606 {
607 log << "Normalization based on standard deviation of reference data "
608 "selected:\n";
609 log << "\n";
610 log << " mean(e_ref) = 0, sigma(e_ref) = sigma(F_ref) = 1\n";
611 log << "\n";
612 meanEnergy = meanEnergyPerAtomRef;
613 convEnergy = 1.0 / sigmaEnergyPerAtomRef;
614 if (useForcesLocal) convLength = sigmaForceRef / sigmaEnergyPerAtomRef;
615 else convLength = 1.0;
616 normalize = true;
617 }
618 else if (settings["normalize_data_set"] == "force")
619 {
620 if (!useForcesLocal)
621 {
622 throw runtime_error("ERROR: Selected normalization mode only "
623 "possible when forces are available.\n");
624 }
625 log << "Normalization based on standard deviation of reference forces "
626 "and their\n";
627 log << "initial prediction selected:\n";
628 log << "\n";
629 log << " mean(e_ref) = 0, sigma(F_NNP) = sigma(F_ref) = 1\n";
630 log << "\n";
631 meanEnergy = meanEnergyPerAtomRef;
632 convEnergy = sigmaForceNnp / sigmaForceRef;
633 convLength = sigmaForceNnp;
634 normalize = true;
635 }
636 else
637 {
638 throw runtime_error("ERROR: Unknown data set normalization mode.\n");
639 }
640
641 if (settings["normalize_data_set"] != "stats-only")
642 {
643 log << "Final conversion data:\n";
644 log << strpr("Mean ref. energy per atom = %24.16E\n", meanEnergy);
645 log << strpr("Conversion factor energy = %24.16E\n", convEnergy);
646 log << strpr("Conversion factor length = %24.16E\n", convLength);
647 log << "----------------------------------\n";
648 }
649
650 if ((myRank == 0) &&
651 (settings["normalize_data_set"] != "stats-only"))
652 {
653 log << "\n";
654 log << "Writing backup of original settings file to "
655 "\"input.nn.bak\".\n";
656 ofstream fileSettings;
657 fileSettings.open("input.nn.bak");
658 writeSettingsFile(&fileSettings);
659 fileSettings.close();
660
661 log << "\n";
662 log << "Writing normalization data to settings file \"input.nn\".\n";
663 string n1 = strpr("mean_energy %24.16E # nnp-train\n",
664 meanEnergyPerAtomRef);
665 string n2 = strpr("conv_energy %24.16E # nnp-train\n",
666 convEnergy);
667 string n3 = strpr("conv_length %24.16E # nnp-train\n",
668 convLength);
669 // Check for existing normalization header and record line numbers
670 // to replace.
671 auto lines = settings.getSettingsLines();
672 map<size_t, string> replace;
673 for (size_t i = 0; i < lines.size(); ++i)
674 {
675 vector<string> sl = split(lines.at(i));
676 if (sl.size() > 0)
677 {
678 if (sl.at(0) == "mean_energy") replace[i] = n1;
679 if (sl.at(0) == "conv_energy") replace[i] = n2;
680 if (sl.at(0) == "conv_length") replace[i] = n3;
681 }
682 }
683 if (!replace.empty())
684 {
685 log << "WARNING: Preexisting normalization data was found and "
686 "replaced in original \"input.nn\" file.\n";
687 }
688
689 fileSettings.open("input.nn");
690 if (replace.empty())
691 {
692 fileSettings << "#########################################"
693 "######################################\n";
694 fileSettings << "# DATA SET NORMALIZATION\n";
695 fileSettings << "#########################################"
696 "######################################\n";
697 fileSettings << n1;
698 fileSettings << n2;
699 fileSettings << n3;
700 fileSettings << "#########################################"
701 "######################################\n";
702 fileSettings << "\n";
703 }
704 settings.writeSettingsFile(&fileSettings, replace);
705 fileSettings.close();
706 }
707
708    // Now make up for the left-out normalization setup; the entire
709    // symmetry function setup has to be repeated.
710 log << "\n";
711 if (normalize)
712 {
713 log << "Silently repeating symmetry function setup...\n";
714 log.silent = true;
715 for (auto& e : elements) e.clearSymmetryFunctions();
716        setupSymmetryFunctions();
717#ifndef N2P2_FULL_SFD_MEMORY
718        setupSymmetryFunctionMemory(false);
719#endif
720#ifndef N2P2_NO_SF_CACHE
721        setupSymmetryFunctionCache(false);
722#endif
723#ifndef N2P2_NO_SF_GROUPS
724        setupSymmetryFunctionGroups();
725#endif
726        setupSymmetryFunctionScaling();
727        setupSymmetryFunctionStatistics(false, false, false, false);
728 log.silent = false;
729 }
730
731 log << "*****************************************"
732 "**************************************\n";
733
734 return;
735}
736
737void Training::setupTraining()
738{
739 log << "\n";
740 log << "*** SETUP: TRAINING *********************"
741 "**************************************\n";
742 log << "\n";
743
744 if (nnpType == NNPType::HDNNP_4G ||
745        nnpType == NNPType::HDNNP_Q)
746    {
747 log << strpr("Running stage %zu of training: ", stage);
748 if (stage == 1) log << "electrostatic NN fitting.\n";
749 else if (stage == 2) log << "short-range NN fitting.\n";
750 else throw runtime_error("\nERROR: Unknown training stage.\n");
751 }
752
753 if ( nnpType == NNPType::HDNNP_2G ||
754 (nnpType == NNPType::HDNNP_4G && stage == 2) ||
755 (nnpType == NNPType::HDNNP_Q && stage == 2))
756 {
757 useForces = settings.keywordExists("use_short_forces");
758 if (useForces)
759 {
760 log << "Forces will be used for training.\n";
761 if (settings.keywordExists("force_weight"))
762 {
763 forceWeight = atof(settings["force_weight"].c_str());
764 }
765 else
766 {
767 log << "WARNING: Force weight not set, using default value.\n";
768 forceWeight = 1.0;
769 }
770 log << strpr("Force update weight: %10.2E\n", forceWeight);
771 }
772 else
773 {
774 log << "Only energies will be used for training.\n";
775 }
776 }
777 log << "Training will act on \"" << nns.at(nnId).name
778 << "\" neural networks.\n";
779
780 if (settings.keywordExists("main_error_metric"))
781 {
782 string k;
783 if (settings["main_error_metric"] == "RMSEpa")
784 {
785 k = "energy"; if (p.exists(k)) p[k].displayMetric = "RMSEpa";
786 k = "force"; if (p.exists(k)) p[k].displayMetric = "RMSE";
787 k = "charge"; if (p.exists(k)) p[k].displayMetric = "RMSE";
788 }
789 else if (settings["main_error_metric"] == "RMSE")
790 {
791 k = "energy"; if (p.exists(k)) p[k].displayMetric = "RMSE";
792 k = "force"; if (p.exists(k)) p[k].displayMetric = "RMSE";
793 k = "charge"; if (p.exists(k)) p[k].displayMetric = "RMSE";
794 }
795 else if (settings["main_error_metric"] == "MAEpa")
796 {
797 k = "energy"; if (p.exists(k)) p[k].displayMetric = "MAEpa";
798 k = "force"; if (p.exists(k)) p[k].displayMetric = "MAE";
799 k = "charge"; if (p.exists(k)) p[k].displayMetric = "MAE";
800 }
801 else if (settings["main_error_metric"] == "MAE")
802 {
803 k = "energy"; if (p.exists(k)) p[k].displayMetric = "MAE";
804 k = "force"; if (p.exists(k)) p[k].displayMetric = "MAE";
805 k = "charge"; if (p.exists(k)) p[k].displayMetric = "MAE";
806 }
807 else
808 {
809 throw runtime_error("ERROR: Unknown error metric.\n");
810 }
811 }
812 else
813 {
814 string k;
815 k = "energy"; if (p.exists(k)) p[k].displayMetric = "RMSEpa";
816 k = "force"; if (p.exists(k)) p[k].displayMetric = "RMSE";
817 k = "charge"; if (p.exists(k)) p[k].displayMetric = "RMSE";
818 }
819
820 updaterType = (UpdaterType)atoi(settings["updater_type"].c_str());
821 if (updaterType == UT_GD)
822 {
823 log << strpr("Weight update via gradient descent selected: "
824 "updaterType::UT_GD (%d)\n",
825                     updaterType);
826    }
827 else if (updaterType == UT_KF)
828 {
829 log << strpr("Weight update via Kalman filter selected: "
830 "updaterType::UT_KF (%d)\n",
831                     updaterType);
832    }
833 else if (updaterType == UT_LM)
834 {
835 throw runtime_error("ERROR: LM algorithm not yet implemented.\n");
836 log << strpr("Weight update via Levenberg-Marquardt algorithm "
837 "selected: updaterType::UT_LM (%d)\n",
838                     updaterType);
839    }
840 else
841 {
842 throw runtime_error("ERROR: Unknown updater type.\n");
843 }
844
845 parallelMode = (ParallelMode)atoi(settings["parallel_mode"].c_str());
846 //if (parallelMode == PM_DATASET)
847 //{
848 // log << strpr("Serial training selected: "
849 // "ParallelMode::PM_DATASET (%d)\n",
850 // parallelMode);
851 //}
852    if (parallelMode == PM_TRAIN_RK0)
853    {
854 log << strpr("Parallel training (rank 0 updates) selected: "
855 "ParallelMode::PM_TRAIN_RK0 (%d)\n",
856                     parallelMode);
857    }
858 else if (parallelMode == PM_TRAIN_ALL)
859 {
860 log << strpr("Parallel training (all ranks update) selected: "
861 "ParallelMode::PM_TRAIN_ALL (%d)\n",
862                     parallelMode);
863    }
864 else
865 {
866 throw runtime_error("ERROR: Unknown parallelization mode.\n");
867 }
868
869 jacobianMode = (JacobianMode)atoi(settings["jacobian_mode"].c_str());
870 if (jacobianMode == JM_SUM)
871 {
872 log << strpr("Gradient summation only selected: "
873 "JacobianMode::JM_SUM (%d)\n", jacobianMode);
874        log << "No Jacobian matrix; gradients of all training candidates are "
875               "summed up instead.\n";
876 }
877 else if (jacobianMode == JM_TASK)
878 {
879 log << strpr("Per-task Jacobian selected: "
880 "JacobianMode::JM_TASK (%d)\n",
881                     jacobianMode);
882        log << "One Jacobian matrix row per MPI task is stored; within each "
883 "task gradients are summed up.\n";
884 }
885 else if (jacobianMode == JM_FULL)
886 {
887 log << strpr("Full Jacobian selected: "
888 "JacobianMode::JM_FULL (%d)\n",
889                     jacobianMode);
890        log << "Each update candidate generates one Jacobian matrix "
891 "row entry.\n";
892 }
893 else
894 {
895 throw runtime_error("ERROR: Unknown Jacobian mode.\n");
896 }
897
898    if (updaterType == UT_GD && jacobianMode != JM_SUM)
899    {
900 throw runtime_error("ERROR: Gradient descent methods can only be "
901 "combined with Jacobian mode JM_SUM.\n");
902 }
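    // A plain gradient-descent step only consumes the summed gradient, hence
    // the restriction to JM_SUM above; the per-candidate Jacobian rows
    // (JM_TASK, JM_FULL) are presumably only of use to the Kalman filter.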
903
904    if (nnpType != NNPType::HDNNP_2G && (UpdateStrategy)atoi(settings["update_strategy"].c_str()) == US_ELEMENT)
905    {
906 throw runtime_error("ERROR: US_ELEMENT only implemented for "
907 "HDNNP_2G.\n");
908 }
909
910 updateStrategy = (UpdateStrategy)atoi(settings["update_strategy"].c_str());
911 // This section is pushed into a separate function because it's needed also
912 // for testing purposes.
913    initializeWeightsMemory(updateStrategy);
914    // Now it is possible to fill the weights arrays with weight parameters
915 // from the neural network.
916 getWeights();
917
918 // Set up update candidate selection modes.
919 setupSelectionMode("all");
920 for (auto k : pk) setupSelectionMode(k);
921
922 log << "-----------------------------------------"
923 "--------------------------------------\n";
924 repeatedEnergyUpdates = settings.keywordExists("repeated_energy_update");
925    if (repeatedEnergyUpdates)
926    {
927 throw runtime_error("ERROR: Repeated energy updates are not correctly"
928 " implemented at the moment.\n");
929 //log << "After each force update an energy update for the\n";
930 //log << "corresponding structure will be performed.\n";
931 }
932
933 freeMemory = !(settings.keywordExists("memorize_symfunc_results"));
934 if (freeMemory)
935 {
936 log << "Symmetry function memory is cleared after each calculation.\n";
937 }
938 else
939 {
940 log << "Symmetry function memory is reused (HIGH MEMORY USAGE!).\n";
941 }
942
943 numEpochs = (size_t)atoi(settings["epochs"].c_str());
944 log << strpr("Training will be stopped after %zu epochs.\n", numEpochs);
945
946 // Set up how often comparison output files should be written.
947 for (auto k : pk) setupFileOutput(k);
948 // Set up how often weight files should be written.
949 setupFileOutput("weights_epoch");
950 // Set up how often neuron statistics files should be written.
951 setupFileOutput("neuronstats");
952
953 // Prepare training log header.
954 writeTrainingLog = settings.keywordExists("write_trainlog");
955 if (writeTrainingLog && myRank == 0)
956 {
957 if (nnpType == NNPType::HDNNP_4G ||
958            nnpType == NNPType::HDNNP_Q)
959        {
960 trainingLogFileName += strpr(".stage-%zu", stage);
961 }
962 log << strpr("Training log with update information will be written to:"
963 " %s.\n", trainingLogFileName.c_str());
964 trainingLog.open(trainingLogFileName.c_str());
965
966 // File header.
967 vector<string> title;
968 vector<string> colName;
969 vector<string> colInfo;
970 vector<size_t> colSize;
971 title.push_back("Detailed information on each weight update.");
972 colSize.push_back(3);
973 colName.push_back("U");
974 colInfo.push_back("Update type (E = energy, F = force, Q = charge).");
975 colSize.push_back(5);
976 colName.push_back("epoch");
977 colInfo.push_back("Current training epoch.");
978 colSize.push_back(10);
979 colName.push_back("count");
980 colInfo.push_back("Update counter (Multiple lines with identical count"
981 " for multi-streaming!).");
982 colSize.push_back(5);
983 colName.push_back("proc");
984 colInfo.push_back("MPI process providing this update candidate.");
985 colSize.push_back(3);
986 colName.push_back("tl");
987 colInfo.push_back("Threshold loop counter.");
988 colSize.push_back(10);
989 colName.push_back("rmse_frac");
990        colInfo.push_back("Update candidate's error divided by this "
991                          "epoch's RMSE.");
992 colSize.push_back(10);
993 colName.push_back("s_ind_g");
994 colInfo.push_back("Global structure index.");
995 colSize.push_back(5);
996 colName.push_back("s_ind");
997 colInfo.push_back("Local structure index on this MPI process.");
998 colSize.push_back(5);
999 colName.push_back("a_ind");
1000 colInfo.push_back("Atom index.");
1001 colSize.push_back(2);
1002 colName.push_back("c");
1003 colInfo.push_back("Force component (0 = x, 1 = y, 2 = z).");
1004        appendLinesToFile(trainingLog,
1005                          createFileHeader(title, colSize, colName, colInfo));
1006 }
1007
1008 // Compute number of updates and properties per update.
1009 log << "-----------------------------------------"
1010 "--------------------------------------\n";
1011 for (auto k : pk) setupUpdatePlan(k);
1012 if (p.exists("energy") && p.exists("force"))
1013 {
1014 Property& pe = p["energy"];
1015 Property& pf = p["force"];
1016 log << strpr("Energy to force ratio : "
1017 " 1 : %5.1f\n",
1018                     static_cast<double>(
1019                         pf.numUpdates * pf.patternsPerUpdateGlobal) /
1020                     (pe.numUpdates * pe.patternsPerUpdateGlobal));
1021        log << strpr("Energy to force percentages : "
1022                     "%5.1f%% : %5.1f%%\n",
1023                     pe.numUpdates * pe.patternsPerUpdateGlobal * 100.0 /
1024                     (pe.numUpdates * pe.patternsPerUpdateGlobal +
1025                      pf.numUpdates * pf.patternsPerUpdateGlobal),
1026                     pf.numUpdates * pf.patternsPerUpdateGlobal * 100.0 /
1027                     (pe.numUpdates * pe.patternsPerUpdateGlobal +
1028                      pf.numUpdates * pf.patternsPerUpdateGlobal));
1029    }
1030 double totalUpdates = 0.0;
1031 for (auto k : pk) totalUpdates += p[k].numUpdates;
1032 log << "-----------------------------------------"
1033 "--------------------------------------\n";
1034
1035 // Allocate error and Jacobian arrays.
1036 for (auto k : pk) allocateArrays(k);
1037 log << "-----------------------------------------"
1038 "--------------------------------------\n";
1039
1040 // Set up new C++11 random number generator (TODO: move it!).
1041 rngGlobalNew.seed(gsl_rng_get(rngGlobal));
1042 rngNew.seed(gsl_rng_get(rng));
1043
1044 // Updater setup.
1045    GradientDescent::DescentType descentType = GradientDescent::DT_FIXED;
1046    if (updaterType == UT_GD)
1047 {
1048 descentType = (GradientDescent::DescentType)
1049 atoi(settings["gradient_type"].c_str());
1050 }
1051    KalmanFilter::KalmanType kalmanType = KalmanFilter::KT_STANDARD;
1052    if (updaterType == UT_KF)
1053 {
1054 kalmanType = (KalmanFilter::KalmanType)
1055 atoi(settings["kalman_type"].c_str());
1056 }
1057
1058 for (size_t i = 0; i < numUpdaters; ++i)
1059 {
1060 if ( (myRank == 0) || (parallelMode == PM_TRAIN_ALL) )
1061 {
1062 if (updaterType == UT_GD)
1063 {
1064 updaters.push_back(
1065                    (Updater*)new GradientDescent(numWeightsPerUpdater.at(i),
1066                                                  descentType));
1067 }
1068 else if (updaterType == UT_KF)
1069 {
1070 updaters.push_back(
1071                    (Updater*)new KalmanFilter(numWeightsPerUpdater.at(i),
1072                                               kalmanType));
1073 }
1074 updaters.back()->setState(&(weights.at(i).front()));
1075 updaters.back()->setupTiming(strpr("wupd%zu", i));
1076 updaters.back()->resetTimingLoop();
1077 }
1078 }
1079 if (updaters.size() > 0) hasUpdaters = true;
1080 else hasUpdaters = false;
1081
1082 if (hasUpdaters && updaterType == UT_GD)
1083 {
1084 if (descentType == GradientDescent::DT_FIXED)
1085 {
1086 double const eta = atof(settings["gradient_eta"].c_str());
1087 for (size_t i = 0; i < numUpdaters; ++i)
1088 {
1089 GradientDescent* u =
1090 dynamic_cast<GradientDescent*>(updaters.at(i));
1091 u->setParametersFixed(eta);
1092 }
1093 }
1094 if (descentType == GradientDescent::DT_ADAM)
1095 {
1096 double const eta = atof(settings["gradient_adam_eta"].c_str());
1097 double const beta1 = atof(settings["gradient_adam_beta1"].c_str());
1098 double const beta2 = atof(settings["gradient_adam_beta2"].c_str());
1099 double const eps = atof(settings["gradient_adam_epsilon"].c_str());
1100 for (size_t i = 0; i < numUpdaters; ++i)
1101 {
1102 GradientDescent* u =
1103 dynamic_cast<GradientDescent*>(updaters.at(i));
1104 u->setParametersAdam(eta, beta1, beta2, eps);
1105 }
1106 }
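    // The Adam parameters follow the usual convention (Kingma & Ba, 2014):
    // eta is the step size, beta1 and beta2 are the exponential decay rates
    // of the first and second moment estimates, and epsilon guards against
    // division by zero.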
1107 }
1108 else if (hasUpdaters && updaterType == UT_KF)
1109 {
1110 if (kalmanType == KalmanFilter::KT_STANDARD)
1111 {
1112 double const epsilon = atof(settings["kalman_epsilon"].c_str());
1113 double const q0 = atof(settings["kalman_q0" ].c_str());
1114 double const qtau = atof(settings["kalman_qtau" ].c_str())
1115 / totalUpdates;
1116 log << "qtau is divided by number "
1117 "of projected updates per epoch.\n";
1118 double const qmin = atof(settings["kalman_qmin" ].c_str());
1119 double const eta0 = atof(settings["kalman_eta" ].c_str());
1120 double etatau = 1.0;
1121 double etamax = eta0;
1122 if (settings.keywordExists("kalman_etatau") &&
1123 settings.keywordExists("kalman_etamax"))
1124 {
1125 etatau = atof(settings["kalman_etatau"].c_str())
1126 / totalUpdates;
1127 log << "etatau is divided by number "
1128 "of projected updates per epoch.\n";
1129 etamax = atof(settings["kalman_etamax"].c_str());
1130 }
1131 for (size_t i = 0; i < updaters.size(); ++i)
1132 {
1133 KalmanFilter* u = dynamic_cast<KalmanFilter*>(updaters.at(i));
1134 u->setParametersStandard(epsilon,
1135 q0,
1136 qtau,
1137 qmin,
1138 eta0,
1139 etatau,
1140 etamax);
1141 }
1142 }
1143 else if (kalmanType == KalmanFilter::KT_FADINGMEMORY)
1144 {
1145 double const epsilon = atof(settings["kalman_epsilon"].c_str());
1146 double const q0 = atof(settings["kalman_q0" ].c_str());
1147 double const qtau = atof(settings["kalman_qtau" ].c_str())
1148 / totalUpdates;
1149 log << "qtau is divided by number "
1150 "of projected updates per epoch.\n";
1151 double const qmin = atof(settings["kalman_qmin"].c_str());
1152 double const lambda =
1153 atof(settings["kalman_lambda_short"].c_str());
1154 //double const nu =
1155 // pow(atof(settings["kalman_nue_short"].c_str()), numProcs);
1156 //log << "nu is exponentiated with the number of streams.\n";
1157 double const nu = atof(settings["kalman_nue_short"].c_str());
1158 for (size_t i = 0; i < updaters.size(); ++i)
1159 {
1160 KalmanFilter* u = dynamic_cast<KalmanFilter*>(updaters.at(i));
1161 u->setParametersFadingMemory(epsilon,
1162 q0,
1163 qtau,
1164 qmin,
1165 lambda,
1166 nu);
1167 }
1168 }
1169 }
1170
1171 log << "-----------------------------------------"
1172 "--------------------------------------\n";
1173 for (size_t i = 0; i < updaters.size(); ++i)
1174 {
1175        if (updateStrategy == US_COMBINED)
1176        {
1177 log << strpr("Combined weight updater:\n");
1178 }
1179 else if (updateStrategy == US_ELEMENT)
1180 {
1181 log << strpr("Weight updater for element %2s :\n",
1182 elements.at(i).getSymbol().c_str());
1183 }
1184 log << "-----------------------------------------"
1185 "--------------------------------------\n";
1186 log << updaters.at(i)->info();
1187 if (updaterType == UT_KF)
1188 {
1189            log << "Note: During the training loop the actual observation\n";
1190 log << " size corresponds to error vector size:\n";
1191 for (auto k : pk)
1192 {
1193 log << strpr("sizeObservation = %zu (%s updates)\n",
1194 p[k].error.at(i).size(), k.c_str());
1195 }
1196 }
1197 log << "-----------------------------------------"
1198 "--------------------------------------\n";
1199 }
1200
1201 log << strpr("TIMING Finished setup: %.2f seconds.\n",
1202 sw["setup"].stop());
1203 log << "*****************************************"
1204 "**************************************\n";
1205
1206 return;
1207}
1208
1209vector<string> Training::setupNumericDerivCheck()
1210{
1211 log << "\n";
1212 log << "*** SETUP WEIGHT DERIVATIVES CHECK ******"
1213 "**************************************\n";
1214 log << "\n";
1215
1216 log << "Weight derivatives will be checked for these properties:\n";
1217 for (auto k : pk) log << " - " + p[k].plural + "\n";
1218 log << "\n";
1219
1220    if (nnpType == NNPType::HDNNP_2G)
1221    {
1222 nnId = "short";
1223 readNeuralNetworkWeights(nnId, "weights.%03zu.data");
1224 }
1225 else if ( (nnpType == NNPType::HDNNP_4G || nnpType == NNPType::HDNNP_Q) &&
1226 stage == 1)
1227 {
1228 nnId = "elec";
1229 readNeuralNetworkWeights(nnId, "weightse.%03zu.data");
1230 }
1231 else if ( (nnpType == NNPType::HDNNP_4G || nnpType == NNPType::HDNNP_Q) &&
1232 stage == 2)
1233 {
1234 nnId = "short";
1235 readNeuralNetworkWeights("elec", "weightse.%03zu.data");
1236 readNeuralNetworkWeights(nnId, "weights.%03zu.data");
1237 }
1239 getWeights();
1240
1241 log << "*****************************************"
1242 "**************************************\n";
1243
1244 return pk;
1245}
1246
1247void Training::calculateNeighborLists()
1248{
1249 sw["nl"].start();
1250 log << "\n";
1251 log << "*** CALCULATE NEIGHBOR LISTS ************"
1252 "**************************************\n";
1253 log << "\n";
1254
1255#ifdef _OPENMP
1256 int num_threads = omp_get_max_threads();
1257 omp_set_num_threads(1);
1258 log << strpr("Temporarily disabling OpenMP parallelization: %d threads.\n",
1259 omp_get_max_threads());
1260#endif
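// Neighbor lists are built with a single thread here and OpenMP is restored
// below; presumably Structure::calculateNeighborList is not thread-safe or
// profits little from threading at this stage.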
1261 log << "Calculating neighbor lists for all structures.\n";
1262 double maxCutoffRadiusPhys = maxCutoffRadius;
1263 if (normalize) maxCutoffRadiusPhys = maxCutoffRadius / convLength;
1264 // TODO: may not actually be cutoff (ewald real space cutoff is often
1265    //       larger)
1267 log << strpr("Cutoff radius for neighbor lists: %f\n",
1268 maxCutoffRadiusPhys);
1269 double maxCutoffRadiusAllStructures = 0.0;
1270 for (vector<Structure>::iterator it = structures.begin();
1271 it != structures.end(); ++it)
1272 {
1273 // List only needs to be sorted for 4G
1274        if (nnpType == NNPType::HDNNP_4G)
1275        {
1276 it->calculateMaxCutoffRadiusOverall(
1277 ewaldSetup,
1278                                     screeningFunction.getOuter(),
1279                                     maxCutoffRadius);
1280            it->calculateNeighborList(maxCutoffRadius, cutoffs);
1281
1282 if (it->maxCutoffRadiusOverall > maxCutoffRadiusAllStructures)
1283 maxCutoffRadiusAllStructures = it->maxCutoffRadiusOverall;
1284 }
1285 else
1286 {
1287 it->calculateNeighborList(maxCutoffRadius);
1288 }
1289 }
1290 if (normalize) maxCutoffRadiusAllStructures /= convLength;
1291    MPI_Allreduce(MPI_IN_PLACE, &maxCutoffRadiusAllStructures, 1, MPI_DOUBLE, MPI_MAX, comm);
1292    log << strpr("Largest cutoff radius for neighbor lists: %f\n",
1293 maxCutoffRadiusAllStructures);
1294#ifdef _OPENMP
1295 omp_set_num_threads(num_threads);
1296 log << strpr("Restoring OpenMP parallelization: max. %d threads.\n",
1297 omp_get_max_threads());
1298#endif
1299
1300 log << "-----------------------------------------"
1301 "--------------------------------------\n";
1302 log << strpr("TIMING Finished neighbor lists: %.2f seconds.\n",
1303 sw["nl"].stop());
1304 log << "*****************************************"
1305 "**************************************\n";
1306
1307 return;
1308}
1309
1310void Training::calculateError(
1311    map<string, pair<string, string>> const fileNames)
1312{
1313#ifdef _OPENMP
1314 int num_threads = omp_get_max_threads();
1315 omp_set_num_threads(1);
1316#endif
1317 vector<string> write;
1318 for (auto i : fileNames)
1319 {
1320 if (i.second.first.size() == 0 || i.second.second.size() == 0)
1321 {
1322 throw runtime_error("ERROR: No filename provided for comparison "
1323 "files.\n");
1324 }
1325 write.push_back(i.first);
1326 }
1327 auto doWrite = [&write](string key){
1328 return find(write.begin(),
1329 write.end(),
1330 key) != write.end();
1331 };
1332
1333
1334 map<string, size_t> countTrain;
1335 map<string, size_t> countTest;
1336 for (auto k : pk) countTrain[k] = 0;
1337 for (auto k : pk) countTest[k] = 0;
1338
1339 map<string, ofstream> filesTrain;
1340 map<string, ofstream> filesTest;
1341
1342 // Reset current error metrics.
1343 for (auto k : pk)
1344 {
1345 for (auto& m : p[k].errorTrain) m.second = 0.0;
1346 for (auto& m : p[k].errorTest) m.second = 0.0;
1347 }
1348
1349 for (auto k : write)
1350 {
1351 filesTrain[k].open(strpr("%s.%04d",
1352 fileNames.at(k).first.c_str(),
1353 myRank).c_str());
1354 filesTest[k].open(strpr("%s.%04d",
1355 fileNames.at(k).second.c_str(),
1356 myRank).c_str());
1357 // File header.
1358 vector<string> header;
1359 if (myRank == 0)
1360 {
1361 vector<string> title;
1362 vector<string> colName;
1363 vector<string> colInfo;
1364 vector<size_t> colSize;
1365 if (k == "energy")
1366 {
1367 title.push_back("Energy comparison.");
1368 colSize.push_back(10);
1369 colName.push_back("index");
1370 colInfo.push_back("Structure index.");
1371 colSize.push_back(16);
1372 colName.push_back("Eref");
1373 colInfo.push_back("Reference potential energy per atom "
1374 "(training units).");
1375 colSize.push_back(16);
1376 colName.push_back("Ennp");
1377 colInfo.push_back("NNP potential energy per atom "
1378 "(training units).");
1379 }
1380 else if (k == "force")
1381 {
1382 title.push_back("Force comparison.");
1383 colSize.push_back(10);
1384 colName.push_back("index_s");
1385 colInfo.push_back("Structure index.");
1386 colSize.push_back(10);
1387 colName.push_back("index_a");
1388 colInfo.push_back("Atom index (x, y, z components in "
1389 "consecutive lines).");
1390 colSize.push_back(16);
1391 colName.push_back("Fref");
1392 colInfo.push_back("Reference force (training units).");
1393 colSize.push_back(16);
1394 colName.push_back("Fnnp");
1395 colInfo.push_back("NNP force (training units).");
1396 }
1397 else if (k == "charge")
1398 {
1399 title.push_back("Charge comparison.");
1400 colSize.push_back(10);
1401 colName.push_back("index_s");
1402 colInfo.push_back("Structure index.");
1403 colSize.push_back(10);
1404 colName.push_back("index_a");
1405 colInfo.push_back("Atom index.");
1406 colSize.push_back(16);
1407 colName.push_back("Qref");
1408 colInfo.push_back("Reference charge.");
1409 colSize.push_back(16);
1410 colName.push_back("Qnnp");
1411 colInfo.push_back("NNP charge.");
1412 }
1413 header = createFileHeader(title, colSize, colName, colInfo);
1414 appendLinesToFile(filesTrain.at(k), header);
1415 appendLinesToFile(filesTest.at(k), header);
1416 }
1417 }
1418
1419 for (vector<Structure>::iterator it = structures.begin();
1420 it != structures.end(); ++it)
1421 {
1422#ifdef N2P2_NO_SF_GROUPS
1423        calculateSymmetryFunctions((*it), useForces);
1424#else
1425        calculateSymmetryFunctionGroups((*it), useForces);
1426#endif
1427 // TODO: Can we use evaluateNNP?
1428        if (nnpType == NNPType::HDNNP_4G)
1429        {
1430 if ( stage == 1 )
1431 {
1432                calculateAtomicNeuralNetworks((*it), false);
1433                chargeEquilibration((*it), false);
1434 }
1435 else
1436 {
1437 if ( !it->hasCharges || (!it->hasAMatrix && useForces) )
1438 {
1439                    calculateAtomicNeuralNetworks((*it), useForces,
1440                                                  "elec");
1441                    chargeEquilibration((*it), useForces);
1442                }
1443                calculateAtomicNeuralNetworks((*it), useForces, "short");
1444                calculateEnergy((*it));
1445 if (useForces) calculateForces((*it));
1446 }
1447 }
1448 else
1449 {
1450            calculateAtomicNeuralNetworks((*it), useForces);
1451            calculateEnergy((*it));
1452 if (useForces) calculateForces((*it));
1453 }
1454
1455 for (auto k : pk)
1456 {
1457 map<string, double>* error = nullptr;
1458 size_t* count = nullptr;
1459 ofstream* file = nullptr;
1460 if (it->sampleType == Structure::ST_TRAINING)
1461 {
1462 error = &(p[k].errorTrain);
1463 count = &(countTrain.at(k));
1464 if (doWrite(k)) file = &(filesTrain.at(k));
1465 }
1466 else if (it->sampleType == Structure::ST_TEST)
1467 {
1468 error = &(p[k].errorTest);
1469 count = &(countTest.at(k));
1470 if (doWrite(k)) file = &(filesTest.at(k));
1471 }
1472
1473 it->updateError(k, *error, *count);
1474 if (doWrite(k))
1475 {
1476 if (k == "energy") (*file) << it->getEnergyLine();
1477 else if (k == "force")
1478 {
1479 for (auto l : it->getForcesLines()) (*file) << l;
1480 }
1481 else if (k == "charge")
1482 {
1483 for (auto l : it->getChargesLines()) (*file) << l;
1484 }
1485 }
1486 }
1487 if (freeMemory) it->freeAtoms(true, maxCutoffRadius);
1488 it->clearElectrostatics();
1489 }
1490
1491 for (auto k : pk)
1492 {
1493 collectError(k, p[k].errorTrain, countTrain.at(k));
1494 collectError(k, p[k].errorTest, countTest.at(k));
1495 if (doWrite(k))
1496 {
1497 filesTrain.at(k).close();
1498 filesTest.at(k).close();
1499 MPI_Barrier(comm);
1500 if (myRank == 0)
1501 {
1502 combineFiles(fileNames.at(k).first);
1503 combineFiles(fileNames.at(k).second);
1504 }
1505 }
1506 }
1507
1508#ifdef _OPENMP
1509 omp_set_num_threads(num_threads);
1510#endif
1511
1512 return;
1513}
1514
1515void Training::calculateErrorEpoch()
1516{
1517 // Check whether property comparison files should be written for
1518 // this epoch.
1519 map<string, pair<string, string>> fileNames;
1520
1521 for (auto const& ip : p)
1522 {
1523 string const& k = ip.first; // key
1524 Property const& d = ip.second; // data
1525 if (d.writeCompEvery > 0 &&
1526 (epoch % d.writeCompEvery == 0 || epoch <= d.writeCompAlways))
1527 {
1528 string middle;
1529 if (k == "energy") middle = "points";
1530 else if (k == "force" ) middle = "forces";
1531 else if (k == "charge") middle = "charges";
1532 fileNames[k] = make_pair(strpr("train%s.%06zu.out",
1533 middle.c_str(), epoch),
1534 strpr("test%s.%06zu.out",
1535 middle.c_str(), epoch));
1536 }
1537 }
1538
1539 // Calculate errors and write comparison files.
1540 calculateError(fileNames);
1541
1542 return;
1543}
1544
1545void Training::calculateChargeErrorVec(Structure const &s,
1546                                       Eigen::VectorXd &cVec,
1547 double &cNorm)
1548{
1549 cVec.resize(s.numAtoms);
1550 for (size_t i = 0; i < s.numAtoms; ++i)
1551 {
1552 cVec(i) = s.atoms.at(i).charge - s.atoms.at(i).chargeRef;
1553 }
1554 cNorm = cVec.norm();
1555 return;
1556}
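// Helper for charge training: gathers the per-atom charge residuals of one
// structure into an Eigen vector and returns its Euclidean norm, which can
// serve as the structure's error measure during updates.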
1557
1558void Training::printHeader()
1559{
1560 string metric = "?";
1561 string peratom = "";
1562
1563 log << "The training loop output covers different errors, update and\n";
1564 log << "timing information. The following quantities are organized\n";
1565 log << "according to the matrix scheme below:\n";
1566 log << "-------------------------------------------------------------------\n";
1567 log << "ep ........ Epoch.\n";
1568 for (auto k : pk)
1569 {
1570 string const& pmetric = p[k].displayMetric;
1571 if (pmetric.find("RMSE") != pmetric.npos) metric = "RMSE";
1572 else if (pmetric.find("MAE") != pmetric.npos) metric = "MAE";
1573 if (pmetric.find("pa") != pmetric.npos) peratom = " per atom";
1574 else peratom = "";
1575 log << p[k].tiny << "_count ... Number of " << k << " updates.\n";
1576 log << p[k].tiny << "_train ... " << metric << " of training "
1577 << p[k].plural << peratom << ".\n";
1578 log << p[k].tiny << "_test .... " << metric << " of test "
1579 << p[k].plural << peratom << ".\n";
1580 //log << p[k].tiny << "_time ........ Time for " << k << " updates "
1581 // << "(seconds).\n";
1582 log << p[k].tiny << "_pt ...... Percentage of time for " << k <<
1583               " updates w.r.t. t_train.\n";
1584 }
1585 log << "count ..... Total number of updates.\n";
1586 log << "train ..... Percentage of time for training.\n";
1587 log << "error ..... Percentage of time for error calculation.\n";
1588 log << "other ..... Percentage of time for other purposes.\n";
1589 log << "epoch ..... Total time for this epoch (seconds).\n";
1590 log << "total ..... Total time for all epochs (seconds).\n";
1591 log << "-------------------------------------------------------------------\n";
1592 for (auto k : pk)
1593 {
1594 log << strpr("%-6s", k.c_str())
1595 << strpr(" %5s", "ep")
1596 << strpr(" %7s", (p[k].tiny + "_count").c_str())
1597 << strpr(" %11s", (p[k].tiny + "_train").c_str())
1598 << strpr(" %11s", (p[k].tiny + "_test").c_str())
1599 << strpr(" %5s", (p[k].tiny + "_pt").c_str())
1600 << "\n";
1601 }
1602 log << strpr("%-6s", "timing")
1603 << strpr(" %5s", "ep")
1604 << strpr(" %7s", "count")
1605 << strpr(" %5s", "train")
1606 << strpr(" %5s", "error")
1607 << strpr(" %5s", "other")
1608 << strpr(" %9s", "epoch")
1609 << strpr(" %9s", "total")
1610 << "\n";
1611 log << "-------------------------------------------------------------------\n";
1612
1613 return;
1614}
1615
1616void Training::printEpoch()
1617{
1618 double timeLoop = sw["loop"].getLoop();
1619 double timeTrain = sw["train"].getLoop();
1620 size_t totalUpdates = 0;
1621 for (auto k : pk)
1622 {
1623 totalUpdates += p[k].countUpdates;
1624 double timeProp = sw[k].getLoop();
1625 string caps = k;
1626 for (auto& c : caps) c = toupper(c);
1627 log << strpr("%-6s", caps.c_str());
1628 log << strpr(" %5zu", epoch);
1629 log << strpr(" %7zu", p[k].countUpdates);
1630 if (normalize)
1631 {
1632 log << strpr(" %11.5E %11.5E",
1633 physical(k, p[k].errorTrain.at(p[k].displayMetric)),
1634 physical(k, p[k].errorTest.at(p[k].displayMetric)));
1635 }
1636 else
1637 {
1638 log << strpr(" %11.5E %11.5E",
1639 p[k].errorTrain.at(p[k].displayMetric),
1640 p[k].errorTest.at(p[k].displayMetric));
1641 }
1642 if (epoch == 0) log << strpr(" %5.1f", 0.0);
1643 else log << strpr(" %5.1f", timeProp / timeTrain * 100.0);
1644 log << "\n";
1645 }
1646 double timeOther = timeLoop;
1647 timeOther -= sw["error"].getLoop();
1648 timeOther -= sw["train"].getLoop();
1649 log << strpr("%-6s", "TIMING");
1650 log << strpr(" %5zu", epoch);
1651 log << strpr(" %7zu", totalUpdates);
1652 log << strpr(" %5.1f", sw["train"].getLoop() / timeLoop * 100.0);
1653 log << strpr(" %5.1f", sw["error"].getLoop() / timeLoop * 100.0);
1654 log << strpr(" %5.1f", timeOther / timeLoop * 100.0);
1655 log << strpr(" %9.2f", sw["loop"].getLoop());
1656 log << strpr(" %9.2f", sw["loop"].getTotal());
1657 log << "\n";
1658
1659 return;
1660}
1661
1662void Training::writeWeights(string const& nnName,
1663 string const& fileNameFormat) const
1664{
1665 ofstream file;
1666
1667 for (size_t i = 0; i < numElements; ++i)
1668 {
1669 string fileName = strpr(fileNameFormat.c_str(),
1670 elements.at(i).getAtomicNumber());
1671 file.open(fileName.c_str());
1672 elements.at(i).neuralNetworks.at(nnName).writeConnections(file);
1673 file.close();
1674 }
1675
1676 return;
1677}
1678
1679void Training::writeWeightsEpoch() const
1680{
1681 if (writeWeightsEvery > 0 &&
1682        (epoch % writeWeightsEvery == 0 || epoch <= writeWeightsAlways))
1683    {
1684        string weightFileFormat = strpr(".%%03zu.%06zu.out", epoch);
1685 if ( nnpType == NNPType::HDNNP_2G ||
1686 (nnpType == NNPType::HDNNP_4G && stage == 2) ||
1687 (nnpType == NNPType::HDNNP_Q && stage == 2))
1688 {
1689 weightFileFormat = "weights" + weightFileFormat;
1690 }
1691 else if ((nnpType == NNPType::HDNNP_4G && stage == 1) ||
1692 (nnpType == NNPType::HDNNP_Q && stage == 1))
1693 {
1694 weightFileFormat = "weightse" + weightFileFormat;
1695 }
1696 writeWeights(nnId, weightFileFormat);
1697 }
1698
1699 return;
1700}
1701
1702void Training::writeHardness(string const& fileNameFormat) const
1703{
1704 ofstream file;
1705
1706 for (size_t i = 0; i < numElements; ++i)
1707 {
1708 string fileName = strpr(fileNameFormat.c_str(),
1709 elements.at(i).getAtomicNumber());
1710 file.open(fileName.c_str());
1711 double hardness = elements.at(i).getHardness();
1712 if (normalize) hardness = physical("hardness", hardness);
1713 file << hardness << endl;
1714 file.close();
1715 }
1716
1717 return;
1718}
1719
1720void Training::writeHardnessEpoch() const
1721{
1722 if (writeWeightsEvery > 0 &&
1723        (epoch % writeWeightsEvery == 0 || epoch <= writeWeightsAlways) &&
1724        nnpType == NNPType::HDNNP_4G && stage == 1)
1725 {
1726        string hardnessFileFormat = strpr(".%%03zu.%06zu.out", epoch);
1727 hardnessFileFormat = "hardness" + hardnessFileFormat;
1728 writeHardness(hardnessFileFormat);
1729 }
1730
1731 return;
1732}
1733
1734void Training::writeLearningCurve(bool append, string const fileName) const
1735{
1736 ofstream file;
1737 string fileNameActual = fileName;
1738 if (nnpType == NNPType::HDNNP_4G ||
1739        nnpType == NNPType::HDNNP_Q)
1740    {
1741 fileNameActual += strpr(".stage-%zu", stage);
1742 }
1743
1744 if (append) file.open(fileNameActual.c_str(), ofstream::app);
1745 else
1746 {
1747 file.open(fileNameActual.c_str());
1748
1749 // File header.
1750 vector<string> title;
1751 vector<string> colName;
1752 vector<string> colInfo;
1753 vector<size_t> colSize;
1754 title.push_back("Learning curves for training properties:");
1755 for (auto k : pk)
1756 {
1757 title.push_back(" * " + p[k].plural);
1758 }
1759 colSize.push_back(10);
1760 colName.push_back("epoch");
1761 colInfo.push_back("Current epoch.");
1762
1763 map<string, string> text;
1764 text["RMSEpa"] = "RMSE of %s %s per atom";
1765 text["RMSE"] = "RMSE of %s %s";
1766 text["MAEpa"] = "MAE of %s %s per atom";
1767 text["MAE"] = "MAE of %s %s";
1768
1769 for (auto k : pk)
1770 {
1771 for (auto m : p[k].errorMetrics)
1772 {
1773 colSize.push_back(16);
1774 colName.push_back(m + "_" + p[k].tiny + "train_pu");
1775 colInfo.push_back(strpr(
1776 (text[m] + " (physical units)").c_str(),
1777 "training",
1778 p[k].plural.c_str()));
1779 colSize.push_back(16);
1780 colName.push_back(m + "_" + p[k].tiny + "test_pu");
1781 colInfo.push_back(strpr(
1782 (text[m] + " (physical units)").c_str(),
1783 "test",
1784 p[k].plural.c_str()));
1785 }
1786 }
1787 if (normalize)
1788 {
1789 for (auto k : pk)
1790 {
1791 // Internal units only for energies, forces and charges.
1792 if (!(k == "energy" || k == "force" || k == "charge")) continue;
1793 for (auto m : p[k].errorMetrics)
1794 {
1795 colSize.push_back(16);
1796 colName.push_back(m + "_" + p[k].tiny + "train_iu");
1797 colInfo.push_back(strpr(
1798 (text[m] + " (training units)").c_str(),
1799 "training",
1800 p[k].plural.c_str()));
1801 colSize.push_back(16);
1802 colName.push_back(m + "_" + p[k].tiny + "test_iu");
1803 colInfo.push_back(strpr(
1804 (text[m] + " (training units)").c_str(),
1805 "test",
1806 p[k].plural.c_str()));
1807 }
1808 }
1809 }
1810 appendLinesToFile(file,
1811 createFileHeader(title, colSize, colName, colInfo));
1812 }
1813
1814 file << strpr("%10zu", epoch);
1815 if (normalize)
1816 {
1817 for (auto k : pk)
1818 {
1819 if (!(k == "energy" || k == "force" || k == "charge")) continue;
1820 for (auto m : p[k].errorMetrics)
1821 {
1822 file << strpr(" %16.8E %16.8E",
1823 physical(k, p[k].errorTrain.at(m)),
1824 physical(k, p[k].errorTest.at(m)));
1825 }
1826 }
1827 }
1828 for (auto k : pk)
1829 {
1830 for (auto m : p[k].errorMetrics)
1831 {
1832 file << strpr(" %16.8E %16.8E",
1833 p[k].errorTrain.at(m),
1834 p[k].errorTest.at(m));
1835 }
1836 }
1837 file << "\n";
1838 file.flush();
1839 file.close();
1840
1841 return;
1842}
1843
1844void Training::writeNeuronStatistics(string const& id,
1845                                     string const& fileName) const
1846{
1847 ofstream file;
1848 if (myRank == 0)
1849 {
1850 file.open(fileName.c_str());
1851
1852 // File header.
1853 vector<string> title;
1854 vector<string> colName;
1855 vector<string> colInfo;
1856 vector<size_t> colSize;
1857 title.push_back("Statistics for individual neurons of network \""
1858 + id + "\" gathered during RMSE calculation.");
1859 colSize.push_back(10);
1860 colName.push_back("element");
1861 colInfo.push_back("Element index.");
1862 colSize.push_back(10);
1863 colName.push_back("neuron");
1864 colInfo.push_back("Neuron number.");
1865 colSize.push_back(10);
1866 colName.push_back("count");
1867 colInfo.push_back("Number of neuron value computations.");
1868 colSize.push_back(16);
1869 colName.push_back("min");
1870        colInfo.push_back("Minimum neuron value encountered.");
1871 colSize.push_back(16);
1872 colName.push_back("max");
1873        colInfo.push_back("Maximum neuron value encountered.");
1874 colSize.push_back(16);
1875 colName.push_back("mean");
1876 colInfo.push_back("Mean neuron value.");
1877 colSize.push_back(16);
1878 colName.push_back("sigma");
1879 colInfo.push_back("Standard deviation of neuron value.");
1880 appendLinesToFile(file,
1881 createFileHeader(title, colSize, colName, colInfo));
1882 }
1883
1884 for (size_t i = 0; i < numElements; ++i)
1885 {
1886 size_t n = elements.at(i).neuralNetworks.at(id).getNumNeurons();
1887 vector<long> count(n, 0);
1888 vector<double> min(n, 0.0);
1889 vector<double> max(n, 0.0);
1890 vector<double> mean(n, 0.0);
1891 vector<double> sigma(n, 0.0);
1892 elements.at(i).neuralNetworks.at(id).
1893 getNeuronStatistics(&(count.front()),
1894 &(min.front()),
1895 &(max.front()),
1896 &(mean.front()),
1897 &(sigma.front()));
1898 // Collect statistics from all processors on proc 0.
1899 if (myRank == 0)
1900 {
1901 MPI_Reduce(MPI_IN_PLACE, &(count.front()), n, MPI_LONG , MPI_SUM, 0, comm);
1902 MPI_Reduce(MPI_IN_PLACE, &(min.front()) , n, MPI_DOUBLE, MPI_MIN, 0, comm);
1903 MPI_Reduce(MPI_IN_PLACE, &(max.front()) , n, MPI_DOUBLE, MPI_MAX, 0, comm);
1904 MPI_Reduce(MPI_IN_PLACE, &(mean.front()) , n, MPI_DOUBLE, MPI_SUM, 0, comm);
1905 MPI_Reduce(MPI_IN_PLACE, &(sigma.front()), n, MPI_DOUBLE, MPI_SUM, 0, comm);
1906 }
1907 else
1908 {
1909 MPI_Reduce(&(count.front()), &(count.front()), n, MPI_LONG , MPI_SUM, 0, comm);
1910 MPI_Reduce(&(min.front()) , &(min.front()) , n, MPI_DOUBLE, MPI_MIN, 0, comm);
1911 MPI_Reduce(&(max.front()) , &(max.front()) , n, MPI_DOUBLE, MPI_MAX, 0, comm);
1912 MPI_Reduce(&(mean.front()) , &(mean.front()) , n, MPI_DOUBLE, MPI_SUM, 0, comm);
1913 MPI_Reduce(&(sigma.front()), &(sigma.front()), n, MPI_DOUBLE, MPI_SUM, 0, comm);
1914 }
1915 if (myRank == 0)
1916 {
1917 for (size_t j = 0; j < n; ++j)
1918 {
1919 size_t m = count.at(j);
1920 sigma.at(j) = sqrt((m * sigma.at(j) - mean.at(j) * mean.at(j))
1921 / (m * (m - 1)));
1922 mean.at(j) /= m;
1923 file << strpr("%10zu %10zu %10ld %16.8E %16.8E %16.8E %16.8E\n",
1924 i + 1,
1925 j + 1,
1926 count[j],
1927 min[j],
1928 max[j],
1929 mean[j],
1930 sigma[j]);
1931 }
1932 }
1933 }
1934
1935 if (myRank == 0)
1936 {
1937 file.close();
1938 }
1939
1940 return;
1941}
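// Editor's note: a minimal standalone sketch (not part of n2p2) of the
// moment statistics used above. Each rank reduces the count m, the sum S1
// (carried in "mean") and the sum of squares S2 (carried in "sigma");
// rank 0 then converts the raw moments to the mean S1/m and the unbiased
// standard deviation sqrt((m*S2 - S1*S1) / (m*(m-1))), exactly as in the
// loop above. Excluded from compilation via #if 0.
#if 0
#include <cmath>
#include <utility>

// Convert aggregated moments (m, S1, S2) to {mean, sigma}; requires m >= 2.
std::pair<double, double> momentsToStats(long m, double s1, double s2)
{
    double const mean  = s1 / m;
    double const sigma = std::sqrt((m * s2 - s1 * s1) / (m * (m - 1.0)));
    return {mean, sigma};
}
#endif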
1942
1943void Training::writeNeuronStatisticsEpoch() const
1944{
1945 if (writeNeuronStatisticsEvery > 0 &&
1946 (epoch % writeNeuronStatisticsEvery == 0
1947 || epoch <= writeNeuronStatisticsAlways))
1948 {
1949 string fileName = strpr("neuron-stats.%s.%06zu.out",
1950 nnId.c_str(), epoch);
1951 if (nnpType == NNPType::HDNNP_4G ||
1952 nnpType == NNPType::HDNNP_Q)
1953 {
1954 fileName += strpr(".stage-%zu", stage);
1955 }
1956 writeNeuronStatistics(nnId, fileName);
1957 }
1958
1959 return;
1960}
1961
1962void Training::resetNeuronStatistics()
1963{
1964 for (vector<Element>::iterator it = elements.begin();
1965 it != elements.end(); ++it)
1966 {
1967 for (auto& nn : it->neuralNetworks) nn.second.resetNeuronStatistics();
1968 }
1969 return;
1970}
1971
1972void Training::writeUpdaterStatus(bool append,
1973 string const fileNameFormat) const
1974{
1975 ofstream file;
1976 string fileNameFormatActual = fileNameFormat;
1977 if (nnpType == NNPType::HDNNP_4G ||
1979 {
1980 fileNameFormatActual += strpr(".stage-%zu", stage);
1981 }
1982
1983 for (size_t i = 0; i < numUpdaters; ++i)
1984 {
1985 string fileName;
1986 if (updateStrategy == US_COMBINED)
1987 {
1988 fileName = strpr(fileNameFormatActual.c_str(), 0);
1989 }
1990 else if (updateStrategy == US_ELEMENT)
1991 {
1992 fileName = strpr(fileNameFormatActual.c_str(),
1993 elementMap.atomicNumber(i));
1994 }
1995 if (append) file.open(fileName.c_str(), ofstream::app);
1996 else
1997 {
1998 file.open(fileName.c_str());
1999 appendLinesToFile(file, updaters.at(i)->statusHeader());
2000 }
2001 file << updaters.at(i)->status(epoch);
2002 file.close();
2003 }
2004
2005 return;
2006}
2007
2008void Training::sortUpdateCandidates(string const& property)
2009{
2010 // Update error for all structures.
2011 for (auto& uc : p[property].updateCandidates)
2012 {
2013 if (property == "energy")
2014 {
2015 Structure const& s = structures.at(uc.s);
2016 uc.error = fabs((s.energyRef - s.energy) / s.numAtoms);
2017 }
2018 else if (property == "force")
2019 {
2020 Structure const& s = structures.at(uc.s);
2021 uc.error = 0.0;
2022 for (auto &sc : uc.subCandidates)
2023 {
2024 Atom const& ai = s.atoms.at(sc.a);
2025 sc.error = fabs(ai.fRef[sc.c] - ai.f[sc.c]);
2026 uc.error += sc.error;
2027 }
2028 uc.error /= uc.subCandidates.size();
2029 }
2030 else if (property == "charge")
2031 {
2032 Structure const& s = structures.at(uc.s);
2033 uc.error = 0.0;
2034 for (auto const& a : s.atoms)
2035 {
2036 uc.error += fabs(a.chargeRef - a.charge);
2037 }
2038 uc.error /= s.numAtoms;
2039 }
2040 }
2041 // Sort update candidates list.
2042 sort(p[property].updateCandidates.begin(),
2043 p[property].updateCandidates.end());
2044
2045 for (auto &uc : p[property].updateCandidates)
2046 {
2047 if (uc.subCandidates.size() > 0)
2048 sort(uc.subCandidates.begin(),
2049 uc.subCandidates.end());
2050 }
2051
2052 // Reset current position.
2053 p[property].posUpdateCandidates = 0;
2054 for (auto &uc : p[property].updateCandidates)
2055 {
2056 uc.posSubCandidates = 0;
2057 }
2058
2059 return;
2060}
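// Editor's note: the sort above relies on the ordering defined by
// UpdateCandidate::operator< (and SubCandidate::operator<), which is not
// shown in this listing. The sketch below illustrates the assumed
// comparison: candidates ordered by decreasing error, so that with
// posUpdateCandidates reset to 0 the highest-error patterns are drawn
// first. Excluded from compilation via #if 0.
#if 0
struct CandidateSketch
{
    double error;
    // Larger error sorts to the front of the candidate list.
    bool operator<(CandidateSketch const& rhs) const
    {
        return error > rhs.error;
    }
};
#endif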
2061
2062void Training::shuffleUpdateCandidates(string const& property)
2063{
2064 shuffle(p[property].updateCandidates.begin(),
2065 p[property].updateCandidates.end(),
2066 rngNew);
2067
2068 for (auto &uc : p[property].updateCandidates)
2069 {
2070 if (uc.subCandidates.size() > 0)
2071 shuffle(uc.subCandidates.begin(),
2072 uc.subCandidates.end(),
2073 rngNew);
2074 }
2075
2076 // Reset current position.
2077 p[property].posUpdateCandidates = 0;
2078 for (auto &uc : p[property].updateCandidates)
2079 {
2080 uc.posSubCandidates = 0;
2081 }
2082
2083 return;
2084}
2085
2086void Training::setEpochSchedule()
2087{
2088 // Clear epoch schedule.
2089 epochSchedule.clear();
2090 vector<int>(epochSchedule).swap(epochSchedule);
2091
2092 // Grow schedule vector by each property's number of desired updates.
2093 // Fill this array looping in reverse direction for backward compatibility.
2094 //for (size_t i = 0; i < pk.size(); ++i)
2095 for (int i = pk.size() - 1; i >= 0; --i)
2096 {
2097 epochSchedule.insert(epochSchedule.end(), p[pk.at(i)].numUpdates, i);
2098 }
2099
2100 // Return if there is only a single training property.
2101 if (pk.size() == 1) return;
2102
2103 // Now shuffle the schedule to get a random sequence.
2104 shuffle(epochSchedule.begin(), epochSchedule.end(), rngGlobalNew);
2105
2106 //for (size_t i = 0; i < epochSchedule.size(); ++i)
2107 //{
2108 // log << strpr("%zu %zu\n", i, epochSchedule.at(i));
2109 //}
2110
2111 return;
2112}
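// Editor's note: a worked example (hypothetical counts) of the schedule
// built above. With pk = {"energy", "force"}, 2 energy updates and 4
// force updates per epoch, the vector is filled in reverse property order
// and then shuffled into a random interleaving. Excluded from compilation
// via #if 0.
#if 0
#include <algorithm>
#include <random>
#include <vector>

int main()
{
    // Property indices: 0 = "energy" (2 updates), 1 = "force" (4 updates).
    std::vector<int> schedule;
    schedule.insert(schedule.end(), 4, 1); // reverse order, as above
    schedule.insert(schedule.end(), 2, 0); // -> {1,1,1,1,0,0}
    std::mt19937_64 rng(0);
    std::shuffle(schedule.begin(), schedule.end(), rng);
    // schedule is now a random interleaving, e.g. {1,0,1,1,0,1}; during
    // the epoch each entry selects one update of property pk.at(entry).
    return 0;
}
#endif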
2113
2114void Training::checkSelectionMode()
2115{
2116 for (auto k : pk)
2117 {
2118 if (p[k].selectionModeSchedule.find(epoch)
2119 != p[k].selectionModeSchedule.end())
2120 {
2121 p[k].selectionMode = p[k].selectionModeSchedule[epoch];
2122 if (epoch != 0)
2123 {
2124 string message = "INFO Switching selection mode for "
2125 "property \"" + k + "\" to ";
2126 if (p[k].selectionMode == SM_RANDOM)
2127 {
2128 message += strpr("SM_RANDOM (%d).\n", p[k].selectionMode);
2129 }
2130 else if (p[k].selectionMode == SM_SORT)
2131 {
2132 message += strpr("SM_SORT (%d).\n", p[k].selectionMode);
2133 }
2134 else if (p[k].selectionMode == SM_THRESHOLD)
2135 {
2136 message += strpr("SM_THRESHOLD (%d).\n",
2137 p[k].selectionMode);
2138 }
2139 log << message;
2140 }
2141 }
2142 }
2143
2144 return;
2145}
2146
2147void Training::loop()
2148{
2149 sw["loop"].start();
2150 log << "\n";
2151 log << "*** TRAINING LOOP ***********************"
2152 "**************************************\n";
2153 log << "\n";
2154 printHeader();
2155
2156 // Calculate initial RMSE and write comparison files.
2157 sw["error"].start();
2158 calculateErrorEpoch();
2159 sw["error"].stop();
2160
2161 // Write initial weights to files.
2162 if (myRank == 0) writeWeightsEpoch();
2163
2164 // Write initial hardness to files (checks for corresponding type and
2165 // stage)
2166 if (myRank == 0) writeHardnessEpoch();
2167
2168 // Write learning curve.
2169 if (myRank == 0) writeLearningCurve(false);
2170
2171 // Write updater status to file.
2172 if (myRank == 0) writeUpdaterStatus(false);
2173
2174 // Write neuron statistics.
2175 writeNeuronStatisticsEpoch();
2176
2177 // Print timing information.
2178 sw["loop"].stop();
2179 printEpoch();
2180
2181 // Check if training should be continued.
2182 while (advance())
2183 {
2184 sw["loop"].start();
2185
2186 // Increment epoch counter.
2187 epoch++;
2188 log << "------\n";
2189
2190 // Reset update counters.
2191 for (auto k : pk) p[k].countUpdates = 0;
2192
2193 // Check if selection mode should be changed in this epoch.
2194 checkSelectionMode();
2195
2196 // Sort or shuffle update candidates.
2197 for (auto k : pk)
2198 {
2199 if (p[k].selectionMode == SM_SORT) sortUpdateCandidates(k);
2200 else shuffleUpdateCandidates(k);
2201 }
2202
2203 // Determine epoch update schedule.
2204 setEpochSchedule();
2205
2206 // Perform property updates according to schedule.
2207 sw["train"].start();
2208 for (auto i : epochSchedule)
2209 {
2210 string property = pk.at(i);
2211 update(property);
2212 p[property].countUpdates++;
2213 }
2214 sw["train"].stop();
2215
2216 // Reset neuron statistics.
2217 resetNeuronStatistics();
2218
2219 // Calculate errors and write comparison files.
2220 sw["error"].start();
2221 calculateErrorEpoch();
2222 sw["error"].stop();
2223
2224 // Write weights to files.
2225 if (myRank == 0) writeWeightsEpoch();
2226
2227 // Write hardness to files (checks for corresponding type and stage).
2228 if (myRank == 0) writeHardnessEpoch();
2229
2230 // Append to learning curve.
2231 if (myRank == 0) writeLearningCurve(true);
2232
2233 // Write updater status to file.
2234 if (myRank == 0) writeUpdaterStatus(true);
2235
2236 // Write neuron statistics.
2237 writeNeuronStatisticsEpoch();
2238
2239 // Print error overview and timing information.
2240 sw["loop"].stop();
2241 printEpoch();
2242
2243 if (myRank == 0) writeTimingData(epoch != 1);
2244 }
2245
2246 log << "-----------------------------------------"
2247 "--------------------------------------\n";
2248 log << strpr("TIMING Training loop finished: %.2f seconds.\n",
2249 sw["loop"].getTotal());
2250 log << "*****************************************"
2251 "**************************************\n";
2252
2253 return;
2254}
2255
2256void Training::update(string const& property)
2257{
2258 // Shortcuts.
2259 string const& k = property; // Property key.
2260 Property& pu = p[k]; // Update property.
2261 // Start watch for error and jacobian computation, reset loop timer if
2262 // first update in this epoch.
2263 bool newLoop = pu.countUpdates == 0;
2264 sw[k].start(newLoop);
2265 sw[k + "_err"].start(newLoop);
2266
2267#ifdef _OPENMP
2268 int num_threads = omp_get_max_threads();
2269 omp_set_num_threads(1);
2270#endif
2271
2272 ///////////////////////////////////////////////////////////////////////
2273 // PART 1: Find update candidate, compute error fractions and derivatives
2274 ///////////////////////////////////////////////////////////////////////
2275
2276 size_t batchSize = pu.taskBatchSize;
2277 if (batchSize == 0) batchSize = pu.patternsPerUpdate;
2278 bool derivatives = false;
2279 if (k == "force") derivatives = true;
2280
2281 vector<size_t> thresholdLoopCount(batchSize, 0);
2282 vector<double> currentRmseFraction(batchSize, 0.0);
2283
2284 bool useSubCandidates = (k == "force" && nnpType == NNPType::HDNNP_4G);
2285
2286 // Either consider only UpdateCandidates or SubCandidates
2287 vector<size_t> currentUpdateCandidates(batchSize, 0);
2288
2289 for (size_t i = 0; i < numUpdaters; ++i)
2290 {
2291 fill(pu.error.at(i).begin(), pu.error.at(i).end(), 0.0);
2292 fill(pu.jacobian.at(i).begin(), pu.jacobian.at(i).end(), 0.0);
2293 }
2294
2295 // Loop over (mini-)batch size.
2296 for (size_t b = 0; b < batchSize; ++b)
2297 {
2298 UpdateCandidate* c = NULL; // Actual current update candidate.
2299 SubCandidate* sC = NULL; // Actual current sub candidate.
2300 size_t* posCandidates = NULL; // position of sub or update candidate.
2301 size_t indexBest = 0; // Index of best update candidate so far.
2302 double rmseFractionBest = 0.0; // RMSE of best update candidate so far.
2303
2304 // For SM_THRESHOLD need to loop until candidate's RMSE is above
2305 // threshold. Other modes don't loop here.
2306 size_t trials = 1;
2307 if (pu.selectionMode == SM_THRESHOLD) trials = pu.rmseThresholdTrials;
2308 size_t il = 0;
2309 for (il = 0; il < trials; ++il)
2310 {
2311 // Restart position index if necessary.
2312 if (pu.posUpdateCandidates >= pu.updateCandidates.size())
2313 {
2314 pu.posUpdateCandidates = 0;
2315 }
2316 //log << strpr("pos %zu b %zu size %zu\n", pu.posUpdateCandidates, b, currentUpdateCandidates.size());
2317 // Set current update candidate.
2318 c = &(pu.updateCandidates.at(pu.posUpdateCandidates));
2319
2320 if (c->posSubCandidates >= c->subCandidates.size())
2321 c->posSubCandidates = 0;
2322 // Shortcut for position counter of interest.
2323 if (useSubCandidates)
2324 {
2325 posCandidates = &(c->posSubCandidates);
2326 sC = &(c->subCandidates.at(c->posSubCandidates));
2327 }
2328 else
2329 {
2330 posCandidates = &(pu.posUpdateCandidates);
2331 if (c->subCandidates.size() > 0)
2332 sC = &(c->subCandidates.front());
2333 }
2334
2335 // Keep update candidates (for logging later).
2336 currentUpdateCandidates.at(b) = *posCandidates;
2337
2338 // Shortcut for current structure.
2339 Structure& s = structures.at(c->s);
2340 // Calculate symmetry functions (if results are already stored
2341 // these functions will return immediately).
2342#ifdef N2P2_NO_SF_GROUPS
2343 calculateSymmetryFunctions(s, derivatives);
2344#else
2345 calculateSymmetryFunctionGroups(s, derivatives);
2346#endif
2347 // For SM_THRESHOLD calculate RMSE of update candidate.
2348 if (pu.selectionMode == SM_THRESHOLD)
2349 {
2350 if (k == "energy")
2351 {
2352 if (nnpType == NNPType::HDNNP_2G)
2353 {
2354 calculateAtomicNeuralNetworks(s, derivatives);
2355 calculateEnergy(s);
2356 currentRmseFraction.at(b) =
2357 fabs(s.energyRef - s.energy)
2358 / (s.numAtoms * pu.errorTrain.at("RMSEpa"));
2359 }
2360 // Assume stage 2.
2361 else if (nnpType == NNPType::HDNNP_4G)
2362 {
2363 if (!s.hasCharges)
2364 {
2365 calculateAtomicNeuralNetworks(s, derivatives, "elec");
2366 chargeEquilibration(s, derivatives);
2367 }
2368 calculateAtomicNeuralNetworks(s, derivatives, "short");
2369 calculateEnergy(s);
2370 currentRmseFraction.at(b) = fabs(s.energyRef - s.energy)
2371 / (s.numAtoms
2372 * pu.errorTrain.at("RMSEpa"));
2373 }
2374 else if (nnpType == NNPType::HDNNP_Q)
2375 {
2376 // TODO: Reuse already present charge-NN data and
2377 // compute only short-NN energy contributions.
2378 throw runtime_error("ERROR: Not implemented.\n");
2379 }
2380 }
2381 else if (k == "force")
2382 {
2383 if (nnpType == NNPType::HDNNP_2G)
2384 {
2385 calculateAtomicNeuralNetworks(s, derivatives);
2386 calculateForces(s);
2387 Atom const& a = s.atoms.at(sC->a);
2388 currentRmseFraction.at(b) =
2389 fabs(a.fRef[sC->c]
2390 - a.f[sC->c])
2391 / pu.errorTrain.at("RMSE");
2392 }
2393 // Assume stage 2.
2394 else if (nnpType == NNPType::HDNNP_4G)
2395 {
2396 if (!s.hasAMatrix)
2397 {
2398 calculateAtomicNeuralNetworks(s, derivatives, "elec");
2399 chargeEquilibration(s, derivatives);
2400 }
2401 calculateAtomicNeuralNetworks(s, derivatives, "short");
2402 calculateForces(s);
2403 Atom const& a = s.atoms.at(sC->a);
2404 currentRmseFraction.at(b) =
2405 fabs(a.fRef[sC->c]
2406 - a.f[sC->c])
2407 / pu.errorTrain.at("RMSE");
2408 }
2409 else if (nnpType == NNPType::HDNNP_Q)
2410 {
2411 // TODO: Reuse already present charge-NN data and
2412 // compute only short-NN force contributions.
2413 throw runtime_error("ERROR: Not implemented.\n");
2414 }
2415 }
2416 else if (k == "charge")
2417 {
2418 // Assume stage 1.
2419 if (nnpType == NNPType::HDNNP_4G)
2420 {
2421 calculateAtomicNeuralNetworks(s, derivatives, "");
2422 chargeEquilibration(s, false);
2423 Eigen::VectorXd QError;
2424 double QErrorNorm;
2425 calculateChargeErrorVec(s, QError, QErrorNorm);
2426 currentRmseFraction.at(b) =
2427 QErrorNorm / sqrt(s.numAtoms)
2428 / pu.errorTrain.at("RMSE");
2429 }
2430 else if (nnpType == NNPType::HDNNP_Q)
2431 {
2432 // Compute only charge-NN
2433 Atom& a = s.atoms.at(sC->a);
2434 NeuralNetwork& nn =
2435 elements.at(a.element).neuralNetworks.at(nnId);
2436 nn.setInput(&(a.G.front()));
2437 nn.propagate();
2438 nn.getOutput(&(a.charge));
2439 currentRmseFraction.at(b) =
2440 fabs(a.chargeRef - a.charge)
2441 / pu.errorTrain.at("RMSE");
2442 }
2443 }
2444 // If the RMSE fraction is above the threshold, stop the loop immediately.
2445 if (currentRmseFraction.at(b) > pu.rmseThreshold)
2446 {
2447 // Increment position in update candidate list.
2448 (*posCandidates)++;
2449 break;
2450 }
2451 // If loop continues, free memory and remember best candidate
2452 // so far.
2453 if (freeMemory)
2454 {
2455 s.freeAtoms(true, maxCutoffRadius);
2456 }
2457 if (!useSubCandidates) s.clearElectrostatics();
2458
2459 if (currentRmseFraction.at(b) > rmseFractionBest)
2460 {
2461 rmseFractionBest = currentRmseFraction.at(b);
2462 indexBest = *posCandidates;
2463 }
2464 // Increment position in update candidate list.
2465 (*posCandidates)++;
2466 }
2467 // Break loop for all selection modes but SM_THRESHOLD.
2468 else if (pu.selectionMode == SM_RANDOM ||
2469 pu.selectionMode == SM_SORT)
2470 {
2471 // Increment position in update candidate list.
2472 (*posCandidates)++;
2473 break;
2474 }
2475 }
2476 thresholdLoopCount.at(b) = il;
2477
2478 // If the loop finished without finding a candidate above the RMSE
2479 // threshold, fall back to the best candidate seen during the trials.
2480 if (pu.selectionMode == SM_THRESHOLD && il == trials)
2481 {
2482 // Set best candidate.
2483 currentUpdateCandidates.at(b) = indexBest;
2484 currentRmseFraction.at(b) = rmseFractionBest;
2485 // Need to calculate the symmetry functions again, maybe results
2486 // were not stored.
2487 Structure& s = structures.at(c->s);
2488#ifdef N2P2_NO_SF_GROUPS
2489 calculateSymmetryFunctions(s, derivatives);
2490#else
2491 calculateSymmetryFunctionGroups(s, derivatives);
2492#endif
2493 }
2494
2495 // If new update candidate, determine number of subcandidates needed
2496 // before going to next candidate.
2497 if (useSubCandidates)
2498 {
2499 if (pu.countGroupedSubCand == 0)
2500 {
2501 pu.numGroupedSubCand = static_cast<size_t>(
2502 c->subCandidates.size() * pu.epochFraction);
2503 MPI_Allreduce( MPI_IN_PLACE, &(pu.numGroupedSubCand), 1,
2504 MPI_SIZE_T, MPI_MIN, comm);
2505 //if (myRank == 0)
2506 // cout << "group size: " << pu.numGroupedSubCand << endl;
2507 }
2508 pu.countGroupedSubCand++;
2509 }
2510
2511 ///////////////////////////////////////////////////////////////////////
2512 // PART 2: Compute error vector and Jacobian
2513 ///////////////////////////////////////////////////////////////////////
2514
2515 Structure& s = structures.at(c->s);
2516 // Temporary storage for derivative contributions of atoms (dXdc stores
2517 // dEdc, dFdc or dQdc for energy, force or charge updates, respectively).
2518 vector<vector<double>> dXdc;
2519 dXdc.resize(numElements);
2520 // Temporary storage vector for charge errors in structure
2521 Eigen::VectorXd QError;
2522 QError.resize(s.numAtoms);
2523 double QErrorNorm = 0;
2524 for (size_t i = 0; i < numElements; ++i)
2525 {
2526 size_t n = elements.at(i).neuralNetworks.at(nnId)
2527 .getNumConnections();
2528 dXdc.at(i).resize(n, 0.0);
2529 }
2530 // Precalculate offset in Jacobian array.
2531 size_t iu = 0;
2532 vector<size_t> offset(numElements, 0);
2533 for (size_t i = 0; i < numElements; ++i)
2534 {
2535 if (updateStrategy == US_ELEMENT) iu = i;
2536 else iu = 0;
2537 if ((parallelMode == PM_TRAIN_ALL) && (jacobianMode != JM_SUM))
2538 {
2539 // Offset from multiple streams/tasks
2540 offset.at(i) += pu.offsetPerTask.at(myRank)
2541 * numWeightsPerUpdater.at(iu);
2542 //log << strpr("%zu os 1: %zu ", i, offset.at(i));
2543 }
2544 if (jacobianMode == JM_FULL)
2545 {
2546 // Offset from batch training (multiple contributions from
2547 // single stream/task).
2548 offset.at(i) += b * numWeightsPerUpdater.at(iu);
2549 //log << strpr("%zu os 2: %zu ", i, offset.at(i));
2550 }
2551 if (updateStrategy == US_COMBINED)
2552 {
2553 // Offset from multiple elements in contribution from single
2554 // stream/task
2555 offset.at(i) += weightsOffset.at(i);
2556 //log << strpr("%zu os 3: %zu", i, offset.at(i));
2557 }
2558 //log << strpr(" %zu final os: %zu\n", i, offset.at(i));
2559 }
2560 // Now compute Jacobian.
2561 if (k == "energy")
2562 {
2563 if (nnpType == NNPType::HDNNP_2G || nnpType == NNPType::HDNNP_4G)
2564 {
2565 if (nnpType == NNPType::HDNNP_4G && !s.hasCharges)
2566 {
2567 calculateAtomicNeuralNetworks(s, derivatives, "elec");
2568 chargeEquilibration(s, derivatives);
2569 }
2570 // Loop over atoms and calculate atomic energy contributions.
2571 for (vector<Atom>::iterator it = s.atoms.begin();
2572 it != s.atoms.end(); ++it)
2573 {
2574 size_t i = it->element;
2575 NeuralNetwork& nn = elements.at(i).neuralNetworks.at(nnId);
2576
2577 // TODO: This part should simplify with improved NN class.
2578 for (size_t j = 0; j < it->G.size(); ++j)
2579 {
2580 nn.setInput(j, it->G.at(j));
2581 }
2582 if (nnpType == NNPType::HDNNP_4G)
2583 nn.setInput(it->G.size(), it->charge);
2584 nn.propagate();
2585 nn.getOutput(&(it->energy));
2586 // Compute derivative of output node with respect to all
2587 // neural network connections (weights + biases).
2588 nn.calculateDEdc(&(dXdc.at(i).front()));
2589 // Finally sum up Jacobian.
2590 if (updateStrategy == US_ELEMENT) iu = i;
2591 else iu = 0;
2592 for (size_t j = 0; j < dXdc.at(i).size(); ++j)
2593 {
2594 pu.jacobian.at(iu).at(offset.at(i) + j) +=
2595 dXdc.at(i).at(j);
2596 }
2597 }
2598 }
2599 // Assume stage 2.
2600 else if (nnpType == NNPType::HDNNP_Q)
2601 {
2602 // TODO: Lots of stuff.
2603 throw runtime_error("ERROR: Not implemented.\n");
2604 }
2605 }
2606 else if (k == "force")
2607 {
2608 if (nnpType == NNPType::HDNNP_2G || nnpType == NNPType::HDNNP_4G)
2609 {
2610 if (nnpType == NNPType::HDNNP_4G)
2611 {
2612 if (!s.hasAMatrix)
2613 {
2614 calculateAtomicNeuralNetworks(s, derivatives, "elec");
2615 chargeEquilibration(s, derivatives);
2616 }
2617 s.calculateDQdr(vector<size_t>{sC->a},
2618 vector<size_t>{sC->c},
2619 maxCutoffRadius,
2620 elements);
2621 }
2622
2623 // Loop over atoms and calculate atomic energy contributions.
2624 for (vector<Atom>::iterator it = s.atoms.begin();
2625 it != s.atoms.end(); ++it)
2626 {
2627 // For force update save derivative of symmetry function
2628 // with respect to coordinate.
2629#ifndef N2P2_FULL_SFD_MEMORY
2630 collectDGdxia((*it), sC->a, sC->c);
2631
2632 if (nnpType == NNPType::HDNNP_4G)
2633 {
2634 double dQdxia = s.atoms.at(sC->a).dQdr.at(it->index)[sC->c];
2635 dGdxia.back() = dQdxia;
2636 }
2637#else
2638 it->collectDGdxia(sC->a, sC->c, maxCutoffRadius);
2639 if (nnpType == NNPType::HDNNP_4G)
2640 {
2641 double dQdxia = s.atoms.at(sC->a).dQdr.at(it->index)[sC->c];
2642 it->dGdxia.resize(it->G.size() + 1);
2643 it->dGdxia.back() = dQdxia;
2644 }
2645#endif
2646 size_t i = it->element;
2647 NeuralNetwork& nn = elements.at(i).neuralNetworks.at(nnId);
2648 // TODO: This part should simplify with improved NN class.
2649 for (size_t j = 0; j < it->G.size(); ++j)
2650 {
2651 nn.setInput(j, it->G.at(j));
2652 }
2653 if (nnpType == NNPType::HDNNP_4G)
2654 nn.setInput(it->G.size(), it->charge);
2655 nn.propagate();
2656 if (derivatives) nn.calculateDEdG(&((it->dEdG).front()));
2657 nn.getOutput(&(it->energy));
2658
2659 // Compute derivative of output node with respect to all
2660 // neural network connections (weights + biases).
2661#ifndef N2P2_FULL_SFD_MEMORY
2662 nn.calculateDFdc(&(dXdc.at(i).front()),
2663 &(dGdxia.front()));
2664#else
2665 nn.calculateDFdc(&(dXdc.at(i).front()),
2666 &(it->dGdxia.front()));
2667#endif
2668 // Finally sum up Jacobian.
2669 if (updateStrategy == US_ELEMENT) iu = i;
2670 else iu = 0;
2671 for (size_t j = 0; j < dXdc.at(i).size(); ++j)
2672 {
2673 pu.jacobian.at(iu).at(offset.at(i) + j) +=
2674 dXdc.at(i).at(j);
2675 }
2676 }
2677
2678 }
2679 // Assume stage 2.
2680 else if (nnpType == NNPType::HDNNP_Q)
2681 {
2682 // TODO: Lots of stuff.
2683 throw runtime_error("ERROR: Not implemented.\n");
2684 }
2685 }
2686 else if (k == "charge")
2687 {
2688 // Assume stage 1.
2689 if (nnpType == NNPType::HDNNP_4G)
2690 {
2691
2692 // Vector for storing all atoms dChi/dc
2693 vector<vector<double>> dChidc;
2694 dChidc.resize(s.numAtoms);
2695 for (size_t k = 0; k < s.numAtoms; ++k)
2696 {
2697 Atom& ak = s.atoms.at(k);
2698 size_t n = elements.at(ak.element).neuralNetworks.at(nnId)
2699 .getNumConnections();
2700 dChidc.at(k).resize(n, 0.0);
2701
2702 NeuralNetwork& nn =
2703 elements.at(ak.element).neuralNetworks.at(nnId);
2704 nn.setInput(&(ak.G.front()));
2705 nn.propagate();
2706 nn.getOutput(&(ak.chi));
2707 // Compute derivative of output node with respect to all
2708 // neural network connections (weights + biases).
2709 nn.calculateDEdc(&(dChidc.at(k).front()));
2710 if (normalize)
2711 {
2712 ak.chi = normalized("negativity", ak.chi);
2713 for (auto& dChidGi : ak.dChidG)
2714 dChidGi = normalized("negativity", dChidGi);
2715 for (auto& dChidci : dChidc.at(k))
2716 dChidci = normalized("negativity", dChidci);
2717 }
2718
2719 }
2720 chargeEquilibration(s, false);
2721
2722 vector<Eigen::VectorXd> dQdChi;
2723 s.calculateDQdChi(dQdChi);
2724 vector<Eigen::VectorXd> dQdJ;
2725 s.calculateDQdJ(dQdJ);
2726
2727 calculateChargeErrorVec(s, QError, QErrorNorm);
2728
2729 if (QErrorNorm != 0)
2730 {
2731 // Finally sum up Jacobian.
2732 for (size_t i = 0; i < s.numAtoms; ++i)
2733 {
2734 // weights
2735 for (size_t k = 0; k < s.numAtoms; ++k)
2736 {
2737 size_t l = s.atoms.at(k).element;
2738 for (size_t j = 0; j < dChidc.at(k).size(); ++j)
2739 {
2740 // 1 / QErrorNorm * (Q-Qref) * dQ/dChi * dChi/dc
2741 pu.jacobian.at(0).at(offset.at(l) + j) +=
2742 1.0 / QErrorNorm * QError(i)
2743 * dQdChi.at(k)(i) * dChidc.at(k).at(j);
2744 }
2745 }
2746 // hardness (actually h, where J=h^2)
2747 for (size_t k = 0; k < numElements; ++k)
2748 {
2749 size_t n = elements.at(k).neuralNetworks.at(nnId)
2750 .getNumConnections();
2751 pu.jacobian.at(0).at(offset.at(k) + n) +=
2752 1.0 / QErrorNorm
2753 * QError(i) * dQdJ.at(k)(i) * 2
2754 * sqrt(elements.at(k).getHardness());
2755 }
2756 }
2757 }
2758 }
2759
2760 else if (nnpType == NNPType::HDNNP_Q)
2761 {
2762 // Shortcut to selected atom.
2763 Atom& a = s.atoms.at(sC->a);
2764 size_t i = a.element;
2765 NeuralNetwork& nn = elements.at(i).neuralNetworks.at(nnId);
2766 nn.setInput(&(a.G.front()));
2767 nn.propagate();
2768 nn.getOutput(&(a.charge));
2769 // Compute derivative of output node with respect to all
2770 // neural network connections (weights + biases).
2771 nn.calculateDEdc(&(dXdc.at(i).front()));
2772 // Finally sum up Jacobian.
2773 if (updateStrategy == US_ELEMENT) iu = i;
2774 else iu = 0;
2775 for (size_t j = 0; j < dXdc.at(i).size(); ++j)
2776 {
2777 pu.jacobian.at(iu).at(offset.at(i) + j) +=
2778 dXdc.at(i).at(j);
2779 }
2780 }
2781 }
2782
2783 // Sum up total potential energy or calculate force.
2784 if (k == "energy")
2785 {
2786 calculateEnergy(s);
2787 currentRmseFraction.at(b) = fabs(s.energyRef - s.energy)
2788 / (s.numAtoms
2789 * pu.errorTrain.at("RMSEpa"));
2790 }
2791 else if (k == "force")
2792 {
2793 calculateForces(s);
2794 Atom const& a = s.atoms.at(sC->a);
2795 currentRmseFraction.at(b) = fabs(a.fRef[sC->c] - a.f[sC->c])
2796 / pu.errorTrain.at("RMSE");
2797 }
2798 else if (k == "charge")
2799 {
2800 if (nnpType == NNPType::HDNNP_4G)
2801 {
2802 currentRmseFraction.at(b) = QErrorNorm / sqrt(s.numAtoms)
2803 / pu.errorTrain.at("RMSE");
2804 }
2805 else
2806 {
2807 Atom const& a = s.atoms.at(sC->a);
2808 currentRmseFraction.at(b) = fabs(a.chargeRef - a.charge)
2809 / pu.errorTrain.at("RMSE");
2810 }
2811 }
2812
2813 // Now symmetry function memory is not required any more for this
2814 // update.
2815 if (freeMemory) s.freeAtoms(true, maxCutoffRadius);
2816 if (nnpType == NNPType::HDNNP_4G && !useSubCandidates)
2817 s.clearElectrostatics();
2818
2819 // Precalculate offset in error array.
2820 size_t offset2 = 0;
2821 if ((parallelMode == PM_TRAIN_ALL) && (jacobianMode != JM_SUM))
2822 {
2823 offset2 += pu.offsetPerTask.at(myRank);
2824 //log << strpr("os 4: %zu ", offset2);
2825 }
2826 if (jacobianMode == JM_FULL)
2827 {
2828 offset2 += b;
2829 //log << strpr("os 5: %zu ", offset2);
2830 }
2831 //log << strpr(" final os: %zu\n", offset2);
2832
2833
2834 // Compute error vector (depends on update strategy).
2835 if (updateStrategy == US_COMBINED)
2836 {
2837 if (k == "energy")
2838 {
2839 pu.error.at(0).at(offset2) += s.energyRef - s.energy;
2840 }
2841 else if (k == "force")
2842 {
2843 Atom const& a = s.atoms.at(sC->a);
2844 pu.error.at(0).at(offset2) += a.fRef[sC->c] - a.f[sC->c];
2845 }
2846 else if (k == "charge")
2847 {
2848 if (nnpType == NNPType::HDNNP_4G)
2849 pu.error.at(0).at(offset2) = -QErrorNorm;
2850 else
2851 {
2852 Atom const& a = s.atoms.at(sC->a);
2853 pu.error.at(0).at(offset2) += a.chargeRef - a.charge;
2854 }
2855 }
2856 }
2857 else if (updateStrategy == US_ELEMENT)
2858 {
2859 for (size_t i = 0; i < numUpdaters; ++i)
2860 {
2861 if (k == "energy")
2862 {
2863 pu.error.at(i).at(offset2) += (s.energyRef - s.energy)
2864 * s.numAtomsPerElement.at(i)
2865 / s.numAtoms;
2866 }
2867 else if (k == "force")
2868 {
2869 Atom const& a = s.atoms.at(sC->a);
2870 pu.error.at(i).at(offset2) += (a.fRef[sC->c] - a.f[sC->c])
2871 * a.numNeighborsPerElement.at(i)
2872 / a.numNeighbors;
2873 }
2874 else if (k == "charge")
2875 {
2876 if (nnpType == NNPType::HDNNP_4G)
2877 {
2878 throw runtime_error("ERROR: US_ELEMENT not implemented"
2879 " for HDNNP_4G.\n");
2880 }
2881 Atom const& a = s.atoms.at(sC->a);
2882 pu.error.at(i).at(offset2) += a.chargeRef - a.charge;
2883 }
2884 }
2885 }
2886 }
2887
2888 // Apply force update weight to error and Jacobian.
2889 if (k == "force")
2890 {
2891 for (size_t i = 0; i < numUpdaters; ++i)
2892 {
2893 for (size_t j = 0; j < pu.error.at(i).size(); ++j)
2894 {
2895 pu.error.at(i).at(j) *= forceWeight;
2896 }
2897 for (size_t j = 0; j < pu.jacobian.at(i).size(); ++j)
2898 {
2899 pu.jacobian.at(i).at(j) *= forceWeight;
2900 }
2901 }
2902 }
2903 sw[k + "_err"].stop();
2904
2905 ///////////////////////////////////////////////////////////////////////
2906 // PART 3: Communicate error and Jacobian.
2907 ///////////////////////////////////////////////////////////////////////
2908
2909 sw[k + "_com"].start(newLoop);
2910 if (jacobianMode == JM_SUM)
2911 {
2912 if (parallelMode == PM_TRAIN_RK0)
2913 {
2914 for (size_t i = 0; i < numUpdaters; ++i)
2915 {
2916 if (myRank == 0) MPI_Reduce(MPI_IN_PLACE , &(pu.error.at(i).front()), 1, MPI_DOUBLE, MPI_SUM, 0, comm);
2917 else MPI_Reduce(&(pu.error.at(i).front()), &(pu.error.at(i).front()), 1, MPI_DOUBLE, MPI_SUM, 0, comm);
2918 if (myRank == 0) MPI_Reduce(MPI_IN_PLACE , &(pu.jacobian.at(i).front()), numWeightsPerUpdater.at(i), MPI_DOUBLE, MPI_SUM, 0, comm);
2919 else MPI_Reduce(&(pu.jacobian.at(i).front()), &(pu.jacobian.at(i).front()), numWeightsPerUpdater.at(i), MPI_DOUBLE, MPI_SUM, 0, comm);
2920 }
2921 }
2922 else if (parallelMode == PM_TRAIN_ALL)
2923 {
2924 for (size_t i = 0; i < numUpdaters; ++i)
2925 {
2926 MPI_Allreduce(MPI_IN_PLACE, &(pu.error.at(i).front()), 1, MPI_DOUBLE, MPI_SUM, comm);
2927 MPI_Allreduce(MPI_IN_PLACE, &(pu.jacobian.at(i).front()), numWeightsPerUpdater.at(i), MPI_DOUBLE, MPI_SUM, comm);
2928 }
2929 }
2930 }
2931 else if (jacobianMode == JM_TASK)
2932 {
2933 if (parallelMode == PM_TRAIN_RK0)
2934 {
2935 for (size_t i = 0; i < numUpdaters; ++i)
2936 {
2937 if (myRank == 0) MPI_Gather(MPI_IN_PLACE , 1, MPI_DOUBLE, &(pu.error.at(i).front()), 1, MPI_DOUBLE, 0, comm);
2938 else MPI_Gather(&(pu.error.at(i).front()), 1, MPI_DOUBLE, NULL , 1, MPI_DOUBLE, 0, comm);
2939 if (myRank == 0) MPI_Gather(MPI_IN_PLACE , numWeightsPerUpdater.at(i), MPI_DOUBLE, &(pu.jacobian.at(i).front()), numWeightsPerUpdater.at(i), MPI_DOUBLE, 0, comm);
2940 else MPI_Gather(&(pu.jacobian.at(i).front()), numWeightsPerUpdater.at(i), MPI_DOUBLE, NULL , numWeightsPerUpdater.at(i), MPI_DOUBLE, 0, comm);
2941 }
2942 }
2943 else if (parallelMode == PM_TRAIN_ALL)
2944 {
2945 for (size_t i = 0; i < numUpdaters; ++i)
2946 {
2947 MPI_Allgather(MPI_IN_PLACE, 1, MPI_DOUBLE, &(pu.error.at(i).front()), 1, MPI_DOUBLE, comm);
2948 MPI_Allgather(MPI_IN_PLACE, numWeightsPerUpdater.at(i), MPI_DOUBLE, &(pu.jacobian.at(i).front()), numWeightsPerUpdater.at(i), MPI_DOUBLE, comm);
2949 }
2950 }
2951 }
2952 else if (jacobianMode == JM_FULL)
2953 {
2954 if (parallelMode == PM_TRAIN_RK0)
2955 {
2956 for (size_t i = 0; i < numUpdaters; ++i)
2957 {
2958 if (myRank == 0) MPI_Gatherv(MPI_IN_PLACE , 0 , MPI_DOUBLE, &(pu.error.at(i).front()), &(pu.errorsPerTask.front()), &(pu.offsetPerTask.front()), MPI_DOUBLE, 0, comm);
2959 else MPI_Gatherv(&(pu.error.at(i).front()), pu.errorsPerTask.at(myRank), MPI_DOUBLE, NULL , NULL , NULL , MPI_DOUBLE, 0, comm);
2960 if (myRank == 0) MPI_Gatherv(MPI_IN_PLACE , 0 , MPI_DOUBLE, &(pu.jacobian.at(i).front()), &(pu.weightsPerTask.at(i).front()), &(pu.offsetJacobian.at(i).front()), MPI_DOUBLE, 0, comm);
2961 else MPI_Gatherv(&(pu.jacobian.at(i).front()), pu.weightsPerTask.at(i).at(myRank), MPI_DOUBLE, NULL , NULL , NULL , MPI_DOUBLE, 0, comm);
2962 }
2963 }
2964 else if (parallelMode == PM_TRAIN_ALL)
2965 {
2966 for (size_t i = 0; i < numUpdaters; ++i)
2967 {
2968 MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DOUBLE, &(pu.error.at(i).front()), &(pu.errorsPerTask.front()), &(pu.offsetPerTask.front()), MPI_DOUBLE, comm);
2969 MPI_Allgatherv(MPI_IN_PLACE, 0, MPI_DOUBLE, &(pu.jacobian.at(i).front()), &(pu.weightsPerTask.at(i).front()), &(pu.offsetJacobian.at(i).front()), MPI_DOUBLE, comm);
2970 }
2971 }
2972 }
2973 sw[k + "_com"].stop();
2974
2975 ///////////////////////////////////////////////////////////////////////
2976 // PART 4: Perform weight update and apply new weights.
2977 ///////////////////////////////////////////////////////////////////////
2978
2979 sw[k + "_upd"].start(newLoop);
2980#ifdef _OPENMP
2981 omp_set_num_threads(num_threads);
2982#endif
2983 // Loop over all updaters.
2984 for (size_t i = 0; i < updaters.size(); ++i)
2985 {
2986 updaters.at(i)->setError(&(pu.error.at(i).front()),
2987 pu.error.at(i).size());
2988 updaters.at(i)->setJacobian(&(pu.jacobian.at(i).front()),
2989 pu.error.at(i).size());
2990 if (updaterType == UT_KF)
2991 {
2992 KalmanFilter* kf = dynamic_cast<KalmanFilter*>(updaters.at(i));
2993 kf->setSizeObservation(pu.error.at(i).size());
2994 }
2995 updaters.at(i)->update();
2996 }
2997 countUpdates++;
2998
2999 // Redistribute weights to all MPI tasks.
3000 if (parallelMode == PM_TRAIN_RK0)
3001 {
3002 for (size_t i = 0; i < numUpdaters; ++i)
3003 {
3004 MPI_Bcast(&(weights.at(i).front()), weights.at(i).size(), MPI_DOUBLE, 0, comm);
3005 }
3006 }
3007
3008 // Set new weights in neural networks.
3009 setWeights();
3010 sw[k + "_upd"].stop();
3011
3012 ///////////////////////////////////////////////////////////////////////
3013 // PART 5: Communicate candidates and RMSE fractions and write log.
3014 ///////////////////////////////////////////////////////////////////////
3015
3016 sw[k + "_log"].start(newLoop);
3017 if (writeTrainingLog)
3018 {
3019 vector<int> procUpdateCandidate;
3020 vector<size_t> indexStructure;
3021 vector<size_t> indexStructureGlobal;
3022 vector<size_t> indexAtom;
3023 vector<size_t> indexCoordinate;
3024
3025 vector<int> currentUpdateCandidatesPerTask;
3026 vector<int> currentUpdateCandidatesOffset;
3027 int myCurrentUpdateCandidates = currentUpdateCandidates.size();
3028
3029 if (myRank == 0)
3030 {
3031 currentUpdateCandidatesPerTask.resize(numProcs, 0);
3032 currentUpdateCandidatesPerTask.at(0) = myCurrentUpdateCandidates;
3033 }
3034 if (myRank == 0) MPI_Gather(MPI_IN_PLACE , 1, MPI_INT, &(currentUpdateCandidatesPerTask.front()), 1, MPI_INT, 0, comm);
3035 else MPI_Gather(&(myCurrentUpdateCandidates), 1, MPI_INT, NULL , 1, MPI_INT, 0, comm);
3036
3037 if (myRank == 0)
3038 {
3039 int totalUpdateCandidates = 0;
3040 for (size_t i = 0; i < currentUpdateCandidatesPerTask.size(); ++i)
3041 {
3042 currentUpdateCandidatesOffset.push_back(totalUpdateCandidates);
3043 totalUpdateCandidates += currentUpdateCandidatesPerTask.at(i);
3044 }
3045 procUpdateCandidate.resize(totalUpdateCandidates, 0);
3046 indexStructure.resize(totalUpdateCandidates, 0);
3047 indexStructureGlobal.resize(totalUpdateCandidates, 0);
3048 indexAtom.resize(totalUpdateCandidates, 0);
3049 indexCoordinate.resize(totalUpdateCandidates, 0);
3050 // Increase the size of these vectors (rank 0 only).
3051 currentRmseFraction.resize(totalUpdateCandidates, 0.0);
3052 thresholdLoopCount.resize(totalUpdateCandidates, 0);
3053 }
3054 else
3055 {
3056 procUpdateCandidate.resize(myCurrentUpdateCandidates, 0);
3057 indexStructure.resize(myCurrentUpdateCandidates, 0);
3058 indexStructureGlobal.resize(myCurrentUpdateCandidates, 0);
3059 indexAtom.resize(myCurrentUpdateCandidates, 0);
3060 indexCoordinate.resize(myCurrentUpdateCandidates, 0);
3061 }
3062 for (int i = 0; i < myCurrentUpdateCandidates; ++i)
3063 {
3064 procUpdateCandidate.at(i) = myRank;
3065 UpdateCandidate* c = NULL;
3066 SubCandidate* sC = NULL;
3067 if (useSubCandidates)
3068 {
3069 c = &(pu.updateCandidates.at(pu.posUpdateCandidates));
3070 sC = &(c->subCandidates.at(currentUpdateCandidates.at(i)));
3071 }
3072 else
3073 {
3074 c = &(pu.updateCandidates.at(currentUpdateCandidates.at(i)));
3075 if (c->subCandidates.size() > 0)
3076 sC = &(c->subCandidates.front());
3077 }
3078 indexStructure.at(i) = c->s;
3079 indexStructureGlobal.at(i) = structures.at(c->s).index;
3080 if (useSubCandidates)
3081 {
3082 indexAtom.at(i) = sC->a;
3083 indexCoordinate.at(i) = sC->c;
3084 }
3085 }
3086 if (myRank == 0)
3087 {
3088 MPI_Gatherv(MPI_IN_PLACE, 0, MPI_DOUBLE, &(currentRmseFraction.front()) , &(currentUpdateCandidatesPerTask.front()), &(currentUpdateCandidatesOffset.front()), MPI_DOUBLE, 0, comm);
3089 MPI_Gatherv(MPI_IN_PLACE, 0, MPI_SIZE_T, &(thresholdLoopCount.front()) , &(currentUpdateCandidatesPerTask.front()), &(currentUpdateCandidatesOffset.front()), MPI_SIZE_T, 0, comm);
3090 MPI_Gatherv(MPI_IN_PLACE, 0, MPI_INT , &(procUpdateCandidate.front()) , &(currentUpdateCandidatesPerTask.front()), &(currentUpdateCandidatesOffset.front()), MPI_INT , 0, comm);
3091 MPI_Gatherv(MPI_IN_PLACE, 0, MPI_SIZE_T, &(indexStructure.front()) , &(currentUpdateCandidatesPerTask.front()), &(currentUpdateCandidatesOffset.front()), MPI_SIZE_T, 0, comm);
3092 MPI_Gatherv(MPI_IN_PLACE, 0, MPI_SIZE_T, &(indexStructureGlobal.front()), &(currentUpdateCandidatesPerTask.front()), &(currentUpdateCandidatesOffset.front()), MPI_SIZE_T, 0, comm);
3093 MPI_Gatherv(MPI_IN_PLACE, 0, MPI_SIZE_T, &(indexAtom.front()) , &(currentUpdateCandidatesPerTask.front()), &(currentUpdateCandidatesOffset.front()), MPI_SIZE_T, 0, comm);
3094 MPI_Gatherv(MPI_IN_PLACE, 0, MPI_SIZE_T, &(indexCoordinate.front()) , &(currentUpdateCandidatesPerTask.front()), &(currentUpdateCandidatesOffset.front()), MPI_SIZE_T, 0, comm);
3095 }
3096 else
3097 {
3098 MPI_Gatherv(&(currentRmseFraction.front()) , myCurrentUpdateCandidates, MPI_DOUBLE, NULL, NULL, NULL, MPI_DOUBLE, 0, comm);
3099 MPI_Gatherv(&(thresholdLoopCount.front()) , myCurrentUpdateCandidates, MPI_SIZE_T, NULL, NULL, NULL, MPI_SIZE_T, 0, comm);
3100 MPI_Gatherv(&(procUpdateCandidate.front()) , myCurrentUpdateCandidates, MPI_INT , NULL, NULL, NULL, MPI_INT , 0, comm);
3101 MPI_Gatherv(&(indexStructure.front()) , myCurrentUpdateCandidates, MPI_SIZE_T, NULL, NULL, NULL, MPI_SIZE_T, 0, comm);
3102 MPI_Gatherv(&(indexStructureGlobal.front()), myCurrentUpdateCandidates, MPI_SIZE_T, NULL, NULL, NULL, MPI_SIZE_T, 0, comm);
3103 MPI_Gatherv(&(indexAtom.front()) , myCurrentUpdateCandidates, MPI_SIZE_T, NULL, NULL, NULL, MPI_SIZE_T, 0, comm);
3104 MPI_Gatherv(&(indexCoordinate.front()) , myCurrentUpdateCandidates, MPI_SIZE_T, NULL, NULL, NULL, MPI_SIZE_T, 0, comm);
3105 }
3106
3107 if (myRank == 0)
3108 {
3109 for (size_t i = 0; i < procUpdateCandidate.size(); ++i)
3110 {
3111 if (k == "energy")
3112 {
3113 addTrainingLogEntry(procUpdateCandidate.at(i),
3114 thresholdLoopCount.at(i),
3115 currentRmseFraction.at(i),
3116 indexStructureGlobal.at(i),
3117 indexStructure.at(i));
3118 }
3119 else if (k == "force")
3120 {
3121 addTrainingLogEntry(procUpdateCandidate.at(i),
3122 thresholdLoopCount.at(i),
3123 currentRmseFraction.at(i),
3124 indexStructureGlobal.at(i),
3125 indexStructure.at(i),
3126 indexAtom.at(i),
3127 indexCoordinate.at(i));
3128 }
3129 else if (k == "charge")
3130 {
3131 addTrainingLogEntry(procUpdateCandidate.at(i),
3132 thresholdLoopCount.at(i),
3133 currentRmseFraction.at(i),
3134 indexStructureGlobal.at(i),
3135 indexStructure.at(i),
3136 indexAtom.at(i));
3137 }
3138 }
3139 }
3140 }
3141 sw[k + "_log"].stop();
3142
3143 // Need to go to next update candidate after numGroupedSubCand is reached.
3144 if (pu.countGroupedSubCand >= pu.numGroupedSubCand && useSubCandidates)
3145 {
3146 pu.countGroupedSubCand = 0;
3147 pu.numGroupedSubCand = 0;
3148 UpdateCandidate& c = pu.updateCandidates.at(pu.posUpdateCandidates);
3149 structures.at(c.s).clearElectrostatics(true);
3150 pu.posUpdateCandidates++;
3151 }
3152
3153 sw[k].stop();
3154
3155 return;
3156}
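// Editor's note: a minimal sketch of the updater hand-off performed in
// PART 4 above, for the combined strategy with a summed Jacobian (JM_SUM):
// a single error entry and one Jacobian row of numWeights columns are
// passed to the updater, which modifies the weight vector in place. The
// setError/setJacobian/update calls mirror those used above; the driver
// function itself is hypothetical. Excluded from compilation via #if 0.
#if 0
#include <vector>
#include "Updater.h"

void updateSketch(nnp::Updater&        u,
                  std::vector<double>& error,    // size 1 for JM_SUM
                  std::vector<double>& jacobian) // numWeights entries
{
    u.setError(error.data(), error.size());
    u.setJacobian(jacobian.data(), error.size());
    u.update(); // writes new weights through the pointer set up earlier
}
#endif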
3157
3158double Training::getSingleWeight(size_t element, size_t index)
3159{
3160 getWeights();
3161
3162 return weights.at(element).at(index);
3163}
3164
3165void Training::setSingleWeight(size_t element, size_t index, double value)
3166{
3167 weights.at(element).at(index) = value;
3168 setWeights();
3169
3170 return;
3171}
3172
3173vector<vector<double> > Training::calculateWeightDerivatives(
3174 Structure* structure)
3175{
3176 Structure& s = *structure;
3177#ifdef N2P2_NO_SF_GROUPS
3178 calculateSymmetryFunctions(s, false);
3179#else
3180 calculateSymmetryFunctionGroups(s, false);
3181#endif
3182
3183 vector<vector<double> > dEdc;
3184 vector<vector<double> > dedc;
3185 dEdc.resize(numElements);
3186 dedc.resize(numElements);
3187 for (size_t i = 0; i < numElements; ++i)
3188 {
3189 size_t n = elements.at(i).neuralNetworks.at("short")
3190 .getNumConnections();
3191 dEdc.at(i).resize(n, 0.0);
3192 dedc.at(i).resize(n, 0.0);
3193 }
3194 for (vector<Atom>::iterator it = s.atoms.begin();
3195 it != s.atoms.end(); ++it)
3196 {
3197 size_t i = it->element;
3198 NeuralNetwork& nn = elements.at(i).neuralNetworks.at("short");
3199 nn.setInput(&((it->G).front()));
3200 nn.propagate();
3201 nn.getOutput(&(it->energy));
3202 nn.calculateDEdc(&(dedc.at(i).front()));
3203 for (size_t j = 0; j < dedc.at(i).size(); ++j)
3204 {
3205 dEdc.at(i).at(j) += dedc.at(i).at(j);
3206 }
3207 }
3208
3209 return dEdc;
3210}
3211
3212// Doxygen requires namespace prefix for arguments...
3213vector<vector<double> > Training::calculateWeightDerivatives(
3214 Structure* structure,
3215 std::size_t atom,
3216 std::size_t component)
3217{
3218 Structure& s = *structure;
3219#ifdef N2P2_NO_SF_GROUPS
3220 calculateSymmetryFunctions(s, true);
3221#else
3222 calculateSymmetryFunctionGroups(s, true);
3223#endif
3224 // TODO: Is charge equilibration necessary here?
3225
3226 vector<vector<double> > dFdc;
3227 vector<vector<double> > dfdc;
3228 dFdc.resize(numElements);
3229 dfdc.resize(numElements);
3230 for (size_t i = 0; i < numElements; ++i)
3231 {
3232 size_t n = elements.at(i).neuralNetworks.at("short")
3233 .getNumConnections();
3234 dFdc.at(i).resize(n, 0.0);
3235 dfdc.at(i).resize(n, 0.0);
3236 }
3237 for (vector<Atom>::iterator it = s.atoms.begin();
3238 it != s.atoms.end(); ++it)
3239 {
3240#ifndef N2P2_FULL_SFD_MEMORY
3241 collectDGdxia((*it), atom, component);
3242#else
3243 it->collectDGdxia(atom, component, maxCutoffRadius);
3244#endif
3245 size_t i = it->element;
3246 NeuralNetwork& nn = elements.at(i).neuralNetworks.at("short");
3247 nn.setInput(&((it->G).front()));
3248 // TODO: what about charge neuron for 4G?
3249 nn.propagate();
3250 nn.getOutput(&(it->energy));
3251#ifndef N2P2_FULL_SFD_MEMORY
3252 nn.calculateDFdc(&(dfdc.at(i).front()), &(dGdxia.front()));
3253#else
3254 nn.calculateDFdc(&(dfdc.at(i).front()), &(it->dGdxia.front()));
3255#endif
3256 for (size_t j = 0; j < dfdc.at(i).size(); ++j)
3257 {
3258 dFdc.at(i).at(j) += dfdc.at(i).at(j);
3259 }
3260 }
3261
3262 return dFdc;
3263}
3264
3265void Training::setTrainingLogFileName(string fileName)
3266{
3267 trainingLogFileName = fileName;
3268
3269 return;
3270}
3271
3272size_t Training::getNumConnections(string id) const
3273{
3274 size_t n = 0;
3275 for (auto const& e : elements)
3276 {
3277 n += e.neuralNetworks.at(id).getNumConnections();
3278 }
3279
3280 return n;
3281}
3282
3283vector<size_t> Training::getNumConnectionsPerElement(string id) const
3284{
3285 vector<size_t> npe;
3286 for (auto const& e : elements)
3287 {
3288 npe.push_back(e.neuralNetworks.at(id).getNumConnections());
3289 }
3290
3291 return npe;
3292}
3293
3294vector<size_t> Training::getConnectionOffsets(string id) const
3295{
3296 vector<size_t> offset;
3297 size_t n = 0;
3298 for (auto const& e : elements)
3299 {
3300 offset.push_back(n);
3301 n += e.neuralNetworks.at(id).getNumConnections();
3302 }
3303
3304 return offset;
3305}
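// Editor's note: a worked example (hypothetical network sizes) for the
// three helpers above. With two elements whose networks for "id" have 100
// and 120 connections, getNumConnections(id) returns 220,
// getNumConnectionsPerElement(id) returns {100, 120} and
// getConnectionOffsets(id) returns {0, 100}; element e thus owns the
// slice [offset[e], offset[e] + npe[e]) of the combined weight vector.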
3306
3307void Training::dPdc(string property,
3308 Structure& structure,
3309 vector<vector<double>>& dPdc)
3310{
3311 auto npe = getNumConnectionsPerElement();
3312 auto off = getConnectionOffsets();
3313 dPdc.clear();
3314
3315 if (property == "energy")
3316 {
3317 dPdc.resize(1);
3318 dPdc.at(0).resize(getNumConnections(), 0.0);
3319 for (auto const& a : structure.atoms)
3320 {
3321 size_t e = a.element;
3322 NeuralNetwork& nn = elements.at(e).neuralNetworks.at(nnId);
3323 nn.setInput(a.G.data());
3324 nn.propagate();
3325 vector<double> tmp(npe.at(e), 0.0);
3326 nn.calculateDEdc(tmp.data());
3327 for (size_t j = 0; j < tmp.size(); ++j)
3328 {
3329 dPdc.at(0).at(off.at(e) + j) += tmp.at(j);
3330 }
3331 }
3332 }
3333 else if (property == "force")
3334 {
3335 dPdc.resize(3 * structure.numAtoms);
3336 size_t count = 0;
3337 for (size_t ia = 0; ia < structure.numAtoms; ++ia)
3338 {
3339 for (size_t ixyz = 0; ixyz < 3; ++ixyz)
3340 {
3341 dPdc.at(count).resize(getNumConnections(), 0.0);
3342 for (auto& a : structure.atoms)
3343 {
3344#ifndef N2P2_FULL_SFD_MEMORY
3345 collectDGdxia(a, ia, ixyz);
3346#else
3347 a.collectDGdxia(ia, ixyz, maxCutoffRadius);
3348#endif
3349 size_t e = a.element;
3350 NeuralNetwork& nn = elements.at(e).neuralNetworks.at(nnId);
3351 nn.setInput(a.G.data());
3352 nn.propagate();
3353 nn.calculateDEdG(a.dEdG.data());
3354 nn.getOutput(&(a.energy));
3355 vector<double> tmp(npe.at(e), 0.0);
3356#ifndef N2P2_FULL_SFD_MEMORY
3357 nn.calculateDFdc(tmp.data(), dGdxia.data());
3358#else
3359 nn.calculateDFdc(tmp.data(), a.dGdxia.data());
3360#endif
3361 for (size_t j = 0; j < tmp.size(); ++j)
3362 {
3363 dPdc.at(count).at(off.at(e) + j) += tmp.at(j);
3364 }
3365 }
3366 count++;
3367 }
3368 }
3369 }
3370 else
3371 {
3372 throw runtime_error("ERROR: Weight derivatives not implemented for "
3373 "property \"" + property + "\".\n");
3374 }
3375
3376 return;
3377}
3378
3379void Training::dPdcN(string property,
3380 Structure& structure,
3381 vector<vector<double>>& dPdc,
3382 double delta)
3383{
3384 auto npe = getNumConnectionsPerElement();
3385 auto off = getConnectionOffsets();
3386 dPdc.clear();
3387
3388 if (property == "energy")
3389 {
3390 dPdc.resize(1);
3391 for (size_t ie = 0; ie < numElements; ++ie)
3392 {
3393 for (size_t ic = 0; ic < npe.at(ie); ++ic)
3394 {
3395 size_t const o = off.at(ie) + ic;
3396 double const w = weights.at(0).at(o);
3397
3398 weights.at(0).at(o) += delta;
3399 setWeights();
3400 calculateAtomicNeuralNetworks(structure, false);
3401 calculateEnergy(structure);
3402 double energyHigh = structure.energy;
3403
3404 weights.at(0).at(o) -= 2.0 * delta;
3405 setWeights();
3406 calculateAtomicNeuralNetworks(structure, false);
3407 calculateEnergy(structure);
3408 double energyLow = structure.energy;
3409
3410 dPdc.at(0).push_back((energyHigh - energyLow) / (2.0 * delta));
3411 weights.at(0).at(o) = w;
3412 }
3413 }
3414 }
3415 else if (property == "force")
3416 {
3417 size_t count = 0;
3418 dPdc.resize(3 * structure.numAtoms);
3419 for (size_t ia = 0; ia < structure.numAtoms; ++ia)
3420 {
3421 for (size_t ixyz = 0; ixyz < 3; ++ixyz)
3422 {
3423 for (size_t ie = 0; ie < numElements; ++ie)
3424 {
3425 for (size_t ic = 0; ic < npe.at(ie); ++ic)
3426 {
3427 size_t const o = off.at(ie) + ic;
3428 double const w = weights.at(0).at(o);
3429
3430 weights.at(0).at(o) += delta;
3431 setWeights();
3432 calculateAtomicNeuralNetworks(structure, true);
3433 calculateForces(structure);
3434 double forceHigh = structure.atoms.at(ia).f[ixyz];
3435
3436 weights.at(0).at(o) -= 2.0 * delta;
3437 setWeights();
3438 calculateAtomicNeuralNetworks(structure, true);
3439 calculateForces(structure);
3440 double forceLow = structure.atoms.at(ia).f[ixyz];
3441
3442 dPdc.at(count).push_back((forceHigh - forceLow)
3443 / (2.0 * delta));
3444 weights.at(0).at(o) = w;
3445 }
3446 }
3447 count++;
3448 }
3449 }
3450 }
3451 else
3452 {
3453 throw runtime_error("ERROR: Numeric weight derivatives not "
3454 "implemented for property \""
3455 + property + "\".\n");
3456 }
3457
3458 return;
3459}
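// Editor's note: a hypothetical usage sketch (not part of n2p2) pairing
// the analytic derivatives from dPdc() with the central differences from
// dPdcN() above. Each numeric entry approximates
// (P(c + delta) - P(c - delta)) / (2 * delta), so for small delta the two
// results should agree to O(delta^2). Excluded from compilation via #if 0.
#if 0
#include <cmath>
#include <cstddef>
#include <iostream>
#include <vector>

void checkEnergyDerivatives(nnp::Training& t, nnp::Structure& s)
{
    std::vector<std::vector<double>> analytic;
    std::vector<std::vector<double>> numeric;
    t.dPdc ("energy", s, analytic);
    t.dPdcN("energy", s, numeric, 1.0E-4);
    for (std::size_t j = 0; j < analytic.at(0).size(); ++j)
    {
        double const d = std::abs(analytic.at(0).at(j) - numeric.at(0).at(j));
        if (d > 1.0E-6) std::cout << "deviation at connection " << j << "\n";
    }
}
#endif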
3460
3461bool Training::advance() const
3462{
3463 if (epoch < numEpochs) return true;
3464 else return false;
3465}
3466
3467void Training::getWeights()
3468{
3469 if (updateStrategy == US_COMBINED)
3470 {
3471 size_t pos = 0;
3472 for (size_t i = 0; i < numElements; ++i)
3473 {
3474 NeuralNetwork const& nn = elements.at(i).neuralNetworks.at(nnId);
3475 nn.getConnections(&(weights.at(0).at(pos)));
3476 pos += nn.getNumConnections();
3477 // Leave slot for sqrt of hardness
3478 if (nnpType == NNPType::HDNNP_4G && stage == 1)
3479 {
3480 // TODO: check that hardness is positive?
3481 weights.at(0).at(pos) = sqrt(elements.at(i).getHardness());
3482 pos ++;
3483 }
3484 }
3485 }
3486 else if (updateStrategy == US_ELEMENT)
3487 {
3488 for (size_t i = 0; i < numElements; ++i)
3489 {
3490 NeuralNetwork const& nn = elements.at(i).neuralNetworks.at(nnId);
3491 nn.getConnections(&(weights.at(i).front()));
3492 }
3493 }
3494
3495 return;
3496}
3497
3498void Training::setWeights()
3499{
3500 if (updateStrategy == US_COMBINED)
3501 {
3502 size_t pos = 0;
3503 for (size_t i = 0; i < numElements; ++i)
3504 {
3505 NeuralNetwork& nn = elements.at(i).neuralNetworks.at(nnId);
3506 nn.setConnections(&(weights.at(0).at(pos)));
3507 pos += nn.getNumConnections();
3508 // hardness
3509 if (nnpType == NNPType::HDNNP_4G && stage == 1)
3510 {
3511 elements.at(i).setHardness(pow(weights.at(0).at(pos),2));
3512 pos ++;
3513 }
3514 }
3515 }
3516 else if (updateStrategy == US_ELEMENT)
3517 {
3518 for (size_t i = 0; i < numElements; ++i)
3519 {
3520 NeuralNetwork& nn = elements.at(i).neuralNetworks.at(nnId);
3521 nn.setConnections(&(weights.at(i).front()));
3522 }
3523 }
3524
3525 return;
3526}
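// Editor's note: layout sketch for the combined weight vector handled by
// getWeights()/setWeights() above (US_COMBINED, HDNNP_4G stage 1). Per
// element, the network connections are followed by one extra slot holding
// the square root of the atomic hardness, e.g. for two elements with n0
// and n1 connections:
//
//   [ c0_0 ... c0_{n0-1} | sqrt(J0) | c1_0 ... c1_{n1-1} | sqrt(J1) ]
//
// Storing sqrt(J) lets the updater work on an unconstrained parameter
// while the hardness J = h^2 restored in setWeights() stays non-negative.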
3527
3528// Doxygen requires namespace prefix for arguments...
3529void Training::addTrainingLogEntry(int proc,
3530 std::size_t il,
3531 double f,
3532 std::size_t isg,
3533 std::size_t is)
3534{
3535 string s = strpr(" E %5zu %10zu %5d %3zu %10.2E %10zu %5zu\n",
3536 epoch, countUpdates, proc, il + 1, f, isg, is);
3537 trainingLog << s;
3538
3539 return;
3540}
3541
3542// Doxygen requires namespace prefix for arguments...
3543void Training::addTrainingLogEntry(int proc,
3544 std::size_t il,
3545 double f,
3546 std::size_t isg,
3547 std::size_t is,
3548 std::size_t ia,
3549 std::size_t ic)
3550{
3551 string s = strpr(" F %5zu %10zu %5d %3zu %10.2E %10zu %5zu %5zu %2zu\n",
3552 epoch, countUpdates, proc, il + 1, f, isg, is, ia, ic);
3553 trainingLog << s;
3554
3555 return;
3556}
3557
3558// Doxygen requires namespace prefix for arguments...
3559void Training::addTrainingLogEntry(int proc,
3560 std::size_t il,
3561 double f,
3562 std::size_t isg,
3563 std::size_t is,
3564 std::size_t ia)
3565{
3566 string s = strpr(" Q %5zu %10zu %5d %3zu %10.2E %10zu %5zu %5zu\n",
3567 epoch, countUpdates, proc, il + 1, f, isg, is, ia);
3568 trainingLog << s;
3569
3570 return;
3571}
3572
3573#ifndef N2P2_FULL_SFD_MEMORY
3574void Training::collectDGdxia(Atom const& atom,
3575 size_t indexAtom,
3576 size_t indexComponent)
3577{
3578 size_t const nsf = atom.numSymmetryFunctions;
3579
3580 // Reset dGdxia array.
3581 dGdxia.clear();
3582 vector<double>(dGdxia).swap(dGdxia);
3583 if (nnpType == NNPType::HDNNP_4G)
3584 dGdxia.resize(nsf + 1, 0.0);
3585 else
3586 dGdxia.resize(nsf, 0.0);
3587
3588 vector<vector<size_t> > const& tableFull
3589 = elements.at(atom.element).getSymmetryFunctionTable();
3590
3591 // TODO: Check if maxCutoffRadius is needed here
3592 for (size_t i = 0; i < atom.numNeighbors; i++)
3593 {
3594 if (atom.neighbors[i].index == indexAtom)
3595 {
3596 Atom::Neighbor const& n = atom.neighbors[i];
3597 vector<size_t> const& table = tableFull.at(n.element);
3598 for (size_t j = 0; j < n.dGdr.size(); ++j)
3599 {
3600 dGdxia[table.at(j)] += n.dGdr[j][indexComponent];
3601 }
3602 }
3603 }
3604 if (atom.index == indexAtom)
3605 {
3606 for (size_t i = 0; i < nsf; ++i)
3607 {
3608 dGdxia[i] += atom.dGdr[i][indexComponent];
3609 }
3610 }
3611
3612 return;
3613}
3614#endif
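// Editor's note on the chain rule behind collectDGdxia(): dGdxia[j]
// accumulates dG_j/dx_{i,a}, the derivative of symmetry function j of
// this atom with respect to component a of atom i's position, summed over
// the atom itself and all neighbor entries referring to atom i.
// calculateDFdc() then combines it with the mixed derivatives
// d2E/(dG_j dc) so that, per atomic contribution, the force derivative is
//
//   dF_{i,a}/dc = - sum_j d2E/(dG_j dc) * dG_j/dx_{i,a}.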
3615
3616void Training::randomizeNeuralNetworkWeights(string const& id)
3617{
3618 string keywordNW = "nguyen_widrow_weights" + nns.at(id).keywordSuffix2;
3619
3620 double minWeights = atof(settings["weights_min"].c_str());
3621 double maxWeights = atof(settings["weights_max"].c_str());
3622 log << strpr("Initial weights selected randomly in interval "
3623 "[%f, %f).\n", minWeights, maxWeights);
3624 vector<double> w;
3625 for (size_t i = 0; i < numElements; ++i)
3626 {
3627 NeuralNetwork& nn = elements.at(i).neuralNetworks.at(id);
3628 w.resize(nn.getNumConnections(), 0);
3629 for (size_t j = 0; j < w.size(); ++j)
3630 {
3631 w.at(j) = minWeights + gsl_rng_uniform(rngGlobal)
3632 * (maxWeights - minWeights);
3633 }
3634 nn.setConnections(&(w.front()));
3635 }
3636 if (settings.keywordExists(keywordNW))
3637 {
3638 log << "Weights modified according to Nguyen Widrow scheme.\n";
3639 for (vector<Element>::iterator it = elements.begin();
3640 it != elements.end(); ++it)
3641 {
3642 NeuralNetwork& nn = it->neuralNetworks.at(id);
3643 nn.modifyConnections(NeuralNetwork::MS_NGUYENWIDROW);
3644 }
3645 }
3646 else if (settings.keywordExists("precondition_weights"))
3647 {
3648 throw runtime_error("ERROR: Preconditioning of weights not yet"
3649 " implemented.\n");
3650 }
3651 else
3652 {
3653 log << "Weights modified according to Glorot Bengio scheme.\n";
3654 //log << "Weights connected to output layer node set to zero.\n";
3655 log << "Biases set to zero.\n";
3656 for (vector<Element>::iterator it = elements.begin();
3657 it != elements.end(); ++it)
3658 {
3659 NeuralNetwork& nn = it->neuralNetworks.at(id);
3660 nn.modifyConnections(NeuralNetwork::MS_GLOROTBENGIO);
3661 //nn->modifyConnections(NeuralNetwork::MS_ZEROOUTPUTWEIGHTS);
3662 nn.modifyConnections(NeuralNetwork::MS_ZEROBIAS);
3663 }
3664 }
3665
3666 return;
3667}
3668
3669void Training::setupSelectionMode(string const& property)
3670{
3671 bool all = (property == "all");
3672 bool isProperty = (find(pk.begin(), pk.end(), property) != pk.end());
3673 if (!(all || isProperty))
3674 {
3675 throw runtime_error("ERROR: Unknown property for selection mode"
3676 " setup.\n");
3677 }
3678
3679 if (all)
3680 {
3681 if (!(settings.keywordExists("selection_mode") ||
3682 settings.keywordExists("rmse_threshold") ||
3683 settings.keywordExists("rmse_threshold_trials"))) return;
3684 log << "Global selection mode settings:\n";
3685 }
3686 else
3687 {
3688 if (!(settings.keywordExists("selection_mode_" + property) ||
3689 settings.keywordExists("rmse_threshold_" + property) ||
3690 settings.keywordExists("rmse_threshold_trials_"
3691 + property))) return;
3692 log << "Selection mode settings specific to property \""
3693 << property << "\":\n";
3694 }
3695 string keyword;
3696 if (all) keyword = "selection_mode";
3697 else keyword = "selection_mode_" + property;
3698
3699 if (settings.keywordExists(keyword))
3700 {
3701 map<size_t, SelectionMode> schedule;
3702 vector<string> args = split(settings[keyword]);
3703 if (args.size() % 2 != 1)
3704 {
3705 throw runtime_error("ERROR: Incorrect selection mode format.\n");
3706 }
3707 schedule[0] = (SelectionMode)atoi(args.at(0).c_str());
3708 for (size_t i = 1; i < args.size(); i = i + 2)
3709 {
3710 schedule[(size_t)atoi(args.at(i).c_str())] =
3711 (SelectionMode)atoi(args.at(i + 1).c_str());
3712 }
3713 for (map<size_t, SelectionMode>::const_iterator it = schedule.begin();
3714 it != schedule.end(); ++it)
3715 {
3716 log << strpr("- Selection mode starting with epoch %zu:\n",
3717 it->first);
3718 if (it->second == SM_RANDOM)
3719 {
3720 log << strpr(" Random selection of update candidates: "
3721 "SelectionMode::SM_RANDOM (%d)\n", it->second);
3722 }
3723 else if (it->second == SM_SORT)
3724 {
3725 log << strpr(" Update candidates selected according to error: "
3726 "SelectionMode::SM_SORT (%d)\n", it->second);
3727 }
3728 else if (it->second == SM_THRESHOLD)
3729 {
3730 log << strpr(" Update candidates chosen randomly above RMSE "
3731 "threshold: SelectionMode::SM_THRESHOLD (%d)\n",
3732 it->second);
3733 }
3734 else
3735 {
3736 throw runtime_error("ERROR: Unknown selection mode.\n");
3737 }
3738 }
3739 if (all)
3740 {
3741 for (auto& i : p)
3742 {
3743 i.second.selectionModeSchedule = schedule;
3744 i.second.selectionMode = schedule[0];
3745 }
3746 }
3747 else
3748 {
3749 p[property].selectionModeSchedule = schedule;
3750 p[property].selectionMode = schedule[0];
3751 }
3752 }
3753
3754 if (all) keyword = "rmse_threshold";
3755 else keyword = "rmse_threshold_" + property;
3756 if (settings.keywordExists(keyword))
3757 {
3758 double t = atof(settings[keyword].c_str());
3759 log << strpr("- RMSE selection threshold: %.2f * RMSE\n", t);
3760 if (all) for (auto& i : p) i.second.rmseThreshold = t;
3761 else p[property].rmseThreshold = t;
3762 }
3763
3764 if (all) keyword = "rmse_threshold_trials";
3765 else keyword = "rmse_threshold_trials_" + property;
3766 if (settings.keywordExists(keyword))
3767 {
3768 size_t t = atoi(settings[keyword].c_str());
3769 log << strpr("- RMSE selection trials : %zu\n", t);
3770 if (all) for (auto& i : p) i.second.rmseThresholdTrials = t;
3771 else p[property].rmseThresholdTrials = t;
3772 }
3773
3774 return;
3775}
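// Editor's note: a worked example (hypothetical settings values) for the
// schedule parsing above, assuming the enum order SM_RANDOM = 0,
// SM_SORT = 1, SM_THRESHOLD = 2. The settings line
//
//   selection_mode 2 10 1 20 2
//
// yields schedule = {0: SM_THRESHOLD, 10: SM_SORT, 20: SM_THRESHOLD}: the
// first argument sets the mode for epoch 0, and each following
// (epoch, mode) pair switches the mode at that epoch.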
3776
3777void Training::setupFileOutput(string const& type)
3778{
3779 string keyword = "write_";
3780 bool isProperty = (find(pk.begin(), pk.end(), type) != pk.end());
3781 if (type == "energy" ) keyword += "trainpoints";
3782 else if (type == "force" ) keyword += "trainforces";
3783 else if (type == "charge" ) keyword += "traincharges";
3784 else if (type == "weights_epoch") keyword += type;
3785 else if (type == "neuronstats" ) keyword += type;
3786 else
3787 {
3788 throw runtime_error("ERROR: Invalid type for file output setup.\n");
3789 }
3790
3791 // Check how often energy comparison files should be written.
3792 if (settings.keywordExists(keyword))
3793 {
3794 size_t* writeEvery = nullptr;
3795 size_t* writeAlways = nullptr;
3796 string message;
3797 if (isProperty)
3798 {
3799 writeEvery = &(p[type].writeCompEvery);
3800 writeAlways = &(p[type].writeCompAlways);
3801 message = "Property \"" + type + "\" comparison";
3802 message.at(0) = toupper(message.at(0));
3803 }
3804 else if (type == "weights_epoch")
3805 {
3806 writeEvery = &writeWeightsEvery;
3807 writeAlways = &writeWeightsAlways;
3808 message = "Weight";
3809 }
3810 else if (type == "neuronstats")
3811 {
3812 writeEvery = &writeNeuronStatisticsEvery;
3813 writeAlways = &writeNeuronStatisticsAlways;
3814 message = "Neuron statistics";
3815 }
3816
3817 *writeEvery = 1;
3818 vector<string> v = split(reduce(settings[keyword]));
3819 if (v.size() == 1) *writeEvery = (size_t)atoi(v.at(0).c_str());
3820 else if (v.size() == 2)
3821 {
3822 *writeEvery = (size_t)atoi(v.at(0).c_str());
3823 *writeAlways = (size_t)atoi(v.at(1).c_str());
3824 }
3825 log << strpr((message
3826 + " files will be written every %zu epochs.\n").c_str(),
3827 *writeEvery);
3828 if (*writeAlways > 0)
3829 {
3830 log << strpr((message
3831 + " files will always be written up to epoch "
3832 "%zu.\n").c_str(), *writeAlways);
3833 }
3834 }
3835
3836 return;
3837}
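// Editor's note: a worked example (hypothetical settings values) for the
// parsing above. The settings line
//
//   write_weights_epoch 5 10
//
// sets writeWeightsEvery = 5 and writeWeightsAlways = 10: weight files
// are then written for every epoch up to epoch 10 and for every 5th
// epoch afterwards.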
3838
3839void Training::setupUpdatePlan(string const& property)
3840{
3841 bool isProperty = (find(pk.begin(), pk.end(), property) != pk.end());
3842 if (!isProperty)
3843 {
3844 throw runtime_error("ERROR: Unknown property for update plan"
3845 " setup.\n");
3846 }
3847
3848 // Actual property modified here.
3849 Property& pa = p[property];
3850 string keyword = property + "_fraction";
3851
3852 // Override the force fraction if the keyword "force_energy_ratio" is provided.
3853 if (property == "force" &&
3854 p.exists("energy") &&
3855 settings.keywordExists("force_energy_ratio"))
3856 {
3857 double const ratio = atof(settings["force_energy_ratio"].c_str());
3858 if (settings.keywordExists(keyword))
3859 {
3860 log << "WARNING: Given force fraction is ignored because "
3861 "force/energy ratio is provided.\n";
3862 }
3863 log << strpr("Desired force/energy update ratio : %.6f\n",
3864 ratio);
3865 log << "----------------------------------------------\n";
3866 pa.epochFraction = (p["energy"].numTrainPatterns * ratio)
3867 / p["force"].numTrainPatterns;
3868 }
3869 // Default action = read "<property>_fraction" keyword.
3870 else
3871 {
3872 pa.epochFraction = atof(settings[keyword].c_str());
3873 }
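// Worked example (numbers assumed for this sketch): with 100 training
// energies, 5000 training force components and force_energy_ratio = 3.0 the
// branch above gives epochFraction = (100 * 3.0) / 5000 = 0.06, i.e. roughly
// 300 force patterns used for every 100 energy patterns (at energy
// fraction 1.0).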
3874
3875 keyword = "task_batch_size_" + property;
3876 pa.taskBatchSize = (size_t)atoi(settings[keyword].c_str());
3877 if (pa.taskBatchSize == 0)
3878    {
3879        pa.patternsPerUpdate =
3880            static_cast<size_t>(pa.updateCandidates.size() * pa.epochFraction);
3881 pa.numUpdates = 1;
3882 }
3883 else
3884    {
3885        pa.patternsPerUpdate = pa.taskBatchSize;
3886        pa.numUpdates =
3887 static_cast<size_t>((pa.numTrainPatterns * pa.epochFraction)
3888 / pa.taskBatchSize / numProcs);
3889 }
3890    pa.patternsPerUpdateGlobal = pa.patternsPerUpdate;
3891    MPI_Allreduce(MPI_IN_PLACE, &(pa.patternsPerUpdateGlobal), 1, MPI_SIZE_T, MPI_SUM, comm);
3892 pa.errorsPerTask.resize(numProcs, 0);
3893 if (jacobianMode == JM_FULL)
3894 {
3895 pa.errorsPerTask.at(myRank) = static_cast<int>(pa.patternsPerUpdate);
3896 }
3897 else
3898 {
3899 pa.errorsPerTask.at(myRank) = 1;
3900 }
3901 MPI_Allgather(MPI_IN_PLACE, 1, MPI_INT, &(pa.errorsPerTask.front()), 1, MPI_INT, comm);
3902 if (jacobianMode == JM_FULL)
3903 {
3904 pa.weightsPerTask.resize(numUpdaters);
3905 for (size_t i = 0; i < numUpdaters; ++i)
3906 {
3907 pa.weightsPerTask.at(i).resize(numProcs, 0);
3908 for (int j = 0; j < numProcs; ++j)
3909 {
3910 pa.weightsPerTask.at(i).at(j) = pa.errorsPerTask.at(j)
3911 * numWeightsPerUpdater.at(i);
3912 }
3913 }
3914 }
3915 pa.numErrorsGlobal = 0;
3916 for (size_t i = 0; i < pa.errorsPerTask.size(); ++i)
3917 {
3918 pa.offsetPerTask.push_back(pa.numErrorsGlobal);
3919 pa.numErrorsGlobal += pa.errorsPerTask.at(i);
3920 }
3921 pa.offsetJacobian.resize(numUpdaters);
3922 for (size_t i = 0; i < numUpdaters; ++i)
3923 {
3924 for (size_t j = 0; j < pa.offsetPerTask.size(); ++j)
3925 {
3926 pa.offsetJacobian.at(i).push_back(pa.offsetPerTask.at(j) *
3927 numWeightsPerUpdater.at(i));
3928 }
3929 }
3930 log << "Update plan for property \"" + property + "\":\n";
3931 log << strpr("- Per-task batch size : %zu\n",
3932 pa.taskBatchSize);
3933 log << strpr("- Fraction of patterns used per epoch : %.6f\n",
3934 pa.epochFraction);
3935 if (pa.numUpdates == 0)
3936 {
3937        log << "WARNING: No updates are planned for this property.\n";
3938 }
3939 log << strpr("- Updates per epoch : %zu\n",
3940 pa.numUpdates);
3941 log << strpr("- Patterns used per update (rank %3d / global) : "
3942                 "%10zu / %zu\n",
3943                 myRank, pa.patternsPerUpdate, pa.patternsPerUpdateGlobal);
3944 log << "----------------------------------------------\n";
3945
3946 return;
3947}
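// Worked example (numbers assumed for this sketch): with numTrainPatterns =
// 1000, epochFraction = 0.1, task_batch_size = 5 and numProcs = 2 the batch
// branch above yields numUpdates = (1000 * 0.1) / 5 / 2 = 10 per epoch, with
// patternsPerUpdate = 5 per task and patternsPerUpdateGlobal = 10 after the
// MPI_Allreduce sum.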
3948
3949void Training::allocateArrays(string const& property)
3950{
3951 bool isProperty = (find(pk.begin(), pk.end(), property) != pk.end());
3952 if (!isProperty)
3953 {
3954 throw runtime_error("ERROR: Unknown property for array allocation.\n");
3955 }
3956
3957 log << "Allocating memory for " + property +
3958 " error vector and Jacobian.\n";
3959 Property& pa = p[property];
3960 pa.error.resize(numUpdaters);
3961 pa.jacobian.resize(numUpdaters);
3962 for (size_t i = 0; i < numUpdaters; ++i)
3963 {
3964 size_t size = 1;
3965 if (( parallelMode == PM_TRAIN_ALL ||
3966             (parallelMode == PM_TRAIN_RK0 && myRank == 0)) &&
3967            jacobianMode != JM_SUM)
3968 {
3969 size *= pa.numErrorsGlobal;
3970 }
3971        else if ((parallelMode == PM_TRAIN_RK0 && myRank != 0) &&
3972                 jacobianMode != JM_SUM)
3973 {
3974 size *= pa.errorsPerTask.at(myRank);
3975 }
3976 pa.error.at(i).resize(size, 0.0);
3977 pa.jacobian.at(i).resize(size * numWeightsPerUpdater.at(i), 0.0);
3978 log << strpr("Updater %3zu:\n", i);
3979 log << strpr(" - Error size: %zu\n", pa.error.at(i).size());
3980 log << strpr(" - Jacobian size: %zu\n", pa.jacobian.at(i).size());
3981 }
3982 log << "----------------------------------------------\n";
3983
3984 return;
3985}
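// Sizing example (numbers assumed for this sketch): with jacobianMode =
// JM_FULL, parallelMode = PM_TRAIN_ALL, numErrorsGlobal = 20 and
// numWeightsPerUpdater[0] = 1000, updater 0 allocates an error vector of 20
// entries and a Jacobian of 20 * 1000 = 20000 entries; with JM_SUM both
// collapse to 1 and 1000 entries, respectively.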
3986
3987void Training::writeTimingData(bool append, string const fileName)
3988{
3989 ofstream file;
3990 string fileNameActual = fileName;
3991    if (nnpType == NNPType::HDNNP_4G ||
3992        nnpType == NNPType::HDNNP_Q)
3993 {
3994 fileNameActual += strpr(".stage-%zu", stage);
3995 }
3996
3997 vector<string> sub = {"_err", "_com", "_upd", "_log"};
3998 if (append) file.open(fileNameActual.c_str(), ofstream::app);
3999 else
4000 {
4001 file.open(fileNameActual.c_str());
4002
4003 // File header.
4004 vector<string> title;
4005 vector<string> colName;
4006 vector<string> colInfo;
4007 vector<size_t> colSize;
4008 title.push_back("Timing data for training loop.");
4009 colSize.push_back(10);
4010 colName.push_back("epoch");
4011 colInfo.push_back("Current epoch.");
4012 colSize.push_back(11);
4013 colName.push_back("train");
4014 colInfo.push_back("Time for training.");
4015 colSize.push_back(7);
4016 colName.push_back("ptrain");
4017 colInfo.push_back("Time for training (percentage of loop).");
4018 colSize.push_back(11);
4019 colName.push_back("error");
4020 colInfo.push_back("Time for error calculation.");
4021 colSize.push_back(7);
4022 colName.push_back("perror");
4023 colInfo.push_back("Time for error calculation (percentage of loop).");
4024 colSize.push_back(11);
4025 colName.push_back("epoch");
4026 colInfo.push_back("Time for this epoch.");
4027 colSize.push_back(11);
4028 colName.push_back("total");
4029 colInfo.push_back("Total time for all epochs.");
4030 for (auto k : pk)
4031 {
4032 colSize.push_back(11);
4033 colName.push_back(p[k].tiny + "train");
4034 colInfo.push_back("");
4035 colSize.push_back(7);
4036 colName.push_back(p[k].tiny + "ptrain");
4037 colInfo.push_back("");
4038 }
4039 for (auto s : sub)
4040 {
4041 for (auto k : pk)
4042 {
4043 colSize.push_back(11);
4044 colName.push_back(p[k].tiny + s);
4045 colInfo.push_back("");
4046 colSize.push_back(7);
4047 colName.push_back(p[k].tiny + "p" + s);
4048 colInfo.push_back("");
4049 }
4050 }
4051 appendLinesToFile(file,
4052 createFileHeader(title, colSize, colName, colInfo));
4053 }
4054
4055 double timeLoop = sw["loop"].getLoop();
4056 file << strpr("%10zu", epoch);
4057 file << strpr(" %11.3E", sw["train"].getLoop());
4058 file << strpr(" %7.3f", sw["train"].getLoop() / timeLoop);
4059 file << strpr(" %11.3E", sw["error"].getLoop());
4060 file << strpr(" %7.3f", sw["error"].getLoop() / timeLoop);
4061 file << strpr(" %11.3E", timeLoop);
4062 file << strpr(" %11.3E", sw["loop"].getTotal());
4063
4064 for (auto k : pk)
4065 {
4066 file << strpr(" %11.3E", sw[k].getLoop());
4067 file << strpr(" %7.3f", sw[k].getLoop() / sw["train"].getLoop());
4068 }
4069 for (auto s : sub)
4070 {
4071 for (auto k : pk)
4072 {
4073 file << strpr(" %11.3E", sw[k + s].getLoop());
4074 file << strpr(" %7.3f", sw[k + s].getLoop() / sw[k].getLoop());
4075 }
4076 }
4077 file << "\n";
4078
4079 file.flush();
4080 file.close();
4081
4082 return;
4083}
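// Column-name sketch (derived from the loops above): for the energy property
// (tiny = "E") the header gains the pair "Etrain" / "Eptrain", and for each
// sub-timing, e.g. "_err", the pair "E_err" / "Ep_err", i.e. the absolute
// time and its fraction of the enclosing clock.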
4084
4085Training::Property::Property(string const& property) :
4086 property (property ),
4087 displayMetric ("" ),
4088 tiny ("" ),
4089 plural ("" ),
4090 selectionMode (SM_RANDOM),
4091 numTrainPatterns (0 ),
4092 numTestPatterns (0 ),
4093 taskBatchSize (0 ),
4094 writeCompEvery (0 ),
4095 writeCompAlways (0 ),
4096 posUpdateCandidates (0 ),
4097 numGroupedSubCand (0 ),
4098 countGroupedSubCand (0 ),
4099 rmseThresholdTrials (0 ),
4100 countUpdates (0 ),
4101 numUpdates (0 ),
4102 patternsPerUpdate (0 ),
4103 patternsPerUpdateGlobal(0 ),
4104 numErrorsGlobal (0 ),
4105 epochFraction (0.0 ),
4106 rmseThreshold (0.0 )
4107{
4108 if (property == "energy")
4109 {
4110 tiny = "E";
4111 plural = "energies";
4112 errorMetrics = {"RMSEpa", "RMSE", "MAEpa", "MAE"};
4113 }
4114 else if (property == "force")
4115 {
4116 tiny = "F";
4117 plural = "forces";
4118 errorMetrics = {"RMSE", "MAE"};
4119 }
4120 else if (property == "charge")
4121 {
4122 tiny = "Q";
4123 plural = "charges";
4124 errorMetrics = {"RMSE", "MAE"};
4125 }
4126 else
4127 {
4128 throw runtime_error("ERROR: Unknown training property.\n");
4129 }
4130
4131 // Set up error metrics
4132 for (auto m : errorMetrics)
4133 {
4134 errorTrain[m] = 0.0;
4135 errorTest[m] = 0.0;
4136 }
4137
4138}
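// Usage sketch (assumed call, not from this file):
//
//   Training::Property pe("energy");
//   // pe.tiny == "E", pe.plural == "energies", and pe.errorTrain["RMSEpa"]
//   // starts at 0.0; an unknown key such as Property("dipole") throws
//   // std::runtime_error.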