n2p2 - A neural network potential package
Training.h
Go to the documentation of this file.
1// n2p2 - A neural network potential package
2// Copyright (C) 2018 Andreas Singraber (University of Vienna)
3//
4// This program is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// This program is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12// GNU General Public License for more details.
13//
14// You should have received a copy of the GNU General Public License
15// along with this program. If not, see <https://www.gnu.org/licenses/>.
16
17#ifndef TRAINING_H
18#define TRAINING_H
19
20#include "Atom.h"
21#include "Dataset.h"
22#include "Stopwatch.h"
23#include "Updater.h"
24#include <cstddef> // std::size_t
25#include <fstream> // std::ofstream
26#include <map> // std::map
27#include <random> // std::mt19937_64
28#include <string> // std::string
29#include <vector> // std::vector
30
31namespace nnp
32{
33
35class Training : public Dataset
36{
37public:
40 {
46 UT_LM
47 };
48
56 {
63 //PM_DATASET,
81 };
82
85 {
92 };
93
96 {
101 };
102
105 {
112 };
113
115 Training();
117 ~Training();
122 void selectSets();
126 void writeSetsToFiles();
129 void initializeWeights();
135 = US_COMBINED);
140 void setStage(std::size_t stage);
146 void setupTraining();
149 std::vector<
150 std::string> setupNumericDerivCheck();
161 void calculateError(
162 std::map<std::string,
163 std::pair<
164 std::string, std::string>> const fileNames);
170 void calculateErrorEpoch();
177 void calculateChargeErrorVec( Structure const &s,
178 Eigen::VectorXd &cVec,
179 double &cNorm);
182 void printHeader();
185 void printEpoch();
191 void writeWeights(
192 std::string const& nnName,
193 std::string const& fileNameFormat) const;
196 void writeWeightsEpoch() const;
201 void writeHardness(
202 std::string const& fileNameFormat) const;
205 void writeHardnessEpoch() const;
206
212 void writeLearningCurve(bool append,
213 std::string const fileName
214 = "learning-curve.out") const;
221 std::string const& nnName,
222 std::string const& fileName) const;
225 void writeNeuronStatisticsEpoch() const;
234 void writeUpdaterStatus(bool append,
235 std::string const fileNameFormat
236 = "updater.%03zu.out") const;
241 void sortUpdateCandidates(std::string const& property);
246 void shuffleUpdateCandidates(std::string const& property);
249 void checkSelectionMode();
252 void loop();
255 void setEpochSchedule();
260 void update(std::string const& property);
271 double getSingleWeight(std::size_t element,
272 std::size_t index);
282 void setSingleWeight(std::size_t element,
283 std::size_t index,
284 double value);
294 std::vector<
295 std::vector<double> > calculateWeightDerivatives(Structure* structure);
307 std::vector<
308 std::vector<double> > calculateWeightDerivatives(Structure* structure,
309 std::size_t atom,
310 std::size_t component);
315 void setTrainingLogFileName(std::string fileName);
322 std::size_t getNumConnections(std::string id = "short") const;
329 std::vector<
330 std::size_t> getNumConnectionsPerElement(
331 std::string id = "short") const;
338 std::vector<
339 std::size_t> getConnectionOffsets(std::string id = "short") const;
348 void dPdc(std::string property,
349 Structure& structure,
350 std::vector<std::vector<double>>& dEdc);
360 void dPdcN(
361 std::string property,
362 Structure& structure,
363 std::vector<std::vector<double>>& dEdc,
364 double delta = 1.0E-4);
365
366private:
370 {
372 std::size_t a;
374 std::size_t c;
376 double error;
377
379 bool operator<(SubCandidate const& rhs) const {
380 return this->error > rhs.error;
381 }
382 };
385 {
387 std::size_t s;
390 double error;
392 std::size_t posSubCandidates;
393
396 std::vector<SubCandidate> subCandidates;
397
399 bool operator<(UpdateCandidate const& rhs) const {
400 return this->error > rhs.error;
401 }
402 };
403
405 struct Property
406 {
408 Property(std::string const& property);
409
411 std::string property;
413 std::string displayMetric;
415 std::string tiny;
417 std::string plural;
421 std::size_t numTrainPatterns;
423 std::size_t numTestPatterns;
425 std::size_t taskBatchSize;
427 std::size_t writeCompEvery;
429 std::size_t writeCompAlways;
434 std::size_t numGroupedSubCand;
441 std::size_t countUpdates;
443 std::size_t numUpdates;
445 std::size_t patternsPerUpdate;
449 std::size_t numErrorsGlobal;
455 std::vector<int> errorsPerTask;
457 std::vector<int> offsetPerTask;
459 std::vector<std::string> errorMetrics;
461 std::map<
462 std::string, double> errorTrain;
464 std::map<
465 std::string, double> errorTest;
467 std::vector<UpdateCandidate> updateCandidates;
469 std::vector<
470 std::vector<int>> weightsPerTask;
472 std::vector<
473 std::vector<int>> offsetJacobian;
475 std::vector<
476 std::vector<double>> error;
478 std::vector<
479 std::vector<double>> jacobian;
481 std::map<
483 };
484
486 struct PropertyMap : std::map<std::string, Property>
487 {
489 Property& operator[](std::string const& key) {return this->at(key);}
491 Property const& operator[](std::string const& key) const
492 {
493 return this->at(key);
494 }
496 bool exists(std::string const& key)
497 {
498 return (this->find(key) != this->end());
499 }
500 };
501
523 std::size_t stage;
525 std::size_t numUpdaters;
527 std::size_t numEpochs;
529 std::size_t epoch;
531 std::size_t writeWeightsEvery;
539 std::size_t countUpdates;
542 std::size_t numWeights;
548 std::string nnId;
550 std::ofstream trainingLog;
553 std::vector<int> epochSchedule;
556 std::vector<std::size_t> numWeightsPerUpdater;
559 std::vector<std::size_t> weightsOffset;
561 std::vector<std::string> pk;
562#ifndef N2P2_FULL_SFD_MEMORY
565 std::vector<double> dGdxia;
566#endif
569 std::vector<
570 std::vector<double> > weights;
572 std::vector<Updater*> updaters;
574 std::map<
575 std::string, Stopwatch> sw;
577 std::mt19937_64 rngNew;
579 std::mt19937_64 rngGlobalNew;
582
583
588 bool advance() const;
591 void getWeights();
594 void setWeights();
603 void addTrainingLogEntry(int proc,
604 std::size_t il,
605 double f,
606 std::size_t isg,
607 std::size_t is);
618 void addTrainingLogEntry(int proc,
619 std::size_t il,
620 double f,
621 std::size_t isg,
622 std::size_t is,
623 std::size_t ia,
624 std::size_t ic);
634 void addTrainingLogEntry(int proc,
635 std::size_t il,
636 double f,
637 std::size_t isg,
638 std::size_t is,
639 std::size_t ia);
640#ifndef N2P2_FULL_SFD_MEMORY
659 void collectDGdxia(Atom const& atom,
660 std::size_t indexAtom,
661 std::size_t indexComponent);
662#endif
667 void randomizeNeuralNetworkWeights(std::string const& id);
672 void setupSelectionMode(std::string const& property);
677 void setupFileOutput(std::string const& type);
682 void setupUpdatePlan(std::string const& property);
687 void allocateArrays(std::string const& property);
693 void writeTimingData(bool append,
694 std::string const fileName = "timing.out");
695};
696
697}
698
699#endif
Collect and process large data sets.
Definition: Dataset.h:35
Implements a simple stopwatch on different platforms.
Definition: Stopwatch.h:39
Training methods.
Definition: Training.h:36
void setTrainingLogFileName(std::string fileName)
Set training log file name.
Definition: Training.cpp:3265
UpdaterType updaterType
Updater type used.
Definition: Training.h:503
std::vector< double > dGdxia
Derivative of symmetry functions with respect to one specific atom coordinate.
Definition: Training.h:565
std::size_t epoch
Current epoch.
Definition: Training.h:529
std::vector< std::size_t > numWeightsPerUpdater
Number of weights per updater.
Definition: Training.h:556
std::size_t writeWeightsAlways
Up to this epoch weights are written every epoch.
Definition: Training.h:533
std::size_t numWeights
Total number of weights.
Definition: Training.h:542
bool useForces
Use forces for training.
Definition: Training.h:515
std::size_t writeWeightsEvery
Write weights every this many epochs.
Definition: Training.h:531
void setupTraining()
General training settings and setup of weight update routine.
Definition: Training.cpp:737
std::mt19937_64 rngGlobalNew
Global random number generator.
Definition: Training.h:579
std::string nnId
ID of neural network the training is working on.
Definition: Training.h:548
void writeHardness(std::string const &fileNameFormat) const
Write hardness to files (one file for each element).
Definition: Training.cpp:1702
void allocateArrays(std::string const &property)
Allocate error and Jacobian arrays for given property.
Definition: Training.cpp:3949
void writeWeightsEpoch() const
Write weights to files during training loop.
Definition: Training.cpp:1679
std::ofstream trainingLog
Training log file.
Definition: Training.h:550
bool hasUpdaters
If this rank performs weight updates.
Definition: Training.h:511
void checkSelectionMode()
Check if selection mode should be changed.
Definition: Training.cpp:2114
std::string trainingLogFileName
File name for training log.
Definition: Training.h:546
JacobianMode jacobianMode
Jacobian mode used.
Definition: Training.h:507
void selectSets()
Randomly select training and test set structures.
Definition: Training.cpp:82
JacobianMode
Jacobian matrix preparation mode.
Definition: Training.h:85
@ JM_SUM
No Jacobian, sum up contributions from update candidates.
Definition: Training.h:87
@ JM_TASK
Prepare one Jacobian entry for each task, sum up within tasks.
Definition: Training.h:89
@ JM_FULL
Prepare full Jacobian matrix.
Definition: Training.h:91
void randomizeNeuralNetworkWeights(std::string const &id)
Randomly initialize specificy neural network weights.
Definition: Training.cpp:3616
std::size_t countUpdates
Update counter (for all training quantities together).
Definition: Training.h:539
std::map< std::string, Stopwatch > sw
Stopwatches for timing overview.
Definition: Training.h:575
void setupSelectionMode(std::string const &property)
Set selection mode for specific training property.
Definition: Training.cpp:3669
void dPdc(std::string property, Structure &structure, std::vector< std::vector< double > > &dEdc)
Compute derivatives of property with respect to weights.
Definition: Training.cpp:3307
void printHeader()
Print training loop header on screen.
Definition: Training.cpp:1558
std::mt19937_64 rngNew
Per-task random number generator.
Definition: Training.h:577
PropertyMap p
Actual training properties.
Definition: Training.h:581
std::vector< Updater * > updaters
Weight updater (combined or for each element).
Definition: Training.h:572
std::size_t writeNeuronStatisticsAlways
Up to this epoch neuron statistics are written every epoch.
Definition: Training.h:537
UpdaterType
Type of update routine.
Definition: Training.h:40
@ UT_GD
Simple gradient descent methods.
Definition: Training.h:42
@ UT_KF
Kalman filter-based methods.
Definition: Training.h:44
@ UT_LM
Levenberg-Marquardt algorithm.
Definition: Training.h:46
double forceWeight
Force update weight.
Definition: Training.h:544
void update(std::string const &property)
Perform one update.
Definition: Training.cpp:2256
void dataSetNormalization()
Apply normalization based on initial weights prediction.
Definition: Training.cpp:429
ParallelMode
Training parallelization mode.
Definition: Training.h:56
@ PM_TRAIN_RK0
No training parallelization, only data set distribution.
Definition: Training.h:71
@ PM_TRAIN_ALL
Parallel gradient computation, update on each task.
Definition: Training.h:80
bool writeTrainingLog
Whether training log file is written.
Definition: Training.h:521
std::size_t numUpdaters
Number of updaters (depends on update strategy).
Definition: Training.h:525
bool advance() const
Check if training loop should be continued.
Definition: Training.cpp:3461
~Training()
Destructor, updater vector needs to be cleaned.
Definition: Training.cpp:64
void sortUpdateCandidates(std::string const &property)
Sort update candidates with descending RMSE.
Definition: Training.cpp:2008
void writeNeuronStatisticsEpoch() const
Write neuron statistics during training loop.
Definition: Training.cpp:1943
SelectionMode
How update candidates are selected during Training.
Definition: Training.h:105
@ SM_RANDOM
Select candidates randomly.
Definition: Training.h:107
@ SM_THRESHOLD
Select candidates randomly with RMSE above threshold.
Definition: Training.h:111
@ SM_SORT
Sort candidates according to their RMSE and pick worst first.
Definition: Training.h:109
std::size_t stage
Training stage.
Definition: Training.h:523
void collectDGdxia(Atom const &atom, std::size_t indexAtom, std::size_t indexComponent)
Collect derivative of symmetry functions with respect to one atom's coordinate.
Definition: Training.cpp:3574
void writeHardnessEpoch() const
Write hardness to files during training loop.
Definition: Training.cpp:1720
void setWeights()
Set weights in neural network class.
Definition: Training.cpp:3498
void loop()
Execute main training loop.
Definition: Training.cpp:2147
void printEpoch()
Print preferred error metric and timing information on screen.
Definition: Training.cpp:1616
std::vector< std::size_t > weightsOffset
Offset of each element's weights in combined array.
Definition: Training.h:559
std::size_t numEpochs
Number of epochs requested.
Definition: Training.h:527
void setupFileOutput(std::string const &type)
Set file output intervals for properties and other quantities.
Definition: Training.cpp:3777
std::size_t getNumConnections(std::string id="short") const
Get total number of NN connections.
Definition: Training.cpp:3272
void addTrainingLogEntry(int proc, std::size_t il, double f, std::size_t isg, std::size_t is)
Write energy update data to training log file.
Definition: Training.cpp:3529
void writeTimingData(bool append, std::string const fileName="timing.out")
Write timing data for all clocks.
Definition: Training.cpp:3987
std::size_t writeNeuronStatisticsEvery
Write neuron statistics every this many epochs.
Definition: Training.h:535
void getWeights()
Get weights from neural network class.
Definition: Training.cpp:3467
void dPdcN(std::string property, Structure &structure, std::vector< std::vector< double > > &dEdc, double delta=1.0E-4)
Compute numeric derivatives of property with respect to weights.
Definition: Training.cpp:3379
std::vector< std::string > setupNumericDerivCheck()
Set up numeric weight derivatives check.
Definition: Training.cpp:1209
void setSingleWeight(std::size_t element, std::size_t index, double value)
Set a single weight value.
Definition: Training.cpp:3165
void shuffleUpdateCandidates(std::string const &property)
Shuffle update candidates.
Definition: Training.cpp:2062
void calculateChargeErrorVec(Structure const &s, Eigen::VectorXd &cVec, double &cNorm)
Calculate vector of charge errors in one structure.
Definition: Training.cpp:1545
double getSingleWeight(std::size_t element, std::size_t index)
Get a single weight value.
Definition: Training.cpp:3158
void initializeWeights()
Initialize weights for all elements.
Definition: Training.cpp:292
void initializeWeightsMemory(UpdateStrategy updateStrategy=US_COMBINED)
Initialize weights vector according to update strategy.
Definition: Training.cpp:330
bool repeatedEnergyUpdates
After force update perform energy update for corresponding structure.
Definition: Training.h:517
UpdateStrategy updateStrategy
Update strategy used.
Definition: Training.h:509
void calculateNeighborLists()
Calculate neighbor lists for all structures.
Definition: Training.cpp:1247
ParallelMode parallelMode
Parallelization mode used.
Definition: Training.h:505
void writeNeuronStatistics(std::string const &nnName, std::string const &fileName) const
Write neuron statistics collected since last invocation.
Definition: Training.cpp:1844
std::vector< std::size_t > getConnectionOffsets(std::string id="short") const
Get offsets of NN connections for each element.
Definition: Training.cpp:3294
std::vector< int > epochSchedule
Update schedule epoch (stage 1: 0 = charge update; stage 2: 0 = energy update, 1 = force update (opti...
Definition: Training.h:553
void writeUpdaterStatus(bool append, std::string const fileNameFormat="updater.%03zu.out") const
Write updater information to file.
Definition: Training.cpp:1972
UpdateStrategy
Update strategies available for Training.
Definition: Training.h:96
@ US_COMBINED
One combined updater for all elements.
Definition: Training.h:98
@ US_ELEMENT
Separate updaters for individual elements.
Definition: Training.h:100
std::vector< std::size_t > getNumConnectionsPerElement(std::string id="short") const
Get number of NN connections for each element.
Definition: Training.cpp:3283
void resetNeuronStatistics()
Reset neuron statistics for all elements.
Definition: Training.cpp:1962
std::vector< std::vector< double > > calculateWeightDerivatives(Structure *structure)
Calculate derivatives of energy with respect to weights.
Definition: Training.cpp:3174
void calculateError(std::map< std::string, std::pair< std::string, std::string > > const fileNames)
Calculate error metrics for all structures.
Definition: Training.cpp:1310
void writeLearningCurve(bool append, std::string const fileName="learning-curve.out") const
Write current RMSEs and epoch information to file.
Definition: Training.cpp:1734
void calculateErrorEpoch()
Calculate error metrics per epoch for all structures with file names used in training loop.
Definition: Training.cpp:1515
std::vector< std::string > pk
Vector of actually used training properties.
Definition: Training.h:561
void setEpochSchedule()
Select energies/forces schedule for one epoch.
Definition: Training.cpp:2086
bool hasStructures
If this rank holds structure information.
Definition: Training.h:513
std::vector< std::vector< double > > weights
Neural network weights and biases for each element.
Definition: Training.h:570
bool freeMemory
Free symmetry function memory after calculation.
Definition: Training.h:519
void writeSetsToFiles()
Write training and test set to separate files (train.data and test.data, same format as input....
Definition: Training.cpp:230
void writeWeights(std::string const &nnName, std::string const &fileNameFormat) const
Write weights to files (one file for each element).
Definition: Training.cpp:1662
Training()
Constructor.
Definition: Training.cpp:38
void setStage(std::size_t stage)
Set training stage (if multiple stages are needed for NNP type).
Definition: Training.cpp:379
void setupUpdatePlan(std::string const &property)
Set up how often properties are updated.
Definition: Training.cpp:3839
Definition: Atom.h:29
Storage for a single atom.
Definition: Atom.h:33
Storage for one atomic configuration.
Definition: Structure.h:39
Map of all training properties.
Definition: Training.h:487
Property const & operator[](std::string const &key) const
Overload [] operator to simplify access (const version).
Definition: Training.h:491
Property & operator[](std::string const &key)
Overload [] operator to simplify access.
Definition: Training.h:489
bool exists(std::string const &key)
Check if property is present.
Definition: Training.h:496
Specific training quantity (e.g. energies, forces, charges).
Definition: Training.h:406
std::size_t countGroupedSubCand
Counter for number of used subcandidates belonging to same update candidate.
Definition: Training.h:437
std::size_t numErrorsGlobal
Global number of errors per update.
Definition: Training.h:449
std::size_t numTrainPatterns
Number of training patterns in set.
Definition: Training.h:421
std::vector< int > errorsPerTask
Errors per task for each update.
Definition: Training.h:455
std::size_t writeCompAlways
Up to this epoch comparison is written every epoch.
Definition: Training.h:429
std::string property
Copy of identifier within Property map.
Definition: Training.h:411
SelectionMode selectionMode
Selection mode for update candidates.
Definition: Training.h:419
std::size_t numUpdates
Number of desired updates per epoch.
Definition: Training.h:443
double rmseThreshold
RMSE threshold for update candidates.
Definition: Training.h:453
std::string plural
Plural string of property;.
Definition: Training.h:417
double epochFraction
Desired update fraction per epoch.
Definition: Training.h:451
std::size_t patternsPerUpdate
Patterns used per update.
Definition: Training.h:445
std::size_t taskBatchSize
Batch size for each MPI task.
Definition: Training.h:425
std::vector< std::vector< double > > error
Global error vector (per updater).
Definition: Training.h:476
std::string displayMetric
Error metric for display.
Definition: Training.h:413
std::string tiny
Tiny abbreviation string for property.
Definition: Training.h:415
std::vector< std::string > errorMetrics
Error metrics available for this property.
Definition: Training.h:459
Property(std::string const &property)
Constructor.
Definition: Training.cpp:4085
std::vector< std::vector< int > > offsetJacobian
Stride for Jacobians per task per updater.
Definition: Training.h:473
std::size_t numTestPatterns
Number of training patterns in set.
Definition: Training.h:423
std::size_t countUpdates
Counter for updates per epoch.
Definition: Training.h:441
std::size_t writeCompEvery
Write comparison every this many epochs.
Definition: Training.h:427
std::vector< std::vector< int > > weightsPerTask
Weights per task per updater.
Definition: Training.h:470
std::size_t numGroupedSubCand
Number of subcandidates which are considered before changing the update candidate.
Definition: Training.h:434
std::vector< int > offsetPerTask
Offset for combined error per task.
Definition: Training.h:457
std::vector< UpdateCandidate > updateCandidates
Vector with indices of training patterns.
Definition: Training.h:467
std::map< std::string, double > errorTrain
Current error metrics of training patterns.
Definition: Training.h:462
std::vector< std::vector< double > > jacobian
Global Jacobian (per updater).
Definition: Training.h:479
std::size_t patternsPerUpdateGlobal
Patterns used per update (summed over all MPI tasks).
Definition: Training.h:447
std::map< std::string, double > errorTest
Current error metrics of test patterns.
Definition: Training.h:465
std::map< std::size_t, SelectionMode > selectionModeSchedule
Schedule for varying selection mode.
Definition: Training.h:482
std::size_t rmseThresholdTrials
Maximum trials for SM_THRESHOLD selection mode.
Definition: Training.h:439
std::size_t posUpdateCandidates
Current position in update candidate list (SM_SORT/_THRESHOLD).
Definition: Training.h:431
Contains update candidate which is grouped with others to specific parent update candidate (e....
Definition: Training.h:370
bool operator<(SubCandidate const &rhs) const
Overload < operator to sort in descending order.
Definition: Training.h:379
std::size_t a
Atom index (only used for force candidates).
Definition: Training.h:372
std::size_t c
Component index (x,y,z -> 0,1,2, only used for force candidates).
Definition: Training.h:374
double error
Absolute value of error with respect to reference value.
Definition: Training.h:376
Contains location of one update candidate (energy or force).
Definition: Training.h:385
bool operator<(UpdateCandidate const &rhs) const
Overload < operator to sort in descending order.
Definition: Training.h:399
double error
Absolute value of error with respect to reference value (in case of subcandidates,...
Definition: Training.h:390
std::vector< SubCandidate > subCandidates
Vector containing grouped candidates.
Definition: Training.h:396
std::size_t posSubCandidates
Current position in sub-candidate list (SM_SORT/_THRESHOLD).
Definition: Training.h:392
std::size_t s
Structure index.
Definition: Training.h:387