n2p2 - A neural network potential package
Loading...
Searching...
No Matches
Training.h
Go to the documentation of this file.
1// n2p2 - A neural network potential package
2// Copyright (C) 2018 Andreas Singraber (University of Vienna)
3//
4// This program is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// This program is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12// GNU General Public License for more details.
13//
14// You should have received a copy of the GNU General Public License
15// along with this program. If not, see <https://www.gnu.org/licenses/>.
16
17#ifndef TRAINING_H
18#define TRAINING_H
19
20#include "Atom.h"
21#include "Dataset.h"
22#include "Stopwatch.h"
23#include "Updater.h"
24#include <cstddef> // std::size_t
25#include <fstream> // std::ofstream
26#include <map> // std::map
27#include <random> // std::mt19937_64
28#include <string> // std::string
29#include <vector> // std::vector
30
31namespace nnp
32{
33
35class Training : public Dataset
36{
37public:
48
56 {
63 //PM_DATASET,
81 };
82
93
102
113
115 Training();
117 ~Training();
122 void selectSets();
126 void writeSetsToFiles();
129 void initializeWeights();
134 void initializeWeightsMemory(UpdateStrategy updateStrategy
135 = US_COMBINED);
140 void setStage(std::size_t stage);
146 void setupTraining();
149 std::vector<
150 std::string> setupNumericDerivCheck();
161 void calculateError(
162 std::map<std::string,
163 std::pair<
164 std::string, std::string>> const fileNames);
170 void calculateErrorEpoch();
177 void calculateChargeErrorVec( Structure const &s,
178 Eigen::VectorXd &cVec,
179 double &cNorm);
182 void printHeader();
185 void printEpoch();
191 void writeWeights(
192 std::string const& nnName,
193 std::string const& fileNameFormat) const;
196 void writeWeightsEpoch() const;
201 void writeHardness(
202 std::string const& fileNameFormat) const;
205 void writeHardnessEpoch() const;
206
212 void writeLearningCurve(bool append,
213 std::string const fileName
214 = "learning-curve.out") const;
221 std::string const& nnName,
222 std::string const& fileName) const;
225 void writeNeuronStatisticsEpoch() const;
234 void writeUpdaterStatus(bool append,
235 std::string const fileNameFormat
236 = "updater.%03zu.out") const;
241 void sortUpdateCandidates(std::string const& property);
246 void shuffleUpdateCandidates(std::string const& property);
249 void checkSelectionMode();
252 void loop();
255 void setEpochSchedule();
260 void update(std::string const& property);
271 double getSingleWeight(std::size_t element,
272 std::size_t index);
282 void setSingleWeight(std::size_t element,
283 std::size_t index,
284 double value);
294 std::vector<
295 std::vector<double> > calculateWeightDerivatives(Structure* structure);
307 std::vector<
308 std::vector<double> > calculateWeightDerivatives(Structure* structure,
309 std::size_t atom,
310 std::size_t component);
315 void setTrainingLogFileName(std::string fileName);
322 std::size_t getNumConnections(std::string id = "short") const;
329 std::vector<
330 std::size_t> getNumConnectionsPerElement(
331 std::string id = "short") const;
338 std::vector<
339 std::size_t> getConnectionOffsets(std::string id = "short") const;
348 void dPdc(std::string property,
349 Structure& structure,
350 std::vector<std::vector<double>>& dEdc);
360 void dPdcN(
361 std::string property,
362 Structure& structure,
363 std::vector<std::vector<double>>& dEdc,
364 double delta = 1.0E-4);
365
366private:
370 {
372 std::size_t a;
374 std::size_t c;
376 double error;
377
379 bool operator<(SubCandidate const& rhs) const {
380 return this->error > rhs.error;
381 }
382 };
383
385 {
387 std::size_t s;
390 double error;
392 std::size_t posSubCandidates;
393
396 std::vector<SubCandidate> subCandidates;
397
399 bool operator<(UpdateCandidate const& rhs) const {
400 return this->error > rhs.error;
401 }
402 };
403
405 struct Property
406 {
408 Property(std::string const& property);
409
411 std::string property;
413 std::string displayMetric;
415 std::string tiny;
417 std::string plural;
421 std::size_t numTrainPatterns;
423 std::size_t numTestPatterns;
425 std::size_t taskBatchSize;
427 std::size_t writeCompEvery;
429 std::size_t writeCompAlways;
434 std::size_t numGroupedSubCand;
441 std::size_t countUpdates;
443 std::size_t numUpdates;
445 std::size_t patternsPerUpdate;
449 std::size_t numErrorsGlobal;
455 std::vector<int> errorsPerTask;
457 std::vector<int> offsetPerTask;
459 std::vector<std::string> errorMetrics;
461 std::map<
462 std::string, double> errorTrain;
464 std::map<
465 std::string, double> errorTest;
467 std::vector<UpdateCandidate> updateCandidates;
469 std::vector<
470 std::vector<int>> weightsPerTask;
472 std::vector<
473 std::vector<int>> offsetJacobian;
475 std::vector<
476 std::vector<double>> error;
478 std::vector<
479 std::vector<double>> jacobian;
481 std::map<
483 };
484
486 struct PropertyMap : std::map<std::string, Property>
487 {
489 Property& operator[](std::string const& key) {return this->at(key);}
491 Property const& operator[](std::string const& key) const
492 {
493 return this->at(key);
494 }
495
496 bool exists(std::string const& key)
497 {
498 return (this->find(key) != this->end());
499 }
500 };
501
523 std::size_t stage;
525 std::size_t numUpdaters;
527 std::size_t numEpochs;
529 std::size_t epoch;
531 std::size_t writeWeightsEvery;
539 std::size_t countUpdates;
542 std::size_t numWeights;
548 std::string nnId;
550 std::ofstream trainingLog;
553 std::vector<int> epochSchedule;
556 std::vector<std::size_t> numWeightsPerUpdater;
559 std::vector<std::size_t> weightsOffset;
561 std::vector<std::string> pk;
562#ifndef N2P2_FULL_SFD_MEMORY
565 std::vector<double> dGdxia;
566#endif
569 std::vector<
570 std::vector<double> > weights;
572 std::vector<Updater*> updaters;
574 std::map<
575 std::string, Stopwatch> sw;
577 std::mt19937_64 rngNew;
579 std::mt19937_64 rngGlobalNew;
582
583
588 bool advance() const;
591 void getWeights();
594 void setWeights();
603 void addTrainingLogEntry(int proc,
604 std::size_t il,
605 double f,
606 std::size_t isg,
607 std::size_t is);
618 void addTrainingLogEntry(int proc,
619 std::size_t il,
620 double f,
621 std::size_t isg,
622 std::size_t is,
623 std::size_t ia,
624 std::size_t ic);
634 void addTrainingLogEntry(int proc,
635 std::size_t il,
636 double f,
637 std::size_t isg,
638 std::size_t is,
639 std::size_t ia);
640#ifndef N2P2_FULL_SFD_MEMORY
659 void collectDGdxia(Atom const& atom,
660 std::size_t indexAtom,
661 std::size_t indexComponent);
662#endif
667 void randomizeNeuralNetworkWeights(std::string const& id);
672 void setupSelectionMode(std::string const& property);
677 void setupFileOutput(std::string const& type);
682 void setupUpdatePlan(std::string const& property);
687 void allocateArrays(std::string const& property);
693 void writeTimingData(bool append,
694 std::string const fileName = "timing.out");
695};
696
697}
698
699#endif
Dataset()
Constructor, initialize members.
Definition Dataset.cpp:36
Implements a simple stopwatch on different platforms.
Definition Stopwatch.h:39
void setTrainingLogFileName(std::string fileName)
Set training log file name.
UpdaterType updaterType
Updater type used.
Definition Training.h:503
std::vector< double > dGdxia
Derivative of symmetry functions with respect to one specific atom coordinate.
Definition Training.h:565
std::size_t epoch
Current epoch.
Definition Training.h:529
std::vector< std::size_t > numWeightsPerUpdater
Number of weights per updater.
Definition Training.h:556
std::size_t writeWeightsAlways
Up to this epoch weights are written every epoch.
Definition Training.h:533
std::size_t numWeights
Total number of weights.
Definition Training.h:542
bool useForces
Use forces for training.
Definition Training.h:515
std::size_t writeWeightsEvery
Write weights every this many epochs.
Definition Training.h:531
void setupTraining()
General training settings and setup of weight update routine.
Definition Training.cpp:737
std::mt19937_64 rngGlobalNew
Global random number generator.
Definition Training.h:579
std::string nnId
ID of neural network the training is working on.
Definition Training.h:548
void writeHardness(std::string const &fileNameFormat) const
Write hardness to files (one file for each element).
void allocateArrays(std::string const &property)
Allocate error and Jacobian arrays for given property.
void writeWeightsEpoch() const
Write weights to files during training loop.
std::ofstream trainingLog
Training log file.
Definition Training.h:550
bool hasUpdaters
If this rank performs weight updates.
Definition Training.h:511
void checkSelectionMode()
Check if selection mode should be changed.
std::string trainingLogFileName
File name for training log.
Definition Training.h:546
JacobianMode jacobianMode
Jacobian mode used.
Definition Training.h:507
void selectSets()
Randomly select training and test set structures.
Definition Training.cpp:82
JacobianMode
Jacobian matrix preparation mode.
Definition Training.h:85
@ JM_SUM
No Jacobian, sum up contributions from update candidates.
Definition Training.h:87
@ JM_TASK
Prepare one Jacobian entry for each task, sum up within tasks.
Definition Training.h:89
@ JM_FULL
Prepare full Jacobian matrix.
Definition Training.h:91
void randomizeNeuralNetworkWeights(std::string const &id)
Randomly initialize specificy neural network weights.
std::size_t countUpdates
Update counter (for all training quantities together).
Definition Training.h:539
std::map< std::string, Stopwatch > sw
Stopwatches for timing overview.
Definition Training.h:575
void setupSelectionMode(std::string const &property)
Set selection mode for specific training property.
void dPdc(std::string property, Structure &structure, std::vector< std::vector< double > > &dEdc)
Compute derivatives of property with respect to weights.
void printHeader()
Print training loop header on screen.
std::mt19937_64 rngNew
Per-task random number generator.
Definition Training.h:577
PropertyMap p
Actual training properties.
Definition Training.h:581
std::vector< Updater * > updaters
Weight updater (combined or for each element).
Definition Training.h:572
std::size_t writeNeuronStatisticsAlways
Up to this epoch neuron statistics are written every epoch.
Definition Training.h:537
UpdaterType
Type of update routine.
Definition Training.h:40
@ UT_GD
Simple gradient descent methods.
Definition Training.h:42
@ UT_KF
Kalman filter-based methods.
Definition Training.h:44
@ UT_LM
Levenberg-Marquardt algorithm.
Definition Training.h:46
double forceWeight
Force update weight.
Definition Training.h:544
void update(std::string const &property)
Perform one update.
void dataSetNormalization()
Apply normalization based on initial weights prediction.
Definition Training.cpp:429
ParallelMode
Training parallelization mode.
Definition Training.h:56
@ PM_TRAIN_RK0
No training parallelization, only data set distribution.
Definition Training.h:71
@ PM_TRAIN_ALL
Parallel gradient computation, update on each task.
Definition Training.h:80
bool writeTrainingLog
Whether training log file is written.
Definition Training.h:521
std::size_t numUpdaters
Number of updaters (depends on update strategy).
Definition Training.h:525
bool advance() const
Check if training loop should be continued.
~Training()
Destructor, updater vector needs to be cleaned.
Definition Training.cpp:64
void sortUpdateCandidates(std::string const &property)
Sort update candidates with descending RMSE.
void writeNeuronStatisticsEpoch() const
Write neuron statistics during training loop.
SelectionMode
How update candidates are selected during Training.
Definition Training.h:105
@ SM_RANDOM
Select candidates randomly.
Definition Training.h:107
@ SM_THRESHOLD
Select candidates randomly with RMSE above threshold.
Definition Training.h:111
@ SM_SORT
Sort candidates according to their RMSE and pick worst first.
Definition Training.h:109
std::size_t stage
Training stage.
Definition Training.h:523
void collectDGdxia(Atom const &atom, std::size_t indexAtom, std::size_t indexComponent)
Collect derivative of symmetry functions with respect to one atom's coordinate.
void writeHardnessEpoch() const
Write hardness to files during training loop.
void setWeights()
Set weights in neural network class.
void loop()
Execute main training loop.
void printEpoch()
Print preferred error metric and timing information on screen.
std::vector< std::size_t > weightsOffset
Offset of each element's weights in combined array.
Definition Training.h:559
std::size_t numEpochs
Number of epochs requested.
Definition Training.h:527
void setupFileOutput(std::string const &type)
Set file output intervals for properties and other quantities.
std::size_t getNumConnections(std::string id="short") const
Get total number of NN connections.
void addTrainingLogEntry(int proc, std::size_t il, double f, std::size_t isg, std::size_t is)
Write energy update data to training log file.
void writeTimingData(bool append, std::string const fileName="timing.out")
Write timing data for all clocks.
std::size_t writeNeuronStatisticsEvery
Write neuron statistics every this many epochs.
Definition Training.h:535
void getWeights()
Get weights from neural network class.
void dPdcN(std::string property, Structure &structure, std::vector< std::vector< double > > &dEdc, double delta=1.0E-4)
Compute numeric derivatives of property with respect to weights.
std::vector< std::string > setupNumericDerivCheck()
Set up numeric weight derivatives check.
void setSingleWeight(std::size_t element, std::size_t index, double value)
Set a single weight value.
void shuffleUpdateCandidates(std::string const &property)
Shuffle update candidates.
void calculateChargeErrorVec(Structure const &s, Eigen::VectorXd &cVec, double &cNorm)
Calculate vector of charge errors in one structure.
double getSingleWeight(std::size_t element, std::size_t index)
Get a single weight value.
void initializeWeights()
Initialize weights for all elements.
Definition Training.cpp:292
void initializeWeightsMemory(UpdateStrategy updateStrategy=US_COMBINED)
Initialize weights vector according to update strategy.
Definition Training.cpp:330
bool repeatedEnergyUpdates
After force update perform energy update for corresponding structure.
Definition Training.h:517
UpdateStrategy updateStrategy
Update strategy used.
Definition Training.h:509
void calculateNeighborLists()
Calculate neighbor lists for all structures.
ParallelMode parallelMode
Parallelization mode used.
Definition Training.h:505
void writeNeuronStatistics(std::string const &nnName, std::string const &fileName) const
Write neuron statistics collected since last invocation.
std::vector< std::size_t > getConnectionOffsets(std::string id="short") const
Get offsets of NN connections for each element.
std::vector< int > epochSchedule
Update schedule epoch (stage 1: 0 = charge update; stage 2: 0 = energy update, 1 = force update (opti...
Definition Training.h:553
void writeUpdaterStatus(bool append, std::string const fileNameFormat="updater.%03zu.out") const
Write updater information to file.
UpdateStrategy
Update strategies available for Training.
Definition Training.h:96
@ US_COMBINED
One combined updater for all elements.
Definition Training.h:98
@ US_ELEMENT
Separate updaters for individual elements.
Definition Training.h:100
std::vector< std::size_t > getNumConnectionsPerElement(std::string id="short") const
Get number of NN connections for each element.
void resetNeuronStatistics()
Reset neuron statistics for all elements.
std::vector< std::vector< double > > calculateWeightDerivatives(Structure *structure)
Calculate derivatives of energy with respect to weights.
void calculateError(std::map< std::string, std::pair< std::string, std::string > > const fileNames)
Calculate error metrics for all structures.
void writeLearningCurve(bool append, std::string const fileName="learning-curve.out") const
Write current RMSEs and epoch information to file.
void calculateErrorEpoch()
Calculate error metrics per epoch for all structures with file names used in training loop.
std::vector< std::string > pk
Vector of actually used training properties.
Definition Training.h:561
void setEpochSchedule()
Select energies/forces schedule for one epoch.
bool hasStructures
If this rank holds structure information.
Definition Training.h:513
std::vector< std::vector< double > > weights
Neural network weights and biases for each element.
Definition Training.h:570
bool freeMemory
Free symmetry function memory after calculation.
Definition Training.h:519
void writeSetsToFiles()
Write training and test set to separate files (train.data and test.data, same format as input....
Definition Training.cpp:230
void writeWeights(std::string const &nnName, std::string const &fileNameFormat) const
Write weights to files (one file for each element).
Training()
Constructor.
Definition Training.cpp:38
void setStage(std::size_t stage)
Set training stage (if multiple stages are needed for NNP type).
Definition Training.cpp:379
void setupUpdatePlan(std::string const &property)
Set up how often properties are updated.
Definition Atom.h:29
Storage for a single atom.
Definition Atom.h:33
Storage for one atomic configuration.
Definition Structure.h:39
Map of all training properties.
Definition Training.h:487
Property const & operator[](std::string const &key) const
Overload [] operator to simplify access (const version).
Definition Training.h:491
Property & operator[](std::string const &key)
Overload [] operator to simplify access.
Definition Training.h:489
bool exists(std::string const &key)
Check if property is present.
Definition Training.h:496
Specific training quantity (e.g. energies, forces, charges).
Definition Training.h:406
std::size_t countGroupedSubCand
Counter for number of used subcandidates belonging to same update candidate.
Definition Training.h:437
std::size_t numErrorsGlobal
Global number of errors per update.
Definition Training.h:449
std::size_t numTrainPatterns
Number of training patterns in set.
Definition Training.h:421
std::vector< int > errorsPerTask
Errors per task for each update.
Definition Training.h:455
std::size_t writeCompAlways
Up to this epoch comparison is written every epoch.
Definition Training.h:429
std::string property
Copy of identifier within Property map.
Definition Training.h:411
SelectionMode selectionMode
Selection mode for update candidates.
Definition Training.h:419
std::size_t numUpdates
Number of desired updates per epoch.
Definition Training.h:443
double rmseThreshold
RMSE threshold for update candidates.
Definition Training.h:453
std::string plural
Plural string of property;.
Definition Training.h:417
double epochFraction
Desired update fraction per epoch.
Definition Training.h:451
std::size_t patternsPerUpdate
Patterns used per update.
Definition Training.h:445
std::size_t taskBatchSize
Batch size for each MPI task.
Definition Training.h:425
std::vector< std::vector< double > > error
Global error vector (per updater).
Definition Training.h:476
std::string displayMetric
Error metric for display.
Definition Training.h:413
std::string tiny
Tiny abbreviation string for property.
Definition Training.h:415
std::vector< std::string > errorMetrics
Error metrics available for this property.
Definition Training.h:459
Property(std::string const &property)
Constructor.
std::vector< std::vector< int > > offsetJacobian
Stride for Jacobians per task per updater.
Definition Training.h:473
std::size_t numTestPatterns
Number of training patterns in set.
Definition Training.h:423
std::size_t countUpdates
Counter for updates per epoch.
Definition Training.h:441
std::size_t writeCompEvery
Write comparison every this many epochs.
Definition Training.h:427
std::vector< std::vector< int > > weightsPerTask
Weights per task per updater.
Definition Training.h:470
std::size_t numGroupedSubCand
Number of subcandidates which are considered before changing the update candidate.
Definition Training.h:434
std::vector< int > offsetPerTask
Offset for combined error per task.
Definition Training.h:457
std::vector< UpdateCandidate > updateCandidates
Vector with indices of training patterns.
Definition Training.h:467
std::map< std::string, double > errorTrain
Current error metrics of training patterns.
Definition Training.h:462
std::vector< std::vector< double > > jacobian
Global Jacobian (per updater).
Definition Training.h:479
std::size_t patternsPerUpdateGlobal
Patterns used per update (summed over all MPI tasks).
Definition Training.h:447
std::map< std::string, double > errorTest
Current error metrics of test patterns.
Definition Training.h:465
std::map< std::size_t, SelectionMode > selectionModeSchedule
Schedule for varying selection mode.
Definition Training.h:482
std::size_t rmseThresholdTrials
Maximum trials for SM_THRESHOLD selection mode.
Definition Training.h:439
std::size_t posUpdateCandidates
Current position in update candidate list (SM_SORT/_THRESHOLD).
Definition Training.h:431
Contains update candidate which is grouped with others to specific parent update candidate (e....
Definition Training.h:370
bool operator<(SubCandidate const &rhs) const
Overload < operator to sort in descending order.
Definition Training.h:379
std::size_t a
Atom index (only used for force candidates).
Definition Training.h:372
std::size_t c
Component index (x,y,z -> 0,1,2, only used for force candidates).
Definition Training.h:374
double error
Absolute value of error with respect to reference value.
Definition Training.h:376
Contains location of one update candidate (energy or force).
Definition Training.h:385
bool operator<(UpdateCandidate const &rhs) const
Overload < operator to sort in descending order.
Definition Training.h:399
double error
Absolute value of error with respect to reference value (in case of subcandidates,...
Definition Training.h:390
std::vector< SubCandidate > subCandidates
Vector containing grouped candidates.
Definition Training.h:396
std::size_t posSubCandidates
Current position in sub-candidate list (SM_SORT/_THRESHOLD).
Definition Training.h:392
std::size_t s
Structure index.
Definition Training.h:387