162 std::map<std::string,
164 std::string, std::string>>
const fileNames);
178 Eigen::VectorXd &cVec,
192 std::string
const& nnName,
193 std::string
const& fileNameFormat)
const;
202 std::string
const& fileNameFormat)
const;
213 std::string
const fileName
214 =
"learning-curve.out")
const;
221 std::string
const& nnName,
222 std::string
const& fileName)
const;
235 std::string
const fileNameFormat
236 =
"updater.%03zu.out")
const;
260 void update(std::string
const& property);
310 std::size_t component);
331 std::string
id =
"short")
const;
348 void dPdc(std::string property,
350 std::vector<std::vector<double>>& dEdc);
361 std::string property,
363 std::vector<std::vector<double>>& dEdc,
364 double delta = 1.0E-4);
380 return this->error > rhs.
error;
400 return this->error > rhs.
error;
493 return this->at(key);
498 return (this->find(key) != this->end());
561 std::vector<std::string>
pk;
562#ifndef N2P2_FULL_SFD_MEMORY
640#ifndef N2P2_FULL_SFD_MEMORY
660 std::size_t indexAtom,
661 std::size_t indexComponent);
694 std::string
const fileName =
"timing.out");
Collect and process large data sets.
Implements a simple stopwatch on different platforms.
void setTrainingLogFileName(std::string fileName)
Set training log file name.
UpdaterType updaterType
Updater type used.
std::vector< double > dGdxia
Derivative of symmetry functions with respect to one specific atom coordinate.
std::size_t epoch
Current epoch.
std::vector< std::size_t > numWeightsPerUpdater
Number of weights per updater.
std::size_t writeWeightsAlways
Up to this epoch weights are written every epoch.
std::size_t numWeights
Total number of weights.
bool useForces
Use forces for training.
std::size_t writeWeightsEvery
Write weights every this many epochs.
void setupTraining()
General training settings and setup of weight update routine.
std::mt19937_64 rngGlobalNew
Global random number generator.
std::string nnId
ID of neural network the training is working on.
void writeHardness(std::string const &fileNameFormat) const
Write hardness to files (one file for each element).
void allocateArrays(std::string const &property)
Allocate error and Jacobian arrays for given property.
void writeWeightsEpoch() const
Write weights to files during training loop.
std::ofstream trainingLog
Training log file.
bool hasUpdaters
If this rank performs weight updates.
void checkSelectionMode()
Check if selection mode should be changed.
std::string trainingLogFileName
File name for training log.
JacobianMode jacobianMode
Jacobian mode used.
void selectSets()
Randomly select training and test set structures.
JacobianMode
Jacobian matrix preparation mode.
@ JM_SUM
No Jacobian, sum up contributions from update candidates.
@ JM_TASK
Prepare one Jacobian entry for each task, sum up within tasks.
@ JM_FULL
Prepare full Jacobian matrix.
void randomizeNeuralNetworkWeights(std::string const &id)
Randomly initialize specificy neural network weights.
std::size_t countUpdates
Update counter (for all training quantities together).
std::map< std::string, Stopwatch > sw
Stopwatches for timing overview.
void setupSelectionMode(std::string const &property)
Set selection mode for specific training property.
void dPdc(std::string property, Structure &structure, std::vector< std::vector< double > > &dEdc)
Compute derivatives of property with respect to weights.
void printHeader()
Print training loop header on screen.
std::mt19937_64 rngNew
Per-task random number generator.
PropertyMap p
Actual training properties.
std::vector< Updater * > updaters
Weight updater (combined or for each element).
std::size_t writeNeuronStatisticsAlways
Up to this epoch neuron statistics are written every epoch.
UpdaterType
Type of update routine.
@ UT_GD
Simple gradient descent methods.
@ UT_KF
Kalman filter-based methods.
@ UT_LM
Levenberg-Marquardt algorithm.
double forceWeight
Force update weight.
void update(std::string const &property)
Perform one update.
void dataSetNormalization()
Apply normalization based on initial weights prediction.
ParallelMode
Training parallelization mode.
@ PM_TRAIN_RK0
No training parallelization, only data set distribution.
@ PM_TRAIN_ALL
Parallel gradient computation, update on each task.
bool writeTrainingLog
Whether training log file is written.
std::size_t numUpdaters
Number of updaters (depends on update strategy).
bool advance() const
Check if training loop should be continued.
~Training()
Destructor, updater vector needs to be cleaned.
void sortUpdateCandidates(std::string const &property)
Sort update candidates with descending RMSE.
void writeNeuronStatisticsEpoch() const
Write neuron statistics during training loop.
SelectionMode
How update candidates are selected during Training.
@ SM_RANDOM
Select candidates randomly.
@ SM_THRESHOLD
Select candidates randomly with RMSE above threshold.
@ SM_SORT
Sort candidates according to their RMSE and pick worst first.
std::size_t stage
Training stage.
void collectDGdxia(Atom const &atom, std::size_t indexAtom, std::size_t indexComponent)
Collect derivative of symmetry functions with respect to one atom's coordinate.
void writeHardnessEpoch() const
Write hardness to files during training loop.
void setWeights()
Set weights in neural network class.
void loop()
Execute main training loop.
void printEpoch()
Print preferred error metric and timing information on screen.
std::vector< std::size_t > weightsOffset
Offset of each element's weights in combined array.
std::size_t numEpochs
Number of epochs requested.
void setupFileOutput(std::string const &type)
Set file output intervals for properties and other quantities.
std::size_t getNumConnections(std::string id="short") const
Get total number of NN connections.
void addTrainingLogEntry(int proc, std::size_t il, double f, std::size_t isg, std::size_t is)
Write energy update data to training log file.
void writeTimingData(bool append, std::string const fileName="timing.out")
Write timing data for all clocks.
std::size_t writeNeuronStatisticsEvery
Write neuron statistics every this many epochs.
void getWeights()
Get weights from neural network class.
void dPdcN(std::string property, Structure &structure, std::vector< std::vector< double > > &dEdc, double delta=1.0E-4)
Compute numeric derivatives of property with respect to weights.
std::vector< std::string > setupNumericDerivCheck()
Set up numeric weight derivatives check.
void setSingleWeight(std::size_t element, std::size_t index, double value)
Set a single weight value.
void shuffleUpdateCandidates(std::string const &property)
Shuffle update candidates.
void calculateChargeErrorVec(Structure const &s, Eigen::VectorXd &cVec, double &cNorm)
Calculate vector of charge errors in one structure.
double getSingleWeight(std::size_t element, std::size_t index)
Get a single weight value.
void initializeWeights()
Initialize weights for all elements.
void initializeWeightsMemory(UpdateStrategy updateStrategy=US_COMBINED)
Initialize weights vector according to update strategy.
bool repeatedEnergyUpdates
After force update perform energy update for corresponding structure.
UpdateStrategy updateStrategy
Update strategy used.
void calculateNeighborLists()
Calculate neighbor lists for all structures.
ParallelMode parallelMode
Parallelization mode used.
void writeNeuronStatistics(std::string const &nnName, std::string const &fileName) const
Write neuron statistics collected since last invocation.
std::vector< std::size_t > getConnectionOffsets(std::string id="short") const
Get offsets of NN connections for each element.
std::vector< int > epochSchedule
Update schedule epoch (stage 1: 0 = charge update; stage 2: 0 = energy update, 1 = force update (opti...
void writeUpdaterStatus(bool append, std::string const fileNameFormat="updater.%03zu.out") const
Write updater information to file.
UpdateStrategy
Update strategies available for Training.
@ US_COMBINED
One combined updater for all elements.
@ US_ELEMENT
Separate updaters for individual elements.
std::vector< std::size_t > getNumConnectionsPerElement(std::string id="short") const
Get number of NN connections for each element.
void resetNeuronStatistics()
Reset neuron statistics for all elements.
std::vector< std::vector< double > > calculateWeightDerivatives(Structure *structure)
Calculate derivatives of energy with respect to weights.
void calculateError(std::map< std::string, std::pair< std::string, std::string > > const fileNames)
Calculate error metrics for all structures.
void writeLearningCurve(bool append, std::string const fileName="learning-curve.out") const
Write current RMSEs and epoch information to file.
void calculateErrorEpoch()
Calculate error metrics per epoch for all structures with file names used in training loop.
std::vector< std::string > pk
Vector of actually used training properties.
void setEpochSchedule()
Select energies/forces schedule for one epoch.
bool hasStructures
If this rank holds structure information.
std::vector< std::vector< double > > weights
Neural network weights and biases for each element.
bool freeMemory
Free symmetry function memory after calculation.
void writeSetsToFiles()
Write training and test set to separate files (train.data and test.data, same format as input....
void writeWeights(std::string const &nnName, std::string const &fileNameFormat) const
Write weights to files (one file for each element).
void setStage(std::size_t stage)
Set training stage (if multiple stages are needed for NNP type).
void setupUpdatePlan(std::string const &property)
Set up how often properties are updated.
Storage for a single atom.
Storage for one atomic configuration.
Map of all training properties.
Property const & operator[](std::string const &key) const
Overload [] operator to simplify access (const version).
Property & operator[](std::string const &key)
Overload [] operator to simplify access.
bool exists(std::string const &key)
Check if property is present.
Specific training quantity (e.g. energies, forces, charges).
std::size_t countGroupedSubCand
Counter for number of used subcandidates belonging to same update candidate.
std::size_t numErrorsGlobal
Global number of errors per update.
std::size_t numTrainPatterns
Number of training patterns in set.
std::vector< int > errorsPerTask
Errors per task for each update.
std::size_t writeCompAlways
Up to this epoch comparison is written every epoch.
std::string property
Copy of identifier within Property map.
SelectionMode selectionMode
Selection mode for update candidates.
std::size_t numUpdates
Number of desired updates per epoch.
double rmseThreshold
RMSE threshold for update candidates.
std::string plural
Plural string of property;.
double epochFraction
Desired update fraction per epoch.
std::size_t patternsPerUpdate
Patterns used per update.
std::size_t taskBatchSize
Batch size for each MPI task.
std::vector< std::vector< double > > error
Global error vector (per updater).
std::string displayMetric
Error metric for display.
std::string tiny
Tiny abbreviation string for property.
std::vector< std::string > errorMetrics
Error metrics available for this property.
Property(std::string const &property)
Constructor.
std::vector< std::vector< int > > offsetJacobian
Stride for Jacobians per task per updater.
std::size_t numTestPatterns
Number of training patterns in set.
std::size_t countUpdates
Counter for updates per epoch.
std::size_t writeCompEvery
Write comparison every this many epochs.
std::vector< std::vector< int > > weightsPerTask
Weights per task per updater.
std::size_t numGroupedSubCand
Number of subcandidates which are considered before changing the update candidate.
std::vector< int > offsetPerTask
Offset for combined error per task.
std::vector< UpdateCandidate > updateCandidates
Vector with indices of training patterns.
std::map< std::string, double > errorTrain
Current error metrics of training patterns.
std::vector< std::vector< double > > jacobian
Global Jacobian (per updater).
std::size_t patternsPerUpdateGlobal
Patterns used per update (summed over all MPI tasks).
std::map< std::string, double > errorTest
Current error metrics of test patterns.
std::map< std::size_t, SelectionMode > selectionModeSchedule
Schedule for varying selection mode.
std::size_t rmseThresholdTrials
Maximum trials for SM_THRESHOLD selection mode.
std::size_t posUpdateCandidates
Current position in update candidate list (SM_SORT/_THRESHOLD).
Contains update candidate which is grouped with others to specific parent update candidate (e....
bool operator<(SubCandidate const &rhs) const
Overload < operator to sort in descending order.
std::size_t a
Atom index (only used for force candidates).
std::size_t c
Component index (x,y,z -> 0,1,2, only used for force candidates).
double error
Absolute value of error with respect to reference value.
Contains location of one update candidate (energy or force).
bool operator<(UpdateCandidate const &rhs) const
Overload < operator to sort in descending order.
double error
Absolute value of error with respect to reference value (in case of subcandidates,...
std::vector< SubCandidate > subCandidates
Vector containing grouped candidates.
std::size_t posSubCandidates
Current position in sub-candidate list (SM_SORT/_THRESHOLD).
std::size_t s
Structure index.