29#include <gsl/gsl_rng.h>
39 parallelMode (PM_TRAIN_RK0 ),
40 jacobianMode (JM_SUM ),
41 updateStrategy (US_COMBINED ),
43 hasStructures (false ),
45 repeatedEnergyUpdates (false ),
47 writeTrainingLog (false ),
52 writeWeightsEvery (0 ),
53 writeWeightsAlways (0 ),
54 writeNeuronStatisticsEvery (0 ),
55 writeNeuronStatisticsAlways(0 ),
58 trainingLogFileName (
"train-log.out")
65 for (vector<Updater*>::iterator it =
updaters.begin();
84 log <<
"*** DEFINE TRAINING/TEST SETS ***********"
85 "**************************************\n";
88 vector<string> pCheck = {
"force",
"charge"};
92 check |= (find(pCheck.begin(), pCheck.end(), k) != pCheck.end());
96 double testSetFraction = atof(
settings[
"test_fraction"].c_str());
97 log <<
strpr(
"Desired test set ratio : %f\n", testSetFraction);
102 for (
size_t i = 0; i <
structures.size(); ++i)
108 double const r = gsl_rng_uniform(
rng);
115 k =
"energy";
if (
p.
exists(k))
p[k].numTestPatterns++;
116 k =
"force";
if (
p.
exists(k))
p[k].numTestPatterns += 3 * na;
117 k =
"charge";
if (
p.
exists(k))
p[k].numTestPatterns += na;
129 p[k].numTrainPatterns++;
131 p[k].updateCandidates.back().s = i;
137 for (vector<Atom>::const_iterator it = s.
atoms.begin();
138 it != s.
atoms.end(); ++it)
140 for (
size_t j = 0; j < 3; ++j)
143 p[k].updateCandidates.back().s = i;
144 p[k].updateCandidates.back().a = it->index;
145 p[k].updateCandidates.back().c = j;
153 for (vector<Atom>::const_iterator it = s.
atoms.begin();
154 it != s.
atoms.end(); ++it)
157 p[k].updateCandidates.back().s = i;
158 p[k].updateCandidates.back().a = it->index;
164 log <<
strpr(
"WARNING: Structure %zu not assigned to either "
165 "training or test set.\n", s.
index);
172 log <<
strpr(
"WARNING: Process %d has no atoms of element "
181 MPI_Allreduce(MPI_IN_PLACE, &(
p[k].numTrainPatterns), 1,
MPI_SIZE_T, MPI_SUM,
comm);
182 MPI_Allreduce(MPI_IN_PLACE, &(
p[k].numTestPatterns) , 1,
MPI_SIZE_T, MPI_SUM,
comm);
183 double sum =
p[k].numTrainPatterns +
p[k].numTestPatterns;
184 log <<
"Training/test split of data set for property \"" + k +
"\":\n";
185 log <<
strpr(
"- Total patterns : %.0f\n", sum);
186 log <<
strpr(
"- Training patterns : %d\n",
p[k].numTrainPatterns);
187 log <<
strpr(
"- Test patterns : %d\n",
p[k].numTestPatterns);
188 log <<
strpr(
"- Test set fraction : %f\n",
p[k].numTestPatterns / sum);
191 log <<
"*****************************************"
192 "**************************************\n";
200 log <<
"*** WRITE TRAINING/TEST SETS ************"
201 "**************************************\n";
204 string fileName =
strpr(
"train.data.%04d",
myRank);
206 fileTrain.open(fileName.c_str());
207 if (!fileTrain.is_open())
209 runtime_error(
strpr(
"ERROR: Could not open file %s\n",
214 fileTest.open(fileName.c_str());
215 if (!fileTest.is_open())
217 runtime_error(
strpr(
"ERROR: Could not open file %s\n",
220 for (vector<Structure>::iterator it =
structures.begin();
228 it->writeToFile(&fileTrain);
232 it->writeToFile(&fileTest);
244 log <<
"Writing training/test set to files:\n";
245 log <<
" - train.data\n";
246 log <<
" - test.data\n";
247 fileName =
"train.data";
249 fileName =
"test.data";
253 log <<
"*****************************************"
254 "**************************************\n";
262 log <<
"*** WEIGHT INITIALIZATION ***************"
263 "**************************************\n";
269 throw runtime_error(
"ERROR: Nguyen Widrow and preconditioning weights"
270 " initialization are incompatible\n");
276 log <<
"Setting up charge neural networks:\n";
280 log <<
"Reading old weights from files.\n";
289 log <<
"Setting up short-range neural networks:\n";
292 log <<
"Reading old weights from files.\n";
299 log <<
"Setting up short-range neural networks:\n";
302 log <<
"Reading old weights from files.\n";
308 log <<
"*****************************************"
309 "**************************************\n";
320 log <<
strpr(
"Combined updater for all elements selected: "
328 .getNumConnections();
337 log <<
strpr(
"Separate updaters for elements selected: "
345 .getNumConnections();
348 log <<
strpr(
"Fit parameters for element %2s: %zu\n",
355 throw runtime_error(
"ERROR: Unknown update strategy.\n");
369 pk.push_back(
"energy");
377 if (
stage == 1)
pk.push_back(
"charge");
380 pk.push_back(
"energy");
383 pk.push_back(
"force");
389 auto initP = [
this](
string key) {
p.emplace(piecewise_construct,
390 forward_as_tuple(key),
391 forward_as_tuple(key));
393 for (
auto k :
pk) initP(k);
401 log <<
"*** DATA SET NORMALIZATION **************"
402 "**************************************\n";
405 log <<
"Computing statistics from reference data and initial "
413 throw runtime_error(
"ERROR: Normalization of charges not yet "
423 fileEvsV.open(
strpr(
"evsv.dat.%04d",
myRank).c_str());
427 vector<string> title;
428 vector<string> colName;
429 vector<string> colInfo;
430 vector<size_t> colSize;
431 title.push_back(
"Energy vs. volume comparison.");
432 colSize.push_back(16);
433 colName.push_back(
"V_atom");
434 colInfo.push_back(
"Volume per atom.");
435 colSize.push_back(16);
436 colName.push_back(
"Eref_atom");
437 colInfo.push_back(
"Reference energy per atom.");
438 colSize.push_back(10);
439 colName.push_back(
"N");
440 colInfo.push_back(
"Number of atoms.");
441 colSize.push_back(16);
442 colName.push_back(
"V");
443 colInfo.push_back(
"Volume of structure.");
444 colSize.push_back(16);
445 colName.push_back(
"Eref");
446 colInfo.push_back(
"Reference energy of structure.");
447 colSize.push_back(16);
448 colName.push_back(
"Eref_offset");
449 colInfo.push_back(
"Reference energy of structure (including offset).");
454 size_t numAtomsTotal = 0;
456 double meanEnergyPerAtomRef = 0.0;
457 double meanEnergyPerAtomNnp = 0.0;
458 double sigmaEnergyPerAtomRef = 0.0;
459 double sigmaEnergyPerAtomNnp = 0.0;
460 double meanForceRef = 0.0;
461 double meanForceNnp = 0.0;
462 double sigmaForceRef = 0.0;
463 double sigmaForceNnp = 0.0;
467 fileEvsV <<
strpr(
"%16.8E %16.8E %10zu %16.8E %16.8E %16.8E\n",
468 s.volume / s.numAtoms,
469 s.energyRef / s.numAtoms,
475#ifdef N2P2_NO_SF_GROUPS
483 s.clearNeighborList();
486 numAtomsTotal += s.numAtoms;
487 meanEnergyPerAtomRef += s.energyRef / s.numAtoms;
488 meanEnergyPerAtomNnp += s.energy / s.numAtoms;
489 for (
auto& a : s.atoms)
491 meanForceRef += a.fRef[0] + a.fRef[1] + a.fRef[2];
492 meanForceNnp += a.f [0] + a.f [1] + a.f [2];
498 MPI_Barrier(MPI_COMM_WORLD);
499 log <<
"Writing energy/atom vs. volume/atom data to \"evsv.dat\".\n";
502 MPI_Allreduce(MPI_IN_PLACE, &numAtomsTotal , 1,
MPI_SIZE_T, MPI_SUM, MPI_COMM_WORLD);
503 MPI_Allreduce(MPI_IN_PLACE, &meanEnergyPerAtomRef, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
504 MPI_Allreduce(MPI_IN_PLACE, &meanEnergyPerAtomNnp, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
505 MPI_Allreduce(MPI_IN_PLACE, &meanForceRef , 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
506 MPI_Allreduce(MPI_IN_PLACE, &meanForceNnp , 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
509 meanForceRef /= 3 * numAtomsTotal;
510 meanForceNnp /= 3 * numAtomsTotal;
513 double ediffRef = s.energyRef / s.numAtoms - meanEnergyPerAtomRef;
514 double ediffNnp = s.energy / s.numAtoms - meanEnergyPerAtomNnp;
515 sigmaEnergyPerAtomRef += ediffRef * ediffRef;
516 sigmaEnergyPerAtomNnp += ediffNnp * ediffNnp;
517 for (
auto const& a : s.atoms)
519 double fdiffRef = a.fRef[0] - meanForceRef;
520 double fdiffNnp = a.f [0] - meanForceNnp;
521 sigmaForceRef += fdiffRef * fdiffRef;
522 sigmaForceNnp += fdiffNnp * fdiffNnp;
523 fdiffRef = a.fRef[1] - meanForceRef;
524 fdiffNnp = a.f [1] - meanForceNnp;
525 sigmaForceRef += fdiffRef * fdiffRef;
526 sigmaForceNnp += fdiffNnp * fdiffNnp;
527 fdiffRef = a.fRef[2] - meanForceRef;
528 fdiffNnp = a.f [2] - meanForceNnp;
529 sigmaForceRef += fdiffRef * fdiffRef;
530 sigmaForceNnp += fdiffNnp * fdiffNnp;
533 MPI_Allreduce(MPI_IN_PLACE, &sigmaEnergyPerAtomRef, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
534 MPI_Allreduce(MPI_IN_PLACE, &sigmaEnergyPerAtomNnp, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
535 MPI_Allreduce(MPI_IN_PLACE, &sigmaForceRef , 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
536 MPI_Allreduce(MPI_IN_PLACE, &sigmaForceNnp , 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
537 sigmaEnergyPerAtomRef = sqrt(sigmaEnergyPerAtomRef / (
numStructures - 1));
538 sigmaEnergyPerAtomNnp = sqrt(sigmaEnergyPerAtomNnp / (
numStructures - 1));
539 sigmaForceRef = sqrt(sigmaForceRef / (3 * numAtomsTotal - 1));
540 sigmaForceNnp = sqrt(sigmaForceNnp / (3 * numAtomsTotal - 1));
543 log <<
strpr(
"Total number of atoms : %zu\n", numAtomsTotal);
544 log <<
"----------------------------------\n";
545 log <<
"Reference data statistics:\n";
546 log <<
"----------------------------------\n";
547 log <<
strpr(
"Mean/sigma energy per atom : %16.8E / %16.8E\n",
548 meanEnergyPerAtomRef,
549 sigmaEnergyPerAtomRef);
550 log <<
strpr(
"Mean/sigma force : %16.8E / %16.8E\n",
553 log <<
"----------------------------------\n";
554 log <<
"Initial NNP prediction statistics:\n";
555 log <<
"----------------------------------\n";
556 log <<
strpr(
"Mean/sigma energy per atom : %16.8E / %16.8E\n",
557 meanEnergyPerAtomNnp,
558 sigmaEnergyPerAtomNnp);
559 log <<
strpr(
"Mean/sigma force : %16.8E / %16.8E\n",
562 log <<
"----------------------------------\n";
564 if (
settings[
"normalize_data_set"] ==
"stats-only")
566 log <<
"Data set statistics computation completed, now make up for \n";
567 log <<
"initially skipped normalization setup...\n";
571 else if (
settings[
"normalize_data_set"] ==
"ref")
573 log <<
"Normalization based on standard deviation of reference data "
576 log <<
" mean(e_ref) = 0, sigma(e_ref) = sigma(F_ref) = 1\n";
580 if (useForcesLocal)
convLength = sigmaForceRef / sigmaEnergyPerAtomRef;
584 else if (
settings[
"normalize_data_set"] ==
"force")
588 throw runtime_error(
"ERROR: Selected normalization mode only "
589 "possible when forces are available.\n");
591 log <<
"Normalization based on standard deviation of reference forces "
593 log <<
"initial prediction selected:\n";
595 log <<
" mean(e_ref) = 0, sigma(F_NNP) = sigma(F_ref) = 1\n";
604 throw runtime_error(
"ERROR: Unknown data set normalization mode.\n");
607 if (
settings[
"normalize_data_set"] !=
"stats-only")
609 log <<
"Final conversion data:\n";
613 log <<
"----------------------------------\n";
617 (
settings[
"normalize_data_set"] !=
"stats-only"))
620 log <<
"Writing backup of original settings file to "
621 "\"input.nn.bak\".\n";
622 ofstream fileSettings;
623 fileSettings.open(
"input.nn.bak");
625 fileSettings.close();
628 log <<
"Writing normalization data to settings file \"input.nn\".\n";
629 string n1 =
strpr(
"mean_energy %24.16E # nnp-train\n",
630 meanEnergyPerAtomRef);
631 string n2 =
strpr(
"conv_energy %24.16E # nnp-train\n",
633 string n3 =
strpr(
"conv_length %24.16E # nnp-train\n",
638 map<size_t, string> replace;
639 for (
size_t i = 0; i < lines.size(); ++i)
641 vector<string> sl =
split(lines.at(i));
644 if (sl.at(0) ==
"mean_energy") replace[i] = n1;
645 if (sl.at(0) ==
"conv_energy") replace[i] = n2;
646 if (sl.at(0) ==
"conv_length") replace[i] = n3;
649 if (!replace.empty())
651 log <<
"WARNING: Preexisting normalization data was found and "
652 "replaced in original \"input.nn\" file.\n";
655 fileSettings.open(
"input.nn");
658 fileSettings <<
"#########################################"
659 "######################################\n";
660 fileSettings <<
"# DATA SET NORMALIZATION\n";
661 fileSettings <<
"#########################################"
662 "######################################\n";
666 fileSettings <<
"#########################################"
667 "######################################\n";
668 fileSettings <<
"\n";
671 fileSettings.close();
679 log <<
"Silently repeating symmetry function setup...\n";
681 for (
auto& e :
elements) e.clearSymmetryFunctions();
683#ifndef N2P2_FULL_SFD_MEMORY
686#ifndef N2P2_NO_SF_CACHE
689#ifndef N2P2_NO_SF_GROUPS
697 log <<
"*****************************************"
698 "**************************************\n";
706 log <<
"*** SETUP: TRAINING *********************"
707 "**************************************\n";
713 if (
stage == 1)
log <<
"charge NN fitting.\n";
714 else if (
stage == 2)
log <<
"short-range NN fitting.\n";
715 else throw runtime_error(
"\nERROR: Unknown training stage.\n");
725 log <<
"Forces will be used for training.\n";
732 log <<
"WARNING: Force weight not set, using default value.\n";
739 log <<
"Only energies will used for training.\n";
746 log <<
"Training will act on \"" <<
nnId <<
"\" neural networks.\n";
751 if (
settings[
"main_error_metric"] ==
"RMSEpa")
753 k =
"energy";
if (
p.
exists(k))
p[k].displayMetric =
"RMSEpa";
754 k =
"force";
if (
p.
exists(k))
p[k].displayMetric =
"RMSE";
755 k =
"charge";
if (
p.
exists(k))
p[k].displayMetric =
"RMSE";
757 else if (
settings[
"main_error_metric"] ==
"RMSE")
759 k =
"energy";
if (
p.
exists(k))
p[k].displayMetric =
"RMSE";
760 k =
"force";
if (
p.
exists(k))
p[k].displayMetric =
"RMSE";
761 k =
"charge";
if (
p.
exists(k))
p[k].displayMetric =
"RMSE";
763 else if (
settings[
"main_error_metric"] ==
"MAEpa")
765 k =
"energy";
if (
p.
exists(k))
p[k].displayMetric =
"MAEpa";
766 k =
"force";
if (
p.
exists(k))
p[k].displayMetric =
"MAE";
767 k =
"charge";
if (
p.
exists(k))
p[k].displayMetric =
"MAE";
769 else if (
settings[
"main_error_metric"] ==
"MAE")
771 k =
"energy";
if (
p.
exists(k))
p[k].displayMetric =
"MAE";
772 k =
"force";
if (
p.
exists(k))
p[k].displayMetric =
"MAE";
773 k =
"charge";
if (
p.
exists(k))
p[k].displayMetric =
"MAE";
777 throw runtime_error(
"ERROR: Unknown error metric.\n");
783 k =
"energy";
if (
p.
exists(k))
p[k].displayMetric =
"RMSEpa";
784 k =
"force";
if (
p.
exists(k))
p[k].displayMetric =
"RMSE";
785 k =
"charge";
if (
p.
exists(k))
p[k].displayMetric =
"RMSE";
791 log <<
strpr(
"Weight update via gradient descent selected: "
792 "updaterType::UT_GD (%d)\n",
797 log <<
strpr(
"Weight update via Kalman filter selected: "
798 "updaterType::UT_KF (%d)\n",
803 throw runtime_error(
"ERROR: LM algorithm not yet implemented.\n");
804 log <<
strpr(
"Weight update via Levenberg-Marquardt algorithm "
805 "selected: updaterType::UT_LM (%d)\n",
810 throw runtime_error(
"ERROR: Unknown updater type.\n");
822 log <<
strpr(
"Parallel training (rank 0 updates) selected: "
823 "ParallelMode::PM_TRAIN_RK0 (%d)\n",
828 log <<
strpr(
"Parallel training (all ranks update) selected: "
829 "ParallelMode::PM_TRAIN_ALL (%d)\n",
834 throw runtime_error(
"ERROR: Unknown parallelization mode.\n");
840 log <<
strpr(
"Gradient summation only selected: "
842 log <<
"No Jacobi matrix, gradients of all training candidates are "
843 "summed up instead.\n";
847 log <<
strpr(
"Per-task Jacobian selected: "
848 "JacobianMode::JM_TASK (%d)\n",
850 log <<
"One Jacobi matrix row per MPI task is stored, within each "
851 "task gradients are summed up.\n";
855 log <<
strpr(
"Full Jacobian selected: "
856 "JacobianMode::JM_FULL (%d)\n",
858 log <<
"Each update candidate generates one Jacobi matrix "
863 throw runtime_error(
"ERROR: Unknown Jacobian mode.\n");
868 throw runtime_error(
"ERROR: Gradient descent methods can only be "
869 "combined with Jacobian mode JM_SUM.\n");
884 log <<
"-----------------------------------------"
885 "--------------------------------------\n";
889 throw runtime_error(
"ERROR: Repeated energy updates are not correctly"
890 " implemented at the moment.\n");
898 log <<
"Symmetry function memory is cleared after each calculation.\n";
902 log <<
"Symmetry function memory is reused (HIGH MEMORY USAGE!).\n";
923 log <<
strpr(
"Training log with update information will be written to:"
928 vector<string> title;
929 vector<string> colName;
930 vector<string> colInfo;
931 vector<size_t> colSize;
932 title.push_back(
"Detailed information on each weight update.");
933 colSize.push_back(3);
934 colName.push_back(
"U");
935 colInfo.push_back(
"Update type (E = energy, F = force, Q = charge).");
936 colSize.push_back(5);
937 colName.push_back(
"epoch");
938 colInfo.push_back(
"Current training epoch.");
939 colSize.push_back(10);
940 colName.push_back(
"count");
941 colInfo.push_back(
"Update counter (Multiple lines with identical count"
942 " for multi-streaming!).");
943 colSize.push_back(5);
944 colName.push_back(
"proc");
945 colInfo.push_back(
"MPI process providing this update candidate.");
946 colSize.push_back(3);
947 colName.push_back(
"tl");
948 colInfo.push_back(
"Threshold loop counter.");
949 colSize.push_back(10);
950 colName.push_back(
"rmse_frac");
951 colInfo.push_back(
"Update candidates error divided by this "
953 colSize.push_back(10);
954 colName.push_back(
"s_ind_g");
955 colInfo.push_back(
"Global structure index.");
956 colSize.push_back(5);
957 colName.push_back(
"s_ind");
958 colInfo.push_back(
"Local structure index on this MPI process.");
959 colSize.push_back(5);
960 colName.push_back(
"a_ind");
961 colInfo.push_back(
"Atom index.");
962 colSize.push_back(2);
963 colName.push_back(
"c");
964 colInfo.push_back(
"Force component (0 = x, 1 = y, 2 = z).");
970 log <<
"-----------------------------------------"
971 "--------------------------------------\n";
977 log <<
strpr(
"Energy to force ratio : "
982 log <<
strpr(
"Energy to force percentages : "
983 "%5.1f%% : %5.1f%%\n",
991 double totalUpdates = 0.0;
992 for (
auto k :
pk) totalUpdates +=
p[k].numUpdates;
993 log <<
"-----------------------------------------"
994 "--------------------------------------\n";
998 log <<
"-----------------------------------------"
999 "--------------------------------------\n";
1010 atoi(
settings[
"gradient_type"].c_str());
1016 atoi(
settings[
"kalman_type"].c_str());
1037 updaters.back()->resetTimingLoop();
1047 double const eta = atof(
settings[
"gradient_eta"].c_str());
1057 double const eta = atof(
settings[
"gradient_adam_eta"].c_str());
1058 double const beta1 = atof(
settings[
"gradient_adam_beta1"].c_str());
1059 double const beta2 = atof(
settings[
"gradient_adam_beta2"].c_str());
1060 double const eps = atof(
settings[
"gradient_adam_epsilon"].c_str());
1073 double const epsilon = atof(
settings[
"kalman_epsilon"].c_str());
1074 double const q0 = atof(
settings[
"kalman_q0" ].c_str());
1075 double const qtau = atof(
settings[
"kalman_qtau" ].c_str())
1077 log <<
"qtau is divided by number "
1078 "of projected updates per epoch.\n";
1079 double const qmin = atof(
settings[
"kalman_qmin" ].c_str());
1080 double const eta0 = atof(
settings[
"kalman_eta" ].c_str());
1081 double etatau = 1.0;
1082 double etamax = eta0;
1086 etatau = atof(
settings[
"kalman_etatau"].c_str())
1088 log <<
"etatau is divided by number "
1089 "of projected updates per epoch.\n";
1090 etamax = atof(
settings[
"kalman_etamax"].c_str());
1092 for (
size_t i = 0; i <
updaters.size(); ++i)
1106 double const epsilon = atof(
settings[
"kalman_epsilon"].c_str());
1107 double const q0 = atof(
settings[
"kalman_q0" ].c_str());
1108 double const qtau = atof(
settings[
"kalman_qtau" ].c_str())
1110 log <<
"qtau is divided by number "
1111 "of projected updates per epoch.\n";
1112 double const qmin = atof(
settings[
"kalman_qmin"].c_str());
1113 double const lambda =
1114 atof(
settings[
"kalman_lambda_short"].c_str());
1118 double const nu = atof(
settings[
"kalman_nue_short"].c_str());
1119 for (
size_t i = 0; i <
updaters.size(); ++i)
1132 log <<
"-----------------------------------------"
1133 "--------------------------------------\n";
1134 for (
size_t i = 0; i <
updaters.size(); ++i)
1138 log <<
strpr(
"Combined weight updater:\n");
1142 log <<
strpr(
"Weight updater for element %2s :\n",
1143 elements.at(i).getSymbol().c_str());
1145 log <<
"-----------------------------------------"
1146 "--------------------------------------\n";
1150 log <<
"Note: During training loop the actual observation\n";
1151 log <<
" size corresponds to error vector size:\n";
1154 log <<
strpr(
"sizeObservation = %zu (%s updates)\n",
1155 p[k].error.at(i).size(), k.c_str());
1158 log <<
"-----------------------------------------"
1159 "--------------------------------------\n";
1162 log <<
strpr(
"TIMING Finished setup: %.2f seconds.\n",
1163 sw[
"setup"].stop());
1164 log <<
"*****************************************"
1165 "**************************************\n";
1173 log <<
"*** SETUP WEIGHT DERIVATIVES CHECK ******"
1174 "**************************************\n";
1177 log <<
"Weight derivatives will be checked for these properties:\n";
1178 for (
auto k :
pk)
log <<
" - " +
p[k].plural +
"\n";
1200 log <<
"*****************************************"
1201 "**************************************\n";
1210 log <<
"*** CALCULATE NEIGHBOR LISTS ************"
1211 "**************************************\n";
1215 int num_threads = omp_get_max_threads();
1216 omp_set_num_threads(1);
1217 log <<
strpr(
"Temporarily disabling OpenMP parallelization: %d threads.\n",
1218 omp_get_max_threads());
1220 log <<
"Calculating neighbor lists for all structures.\n";
1223 log <<
strpr(
"Cutoff radius for neighbor lists: %f\n",
1224 maxCutoffRadiusPhys);
1225 for (vector<Structure>::iterator it =
structures.begin();
1231 omp_set_num_threads(num_threads);
1232 log <<
strpr(
"Restoring OpenMP parallelization: max. %d threads.\n",
1233 omp_get_max_threads());
1236 log <<
"-----------------------------------------"
1237 "--------------------------------------\n";
1238 log <<
strpr(
"TIMING Finished neighbor lists: %.2f seconds.\n",
1240 log <<
"*****************************************"
1241 "**************************************\n";
1247 map<
string, pair<string, string>>
const fileNames)
1250 int num_threads = omp_get_max_threads();
1251 omp_set_num_threads(1);
1253 vector<string> write;
1254 for (
auto i : fileNames)
1256 if (i.second.first.size() == 0 || i.second.second.size() == 0)
1258 throw runtime_error(
"ERROR: No filename provided for comparison "
1261 write.push_back(i.first);
1263 auto doWrite = [&write](
string key){
1264 return find(write.begin(),
1266 key) != write.end();
1270 map<string, size_t> countTrain;
1271 map<string, size_t> countTest;
1272 for (
auto k :
pk) countTrain[k] = 0;
1273 for (
auto k :
pk) countTest[k] = 0;
1275 map<string, ofstream> filesTrain;
1276 map<string, ofstream> filesTest;
1281 for (
auto& m :
p[k].errorTrain) m.second = 0.0;
1282 for (
auto& m :
p[k].errorTest) m.second = 0.0;
1285 for (
auto k : write)
1287 filesTrain[k].open(
strpr(
"%s.%04d",
1288 fileNames.at(k).first.c_str(),
1290 filesTest[k].open(
strpr(
"%s.%04d",
1291 fileNames.at(k).second.c_str(),
1294 vector<string> header;
1297 vector<string> title;
1298 vector<string> colName;
1299 vector<string> colInfo;
1300 vector<size_t> colSize;
1303 title.push_back(
"Energy comparison.");
1304 colSize.push_back(10);
1305 colName.push_back(
"index");
1306 colInfo.push_back(
"Structure index.");
1307 colSize.push_back(16);
1308 colName.push_back(
"Eref");
1309 colInfo.push_back(
"Reference potential energy per atom "
1310 "(training units).");
1311 colSize.push_back(16);
1312 colName.push_back(
"Ennp");
1313 colInfo.push_back(
"NNP potential energy per atom "
1314 "(training units).");
1316 else if (k ==
"force")
1318 title.push_back(
"Force comparison.");
1319 colSize.push_back(10);
1320 colName.push_back(
"index_s");
1321 colInfo.push_back(
"Structure index.");
1322 colSize.push_back(10);
1323 colName.push_back(
"index_a");
1324 colInfo.push_back(
"Atom index (x, y, z components in "
1325 "consecutive lines).");
1326 colSize.push_back(16);
1327 colName.push_back(
"Fref");
1328 colInfo.push_back(
"Reference force (training units).");
1329 colSize.push_back(16);
1330 colName.push_back(
"Fnnp");
1331 colInfo.push_back(
"NNP force (training units).");
1333 else if (k ==
"charge")
1335 title.push_back(
"Charge comparison.");
1336 colSize.push_back(10);
1337 colName.push_back(
"index_s");
1338 colInfo.push_back(
"Structure index.");
1339 colSize.push_back(10);
1340 colName.push_back(
"index_a");
1341 colInfo.push_back(
"Atom index.");
1342 colSize.push_back(16);
1343 colName.push_back(
"Qref");
1344 colInfo.push_back(
"Reference charge.");
1345 colSize.push_back(16);
1346 colName.push_back(
"Qnnp");
1347 colInfo.push_back(
"NNP charge.");
1355 for (vector<Structure>::iterator it =
structures.begin();
1358#ifdef N2P2_NO_SF_GROUPS
1368 map<string, double>* error =
nullptr;
1369 size_t* count =
nullptr;
1370 ofstream* file =
nullptr;
1373 error = &(
p[k].errorTrain);
1374 count = &(countTrain.at(k));
1375 if (doWrite(k)) file = &(filesTrain.at(k));
1379 error = &(
p[k].errorTest);
1380 count = &(countTest.at(k));
1381 if (doWrite(k)) file = &(filesTest.at(k));
1384 it->updateError(k, *error, *count);
1387 if (k ==
"energy") (*file) << it->getEnergyLine();
1388 else if (k ==
"force")
1390 for (
auto l : it->getForcesLines()) (*file) << l;
1392 else if (k ==
"charge")
1394 for (
auto l : it->getChargesLines()) (*file) << l;
1407 filesTrain.at(k).close();
1408 filesTest.at(k).close();
1419 omp_set_num_threads(num_threads);
1429 map<string, pair<string, string>> fileNames;
1431 for (
auto const& ip :
p)
1433 string const& k = ip.first;
1435 if (
d.writeCompEvery > 0 &&
1436 (
epoch %
d.writeCompEvery == 0 ||
epoch <=
d.writeCompAlways))
1439 if (k ==
"energy") middle =
"points";
1440 else if (k ==
"force" ) middle =
"forces";
1441 else if (k ==
"charge") middle =
"charges";
1442 fileNames[k] = make_pair(
strpr(
"train%s.%06zu.out",
1443 middle.c_str(),
epoch),
1444 strpr(
"test%s.%06zu.out",
1445 middle.c_str(),
epoch));
1457 string metric =
"?";
1458 string peratom =
"";
1460 log <<
"The training loop output covers different errors, update and\n";
1461 log <<
"timing information. The following quantities are organized\n";
1462 log <<
"according to the matrix scheme below:\n";
1463 log <<
"-------------------------------------------------------------------\n";
1464 log <<
"ep ........ Epoch.\n";
1467 string const& pmetric =
p[k].displayMetric;
1468 if (pmetric.find(
"RMSE") != pmetric.npos) metric =
"RMSE";
1469 else if (pmetric.find(
"MAE") != pmetric.npos) metric =
"MAE";
1470 if (pmetric.find(
"pa") != pmetric.npos) peratom =
" per atom";
1472 log <<
p[k].tiny <<
"_count ... Number of " << k <<
" updates.\n";
1473 log <<
p[k].tiny <<
"_train ... " << metric <<
" of training "
1474 <<
p[k].plural << peratom <<
".\n";
1475 log <<
p[k].tiny <<
"_test .... " << metric <<
" of test "
1476 <<
p[k].plural << peratom <<
".\n";
1479 log <<
p[k].tiny <<
"_pt ...... Percentage of time for " << k <<
1480 " updates w.r.t. to t_train.\n";
1482 log <<
"count ..... Total number of updates.\n";
1483 log <<
"train ..... Percentage of time for training.\n";
1484 log <<
"error ..... Percentage of time for error calculation.\n";
1485 log <<
"other ..... Percentage of time for other purposes.\n";
1486 log <<
"epoch ..... Total time for this epoch (seconds).\n";
1487 log <<
"total ..... Total time for all epochs (seconds).\n";
1488 log <<
"-------------------------------------------------------------------\n";
1492 <<
strpr(
" %5s",
"ep")
1493 <<
strpr(
" %7s", (
p[k].tiny +
"_count").c_str())
1494 <<
strpr(
" %11s", (
p[k].tiny +
"_train").c_str())
1495 <<
strpr(
" %11s", (
p[k].tiny +
"_test").c_str())
1496 <<
strpr(
" %5s", (
p[k].tiny +
"_pt").c_str())
1500 <<
strpr(
" %5s",
"ep")
1501 <<
strpr(
" %7s",
"count")
1502 <<
strpr(
" %5s",
"train")
1503 <<
strpr(
" %5s",
"error")
1504 <<
strpr(
" %5s",
"other")
1505 <<
strpr(
" %9s",
"epoch")
1506 <<
strpr(
" %9s",
"total")
1508 log <<
"-------------------------------------------------------------------\n";
1515 double timeLoop =
sw[
"loop"].getLoop();
1516 double timeTrain =
sw[
"train"].getLoop();
1517 size_t totalUpdates = 0;
1520 totalUpdates +=
p[k].countUpdates;
1521 double timeProp =
sw[k].getLoop();
1523 for (
auto& c : caps) c = toupper(c);
1524 log <<
strpr(
"%-6s", caps.c_str());
1530 physical(k,
p[k].errorTrain.at(
p[k].displayMetric)),
1531 physical(k,
p[k].errorTest.at(
p[k].displayMetric)));
1536 p[k].errorTrain.at(
p[k].displayMetric),
1537 p[k].errorTest.at(
p[k].displayMetric));
1540 else log <<
strpr(
" %5.1f", timeProp / timeTrain * 100.0);
1543 double timeOther = timeLoop;
1544 timeOther -=
sw[
"error"].getLoop();
1545 timeOther -=
sw[
"train"].getLoop();
1548 log <<
strpr(
" %7zu", totalUpdates);
1549 log <<
strpr(
" %5.1f",
sw[
"train"].getLoop() / timeLoop * 100.0);
1550 log <<
strpr(
" %5.1f",
sw[
"error"].getLoop() / timeLoop * 100.0);
1551 log <<
strpr(
" %5.1f", timeOther / timeLoop * 100.0);
1552 log <<
strpr(
" %9.2f",
sw[
"loop"].getLoop());
1553 log <<
strpr(
" %9.2f",
sw[
"loop"].getTotal());
1560 string const& fileNameFormat)
const
1566 string fileName =
strpr(fileNameFormat.c_str(),
1568 file.open(fileName.c_str());
1569 elements.at(i).neuralNetworks.at(nnName).writeConnections(file);
1598 string fileNameActual = fileName;
1601 fileNameActual +=
strpr(
".stage-%zu",
stage);
1604 if (append) file.open(fileNameActual.c_str(), ofstream::app);
1607 file.open(fileNameActual.c_str());
1610 vector<string> title;
1611 vector<string> colName;
1612 vector<string> colInfo;
1613 vector<size_t> colSize;
1617 title.push_back(
"Learning curves for energies and forces.");
1621 title.push_back(
"Learning curves for charges.");
1623 colSize.push_back(10);
1624 colName.push_back(
"epoch");
1625 colInfo.push_back(
"Current epoch.");
1627 map<string, string> text;
1628 text[
"RMSEpa"] =
"RMSE of %s %s per atom";
1629 text[
"RMSE"] =
"RMSE of %s %s";
1630 text[
"MAEpa"] =
"MAE of %s %s per atom";
1631 text[
"MAE"] =
"MAE of %s %s";
1635 for (
auto m :
p[k].errorMetrics)
1637 colSize.push_back(16);
1638 colName.push_back(m +
"_" +
p[k].tiny +
"train_pu");
1639 colInfo.push_back(
strpr(
1640 (text[m] +
" (physical units)").c_str(),
1642 p[k].plural.c_str()));
1643 colSize.push_back(16);
1644 colName.push_back(m +
"_" +
p[k].tiny +
"test_pu");
1645 colInfo.push_back(
strpr(
1646 (text[m] +
" (physical units)").c_str(),
1648 p[k].plural.c_str()));
1656 if (!(k ==
"energy" || k ==
"force"))
continue;
1657 for (
auto m :
p[k].errorMetrics)
1659 colSize.push_back(16);
1660 colName.push_back(m +
"_" +
p[k].tiny +
"train_iu");
1661 colInfo.push_back(
strpr(
1662 (text[m] +
" (training units)").c_str(),
1664 p[k].plural.c_str()));
1665 colSize.push_back(16);
1666 colName.push_back(m +
"_" +
p[k].tiny +
"test_iu");
1667 colInfo.push_back(
strpr(
1668 (text[m] +
" (training units)").c_str(),
1670 p[k].plural.c_str()));
1683 if (!(k ==
"energy" || k ==
"force"))
continue;
1684 for (
auto m :
p[k].errorMetrics)
1686 file <<
strpr(
" %16.8E %16.8E",
1694 for (
auto m :
p[k].errorMetrics)
1696 file <<
strpr(
" %16.8E %16.8E",
1697 p[k].errorTrain.at(m),
1698 p[k].errorTest.at(m));
1709 string const& fileName)
const
1714 file.open(fileName.c_str());
1717 vector<string> title;
1718 vector<string> colName;
1719 vector<string> colInfo;
1720 vector<size_t> colSize;
1721 title.push_back(
"Statistics for individual neurons of network \""
1722 + nnName +
"\" gathered during RMSE calculation.");
1723 colSize.push_back(10);
1724 colName.push_back(
"element");
1725 colInfo.push_back(
"Element index.");
1726 colSize.push_back(10);
1727 colName.push_back(
"neuron");
1728 colInfo.push_back(
"Neuron number.");
1729 colSize.push_back(10);
1730 colName.push_back(
"count");
1731 colInfo.push_back(
"Number of neuron value computations.");
1732 colSize.push_back(16);
1733 colName.push_back(
"min");
1734 colInfo.push_back(
"Minimum neuron value encounterd.");
1735 colSize.push_back(16);
1736 colName.push_back(
"max");
1737 colInfo.push_back(
"Maximum neuron value encounterd.");
1738 colSize.push_back(16);
1739 colName.push_back(
"mean");
1740 colInfo.push_back(
"Mean neuron value.");
1741 colSize.push_back(16);
1742 colName.push_back(
"sigma");
1743 colInfo.push_back(
"Standard deviation of neuron value.");
1750 size_t n =
elements.at(i).neuralNetworks.at(nnName).getNumNeurons();
1751 vector<long> count(n, 0);
1752 vector<double> min(n, 0.0);
1753 vector<double> max(n, 0.0);
1754 vector<double> mean(n, 0.0);
1755 vector<double> sigma(n, 0.0);
1756 elements.at(i).neuralNetworks.at(nnName).
1757 getNeuronStatistics(&(count.front()),
1765 MPI_Reduce(MPI_IN_PLACE, &(count.front()), n, MPI_LONG , MPI_SUM, 0,
comm);
1766 MPI_Reduce(MPI_IN_PLACE, &(min.front()) , n, MPI_DOUBLE, MPI_MIN, 0,
comm);
1767 MPI_Reduce(MPI_IN_PLACE, &(max.front()) , n, MPI_DOUBLE, MPI_MAX, 0,
comm);
1768 MPI_Reduce(MPI_IN_PLACE, &(mean.front()) , n, MPI_DOUBLE, MPI_SUM, 0,
comm);
1769 MPI_Reduce(MPI_IN_PLACE, &(sigma.front()), n, MPI_DOUBLE, MPI_SUM, 0,
comm);
1773 MPI_Reduce(&(count.front()), &(count.front()), n, MPI_LONG , MPI_SUM, 0,
comm);
1774 MPI_Reduce(&(min.front()) , &(min.front()) , n, MPI_DOUBLE, MPI_MIN, 0,
comm);
1775 MPI_Reduce(&(max.front()) , &(max.front()) , n, MPI_DOUBLE, MPI_MAX, 0,
comm);
1776 MPI_Reduce(&(mean.front()) , &(mean.front()) , n, MPI_DOUBLE, MPI_SUM, 0,
comm);
1777 MPI_Reduce(&(sigma.front()), &(sigma.front()), n, MPI_DOUBLE, MPI_SUM, 0,
comm);
1781 for (
size_t j = 0; j < n; ++j)
1783 size_t m = count.at(j);
1784 sigma.at(j) = sqrt((m * sigma.at(j) - mean.at(j) * mean.at(j))
1787 file <<
strpr(
"%10d %10d %10d %16.8E %16.8E %16.8E %16.8E\n",
1809 vector<pair<string, string>> nnInfo;
1818 make_pair(
"short",
strpr(
"neuron-stats.%06zu.out",
epoch)));
1826 strpr(
"neuron-stats.%s.%06zu.out.stage-%zu",
1829 else if (
stage == 2)
1833 strpr(
"neuron-stats.%s.%06zu.out.stage-%zu",
1837 strpr(
"neuron-stats.%s.%06zu.out.stage-%zu",
1849 for (vector<Element>::iterator it =
elements.begin();
1852 for (
auto& nn : it->neuralNetworks) nn.second.resetNeuronStatistics();
1858 string const fileNameFormat)
const
1861 string fileNameFormatActual = fileNameFormat;
1864 fileNameFormatActual +=
strpr(
".stage-%zu",
stage);
1872 fileName =
strpr(fileNameFormatActual.c_str(), 0);
1876 fileName =
strpr(fileNameFormatActual.c_str(),
1879 if (append) file.open(fileName.c_str(), ofstream::app);
1882 file.open(fileName.c_str());
1895 for (
auto& uc :
p[property].updateCandidates)
1897 if (property ==
"energy")
1902 else if (property ==
"force")
1905 uc.error = fabs(a.
fRef[uc.c] - a.
f[uc.c]);
1907 else if (property ==
"charge")
1914 sort(
p[property].updateCandidates.begin(),
1915 p[property].updateCandidates.end());
1917 p[property].posUpdateCandidates = 0;
1924 shuffle(
p[property].updateCandidates.begin(),
1925 p[property].updateCandidates.end(),
1928 p[property].posUpdateCandidates = 0;
1942 for (
int i =
pk.size() - 1; i >= 0; --i)
1948 if (
pk.size() == 1)
return;
1965 if (
p[k].selectionModeSchedule.find(
epoch)
1966 !=
p[k].selectionModeSchedule.end())
1968 p[k].selectionMode =
p[k].selectionModeSchedule[
epoch];
1971 string message =
"INFO Switching selection mode for "
1972 "property \"" + k +
"\" to ";
1975 message +=
strpr(
"SM_RANDOM (%d).\n",
p[k].selectionMode);
1977 else if (
p[k].selectionMode ==
SM_SORT)
1979 message +=
strpr(
"SM_SORT (%d).\n",
p[k].selectionMode);
1983 message +=
strpr(
"SM_THRESHOLD (%d).\n",
1984 p[k].selectionMode);
1998 log <<
"*** TRAINING LOOP ***********************"
1999 "**************************************\n";
2004 sw[
"error"].start();
2034 for (
auto k :
pk)
p[k].countUpdates = 0;
2050 sw[
"train"].start();
2053 string property =
pk.at(i);
2055 p[property].countUpdates++;
2063 sw[
"error"].start();
2086 log <<
"-----------------------------------------"
2087 "--------------------------------------\n";
2088 log <<
strpr(
"TIMING Training loop finished: %.2f seconds.\n",
2089 sw[
"loop"].getTotal());
2090 log <<
"*****************************************"
2091 "**************************************\n";
2099 string const& k = property;
2104 sw[k].start(newLoop);
2105 sw[k +
"_err"].start(newLoop);
2108 int num_threads = omp_get_max_threads();
2109 omp_set_num_threads(1);
2118 bool derivatives =
false;
2119 if (k ==
"force") derivatives =
true;
2121 vector<size_t> thresholdLoopCount(batchSize, 0);
2122 vector<double> currentRmseFraction(batchSize, 0.0);
2123 vector<UpdateCandidate*> currentUpdateCandidates(batchSize, NULL);
2126 fill(pu.
error.at(i).begin(), pu.
error.at(i).end(), 0.0);
2131 for (
size_t b = 0; b < batchSize; ++b)
2134 size_t indexBest = 0;
2135 double rmseFractionBest = 0.0;
2142 for (il = 0; il < trials; ++il)
2154 currentUpdateCandidates.at(b) = c;
2173 currentRmseFraction.at(b) =
2182 throw runtime_error(
"ERROR: Not implemented.\n");
2185 else if (k ==
"force")
2192 currentRmseFraction.at(b) =
2193 fabs(a.
fRef[c->
c] - a.
f[c->
c])
2201 throw runtime_error(
"ERROR: Not implemented.\n");
2204 else if (k ==
"charge")
2230 if (currentRmseFraction.at(b) > rmseFractionBest)
2232 rmseFractionBest = currentRmseFraction.at(b);
2247 thresholdLoopCount.at(b) = il;
2254 currentUpdateCandidates.at(b) =
2256 currentRmseFraction.at(b) = rmseFractionBest;
2260#ifdef N2P2_NO_SF_GROUPS
2274 vector<vector<double>> dXdc;
2279 .getNumConnections();
2280 dXdc.at(i).resize(n, 0.0);
2313 for (vector<Atom>::iterator it = s.
atoms.begin();
2314 it != s.
atoms.end(); ++it)
2316 size_t i = it->element;
2327 for (
size_t j = 0; j < dXdc.at(i).size(); ++j)
2329 pu.
jacobian.at(iu).at(offset.at(i) + j) +=
2336 throw runtime_error(
"ERROR: Not implemented.\n");
2339 else if (k ==
"force")
2344 for (vector<Atom>::iterator it = s.
atoms.begin();
2345 it != s.
atoms.end(); ++it)
2349#ifndef N2P2_FULL_SFD_MEMORY
2352 it->collectDGdxia(c->
a, c->
c);
2354 size_t i = it->element;
2362#ifndef N2P2_FULL_SFD_MEMORY
2367 &(it->dGdxia.front()));
2372 for (
size_t j = 0; j < dXdc.at(i).size(); ++j)
2374 pu.
jacobian.at(iu).at(offset.at(i) + j) +=
2382 throw runtime_error(
"ERROR: Not implemented.\n");
2385 else if (k ==
"charge")
2401 for (
size_t j = 0; j < dXdc.at(i).size(); ++j)
2403 pu.
jacobian.at(iu).at(offset.at(i) + j) +=
2416 else if (k ==
"force")
2420 currentRmseFraction.at(b) = fabs(a.
fRef[c->
c] - a.
f[c->
c])
2423 else if (k ==
"charge")
2456 else if (k ==
"force")
2459 pu.
error.at(0).at(offset2) += a.
fRef[c->
c] - a.
f[c->
c];
2461 else if (k ==
"charge")
2477 else if (k ==
"force")
2480 pu.
error.at(i).at(offset2) += (a.
fRef[c->
c] - a.
f[c->
c])
2484 else if (k ==
"charge")
2498 for (
size_t j = 0; j < pu.
error.at(i).size(); ++j)
2502 for (
size_t j = 0; j < pu.
jacobian.at(i).size(); ++j)
2508 sw[k +
"_err"].stop();
2514 sw[k +
"_com"].start(newLoop);
2521 if (
myRank == 0) MPI_Reduce(MPI_IN_PLACE , &(pu.
error.at(i).front()), 1, MPI_DOUBLE, MPI_SUM, 0,
comm);
2522 else MPI_Reduce(&(pu.
error.at(i).front()), &(pu.
error.at(i).front()), 1, MPI_DOUBLE, MPI_SUM, 0,
comm);
2531 MPI_Allreduce(MPI_IN_PLACE, &(pu.
error.at(i).front()), 1, MPI_DOUBLE, MPI_SUM,
comm);
2542 if (
myRank == 0) MPI_Gather(MPI_IN_PLACE , 1, MPI_DOUBLE, &(pu.
error.at(i).front()), 1, MPI_DOUBLE, 0,
comm);
2543 else MPI_Gather(&(pu.
error.at(i).front()), 1, MPI_DOUBLE, NULL , 1, MPI_DOUBLE, 0,
comm);
2552 MPI_Allgather(MPI_IN_PLACE, 1, MPI_DOUBLE, &(pu.
error.at(i).front()), 1, MPI_DOUBLE,
comm);
2578 sw[k +
"_com"].stop();
2584 sw[k +
"_upd"].start(newLoop);
2586 omp_set_num_threads(num_threads);
2589 for (
size_t i = 0; i <
updaters.size(); ++i)
2592 pu.
error.at(i).size());
2594 pu.
error.at(i).size());
2615 sw[k +
"_upd"].stop();
2621 sw[k +
"_log"].start(newLoop);
2624 vector<int> procUpdateCandidate;
2625 vector<size_t> indexStructure;
2626 vector<size_t> indexStructureGlobal;
2627 vector<size_t> indexAtom;
2628 vector<size_t> indexCoordinate;
2630 vector<int> currentUpdateCandidatesPerTask;
2631 vector<int> currentUpdateCandidatesOffset;
2632 int myCurrentUpdateCandidates = currentUpdateCandidates.size();
2636 currentUpdateCandidatesPerTask.resize(
numProcs, 0);
2637 currentUpdateCandidatesPerTask.at(0) = myCurrentUpdateCandidates;
2639 if (
myRank == 0) MPI_Gather(MPI_IN_PLACE , 1, MPI_INT, &(currentUpdateCandidatesPerTask.front()), 1, MPI_INT, 0,
comm);
2640 else MPI_Gather(&(myCurrentUpdateCandidates), 1, MPI_INT, NULL , 1, MPI_INT, 0,
comm);
2644 int totalUpdateCandidates = 0;
2645 for (
size_t i = 0; i < currentUpdateCandidatesPerTask.size(); ++i)
2647 currentUpdateCandidatesOffset.push_back(totalUpdateCandidates);
2648 totalUpdateCandidates += currentUpdateCandidatesPerTask.at(i);
2650 procUpdateCandidate.resize(totalUpdateCandidates, 0);
2651 indexStructure.resize(totalUpdateCandidates, 0);
2652 indexStructureGlobal.resize(totalUpdateCandidates, 0);
2653 indexAtom.resize(totalUpdateCandidates, 0);
2654 indexCoordinate.resize(totalUpdateCandidates, 0);
2656 currentRmseFraction.resize(totalUpdateCandidates, 0.0);
2657 thresholdLoopCount.resize(totalUpdateCandidates, 0.0);
2661 procUpdateCandidate.resize(myCurrentUpdateCandidates, 0);
2662 indexStructure.resize(myCurrentUpdateCandidates, 0);
2663 indexStructureGlobal.resize(myCurrentUpdateCandidates, 0);
2664 indexAtom.resize(myCurrentUpdateCandidates, 0);
2665 indexCoordinate.resize(myCurrentUpdateCandidates, 0);
2667 for (
int i = 0; i < myCurrentUpdateCandidates; ++i)
2669 procUpdateCandidate.at(i) =
myRank;
2671 indexStructure.at(i) = c.
s;
2672 indexStructureGlobal.at(i) =
structures.at(c.
s).index;
2673 indexAtom.at(i) = c.
a;
2674 indexCoordinate.at(i) = c.
c;
2678 MPI_Gatherv(MPI_IN_PLACE, 0, MPI_DOUBLE, &(currentRmseFraction.front()) , &(currentUpdateCandidatesPerTask.front()), &(currentUpdateCandidatesOffset.front()), MPI_DOUBLE, 0,
comm);
2679 MPI_Gatherv(MPI_IN_PLACE, 0,
MPI_SIZE_T, &(thresholdLoopCount.front()) , &(currentUpdateCandidatesPerTask.front()), &(currentUpdateCandidatesOffset.front()),
MPI_SIZE_T, 0,
comm);
2680 MPI_Gatherv(MPI_IN_PLACE, 0, MPI_INT , &(procUpdateCandidate.front()) , &(currentUpdateCandidatesPerTask.front()), &(currentUpdateCandidatesOffset.front()), MPI_INT , 0,
comm);
2681 MPI_Gatherv(MPI_IN_PLACE, 0,
MPI_SIZE_T, &(indexStructure.front()) , &(currentUpdateCandidatesPerTask.front()), &(currentUpdateCandidatesOffset.front()),
MPI_SIZE_T, 0,
comm);
2682 MPI_Gatherv(MPI_IN_PLACE, 0,
MPI_SIZE_T, &(indexStructureGlobal.front()), &(currentUpdateCandidatesPerTask.front()), &(currentUpdateCandidatesOffset.front()),
MPI_SIZE_T, 0,
comm);
2683 MPI_Gatherv(MPI_IN_PLACE, 0,
MPI_SIZE_T, &(indexAtom.front()) , &(currentUpdateCandidatesPerTask.front()), &(currentUpdateCandidatesOffset.front()),
MPI_SIZE_T, 0,
comm);
2684 MPI_Gatherv(MPI_IN_PLACE, 0,
MPI_SIZE_T, &(indexCoordinate.front()) , &(currentUpdateCandidatesPerTask.front()), &(currentUpdateCandidatesOffset.front()),
MPI_SIZE_T, 0,
comm);
2688 MPI_Gatherv(&(currentRmseFraction.front()) , myCurrentUpdateCandidates, MPI_DOUBLE, NULL, NULL, NULL, MPI_DOUBLE, 0,
comm);
2689 MPI_Gatherv(&(thresholdLoopCount.front()) , myCurrentUpdateCandidates,
MPI_SIZE_T, NULL, NULL, NULL,
MPI_SIZE_T, 0,
comm);
2690 MPI_Gatherv(&(procUpdateCandidate.front()) , myCurrentUpdateCandidates, MPI_INT , NULL, NULL, NULL, MPI_INT , 0,
comm);
2691 MPI_Gatherv(&(indexStructure.front()) , myCurrentUpdateCandidates,
MPI_SIZE_T, NULL, NULL, NULL,
MPI_SIZE_T, 0,
comm);
2692 MPI_Gatherv(&(indexStructureGlobal.front()), myCurrentUpdateCandidates,
MPI_SIZE_T, NULL, NULL, NULL,
MPI_SIZE_T, 0,
comm);
2694 MPI_Gatherv(&(indexCoordinate.front()) , myCurrentUpdateCandidates,
MPI_SIZE_T, NULL, NULL, NULL,
MPI_SIZE_T, 0,
comm);
2699 for (
size_t i = 0; i < procUpdateCandidate.size(); ++i)
2704 thresholdLoopCount.at(i),
2705 currentRmseFraction.at(i),
2706 indexStructureGlobal.at(i),
2707 indexStructure.at(i));
2709 else if (k ==
"force")
2712 thresholdLoopCount.at(i),
2713 currentRmseFraction.at(i),
2714 indexStructureGlobal.at(i),
2715 indexStructure.at(i),
2717 indexCoordinate.at(i));
2719 else if (k ==
"charge")
2722 thresholdLoopCount.at(i),
2723 currentRmseFraction.at(i),
2724 indexStructureGlobal.at(i),
2725 indexStructure.at(i),
2731 sw[k +
"_log"].stop();
2741 return weights.at(element).at(index);
2746 weights.at(element).at(index) = value;
2756#ifdef N2P2_NO_SF_GROUPS
2762 vector<vector<double> > dEdc;
2763 vector<vector<double> > dedc;
2768 size_t n =
elements.at(i).neuralNetworks.at(
"short")
2769 .getNumConnections();
2770 dEdc.at(i).resize(n, 0.0);
2771 dedc.at(i).resize(n, 0.0);
2773 for (vector<Atom>::iterator it = s.
atoms.begin();
2774 it != s.
atoms.end(); ++it)
2776 size_t i = it->element;
2782 for (
size_t j = 0; j < dedc.at(i).size(); ++j)
2784 dEdc.at(i).at(j) += dedc.at(i).at(j);
2795 std::size_t component)
2798#ifdef N2P2_NO_SF_GROUPS
2804 vector<vector<double> > dFdc;
2805 vector<vector<double> > dfdc;
2810 size_t n =
elements.at(i).neuralNetworks.at(
"short")
2811 .getNumConnections();
2812 dFdc.at(i).resize(n, 0.0);
2813 dfdc.at(i).resize(n, 0.0);
2815 for (vector<Atom>::iterator it = s.
atoms.begin();
2816 it != s.
atoms.end(); ++it)
2818#ifndef N2P2_FULL_SFD_MEMORY
2821 it->collectDGdxia(atom, component);
2823 size_t i = it->element;
2828#ifndef N2P2_FULL_SFD_MEMORY
2831 nn.
calculateDFdc(&(dfdc.at(i).front()), &(it->dGdxia.front()));
2833 for (
size_t j = 0; j < dfdc.at(i).size(); ++j)
2835 dFdc.at(i).at(j) += dfdc.at(i).at(j);
2854 n += e.neuralNetworks.at(
id).getNumConnections();
2865 npe.push_back(e.neuralNetworks.at(
id).getNumConnections());
2873 vector<size_t> offset;
2877 offset.push_back(n);
2878 n += e.neuralNetworks.at(
id).getNumConnections();
2886 vector<vector<double>>& dPdc)
2892 if (property ==
"energy")
2896 for (
auto const& a : structure.
atoms)
2898 size_t e = a.element;
2902 vector<double> tmp(npe.at(e), 0.0);
2904 for (
size_t j = 0; j < tmp.size(); ++j)
2906 dPdc.at(0).at(off.at(e) + j) += tmp.at(j);
2910 else if (property ==
"force")
2914 for (
size_t ia = 0; ia < structure.
numAtoms; ++ia)
2916 for (
size_t ixyz = 0; ixyz < 3; ++ixyz)
2919 for (
auto& a : structure.
atoms)
2921#ifndef N2P2_FULL_SFD_MEMORY
2924 a.collectDGdxia(ia, ixyz);
2926 size_t e = a.element;
2932 vector<double> tmp(npe.at(e), 0.0);
2933#ifndef N2P2_FULL_SFD_MEMORY
2938 for (
size_t j = 0; j < tmp.size(); ++j)
2940 dPdc.at(count).at(off.at(e) + j) += tmp.at(j);
2949 throw runtime_error(
"ERROR: Weight derivatives not implemented for "
2950 "property \"" + property +
"\".\n");
2958 vector<vector<double>>& dPdc,
2965 if (property ==
"energy")
2970 for (
size_t ic = 0; ic < npe.at(ie); ++ic)
2972 size_t const o = off.at(ie) + ic;
2973 double const w =
weights.at(0).at(o);
2979 double energyHigh = structure.
energy;
2981 weights.at(0).at(o) -= 2.0 * delta;
2985 double energyLow = structure.
energy;
2987 dPdc.at(0).push_back((energyHigh - energyLow) / (2.0 * delta));
2992 else if (property ==
"force")
2996 for (
size_t ia = 0; ia < structure.
numAtoms; ++ia)
2998 for (
size_t ixyz = 0; ixyz < 3; ++ixyz)
3002 for (
size_t ic = 0; ic < npe.at(ie); ++ic)
3004 size_t const o = off.at(ie) + ic;
3005 double const w =
weights.at(0).at(o);
3011 double forceHigh = structure.
atoms.at(ia).f[ixyz];
3013 weights.at(0).at(o) -= 2.0 * delta;
3017 double forceLow = structure.
atoms.at(ia).f[ixyz];
3019 dPdc.at(count).push_back((forceHigh - forceLow)
3030 throw runtime_error(
"ERROR: Numeric weight derivatives not "
3031 "implemented for property \""
3032 + property +
"\".\n");
3099 string s =
strpr(
" E %5zu %10zu %5d %3zu %10.2E %10zu %5zu\n",
3115 string s =
strpr(
" F %5zu %10zu %5d %3zu %10.2E %10zu %5zu %5zu %2zu\n",
3130 string s =
strpr(
" Q %5zu %10zu %5d %3zu %10.2E %10zu %5zu %5zu\n",
3137#ifndef N2P2_FULL_SFD_MEMORY
3140 size_t indexComponent)
3149 vector<vector<size_t> >
const& tableFull
3154 if (atom.
neighbors[i].index == indexAtom)
3157 vector<size_t>
const& table = tableFull.at(n.
element);
3158 for (
size_t j = 0; j < n.
dGdr.size(); ++j)
3160 dGdxia[table.at(j)] += n.
dGdr[j][indexComponent];
3164 if (atom.
index == indexAtom)
3166 for (
size_t i = 0; i < nsf; ++i)
3178 string keywordNW =
"";
3179 if (type ==
"short" ) keywordNW =
"nguyen_widrow_weights_short";
3180 else if (type ==
"charge") keywordNW =
"nguyen_widrow_weights_charge";
3183 throw runtime_error(
"ERROR: Unknown neural network type.\n");
3186 double minWeights = atof(
settings[
"weights_min"].c_str());
3187 double maxWeights = atof(
settings[
"weights_max"].c_str());
3188 log <<
strpr(
"Initial weights selected randomly in interval "
3189 "[%f, %f).\n", minWeights, maxWeights);
3195 for (
size_t j = 0; j < w.size(); ++j)
3197 w.at(j) = minWeights + gsl_rng_uniform(
rngGlobal)
3198 * (maxWeights - minWeights);
3204 log <<
"Weights modified according to Nguyen Widrow scheme.\n";
3205 for (vector<Element>::iterator it =
elements.begin();
3214 throw runtime_error(
"ERROR: Preconditioning of weights not yet"
3219 log <<
"Weights modified accoring to Glorot Bengio scheme.\n";
3221 log <<
"Biases set to zero.\n";
3222 for (vector<Element>::iterator it =
elements.begin();
3237 bool all = (
property ==
"all");
3238 bool isProperty = (find(
pk.begin(),
pk.end(), property) !=
pk.end());
3239 if (!(all || isProperty))
3241 throw runtime_error(
"ERROR: Unknown property for selection mode"
3250 log <<
"Global selection mode settings:\n";
3257 + property)))
return;
3258 log <<
"Selection mode settings specific to property \""
3259 <<
property <<
"\":\n";
3262 if (all) keyword =
"selection_mode";
3263 else keyword =
"selection_mode_" + property;
3267 map<size_t, SelectionMode> schedule;
3269 if (args.size() % 2 != 1)
3271 throw runtime_error(
"ERROR: Incorrect selection mode format.\n");
3274 for (
size_t i = 1; i < args.size(); i = i + 2)
3276 schedule[(size_t)atoi(args.at(i).c_str())] =
3279 for (map<size_t, SelectionMode>::const_iterator it = schedule.begin();
3280 it != schedule.end(); ++it)
3282 log <<
strpr(
"- Selection mode starting with epoch %zu:\n",
3286 log <<
strpr(
" Random selection of update candidates: "
3287 "SelectionMode::SM_RANDOM (%d)\n", it->second);
3289 else if (it->second ==
SM_SORT)
3291 log <<
strpr(
" Update candidates selected according to error: "
3292 "SelectionMode::SM_SORT (%d)\n", it->second);
3296 log <<
strpr(
" Update candidates chosen randomly above RMSE "
3297 "threshold: SelectionMode::SM_THRESHOLD (%d)\n",
3302 throw runtime_error(
"ERROR: Unknown selection mode.\n");
3309 i.second.selectionModeSchedule = schedule;
3310 i.second.selectionMode = schedule[0];
3315 p[property].selectionModeSchedule = schedule;
3316 p[property].selectionMode = schedule[0];
3320 if (all) keyword =
"rmse_threshold";
3321 else keyword =
"rmse_threshold_" + property;
3324 double t = atof(
settings[keyword].c_str());
3325 log <<
strpr(
"- RMSE selection threshold: %.2f * RMSE\n", t);
3326 if (all)
for (
auto& i :
p) i.second.rmseThreshold = t;
3327 else p[property].rmseThreshold = t;
3330 if (all) keyword =
"rmse_threshold_trials";
3331 else keyword =
"rmse_threshold_trials_" + property;
3334 size_t t = atoi(
settings[keyword].c_str());
3335 log <<
strpr(
"- RMSE selection trials : %zu\n", t);
3336 if (all)
for (
auto& i :
p) i.second.rmseThresholdTrials = t;
3337 else p[property].rmseThresholdTrials = t;
3345 string keyword =
"write_";
3346 bool isProperty = (find(
pk.begin(),
pk.end(), type) !=
pk.end());
3347 if (type ==
"energy" ) keyword +=
"trainpoints";
3348 else if (type ==
"force" ) keyword +=
"trainforces";
3349 else if (type ==
"charge" ) keyword +=
"traincharges";
3350 else if (type ==
"weights_epoch") keyword += type;
3351 else if (type ==
"neuronstats" ) keyword += type;
3354 throw runtime_error(
"ERROR: Invalid type for file output setup.\n");
3360 size_t* writeEvery =
nullptr;
3361 size_t* writeAlways =
nullptr;
3365 writeEvery = &(
p[type].writeCompEvery);
3366 writeAlways = &(
p[type].writeCompAlways);
3367 message =
"Property \"" + type +
"\" comparison";
3368 message.at(0) = toupper(message.at(0));
3370 else if (type ==
"weights_epoch")
3376 else if (type ==
"neuronstats")
3380 message =
"Neuron statistics";
3385 if (v.size() == 1) *writeEvery = (size_t)atoi(v.at(0).c_str());
3386 else if (v.size() == 2)
3388 *writeEvery = (size_t)atoi(v.at(0).c_str());
3389 *writeAlways = (size_t)atoi(v.at(1).c_str());
3392 +
" files will be written every %zu epochs.\n").c_str(),
3394 if (*writeAlways > 0)
3397 +
" files will always be written up to epoch "
3398 "%zu.\n").c_str(), *writeAlways);
3407 bool isProperty = (find(
pk.begin(),
pk.end(), property) !=
pk.end());
3410 throw runtime_error(
"ERROR: Unknown property for update plan"
3416 string keyword =
property +
"_fraction";
3419 if (property ==
"force" &&
3423 double const ratio = atof(
settings[
"force_energy_ratio"].c_str());
3426 log <<
"WARNING: Given force fraction is ignored because "
3427 "force/energy ratio is provided.\n";
3429 log <<
strpr(
"Desired force/energy update ratio : %.6f\n",
3431 log <<
"----------------------------------------------\n";
3433 /
p[
"force"].numTrainPatterns;
3441 keyword =
"task_batch_size_" + property;
3467 MPI_Allgather(MPI_IN_PLACE, 1, MPI_INT, &(pa.
errorsPerTask.front()), 1, MPI_INT,
comm);
3496 log <<
"Update plan for property \"" +
property +
"\":\n";
3497 log <<
strpr(
"- Per-task batch size : %zu\n",
3499 log <<
strpr(
"- Fraction of patterns used per epoch : %.6f\n",
3503 log <<
"WARNING: No updates are planned for this property.";
3505 log <<
strpr(
"- Updates per epoch : %zu\n",
3507 log <<
strpr(
"- Patterns used per update (rank %3d / global) : "
3510 log <<
"----------------------------------------------\n";
3517 bool isProperty = (find(
pk.begin(),
pk.end(), property) !=
pk.end());
3520 throw runtime_error(
"ERROR: Unknown property for array allocation.\n");
3523 log <<
"Allocating memory for " +
property +
3524 " error vector and Jacobian.\n";
3542 pa.
error.at(i).resize(size, 0.0);
3544 log <<
strpr(
"Updater %3zu:\n", i);
3545 log <<
strpr(
" - Error size: %zu\n", pa.
error.at(i).size());
3548 log <<
"----------------------------------------------\n";
3556 string fileNameActual = fileName;
3559 fileNameActual +=
strpr(
".stage-%zu",
stage);
3562 vector<string> sub = {
"_err",
"_com",
"_upd",
"_log"};
3563 if (append) file.open(fileNameActual.c_str(), ofstream::app);
3566 file.open(fileNameActual.c_str());
3569 vector<string> title;
3570 vector<string> colName;
3571 vector<string> colInfo;
3572 vector<size_t> colSize;
3573 title.push_back(
"Timing data for training loop.");
3574 colSize.push_back(10);
3575 colName.push_back(
"epoch");
3576 colInfo.push_back(
"Current epoch.");
3577 colSize.push_back(11);
3578 colName.push_back(
"train");
3579 colInfo.push_back(
"Time for training.");
3580 colSize.push_back(7);
3581 colName.push_back(
"ptrain");
3582 colInfo.push_back(
"Time for training (percentage of loop).");
3583 colSize.push_back(11);
3584 colName.push_back(
"error");
3585 colInfo.push_back(
"Time for error calculation.");
3586 colSize.push_back(7);
3587 colName.push_back(
"perror");
3588 colInfo.push_back(
"Time for error calculation (percentage of loop).");
3589 colSize.push_back(11);
3590 colName.push_back(
"epoch");
3591 colInfo.push_back(
"Time for this epoch.");
3592 colSize.push_back(11);
3593 colName.push_back(
"total");
3594 colInfo.push_back(
"Total time for all epochs.");
3597 colSize.push_back(11);
3598 colName.push_back(
p[k].tiny +
"train");
3599 colInfo.push_back(
"");
3600 colSize.push_back(7);
3601 colName.push_back(
p[k].tiny +
"ptrain");
3602 colInfo.push_back(
"");
3608 colSize.push_back(11);
3609 colName.push_back(
p[k].tiny + s);
3610 colInfo.push_back(
"");
3611 colSize.push_back(7);
3612 colName.push_back(
p[k].tiny +
"p" + s);
3613 colInfo.push_back(
"");
3620 double timeLoop =
sw[
"loop"].getLoop();
3622 file <<
strpr(
" %11.3E",
sw[
"train"].getLoop());
3623 file <<
strpr(
" %7.3f",
sw[
"train"].getLoop() / timeLoop);
3624 file <<
strpr(
" %11.3E",
sw[
"error"].getLoop());
3625 file <<
strpr(
" %7.3f",
sw[
"error"].getLoop() / timeLoop);
3626 file <<
strpr(
" %11.3E", timeLoop);
3627 file <<
strpr(
" %11.3E",
sw[
"loop"].getTotal());
3631 file <<
strpr(
" %11.3E",
sw[k].getLoop());
3632 file <<
strpr(
" %7.3f",
sw[k].getLoop() /
sw[
"train"].getLoop());
3638 file <<
strpr(
" %11.3E",
sw[k + s].getLoop());
3639 file <<
strpr(
" %7.3f",
sw[k + s].getLoop() /
sw[k].getLoop());
3651 property (property ),
3652 displayMetric (
"" ),
3655 selectionMode (SM_RANDOM),
3656 numTrainPatterns (0 ),
3657 numTestPatterns (0 ),
3659 writeCompEvery (0 ),
3660 writeCompAlways (0 ),
3661 posUpdateCandidates (0 ),
3662 rmseThresholdTrials (0 ),
3665 patternsPerUpdate (0 ),
3666 patternsPerUpdateGlobal(0 ),
3667 numErrorsGlobal (0 ),
3668 epochFraction (0.0 ),
3669 rmseThreshold (0.0 )
3691 throw runtime_error(
"ERROR: Unknown training property.\n");
Collect and process large data sets.
std::size_t numStructures
Total number of structures in dataset.
gsl_rng * rng
GSL random number generator (different seed for each MPI process).
MPI_Comm comm
Global MPI communicator.
int numProcs
Total number of MPI processors.
std::vector< Structure > structures
All structures in this dataset.
void combineFiles(std::string filePrefix) const
Combine individual MPI proc files to one.
gsl_rng * rngGlobal
Global GSL random number generator (equal seed for each MPI process).
void collectError(std::string const &property, std::map< std::string, double > &error, std::size_t &count) const
Collect error metrics of a property over all MPI procs.
std::size_t atomicNumber(std::size_t index) const
Get atomic number from element index.
Weight updates based on simple gradient descent methods.
void setParametersFixed(double const eta)
Set parameters for fixed step gradient descent algorithm.
void setParametersAdam(double const eta, double const beta1, double const beta2, double const epsilon)
Set parameters for Adam algorithm.
DescentType
Enumerate different gradient descent variants.
@ DT_ADAM
Adaptive moment estimation (Adam).
@ DT_FIXED
Fixed step size.
Implementation of the Kalman filter method.
void setParametersStandard(double const epsilon, double const q0, double const qtau, double const qmin, double const eta0, double const etatau, double const etamax)
Set parameters for standard Kalman filter.
void setParametersFadingMemory(double const epsilon, double const q0, double const qtau, double const qmin, double const lambda, double const nu)
Set parameters for fading memory Kalman filter.
KalmanType
Enumerate different Kalman filter types.
@ KT_STANDARD
Regular Kalman filter.
@ KT_FADINGMEMORY
Kalman filtering with fading memory modification.
void setSizeObservation(std::size_t const sizeObservation)
Set observation vector size.
bool silent
Completely silence output.
void setupNormalization(bool standalone=true)
Set up normalization.
ElementMap elementMap
Global element map, populated by setupElementMap().
void addEnergyOffset(Structure &structure, bool ref=true)
Add atomic energy offsets to reference energy.
virtual void setupSymmetryFunctionCache(bool verbose=false)
Set up symmetry function cache.
@ SHORT_CHARGE_NN
Short range NNP with additional charge NN (M. Bircher).
@ SHORT_ONLY
Short range NNP only.
void readNeuralNetworkWeights(std::string const &type, std::string const &fileName)
Read in weights for a specific type of neural network.
double physical(std::string const &property, double value) const
Undo normalization for a given property.
virtual void setupSymmetryFunctionGroups()
Set up symmetry function groups.
std::vector< Element > elements
void calculateAtomicNeuralNetworks(Structure &structure, bool const derivatives)
Calculate a single atomic neural network for a given atom and nn type.
void calculateEnergy(Structure &structure) const
Calculate potential energy for a given structure.
void calculateSymmetryFunctionGroups(Structure &structure, bool const derivatives)
Calculate all symmetry function groups for all atoms in given structure.
virtual void setupSymmetryFunctionScaling(std::string const &fileName="scaling.data")
Set up symmetry function scaling from file.
void removeEnergyOffset(Structure &structure, bool ref=true)
Remove atomic energy offsets from reference energy.
double getEnergyWithOffset(Structure const &structure, bool ref=true) const
Add atomic energy offsets and return energy.
virtual void setupSymmetryFunctions()
Set up all symmetry functions.
void calculateSymmetryFunctions(Structure &structure, bool const derivatives)
Calculate all symmetry functions for all atoms in given structure.
void calculateForces(Structure &structure) const
Calculate forces for all atoms in given structure.
void setupSymmetryFunctionStatistics(bool collectStatistics, bool collectExtrapolationWarnings, bool writeExtrapolationWarnings, bool stopOnExtrapolationWarnings)
Set up symmetry function statistics collection.
void setupSymmetryFunctionMemory(bool verbose=false)
Extract required memory dimensions for symmetry function derivatives.
void writeSettingsFile(std::ofstream *const &file) const
Write complete settings file.
This class implements a feed-forward neural network.
int getNumConnections() const
Return total number of connections.
void setInput(double const *const &input) const
Set neural network input layer node values.
void modifyConnections(ModificationScheme modificationScheme)
Change connections according to a given modification scheme.
void setConnections(double const *const &connections)
Set neural network weights and biases.
void calculateDFdc(double *dFdc, double const *const &dGdxyz) const
Calculate "second" derivative of output with respect to connections.
@ MS_ZEROBIAS
Set all bias values to zero.
@ MS_GLOROTBENGIO
Normalize connections according to Glorot and Bengio.
@ MS_NGUYENWIDROW
Initialize connections according to Nguyen-Widrow scheme.
void getConnections(double *connections) const
Get neural network weights and biases.
void propagate()
Propagate input information through all layers.
void calculateDEdc(double *dEdc) const
Calculate derivative of output neuron with respect to connections.
void calculateDEdG(double *dEdG) const
Calculate derivative of output neuron with respect to input neurons.
void getOutput(double *output) const
Get neural network output layer node values.
void writeSettingsFile(std::ofstream *const &file, std::map< std::size_t, std::string > const &replacements={}) const
Write complete settings file.
bool keywordExists(std::string const &keyword, bool exact=false) const
Check if keyword is present in settings file.
std::vector< std::string > getSettingsLines() const
Get complete settings file.
void setTrainingLogFileName(std::string fileName)
Set training log file name.
UpdaterType updaterType
Updater type used.
std::vector< double > dGdxia
Derivative of symmetry functions with respect to one specific atom coordinate.
std::size_t epoch
Current epoch.
std::vector< std::size_t > numWeightsPerUpdater
Number of weights per updater.
std::size_t writeWeightsAlways
Up to this epoch weights are written every epoch.
void randomizeNeuralNetworkWeights(std::string const &type)
Randomly initialize specific neural network weights.
std::size_t numWeights
Total number of weights.
bool useForces
Use forces for training.
std::size_t writeWeightsEvery
Write weights every this many epochs.
void setupTraining()
General training settings and setup of weight update routine.
std::mt19937_64 rngGlobalNew
Global random number generator.
std::string nnId
ID of neural network the training is working on.
void allocateArrays(std::string const &property)
Allocate error and Jacobian arrays for given property.
void writeWeightsEpoch() const
Write weights to files during training loop.
std::ofstream trainingLog
Training log file.
bool hasUpdaters
If this rank performs weight updates.
void checkSelectionMode()
Check if selection mode should be changed.
std::string trainingLogFileName
File name for training log.
JacobianMode jacobianMode
Jacobian mode used.
void selectSets()
Randomly select training and test set structures.
JacobianMode
Jacobian matrix preparation mode.
@ JM_SUM
No Jacobian, sum up contributions from update candidates.
@ JM_TASK
Prepare one Jacobian entry for each task, sum up within tasks.
@ JM_FULL
Prepare full Jacobian matrix.
std::size_t countUpdates
Update counter (for all training quantities together).
std::map< std::string, Stopwatch > sw
Stopwatches for timing overview.
void setupSelectionMode(std::string const &property)
Set selection mode for specific training property.
void dPdc(std::string property, Structure &structure, std::vector< std::vector< double > > &dEdc)
Compute derivatives of property with respect to weights.
void printHeader()
Print training loop header on screen.
std::mt19937_64 rngNew
Per-task random number generator.
PropertyMap p
Actual training properties.
std::vector< Updater * > updaters
Weight updater (combined or for each element).
std::size_t writeNeuronStatisticsAlways
Up to this epoch neuron statistics are written every epoch.
UpdaterType
Type of update routine.
@ UT_GD
Simple gradient descent methods.
@ UT_KF
Kalman filter-based methods.
@ UT_LM
Levenberg-Marquardt algorithm.
double forceWeight
Force update weight.
void update(std::string const &property)
Perform one update.
void dataSetNormalization()
Apply normalization based on initial weights prediction.
ParallelMode
Training parallelization mode.
@ PM_TRAIN_RK0
No training parallelization, only data set distribution.
@ PM_TRAIN_ALL
Parallel gradient computation, update on each task.
bool writeTrainingLog
Whether training log file is written.
std::size_t numUpdaters
Number of updaters (depends on update strategy).
bool advance() const
Check if training loop should be continued.
~Training()
Destructor, updater vector needs to be cleaned.
void sortUpdateCandidates(std::string const &property)
Sort update candidates with descending RMSE.
void writeNeuronStatisticsEpoch() const
Write neuron statistics during training loop.
SelectionMode
How update candidates are selected during Training.
@ SM_RANDOM
Select candidates randomly.
@ SM_THRESHOLD
Select candidates randomly with RMSE above threshold.
@ SM_SORT
Sort candidates according to their RMSE and pick worst first.
std::size_t stage
Training stage.
void collectDGdxia(Atom const &atom, std::size_t indexAtom, std::size_t indexComponent)
Collect derivative of symmetry functions with respect to one atom's coordinate.
void setWeights()
Set weights in neural network class.
void loop()
Execute main training loop.
void printEpoch()
Print preferred error metric and timing information on screen.
std::vector< std::size_t > weightsOffset
Offset of each element's weights in combined array.
std::size_t numEpochs
Number of epochs requested.
void setupFileOutput(std::string const &type)
Set file output intervals for properties and other quantities.
std::size_t getNumConnections(std::string id="short") const
Get total number of NN connections.
void addTrainingLogEntry(int proc, std::size_t il, double f, std::size_t isg, std::size_t is)
Write energy update data to training log file.
void writeTimingData(bool append, std::string const fileName="timing.out")
Write timing data for all clocks.
std::size_t writeNeuronStatisticsEvery
Write neuron statistics every this many epochs.
void getWeights()
Get weights from neural network class.
void dPdcN(std::string property, Structure &structure, std::vector< std::vector< double > > &dEdc, double delta=1.0E-4)
Compute numeric derivatives of property with respect to weights.
std::vector< std::string > setupNumericDerivCheck()
Set up numeric weight derivatives check.
void setSingleWeight(std::size_t element, std::size_t index, double value)
Set a single weight value.
void shuffleUpdateCandidates(std::string const &property)
Shuffle update candidates.
double getSingleWeight(std::size_t element, std::size_t index)
Get a single weight value.
void initializeWeights()
Initialize weights for all elements.
void initializeWeightsMemory(UpdateStrategy updateStrategy=US_COMBINED)
Initialize weights vector according to update strategy.
bool repeatedEnergyUpdates
After force update perform energy update for corresponding structure.
UpdateStrategy updateStrategy
Update strategy used.
void calculateNeighborLists()
Calculate neighbor lists for all structures.
ParallelMode parallelMode
Parallelization mode used.
void writeNeuronStatistics(std::string const &nnName, std::string const &fileName) const
Write neuron statistics collected since last invocation.
std::vector< std::size_t > getConnectionOffsets(std::string id="short") const
Get offsets of NN connections for each element.
std::vector< int > epochSchedule
Update schedule epoch (false = energy update, true = force update).
void writeUpdaterStatus(bool append, std::string const fileNameFormat="updater.%03zu.out") const
Write updater information to file.
UpdateStrategy
Update strategies available for Training.
@ US_COMBINED
One combined updater for all elements.
@ US_ELEMENT
Separate updaters for individual elements.
std::vector< std::size_t > getNumConnectionsPerElement(std::string id="short") const
Get number of NN connections for each element.
void resetNeuronStatistics()
Reset neuron statistics for all elements.
std::vector< std::vector< double > > calculateWeightDerivatives(Structure *structure)
Calculate derivatives of energy with respect to weights.
void calculateError(std::map< std::string, std::pair< std::string, std::string > > const fileNames)
Calculate error metrics for all structures.
void writeLearningCurve(bool append, std::string const fileName="learning-curve.out") const
Write current RMSEs and epoch information to file.
void calculateErrorEpoch()
Calculate error metrics per epoch for all structures with file names used in training loop.
std::vector< std::string > pk
Vector of actually used training properties.
void setEpochSchedule()
Select energies/forces schedule for one epoch.
bool hasStructures
If this rank holds structure information.
std::vector< std::vector< double > > weights
Neural network weights and biases for each element.
bool freeMemory
Free symmetry function memory after calculation.
void writeSetsToFiles()
Write training and test set to separate files (train.data and test.data, same format as input....
void writeWeights(std::string const &nnName, std::string const &fileNameFormat) const
Write weights to files (one file for each element).
void setStage(std::size_t stage)
Set training stage (if multiple stages are needed for NNP type).
void setupUpdatePlan(std::string const &property)
Set up how often properties are updated.
Base class for different weight update methods.
string strpr(const char *format,...)
String version of printf function.
vector< string > createFileHeader(vector< string > const &title, vector< size_t > const &colSize, vector< string > const &colName, vector< string > const &colInfo, char const &commentChar)
vector< string > split(string const &input, char delimiter)
Split string at each delimiter.
string reduce(string const &line, string const &whitespace, string const &fill)
Replace multiple whitespaces with fill.
void appendLinesToFile(ofstream &file, vector< string > const lines)
Append multiple lines of strings to open file stream.
Struct to store information on neighbor atoms.
std::size_t element
Element index of neighbor atom.
std::vector< Vec3D > dGdr
Derivatives of symmetry functions with respect to neighbor coordinates.
Storage for a single atom.
std::vector< Neighbor > neighbors
Neighbor array (maximum number defined in macros.h).
std::size_t numSymmetryFunctions
Number of symmetry functions used to describe the atom environment.
Vec3D f
Force vector calculated by neural network.
double charge
Atomic charge determined by neural network.
std::size_t index
Index number of this atom.
Vec3D fRef
Reference force vector from data set.
std::vector< Vec3D > dGdr
Derivative of symmetry functions with respect to this atom's coordinates.
std::size_t element
Element index of this atom.
std::vector< double > G
Symmetry function values.
double chargeRef
Atomic reference charge.
std::vector< std::size_t > numNeighborsPerElement
Number of neighbors per element.
std::size_t numNeighbors
Total number of neighbors.
Storage for one atomic configuration.
@ ST_TRAINING
Structure is part of the training set.
@ ST_UNKNOWN
Sample type not assigned yet.
@ ST_TEST
Structure is part of the test set.
std::vector< std::size_t > numAtomsPerElement
Number of atoms of each element in this structure.
std::size_t index
Index number of this structure.
SampleType sampleType
Sample type (training or test set).
void freeAtoms(bool all)
Free symmetry function memory for all atoms, see free() in Atom class.
double energy
Potential energy determined by neural network.
double energyRef
Reference potential energy.
std::size_t numAtoms
Total number of atoms present in this structure.
std::vector< Atom > atoms
Vector of all atoms in this structure.
bool exists(std::string const &key)
Check if property is present.
Specific training quantity (e.g. energies, forces, charges).
std::size_t numErrorsGlobal
Global number of errors per update.
std::size_t numTrainPatterns
Number of training patterns in set.
std::vector< int > errorsPerTask
Errors per task for each update.
std::string property
Copy of identifier within Property map.
SelectionMode selectionMode
Selection mode for update candidates.
std::size_t numUpdates
Number of desired updates per epoch.
double rmseThreshold
RMSE threshold for update candidates.
std::string plural
Plural string of property.
double epochFraction
Desired update fraction per epoch.
std::size_t patternsPerUpdate
Patterns used per update.
std::size_t taskBatchSize
Batch size for each MPI task.
std::vector< std::vector< double > > error
Global error vector (per updater).
std::string displayMetric
Error metric for display.
std::string tiny
Tiny abbreviation string for property.
std::vector< std::string > errorMetrics
Error metrics available for this property.
Property(std::string const &property)
Constructor.
std::vector< std::vector< int > > offsetJacobian
Stride for Jacobians per task per updater.
std::size_t countUpdates
Counter for updates per epoch.
std::vector< std::vector< int > > weightsPerTask
Weights per task per updater.
std::vector< int > offsetPerTask
Offset for combined error per task.
std::vector< UpdateCandidate > updateCandidates
Vector with indices of training patterns.
std::map< std::string, double > errorTrain
Current error metrics of training patterns.
std::vector< std::vector< double > > jacobian
Global Jacobian (per updater).
std::size_t patternsPerUpdateGlobal
Patterns used per update (summed over all MPI tasks).
std::map< std::string, double > errorTest
Current error metrics of test patterns.
std::size_t rmseThresholdTrials
Maximum trials for SM_THRESHOLD selection mode.
std::size_t posUpdateCandidates
Current position in update candidate list (SM_SORT).
Contains location of one update candidate (energy or force).
std::size_t a
Atom index (only used for force candidates).
std::size_t c
Component index (x,y,z -> 0,1,2, only used for force candidates).
std::size_t s
Structure index.