n2p2 - A neural network potential package
nnp-norm2.cpp
Go to the documentation of this file.
1// n2p2 - A neural network potential package
2// Copyright (C) 2018 Andreas Singraber (University of Vienna)
3//
4// This program is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// This program is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12// GNU General Public License for more details.
13//
14// You should have received a copy of the GNU General Public License
15// along with this program. If not, see <https://www.gnu.org/licenses/>.
16
17#include "Training.h"
18#include "mpi-extra.h"
19#include "utility.h"
20#include <mpi.h>
21#include <cmath>
22#include <cstdlib>
23#include <iostream>
24#include <fstream>
25#include <vector>
26
27using namespace std;
28using namespace nnp;
29
30int main(int argc, char* argv[])
31{
32 int numProcs = 0;
33 int myRank = 0;
34 size_t stage = 0;
35 ofstream myLog;
36
37 if (argc > 2)
38 {
39 cout << "USAGE: " << argv[0] << " <stage>\n"
40 << " <stage> ... Training stage (only required if training"
41 " is a multi-stage process.\n"
42 << " Execute in directory with these NNP files present:\n"
43 << " - input.data (structure file)\n"
44 << " - input.nn (NNP settings)\n"
45 << " - scaling.data (symmetry function scaling data)\n"
46 << " - \"weights(e).%%03d.data\" (weights files)\n";
47 return 1;
48 }
49
50 string suffix = "";
51 if (argc > 1)
52 {
53 stage = (size_t)atoi(argv[1]);
54 suffix = strpr(".stage-%zu", stage);
55 }
56
57 MPI_Init(&argc, &argv);
58 MPI_Comm_size(MPI_COMM_WORLD, &numProcs);
59 MPI_Comm_rank(MPI_COMM_WORLD, &myRank);
60
61 Training training;
62 if (myRank != 0) training.log.writeToStdout = false;
63 myLog.open((strpr("nnp-norm2.log.%04d", myRank) + suffix).c_str());
64 training.log.registerStreamPointer(&myLog);
65 training.setupMPI();
66 training.initialize();
67 training.loadSettingsFile();
68 training.setStage(stage);
69 training.setupGeneric();
71 training.setupSymmetryFunctionStatistics(false, false, false, false);
72
73 auto nnpType = training.getNnpType();
74 if ( (nnpType == Training::NNPType::HDNNP_4G ||
75 nnpType == Training::NNPType::HDNNP_Q) && stage == 1)
76 {
77 throw runtime_error("ERROR: Normalization of charges not yet "
78 "implemented\n.");
79 }
80
81 if (training.settingsKeywordExists("mean_energy") ||
82 training.settingsKeywordExists("conv_energy") ||
83 training.settingsKeywordExists("conv_length"))
84 {
85 throw runtime_error("ERROR: Normalization keywords found in settings, "
86 "please remove them first.\n");
87 }
88
89 if (!training.settingsKeywordExists("use_short_forces"))
90 {
91 throw runtime_error("ERROR: Normalization is only possible if forces "
92 "are used (keyword \"use_short_forces\").\n");
93 }
94
95 // Need RNG for initial random weights.
97
98 // Distribute structures to MPI processes.
99 training.distributeStructures(false);
100
101 // Initialize weights and biases for neural networks.
102 training.initializeWeights();
103
104 training.writeWeights("short", "weights.%03zu.norm");
105 if ( (nnpType == Training::NNPType::HDNNP_4G ||
106 nnpType == Training::NNPType::HDNNP_Q) && stage == 2)
107 {
108 training.writeWeights("charge", "weightse.%03zu.norm");
109 }
110
111 training.log << "\n";
112 training.log << "*** DATA SET NORMALIZATION **************"
113 "**************************************\n";
114 training.log << "\n";
115
116 ofstream fileEvsV;
117 fileEvsV.open(strpr("evsv.dat.%04d", myRank).c_str());
118 if (myRank == 0)
119 {
120 // File header.
121 vector<string> title;
122 vector<string> colName;
123 vector<string> colInfo;
124 vector<size_t> colSize;
125 title.push_back("Energy vs. volume comparison.");
126 colSize.push_back(16);
127 colName.push_back("V_atom");
128 colInfo.push_back("Volume per atom.");
129 colSize.push_back(16);
130 colName.push_back("Eref_atom");
131 colInfo.push_back("Reference energy per atom.");
132 colSize.push_back(10);
133 colName.push_back("N");
134 colInfo.push_back("Number of atoms.");
135 colSize.push_back(16);
136 colName.push_back("V");
137 colInfo.push_back("Volume of structure.");
138 colSize.push_back(16);
139 colName.push_back("Eref");
140 colInfo.push_back("Reference energy of structure.");
141 colSize.push_back(16);
142 colName.push_back("Eref_offset");
143 colInfo.push_back("Reference energy of structure (including offset).");
144 appendLinesToFile(fileEvsV,
145 createFileHeader(title, colSize, colName, colInfo));
146 }
147
148 size_t numAtomsTotal = 0;
149 size_t numStructures = 0;
150 double meanEnergyPerAtomRef = 0.0;
151 double meanEnergyPerAtomNnp = 0.0;
152 double sigmaEnergyPerAtomRef = 0.0;
153 double sigmaEnergyPerAtomNnp = 0.0;
154 double meanForceRef = 0.0;
155 double meanForceNnp = 0.0;
156 double sigmaForceRef = 0.0;
157 double sigmaForceNnp = 0.0;
158 training.log << "Computing initial prediction for all structures...\n";
159 for (auto& s : training.structures)
160 {
161 // File output for evsv.dat.
162 fileEvsV << strpr("%16.8E %16.8E %10zu %16.8E %16.8E %16.8E\n",
163 s.volume / s.numAtoms,
164 s.energyRef / s.numAtoms,
165 s.numAtoms,
166 s.volume,
167 s.energyRef,
168 training.getEnergyWithOffset(s, true));
169 s.calculateNeighborList(training.getMaxCutoffRadius());
170#ifdef N2P2_NO_SF_GROUPS
171 training.calculateSymmetryFunctions(s, true);
172#else
173 training.calculateSymmetryFunctionGroups(s, true);
174#endif
175 training.calculateAtomicNeuralNetworks(s, true);
176 training.calculateEnergy(s);
177 training.calculateForces(s);
178 s.clearNeighborList();
179
180 numStructures++;
181 numAtomsTotal += s.numAtoms;
182 meanEnergyPerAtomRef += s.energyRef / s.numAtoms;
183 meanEnergyPerAtomNnp += s.energy / s.numAtoms;
184 for (auto& a : s.atoms)
185 {
186 meanForceRef += a.fRef[0] + a.fRef[1] + a.fRef[2];
187 meanForceNnp += a.f [0] + a.f [1] + a.f [2];
188 }
189 }
190
191 fileEvsV.flush();
192 fileEvsV.close();
193 MPI_Barrier(MPI_COMM_WORLD);
194 training.log << "Writing energy/atom vs. volume/atom data "
195 << "to \"evsv.dat\".\n";
196 if (myRank == 0) training.combineFiles("evsv.dat");
197 MPI_Allreduce(MPI_IN_PLACE, &numStructures , 1, MPI_SIZE_T, MPI_SUM, MPI_COMM_WORLD);
198 MPI_Allreduce(MPI_IN_PLACE, &numAtomsTotal , 1, MPI_SIZE_T, MPI_SUM, MPI_COMM_WORLD);
199 MPI_Allreduce(MPI_IN_PLACE, &meanEnergyPerAtomRef, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
200 MPI_Allreduce(MPI_IN_PLACE, &meanEnergyPerAtomNnp, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
201 MPI_Allreduce(MPI_IN_PLACE, &meanForceRef , 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
202 MPI_Allreduce(MPI_IN_PLACE, &meanForceNnp , 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
203 meanEnergyPerAtomRef /= numStructures;
204 meanEnergyPerAtomNnp /= numStructures;
205 meanForceRef /= 3 * numAtomsTotal;
206 meanForceNnp /= 3 * numAtomsTotal;
207 for (auto const& s : training.structures)
208 {
209 double ediffRef = s.energyRef / s.numAtoms - meanEnergyPerAtomRef;
210 double ediffNnp = s.energy / s.numAtoms - meanEnergyPerAtomNnp;
211 sigmaEnergyPerAtomRef += ediffRef * ediffRef;
212 sigmaEnergyPerAtomNnp += ediffNnp * ediffNnp;
213 for (auto const& a : s.atoms)
214 {
215 double fdiffRef = a.fRef[0] - meanForceRef;
216 double fdiffNnp = a.f [0] - meanForceNnp;
217 sigmaForceRef += fdiffRef * fdiffRef;
218 sigmaForceNnp += fdiffNnp * fdiffNnp;
219 fdiffRef = a.fRef[1] - meanForceRef;
220 fdiffNnp = a.f [1] - meanForceNnp;
221 sigmaForceRef += fdiffRef * fdiffRef;
222 sigmaForceNnp += fdiffNnp * fdiffNnp;
223 fdiffRef = a.fRef[2] - meanForceRef;
224 fdiffNnp = a.f [2] - meanForceNnp;
225 sigmaForceRef += fdiffRef * fdiffRef;
226 sigmaForceNnp += fdiffNnp * fdiffNnp;
227 }
228 }
229 MPI_Allreduce(MPI_IN_PLACE, &sigmaEnergyPerAtomRef, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
230 MPI_Allreduce(MPI_IN_PLACE, &sigmaEnergyPerAtomNnp, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
231 MPI_Allreduce(MPI_IN_PLACE, &sigmaForceRef , 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
232 MPI_Allreduce(MPI_IN_PLACE, &sigmaForceNnp , 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
233 sigmaEnergyPerAtomRef = sqrt(sigmaEnergyPerAtomRef / (numStructures - 1));
234 sigmaEnergyPerAtomNnp = sqrt(sigmaEnergyPerAtomNnp / (numStructures - 1));
235 sigmaForceRef = sqrt(sigmaForceRef / (3 * numAtomsTotal - 1));
236 sigmaForceNnp = sqrt(sigmaForceNnp / (3 * numAtomsTotal - 1));
237 training.log << "\n";
238 training.log << strpr("Total number of structures : %zu\n", numStructures);
239 training.log << strpr("Total number of atoms : %zu\n", numAtomsTotal);
240 training.log << "----------------------------------\n";
241 training.log << "Reference data statistics:\n";
242 training.log << "----------------------------------\n";
243 training.log << strpr("Mean/sigma energy per atom : %16.8E +/- %16.8E\n",
244 meanEnergyPerAtomRef,
245 sigmaEnergyPerAtomRef);
246 training.log << strpr("Mean/sigma force : %16.8E +/- %16.8E\n",
247 meanForceRef,
248 sigmaForceRef);
249 training.log << "----------------------------------\n";
250 training.log << "Initial NNP prediction statistics:\n";
251 training.log << "----------------------------------\n";
252 training.log << strpr("Mean/sigma energy per atom : %16.8E +/- %16.8E\n",
253 meanEnergyPerAtomNnp,
254 sigmaEnergyPerAtomNnp);
255 training.log << strpr("Mean/sigma force : %16.8E +/- %16.8E\n",
256 meanForceNnp,
257 sigmaForceNnp);
258 training.log << "----------------------------------\n";
259 double convEnergy = sigmaForceNnp / sigmaForceRef;
260 double convLength = sigmaForceNnp;
261 training.log << strpr("Conversion factor energy : %24.16E\n", convEnergy);
262 training.log << strpr("Conversion factor length : %24.16E\n", convLength);
263
264 //ofstream fileCfg;
265 //fileCfg.open(strpr("output.data.%04d", myRank).c_str());
266 //for (vector<Structure>::iterator it = training.structures.begin();
267 // it != training.structures.end(); ++it)
268 //{
269 // it->energyRef = (it->energyRef - meanEnergyPerAtom * it->numAtoms)
270 // * convEnergy;
271 // it->box[0] *= convLength;
272 // it->box[1] *= convLength;
273 // it->box[2] *= convLength;
274 // for (vector<Atom>::iterator it2 = it->atoms.begin();
275 // it2 != it->atoms.end(); ++it2)
276 // {
277 // it2->r *= convLength;
278 // it2->fRef *= convEnergy / convLength;
279 // }
280 // it->writeToFile(&fileCfg);
281 //}
282 //fileCfg.flush();
283 //fileCfg.close();
284 //MPI_Barrier(MPI_COMM_WORLD);
285 //training.log << "\n";
286 //training.log << "Writing converted data file to \"output.data\".\n";
287 //training.log << "WARNING: This data set is provided for debugging "
288 // "purposes only and is NOT intended for training.\n";
289 //if (myRank == 0) training.combineFiles("output.data");
290
291 if (myRank == 0)
292 {
293 training.log << "\n";
294 training.log << "Writing backup of original settings file to "
295 "\"input.nn.bak\".\n";
296 ofstream fileSettings;
297 fileSettings.open("input.nn.bak");
298 training.writeSettingsFile(&fileSettings);
299 fileSettings.close();
300
301 training.log << "\n";
302 training.log << "Writing extended settings file to \"input.nn\".\n";
303 training.log << "Use this settings file for normalized training.\n";
304 fileSettings.open("input.nn");
305 fileSettings << "#########################################"
306 "######################################\n";
307 fileSettings << "# DATA SET NORMALIZATION\n";
308 fileSettings << "#########################################"
309 "######################################\n";
310 fileSettings << strpr("mean_energy %24.16E # nnp-norm2\n",
311 meanEnergyPerAtomRef);
312 fileSettings << strpr("conv_energy %24.16E # nnp-norm2\n", convEnergy);
313 fileSettings << strpr("conv_length %24.16E # nnp-norm2\n", convLength);
314 fileSettings << "#########################################"
315 "######################################\n";
316 fileSettings << "\n";
317 training.writeSettingsFile(&fileSettings);
318 fileSettings.close();
319 }
320
321 training.log << "*****************************************"
322 "**************************************\n";
323
324 myLog.close();
325
326 MPI_Finalize();
327
328 return 0;
329}
int distributeStructures(bool randomize, bool excludeRank0=false, std::string const &fileName="input.data")
Read data file and distribute structures among processors.
Definition: Dataset.cpp:724
void setupMPI()
Initialize MPI with MPI_COMM_WORLD.
Definition: Dataset.cpp:52
std::vector< Structure > structures
All structures in this dataset.
Definition: Dataset.h:195
void setupRandomNumberGenerator()
Initialize random number generator.
Definition: Dataset.cpp:110
void combineFiles(std::string filePrefix) const
Combine individual MPI proc files to one.
Definition: Dataset.cpp:1744
void registerStreamPointer(std::ofstream *const &streamPointer)
Register new C++ ofstream pointer.
Definition: Log.cpp:91
bool writeToStdout
Turn on/off output to stdout.
Definition: Log.h:85
NNPType getNnpType() const
Getter for Mode::nnpType.
Definition: Mode.h:668
void initialize()
Write welcome message with version information.
Definition: Mode.cpp:55
void setupGeneric(std::string const &nnpDir="", bool skipNormalize=false, bool initialHardness=false)
Combine multiple setup routines and provide a basic NNP setup.
Definition: Mode.cpp:212
void calculateAtomicNeuralNetworks(Structure &structure, bool const derivatives, std::string id="")
Calculate atomic neural networks for all atoms in given structure.
Definition: Mode.cpp:1642
double getMaxCutoffRadius() const
Getter for Mode::maxCutoffRadius.
Definition: Mode.h:693
void calculateEnergy(Structure &structure) const
Calculate potential energy for a given structure.
Definition: Mode.cpp:1803
void calculateSymmetryFunctionGroups(Structure &structure, bool const derivatives)
Calculate all symmetry function groups for all atoms in given structure.
Definition: Mode.cpp:1561
virtual void setupSymmetryFunctionScaling(std::string const &fileName="scaling.data")
Set up symmetry function scaling from file.
Definition: Mode.cpp:712
bool settingsKeywordExists(std::string const &keyword) const
Check if keyword was found in settings file.
Definition: Mode.cpp:2193
Log log
Global log file.
Definition: Mode.h:593
double getEnergyWithOffset(Structure const &structure, bool ref=true) const
Add atomic energy offsets and return energy.
Definition: Mode.cpp:2069
void calculateSymmetryFunctions(Structure &structure, bool const derivatives)
Calculate all symmetry functions for all atoms in given structure.
Definition: Mode.cpp:1480
void calculateForces(Structure &structure) const
Calculate forces for all atoms in given structure.
Definition: Mode.cpp:1849
void setupSymmetryFunctionStatistics(bool collectStatistics, bool collectExtrapolationWarnings, bool writeExtrapolationWarnings, bool stopOnExtrapolationWarnings)
Set up symmetry function statistics collection.
Definition: Mode.cpp:1103
void loadSettingsFile(std::string const &fileName="input.nn")
Open settings file and load all keywords into memory.
Definition: Mode.cpp:161
void writeSettingsFile(std::ofstream *const &file) const
Write complete settings file.
Definition: Mode.cpp:2221
Training methods.
Definition: Training.h:36
void initializeWeights()
Initialize weights for all elements.
Definition: Training.cpp:292
void writeWeights(std::string const &nnName, std::string const &fileNameFormat) const
Write weights to files (one file for each element).
Definition: Training.cpp:1662
void setStage(std::size_t stage)
Set training stage (if multiple stages are needed for NNP type).
Definition: Training.cpp:379
#define MPI_SIZE_T
Definition: mpi-extra.h:22
Definition: Atom.h:29
string strpr(const char *format,...)
String version of printf function.
Definition: utility.cpp:90
vector< string > createFileHeader(vector< string > const &title, vector< size_t > const &colSize, vector< string > const &colName, vector< string > const &colInfo, char const &commentChar)
Definition: utility.cpp:111
void appendLinesToFile(ofstream &file, vector< string > const lines)
Append multiple lines of strings to open file stream.
Definition: utility.cpp:225
int main(int argc, char *argv[])
Definition: nnp-norm2.cpp:30