n2p2 - A neural network potential package
nnp-norm.cpp
Go to the documentation of this file.
1// n2p2 - A neural network potential package
2// Copyright (C) 2018 Andreas Singraber (University of Vienna)
3//
4// This program is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// This program is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12// GNU General Public License for more details.
13//
14// You should have received a copy of the GNU General Public License
15// along with this program. If not, see <https://www.gnu.org/licenses/>.
16
17#include "Dataset.h"
18#include "mpi-extra.h"
19#include "utility.h"
20#include <mpi.h>
21#include <cmath>
22#include <cstdlib>
23#include <iostream>
24#include <fstream>
25#include <vector>
26
27using namespace std;
28using namespace nnp;
29
30int main(int argc, char* argv[])
31{
32 int numProcs = 0;
33 int myRank = 0;
34 ofstream myLog;
35
36 if (argc != 1)
37 {
38 cout << "USAGE: " << argv[0] << "\n"
39 << " Execute in directory with these NNP files present:\n"
40 << " - input.data (structure file)\n"
41 << " - input.nn (NNP settings)\n";
42 return 1;
43 }
44
45 MPI_Init(&argc, &argv);
46 MPI_Comm_size(MPI_COMM_WORLD, &numProcs);
47 MPI_Comm_rank(MPI_COMM_WORLD, &myRank);
48
49 Dataset dataset;
50 if (myRank != 0) dataset.log.writeToStdout = false;
51 myLog.open(strpr("nnp-norm.log.%04d", myRank).c_str());
52 dataset.log.registerStreamPointer(&myLog);
53 dataset.setupMPI();
54 dataset.initialize();
55 dataset.loadSettingsFile();
56 dataset.setupElementMap();
57 dataset.setupElements();
58 dataset.distributeStructures(false);
59
60 dataset.log << "\n";
61 dataset.log << "*** DATA SET NORMALIZATION **************"
62 "**************************************\n";
63 dataset.log << "\n";
64
65 if (dataset.settingsKeywordExists("mean_energy") ||
66 dataset.settingsKeywordExists("conv_energy") ||
67 dataset.settingsKeywordExists("conv_length") ||
68 dataset.settingsKeywordExists("conv_charge"))
69 {
70 throw runtime_error("ERROR: Normalization keywords found in settings, "
71 "please remove them first.\n");
72 }
73
74 ofstream fileEvsV;
75 fileEvsV.open(strpr("evsv.dat.%04d", myRank).c_str());
76
77 // File header.
78 vector<string> title;
79 vector<string> colName;
80 vector<string> colInfo;
81 vector<size_t> colSize;
82 title.push_back("Energy vs. volume comparison.");
83 colSize.push_back(16);
84 colName.push_back("V_atom");
85 colInfo.push_back("Volume per atom.");
86 colSize.push_back(16);
87 colName.push_back("Eref_atom");
88 colInfo.push_back("Reference energy per atom.");
89 colSize.push_back(10);
90 colName.push_back("N");
91 colInfo.push_back("Number of atoms.");
92 colSize.push_back(16);
93 colName.push_back("V");
94 colInfo.push_back("Volume of structure.");
95 colSize.push_back(16);
96 colName.push_back("Eref");
97 colInfo.push_back("Reference energy of structure.");
98 colSize.push_back(16);
99 colName.push_back("Eref_offset");
100 colInfo.push_back("Reference energy of structure (including offset).");
101 appendLinesToFile(fileEvsV,
102 createFileHeader(title, colSize, colName, colInfo));
103
104 size_t numAtomsTotal = 0;
105 size_t numStructures = 0;
106 double meanEnergyPerAtom = 0.0;
107 double sigmaEnergyPerAtom = 0.0;
108 double meanForce = 0.0;
109 double sigmaForce = 0.0;
110 double maxAbsCharge = 0.0;
111 for (vector<Structure>::const_iterator it = dataset.structures.begin();
112 it != dataset.structures.end(); ++it)
113 {
114 numStructures++;
115 numAtomsTotal += it->numAtoms;
116 meanEnergyPerAtom += it->energyRef / it->numAtoms;
117 fileEvsV << strpr("%16.8E %16.8E %10zu %16.8E %16.8E %16.8E\n",
118 it->volume / it->numAtoms,
119 it->energyRef / it->numAtoms,
120 it->numAtoms,
121 it->volume,
122 it->energyRef,
123 dataset.getEnergyWithOffset(*it, true));
124 for (vector<Atom>::const_iterator it2 = it->atoms.begin();
125 it2 != it->atoms.end(); ++it2)
126 {
127 meanForce += it2->fRef[0] + it2->fRef[1] + it2->fRef[2];
128 double absChargeRef = abs(it2->chargeRef);
129 if (abs(it2->chargeRef) > maxAbsCharge)
130 maxAbsCharge = absChargeRef;
131 }
132 }
133 fileEvsV.flush();
134 fileEvsV.close();
135 MPI_Barrier(MPI_COMM_WORLD);
136 dataset.log << "Writing energy/atom vs. volume/atom data "
137 << "to \"evsv.dat\".\n";
138 if (myRank == 0) dataset.combineFiles("evsv.dat");
139 MPI_Allreduce(MPI_IN_PLACE, &numStructures , 1, MPI_SIZE_T, MPI_SUM, MPI_COMM_WORLD);
140 MPI_Allreduce(MPI_IN_PLACE, &numAtomsTotal , 1, MPI_SIZE_T, MPI_SUM, MPI_COMM_WORLD);
141 MPI_Allreduce(MPI_IN_PLACE, &meanEnergyPerAtom, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
142 MPI_Allreduce(MPI_IN_PLACE, &meanForce , 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
143 MPI_Allreduce(MPI_IN_PLACE, &maxAbsCharge , 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
144 meanEnergyPerAtom /= numStructures;
145 meanForce /= 3 * numAtomsTotal;
146 for (vector<Structure>::const_iterator it = dataset.structures.begin();
147 it != dataset.structures.end(); ++it)
148 {
149 double ediff = it->energyRef / it->numAtoms - meanEnergyPerAtom;
150 sigmaEnergyPerAtom += ediff * ediff;
151 for (vector<Atom>::const_iterator it2 = it->atoms.begin();
152 it2 != it->atoms.end(); ++it2)
153 {
154 double fdiff = it2->fRef[0] - meanForce;
155 sigmaForce += fdiff * fdiff;
156 fdiff = it2->fRef[1] - meanForce;
157 sigmaForce += fdiff * fdiff;
158 fdiff = it2->fRef[2] - meanForce;
159 sigmaForce += fdiff * fdiff;
160 }
161 }
162 MPI_Allreduce(MPI_IN_PLACE, &sigmaEnergyPerAtom, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
163 MPI_Allreduce(MPI_IN_PLACE, &sigmaForce , 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
164 sigmaEnergyPerAtom = sqrt(sigmaEnergyPerAtom / (numStructures - 1));
165 sigmaForce = sqrt(sigmaForce / (3 * numAtomsTotal - 1));
166 dataset.log << "\n";
167 dataset.log << strpr("Total number of structures: %zu\n", numStructures);
168 dataset.log << strpr("Total number of atoms : %zu\n", numAtomsTotal);
169 dataset.log << strpr("Mean/sigma energy per atom: %16.8E +/- %16.8E\n",
170 meanEnergyPerAtom,
171 sigmaEnergyPerAtom);
172 dataset.log << strpr("Mean/sigma force : %16.8E +/- %16.8E\n",
173 meanForce,
174 sigmaForce);
175 double convEnergy = 1.0 / sigmaEnergyPerAtom;
176 double convLength = sigmaForce / sigmaEnergyPerAtom;
177 double convCharge = 1.0;
178 if (maxAbsCharge > 0)
179 convCharge = 1.0 / maxAbsCharge;
180 dataset.log << strpr("Conversion factor energy : %24.16E\n", convEnergy);
181 dataset.log << strpr("Conversion factor length : %24.16E\n", convLength);
182 dataset.log << strpr("Conversion factor charge : %24.16E\n", convCharge);
183
184 ofstream fileCfg;
185 fileCfg.open(strpr("output.data.%04d", myRank).c_str());
186 for (vector<Structure>::iterator it = dataset.structures.begin();
187 it != dataset.structures.end(); ++it)
188 {
189 it->energyRef = (it->energyRef - meanEnergyPerAtom * it->numAtoms)
190 * convEnergy;
191 it->chargeRef *= convCharge;
192 it->box[0] *= convLength;
193 it->box[1] *= convLength;
194 it->box[2] *= convLength;
195 for (vector<Atom>::iterator it2 = it->atoms.begin();
196 it2 != it->atoms.end(); ++it2)
197 {
198 it2->r *= convLength;
199 it2->fRef *= convEnergy / convLength;
200 it2->chargeRef *= convCharge;
201 }
202 it->writeToFile(&fileCfg);
203 }
204 fileCfg.flush();
205 fileCfg.close();
206 MPI_Barrier(MPI_COMM_WORLD);
207 dataset.log << "\n";
208 dataset.log << "Writing converted data file to \"output.data\".\n";
209 dataset.log << "WARNING: This data set is provided for debugging "
210 "purposes only and is NOT intended for training.\n";
211 if (myRank == 0) dataset.combineFiles("output.data");
212
213 if (myRank == 0)
214 {
215 dataset.log << "\n";
216 dataset.log << "Writing backup of original settings file to "
217 "\"input.nn.bak\".\n";
218 ofstream fileSettings;
219 fileSettings.open("input.nn.bak");
220 dataset.writeSettingsFile(&fileSettings);
221 fileSettings.close();
222
223 dataset.log << "\n";
224 dataset.log << "Writing extended settings file to \"input.nn\".\n";
225 dataset.log << "Use this settings file for normalized training.\n";
226 fileSettings.open("input.nn");
227 fileSettings << "#########################################"
228 "######################################\n";
229 fileSettings << "# DATA SET NORMALIZATION\n";
230 fileSettings << "#########################################"
231 "######################################\n";
232 fileSettings << strpr("mean_energy %24.16E # nnp-norm\n",
233 meanEnergyPerAtom);
234 fileSettings << strpr("conv_energy %24.16E # nnp-norm\n",
235 convEnergy);
236 fileSettings << strpr("conv_length %24.16E # nnp-norm\n",
237 convLength);
238 fileSettings << strpr("conv_charge %24.16E\n # nnp-norm\n",
239 convCharge);
240 fileSettings << "#########################################"
241 "######################################\n";
242 fileSettings << "\n";
243 dataset.writeSettingsFile(&fileSettings);
244 fileSettings.close();
245 }
246
247 dataset.log << "*****************************************"
248 "**************************************\n";
249
250 myLog.close();
251
252 MPI_Finalize();
253
254 return 0;
255}
Collect and process large data sets.
Definition: Dataset.h:35
int distributeStructures(bool randomize, bool excludeRank0=false, std::string const &fileName="input.data")
Read data file and distribute structures among processors.
Definition: Dataset.cpp:724
void setupMPI()
Initialize MPI with MPI_COMM_WORLD.
Definition: Dataset.cpp:52
std::vector< Structure > structures
All structures in this dataset.
Definition: Dataset.h:195
void combineFiles(std::string filePrefix) const
Combine individual MPI proc files to one.
Definition: Dataset.cpp:1744
void registerStreamPointer(std::ofstream *const &streamPointer)
Register new C++ ofstream pointer.
Definition: Log.cpp:91
bool writeToStdout
Turn on/off output to stdout.
Definition: Log.h:85
void initialize()
Write welcome message with version information.
Definition: Mode.cpp:55
virtual void setupElementMap()
Set up the element map.
Definition: Mode.cpp:301
bool settingsKeywordExists(std::string const &keyword) const
Check if keyword was found in settings file.
Definition: Mode.cpp:2193
Log log
Global log file.
Definition: Mode.h:593
double getEnergyWithOffset(Structure const &structure, bool ref=true) const
Add atomic energy offsets and return energy.
Definition: Mode.cpp:2069
void loadSettingsFile(std::string const &fileName="input.nn")
Open settings file and load all keywords into memory.
Definition: Mode.cpp:161
void writeSettingsFile(std::ofstream *const &file) const
Write complete settings file.
Definition: Mode.cpp:2221
virtual void setupElements()
Set up all Element instances.
Definition: Mode.cpp:322
#define MPI_SIZE_T
Definition: mpi-extra.h:22
Definition: Atom.h:29
string strpr(const char *format,...)
String version of printf function.
Definition: utility.cpp:90
vector< string > createFileHeader(vector< string > const &title, vector< size_t > const &colSize, vector< string > const &colName, vector< string > const &colInfo, char const &commentChar)
Definition: utility.cpp:111
void appendLinesToFile(ofstream &file, vector< string > const lines)
Append multiple lines of strings to open file stream.
Definition: utility.cpp:225
int main(int argc, char *argv[])
Definition: nnp-norm.cpp:30