31{
32 int numProcs = 0;
33 int myRank = 0;
34 ofstream myLog;
35
36 if (argc != 1)
37 {
38 cout << "USAGE: " << argv[0] << "\n"
39 << " Execute in directory with these NNP files present:\n"
40 << " - input.data (structure file)\n"
41 << " - input.nn (NNP settings)\n";
42 return 1;
43 }
44
45 MPI_Init(&argc, &argv);
46 MPI_Comm_size(MPI_COMM_WORLD, &numProcs);
47 MPI_Comm_rank(MPI_COMM_WORLD, &myRank);
48
51 myLog.open(
strpr(
"nnp-norm.log.%04d", myRank).c_str());
59
61 dataset.
log <<
"*** DATA SET NORMALIZATION **************"
62 "**************************************\n";
64
68 {
69 throw runtime_error("ERROR: Normalization keywords found in settings, "
70 "please remove them first.\n");
71 }
72
73 ofstream fileEvsV;
74 fileEvsV.open(
strpr(
"evsv.dat.%04d", myRank).c_str());
75
76
77 vector<string> title;
78 vector<string> colName;
79 vector<string> colInfo;
80 vector<size_t> colSize;
81 title.push_back("Energy vs. volume comparison.");
82 colSize.push_back(16);
83 colName.push_back("V_atom");
84 colInfo.push_back("Volume per atom.");
85 colSize.push_back(16);
86 colName.push_back("Eref_atom");
87 colInfo.push_back("Reference energy per atom.");
88 colSize.push_back(10);
89 colName.push_back("N");
90 colInfo.push_back("Number of atoms.");
91 colSize.push_back(16);
92 colName.push_back("V");
93 colInfo.push_back("Volume of structure.");
94 colSize.push_back(16);
95 colName.push_back("Eref");
96 colInfo.push_back("Reference energy of structure.");
97 colSize.push_back(16);
98 colName.push_back("Eref_offset");
99 colInfo.push_back("Reference energy of structure (including offset).");
102
103 size_t numAtomsTotal = 0;
104 size_t numStructures = 0;
105 double meanEnergyPerAtom = 0.0;
106 double sigmaEnergyPerAtom = 0.0;
107 double meanForce = 0.0;
108 double sigmaForce = 0.0;
109 for (vector<Structure>::const_iterator it = dataset.
structures.begin();
111 {
112 numStructures++;
113 numAtomsTotal += it->numAtoms;
114 meanEnergyPerAtom += it->energyRef / it->numAtoms;
115 fileEvsV <<
strpr(
"%16.8E %16.8E %10zu %16.8E %16.8E %16.8E\n",
116 it->volume / it->numAtoms,
117 it->energyRef / it->numAtoms,
118 it->numAtoms,
119 it->volume,
120 it->energyRef,
122 for (vector<Atom>::const_iterator it2 = it->atoms.begin();
123 it2 != it->atoms.end(); ++it2)
124 {
125 meanForce += it2->fRef[0] + it2->fRef[1] + it2->fRef[2];
126 }
127 }
128 fileEvsV.flush();
129 fileEvsV.close();
130 MPI_Barrier(MPI_COMM_WORLD);
131 dataset.
log <<
"Writing energy/atom vs. volume/atom data "
132 << "to \"evsv.dat\".\n";
134 MPI_Allreduce(MPI_IN_PLACE, &numStructures , 1,
MPI_SIZE_T, MPI_SUM, MPI_COMM_WORLD);
135 MPI_Allreduce(MPI_IN_PLACE, &numAtomsTotal , 1,
MPI_SIZE_T, MPI_SUM, MPI_COMM_WORLD);
136 MPI_Allreduce(MPI_IN_PLACE, &meanEnergyPerAtom, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
137 MPI_Allreduce(MPI_IN_PLACE, &meanForce , 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
138 meanEnergyPerAtom /= numStructures;
139 meanForce /= 3 * numAtomsTotal;
140 for (vector<Structure>::const_iterator it = dataset.
structures.begin();
142 {
143 double ediff = it->energyRef / it->numAtoms - meanEnergyPerAtom;
144 sigmaEnergyPerAtom += ediff * ediff;
145 for (vector<Atom>::const_iterator it2 = it->atoms.begin();
146 it2 != it->atoms.end(); ++it2)
147 {
148 double fdiff = it2->fRef[0] - meanForce;
149 sigmaForce += fdiff * fdiff;
150 fdiff = it2->fRef[1] - meanForce;
151 sigmaForce += fdiff * fdiff;
152 fdiff = it2->fRef[2] - meanForce;
153 sigmaForce += fdiff * fdiff;
154 }
155 }
156 MPI_Allreduce(MPI_IN_PLACE, &sigmaEnergyPerAtom, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
157 MPI_Allreduce(MPI_IN_PLACE, &sigmaForce , 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
158 sigmaEnergyPerAtom = sqrt(sigmaEnergyPerAtom / (numStructures - 1));
159 sigmaForce = sqrt(sigmaForce / (3 * numAtomsTotal - 1));
161 dataset.
log <<
strpr(
"Total number of structures: %zu\n", numStructures);
162 dataset.
log <<
strpr(
"Total number of atoms : %zu\n", numAtomsTotal);
163 dataset.
log <<
strpr(
"Mean/sigma energy per atom: %16.8E +/- %16.8E\n",
164 meanEnergyPerAtom,
165 sigmaEnergyPerAtom);
166 dataset.
log <<
strpr(
"Mean/sigma force : %16.8E +/- %16.8E\n",
167 meanForce,
168 sigmaForce);
169 double convEnergy = 1.0 / sigmaEnergyPerAtom;
170 double convLength = sigmaForce / sigmaEnergyPerAtom;
171 dataset.
log <<
strpr(
"Conversion factor energy : %24.16E\n", convEnergy);
172 dataset.
log <<
strpr(
"Conversion factor length : %24.16E\n", convLength);
173
174 ofstream fileCfg;
175 fileCfg.open(
strpr(
"output.data.%04d", myRank).c_str());
176 for (vector<Structure>::iterator it = dataset.
structures.begin();
178 {
179 it->energyRef = (it->energyRef - meanEnergyPerAtom * it->numAtoms)
180 * convEnergy;
181 it->box[0] *= convLength;
182 it->box[1] *= convLength;
183 it->box[2] *= convLength;
184 for (vector<Atom>::iterator it2 = it->atoms.begin();
185 it2 != it->atoms.end(); ++it2)
186 {
187 it2->r *= convLength;
188 it2->fRef *= convEnergy / convLength;
189 }
190 it->writeToFile(&fileCfg);
191 }
192 fileCfg.flush();
193 fileCfg.close();
194 MPI_Barrier(MPI_COMM_WORLD);
196 dataset.
log <<
"Writing converted data file to \"output.data\".\n";
197 dataset.
log <<
"WARNING: This data set is provided for debugging "
198 "purposes only and is NOT intended for training.\n";
200
201 if (myRank == 0)
202 {
204 dataset.
log <<
"Writing backup of original settings file to "
205 "\"input.nn.bak\".\n";
206 ofstream fileSettings;
207 fileSettings.open("input.nn.bak");
209 fileSettings.close();
210
212 dataset.
log <<
"Writing extended settings file to \"input.nn\".\n";
213 dataset.
log <<
"Use this settings file for normalized training.\n";
214 fileSettings.open("input.nn");
215 fileSettings << "#########################################"
216 "######################################\n";
217 fileSettings << "# DATA SET NORMALIZATION\n";
218 fileSettings << "#########################################"
219 "######################################\n";
220 fileSettings <<
strpr(
"mean_energy %24.16E # nnp-norm\n",
221 meanEnergyPerAtom);
222 fileSettings <<
strpr(
"conv_energy %24.16E # nnp-norm\n",
223 convEnergy);
224 fileSettings <<
strpr(
"conv_length %24.16E # nnp-norm\n",
225 convLength);
226 fileSettings << "#########################################"
227 "######################################\n";
228 fileSettings << "\n";
230 fileSettings.close();
231 }
232
233 dataset.
log <<
"*****************************************"
234 "**************************************\n";
235
236 myLog.close();
237
238 MPI_Finalize();
239
240 return 0;
241}
Collect and process large data sets.
int distributeStructures(bool randomize, bool excludeRank0=false, std::string const &fileName="input.data")
Read data file and distribute structures among processors.
void setupMPI()
Initialize MPI with MPI_COMM_WORLD.
std::vector< Structure > structures
All structures in this dataset.
void combineFiles(std::string filePrefix) const
Combine individual MPI proc files to one.
void registerStreamPointer(std::ofstream *const &streamPointer)
Register new C++ ofstream pointer.
bool writeToStdout
Turn on/off output to stdout.
void initialize()
Write welcome message with version information.
virtual void setupElementMap()
Set up the element map.
bool settingsKeywordExists(std::string const &keyword) const
Check if keyword was found in settings file.
double getEnergyWithOffset(Structure const &structure, bool ref=true) const
Add atomic energy offsets and return energy.
void loadSettingsFile(std::string const &fileName="input.nn")
Open settings file and load all keywords into memory.
void writeSettingsFile(std::ofstream *const &file) const
Write complete settings file.
virtual void setupElements()
Set up all Element instances.
string strpr(const char *format,...)
String version of printf function.
vector< string > createFileHeader(vector< string > const &title, vector< size_t > const &colSize, vector< string > const &colName, vector< string > const &colInfo, char const &commentChar)
void appendLinesToFile(ofstream &file, vector< string > const lines)
Append multiple lines of strings to open file stream.