// Entry point of the data set normalization program. Judging by the log file
// name opened below this is the "nnp-norm" tool (TODO confirm).
// NOTE(review): in this listing the original file's line numbers are fused to
// the start of some lines (e.g. "30" here) and the lines in between are not
// shown, so the comments describe only the statements that are visible.
30int main(
int argc,
char* argv[])
// Usage text: names the two input files expected in the working directory
// (input.data and input.nn). The condition that triggers this message is on
// lines that are not visible here.
38 cout <<
"USAGE: " << argv[0] <<
"\n"
39 <<
" Execute in directory with these NNP files present:\n"
40 <<
" - input.data (structure file)\n"
41 <<
" - input.nn (NNP settings)\n";
// Start MPI, then query the number of processes and this process's rank.
45 MPI_Init(&argc, &argv);
46 MPI_Comm_size(MPI_COMM_WORLD, &numProcs);
47 MPI_Comm_rank(MPI_COMM_WORLD, &myRank);
// Every rank opens its own log file; the rank is appended as a zero-padded,
// four-digit suffix.
51 myLog.open(
strpr(
"nnp-norm.log.%04d", myRank).c_str());
// Section banner written to the data set log.
61 dataset.
log <<
"*** DATA SET NORMALIZATION **************"
62 "**************************************\n";
// Refuse to continue when the settings already contain normalization
// keywords.
// NOTE(review): the condition guarding this throw is on lines that are not
// visible here — presumably a settingsKeywordExists() check; confirm.
70 throw runtime_error(
"ERROR: Normalization keywords found in settings, "
71 "please remove them first.\n");
// Per-rank output file for the energy vs. volume data; the rank is appended
// as a zero-padded, four-digit suffix.
75 fileEvsV.open(
strpr(
"evsv.dat.%04d", myRank).c_str());
// File header description: colSize, colName and colInfo are filled in
// lockstep, so entry i of each vector describes column i. The widths
// (16, 16, 10, 16, 16, 16) match the fields of the row format string used
// when the data rows are written.
// NOTE(review): the declaration of `title` and the call that consumes these
// vectors are not visible in this listing — presumably createFileHeader() /
// appendLinesToFile(); confirm.
79 vector<string> colName;
80 vector<string> colInfo;
81 vector<size_t> colSize;
82 title.push_back(
"Energy vs. volume comparison.");
83 colSize.push_back(16);
84 colName.push_back(
"V_atom");
85 colInfo.push_back(
"Volume per atom.");
86 colSize.push_back(16);
87 colName.push_back(
"Eref_atom");
88 colInfo.push_back(
"Reference energy per atom.");
89 colSize.push_back(10);
90 colName.push_back(
"N");
91 colInfo.push_back(
"Number of atoms.");
92 colSize.push_back(16);
93 colName.push_back(
"V");
94 colInfo.push_back(
"Volume of structure.");
95 colSize.push_back(16);
96 colName.push_back(
"Eref");
97 colInfo.push_back(
"Reference energy of structure.");
98 colSize.push_back(16);
99 colName.push_back(
"Eref_offset");
100 colInfo.push_back(
"Reference energy of structure (including offset).");
// Accumulators for the data set statistics. Each rank first gathers partial
// results over the structures it iterates; they are combined across ranks by
// the MPI_Allreduce calls further down.
104 size_t numAtomsTotal = 0;
105 size_t numStructures = 0;
106 double meanEnergyPerAtom = 0.0;
107 double sigmaEnergyPerAtom = 0.0;
108 double meanForce = 0.0;
109 double sigmaForce = 0.0;
110 double maxAbsCharge = 0.0;
// First pass over dataset.structures: sums for the means.
// NOTE(review): the rest of the loop header and any update of numStructures
// are on lines that are not visible here (112-114) — confirm.
111 for (vector<Structure>::const_iterator it = dataset.
structures.begin();
115 numAtomsTotal += it->numAtoms;
116 meanEnergyPerAtom += it->energyRef / it->numAtoms;
// One data row per structure; the trailing arguments of this strpr() call
// are on elided lines.
117 fileEvsV <<
strpr(
"%16.8E %16.8E %10zu %16.8E %16.8E %16.8E\n",
118 it->volume / it->numAtoms,
119 it->energyRef / it->numAtoms,
// Loop over the atoms of the structure: all three reference force components
// are added into one scalar sum, and the largest absolute reference charge
// seen so far is tracked.
124 for (vector<Atom>::const_iterator it2 = it->atoms.begin();
125 it2 != it->atoms.end(); ++it2)
127 meanForce += it2->fRef[0] + it2->fRef[1] + it2->fRef[2];
128 double absChargeRef = abs(it2->chargeRef);
129 if (abs(it2->chargeRef) > maxAbsCharge)
130 maxAbsCharge = absChargeRef;
// Synchronize all ranks before announcing the output file in the log.
135 MPI_Barrier(MPI_COMM_WORLD);
136 dataset.
log <<
"Writing energy/atom vs. volume/atom data "
137 <<
"to \"evsv.dat\".\n";
// Combine the per-rank partial results in place: counts and sums are added
// up (MPI_SUM), the charge maximum is reduced with MPI_MAX.
139 MPI_Allreduce(MPI_IN_PLACE, &numStructures , 1,
MPI_SIZE_T, MPI_SUM, MPI_COMM_WORLD);
140 MPI_Allreduce(MPI_IN_PLACE, &numAtomsTotal , 1,
MPI_SIZE_T, MPI_SUM, MPI_COMM_WORLD);
141 MPI_Allreduce(MPI_IN_PLACE, &meanEnergyPerAtom, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
142 MPI_Allreduce(MPI_IN_PLACE, &meanForce , 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
143 MPI_Allreduce(MPI_IN_PLACE, &maxAbsCharge , 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
// Turn the sums into means: the energy per atom is averaged over structures,
// the force over all 3 * numAtomsTotal components.
144 meanEnergyPerAtom /= numStructures;
145 meanForce /= 3 * numAtomsTotal;
// Second pass over dataset.structures: accumulate the squared deviations of
// the energy per atom and of every force component from the global means.
// The rest of the loop header is on lines that are not visible here.
146 for (vector<Structure>::const_iterator it = dataset.
structures.begin();
149 double ediff = it->energyRef / it->numAtoms - meanEnergyPerAtom;
150 sigmaEnergyPerAtom += ediff * ediff;
151 for (vector<Atom>::const_iterator it2 = it->atoms.begin();
152 it2 != it->atoms.end(); ++it2)
154 double fdiff = it2->fRef[0] - meanForce;
155 sigmaForce += fdiff * fdiff;
156 fdiff = it2->fRef[1] - meanForce;
157 sigmaForce += fdiff * fdiff;
158 fdiff = it2->fRef[2] - meanForce;
159 sigmaForce += fdiff * fdiff;
// Add up the squared deviations of all ranks.
162 MPI_Allreduce(MPI_IN_PLACE, &sigmaEnergyPerAtom, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
163 MPI_Allreduce(MPI_IN_PLACE, &sigmaForce , 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
// Standard deviations with (count - 1) in the denominator.
// NOTE(review): numStructures and numAtomsTotal are size_t, so with exactly
// one structure the energy divisor is 0, and with none (numStructures - 1)
// wraps around to a huge value — worth guarding against.
164 sigmaEnergyPerAtom = sqrt(sigmaEnergyPerAtom / (numStructures - 1));
165 sigmaForce = sqrt(sigmaForce / (3 * numAtomsTotal - 1));
// Report the global statistics in the data set log. The value arguments of
// the two "Mean/sigma" lines are on lines that are not visible here.
167 dataset.
log <<
strpr(
"Total number of structures: %zu\n", numStructures);
168 dataset.
log <<
strpr(
"Total number of atoms : %zu\n", numAtomsTotal);
169 dataset.
log <<
strpr(
"Mean/sigma energy per atom: %16.8E +/- %16.8E\n",
172 dataset.
log <<
strpr(
"Mean/sigma force : %16.8E +/- %16.8E\n",
// Conversion factors: the energy factor is the inverse of the energy-per-atom
// standard deviation; the length factor is the ratio of the force and energy
// standard deviations.
175 double convEnergy = 1.0 / sigmaEnergyPerAtom;
176 double convLength = sigmaForce / sigmaEnergyPerAtom;
// The charge factor is the inverse of the largest absolute charge; it stays
// at 1.0 when no non-zero charge was found.
177 double convCharge = 1.0;
178 if (maxAbsCharge > 0)
179 convCharge = 1.0 / maxAbsCharge;
// Log the three factors with 16 digits after the decimal point.
180 dataset.
log <<
strpr(
"Conversion factor energy : %24.16E\n", convEnergy);
181 dataset.
log <<
strpr(
"Conversion factor length : %24.16E\n", convLength);
182 dataset.
log <<
strpr(
"Conversion factor charge : %24.16E\n", convCharge);
// Per-rank output file for the converted structures; the rank is appended as
// a zero-padded, four-digit suffix.
185 fileCfg.open(
strpr(
"output.data.%04d", myRank).c_str());
// Convert every structure in place (note the non-const iterator) and write it
// to the output file. The rest of the loop header is not visible here.
186 for (vector<Structure>::iterator it = dataset.
structures.begin();
// Remove the mean energy contribution (mean per atom times atom count).
// NOTE(review): the continuation of this expression is on an elided line
// (190) — presumably the multiplication by convEnergy; confirm.
189 it->energyRef = (it->energyRef - meanEnergyPerAtom * it->numAtoms)
191 it->chargeRef *= convCharge;
192 it->box[0] *= convLength;
193 it->box[1] *= convLength;
194 it->box[2] *= convLength;
// Per-atom quantities: positions are scaled by the length factor, reference
// forces by energy/length, reference charges by the charge factor.
195 for (vector<Atom>::iterator it2 = it->atoms.begin();
196 it2 != it->atoms.end(); ++it2)
198 it2->r *= convLength;
199 it2->fRef *= convEnergy / convLength;
200 it2->chargeRef *= convCharge;
202 it->writeToFile(&fileCfg);
// Synchronize all ranks before announcing the output file in the log.
206 MPI_Barrier(MPI_COMM_WORLD);
208 dataset.
log <<
"Writing converted data file to \"output.data\".\n";
209 dataset.
log <<
"WARNING: This data set is provided for debugging "
210 "purposes only and is NOT intended for training.\n";
// Back up the original settings file, then rewrite input.nn with a
// normalization block in front.
// NOTE(review): several lines between the visible statements are not shown in
// this listing (e.g. 220, 233, 235, 237, 239, 243) — presumably the
// writeSettingsFile() calls and the strpr() value arguments; confirm.
216 dataset.
log <<
"Writing backup of original settings file to "
217 "\"input.nn.bak\".\n";
218 ofstream fileSettings;
219 fileSettings.open(
"input.nn.bak");
221 fileSettings.close();
// Reuse the same stream object for the new settings file.
224 dataset.
log <<
"Writing extended settings file to \"input.nn\".\n";
225 dataset.
log <<
"Use this settings file for normalized training.\n";
226 fileSettings.open(
"input.nn");
// Comment banner that introduces the normalization section.
227 fileSettings <<
"#########################################"
228 "######################################\n";
229 fileSettings <<
"# DATA SET NORMALIZATION\n";
230 fileSettings <<
"#########################################"
231 "######################################\n";
// One keyword per line, each followed by a "# nnp-norm" tag on the same line.
232 fileSettings <<
strpr(
"mean_energy %24.16E # nnp-norm\n",
234 fileSettings <<
strpr(
"conv_energy %24.16E # nnp-norm\n",
236 fileSettings <<
strpr(
"conv_length %24.16E # nnp-norm\n",
// Keep the tag on the keyword's own line, exactly like the three entries
// before it: a newline in front of the tag would push "# nnp-norm" onto a
// separate line and leave the conv_charge line untagged.
238 fileSettings <<
strpr(
"conv_charge %24.16E # nnp-norm\n",
240 fileSettings <<
"#########################################"
241 "######################################\n";
242 fileSettings <<
"\n";
244 fileSettings.close();
// Closing rule of the log section.
247 dataset.
log <<
"*****************************************"
248 "**************************************\n";
Collect and process large data sets.
int distributeStructures(bool randomize, bool excludeRank0=false, std::string const &fileName="input.data")
Read data file and distribute structures among processors.
void setupMPI()
Initialize MPI with MPI_COMM_WORLD.
std::vector< Structure > structures
All structures in this dataset.
void combineFiles(std::string filePrefix) const
Combine individual MPI proc files to one.
void registerStreamPointer(std::ofstream *const &streamPointer)
Register new C++ ofstream pointer.
bool writeToStdout
Turn on/off output to stdout.
void initialize()
Write welcome message with version information.
virtual void setupElementMap()
Set up the element map.
bool settingsKeywordExists(std::string const &keyword) const
Check if keyword was found in settings file.
double getEnergyWithOffset(Structure const &structure, bool ref=true) const
Add atomic energy offsets and return energy.
void loadSettingsFile(std::string const &fileName="input.nn")
Open settings file and load all keywords into memory.
void writeSettingsFile(std::ofstream *const &file) const
Write complete settings file.
virtual void setupElements()
Set up all Element instances.
string strpr(const char *format,...)
String version of printf function.
vector< string > createFileHeader(vector< string > const &title, vector< size_t > const &colSize, vector< string > const &colName, vector< string > const &colInfo, char const &commentChar)
void appendLinesToFile(ofstream &file, vector< string > const lines)
Append multiple lines of strings to open file stream.
int main(int argc, char *argv[])