n2p2 - A neural network potential package
Dataset.h
Go to the documentation of this file.
1// n2p2 - A neural network potential package
2// Copyright (C) 2018 Andreas Singraber (University of Vienna)
3//
4// This program is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, either version 3 of the License, or
7// (at your option) any later version.
8//
9// This program is distributed in the hope that it will be useful,
10// but WITHOUT ANY WARRANTY; without even the implied warranty of
11// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12// GNU General Public License for more details.
13//
14// You should have received a copy of the GNU General Public License
15// along with this program. If not, see <https://www.gnu.org/licenses/>.
16
17#ifndef DATASET_H
18#define DATASET_H
19
20#include <mpi.h>
21#include "Mode.h"
22#include "Structure.h"
23#include <cstddef> // std::size_t
24#include <fstream> // std::ifstream
25#include <map> // std::map
26#include <string> // std::string
27#include <vector> // std::vector
28#include <gsl/gsl_rng.h>
29
30namespace nnp
31{
32
34class Dataset : public Mode
35{
36public:
39 Dataset();
42 ~Dataset();
45 void setupMPI();
50 void setupMPI(MPI_Comm* communicator);
62 std::size_t getNumStructures(std::ifstream& dataFile);
69 int calculateBufferSize(Structure const& structure) const;
77 int sendStructure(Structure const& structure, int dest) const;
85 int recvStructure(Structure* structure, int src);
96 int distributeStructures(bool randomize,
97 bool excludeRank0 = false,
98 std::string const& fileName
99 = "input.data");
110 std::size_t prepareNumericForces(Structure& original, double delta);
113 void toNormalizedUnits();
116 void toPhysicalUnits();
124 void writeSymmetryFunctionScaling(std::string const& fileName
125 = "scaling.data");
133 void writeSymmetryFunctionHistograms(std::size_t numBins,
134 std::string fileNameFormat
135 = "sf.%03zu.%04zu.histo");
140 void writeSymmetryFunctionFile(std::string fileName
141 = "function.data");
149 std::size_t writeNeighborHistogram(std::string const& fileNameHisto
150 = "neighbors.histo",
151 std::string const& fileNameStructure
152 = "neighbors.out");
155 void sortNeighborLists();
160 void writeNeighborLists(std::string const& fileName
161 = "neighbor-list.data");
172 std::vector<std::vector<
173 std::size_t> > neighCutoff,
174 bool derivatives,
175 std::string const & fileNamePrefix
176 = "atomic-env");
183 void collectError(std::string const& property,
184 std::map<std::string, double>& error,
185 std::size_t& count) const;
192 void combineFiles(std::string filePrefix) const;
193
195 std::vector<Structure> structures;
196
197protected:
203 std::size_t numStructures;
205 std::string myName;
207 MPI_Comm comm;
209 gsl_rng* rng;
211 gsl_rng* rngGlobal;
212};
213
214}
215
216#endif
nnp::Dataset
Collect and process large data sets.
Definition: Dataset.h:35
std::size_t numStructures
Total number of structures in dataset.
Definition: Dataset.h:203
void writeSymmetryFunctionScaling(std::string const &fileName="scaling.data")
Write symmetry function scaling values to file.
Definition: Dataset.cpp:1002
void collectSymmetryFunctionStatistics()
Collect symmetry function statistics from all processors.
Definition: Dataset.cpp:972
gsl_rng * rng
GSL random number generator (different seed for each MPI process).
Definition: Dataset.h:209
int distributeStructures(bool randomize, bool excludeRank0=false, std::string const &fileName="input.data")
Read data file and distribute structures among processors.
Definition: Dataset.cpp:722
Dataset()
Constructor, initialize members.
Definition: Dataset.cpp:36
int recvStructure(Structure *structure, int src)
Receive one structure from source process.
Definition: Dataset.cpp:462
void sortNeighborLists()
Sort all neighbor lists according to element and distance.
Definition: Dataset.cpp:1464
~Dataset()
Destructor.
Definition: Dataset.cpp:46
void writeAtomicEnvironmentFile(std::vector< std::vector< std::size_t > > neighCutoff, bool derivatives, std::string const &fileNamePrefix="atomic-env")
Write atomic environment file.
Definition: Dataset.cpp:1534
void writeNeighborLists(std::string const &fileName="neighbor-list.data")
Write neighbor list file.
Definition: Dataset.cpp:1488
int calculateBufferSize(Structure const &structure) const
Calculate buffer size required to communicate structure via MPI.
Definition: Dataset.cpp:165
MPI_Comm comm
Global MPI communicator.
Definition: Dataset.h:207
void setupMPI()
Initialize MPI with MPI_COMM_WORLD.
Definition: Dataset.cpp:52
int numProcs
Total number of MPI processors.
Definition: Dataset.h:201
std::vector< Structure > structures
All structures in this dataset.
Definition: Dataset.h:195
void writeSymmetryFunctionFile(std::string fileName="function.data")
Write symmetry function legacy file ("function.data").
Definition: Dataset.cpp:1253
std::size_t prepareNumericForces(Structure &original, double delta)
Prepare numeric force check for a single structure.
Definition: Dataset.cpp:886
void toPhysicalUnits()
Switch all structures to physical units.
Definition: Dataset.cpp:961
void writeSymmetryFunctionHistograms(std::size_t numBins, std::string fileNameFormat="sf.%03zu.%04zu.histo")
Calculate and write symmetry function histograms.
Definition: Dataset.cpp:1101
std::size_t writeNeighborHistogram(std::string const &fileNameHisto="neighbors.histo", std::string const &fileNameStructure="neighbors.out")
Calculate and write neighbor histogram and per-structure statistics.
Definition: Dataset.cpp:1318
void setupRandomNumberGenerator()
Initialize random number generator.
Definition: Dataset.cpp:110
int sendStructure(Structure const &structure, int dest) const
Send one structure to destination process.
Definition: Dataset.cpp:251
std::string myName
My processor name.
Definition: Dataset.h:205
void combineFiles(std::string filePrefix) const
Combine individual MPI proc files to one.
Definition: Dataset.cpp:1742
int myRank
My process ID.
Definition: Dataset.h:199
gsl_rng * rngGlobal
Global GSL random number generator (equal seed for each MPI process).
Definition: Dataset.h:211
std::size_t getNumStructures(std::ifstream &dataFile)
Get number of structures in data file.
Definition: Dataset.cpp:707
void collectError(std::string const &property, std::map< std::string, double > &error, std::size_t &count) const
Collect error metrics of a property over all MPI procs.
Definition: Dataset.cpp:1710
void toNormalizedUnits()
Switch all structures to normalized units.
Definition: Dataset.cpp:950
nnp::Mode
Base class for all NNP applications.
Definition: Mode.h:83
nnp
Definition: Atom.h:28
nnp::Structure
Storage for one atomic configuration.
Definition: Structure.h:34