n2p2 - A neural network potential package
GradientDescent.cpp
// n2p2 - A neural network potential package
// Copyright (C) 2018 Andreas Singraber (University of Vienna)
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.

#include "GradientDescent.h"
#include "utility.h"
#include <cstddef>
#include <cmath>
#include <stdexcept> // std::runtime_error (thrown below)

using namespace std;
using namespace nnp;

GradientDescent::GradientDescent(size_t const sizeState,
                                 DescentType const type) :
    Updater (sizeState),
    eta     (0.0 ),
    beta1   (0.0 ),
    beta2   (0.0 ),
    epsilon (0.0 ),
    beta1t  (0.0 ),
    beta2t  (0.0 ),
    state   (NULL),
    error   (NULL),
    gradient(NULL)
{
    if (!(type == DT_FIXED ||
          type == DT_ADAM))
    {
        throw runtime_error("ERROR: Unknown GradientDescent type.\n");
    }

    if (sizeState < 1)
    {
        throw runtime_error("ERROR: Wrong GradientDescent dimensions.\n");
    }

    this->type = type;

    if (type == DT_ADAM)
    {
        m.resize(sizeState, 0.0);
        v.resize(sizeState, 0.0);
    }
}

void GradientDescent::setState(double* state)
{
    this->state = state;

    return;
}

void GradientDescent::setError(double const* const error,
                               size_t const /* size */)
{
    this->error = error;

    return;
}

void GradientDescent::setJacobian(double const* const jacobian,
                                  size_t const /* columns */)
{
    this->gradient = jacobian;

    return;
}

void GradientDescent::update()
{
    if (type == DT_FIXED)
    {
        for (std::size_t i = 0; i < sizeState; ++i)
        {
            state[i] -= eta * (*error) * -gradient[i];
        }
    }
    else if (type == DT_ADAM)
    {
        for (std::size_t i = 0; i < sizeState; ++i)
        {
            double const g = (*error) * -gradient[i];
            m[i] = beta1 * m[i] + (1.0 - beta1) * g;
            v[i] = beta2 * v[i] + (1.0 - beta2) * g * g;

            // Standard implementation
            // (Algorithm 1 in publication).
            //double const mhat = m[i] / (1.0 - beta1t);
            //double const vhat = v[i] / (1.0 - beta2t);
            //state[i] -= eta * mhat / (sqrt(vhat) + epsilon);

            // Faster (?) alternative
            // (see last paragraph in Section 2 of publication).
            // This is actually only marginally faster
            // (fewer statements, but two sqrt() calls)!
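            // Why the two variants agree: with the rescaled step size
            //     eta = eta0 * sqrt(1 - beta2t) / (1 - beta1t)
            // the update below gives eta * m[i] / sqrt(v[i])
            //     = eta0 * mhat / sqrt(vhat),
            // because mhat = m[i] / (1 - beta1t) and
            // vhat = v[i] / (1 - beta2t); only the effective size of
            // epsilon differs slightly between the two forms.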
            eta = eta0 * sqrt(1 - beta2t) / (1 - beta1t);
            state[i] -= eta * m[i] / (sqrt(v[i]) + epsilon);
        }

        // Update betas.
        beta1t *= beta1;
        beta2t *= beta2;
    }

    return;
}

void GradientDescent::setParametersFixed(double const eta)
{
    this->eta = eta;

    return;
}

void GradientDescent::setParametersAdam(double const eta,
                                        double const beta1,
                                        double const beta2,
                                        double const epsilon)
{
    this->eta     = eta;
    this->beta1   = beta1;
    this->beta2   = beta2;
    this->epsilon = epsilon;

    eta0   = eta;
    beta1t = beta1;
    beta2t = beta2;

    return;
}

string GradientDescent::status(size_t epoch) const
{
    string s = strpr("%10zu %16.8E", epoch, eta);

    if (type == DT_ADAM)
    {
        double meanm = 0.0;
        double meanv = 0.0;
        for (std::size_t i = 0; i < sizeState; ++i)
        {
            meanm += abs(m[i]);
            meanv += abs(v[i]);
        }
        meanm /= sizeState;
        meanv /= sizeState;
        s += strpr(" %16.8E %16.8E %16.8E %16.8E",
                   beta1t, beta2t, meanm, meanv);
    }
    s += '\n';

    return s;
}

vector<string> GradientDescent::statusHeader() const
{
    vector<string> header;

    vector<string> title;
    vector<string> colName;
    vector<string> colInfo;
    vector<size_t> colSize;
    title.push_back("Gradient descent status report.");
    colSize.push_back(10);
    colName.push_back("epoch");
    colInfo.push_back("Training epoch.");
    colSize.push_back(16);
    colName.push_back("eta");
    colInfo.push_back("Step size.");
    if (type == DT_ADAM)
    {
        colSize.push_back(16);
        colName.push_back("beta1t");
        colInfo.push_back("Decay rate 1 to the power of t.");
        colSize.push_back(16);
        colName.push_back("beta2t");
        colInfo.push_back("Decay rate 2 to the power of t.");
        colSize.push_back(16);
        colName.push_back("mag_m");
        colInfo.push_back("Mean of absolute first moment estimate (m).");
        colSize.push_back(16);
        colName.push_back("mag_v");
        colInfo.push_back("Mean of absolute second moment estimate (v).");
    }
    header = createFileHeader(title, colSize, colName, colInfo);

    return header;
}

vector<string> GradientDescent::info() const
{
    vector<string> v;

    if (type == DT_FIXED)
    {
        v.push_back(strpr("GradientDescentType::DT_FIXED (%d)\n", type));
        v.push_back(strpr("sizeState       = %zu\n", sizeState));
        v.push_back(strpr("eta             = %12.4E\n", eta));
    }
    else if (type == DT_ADAM)
    {
        v.push_back(strpr("GradientDescentType::DT_ADAM (%d)\n", type));
        v.push_back(strpr("sizeState       = %zu\n", sizeState));
        v.push_back(strpr("eta             = %12.4E\n", eta));
        v.push_back(strpr("beta1           = %12.4E\n", beta1));
        v.push_back(strpr("beta2           = %12.4E\n", beta2));
        v.push_back(strpr("epsilon         = %12.4E\n", epsilon));
    }

    return v;
}
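
A minimal usage sketch of this updater (not part of GradientDescent.cpp itself), assuming the DescentType enumerators DT_FIXED and DT_ADAM are nested in the GradientDescent class. The problem size, hyperparameter values, and training loop below are hypothetical placeholders; the member functions are the ones defined above. Because the updater only stores pointers, the state, error, and gradient buffers must remain valid across calls to update(), and the parameters must be set before the first update.

#include <cstddef>
#include <vector>

#include "GradientDescent.h"

int main()
{
    std::size_t const n = 10;             // hypothetical number of weights
    std::vector<double> weights (n, 0.1); // state vector to be optimized
    std::vector<double> gradient(n, 0.0); // dError/dWeight, filled by the caller
    double error = 0.0;                   // current error value

    nnp::GradientDescent gd(n, nnp::GradientDescent::DT_ADAM);
    gd.setParametersAdam(1.0E-3, 0.9, 0.999, 1.0E-8); // typical Adam settings
    gd.setState(weights.data());
    gd.setError(&error);
    gd.setJacobian(gradient.data());

    for (std::size_t epoch = 0; epoch < 100; ++epoch)
    {
        // ... evaluate error and gradient for the current weights here ...
        gd.update(); // one Adam step using the pointers set above
    }

    return 0;
}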