SSAGES  0.9.3
Software Suite for Advanced General Ensemble Simulations
CFF.cpp
1 
21 #include "CFF.h"
22 #include "schema.h"
23 #include "Snapshot.h"
24 #include "mxx/bcast.hpp"
25 #include "CVs/CVManager.h"
26 #include "Drivers/DriverException.h"
27 #include "Validator/ObjectRequirement.h"
28 #include <ctime>
29 
30 using namespace Json;
31 
32 // Implementation of the Combined Force Frequency (CFF) method from
33 // Ref: Sevgen et al. J. Chem. Theory Comput. 2020, 16, 3, 1448-1455.a
34 
35 namespace SSAGES
36 {
38 
41  void CFF::PreSimulation(Snapshot* snapshot, const CVManager& cvmanager)
42  {
43  auto cvs = cvmanager.GetCVs(cvmask_);
44  dim_ = cvs.size();
45 
46  // Size and initialize Fold_.
47  Fold_.setZero(dim_);
48 
49  // Initialize w \dot p's for finite difference.
50  wdotp1_.setZero(dim_);
51  wdotp2_.setZero(dim_);
52 
53  int ndim = hgrid_->GetDimension();
54  kbt_ = snapshot->GetKb()*temp_;
55 
56  // Zero out forces and histogram.
57  Eigen::VectorXd vec = Eigen::VectorXd::Zero(ndim);
58  std::fill(hgrid_->begin(), hgrid_->end(), 0);
59  std::fill(ugrid_->begin(), ugrid_->end(), 1.0);
60  std::fill(fgrid_->begin(), fgrid_->end(), vec);
61 
62  // Initialize Nworld.
63  Eigen::Map<Eigen::Array<unsigned int, Eigen::Dynamic, 1>> hist(hgrid_->data(), hgrid_->size());
64  Nworld_ = hist.cast<int>();
65 
66  // Scale initial bias by 1/2*kT and make them positive.
67  bias_.array() = abs(bias_.array())*kbt_*0.5;
68  }
69 
71 
79  void CFF::PostIntegration(Snapshot* snapshot, const CVManager& cvmanager)
80  {
81 
82  // Gather information.
83  // // Bias energy is kb*T*ln(P) where P = unbiased distribution
84  auto cvs = cvmanager.GetCVs(cvmask_);
85  auto& vels = snapshot->GetVelocities();
86  auto& mass = snapshot->GetMasses();
87  auto& forces = snapshot->GetForces();
88  auto& virial = snapshot->GetVirial();
89  auto n = snapshot->GetNumAtoms();
90 
91  using NAtoms = decltype(n);
92 
93  if(snapshot->GetIteration() && snapshot->GetIteration() % nsweep_ == 0 && snapshot->GetIteration() >= nsweep_*1 )
94  {
95  // Switch to full blast.
96  if(citers_ && snapshot->GetIteration() > citers_)
97  pweight_ = 1.0;
98 
99  TrainNetwork();
100  if(IsMasterRank(world_))
101  WriteBias();
102  }
103 
104  // Determine if we are in bounds.
105  Eigen::RowVectorXd vec(dim_);
106  std::vector<double> val(dim_);
107  bool inbounds = true;
108  for(int i = 0; i < dim_; ++i)
109  {
110  val[i] = cvs[i]->GetValue();
111  vec[i] = cvs[i]->GetValue();
112  if(val[i] < hgrid_->GetLower(i) || val[i] > hgrid_->GetUpper(i))
113  inbounds = false;
114  }
115 
116  // Initialize matrix to hold the CV gradient.
117  Eigen::MatrixXd J(dim_, 3*n);
118 
119  // Fill J and CV. Each column represents grad(CV) with flattened Cartesian elements.
120  for(int i = 0; i < dim_; ++i)
121  {
122  auto& grad = cvs[i]->GetGradient();
123  for(NAtoms j = 0; j < n; ++j)
124  J.block<1, 3>(i,3*j) = grad[j];
125  }
126 
127  //* Calculate W using Darve's approach (http://mc.stanford.edu/cgi-bin/images/0/06/Darve_2008.pdf).
128  // However, we will not use mass weighing.
129  Eigen::MatrixXd Jmass = J.transpose();
130 
131  Eigen::MatrixXd Minv = J*Jmass;
132  MPI_Allreduce(MPI_IN_PLACE, Minv.data(), Minv.size(), MPI_DOUBLE, MPI_SUM, comm_);
133  Eigen::MatrixXd Wt = Minv.inverse()*Jmass.transpose();
134 
135  // Fill momenta.
136  Eigen::VectorXd momenta(3*n);
137  for(NAtoms i = 0; i < n; ++i)
138  momenta.segment<3>(3*i) = mass[i]*vels[i];
139 
140  // Compute dot(w,p)
141  Eigen::VectorXd wdotp = Wt*momenta;
142 
143  // Reduce dot product across processors.
144  MPI_Allreduce(MPI_IN_PLACE, wdotp.data(), wdotp.size(), MPI_DOUBLE, MPI_SUM, comm_);
145 
146  // Compute d(wdotp)/dt second order backwards finite difference.
147  // Adding old force removes bias.
148  Eigen::VectorXd dwdotpdt = unitconv_*(1.5*wdotp - 2.0*wdotp1_ + 0.5*wdotp2_)/timestep_ + Fold_;
149  Eigen::VectorXd derivatives = Eigen::VectorXd::Zero(dim_);
150 
151  // If we are in bounds, sum force and frequency into running total.
152  if (inbounds)
153  {
154  if(IsMasterRank(comm_))
155  {
156  for(int i=0; i<dim_; ++i)
157  F_[i]->at(val) += dwdotpdt[i];
158 
159  hgrid_->at(val) += 1;
160  }
161 
162  // Initial sweep is the same as doing adaptive basing force
163  // i.e., Calculate biasing forces from averaged F at current CV coodinates
164 
165  if(snapshot->GetIteration() < nsweep_)
166  {
167  for(int i = 0; i < dim_; ++i)
168  derivatives[i] = (F_[i]->at(val)/std::max((double(hgrid_->at(val))),double(min_)));
169  }
170  else
171  {
172  net_.forward_pass(vec);
173  net2_.forward_pass(vec);
174  derivatives = net_.get_gradient(0)*ratio_ + net2_.get_gradient(0)*(1.0-ratio_);
175  }
176 
177  }
178  // If out of bounds, apply harmonic restraint
179  else
180  {
181  // Output to screen CV value that is out of bounds
182  if(IsMasterRank(comm_))
183  {
184  std::cerr << "CFF (" << snapshot->GetIteration() << "): out of bounds ( ";
185  for(auto& v : val)
186  std::cerr << v << " ";
187  std::cerr << ")" << std::endl;
188  }
189 
190  // Apply harmonic restraints
191  for(int i = 0; i < dim_; ++i)
192  {
193  auto cval = cvs[i]->GetValue();
194  if(cval < lowerb_[i])
195  derivatives[i] += lowerk_[i]*cvs[i]->GetDifference(cval - lowerb_[i]);
196  else if(cval > upperb_[i])
197  derivatives[i] += upperk_[i]*cvs[i]->GetDifference(cval - upperb_[i]);
198  }
199  }
200 
201 
202  for(int i = 0; i < dim_; ++i)
203  {
204  auto& grad = cvs[i]->GetGradient();
205  auto& boxgrad = cvs[i]->GetBoxGradient();
206 
207  // Update the forces in snapshot by adding in the force bias from each
208  // CV to each atom based on the gradient of the CV.
209  for (NAtoms j = 0; j < n; ++j)
210  forces[j] -= derivatives[i]*grad[j];
211 
212  // Update virial.
213  virial += derivatives[i]*boxgrad;
214  }
215  // Collect all walkers.
216  MPI_Barrier(world_);
217 
218  // Store the old summed forces.
219  Fold_ = derivatives;
220 
221  // Update finite difference time derivatives.
222  wdotp2_ = wdotp1_;
223  wdotp1_ = wdotp;
224  }
225 
227  void CFF::PostSimulation(Snapshot*, const CVManager&)
228  {
229  }
230 
232  void CFF::TrainNetwork()
233  {
234  // Start the clock to measure the neural network training time.
235  std::clock_t start = std::clock();
236 
237  // Increment cycle counter.
238  ++sweep_;
239 
240  // Reduce histogram across procs and sync in case system is periodic.
241  mxx::allreduce(hgrid_->data(), hgrid_->size(), std::plus<unsigned int>(), world_);
242  hgrid_->syncGrid();
243 
244  // Update visit frequencies.
245  Eigen::Map<Eigen::Array<unsigned int, Eigen::Dynamic, 1>> hist(hgrid_->data(), hgrid_->size());
246  Nworld_ += hist.cast<int>();
247 
248  // Synchronize unbiased histogram.
249  ugrid_->syncGrid();
250  Eigen::Map<Eigen::Matrix<double, Eigen::Dynamic, 1>> uhist(ugrid_->data(), ugrid_->size());
251 
252  // Update average biased forces across all processors
253  std::vector<Eigen::MatrixXd> Ftrain;
254  for(int i=0; i<dim_; ++i)
255  {
256  MPI_Allreduce(F_[i]->data(), Fworld_[i]->data(), (F_[i]->size()), MPI_DOUBLE, MPI_SUM, world_);
257  Fworld_[i]->syncGrid();
258 
259  // Damp forces used to train net2_ via minimum number of hits
260  Eigen::ArrayXd Nmin = Eigen::ArrayXd::Ones(Fworld_[0]->size())*min_;
261  Ftrain.push_back(Eigen::Map<Eigen::Array<double, Eigen::Dynamic, 1>> (Fworld_[i]->data(),Fworld_[i]->size()) /
262  ( (Nworld_.cast<double>()).max(Nmin) ) );
263  }
264 
265  // Calculate unbiased histrogram from previous unbiased histogram plus estimates from bias energy.
266  uhist.array() = pweight_*uhist.array() + hist.cast<double>()*(bias_/kbt_).array().exp()*weight_;
267 
268  // Synchronize unbiased histogram and clear global histogram holder.
269  ugrid_->syncGrid();
270  hist.setZero();
271 
272  // Initialize boolean vector to enable training data.
273  // 1 = include specified CV bin for training; 0 = remove from training.
274  // Useful for training data partially in the future.
275  force_to_val_ratio_ = Eigen::MatrixXd::Zero(hist_.rows(),1);
276 
277  // Bias energy is kb*T*ln(P) where P = unbiased distribution.
278  bias_.array() = kbt_*uhist.array().log();
279  bias_.array() -= bias_.minCoeff();
280 
281  if (ratio_ > 0.6)
282  {
283  net2_.init_weights();
284  }
285 
286  // Train network.
287  net_.autoscale(hist_, bias_);
288  net2_.autoscale_w_grad(hist_, bias_, Ftrain);
289  if(IsMasterRank(world_))
290  {
291  SetRatio(1.0);
292  double gamma1;
293  gamma1 = net_.train(hist_, bias_, true);
294  SetRatio(0.0);
295  double gamma2 = net2_.train_w_grad(hist_, bias_,Ftrain, force_to_val_ratio_, true);
296  std::cout << "gamma1 " << gamma1 << " " << gamma2 << std::endl;
297  ratio_ = gamma1/(gamma1+gamma2);
298 
299  std::cout << std::endl << "Ratio: " << ratio_ << std::endl;
300 
301  // Output file that accumulates information on the value of ratio_ for every sweeps
302  std::ofstream gammaout;
303  gammaout.open(outfile_ + "_gamma", std::ios_base::app);
304  gammaout.precision(16);
305  gammaout << sweep_ << " " << gamma1 << " " << gamma2 << " " << ratio_ << std::endl;
306  gammaout.close();
307  }
308 
309  // Send optimal neural net params to all procs.
310  nnet::vector_t wb = net_.get_wb();
311  mxx::bcast(wb.data(), wb.size(), 0, world_);
312  net_.set_wb(wb);
313 
314  nnet::vector_t wb2 = net2_.get_wb();
315  mxx::bcast(wb2.data(), wb2.size(), 0, world_);
316  net2_.set_wb(wb2);
317 
318  mxx::bcast(&ratio_, 1, 0, world_);
319 
320  // Evaluate and subtract off min value for applied bias.
321  net_.forward_pass(hist_);
322  net2_.forward_pass(hist_);
323  bias_.array() = net_.get_activation().col(0).array() * ratio_ + net2_.get_activation().col(0).array() * (1.0-ratio_);
324  bias_.array() -= bias_.minCoeff();
325 
326  // Average the generalized force for each bin and output the file.
327  if(IsMasterRank(world_))
328  {
329  int gridPoints = 1;
330  for(int i = 0 ; i < dim_; ++i)
331  gridPoints = gridPoints * hgrid_->GetNumPoints(i);
332 
333  std::string filename;
334  filename = overwrite_ ? "F_out" : "F_out_"+std::to_string(sweep_);
335  std::ofstream file(filename);
336  file << std::endl;
337  file << "Sweep: " << sweep_ << std::endl;
338  file << "Printing out the current Biasing Vector Field from CFF." << std::endl;
339  file << "First (Nr of CVs) columns are the coordinates, the next (Nr of CVs) columns are components of the Generalized Force vector at that point." << std::endl;
340  file << "The columns are " << gridPoints << " long, mapping out a surface of ";
341  for (int i = 0 ; i < dim_-1; ++i)
342  file << (hgrid_->GetNumPoints())[i] << " by " ;
343  file << (hgrid_->GetNumPoints(dim_-1)) << " points in " << dim_ << " dimensions." << std::endl;
344  file << std::endl;
345 
346  for(int j=0; j < (Ftrain[0].array()).size();++j)
347  {
348  file << std::setw(14) << std::setprecision(8) << std::fixed << hist_.row(j);
349  for(int i = 0 ; i < dim_; ++i){
350  nnet::matrix_t x = Ftrain[i].array();
351  file << std::setw(14) << std::setprecision(8) << std::fixed << x(j) << " ";
352  }
353  file << std::endl;
354  }
355  file << std::endl;
356  file << std::endl;
357  file.close();
358  }
359 
360  // Output training time information
361  double duration = ( std::clock() - start ) / (double) CLOCKS_PER_SEC;
362  std::ofstream file1("traintime.out",std::ofstream::app);
363  file1 << duration << std::endl;
364  file1.close();
365 
366  }
367 
368  // Write out neural network data
369  void CFF::WriteBias()
370  {
371  // Write neural network topology and parameters
372  std::string filename;
373  filename = overwrite_ ? "netstate.dat" : "netstate_"+std::to_string(sweep_)+".dat";
374  net_.write(filename.c_str());
375  filename = overwrite_ ? "netstate2.dat" : "netstate2_"+std::to_string(sweep_)+".dat";
376  net2_.write(filename.c_str());
377 
378 
379  // Write CFF output
380  filename = overwrite_ ? outfile_ : outfile_ + std::to_string(sweep_);
381  std::ofstream file(filename);
382  file.precision(8);
383  net_.forward_pass(hist_);
384  net2_.forward_pass(hist_);
385  nnet::matrix_t x = net_.get_activation().array();
386  nnet::matrix_t y = net2_.get_activation().array();
387  nnet::matrix_t q = bias_;
388  nnet::matrix_t m = -q;
389  m.array() -= m.minCoeff();
390 
391  file << std::endl;
392  file << "Sweep: " << sweep_ << std::endl;
393  file << "Printing out the current Combined Force Frequency data." << std::endl;
394  file << "First (Nr of CVs) columns are the coordinates." << std::endl;
395  file << "Next columns (left to right) are: hist bias(freq_NN) bias(force_NN) bias(both_NN) free_energy" << std::endl;
396  file << std::endl;
397  for(int i = 0; i < y.rows(); ++i)
398  {
399  for(int j = 0; j < hist_.cols(); ++j)
400  file << std::fixed << hist_(i,j) << " ";
401  file << std::fixed<< Nworld_[i] << " " << x(i)<< " " << std::fixed << y(i)<< " " << std::fixed << q(i) <<" " << std::fixed << m(i) << "\n";
402  }
403  file.close();
404 
405  }
406 
407  CFF* CFF::Build(
408  const Json::Value& json,
409  const MPI_Comm& world,
410  const MPI_Comm& comm,
411  const std::string& path)
412  {
413  ObjectRequirement validator;
414  Value schema;
415  CharReaderBuilder rbuilder;
416  CharReader* reader = rbuilder.newCharReader();
417  reader->parse(
418  JsonSchema::CFFMethod.c_str(),
419  JsonSchema::CFFMethod.c_str() + JsonSchema::CFFMethod.size(),
420  &schema,
421  nullptr
422  );
423  validator.Parse(schema, path);
424 
425  // Validate inputs.
426  validator.Validate(json, path);
427  if(validator.HasErrors())
428  throw BuildException(validator.GetErrors());
429 
430  // Grid.
431  auto jsongrid = json.get("grid", Json::Value());
432  auto* fgrid = Grid<Eigen::VectorXd>::BuildGrid(jsongrid);
433  auto* hgrid = Grid<unsigned int>::BuildGrid(jsongrid);
434  auto* ugrid = Grid<double>::BuildGrid(jsongrid);
435  std::vector<Grid<double>*> F(json["grid"]["upper"].size());
436  for(auto& grid : F)
437  grid = Grid<double>::BuildGrid(jsongrid);
438  std::vector<Grid<double>*> Fworld(json["grid"]["upper"].size());
439  for(auto& grid : Fworld)
440  grid = Grid<double>::BuildGrid(jsongrid);
441 
442  // Topology.
443  auto nlayers = json["topology"].size() + 2;
444  Eigen::VectorXi topol(nlayers);
445  topol[0] = fgrid->GetDimension();
446  topol[nlayers-1] = 1;
447  for(decltype(nlayers) i = 0; i < json["topology"].size(); ++i)
448  topol[i+1] = json["topology"][i].asInt();
449 
450  // CFF parameters
451  auto weight = json.get("weight", 1.).asDouble();
452  auto temp = json["temperature"].asDouble();
453  auto nsweep = json["nsweep"].asUInt();
454  auto unitconv = json.get("unit_conversion", 1).asDouble();
455  auto timestep = json.get("timestep", 0.002).asDouble();
456  auto min = json.get("minimum_count", 200).asInt();
457 
458  // Assume all vectors are the same size.
459  std::vector<double> lowerb, upperb, lowerk, upperk;
460  auto lbr_size = json["lower_bound_restraints"].size();
461  for(decltype(lbr_size) i = 0; i < lbr_size; ++i)
462  {
463  lowerk.push_back(json["lower_bound_restraints"][i].asDouble());
464  upperk.push_back(json["upper_bound_restraints"][i].asDouble());
465  lowerb.push_back(json["lower_bounds"][i].asDouble());
466  upperb.push_back(json["upper_bounds"][i].asDouble());
467  }
468 
469  auto* m = new CFF(world, comm, topol, fgrid, hgrid, ugrid, F, Fworld, lowerb, upperb, lowerk, upperk, temp, unitconv, timestep, weight, nsweep, min);
470 
471  // Set optional params.
472  m->SetPrevWeight(json.get("prev_weight", 1).asDouble());
473  m->SetOutput(json.get("output_file", "CFF.out").asString());
474  m->SetOutputOverwrite( json.get("overwrite_output", true).asBool());
475  m->SetConvergeIters(json.get("converge_iters", 0).asUInt());
476  m->SetMaxIters(json.get("max_iters", 1000).asUInt());
477  m->SetMinLoss(json.get("min_loss", 0).asDouble());
478  m->SetRatio(json.get("ratio", 0.0).asDouble());
479 
480  return m;
481  }
482 }
Requirements on an object.
virtual void Parse(Value json, const std::string &path) override
Parse JSON value to generate Requirement(s).
virtual void Validate(const Value &json, const std::string &path) override
Validate JSON value.
std::vector< std::string > GetErrors()
Get list of error messages.
Definition: Requirement.h:92
bool HasErrors()
Check if errors have occured.
Definition: Requirement.h:86
Exception to be thrown when building the Driver fails.
Combined Force Frequency (CFF) Algorithm.
Definition: CFF.h:37
Collective variable manager.
Definition: CVManager.h:43
CVList GetCVs(const std::vector< unsigned int > &mask=std::vector< unsigned int >()) const
Get CV iterator.
Definition: CVManager.h:81
Basic Grid.
Definition: Grid.h:59
Class containing a snapshot of the current simulation in time.
Definition: Snapshot.h:48
double GetKb() const
Get system Kb.
Definition: Snapshot.h:165
const Matrix3 & GetVirial() const
Get box virial.
Definition: Snapshot.h:135
unsigned GetNumAtoms() const
Get number of atoms in this snapshot.
Definition: Snapshot.h:202
const std::vector< Vector3 > & GetForces() const
Access the per-particle forces.
Definition: Snapshot.h:351
size_t GetIteration() const
Get the current iteration.
Definition: Snapshot.h:105
const std::vector< double > & GetMasses() const
Const access to the particle masses.
Definition: Snapshot.h:367
const std::vector< Vector3 > & GetVelocities() const
Access the particle velocities.
Definition: Snapshot.h:338