Swarm-NG  1.1
rkck_integrator.hpp
Go to the documentation of this file.
1 /*************************************************************************
2  * Copyright (C) 2011 by Saleh Dindar and the Swarm-NG Development Team *
3  * *
4  * This program is free software; you can redistribute it and/or modify *
5  * it under the terms of the GNU General Public License as published by *
6  * the Free Software Foundation; either version 3 of the License. *
7  * *
8  * This program is distributed in the hope that it will be useful, *
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of *
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
11  * GNU General Public License for more details. *
12  * *
13  * You should have received a copy of the GNU General Public License *
14  * along with this program; if not, write to the *
15  * Free Software Foundation, Inc., *
16  * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
17  ************************************************************************/
18 
25 #include "swarm/common.hpp"
26 #include "swarm/gpu/bppt.hpp"
27 
28 namespace swarm { namespace gpu { namespace bppt {
29 
31 struct FixedTimeStep {
32  const static bool adaptive_time_step = false;
33  const static bool conditional_accept_step = false;
34 };
35 
38  const static bool adaptive_time_step = true;
39  const static bool conditional_accept_step = true;
40 };
41 
50 template< class AdaptationStyle, class Monitor, template<class T> class Gravitation >
51 class rkck: public integrator {
52  typedef integrator base;
53  typedef Monitor monitor_t;
54  typedef typename monitor_t::params mon_params_t;
55  private:
56  double _min_time_step;
57  double _max_time_step;
58  double _error_tolerance;
59  int _iteration_count;
60  mon_params_t _mon_params;
61 
62 public:
63  rkck(const config& cfg): base(cfg),_min_time_step(0.001),_max_time_step(0.1), _mon_params(cfg) {
64  if(!cfg.count("min_time_step")) ERROR("Integrator rkck requires a min timestep ('min time step' keyword in the config file).");
65  _min_time_step = atof(cfg.at("min_time_step").c_str());
66  if(!cfg.count("max_time_step")) ERROR("Integrator rkck requires a max timestep ('max time step' keyword in the config file).");
67  _max_time_step = atof(cfg.at("max_time_step").c_str());
68 
69  if(!cfg.count("error_tolerance")) ERROR("Integrator rkck requires a error tolerance ('error tolerance' keyword in the config file).");
70  _error_tolerance = atof(cfg.at("error_tolerance").c_str());
71  }
72 
74  virtual void launch_integrator() {
75  _iteration_count = _destination_time / _max_time_step;
77  }
78 
79 
82 
85 
86  GPUAPI bool is_in_body_component_grid(const int b, const int c, const int nbod)
87  { return ((b < nbod) && (c < 3)); }
88 
90  template<class T>
91  __device__ void kernel(T compile_time_param){
92 
95  // Step 1 coefficient
96  const double b1 = 1.0 / 5.0;
97  // Step 2 coefficient
98  const double b2[] = { 3.0 / 40.0, 9.0 / 40.0 };
99  // Step 3 coefficient
100  const double b3[] = { 0.3, -0.9, 1.2 };
101  // Step 4 coefficient
102  const double b4[] = { -11.0 / 54.0, 2.5, -70.0 / 27.0, 35.0 / 27.0 };
103  // Step 5 coefficient
104  const double b5[] = { 1631.0 / 55296.0, 175.0 / 512.0, 575.0 / 13824.0, 44275.0 / 110592.0, 253.0 / 4096.0 };
105  // Step 6 coefficient
106  const double b6[] = { 37.0 / 378.0, 0, 250.0 / 621.0, 125.0 / 594.0, 0 , 512.0 / 1771.0 } ;
107  // Error estimation coefficients
108  const double ecc[] = { 37.0 / 378.0 - 2825.0 / 27648.0, 0.0, 250.0 / 621.0 - 18575.0 / 48384.0, 125.0 / 594.0 - 13525.0 / 55296.0, -277.00 / 14336.0, 512.0 / 1771.0 - 0.25 };
109 
110  if(sysid()>=_dens.nsys()) return;
111  // References to Ensemble and Shared Memory
112  ensemble::SystemRef sys = _dens[sysid()];
113  typedef Gravitation<T> Grav;
114  typedef typename Grav::shared_data grav_t;
115  Grav calcForces(sys,*( (grav_t*) system_shared_data_pointer(this,compile_time_param) ) );
116 
117  // Local variables
118  const int nbod = T::n;
119  // Body number
120  const int b = thread_body_idx(nbod);
121  // Component number
122  const int c = thread_component_idx(nbod);
123 
124  // local variables
125  monitor_t montest(_mon_params,sys,*_log) ;
126 
127  // NB: We use the same shared memory for two purpose and overwrite each other
128  // Since the use of the memory is not interleaved, we can safely use the same
129  // space for both purposes
130  typedef DoubleCoalescedStruct<SHMEM_CHUNK_SIZE> shared_mag_t[2][nbod][3];
131  shared_mag_t& shared_mag = * (shared_mag_t*) system_shared_data_pointer(this,compile_time_param) ;
132 
133  double time_step = _max_time_step;
134 
136  double pos = 0, vel = 0 ;
137  if( is_in_body_component_grid(b,c,nbod) )
138  pos = sys[b][c].pos() , vel = sys[b][c].vel();
139 
140  montest( thread_in_system() );
142 
143  for(int iter = 0 ; (iter < _iteration_count) && sys.is_active() ; iter ++ ) {
144 
145  double h = time_step;
146 
147  if( sys.time() + h > _destination_time ) {
148  h = _destination_time - sys.time();
149  }
150 
152  double p0 = pos, v0 = vel;
153 
154  // Step 1
155  double k1_acc = calcForces.acc(thread_in_system(),b,c,p0,v0);
156  double k1_vel = v0;
157 
158  double p1 = pos + h * b1 * k1_vel;
159  double v1 = vel + h * b1 * k1_acc;
160 
161  // Step 2
162  double k2_acc = calcForces.acc(thread_in_system(),b,c,p1,v1);
163  double k2_vel = v1;
164 
165  double p2 = pos + h * ( b2[0] * k1_vel + b2[1] * k2_vel );
166  double v2 = vel + h * ( b2[0] * k1_acc + b2[1] * k2_acc );
167 
168  // Step 3
169  double k3_acc = calcForces.acc(thread_in_system(),b,c,p2,v2);
170  double k3_vel = v2;
171 
172  double p3 = pos + h * ( b3[0] * k1_vel + b3[1] * k2_vel + b3[2] * k3_vel );
173  double v3 = vel + h * ( b3[0] * k1_acc + b3[1] * k2_acc + b3[2] * k3_acc );
174 
175  // Step 4
176  double k4_acc = calcForces.acc(thread_in_system(),b,c,p3,v3);
177  double k4_vel = v3;
178 
179  double p4 = pos + h * ( b4[0] * k1_vel + b4[1] * k2_vel + b4[2] * k3_vel + b4[3] * k4_vel );
180  double v4 = vel + h * ( b4[0] * k1_acc + b4[1] * k2_acc + b4[2] * k3_acc + b4[3] * k4_acc );
181 
182  // Step 5
183  double k5_acc = calcForces.acc(thread_in_system(),b,c,p4,v4);
184  double k5_vel = v4;
185 
186  double p5 = pos + h * ( b5[0] * k1_vel + b5[1] * k2_vel + b5[2] * k3_vel + b5[3] * k4_vel + b5[4] * k5_vel );
187  double v5 = vel + h * ( b5[0] * k1_acc + b5[1] * k2_acc + b5[2] * k3_acc + b5[3] * k4_acc + b5[4] * k5_acc );
188 
189  // Step 6
190  double k6_acc = calcForces.acc(thread_in_system(),b,c,p5,v5);
191  double k6_vel = v5;
192 
193  double p6 = pos + h * ( b6[0] * k1_vel + b6[1] * k2_vel + b6[2] * k3_vel + b6[3] * k4_vel + b6[4] * k5_vel + b6[5] * k6_vel );
194  double v6 = vel + h * ( b6[0] * k1_acc + b6[1] * k2_acc + b6[2] * k3_acc + b6[3] * k4_acc + b6[4] * k5_acc + b6[5] * k6_acc );
195 
196 
197  // Error estimate
198  double pos_error = h * ( ecc[0] * k1_vel + ecc[1] * k2_vel + ecc[2] * k3_vel + ecc[3] * k4_vel + ecc[4] * k5_vel + ecc[5] * k6_vel );
199  double vel_error = h * ( ecc[0] * k1_acc + ecc[1] * k2_acc + ecc[2] * k3_acc + ecc[3] * k4_acc + ecc[4] * k5_acc + ecc[5] * k6_acc );
200 
201 
202  bool accept_step = true;
203 
204  if( AdaptationStyle::adaptive_time_step ) {
206  const int integrator_order = 5;
208  const float step_grow_power = -1./(integrator_order+1.);
210  const float step_shrink_power = -1./integrator_order;
212  const float step_guess_safety_factor = 0.9;
214  const float step_grow_max_factor = 5.0;
216  const float step_shrink_min_factor = 0.2;
217 
218  // Calculate the error estimate
219  if( is_in_body_component_grid(b,c,nbod) ) {
220 
221  sys[b][c].pos() = p6 * p6 , sys[b][c].vel() = v6 * v6;
222  shared_mag[0][b][c].value() = pos_error * pos_error;
223  shared_mag[1][b][c].value() = vel_error * vel_error;
224  }
225  __syncthreads();
226 
227  if( is_in_body_component_grid(b,c,nbod) ) { // TODO: Could compute the normalized error using one thread per body and reduction (or atomic max)
228  if ( (c == 0) && (b == 0) ) {
229 
230  double max_error = 0;
231  for(int i = 0; i < nbod ; i++){
232  double pos_error_mag = shared_mag[0][i][0].value() + shared_mag[0][i][1].value() + shared_mag[0][i][2].value();
233  double pos_mag = sys[i][0].pos() + sys[i][1].pos() + sys[i][2].pos();
234  double pe = pos_error_mag / pos_mag ;
235 
236  double vel_error_mag = shared_mag[1][i][0].value() + shared_mag[1][i][1].value() + shared_mag[1][i][2].value();
237  double vel_mag = sys[i][0].vel() + sys[i][1].vel() + sys[i][2].vel();
238  double ve = vel_error_mag / vel_mag ;
239 
240  max_error = max ( max( pe, ve) , max_error );
241  }
242 
243  double normalized_error = max_error / _error_tolerance;
244 
245  // Calculate New time_step
246  double step_guess_power = (normalized_error<1.) ? step_grow_power : step_shrink_power;
247 
250  double step_change_factor = ((normalized_error<0.5)||(normalized_error>1.0)) ? step_guess_safety_factor*pow(normalized_error,0.5*step_guess_power) : 1.0;
251 
252 
254  double new_time_step = (normalized_error>1.) ? max( time_step * max(step_change_factor,step_shrink_min_factor), _min_time_step )
255  : min( time_step * max(min(step_change_factor,step_grow_max_factor),1.0), _max_time_step );
256 
257  bool accept = ( normalized_error < 1.0 ) || (abs(time_step - new_time_step) < 1e-10) ;
258 
259  shared_mag[0][0][0].value() = accept ? 0.0 : 1.0;
260  shared_mag[0][0][1].value() = new_time_step;
261  }
262 
263  }
264  __syncthreads();
265 
266  time_step = shared_mag[0][0][1].value();
267  accept_step = AdaptationStyle::conditional_accept_step ? (shared_mag[0][0][0].value() == 0.0) : true;
269  }
270 
271 
272  if ( accept_step ) {
273  // Set the new positions and velocities and time
274  pos = p6;
275  vel = v6;
276 
277  // Finalize the step
278  if( is_in_body_component_grid(b,c,nbod) )
279  sys[b][c].pos() = pos , sys[b][c].vel() = vel;
280  if( (thread_in_system() == 0) )
281  sys.time() += h;
282 
283  montest(thread_in_system());
284  if( (thread_in_system() == 0) && sys.is_active() ) {
285  if( sys.time() >= _destination_time )
286  { sys.set_inactive(); }
287  }
288  }
289 
290  __syncthreads();
291 
292  }
293 
294  }
295 
296 
297 };
298 
299 
300 } } } // end namespace bppt :: integrators :: swarm