Swarm-NG  1.1
hermite_adap.hpp
Go to the documentation of this file.
1 /*************************************************************************
2  * Copyright (C) 2011 by Saleh Dindar and the Swarm-NG Development Team *
3  * *
4  * This program is free software; you can redistribute it and/or modify *
5  * it under the terms of the GNU General Public License as published by *
6  * the Free Software Foundation; either version 3 of the License. *
7  * *
8  * This program is distributed in the hope that it will be useful, *
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of *
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
11  * GNU General Public License for more details. *
12  * *
13  * You should have received a copy of the GNU General Public License *
14  * along with this program; if not, write to the *
15  * Free Software Foundation, Inc., *
16  * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
17  ************************************************************************/
18 
25 #include "swarm/common.hpp"
26 #include "swarm/gpu/bppt.hpp"
27 
28 namespace swarm { namespace gpu { namespace bppt {
29 
34 template< class Monitor , template<class T> class Gravitation >
35 class hermite_adap: public integrator {
36  typedef integrator base;
37  typedef Monitor monitor_t;
38  typedef typename monitor_t::params mon_params_t;
39  private:
40  double _time_step_factor, _min_time_step;
41  mon_params_t _mon_params;
42 
43 public:
44  hermite_adap(const config& cfg): base(cfg),_time_step_factor(0.001),_min_time_step(0.001), _mon_params(cfg) {
45  _time_step_factor = cfg.require("time_step_factor", 0.0);
46  _min_time_step = cfg.require("min_time_step", 0.0);
47  }
48 
49  template<class T>
50  static GENERIC int shmem_per_system(T compile_time_param){
51  return sizeof(SystemSharedData<T>)/SHMEM_CHUNK_SIZE;
52  }
53 
54  virtual void launch_integrator() {
56  }
57 
58  template<class T>
59  struct SystemSharedData {
60  typedef Gravitation<T> Grav;
61  typename Grav::shared_data gravitation;
62  DoubleCoalescedStruct<SHMEM_CHUNK_SIZE> time_step_factor[T::n];
63  };
64 
65  GPUAPI void convert_internal_to_std_coord() {}
66  GPUAPI void convert_std_to_internal_coord() {}
67 
68  template<class T>
69  __device__ double calc_adaptive_time_step(T compile_time_param, SystemSharedData<T>& shared, const double acc, const double jerk)
70  {
71  // Body number
72  int b = thread_body_idx(T::n);
73  // Component number
74  int c = thread_component_idx(T::n);
75 
77  if( (b < T::n) && (c < 3) ) {
78  shared.gravitation[b][c].acc() = acc*acc;
79  shared.gravitation[b][c].jerk() = jerk*jerk;
80  }
81  __syncthreads();
84  if( (b < T::n) && (c==0) ) {
85  double acc_mag_sq = shared.gravitation[b][0].acc()+shared.gravitation[b][1].acc()+shared.gravitation[b][2].acc();
86  double jerk_mag_sq = shared.gravitation[b][0].jerk()+shared.gravitation[b][1].jerk()+shared.gravitation[b][2].jerk();
87  shared.time_step_factor[b].value() = jerk_mag_sq/acc_mag_sq;
88  }
89  __syncthreads();
90  if( thread_in_system() == 0 ) {
91  double tf = shared.time_step_factor[0].value();
92  for(int bb=1;bb<T::n;++bb)
93  tf += shared.time_step_factor[bb].value();
94  shared.time_step_factor[0].value() = rsqrt(tf)*_time_step_factor+_min_time_step;
95  }
96  __syncthreads();
97  return shared.time_step_factor[0].value();
98  }
99 
100 
101 
102  template<class T>
103  __device__ void kernel(T compile_time_param){
104 
105  if(sysid()>=_dens.nsys()) return;
107  typedef Gravitation<T> Grav;
108  ensemble::SystemRef sys = _dens[sysid()];
109  SystemSharedData<T>& shared_data = *(SystemSharedData<T>*) system_shared_data_pointer(this, compile_time_param);
111  Grav calcForces(sys, shared_data.gravitation );
112 
113  // Local variables
114  const int nbod = T::n;
115  // Body number
116  const int b = thread_body_idx(nbod);
117  // Component number
118  const int c = thread_component_idx(nbod);
119 // const bool body_component_grid = (b < nbod) && (c < 3);
120 
121 
122  // local variables
123  monitor_t montest(_mon_params,sys,*_log) ;
124 
125  // local information per component per body
126  double pos = 0.0, vel = 0.0 , acc0 = 0.0, jerk0 = 0.0;
127  if( (b < T::n) && (c < 3) )
128  pos = sys[b][c].pos() , vel = sys[b][c].vel();
129 
130  montest( thread_in_system() );
131 
133 
135  calcForces(thread_in_system(),b,c,pos,vel,acc0,jerk0);
136 
137  for(int iter = 0 ; (iter < _max_iterations) && sys.is_active() ; iter ++ ) {
138  // Since h is the time step that is used for the step it makes more sense to
139  // to calculate time step and assign it to h
140  double h = calc_adaptive_time_step(compile_time_param, shared_data ,acc0,jerk0);
141 
142  if( sys.time() + h > _destination_time ) {
143  h = _destination_time - sys.time();
144  }
145 
146 
149 
150  // Predict
151  pos = pos + h*(vel+(h*0.5)*(acc0+(h/3.0)*jerk0));
152  vel = vel + h*(acc0+(h*0.5)*jerk0);
153 
154  double pre_pos = pos, pre_vel = vel;
155 
156  double acc1,jerk1;
157  {
158  // Evaluation
159  calcForces(thread_in_system(),b,c,pos,vel,acc1,jerk1);
160 
161  // Correct
162 #if 0 // OLD
163  pos = pre_pos + (0.1-0.25) * (acc0 - acc1) * h * h - 1.0/60.0 * ( 7.0 * jerk0 + 2.0 * jerk1 ) * h * h * h;
164  vel = pre_vel + ( -0.5 ) * (acc0 - acc1 ) * h - 1.0/12.0 * ( 5.0 * jerk0 + jerk1 ) * h * h;
165 #endif
166  pos = pre_pos + ( (0.1-0.25) * (acc0 - acc1) - 1.0/60.0 * ( 7.0 * jerk0 + 2.0 * jerk1 ) * h) * h * h;
167  vel = pre_vel + (( -0.5 ) * (acc0 - acc1 ) - 1.0/12.0 * ( 5.0 * jerk0 + jerk1 ) * h )* h ;
168  }
169  {
170  // Evaluation
171  calcForces(thread_in_system(),b,c,pos,vel,acc1,jerk1);
172 
173  // Correct
174 #if 0 // OLD
175  pos = pre_pos + (0.1-0.25) * (acc0 - acc1) * h * h - 1.0/60.0 * ( 7.0 * jerk0 + 2.0 * jerk1 ) * h * h * h;
176  vel = pre_vel + ( -0.5 ) * (acc0 - acc1 ) * h - 1.0/12.0 * ( 5.0 * jerk0 + jerk1 ) * h * h;
177 #endif
178  pos = pre_pos + ((0.1-0.25) * (acc0 - acc1) - 1.0/60.0 * ( 7.0 * jerk0 + 2.0 * jerk1 ) * h )* h * h ;
179  vel = pre_vel + (( -0.5 ) * (acc0 - acc1 ) - 1.0/12.0 * ( 5.0 * jerk0 + jerk1 ) * h ) * h ;
180  }
181  acc0 = acc1, jerk0 = jerk1;
182 
184  if( (b < T::n) && (c < 3) )
185  sys[b][c].pos() = pos , sys[b][c].vel() = vel;
186  if( thread_in_system()==0 )
187  sys.time() += h;
188 
189  montest( thread_in_system() );
190 
191  if( sys.is_active() && thread_in_system()==0 ) {
192  if( sys.time() >= _destination_time )
193  { sys.set_inactive(); }
194 
195  }
196  __syncthreads();
197 
198 
199  }
200 
201  }
202 
203 
204 };
205 
206 
207 } } } // end namespace bppt :: integrators :: swarm