Swarm-NG  1.1
tutorial_integrator.hpp
Go to the documentation of this file.
1 /*************************************************************************
2  * Copyright (C) 2011 by Saleh Dindar and the Swarm-NG Development Team *
3  * *
4  * This program is free software; you can redistribute it and/or modify *
5  * it under the terms of the GNU General Public License as published by *
6  * the Free Software Foundation; either version 3 of the License. *
7  * *
8  * This program is distributed in the hope that it will be useful, *
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of *
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
11  * GNU General Public License for more details. *
12  * *
13  * You should have received a copy of the GNU General Public License *
14  * along with this program; if not, write to the *
15  * Free Software Foundation, Inc., *
16  * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
17  ************************************************************************/
18 
25 // @page TutorialIntegrator Tutorial for implementing an Integrator
26 // If you are planning to implement a new ODE integration algorithm for
27 // use with Swarm and a propagator is not enough for your needs, you
28 // may consider writing an integrator. Most of the code consists of
29 // structures that other components of Swarm expect from an integrator.
30 //
31 // The actual mathematical equations are embedded in the kernel function.
32 //
33 // First you need to include the headers for the parent classes.
34 #include "swarm/common.hpp"
35 #include "swarm/gpu/bppt.hpp"
36 
37 // We need to use classes in Swarm namespace as well as classes in
38 // Swarm body-pair-per-thread namespace.
39 using swarm::config;
40 using swarm::ensemble;
41 using namespace swarm::gpu::bppt;
42 
44 // a template makes it easier to use different Monitors and different
45 // Gravitational force calculation algorithms
46 template< class Monitor , template<class T> class Gravitation >
49 
50  // Some convenience aliases, just to follow the conventions
51  typedef integrator base;
52  typedef Monitor monitor_t;
53  typedef typename monitor_t::params mon_params_t;
54 
55  // Configuration paramaters that are loaded during when the integrator
56  // is created. Values for these parameters are usually specified
57  // in a config file.
58  private:
59  double _time_step;
60  mon_params_t _mon_params;
61 
62  // The configurations are loaded when the integrator is first created.
63  // The integrator should also configure the monitor by initializing the
64  // _mon_params and passing the cfg parameter to it.
66  public:
67  TutorialIntegrator(const config& cfg): base(cfg),_time_step(0.001), _mon_params(cfg) {
68  _time_step = cfg.require("time_step", 0.0);
69  }
70 
71  // Every integrator should implement the launch_integrator method.
72  // It is an abstract method, C++ will require you to implement it.
73  // for most integrators that are templatized by the number of bodies.
74  // it is easire to call launch_templatized_integrator on self.
75  // launch_templatized integrator will call a version of the
76  // kernel function (see below) optimized for the specific number of
77  // bodies.
79  virtual void launch_integrator() {
81  }
82 
83  // These two function are used by some monitors. If you use
84  // a coordinate system other than Cartesian, you may need
85  // to convert it to Cartesian and back when these functions are called.
87  GPUAPI void convert_internal_to_std_coord() {}
89  GPUAPI void convert_std_to_internal_coord() {}
90 
91  // The CUDA kernel that contains our algorithm is defined in this
92  // `kernel` function. The template contains the compile-time parameters
93  // which this kernel is being optimized for. Usaually it only contains
94  // the number of bodies in a system.
95  //
96  // There are no dynamic parameters, since everything else (ensemble
97  // and configuration parameters) is already
98  // specified in the member variables of current class.
99 
101  template<class T>
102  __device__ void kernel(T compile_time_param){
103 
104  // Usally there are more threads than the number of systems, so
105  // we have to guard.
106  if(sysid()>=_dens.nsys()) return;
107 
108  // We define an auxiliary variable for our system since our thread
109  // only handles one system from the whole ensemble.
110  ensemble::SystemRef sys = _dens[sysid()];
111 
112  // The Gravitation class needs to be optimized with the compile
113  // time parameter. It also needs to use shared memory. So we
114  // use the helper function system_shared_data_pointer to retrieve
115  // the shared memory area dedicated for our system.
116  typedef Gravitation<T> Grav;
117  typedef typename Grav::shared_data grav_t;
118  Grav calcForces(sys,*( (grav_t*) system_shared_data_pointer(this,compile_time_param) ) );
119 
120  // We initialize the monitor and pass it the configuration
121  // parameters and pointers to the current system and logging
122  // object.
123  monitor_t montest(_mon_params,sys,*_log) ;
124 
125  // Here we use some variables to make our code look clearer.
126  //
127  // Number of bodies come from the compile time parameter since
128  // our code is being optimized for that number of bodies.
129  const int nbod = T::n;
130  // This thread is assigned one component (x, y or z) of exactly
131  // one body to work with, we use
132  // the utility functions to find which body and which component
133  // is assigned to current thread.
134  const int b = thread_body_idx(nbod);
135  const int c = thread_component_idx(nbod);
136 
137 
138  // As a part of preparing to integrate, we load the values
139  // to local variables. Local variables are usually represent
140  // registers in native code. We always need to use
141  // the guards (b < nbod)&&(c < 3) because there are some threads
142  // for which b and c are not valid.
143  double pos = 0.0, vel = 0.0 , acc = 0.0, jerk = 0.0;
144  if( (b < nbod) && (c < 3) )
145  { pos = sys[b][c].pos(); vel = sys[b][c].vel(); }
146 
147  // We need to test with the monitor before we proceed since
148  // the system may not get stopped before getting integrated
149  montest( thread_in_system() );
150 
151 
152  // We use _max_iteration to avoid falling into infinite loop
153  // (or very long CUDA calls). Otherwise, the loop only ends
154  // when the system becomes inactive. (due to reaching the destination_time
155  // or triggerring a stopping condition).
156  for(int iter = 0 ; (iter < _max_iterations) && sys.is_active() ; iter ++ )
157  {
158 
159 
160  // First step of integration: calculate the accelartion
161  // (second derivative of position) and jerk (third derivative
162  // of position). The gravitation
163  // algorithm needs all the specified parameters. The acceleration and
164  // jerk are returned in the last two variables (acc,jerk).
165  calcForces(thread_in_system(),b,c,pos,vel,acc,jerk);
166 
167  // For more precise integrations, we want to end exactly at
168  // the destination_time, that means that h cannot be
169  // greater than _desitantion_time - current_system_time
170  double h = min(_destination_time - sys.time(), _time_step);
171 
172  // For this simple integrator, we use explicit Euler integration
173  // equations. More complex equations can be used in practice.
174  pos = pos + h*(vel+(h*0.5)*(acc+(h/3.0)*jerk));
175  vel = vel + h*(acc+(h*0.5)*jerk);
176 
177  // Write the positions and velocities back to memory
178  // for the monitor testing to use it
179  if( (b < nbod) && (c < 3) )
180  { sys[b][c].pos() = pos; sys[b][c].vel() = vel; }
181 
182  // Only the first thread should advance the current
183  // system time
184  if( thread_in_system()==0 )
185  sys.time() += h;
186 
187  // We need to make sure that all the memory writes has
188  // been done before we can proceed.
189  __syncthreads();
190 
191  montest( thread_in_system() );
192  __syncthreads();
193 
194  // We also have to check that if we reached the destination_time
195  // if that is the case, we deactivate system so we can
196  // get out of the loop.
197  if( sys.is_active() && thread_in_system()==0 ) {
198  if( sys.time() >= _destination_time )
199  { sys.set_inactive(); }
200  }
201 
202  // We need to do resynchronize so all threads can see
203  // the changes in time and state of the system.
204  __syncthreads();
205 
206  } // end of for loop
207  } // end of kernel function
208 }; // end of class TutorialIntegrator
209