Swarm-NG  1.1
generic_gpu_bppt_integrator.hpp
Go to the documentation of this file.
1 /*************************************************************************
2  * Copyright (C) 2011 by Saleh Dindar and the Swarm-NG Development Team *
3  * *
4  * This program is free software; you can redistribute it and/or modify *
5  * it under the terms of the GNU General Public License as published by *
6  * the Free Software Foundation; either version 3 of the License. *
7  * *
8  * This program is distributed in the hope that it will be useful, *
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of *
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
11  * GNU General Public License for more details. *
12  * *
13  * You should have received a copy of the GNU General Public License *
14  * along with this program; if not, write to the *
15  * Free Software Foundation, Inc., *
16  * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
17  ************************************************************************/
18 
25 #include "../common.hpp"
26 #include "bppt.hpp"
27 #include "device_functions.h"
28 
29 #define ASSUME_PROPAGATOR_USES_STD_COORDINATES 0
30 
31 namespace swarm { namespace gpu { namespace bppt {
32 
33 template<class T>
34 GENERIC const T& max3(const T& a, const T& b, const T& c){
35  if( b > a )
36  return c > b ? c : b;
37  else
38  return c > a ? c : a;
39 }
40 
/// Generic GPU integrator for the "body-pair-per-thread" (bppt) scheme.
///
/// The integrator is assembled from three policy components supplied as
/// template parameters:
///  - Propagator:  the numerical scheme that advances a system in time
///  - Monitor:     the test/logging policy evaluated after every step
///  - Gravitation: the force-calculation implementation
///
/// Each component also reports its per-system thread and shared-memory
/// requirements; the integrator takes the maximum over the three.
template< template<class T,class G> class Propagator, class Monitor
	, template<class T> class Gravitation>
class generic: public integrator {
	typedef integrator base;

	/// Monitor policy type (examines systems and writes to the log).
	typedef Monitor monitor_t;

	/// Parameter struct consumed by the monitor (built from the config).
	typedef typename monitor_t::params mon_params_t;

	/// Parameter struct consumed by the propagator (built from the config).
	/// Instantiated with the default compile-time parameter set defpar_t.
	typedef typename Propagator< defpar_t, Gravitation<defpar_t> >::params prop_params_t;

	private:
	mon_params_t _mon_params;    // monitor parameters, constructed from cfg
	prop_params_t _prop_params;  // propagator parameters, constructed from cfg

	public:
	/// Construct from a configuration; forwards cfg to the base integrator
	/// and to the monitor/propagator parameter structs.
	generic(const config& cfg): base(cfg), _mon_params(cfg),_prop_params(cfg) {
	}

	/// Launch the integration kernel.
	/// NOTE(review): the body appears empty in this documentation listing
	/// (original source line 106 is omitted by the dump); the original
	/// presumably invokes the templatized kernel launcher -- verify against
	/// the repository source before relying on this.
	virtual void launch_integrator() {
	}


	/// Number of threads required per system: the worst case (maximum) of
	/// what the gravitation, propagator and monitor components report.
	template<class T>
	static GENERIC int thread_per_system(T compile_time_param){
		const int grav = Gravitation<T>::thread_per_system();
		const int prop = Propagator<T,Gravitation<T> >::thread_per_system();
		const int moni = Monitor::thread_per_system(compile_time_param);
		return max3( grav, prop, moni);
	}

	/// Shared-memory requirement per system: the worst case (maximum) of
	/// what the three components report (presumably in bytes -- the unit is
	/// defined by the components, not visible here; confirm in their docs).
	template<class T>
	static GENERIC int shmem_per_system(T compile_time_param){
		const int grav = Gravitation<T>::shmem_per_system();
		const int prop = Propagator<T,Gravitation<T> >::shmem_per_system();
		const int moni = Monitor::shmem_per_system(compile_time_param);
		return max3( grav, prop, moni);
	}


	/// Device-side integration kernel body, instantiated for the
	/// compile-time parameter set T (T::n is the number of bodies).
	/// Advances one system per thread group until it reaches the
	/// destination time, becomes inactive, or _max_iterations is hit.
	template<class T>
	GPUAPI void kernel(T compile_time_param){
		// Threads mapped past the last system in the ensemble do nothing.
		if(sysid()>=_dens.nsys()) return;

		typedef Gravitation<T> GravitationInstance;

		// References to Ensemble and Shared Memory
		ensemble::SystemRef sys = _dens[sysid()];
		typedef typename GravitationInstance::shared_data grav_t;
		GravitationInstance calcForces(sys,*( (grav_t*) system_shared_data_pointer(this,compile_time_param) ) );

		const int nbod = T::n; // Number of Bodies
		int b = thread_body_idx(nbod); // Body id
		int c = thread_component_idx(nbod); // Component id (x=0,y=1,z=2)
		int ij = thread_in_system(); // Pair id

		// Thread barrier predicates
		// bool body_component_grid = (b < nbod) && (c < 3); // Barrier to act on bodies and components
		// bool first_thread_in_system = (thread_in_system() == 0); // Barrier to select only the first thread


		// Monitor instance for this system, writing to the shared log.
		monitor_t montest(_mon_params,sys,*_log) ;

		// Propagator instance; hand it this thread's body/component/pair ids.
		Propagator<T,GravitationInstance> prop(_prop_params,sys,calcForces);
		prop.b = b;
		prop.c = c;
		prop.ij = ij;
		// prop.body_component_grid = body_component_grid;
		// prop.first_thread_in_system = first_thread_in_system;


		prop.init();
		__syncthreads();


		// Main integration loop: capped by _max_iterations and stopped
		// early once the system is flagged inactive.
		for(int iter = 0 ; (iter < _max_iterations) && sys.is_active() ; iter ++ ) {

			// Limit the step so the propagator does not overshoot the
			// requested destination time.
			prop.max_timestep = _destination_time - sys.time();
			prop.advance();
			__syncthreads();

			bool thread_needs_std_coord = false;
			bool using_std_coord = false;


#if ASSUME_PROPAGATOR_USES_STD_COORDINATES
			// Propagator already works in standard coordinates: single-pass
			// monitor invocation, no conversion needed.
			montest( thread_in_system() );
#else
			// Two-pass monitoring: pass_one reports whether this thread
			// needs the system converted to standard coordinates.
			thread_needs_std_coord = montest.pass_one( thread_in_system() );
#if (__CUDA_ARCH__ >= 200)
			// requires arch=compute_sm_20
			// Block-wide OR: convert if ANY thread in the block asked for it.
			bool block_needs_std_coord = syncthreads_or((int)(thread_needs_std_coord));
#else
			// \todo Need to make this more intelligent for pre-Fermi GPUs. For now just setting true, so it always makes the conversion on pre-Fermi GPUs
			// void *ptr_shared_void = calcForces.unused_shared_data_pointer(system_per_block_gpu());
			// int *ptr_shared_int = static_cast<int *>(ptr_shared_void);
			// // int put_back = *ptr_shared_int;
			// *ptr_shared_int = 0;
			// // atomicOr(ptr_shared_int,thread_needs_std_coord);
			// // if(thread_needs_std_coord) (*ptr_shared_int)++;
			// __syncthreads();
			// bool block_needs_std_coord = static_cast<bool>(*ptr_shared_int);
			// *ptr_shared_int = put_back;
			bool block_needs_std_coord = true;
#endif
			if(block_needs_std_coord)
			{
				prop.convert_internal_to_std_coord();
				using_std_coord = true;
			}

			__syncthreads();
			// Second monitor pass (result currently unused here).
			int new_state = montest.pass_two ( thread_in_system() );

			// Only the first thread of the system writes the log entry.
			if( montest.need_to_log_system() && (thread_in_system()==0) )
				{ log::system(*_log, sys); }

			__syncthreads();
			// Restore the propagator's internal coordinates if we converted.
			if(using_std_coord)
			{
				prop.convert_std_to_internal_coord();
				using_std_coord = false;
			}
#endif
			__syncthreads();

			// Deactivate the system once the destination time is reached;
			// done by a single designated thread per system.
			if( sys.is_active() && prop.is_first_thread_in_system() )
			{
				if( sys.time() >= _destination_time )
					{ sys.set_inactive(); }
			}
			__syncthreads();

		}

		prop.shutdown();

	}


};
242 
243 
244 } } } // End namespaces bppt, gpu, swarm