Swarm-NG  1.1
gravitation_largen.hpp
Go to the documentation of this file.
1 /*************************************************************************
2  * Copyright (C) 2010 by Saleh Dindar and the Swarm-NG Development Team *
3  * *
4  * This program is free software; you can redistribute it and/or modify *
5  * it under the terms of the GNU General Public License as published by *
6  * the Free Software Foundation; either version 3 of the License. *
7  * *
8  * This program is distributed in the hope that it will be useful, *
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of *
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
11  * GNU General Public License for more details. *
12  * *
13  * You should have received a copy of the GNU General Public License *
14  * along with this program; if not, write to the *
15  * Free Software Foundation, Inc., *
16  * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
17  ************************************************************************/
18 
29 #pragma once
30 
31 #include "gravitation_common.hpp"
32 
33 namespace swarm { namespace gpu { namespace bppt {
34 
62 template<class T>
64  public:
65  const static int nbod = T::n;
66  const static int body_count = nbod;
67  const static int CHUNK_SIZE = SHMEM_CHUNK_SIZE;
68 
69  typedef GravitationAccJerkScalars<CHUNK_SIZE> shared_data [body_count];
70 
71  private:
72  public: // hack to make this public to test whether it's worth using shared memory for some small steps
73  ensemble::SystemRef& sys;
74  shared_data &shared;
75 
76  public:
77 
87  GENERIC GravitationLargeN(ensemble::SystemRef& sys,shared_data &shared):sys(sys),shared(shared){ }
88 
89  private:
90 
99  GPUAPI void sum(int b,int c,double& acc, double & jerk)const{
100 
101  // Total acceleration
102  acc = 0.0;
103  // Total jerk
104  jerk = 0.0;
105 
106  // Should we try to load these into registers?
107  // Or is it hopeless/cache good enough anyway
108  // double pos_this_body = sys[b][c].pos();
109  // double vel_this_body = sys[b][c].vel();
110 
111  // Loop over all other bodies, saving sun for last
112  for(int bb=nbod-1;bb>=0;--bb)
113  {
114  if( (sys[bb].mass()>0.0) && (b!=bb) && (c==0) )
115  {
116  // Relative vector from planet b to planet bb
117  double dx[3] = { sys[bb][0].pos()- sys[b][0].pos(),sys[bb][1].pos()- sys[b][1].pos(), sys[bb][2].pos()- sys[b][2].pos() };
118  // Relative velocity between the planets
119  double dv[3] = { sys[bb][0].vel()- sys[b][0].vel(),sys[bb][1].vel()- sys[b][1].vel(), sys[bb][2].vel()- sys[b][2].vel() };
120  // Distance between the planets
121  double r2 = dx[0]*dx[0]+dx[1]*dx[1]+dx[2]*dx[2];
122 
123  // broadcast jerk_mag and acc_mag via shared memory
124  shared[b].jerk() = inner_product(dx,dv) * 3.0 / r2;
125  shared[b].acc() = sys[bb].mass() * rsqrt(r2) / r2;
126 
127  }
128  __syncthreads();
129  if( (sys[bb].mass()>0.0) && (b!=bb) )
130  {
131  // Relative vector from planet bb to planet b
132  double dx = sys[bb][c].pos()- sys[b][c].pos();
133  // Relative velocity between the planets
134  double dv = sys[bb][c].vel()- sys[b][c].vel();
135  acc += dx* shared[b].acc();
136  jerk += (dv - dx * shared[b].jerk() ) * shared[b].acc();
137  }
138  }
139 
140  }
141 
142 
143  /*
144  * Find the acceleration for a planet.
145  *
146  * @b planet number
147  * @c coordinate number x:0,y:1,z:2
148  *
149  * \todo Remove once allow propagators to use GravitationAcc
150  *
151  */
152  GPUAPI double sum_acc(int b,int c)const{
153 
154 
155  // Total acceleration
156  double acc = 0.0;
157  // Total jerk
158  // jerk = 0.0;
159 
160  // Should we try to load these into registers?
161  // Or is it hopeless/cache good enough anyway
162  // double pos_this_body = sys[b][c].pos();
163  // double vel_this_body = sys[b][c].vel();
164 
165  // Loop over all other bodies, saving sun for last
166  for(int bb=nbod-1;bb>=0;--bb)
167  {
168  if( (sys[bb].mass()>0.0) && (b!=bb) && (c==0) )
169  {
170  // Relative vector from planet b to planet bb
171  double dx[3] = { sys[bb][0].pos()- sys[b][0].pos(),sys[bb][1].pos()- sys[b][1].pos(), sys[bb][2].pos()- sys[b][2].pos() };
172  // Relative velocity between the planets
173  double dv[3] = { sys[bb][0].vel()- sys[b][0].vel(),sys[bb][1].vel()- sys[b][1].vel(), sys[bb][2].vel()- sys[b][2].vel() };
174  // Distance between the planets
175  double r2 = dx[0]*dx[0]+dx[1]*dx[1]+dx[2]*dx[2];
176 
177  // broadcast jerk_mag and acc_mag via shared memory
178  // shared[b].jerk() = inner_product(dx,dv) * 3.0 / r2;
179  shared[b].acc() = sys[bb].mass() * rsqrt(r2) / r2;
180 
181  }
182  __syncthreads();
183  if( (sys[bb].mass()>0.0) && (b!=bb) )
184  {
185  // Relative vector from planet bb to planet b
186  double dx = sys[bb][c].pos()- sys[b][c].pos();
187  // Relative velocity between the planets
188  double dv = sys[bb][c].vel()- sys[b][c].vel();
189  acc += dx* shared[b].acc();
190  // jerk += (dv - dx * shared[b].jerk() ) * shared[b].acc();
191  }
192  }
193  return acc;
194 
195  }
196 
197 
208  GPUAPI double sum_acc_planets(int b,int c)const{
209 
210  // Total acceleration
211  double acc = 0.0;
212  // Total jerk
213  // jerk = 0.0;
214 
215  // Should we try to load these into registers?
216  // Or is it hopeless/cache good enough anyway
217  // double pos_this_body = sys[b][c].pos();
218  // double vel_this_body = sys[b][c].vel();
219 
220  // Loop over all other bodies, saving sun for last
221  for(int bb=nbod-1;bb>=1;--bb)
222  {
223  if( (sys[bb].mass()>0.0) && (b!=bb) && (c==0) )
224  {
225  // Relative vector from planet b to planet bb
226  double dx[3] = { sys[bb][0].pos()- sys[b][0].pos(),sys[bb][1].pos()- sys[b][1].pos(), sys[bb][2].pos()- sys[b][2].pos() };
227  // Relative velocity between the planets
228  double dv[3] = { sys[bb][0].vel()- sys[b][0].vel(),sys[bb][1].vel()- sys[b][1].vel(), sys[bb][2].vel()- sys[b][2].vel() };
229  // Distance between the planets
230  double r2 = dx[0]*dx[0]+dx[1]*dx[1]+dx[2]*dx[2];
231 
232  // broadcast jerk_mag and acc_mag via shared memory
233  // shared[b].jerk() = inner_product(dx,dv) * 3.0 / r2;
234  shared[b].acc() = sys[bb].mass() * rsqrt(r2) / r2;
235 
236  }
237  __syncthreads();
238  if( (sys[bb].mass()>0.0) && (b!=bb) )
239  {
240  // Relative vector from planet bb to planet b
241  double dx = sys[bb][c].pos()- sys[b][c].pos();
242  // Relative velocity between the planets
243  double dv = sys[bb][c].vel()- sys[b][c].vel();
244  acc += dx* shared[b].acc();
245  // jerk += (dv - dx * shared[b].jerk() ) * shared[b].acc();
246  }
247  }
248  return acc;
249  }
250 
251  public:
252 
270  GPUAPI void operator() (int ij,int b,int c,double& pos,double& vel,double& acc,double& jerk)const{
271  // Write positions to shared (global) memory
272  if(b < nbod && c < 3)
273  sys[b][c].pos() = pos , sys[b][c].vel() = vel;
274  __syncthreads();
275  if(b < nbod && c < 3){
276  sum(b,c,acc,jerk);
277  }
278  }
279 
280  // TODO: Remove once allow propagators to use GravitationAcc
281  __device__ double acc (int ij,int b,int c,double& pos,double& vel)const{
282  // Write positions to shared (global) memory
283  if(b < nbod && c < 3)
284  sys[b][c].pos() = pos , sys[b][c].vel() = vel;
285  __syncthreads();
286  if(b < nbod && c < 3){
287  return sum_acc(b,c);
288  }
289  else { return 0; }
290  }
291 
292  // TODO: Remove once allow propagators to use GravitationAcc
293  __device__ double acc_planets (int ij,int b,int c,double& pos,double& vel)const{
294  // Write positions to shared (global) memory
295  if(b < nbod && c < 3)
296  sys[b][c].pos() = pos , sys[b][c].vel() = vel;
297  __syncthreads();
298  if(b < nbod && c < 3){
299  return sum_acc_planets(b,c);
300  }
301  else { return 0; }
302  }
303 
304 
305  static GENERIC int shmem_per_system() {
306  const int body_count = nbod;
307  return body_count * sizeof(GravitationAccJerkScalars<CHUNK_SIZE>)/CHUNK_SIZE;
308  }
309 
310  static GPUAPI void * system_shared_data_pointer(const int sysid_in_block) {
311  extern __shared__ char shared_mem[];
312  int b = sysid_in_block / CHUNK_SIZE ;
313  int i = sysid_in_block % CHUNK_SIZE ;
314  int idx = i * sizeof(double)
315  + b * CHUNK_SIZE
316  * shmem_per_system();
317  return &shared_mem[idx];
318  }
319 
321  static GPUAPI void * unused_shared_data_pointer(const int system_per_block) {
322  extern __shared__ char shared_mem[];
323  int b = system_per_block / CHUNK_SIZE ;
324  int i = system_per_block % CHUNK_SIZE ;
325  if(i!=0) b++;
326  int idx = b * CHUNK_SIZE * shmem_per_system();
327  return &shared_mem[idx];
328  }
329 
330  static GENERIC int thread_per_system(){
331  return nbod * 3;
332  }
333 
334  static GENERIC int shmem_per_system() {
335  return sizeof(shared_data)/CHUNK_SIZE;
336  }
337 
338 
339 };
340 
341 } } } // end of namespace bppt :: gpu :: swarm
342