Swarm-NG  1.1
gravitation_mediumn.hpp
Go to the documentation of this file.
1 /*************************************************************************
2  * Copyright (C) 2010 by Saleh Dindar and the Swarm-NG Development Team *
3  * *
4  * This program is free software; you can redistribute it and/or modify *
5  * it under the terms of the GNU General Public License as published by *
6  * the Free Software Foundation; either version 3 of the License. *
7  * *
8  * This program is distributed in the hope that it will be useful, *
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of *
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
11  * GNU General Public License for more details. *
12  * *
13  * You should have received a copy of the GNU General Public License *
14  * along with this program; if not, write to the *
15  * Free Software Foundation, Inc., *
16  * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
17  ************************************************************************/
18 
30 #pragma once
31 
32 #include "gravitation_common.hpp"
33 
34 namespace swarm { namespace gpu { namespace bppt {
35 
65 template<class T>
67  public:
68  const static int nbod = T::n;
69  const static int pair_count = (nbod*(nbod-1))/2;
70  const static int CHUNK_SIZE = SHMEM_CHUNK_SIZE;
71 
72  typedef GravitationAccJerkScalars<CHUNK_SIZE> shared_data [pair_count][3];
73 
74  private:
75  public:
76  // hack to make this public to test whether it's worth using shared memory for some small steps
77  ensemble::SystemRef& sys;
78  shared_data &shared;
79 
80  public:
81 
91  GENERIC GravitationMediumN(ensemble::SystemRef& sys,shared_data &shared):sys(sys),shared(shared){ }
92 
93  private:
94 
102  GENERIC void calc_pair(int ij_first )const{
103  for(int ij = ij_first ; ij < std::max(3*nbod, pair_count) ; ij += 3*nbod )
104  {
105  int i = first<nbod>( ij );
106  int j = second<nbod>( ij );
107  if(i != j){
108 
109  // Relative vector from planet i to planet j
110  double dx[3] = { sys[j][0].pos()- sys[i][0].pos(),sys[j][1].pos()- sys[i][1].pos(), sys[j][2].pos()- sys[i][2].pos() };
111  // Relative velocity between the planets
112  double dv[3] = { sys[j][0].vel()- sys[i][0].vel(),sys[j][1].vel()- sys[i][1].vel(), sys[j][2].vel()- sys[i][2].vel() };
113 
114  // Distance between the planets
115  double r2 = dx[0]*dx[0]+dx[1]*dx[1]+dx[2]*dx[2];
116 
117  double jerk_mag = inner_product(dx,dv) * 3.0 / r2;
118  double acc_mag = rsqrt(r2) / r2;
119 
120 #pragma unroll
121  for(int c = 0; c < 3; c ++)
122  {
123  shared[ij][c].acc() = dx[c]* acc_mag;
124  shared[ij][c].jerk() = (dv[c] - dx[c] * jerk_mag ) * acc_mag;
125  }
126  }
127  }
128 
129  }
130 
138  GENERIC double sum_acc_planets(int b,int c)const{
139  double acc_sum = 0;
140 
141 #pragma unroll
142  for(int d = 0; d < pair_count; d++){
143  int x = first<nbod>(d), y= second<nbod>(d);
144 
145  if(x == b){
146  if(y != 0)
147  acc_sum += shared[d][c].acc() * sys[y].mass();
148  }else if(y == b){
149  if(x != 0)
150  acc_sum -= shared[d][c].acc() * sys[x].mass();
151  }
152  }
153 
154  return acc_sum;
155  }
156 
157 
165  GENERIC double sum_acc(int b,int c)const{
166  double acc_from_planets = 0;
167  double acc_from_sun = 0;
168 
170 #pragma unroll
171  for(int d = 0; d < pair_count; d++){
172  int x = first<nbod>(d), y= second<nbod>(d);
173 
174  if(x == b){
175  if(y == 0)
176  acc_from_sun += shared[d][c].acc() * sys[y].mass();
177  else
178  acc_from_planets += shared[d][c].acc() * sys[y].mass();
179  }else if(y == b){
180  if(x == 0)
181  acc_from_sun -= shared[d][c].acc() * sys[x].mass();
182  else
183  acc_from_planets -= shared[d][c].acc() * sys[x].mass();
184  }
185  }
186 
187  return acc_from_sun + acc_from_planets;
188  }
189 
190  GENERIC void sum(int b,int c,double& acc, double & jerk)const
191  // { sum_test(b,c,acc,jerk); }
192  { sum_works(b,c,acc,jerk); }
193 
194 
203  GENERIC void sum_test(int b,int c,double& acc, double & jerk)const{
204  // Total acceleration from sun and other planets
205  double accs[2] = {0.0, 0.0};
206  // Total jerk from sun and other planets
207  double jerks[2] = {0.0, 0.0};
208 
209 #pragma unroll
210  for(int d = 0; d < pair_count; d++){
211  int x = first<nbod>(d), y= second<nbod>(d);
212  if( (x==b) || (y==b) )
213  {
214  double mass;
215  int dest;
216  if(x==b)
217  {
218  mass = sys[y].mass();
219  dest = (y==0) ? 0 : 1;
220  }
221  else
222  {
223  mass = -sys[x].mass();
224  dest = (x==0) ? 0 : 1;
225  }
226 
227  accs[dest] += shared[d][c].acc() * mass;
228  jerks[dest] += shared[d][c].jerk() * mass;
229  }
230  }
231  acc = accs[0] + accs[1];
232  jerk = jerks[0] + jerks[1];
233  }
234 
243  GENERIC void sum_works(int b,int c,double& acc, double & jerk)const{
244  // Total acceleration from other planets
245  double acc_from_planets = 0.0;
246  // Total acceleration from body 0 (sun or star)
247  double acc_from_sun = 0.0;
248  // Total jerk from other planets
249  double jerk_from_planets = 0.0;
250  // Total jerk from body 0 (sun or star)
251  double jerk_from_sun = 0.0;
252 
254 #pragma unroll
255  for(int d = 0; d < pair_count; d++){
256  int x = first<nbod>(d), y= second<nbod>(d);
257 
258  if(x == b){
259  if(y == 0) {
260  acc_from_sun += shared[d][c].acc() * sys[y].mass();
261  jerk_from_sun += shared[d][c].jerk() * sys[y].mass();
262  } else {
263  acc_from_planets += shared[d][c].acc() * sys[y].mass();
264  jerk_from_planets += shared[d][c].jerk() * sys[y].mass();
265  }
266  }else if(y == b){
267  if(x == 0) {
268  acc_from_sun -= shared[d][c].acc() * sys[x].mass();
269  jerk_from_sun -= shared[d][c].jerk() * sys[x].mass();
270  } else {
271  acc_from_planets -= shared[d][c].acc() * sys[x].mass();
272  jerk_from_planets -= shared[d][c].jerk() * sys[x].mass();
273  }
274  }
275  }
276 
277  acc = acc_from_sun + acc_from_planets;
278  jerk = jerk_from_sun + jerk_from_planets;
279  }
280 
281  public:
282 
300  GPUAPI void operator() (int ij,int b,int c,double& pos,double& vel,double& acc,double& jerk) const{
301  // Write positions to shared (global) memory
302  if(b < nbod && c < 3)
303  sys[b][c].pos() = pos , sys[b][c].vel() = vel;
304  __syncthreads();
305 
306  // if(ij < pair_count)
307  if(ij < 3*nbod )
308  calc_pair(ij);
309  __syncthreads();
310  if(b < nbod && c < 3){
311  sum(b,c,acc,jerk);
312  }
313  }
314 
316  __device__ double acc_planets (int ij,int b,int c)const{
317  // if(ij < pair_count)
318  if(ij < 3*nbod )
319  calc_pair(ij);
320  __syncthreads();
321  if(b < nbod && c < 3){
322  return sum_acc_planets(b,c);
323  }else
324  return 0;
325  }
326 
328  __device__ double acc (int ij,int b,int c,double& pos,double& vel)const{
329  // Write positions to shared (global) memory
330  if(b < nbod && c < 3)
331  sys[b][c].pos() = pos , sys[b][c].vel() = vel;
332  __syncthreads();
333  // if(ij < pair_count)
334  if(ij < 3*nbod )
335  calc_pair(ij);
336  __syncthreads();
337  if(b < nbod && c < 3){
338  return sum_acc(b,c);
339  }else
340  return 0;
341  }
342 
343 
344  static GENERIC int shmem_per_system() {
345  const int pair_count = nbod * (nbod - 1) / 2;
346  // return pair_count * 3 * 2 * sizeof(double);
348  return pair_count * 3 * sizeof(GravitationAccJerkScalars<CHUNK_SIZE>)/CHUNK_SIZE;
349  }
350 
351  static __device__ void * system_shared_data_pointer(const int sysid_in_block) {
352  extern __shared__ char shared_mem[];
353  int b = sysid_in_block / CHUNK_SIZE ;
354  int i = sysid_in_block % CHUNK_SIZE ;
355  int idx = i * sizeof(double)
356  + b * CHUNK_SIZE
357  * shmem_per_system();
358  return &shared_mem[idx];
359  }
360 
361  // WARNING: Need to test that this works (accounting for larger memory usage due to coalesced arrys)
362  static __device__ void * unused_shared_data_pointer(const int system_per_block) {
363  extern __shared__ char shared_mem[];
364  // int idx = system_per_block * shmem_per_system();
365  int b = system_per_block / CHUNK_SIZE ;
366  int i = system_per_block % CHUNK_SIZE ;
367  if(i!=0) b++;
368  int idx = b * CHUNK_SIZE * shmem_per_system();
369  return &shared_mem[idx];
370  }
371 
374  return std::max(nbod * 3, (nbod-1)*nbod/2);
375  }
376 
378  static GENERIC int shmem_per_system() {
379  return sizeof(shared_data)/CHUNK_SIZE;
380  }
381 
382 };
383 
384 } } } // end of namespace bppt :: gpu :: swarm
385 
386