Swarm-NG  1.1
gravitation_gr_acc.hpp
Go to the documentation of this file.
1 /*************************************************************************
2  * Copyright (C) 2010 by Saleh Dindar and the Swarm-NG Development Team *
3  * *
4  * This program is free software; you can redistribute it and/or modify *
5  * it under the terms of the GNU General Public License as published by *
6  * the Free Software Foundation; either version 3 of the License. *
7  * *
8  * This program is distributed in the hope that it will be useful, *
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of *
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
11  * GNU General Public License for more details. *
12  * *
13  * You should have received a copy of the GNU General Public License *
14  * along with this program; if not, write to the *
15  * Free Software Foundation, Inc., *
16  * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
17  ************************************************************************/
18 
26 #pragma once
27 
28 #include "gravitation_common.hpp"
29 
30 namespace swarm { namespace gpu { namespace bppt {
31 
41 template<class T>
43  public:
44  const static int nbod = T::n;
45  const static int pair_count = (nbod*(nbod-1))/2;
46  const static int CHUNK_SIZE = SHMEM_CHUNK_SIZE;
47 
48  typedef GravitationAccScalars<CHUNK_SIZE> shared_data [pair_count][3];
49 
50  private:
51  ensemble::SystemRef& sys;
52  shared_data &shared;
53  double c2;
54 
55  public:
56 
63  GENERIC GravitationAcc_GR(ensemble::SystemRef& sys,shared_data &shared):sys(sys),shared(shared)
64  {
65  c2 = 101302340.;
66  }
67 
68  private:
69 
70  GENERIC void calc_pair(int ij)const{
71  int i = first<nbod>( ij );
72  int j = second<nbod>( ij );
73  if(i != j){
74 
75  double dx[3] = { sys[j][0].pos()- sys[i][0].pos(),sys[j][1].pos()- sys[i][1].pos(), sys[j][2].pos()- sys[i][2].pos() };
76  double dv[3] = { sys[j][0].vel()- sys[i][0].vel(),sys[j][1].vel()- sys[i][1].vel(), sys[j][2].vel()- sys[i][2].vel() };
77  double r2 = dx[0]*dx[0]+dx[1]*dx[1]+dx[2]*dx[2];
78  double acc_mag = rsqrt(r2) / r2;
79 
80 #pragma unroll
81  for(int c = 0; c < 3; c ++)
82  {
83  shared[ij][c].acc() = dx[c]* acc_mag;
84  }
85  }
86 
87  }
88 
89  __device__ double one_over_r(const int b1, const int b2) const
90  {
91  double sum = 0.;
92 
93  for(int d = 0; d < pair_count; d++)
94  {
95  int x = first<nbod>(d), y= second<nbod>(d);
96  if( ((x==b1) && (y==b2)) || (x==b2) && (y==b1))
97  {
98  sum += shared[d][0].acc()*shared[d][0].acc();
99  sum += shared[d][1].acc()*shared[d][1].acc();
100  sum += shared[d][2].acc()*shared[d][2].acc();
101  }
102  }
103  return sum;
104  }
105 
116  GPUAPI double sum_acc_gr(int b,int c)const
117  {
118  double acc_sum = 0;
119  double one_over_r;
120  // avoid extra square root, by recalculating 1/r (planet-sun) from shared mem
121  for(int d = 0; d < pair_count; d++)
122  {
123  int x = first<nbod>(d), y= second<nbod>(d);
124 
125  if( ((x==b)&&(y==0)) || ((x==0)&&(y==b)) )
126  {
127  one_over_r = shared[d][0].acc()*shared[d][0].acc();
128  one_over_r += shared[d][1].acc()*shared[d][1].acc();
129  one_over_r += shared[d][2].acc()*shared[d][2].acc();
130  }
131  }
132  double v2 = 0., v_dot_r = 0.;
133  double dx = (sys[b][0].pos()-sys[0][0].pos());
134  double dv = (sys[b][0].vel()-sys[0][0].vel());
135  v2 += dx*dx;
136  v_dot_r += dx*dv;
137  dx = (sys[b][1].pos()-sys[0][1].pos());
138  dv = (sys[b][1].vel()-sys[0][1].vel());
139  v2 += dx*dx;
140  v_dot_r += dx*dv;
141  dx = (sys[b][2].pos()-sys[0][2].pos());
142  dv = (sys[b][2].vel()-sys[0][2].vel());
143  v2 += dx*dx;
144  v_dot_r += dx*dv;
145  // assumes G=1.
146  double GM = sys[0].mass();
147  double f1 = (4*GM*one_over_r-v2);
148  double f2 = 4*v_dot_r;
149  acc_sum = -f1*(sys[b][c].pos()-sys[0][c].pos())
150  -f2*(sys[b][c].vel()-sys[0][c].vel());
151  double f0 = GM*one_over_r*one_over_r*one_over_r/c2;
152  acc_sum *= f0;
153  return acc_sum;
154  }
155 
156 
157  // Warning: untested and known to be incomplete...
158  // Calculates acceleration on planet in barycentric frame due to J2
159  // Would need to figure out how to read j2... parameter file? attribute? parameter files specifying which attribute?
160  // Would need to figure out how to deal with back reaction on star before we use this
161  __device__ double sum_acc_j2(int b,int c)const
162  {
163  double acc_sum = 0;
164  double one_over_r;
165  double u2;
166  // avoid extra square root, by recalculating 1/r (planet-sun) from shared mem
167  for(int d = 0; d < pair_count; d++)
168  {
169  int x = first<nbod>(d), y= second<nbod>(d);
170 
171  if( ((x==b)&&(y==0)) || ((x==0)&&(y==b)) )
172  {
173  one_over_r = shared[d][0].acc()*shared[d][0].acc();
174  one_over_r += shared[d][1].acc()*shared[d][1].acc();
175  one_over_r += shared[d][2].acc()*shared[d][2].acc();
176  u2 = shared[d][2].acc()*shared[d][2].acc() / one_over_r;
177  }
178  }
179  double dx = (sys[b][c].pos()-sys[0][c].pos());
180  double mstar = sys[0].mass();
181  double mpl = sys[b].mass();
182  double j2 = sys[b].attribute(1);
183  double jr2 = j2*one_over_r*one_over_r;
184  double tmp2 = jr2*(7.5*u2-1.5);
185  double tmp3 = jr2*3.;
186 #if 0
187  double jr4 = j4*one_over_r*one_over_r*one_over_r*one_over_r;
188  double u4 = u2*u2;
189  tmp2 += jr4*(39.375*u4 - 26.25*u2 + 1.875);
190  tmp3 += jr4*(17.5*u2-7.5);
191 #endif
192 #if 0
193  double jr6 = j6*one_over_r*one_over_r*one_over_r*one_over_r*one_over_r*one_over_r;
194  double u6 = u4*u2;
195  tmp2 += jr6*(187.6875*u6 -216.5625*u4 +59.0625*u2 -2.1875);
196  tmp3 += jr6*(86.625*u4 - 78.75*u2 + 13.125);
197 #endif
198  if(c==2) tmp2 -= tmp3;
199  double tmp1 = mstar*one_over_r*one_over_r*one_over_r;
200  acc_sum += dx*tmp1*tmp2;
201  // \todo Need to figure out how to get this to affect sun.
202  double acc_sun_c = -mpl*acc_sum/mstar;
203  return acc_sum;
204  }
205 
206 
207 
208  GENERIC double sum_acc_planets(int b,int c)const{
209  double acc_sum = 0;
210 
212 #pragma unroll
213  for(int d = 0; d < pair_count; d++){
214  int x = first<nbod>(d), y= second<nbod>(d);
215 
216  if(x == b){
217  if(y != 0)
218  acc_sum += shared[d][c].acc() * sys[y].mass();
219  }else if(y == b){
220  if(x != 0)
221  acc_sum -= shared[d][c].acc() * sys[x].mass();
222  }
223  }
224 
225  return acc_sum;
226  }
227 
228  __device__ double sum_acc(int b,int c) const{
229  double acc_from_planets = 0;
230  double acc_from_sun = 0;
231 
233 #pragma unroll
234  for(int d = 0; d < pair_count; d++){
235  int x = first<nbod>(d), y= second<nbod>(d);
236 
237  if(x == b){
238  if(y == 0)
239  acc_from_sun += shared[d][c].acc() * sys[y].mass();
240  else
241  acc_from_planets += shared[d][c].acc() * sys[y].mass();
242  }else if(y == b){
243  if(x == 0)
244  acc_from_sun -= shared[d][c].acc() * sys[x].mass();
245  else
246  acc_from_planets -= shared[d][c].acc() * sys[x].mass();
247  }
248  }
249 
250  double acc_gr = sum_acc_gr(b,c);
251  return acc_from_sun + acc_from_planets + acc_gr;
252  }
253 
255  public:
256  GENERIC void operator() (int ij,int b,int c,double& pos,double& vel,double& acc)const{
258  if(b < nbod && c < 3)
259  sys[b][c].pos() = pos , sys[b][c].vel() = vel;
260  __syncthreads();
261  if(ij < pair_count)
262  calc_pair(ij);
263  __syncthreads();
264  if(b < nbod && c < 3){
265  acc = sum_acc(b,c);
266  }
267  }
268 
284  GPUAPI double acc_planets (int ij,int b,int c)const{
285  if(ij < pair_count)
286  calc_pair(ij);
287  __syncthreads();
288  if(b < nbod && c < 3){
289  return sum_acc_planets(b,c);
290  }else
291  return 0;
292  }
293 
306  GPUAPI double acc (int ij,int b,int c,double& pos,double& vel)const{
307  // Write positions to shared (global) memory
308  if(b < nbod && c < 3)
309  sys[b][c].pos() = pos , sys[b][c].vel() = vel;
310  __syncthreads();
311  if(ij < pair_count)
312  calc_pair(ij);
313  __syncthreads();
314  if(b < nbod && c < 3){
315  return sum_acc(b,c);
316  }else
317  return 0;
318  }
319 
320  static GENERIC int thread_per_system(){
321  return ( 3*nbod > (nbod-1)*nbod/2 ) ? nbod*3 : (nbod-1)*nbod/2;
322  }
323 
324  static GENERIC int shmem_per_system() {
325  return sizeof(shared_data)/CHUNK_SIZE;
326  }
327 
328 
329 };
330 
331 } } } // end of namespace bppt :: gpu :: swarm
332 
333