Swarm-NG  1.1
gpulog_msg_layout.h
Go to the documentation of this file.
1 /***************************************************************************
2  * Copyright (C) 2010 by Mario Juric *
3  * mjuric@cfa.harvard.EDU *
4  * *
5  * This program is free software; you can redistribute it and/or modify *
6  * it under the terms of the GNU General Public License as published by *
7  * the Free Software Foundation; either version 3 of the License, or *
8  * (at your option) any later version. *
9  * *
10  * This program is distributed in the hope that it will be useful, *
11  * but WITHOUT ANY WARRANTY; without even the implied warranty of *
12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
13  * GNU General Public License for more details. *
14  * *
15  * You should have received a copy of the GNU General Public License *
16  * along with this program; if not, write to the *
17  * Free Software Foundation, Inc., *
18  * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
19  ***************************************************************************/
20 
27 #ifndef bits_gpulog_msg_layout_h__
28 #define bits_gpulog_msg_layout_h__
29 
35 namespace gpulog
36 {
37  namespace internal
38  {
39 
40  #if !__CUDACC__
41 
43  template<typename T>
44  inline std::ostream &operator <<(std::ostream &out, const array<T> &v) { return out << "n=" << v.nelem; }
45  inline std::ostream &operator <<(std::ostream &out, const header &h) { return out << "msgid=" << h.msgid << " len=" << h.len IFARGINFO( << " nargs=" << h.nargs << " infos=" << h.infos); }
46  inline std::ostream &operator <<(std::ostream &out, const Tunspec &tu) { return out; }
47  inline std::ostream &operator <<(std::ostream &out, const arginfo &h)
48  {
49  out << "arg=" << h.arg << " align=" << h.align << " size=" << h.size << " dim=" << h.dim;
50  out << " isarr=" << h.isarray << " nelem=" << h.nelem;;
51  out << " begin=" << h.begin << " len=" << h.len;
52  return out;
53  }
54  #endif
55 
56  //
57  // Write support templates
58  // - scalar PODs
59  // - pointers (not allowed, static assertion)
60  // - presized arrays
61  // - array<>s (allocation)
62  //
63 
65  template<typename T> struct argio
66  {
67  __host__ __device__ static inline void put(char *ptr, const T &x, int start, int datalen)
68  {
69  DHOST( std::cerr << "Writing [" << x << "] start=" << start << " len=" << datalen << "\n" );
70  DGPU( printf("Writing start=%d len=%d\n", start, datalen); );
71  *(T*)(ptr + start) = x;
72  }
73  };
74 
76  template<typename T> struct argio<T*>
77  {
78  __host__ __device__ static inline void put(char *ptr, const T *x, int start, int datalen)
79  {
80  STATIC_ASSERTION_FAILED__Pointer_serialization_is_not_allowed__Use_fixed_size_C_arrays_or_array_template_instead_____(x);
81  }
82  };
83 
85  template<typename T, int N> struct argio<T[N]>
86  {
87  // array write specialization
88  __host__ __device__ static inline void put(char *ptr, const T *x, int start, int datalen)
89  {
90  DHOST( std::cerr << "Writing presized array [" << x << "] start=" << start << " len=" << datalen << "\n" );
91  DGPU( printf("Writing presized array start=%d len=%d\n", start, datalen); );
92  ptr += start;
93  for(int i = 0; i != datalen / sizeof(T); i++)
94  {
95  ((T*)ptr)[i] = x[i];
96  }
97  }
98  };
99 
101  template<typename T> struct argio<array<T> >
102  {
103  __host__ __device__ static inline void put(char *ptr, const array<T> &x, int start, int datalen)
104  {
105  // Do nothing. The user must write the array data.
106  DHOST( std::cerr << "Allocating array [" << x << "] start=" << start << " element_size=" << datalen << " v[0]= " << *(T*)(ptr + start) << "\n" );
107  DGPU( printf("Allocating array start=%d element_size=%d\n", start, datalen); );
108  }
109  };
110 
/*
 * ASTART(at, a): smallest multiple of `a` that is >= `at`.
 * The mask arithmetic is only correct when `a` is a power of two.
 * Both arguments are evaluated more than once, so they must be free of side
 * effects. They are fully parenthesized so that compound expressions such as
 * `x | y` or `p + q` can be passed safely.
 */
#define ASTART(at, a) (((at) & ((a)-1)) ? (((at) & ~((a)-1)) + (a)) : (at))
113 
114  // CUDA 2.2/2.3 compilation speedup hack -- otherwise (if ASTART is called directly), nvcc
115  // treats static const int vars as _macros_ and attempts to expand them recursively 10+ times (!)
116  // The compilation succeeds, but lasts forever (~half a minute)
117  template<int at, int a> struct ata { static const int value = ASTART(at, a); };
118  #define ASTARTx(at, a) (ata<at, a>::value)
119 
/*
 * ADDR(beg, k): emits the layout members for argument number k (type T##k)
 * inside a class body:
 *   align##k          ALIGNOF(T##k)
 *   begin##k          beg rounded up to align##k (via ASTARTx)
 *   len##k            SIZEOF(T##k)
 *   end##k            begin##k + len##k
 *   IO##k             argio<T##k>
 *   dump##k()         host debug print of the values above; no-op for unspecified types
 *   get_arginfo##k()  fills *ai from ttrait<T##k> and the offsets above; no-op for
 *                     unspecified types. nelem is used only when the trait marks
 *                     the type as an array, otherwise the element count is 1.
 * Only block comments may be used inside the macro body because of the line
 * continuations.
 */
#define ADDR(beg, k) \
	static const int align##k = ALIGNOF(T##k); \
	static const int begin##k = ASTARTx(beg, align##k); \
	static const int len##k = SIZEOF(T##k); \
	static const int end##k = begin##k + len##k; \
	typedef argio<T##k> IO##k; \
	__host__ __device__ static inline void dump##k() \
	{ \
		if(!ISUNSPEC(T##k)) \
		{ \
			DHOST( std::cerr << "arg " << k << ": begin=" << begin##k << " end=" << end##k << " len=" << len##k << " isarray=" << ISARRAY(T##k) << "\n"; ); \
		} \
	} \
	__host__ __device__ static inline void get_arginfo##k(arginfo *ai, int nelem) \
	{ \
		typedef ttrait<T##k> TT; \
		if(ISUNSPEC(T##k)) { return; } \
		/* static properties of the type */ \
		ai->arg = k; \
		ai->align = TT::align; \
		ai->size = TT::size; \
		ai->dim = TT::dim; \
		ai->isarray = TT::isarr; \
		/* placement inside the packet */ \
		ai->begin = begin##k; \
		ai->nelem = ai->isarray ? nelem : 1; \
		ai->len = len##k * ai->nelem; \
	}
142 
147  template <
148  typename T0,
149  typename T1 = Tunspec, typename T2 = Tunspec, typename T3 = Tunspec, typename T4 = Tunspec, typename T5 = Tunspec,
150  typename T6 = Tunspec, typename T7 = Tunspec, typename T8 = Tunspec, typename T9 = Tunspec, typename T10 = Tunspec
151  >
152  struct pktsize
153  {
154  // number of arguments
155  static const int nargs =
156  !ISUNSPEC(T0) +
157  + !ISUNSPEC(T1) + !ISUNSPEC(T2) + !ISUNSPEC(T3) + !ISUNSPEC(T4) + !ISUNSPEC(T5) +
158  + !ISUNSPEC(T6) + !ISUNSPEC(T7) + !ISUNSPEC(T8) + !ISUNSPEC(T9) + !ISUNSPEC(T10);
159 
160  ADDR(0, 0);
161  #if ARGINFO
162  typedef arginfo T99[nargs];
163  ADDR(end0, 99);
164  ADDR(end99, 1);
165  #else
166  ADDR(end0, 1);
167  #endif
168  ADDR(end1, 2);
169  ADDR(end2, 3);
170  ADDR(end3, 4);
171  ADDR(end4, 5);
172  ADDR(end5, 6);
173  ADDR(end6, 7);
174  ADDR(end7, 8);
175  ADDR(end8, 9);
176  ADDR(end9, 10);
177 
178  static const int end = end10;
179 
180  protected:
181  static const int len = end; /* length of the packet, assuming last specified variable was a scalar */
182  static const int lenp = ASTART(end, ALIGNOF(Tmaxalign)); /* padded length, assuming last variable was a scalar, that properly aligns the next packet */
183 
184  public:
186  template<typename T>
187  __host__ __device__ inline static int len_with_padding(const T& x)
188  {
189  return lenp;
190  }
191 
193  template<typename T>
194  __host__ __device__ inline static int len_with_padding(const array<T> &x)
195  {
196  // compute the end offset
197  int at2 = end;
198  at2 += (x.nelem-1) * SIZEOF(T);
199 
200  // add padding to next packet
201  int lenp = ASTART(at2, ALIGNOF(Tmaxalign));
202 
203  return lenp;
204  }
205 
206  public:
207  #if ARGINFO
208  template<typename T>
209  __host__ __device__ inline static void store_arginfo(const char *ptr, const T &x)
210  {
211  return store_arginfo_aux(ptr, 1);
212  }
213 
214  template<typename T>
215  __host__ __device__ inline static void store_arginfo(const char *ptr, const array<T> &x)
216  {
217  return store_arginfo_aux(ptr, x.nelem);
218  }
219 
220  __host__ __device__ inline static void store_arginfo_aux(const char *ptr, int nelem)
221  {
222  DHOST( std::cerr << "Storing arginfo [nargs=" << nargs << " nelem=" << nelem << "]\n"; )
223  DHOST( std::cerr << "Unspecified args: " << ISUNSPEC(T0) << " " << ISUNSPEC(T1) << " " << ISUNSPEC(T2) << " " << ISUNSPEC(T3) << " " << ISUNSPEC(T4) << " " << ISUNSPEC(T5) << " " << ISUNSPEC(T6) << " " << ISUNSPEC(T7) << " " << ISUNSPEC(T8) << " " << ISUNSPEC(T9) << " " << ISUNSPEC(T10) << "\n"; )
224 
225  arginfo *ai = (arginfo *)(ptr + begin99);
226  get_arginfo0(ai + 0, nelem);
227  get_arginfo1(ai + 1, nelem);
228  get_arginfo2(ai + 2, nelem);
229  get_arginfo3(ai + 3, nelem);
230  get_arginfo4(ai + 4, nelem);
231  get_arginfo5(ai + 5, nelem);
232  get_arginfo6(ai + 6, nelem);
233  get_arginfo7(ai + 7, nelem);
234  get_arginfo8(ai + 8, nelem);
235  get_arginfo9(ai + 9, nelem);
236  get_arginfo10(ai + 10, nelem);
237 
238  // store the data into the header (assumed to be T0)
239  T0 &hdr = *(T0 *)(ptr + begin0);
240  hdr.nargs = nargs;
241  hdr.infos = begin99;
242  }
243  #endif
244 
245  __host__ __device__ inline static void dump() /* debugging */
246  {
247  dump0();
248  #if ARGINFO
249  dump99();
250  #endif
251  dump1(); dump2(); dump3(); dump4(); dump5(); dump6(); dump7(); dump8(); dump9(); dump10();
252  }
253  };
254 
255 
256  }
257 }
258 
259 #endif // bits_gpulog_msg_layout_h__