Swarm-NG  1.1
device_settings.cpp
Go to the documentation of this file.
1 /*************************************************************************
2  * Copyright (C) 2011 by Saleh Dindar and the Swarm-NG Development Team *
3  * *
4  * This program is free software; you can redistribute it and/or modify *
5  * it under the terms of the GNU General Public License as published by *
6  * the Free Software Foundation; either version 3 of the License. *
7  * *
8  * This program is distributed in the hope that it will be useful, *
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of *
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
11  * GNU General Public License for more details. *
12  * *
13  * You should have received a copy of the GNU General Public License *
14  * along with this program; if not, write to the *
15  * Free Software Foundation, Inc., *
16  * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. *
17  ************************************************************************/
18 
25 #include "swarm/common.hpp"
26 #include "device_settings.hpp"
27 
28 
29 const int registers_per_thread = 64;
30 cudaDeviceProp deviceInfo;
31 
36 int optimized_system_per_block(int chunk_size, int thread_per_system
37  , int shmem_per_system){
38  return blocks_per_mp( chunk_size * thread_per_system, chunk_size * shmem_per_system)
39  * chunk_size ;
40 }
41 
43 void select_cuda_device(int dev) {
44  int devcnt; cudaErrCheck( cudaGetDeviceCount(&devcnt) );
45  if( dev >= 0 && dev < devcnt )
46  cudaErrCheck( cudaSetDevice(dev) );
47  else
48  std::cerr << "Cannot select the CUDA device. GPU integrators are disabled" << std::endl;
49 
50  cudaErrCheck( cudaGetDeviceProperties(&deviceInfo, dev) );
51 
52 
53 }
54 
61  cudaDeviceSetCacheConfig(cudaFuncCachePreferL1);
62 }
63 
66  std::cerr << "Device:\t" << deviceInfo.name << "\n"
67  //<< " Compute Capability: " << deviceInfo.computeMode << "\n"
68  << "Global Memory:\t" << deviceInfo.totalGlobalMem/double(1<<30) << "GB\n"
69  << "Shared Memory\t" << deviceInfo.sharedMemPerBlock/1024 << "KB\n"
70  << "Max Blocksize\t" << deviceInfo.maxThreadsPerBlock << "\n"
71  << "Warp Size \t" << deviceInfo.warpSize << "\n"
72  << "Registers/MP \t" << deviceInfo.regsPerBlock << "\n"
73  ;
74 
75 }
76 
80 int blocks_per_mp( int blocksize, int shmem_per_block ) {
81  assert(blocksize > 0);
82  assert(registers_per_thread > 0);
83  assert(shmem_per_block > 0);
84  assert(deviceInfo.warpSize > 0 );
85  int reg_limit = deviceInfo.regsPerBlock / (blocksize * registers_per_thread);
86  int shm_limit = deviceInfo.sharedMemPerBlock / shmem_per_block ;
87  int block_warps = (blocksize+ deviceInfo.warpSize)/deviceInfo.warpSize;
88  int total_warps = deviceInfo.maxThreadsPerBlock / deviceInfo.warpSize;
89  int warp_limit = block_warps > 0 ? total_warps / block_warps : 0;
90 
91  int limit = std::min( warp_limit, std::min( reg_limit , shm_limit ) );
92 
93  if(limit == 0)
94  $PRINT( "BS: " << blocksize << ", SHM" << shmem_per_block << " -> "
95  << "Limits: reg=" << reg_limit << ", shm=" << shm_limit
96  << ", warp=" << warp_limit );
97 
98  return limit;
99 }
100 
106 bool check_cuda_limits ( int blocksize, int shmem_per_block ){
107  return blocks_per_mp(blocksize, shmem_per_block) > 0;
108 }