1#ifndef __CS_BASE_CUDA_H__
2#define __CS_BASE_CUDA_H__
/*
 * Check the status code returned by a CUDA runtime call.
 *
 * In the visible part of the macro, the expression `a` is expanded once and
 * its result is stored in a local cudaError_t (_l_ret_code). When that value
 * differs from cudaSuccess, bft_error() is called with the current __FILE__
 * and __LINE__, a literal 0, and a message formatted from the numeric error
 * code, the text returned by cudaGetErrorString(), and the stringified
 * expression (#a).
 *
 * NOTE(review): the closing lines of this macro are not visible in this
 * excerpt, and the behavior of bft_error() after reporting is defined
 * elsewhere -- confirm both before relying on them.
 */
53#define CS_CUDA_CHECK(a) { \
54 cudaError_t _l_ret_code = a; \
55 if (cudaSuccess != _l_ret_code) { \
56 bft_error(__FILE__, __LINE__, 0, "[CUDA error] %d: %s\n running: %s", \
57 _l_ret_code, ::cudaGetErrorString(_l_ret_code), #a); \
/*
 * Check the status code returned by a CUDA runtime call, reporting a
 * caller-supplied source location.
 *
 * In the visible part of the macro, the expression `a` is expanded once and
 * its result is stored in a local cudaError_t (_l_ret_code). When that value
 * differs from cudaSuccess, bft_error() is called with the file_name and
 * line_num arguments (rather than __FILE__ / __LINE__), a literal 0, and a
 * message formatted from the numeric error code, the text returned by
 * cudaGetErrorString(), and the stringified expression (#a).
 *
 * NOTE(review): the closing lines of this macro are not visible in this
 * excerpt, and the behavior of bft_error() after reporting is defined
 * elsewhere -- confirm both before relying on them.
 */
61#define CS_CUDA_CHECK_CALL(a, file_name, line_num) { \
62 cudaError_t _l_ret_code = a; \
63 if (cudaSuccess != _l_ret_code) { \
64 bft_error(file_name, line_num, 0, "[CUDA error] %d: %s\n running: %s", \
65 _l_ret_code, ::cudaGetErrorString(_l_ret_code), #a); \
/* Compile-time constant fixed at 32.
   NOTE(review): presumably the number of threads per warp assumed by the
   kernels using this header -- confirm against its users. */
73#define CS_CUDA_WARP_SIZE 32
/* Global variables declared here (extern) and defined in another translation
   unit. Only names and types are visible in this excerpt; the meanings noted
   below are inferred from the names -- TODO confirm against the definitions. */

/* presumably the id of the CUDA device currently selected -- confirm */
87extern int cs_glob_cuda_device_id;

/* presumably device limits queried at initialization (shared memory per
   block, threads per block, block size, block count, multiprocessor
   count) -- confirm units and origin */
91extern int cs_glob_cuda_shared_mem_per_block;
92extern int cs_glob_cuda_max_threads_per_block;
93extern int cs_glob_cuda_max_block_size;
94extern int cs_glob_cuda_max_blocks;
95extern int cs_glob_cuda_n_mp;

/* NCCL communicator handle (type ncclComm_t) shared across the code */
99extern ncclComm_t cs_glob_nccl_comm;

/* Array of NCCL data types; its size is not given in this declaration.
   NOTE(review): presumably indexed by the project's own datatype
   enumeration -- confirm */
102extern ncclDataType_t cs_datatype_to_nccl[];

/* Boolean flag; presumably enables or disables use of CUDA graphs -- confirm */
109extern bool cs_glob_cuda_allow_graph;
134cs_cuda_copy_h2d(
void *dst,
156cs_cuda_copy_h2d_async(
void *dst,
177cs_cuda_copy_d2h(
void *dst,
198cs_cuda_copy_d2h_async(
void *dst,
217cs_cuda_copy_d2d(
void *dst,
240cs_cuda_get_host_ptr(
const void *ptr);
261static inline unsigned int
263 unsigned int block_size)
265 return (n % block_size) ? n/block_size + 1 : n/block_size;
287cs_sync_or_copy_h2d(
const T *val_h,
294 const T *_val_d = NULL;
300 size_t size = n_vals *
sizeof(T);
303 CS_CUDA_CHECK(cudaMalloc(&_buf_d, size));
304 cs_cuda_copy_h2d(_buf_d, val_h, size);
305 _val_d = (
const T *)_buf_d;
347cs_cuda_get_stream(
int stream_id);
360cs_cuda_get_stream_prefetch(
void);
375cs_cuda_get_stream_id(cudaStream_t stream);
395cs_cuda_get_2_stage_reduce_buffers(
int stream_id,
398 unsigned int grid_size,
416cs_base_cuda_device_info(
cs_log_t log_id);
427cs_base_cuda_version_info(
cs_log_t log_id);
438cs_base_cuda_compiler_info(
cs_log_t log_id);
449cs_base_cuda_nccl_info(
cs_log_t log_id);
463cs_base_cuda_select_default_device(
void);
474cs_base_cuda_get_device(
void);
#define BEGIN_C_DECLS
Definition: cs_defs.h:554
#define END_C_DECLS
Definition: cs_defs.h:555
int cs_lnum_t
local mesh entity id
Definition: cs_defs.h:350
cs_log_t
Definition: cs_log.h:48
static const void * cs_get_device_ptr_const(const void *ptr)
Return matching device pointer for a given constant pointer.
Definition: cs_mem.h:697
static cs_alloc_mode_t cs_check_device_ptr(const void *ptr)
Check if a pointer is associated with a device.
Definition: cs_mem.h:737
static void cs_sync_h2d(const void *ptr)
Synchronize data from host to device.
Definition: cs_mem.h:947
cs_alloc_mode_t
Definition: cs_mem.h:50
@ CS_ALLOC_HOST
Definition: cs_mem.h:52
@ CS_ALLOC_HOST_DEVICE_SHARED
Definition: cs_mem.h:57