9.1
general documentation
cs_sles_it_cuda.h
Go to the documentation of this file.
1#ifndef __CS_SLES_IT_CUDA_H__
2#define __CS_SLES_IT_CUDA_H__
3
4/*============================================================================
5 * Sparse Linear Equation Solvers using CUDA
6 *============================================================================*/
7
8/*
9 This file is part of code_saturne, a general-purpose CFD tool.
10
11 Copyright (C) 1998-2025 EDF S.A.
12
13 This program is free software; you can redistribute it and/or modify it under
14 the terms of the GNU General Public License as published by the Free Software
15 Foundation; either version 2 of the License, or (at your option) any later
16 version.
17
18 This program is distributed in the hope that it will be useful, but WITHOUT
19 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
20 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
21 details.
22
23 You should have received a copy of the GNU General Public License along with
24 this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
25 Street, Fifth Floor, Boston, MA 02110-1301, USA.
26*/
27
28/*----------------------------------------------------------------------------*/
29
30/*----------------------------------------------------------------------------
31 * Local headers
32 *----------------------------------------------------------------------------*/
33
34#include "base/cs_base.h"
35#include "alge/cs_matrix.h"
36#include "alge/cs_sles.h"
37#include "alge/cs_sles_pc.h"
38
39/*----------------------------------------------------------------------------*/
40
41/*============================================================================
42 * Macro definitions
43 *============================================================================*/
44
45/*============================================================================
46 * Type definitions
47 *============================================================================*/
48
49/*============================================================================
50 * Global variables
51 *============================================================================*/
52
53/*=============================================================================
54 * User function prototypes
55 *============================================================================*/
56
57/*============================================================================
58 * Semi-private function prototypes
59 *============================================================================*/
60
61#if defined(__CUDACC__)
62
63/*----------------------------------------------------------------------------
64 * Compute dot product, summing result over all ranks.
65 *
66 * parameters:
67 * c <-- pointer to solver context info
68 * stream <-- CUDA stream
69 * x <-- first vector
70 * y <-- second vector
71 *
72 * return:
73 * result of s = x.y
74 *----------------------------------------------------------------------------*/
75
76double
77cs_sles_it_dot_product
78(
79 const cs_sles_it_t *c,
80 cudaStream_t stream,
81 const cs_real_t *x,
82 const cs_real_t *y
83);
84
85/*----------------------------------------------------------------------------
86 * Compute dot product, summing result over all ranks.
87 *
88 * parameters:
89 * c <-- pointer to solver context info
90 * stream <-- CUDA stream
91 * x <-- first vector
92 * y <-- second vector
93 *
94 * return:
95 * result of s = x.y
96 *----------------------------------------------------------------------------*/
97
98double
99cs_sles_it_dot_product_xx
100(
101 const cs_sles_it_t *c,
102 cudaStream_t stream,
103 const cs_real_t *x,
104 const cs_real_t *y
105);
106
107/*----------------------------------------------------------------------------
108 * Compute dot product, summing result over all ranks.
109 *
110 * parameters:
111 * c <-- pointer to solver context info
112 * stream <-- CUDA stream
113 * x <-- vector
114 *
115 * return:
116 * result of s = x.x
117 *----------------------------------------------------------------------------*/
118
119double
120cs_sles_it_dot_product_xx
121(
122 const cs_sles_it_t *c,
123 cudaStream_t stream,
124 const cs_real_t *x
125);
126
127/*----------------------------------------------------------------------------
128 * Compute 2 dot products, summing result over all ranks.
129 *
130 * parameters:
131 * c <-- pointer to solver context info
132 * stream <-- CUDA stream
133 * x <-- first vector
134 * y <-- second vector
136 * xx --> result of s1 = x.x
137 * xy --> result of s2 = x.y
138 *----------------------------------------------------------------------------*/
139
140void
141cs_sles_it_dot_products_xx_xy
142(
143 const cs_sles_it_t *c,
144 cudaStream_t stream,
145 const cs_real_t *x,
146 const cs_real_t *y,
147 double *xx,
148 double *xy
149);
150
151/*----------------------------------------------------------------------------
152 * Compute 3 dot products, summing result over all ranks.
153 *
154 * parameters:
155 * c <-- pointer to solver context info
156 * stream <-- CUDA stream
157 * x <-- first vector
158 * y <-- second vector
159 * z <-- third vector
160 * xx --> result of s1 = x.x
161 * xy --> result of s2 = x.y
162 * yz --> result of s3 = y.z
163 *----------------------------------------------------------------------------*/
164
165void
166cs_sles_it_dot_products_xx_xy_yz
167(
168 const cs_sles_it_t *c,
169 cudaStream_t stream,
170 const cs_real_t *x,
171 const cs_real_t *y,
172 const cs_real_t *z,
173 double *xx,
174 double *xy,
175 double *yz
176);
177
178#endif // defined(__CUDACC__)
179
181
182/*=============================================================================
183 * Public function prototypes
184 *============================================================================*/
185
186/*----------------------------------------------------------------------------
187 * Solution of A.vx = Rhs using Jacobi.
188 *
189 * parameters:
190 * c <-- pointer to solver context info
191 * a <-- linear equation matrix
192 * diag_block_size <-- diagonal block size
194 * convergence <-- convergence information structure
195 * rhs <-- right hand side
196 * vx_ini <-- initial system solution
197 * (vx if nonzero, nullptr if zero)
198 * vx <-> system solution
199 * aux_size <-- size of the aux_vectors working area (in bytes)
200 * aux_vectors --- optional working area (allocation otherwise)
201 *
202 * returns:
203 * convergence state
204 *----------------------------------------------------------------------------*/
205
208 const cs_matrix_t *a,
209 cs_lnum_t diag_block_size,
210 cs_sles_it_convergence_t *convergence,
211 const cs_real_t *rhs,
212 cs_real_t *vx_ini,
213 cs_real_t *vx,
214 size_t aux_size,
215 void *aux_vectors);
216
217/*----------------------------------------------------------------------------
218 * Solution of A.vx = Rhs using block Jacobi.
219 *
220 * parameters:
221 * c <-- pointer to solver context info
222 * a <-- linear equation matrix
223 * diag_block_size <-- diagonal block size
225 * convergence <-- convergence information structure
226 * rhs <-- right hand side
227 * vx_ini <-- initial system solution
228 * (vx if nonzero, nullptr if zero)
229 * vx <-> system solution
230 * aux_size <-- size of the aux_vectors working area (in bytes)
231 * aux_vectors --- optional working area (allocation otherwise)
232 *
233 * returns:
234 * convergence state
235 *----------------------------------------------------------------------------*/
236
239 const cs_matrix_t *a,
240 cs_lnum_t diag_block_size,
241 cs_sles_it_convergence_t *convergence,
242 const cs_real_t *rhs,
243 cs_real_t *vx_ini,
244 cs_real_t *vx,
245 size_t aux_size,
246 void *aux_vectors);
247
248/*----------------------------------------------------------------------------
249 * Solution of A.vx = Rhs using flexible preconditioned conjugate gradient.
250 *
251 * Compared to standard PCG, FCG supports variable preconditioners.
252 *
253 * This variant, described in \cite Notay:2015, allows computing the
254 * required inner products with a single global communication.
255 *
256 * parameters:
257 * c <-- pointer to solver context info
258 * a <-- matrix
259 * diag_block_size <-- diagonal block size
260 * convergence <-- convergence information structure
261 * rhs <-- right hand side
262 * vx_ini <-- initial system solution
263 * (vx if nonzero, nullptr if zero)
264 * vx <-> system solution
265 * aux_size <-- size of the aux_vectors working area (in bytes)
266 * aux_vectors --- optional working area (allocation otherwise)
267 *
268 * returns:
269 * convergence state
270 *----------------------------------------------------------------------------*/
271
274 const cs_matrix_t *a,
275 cs_lnum_t diag_block_size,
276 cs_sles_it_convergence_t *convergence,
277 const cs_real_t *rhs,
278 cs_real_t *vx_ini,
279 cs_real_t *vx,
280 size_t aux_size,
281 void *aux_vectors);
282
283/*----------------------------------------------------------------------------
284 * Solution of A.vx = Rhs using optimised preconditioned GCR (CUDA version).
285 *
286 * parameters:
287 * c <-- pointer to solver context info
288 * a <-- matrix
289 * diag_block_size <-- diagonal block size (unused here)
290 * convergence <-- convergence information structure
291 * rhs <-- right hand side
292 * vx_ini <-- initial system solution
293 * (vx if nonzero, nullptr if zero)
294 * vx <-> system solution
295 * aux_size <-- size of the aux_vectors working area (in bytes)
296 * aux_vectors --- optional working area (allocation otherwise)
297 *
298 * returns:
299 * convergence state
300 *----------------------------------------------------------------------------*/
301
304 const cs_matrix_t *a,
305 cs_lnum_t diag_block_size,
306 cs_sles_it_convergence_t *convergence,
307 const cs_real_t *rhs,
308 cs_real_t *vx_ini,
309 cs_real_t *vx,
310 size_t aux_size,
311 void *aux_vectors);
312
313/*----------------------------------------------------------------------------*/
314
316
317#endif /* __CS_SLES_IT_CUDA_H__ */
#define BEGIN_C_DECLS
Definition: cs_defs.h:554
double cs_real_t
Floating-point value.
Definition: cs_defs.h:357
#define END_C_DECLS
Definition: cs_defs.h:555
int cs_lnum_t
local mesh entity id
Definition: cs_defs.h:350
struct _cs_matrix_t cs_matrix_t
Definition: cs_matrix.h:114
cs_sles_convergence_state_t
Definition: cs_sles.h:56
struct _cs_sles_it_t cs_sles_it_t
Definition: cs_sles_it.h:95
struct _cs_sles_it_convergence_t cs_sles_it_convergence_t
Definition: cs_sles_it.h:99
cs_sles_convergence_state_t cs_sles_it_cuda_jacobi(cs_sles_it_t *c, const cs_matrix_t *a, cs_lnum_t diag_block_size, cs_sles_it_convergence_t *convergence, const cs_real_t *rhs, cs_real_t *vx_ini, cs_real_t *vx, size_t aux_size, void *aux_vectors)
cs_sles_convergence_state_t cs_sles_it_cuda_fcg(cs_sles_it_t *c, const cs_matrix_t *a, cs_lnum_t diag_block_size, cs_sles_it_convergence_t *convergence, const cs_real_t *rhs, cs_real_t *vx_ini, cs_real_t *vx, size_t aux_size, void *aux_vectors)
cs_sles_convergence_state_t cs_sles_it_cuda_gcr(cs_sles_it_t *c, const cs_matrix_t *a, cs_lnum_t diag_block_size, cs_sles_it_convergence_t *convergence, const cs_real_t *rhs, cs_real_t *vx_ini, cs_real_t *vx, size_t aux_size, void *aux_vectors)
cs_sles_convergence_state_t cs_sles_it_cuda_block_jacobi(cs_sles_it_t *c, const cs_matrix_t *a, cs_lnum_t diag_block_size, cs_sles_it_convergence_t *convergence, const cs_real_t *rhs, cs_real_t *vx_ini, cs_real_t *vx, size_t aux_size, void *aux_vectors)