9.1
general documentation
cs_sles_it_priv.h
Go to the documentation of this file.
1#ifndef __CS_SLES_IT_PRIV_H__
2#define __CS_SLES_IT_PRIV_H__
3
4/*============================================================================
5 * Sparse Linear Equation Solvers: private elements.
6 *
7 * These elements are shared between iterative solvers and smoother
8 * both for host and device implementations, but are not accessible to
9 * calling code.
10 *============================================================================*/
11
12/*
13 This file is part of code_saturne, a general-purpose CFD tool.
14
15 Copyright (C) 1998-2025 EDF S.A.
16
17 This program is free software; you can redistribute it and/or modify it under
18 the terms of the GNU General Public License as published by the Free Software
19 Foundation; either version 2 of the License, or (at your option) any later
20 version.
21
22 This program is distributed in the hope that it will be useful, but WITHOUT
23 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
24 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
25 details.
26
27 You should have received a copy of the GNU General Public License along with
28 this program; if not, write to the Free Software Foundation, Inc., 51 Franklin
29 Street, Fifth Floor, Boston, MA 02110-1301, USA.
30*/
31
32/*----------------------------------------------------------------------------*/
33
34#include "base/cs_defs.h"
35
36/*----------------------------------------------------------------------------
37 * Standard C library headers
38 *----------------------------------------------------------------------------*/
39
40#include <assert.h>
41#include <math.h>
42
43#if defined(HAVE_MPI)
44#include <mpi.h>
45#endif
46
47#if defined(HAVE_NCCL)
48#include <nccl.h>
49#endif
50
51/*----------------------------------------------------------------------------
52 * Local headers
53 *----------------------------------------------------------------------------*/
54
55#include "bft/bft_error.h"
56#include "bft/bft_printf.h"
57
58#include "base/cs_base.h"
59#include "alge/cs_blas.h"
60#include "base/cs_file.h"
61#include "base/cs_log.h"
62#include "base/cs_halo.h"
63#include "base/cs_mem.h"
64#include "mesh/cs_mesh.h"
65#include "alge/cs_matrix.h"
67#include "alge/cs_matrix_util.h"
68#include "base/cs_post.h"
69#include "base/cs_timer.h"
70#include "base/cs_time_plot.h"
71
72/*----------------------------------------------------------------------------
73 * Header for the current file
74 *----------------------------------------------------------------------------*/
75
76#include "alge/cs_sles.h"
77#include "alge/cs_sles_it.h"
78#include "alge/cs_sles_pc.h"
79
80/*----------------------------------------------------------------------------*/
81
83
86/*=============================================================================
87 * Local Macro Definitions
88 *============================================================================*/
89
90#if !defined(HUGE_VAL)
91#define HUGE_VAL 1.E+12
92#endif
93
94#define DB_SIZE_MAX 9
95
96/*=============================================================================
97 * Local Structure Definitions
98 *============================================================================*/
99
100/*----------------------------------------------------------------------------
101 * Function pointer for actual resolution of a linear system.
102 *
103 * parameters:
104 * c <-- pointer to solver context info
105 * a <-- linear equation matrix
106 * convergence <-- convergence information structure
107 * rhs <-- right hand side
108 * vx_ini <-- initial system solution
109 * (vx if nonzero, nullptr if zero)
110 * vx <-> system solution
111 * aux_size <-- number of elements in aux_vectors (in bytes)
112 * aux_vectors --- optional working area (allocation otherwise)
113 *
114 * returns:
115 * convergence status
116 *----------------------------------------------------------------------------*/
117
119(cs_sles_it_solve_t) (cs_sles_it_t *c,
120 const cs_matrix_t *a,
121 cs_lnum_t diag_block_size,
122 cs_sles_it_convergence_t *convergence,
123 const cs_real_t *rhs,
124 cs_real_t *vx_ini,
125 cs_real_t *vx,
126 size_t aux_size,
127 void *aux_vectors);
128
129/* Solver setup data */
130/*-------------------*/
131
132typedef struct _cs_sles_it_setup_t {
133
134 double initial_residual; /* last initial residual value */
135
136 cs_lnum_t n_rows; /* number of associated rows */
137
138 const cs_real_t *ad_inv; /* pointer to diagonal inverse */
139 cs_real_t *_ad_inv; /* private pointer to
140 diagonal inverse */
141
142 void *pc_context; /* preconditioner context */
143 cs_sles_pc_apply_t *pc_apply; /* preconditioner apply */
144
145} cs_sles_it_setup_t;
146
147/* Solver additional data */
148/*------------------------*/
149
150typedef struct _cs_sles_it_add_t {
151
152 cs_lnum_t *order; /* ordering */
153
154} cs_sles_it_add_t;
155
156/* Basic per linear system options and logging */
157/*---------------------------------------------*/
158
159struct _cs_sles_it_t {
160
161 /* Base settings */
162
163 cs_sles_it_type_t type; /* Solver type */
164
165 bool on_device; /* SpMV on device ? */
166
167 bool update_stats; /* do stats need to be updated ? */
168 bool ignore_convergence; /* ignore convergence for some
169 solvers used as preconditioners */
170
171 int n_max_iter; /* maximum number of iterations */
172 int restart_interval; /* maximum number of iterations
173 before restarting the algorithm
174 (only applicable for GMRES or GCR
175 algorithm up to now) */
176
177 cs_sles_it_solve_t *solve; /* pointer to solve function */
178
179 cs_sles_pc_t *pc; /* pointer to possibly shared
180 preconditioner object */
181 cs_sles_pc_t *_pc; /* pointer to owned
182 preconditioner object */
183
184 /* Performance data */
185
186 unsigned n_setups; /* Number of times system setup */
187 unsigned n_solves; /* Number of times system solved */
188
189 unsigned n_iterations_last; /* Number of iterations for last
190 system resolution */
191 unsigned n_iterations_min; /* Minimum number ot iterations
192 in system resolution history */
193 unsigned n_iterations_max; /* Maximum number ot iterations
194 in system resolution history */
195 unsigned long long n_iterations_tot; /* Total accumulated number of
196 iterations */
197
198 cs_timer_counter_t t_setup; /* Total setup */
199 cs_timer_counter_t t_solve; /* Total time used */
200
201 /* Plot info */
202
203 int plot_time_stamp; /* Plot time stamp */
204 cs_time_plot_t *plot; /* Pointer to plot structure,
205 which may be owned or shared */
206 cs_time_plot_t *_plot; /* Pointer to own plot structure */
207
208 /* Communicator used for reduction operations
209 (if left at NULL, main communicator will be used) */
210
211# if defined(HAVE_MPI)
212 MPI_Comm comm;
213 MPI_Comm caller_comm;
214 int caller_n_ranks;
215# endif
216
217#if defined(HAVE_NCCL)
218 ncclComm_t nccl_comm;
219#endif
220
221 const struct _cs_sles_it_t *shared; /* pointer to context sharing some
222 setup and preconditioner data,
223 or NULL */
224
225 cs_sles_it_add_t *add_data; /* additional data */
226
227 cs_sles_it_setup_t *setup_data; /* setup data */
228
229 /* Alternative solvers (fallback or heuristics) */
230
231 cs_sles_convergence_state_t fallback_cvg; /* threshold for fallback
232 convergence */
233 int fallback_n_max_iter; /* number of maximum iteration
234 for fallback solver */
235
236 cs_sles_it_t *fallback; /* fallback solver */
237
238
239};
240
241/* Convergence testing and tracking */
242/*----------------------------------*/
243
244struct _cs_sles_it_convergence_t {
245
246 const char *name; /* Pointer to name string */
247
248 int verbosity; /* Verbosity level */
249
250 unsigned n_iterations; /* Current number of iterations */
251 unsigned n_iterations_max; /* Maximum number of iterations */
252
253 double precision; /* Precision limit */
254 double r_norm; /* Residual normalization */
255 double residual; /* Current residual */
256
257};
258
259/*============================================================================
260 * Inline static function definitions
261 *============================================================================*/
262
263/*----------------------------------------------------------------------------
264 * Compute dot product, summing result over all ranks.
265 *
266 * parameters:
267 * c <-- pointer to solver context info
268 * x <-- first vector in s = x.y
269 * y <-- second vector in s = x.y
270 *
271 * returns:
272 * result of s = x.y
273 *----------------------------------------------------------------------------*/
274
275inline static double
276_dot_product(const cs_sles_it_t *c,
277 const cs_real_t *x,
278 const cs_real_t *y)
279{
280 double s = cs_dot(c->setup_data->n_rows, x, y);
281
282#if defined(HAVE_MPI)
283
284 if (c->comm != MPI_COMM_NULL) {
285 double _sum;
286 MPI_Allreduce(&s, &_sum, 1, MPI_DOUBLE, MPI_SUM, c->comm);
287 s = _sum;
288 }
289
290#endif /* defined(HAVE_MPI) */
291
292 return s;
293}
294
295/*----------------------------------------------------------------------------
296 * Compute dot product x.x, summing result over all ranks.
297 *
298 * parameters:
299 * c <-- pointer to solver context info
300 * x <-- vector in s = x.x
301 *
302 * returns:
303 * result of s = x.x
304 *----------------------------------------------------------------------------*/
305
306inline static double
307_dot_product_xx(const cs_sles_it_t *c,
308 const cs_real_t *x)
309{
310 double s;
311
312 s = cs_dot_xx(c->setup_data->n_rows, x);
313
314#if defined(HAVE_MPI)
315
316 if (c->comm != MPI_COMM_NULL) {
317 double _sum;
318 MPI_Allreduce(&s, &_sum, 1, MPI_DOUBLE, MPI_SUM, c->comm);
319 s = _sum;
320 }
321
322#endif /* defined(HAVE_MPI) */
323
324 return s;
325}
326
327/*----------------------------------------------------------------------------
328 * Compute 2 dot products x.x and x.y, summing result over all ranks.
329 *
330 * parameters:
331 * c <-- pointer to solver context info
332 * x <-- vector in s1 = x.x and s2 = x.y
333 * y <-- vector in s2 = x.y
334 * s1 --> result of s1 = x.x
335 * s2 --> result of s2 = x.y
336 *----------------------------------------------------------------------------*/
337
338inline static void
339_dot_products_xx_xy(const cs_sles_it_t *c,
340 const cs_real_t *x,
341 const cs_real_t *y,
342 double *s1,
343 double *s2)
344{
345 double s[2];
346
347 cs_dot_xx_xy(c->setup_data->n_rows, x, y, s, s+1);
348
349#if defined(HAVE_MPI)
350
351 if (c->comm != MPI_COMM_NULL) {
352 double _sum[2];
353 MPI_Allreduce(s, _sum, 2, MPI_DOUBLE, MPI_SUM, c->comm);
354 s[0] = _sum[0];
355 s[1] = _sum[1];
356 }
357
358#endif /* defined(HAVE_MPI) */
359
360 *s1 = s[0];
361 *s2 = s[1];
362}
363
364/*----------------------------------------------------------------------------
365 * Compute 2 dot products x.x and x.y, summing result over all ranks.
366 *
367 * parameters:
368 * c <-- pointer to solver context info
369 * x <-- vector in s1 = x.y
370 * y <-- vector in s1 = x.y and s2 = y.z
371 * z <-- vector in s2 = y.z
372 * s1 --> result of s1 = x.y
373 * s2 --> result of s2 = y.z
374 *----------------------------------------------------------------------------*/
375
376inline static void
377_dot_products_xy_yz(const cs_sles_it_t *c,
378 const cs_real_t *x,
379 const cs_real_t *y,
380 const cs_real_t *z,
381 double *s1,
382 double *s2)
383{
384 double s[2];
385
386 cs_dot_xy_yz(c->setup_data->n_rows, x, y, z, s, s+1);
387
388#if defined(HAVE_MPI)
389
390 if (c->comm != MPI_COMM_NULL) {
391 double _sum[2];
392 MPI_Allreduce(s, _sum, 2, MPI_DOUBLE, MPI_SUM, c->comm);
393 s[0] = _sum[0];
394 s[1] = _sum[1];
395 }
396
397#endif /* defined(HAVE_MPI) */
398
399 *s1 = s[0];
400 *s2 = s[1];
401}
402
403/*----------------------------------------------------------------------------
404 * Compute 3 dot products, summing result over all ranks.
405 *
406 * parameters:
407 * c <-- pointer to solver context info
408 * x <-- first vector
409 * y <-- second vector
410 * z <-- third vector
411 * s1 --> result of s1 = x.x
412 * s2 --> result of s2 = x.y
413 * s3 --> result of s3 = y.z
414 *----------------------------------------------------------------------------*/
415
416inline static void
417_dot_products_xx_xy_yz(const cs_sles_it_t *c,
418 const cs_real_t *x,
419 const cs_real_t *y,
420 const cs_real_t *z,
421 double *s1,
422 double *s2,
423 double *s3)
424{
425 double s[3];
426
427 cs_dot_xx_xy_yz(c->setup_data->n_rows, x, y, z, s, s+1, s+2);
428
429#if defined(HAVE_MPI)
430
431 if (c->comm != MPI_COMM_NULL) {
432 double _sum[3];
433
434 MPI_Allreduce(s, _sum, 3, MPI_DOUBLE, MPI_SUM, c->comm);
435 s[0] = _sum[0];
436 s[1] = _sum[1];
437 s[2] = _sum[2];
438 }
439
440#endif /* defined(HAVE_MPI) */
441
442 *s1 = s[0];
443 *s2 = s[1];
444 *s3 = s[2];
445}
446
447/*----------------------------------------------------------------------------
448 * Compute 5 dot products, summing result over all ranks.
449 *
450 * parameters:
451 * c <-- pointer to solver context info
452 * x <-- first vector
453 * y <-- second vector
454 * z <-- third vector
455 * xx --> result of x.x
456 * yy --> result of y.y
457 * xy --> result of x.y
458 * xz --> result of x.z
459 * yz --> result of y.z
460 *----------------------------------------------------------------------------*/
461
462inline static void
463_dot_products_xx_yy_xy_xz_yz(const cs_sles_it_t *c,
464 const cs_real_t *x,
465 const cs_real_t *y,
466 const cs_real_t *z,
467 double *restrict xx,
468 double *restrict yy,
469 double *restrict xy,
470 double *restrict xz,
471 double *restrict yz)
472{
473 double s[5];
474
475 cs_dot_xx_yy_xy_xz_yz(c->setup_data->n_rows, x, y, z, s, s+1, s+2, s+3, s+4);
476
477#if defined(HAVE_MPI)
478
479 if (c->comm != MPI_COMM_NULL) {
480 double _sum[5];
481 MPI_Allreduce(s, _sum, 5, MPI_DOUBLE, MPI_SUM, c->comm);
482 memcpy(s, _sum, 5*sizeof(double));
483 }
484
485#endif /* defined(HAVE_MPI) */
486
487 *xx = s[0];
488 *yy = s[1];
489 *xy = s[2];
490 *xz = s[3];
491 *yz = s[4];
492}
493
494/*----------------------------------------------------------------------------
495 * Block Jacobi utilities.
496 * Compute forward and backward to solve an LU 3*3 system.
497 *
498 * parameters:
499 * mat <-- 3*3*dim matrix
500 * x --> solution
501 * b --> 1st part of RHS (c - b)
502 * c --> 2nd part of RHS (c - b)
503 *----------------------------------------------------------------------------*/
504
505inline static void
506_mat_c_m_b_33(const cs_real_t mat[],
508 const cs_real_t *restrict b,
509 const cs_real_t *restrict c)
510{
511 /* c - b */
512 cs_real_t aux[3];
513 for (cs_lnum_t ii = 0; ii < 3; ii++) {
514 aux[ii] = (c[ii] - b[ii]);
515 }
516
517 for (cs_lnum_t ii = 0; ii < 3; ii++) {
518 x[ii] = mat[3*ii + 0]*aux[0]
519 + mat[3*ii + 1]*aux[1]
520 + mat[3*ii + 2]*aux[2];
521 }
522}
523
524/*----------------------------------------------------------------------------
525 * Block Jacobi utilities.
526 * Compute mat.(c-b) product.
527 *
528 * parameters:
529 * mat <-- P*P*dim matrix
530 * db_size <-- matrix size
531 * x --> solution
532 * b --> 1st part of RHS (c - b)
533 * c --> 2nd part of RHS (c - b)
534 *----------------------------------------------------------------------------*/
535
536inline static void
537_mat_c_m_b(const cs_real_t mat[],
538 cs_lnum_t db_size,
540 const cs_real_t *restrict b,
541 const cs_real_t *restrict c)
542{
543 assert(db_size <= DB_SIZE_MAX);
544 cs_real_t aux[DB_SIZE_MAX];
545
546 /* c - b */
547 for (cs_lnum_t ii = 0; ii < db_size; ii++) {
548 aux[ii] = (c[ii] - b[ii]);
549 }
550
551 for (cs_lnum_t ii = 0; ii < db_size; ii++) {
552 x[ii] = 0;
553 for (cs_lnum_t jj = 0; jj < db_size; jj++) {
554 x[ii] += aux[jj]*mat[ii*db_size + jj];
555 }
556 }
557}
558
559/*============================================================================
560 * Public function definitions
561 *============================================================================*/
562
563/*----------------------------------------------------------------------------*/
577/*----------------------------------------------------------------------------*/
578
579void
580cs_sles_it_convergence_init(cs_sles_it_convergence_t *convergence,
581 const char *solver_name,
582 int verbosity,
583 unsigned n_iter_max,
584 double precision,
585 double r_norm,
586 double *residual);
587
588/*----------------------------------------------------------------------------
589 * Setup context for iterative linear solver.
590 *
591 * This function is common to most solvers
592 *
593 * parameters:
594 * c <-> pointer to solver context info
595 * name <-- pointer to system name
596 * a <-- matrix
597 * verbosity <-- verbosity level
598 * diag_block_size <-- diagonal block size
599 * block_nn_inverse <-- if diagonal block size is 3 or 6, compute inverse of
600 * block if true, inverse of block diagonal otherwise
601 * l1_inv <-- if diagonal block size is 1, compute
602 * inverse of L1 norm instead of diagonal
603 *----------------------------------------------------------------------------*/
604
605void
606cs_sles_it_setup_priv(cs_sles_it_t *c,
607 const char *name,
608 const cs_matrix_t *a,
609 int verbosity,
610 int diag_block_size,
611 bool block_nn_inverse,
612 bool l1_inv);
613
616/*----------------------------------------------------------------------------*/
617
619
620#endif /* __CS_SLES_IT_PRIV_H__ */
double cs_dot(cs_lnum_t n, const cs_real_t *x, const cs_real_t *y)
Return the dot product of 2 vectors: x.y.
Definition: cs_blas.cpp:1644
void cs_dot_xx_yy_xy_xz_yz(cs_lnum_t n, const cs_real_t *restrict x, const cs_real_t *restrict y, const cs_real_t *restrict z, double *xx, double *yy, double *xy, double *xz, double *yz)
Return 5 dot products of 3 vectors: x.x, y.y, x.y, x.z, and y.z.
Definition: cs_blas.cpp:1833
double cs_dot_xx(cs_lnum_t n, const cs_real_t *x)
Return dot products of a vector with itself: x.x.
Definition: cs_blas.cpp:1665
void cs_dot_xy_yz(cs_lnum_t n, const cs_real_t *restrict x, const cs_real_t *restrict y, const cs_real_t *restrict z, double *xy, double *yz)
Return 2 dot products of 3 vectors: x.y, and y.z.
Definition: cs_blas.cpp:1772
void cs_dot_xx_xy_yz(cs_lnum_t n, const cs_real_t *restrict x, const cs_real_t *restrict y, const cs_real_t *restrict z, double *xx, double *xy, double *yz)
Return 3 dot products of 3 vectors: x.x, x.y, and y.z.
Definition: cs_blas.cpp:1801
void cs_dot_xx_xy(cs_lnum_t n, const cs_real_t *restrict x, const cs_real_t *restrict y, double *xx, double *xy)
Return 2 dot products of 2 vectors: x.x, and x.y.
Definition: cs_blas.cpp:1745
#define restrict
Definition: cs_defs.h:158
#define BEGIN_C_DECLS
Definition: cs_defs.h:554
double cs_real_t
Floating-point value.
Definition: cs_defs.h:357
#define END_C_DECLS
Definition: cs_defs.h:555
int cs_lnum_t
local mesh entity id
Definition: cs_defs.h:350
struct _cs_matrix_t cs_matrix_t
Definition: cs_matrix.h:114
cs_sles_convergence_state_t
Definition: cs_sles.h:56
struct _cs_sles_it_t cs_sles_it_t
Definition: cs_sles_it.h:95
struct _cs_sles_it_convergence_t cs_sles_it_convergence_t
Definition: cs_sles_it.h:99
cs_sles_it_type_t
Definition: cs_sles_it.h:55
cs_sles_pc_state_t() cs_sles_pc_apply_t(void *context, const cs_real_t *x_in, cs_real_t *x_out)
Function pointer for application of a preconditioner.
Definition: cs_sles_pc.h:145
struct _cs_sles_pc_t cs_sles_pc_t
Definition: cs_sles_pc.h:66
struct _cs_time_plot_t cs_time_plot_t
Definition: cs_time_plot.h:48
Definition: cs_timer.h:55