92 MPI_Allreduce(MPI_IN_PLACE, cpt, n, CS_MPI_GNUM, MPI_SUM,
120 MPI_Allreduce(MPI_IN_PLACE, cpt, n, CS_MPI_LNUM, MPI_MAX,
150 MPI_Allreduce(MPI_IN_PLACE, val, n, cs_datatype_to_mpi[datatype], MPI_SUM,
182 MPI_Allreduce(MPI_IN_PLACE, val, n, cs_datatype_to_mpi[datatype], MPI_MAX,
214 MPI_Allreduce(MPI_IN_PLACE, val, n, cs_datatype_to_mpi[datatype], MPI_MIN,
249 MPI_Bcast(val, n, cs_datatype_to_mpi[datatype], root_rank,
456 const float g_array[],
556#if defined(HAVE_OPENMP)
557 int n_t = omp_get_max_threads();
558 int n_t_l = n_elements / min_thread_elements;
592#if defined(HAVE_OPENMP)
593 const int t_id = omp_get_thread_num();
594 const int n_t = omp_get_num_threads();
595 const cs_lnum_t t_n = (n + n_t - 1) / n_t;
599 *e_id = (t_id+1) * t_n;
602 if (*e_id > n) *e_id = n;
637#if defined(HAVE_OPENMP)
638 const int t_id = omp_get_thread_num();
639 const double n_t = omp_get_num_threads();
642 double r0 = (double)t_id / (
double)n_t;
643 double r1 = (double)(t_id+1) / (double)n_t;
655 if (*e_id > n) *e_id = n;
678 return (n % block_size) ? n/block_size + 1 : n/block_size;
685#if defined(__cplusplus)
704 [[maybe_unused]]
const int n)
709 MPI_Allreduce(MPI_IN_PLACE, cpt, n, CS_MPI_GNUM, MPI_SUM,
729 [[maybe_unused]]
const int n)
734 MPI_Allreduce(MPI_IN_PLACE, cpt, n, CS_MPI_LNUM, MPI_MAX,
754 [[maybe_unused]]
int n,
756 [[maybe_unused]]
void *val)
761 MPI_Allreduce(MPI_IN_PLACE, val, n,
762 cs_datatype_to_mpi[datatype], MPI_SUM,
783 [[maybe_unused]]
int n,
785 [[maybe_unused]]
void *val)
790 MPI_Allreduce(MPI_IN_PLACE, val, n,
791 cs_datatype_to_mpi[datatype], MPI_MAX,
812 [[maybe_unused]]
int n,
814 [[maybe_unused]]
void *val)
819 MPI_Allreduce(MPI_IN_PLACE, val, n,
820 cs_datatype_to_mpi[datatype], MPI_MIN,
852 const cs_lnum_t t_n = (n + n_t - 1) / n_t;
856 *e_id = (t_id+1) * t_n;
859 if (*s_id > n) *s_id = n;
860 if (*e_id > n) *e_id = n;
878template <
typename T,
typename... Vals>
893 constexpr int n_vals =
sizeof...(Vals);
903 T *_values[] = {&values ...};
907 for (
int i = 0; i < n_vals; i++)
908 w[i+1] = *(_values[i]);
913 for (
int i = 0; i < n_vals; i++)
914 *(_values[i]) = w[i+1];
929template <
typename T,
typename... Vals>
939 sum(ec, first, values...);
954template <
int Stride,
typename T,
typename... Vals>
969 constexpr int n_vals =
sizeof...(Vals);
979 T *_values[] = {values ...};
981 constexpr int work_size = (n_vals + 1) * Stride;
984 for (
int i = 0; i < Stride; i++)
987 for (
int i = 0; i < n_vals; i++) {
988 for (
int j = 0; j < Stride; j++)
989 w[(i+1)*Stride + j] = _values[i][j];
994 for (
int i = 0; i < Stride; i++)
997 for (
int i = 0; i < n_vals; i++) {
998 for (
int j = 0; j < Stride; j++)
999 _values[i][j] = w[(i+1)*Stride + j];
1016template <
int Stride,
typename T,
typename... Vals>
1024#if defined(HAVE_MPI)
1026 sum<Stride>(ec, first, values...);
1041template <
typename T,
typename... Vals>
1050#if defined(HAVE_MPI)
1053 constexpr int n_vals =
sizeof...(Vals);
1064 T *_values[] = {&values ...};
1068 for (
int i = 0; i < n_vals; i++)
1069 w[i+1] = *(_values[i]);
1074 for (
int i = 0; i < n_vals; i++)
1075 *(_values[i]) = w[i+1];
1090template <
typename T,
typename... Vals>
1098#if defined(HAVE_MPI)
1100 max(ec, first, values...);
1116template <
int Stride,
typename T,
typename... Vals>
1125#if defined(HAVE_MPI)
1131 constexpr int n_vals =
sizeof...(Vals);
1141 T *_values[] = {values ...};
1143 constexpr int work_size = (n_vals + 1) * Stride;
1146 for (
int i = 0; i < Stride; i++)
1149 for (
int i = 0; i < n_vals; i++)
1150 for (
int j = 0; j < Stride; j++)
1151 w[(i+1)*Stride + j] = _values[i][j];
1155 for (
int i = 0; i < Stride; i++)
1158 for (
int i = 0; i < n_vals; i++)
1159 for (
int j = 0; j < Stride; j++)
1160 _values[i][j] = w[(i+1)*Stride + j];
1175template <
int Stride,
typename T,
typename... Vals>
1183#if defined(HAVE_MPI)
1185 max<Stride>(ec, first, values...);
1200template <
typename T,
typename... Vals>
1209#if defined(HAVE_MPI)
1215 constexpr int n_vals =
sizeof...(Vals);
1227 T *_values[] = {&values ...};
1231 for (
int i = 0; i < n_vals; i++)
1232 w[i + 1] = *(_values[i]);
1237 for (
int i = 0; i < n_vals; i++)
1238 *(_values[i]) = w[i + 1];
1253template <
typename T,
typename... Vals>
1261#if defined(HAVE_MPI)
1263 min(ec, first, values...);
1279template <
int Stride,
typename T,
typename... Vals>
1288#if defined(HAVE_MPI)
1294 constexpr int n_vals =
sizeof...(Vals);
1304 T *_values[] = {values ...};
1306 constexpr int work_size = (n_vals + 1) * Stride;
1309 for (
int i = 0; i < Stride; i++)
1312 for (
int i = 0; i < n_vals; i++)
1313 for (
int j = 0; j < Stride; j++)
1314 w[(i+1)*Stride + j] = _values[i][j];
1318 for (
int i = 0; i < Stride; i++)
1321 for (
int i = 0; i < n_vals; i++)
1322 for (
int j = 0; j < Stride; j++)
1323 _values[i][j] = w[(i+1)*Stride + j];
1339template <
int Stride,
typename T,
typename... Vals>
1347#if defined(HAVE_MPI)
1349 min<Stride>(ec, first, values...);
1366template <
typename T,
typename... Vals>
1374#if defined(HAVE_MPI)
1380 constexpr size_t n_vals =
sizeof...(Vals);
1390 T *_values[] = {&values ...};
1394 for (
size_t i = 0; i < n_vals; i++)
1395 w[i+1] = *(_values[i]);
1400 for (
size_t i = 0; i < n_vals; i++)
1401 *(_values[i]) = w[i+1];
1415template <
typename T,
typename... Vals>
1424#if defined(HAVE_MPI)
1427 constexpr size_t n_vals =
sizeof...(Vals);
1437 T *_values[] = {&values ...};
1441 for (
size_t i = 0; i < n_vals; i++)
1442 w[i+1] = *(_values[i]);
1447 for (
size_t i = 0; i < n_vals; i++)
1448 *(_values[i]) = w[i+1];
1463template <
int Stride,
typename T,
typename... Vals>
1472#if defined(HAVE_MPI)
1477 constexpr size_t n_vals =
sizeof...(Vals);
1487 T *_values[] = {values ...};
1489 constexpr size_t work_size = (n_vals + 1) * Stride;
1492 for (
int i = 0; i < Stride; i++)
1495 for (
size_t i = 0; i < n_vals; i++)
1496 for (
int j = 0; j < Stride; j++)
1497 w[(i+1)*Stride + j] = _values[i][j];
1501 for (
int i = 0; i < Stride; i++)
1504 for (
size_t i = 0; i < n_vals; i++) {
1505 for (
int j = 0; j < Stride; j++)
1506 _values[i][j] = w[(i+1)*Stride + j];
1523template <
int Stride,
typename T,
typename... Vals>
1531#if defined(HAVE_MPI)
1537 constexpr size_t n_vals =
sizeof...(Vals);
1547 T *_values[] = {values ...};
1549 constexpr size_t work_size = (n_vals + 1) * Stride;
1552 for (
int i = 0; i < Stride; i++)
1555 for (
int i = 0; i < n_vals; i++) {
1556 for (
int j = 0; j < Stride; j++)
1557 w[(i+1)*Stride + j] = _values[i][j];
1562 for (
int i = 0; i < Stride; i++)
1565 for (
size_t i = 0; i < n_vals; i++) {
1566 for (
int j = 0; j < Stride; j++)
1567 _values[i][j] = w[(i+1)*Stride + j];
1583template <
typename T,
typename... Vals>
1591#if defined(HAVE_MPI)
1597 constexpr size_t n_vals =
sizeof...(Vals);
1608 T *_values[] = {&values ...};
1612 for (
size_t i = 0; i < n_vals; i++)
1613 w[i+1] = *(_values[i]);
1618 for (
size_t i = 0; i < n_vals; i++)
1619 *(_values[i]) = w[i+1];
1633template <
typename T,
typename... Vals>
1642#if defined(HAVE_MPI)
1645 constexpr size_t n_vals =
sizeof...(Vals);
1656 T *_values[] = {&values ...};
1660 for (
size_t i = 0; i < n_vals; i++)
1661 w[i+1] = *(_values[i]);
1666 for (
size_t i = 0; i < n_vals; i++)
1667 *(_values[i]) = w[i+1];
1683template <
int Stride,
typename T,
typename... Vals>
1691#if defined(HAVE_MPI)
1697 constexpr size_t n_vals =
sizeof...(Vals);
1707 T *_values[] = {values ...};
1709 constexpr size_t work_size = (n_vals + 1) * Stride;
1712 for (
int i = 0; i < Stride; i++)
1715 for (
size_t i = 0; i < n_vals; i++)
1716 for (
int j = 0; j < Stride; j++)
1717 w[(i+1)*Stride + j] = _values[i][j];
1721 for (
int i = 0; i < Stride; i++)
1724 for (
size_t i = 0; i < n_vals; i++)
1725 for (
int j = 0; j < Stride; j++)
1726 _values[i][j] = w[(i+1)*Stride + j];
1741template <
int Stride,
typename T,
typename... Vals>
1750#if defined(HAVE_MPI)
1753 constexpr size_t n_vals =
sizeof...(Vals);
1763 T *_values[] = {values ...};
1765 constexpr size_t work_size = (n_vals + 1) * Stride;
1768 for (
int i = 0; i < Stride; i++)
1771 for (
size_t i = 0; i < n_vals; i++)
1772 for (
int j = 0; j < Stride; j++)
1773 w[(i+1)*Stride + j] = _values[i][j];
1777 for (
int i = 0; i < Stride; i++)
1780 for (
size_t i = 0; i < n_vals; i++)
1781 for (
int j = 0; j < Stride; j++)
1782 _values[i][j] = w[(i+1)*Stride + j];
1797template <
typename T,
typename... Vals>
1805#if defined(HAVE_MPI)
1811 constexpr size_t n_vals =
sizeof...(Vals);
1823 T *_values[] = {&values ...};
1827 for (
size_t i = 0; i < n_vals; i++)
1828 w[i + 1] = *(_values[i]);
1833 for (
size_t i = 0; i < n_vals; i++)
1834 *(_values[i]) = w[i + 1];
1848template <
typename T,
typename... Vals>
1857#if defined(HAVE_MPI)
1860 constexpr size_t n_vals =
sizeof...(Vals);
1872 T *_values[] = {&values ...};
1876 for (
size_t i = 0; i < n_vals; i++)
1877 w[i + 1] = *(_values[i]);
1882 for (
size_t i = 0; i < n_vals; i++)
1883 *(_values[i]) = w[i + 1];
1899template <
int Stride,
typename T,
typename... Vals>
1907#if defined(HAVE_MPI)
1913 constexpr size_t n_vals =
sizeof...(Vals);
1923 T *_values[] = {values ...};
1925 constexpr size_t work_size = (n_vals + 1) * Stride;
1928 for (
int i = 0; i < Stride; i++)
1931 for (
size_t i = 0; i < n_vals; i++)
1932 for (
int j = 0; j < Stride; j++)
1933 w[(i+1)*Stride + j] = _values[i][j];
1937 for (
int i = 0; i < Stride; i++)
1940 for (
size_t i = 0; i < n_vals; i++)
1941 for (
int j = 0; j < Stride; j++)
1942 _values[i][j] = w[(i+1)*Stride + j];
1957template <
int Stride,
typename T,
typename... Vals>
1966#if defined(HAVE_MPI)
1969 constexpr size_t n_vals =
sizeof...(Vals);
1980 T *_values[] = {values ...};
1982 constexpr size_t work_size = (n_vals + 1) * Stride;
1985 for (
int i = 0; i < Stride; i++)
1988 for (
size_t i = 0; i < n_vals; i++)
1989 for (
int j = 0; j < Stride; j++)
1990 w[(i+1)*Stride + j] = _values[i][j];
1994 for (
int i = 0; i < Stride; i++)
1997 for (
size_t i = 0; i < n_vals; i++)
1998 for (
int j = 0; j < Stride; j++)
1999 _values[i][j] = w[(i+1)*Stride + j];
Definition: cs_execution_context.h:61
bool use_mpi() const
Does the execution context use MPI parallelism?
Definition: cs_execution_context.h:128
int cs_glob_n_ranks
Definition: cs_defs.cpp:175
cs_datatype_t
Definition: cs_defs.h:315
#define BEGIN_C_DECLS
Definition: cs_defs.h:554
double cs_real_t
Floating-point value.
Definition: cs_defs.h:357
unsigned cs_gnum_t
global mesh entity number
Definition: cs_defs.h:342
static cs_lnum_t cs_align(cs_lnum_t i, cs_lnum_t m)
Given a base index i, return the next index aligned with a size m.
Definition: cs_defs.h:669
#define CS_UNUSED(x)
Definition: cs_defs.h:543
#define END_C_DECLS
Definition: cs_defs.h:555
int cs_lnum_t
local mesh entity id
Definition: cs_defs.h:350
#define CS_CL_SIZE
Definition: cs_defs.h:513
const cs_execution_context * cs_execution_context_glob_get(void)
Get the global execution context.
Definition: cs_execution_context.cpp:61
void cs_parall_gather_r(int root_rank, int n_elts, int n_g_elts, const cs_real_t array[], cs_real_t g_array[])
Build a global array on the given root rank from all local arrays.
Definition: cs_parall.cpp:531
static void cs_parall_bcast(int root_rank, int n, cs_datatype_t datatype, void *val)
Broadcast values of a given datatype to all default communicator processes.
Definition: cs_parall.h:241
static void cs_parall_min_strided(T first[], Vals &&... values)
Minimum values of a given datatype on all default communicator processes.
Definition: cs_parall.h:1902
void cs_parall_set_min_coll_buf_size(size_t buffer_size)
Define minimum recommended scatter or gather buffer size.
Definition: cs_parall.cpp:854
void cs_parall_gather_ordered_r(int root_rank, int n_elts, int n_g_elts, int stride, cs_real_t o_key[], cs_real_t array[], cs_real_t g_array[])
Build an ordered global array on the given root rank from all local arrays.
Definition: cs_parall.cpp:598
static void cs_parall_sum_strided(const cs_execution_context *ec, T first[], Vals &&... values)
Sum strided-values of a given datatype over a communicator.
Definition: cs_parall.h:1466
void cs_parall_min_id_rank_r(cs_lnum_t *elt_id, int *rank_id, cs_real_t val)
Given an (id, rank, value) tuple, return the local id and rank corresponding to the global minimum va...
Definition: cs_parall.cpp:356
static void cs_parall_thread_range_upper(cs_lnum_t n, size_t type_size, cs_lnum_t *s_id, cs_lnum_t *e_id)
Compute array index bounds for a local thread for upper triangular matrix elements.
Definition: cs_parall.h:632
static void cs_parall_max(int n, cs_datatype_t datatype, void *val)
Maximum values of a given datatype on all default communicator processes.
Definition: cs_parall.h:175
static void cs_parall_counter_max(cs_lnum_t cpt[], const int n)
Maximum values of a counter on all default communicator processes.
Definition: cs_parall.h:114
void cs_parall_allgather_r(int n_elts, int n_g_elts, cs_real_t array[], cs_real_t g_array[])
Build a global array from each local array in each domain.
Definition: cs_parall.cpp:410
static int cs_parall_n_threads(cs_lnum_t n_elements, cs_lnum_t min_thread_elements)
Compute recommended number of threads for a section.
Definition: cs_parall.h:553
static void cs_parall_max_scalars(T &first, Vals &... values)
Maximum values of a given datatype on all default communicator processes.
Definition: cs_parall.h:1586
static void cs_parall_counter(cs_gnum_t cpt[], const int n)
Sum values of a counter on all default communicator processes.
Definition: cs_parall.h:86
void cs_parall_scatter_r(int root_rank, int n_elts, int n_g_elts, const cs_real_t g_array[], cs_real_t array[])
Distribute a global array from a given root rank over all ranks. Each rank receives the part related t...
Definition: cs_parall.cpp:647
static void cs_parall_sum(int n, cs_datatype_t datatype, void *val)
Sum values of a given datatype on all default communicator processes.
Definition: cs_parall.h:143
void cs_parall_allgather_ordered_r(int n_elts, int n_g_elts, int stride, cs_real_t o_key[], cs_real_t array[], cs_real_t g_array[])
Build an ordered global array from each local array in each domain.
Definition: cs_parall.cpp:487
void cs_parall_scatter_f(int root_rank, int n_elts, int n_g_elts, const float g_array[], float array[])
Distribute a global array from a given root rank over all ranks. Each rank receives the part related t...
Definition: cs_parall.cpp:777
static void cs_parall_thread_range(cs_lnum_t n, size_t type_size, cs_lnum_t *s_id, cs_lnum_t *e_id)
Compute array index bounds for a local thread. When called inside an OpenMP parallel section,...
Definition: cs_parall.h:587
void cs_parall_min_loc_vals(int n, cs_real_t *min, cs_real_t min_loc_vals[])
Minimum value of a real and the value of related array on all default communicator processes.
Definition: cs_parall.cpp:317
static void cs_parall_sum_scalars(T &first, Vals &... values)
Sum values of a given datatype on all default communicator processes.
Definition: cs_parall.h:1369
static void cs_parall_min(int n, cs_datatype_t datatype, void *val)
Minimum values of a given datatype on all default communicator processes.
Definition: cs_parall.h:207
void cs_parall_gather_f(int root_rank, int n_elts, int n_g_elts, const float array[], float g_array[])
Build a global array on the given root rank from all local arrays. Function dealing with single-preci...
Definition: cs_parall.cpp:712
void cs_parall_max_loc_vals(int n, cs_real_t *max, cs_real_t max_loc_vals[])
Maximum value of a real and the value of related array on all default communicator processes.
Definition: cs_parall.cpp:279
size_t cs_parall_get_min_coll_buf_size(void)
Return minimum recommended scatter or gather buffer size.
Definition: cs_parall.cpp:832
static size_t cs_parall_block_count(size_t n, size_t block_size)
Compute number of blocks needed for a given array and block sizes.
Definition: cs_parall.h:675
static void cs_parall_min_scalars(T &first, Vals &... values)
Minimum values of a given datatype on all default communicator processes.
Definition: cs_parall.h:1800
static void cs_parall_max_strided(T first[], Vals &&... values)
Maximum values of a given datatype on all default communicator processes.
Definition: cs_parall.h:1686
cs_e2n_sum_t
Definition: cs_parall.h:52
@ CS_E2N_SUM_SCATTER_ATOMIC
Definition: cs_parall.h:57
@ CS_E2N_SUM_SCATTER
Definition: cs_parall.h:54
@ CS_E2N_SUM_GATHER
Definition: cs_parall.h:59
cs_e2n_sum_t cs_glob_e2n_sum_type
static void sum(const cs_execution_context *ec, T &first, Vals &... values)
Sum values of a given datatype over a given communicator.
Definition: cs_parall.h:881
static void max(const cs_execution_context *ec, T &first, Vals &... values)
Maximum values of a given datatype on a given communicator processes.
Definition: cs_parall.h:1044
static void max(T first[], Vals &&... values)
Maximum values of a given datatype on all default communicator processes.
Definition: cs_parall.h:1178
static void min(const cs_execution_context *ec, T &first, Vals &... values)
Minimum values of a given datatype on a given communicator processes.
Definition: cs_parall.h:1203
static void min(T first[], Vals &&... values)
Minimum values of a given datatype on all default communicator processes.
Definition: cs_parall.h:1342
Definition: cs_array.h:1098
Definition: parall.f90:26