NewMadeleine

Documentation

nm_coll_trees.h
Go to the documentation of this file.
1/*
2 * NewMadeleine
3 * Copyright (C) 2014-2022 (see AUTHORS file)
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or (at
8 * your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
14 */
15
16/* **********************************************************/
17/* @file
18 * trees description for collective operations
19 */
20
21#include <nm_log.h>
22#include <nm_core_interface.h>
23
36 {
53 };
55
58{
61 int n;
62 int self;
63 int root;
64 int height;
65};
66
67
68/* ** generic trees **************************************** */
69
70void nm_coll_tree_init(struct nm_coll_tree_info_s*p_tree, nm_coll_tree_kind_t kind, int n, int self, int root);
71
76void nm_coll_tree_step(const struct nm_coll_tree_info_s*p_tree, int step, int*p_parent, int*p_children, int*n_children);
77
78
79int nm_coll_tree_weight(const struct nm_coll_tree_info_s*p_tree, int step, int self);
80
81static inline const char*nm_coll_tree_kind_name(nm_coll_tree_kind_t kind)
82{
83 static const char*const names[_NM_COLL_TREE_MAX] =
84 {
85 "none",
86 "flat",
87 "chain",
88 "chain-modified",
89 "2chains-modified",
90 "ladder",
91 "binomial",
92 "3nomial",
93 "4nomial",
94 "8nomial",
95 "binary",
96 "3ary",
97 "4ary",
98 "8ary",
99 "default"
100 };
101 assert(kind >= NM_COLL_TREE_NONE);
102 assert(kind < _NM_COLL_TREE_MAX);
103 return names[kind];
104}
105
107
110
111
113
116
117/* ** toolbox ********************************************** */
118
120static inline int nm_coll_log2_floor(int x)
121{
122 assert(x > 0);
123 return ((unsigned) (8*sizeof (unsigned long long) - __builtin_clzll((x)) - 1));
124}
125
127static inline int nm_coll_log2_ceil(int x)
128{
129 assert(x > 0);
130 return nm_coll_log2_floor(x) + (!!(x & (x - 1)));
131}
132
134static inline int nm_coll_log4_floor(int x)
135{
136 return nm_coll_log2_floor(x) / 2;
137}
138
140static inline int nm_coll_ipow(int base, int exp)
141{
142 assert(base >= 0);
143 assert(exp >= 0);
144 int result = 1;
145 for(;;)
146 {
147 if(exp & 1)
148 result *= base;
149 exp >>= 1;
150 if(!exp)
151 break;
152 base *= base;
153 }
154 return result;
155}
156
158static inline int nm_coll_log_n_ceil(int x, const int n)
159{
160 assert(x > 0);
161 assert(n >= 2);
162 int l = 0;
163 int exp = n - 1;
164 x--;
165 while(x > 0)
166 {
167 x -= exp;
168 exp *= n;
169 l++;
170 }
171 return l;
172}
173
175static inline int nm_coll_log_n_floor(int x, const int n)
176{
177 assert(x >= 1);
178 assert(n >= 2);
179 int l = 0;
180 while(x >= n)
181 {
182 l++;
183 x = x / n;
184 }
185 return l;
186}
187
189static inline int nm_coll_r2v(int i, const struct nm_coll_tree_info_s*p_tree)
190{
191 return ((p_tree->n + i - p_tree->root) % p_tree->n);
192}
193
195static inline int nm_coll_v2r(int i, const struct nm_coll_tree_info_s*p_tree)
196{
197 return ((p_tree->root + i) % p_tree->n);
198}
199
200/* ********************************************************* */
201
205{
215};
216
221{
222 p_status->p_session = p_session;
223 p_status->p_group = p_group;
224 p_status->tag = tag;
225 nm_coll_tree_init(&p_status->tree, kind, nm_group_size(p_group), self, root);
226 p_status->n_children = -1;
227 p_status->children_requests = padico_malloc((1 + p_status->tree.max_arity) * sizeof(nm_sr_request_t));
228 p_status->children = padico_malloc((1 + p_status->tree.max_arity) * sizeof(int));
229 p_status->pending_reqs = 0;
230}
231
232static inline void nm_coll_tree_status_destroy(struct nm_coll_tree_status_s*p_status)
233{
234 assert(p_status->p_session != NULL);
235 p_status->p_session = NULL;
236 assert(p_status->p_group != NULL);
237 p_status->p_group = NULL;
238 assert(p_status->pending_reqs == 0);
239 padico_free(p_status->children_requests);
240 p_status->children_requests = NULL;
241 padico_free(p_status->children);
242 p_status->children = NULL;
243}
244
245static inline void nm_coll_tree_send(struct nm_coll_tree_status_s*p_coll_tree, int dest,
246 struct nm_data_s*p_data, nm_sr_request_t*p_req)
247{
248 assert(dest != -1);
249 nm_gate_t p_gate = nm_group_get_gate(p_coll_tree->p_group, dest);
250 nm_atomic_inc(&p_coll_tree->pending_reqs);
251 nm_sr_isend_data(p_coll_tree->p_session, p_gate, p_coll_tree->tag, p_data, p_req);
252}
253
254static inline void nm_coll_tree_issend(struct nm_coll_tree_status_s*p_coll_tree, int dest,
255 struct nm_data_s*p_data, nm_sr_request_t*p_req)
256{
257 assert(dest != -1);
258 nm_gate_t p_gate = nm_group_get_gate(p_coll_tree->p_group, dest);
259 nm_atomic_inc(&p_coll_tree->pending_reqs);
260 nm_sr_send_init(p_coll_tree->p_session, p_req);
261 nm_sr_send_pack_data(p_coll_tree->p_session, p_req, p_data);
262 nm_sr_send_issend(p_coll_tree->p_session, p_req, p_gate, p_coll_tree->tag);
263}
264
265static inline void nm_coll_tree_recv(struct nm_coll_tree_status_s*p_coll_tree, int from,
266 struct nm_data_s*p_data, nm_sr_request_t*p_req)
267{
268 assert(from != -1);
269 nm_gate_t p_gate = nm_group_get_gate(p_coll_tree->p_group, from);
270 nm_atomic_inc(&p_coll_tree->pending_reqs);
271 nm_sr_irecv_data(p_coll_tree->p_session, p_gate, p_coll_tree->tag, p_data, p_req);
272}
273
274static inline void nm_coll_tree_set_notifier(struct nm_coll_tree_status_s*p_coll_tree, nm_sr_request_t*p_req,
275 void(*p_notifier)(nm_sr_event_t, const nm_sr_event_info_t*, void*))
276{
277 nm_sr_request_set_ref(p_req, p_coll_tree);
278 nm_sr_request_monitor(p_coll_tree->p_session, p_req, NM_SR_EVENT_FINALIZED, p_notifier);
279}
280
281static inline int nm_coll_tree_req_index(struct nm_coll_tree_status_s*p_coll_tree, nm_sr_request_t*p_request)
282{
283 if( (p_request < &p_coll_tree->children_requests[0]) ||
284 (p_request > &p_coll_tree->children_requests[p_coll_tree->tree.max_arity - 1]) )
285 {
286 NM_FATAL("given p_request = %p is not in coll_tree", p_request);
287 }
288 const int i = p_request - p_coll_tree->children_requests;
289 assert(i >= 0);
290 assert(i < p_coll_tree->tree.max_arity);
291 return i;
292}
293
294
nm_gate_vect_t nm_group_t
type for groups; in practice, a vector of gates
Definition: nm_group.h:39
nm_coll_tree_kind_t nm_coll_tree_kind_by_name(const char *s_kind)
enum nm_coll_tree_kind_e nm_coll_tree_kind_t
Definition: nm_coll_trees.h:54
static void nm_coll_tree_recv(struct nm_coll_tree_status_s *p_coll_tree, int from, struct nm_data_s *p_data, nm_sr_request_t *p_req)
static int nm_coll_log2_ceil(int x)
ceil(log2(x))
static void nm_coll_tree_issend(struct nm_coll_tree_status_s *p_coll_tree, int dest, struct nm_data_s *p_data, nm_sr_request_t *p_req)
static void nm_coll_tree_set_notifier(struct nm_coll_tree_status_s *p_coll_tree, nm_sr_request_t *p_req, void(*p_notifier)(nm_sr_event_t, const nm_sr_event_info_t *, void *))
static int nm_coll_log_n_floor(int x, const int n)
floor(log_n(x)
nm_coll_tree_kind_t nm_coll_tree_heuristic(int comm_size __attribute__((unused)), nm_len_t data_size)
void nm_coll_tree_init(struct nm_coll_tree_info_s *p_tree, nm_coll_tree_kind_t kind, int n, int self, int root)
static void nm_coll_tree_send(struct nm_coll_tree_status_s *p_coll_tree, int dest, struct nm_data_s *p_data, nm_sr_request_t *p_req)
static int nm_coll_r2v(int i, const struct nm_coll_tree_info_s *p_tree)
translate real ranks to virtual ranks (with root=0)
nm_coll_tree_kind_t nm_coll_tree_env(void)
get tree kind from environment
static const char * nm_coll_tree_kind_name(nm_coll_tree_kind_t kind)
Definition: nm_coll_trees.h:81
int nm_coll_binary_nb_nodes_in_left_subtree(const struct nm_coll_tree_info_s *const p_tree)
nm_coll_tree_kind_e
kind of tree
Definition: nm_coll_trees.h:36
static int nm_coll_log_n_ceil(int x, const int n)
ceil(log_n(x))
static int nm_coll_log2_floor(int x)
floor(log2(x))
static int nm_coll_v2r(int i, const struct nm_coll_tree_info_s *p_tree)
translate virtual ranks (with root=0) to real ranks
static void nm_coll_tree_status_init(struct nm_coll_tree_status_s *p_status, nm_session_t p_session, nm_coll_tree_kind_t kind, nm_group_t p_group, int root, int self, nm_tag_t tag)
initialize a common status for non-blocking collective
static int nm_coll_log4_floor(int x)
floor(log4(x))
void nm_coll_tree_step(const struct nm_coll_tree_info_s *p_tree, int step, int *p_parent, int *p_children, int *n_children)
get the parent & children for the local node at the given step.
static int nm_coll_ipow(int base, int exp)
fast integer power() = base ^ exp
int nm_coll_tree_weight(const struct nm_coll_tree_info_s *p_tree, int step, int self)
static int nm_coll_tree_req_index(struct nm_coll_tree_status_s *p_coll_tree, nm_sr_request_t *p_request)
static void nm_coll_tree_status_destroy(struct nm_coll_tree_status_s *p_status)
@ NM_COLL_TREE_FLAT
Definition: nm_coll_trees.h:38
@ NM_COLL_TREE_8ARY
Definition: nm_coll_trees.h:50
@ NM_COLL_TREE_3NOMIAL
Definition: nm_coll_trees.h:44
@ NM_COLL_TREE_4NOMIAL
Definition: nm_coll_trees.h:45
@ NM_COLL_TREE_2CHAINS_MODIFIED
HPL 2-rings modified.
Definition: nm_coll_trees.h:41
@ NM_COLL_TREE_CHAIN_MODIFIED
HPL ring-modified algorithm.
Definition: nm_coll_trees.h:40
@ NM_COLL_TREE_BINOMIAL
Definition: nm_coll_trees.h:43
@ NM_COLL_TREE_8NOMIAL
Definition: nm_coll_trees.h:46
@ NM_COLL_TREE_DEFAULT
for public interfaces, will fallback to one of the previous tree kinds
Definition: nm_coll_trees.h:51
@ NM_COLL_TREE_NONE
Definition: nm_coll_trees.h:37
@ NM_COLL_TREE_3ARY
Definition: nm_coll_trees.h:48
@ NM_COLL_TREE_4ARY
Definition: nm_coll_trees.h:49
@ NM_COLL_TREE_LADDER
Definition: nm_coll_trees.h:42
@ _NM_COLL_TREE_MAX
Definition: nm_coll_trees.h:52
@ NM_COLL_TREE_BINARY
Definition: nm_coll_trees.h:47
@ NM_COLL_TREE_CHAIN
Definition: nm_coll_trees.h:39
struct nm_core_event_s __attribute__
static int nm_atomic_inc(int *v)
increment int, atomic only when multithread
int nm_sr_request_monitor(nm_session_t p_session, nm_sr_request_t *p_request, nm_sr_event_t mask, nm_sr_event_notifier_t notifier)
Set a notification function called upon request completion.
static void nm_sr_send_init(nm_session_t p_session, nm_sr_request_t *p_request)
Init a send request.
static int nm_sr_send_issend(nm_session_t p_session, nm_sr_request_t *p_request, nm_gate_t p_gate, nm_tag_t tag)
send a built request in synchronous mode
nm_sr_event_t
events for nm_sr_monitor()
static void nm_sr_send_pack_data(nm_session_t p_session, nm_sr_request_t *p_request, const struct nm_data_s *p_data)
Pack data described through an iterator into the given request.
static int nm_sr_request_set_ref(nm_sr_request_t *p_request, void *ref)
Add a user reference to a request.
@ NM_SR_EVENT_FINALIZED
request finalized, may be freed by user
nm_tag_t tag
the user-supplied tag
static nm_session_t p_session
static nm_gate_t p_gate
int nm_group_size(nm_group_t group)
nm_gate_t nm_group_get_gate(nm_group_t p_group, int rank)
Basic primitives to display info & warnings.
#define NM_FATAL(format,...)
Definition: nm_log.h:36
static int nm_sr_isend_data(nm_session_t p_session, nm_gate_t p_gate, nm_tag_t tag, struct nm_data_s *p_data, nm_sr_request_t *p_request)
static int nm_sr_irecv_data(nm_session_t p_session, nm_gate_t p_gate, nm_tag_t tag, struct nm_data_s *p_data, nm_sr_request_t *p_request)
uint64_t nm_tag_t
user tags, 64 bits, contained in indirect hashtable
Definition: nm_types.h:58
uint64_t nm_len_t
data length used by nmad
Definition: nm_types.h:70
description of an instanciated tree
Definition: nm_coll_trees.h:58
int root
root node of the collective
Definition: nm_coll_trees.h:63
int height
tree height: number of communications steps in the tree (0 for a single node)
Definition: nm_coll_trees.h:64
int self
rank of local node
Definition: nm_coll_trees.h:62
int n
number of nodes involved in collective
Definition: nm_coll_trees.h:61
int max_arity
maximum number of children in tree
Definition: nm_coll_trees.h:60
nm_coll_tree_kind_t kind
Definition: nm_coll_trees.h:59
common status for a non-blocking operation that walks a tree
nm_sr_request_t parent_request
struct nm_coll_tree_info_s tree
nm_session_t p_session
int pending_reqs
number of pending send/recv operations
nm_sr_request_t * children_requests
a data descriptor, used to pack/unpack data from app layout to/from contiguous buffers
Definition: nm_data.h:189
Connection to another process.
Definition: nm_gate.h:100
internal defintion of the sendrecv request
information field for sendrecv events