NewMadeleine

Documentation

« back to PM2 home.
nm_coll_private.h
Go to the documentation of this file.
1/*
2 * NewMadeleine
3 * Copyright (C) 2014-2026 (see AUTHORS file)
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or (at
8 * your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
14 */
15
16#ifndef NM_COLL_PRIVATE_H
17#define NM_COLL_PRIVATE_H
18
19#include <nm_public.h>
21#include <nm_private.h>
22#include <Padico/Puk.h>
23
24#include "nm_coll_interface.h"
25
26/* ********************************************************* */
27
/* NOTE(review): presumably the mask of tag bits left to point-to-point traffic - confirm */
#define NM_COLL_TAG_MASK_P2P       0x7FFFFFFF

/* base value OR-ed into every collective tag defined below */
#define NM_COLL_TAG_BASE           0xFF000000

/* NOTE(review): by their names, tags for the two steps of communicator creation - confirm */
#define NM_COLL_TAG_COMM_CREATE_1  ( NM_COLL_TAG_BASE | 0x04 )
#define NM_COLL_TAG_COMM_CREATE_2  ( NM_COLL_TAG_BASE | 0x05 )
36
37/* ********************************************************* */
38
56
58
struct nm_coll_req_s;

/** type of the destructor function attached to a collective request;
 * it receives the request itself as its only argument
 */
typedef void (*nm_coll_req_destructor_t)(struct nm_coll_req_s*p_coll_req);
60
62{
67 void*p_ref;
68#ifdef NMAD_PROFILE
69 puk_tick_t start_time;
70#endif /* NMAD_PROFILE */
72};
73
77
79{
80 return (void*)&p_coll_req->_payload;
81}
82
83static inline struct nm_coll_req_s*nm_coll_req_container(void*p_payload)
84{
86 return p_coll_req;
87}
88
91
92
93/* ********************************************************* */
94
96static inline void nm_coll_datav_descendants(const struct nm_coll_tree_info_s*p_tree,
97 struct nm_datav_s*p_datav, struct nm_data_s*p_data, nm_len_t data_size,
98 int*p_descendants, int child, int child_weight)
99{
100 assert(child_weight > 0);
101 if(p_descendants == NULL)
102 {
103 if(child + child_weight <= p_tree->n)
104 {
105 /* contig */
106 nm_datav_add_chunk_excerpt(p_datav, p_data, data_size * child, data_size * child_weight);
107 }
108 else
109 {
110 /* wrap-around */
111 const int w2 = child + child_weight - p_tree->n;
112 const int w1 = child_weight - w2;
113 nm_datav_add_chunk_excerpt(p_datav, p_data, data_size * child, data_size * w1);
114 nm_datav_add_chunk_excerpt(p_datav, p_data, 0, data_size * w2);
115 }
116 }
117 else
118 {
119 int start_index = p_descendants[0];
120 int span = 1;
121 int k;
122 for(k = 1; k < child_weight; k++)
123 {
124 const int index = p_descendants[k];
125 if(index == start_index + span)
126 {
127 span++;
128 }
129 else
130 {
131 nm_datav_add_chunk_excerpt(p_datav, p_data, data_size * start_index, data_size * span);
132 start_index = index;
133 span = 1;
134 }
135 }
136 nm_datav_add_chunk_excerpt(p_datav, p_data, data_size * start_index, data_size * span);
137 }
138}
139
140/* ** profiling ******************************************** */
141
144{
145 unsigned long long n;
146 unsigned long long total_bytes;
147 unsigned long long total_nodes;
149};
150
151#ifdef NMAD_PROFILE
/** Account one occurrence of a collective operation in up to two profiling blocks.
 *
 * For each of PROF_LOCAL and PROF_GLOBAL that is not NULL, the occurrence
 * counter 'n' is incremented, BYTES and NODES are added to 'total_bytes' and
 * 'total_nodes', and USECS is added to 'total_time_usecs'.
 *
 * BYTES, NODES and USECS are evaluated exactly once, whatever the number of
 * non-NULL blocks, so that expressions with side effects (e.g. reading a
 * timer) yield the same value for both blocks.
 * PROF_LOCAL and PROF_GLOBAL are still expanded several times: pass plain
 * pointer expressions without side effects.
 */
#define NM_COLL_PROFILE_ADD(PROF_LOCAL, PROF_GLOBAL, BYTES, NODES, USECS) \
  do {                                                                  \
    const unsigned long long nm_coll_profile_bytes_ = (BYTES);          \
    const unsigned long long nm_coll_profile_nodes_ = (NODES);          \
    const double nm_coll_profile_usecs_ = (USECS);                      \
    if((PROF_LOCAL) != NULL)                                            \
      {                                                                 \
        nm_profile_inc((PROF_LOCAL)->n);                                \
        nm_profile_add((PROF_LOCAL)->total_bytes, nm_coll_profile_bytes_); \
        nm_profile_add((PROF_LOCAL)->total_nodes, nm_coll_profile_nodes_); \
        (PROF_LOCAL)->total_time_usecs += nm_coll_profile_usecs_;       \
      }                                                                 \
    if((PROF_GLOBAL) != NULL)                                           \
      {                                                                 \
        nm_profile_inc((PROF_GLOBAL)->n);                               \
        nm_profile_add((PROF_GLOBAL)->total_bytes, nm_coll_profile_bytes_); \
        nm_profile_add((PROF_GLOBAL)->total_nodes, nm_coll_profile_nodes_); \
        (PROF_GLOBAL)->total_time_usecs += nm_coll_profile_usecs_;      \
      }                                                                 \
  } while(0)
169
170static inline void nm_coll_profile_def(struct nm_coll_profile_s*p_profile, const char*name)
171{
172 puk_profile_var_defx(unsigned_long_long, counter, &p_profile->n, 0,
173 "total number of this coll op",
174 "nm_coll.%s.n", name);
175 puk_profile_var_defx(unsigned_long_long, aggregate, &p_profile->total_bytes, 0,
176 "total number of bytes sent through this coll op",
177 "nm_coll.%s.total_bytes", name);
178 puk_profile_var_defx(unsigned_long_long, aggregate, &p_profile->total_nodes, 0,
179 "total number of nodes involved in this coll op",
180 "nm_coll.%s.total_nodes", name);
181 puk_profile_var_defx(double, timer, &p_profile->total_time_usecs, 0.0,
182 "total time spent in this coll op",
183 "nm_coll.%s.total_time_usecs", name);
184}
185
186
187
188#else /* NMAD_PROFILE */
/* Profiling disabled: expands to an empty statement and evaluates none of its
 * arguments. Declared variadic so that it accepts the same 5-argument calls
 * (PROF_LOCAL, PROF_GLOBAL, BYTES, NODES, USECS) as the profiling variant,
 * as well as legacy 4-argument calls.
 */
#define NM_COLL_PROFILE_ADD(...) do { } while(0)
190#endif /* NMAD_PROFILE */
191
192#endif /* NM_COLL_PRIVATE_H */
void(* nm_coll_req_notifier_t)(void *ref)
notification function for collective reqs
Definition nm_coll.h:131
static void nm_datav_add_chunk_excerpt(struct nm_datav_s *p_datav, struct nm_data_s *p_data, nm_len_t chunk_offset, nm_len_t chunk_len)
add an excerpt of data to datav; given p_data content is not copied.
Definition nm_data.h:857
nm_status_t nm_cond_status_t
status with synchronization (wait/signal)
static void nm_coll_datav_descendants(const struct nm_coll_tree_info_s *p_tree, struct nm_datav_s *p_datav, struct nm_data_s *p_data, nm_len_t data_size, int *p_descendants, int child, int child_weight)
add chunks of data described by descendants to datav, with aggregation of adjacent chunks
static void * nm_coll_req_payload(struct nm_coll_req_s *p_coll_req)
static struct nm_coll_req_s * nm_coll_req_container(void *p_payload)
enum nm_coll_req_kind_e nm_coll_req_kind_t
struct nm_coll_req_s * nm_coll_req_alloc(nm_len_t payload_size, nm_coll_req_kind_t kind, nm_coll_req_destructor_t p_destructor, nm_coll_req_notifier_t p_notify, void *p_ref)
nm_coll_req_kind_e
@ NM_COLL_REQ_SCATTER
@ NM_COLL_REQ_NONE
@ NM_COLL_REQ_ALLGATHER_BUTTERFLY
@ NM_COLL_REQ_ALLGATHER_TREE
@ NM_COLL_REQ_BCAST_2TREES
@ NM_COLL_REQ_GATHER
@ NM_COLL_REQ_ALLGATHER_BRUCK
@ _NM_COLL_REQ_KIND_MAX
@ NM_COLL_REQ_BCAST_TREE
@ NM_COLL_REQ_DEFAULT
@ NM_COLL_REQ_BCAST_PIPELINE
@ NM_COLL_REQ_BARRIER
@ NM_COLL_REQ_ALLGATHER_RBRUCK
@ NM_COLL_REQ_OTHER
external use of nm_coll_req_e
void(* nm_coll_req_destructor_t)(struct nm_coll_req_s *)
void nm_coll_req_signal(struct nm_coll_req_s *p_coll_req)
used internally to signal the completion of a coll req
#define nm_container_of(ptr, type, member)
assert(p_data->ops.p_traversal !=NULL)
nm_data_propertie_gpu_preinit & p_data
Definition nm_data.h:538
nm_len_t payload_size
Definition nm_headers.h:6
struct nm_coll_req_s * p_coll_req
non-blocking nmad native collective
main header for private nmad definitions.
This is the common public header for NewMad.
uint64_t nm_len_t
data length used by nmad
Definition nm_types.h:68
profiling block for collective ops
unsigned long long n
number of times the operation was called
unsigned long long total_nodes
total number of nodes involved for all occurrences of this op
unsigned long long total_bytes
total number of bytes sent through this op
int _payload
dummy placeholder; payload is actually allocated after this struct
nm_coll_req_notifier_t p_notify
notification function for op termination
nm_cond_status_t status
status for explicit wait; not signaled if p_notify is non-NULL
nm_coll_req_destructor_t p_destructor
nm_coll_req_kind_t kind
void * p_ref
reference given to above p_notify
description of an instantiated tree
int n
number of nodes involved in collective
a data descriptor, used to pack/unpack data from app layout to/from contiguous buffers
Definition nm_data.h:199
encapsulate a dynamic vector of nm_data
Definition nm_data.h:254