NewMadeleine

Documentation

« back to PM2 home.
nm_data.h
Go to the documentation of this file.
1/*
2 * NewMadeleine
3 * Copyright (C) 2015-2026 (see AUTHORS file)
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or (at
8 * your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * General Public License for more details.
14 */
15
21#ifndef NM_DATA_H
22#define NM_DATA_H
23
24#include <assert.h>
25#include <string.h>
26
27#include <Padico/Puk.h>
28
29#include <nm_config.h>
30
31#ifdef NMAD_CUDA
32#include <cuda.h>
33#include <cuda_runtime.h>
34#include <cuda_runtime_api.h>
35#endif /* NMAD_CUDA */
36
37#ifdef NMAD_HIP
38#include <hip/hip_runtime.h>
39#include <hip/hip_runtime_api.h>
40#endif /* NMAD_HIP */
41
78/* ** Data descriptor ************************************** */
79
81#define _NM_DATA_CONTENT_SIZE 64
82
83/* forward declaration so that operators declared inside struct nm_data_s may
84 * take struct nm_data_s* as parameters
85 */
86struct nm_data_s;
87
89typedef void (*nm_data_apply_t)(void*ptr, nm_len_t len, void*_context);
90
93{
97#ifdef NMAD_CUDA
98 int is_cuda;
99#endif /* NMAD_CUDA */
100#ifdef NMAD_HIP
101 int is_hip;
102#endif /* NMAD_HIP */
103};
104
117
119{
120 /* empty */
121};
122
124{
125 void*__restrict__ p_dest_ptr;
126 const struct nm_data_s*p_data;
127};
128
130{
131 const void*__restrict__ p_src_ptr;
132 const struct nm_data_s*p_data;
133};
134
140
146
154
159
174
180typedef void (*nm_data_traversal_t)(const void*_data_content, struct nm_data_op_s*p_op);
181
184
192
196
204
206static inline void nm_data_propertie_gpu_preinit(struct nm_data_properties_s*p_props);
207
210static inline void nm_data_propertie_gpu_postinit(const struct nm_data_properties_s*p_props);
211
213void nm_data_properties_gpu_fill(struct nm_data_properties_s*p_props, const void*p_ptr);
214
/** macro to generate typed functions to init/access data fields.
 *  For a given ENAME, generates:
 *   - nm_data_ENAME_set(): install OPS and copy 'value' into the inline
 *     content buffer, then compute the data properties;
 *   - nm_data_ENAME_content(): typed accessor for the inline content.
 *  CONTENT_TYPE must fit in the _NM_DATA_CONTENT_SIZE inline buffer
 *  (checked by assert).
 */
#define NM_DATA_TYPE(ENAME, CONTENT_TYPE, OPS)                          \
  __attribute__((unused))                                               \
  static inline void nm_data_##ENAME##_set(struct nm_data_s*p_data, CONTENT_TYPE value) \
  {                                                                     \
    p_data->ops = *(OPS);                                               \
    assert(p_data->ops.p_traversal != NULL);                            \
    /* fall back to the generic traversal-based properties computation */ \
    if(p_data->ops.p_properties_compute == NULL)                        \
      {                                                                 \
        p_data->ops.p_properties_compute = nm_data_default_properties_compute; \
      }                                                                 \
    assert(sizeof(CONTENT_TYPE) <= _NM_DATA_CONTENT_SIZE);              \
    CONTENT_TYPE*p_content = (CONTENT_TYPE*)&p_data->_content[0];       \
    *p_content = value;                                                 \
    /* blocks = -1 marks properties as not computed yet */              \
    p_data->props.blocks = -1;                                          \
    nm_data_propertie_gpu_preinit(&p_data->props);                      \
    (*p_data->ops.p_properties_compute)(p_data);                        \
    nm_data_propertie_gpu_postinit(&p_data->props);                     \
  }                                                                     \
  __attribute__((unused))                                               \
  static inline CONTENT_TYPE*nm_data_##ENAME##_content(const struct nm_data_s*p_data) \
  {                                                                     \
    return (CONTENT_TYPE*)p_data->_content;                             \
  }
245
246/* ** datav data (dynamic vector of nm_data)
247 */
248
250#define NM_DATAV_INIT_SIZE 4
251
261
263static inline void nm_datav_init(struct nm_datav_s*p_datav);
265static inline void nm_datav_destroy(struct nm_datav_s*p_datav);
267static inline void nm_datav_add_chunk_data(struct nm_datav_s*p_datav, const struct nm_data_s*p_data);
269static inline void nm_datav_add_chunk(struct nm_datav_s*p_datav, const void*ptr, nm_len_t len);
271static inline nm_len_t nm_datav_size(struct nm_datav_s*p_datav);
274static inline void nm_datav_uncommit(struct nm_datav_s*p_datav);
275
278/* ** Built-in data ops ************************************ */
279
280static inline void nm_data_op_apply(void*__restrict__ p_ptr, nm_len_t len, struct nm_data_op_s*__restrict__ p_op);
281static inline void nm_data_op_apply_nop(void*__restrict__ p_ptr, nm_len_t len, struct nm_data_op_context_nop_s*__restrict__ p_nop);
282static inline void nm_data_op_apply_copy_from(void*__restrict__ p_ptr, nm_len_t len, struct nm_data_op_context_copy_from_s*__restrict__ p_copy_from);
283static inline void nm_data_op_apply_copy_to(void*__restrict__ p_ptr, nm_len_t len, struct nm_data_op_context_copy_to_s*__restrict__ p_copy_to);
284static inline void nm_data_op_apply_dynamic(void*__restrict__ p_ptr, nm_len_t len, struct nm_data_op_context_dynamic_s*__restrict__ p_dynamic);
285static inline void nm_data_op_apply_getprops(void*__restrict__ p_ptr, nm_len_t len, struct nm_data_op_context_getprops_s*__restrict__ p_getprops);
286static inline void nm_data_op_apply_chunk(void*__restrict__ p_ptr, nm_len_t len, struct nm_data_op_context_chunk_s*__restrict__ p_chunk);
287static inline void nm_data_op_apply_slicer_coroutine(void*__restrict__ p_ptr, nm_len_t len, struct nm_data_op_context_slicer_coroutine_s*__restrict__ p_slicer_coroutine);
288
/** reference (non-specialized) apply loop: apply P_OP COUNT times on the
 *  (PTR, LEN) block through the generic nm_data_op_apply() dispatcher.
 *  I is the name of the loop variable; (PTR) and (LEN) may reference I.
 */
#define NM_DATA_OP_APPLY_LOOP_REF(I, COUNT, PTR, LEN, P_OP)     \
  {                                                             \
    nm_len_t I;                                                 \
    for(I = 0; I < (COUNT); I++)                                \
      {                                                         \
        nm_data_op_apply((PTR), (LEN), (P_OP));                 \
      }                                                         \
  }
304
/** apply loop specialized for a given op kind: the op dispatch is done
 *  once at expansion time (OP_KIND selects nm_data_op_apply_OP_KIND),
 *  avoiding the per-block switch of the generic dispatcher.
 *  I is the name of the loop variable; (PTR) and (LEN) may reference I.
 *  Note: START is parenthesized at every use for macro hygiene.
 */
#define NM_DATA_OP_APPLY_KIND_LEN(I, OP_KIND, START, COUNT, PTR, LEN, P_OP) \
  {                                                                     \
    nm_len_t I;                                                         \
    for(I = (START); I < (START) + (COUNT); I++)                        \
      {                                                                 \
        nm_data_op_apply_##OP_KIND((PTR), (LEN), &(P_OP)->context.OP_KIND); \
      }                                                                 \
  }
316
/** apply loop specialized for op kind AND for common small block lengths:
 *  dispatching on LEN == 1/2/4/8/16 exposes a compile-time constant length
 *  to the inner loop, so the compiler can inline fixed-size copies.
 *  Falls back to the runtime (LEN) for any other length.
 */
#define NM_DATA_OP_APPLY_KIND_VECT(I, OP_KIND, START, COUNT, PTR, LEN, P_OP) \
  {                                                                     \
    if((LEN) == 1)                                                      \
      {                                                                 \
        NM_DATA_OP_APPLY_KIND_LEN(I, OP_KIND, START, COUNT, PTR, 1, P_OP); \
      }                                                                 \
    else if((LEN) == 2)                                                 \
      {                                                                 \
        NM_DATA_OP_APPLY_KIND_LEN(I, OP_KIND, START, COUNT, PTR, 2, P_OP); \
      }                                                                 \
    else if((LEN) == 4)                                                 \
      {                                                                 \
        NM_DATA_OP_APPLY_KIND_LEN(I, OP_KIND, START, COUNT, PTR, 4, P_OP); \
      }                                                                 \
    else if((LEN) == 8)                                                 \
      {                                                                 \
        NM_DATA_OP_APPLY_KIND_LEN(I, OP_KIND, START, COUNT, PTR, 8, P_OP); \
      }                                                                 \
    else if((LEN) == 16)                                                \
      {                                                                 \
        NM_DATA_OP_APPLY_KIND_LEN(I, OP_KIND, START, COUNT, PTR, 16, P_OP); \
      }                                                                 \
    else                                                                \
      {                                                                 \
        NM_DATA_OP_APPLY_KIND_LEN(I, OP_KIND, START, COUNT, PTR, (LEN), P_OP); \
      }                                                                 \
  }
348
/** optimized apply loop: dispatch once on the op kind, then run a
 *  kind-specialized (and, for copies, length-specialized) inner loop.
 *  I is the name of the loop variable; (PTR) and (LEN) may reference I.
 *  CHUNK and SLICER_COROUTINE keep per-op running state (bytes done /
 *  remaining slice length) across blocks, so their cases handle partial
 *  leading/trailing blocks explicitly.
 */
#define NM_DATA_OP_APPLY_LOOP_OPT(I, COUNT, PTR, LEN, P_OP)             \
  {                                                                     \
    switch((P_OP)->kind)                                                \
      {                                                                 \
      case NM_DATA_OP_NOP:                                              \
        NM_DATA_OP_APPLY_KIND_LEN(I, nop, 0, (COUNT), (PTR), (LEN), (P_OP)); \
        break;                                                          \
      case NM_DATA_OP_COPY_FROM:                                        \
        NM_DATA_OP_APPLY_KIND_VECT(I, copy_from, 0, (COUNT), (PTR), (LEN), (P_OP)); \
        break;                                                          \
      case NM_DATA_OP_COPY_TO:                                          \
        NM_DATA_OP_APPLY_KIND_VECT(I, copy_to, 0, (COUNT), (PTR), (LEN), (P_OP)); \
        break;                                                          \
      case NM_DATA_OP_GETPROPS:                                         \
        NM_DATA_OP_APPLY_KIND_LEN(I, getprops, 0, (COUNT), (PTR), (LEN), (P_OP)); \
        break;                                                          \
      case NM_DATA_OP_DYNAMIC:                                          \
        NM_DATA_OP_APPLY_KIND_LEN(I, dynamic, 0, (COUNT), (PTR), (LEN), (P_OP)); \
        break;                                                          \
      case NM_DATA_OP_CHUNK:                                            \
        {                                                               \
          struct nm_data_op_context_chunk_s*__restrict__ p_chunk = &(P_OP)->context.chunk; \
          nm_len_t blocks_done = 0;                                     \
          /* 1. fast-forward whole blocks located before the chunk window */ \
          if(p_chunk->done < p_chunk->chunk_offset)                     \
            {                                                           \
              nm_len_t skip_blocks = (p_chunk->chunk_offset - p_chunk->done) / (LEN); \
              if(skip_blocks > (COUNT)) skip_blocks = (COUNT);          \
              p_chunk->done += skip_blocks * (LEN);                     \
              blocks_done += skip_blocks;                               \
              /* partial leading block straddling the chunk start */    \
              if( (p_chunk->done < p_chunk->chunk_offset) &&            \
                  (blocks_done < (COUNT)) )                             \
                {                                                       \
                  assert(p_chunk->chunk_offset - p_chunk->done < (LEN)); \
                  const nm_len_t I = blocks_done;                       \
                  nm_data_op_apply_chunk((PTR), (LEN), p_chunk);        \
                  blocks_done++;                                        \
                  p_chunk->done += (LEN);                               \
                }                                                       \
            }                                                           \
          const nm_len_t chunk_end = p_chunk->chunk_offset + p_chunk->chunk_len; \
          /* 2. apply the inner op to whole blocks fully inside the chunk */ \
          if( (p_chunk->done < chunk_end) &&                            \
              (blocks_done < (COUNT)) )                                 \
            {                                                           \
              nm_len_t op_blocks = (chunk_end - p_chunk->done) / (LEN); \
              if(blocks_done + op_blocks > (COUNT))                     \
                op_blocks = (COUNT) - blocks_done;                      \
              switch(p_chunk->p_op->kind)                               \
                {                                                       \
                case NM_DATA_OP_NOP:                                    \
                  break;                                                \
                case NM_DATA_OP_COPY_FROM:                              \
                  NM_DATA_OP_APPLY_KIND_VECT(I, copy_from, blocks_done, op_blocks, (PTR), (LEN), p_chunk->p_op); \
                  break;                                                \
                case NM_DATA_OP_COPY_TO:                                \
                  NM_DATA_OP_APPLY_KIND_VECT(I, copy_to, blocks_done, op_blocks, (PTR), (LEN), p_chunk->p_op); \
                  break;                                                \
                case NM_DATA_OP_GETPROPS:                               \
                  NM_DATA_OP_APPLY_KIND_LEN(I, getprops, blocks_done, op_blocks, (PTR), (LEN), p_chunk->p_op); \
                  break;                                                \
                default:                                                \
                  /* inner op with no specialized loop: generic dispatch */ \
                  {                                                     \
                    nm_len_t I;                                         \
                    for(I = blocks_done; I < blocks_done + op_blocks; I++) \
                      {                                                 \
                        nm_data_op_apply((PTR), (LEN), p_chunk->p_op);  \
                      }                                                 \
                  }                                                     \
                  break;                                                \
                }                                                       \
              blocks_done += op_blocks;                                 \
              p_chunk->done += op_blocks * (LEN);                       \
              /* 3. partial trailing block straddling the chunk end */  \
              if((p_chunk->done < chunk_end) && (blocks_done < (COUNT))) \
                {                                                       \
                  assert(chunk_end - p_chunk->done < (LEN));            \
                  const nm_len_t I = blocks_done;                       \
                  nm_data_op_apply_chunk((PTR), (LEN), p_chunk);        \
                  blocks_done++;                                        \
                  p_chunk->done += (LEN);                               \
                }                                                       \
            }                                                           \
          assert(blocks_done <= (COUNT));                               \
        }                                                               \
        break;                                                          \
      case NM_DATA_OP_SLICER_COROUTINE:                                 \
        {                                                               \
          struct nm_data_slicer_s*__restrict__ p_slicer = (P_OP)->context.slicer_coroutine.p_slicer; \
          nm_len_t blocks_done = 0;                                     \
          while(blocks_done < (COUNT))                                  \
            {                                                           \
              /* number of whole blocks the current slice can absorb */ \
              const nm_len_t slice_len = p_slicer->coroutine.slice_len; \
              nm_len_t slice_blocks = slice_len / (LEN);                \
              if(blocks_done + slice_blocks > (COUNT))                  \
                slice_blocks = (COUNT) - blocks_done;                   \
              if(slice_blocks > 0)                                      \
                {                                                       \
                  switch(p_slicer->coroutine.op.kind)                   \
                    {                                                   \
                    case NM_DATA_OP_NOP:                                \
                      NM_DATA_OP_APPLY_KIND_LEN(I, nop, blocks_done, slice_blocks, (PTR), (LEN), &p_slicer->coroutine.op); \
                      break;                                            \
                    case NM_DATA_OP_COPY_FROM:                          \
                      NM_DATA_OP_APPLY_KIND_VECT(I, copy_from, blocks_done, slice_blocks, (PTR), (LEN), &p_slicer->coroutine.op); \
                      break;                                            \
                    case NM_DATA_OP_COPY_TO:                            \
                      NM_DATA_OP_APPLY_KIND_VECT(I, copy_to, blocks_done, slice_blocks, (PTR), (LEN), &p_slicer->coroutine.op); \
                      break;                                            \
                    default:                                            \
                      {                                                 \
                        nm_len_t I;                                     \
                        for(I = blocks_done; I < blocks_done + slice_blocks; I++) \
                          {                                             \
                            nm_data_op_apply((PTR), (LEN), &p_slicer->coroutine.op); \
                          }                                             \
                      }                                                 \
                      break;                                            \
                    }                                                   \
                  p_slicer->coroutine.slice_len = slice_len - slice_blocks * (LEN); \
                  blocks_done += slice_blocks;                          \
                  if(slice_len == slice_blocks * (LEN))                 \
                    {                                                   \
                      /* slice exactly exhausted- hand control back */  \
                      nm_data_coroutine_yield_to_caller(&p_slicer->coroutine.coroutine); \
                    }                                                   \
                  else if(blocks_done < (COUNT))                        \
                    {                                                   \
                      /* block straddles the slice boundary */          \
                      const nm_len_t I = blocks_done;                   \
                      nm_data_op_apply_slicer_coroutine((PTR), (LEN), &(P_OP)->context.slicer_coroutine); \
                      blocks_done++;                                    \
                    }                                                   \
                }                                                       \
              else if(blocks_done < (COUNT))                            \
                {                                                       \
                  /* slice smaller than one block */                    \
                  const nm_len_t I = blocks_done;                       \
                  nm_data_op_apply_slicer_coroutine((PTR), (LEN), &(P_OP)->context.slicer_coroutine); \
                  blocks_done++;                                        \
                }                                                       \
            }                                                           \
          assert(blocks_done <= (COUNT));                               \
        }                                                               \
        break;                                                          \
      default:                                                          \
        /* unknown op kind: fall back to generic per-block dispatch */  \
        {                                                               \
          nm_len_t I;                                                   \
          for(I = 0; I < (COUNT); I++)                                  \
            {                                                           \
              nm_data_op_apply((PTR), (LEN), (P_OP));                   \
            }                                                           \
        }                                                               \
        break;                                                          \
      }                                                                 \
  }
502
504#define NM_DATA_OP_APPLY_LOOP NM_DATA_OP_APPLY_LOOP_OPT
505
506
507/* ** Built-in data types ********************************** */
508
526{
527 int dummy;
528};
529extern const struct nm_data_ops_s nm_data_ops_null;
531static inline void nm_data_null_build(struct nm_data_s*p_data)
532{
533 struct nm_data_null_s n = { 0 };
534 nm_data_null_set(p_data, n);
535}
536static inline int nm_data_isnull(struct nm_data_s*p_data)
537{
538 return (p_data->ops.p_traversal == nm_data_ops_null.p_traversal);
539}
540
549extern const struct nm_data_ops_s nm_data_ops_contiguous;
551
552static inline void nm_data_contiguous_build(struct nm_data_s*p_data, void*ptr, nm_len_t len)
553{
554 struct nm_data_contiguous_s dc;
555 dc.ptr = ptr;
556 dc.len = len;
557 nm_data_contiguous_set(p_data, dc);
558}
559
564{
565 const struct iovec*v;
566 int n;
567};
568extern const struct nm_data_ops_s nm_data_ops_iov;
570
571static inline void nm_data_iov_build(struct nm_data_s*p_data, const struct iovec*v, int n)
572{
573 struct nm_data_iov_s di;
574 di.v = v;
575 di.n = n;
576 nm_data_iov_set(p_data, di);
577}
578
583{
585};
586extern const struct nm_data_ops_s nm_data_ops_datav;
588
590static inline void nm_data_datav_build(struct nm_data_s*p_datav_data, struct nm_datav_s*p_datav)
591{
592 p_datav->commited = 1;
593 struct nm_data_datav_s dv;
594 dv.p_datav = p_datav;
595 nm_data_datav_set(p_datav_data, dv);
596}
597
607extern const struct nm_data_ops_s nm_data_ops_excerpt;
609
612static inline void nm_data_excerpt_build(struct nm_data_s*p_data, struct nm_data_s*p_inner_data,
614{
615 struct nm_data_excerpt_s de;
617 de.chunk_len = chunk_len;
618 de.p_data = p_inner_data;
619 nm_data_excerpt_set(p_data, de);
620}
621
622
623/* ** Helper functions ************************************* */
624
/** apply an operation to the data: walk the content descriptor through the
 *  data's traversal function, which applies p_op to each data block */
static inline void nm_data_traversal_op_apply(const struct nm_data_s*p_data, struct nm_data_op_s*p_op)
{
  assert(p_data->ops.p_traversal != NULL);
  (*p_data->ops.p_traversal)((void*)p_data->_content, p_op);
}
640
643static inline void nm_data_traversal_apply(const struct nm_data_s*p_data, nm_data_apply_t p_apply, void*_context)
644{
645 struct nm_data_op_s op;
647 op.context.dynamic.p_apply = p_apply;
648 op.context.dynamic.p_apply_context = _context;
650}
651
653 struct nm_data_op_s*p_op);
654
656 nm_data_apply_t p_apply, void*p_apply_context);
657
/** returns the properties block for the data; properties are computed when
 *  the data descriptor is built (see NM_DATA_TYPE set function) */
static inline const struct nm_data_properties_s*nm_data_properties_get(const struct nm_data_s*p_data)
{
  return &p_data->props;
}
663
665static inline nm_len_t nm_data_size(const struct nm_data_s*p_data)
666{
667 const struct nm_data_properties_s*p_props = nm_data_properties_get((struct nm_data_s*)p_data);
668 return p_props->size;
669}
670
673
676
679 struct nm_data_properties_s*p_props);
680
683
685uint32_t nm_data_checksum(const struct nm_data_s*p_data);
686
688void nm_data_copy_from(const struct nm_data_s*p_data, nm_len_t offset, nm_len_t len, void*destbuf);
689
691void nm_data_copy_to(const struct nm_data_s*p_data, nm_len_t offset, nm_len_t len, const void*srcbuf);
692
694void nm_data_copy(struct nm_data_s*p_dest, struct nm_data_s*p_from);
695
698/* ** Data slicer ****************************************** */
699
719/* ** coroutine */
720
722
723typedef void (*nm_data_coroutine_worker_t)(struct nm_data_coroutine_s*p_coroutine, void*_user_data);
724
732
734{
736 union
737 {
738 struct nm_data_coroutine_ucontext_s*p_ucontext;
739 struct nm_data_coroutine_longjmp_s*p_longjmp;
743};
744
747
748/* ** slicer */
749
757
759typedef struct nm_data_slicer_s
760{
762 const struct nm_data_s*p_data;
764 union
765 {
766 struct
767 {
772 struct
773 {
776 };
778
780#define NM_DATA_SLICER_NULL ((struct nm_data_slicer_s){ .kind = NM_DATA_SLICER_NONE })
781
783static inline int nm_data_slicer_isnull(const nm_data_slicer_t*p_slicer)
784{
785 return (p_slicer->kind == NM_DATA_SLICER_NONE);
786}
787
789void nm_data_slicer_copy_from(nm_data_slicer_t*p_slicer, void*dest_ptr, nm_len_t slice_len);
790void nm_data_slicer_copy_to(nm_data_slicer_t*p_slicer, const void*src_ptr, nm_len_t slice_len);
793
797/* ********************************************************* */
798/* ** inline functions */
799
800
801/* ** datav */
802
803static inline void nm_datav_init(struct nm_datav_s*p_datav)
804{
805 p_datav->p_data = &p_datav->data[0];
806 p_datav->n_data = 0;
807 p_datav->allocated = 0;
808 p_datav->commited = 0;
809}
810
811static inline void nm_datav_destroy(struct nm_datav_s*p_datav)
812{
813 assert(p_datav->p_data != NULL);
814 if(p_datav->p_data != &p_datav->data[0])
815 {
816 padico_free(p_datav->p_data);
817 p_datav->p_data = NULL;
818 }
819 }
820
821static inline void nm_datav_add_chunk_data(struct nm_datav_s*p_datav, const struct nm_data_s*p_data)
822{
823 assert(!p_datav->commited); /* cannot modify datav once it is used as a nm_data */
824 if(p_datav->n_data == NM_DATAV_INIT_SIZE)
825 {
826 assert(p_datav->p_data == &p_datav->data[0]);
827 p_datav->allocated = NM_DATAV_INIT_SIZE * 2;
828 p_datav->p_data = (struct nm_data_s*)padico_malloc(p_datav->allocated * sizeof(struct nm_data_s));
829 memcpy(p_datav->p_data, &p_datav->data[0], p_datav->n_data * sizeof(struct nm_data_s));
830 }
831 else if((p_datav->n_data > NM_DATAV_INIT_SIZE) &&
832 (p_datav->n_data > p_datav->allocated - 1))
833 {
834 assert(p_datav->p_data != &p_datav->data[0]);
835 p_datav->allocated *= 2;
836 p_datav->p_data = (struct nm_data_s*)padico_realloc(p_datav->p_data, p_datav->allocated * sizeof(struct nm_data_s));
837 }
838 p_datav->p_data[p_datav->n_data] = *p_data;
839 p_datav->n_data++;
840}
841
842static inline void nm_datav_add_chunk(struct nm_datav_s*p_datav, const void*ptr, nm_len_t len)
843{
844 struct nm_data_s data;
845 nm_data_contiguous_build(&data, (void*)ptr, len);
846 nm_datav_add_chunk_data(p_datav, &data);
847}
848
849static inline nm_len_t nm_datav_size(struct nm_datav_s*p_datav)
850{
851 nm_len_t size = 0;
852 int i;
853 for(i = 0; i < p_datav->n_data; i++)
854 {
855 size += nm_data_size(&p_datav->p_data[i]);
856 }
857 return size;
858}
859
/** 'uncommit' a datav: explicitly declare that the nm_data pointing to this
 *  datav has been destroyed, so the datav may be modified again */
static inline void nm_datav_uncommit(struct nm_datav_s*p_datav)
{
  assert(p_datav->commited);
  p_datav->commited = 0;
}
865
866/* ** GPU */
867
/** pre-init the GPU part of data properties: mark the GPU flags as
 *  'not computed yet' (-1), so that postinit can verify the
 *  p_properties_compute function actually filled them in */
static inline void nm_data_propertie_gpu_preinit(struct nm_data_properties_s*p_props __attribute__((unused)))
{
#ifdef NMAD_CUDA
  p_props->is_cuda = -1;
#endif /* NMAD_CUDA */
#ifdef NMAD_HIP
  p_props->is_hip = -1;
#endif /* NMAD_HIP */
}
877
/** post-init the GPU part of data properties: abort if the GPU flags were
 *  left at their preinit sentinel (-1), i.e. the p_properties_compute
 *  function did not fill them in */
static inline void nm_data_propertie_gpu_postinit(const struct nm_data_properties_s*p_props __attribute__((unused)))
{
#ifdef NMAD_CUDA
  if(p_props->is_cuda == -1)
    {
      NM_FATAL("incorrect data properties: is_cuda = %d (should be either 0 or 1)\n", p_props->is_cuda);
    }
#endif /* NMAD_CUDA */
#ifdef NMAD_HIP
  if(p_props->is_hip == -1)
    {
      NM_FATAL("incorrect data properties: is_hip = %d (should be either 0 or 1)\n", p_props->is_hip);
    }
#endif /* NMAD_HIP */
}
893
894
/** copy a block of data out of the data's memory space into host memory.
 *  p_dest is host memory; p_src may reside on a GPU, as flagged in the
 *  data properties (CUDA/HIP device-to-host copy), otherwise plain memcpy. */
static inline void nm_data_memcpy_from(void*p_dest, const void*p_src, nm_len_t len, const struct nm_data_properties_s*p_props)
{
#if defined(NMAD_CUDA)
  if(p_props->is_cuda)
    {
      cudaError_t rc = cudaMemcpy(p_dest /* host */, p_src /* gpu */, len, cudaMemcpyDeviceToHost);
      if(rc != cudaSuccess)
        {
          NM_FATAL("CUDA cannot copy %ld bytes from GPU; error %s\n", len, cudaGetErrorString(rc));
        }
      return;
    }
#endif /* NMAD_CUDA */

#if defined(NMAD_HIP)
  if(p_props->is_hip)
    {
      hipError_t rc = hipMemcpy(p_dest /* host */, p_src /* gpu */, len, hipMemcpyDeviceToHost);
      if(rc != hipSuccess)
        {
          NM_FATAL("HIP cannot copy %ld bytes from GPU; error %s\n", len, hipGetErrorString(rc));
        }
      return;
    }
#endif /* NMAD_HIP */

  /* default case: host memory */
  memcpy(p_dest, p_src, len);
}
928
933static inline void nm_data_memcpy_to(void*p_dest, const void*p_src, nm_len_t len, const struct nm_data_properties_s*p_props)
934{
935#if defined(NMAD_CUDA)
936 if(p_props->is_cuda)
937 {
938 cudaError_t rc = cudaMemcpy(p_dest /* gpu */, p_src /* src */, len, cudaMemcpyHostToDevice);
939 if(rc != cudaSuccess)
940 {
941 NM_FATAL("CUDA cannot copy %ld bytes from GPU; error %s\n", len, cudaGetErrorString(rc));
942 }
943 return;
944 }
945#endif /* NMAD_CUDA */
946
947#if defined(NMAD_HIP)
948 if(p_props->is_hip)
949 {
950 hipError_t rc = hipMemcpy(p_dest /* gpu */, p_src /* host */, len, hipMemcpyHostToDevice);
951 if(rc != hipSuccess)
952 {
953 NM_FATAL("HIP cannot copy %ld bytes from GPU; error %s\n", len, hipGetErrorString(rc));
954 }
955 return;
956 }
957#endif /* NMAD_HIP */
958
959 /* default case: host memory */
960 memcpy(p_dest, p_src, len);
961}
962
963
964/* ** data op inline */
965
/** NOP operation: traverse the data block without doing anything
 *  (used to fast-forward through a traversal) */
static inline void nm_data_op_apply_nop(void*__restrict__ p_ptr, nm_len_t len, struct nm_data_op_context_nop_s*__restrict__ p_nop)
{
  /* empty */
}
970
/** COPY_FROM operation: copy the current data block into the user buffer,
 *  then advance the destination cursor by len bytes */
static inline void nm_data_op_apply_copy_from(void*__restrict__ p_ptr, nm_len_t len, struct nm_data_op_context_copy_from_s*__restrict__ p_copy_from)
{
  nm_data_memcpy_from(p_copy_from->p_dest_ptr, p_ptr, len, &p_copy_from->p_data->props);
  p_copy_from->p_dest_ptr = (char*)p_copy_from->p_dest_ptr + len;
}
976
/** COPY_TO operation: fill the current data block from the user buffer,
 *  then advance the source cursor by len bytes */
static inline void nm_data_op_apply_copy_to(void*__restrict__ p_ptr, nm_len_t len, struct nm_data_op_context_copy_to_s*__restrict__ p_copy_to)
{
  nm_data_memcpy_to(p_ptr, p_copy_to->p_src_ptr, len, &p_copy_to->p_data->props);
  p_copy_to->p_src_ptr = (char*)p_copy_to->p_src_ptr + len;
}
982
/** DYNAMIC operation: invoke the user-supplied apply function on the block */
static inline void nm_data_op_apply_dynamic(void*__restrict__ p_ptr, nm_len_t len, struct nm_data_op_context_dynamic_s*__restrict__ p_dynamic)
{
  (*p_dynamic->p_apply)(p_ptr, len, p_dynamic->p_apply_context);
}
987
988static inline void nm_data_op_apply_getprops(void*__restrict__ p_ptr, nm_len_t len, struct nm_data_op_context_getprops_s*__restrict__ p_getprops)
989{
990 p_getprops->props.size += len;
991 p_getprops->props.blocks += 1;
992 if(p_getprops->props.is_contig)
993 {
994 if((p_getprops->p_blockend != NULL) && (p_ptr != p_getprops->p_blockend))
995 p_getprops->props.is_contig = 0;
996 p_getprops->p_blockend = (char*)p_ptr + len;
997#if defined(NMAD_CUDA) || defined(NMAD_HIP)
998 if(p_getprops->p_blockend == NULL) /* test only first chunk */
999 {
1000 nm_data_properties_gpu_fill(&p_getprops->props, p_ptr);
1001 }
1002#endif /* CUDA || HIP */
1003 }
1004}
1005
/** CHUNK operation: apply the inner op only to the portion of the current
 *  block that intersects the window [chunk_offset, chunk_offset + chunk_len).
 *  p_chunk->done tracks the absolute byte offset reached so far in the
 *  traversal, and is advanced by len whether or not the block intersects. */
static inline void nm_data_op_apply_chunk(void*__restrict__ p_ptr, nm_len_t len, struct nm_data_op_context_chunk_s*__restrict__ p_chunk)
{
  const nm_len_t chunk_offset = p_chunk->chunk_offset;
  const nm_len_t chunk_len = p_chunk->chunk_len;
  if( (!(p_chunk->done + len <= p_chunk->chunk_offset)) /* data before chunk- do nothing */
      &&
      (!(p_chunk->done >= p_chunk->chunk_offset + p_chunk->chunk_len))) /* data after chunk- do nothing */
    {
      /* data in chunk */
      /* offset within this block where the chunk window begins */
      const nm_len_t block_offset = (p_chunk->done < chunk_offset) ? (chunk_offset - p_chunk->done) : 0;
      /* length of the intersection of this block with the chunk window */
      const nm_len_t block_len = (chunk_offset + chunk_len > p_chunk->done + len) ?
        (len - block_offset) : (chunk_offset + chunk_len - p_chunk->done - block_offset);
      nm_data_op_apply((char*)p_ptr + block_offset, block_len, p_chunk->p_op);
    }
  p_chunk->done += len;
}
1022
/** SLICER_COROUTINE operation: consume the current block against the active
 *  slice.  If the block completes the slice, yield back to the caller
 *  context; upon resume, any remainder of the block is processed against
 *  the next slice.  If the block is smaller than the remaining slice, just
 *  decrement the remaining slice length. */
static inline void nm_data_op_apply_slicer_coroutine(void*__restrict__ p_ptr, nm_len_t len, struct nm_data_op_context_slicer_coroutine_s*__restrict__ p_slicer_coroutine)
{
  struct nm_data_slicer_s*__restrict__ p_slicer = p_slicer_coroutine->p_slicer;
 restart:
  ;
  const nm_len_t slice_len = p_slicer->coroutine.slice_len; /* take a snapshot of volatile var */
  /* consume at most the remaining slice length from this block */
  const nm_len_t chunk_len = (len > slice_len) ? slice_len : len;
  nm_data_op_apply(p_ptr, chunk_len, &p_slicer->coroutine.op);
  if(slice_len == chunk_len)
    {
      /* slice is done- give hand back to caller context */
      nm_data_coroutine_yield_to_caller(&p_slicer->coroutine.coroutine);
      /* back from caller context */
      if(len != chunk_len)
        {
          /* crop block and process remainder */
          p_ptr = (char*)p_ptr + chunk_len;
          len -= chunk_len;
          goto restart;
        }
    }
  else
    {
      /* block fully consumed; part of the slice is still pending */
      p_slicer->coroutine.slice_len = slice_len - chunk_len;
    }
}
1049
1051static inline void nm_data_op_apply(void*__restrict__ p_ptr, nm_len_t len, struct nm_data_op_s*__restrict__ p_op)
1052{
1053 switch(p_op->kind)
1054 {
1055 case NM_DATA_OP_NOP:
1056 nm_data_op_apply_nop(p_ptr, len, &p_op->context.nop);
1057 break;
1059 nm_data_op_apply_copy_from(p_ptr, len, &p_op->context.copy_from);
1060 break;
1061 case NM_DATA_OP_COPY_TO:
1062 nm_data_op_apply_copy_to(p_ptr, len, &p_op->context.copy_to);
1063 break;
1064 case NM_DATA_OP_DYNAMIC:
1065 nm_data_op_apply_dynamic(p_ptr, len, &p_op->context.dynamic);
1066 break;
1068 nm_data_op_apply_getprops(p_ptr, len, &p_op->context.getprops);
1069 break;
1070 case NM_DATA_OP_CHUNK:
1071 nm_data_op_apply_chunk(p_ptr, len, &p_op->context.chunk);
1072 break;
1074 nm_data_op_apply_slicer_coroutine(p_ptr, len, &p_op->context.slicer_coroutine);
1075 break;
1076 case NM_DATA_OP_NONE:
1077 NM_FATAL("cannot apply NM_DATA_OP_NONE.\n");
1078 break;
1079 default:
1080 NM_FATAL("op = %d not managed in apply.\n", p_op->kind);
1081 break;
1082 }
1083}
1084
1085
1086#endif /* NM_DATA_H */
struct nm_data_contiguous_s __attribute__
#define _NM_DATA_CONTENT_SIZE
maximum size of content descriptor for nm_data
Definition nm_data.h:81
nm_data_op_kind_e
operation to apply to data in traversal & slicer
Definition nm_data.h:107
void(* nm_data_traversal_t)(const void *_data_content, struct nm_data_op_s *p_op)
function to traverse data with app layout, i.e.
Definition nm_data.h:180
static void nm_datav_uncommit(struct nm_datav_s *p_datav)
'uncommit' a datav: explicitely declare that nm_data pointing to this datav has been destroyed.
Definition nm_data.h:860
#define NM_DATAV_INIT_SIZE
initial size of an nm_datav
Definition nm_data.h:250
static void nm_data_propertie_gpu_preinit(struct nm_data_properties_s *p_props)
pre-init GPU part of data properties
static nm_len_t nm_datav_size(struct nm_datav_s *p_datav)
get the size (number of bytes) of data contained in the datav
Definition nm_data.h:849
void nm_data_properties_gpu_fill(struct nm_data_properties_s *p_props, const void *p_ptr)
fill in the GPU part of data properties, following pointer 'p_ptr'
static void nm_datav_add_chunk(struct nm_datav_s *p_datav, const void *ptr, nm_len_t len)
add a chunk of contiguous data to a datav
Definition nm_data.h:842
enum nm_data_op_kind_e nm_data_op_t
operation to apply to data in traversal & slicer
void(* nm_data_apply_t)(void *ptr, nm_len_t len, void *_context)
function to apply to each data chunk upon traversal
Definition nm_data.h:89
void(* nm_data_properties_compute_t)(struct nm_data_s *p_data)
function to compute data properties
Definition nm_data.h:183
#define NM_DATA_TYPE(ENAME, CONTENT_TYPE, OPS)
macro to generate typed functions to init/access data fields.
Definition nm_data.h:222
static void nm_datav_init(struct nm_datav_s *p_datav)
initialize a datav
Definition nm_data.h:803
void nm_data_default_properties_compute(struct nm_data_s *p_data)
static void nm_data_propertie_gpu_postinit(const struct nm_data_properties_s *p_props)
post-init GPU part of data properties: check that p_properties_compute function actually filled the G...
static void nm_datav_destroy(struct nm_datav_s *p_datav)
destroys a datav
Definition nm_data.h:811
static void nm_datav_add_chunk_data(struct nm_datav_s *p_datav, const struct nm_data_s *p_data)
add a chunk of data to datav; given p_data content is copied.
Definition nm_data.h:821
@ NM_DATA_OP_CHUNK
apply op on chunk
Definition nm_data.h:114
@ NM_DATA_OP_NONE
no operation selected
Definition nm_data.h:108
@ NM_DATA_OP_COPY_TO
copy to iterator, from user buffer
Definition nm_data.h:111
@ NM_DATA_OP_COPY_FROM
copy from iterator to user buffer
Definition nm_data.h:110
@ NM_DATA_OP_SLICER_COROUTINE
coroutine-based slicer
Definition nm_data.h:115
@ NM_DATA_OP_DYNAMIC
dynamic function call
Definition nm_data.h:112
@ NM_DATA_OP_GETPROPS
compute data properties
Definition nm_data.h:113
@ NM_DATA_OP_NOP
perform no operation on data (fast forward)
Definition nm_data.h:109
void nm_data_chunk_extractor_op_traversal(const struct nm_data_s *p_data, nm_len_t chunk_offset, nm_len_t chunk_len, struct nm_data_op_s *p_op)
static nm_len_t nm_data_size(const struct nm_data_s *p_data)
returns the amount of data contained in the descriptor
Definition nm_data.h:665
void nm_data_copy(struct nm_data_s *p_dest, struct nm_data_s *p_from)
copy from nm_data to another nm_data
void nm_data_chunk_properties_compute(const struct nm_data_s *p_data, nm_len_t chunk_offset, nm_len_t chunk_len, struct nm_data_properties_s *p_props)
compute properties of the given chunk inside the data
static void nm_data_traversal_op_apply(const struct nm_data_s *p_data, struct nm_data_op_s *p_op)
Definition nm_data.h:635
static void nm_data_traversal_apply(const struct nm_data_s *p_data, nm_data_apply_t p_apply, void *_context)
helper function to apply iterator to data
Definition nm_data.h:643
void nm_data_copy_to(const struct nm_data_s *p_data, nm_len_t offset, nm_len_t len, const void *srcbuf)
copy chunk of data from contiguous buffer to user layout
void * nm_data_baseptr_get(const struct nm_data_s *p_data)
find base pointer for a data known to be contiguous
void nm_data_copy_from(const struct nm_data_s *p_data, nm_len_t offset, nm_len_t len, void *destbuf)
copy chunk of data from user layout to contiguous buffer
void nm_data_chunk_extractor_traversal(const struct nm_data_s *p_data, nm_len_t chunk_offset, nm_len_t chunk_len, nm_data_apply_t p_apply, void *p_apply_context)
nm_len_t nm_data_chunk_first_get(const struct nm_data_s *p_data, nm_len_t chunk_offset, nm_len_t chunk_len, int n)
get length of first n blocks in given chunk
static const struct nm_data_properties_s * nm_data_properties_get(const struct nm_data_s *p_data)
returns the properties block for the data
Definition nm_data.h:659
uint32_t nm_data_checksum(const struct nm_data_s *p_data)
checksum data
void * nm_data_chunk_baseptr_get(const struct nm_data_s *p_data, nm_len_t chunk_offset, nm_len_t chunk_len)
find base pointer for a data chunk known to be contiguous
static int nm_data_slicer_isnull(const nm_data_slicer_t *p_slicer)
tests whether a slicer is null
Definition nm_data.h:783
struct nm_data_slicer_s nm_data_slicer_t
internal state of a data slicer.
void nm_data_slicer_forward(nm_data_slicer_t *p_slicer, nm_len_t offset)
nm_data_slicer_kind_t
various kinds of slicer implementations
Definition nm_data.h:752
nm_data_coroutine_kind_e
Definition nm_data.h:726
void nm_data_coroutine_yield_to_data(struct nm_data_coroutine_s *p_coroutine)
void nm_data_slicer_copy_to(nm_data_slicer_t *p_slicer, const void *src_ptr, nm_len_t slice_len)
void(* nm_data_coroutine_worker_t)(struct nm_data_coroutine_s *p_coroutine, void *_user_data)
Definition nm_data.h:723
void nm_data_slicer_destroy(nm_data_slicer_t *p_slicer)
void nm_data_coroutine_yield_to_caller(struct nm_data_coroutine_s *p_coroutine)
void nm_data_slicer_copy_from(nm_data_slicer_t *p_slicer, void *dest_ptr, nm_len_t slice_len)
enum nm_data_coroutine_kind_e nm_data_coroutine_kind_t
Definition nm_data.h:731
void nm_data_slicer_init(nm_data_slicer_t *p_slicer, const struct nm_data_s *p_data)
@ NM_DATA_SLICER_NONE
Definition nm_data.h:753
@ NM_DATA_SLICER_COROUTINE
coroutine-based slicer, using generic data traversal
Definition nm_data.h:754
@ NM_DATA_SLICER_CONTIG
slicer specialized for contig data
Definition nm_data.h:755
@ NM_DATA_COROUTINE_LONGJMP
longjmp-based coroutine, when stack jumping is possible with longjmp
Definition nm_data.h:729
@ NM_DATA_COROUTINE_UCONTEXT
ucontext-based coroutines, when stack jumping is not available through longjmp
Definition nm_data.h:728
@ NM_DATA_COROUTINE_NONE
Definition nm_data.h:727
assert(p_data->ops.p_traversal !=NULL)
static void nm_data_op_apply_copy_from(void *__restrict__ p_ptr, nm_len_t len, struct nm_data_op_context_copy_from_s *__restrict__ p_copy_from)
Definition nm_data.h:971
const struct nm_data_ops_s nm_data_ops_excerpt
static void nm_data_contiguous_build(struct nm_data_s *p_data, void *ptr, nm_len_t len)
Definition nm_data.h:552
static void nm_data_iov_build(struct nm_data_s *p_data, const struct iovec *v, int n)
Definition nm_data.h:571
static void nm_data_op_apply_copy_to(void *__restrict__ p_ptr, nm_len_t len, struct nm_data_op_context_copy_to_s *__restrict__ p_copy_to)
Definition nm_data.h:977
static void nm_data_datav_build(struct nm_data_s *p_datav_data, struct nm_datav_s *p_datav)
frontend to build a nm_data from a datav
Definition nm_data.h:590
static void nm_data_op_apply_dynamic(void *__restrict__ p_ptr, nm_len_t len, struct nm_data_op_context_dynamic_s *__restrict__ p_dynamic)
Definition nm_data.h:983
const struct nm_data_ops_s nm_data_ops_null
static void nm_data_op_apply_nop(void *__restrict__ p_ptr, nm_len_t len, struct nm_data_op_context_nop_s *__restrict__ p_nop)
Definition nm_data.h:966
static void nm_data_op_apply_slicer_coroutine(void *__restrict__ p_ptr, nm_len_t len, struct nm_data_op_context_slicer_coroutine_s *__restrict__ p_slicer_coroutine)
Definition nm_data.h:1023
const struct nm_data_ops_s nm_data_ops_iov
static void nm_data_memcpy_to(void *p_dest, const void *p_src, nm_len_t len, const struct nm_data_properties_s *p_props)
copy chunks of data.
Definition nm_data.h:933
static void nm_data_excerpt_build(struct nm_data_s *p_data, struct nm_data_s *p_inner_data, nm_len_t chunk_offset, nm_len_t chunk_len)
build a data descriptor as an excerpt of another data.
Definition nm_data.h:612
static void nm_data_memcpy_from(void *p_dest, const void *p_src, nm_len_t len, const struct nm_data_properties_s *p_props)
copy chunks of data.
Definition nm_data.h:899
static void nm_data_op_apply(void *__restrict__ p_ptr, nm_len_t len, struct nm_data_op_s *__restrict__ p_op)
apply op on block (p_ptr, len)
Definition nm_data.h:1051
static int nm_data_isnull(struct nm_data_s *p_data)
Definition nm_data.h:536
static void nm_data_op_apply_chunk(void *__restrict__ p_ptr, nm_len_t len, struct nm_data_op_context_chunk_s *__restrict__ p_chunk)
Definition nm_data.h:1006
static void nm_data_op_apply_getprops(void *__restrict__ p_ptr, nm_len_t len, struct nm_data_op_context_getprops_s *__restrict__ p_getprops)
Definition nm_data.h:988
static void nm_data_null_build(struct nm_data_s *p_data)
Definition nm_data.h:531
const struct nm_data_ops_s nm_data_ops_contiguous
nm_data_propertie_gpu_preinit & p_data
Definition nm_data.h:530
const struct nm_data_ops_s nm_data_ops_datav
uint16_t len
chunk len
Definition nm_headers.h:0
nm_len_t chunk_len
length of this chunk
Definition nm_headers.h:4
nm_len_t chunk_offset
offset of the enclosed chunk
Definition nm_headers.h:4
#define NM_FATAL(format,...)
Definition nm_log.h:36
struct nm_mpi_operator_s * p_op
nm_len_t size
size of the one-sided data (not including target-side completion)
uint64_t nm_len_t
data length used by nmad
Definition nm_types.h:68
data descriptor for contiguous data
Definition nm_data.h:545
nm_len_t len
data length
Definition nm_data.h:547
void * ptr
base pointer for block
Definition nm_data.h:546
struct nm_data_coroutine_ucontext_s * p_ucontext
Definition nm_data.h:738
nm_data_coroutine_worker_t p_worker
Definition nm_data.h:741
nm_data_coroutine_kind_t kind
Definition nm_data.h:735
struct nm_data_coroutine_longjmp_s * p_longjmp
Definition nm_data.h:739
union nm_data_coroutine_s::@21 impl
data descriptor for datav in a nm_data (embeds a vector of nm_data in nm_data)
Definition nm_data.h:583
struct nm_datav_s * p_datav
Definition nm_data.h:584
data as an excerpt of another data.
Definition nm_data.h:602
nm_len_t chunk_offset
Definition nm_data.h:603
nm_len_t chunk_len
Definition nm_data.h:604
struct nm_data_s * p_data
Definition nm_data.h:605
data descriptor for iov data (embeds iovec in nm_data)
Definition nm_data.h:564
const struct iovec * v
Definition nm_data.h:565
data descriptor for 'null' data
Definition nm_data.h:526
int dummy
unused, to avoid non-portable empty structure
Definition nm_data.h:527
nm_len_t chunk_offset
offset for begin of copy at destination
Definition nm_data.h:149
nm_len_t done
offset done so far at destination
Definition nm_data.h:151
struct nm_data_op_s * p_op
operation to apply on chunk
Definition nm_data.h:152
nm_len_t chunk_len
length to copy
Definition nm_data.h:150
const struct nm_data_s * p_data
Definition nm_data.h:126
void *__restrict__ p_dest_ptr
Definition nm_data.h:125
const void *__restrict__ p_src_ptr
Definition nm_data.h:131
const struct nm_data_s * p_data
Definition nm_data.h:132
nm_data_apply_t p_apply
Definition nm_data.h:137
struct nm_data_properties_s props
Definition nm_data.h:144
void * p_blockend
end of previous block
Definition nm_data.h:143
struct nm_data_slicer_s * p_slicer
Definition nm_data.h:157
union nm_data_op_s::@20 context
struct nm_data_op_context_nop_s nop
Definition nm_data.h:165
struct nm_data_op_context_copy_to_s copy_to
Definition nm_data.h:167
struct nm_data_op_context_getprops_s getprops
Definition nm_data.h:169
struct nm_data_op_context_copy_from_s copy_from
Definition nm_data.h:166
struct nm_data_op_context_chunk_s chunk
Definition nm_data.h:170
enum nm_data_op_kind_e kind
Definition nm_data.h:162
struct nm_data_op_context_slicer_coroutine_s slicer_coroutine
Definition nm_data.h:171
struct nm_data_op_context_dynamic_s dynamic
Definition nm_data.h:168
set of operations available on data type.
Definition nm_data.h:188
nm_data_properties_compute_t p_properties_compute
optimized function to compute data properties (optional)
Definition nm_data.h:190
nm_data_traversal_t p_traversal
operation to apply a given function to all chunks of data (required)
Definition nm_data.h:189
block of static properties for a given data descriptor
Definition nm_data.h:93
nm_len_t size
total size in bytes (accumulator)
Definition nm_data.h:95
int is_contig
data is contiguous; data may be contiguous even with blocks > 1, if blocks are next to each other
Definition nm_data.h:96
nm_len_t blocks
number of blocks; -1 if properties are not initialized
Definition nm_data.h:94
a data descriptor, used to pack/unpack data from app layout to/from contiguous buffers
Definition nm_data.h:199
struct nm_data_ops_s ops
collection of iterators
Definition nm_data.h:200
struct nm_data_properties_s props
cache for properties
Definition nm_data.h:201
char _content[64]
placeholder for type-dependent content
Definition nm_data.h:202
internal state of a data slicer.
Definition nm_data.h:760
struct nm_data_op_s op
op to apply on the current slice
Definition nm_data.h:768
struct nm_data_slicer_s::@22::@25 contig
slicer for contiguous data; no context switch
void * p_baseptr
base pointer of the data
Definition nm_data.h:774
volatile nm_len_t slice_len
length of the current slice
Definition nm_data.h:769
struct nm_data_coroutine_s coroutine
Definition nm_data.h:770
nm_len_t done
length of data processed so far
Definition nm_data.h:763
nm_data_slicer_kind_t kind
Definition nm_data.h:761
const struct nm_data_s * p_data
Definition nm_data.h:762
encapsulate a dynamic vector of nm_data
Definition nm_data.h:254
int commited
Definition nm_data.h:259
struct nm_data_s data[4]
vector of data
Definition nm_data.h:256
int n_data
number of entries actually used in the above array
Definition nm_data.h:257
struct nm_data_s * p_data
vector of nm_data; either dynamically allocated, or points to data[0]
Definition nm_data.h:255
int allocated
allocated number of entries in p_data
Definition nm_data.h:258