From 83a16997d9efd6375f230a90a8eda6e526a0876e Mon Sep 17 00:00:00 2001 From: JuliaRS Date: Tue, 13 Jan 2026 10:02:41 -0600 Subject: [PATCH 1/3] src_c/IMB_ones_accu.c: Fix RMA synchronization and buffer initialization - Added proper MPI_Win_fence calls to ensure all MPI_Accumulate operations occur within valid RMA epochs (per-iteration for NON-AGGREGATE mode, single epoch for AGGREGATE mode). - Initialized target buffer before the first RMA operation to prevent validation errors on sample #0 when CHECK is enabled. - Added MPI_Barrier for synchronization after initialization. - Updated comments to English for clarity. This resolves issues with "Got invalid buffer" during validation and "Wrong synchronization of RMA calls" runtime errors. --- src_c/IMB_ones_accu.c | 78 ++++++++++++++++++++++++++++++++++--------- 1 file changed, 63 insertions(+), 15 deletions(-) diff --git a/src_c/IMB_ones_accu.c b/src_c/IMB_ones_accu.c index a811ac8a..d5c3b981 100644 --- a/src_c/IMB_ones_accu.c +++ b/src_c/IMB_ones_accu.c @@ -137,24 +137,53 @@ Output variables: *time = MPI_Wtime(); + for (i = 0; i < ITERATIONS->n_sample; i++) { - MPI_ERRHAND(MPI_Accumulate((char*)c_info->s_buffer + i % ITERATIONS->s_cache_iter * ITERATIONS->s_offs, - s_num, c_info->red_data_type, - 0, i % ITERATIONS->r_cache_iter * r_off, - r_num, c_info->red_data_type, c_info->op_type, - c_info->WIN)); +#ifdef CHECK + /* Initialize the target buffer BEFORE the first RMA operation for this sample */ + { + const int root = (c_info->rank == 0); + if (root) { + char* tgt = (char*)c_info->r_buffer + + (MPI_Aint)(i % ITERATIONS->r_cache_iter) * ITERATIONS->r_offs; + IMB_ass_buf(tgt, 0, 0, (size > 0) ? 
size - 1 : 0, 0); + } + /* Synchronize initialization across all ranks before starting the epoch */ + MPI_Barrier(c_info->communicator); + } +#endif + /* Start RMA epoch */ + MPI_ERRHAND(MPI_Win_fence(MPI_MODE_NOPRECEDE, c_info->WIN)); + + MPI_ERRHAND(MPI_Accumulate( + (char*)c_info->s_buffer + + (MPI_Aint)(i % ITERATIONS->s_cache_iter) * ITERATIONS->s_offs, + s_num, c_info->red_data_type, + /*target=*/0, + /*target_disp (in elements):*/ + (MPI_Aint)((i % ITERATIONS->r_cache_iter) * r_off), + r_num, c_info->red_data_type, c_info->op_type, + c_info->WIN)); + + /* End RMA epoch and ensure completion */ + MPI_ERRHAND(MPI_Win_fence(MPI_MODE_NOSUCCEED, c_info->WIN)); - MPI_ERRHAND(MPI_Win_fence(0, c_info->WIN)); #ifdef CHECK - if (root) { - CHK_DIFF("Accumulate", c_info, (char*)c_info->r_buffer + i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs, - 0, size, size, asize, - put, 0, ITERATIONS->n_sample, i, - -1, &defect); - IMB_ass_buf((char*)c_info->r_buffer + i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs, 0, 0, - (size > 0) ? size - 1 : 0, 0); + { + const int root = (c_info->rank == 0); + if (root) { + CHK_DIFF("Accumulate", c_info, + (char*)c_info->r_buffer + + (MPI_Aint)(i % ITERATIONS->r_cache_iter) * ITERATIONS->r_offs, + 0, size, size, asize, + put, 0, ITERATIONS->n_sample, i, + -1, &defect); + IMB_ass_buf((char*)c_info->r_buffer + + (MPI_Aint)(i % ITERATIONS->r_cache_iter) * ITERATIONS->r_offs, + 0, 0, (size > 0) ? 
size - 1 : 0, 0); + } + MPI_Barrier(c_info->communicator); } - MPI_Barrier(c_info->communicator); #endif } @@ -168,12 +197,30 @@ Output variables: *time = MPI_Wtime(); + /* Start one large RMA epoch for all Accumulate operations */ + MPI_ERRHAND(MPI_Win_fence(MPI_MODE_NOPRECEDE, c_info->WIN)); + +#ifdef CHECK + /* Initialize ALL target slots before starting the epoch */ + { + const int root = (c_info->rank == 0); + if (root) { + for (int k = 0; k < ITERATIONS->r_cache_iter; k++) { + char* tgt = (char*)c_info->r_buffer + (MPI_Aint)k * ITERATIONS->r_offs; + IMB_ass_buf(tgt, 0, 0, (size > 0) ? size - 1 : 0, 0); + } + } + MPI_Barrier(c_info->communicator); + } +#endif + #ifdef CHECK for (i = 0; i < ITERATIONS->r_cache_iter; i++) #else for (i = 0; i < ITERATIONS->n_sample; i++) #endif { + MPI_ERRHAND(MPI_Accumulate((char*)c_info->s_buffer + i%ITERATIONS->s_cache_iter*ITERATIONS->s_offs, s_num, c_info->red_data_type, 0, i%ITERATIONS->r_cache_iter*r_off, @@ -181,7 +228,8 @@ Output variables: c_info->WIN)); } - MPI_ERRHAND(MPI_Win_fence(0, c_info->WIN)); + /* End the epoch and ensure all updates are visible */ + MPI_ERRHAND(MPI_Win_fence(MPI_MODE_NOSUCCEED, c_info->WIN)); *time = (MPI_Wtime() - *time) / ITERATIONS->n_sample; From d59ef1ad8ad04fd762680473fc9f2118a9b50567 Mon Sep 17 00:00:00 2001 From: JuliaRS Date: Sun, 18 Jan 2026 08:43:25 -0600 Subject: [PATCH 2/3] fix AGGREGATE time normalization in -DCHECK mode --- src_c/IMB_ones_accu.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src_c/IMB_ones_accu.c b/src_c/IMB_ones_accu.c index d5c3b981..f9510525 100644 --- a/src_c/IMB_ones_accu.c +++ b/src_c/IMB_ones_accu.c @@ -231,7 +231,11 @@ Output variables: /* End the epoch and ensure all updates are visible */ MPI_ERRHAND(MPI_Win_fence(MPI_MODE_NOSUCCEED, c_info->WIN)); +#ifdef CHECK + *time = (MPI_Wtime() - *time) / ITERATIONS->r_cache_iter; +#else *time = (MPI_Wtime() - *time) / ITERATIONS->n_sample; +#endif #ifdef CHECK if (root) { From ddd6f5ae460ffd33fe47e3c4d7f226d5e99102f2 Mon Sep 17 
00:00:00 2001 From: JuliaRS Date: Sun, 18 Jan 2026 09:21:56 -0600 Subject: [PATCH 3/3] fix imb-ext: correct timing and validation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Measure only fence → Accumulate → fence (exclude init/validation/barriers). - Move target initialization and CHK_DIFF outside the timed region. - Normalize AGGREGATE time by r_cache_iter when CHECK is enabled. - Keep root a plain int: the CHECK blocks re-assign it, so a const qualifier would not compile. Improves accuracy of AGGREGATE and NON-AGGREGATE results with -DCHECK. --- src_c/IMB_ones_accu.c | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) diff --git a/src_c/IMB_ones_accu.c b/src_c/IMB_ones_accu.c index f9510525..98c5c84b 100644 --- a/src_c/IMB_ones_accu.c +++ b/src_c/IMB_ones_accu.c @@ -115,8 +115,8 @@ Output variables: int i; #ifdef CHECK - int asize = (int) sizeof(assign_type); + const int asize = (int) sizeof(assign_type); int root = (c_info->rank == 0); defect = 0; #endif @@ -134,6 +134,9 @@ Output variables: *time = 0.; else { if (!RUN_MODE->AGGREGATE) { + /* Measure only the RMA critical section: fence → Accumulate → fence. + * All target initialization and validation are performed outside timing. */ + double t_sum = 0.0; *time = MPI_Wtime(); @@ -142,7 +145,7 @@ Output variables: #ifdef CHECK /* Initialize the target buffer BEFORE the first RMA operation for this sample */ { - const int root = (c_info->rank == 0); + root = (c_info->rank == 0); if (root) { char* tgt = (char*)c_info->r_buffer + (MPI_Aint)(i % ITERATIONS->r_cache_iter) * ITERATIONS->r_offs; @@ -152,6 +155,10 @@ Output variables: MPI_Barrier(c_info->communicator); } #endif + + /* Time only the RMA epoch and operation(s). 
*/ + double t0 = MPI_Wtime(); + /* Start RMA epoch */ MPI_ERRHAND(MPI_Win_fence(MPI_MODE_NOPRECEDE, c_info->WIN)); @@ -168,9 +175,12 @@ Output variables: /* End RMA epoch and ensure completion */ MPI_ERRHAND(MPI_Win_fence(MPI_MODE_NOSUCCEED, c_info->WIN)); + double t1 = MPI_Wtime(); + t_sum += (t1 - t0); + #ifdef CHECK { - const int root = (c_info->rank == 0); + root = (c_info->rank == 0); if (root) { CHK_DIFF("Accumulate", c_info, (char*)c_info->r_buffer @@ -187,7 +197,7 @@ Output variables: #endif } - *time = (MPI_Wtime() - *time) / ITERATIONS->n_sample; + *time = t_sum / ITERATIONS->n_sample; } if (RUN_MODE->AGGREGATE) { @@ -195,15 +205,10 @@ Output variables: for (i = 0; i < N_BARR; i++) MPI_Barrier(c_info->communicator); - *time = MPI_Wtime(); - - /* Start one large RMA epoch for all Accumulate operations */ - MPI_ERRHAND(MPI_Win_fence(MPI_MODE_NOPRECEDE, c_info->WIN)); - #ifdef CHECK /* Initialize ALL target slots before starting the epoch */ { - const int root = (c_info->rank == 0); + root = (c_info->rank == 0); if (root) { for (int k = 0; k < ITERATIONS->r_cache_iter; k++) { char* tgt = (char*)c_info->r_buffer + (MPI_Aint)k * ITERATIONS->r_offs; @@ -213,6 +218,10 @@ Output variables: MPI_Barrier(c_info->communicator); } #endif + *time = MPI_Wtime(); + /* Start one large RMA epoch for all Accumulate operations */ + MPI_ERRHAND(MPI_Win_fence(MPI_MODE_NOPRECEDE, c_info->WIN)); + #ifdef CHECK for (i = 0; i < ITERATIONS->r_cache_iter; i++)