diff --git a/src_c/IMB_ones_accu.c b/src_c/IMB_ones_accu.c index a811ac8..98c5c84 100644 --- a/src_c/IMB_ones_accu.c +++ b/src_c/IMB_ones_accu.c @@ -115,8 +115,8 @@ Output variables: int i; #ifdef CHECK - int asize = (int) sizeof(assign_type); - int root = (c_info->rank == 0); + const int asize = (int) sizeof(assign_type); + int root = (c_info->rank == 0); defect = 0; #endif @@ -134,31 +134,70 @@ Output variables: *time = 0.; else { if (!RUN_MODE->AGGREGATE) { + /* Measure only the RMA critical section: fence → Accumulate → fence. + * All target initialization and validation are performed outside timing. */ + double t_sum = 0.0; *time = MPI_Wtime(); + for (i = 0; i < ITERATIONS->n_sample; i++) { - MPI_ERRHAND(MPI_Accumulate((char*)c_info->s_buffer + i % ITERATIONS->s_cache_iter * ITERATIONS->s_offs, - s_num, c_info->red_data_type, - 0, i % ITERATIONS->r_cache_iter * r_off, - r_num, c_info->red_data_type, c_info->op_type, - c_info->WIN)); +#ifdef CHECK + /* Initialize the target buffer BEFORE the first RMA operation for this sample */ + { + root = (c_info->rank == 0); + if (root) { + char* tgt = (char*)c_info->r_buffer + + (MPI_Aint)(i % ITERATIONS->r_cache_iter) * ITERATIONS->r_offs; + IMB_ass_buf(tgt, 0, 0, (size > 0) ? size - 1 : 0, 0); + } + /* Synchronize initialization across all ranks before starting the epoch */ + MPI_Barrier(c_info->communicator); + } +#endif + + /* Time only the RMA epoch and operation(s). 
*/ + double t0 = MPI_Wtime(); + + /* Start RMA epoch */ + MPI_ERRHAND(MPI_Win_fence(MPI_MODE_NOPRECEDE, c_info->WIN)); + + MPI_ERRHAND(MPI_Accumulate( + (char*)c_info->s_buffer + + (MPI_Aint)(i % ITERATIONS->s_cache_iter) * ITERATIONS->s_offs, + s_num, c_info->red_data_type, + /*target=*/0, + /*target_disp (in elements):*/ + (MPI_Aint)((i % ITERATIONS->r_cache_iter) * r_off), + r_num, c_info->red_data_type, c_info->op_type, + c_info->WIN)); + + /* End RMA epoch and ensure completion */ + MPI_ERRHAND(MPI_Win_fence(MPI_MODE_NOSUCCEED, c_info->WIN)); + + double t1 = MPI_Wtime(); + t_sum += (t1 - t0); - MPI_ERRHAND(MPI_Win_fence(0, c_info->WIN)); #ifdef CHECK - if (root) { - CHK_DIFF("Accumulate", c_info, (char*)c_info->r_buffer + i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs, - 0, size, size, asize, - put, 0, ITERATIONS->n_sample, i, - -1, &defect); - IMB_ass_buf((char*)c_info->r_buffer + i%ITERATIONS->r_cache_iter*ITERATIONS->r_offs, 0, 0, - (size > 0) ? size - 1 : 0, 0); + { + root = (c_info->rank == 0); + if (root) { + CHK_DIFF("Accumulate", c_info, + (char*)c_info->r_buffer + + (MPI_Aint)(i % ITERATIONS->r_cache_iter) * ITERATIONS->r_offs, + 0, size, size, asize, + put, 0, ITERATIONS->n_sample, i, + -1, &defect); + IMB_ass_buf((char*)c_info->r_buffer + + (MPI_Aint)(i % ITERATIONS->r_cache_iter) * ITERATIONS->r_offs, + 0, 0, (size > 0) ? size - 1 : 0, 0); + } + MPI_Barrier(c_info->communicator); } - MPI_Barrier(c_info->communicator); #endif } - *time = (MPI_Wtime() - *time) / ITERATIONS->n_sample; + *time = t_sum / ITERATIONS->n_sample; } if (RUN_MODE->AGGREGATE) { @@ -166,7 +205,23 @@ Output variables: for (i = 0; i < N_BARR; i++) MPI_Barrier(c_info->communicator); +#ifdef CHECK + /* Initialize ALL target slots before starting the epoch */ + { + root = (c_info->rank == 0); + if (root) { + for (int k = 0; k < ITERATIONS->r_cache_iter; k++) { + char* tgt = (char*)c_info->r_buffer + (MPI_Aint)k * ITERATIONS->r_offs; + IMB_ass_buf(tgt, 0, 0, (size > 0) ? 
size - 1 : 0, 0); + } + } + MPI_Barrier(c_info->communicator); + } +#endif *time = MPI_Wtime(); + /* Start one large RMA epoch for all Accumulate operations */ + MPI_ERRHAND(MPI_Win_fence(MPI_MODE_NOPRECEDE, c_info->WIN)); + #ifdef CHECK for (i = 0; i < ITERATIONS->r_cache_iter; i++) @@ -174,6 +229,7 @@ Output variables: for (i = 0; i < ITERATIONS->n_sample; i++) #endif { + MPI_ERRHAND(MPI_Accumulate((char*)c_info->s_buffer + i%ITERATIONS->s_cache_iter*ITERATIONS->s_offs, s_num, c_info->red_data_type, 0, i%ITERATIONS->r_cache_iter*r_off, @@ -181,9 +237,14 @@ Output variables: c_info->WIN)); } - MPI_ERRHAND(MPI_Win_fence(0, c_info->WIN)); + /* End the epoch and ensure all updates are visible */ + MPI_ERRHAND(MPI_Win_fence(MPI_MODE_NOSUCCEED, c_info->WIN)); +#ifdef CHECK + *time = (MPI_Wtime() - *time) / ITERATIONS->r_cache_iter; +#else *time = (MPI_Wtime() - *time) / ITERATIONS->n_sample; +#endif #ifdef CHECK if (root) {