me_world has been set to the rank of the task in
the MPI communicator MPI_comm_world. This is done inside the
subroutine make_mpi_world().
We instrumented three regions inside the code. The first region, with
the label ``kern'', measures the performance of the entire
kernel of the code. The second region, labeled
``communication'', measures the performance of the routine
``Halo_swap()''. Finally, the third region, labeled
``working'', measures the performance of the routine
``Update_image()''. The second and third regions are placed
inside an iteration loop. In each iteration these regions are restarted
and their counters are incremented accordingly. Please note that the
regions ``communication'' and ``working'' are nested
inside the region ``kern''.
!> Driver for the HPM-instrumented image kernel.
!!
!! Sets up the MPI world and Cartesian communicators, then runs Nb_iter
!! iterations of a halo exchange followed by an image update. Three HPM
!! regions are instrumented:
!!   1 "kern"          - the whole kernel (encloses regions 2 and 3)
!!   2 "communication" - the call to Halo_swap
!!   3 "working"       - the call to Update_image
!! Wall-clock stamps from now_time() are accumulated alongside the HPM
!! regions into halo_time, update_time and elapsed_time, which are then
!! reported by Print_times().
program image_main
  use image_param
  use image_mpi_comm
  use image_work
  use image_time
  use time_tool
  implicit none
#include "f_hpm.h"
  ! a_time / b_time / c_time: stamps before the halo swap, between the
  ! two phases, and after the image update of the current iteration.
  real(kind=8) :: a_time, b_time, c_time
  ! s_time: stamp taken when the kernel starts.
  real(kind=8) :: s_time
  integer :: iter

  call make_mpi_world()
  call make_cart_world()
  call edge_init('r')

  ! make sure we start together
  call sync_this(cart_comm)

  ! set times to 0
  call zero_timers()

  call f_hpminit( me_world, "case_image_normal_nohott" )
  call f_hpmstart( 1, "kern" )

  s_time = now_time()
  ! Seed c_time so that elapsed_time below is well defined (zero) even
  ! when Nb_iter < 1 and the loop body never runs. For Nb_iter >= 1 the
  ! value is overwritten in the loop, so results are unchanged.
  c_time = s_time

  iterloop: do iter = 1, Nb_iter
    call f_hpmstart( 2, "communication" )
    a_time = now_time()
    call Halo_swap(image)
    call f_hpmstop( 2 )

    call f_hpmstart( 3, "working" )
    ! b_time is taken after region 2 is stopped and region 3 started, so
    ! the a_time..b_time interval also spans those two HPM calls.
    b_time = now_time()
    call Update_image()
    c_time = now_time()
    call f_hpmstop( 3 )

    halo_time   = halo_time   + b_time - a_time
    update_time = update_time + c_time - b_time
  enddo iterloop

  call f_hpmstop( 1 )

  ! Elapsed time runs from the kernel start to the end of the last
  ! Update_image call (or is zero if no iteration was executed).
  elapsed_time = c_time - s_time

  call Print_times()
  call f_hpmterminate( me_world )
  call shut_mpi_world()
end program image_main