Skip to content
代码片段 群组 项目
提交 b0a71744 编辑于 作者: Sebastian Ohlmann's avatar Sebastian Ohlmann
浏览文件

Fix kernel for periodic boundaries

The kernel used only one grid dimension to loop over the periodic
points. This, however, fails if there are more than about 65000 points
in the periodic boundary. Rearrange the grid to split this number over
the second and third dimensions to avoid this issue.
无相关合并请求
......@@ -26,7 +26,7 @@ __kernel void boundaries_periodic(const int nper,
__global double * __restrict ff,
const int ldff){
const int ist = get_global_id(0);
const int iper = get_global_id(1);
const int iper = get_global_id(1) + get_global_size(1)*get_global_id(2);
if(iper >= nper) return;
......@@ -45,7 +45,7 @@ __kernel void boundaries_periodic_corr(const int nper,
const int np,
const int offset){
const int ist = get_global_id(0);
const int iper = get_global_id(1);
const int iper = get_global_id(1) + get_global_size(1)*get_global_id(2);
if(iper >= nper) return;
......
......@@ -377,7 +377,7 @@ contains
integer, allocatable :: recv_disp(:), recv_count(:)
integer :: ipart, npart, maxsend, maxrecv, ldbuffer, ip2
type(accel_kernel_t), save :: kernel_send, kernel_recv, kernel_recv_corr, kernel, kernel_corr
integer(int64) :: wgsize
integer(int64) :: wgsize, dim2, dim3
type(accel_mem_t) :: buff_send
type(accel_mem_t) :: buff_recv
......@@ -709,9 +709,11 @@ contains
call accel_set_kernel_arg(kernel, 3, log2(int(ffb%pack_size_real(1), int32)))
wgsize = accel_kernel_workgroup_size(kernel)/ffb%pack_size_real(1)
dim3 = boundaries%nper/(accel%max_grid_dim(2)*wgsize) + 1
dim2 = min(accel%max_grid_dim(2)*wgsize, pad(boundaries%nper, wgsize))
call accel_kernel_run(kernel, (/ffb%pack_size_real(1), pad(boundaries%nper, wgsize)/), &
(/ffb%pack_size_real(1), wgsize/))
call accel_kernel_run(kernel, (/ffb%pack_size_real(1), dim2, dim3/), &
(/ffb%pack_size_real(1), wgsize, 1_int64/))
else
ASSERT(R_TYPE_VAL == TYPE_CMPLX)
......@@ -726,9 +728,11 @@ contains
call accel_set_kernel_arg(kernel_corr, 6, offset)
wgsize = accel_kernel_workgroup_size(kernel_corr)/ffb%pack_size(1)
dim3 = boundaries%nper/(accel%max_grid_dim(2)*wgsize) + 1
dim2 = min(accel%max_grid_dim(2)*wgsize, pad(boundaries%nper, wgsize))
call accel_kernel_run(kernel_corr, (/ffb%pack_size(1), pad(boundaries%nper, wgsize)/), &
(/ffb%pack_size(1), wgsize/))
call accel_kernel_run(kernel_corr, (/ffb%pack_size(1), dim2, dim3/), &
(/ffb%pack_size(1), wgsize, 1_int64/))
end if
end if
if(.not. optional_default(async, .false.)) call accel_finish()
......
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册