Newer
Older
!!
!! This program is free software; you can redistribute it and/or modify
!! it under the terms of the GNU General Public License as published by
!! the Free Software Foundation; either version 2, or (at your option)
!! any later version.
!!
!! This program is distributed in the hope that it will be useful,
!! but WITHOUT ANY WARRANTY; without even the implied warranty of
!! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
!! GNU General Public License for more details.
!!
!! You should have received a copy of the GNU General Public License
!! along with this program; if not, write to the Free Software
!! Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
!! 02111-1307, USA.
!!
!! $Id: opencl_f.F90 3587 2007-11-22 16:43:00Z xavier $
#include "global.h"
module opencl_m
use c_pointer_m
implicit none
private
opencl_init, &
opencl_end, &
opencl_mem_t, &
opencl_create_buffer, &
opencl_write_buffer, &
opencl_read_buffer, &
opencl_release_buffer, &
opencl_padded_size, &
opencl_finish, &
opencl_set_kernel_arg, &
opencl_max_workgroup_size, &
opencl_kernel_workgroup_size, &
opencl_kernel_run, &
opencl_build_program, &
opencl_release_program, &
opencl_release_kernel, &
private
type(c_ptr) :: mem
integer(SIZEOF_SIZE_T) :: size
integer :: type
type(opencl_t) :: opencl
! the kernels
type(c_ptr), public :: kernel_vpsi
type(c_ptr), public :: kernel_vpsi_spinors
type(c_ptr), public :: set_zero
type(c_ptr), public :: set_zero_part
type(c_ptr), public :: kernel_daxpy
type(c_ptr), public :: kernel_zaxpy
type(c_ptr), public :: kernel_copy
type(c_ptr), public :: dprojector_gather
type(c_ptr), public :: kernel_projector_bra
type(c_ptr), public :: kernel_projector_ket
type(c_ptr), public :: dpack
type(c_ptr), public :: zpack
type(c_ptr), public :: dunpack
type(c_ptr), public :: zunpack
#include "opencl_iface_inc.F90"
interface opencl_create_buffer
module procedure opencl_create_buffer_4, opencl_create_buffer_8
end interface
module procedure iopencl_write_buffer, dopencl_write_buffer, zopencl_write_buffer, &
iopencl_write_buffer_2, dopencl_write_buffer_2, zopencl_write_buffer_2
end interface
module procedure iopencl_read_buffer, dopencl_read_buffer, zopencl_read_buffer, &
iopencl_read_buffer_2, dopencl_read_buffer_2, zopencl_read_buffer_2
interface opencl_set_kernel_arg
module procedure &
opencl_set_kernel_arg_buffer, &
iopencl_set_kernel_arg_data, &
dopencl_set_kernel_arg_data, &
zopencl_set_kernel_arg_data, &
opencl_set_kernel_arg_local
type(profile_t), save :: prof_read, prof_write, prof_kernel_run
pure logical function opencl_is_enabled() result(enabled)
enabled = opencl%enabled
end function opencl_is_enabled
! ------------------------------------------
subroutine opencl_init()
type(c_ptr) :: prog
call push_sub('opencl.opencl_init')
!%Variable DisableOpenCL
!%Type logical
!%Default yes
!%Section Execution::OpenCL
!%Description
!% If Octopus was compiled with OpenCL support, it will try to
!% initialize and use an OpenCL device. By setting this variable
!% to <tt>yes</tt> you tell Octopus not to use OpenCL.
!%End
call parse_logical(datasets_check('DisableOpenCL'), .false., disable)
opencl%enabled = .not. disable
if(disable) then
call pop_sub('opencl.opencl_init')
return
end if
call f90_cl_env_init(opencl%env, trim(conf%share)//'/opencl/')
opencl%max_workgroup_size = f90_cl_max_workgroup_size(opencl%env)
call opencl_build_program(prog, "vpsi.cl")
call opencl_create_kernel(kernel_vpsi, prog, "vpsi")
call opencl_create_kernel(kernel_vpsi_spinors, prog, "vpsi_spinors")
call opencl_release_program(prog)
call opencl_build_program(prog, "set_zero.cl")
call opencl_create_kernel(set_zero, prog, "set_zero")
call opencl_create_kernel(set_zero_part, prog, "set_zero_part")
call opencl_release_program(prog)
call opencl_build_program(prog, "axpy.cl")
call opencl_create_kernel(kernel_daxpy, prog, "daxpy")
call opencl_create_kernel(kernel_zaxpy, prog, "zaxpy")
call opencl_release_program(prog)
call opencl_build_program(prog, "projector.cl")
call opencl_create_kernel(kernel_projector_ket, prog, "projector_ket")
call opencl_create_kernel(kernel_projector_bra, prog, "projector_bra")
call opencl_release_program(prog)
call opencl_build_program(prog, "pack.cl")
call opencl_create_kernel(dpack, prog, "dpack")
call opencl_create_kernel(zpack, prog, "zpack")
call opencl_create_kernel(dunpack, prog, "dunpack")
call opencl_create_kernel(zunpack, prog, "zunpack")
call opencl_release_program(prog)
call opencl_build_program(prog, "copy.cl")
call opencl_create_kernel(kernel_copy, prog, "copy")
call opencl_release_program(prog)
end subroutine opencl_init
! ------------------------------------------
subroutine opencl_end()
call push_sub('opencl.opencl_end')
if(opencl_is_enabled()) then
call opencl_release_kernel(kernel_vpsi)
call opencl_release_kernel(kernel_vpsi_spinors)
call opencl_release_kernel(kernel_daxpy)
call opencl_release_kernel(kernel_copy)
call opencl_release_kernel(kernel_zaxpy)
call opencl_release_kernel(kernel_projector_ket)
call opencl_release_kernel(kernel_projector_bra)
call f90_cl_env_end(opencl%env)
! ------------------------------------------
subroutine opencl_create_buffer_4(this, flags, type, size)
type(opencl_mem_t), intent(inout) :: this
integer, intent(in) :: flags
integer, intent(in) :: type
integer, intent(in) :: size
integer(SIZEOF_SIZE_T) :: fsize
integer :: ierr
call push_sub('opencl.opencl_create_buffer_4')
this%type = type
this%size = size
fsize = size*types_get_size(type)
call f90_cl_create_buffer(this%mem, opencl%env, flags, fsize, ierr)
if(ierr /= CL_SUCCESS) call opencl_print_error(ierr, "create_buffer")
call pop_sub('opencl.opencl_create_buffer_4')
end subroutine opencl_create_buffer_4
! ------------------------------------------
subroutine opencl_create_buffer_8(this, flags, type, size)
type(opencl_mem_t), intent(inout) :: this
integer, intent(in) :: flags
integer, intent(in) :: type
integer(SIZEOF_SIZE_T), intent(in) :: size
integer(SIZEOF_SIZE_T) :: fsize
integer :: ierr
call push_sub('opencl.opencl_create_buffer_8')
this%type = type
this%size = size
fsize = size*types_get_size(type)
call f90_cl_create_buffer(this%mem, opencl%env, flags, fsize, ierr)
if(ierr /= CL_SUCCESS) call opencl_print_error(ierr, "create_buffer")
call pop_sub('opencl.opencl_create_buffer_8')
end subroutine opencl_create_buffer_8
! ------------------------------------------
subroutine opencl_release_buffer(this)
type(opencl_mem_t), intent(inout) :: this
integer :: ierr
this%size = 0
call f90_cl_release_buffer(this%mem, ierr)
if(ierr /= CL_SUCCESS) call opencl_print_error(ierr, "release_buffer")
end subroutine opencl_release_buffer
! ------------------------------------------
integer(SIZEOF_SIZE_T) pure function opencl_get_buffer_size(this) result(size)
type(opencl_mem_t), intent(in) :: this
size = this%size
end function opencl_get_buffer_size
! -----------------------------------------
integer pure function opencl_get_buffer_type(this) result(type)
type(opencl_mem_t), intent(in) :: this
type = this%type
end function opencl_get_buffer_type
! -----------------------------------------
integer(SIZEOF_SIZE_T) function opencl_padded_size(nn) result(psize)
integer, intent(in) :: nn
integer :: modnn, bsize
bsize = opencl_max_workgroup_size()
modnn = mod(nn, bsize)
if(modnn /= 0) psize = psize + bsize - modnn
end function opencl_padded_size
! ------------------------------------------
subroutine opencl_finish()
integer :: ierr
call f90_cl_finish(opencl%env, ierr)
if(ierr /= CL_SUCCESS) call opencl_print_error(ierr, 'cl_finish')
! ------------------------------------------
subroutine opencl_set_kernel_arg_buffer(kernel, narg, buffer)
type(c_ptr), intent(inout) :: kernel
integer, intent(in) :: narg
type(opencl_mem_t), intent(in) :: buffer
call push_sub('opencl.opencl_set_kernel_arg_buffer')
call f90_cl_set_kernel_arg_buf(kernel, narg, buffer%mem, ierr)
if(ierr /= CL_SUCCESS) call opencl_print_error(ierr, "set_kernel_arg_buf")
call pop_sub('opencl.opencl_set_kernel_arg_buffer')
end subroutine opencl_set_kernel_arg_buffer
! ------------------------------------------
subroutine opencl_set_kernel_arg_local(kernel, narg, type, size)
type(c_ptr), intent(inout) :: kernel
integer, intent(in) :: narg
integer, intent(in) :: type
integer, intent(in) :: size
call push_sub('opencl.opencl_set_kernel_arg_local')
call f90_cl_set_kernel_arg_local(kernel, narg, size*types_get_size(type), ierr)
if(ierr /= CL_SUCCESS) call opencl_print_error(ierr, "set_kernel_arg_buf")
call pop_sub('opencl.opencl_set_kernel_arg_local')
end subroutine opencl_set_kernel_arg_local
! ------------------------------------------
subroutine opencl_kernel_run(kernel, globalsizes, localsizes)
type(c_ptr), intent(inout) :: kernel
integer, intent(in) :: globalsizes(:)
integer, intent(in) :: localsizes(:)
integer(SIZEOF_SIZE_T) :: gsizes(1:3)
integer(SIZEOF_SIZE_T) :: lsizes(1:3)
call profiling_in(prof_kernel_run, "CL_KERNEL_RUN")
dim = ubound(globalsizes, dim = 1)
ASSERT(dim == ubound(localsizes, dim = 1))
ASSERT(all(localsizes <= opencl_max_workgroup_size()))
ASSERT(all(mod(globalsizes, localsizes) == 0))
gsizes(1:dim) = int(globalsizes(1:dim), SIZEOF_SIZE_T)
lsizes(1:dim) = int(localsizes(1:dim), SIZEOF_SIZE_T)
call f90_cl_kernel_run(kernel, opencl%env, dim, gsizes(1), lsizes(1), ierr)
if(ierr /= CL_SUCCESS) call opencl_print_error(ierr, "kernel_run")
call opencl_finish()
call profiling_out(prof_kernel_run)
end subroutine opencl_kernel_run
! -----------------------------------------------
integer pure function opencl_max_workgroup_size() result(max_workgroup_size)
max_workgroup_size = opencl%max_workgroup_size
end function opencl_max_workgroup_size
! -----------------------------------------------
integer function opencl_kernel_workgroup_size(kernel) result(workgroup_size)
type(c_ptr), intent(inout) :: kernel
workgroup_size = f90_cl_kernel_wgroup_size(kernel, opencl%env)
end function opencl_kernel_workgroup_size
! -----------------------------------------------
subroutine opencl_build_program(prog, filename)
type(c_ptr), intent(inout) :: prog
character(len=*), intent(in) :: filename
call f90_cl_build_program(prog, opencl%env, filename)
end subroutine opencl_build_program
! -----------------------------------------------
subroutine opencl_release_program(prog)
type(c_ptr), intent(inout) :: prog
integer :: ierr
call f90_cl_release_program(prog, ierr)
if(ierr /= CL_SUCCESS) call opencl_print_error(ierr, "release_program")
end subroutine opencl_release_program
! -----------------------------------------------
subroutine opencl_release_kernel(prog)
type(c_ptr), intent(inout) :: prog
integer :: ierr
call f90_cl_release_kernel(prog, ierr)
if(ierr /= CL_SUCCESS) call opencl_print_error(ierr, "release_kernel")
end subroutine opencl_release_kernel
! -----------------------------------------------
subroutine opencl_create_kernel(kernel, prog, name)
type(c_ptr), intent(inout) :: kernel
type(c_ptr), intent(inout) :: prog
character(len=*), intent(in) :: name
integer :: ierr
call f90_cl_create_kernel(kernel, prog, name, ierr)
if(ierr /= CL_SUCCESS) call opencl_print_error(ierr, "create_kernel")
end subroutine opencl_create_kernel
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
! ------------------------------------------------
subroutine opencl_print_error(ierr, name)
integer, intent(in) :: ierr
character(len=*), intent(in) :: name
character(len=40) :: errcode
select case(ierr)
case(CL_SUCCESS); errcode = 'CL_SUCCESS '
case(CL_DEVICE_NOT_FOUND); errcode = 'CL_DEVICE_NOT_FOUND '
case(CL_DEVICE_NOT_AVAILABLE); errcode = 'CL_DEVICE_NOT_AVAILABLE '
case(CL_COMPILER_NOT_AVAILABLE); errcode = 'CL_COMPILER_NOT_AVAILABLE '
case(CL_MEM_OBJECT_ALLOCATION_FAIL); errcode = 'CL_MEM_OBJECT_ALLOCATION_FAILURE '
case(CL_OUT_OF_RESOURCES); errcode = 'CL_OUT_OF_RESOURCES '
case(CL_OUT_OF_HOST_MEMORY); errcode = 'CL_OUT_OF_HOST_MEMORY '
case(CL_PROFILING_INFO_NOT_AVAILABLE); errcode = 'CL_PROFILING_INFO_NOT_AVAILABLE '
case(CL_MEM_COPY_OVERLAP); errcode = 'CL_MEM_COPY_OVERLAP '
case(CL_IMAGE_FORMAT_MISMATCH); errcode = 'CL_IMAGE_FORMAT_MISMATCH '
case(CL_IMAGE_FORMAT_NOT_SUPPORTED); errcode = 'CL_IMAGE_FORMAT_NOT_SUPPORTED '
case(CL_BUILD_PROGRAM_FAILURE); errcode = 'CL_BUILD_PROGRAM_FAILURE '
case(CL_MAP_FAILURE); errcode = 'CL_MAP_FAILURE '
case(CL_INVALID_VALUE); errcode = 'CL_INVALID_VALUE '
case(CL_INVALID_DEVICE_TYPE); errcode = 'CL_INVALID_DEVICE_TYPE '
case(CL_INVALID_PLATFORM); errcode = 'CL_INVALID_PLATFORM '
case(CL_INVALID_DEVICE); errcode = 'CL_INVALID_DEVICE '
case(CL_INVALID_CONTEXT); errcode = 'CL_INVALID_CONTEXT '
case(CL_INVALID_QUEUE_PROPERTIES); errcode = 'CL_INVALID_QUEUE_PROPERTIES '
case(CL_INVALID_COMMAND_QUEUE); errcode = 'CL_INVALID_COMMAND_QUEUE '
case(CL_INVALID_HOST_PTR); errcode = 'CL_INVALID_HOST_PTR '
case(CL_INVALID_MEM_OBJECT); errcode = 'CL_INVALID_MEM_OBJECT '
case(CL_INVALID_IMAGE_FORMAT_DESC); errcode = 'CL_INVALID_IMAGE_FORMAT_DESCRIPTOR '
case(CL_INVALID_IMAGE_SIZE); errcode = 'CL_INVALID_IMAGE_SIZE '
case(CL_INVALID_SAMPLER); errcode = 'CL_INVALID_SAMPLER '
case(CL_INVALID_BINARY); errcode = 'CL_INVALID_BINARY '
case(CL_INVALID_BUILD_OPTIONS); errcode = 'CL_INVALID_BUILD_OPTIONS '
case(CL_INVALID_PROGRAM); errcode = 'CL_INVALID_PROGRAM '
case(CL_INVALID_PROGRAM_EXECUTABLE); errcode = 'CL_INVALID_PROGRAM_EXECUTABLE '
case(CL_INVALID_KERNEL_NAME); errcode = 'CL_INVALID_KERNEL_NAME '
case(CL_INVALID_KERNEL_DEFINITION); errcode = 'CL_INVALID_KERNEL_DEFINITION '
case(CL_INVALID_KERNEL); errcode = 'CL_INVALID_KERNEL '
case(CL_INVALID_ARG_INDEX); errcode = 'CL_INVALID_ARG_INDEX '
case(CL_INVALID_ARG_VALUE); errcode = 'CL_INVALID_ARG_VALUE '
case(CL_INVALID_ARG_SIZE); errcode = 'CL_INVALID_ARG_SIZE '
case(CL_INVALID_KERNEL_ARGS); errcode = 'CL_INVALID_KERNEL_ARGS '
case(CL_INVALID_WORK_DIMENSION); errcode = 'CL_INVALID_WORK_DIMENSION '
case(CL_INVALID_WORK_GROUP_SIZE); errcode = 'CL_INVALID_WORK_GROUP_SIZE '
case(CL_INVALID_WORK_ITEM_SIZE); errcode = 'CL_INVALID_WORK_ITEM_SIZE '
case(CL_INVALID_GLOBAL_OFFSET); errcode = 'CL_INVALID_GLOBAL_OFFSET '
case(CL_INVALID_EVENT_WAIT_LIST); errcode = 'CL_INVALID_EVENT_WAIT_LIST '
case(CL_INVALID_EVENT); errcode = 'CL_INVALID_EVENT '
case(CL_INVALID_OPERATION); errcode = 'CL_INVALID_OPERATION '
case(CL_INVALID_GL_OBJECT); errcode = 'CL_INVALID_GL_OBJECT '
case(CL_INVALID_BUFFER_SIZE); errcode = 'CL_INVALID_BUFFER_SIZE '
case(CL_INVALID_MIP_LEVEL); errcode = 'CL_INVALID_MIP_LEVEL '
case(CL_INVALID_GLOBAL_WORK_SIZE); errcode = 'CL_INVALID_GLOBAL_WORK_SIZE '
end select
message(1) = 'Error: OpenCL '//trim(name)//' '//trim(errcode)
call write_fatal(1)
end subroutine opencl_print_error
#include "undef.F90"
#include "real.F90"
#include "opencl_inc.F90"
#include "undef.F90"
#include "complex.F90"
#include "opencl_inc.F90"
#include "undef.F90"
#include "integer.F90"
#include "opencl_inc.F90"
end module opencl_m
!! Local Variables:
!! mode: f90
!! coding: utf-8
!! End: