Skip to content
代码片段 群组 项目
opencl.F90 49.9 KB
Newer Older

  subroutine opencl_release_program(prog)
    type(cl_program),    intent(inout) :: prog

    integer :: ierr

    PUSH_SUB(opencl_release_program)

    call clReleaseProgram(prog, ierr)
    if(ierr /= CL_SUCCESS) call opencl_print_error(ierr, "clReleaseProgram")

    POP_SUB(opencl_release_program)
  end subroutine opencl_release_program

  ! -----------------------------------------------

  subroutine opencl_release_kernel(prog)
    type(cl_kernel),      intent(inout) :: prog

    integer :: ierr

    PUSH_SUB(opencl_release_kernel)

    call clReleaseKernel(prog, ierr)
    if(ierr /= CL_SUCCESS) call opencl_print_error(ierr, "clReleaseKernel")

    POP_SUB(opencl_release_kernel)
  end subroutine opencl_release_kernel

  ! -----------------------------------------------
  subroutine opencl_create_kernel(kernel, prog, name)
    type(cl_kernel),  intent(inout) :: kernel
    type(cl_program), intent(inout) :: prog
    character(len=*), intent(in)    :: name

    integer :: ierr
    type(profile_t), save :: prof

    PUSH_SUB(opencl_create_kernel)
    call profiling_in(prof, "CL_BUILD_KERNEL", exclude = .true.)

    kernel = clCreateKernel(prog, name, ierr)
    if(ierr /= CL_SUCCESS) call opencl_print_error(ierr, "clCreateKernel")

    call profiling_out(prof)
    POP_SUB(opencl_create_kernel)
  end subroutine opencl_create_kernel

  ! ------------------------------------------------

  subroutine opencl_print_error(ierr, name)
    integer,          intent(in) :: ierr
    character(len=*), intent(in) :: name

    character(len=40) :: errcode

    PUSH_SUB(opencl_print_error)

    select case(ierr)
    case(CL_SUCCESS); errcode = 'CL_SUCCESS '
    case(CL_DEVICE_NOT_FOUND); errcode = 'CL_DEVICE_NOT_FOUND '
    case(CL_DEVICE_NOT_AVAILABLE); errcode = 'CL_DEVICE_NOT_AVAILABLE '
    case(CL_COMPILER_NOT_AVAILABLE); errcode = 'CL_COMPILER_NOT_AVAILABLE '
    case(CL_MEM_OBJECT_ALLOCATION_FAILURE); errcode = 'CL_MEM_OBJECT_ALLOCATION_FAILURE '
    case(CL_OUT_OF_RESOURCES); errcode = 'CL_OUT_OF_RESOURCES '
    case(CL_OUT_OF_HOST_MEMORY); errcode = 'CL_OUT_OF_HOST_MEMORY '
    case(CL_PROFILING_INFO_NOT_AVAILABLE); errcode = 'CL_PROFILING_INFO_NOT_AVAILABLE '
    case(CL_MEM_COPY_OVERLAP); errcode = 'CL_MEM_COPY_OVERLAP '
    case(CL_IMAGE_FORMAT_MISMATCH); errcode = 'CL_IMAGE_FORMAT_MISMATCH '
    case(CL_IMAGE_FORMAT_NOT_SUPPORTED); errcode = 'CL_IMAGE_FORMAT_NOT_SUPPORTED '
    case(CL_BUILD_PROGRAM_FAILURE); errcode = 'CL_BUILD_PROGRAM_FAILURE '
    case(CL_MAP_FAILURE); errcode = 'CL_MAP_FAILURE '
    case(CL_INVALID_VALUE); errcode = 'CL_INVALID_VALUE '
    case(CL_INVALID_DEVICE_TYPE); errcode = 'CL_INVALID_DEVICE_TYPE '
    case(CL_INVALID_PLATFORM); errcode = 'CL_INVALID_PLATFORM '
    case(CL_INVALID_DEVICE); errcode = 'CL_INVALID_DEVICE '
    case(CL_INVALID_CONTEXT); errcode = 'CL_INVALID_CONTEXT '
    case(CL_INVALID_QUEUE_PROPERTIES); errcode = 'CL_INVALID_QUEUE_PROPERTIES '
    case(CL_INVALID_COMMAND_QUEUE); errcode = 'CL_INVALID_COMMAND_QUEUE '
    case(CL_INVALID_HOST_PTR); errcode = 'CL_INVALID_HOST_PTR '
    case(CL_INVALID_MEM_OBJECT); errcode = 'CL_INVALID_MEM_OBJECT '
    case(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR); errcode = 'CL_INVALID_IMAGE_FORMAT_DESCRIPTOR '
    case(CL_INVALID_IMAGE_SIZE); errcode = 'CL_INVALID_IMAGE_SIZE '
    case(CL_INVALID_SAMPLER); errcode = 'CL_INVALID_SAMPLER '
    case(CL_INVALID_BINARY); errcode = 'CL_INVALID_BINARY '
    case(CL_INVALID_BUILD_OPTIONS); errcode = 'CL_INVALID_BUILD_OPTIONS '
    case(CL_INVALID_PROGRAM); errcode = 'CL_INVALID_PROGRAM '
    case(CL_INVALID_PROGRAM_EXECUTABLE); errcode = 'CL_INVALID_PROGRAM_EXECUTABLE '
    case(CL_INVALID_KERNEL_NAME); errcode = 'CL_INVALID_KERNEL_NAME '
    case(CL_INVALID_KERNEL_DEFINITION); errcode = 'CL_INVALID_KERNEL_DEFINITION '
    case(CL_INVALID_KERNEL); errcode = 'CL_INVALID_KERNEL '
    case(CL_INVALID_ARG_INDEX); errcode = 'CL_INVALID_ARG_INDEX '
    case(CL_INVALID_ARG_VALUE); errcode = 'CL_INVALID_ARG_VALUE '
    case(CL_INVALID_ARG_SIZE); errcode = 'CL_INVALID_ARG_SIZE '
    case(CL_INVALID_KERNEL_ARGS); errcode = 'CL_INVALID_KERNEL_ARGS '
    case(CL_INVALID_WORK_DIMENSION); errcode = 'CL_INVALID_WORK_DIMENSION '
    case(CL_INVALID_WORK_GROUP_SIZE); errcode = 'CL_INVALID_WORK_GROUP_SIZE '
    case(CL_INVALID_WORK_ITEM_SIZE); errcode = 'CL_INVALID_WORK_ITEM_SIZE '
    case(CL_INVALID_GLOBAL_OFFSET); errcode = 'CL_INVALID_GLOBAL_OFFSET '
    case(CL_INVALID_EVENT_WAIT_LIST); errcode = 'CL_INVALID_EVENT_WAIT_LIST '
    case(CL_INVALID_EVENT); errcode = 'CL_INVALID_EVENT '
    case(CL_INVALID_OPERATION); errcode = 'CL_INVALID_OPERATION '
    case(CL_INVALID_GL_OBJECT); errcode = 'CL_INVALID_GL_OBJECT '
    case(CL_INVALID_BUFFER_SIZE); errcode = 'CL_INVALID_BUFFER_SIZE '
    case(CL_INVALID_MIP_LEVEL); errcode = 'CL_INVALID_MIP_LEVEL '
    case(CL_INVALID_GLOBAL_WORK_SIZE); errcode = 'CL_INVALID_GLOBAL_WORK_SIZE '
    case(CL_PLATFORM_NOT_FOUND_KHR); errcode = 'CL_PLATFORM_NOT_FOUND_KHR'
    case default
      write(errcode, '(i10)') ierr
      errcode = 'UNKNOWN ERROR CODE ('//trim(adjustl(errcode))//')'
    end select

    message(1) = 'OpenCL '//trim(name)//' '//trim(errcode)
    call messages_fatal(1)

    POP_SUB(opencl_print_error)
  end subroutine opencl_print_error

  ! ----------------------------------------------------

  subroutine clblas_print_error(ierr, name)
    integer,          intent(in) :: ierr
    character(len=*), intent(in) :: name

    character(len=40) :: errcode

    PUSH_SUB(clblas_print_error)
    case(clblasSuccess);                    errcode = 'clblasSuccess'
    case(clblasInvalidValue);               errcode = 'clblasInvalidValue'
    case(clblasInvalidCommandQueue);        errcode = 'clblasInvalidCommandQueue'
    case(clblasInvalidContext);             errcode = 'clblasInvalidContext'
    case(clblasInvalidMemObject);           errcode = 'clblasInvalidMemObject'
    case(clblasInvalidDevice);              errcode = 'clblasInvalidDevice'
    case(clblasInvalidEventWaitList);       errcode = 'clblasInvalidEventWaitList'
    case(clblasOutOfResources);             errcode = 'clblasOutOfResources'
    case(clblasOutOfHostMemory);            errcode = 'clblasOutOfHostMemory'
    case(clblasInvalidOperation);           errcode = 'clblasInvalidOperation'
    case(clblasCompilerNotAvailable);       errcode = 'clblasCompilerNotAvailable'
    case(clblasBuildProgramFailure );       errcode = 'clblasBuildProgramFailure'
    case(clblasNotImplemented);             errcode = 'clblasNotImplemented'
    case(clblasNotInitialized);             errcode = 'clblasNotInitialized'
    case(clblasInvalidMatA);                errcode = 'clblasInvalidMatA'
    case(clblasInvalidMatB);                errcode = 'clblasInvalidMatB'
    case(clblasInvalidMatC);                errcode = 'clblasInvalidMatC'
    case(clblasInvalidVecX);                errcode = 'clblasInvalidVecX'
    case(clblasInvalidVecY);                errcode = 'clblasInvalidVecY'
    case(clblasInvalidDim);                 errcode = 'clblasInvalidDim'
    case(clblasInvalidLeadDimA);            errcode = 'clblasInvalidLeadDimA'
    case(clblasInvalidLeadDimB);            errcode = 'clblasInvalidLeadDimB'
    case(clblasInvalidLeadDimC);            errcode = 'clblasInvalidLeadDimC'
    case(clblasInvalidIncX);                errcode = 'clblasInvalidIncX'
    case(clblasInvalidIncY);                errcode = 'clblasInvalidIncY'
    case(clblasInsufficientMemMatA);        errcode = 'clblasInsufficientMemMatA'
    case(clblasInsufficientMemMatB);        errcode = 'clblasInsufficientMemMatB'
    case(clblasInsufficientMemMatC);        errcode = 'clblasInsufficientMemMatC'
    case(clblasInsufficientMemVecX);        errcode = 'clblasInsufficientMemVecX'
    case(clblasInsufficientMemVecY);        errcode = 'clblasInsufficientMemVecY'
    case default
      write(errcode, '(i10)') ierr
      errcode = 'UNKNOWN ERROR CODE ('//trim(adjustl(errcode))//')'
    end select
    message(1) = 'clblas '//trim(name)//' '//trim(errcode)
    call messages_fatal(1)

    POP_SUB(clblas_print_error)
  end subroutine clblas_print_error
  ! ----------------------------------------------------
  subroutine clfft_print_error(ierr, name)
    integer,          intent(in) :: ierr
    character(len=*), intent(in) :: name
    select case(ierr)
    case(CLFFT_INVALID_GLOBAL_WORK_SIZE);          errcode = 'CLFFT_INVALID_GLOBAL_WORK_SIZE' 
    case(CLFFT_INVALID_MIP_LEVEL);                 errcode = 'CLFFT_INVALID_MIP_LEVEL' 
    case(CLFFT_INVALID_BUFFER_SIZE);               errcode = 'CLFFT_INVALID_BUFFER_SIZE' 
    case(CLFFT_INVALID_GL_OBJECT);                 errcode = 'CLFFT_INVALID_GL_OBJECT' 
    case(CLFFT_INVALID_OPERATION);                 errcode = 'CLFFT_INVALID_OPERATION' 
    case(CLFFT_INVALID_EVENT);                     errcode = 'CLFFT_INVALID_EVENT' 
    case(CLFFT_INVALID_EVENT_WAIT_LIST);           errcode = 'CLFFT_INVALID_EVENT_WAIT_LIST' 
    case(CLFFT_INVALID_GLOBAL_OFFSET);             errcode = 'CLFFT_INVALID_GLOBAL_OFFSET' 
    case(CLFFT_INVALID_WORK_ITEM_SIZE);            errcode = 'CLFFT_INVALID_WORK_ITEM_SIZE' 
    case(CLFFT_INVALID_WORK_GROUP_SIZE);           errcode = 'CLFFT_INVALID_WORK_GROUP_SIZE' 
    case(CLFFT_INVALID_WORK_DIMENSION);            errcode = 'CLFFT_INVALID_WORK_DIMENSION' 
    case(CLFFT_INVALID_KERNEL_ARGS);               errcode = 'CLFFT_INVALID_KERNEL_ARGS' 
    case(CLFFT_INVALID_ARG_SIZE);                  errcode = 'CLFFT_INVALID_ARG_SIZE' 
    case(CLFFT_INVALID_ARG_VALUE);                 errcode = 'CLFFT_INVALID_ARG_VALUE' 
    case(CLFFT_INVALID_ARG_INDEX);                 errcode = 'CLFFT_INVALID_ARG_INDEX' 
    case(CLFFT_INVALID_KERNEL);                    errcode = 'CLFFT_INVALID_KERNEL' 
    case(CLFFT_INVALID_KERNEL_DEFINITION);         errcode = 'CLFFT_INVALID_KERNEL_DEFINITION' 
    case(CLFFT_INVALID_KERNEL_NAME);               errcode = 'CLFFT_INVALID_KERNEL_NAME' 
    case(CLFFT_INVALID_PROGRAM_EXECUTABLE);        errcode = 'CLFFT_INVALID_PROGRAM_EXECUTABLE' 
    case(CLFFT_INVALID_PROGRAM);                   errcode = 'CLFFT_INVALID_PROGRAM' 
    case(CLFFT_INVALID_BUILD_OPTIONS);             errcode = 'CLFFT_INVALID_BUILD_OPTIONS' 
    case(CLFFT_INVALID_BINARY);                    errcode = 'CLFFT_INVALID_BINARY' 
    case(CLFFT_INVALID_SAMPLER);                   errcode = 'CLFFT_INVALID_SAMPLER' 
    case(CLFFT_INVALID_IMAGE_SIZE);                errcode = 'CLFFT_INVALID_IMAGE_SIZE' 
    case(CLFFT_INVALID_IMAGE_FORMAT_DESCRIPTOR);   errcode = 'CLFFT_INVALID_IMAGE_FORMAT_DESCRIPTOR' 
    case(CLFFT_INVALID_MEM_OBJECT);                errcode = 'CLFFT_INVALID_MEM_OBJECT' 
    case(CLFFT_INVALID_HOST_PTR);                  errcode = 'CLFFT_INVALID_HOST_PTR' 
    case(CLFFT_INVALID_COMMAND_QUEUE);             errcode = 'CLFFT_INVALID_COMMAND_QUEUE' 
    case(CLFFT_INVALID_QUEUE_PROPERTIES);          errcode = 'CLFFT_INVALID_QUEUE_PROPERTIES' 
    case(CLFFT_INVALID_CONTEXT);                   errcode = 'CLFFT_INVALID_CONTEXT' 
    case(CLFFT_INVALID_DEVICE);                    errcode = 'CLFFT_INVALID_DEVICE' 
    case(CLFFT_INVALID_PLATFORM);                  errcode = 'CLFFT_INVALID_PLATFORM' 
    case(CLFFT_INVALID_DEVICE_TYPE);               errcode = 'CLFFT_INVALID_DEVICE_TYPE' 
    case(CLFFT_INVALID_VALUE);                     errcode = 'CLFFT_INVALID_VALUE' 
    case(CLFFT_MAP_FAILURE);                       errcode = 'CLFFT_MAP_FAILURE' 
    case(CLFFT_BUILD_PROGRAM_FAILURE);             errcode = 'CLFFT_BUILD_PROGRAM_FAILURE' 
    case(CLFFT_IMAGE_FORMAT_NOT_SUPPORTED);        errcode = 'CLFFT_IMAGE_FORMAT_NOT_SUPPORTED' 
    case(CLFFT_IMAGE_FORMAT_MISMATCH);             errcode = 'CLFFT_IMAGE_FORMAT_MISMATCH' 
    case(CLFFT_MEM_COPY_OVERLAP);                  errcode = 'CLFFT_MEM_COPY_OVERLAP' 
    case(CLFFT_PROFILING_INFO_NOT_AVAILABLE);      errcode = 'CLFFT_PROFILING_INFO_NOT_AVAILABLE' 
    case(CLFFT_OUT_OF_HOST_MEMORY);                errcode = 'CLFFT_OUT_OF_HOST_MEMORY' 
    case(CLFFT_OUT_OF_RESOURCES);                  errcode = 'CLFFT_OUT_OF_RESOURCES' 
    case(CLFFT_MEM_OBJECT_ALLOCATION_FAILURE);     errcode = 'CLFFT_MEM_OBJECT_ALLOCATION_FAILURE' 
    case(CLFFT_COMPILER_NOT_AVAILABLE);            errcode = 'CLFFT_COMPILER_NOT_AVAILABLE' 
    case(CLFFT_DEVICE_NOT_AVAILABLE);              errcode = 'CLFFT_DEVICE_NOT_AVAILABLE' 
    case(CLFFT_DEVICE_NOT_FOUND);                  errcode = 'CLFFT_DEVICE_NOT_FOUND' 
    case(CLFFT_SUCCESS);                           errcode = 'CLFFT_SUCCESS' 
    case(CLFFT_BUGCHECK);                          errcode = 'CLFFT_BUGCHECK' 
    case(CLFFT_NOTIMPLEMENTED);                    errcode = 'CLFFT_NOTIMPLEMENTED' 
    case(CLFFT_FILE_NOT_FOUND);                    errcode = 'CLFFT_FILE_NOT_FOUND' 
    case(CLFFT_FILE_CREATE_FAILURE);               errcode = 'CLFFT_FILE_CREATE_FAILURE' 
    case(CLFFT_VERSION_MISMATCH);                  errcode = 'CLFFT_VERSION_MISMATCH' 
    case(CLFFT_INVALID_PLAN);                      errcode = 'CLFFT_INVALID_PLAN'
    case(CLFFT_DEVICE_NO_DOUBLE);                  errcode = 'CLFFT_DEVICE_NO_DOUBLE' 
    case(CLFFT_ENDSTATUS);                         errcode = 'CLFFT_ENDSTATUS' 
    case default
      write(errcode, '(i10)') ierr
      errcode = 'UNKNOWN ERROR CODE ('//trim(adjustl(errcode))//')'
    end select
    message(1) = 'clfft '//trim(name)//' '//trim(errcode)
    POP_SUB(clfft_print_error)
  end subroutine clfft_print_error
  ! ----------------------------------------------------
  logical function f90_cl_device_has_extension(device, extension) result(has)
    type(cl_device_id), intent(inout) :: device
    character(len=*),   intent(in)    :: extension
    integer :: cl_status
    character(len=2048) :: all_extensions
    call clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, all_extensions, cl_status)
    has = index(all_extensions, extension) /= 0
  end function f90_cl_device_has_extension
  ! ---------------------------------------------------------
  integer pure function opencl_pad(size, blk) result(pad)
    integer, intent(in) :: size
    integer, intent(in) :: blk
    mm = mod(size, blk)
    if(mm == 0) then
      pad = size
    else
      pad = size + blk - mm
    end if
  end function opencl_pad

  ! ----------------------------------------------------

  subroutine opencl_set_buffer_to_zero(buffer, type, nval, offset)
    type(accel_mem_t), intent(inout) :: buffer
    type(type_t),       intent(in)    :: type
    integer,            intent(in)    :: nval
    integer, optional,  intent(in)    :: offset

    integer :: nval_real, bsize

    PUSH_SUB(opencl_set_buffer_to_zero)

    ASSERT(type == TYPE_CMPLX .or. type == TYPE_FLOAT)

    nval_real = nval*types_get_size(type)/8

    call opencl_set_kernel_arg(set_zero, 0, nval_real)
    call opencl_set_kernel_arg(set_zero, 1, optional_default(offset, 0)*types_get_size(type)/8)
    call opencl_set_kernel_arg(set_zero, 2, buffer)
    bsize = opencl_kernel_workgroup_size(set_zero)
    call opencl_kernel_run(set_zero, (/ opencl_pad(nval_real, bsize) /), (/ bsize /))
    call opencl_finish()
    POP_SUB(opencl_set_buffer_to_zero)
  end subroutine opencl_set_buffer_to_zero
  ! ----------------------------------------------------
  subroutine opencl_check_bandwidth()
    integer :: itime
    integer, parameter :: times = 10
    integer :: size
    real(8) :: time, stime
    real(8) :: read_bw, write_bw
    type(accel_mem_t) :: buff
    FLOAT, allocatable :: data(:)

    call messages_new_line()
    call messages_write('Info: Benchmarking the bandwidth between main memory and device memory')
    call messages_new_line()
    call messages_info()

    call messages_write(' Buffer size   Read bw  Write bw')
    call messages_new_line()
    call messages_write('       [MiB]   [MiB/s]   [MiB/s]')
    call messages_info()

    size = 15000
    do 
      SAFE_ALLOCATE(data(1:size))
      call opencl_create_buffer(buff, CL_MEM_READ_WRITE, TYPE_FLOAT, size)

      stime = loct_clock()
      do itime = 1, times
        call opencl_write_buffer(buff, size, data)
        call opencl_finish()
      end do
      time = (loct_clock() - stime)/dble(times)

      write_bw = dble(size)*8.0_8/time

      stime = loct_clock()
      do itime = 1, times
        call opencl_read_buffer(buff, size, data)
      call opencl_finish()

      time = (loct_clock() - stime)/dble(times)
      read_bw = dble(size)*8.0_8/time

      call messages_write(size*8.0_8/1024.0**2)
      call messages_write(write_bw/1024.0**2, fmt = '(f10.1)')
      call messages_write(read_bw/1024.0**2, fmt = '(f10.1)')
      call messages_info()

      call opencl_release_buffer(buff)

      SAFE_DEALLOCATE_A(data)

      size = int(size*2.0)

      if(size > 50000000) exit
    end do
  end subroutine opencl_check_bandwidth
  ! ----------------------------------------------------
  logical pure function opencl_use_shared_mem() result(use_shared_mem)
    
    use_shared_mem = accel%shared_mem
#include "undef.F90"
#include "real.F90"
#include "opencl_inc.F90"

#include "undef.F90"
#include "complex.F90"
#include "opencl_inc.F90"

#include "undef.F90"
#include "real_single.F90"
#include "opencl_inc.F90"

#include "undef.F90"
#include "complex_single.F90"
#include "opencl_inc.F90"

#include "undef.F90"
#include "integer.F90"
#include "opencl_inc.F90"
end module opencl_oct_m

!! Local Variables:
!! mode: f90
!! coding: utf-8
!! End: