Skip to content
代码片段 群组 项目
opencl.F90 50.7 KB
Newer Older
    call clBuildProgram(prog, trim(string), ierr)

    call clGetProgramBuildInfo(prog, opencl%device, CL_PROGRAM_BUILD_LOG, string, ierrlog)
    if(ierrlog /= CL_SUCCESS) call opencl_print_error(ierrlog, "clGetProgramBuildInfo")

    ! CL_PROGRAM_BUILD_LOG seems to have a useless '\n' in it
    newlen = scan(string, achar(010), back = .true.) - 1
    if(newlen >= 0) string = string(1:newlen)
    
    if(len(trim(string)) > 0) write(stderr, '(a)') trim(string)

    if(ierr /= CL_SUCCESS) call opencl_print_error(ierr, "clBuildProgram")

    call profiling_out(prof)
    POP_SUB(opencl_build_program)
  end subroutine opencl_build_program

  ! -----------------------------------------------

  subroutine opencl_release_program(prog)
    type(cl_program),    intent(inout) :: prog

    integer :: ierr

    PUSH_SUB(opencl_release_program)

    call clReleaseProgram(prog, ierr)
    if(ierr /= CL_SUCCESS) call opencl_print_error(ierr, "clReleaseProgram")

    POP_SUB(opencl_release_program)
  end subroutine opencl_release_program

  ! -----------------------------------------------

  subroutine opencl_release_kernel(prog)
    type(cl_kernel),      intent(inout) :: prog

    integer :: ierr

    PUSH_SUB(opencl_release_kernel)

    call clReleaseKernel(prog, ierr)
    if(ierr /= CL_SUCCESS) call opencl_print_error(ierr, "clReleaseKernel")

    POP_SUB(opencl_release_kernel)
  end subroutine opencl_release_kernel

  ! -----------------------------------------------
  subroutine opencl_create_kernel(kernel, prog, name)
    type(cl_kernel),  intent(inout) :: kernel
    type(cl_program), intent(inout) :: prog
    character(len=*), intent(in)    :: name

    integer :: ierr
    type(profile_t), save :: prof

    PUSH_SUB(opencl_create_kernel)
    call profiling_in(prof, "CL_BUILD_KERNEL", exclude = .true.)

    kernel = clCreateKernel(prog, name, ierr)
    if(ierr /= CL_SUCCESS) call opencl_print_error(ierr, "clCreateKernel")

    call profiling_out(prof)
    POP_SUB(opencl_create_kernel)
  end subroutine opencl_create_kernel

  ! ------------------------------------------------

  subroutine opencl_print_error(ierr, name)
    integer,          intent(in) :: ierr
    character(len=*), intent(in) :: name

    character(len=40) :: errcode

    PUSH_SUB(opencl_print_error)

    select case(ierr)
    case(CL_SUCCESS); errcode = 'CL_SUCCESS '
    case(CL_DEVICE_NOT_FOUND); errcode = 'CL_DEVICE_NOT_FOUND '
    case(CL_DEVICE_NOT_AVAILABLE); errcode = 'CL_DEVICE_NOT_AVAILABLE '
    case(CL_COMPILER_NOT_AVAILABLE); errcode = 'CL_COMPILER_NOT_AVAILABLE '
    case(CL_MEM_OBJECT_ALLOCATION_FAILURE); errcode = 'CL_MEM_OBJECT_ALLOCATION_FAILURE '
    case(CL_OUT_OF_RESOURCES); errcode = 'CL_OUT_OF_RESOURCES '
    case(CL_OUT_OF_HOST_MEMORY); errcode = 'CL_OUT_OF_HOST_MEMORY '
    case(CL_PROFILING_INFO_NOT_AVAILABLE); errcode = 'CL_PROFILING_INFO_NOT_AVAILABLE '
    case(CL_MEM_COPY_OVERLAP); errcode = 'CL_MEM_COPY_OVERLAP '
    case(CL_IMAGE_FORMAT_MISMATCH); errcode = 'CL_IMAGE_FORMAT_MISMATCH '
    case(CL_IMAGE_FORMAT_NOT_SUPPORTED); errcode = 'CL_IMAGE_FORMAT_NOT_SUPPORTED '
    case(CL_BUILD_PROGRAM_FAILURE); errcode = 'CL_BUILD_PROGRAM_FAILURE '
    case(CL_MAP_FAILURE); errcode = 'CL_MAP_FAILURE '
    case(CL_INVALID_VALUE); errcode = 'CL_INVALID_VALUE '
    case(CL_INVALID_DEVICE_TYPE); errcode = 'CL_INVALID_DEVICE_TYPE '
    case(CL_INVALID_PLATFORM); errcode = 'CL_INVALID_PLATFORM '
    case(CL_INVALID_DEVICE); errcode = 'CL_INVALID_DEVICE '
    case(CL_INVALID_CONTEXT); errcode = 'CL_INVALID_CONTEXT '
    case(CL_INVALID_QUEUE_PROPERTIES); errcode = 'CL_INVALID_QUEUE_PROPERTIES '
    case(CL_INVALID_COMMAND_QUEUE); errcode = 'CL_INVALID_COMMAND_QUEUE '
    case(CL_INVALID_HOST_PTR); errcode = 'CL_INVALID_HOST_PTR '
    case(CL_INVALID_MEM_OBJECT); errcode = 'CL_INVALID_MEM_OBJECT '
    case(CL_INVALID_IMAGE_FORMAT_DESCRIPTOR); errcode = 'CL_INVALID_IMAGE_FORMAT_DESCRIPTOR '
    case(CL_INVALID_IMAGE_SIZE); errcode = 'CL_INVALID_IMAGE_SIZE '
    case(CL_INVALID_SAMPLER); errcode = 'CL_INVALID_SAMPLER '
    case(CL_INVALID_BINARY); errcode = 'CL_INVALID_BINARY '
    case(CL_INVALID_BUILD_OPTIONS); errcode = 'CL_INVALID_BUILD_OPTIONS '
    case(CL_INVALID_PROGRAM); errcode = 'CL_INVALID_PROGRAM '
    case(CL_INVALID_PROGRAM_EXECUTABLE); errcode = 'CL_INVALID_PROGRAM_EXECUTABLE '
    case(CL_INVALID_KERNEL_NAME); errcode = 'CL_INVALID_KERNEL_NAME '
    case(CL_INVALID_KERNEL_DEFINITION); errcode = 'CL_INVALID_KERNEL_DEFINITION '
    case(CL_INVALID_KERNEL); errcode = 'CL_INVALID_KERNEL '
    case(CL_INVALID_ARG_INDEX); errcode = 'CL_INVALID_ARG_INDEX '
    case(CL_INVALID_ARG_VALUE); errcode = 'CL_INVALID_ARG_VALUE '
    case(CL_INVALID_ARG_SIZE); errcode = 'CL_INVALID_ARG_SIZE '
    case(CL_INVALID_KERNEL_ARGS); errcode = 'CL_INVALID_KERNEL_ARGS '
    case(CL_INVALID_WORK_DIMENSION); errcode = 'CL_INVALID_WORK_DIMENSION '
    case(CL_INVALID_WORK_GROUP_SIZE); errcode = 'CL_INVALID_WORK_GROUP_SIZE '
    case(CL_INVALID_WORK_ITEM_SIZE); errcode = 'CL_INVALID_WORK_ITEM_SIZE '
    case(CL_INVALID_GLOBAL_OFFSET); errcode = 'CL_INVALID_GLOBAL_OFFSET '
    case(CL_INVALID_EVENT_WAIT_LIST); errcode = 'CL_INVALID_EVENT_WAIT_LIST '
    case(CL_INVALID_EVENT); errcode = 'CL_INVALID_EVENT '
    case(CL_INVALID_OPERATION); errcode = 'CL_INVALID_OPERATION '
    case(CL_INVALID_GL_OBJECT); errcode = 'CL_INVALID_GL_OBJECT '
    case(CL_INVALID_BUFFER_SIZE); errcode = 'CL_INVALID_BUFFER_SIZE '
    case(CL_INVALID_MIP_LEVEL); errcode = 'CL_INVALID_MIP_LEVEL '
    case(CL_INVALID_GLOBAL_WORK_SIZE); errcode = 'CL_INVALID_GLOBAL_WORK_SIZE '
    case(CL_PLATFORM_NOT_FOUND_KHR); errcode = 'CL_PLATFORM_NOT_FOUND_KHR'
    case default
      write(errcode, '(i10)') ierr
      errcode = 'UNKNOWN ERROR CODE ('//trim(adjustl(errcode))//')'
    end select

    message(1) = 'OpenCL '//trim(name)//' '//trim(errcode)
    call messages_fatal(1)

    POP_SUB(opencl_print_error)
  end subroutine opencl_print_error

  ! ----------------------------------------------------

  subroutine clblas_print_error(ierr, name)
    integer,          intent(in) :: ierr
    character(len=*), intent(in) :: name

    character(len=40) :: errcode

    PUSH_SUB(clblas_print_error)
#ifdef HAVE_CLAMDBLAS
    select case(ierr)
    case(clAmdBlasSuccess);                    errcode = 'clAmdBlasSuccess'
    case(clAmdBlasInvalidValue);               errcode = 'clAmdBlasInvalidValue'
    case(clAmdBlasInvalidCommandQueue);        errcode = 'clAmdBlasInvalidCommandQueue'
    case(clAmdBlasInvalidContext);             errcode = 'clAmdBlasInvalidContext'
    case(clAmdBlasInvalidMemObject);           errcode = 'clAmdBlasInvalidMemObject'
    case(clAmdBlasInvalidDevice);              errcode = 'clAmdBlasInvalidDevice'
    case(clAmdBlasInvalidEventWaitList);       errcode = 'clAmdBlasInvalidEventWaitList'
    case(clAmdBlasOutOfResources);             errcode = 'clAmdBlasOutOfResources'
    case(clAmdBlasOutOfHostMemory);            errcode = 'clAmdBlasOutOfHostMemory'
    case(clAmdBlasInvalidOperation);           errcode = 'clAmdBlasInvalidOperation'
    case(clAmdBlasCompilerNotAvailable);       errcode = 'clAmdBlasCompilerNotAvailable'
    case(clAmdBlasBuildProgramFailure );       errcode = 'clAmdBlasBuildProgramFailure'
    case(clAmdBlasNotImplemented);             errcode = 'clAmdBlasNotImplemented'
    case(clAmdBlasNotInitialized);             errcode = 'clAmdBlasNotInitialized'
    case(clAmdBlasInvalidMatA);                errcode = 'clAmdBlasInvalidMatA'
    case(clAmdBlasInvalidMatB);                errcode = 'clAmdBlasInvalidMatB'
    case(clAmdBlasInvalidMatC);                errcode = 'clAmdBlasInvalidMatC'
    case(clAmdBlasInvalidVecX);                errcode = 'clAmdBlasInvalidVecX'
    case(clAmdBlasInvalidVecY);                errcode = 'clAmdBlasInvalidVecY'
    case(clAmdBlasInvalidDim);                 errcode = 'clAmdBlasInvalidDim'
    case(clAmdBlasInvalidLeadDimA);            errcode = 'clAmdBlasInvalidLeadDimA'
    case(clAmdBlasInvalidLeadDimB);            errcode = 'clAmdBlasInvalidLeadDimB'
    case(clAmdBlasInvalidLeadDimC);            errcode = 'clAmdBlasInvalidLeadDimC'
    case(clAmdBlasInvalidIncX);                errcode = 'clAmdBlasInvalidIncX'
    case(clAmdBlasInvalidIncY);                errcode = 'clAmdBlasInvalidIncY'
    case(clAmdBlasInsufficientMemMatA);        errcode = 'clAmdBlasInsufficientMemMatA'
    case(clAmdBlasInsufficientMemMatB);        errcode = 'clAmdBlasInsufficientMemMatB'
    case(clAmdBlasInsufficientMemMatC);        errcode = 'clAmdBlasInsufficientMemMatC'
    case(clAmdBlasInsufficientMemVecX);        errcode = 'clAmdBlasInsufficientMemVecX'
    case(clAmdBlasInsufficientMemVecY);        errcode = 'clAmdBlasInsufficientMemVecY'
    case default
      write(errcode, '(i10)') ierr
      errcode = 'UNKNOWN ERROR CODE ('//trim(adjustl(errcode))//')'
    end select
    message(1) = 'clAmdBlas '//trim(name)//' '//trim(errcode)
    call messages_fatal(1)

    POP_SUB(clblas_print_error)
  end subroutine clblas_print_error
  ! ----------------------------------------------------
  subroutine clfft_print_error(ierr, name)
    integer,          intent(in) :: ierr
    character(len=*), intent(in) :: name
#ifdef HAVE_CLAMDFFT
    select case(ierr)
    case(CLFFT_INVALID_GLOBAL_WORK_SIZE);          errcode = 'CLFFT_INVALID_GLOBAL_WORK_SIZE' 
    case(CLFFT_INVALID_MIP_LEVEL);                 errcode = 'CLFFT_INVALID_MIP_LEVEL' 
    case(CLFFT_INVALID_BUFFER_SIZE);               errcode = 'CLFFT_INVALID_BUFFER_SIZE' 
    case(CLFFT_INVALID_GL_OBJECT);                 errcode = 'CLFFT_INVALID_GL_OBJECT' 
    case(CLFFT_INVALID_OPERATION);                 errcode = 'CLFFT_INVALID_OPERATION' 
    case(CLFFT_INVALID_EVENT);                     errcode = 'CLFFT_INVALID_EVENT' 
    case(CLFFT_INVALID_EVENT_WAIT_LIST);           errcode = 'CLFFT_INVALID_EVENT_WAIT_LIST' 
    case(CLFFT_INVALID_GLOBAL_OFFSET);             errcode = 'CLFFT_INVALID_GLOBAL_OFFSET' 
    case(CLFFT_INVALID_WORK_ITEM_SIZE);            errcode = 'CLFFT_INVALID_WORK_ITEM_SIZE' 
    case(CLFFT_INVALID_WORK_GROUP_SIZE);           errcode = 'CLFFT_INVALID_WORK_GROUP_SIZE' 
    case(CLFFT_INVALID_WORK_DIMENSION);            errcode = 'CLFFT_INVALID_WORK_DIMENSION' 
    case(CLFFT_INVALID_KERNEL_ARGS);               errcode = 'CLFFT_INVALID_KERNEL_ARGS' 
    case(CLFFT_INVALID_ARG_SIZE);                  errcode = 'CLFFT_INVALID_ARG_SIZE' 
    case(CLFFT_INVALID_ARG_VALUE);                 errcode = 'CLFFT_INVALID_ARG_VALUE' 
    case(CLFFT_INVALID_ARG_INDEX);                 errcode = 'CLFFT_INVALID_ARG_INDEX' 
    case(CLFFT_INVALID_KERNEL);                    errcode = 'CLFFT_INVALID_KERNEL' 
    case(CLFFT_INVALID_KERNEL_DEFINITION);         errcode = 'CLFFT_INVALID_KERNEL_DEFINITION' 
    case(CLFFT_INVALID_KERNEL_NAME);               errcode = 'CLFFT_INVALID_KERNEL_NAME' 
    case(CLFFT_INVALID_PROGRAM_EXECUTABLE);        errcode = 'CLFFT_INVALID_PROGRAM_EXECUTABLE' 
    case(CLFFT_INVALID_PROGRAM);                   errcode = 'CLFFT_INVALID_PROGRAM' 
    case(CLFFT_INVALID_BUILD_OPTIONS);             errcode = 'CLFFT_INVALID_BUILD_OPTIONS' 
    case(CLFFT_INVALID_BINARY);                    errcode = 'CLFFT_INVALID_BINARY' 
    case(CLFFT_INVALID_SAMPLER);                   errcode = 'CLFFT_INVALID_SAMPLER' 
    case(CLFFT_INVALID_IMAGE_SIZE);                errcode = 'CLFFT_INVALID_IMAGE_SIZE' 
    case(CLFFT_INVALID_IMAGE_FORMAT_DESCRIPTOR);   errcode = 'CLFFT_INVALID_IMAGE_FORMAT_DESCRIPTOR' 
    case(CLFFT_INVALID_MEM_OBJECT);                errcode = 'CLFFT_INVALID_MEM_OBJECT' 
    case(CLFFT_INVALID_HOST_PTR);                  errcode = 'CLFFT_INVALID_HOST_PTR' 
    case(CLFFT_INVALID_COMMAND_QUEUE);             errcode = 'CLFFT_INVALID_COMMAND_QUEUE' 
    case(CLFFT_INVALID_QUEUE_PROPERTIES);          errcode = 'CLFFT_INVALID_QUEUE_PROPERTIES' 
    case(CLFFT_INVALID_CONTEXT);                   errcode = 'CLFFT_INVALID_CONTEXT' 
    case(CLFFT_INVALID_DEVICE);                    errcode = 'CLFFT_INVALID_DEVICE' 
    case(CLFFT_INVALID_PLATFORM);                  errcode = 'CLFFT_INVALID_PLATFORM' 
    case(CLFFT_INVALID_DEVICE_TYPE);               errcode = 'CLFFT_INVALID_DEVICE_TYPE' 
    case(CLFFT_INVALID_VALUE);                     errcode = 'CLFFT_INVALID_VALUE' 
    case(CLFFT_MAP_FAILURE);                       errcode = 'CLFFT_MAP_FAILURE' 
    case(CLFFT_BUILD_PROGRAM_FAILURE);             errcode = 'CLFFT_BUILD_PROGRAM_FAILURE' 
    case(CLFFT_IMAGE_FORMAT_NOT_SUPPORTED);        errcode = 'CLFFT_IMAGE_FORMAT_NOT_SUPPORTED' 
    case(CLFFT_IMAGE_FORMAT_MISMATCH);             errcode = 'CLFFT_IMAGE_FORMAT_MISMATCH' 
    case(CLFFT_MEM_COPY_OVERLAP);                  errcode = 'CLFFT_MEM_COPY_OVERLAP' 
    case(CLFFT_PROFILING_INFO_NOT_AVAILABLE);      errcode = 'CLFFT_PROFILING_INFO_NOT_AVAILABLE' 
    case(CLFFT_OUT_OF_HOST_MEMORY);                errcode = 'CLFFT_OUT_OF_HOST_MEMORY' 
    case(CLFFT_OUT_OF_RESOURCES);                  errcode = 'CLFFT_OUT_OF_RESOURCES' 
    case(CLFFT_MEM_OBJECT_ALLOCATION_FAILURE);     errcode = 'CLFFT_MEM_OBJECT_ALLOCATION_FAILURE' 
    case(CLFFT_COMPILER_NOT_AVAILABLE);            errcode = 'CLFFT_COMPILER_NOT_AVAILABLE' 
    case(CLFFT_DEVICE_NOT_AVAILABLE);              errcode = 'CLFFT_DEVICE_NOT_AVAILABLE' 
    case(CLFFT_DEVICE_NOT_FOUND);                  errcode = 'CLFFT_DEVICE_NOT_FOUND' 
    case(CLFFT_SUCCESS);                           errcode = 'CLFFT_SUCCESS' 
    case(CLFFT_BUGCHECK);                          errcode = 'CLFFT_BUGCHECK' 
    case(CLFFT_NOTIMPLEMENTED);                    errcode = 'CLFFT_NOTIMPLEMENTED' 
    case(CLFFT_FILE_NOT_FOUND);                    errcode = 'CLFFT_FILE_NOT_FOUND' 
    case(CLFFT_FILE_CREATE_FAILURE);               errcode = 'CLFFT_FILE_CREATE_FAILURE' 
    case(CLFFT_VERSION_MISMATCH);                  errcode = 'CLFFT_VERSION_MISMATCH' 
    case(CLFFT_INVALID_PLAN);                      errcode = 'CLFFT_INVALID_PLAN'
    case(CLFFT_DEVICE_NO_DOUBLE);                  errcode = 'CLFFT_DEVICE_NO_DOUBLE' 
    case(CLFFT_ENDSTATUS);                         errcode = 'CLFFT_ENDSTATUS' 
    case default
      write(errcode, '(i10)') ierr
      errcode = 'UNKNOWN ERROR CODE ('//trim(adjustl(errcode))//')'
    end select
    message(1) = 'clAmdFft '//trim(name)//' '//trim(errcode)
    call messages_fatal(1)
    POP_SUB(clfft_print_error)
  end subroutine clfft_print_error
  ! ----------------------------------------------------
  logical function f90_cl_device_has_extension(device, extension) result(has)
    type(cl_device_id), intent(inout) :: device
    character(len=*),   intent(in)    :: extension
    integer :: cl_status
    character(len=2048) :: all_extensions
    call clGetDeviceInfo(device, CL_DEVICE_EXTENSIONS, all_extensions, cl_status)
    has = index(all_extensions, extension) /= 0
  end function f90_cl_device_has_extension
  ! ---------------------------------------------------------
  integer pure function opencl_pad(size, blk) result(pad)
    integer, intent(in) :: size
    integer, intent(in) :: blk
    mm = mod(size, blk)
    if(mm == 0) then
      pad = size
    else
      pad = size + blk - mm
    end if
  end function opencl_pad

  ! ----------------------------------------------------

  subroutine opencl_set_buffer_to_zero(buffer, type, nval, offset)
    type(opencl_mem_t), intent(inout) :: buffer
    type(type_t),       intent(in)    :: type
    integer,            intent(in)    :: nval
    integer, optional,  intent(in)    :: offset

    integer :: nval_real, bsize

    PUSH_SUB(opencl_set_buffer_to_zero)

    ASSERT(type == TYPE_CMPLX .or. type == TYPE_FLOAT)

    nval_real = nval*types_get_size(type)/8

    call opencl_set_kernel_arg(set_zero, 0, nval_real)
    call opencl_set_kernel_arg(set_zero, 1, optional_default(offset, 0)*types_get_size(type)/8)
    call opencl_set_kernel_arg(set_zero, 2, buffer)
    bsize = opencl_kernel_workgroup_size(set_zero)
    call opencl_kernel_run(set_zero, (/ opencl_pad(nval_real, bsize) /), (/ bsize /))
    call opencl_finish()
    POP_SUB(opencl_set_buffer_to_zero)
  end subroutine opencl_set_buffer_to_zero
  ! ----------------------------------------------------
  subroutine opencl_check_bandwidth()
    integer :: itime
    integer, parameter :: times = 10
    integer :: size
    real(8) :: time, stime
    real(8) :: read_bw, write_bw
    type(opencl_mem_t) :: buff
    FLOAT, allocatable :: data(:)

    call messages_new_line()
    call messages_write('Info: Benchmarking the bandwidth between main memory and device memory')
    call messages_new_line()
    call messages_info()

    call messages_write(' Buffer size   Read bw  Write bw')
    call messages_new_line()
    call messages_write('       [MiB]   [MiB/s]   [MiB/s]')
    call messages_info()

    size = 15000
    do 
      SAFE_ALLOCATE(data(1:size))
      call opencl_create_buffer(buff, CL_MEM_READ_WRITE, TYPE_FLOAT, size)

      stime = loct_clock()
      do itime = 1, times
        call opencl_write_buffer(buff, size, data)
        call opencl_finish()
      end do
      time = (loct_clock() - stime)/dble(times)

      write_bw = dble(size)*8.0_8/time

      stime = loct_clock()
      do itime = 1, times
        call opencl_read_buffer(buff, size, data)
      call opencl_finish()

      time = (loct_clock() - stime)/dble(times)
      read_bw = dble(size)*8.0_8/time

      call messages_write(size*8.0_8/1024.0**2)
      call messages_write(write_bw/1024.0**2, fmt = '(f10.1)')
      call messages_write(read_bw/1024.0**2, fmt = '(f10.1)')
      call messages_info()

      call opencl_release_buffer(buff)

      SAFE_DEALLOCATE_A(data)

      size = int(size*2.0)

      if(size > 50000000) exit
    end do
  end subroutine opencl_check_bandwidth
  ! ----------------------------------------------------
  logical pure function opencl_use_shared_mem() result(use_shared_mem)
    
    use_shared_mem = shared_mem
#include "undef.F90"
#include "real.F90"
#include "opencl_inc.F90"

#include "undef.F90"
#include "complex.F90"
#include "opencl_inc.F90"

#include "undef.F90"
#include "real_single.F90"
#include "opencl_inc.F90"

#include "undef.F90"
#include "complex_single.F90"
#include "opencl_inc.F90"

#include "undef.F90"
#include "integer.F90"
#include "opencl_inc.F90"
end module opencl_m

!! Local Variables:
!! mode: f90
!! coding: utf-8
!! End: