diff --git a/CMakeLists.txt b/CMakeLists.txt
index 66b3e74..a69c2f2 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -39,8 +39,8 @@ IF(NOT CMAKE_CONFIGURATION_TYPES AND NOT CMAKE_BUILD_TYPE)
 ENDIF(NOT CMAKE_CONFIGURATION_TYPES AND NOT CMAKE_BUILD_TYPE)
 
 IF(NOT APPLE)
-	SET (CMAKE_CXX_FLAGS_RELEASE "-L${EXECUTABLE_OUTPUT_PATH}/opencl/lib")
-	SET (CMAKE_CXX_FLAGS_DEBUG "-L${EXECUTABLE_OUTPUT_PATH}/opencl/lib")
+	SET (CMAKE_CXX_FLAGS_RELEASE "-L${EXECUTABLE_OUTPUT_PATH}/opencl/lib -std=c++11")
+	SET (CMAKE_CXX_FLAGS_DEBUG "-L${EXECUTABLE_OUTPUT_PATH}/opencl/lib -std=c++11")
 endif()
 
 
diff --git a/lib/mason/opencl/OclHost.cpp b/lib/mason/opencl/OclHost.cpp
index cfb5fad..9814581 100644
--- a/lib/mason/opencl/OclHost.cpp
+++ b/lib/mason/opencl/OclHost.cpp
@@ -10,6 +10,7 @@
 #include
 #include
 #include "IConfig.h"
+#include <thread>
 
 pthread_mutex_t mutext_next_sub_block;
 
@@ -108,11 +109,18 @@ OclHost::OclHost(int const device_type, int gpu_id, int const cpu_cores) :
 			cl_device_partition_property props[3];
 
 			props[0] = CL_DEVICE_PARTITION_EQUALLY; // Equally
-			props[1] = 1; // 4 compute units per sub-device
+			props[1] = 1; // compute units per sub-device
 			props[2] = 0;
 
-			devices = (cl_device_id *) malloc(256 * sizeof(cl_device_id));
-			ciErrNum = clCreateSubDevices(device_id, props, 256, devices,
+			// Size the sub-device array from the host's hardware thread count; hardware_concurrency() returns 0 when that count is unknown.
+			// NOTE(review): the fallback of 2560 is ten times the previous fixed capacity of 256 - confirm this is intended.
+			unsigned int threads = std::thread::hardware_concurrency();
+			if ( threads == 0 ) {
+				threads = 2560;
+				Log.Message("Number of concurrent threads not well defined or not computable. Using default of %u.", threads);
+			}
+			devices = (cl_device_id *) malloc(threads * sizeof(cl_device_id));
+			ciErrNum = clCreateSubDevices(device_id, props, threads, devices,
 					&ciDeviceCount);
 			if (ciErrNum == -18) {
 				ciDeviceCount = 1;