diff --git a/include/acl_globals.h b/include/acl_globals.h index bcc7e8ba..74b6b2af 100644 --- a/include/acl_globals.h +++ b/include/acl_globals.h @@ -22,6 +22,8 @@ int acl_present_board_is_valid(void); // Can't use ACL after this. // Undoes acl_init(). void acl_reset(void); +// Version of reset used in unit test only +void acl_reset_join_thread(void); // Initializes the HAL and loads the builtin system definition. // diff --git a/include/acl_types.h b/include/acl_types.h index 28685ae2..b146a29a 100644 --- a/include/acl_types.h +++ b/include/acl_types.h @@ -1636,6 +1636,9 @@ typedef struct _cl_platform_id // The device operation queue. // These are the operations that can run immediately on the device. acl_device_op_queue_t device_op_queue; + // Thread used to update device_op_queue when kernel interrupt triggers + acl_thread_t device_op_queue_update_thread; + bool outstanding_interrupt; // Limits. See clGetDeviceInfo for semantics. unsigned int max_param_size; diff --git a/lib/acl_threadsupport/include/acl_threadsupport/acl_threadsupport.h b/lib/acl_threadsupport/include/acl_threadsupport/acl_threadsupport.h index 262f44f9..e7041370 100644 --- a/lib/acl_threadsupport/include/acl_threadsupport/acl_threadsupport.h +++ b/lib/acl_threadsupport/include/acl_threadsupport/acl_threadsupport.h @@ -189,7 +189,7 @@ int acl_sem_destroy(acl_sem_t *sem); // See this Microsoft Research paper on how to implement condition // variables with only semaphores // http://research.microsoft.com/pubs/64242/implementingcvs.pdf -// It's veyr instructive, but we can't use its implementation because: +// It's very instructive, but we can't use its implementation because: // - The signaler acquires a mutex // - It keeps an explicit linked list of waiters // diff --git a/src/acl_globals.cpp b/src/acl_globals.cpp index 64bbb1de..1941fa84 100644 --- a/src/acl_globals.cpp +++ b/src/acl_globals.cpp @@ -231,6 +231,35 @@ void acl_reset(void) { acl_platform.initialized = 0; } +// This function 
should only be used in the unit test +void acl_reset_join_thread(void) { + { + std::scoped_lock lock{acl_mutex_wrapper}; + + l_reset_present_board(); + + acl_platform.offline_device = ""; + acl_platform.num_devices = 0; + for (unsigned i = 0; i < ACL_MAX_DEVICE; ++i) { + acl_platform.device[i] = _cl_device_id(); + } + acl_platform.initialized = 0; + acl_signal_device_update(); + } + // The unit test groups are run sequentially, and acl_init and acl_reset are + // each called once, at the start (setup) and end (teardown) of a test group. + // As acl_init wouldn't be called before acl_reset finished, it is okay to + // block here to wait for the device op queue update thread to finish. + + // Note that the join has to be called without holding the acl global lock: if + // reset held the lock while waiting, the device op queue update thread would + // block on the lock forever, resulting in a deadlock in the unit test. + if (acl_platform.device_op_queue_update_thread != 0) { + acl_thread_join(&acl_platform.device_op_queue_update_thread); + acl_platform.device_op_queue_update_thread = 0; + } +} + //////////////////////////////////////////////////// // Static functions diff --git a/src/acl_kernel.cpp b/src/acl_kernel.cpp index 40d5206d..5ddf058c 100644 --- a/src/acl_kernel.cpp +++ b/src/acl_kernel.cpp @@ -3176,6 +3176,10 @@ void acl_receive_kernel_update(int activation_id, cl_int status) { std::unique_lock lock{acl_mutex_wrapper, std::defer_lock}; if (!acl_is_inside_sig()) { lock.lock(); + } else { + // Let the device op queue update thread know there is an interrupt from + // the kernel interrupt signal handler + acl_platform.outstanding_interrupt = 1; } if (activation_id >= 0 && activation_id < doq->max_ops) { diff --git a/src/acl_platform.cpp b/src/acl_platform.cpp index 3f57c173..e075934e 100644 --- a/src/acl_platform.cpp +++ b/src/acl_platform.cpp @@ -78,6 +78,7 @@ static void l_initialize_devices(const acl_system_def_t *present_board_def, int offline_mode, 
unsigned int num_devices, const cl_device_id *devices); static void l_add_device(int idx); +void *l_eagerly_update_device_op_queue(void *arg); ////////////////////////////// // OpenCL API @@ -412,6 +413,10 @@ void acl_init_platform(void) { // Device operation queue. acl_init_device_op_queue(&acl_platform.device_op_queue); + // Clear the interrupt flag, then start the device_op_queue update thread + acl_platform.outstanding_interrupt = 0; + acl_thread_create(&acl_platform.device_op_queue_update_thread, 0, + l_eagerly_update_device_op_queue, NULL); // Initialize sampler allocator. for (int i = 0; i < ACL_MAX_SAMPLER; i++) { @@ -737,6 +742,25 @@ static void l_add_device(int idx) { device->address_bits = 64; // Yes, our devices are 64-bit. } +void *l_eagerly_update_device_op_queue(void *arg) { + while (true) { + std::scoped_lock lock{acl_mutex_wrapper}; + + // Sleep until a device update is signalled + acl_wait_for_device_update(NULL); + + if (!acl_platform.initialized) { + break; + } + if (acl_platform.outstanding_interrupt) { + acl_print_debug_msg("Serving outstanding kernel interrupt...\n"); + acl_update_device_op_queue(&(acl_platform.device_op_queue)); + acl_platform.outstanding_interrupt = 0; + } + } + return NULL; +} + // These functions check to see if a given object is known to the system. // acl_*_is_valid( * ); // This is simple because everything is statically allocated. 
diff --git a/src/acl_thread.cpp b/src/acl_thread.cpp index fc22ff23..bf0e3de2 100644 --- a/src/acl_thread.cpp +++ b/src/acl_thread.cpp @@ -9,6 +9,7 @@ #include #include #include +#include ACL_TLS int acl_global_lock_count = 0; ACL_TLS int acl_inside_sig_flag = 0; @@ -55,7 +56,7 @@ void acl_mutex_wrapper_t::resume_lock(int lock_count) { void acl_wait_for_device_update(cl_context context) { acl_assert_locked(); - if (acl_get_hal()->get_debug_verbosity && + if (acl_context_is_valid(context) && acl_get_hal()->get_debug_verbosity && acl_get_hal()->get_debug_verbosity() > 0) { unsigned timeout = 5; // Seconds // Keep waiting until signal is received @@ -102,6 +103,14 @@ __attribute__((constructor)) static void l_global_lock_init() { } __attribute__((destructor)) static void l_global_lock_uninit() { + if (acl_get_platform()->device_op_queue_update_thread) { + { + std::scoped_lock lock{acl_mutex_wrapper}; + acl_get_platform()->initialized = 0; + acl_signal_condvar(&l_acl_global_condvar); // wake up waiting thread + } + acl_thread_join(&acl_get_platform()->device_op_queue_update_thread); + } acl_reset_condvar(&l_acl_global_condvar); } diff --git a/test/acl_command_queue_test.cpp b/test/acl_command_queue_test.cpp index fddcf914..1ecaf609 100644 --- a/test/acl_command_queue_test.cpp +++ b/test/acl_command_queue_test.cpp @@ -40,7 +40,7 @@ MT_TEST_GROUP(acl_command_queue) { syncThreads(); if (threadNum() == 0) { - ACL_LOCKED(acl_test_teardown_generic_system()); + acl_test_teardown_generic_system(); } acl_test_run_standard_teardown_checks(); diff --git a/test/acl_context_test.cpp b/test/acl_context_test.cpp index 927f4596..d409fc0e 100644 --- a/test/acl_context_test.cpp +++ b/test/acl_context_test.cpp @@ -54,7 +54,7 @@ MT_TEST_GROUP(Context) { syncThreads(); if (threadNum() == 0) { - ACL_LOCKED(acl_test_teardown_generic_system()); + acl_test_teardown_generic_system(); } acl_test_run_standard_teardown_checks(); } diff --git a/test/acl_device_op_test.cpp 
b/test/acl_device_op_test.cpp index 2934e2cb..b148e189 100644 --- a/test/acl_device_op_test.cpp +++ b/test/acl_device_op_test.cpp @@ -151,8 +151,8 @@ TEST_GROUP(device_op) { virtual void teardown() { unload(); - acl_test_teardown_generic_system(); acl_mutex_wrapper.unlock(); + acl_test_teardown_generic_system(); acl_test_run_standard_teardown_checks(); } diff --git a/test/acl_device_test.cpp b/test/acl_device_test.cpp index 90d7bb59..dfdbdbdd 100644 --- a/test/acl_device_test.cpp +++ b/test/acl_device_test.cpp @@ -29,7 +29,7 @@ syncThreads(); void teardown() { syncThreads(); if (threadNum() == 0) { - ACL_LOCKED(acl_test_teardown_generic_system()); + acl_test_teardown_generic_system(); } acl_test_run_standard_teardown_checks(); } diff --git a/test/acl_event_test.cpp b/test/acl_event_test.cpp index eec5d763..c0aff35c 100644 --- a/test/acl_event_test.cpp +++ b/test/acl_event_test.cpp @@ -44,7 +44,7 @@ MT_TEST_GROUP(acl_event) { syncThreads(); if (threadNum() == 0) { - ACL_LOCKED(acl_test_teardown_generic_system()); + acl_test_teardown_generic_system(); } acl_test_run_standard_teardown_checks(); @@ -129,7 +129,7 @@ MT_TEST_GROUP(acl_event_default_config) { syncThreads(); if (threadNum() == 0) { - ACL_LOCKED(acl_test_teardown_generic_system()); + acl_test_teardown_generic_system(); } acl_test_run_standard_teardown_checks(); diff --git a/test/acl_globals_test.cpp b/test/acl_globals_test.cpp index 5376d4e6..50fef513 100644 --- a/test/acl_globals_test.cpp +++ b/test/acl_globals_test.cpp @@ -716,7 +716,9 @@ TEST(acl_globals_undef, valid_init_simple) { CHECK(0 != acl_present_board_def()); CHECK(0 != acl_present_board_is_valid()); // Teardown - acl_reset(); + acl_mutex_wrapper.unlock(); + acl_reset_join_thread(); + acl_mutex_wrapper.lock(); CHECK(0 == acl_present_board_def()); CHECK(0 == acl_present_board_is_valid()); } @@ -726,7 +728,9 @@ TEST(acl_globals_undef, valid_init_empty) { CHECK(0 != acl_present_board_def()); CHECK(0 != acl_present_board_is_valid()); // Teardown - 
acl_reset(); + acl_mutex_wrapper.unlock(); + acl_reset_join_thread(); + acl_mutex_wrapper.lock(); CHECK(0 == acl_present_board_def()); CHECK(0 == acl_present_board_is_valid()); } @@ -735,6 +739,8 @@ TEST(acl_globals_undef, valid_init_complex) { CHECK_EQUAL(1, acl_init(&acltest_complex_system)); CHECK(0 != acl_present_board_def()); // Teardown - acl_reset(); + acl_mutex_wrapper.unlock(); + acl_reset_join_thread(); + acl_mutex_wrapper.lock(); CHECK_EQUAL(0, acl_present_board_def()); } diff --git a/test/acl_platform_test.cpp b/test/acl_platform_test.cpp index 3ecf230c..fe9dfc37 100644 --- a/test/acl_platform_test.cpp +++ b/test/acl_platform_test.cpp @@ -378,7 +378,7 @@ TEST(offline_device, offline_hal) { cl_bool result; acl_test_setenv(m_env, offline_device); - ACL_LOCKED(acl_reset()); + acl_reset_join_thread(); ACL_LOCKED(result = acl_init_from_hal_discovery()); CHECK_EQUAL(CL_TRUE, result); // Exercise the offline HAL: printing, and the timestamps. @@ -390,6 +390,11 @@ TEST(offline_device, offline_hal) { ACL_LOCKED(now = acl_get_hal()->get_timestamp()); ACL_LOCKED(acl_print_debug_msg("offline hal time is %08x%08x", (now >> 32), (now & 0xffffffff))); + + syncThreads(); + if (threadNum() == 0) { + acl_test_teardown_system(); + } } struct live_info_t { @@ -435,7 +440,7 @@ MT_TEST_GROUP(track_object) { syncThreads(); if (threadNum() == 0) { acl_test_unsetenv(m_offline_env); - ACL_LOCKED(acl_reset()); + acl_reset_join_thread(); } acl_test_run_standard_teardown_checks(); } diff --git a/test/acl_support_test.cpp b/test/acl_support_test.cpp index 114c0c1c..ec827bdc 100644 --- a/test/acl_support_test.cpp +++ b/test/acl_support_test.cpp @@ -27,8 +27,8 @@ TEST_GROUP(support){void setup(){acl_mutex_wrapper.lock(); acl_test_setup_generic_system(); } void teardown() { - acl_test_teardown_generic_system(); acl_mutex_wrapper.unlock(); + acl_test_teardown_generic_system(); acl_test_run_standard_teardown_checks(); } diff --git a/test/acl_test.cpp b/test/acl_test.cpp index 
890735fc..e32fa7a8 100644 --- a/test/acl_test.cpp +++ b/test/acl_test.cpp @@ -117,8 +117,8 @@ void acl_test_teardown_sample_default_board_system(void) { void acl_test_teardown_generic_system(void) { acl_test_teardown_system(); } void acl_test_teardown_system(void) { + acl_reset_join_thread(); acl_mutex_wrapper.lock(); - acl_reset(); acl_reset_hal(); acltest_hal_teardown(); acl_mutex_wrapper.unlock(); @@ -358,7 +358,7 @@ static void l_load_example_binary(void) { acl_test_setenv(envvar_program_lib, program_lib_old_value); } - ACL_LOCKED(acl_test_teardown_generic_system()); + acl_test_teardown_generic_system(); } // Return a context properties array that specifies preloaded binary only.