From 85bb5f62c07e1c6904f490f2586b96b629ad19b3 Mon Sep 17 00:00:00 2001 From: "Neil R. Spruit" Date: Thu, 5 Sep 2024 15:59:27 -0700 Subject: [PATCH] [L0] Refactor to remove default constructor inits - Remove all the default constructor inits to address error prone code changes and force setting of options and flags individually. Signed-off-by: Neil R. Spruit --- source/adapters/level_zero/command_buffer.cpp | 49 ++++++++++++------- source/adapters/level_zero/context.cpp | 2 +- source/adapters/level_zero/context.hpp | 4 +- source/adapters/level_zero/event.cpp | 40 ++++++++++----- source/adapters/level_zero/event.hpp | 9 ++-- source/adapters/level_zero/image.cpp | 2 +- source/adapters/level_zero/kernel.cpp | 10 ++-- source/adapters/level_zero/memory.cpp | 40 +++++++++------ source/adapters/level_zero/queue.cpp | 33 ++++++++----- source/adapters/level_zero/queue.hpp | 14 +++--- 10 files changed, 124 insertions(+), 79 deletions(-) diff --git a/source/adapters/level_zero/command_buffer.cpp b/source/adapters/level_zero/command_buffer.cpp index 431e544101..bc26dd6e31 100644 --- a/source/adapters/level_zero/command_buffer.cpp +++ b/source/adapters/level_zero/command_buffer.cpp @@ -147,8 +147,10 @@ ur_result_t createSyncPointAndGetZeEvents( UR_CALL(getEventsFromSyncPoints(CommandBuffer, NumSyncPointsInWaitList, SyncPointWaitList, ZeEventList)); ur_event_handle_t LaunchEvent; - UR_CALL(EventCreate(CommandBuffer->Context, nullptr, false, HostVisible, - &LaunchEvent, false, !CommandBuffer->IsProfilingEnabled)); + UR_CALL(EventCreate(CommandBuffer->Context, nullptr /*Queue*/, + false /*IsMultiDevice*/, HostVisible, &LaunchEvent, + false /*CounterBasedEventEnabled*/, + !CommandBuffer->IsProfilingEnabled)); LaunchEvent->CommandType = CommandType; ZeLaunchEvent = LaunchEvent->ZeEvent; @@ -326,22 +328,26 @@ void ur_exp_command_buffer_handle_t_::cleanupCommandBufferResources() { // Release additional signal and wait events used by command_buffer if (SignalEvent) { - 
CleanupCompletedEvent(SignalEvent, false); + CleanupCompletedEvent(SignalEvent, false /*QueueLocked*/, + false /*SetEventCompleted*/); urEventReleaseInternal(SignalEvent); } if (WaitEvent) { - CleanupCompletedEvent(WaitEvent, false); + CleanupCompletedEvent(WaitEvent, false /*QueueLocked*/, + false /*SetEventCompleted*/); urEventReleaseInternal(WaitEvent); } if (AllResetEvent) { - CleanupCompletedEvent(AllResetEvent, false); + CleanupCompletedEvent(AllResetEvent, false /*QueueLocked*/, + false /*SetEventCompleted*/); urEventReleaseInternal(AllResetEvent); } // Release events added to the command_buffer for (auto &Sync : SyncPoints) { auto &Event = Sync.second; - CleanupCompletedEvent(Event, false); + CleanupCompletedEvent(Event, false /*QueueLocked*/, + false /*SetEventCompleted*/); urEventReleaseInternal(Event); } @@ -514,12 +520,15 @@ urCommandBufferCreateExp(ur_context_handle_t Context, ur_device_handle_t Device, ur_event_handle_t WaitEvent; ur_event_handle_t AllResetEvent; - UR_CALL(EventCreate(Context, nullptr, false, false, &SignalEvent, false, - !EnableProfiling)); - UR_CALL(EventCreate(Context, nullptr, false, false, &WaitEvent, false, - !EnableProfiling)); - UR_CALL(EventCreate(Context, nullptr, false, false, &AllResetEvent, false, - !EnableProfiling)); + UR_CALL(EventCreate(Context, nullptr /*Queue*/, false /*IsMultiDevice*/, + false /*HostVisible*/, &SignalEvent, + false /*CounterBasedEventEnabled*/, !EnableProfiling)); + UR_CALL(EventCreate(Context, nullptr /*Queue*/, false /*IsMultiDevice*/, + false /*HostVisible*/, &WaitEvent, + false /*CounterBasedEventEnabled*/, !EnableProfiling)); + UR_CALL(EventCreate(Context, nullptr /*Queue*/, false /*IsMultiDevice*/, + false /*HostVisible*/, &AllResetEvent, + false /*CounterBasedEventEnabled*/, !EnableProfiling)); std::vector PrecondEvents = {WaitEvent->ZeEvent, AllResetEvent->ZeEvent}; @@ -1197,14 +1206,15 @@ ur_result_t waitForDependencies(ur_exp_command_buffer_handle_t CommandBuffer, // when `EventWaitList` 
dependencies are complete. ur_command_list_ptr_t WaitCommandList{}; UR_CALL(Queue->Context->getAvailableCommandList( - Queue, WaitCommandList, false, NumEventsInWaitList, EventWaitList, - false)); + Queue, WaitCommandList, false /*UseCopyEngine*/, NumEventsInWaitList, + EventWaitList, false /*AllowBatching*/, nullptr /*ForcedCmdQueue*/)); ZE2UR_CALL(zeCommandListAppendBarrier, (WaitCommandList->first, CommandBuffer->WaitEvent->ZeEvent, CommandBuffer->WaitEvent->WaitList.Length, CommandBuffer->WaitEvent->WaitList.ZeEventList)); - Queue->executeCommandList(WaitCommandList, false, false); + Queue->executeCommandList(WaitCommandList, false /*IsBlocking*/, + false /*OKToBatchCommand*/); MustSignalWaitEvent = false; } } @@ -1316,9 +1326,9 @@ urCommandBufferEnqueueExp(ur_exp_command_buffer_handle_t CommandBuffer, // Create a command-list to signal the Event on completion ur_command_list_ptr_t SignalCommandList{}; - UR_CALL(Queue->Context->getAvailableCommandList(Queue, SignalCommandList, - false, NumEventsInWaitList, - EventWaitList, false)); + UR_CALL(Queue->Context->getAvailableCommandList( + Queue, SignalCommandList, false /*UseCopyEngine*/, NumEventsInWaitList, + EventWaitList, false /*AllowBatching*/, nullptr /*ForcedCmdQueue*/)); // Reset the wait-event for the UR command-buffer that is signaled when its // submission dependencies have been satisfied. @@ -1333,7 +1343,8 @@ urCommandBufferEnqueueExp(ur_exp_command_buffer_handle_t CommandBuffer, // parameter with signal command-list completing. 
UR_CALL(createUserEvent(CommandBuffer, Queue, SignalCommandList, Event)); - UR_CALL(Queue->executeCommandList(SignalCommandList, false, false)); + UR_CALL(Queue->executeCommandList(SignalCommandList, false /*IsBlocking*/, + false /*OKToBatchCommand*/)); return UR_RESULT_SUCCESS; } diff --git a/source/adapters/level_zero/context.cpp b/source/adapters/level_zero/context.cpp index 296e3e98d5..41c7593237 100644 --- a/source/adapters/level_zero/context.cpp +++ b/source/adapters/level_zero/context.cpp @@ -774,7 +774,7 @@ ur_result_t ur_context_handle_t_::getAvailableCommandList( .emplace(ZeCommandList, ur_command_list_info_t( ZeFence, true, false, ZeCommandQueue, ZeQueueDesc, - Queue->useCompletionBatching(), true, + Queue->useCompletionBatching(), true /*CanReuse*/, ZeCommandListIt->second.InOrderList, ZeCommandListIt->second.IsImmediate)) .first; diff --git a/source/adapters/level_zero/context.hpp b/source/adapters/level_zero/context.hpp index e7c0d784a0..0d3b2846e2 100644 --- a/source/adapters/level_zero/context.hpp +++ b/source/adapters/level_zero/context.hpp @@ -302,8 +302,8 @@ struct ur_context_handle_t_ : _ur_object { ur_result_t getAvailableCommandList( ur_queue_handle_t Queue, ur_command_list_ptr_t &CommandList, bool UseCopyEngine, uint32_t NumEventsInWaitList, - const ur_event_handle_t *EventWaitList, bool AllowBatching = false, - ze_command_queue_handle_t *ForcedCmdQueue = nullptr); + const ur_event_handle_t *EventWaitList, bool AllowBatching, + ze_command_queue_handle_t *ForcedCmdQueue); // Checks if Device is covered by this context. // For that the Device or its root devices need to be in the context. 
diff --git a/source/adapters/level_zero/event.cpp b/source/adapters/level_zero/event.cpp index 2bd3011b4b..408580dd80 100644 --- a/source/adapters/level_zero/event.cpp +++ b/source/adapters/level_zero/event.cpp @@ -88,7 +88,8 @@ ur_result_t urEnqueueEventsWait( // Get a new command list to be used on this call ur_command_list_ptr_t CommandList{}; UR_CALL(Queue->Context->getAvailableCommandList( - Queue, CommandList, UseCopyEngine, NumEventsInWaitList, EventWaitList)); + Queue, CommandList, UseCopyEngine, NumEventsInWaitList, EventWaitList, + false /*AllowBatching*/, nullptr /*ForcedCmdQueue*/)); ze_event_handle_t ZeEvent = nullptr; ur_event_handle_t InternalEvent; @@ -109,7 +110,8 @@ ur_result_t urEnqueueEventsWait( // Execute command list asynchronously as the event will be used // to track down its completion. - return Queue->executeCommandList(CommandList); + return Queue->executeCommandList(CommandList, false /*IsBlocking*/, + false /*OKToBatchCommand*/); } { @@ -279,13 +281,14 @@ ur_result_t urEnqueueEventsWaitWithBarrier( ur_command_list_ptr_t CmdList; UR_CALL(Queue->Context->getAvailableCommandList( Queue, CmdList, false /*UseCopyEngine=*/, NumEventsInWaitList, - EventWaitList, OkToBatch)); + EventWaitList, OkToBatch, nullptr /*ForcedCmdQueue*/)); // Insert the barrier into the command-list and execute. UR_CALL(insertBarrierIntoCmdList(CmdList, TmpWaitList, ResultEvent, IsInternal)); - UR_CALL(Queue->executeCommandList(CmdList, false, OkToBatch)); + UR_CALL( + Queue->executeCommandList(CmdList, false /*IsBlocking*/, OkToBatch)); // Because of the dependency between commands in the in-order queue we don't // need to keep track of any active barriers if we have in-order queue. 
@@ -354,7 +357,7 @@ ur_result_t urEnqueueEventsWaitWithBarrier( ur_command_list_ptr_t CmdList; UR_CALL(Queue->Context->getAvailableCommandList( Queue, CmdList, false /*UseCopyEngine=*/, NumEventsInWaitList, - EventWaitList, OkToBatch)); + EventWaitList, OkToBatch, nullptr /*ForcedCmdQueue*/)); CmdLists.push_back(CmdList); } @@ -404,7 +407,8 @@ ur_result_t urEnqueueEventsWaitWithBarrier( // Only batch if the matching CmdList is already open. OkToBatch = CommandBatch.OpenCommandList == CmdList; - UR_CALL(Queue->executeCommandList(CmdList, false, OkToBatch)); + UR_CALL( + Queue->executeCommandList(CmdList, false /*IsBlocking*/, OkToBatch)); } UR_CALL(Queue->ActiveBarriers.clear()); @@ -716,7 +720,7 @@ ur_result_t urEnqueueTimestampRecordingExp( ur_command_list_ptr_t CommandList{}; UR_CALL(Queue->Context->getAvailableCommandList( Queue, CommandList, UseCopyEngine, NumEventsInWaitList, EventWaitList, - /* AllowBatching */ false)); + /* AllowBatching */ false, nullptr /*ForcedCmdQueue*/)); UR_CALL(createEventAndAssociateQueue( Queue, OutEvent, UR_COMMAND_TIMESTAMP_RECORDING_EXP, CommandList, @@ -740,7 +744,7 @@ ur_result_t urEnqueueTimestampRecordingExp( (*OutEvent)->WaitList.ZeEventList)); UR_CALL( - Queue->executeCommandList(CommandList, Blocking, /* OkToBatch */ false)); + Queue->executeCommandList(CommandList, Blocking, false /* OkToBatch */)); return UR_RESULT_SUCCESS; } @@ -816,7 +820,9 @@ urEventWait(uint32_t NumEvents, ///< [in] number of events in the event list else { // NOTE: we are cleaning up after the event here to free resources // sooner in case run-time is not calling urEventRelease soon enough. - CleanupCompletedEvent(reinterpret_cast(Event)); + CleanupCompletedEvent(reinterpret_cast(Event), + false /*QueueLocked*/, + false /*SetEventCompleted*/); // For the case when we have out-of-order queue or regular command // lists its more efficient to check fences so put the queue in the // set to cleanup later. 
@@ -884,7 +890,10 @@ ur_result_t urExtEventCreate( ur_event_handle_t *Event ///< [out] pointer to the handle of the event object created. ) { - UR_CALL(EventCreate(Context, nullptr, false, true, Event)); + UR_CALL(EventCreate(Context, nullptr /*Queue*/, false /*IsMultiDevice*/, + true /*HostVisible*/, Event, + false /*CounterBasedEventEnabled*/, + false /*ForceDisableProfiling*/)); (*Event)->RefCountExternal++; if (!(*Event)->CounterBasedEventsEnabled) @@ -903,7 +912,10 @@ ur_result_t urEventCreateWithNativeHandle( // we dont have urEventCreate, so use this check for now to know that // the call comes from urEventCreate() if (reinterpret_cast(NativeEvent) == nullptr) { - UR_CALL(EventCreate(Context, nullptr, false, true, Event)); + UR_CALL(EventCreate(Context, nullptr /*Queue*/, false /*IsMultiDevice*/, + true /*HostVisible*/, Event, + false /*CounterBasedEventEnabled*/, + false /*ForceDisableProfiling*/)); (*Event)->RefCountExternal++; if (!(*Event)->CounterBasedEventsEnabled) @@ -983,7 +995,8 @@ ur_result_t ur_event_handle_t_::getOrCreateHostVisibleEvent( ur_command_list_ptr_t CommandList{}; UR_CALL(UrQueue->Context->getAvailableCommandList( - UrQueue, CommandList, false /* UseCopyEngine */, 0, nullptr, OkToBatch)) + UrQueue, CommandList, false /* UseCopyEngine */, 0, nullptr, OkToBatch, + nullptr /*ForcedCmdQueue*/)) // Create a "proxy" host-visible event. UR_CALL(createEventAndAssociateQueue( @@ -1529,7 +1542,8 @@ ur_result_t _ur_ze_event_list_t::createAndRetainUrZeEventList( // This prevents a potential deadlock with recursive // event locks. 
UR_CALL(Queue->Context->getAvailableCommandList( - Queue, CommandList, false, 0, nullptr, true)); + Queue, CommandList, false /*UseCopyEngine*/, 0, nullptr, + true /*AllowBatching*/, nullptr /*ForcedCmdQueue*/)); } std::shared_lock Lock(EventList[I]->Mutex); diff --git a/source/adapters/level_zero/event.hpp b/source/adapters/level_zero/event.hpp index 7dd64acdaa..2c9e698e3c 100644 --- a/source/adapters/level_zero/event.hpp +++ b/source/adapters/level_zero/event.hpp @@ -32,8 +32,8 @@ ur_result_t urEventReleaseInternal(ur_event_handle_t Event); ur_result_t EventCreate(ur_context_handle_t Context, ur_queue_handle_t Queue, bool IsMultiDevice, bool HostVisible, ur_event_handle_t *RetEvent, - bool CounterBasedEventEnabled = false, - bool ForceDisableProfiling = false); + bool CounterBasedEventEnabled, + bool ForceDisableProfiling); } // extern "C" // This is an experimental option that allows to disable caching of events in @@ -273,9 +273,8 @@ template <> ze_result_t zeHostSynchronize(ze_command_queue_handle_t Handle); // the event, updates the last command event in the queue and cleans up all dep // events of the event. // If the caller locks queue mutex then it must pass 'true' to QueueLocked. 
-ur_result_t CleanupCompletedEvent(ur_event_handle_t Event, - bool QueueLocked = false, - bool SetEventCompleted = false); +ur_result_t CleanupCompletedEvent(ur_event_handle_t Event, bool QueueLocked, + bool SetEventCompleted); // Get value of device scope events env var setting or default setting static const EventsScope DeviceEventsSetting = [] { diff --git a/source/adapters/level_zero/image.cpp b/source/adapters/level_zero/image.cpp index 4810b3d88e..6433b1f325 100644 --- a/source/adapters/level_zero/image.cpp +++ b/source/adapters/level_zero/image.cpp @@ -856,7 +856,7 @@ ur_result_t urBindlessImagesImageCopyExp( ur_command_list_ptr_t CommandList{}; UR_CALL(hQueue->Context->getAvailableCommandList( hQueue, CommandList, UseCopyEngine, numEventsInWaitList, phEventWaitList, - OkToBatch)); + OkToBatch, nullptr /*ForcedCmdQueue*/)); ze_event_handle_t ZeEvent = nullptr; ur_event_handle_t InternalEvent; diff --git a/source/adapters/level_zero/kernel.cpp b/source/adapters/level_zero/kernel.cpp index 9c201eda88..5782d043c0 100644 --- a/source/adapters/level_zero/kernel.cpp +++ b/source/adapters/level_zero/kernel.cpp @@ -133,7 +133,7 @@ ur_result_t urEnqueueKernelLaunch( ur_command_list_ptr_t CommandList{}; UR_CALL(Queue->Context->getAvailableCommandList( Queue, CommandList, UseCopyEngine, NumEventsInWaitList, EventWaitList, - true /* AllowBatching */)); + true /* AllowBatching */, nullptr /*ForcedCmdQueue*/)); ze_event_handle_t ZeEvent = nullptr; ur_event_handle_t InternalEvent{}; @@ -196,7 +196,8 @@ ur_result_t urEnqueueKernelLaunch( // Execute command list asynchronously, as the event will be used // to track down its completion. 
- UR_CALL(Queue->executeCommandList(CommandList, false, true)); + UR_CALL(Queue->executeCommandList(CommandList, false /*IsBlocking*/, + true /*OKToBatchCommand*/)); return UR_RESULT_SUCCESS; } @@ -392,7 +393,7 @@ ur_result_t urEnqueueCooperativeKernelLaunchExp( ur_command_list_ptr_t CommandList{}; UR_CALL(Queue->Context->getAvailableCommandList( Queue, CommandList, UseCopyEngine, NumEventsInWaitList, EventWaitList, - true /* AllowBatching */)); + true /* AllowBatching */, nullptr /*ForcedCmdQueue*/)); ze_event_handle_t ZeEvent = nullptr; ur_event_handle_t InternalEvent{}; @@ -455,7 +456,8 @@ ur_result_t urEnqueueCooperativeKernelLaunchExp( // Execute command list asynchronously, as the event will be used // to track down its completion. - UR_CALL(Queue->executeCommandList(CommandList, false, true)); + UR_CALL(Queue->executeCommandList(CommandList, false /*IsBlocking*/, + true /*OKToBatchCommand*/)); return UR_RESULT_SUCCESS; } diff --git a/source/adapters/level_zero/memory.cpp b/source/adapters/level_zero/memory.cpp index e7ff6dfea1..62b1de8591 100644 --- a/source/adapters/level_zero/memory.cpp +++ b/source/adapters/level_zero/memory.cpp @@ -80,7 +80,7 @@ ur_result_t enqueueMemCopyHelper(ur_command_t CommandType, ur_command_list_ptr_t CommandList{}; UR_CALL(Queue->Context->getAvailableCommandList( Queue, CommandList, UseCopyEngine, NumEventsInWaitList, EventWaitList, - OkToBatch)); + OkToBatch, nullptr /*ForcedCmdQueue*/)); ze_event_handle_t ZeEvent = nullptr; ur_event_handle_t InternalEvent; @@ -133,7 +133,7 @@ ur_result_t enqueueMemCopyRectHelper( ur_command_list_ptr_t CommandList{}; UR_CALL(Queue->Context->getAvailableCommandList( Queue, CommandList, UseCopyEngine, NumEventsInWaitList, EventWaitList, - OkToBatch)); + OkToBatch, nullptr /*ForcedCmdQueue*/)); ze_event_handle_t ZeEvent = nullptr; ur_event_handle_t InternalEvent; @@ -215,7 +215,7 @@ static ur_result_t enqueueMemFillHelper(ur_command_t CommandType, bool OkToBatch = true; 
UR_CALL(Queue->Context->getAvailableCommandList( Queue, CommandList, UseCopyEngine, NumEventsInWaitList, EventWaitList, - OkToBatch)); + OkToBatch, nullptr /*ForcedCmdQueue*/)); ze_event_handle_t ZeEvent = nullptr; ur_event_handle_t InternalEvent; @@ -245,7 +245,8 @@ static ur_result_t enqueueMemFillHelper(ur_command_t CommandType, // Execute command list asynchronously, as the event will be used // to track down its completion. - UR_CALL(Queue->executeCommandList(CommandList, false, OkToBatch)); + UR_CALL(Queue->executeCommandList(CommandList, false /*IsBlocking*/, + OkToBatch)); } else { // Copy pattern into every entry in memory array pointed by Ptr. uint32_t NumOfCopySteps = Size / PatternSize; @@ -265,7 +266,8 @@ static ur_result_t enqueueMemFillHelper(ur_command_t CommandType, printZeEventList(WaitList); // Execute command list synchronously. - UR_CALL(Queue->executeCommandList(CommandList, true, OkToBatch)); + UR_CALL( + Queue->executeCommandList(CommandList, true /*IsBlocking*/, OkToBatch)); } return UR_RESULT_SUCCESS; @@ -332,7 +334,7 @@ static ur_result_t enqueueMemImageCommandHelper( ur_command_list_ptr_t CommandList{}; UR_CALL(Queue->Context->getAvailableCommandList( Queue, CommandList, UseCopyEngine, NumEventsInWaitList, EventWaitList, - OkToBatch)); + OkToBatch, nullptr /*ForcedCmdQueue*/)); ze_event_handle_t ZeEvent = nullptr; ur_event_handle_t InternalEvent; @@ -1006,7 +1008,8 @@ ur_result_t urEnqueueMemBufferMap( // For discrete devices we need a command list ur_command_list_ptr_t CommandList{}; UR_CALL(Queue->Context->getAvailableCommandList( - Queue, CommandList, UseCopyEngine, NumEventsInWaitList, EventWaitList)); + Queue, CommandList, UseCopyEngine, NumEventsInWaitList, EventWaitList, + false /*AllowBatching*/, nullptr /*ForcedCmdQueue*/)); // Add the event to the command list. 
CommandList->second.append(reinterpret_cast(*Event)); @@ -1027,7 +1030,8 @@ ur_result_t urEnqueueMemBufferMap( (ZeCommandList, *RetMap, ZeHandleSrc + Offset, Size, ZeEvent, WaitList.Length, WaitList.ZeEventList)); - UR_CALL(Queue->executeCommandList(CommandList, BlockingMap)); + UR_CALL(Queue->executeCommandList(CommandList, BlockingMap, + false /*OKToBatchCommand*/)); } auto Res = Buffer->Mappings.insert({*RetMap, {Offset, Size}}); @@ -1135,7 +1139,8 @@ ur_result_t urEnqueueMemUnmap( ur_command_list_ptr_t CommandList{}; UR_CALL(Queue->Context->getAvailableCommandList( reinterpret_cast(Queue), CommandList, UseCopyEngine, - NumEventsInWaitList, EventWaitList)); + NumEventsInWaitList, EventWaitList, false /*AllowBatching*/, + nullptr /*ForcedCmdQueue*/)); CommandList->second.append(reinterpret_cast(*Event)); (*Event)->RefCount.increment(); @@ -1164,7 +1169,8 @@ ur_result_t urEnqueueMemUnmap( // Execute command list asynchronously, as the event will be used // to track down its completion. - UR_CALL(Queue->executeCommandList(CommandList)); + UR_CALL(Queue->executeCommandList(CommandList, false /*IsBlocking*/, + false /*OKToBatchCommand*/)); return UR_RESULT_SUCCESS; } @@ -1246,7 +1252,8 @@ ur_result_t urEnqueueUSMPrefetch( // TODO: Change UseCopyEngine argument to 'true' once L0 backend // support is added UR_CALL(Queue->Context->getAvailableCommandList( - Queue, CommandList, UseCopyEngine, NumEventsInWaitList, EventWaitList)); + Queue, CommandList, UseCopyEngine, NumEventsInWaitList, EventWaitList, + false /*AllowBatching*/, nullptr /*ForcedCmdQueue*/)); // TODO: do we need to create a unique command type for this? ze_event_handle_t ZeEvent = nullptr; @@ -1271,7 +1278,8 @@ ur_result_t urEnqueueUSMPrefetch( // so manually add command to signal our event. 
ZE2UR_CALL(zeCommandListAppendSignalEvent, (ZeCommandList, ZeEvent)); - UR_CALL(Queue->executeCommandList(CommandList, false)); + UR_CALL(Queue->executeCommandList(CommandList, false /*IsBlocking*/, + false /*OKToBatchCommand*/)); return UR_RESULT_SUCCESS; } @@ -1301,8 +1309,9 @@ ur_result_t urEnqueueUSMAdvise( // UseCopyEngine is set to 'false' here. // TODO: Additional analysis is required to check if this operation will // run faster on copy engines. - UR_CALL(Queue->Context->getAvailableCommandList(Queue, CommandList, - UseCopyEngine, 0, nullptr)); + UR_CALL(Queue->Context->getAvailableCommandList( + Queue, CommandList, UseCopyEngine, 0, nullptr, false /*AllowBatching*/, + nullptr /*ForcedCmdQueue*/)); // TODO: do we need to create a unique command type for this? ze_event_handle_t ZeEvent = nullptr; @@ -1329,7 +1338,8 @@ ur_result_t urEnqueueUSMAdvise( // so manually add command to signal our event. ZE2UR_CALL(zeCommandListAppendSignalEvent, (ZeCommandList, ZeEvent)); - Queue->executeCommandList(CommandList, false); + Queue->executeCommandList(CommandList, false /*IsBlocking*/, + false /*OKToBatchCommand*/); return UR_RESULT_SUCCESS; } diff --git a/source/adapters/level_zero/queue.cpp b/source/adapters/level_zero/queue.cpp index 978547df10..c4598f3472 100644 --- a/source/adapters/level_zero/queue.cpp +++ b/source/adapters/level_zero/queue.cpp @@ -104,7 +104,10 @@ ur_result_t ur_completion_batch::seal(ur_queue_handle_t queue, assert(st == ACCUMULATING); if (!barrierEvent) { - UR_CALL(EventCreate(queue->Context, queue, false, true, &barrierEvent)); + UR_CALL(EventCreate(queue->Context, queue, false /*IsMultiDevice*/, + true /*HostVisible*/, &barrierEvent, + false /*CounterBasedEventEnabled*/, + false /*ForceDisableProfiling*/)); } // Instead of collecting all the batched events, we simply issue a global @@ -307,7 +310,9 @@ ur_result_t resetCommandLists(ur_queue_handle_t Queue) { // Handle immediate command lists here, they don't need to be reset and we // only 
need to cleanup events. if (Queue->UsingImmCmdLists) { - UR_CALL(CleanupEventsInImmCmdLists(Queue, true /*locked*/)); + UR_CALL(CleanupEventsInImmCmdLists(Queue, true /*QueueLocked*/, + false /*QueueSynced*/, + nullptr /*CompletedEvent*/)); return UR_RESULT_SUCCESS; } @@ -682,7 +687,8 @@ ur_result_t urQueueRelease( std::scoped_lock EventLock(Event->Mutex); Event->Completed = true; } - UR_CALL(CleanupCompletedEvent(Event)); + UR_CALL(CleanupCompletedEvent(Event, false /*QueueLocked*/, + false /*SetEventCompleted*/)); // This event was removed from the command list, so decrement ref count // (it was incremented when they were added to the command list). UR_CALL(urEventReleaseInternal(reinterpret_cast(Event))); @@ -1655,7 +1661,8 @@ ur_result_t CleanupEventListFromResetCmdList( for (auto &Event : EventListToCleanup) { // We don't need to synchronize the events since the fence associated with // the command list was synchronized. - UR_CALL(CleanupCompletedEvent(Event, QueueLocked, true)); + UR_CALL( + CleanupCompletedEvent(Event, QueueLocked, true /*SetEventCompleted*/)); // This event was removed from the command list, so decrement ref count // (it was incremented when they were added to the command list). UR_CALL(urEventReleaseInternal(Event)); @@ -1879,9 +1886,9 @@ ur_result_t createEventAndAssociateQueue(ur_queue_handle_t Queue, : nullptr; if (*Event == nullptr) - UR_CALL(EventCreate(Queue->Context, Queue, IsMultiDevice, - HostVisible.value(), Event, - Queue->CounterBasedEventsEnabled)); + UR_CALL(EventCreate( + Queue->Context, Queue, IsMultiDevice, HostVisible.value(), Event, + Queue->CounterBasedEventsEnabled, false /*ForceDisableProfiling*/)); (*Event)->UrQueue = Queue; (*Event)->CommandType = CommandType; @@ -1978,7 +1985,9 @@ ur_result_t ur_queue_handle_t_::executeOpenCommandList(bool IsCopy) { // queue, then close and execute that command list now. 
if (hasOpenCommandList(IsCopy)) { adjustBatchSizeForPartialBatch(IsCopy); - auto Res = executeCommandList(CommandBatch.OpenCommandList, false, false); + auto Res = + executeCommandList(CommandBatch.OpenCommandList, false /*IsBlocking*/, + false /*OKToBatchCommand*/); CommandBatch.OpenCommandList = CommandListMap.end(); return Res; } @@ -2288,9 +2297,11 @@ ur_result_t ur_queue_handle_t_::createCommandList( std::tie(CommandList, std::ignore) = CommandListMap.insert( std::pair( - ZeCommandList, ur_command_list_info_t( - ZeFence, false, false, ZeCommandQueue, ZeQueueDesc, - useCompletionBatching(), true, IsInOrderList))); + ZeCommandList, + ur_command_list_info_t( + ZeFence, false /*ZeFenceInUse*/, false /*IsClosed*/, + ZeCommandQueue, ZeQueueDesc, useCompletionBatching(), + true /*CanReuse*/, IsInOrderList, false /*IsImmediate*/))); UR_CALL(insertStartBarrierIfDiscardEventsMode(CommandList)); UR_CALL(insertActiveBarriers(CommandList, UseCopyEngine)); diff --git a/source/adapters/level_zero/queue.hpp b/source/adapters/level_zero/queue.hpp index 699d7ec960..1108e4c268 100644 --- a/source/adapters/level_zero/queue.hpp +++ b/source/adapters/level_zero/queue.hpp @@ -150,10 +150,9 @@ struct ur_completion_batches { }; ur_result_t resetCommandLists(ur_queue_handle_t Queue); -ur_result_t -CleanupEventsInImmCmdLists(ur_queue_handle_t UrQueue, bool QueueLocked = false, - bool QueueSynced = false, - ur_event_handle_t CompletedEvent = nullptr); +ur_result_t CleanupEventsInImmCmdLists(ur_queue_handle_t UrQueue, + bool QueueLocked, bool QueueSynced, + ur_event_handle_t CompletedEvent); // Structure describing the specific use of a command-list in a queue. 
// This is because command-lists are re-used across multiple queues @@ -162,8 +161,8 @@ struct ur_command_list_info_t { ur_command_list_info_t(ze_fence_handle_t ZeFence, bool ZeFenceInUse, bool IsClosed, ze_command_queue_handle_t ZeQueue, ZeStruct ZeQueueDesc, - bool UseCompletionBatching, bool CanReuse = true, - bool IsInOrderList = false, bool IsImmediate = false) + bool UseCompletionBatching, bool CanReuse, + bool IsInOrderList, bool IsImmediate) : ZeFence(ZeFence), ZeFenceInUse(ZeFenceInUse), IsClosed(IsClosed), ZeQueue(ZeQueue), ZeQueueDesc(ZeQueueDesc), IsInOrderList(IsInOrderList), CanReuse(CanReuse), @@ -528,8 +527,7 @@ struct ur_queue_handle_t_ : _ur_object { // // For immediate commandlists, no close and execute is necessary. ur_result_t executeCommandList(ur_command_list_ptr_t CommandList, - bool IsBlocking = false, - bool OKToBatchCommand = false); + bool IsBlocking, bool OKToBatchCommand); // Helper method telling whether we need to reuse discarded event in this // queue.