-
Notifications
You must be signed in to change notification settings - Fork 818
[SYCL] Remove redundant creation of discarded events #21571
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: sycl
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -562,8 +562,9 @@ class queue_impl : public std::enable_shared_from_this<queue_impl> { | |
| /// dependencies. | ||
| /// \param CallerNeedsEvent specifies if the caller expects a usable event. | ||
| /// \return an event representing fill operation. | ||
| event memset(void *Ptr, int Value, size_t Count, | ||
| const std::vector<event> &DepEvents, bool CallerNeedsEvent); | ||
| EventImplPtr memset(void *Ptr, int Value, size_t Count, | ||
| const std::vector<event> &DepEvents, | ||
| bool CallerNeedsEvent); | ||
|
Comment on lines
563
to
+567
|
||
| /// Copies data from one memory region to another, both pointed by | ||
| /// USM pointers. | ||
| /// | ||
|
|
@@ -574,9 +575,9 @@ class queue_impl : public std::enable_shared_from_this<queue_impl> { | |
| /// dependencies. | ||
| /// \param CallerNeedsEvent specifies if the caller expects a usable event. | ||
| /// \return an event representing copy operation. | ||
| event memcpy(void *Dest, const void *Src, size_t Count, | ||
| const std::vector<event> &DepEvents, bool CallerNeedsEvent, | ||
| const code_location &CodeLoc); | ||
| EventImplPtr memcpy(void *Dest, const void *Src, size_t Count, | ||
| const std::vector<event> &DepEvents, | ||
| bool CallerNeedsEvent, const code_location &CodeLoc); | ||
| /// Provides additional information to the underlying runtime about how | ||
| /// different allocations are used. | ||
| /// | ||
|
|
@@ -587,8 +588,10 @@ class queue_impl : public std::enable_shared_from_this<queue_impl> { | |
| /// dependencies. | ||
| /// \param CallerNeedsEvent specifies if the caller expects a usable event. | ||
| /// \return an event representing advise operation. | ||
| event mem_advise(const void *Ptr, size_t Length, ur_usm_advice_flags_t Advice, | ||
| const std::vector<event> &DepEvents, bool CallerNeedsEvent); | ||
| EventImplPtr mem_advise(const void *Ptr, size_t Length, | ||
| ur_usm_advice_flags_t Advice, | ||
| const std::vector<event> &DepEvents, | ||
| bool CallerNeedsEvent); | ||
|
|
||
| static ThreadPool &getThreadPool() { | ||
| return GlobalHandler::instance().getHostTaskThreadPool(); | ||
|
|
@@ -606,15 +609,16 @@ class queue_impl : public std::enable_shared_from_this<queue_impl> { | |
|
|
||
| bool queue_empty() const; | ||
|
|
||
| event memcpyToDeviceGlobal(void *DeviceGlobalPtr, const void *Src, | ||
| bool IsDeviceImageScope, size_t NumBytes, | ||
| size_t Offset, const std::vector<event> &DepEvents, | ||
| bool CallerNeedsEvent); | ||
| event memcpyFromDeviceGlobal(void *Dest, const void *DeviceGlobalPtr, | ||
| bool IsDeviceImageScope, size_t NumBytes, | ||
| size_t Offset, | ||
| const std::vector<event> &DepEvents, | ||
| bool CallerNeedsEvent); | ||
| EventImplPtr memcpyToDeviceGlobal(void *DeviceGlobalPtr, const void *Src, | ||
| bool IsDeviceImageScope, size_t NumBytes, | ||
| size_t Offset, | ||
| const std::vector<event> &DepEvents, | ||
| bool CallerNeedsEvent); | ||
| EventImplPtr memcpyFromDeviceGlobal(void *Dest, const void *DeviceGlobalPtr, | ||
| bool IsDeviceImageScope, size_t NumBytes, | ||
| size_t Offset, | ||
| const std::vector<event> &DepEvents, | ||
| bool CallerNeedsEvent); | ||
|
|
||
| void setCommandGraphUnlocked( | ||
| const std::shared_ptr<ext::oneapi::experimental::detail::graph_impl> | ||
|
|
@@ -960,8 +964,9 @@ class queue_impl : public std::enable_shared_from_this<queue_impl> { | |
| /// \param HandlerFunc is a function that submits the operation with a | ||
| /// handler. | ||
| template <typename HandlerFuncT> | ||
| event submitWithHandler(const std::vector<event> &DepEvents, | ||
| bool CallerNeedsEvent, HandlerFuncT HandlerFunc); | ||
| EventImplPtr submitWithHandler(const std::vector<event> &DepEvents, | ||
| bool CallerNeedsEvent, | ||
| HandlerFuncT HandlerFunc); | ||
|
|
||
| /// Performs submission of a memory operation directly if scheduler can be | ||
| /// bypassed, or with a handler otherwise. | ||
|
|
@@ -979,10 +984,10 @@ class queue_impl : public std::enable_shared_from_this<queue_impl> { | |
| /// \return an event representing the submitted operation. | ||
| template <typename HandlerFuncT, typename MemMngrFuncT, | ||
| typename... MemMngrArgTs> | ||
| event submitMemOpHelper(const std::vector<event> &DepEvents, | ||
| bool CallerNeedsEvent, HandlerFuncT HandlerFunc, | ||
| MemMngrFuncT MemMngrFunc, | ||
| MemMngrArgTs &&...MemOpArgs); | ||
| EventImplPtr | ||
| submitMemOpHelper(const std::vector<event> &DepEvents, bool CallerNeedsEvent, | ||
| HandlerFuncT HandlerFunc, MemMngrFuncT MemMngrFunc, | ||
| MemMngrArgTs &&...MemOpArgs); | ||
|
|
||
| #ifdef XPTI_ENABLE_INSTRUMENTATION | ||
| // When instrumentation is enabled emits trace event for wait begin and | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
In queue_impl::submit_impl(), the new "return CallerNeedsEvent ? EventImpl : nullptr" only applies to the main path; the earlier noLastEventPath branch returns finalizeHandlerInOrderNoEventsUnlocked(Handler) directly, which can still yield a non-null EventImplPtr even when CallerNeedsEvent=false. This can violate the CallerNeedsEvent contract and can trip the assert in submitWithHandler() (ReturnEvent==false but submit_impl returns a non-null pointer). Consider routing the noLastEventPath through the same tail return logic (or at least applying the same CallerNeedsEvent filtering there).