@@ -44,10 +44,47 @@ def device_dt(self):
4444 return self ._device_dt
4545
4646
class BaseDeviceTimer:
    """Common base for device timers: validates and stores the queue.

    Args:
        sycl_queue (SyclQueue): Queue that the timer is bound to. It is
            kept in the ``queue`` attribute.

    Raises:
        TypeError: If ``sycl_queue`` is not an instance of ``SyclQueue``.
    """

    __slots__ = ["queue"]

    def __init__(self, sycl_queue):
        queue_ok = isinstance(sycl_queue, SyclQueue)
        if queue_ok:
            self.queue = sycl_queue
        else:
            raise TypeError(f"Expected type SyclQueue, got {type(sycl_queue)}")
54+
55+
class QueueBarrierDeviceTimer(BaseDeviceTimer):
    """Device timer whose timing events come from queue barriers.

    Construction is inherited unchanged from :class:`BaseDeviceTimer`
    (a single ``sycl_queue`` argument), so no ``__init__`` override is
    needed here.
    """

    # No per-instance state beyond the ``queue`` slot of the base class.
    __slots__ = []

    def get_event(self):
        """Submit a barrier to the bound queue and return its event."""
        return self.queue.submit_barrier()
64+
65+
class OrderManagerDeviceTimer(BaseDeviceTimer):
    """Device timer that fences work through the sequential order manager.

    Each timing event is an empty task submitted to the bound queue. The
    task depends on the events the order manager currently reports as
    submitted, and is then registered back with the order manager.

    Args:
        sycl_queue (SyclQueue): Queue that the timer is bound to.
    """

    __slots__ = ["_order_manager", "_submit_empty_task_fn"]

    def __init__(self, sycl_queue):
        # Imported lazily, inside the constructor, as in the original code.
        import dpctl.utils._seq_order_keeper as order_keeper
        from dpctl.utils import SequentialOrderManager as order_managers

        super().__init__(sycl_queue)
        # Order manager associated with this particular queue.
        self._order_manager = order_managers[self.queue]
        self._submit_empty_task_fn = order_keeper._submit_empty_task

    def get_event(self):
        """Submit an empty task ordered after submitted events; return its event."""
        fence_event = self._submit_empty_task_fn(
            sycl_queue=self.queue,
            depends=self._order_manager.submitted_events,
        )
        # The same event is passed for both members of the pair.
        self._order_manager.add_event_pair(fence_event, fence_event)
        return fence_event
83+
84+
4785class SyclTimer :
4886 """
49- Context to measure device time and host wall-time of execution
50- of commands submitted to :class:`dpctl.SyclQueue`.
87+ Context to time execution of tasks submitted to :class:`dpctl.SyclQueue`.
5188
5289 :Example:
5390 .. code-block:: python
@@ -58,40 +95,81 @@ class SyclTimer:
5895 q = dpctl.SyclQueue(property="enable_profiling")
5996
6097 # create the timer
61- milliseconds_sc = 1e-3
98+ milliseconds_sc = 1e3
6299 timer = dpctl.SyclTimer(time_scale = milliseconds_sc)
63100
101+ untimed_code_block_1
64102 # use the timer
65103 with timer(queue=q):
66- code_block1
104+ timed_code_block1
105+
106+ untimed_code_block_2
67107
68108 # use the timer
69109 with timer(queue=q):
70- code_block2
110+ timed_code_block2
111+
112+ untimed_code_block_3
71113
72114 # retrieve elapsed times in milliseconds
73115 wall_dt, device_dt = timer.dt
74116
75117 .. note::
76- The timer submits barriers to the queue at the entrance and the
118+ The timer submits tasks to the queue at the entrance and the
77119 exit of the context and uses profiling information from events
78120 associated with these submissions to perform the timing. Thus
79121 :class:`dpctl.SyclTimer` requires the queue with ``"enable_profiling"``
80122 property. In order to be able to collect the profiling information,
81- the ``dt`` property ensures that both submitted barriers complete their
82- execution and thus effectively synchronizes the queue.
123+ the ``dt`` property ensures that both tasks submitted by the timer
124+ complete their execution and thus effectively synchronizes the queue.
125+
126+ Execution of the above example results in the following task graph,
127+ where each group of tasks is ordered after the one preceding it,
128+ ``[tasks_of_untimed_block1]``, ``[timer_fence_start_task]``,
129+ ``[tasks_of_timed_block1]``, ``[timer_fence_finish_task]``,
130+ ``[tasks_of_untimed_block2]``, ``[timer_fence_start_task]``,
131+ ``[tasks_of_timed_block2]``, ``[timer_fence_finish_task]``,
132+ ``[tasks_of_untimed_block3]``.
133+
134+ ``device_timer`` keyword argument controls the type of tasks submitted.
135+ With ``"queue_barrier"`` value, queue barrier tasks are used. With
136+ ``"order_manager"`` value, a single empty body task is inserted
137+ and order manager (used by all `dpctl.tensor` operations) is used to
138+ order these tasks so that they fence operations performed within
139+ timer's context.
140+
141+ Timing offloading operations that do not use the order manager with
142+ the timer that uses ``"order_manager"`` as ``device_timer`` value
143+ will be misleading because the tasks submitted by the timer will not
144+ be ordered with respect to tasks we intend to time.
145+
146+ Note, that host timer effectively measures the time of task
147+ submissions. To measure host timer wall-time that includes execution
148+ of submitted tasks, make sure to include synchronization point in
149+ the timed block.
150+
151+ :Example:
152+ .. code-block:: python
153+
154+ with timer(q):
155+ timed_block
156+ q.wait()
83157
84158 Args:
85159 host_timer (callable, optional):
86160 A callable such that host_timer() returns current
87161 host time in seconds.
88162 Default: :py:func:`timeit.default_timer`.
163+ device_timer (Literal["queue_barrier", "order_manager"], optional):
164+ Device timing method. Default: "queue_barrier".
89165 time_scale (Union[int, float], optional):
90- Ratio of the unit of time of interest and one second .
166+ Ratio of one second and the unit of time-scale of interest.
91167 Default: ``1``.
92168 """
93169
94- def __init__ (self , host_timer = timeit .default_timer , time_scale = 1 ):
170+ def __init__ (
171+ self , host_timer = timeit .default_timer , device_timer = None , time_scale = 1
172+ ):
95173 """
96174 Create new instance of :class:`.SyclTimer`.
97175
@@ -100,6 +178,8 @@ def __init__(self, host_timer=timeit.default_timer, time_scale=1):
100178 A function that takes no arguments and returns a value
101179 measuring time.
102180 Default: :meth:`timeit.default_timer`.
181+ device_timer (Literal["queue_barrier", "order_manager"], optional):
182+ Device timing method. Default: "queue_barrier"
103183 time_scale (Union[int, float], optional):
104184 Scaling factor applied to durations measured by
105185 the host_timer. Default: ``1``.
@@ -109,11 +189,26 @@ def __init__(self, host_timer=timeit.default_timer, time_scale=1):
109189 self .queue = None
110190 self .host_times = []
111191 self .bracketing_events = []
192+ self ._context_data = list ()
193+ if device_timer is None :
194+ device_timer = "queue_barrier"
195+ if device_timer == "queue_barrier" :
196+ self ._device_timer_class = QueueBarrierDeviceTimer
197+ elif device_timer == "order_manager" :
198+ self ._device_timer_class = OrderManagerDeviceTimer
199+ else :
200+ raise ValueError (
201+ "Supported values for device_timer keyword are "
202+ "'queue_barrier', 'order_manager', got "
203+ f"'{ device_timer } '"
204+ )
205+ self ._device_timer = None
112206
113207 def __call__ (self , queue = None ):
114208 if isinstance (queue , SyclQueue ):
115209 if queue .has_enable_profiling :
116210 self .queue = queue
211+ self ._device_timer = self ._device_timer_class (queue )
117212 else :
118213 raise ValueError (
119214 "The given queue was not created with the "
@@ -127,17 +222,17 @@ def __call__(self, queue=None):
127222 return self
128223
def __enter__(self):
    """Open a timed region.

    Obtains a start event from the device timer, then reads the host
    timer, and pushes the ``(event, host_time)`` pair onto
    ``self._context_data`` for the matching ``__exit__`` to pop.

    Returns:
        The timer itself.
    """
    # Tuple elements evaluate left to right: device event first, host time second.
    start_marker = (self._device_timer.get_event(), self.timer())
    self._context_data.append(start_marker)
    return self
133229
def __exit__(self, *args):
    """Close a timed region.

    Obtains an end event from the device timer, then reads the host
    timer, pops the most recent ``(event, host_time)`` pair from
    ``self._context_data`` and records the host interval in
    ``self.host_times`` and the event pair in ``self.bracketing_events``.

    Args:
        *args: Exception details passed by the ``with`` statement; unused.

    Returns:
        None, so an exception raised inside the block is not suppressed.
    """
    closing_event = self._device_timer.get_event()
    host_stop = self.timer()
    # pop() takes the most recently pushed start marker.
    opening_event, host_begin = self._context_data.pop()
    host_interval = (host_begin, host_stop)
    event_pair = (opening_event, closing_event)
    self.host_times.append(host_interval)
    self.bracketing_events.append(event_pair)
141236
142237 @property
143238 def dt (self ):
0 commit comments