gem5 v24.0.0.0
Loading...
Searching...
No Matches
hw_scheduler.cc
Go to the documentation of this file.
1/*
2 * Copyright (c) 2016-2017 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * 3. Neither the name of the copyright holder nor the names of its
16 * contributors may be used to endorse or promote products derived from this
17 * software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
33
34#include "base/compiler.hh"
35#include "base/trace.hh"
36#include "debug/HSAPacketProcessor.hh"
37#include "sim/cur_tick.hh"
38
// Expands to the out-of-line definition of
// HWScheduler::XEVENT::description() for the event class named XEVENT.
// The returned C string is the event class name itself, obtained by
// stringizing the macro argument (#XEVENT).
39#define HWSCHDLR_EVENT_DESCRIPTION_GENERATOR(XEVENT) \
40 const char* \
41 HWScheduler::XEVENT::description() const \
42 { \
43 return #XEVENT; \
44 }
45
46namespace gem5
47{
48
49HWSCHDLR_EVENT_DESCRIPTION_GENERATOR(SchedulerWakeupEvent)
50
51void
56
57void
59{
60 // The scheduler unmaps an idle queue from the
61 // registered qList and maps a new queue
62 // to the registered list from the active list.
63 // For this implementation, an idle queue means
64 // a queue that does not have any outstanding dispatch
65 // at the time of this scheduler's wakeup
66
69}
70
71void
73{
74 // If there is at least one queue that is not registered,
75 // then wake up again
77 regdListMap.size() < activeList.size()) {
80 "Scheduling wakeup at %lu\n", (curTick() + wakeupDelay));
81 }
82}
83
/**
 * Create the bookkeeping for a new queue and try to map it to a hardware
 * queue slot right away.
 *
 * Steps performed by the visible code:
 *  - derive the doorbell offset from the queue ID when no offset is given;
 *  - panic if that doorbell offset is already present in dbMap;
 *  - record the offset <-> queue ID association in dbMap and qidMap;
 *  - allocate a queue descriptor and an AQL ring buffer and store them as
 *    a QCntxt in activeList;
 *  - call mapQIfSlotAvlbl() for the new queue and then schedWakeup().
 *
 * NOTE(review): in this listing the initializer of aql_buf and the opening
 * line of the final DPRINTF are not visible, so they are not described.
 *
 * @param hostReadIndexPointer forwarded to the HSAQueueDescriptor
 *        constructor.
 * @param basePointer forwarded to the HSAQueueDescriptor constructor.
 * @param queue_id ID of the queue; must be below MAX_ACTIVE_QUEUES.
 * @param size forwarded to the HSAQueueDescriptor constructor.
 * @param doorbellSize multiplied by queue_id to derive the doorbell offset
 *        when offset is 0; not used otherwise.
 * @param gfxVersion forwarded to the HSAQueueDescriptor constructor.
 * @param offset doorbell offset used as the key of dbMap; 0 means "derive
 *        it from queue_id and doorbellSize".
 * @param rd_idx when greater than 0, the ring buffer's read, write and
 *        dispatch indices are all initialised to this value.
 */
84void
85HWScheduler::registerNewQueue(uint64_t hostReadIndexPointer,
86 uint64_t basePointer,
87 uint64_t queue_id,
88 uint32_t size, int doorbellSize,
89 GfxVersion gfxVersion,
90 Addr offset, uint64_t rd_idx)
91{
92 assert(queue_id < MAX_ACTIVE_QUEUES);
93 // Map queue ID to doorbell.
94 // We are only using offset to pio base address as doorbell
95 // We use the same mapping function used by hsa runtime to do this mapping
96 if (!offset) {
97 offset = queue_id * doorbellSize;
98 }
99 if (dbMap.find(offset) != dbMap.end()) {
100 panic("Creating an already existing queue (queueID %d)", queue_id);
101 }
102
103 // Populate doorbell map
104 dbMap[offset] = queue_id;
105 qidMap[queue_id] = offset;
106
// NOTE(review): this range check repeats the assert at the top of the
// function and only runs after dbMap/qidMap have already been updated.
107 if (queue_id >= MAX_ACTIVE_QUEUES) {
108 panic("Attempting to create a queue (queueID %d)" \
109 " beyond PIO range", queue_id);
110 }
111
112 HSAQueueDescriptor* q_desc =
113 new HSAQueueDescriptor(basePointer, offset,
114 hostReadIndexPointer, size, gfxVersion);
115 AQLRingBuffer* aql_buf =
// A non-zero rd_idx seeds all three ring-buffer indices with the same value.
117 if (rd_idx > 0) {
118 aql_buf->setRdIdx(rd_idx);
119 aql_buf->setWrIdx(rd_idx);
120 aql_buf->setDispIdx(rd_idx);
121 }
122 DPRINTF(HSAPacketProcessor, "Setting read index for %#lx to %ld\n",
123 offset, rd_idx);
124
// dbMap[offset] was set to queue_id above, so the active-list key is the
// queue ID (narrowed to the uint32_t key type of dbMap's mapped value).
125 QCntxt q_cntxt(q_desc, aql_buf);
126 activeList[dbMap[offset]] = q_cntxt;
127
128 // Check if this newly created queue can be directly mapped
129 // to registered queue list
130 [[maybe_unused]] bool register_q =
131 mapQIfSlotAvlbl(queue_id, aql_buf, q_desc);
132 schedWakeup();
134 "%s: offset = %p, qID = %d, is_regd = %s, AL size %d\n",
135 __FUNCTION__, offset, queue_id,
136 (register_q) ? "true" : "false", dbMap.size());
137}
138
139bool
141{
143 "Trying to find empty HW queue, @ %s\n", __FUNCTION__);
144 if (regdListMap.size() < hsaPP->numHWQueues) {
145 for (int emptyQId = 0; emptyQId < hsaPP->numHWQueues; emptyQId++) {
146 HSAQueueDescriptor* qDesc =
148 // If qDesc is empty, we find an empty HW queue
149 if (qDesc == NULL) {
150 return true;
151 }
153 }
154 // We should be able to find an empty slot in registered list
155 // So, we should not reach here
156 panic("Cannot find empty queue\n");
157 }
158 return false;
159}
160
161bool
163 HSAQueueDescriptor* q_desc)
164{
166 "Trying to map new queue, @ %s\n", __FUNCTION__);
167 if (!findEmptyHWQ()) {
168 return false;
169 }
170 addQCntxt(q_id, aql_buf, q_desc);
172 updateRRVars(q_id, nextRLId);
173 return true;
174}
175
176void
178{
179 // There may be AQL packets in the mapped queue waiting
180 // to be fetched. Invoke the logic to fetch AQL packets
182 // Schedule the newly mapped queue
183 if (hsaPP->regdQList[nextRLId]->dispPending())
185}
186
/**
 * Record that active-list queue al_idx is now mapped to the hardware
 * (registered-list) slot selected by nextRLId.
 *
 * The slot must be free: both the qDesc and aqlBuf pointers of the
 * registered-list entry at nextRLId are asserted to be NULL on entry.
 *
 * NOTE(review): the statements following the "Move the context" comment
 * are not visible in this listing; presumably they store aql_buf and
 * q_desc into the registered-list entry -- confirm against the full file.
 *
 * @param al_idx key of the queue in activeList / regdListMap.
 * @param aql_buf ring buffer of the queue being mapped.
 * @param q_desc descriptor of the queue being mapped.
 */
187void
188HWScheduler::addQCntxt(uint32_t al_idx, AQLRingBuffer* aql_buf,
189 HSAQueueDescriptor* q_desc)
190{
191 assert(hsaPP->getRegdListEntry(nextRLId)->qCntxt.qDesc == NULL);
192 assert(hsaPP->getRegdListEntry(nextRLId)->qCntxt.aqlBuf == NULL);
193 // Move the context
196 // Add the mapping to registered list map
197 regdListMap[al_idx] = nextRLId;
198 DPRINTF(HSAPacketProcessor, "Mapped HSA queue %d to hw queue %d: @ %s\n",
199 al_idx, nextRLId, __FUNCTION__);
200}
201
202bool
204{
206 "Trying to map next queue, @ %s\n", __FUNCTION__);
207 // Identify the next queue, if there is nothing to
208 // map, return false
209 if (!findNextActiveALQ()) {
210 return false;
211 }
212 HSAQueueDescriptor* q_desc = activeList[nextALId].qDesc;
213 AQLRingBuffer* aql_buf = activeList[nextALId].aqlBuf;
214 // If there is an empty slot available, use that slot
215 if(mapQIfSlotAvlbl(nextALId, aql_buf, q_desc)) {
216 return true;
217 }
218 // There is no empty slot to map this queue. So, we need to
219 // unmap a queue from registered list and find a slot.
220 // If nothing can be unmapped now, return false
221 if (!unmapQFromRQ()) {
222 return false;
223 }
224 // One queue is unmapped from registered list and that queueID
225 // is stored in nextRLId. We will map this queue to that unmapped slot
226 addQCntxt(nextALId, aql_buf, q_desc);
229 return true;
230}
231
/**
 * Advance the two scheduler cursors past the given indices.
 *
 * nextALId becomes the active-list index following al_idx, wrapping at
 * MAX_ACTIVE_QUEUES; nextRLId becomes the registered-list index following
 * rl_idx, wrapping at hsaPP->numHWQueues.
 *
 * @param al_idx active-list index that was just used.
 * @param rl_idx registered-list (hardware queue) index that was just used.
 */
232void
233HWScheduler::updateRRVars(uint32_t al_idx, uint32_t rl_idx)
234{
235 nextALId = (al_idx + 1) % MAX_ACTIVE_QUEUES;
236 nextRLId = (rl_idx + 1) % hsaPP->numHWQueues;
237}
238
239bool
241{
242 // Identify the next idle queue, if there is no
243 // idle queue, we cannot unmap
244 if (!findNextIdleRLQ()) {
245 return false;
246 }
247 removeQCntxt();
248 return true;
249}
250
251void
253{
254 // The nextRLId gives the registered queue that is to be unmapped.
255 // We can find the corresponding queue_id from the doorbellPointer
256 Addr db_offset =
260 // Here, we are unmapping a queue without waiting for the outstanding
261 // dependency signal reads to complete. We will discard any outstanding
262 // reads and will reset the signal values here.
265 uint32_t al_idx = dbMap[db_offset];
266 assert(regdListMap[al_idx] == nextRLId);
267 // Unmap from regdListMap.
268 regdListMap.erase(al_idx);
269}
270
271bool
273{
274 for (int activeQId = 0; activeQId < MAX_ACTIVE_QUEUES; activeQId++) {
275 uint32_t al_id = (nextALId + activeQId) % MAX_ACTIVE_QUEUES;
276 auto aqlmap_iter = activeList.find(al_id);
277 if (aqlmap_iter != activeList.end()) {
278 // If this queue is already mapped
279 if (regdListMap.find(al_id) != regdListMap.end()) {
280 continue;
281 } else {
283 "Next Active ALQ %d (current %d), max ALQ %d\n",
285 nextALId = al_id;
286 return true;
287 }
288 }
289 }
290 return false;
291}
292
293bool
295{
296 for (int regdQId = 0; regdQId < hsaPP->numHWQueues; regdQId++) {
297 uint32_t rl_idx = (nextRLId + regdQId) % hsaPP->numHWQueues;
298 if (isRLQIdle(rl_idx)) {
299 nextRLId = rl_idx;
300 return true;
301 }
302 }
303 return false;
304}
305
306// This function could be moved to packet processor
307bool
309{
311 "@ %s, analyzing hw queue %d\n", __FUNCTION__, rl_idx);
313
314 // If there is a pending DMA to this registered queue
315 // then the queue is not idle
316 if (qDesc->dmaInProgress) {
317 return false;
318 }
319
320 // Since packet completion stage happens only after kernel completion
321 // we need to keep the queue mapped till all the outstanding kernels
322 // from that queue are finished
323 if (hsaPP->inFlightPkts(rl_idx)) {
324 return false;
325 }
326
327 return true;
328}
329
/**
 * Handle a doorbell write for the queue whose doorbell offset is db_addr.
 *
 * Panics if db_addr is not a key of dbMap. Otherwise the queue
 * descriptor's writeIndex is set to doorbell_reg and its readIndex to
 * doorbell_reg - 1, and the new values are logged.
 *
 * NOTE(review): the body of the final "if the queue is mapped" branch is
 * not visible in this listing, so the action taken there is not described
 * beyond the original comment.
 *
 * @param db_addr doorbell offset used to look up the queue in dbMap.
 * @param doorbell_reg value written to the doorbell; becomes the queue's
 *        new write index.
 */
330void
331HWScheduler::write(Addr db_addr, uint64_t doorbell_reg)
332{
333 auto dbmap_iter = dbMap.find(db_addr);
334 if (dbmap_iter == dbMap.end()) {
335 panic("Writing to a non-existing queue (db_offset %x)", db_addr);
336 }
337 uint32_t al_idx = dbMap[db_addr];
338 // Modify the write pointer
339 activeList[al_idx].qDesc->writeIndex = doorbell_reg;
340 // If a queue is unmapped and remapped (common in full system) the qDesc
341 // gets reused. Keep the readIndex up to date so that when the HSA packet
342 // processor gets commands from host, the correct entry is read after
343 // remapping.
// NOTE(review): doorbell_reg is unsigned, so a doorbell value of 0 makes
// this expression wrap around -- confirm callers never write 0 here.
344 activeList[al_idx].qDesc->readIndex = doorbell_reg - 1;
345 DPRINTF(HSAPacketProcessor, "q %d readIndex %d writeIndex %d\n",
346 al_idx, activeList[al_idx].qDesc->readIndex,
347 activeList[al_idx].qDesc->writeIndex);
348 // If this queue is mapped, then start DMA to fetch the
349 // AQL packet
350 if (regdListMap.find(al_idx) != regdListMap.end()) {
352 }
353}
354
/**
 * Tear down all scheduler state for the queue identified by queue_id.
 *
 * The doorbell offset is looked up in qidMap (queue_id is asserted to be
 * present) and removed from it; the function panics if that offset is not
 * in dbMap. A warning is issued when the queue descriptor reports that it
 * is not empty. The descriptor and ring buffer are then deleted and the
 * queue is erased from activeList and dbMap. If the queue was mapped to a
 * hardware queue, the registered-list entry's pointers are cleared and the
 * mapping is erased from regdListMap. schedWakeup() is called last.
 *
 * NOTE(review): some statements inside the final "if" are not visible in
 * this listing and are therefore not described.
 *
 * @param queue_id ID of the queue to destroy; must be a key of qidMap.
 * @param doorbellSize not used by any statement visible in this listing.
 */
355void
356HWScheduler::unregisterQueue(uint64_t queue_id, int doorbellSize)
357{
358 assert(qidMap.count(queue_id));
359 Addr db_offset = qidMap[queue_id];
360 qidMap.erase(queue_id);
361 auto dbmap_iter = dbMap.find(db_offset);
362 if (dbmap_iter == dbMap.end()) {
363 panic("Destroying a non-existing queue (db_offset %x)",
364 db_offset);
365 }
366 uint32_t al_idx = dbMap[db_offset];
367 assert(dbMap[db_offset] == dbmap_iter->second);
368 if (!activeList[al_idx].qDesc->isEmpty()) {
369 // The HSA runtime specification says that deleting
370 // a queue before it is fully processed can lead to undefined
371 // behavior and it is the application's responsibility to
372 // avoid this situation.
373 // Even completion signal is not a sufficient indication for a
374 // fully processed queue; for example completion signal may be
375 // asserted when a read pointer update is in progress
376 warn("Destroying a non-empty queue");
377 }
// The active-list entry holds the descriptor and ring buffer as raw
// pointers; they are released here before the entry itself is erased.
378 delete activeList[al_idx].qDesc;
379 delete activeList[al_idx].aqlBuf;
380 activeList.erase(al_idx);
381 // Unmap doorbell from doorbell map
382 dbMap.erase(db_offset);
383 if (regdListMap.find(al_idx) != regdListMap.end()) {
384 uint32_t rl_idx = regdListMap[al_idx];
// The objects were deleted above; clear the registered-list entry's copies
// of the pointers so the hardware slot reads as free.
385 hsaPP->getRegdListEntry(rl_idx)->qCntxt.aqlBuf = NULL;
386 hsaPP->getRegdListEntry(rl_idx)->qCntxt.qDesc = NULL;
389 assert(!hsaPP->getRegdListEntry(rl_idx)->aqlProcessEvent.scheduled());
390 regdListMap.erase(al_idx);
391 // A registered queue is released, let us try to map
392 // a queue to that slot
394 }
395 schedWakeup();
396}
397
398} // namespace gem5
#define DPRINTF(x,...)
Definition trace.hh:210
Internal ring buffer which is used to prefetch/store copies of the in-memory HSA ring buffer.
void setRdIdx(uint64_t value)
void setDispIdx(uint64_t value)
void setWrIdx(uint64_t value)
std::vector< class RQLEntry * > regdQList
uint64_t inFlightPkts(uint32_t queId)
void getCommandsFromHost(int pid, uint32_t rl_idx)
class RQLEntry * getRegdListEntry(uint32_t queId)
void schedAQLProcessing(uint32_t rl_idx)
void unregisterQueue(uint64_t queue_id, int doorbellSize)
SchedulerWakeupEvent schedWakeupEvent
void addQCntxt(uint32_t al_idx, AQLRingBuffer *aql_buf, HSAQueueDescriptor *q_desc)
void updateRRVars(uint32_t al_idx, uint32_t rl_idx)
void registerNewQueue(uint64_t hostReadIndexPointer, uint64_t basePointer, uint64_t queue_id, uint32_t size, int doorbellSize, GfxVersion gfxVersion, Addr offset=0, uint64_t rd_idx=0)
std::map< uint32_t, uint32_t > regdListMap
void scheduleAndWakeupMappedQ()
std::map< Addr, uint32_t > dbMap
std::map< uint32_t, QCntxt > activeList
bool isRLQIdle(uint32_t rl_idx)
const Tick wakeupDelay
void write(Addr db_addr, uint64_t doorbell_reg)
HSAPacketProcessor * hsaPP
std::map< uint64_t, Addr > qidMap
bool mapQIfSlotAvlbl(uint32_t al_idx, AQLRingBuffer *aql_buf, HSAQueueDescriptor *q_desc)
virtual std::string name() const
Definition named.hh:47
bool scheduled() const
Determine if the current event is scheduled.
Definition eventq.hh:458
void schedule(Event &event, Tick when)
Definition eventq.hh:1012
#define panic(...)
This implements a cprintf based panic() function.
Definition logging.hh:188
#define NUM_DMA_BUFS
#define HWSCHDLR_EVENT_DESCRIPTION_GENERATOR(XEVENT)
#define MAX_ACTIVE_QUEUES
#define warn(...)
Definition logging.hh:256
Bitfield< 23, 0 > offset
Definition types.hh:144
Copyright (c) 2024 - Pranith Kumar Copyright (c) 2020 Inria All rights reserved.
Definition binary32.hh:36
Tick curTick()
The universal simulation clock.
Definition cur_tick.hh:46
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition types.hh:147
AQLRingBuffer * aqlBuf
HSAQueueDescriptor * qDesc

Generated on Tue Jun 18 2024 16:24:03 for gem5 by doxygen 1.11.0