gem5  v22.1.0.0
hw_scheduler.cc
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2016-2017 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *
8  * 1. Redistributions of source code must retain the above copyright notice,
9  * this list of conditions and the following disclaimer.
10  *
11  * 2. Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation
13  * and/or other materials provided with the distribution.
14  *
15  * 3. Neither the name of the copyright holder nor the names of its
16  * contributors may be used to endorse or promote products derived from this
17  * software without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include "dev/hsa/hw_scheduler.hh"
33 
34 #include "base/compiler.hh"
35 #include "base/trace.hh"
36 #include "debug/HSAPacketProcessor.hh"
37 #include "sim/cur_tick.hh"
38 
// Generates the out-of-line definition of HWScheduler::XEVENT::description()
// for the nested event type XEVENT. The generated function returns the
// event type's name as a string literal (XEVENT is stringized with '#').
39 #define HWSCHDLR_EVENT_DESCRIPTION_GENERATOR(XEVENT) \
40  const char* \
41  HWScheduler::XEVENT::description() const \
42  { \
43  return #XEVENT; \
44  }
45 
46 namespace gem5
47 {
48 
// Defines HWScheduler::SchedulerWakeupEvent::description(), which returns
// the string "SchedulerWakeupEvent".
49 HWSCHDLR_EVENT_DESCRIPTION_GENERATOR(SchedulerWakeupEvent)
50 
// Forwards to the scheduler's wakeup() through the hwSchdlr pointer.
// NOTE(review): the listing's embedded numbering skips 52, so the signature
// line is not visible here; presumably this is the process() handler of
// HWScheduler::SchedulerWakeupEvent -- confirm against the upstream file.
51 void
53 {
54  hwSchdlr->wakeup();
55 }
56 
// Periodic scheduler wakeup; the last visible statement re-arms the wakeup
// through schedWakeup().
// NOTE(review): embedded lines 58 (signature) and 67 are missing from this
// listing. Presumably this is HWScheduler::wakeup() and line 67 performs the
// queue swap described by the comment below -- confirm against upstream.
57 void
59 {
60  // The scheduler unmaps an idle queue from the
61  // registered qList and maps a new queue
62  // to the registered list from the active list.
63  // For this implementation, an idle queue means
64  // a queue that does not have any outstanding dispatch
65  // at the time of this scheduler's wakeup.
66 
68  schedWakeup();
69 }
70 
// Requests another scheduler wakeup, but only when the wakeup event is not
// already scheduled and regdListMap holds fewer entries than activeList.
// The log message reports the target tick as curTick() + wakeupDelay.
// NOTE(review): embedded lines 72 (signature), 78 and 79 are missing from
// this listing. Presumably this is HWScheduler::schedWakeup() and the missing
// lines schedule schedWakeupEvent and open the DPRINTF call -- confirm.
71 void
73 {
74  // If there is at least one queue that is not registered,
75  // then wake up again
76  if (!schedWakeupEvent.scheduled() &&
77  regdListMap.size() < activeList.size()) {
80  "Scheduling wakeup at %lu\n", (curTick() + wakeupDelay));
81  }
82 }
83 
// Registers a new HSA queue with the scheduler.
//
// Records the doorbell-offset <-> queue-ID association in dbMap and qidMap,
// allocates an HSAQueueDescriptor and an AQLRingBuffer for the queue, stores
// them in activeList, tries to map the queue to a hardware queue slot right
// away, and finally calls schedWakeup().
//
// Parameters:
//   hostReadIndexPointer, basePointer, size, gfxVersion
//                 - forwarded unchanged to the HSAQueueDescriptor constructor.
//   queue_id      - ID of the new queue; must be below MAX_ACTIVE_QUEUES.
//   doorbellSize  - multiplied by queue_id to derive the doorbell offset when
//                   'offset' is zero.
//   offset        - doorbell offset used as the dbMap key; zero means
//                   "derive it from queue_id".
//   rd_idx        - when greater than zero, the ring buffer's read, write and
//                   dispatch indices are all initialised to this value.
//
// Panics if the doorbell offset is already present in dbMap.
//
// NOTE(review): embedded lines 116 (the AQLRingBuffer allocation expression)
// and 133 (the opening of the final DPRINTF) are missing from this listing.
84 void
85 HWScheduler::registerNewQueue(uint64_t hostReadIndexPointer,
86  uint64_t basePointer,
87  uint64_t queue_id,
88  uint32_t size, int doorbellSize,
89  GfxVersion gfxVersion,
90  Addr offset, uint64_t rd_idx)
91 {
92  assert(queue_id < MAX_ACTIVE_QUEUES);
93  // Map queue ID to doorbell.
94  // We are only using the offset to the PIO base address as the doorbell.
95  // We use the same mapping function used by the HSA runtime for this.
96  if (!offset) {
97  offset = queue_id * doorbellSize;
98  }
99  if (dbMap.find(offset) != dbMap.end()) {
100  panic("Creating an already existing queue (queueID %d)", queue_id);
101  }
102 
103  // Populate doorbell map
104  dbMap[offset] = queue_id;
105  qidMap[queue_id] = offset;
106 
 // NOTE(review): this range check repeats the assert at the top of the
 // function and only runs after dbMap/qidMap have already been updated;
 // consider moving it ahead of the map population.
107  if (queue_id >= MAX_ACTIVE_QUEUES) {
108  panic("Attempting to create a queue (queueID %d)" \
109  " beyond PIO range", queue_id);
110  }
111 
112  HSAQueueDescriptor* q_desc =
113  new HSAQueueDescriptor(basePointer, offset,
114  hostReadIndexPointer, size, gfxVersion);
115  AQLRingBuffer* aql_buf =
 // A non-zero starting read index is applied to all three ring buffer
 // indices (read, write, dispatch).
117  if (rd_idx > 0) {
118  aql_buf->setRdIdx(rd_idx);
119  aql_buf->setWrIdx(rd_idx);
120  aql_buf->setDispIdx(rd_idx);
121  }
122  DPRINTF(HSAPacketProcessor, "Setting read index for %#lx to %ld\n",
123  offset, rd_idx);
124 
 // dbMap[offset] was set to queue_id above, so the context is stored
 // under the queue ID.
125  QCntxt q_cntxt(q_desc, aql_buf);
126  activeList[dbMap[offset]] = q_cntxt;
127 
128  // Check if this newly created queue can be directly mapped
129  // to the registered queue list
130  [[maybe_unused]] bool register_q =
131  mapQIfSlotAvlbl(queue_id, aql_buf, q_desc);
132  schedWakeup();
134  "%s: offset = %p, qID = %d, is_regd = %s, AL size %d\n",
135  __FUNCTION__, offset, queue_id,
136  (register_q) ? "true" : "false", dbMap.size());
137 }
138 
// Searches for an empty hardware queue slot, advancing nextRLId (modulo
// hsaPP->numHWQueues) past each occupied slot that is examined.
// Returns true with nextRLId left at the empty slot; returns false when
// regdListMap already holds hsaPP->numHWQueues entries. Panics if the map
// claims free capacity but no slot with a NULL queue descriptor is found.
// NOTE(review): embedded lines 140 (signature), 142 (DPRINTF opener) and
// 147 (the expression that initialises qDesc) are missing from this listing.
// Presumably this is HWScheduler::findEmptyHWQ() -- confirm against upstream.
139 bool
141 {
143  "Trying to find empty HW queue, @ %s\n", __FUNCTION__);
144  if (regdListMap.size() < hsaPP->numHWQueues) {
145  for (int emptyQId = 0; emptyQId < hsaPP->numHWQueues; emptyQId++) {
146  HSAQueueDescriptor* qDesc =
148  // If qDesc is empty, we have found an empty HW queue
149  if (qDesc == NULL) {
150  return true;
151  }
152  nextRLId = (nextRLId + 1) % hsaPP->numHWQueues;
153  }
154  // We should be able to find an empty slot in the registered list,
155  // so we should not reach here
156  panic("Cannot find empty queue\n");
157  }
158  return false;
159 }
160 
// Maps the given queue to a hardware queue slot if one is free.
// Returns false when findEmptyHWQ() finds no slot; otherwise installs the
// queue context with addQCntxt(), advances the round-robin cursors with
// updateRRVars() and returns true.
// NOTE(review): embedded lines 162 (start of the signature), 165 (DPRINTF
// opener) and 171 are missing from this listing. Presumably this is
// HWScheduler::mapQIfSlotAvlbl(q_id, aql_buf, q_desc) -- confirm.
161 bool
163  HSAQueueDescriptor* q_desc)
164 {
166  "Trying to map new queue, @ %s\n", __FUNCTION__);
167  if (!findEmptyHWQ()) {
168  return false;
169  }
170  addQCntxt(q_id, aql_buf, q_desc);
172  updateRRVars(q_id, nextRLId);
173  return true;
174 }
175 
// Kicks off processing for the hardware queue currently selected by nextRLId.
// NOTE(review): embedded lines 177 (signature), 181 and 184 are missing from
// this listing, so neither the fetch call nor the body of the 'if' below is
// visible. Presumably this is HWScheduler::scheduleAndWakeupMappedQ() and the
// missing lines call into hsaPP -- confirm against upstream.
176 void
178 {
179  // There may be AQL packets in the mapped queue waiting
180  // to be fetched. Invoke the logic to fetch AQL packets
182  // Schedule the newly mapped queue
183  if (hsaPP->regdQList[nextRLId]->dispPending())
185 }
186 
// Installs a queue context into the hardware queue slot selected by nextRLId
// and records the mapping al_idx -> nextRLId in regdListMap.
//
// Parameters:
//   al_idx  - key of the queue in the active list / regdListMap.
//   aql_buf - ring buffer belonging to the queue.
//   q_desc  - descriptor belonging to the queue.
//
// Asserts that the target slot's qDesc and aqlBuf are both NULL on entry.
// NOTE(review): embedded lines 194-195 are missing from this listing;
// presumably they store aql_buf and q_desc into the slot -- confirm.
187 void
188 HWScheduler::addQCntxt(uint32_t al_idx, AQLRingBuffer* aql_buf,
189  HSAQueueDescriptor* q_desc)
190 {
191  assert(hsaPP->getRegdListEntry(nextRLId)->qCntxt.qDesc == NULL);
192  assert(hsaPP->getRegdListEntry(nextRLId)->qCntxt.aqlBuf == NULL);
193  // Move the context
196  // Add the mapping to the registered list map
197  regdListMap[al_idx] = nextRLId;
198  DPRINTF(HSAPacketProcessor, "Mapped HSA queue %d to hw queue %d: @ %s\n",
199  al_idx, nextRLId, __FUNCTION__);
200 }
201 
// Tries to map the next unmapped active queue to a hardware queue slot.
// Returns false when there is no unmapped active queue, or when no slot is
// free and no registered queue can be unmapped; returns true once the queue
// has been mapped, either into a free slot or into a slot freed by
// unmapQFromRQ().
// NOTE(review): embedded lines 203 (signature), 205 (DPRINTF opener) and
// 227-228 are missing from this listing. Presumably this is
// HWScheduler::contextSwitchQ() -- confirm against upstream.
202 bool
204 {
206  "Trying to map next queue, @ %s\n", __FUNCTION__);
207  // Identify the next queue; if there is nothing to
208  // map, return false
209  if (!findNextActiveALQ()) {
210  return false;
211  }
212  HSAQueueDescriptor* q_desc = activeList[nextALId].qDesc;
213  AQLRingBuffer* aql_buf = activeList[nextALId].aqlBuf;
214  // If there is an empty slot available, use that slot
215  if(mapQIfSlotAvlbl(nextALId, aql_buf, q_desc)) {
216  return true;
217  }
218  // There is no empty slot to map this queue. So, we need to
219  // unmap a queue from the registered list and find a slot.
220  // If nothing can be unmapped now, return false
221  if (!unmapQFromRQ()) {
222  return false;
223  }
224  // One queue is unmapped from the registered list and that slot's index
225  // is stored in nextRLId. We will map this queue to that unmapped slot
226  addQCntxt(nextALId, aql_buf, q_desc);
229  return true;
230 }
231 
232 void
233 HWScheduler::updateRRVars(uint32_t al_idx, uint32_t rl_idx)
234 {
235  nextALId = (al_idx + 1) % MAX_ACTIVE_QUEUES;
236  nextRLId = (rl_idx + 1) % hsaPP->numHWQueues;
237 }
238 
// Unmaps one idle queue from the registered list.
// Returns false when findNextIdleRLQ() finds no idle registered queue;
// otherwise calls removeQCntxt() and returns true.
// NOTE(review): embedded line 240 (signature) is missing from this listing;
// presumably this is HWScheduler::unmapQFromRQ() -- confirm against upstream.
239 bool
241 {
242  // Identify the next idle queue; if there is no
243  // idle queue, we cannot unmap
244  if (!findNextIdleRLQ()) {
245  return false;
246  }
247  removeQCntxt();
248  return true;
249 }
250 
// Removes the regdListMap entry of the queue occupying slot nextRLId.
// The active-list index is looked up in dbMap from the slot's doorbell
// offset, and the function asserts that regdListMap maps that index back to
// nextRLId before erasing it.
// NOTE(review): embedded lines 252 (signature), 257-259 (the expression that
// initialises db_offset) and 263-264 are missing from this listing.
// Presumably this is HWScheduler::removeQCntxt() -- confirm against upstream.
251 void
253 {
254  // The nextRLId gives the registered queue that is to be unmapped.
255  // We can find the corresponding queue_id from the doorbellPointer
256  Addr db_offset =
260  // Here, we are unmapping a queue without waiting for the outstanding
261  // dependency signal reads to complete. We will discard any outstanding
262  // reads and will reset the signal values here.
265  uint32_t al_idx = dbMap[db_offset];
266  assert(regdListMap[al_idx] == nextRLId);
267  // Unmap from regdListMap.
268  regdListMap.erase(al_idx);
269 }
270 
// Scans up to MAX_ACTIVE_QUEUES active-list indices, starting at nextALId and
// wrapping around, for a queue that is in activeList but not in regdListMap.
// On success nextALId is set to that index and true is returned; false is
// returned when every active queue is already mapped (or none exists).
// NOTE(review): embedded lines 272 (signature) and 282 (DPRINTF opener) are
// missing from this listing. Presumably this is
// HWScheduler::findNextActiveALQ() -- confirm against upstream.
271 bool
273 {
274  for (int activeQId = 0; activeQId < MAX_ACTIVE_QUEUES; activeQId++) {
275  uint32_t al_id = (nextALId + activeQId) % MAX_ACTIVE_QUEUES;
276  auto aqlmap_iter = activeList.find(al_id);
277  if (aqlmap_iter != activeList.end()) {
278  // If this queue is already mapped, skip it
279  if (regdListMap.find(al_id) != regdListMap.end()) {
280  continue;
281  } else {
283  "Next Active ALQ %d (current %d), max ALQ %d\n",
284  al_id, nextALId, MAX_ACTIVE_QUEUES);
285  nextALId = al_id;
286  return true;
287  }
288  }
289  }
290  return false;
291 }
292 
// Scans all hsaPP->numHWQueues registered-list slots, starting at nextRLId
// and wrapping around, for one that isRLQIdle() reports as idle.
// On success nextRLId is set to that slot and true is returned; otherwise
// false is returned and nextRLId is left unchanged.
// NOTE(review): embedded line 294 (signature) is missing from this listing;
// presumably this is HWScheduler::findNextIdleRLQ() -- confirm.
293 bool
295 {
296  for (int regdQId = 0; regdQId < hsaPP->numHWQueues; regdQId++) {
297  uint32_t rl_idx = (nextRLId + regdQId) % hsaPP->numHWQueues;
298  if (isRLQIdle(rl_idx)) {
299  nextRLId = rl_idx;
300  return true;
301  }
302  }
303  return false;
304 }
305 
306 // This function could be moved to the packet processor.
// Reports whether the registered (hardware) queue rl_idx is idle.
// Returns false if the queue's descriptor has a DMA in progress or if
// hsaPP->inFlightPkts(rl_idx) is non-zero; returns true otherwise.
// NOTE(review): embedded lines 310 (DPRINTF opener) and 312 (the declaration
// of qDesc) are missing from this listing.
307 bool
308 HWScheduler::isRLQIdle(uint32_t rl_idx)
309 {
311  "@ %s, analyzing hw queue %d\n", __FUNCTION__, rl_idx);
313 
314  // If there is a pending DMA to this registered queue
315  // then the queue is not idle
316  if (qDesc->dmaInProgress) {
317  return false;
318  }
319 
320  // Since the packet completion stage happens only after kernel completion,
321  // we need to keep the queue mapped till all the outstanding kernels
322  // from that queue are finished
323  if (hsaPP->inFlightPkts(rl_idx)) {
324  return false;
325  }
326 
327  return true;
328 }
329 
// Handles a doorbell write for the queue registered at doorbell offset
// db_addr.
//
// Parameters:
//   db_addr      - doorbell offset; must be a key of dbMap, otherwise the
//                  function panics.
//   doorbell_reg - value written to the doorbell; stored as the queue
//                  descriptor's writeIndex, and doorbell_reg - 1 is stored as
//                  its readIndex.
//
// NOTE(review): embedded line 351 (the body of the final 'if') is missing
// from this listing; presumably it asks hsaPP to fetch commands for the
// mapped hardware queue -- confirm against upstream.
330 void
331 HWScheduler::write(Addr db_addr, uint64_t doorbell_reg)
332 {
333  auto dbmap_iter = dbMap.find(db_addr);
334  if (dbmap_iter == dbMap.end()) {
335  panic("Writing to a non-existing queue (db_offset %x)", db_addr);
336  }
 // NOTE(review): this is a second lookup of the same key; dbmap_iter->second
 // already holds the value.
337  uint32_t al_idx = dbMap[db_addr];
338  // Modify the write pointer
339  activeList[al_idx].qDesc->writeIndex = doorbell_reg;
340  // If a queue is unmapped and remapped (common in full system) the qDesc
341  // gets reused. Keep the readIndex up to date so that when the HSA packet
342  // processor gets commands from host, the correct entry is read after
343  // remapping.
 // NOTE(review): doorbell_reg is unsigned, so a value of 0 wraps here --
 // confirm that callers never write 0.
344  activeList[al_idx].qDesc->readIndex = doorbell_reg - 1;
345  DPRINTF(HSAPacketProcessor, "q %d readIndex %d writeIndex %d\n",
346  al_idx, activeList[al_idx].qDesc->readIndex,
347  activeList[al_idx].qDesc->writeIndex);
348  // If this queue is mapped, then start DMA to fetch the
349  // AQL packet
350  if (regdListMap.find(al_idx) != regdListMap.end()) {
352  }
353 }
354 
// Destroys the queue identified by queue_id.
//
// Removes the queue from qidMap, dbMap and activeList and deletes its
// descriptor and ring buffer. If the queue was mapped to a hardware queue
// slot, the slot's context pointers are cleared, the regdListMap entry is
// erased and contextSwitchQ() is called to try to fill the freed slot.
// Always finishes with schedWakeup().
//
// Parameters:
//   queue_id     - ID of the queue to destroy; asserted to be in qidMap.
//   doorbellSize - not referenced on any line visible in this listing.
//
// Panics if the queue's doorbell offset is not in dbMap; only warns if the
// queue is not empty.
//
// NOTE(review): embedded lines 387-388 are missing from this listing.
355 void
356 HWScheduler::unregisterQueue(uint64_t queue_id, int doorbellSize)
357 {
358  assert(qidMap.count(queue_id));
359  Addr db_offset = qidMap[queue_id];
360  qidMap.erase(queue_id);
361  auto dbmap_iter = dbMap.find(db_offset);
362  if (dbmap_iter == dbMap.end()) {
363  panic("Destroying a non-existing queue (db_offset %x)",
364  db_offset);
365  }
366  uint32_t al_idx = dbMap[db_offset];
367  assert(dbMap[db_offset] == dbmap_iter->second);
368  if (!activeList[al_idx].qDesc->isEmpty()) {
369  // According to the HSA runtime specification, deleting
370  // a queue before it is fully processed can lead to undefined
371  // behavior and it is the application's responsibility to
372  // avoid this situation.
373  // Even a completion signal is not a sufficient indication of a
374  // fully processed queue; for example, the completion signal may be
375  // asserted while a read pointer update is in progress
376  warn("Destroying a non-empty queue");
377  }
 // The descriptor and ring buffer are deleted here, before the slot's
 // pointers to them are cleared further down.
378  delete activeList[al_idx].qDesc;
379  delete activeList[al_idx].aqlBuf;
380  activeList.erase(al_idx);
381  // Unmap doorbell from doorbell map
382  dbMap.erase(db_offset);
383  if (regdListMap.find(al_idx) != regdListMap.end()) {
384  uint32_t rl_idx = regdListMap[al_idx];
385  hsaPP->getRegdListEntry(rl_idx)->qCntxt.aqlBuf = NULL;
386  hsaPP->getRegdListEntry(rl_idx)->qCntxt.qDesc = NULL;
389  assert(!hsaPP->getRegdListEntry(rl_idx)->aqlProcessEvent.scheduled());
390  regdListMap.erase(al_idx);
391  // A registered queue is released, let us try to map
392  // a queue to that slot
393  contextSwitchQ();
394  }
395  schedWakeup();
396 }
397 
398 } // namespace gem5
#define DPRINTF(x,...)
Definition: trace.hh:186
Internal ring buffer which is used to prefetch/store copies of the in-memory HSA ring buffer.
void setRdIdx(uint64_t value)
void setDispIdx(uint64_t value)
void setWrIdx(uint64_t value)
std::vector< class RQLEntry * > regdQList
uint64_t inFlightPkts(uint32_t queId)
void getCommandsFromHost(int pid, uint32_t rl_idx)
class RQLEntry * getRegdListEntry(uint32_t queId)
void schedAQLProcessing(uint32_t rl_idx)
void unregisterQueue(uint64_t queue_id, int doorbellSize)
SchedulerWakeupEvent schedWakeupEvent
void addQCntxt(uint32_t al_idx, AQLRingBuffer *aql_buf, HSAQueueDescriptor *q_desc)
void updateRRVars(uint32_t al_idx, uint32_t rl_idx)
void registerNewQueue(uint64_t hostReadIndexPointer, uint64_t basePointer, uint64_t queue_id, uint32_t size, int doorbellSize, GfxVersion gfxVersion, Addr offset=0, uint64_t rd_idx=0)
Definition: hw_scheduler.cc:85
std::map< uint32_t, uint32_t > regdListMap
void scheduleAndWakeupMappedQ()
std::map< Addr, uint32_t > dbMap
Definition: hw_scheduler.hh:95
std::map< uint32_t, QCntxt > activeList
Definition: hw_scheduler.hh:92
bool isRLQIdle(uint32_t rl_idx)
const Tick wakeupDelay
void write(Addr db_addr, uint64_t doorbell_reg)
HSAPacketProcessor * hsaPP
std::map< uint64_t, Addr > qidMap
Definition: hw_scheduler.hh:97
bool mapQIfSlotAvlbl(uint32_t al_idx, AQLRingBuffer *aql_buf, HSAQueueDescriptor *q_desc)
virtual std::string name() const
Definition: named.hh:47
bool scheduled() const
Determine if the current event is scheduled.
Definition: eventq.hh:465
void schedule(Event &event, Tick when)
Definition: eventq.hh:1019
#define panic(...)
This implements a cprintf based panic() function.
Definition: logging.hh:178
#define NUM_DMA_BUFS
#define HWSCHDLR_EVENT_DESCRIPTION_GENERATOR(XEVENT)
Definition: hw_scheduler.cc:39
#define MAX_ACTIVE_QUEUES
Definition: hw_scheduler.hh:45
#define warn(...)
Definition: logging.hh:246
Bitfield< 23, 0 > offset
Definition: types.hh:144
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
Tick curTick()
The universal simulation clock.
Definition: cur_tick.hh:46
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition: types.hh:147
AQLRingBuffer * aqlBuf
HSAQueueDescriptor * qDesc

Generated on Wed Dec 21 2022 10:22:33 for gem5 by doxygen 1.9.1