gem5  v21.0.0.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
hw_scheduler.cc
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2016-2017 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * For use for simulation and test purposes only
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright notice,
11  * this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright notice,
14  * this list of conditions and the following disclaimer in the documentation
15  * and/or other materials provided with the distribution.
16  *
17  * 3. Neither the name of the copyright holder nor the names of its
18  * contributors may be used to endorse or promote products derived from this
19  * software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 #include "dev/hsa/hw_scheduler.hh"
35 
36 #include "debug/HSAPacketProcessor.hh"
37 #include "mem/packet_access.hh"
38 
39 #define HWSCHDLR_EVENT_DESCRIPTION_GENERATOR(XEVENT) \
40  const char* \
41  HWScheduler::XEVENT::description() const \
42  { \
43  return #XEVENT; \
44  }
45 
46 HWSCHDLR_EVENT_DESCRIPTION_GENERATOR(SchedulerWakeupEvent)
47 
48 void
50 {
51  hwSchdlr->wakeup();
52 }
53 
54 void
56 {
57  // The scheduler unmaps an idle queue from the
58  // registered qList and maps a new queue
59  // to the registered list from the active list.
60  // For this implementation, an idle queue means
61  // a queue that does not have any outstanding dispatch
62  // at the time of this scheduler's wakeup
63 
65  schedWakeup();
66 }
67 
68 void
70 {
71  // If there is at least one queue that is not registered,
72  // then wake up again
73  if (!schedWakeupEvent.scheduled() &&
74  regdListMap.size() < activeList.size()) {
77  "Scheduling wakeup at %lu\n", (curTick() + wakeupDelay));
78  }
79 }
80 
81 void
82 HWScheduler::registerNewQueue(uint64_t hostReadIndexPointer,
83  uint64_t basePointer,
84  uint64_t queue_id,
85  uint32_t size)
86 {
87  assert(queue_id < MAX_ACTIVE_QUEUES);
88  // Map queue ID to doorbell.
89  // We are only using offset to pio base address as doorbell
90  // We use the same mapping function used by hsa runtime to do this mapping
91  //
92  // Originally
93  // #define VOID_PTR_ADD32(ptr,n)
94  // (void*)((uint32_t*)(ptr) + n)/*ptr + offset*/
95  // (Addr)VOID_PTR_ADD32(0, queue_id)
96  Addr db_offset = sizeof(uint32_t)*queue_id;
97  if (dbMap.find(db_offset) != dbMap.end()) {
98  panic("Creating an already existing queue (queueID %d)", queue_id);
99  }
100 
101  // Populate doorbell map
102  dbMap[db_offset] = queue_id;
103 
104  if (queue_id >= MAX_ACTIVE_QUEUES) {
105  panic("Attempting to create a queue (queueID %d)" \
106  " beyond PIO range", queue_id);
107  }
108 
109  HSAQueueDescriptor* q_desc =
110  new HSAQueueDescriptor(basePointer, db_offset,
111  hostReadIndexPointer, size);
112  AQLRingBuffer* aql_buf =
114  QCntxt q_cntxt(q_desc, aql_buf);
115  activeList[dbMap[db_offset]] = q_cntxt;
116 
117  // Check if this newly created queue can be directly mapped
118  // to registered queue list
119  M5_VAR_USED bool register_q = mapQIfSlotAvlbl(queue_id, aql_buf, q_desc);
120  schedWakeup();
122  "%s: offset = %p, qID = %d, is_regd = %s, AL size %d\n",
123  __FUNCTION__, db_offset, queue_id,
124  (register_q) ? "true" : "false", dbMap.size());
125 }
126 
127 bool
129 {
131  "Trying to find empty HW queue, @ %s\n", __FUNCTION__);
132  if (regdListMap.size() < hsaPP->numHWQueues) {
133  for (int emptyQId = 0; emptyQId < hsaPP->numHWQueues; emptyQId++) {
134  HSAQueueDescriptor* qDesc =
136  // If qDesc is empty, we find an empty HW queue
137  if (qDesc == NULL) {
138  return true;
139  }
140  nextRLId = (nextRLId + 1) % hsaPP->numHWQueues;
141  }
142  // We should be able to find an empty slot in registered list
143  // So, we should not reach here
144  panic("Cannot find empty queue\n");
145  }
146  return false;
147 }
148 
149 bool
151  HSAQueueDescriptor* q_desc)
152 {
154  "Trying to map new queue, @ %s\n", __FUNCTION__);
155  if (!findEmptyHWQ()) {
156  return false;
157  }
158  addQCntxt(q_id, aql_buf, q_desc);
160  updateRRVars(q_id, nextRLId);
161  return true;
162 }
163 
164 void
166 {
167  // There may be AQL packets in the mapped queue waiting
168  // to be fetched. Invoke the logic to fetch AQL packets
170  // Schedule the newly mapped queue
171  if (hsaPP->regdQList[nextRLId]->dispPending())
173 }
174 
175 void
176 HWScheduler::addQCntxt(uint32_t al_idx, AQLRingBuffer* aql_buf,
177  HSAQueueDescriptor* q_desc)
178 {
179  assert(hsaPP->getRegdListEntry(nextRLId)->qCntxt.qDesc == NULL);
180  assert(hsaPP->getRegdListEntry(nextRLId)->qCntxt.aqlBuf == NULL);
181  // Move the context
184  // Add the mapping to registered list map
185  regdListMap[al_idx] = nextRLId;
186  DPRINTF(HSAPacketProcessor, "Mapped HSA queue %d to hw queue %d: @ %s\n",
187  al_idx, nextRLId, __FUNCTION__);
188 }
189 
190 bool
192 {
194  "Trying to map next queue, @ %s", __FUNCTION__);
195  // Identify the next queue, if there is nothing to
196  // map, return false
197  if (!findNextActiveALQ()) {
198  return false;
199  }
200  HSAQueueDescriptor* q_desc = activeList[nextALId].qDesc;
201  AQLRingBuffer* aql_buf = activeList[nextALId].aqlBuf;
202  // If there is an empty slot available, use that slot
203  if(mapQIfSlotAvlbl(nextALId, aql_buf, q_desc)) {
204  return true;
205  }
206  // There is no empty slot to map this queue. So, we need to
207  // unmap a queue from registered list and find a slot.
208  // If nothing can be unmapped now, return false
209  if (!unmapQFromRQ()) {
210  return false;
211  }
212  // One queue is unmapped from registered list and that queueID
213  // is stored in nextRLId. We will map this queue to that unmapped slot
214  addQCntxt(nextALId, aql_buf, q_desc);
217  return true;
218 }
219 
220 void
221 HWScheduler::updateRRVars(uint32_t al_idx, uint32_t rl_idx)
222 {
223  nextALId = (al_idx + 1) % MAX_ACTIVE_QUEUES;
224  nextRLId = (rl_idx + 1) % hsaPP->numHWQueues;
225 }
226 
227 bool
229 {
230  // Identify the next idle queue, if there is no
231  // idle queue, we cannot unmap
232  if (!findNextIdleRLQ()) {
233  return false;
234  }
235  removeQCntxt();
236  return true;
237 }
238 
239 void
241 {
242  // The nextRLId gives the registered queue that is to be unmapped.
243  // We can find the corresponding queue_id from the doorbellPointer
244  Addr db_offset =
248  // Here, we are unmapping a queue without waiting for the outstanding
249  // dependency signal reads to complete. We will discard any outstanding
250  // reads and will reset the signal values here.
253  uint32_t al_idx = dbMap[db_offset];
254  assert(regdListMap[al_idx] == nextRLId);
255  // Unmap from regdListMap.
256  regdListMap.erase(al_idx);
257 }
258 
259 bool
261 {
262  for (int activeQId = 0; activeQId < MAX_ACTIVE_QUEUES; activeQId++) {
263  uint32_t al_id = (nextALId + activeQId) % MAX_ACTIVE_QUEUES;
264  auto aqlmap_iter = activeList.find(al_id);
265  if (aqlmap_iter != activeList.end()) {
266  // If this queue is already mapped
267  if (regdListMap.find(al_id) != regdListMap.end()) {
268  continue;
269  } else {
271  "Next Active ALQ %d (current %d), max ALQ %d\n",
272  al_id, nextALId, MAX_ACTIVE_QUEUES);
273  nextALId = al_id;
274  return true;
275  }
276  }
277  }
278  return false;
279 }
280 
281 bool
283 {
284  for (int regdQId = 0; regdQId < hsaPP->numHWQueues; regdQId++) {
285  uint32_t rl_idx = (nextRLId + regdQId) % hsaPP->numHWQueues;
286  if (isRLQIdle(rl_idx)) {
287  nextRLId = rl_idx;
288  return true;
289  }
290  }
291  return false;
292 }
293 
294 // This function could be moved to packet processor
295 bool
296 HWScheduler::isRLQIdle(uint32_t rl_idx)
297 {
299  "@ %s, analyzing hw queue %d\n", __FUNCTION__, rl_idx);
301 
303  // If there is a pending DMA to this registered queue
303  // then the queue is not idle
304  if (qDesc->dmaInProgress) {
305  return false;
306  }
307 
308  // Since packet completion stage happens only after kernel completion
309  // we need to keep the queue mapped till all the outstanding kernels
310  // from that queue are finished
311  if (hsaPP->inFlightPkts(rl_idx)) {
312  return false;
313  }
314 
315  return true;
316 }
317 
318 void
319 HWScheduler::write(Addr db_addr, uint32_t doorbell_reg)
320 {
321  auto dbmap_iter = dbMap.find(db_addr);
322  if (dbmap_iter == dbMap.end()) {
323  panic("Writing to a non-existing queue (db_offset %x)", db_addr);
324  }
325  uint32_t al_idx = dbMap[db_addr];
326  // Modify the write pointer
327  activeList[al_idx].qDesc->writeIndex = doorbell_reg;
328  // If this queue is mapped, then start DMA to fetch the
329  // AQL packet
330  if (regdListMap.find(al_idx) != regdListMap.end()) {
332  }
333 }
334 
335 void
337 {
338  // Pointer arithmetic on a null pointer is undefined behavior. Clang
339  // compilers therefore complain if the following reads:
340  // `(Addr)(VOID_PTR_ADD32(0, queue_id))`
341  //
342  // Originally
343  // #define VOID_PTR_ADD32(ptr,n)
344  // (void*)((uint32_t*)(ptr) + n)/*ptr + offset*/
345  // (Addr)VOID_PTR_ADD32(0, queue_id)
346  Addr db_offset = sizeof(uint32_t)*queue_id;
347  auto dbmap_iter = dbMap.find(db_offset);
348  if (dbmap_iter == dbMap.end()) {
349  panic("Destroying a non-existing queue (db_offset %x)",
350  db_offset);
351  }
352  uint32_t al_idx = dbMap[db_offset];
353  assert(dbMap[db_offset] == dbmap_iter->second);
354  if (!activeList[al_idx].qDesc->isEmpty()) {
355  // The HSA runtime specification says that deleting
356  // a queue before it is fully processed can lead to undefined
357  // behavior and it is the application's responsibility to
358  // avoid this situation.
359  // Even a completion signal is not a sufficient indication of a
360  // fully processed queue; for example, the completion signal may be
361  // asserted when a read pointer update is in progress
362  warn("Destroying a non-empty queue");
363  }
364  delete activeList[al_idx].qDesc;
365  delete activeList[al_idx].aqlBuf;
366  activeList.erase(al_idx);
367  // Unmap doorbell from doorbell map
368  dbMap.erase(db_offset);
369  if (regdListMap.find(al_idx) != regdListMap.end()) {
370  uint32_t rl_idx = regdListMap[al_idx];
371  hsaPP->getRegdListEntry(rl_idx)->qCntxt.aqlBuf = NULL;
372  hsaPP->getRegdListEntry(rl_idx)->qCntxt.qDesc = NULL;
375  assert(!hsaPP->getRegdListEntry(rl_idx)->aqlProcessEvent.scheduled());
376  regdListMap.erase(al_idx);
377  // A registered queue is released, let us try to map
378  // a queue to that slot
379  contextSwitchQ();
380  }
381  schedWakeup();
382 }
HWScheduler::registerNewQueue
void registerNewQueue(uint64_t hostReadIndexPointer, uint64_t basePointer, uint64_t queue_id, uint32_t size)
Definition: hw_scheduler.cc:82
HWScheduler::mapQIfSlotAvlbl
bool mapQIfSlotAvlbl(uint32_t al_idx, AQLRingBuffer *aql_buf, HSAQueueDescriptor *q_desc)
Definition: hw_scheduler.cc:150
HSAPacketProcessor::numHWQueues
int numHWQueues
Definition: hsa_packet_processor.hh:312
Event::scheduled
bool scheduled() const
Determine if the current event is scheduled.
Definition: eventq.hh:462
warn
#define warn(...)
Definition: logging.hh:239
HWScheduler::dbMap
std::map< Addr, uint32_t > dbMap
Definition: hw_scheduler.hh:86
HWScheduler::unregisterQueue
void unregisterQueue(uint64_t queue_id)
Definition: hw_scheduler.cc:336
HWScheduler::scheduleAndWakeupMappedQ
void scheduleAndWakeupMappedQ()
Definition: hw_scheduler.cc:165
HWScheduler::unmapQFromRQ
bool unmapQFromRQ()
Definition: hw_scheduler.cc:228
HWScheduler::schedWakeup
void schedWakeup()
Definition: hw_scheduler.cc:69
HSAQueueDescriptor::dmaInProgress
bool dmaInProgress
Definition: hsa_packet_processor.hh:81
HWScheduler::findNextActiveALQ
bool findNextActiveALQ()
Definition: hw_scheduler.cc:260
hw_scheduler.hh
HSAQueueDescriptor
Definition: hsa_packet_processor.hh:72
HWScheduler::findEmptyHWQ
bool findEmptyHWQ()
Definition: hw_scheduler.cc:128
AQLRingBuffer
Internal ring buffer which is used to prefetch/store copies of the in-memory HSA ring buffer.
Definition: hsa_packet_processor.hh:117
HWSCHDLR_EVENT_DESCRIPTION_GENERATOR
#define HWSCHDLR_EVENT_DESCRIPTION_GENERATOR(XEVENT)
Definition: hw_scheduler.cc:39
HWScheduler::activeList
std::map< uint32_t, QCntxt > activeList
Definition: hw_scheduler.hh:83
HSAPacketProcessor::getCommandsFromHost
void getCommandsFromHost(int pid, uint32_t rl_idx)
Definition: hsa_packet_processor.cc:510
HWScheduler::updateRRVars
void updateRRVars(uint32_t al_idx, uint32_t rl_idx)
Definition: hw_scheduler.cc:221
EventManager::schedule
void schedule(Event &event, Tick when)
Definition: eventq.hh:1016
QueueContext::aqlBuf
AQLRingBuffer * aqlBuf
Definition: hsa_packet_processor.hh:202
HSAPacketProcessor
Definition: hsa_packet_processor.hh:212
DPRINTF
#define DPRINTF(x,...)
Definition: trace.hh:237
HWScheduler::isRLQIdle
bool isRLQIdle(uint32_t rl_idx)
Definition: hw_scheduler.cc:296
HSAPacketProcessor::SignalState::resetSigVals
void resetSigVals()
Definition: hsa_packet_processor.hh:240
HSAPacketProcessor::regdQList
std::vector< class RQLEntry * > regdQList
Definition: hsa_packet_processor.hh:279
HSAPacketProcessor::SignalState::discardRead
bool discardRead
Definition: hsa_packet_processor.hh:236
HWScheduler::wakeupDelay
const Tick wakeupDelay
Definition: hw_scheduler.hh:100
HWScheduler::nextRLId
uint32_t nextRLId
Definition: hw_scheduler.hh:99
NUM_DMA_BUFS
#define NUM_DMA_BUFS
Definition: hsa_packet_processor.hh:50
HWScheduler::SchedulerWakeupEvent::process
virtual void process()
Definition: hw_scheduler.cc:49
HSAQueueDescriptor::doorbellPointer
uint64_t doorbellPointer
Definition: hsa_packet_processor.hh:75
Addr
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
Definition: types.hh:148
QueueContext::qDesc
HSAQueueDescriptor * qDesc
Definition: hsa_packet_processor.hh:201
packet_access.hh
QueueContext
Definition: hsa_packet_processor.hh:200
HSAPacketProcessor::RQLEntry::aqlProcessEvent
QueueProcessEvent aqlProcessEvent
Definition: hsa_packet_processor.hh:270
SimObject::name
virtual const std::string name() const
Definition: sim_object.hh:182
HSAPacketProcessor::getRegdListEntry
class RQLEntry * getRegdListEntry(uint32_t queId)
Definition: hsa_packet_processor.hh:300
HWScheduler::wakeup
void wakeup()
Definition: hw_scheduler.cc:55
HSAPacketProcessor::schedAQLProcessing
void schedAQLProcessing(uint32_t rl_idx)
Definition: hsa_packet_processor.cc:300
HWScheduler::regdListMap
std::map< uint32_t, uint32_t > regdListMap
Definition: hw_scheduler.hh:90
HWScheduler::hsaPP
HSAPacketProcessor * hsaPP
Definition: hw_scheduler.hh:91
HWScheduler::write
void write(Addr db_addr, uint32_t doorbell_reg)
Definition: hw_scheduler.cc:319
HWScheduler::addQCntxt
void addQCntxt(uint32_t al_idx, AQLRingBuffer *aql_buf, HSAQueueDescriptor *q_desc)
Definition: hw_scheduler.cc:176
HWScheduler::SchedulerWakeupEvent::hwSchdlr
HWScheduler * hwSchdlr
Definition: hw_scheduler.hh:61
curTick
Tick curTick()
The universal simulation clock.
Definition: cur_tick.hh:43
HWScheduler::contextSwitchQ
bool contextSwitchQ()
Definition: hw_scheduler.cc:191
HWScheduler::removeQCntxt
void removeQCntxt()
Definition: hw_scheduler.cc:240
HWScheduler::findNextIdleRLQ
bool findNextIdleRLQ()
Definition: hw_scheduler.cc:282
HWScheduler::schedWakeupEvent
SchedulerWakeupEvent schedWakeupEvent
Definition: hw_scheduler.hh:101
HSAPacketProcessor::RQLEntry::qCntxt
QCntxt qCntxt
Definition: hsa_packet_processor.hh:266
HWScheduler::nextALId
uint32_t nextALId
Definition: hw_scheduler.hh:98
HSAPacketProcessor::inFlightPkts
uint64_t inFlightPkts(uint32_t queId)
Definition: hsa_packet_processor.hh:306
panic
#define panic(...)
This implements a cprintf based panic() function.
Definition: logging.hh:171
HSAPacketProcessor::RQLEntry::depSignalRdState
SignalState depSignalRdState
Definition: hsa_packet_processor.hh:269
MAX_ACTIVE_QUEUES
#define MAX_ACTIVE_QUEUES
Definition: hw_scheduler.hh:41

Generated on Tue Mar 23 2021 19:41:26 for gem5 by doxygen 1.8.17