gem5  v20.1.0.0
hw_scheduler.cc
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2016-2017 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * For use for simulation and test purposes only
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright notice,
11  * this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright notice,
14  * this list of conditions and the following disclaimer in the documentation
15  * and/or other materials provided with the distribution.
16  *
17  * 3. Neither the name of the copyright holder nor the names of its
18  * contributors may be used to endorse or promote products derived from this
19  * software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  *
33  * Authors: Sooraj Puthoor
34  */
35 
36 #include "dev/hsa/hw_scheduler.hh"
37 
38 #include "debug/HSAPacketProcessor.hh"
39 #include "mem/packet_access.hh"
40 
// Expands to the definition of HWScheduler::<XEVENT>::description() const,
// which returns the event class name itself as a C string (produced by the
// preprocessor stringizing operator, #XEVENT).
41 #define HWSCHDLR_EVENT_DESCRIPTION_GENERATOR(XEVENT) \
42  const char* \
43  HWScheduler::XEVENT::description() const \
44  { \
45  return #XEVENT; \
46  }
47 
48 HWSCHDLR_EVENT_DESCRIPTION_GENERATOR(SchedulerWakeupEvent)
49 
// Event callback: forwards to the owning scheduler's wakeup().
// NOTE(review): listing line 51 (the signature) is absent from this rendering;
// the index at the end of this page maps hw_scheduler.cc:51 to
// HWScheduler::SchedulerWakeupEvent::process().
50 void
52 {
53  hwSchdlr->wakeup();
54 }
55 
// Periodic scheduler tick; ends by calling schedWakeup(), which re-arms the
// wakeup event when needed.
// NOTE(review): listing lines 57 (signature) and 66 are absent from this
// rendering. The index at the end of this page maps hw_scheduler.cc:57 to
// HWScheduler::wakeup(); line 66 is presumably the queue-swap call that the
// comment below describes -- confirm against the real source.
56 void
58 {
59  // The scheduler unmaps an idle queue from the
60  // registered qList and maps a new queue
61  // to the registered list from the active list.
62  // For this implementation, an idle queue means
63  // a queue that does not have any outstanding dispatch
64  // at the time of this scheduler's wakeup
65 
67  schedWakeup();
68 }
69 
// Arms the scheduler wakeup event when it is not already scheduled and there
// are more active queues than registered (hardware-mapped) ones.
// NOTE(review): listing lines 71 (signature) and 77-78 are absent from this
// rendering. The index at the end of this page maps hw_scheduler.cc:71 to
// HWScheduler::schedWakeup(); the dropped lines presumably schedule
// schedWakeupEvent at curTick() + wakeupDelay and open the trace call whose
// arguments remain on line 79 -- confirm against the real source.
70 void
72 {
73  // If there is at least one queue that is not registered,
74  // then wake up again
75  if (!schedWakeupEvent.scheduled() &&
76  regdListMap.size() < activeList.size()) {
79  "Scheduling wakeup at %lu\n", (curTick() + wakeupDelay));
80  }
81 }
82 
// Registers a newly created HSA queue with the scheduler.
//
// Derives the doorbell offset as sizeof(uint32_t) * queue_id, records
// offset -> queue_id in dbMap, allocates an HSAQueueDescriptor from
// basePointer / the doorbell offset / hostReadIndexPointer / size, stores
// the resulting queue context in activeList, tries to map the queue onto a
// free hardware queue right away (mapQIfSlotAvlbl) and finally calls
// schedWakeup().
//
// Panics if a queue with the same doorbell offset is already registered.
//
// NOTE(review): the `queue_id >= MAX_ACTIVE_QUEUES` panic below can only
// fire when the assert at the top is compiled out, and by then dbMap has
// already been updated; consider moving the check ahead of the dbMap insert.
// NOTE(review): listing lines 115 (initializer of aql_buf) and 123 (start of
// the trace call whose arguments follow on lines 124-126) are absent from
// this rendering.
83 void
84 HWScheduler::registerNewQueue(uint64_t hostReadIndexPointer,
85  uint64_t basePointer,
86  uint64_t queue_id,
87  uint32_t size)
88 {
89  assert(queue_id < MAX_ACTIVE_QUEUES);
90  // Map queue ID to doorbell.
91  // We are only using offset to pio base address as doorbell
92  // We use the same mapping function used by hsa runtime to do this mapping
93  //
94  // Originally
95  // #define VOID_PTR_ADD32(ptr,n)
96  // (void*)((uint32_t*)(ptr) + n)/*ptr + offset*/
97  // (Addr)VOID_PTR_ADD32(0, queue_id)
98  Addr db_offset = sizeof(uint32_t)*queue_id;
99  if (dbMap.find(db_offset) != dbMap.end()) {
100  panic("Creating an already existing queue (queueID %d)", queue_id);
101  }
102 
103  // Populate doorbell map
104  dbMap[db_offset] = queue_id;
105 
106  if (queue_id >= MAX_ACTIVE_QUEUES) {
107  panic("Attempting to create a queue (queueID %d)" \
108  " beyond PIO range", queue_id);
109  }
110 
111  HSAQueueDescriptor* q_desc =
112  new HSAQueueDescriptor(basePointer, db_offset,
113  hostReadIndexPointer, size);
114  AQLRingBuffer* aql_buf =
116  QCntxt q_cntxt(q_desc, aql_buf);
117  activeList[dbMap[db_offset]] = q_cntxt;
118 
119  // Check if this newly created queue can be directly mapped
120  // to registered queue list
121  bool M5_VAR_USED register_q = mapQIfSlotAvlbl(queue_id, aql_buf, q_desc);
122  schedWakeup();
124  "%s: offset = %p, qID = %d, is_regd = %s, AL size %d\n",
125  __FUNCTION__, db_offset, queue_id,
126  (register_q) ? "true" : "false", dbMap.size());
127 }
128 
// Looks for a free hardware queue slot, advancing nextRLId round-robin.
//
// Returns true as soon as the inspected descriptor is NULL (nextRLId is left
// unchanged at that point), and false when regdListMap already holds
// numHWQueues entries. Panics if regdListMap reports spare capacity but the
// scan over all numHWQueues slots finds no NULL descriptor.
//
// NOTE(review): listing lines 130 (signature), 132 (start of the trace call)
// and 137 (initializer of qDesc) are absent from this rendering. The index at
// the end of this page maps hw_scheduler.cc:130 to
// HWScheduler::findEmptyHWQ(); qDesc is presumably the descriptor of slot
// nextRLId -- confirm against the real source.
129 bool
131 {
133  "Trying to find empty HW queue, @ %s\n", __FUNCTION__);
134  if (regdListMap.size() < hsaPP->numHWQueues) {
135  for (int emptyQId = 0; emptyQId < hsaPP->numHWQueues; emptyQId++) {
136  HSAQueueDescriptor* qDesc =
138  // If qDesc is empty, we find an empty HW queue
139  if (qDesc == NULL) {
140  return true;
141  }
142  nextRLId = (nextRLId + 1) % hsaPP->numHWQueues;
143  }
144  // We should be able to find an empty slot in registered list
145  // So, we should not reach here
146  panic("Cannot find empty queue\n");
147  }
148  return false;
149 }
150 
// Maps the given queue onto a hardware queue if findEmptyHWQ() finds a free
// slot: installs the context with addQCntxt() and then advances the
// round-robin cursors with updateRRVars(). Returns false, without side
// effects of its own, when no slot is free; returns true after mapping.
//
// NOTE(review): listing lines 152 (first signature line), 155 (start of the
// trace call) and 161 are absent from this rendering. The index at the end of
// this page maps hw_scheduler.cc:152 to HWScheduler::mapQIfSlotAvlbl(); line
// 161 sits between addQCntxt() and updateRRVars() and is presumably the call
// that kicks the newly mapped queue -- confirm against the real source.
151 bool
153  HSAQueueDescriptor* q_desc)
154 {
156  "Trying to map new queue, @ %s\n", __FUNCTION__);
157  if (!findEmptyHWQ()) {
158  return false;
159  }
160  addQCntxt(q_id, aql_buf, q_desc);
162  updateRRVars(q_id, nextRLId);
163  return true;
164 }
165 
// Kicks the hardware queue at nextRLId after a queue has been mapped to it.
// NOTE(review): listing lines 167 (signature), 171 and 174 are absent from
// this rendering, so the `if` below has lost its controlled statement. The
// index at the end of this page maps hw_scheduler.cc:167 to
// HWScheduler::scheduleAndWakeupMappedQ(); the dropped lines are presumably
// the packet-fetch call and the AQL-processing schedule call described by
// the comments -- confirm against the real source.
166 void
168 {
169  // There may be AQL packets in the mapped queue waiting
170  // to be fetched. Invoke the logic to fetch AQL packets
172  // Schedule the newly mapped queue
173  if (hsaPP->regdQList[nextRLId]->dispPending())
175 }
176 
// Installs a queue context into the hardware queue slot nextRLId and records
// the mapping al_idx -> nextRLId in regdListMap.
//
// al_idx  : key of the queue in the active list.
// aql_buf : ring buffer belonging to that queue.
// q_desc  : descriptor belonging to that queue.
//
// The slot must be empty on entry: both of its context pointers are asserted
// to be NULL.
// NOTE(review): listing lines 184-185 are absent from this rendering; they
// presumably store q_desc and aql_buf into the slot's qCntxt -- confirm
// against the real source.
177 void
178 HWScheduler::addQCntxt(uint32_t al_idx, AQLRingBuffer* aql_buf,
179  HSAQueueDescriptor* q_desc)
180 {
181  assert(hsaPP->getRegdListEntry(nextRLId)->qCntxt.qDesc == NULL);
182  assert(hsaPP->getRegdListEntry(nextRLId)->qCntxt.aqlBuf == NULL);
183  // Move the context
186  // Add the mapping to registered list map
187  regdListMap[al_idx] = nextRLId;
188  DPRINTF(HSAPacketProcessor, "Mapped HSA queue %d to hw queue %d: @ %s\n",
189  al_idx, nextRLId, __FUNCTION__);
190 }
191 
// Tries to give the next unmapped active queue a hardware queue slot.
//
// Order of attempts: (1) pick the candidate with findNextActiveALQ();
// (2) map it into a free slot via mapQIfSlotAvlbl(); (3) otherwise evict an
// idle registered queue with unmapQFromRQ() and reuse its slot via
// addQCntxt(). Returns false when there is no candidate or nothing can be
// evicted, true once the candidate has been mapped.
//
// NOTE(review): listing lines 193 (signature), 195 (start of the trace call)
// and 217-218 are absent from this rendering. The index at the end of this
// page maps hw_scheduler.cc:193 to HWScheduler::contextSwitchQ(); lines
// 217-218 presumably mirror the tail of mapQIfSlotAvlbl() -- confirm against
// the real source.
// NOTE(review): unlike the other trace strings in this file, the one below
// has no trailing "\n".
192 bool
194 {
196  "Trying to map next queue, @ %s", __FUNCTION__);
197  // Identify the next queue, if there is nothing to
198  // map, return false
199  if (!findNextActiveALQ()) {
200  return false;
201  }
202  HSAQueueDescriptor* q_desc = activeList[nextALId].qDesc;
203  AQLRingBuffer* aql_buf = activeList[nextALId].aqlBuf;
204  // If there is empty slot available, use that slot
205  if(mapQIfSlotAvlbl(nextALId, aql_buf, q_desc)) {
206  return true;
207  }
208  // There is no empty slot to map this queue. So, we need to
209  // unmap a queue from registered list and find a slot.
210  // If nothing can be unmapped now, return false
211  if (!unmapQFromRQ()) {
212  return false;
213  }
214  // One queue is unmapped from registered list and that queueID
215  // is stored in nextRLId. We will map this queue to that unmapped slot
216  addQCntxt(nextALId, aql_buf, q_desc);
219  return true;
220 }
221 
// Advances both round-robin cursors past the entries that were just used:
// nextALId becomes the active-list index after al_idx (wrapping at
// MAX_ACTIVE_QUEUES) and nextRLId becomes the hardware queue index after
// rl_idx (wrapping at hsaPP->numHWQueues).
222 void
223 HWScheduler::updateRRVars(uint32_t al_idx, uint32_t rl_idx)
224 {
225  nextALId = (al_idx + 1) % MAX_ACTIVE_QUEUES;
226  nextRLId = (rl_idx + 1) % hsaPP->numHWQueues;
227 }
228 
// Evicts one idle queue from the registered list, if there is one.
// findNextIdleRLQ() selects the victim (leaving its index in nextRLId) and
// removeQCntxt() unmaps it. Returns false when no registered queue is idle.
// NOTE(review): listing line 230 (the signature) is absent from this
// rendering; the index at the end of this page maps hw_scheduler.cc:230 to
// HWScheduler::unmapQFromRQ().
229 bool
231 {
232  // Identify the next idle queue, if there is no
233  // idle queue, we cannot unmap
234  if (!findNextIdleRLQ()) {
235  return false;
236  }
237  removeQCntxt();
238  return true;
239 }
240 
// Unmaps the queue currently occupying hardware queue slot nextRLId: looks
// up its active-list index through dbMap and erases that index from
// regdListMap.
//
// NOTE(review): listing lines 242 (signature), 247-249 (initializer of
// db_offset, and whatever follows it) and 253-254 are absent from this
// rendering. The index at the end of this page maps hw_scheduler.cc:242 to
// HWScheduler::removeQCntxt(); the dropped lines presumably read the slot's
// doorbellPointer, clear the slot's context, and discard/reset the
// dependency-signal read state as the comments describe -- confirm against
// the real source.
// NOTE(review): dbMap[...] and regdListMap[...] use std::map::operator[],
// which inserts a default entry when the key is missing; the consistency
// check is an assert and so disappears in builds without assertions.
241 void
243 {
244  // The nextRLId gives the registered queue that is to be unmapped.
245  // We can find the corresponding queue_id from the doorbellPointer
246  Addr db_offset =
250  // Here, we are unmapping a queue without waiting for the outstanding
251  // dependency signal reads to complete. We will discard any outstanding
252  // reads and will reset the signal values here.
255  uint32_t al_idx = dbMap[db_offset];
256  assert(regdListMap[al_idx] == nextRLId);
257  // Unmap from regdListMap.
258  regdListMap.erase(al_idx);
259 }
260 
// Round-robin search, starting at nextALId and wrapping at
// MAX_ACTIVE_QUEUES, for an active queue that is not yet mapped to a
// hardware queue (present in activeList, absent from regdListMap).
// On success stores the index in nextALId and returns true; returns false
// when every active queue is already mapped or there are none.
// NOTE(review): listing lines 262 (signature) and 272 (start of the trace
// call whose arguments follow on lines 273-274) are absent from this
// rendering; the index at the end of this page maps hw_scheduler.cc:262 to
// HWScheduler::findNextActiveALQ().
261 bool
263 {
264  for (int activeQId = 0; activeQId < MAX_ACTIVE_QUEUES; activeQId++) {
265  uint32_t al_id = (nextALId + activeQId) % MAX_ACTIVE_QUEUES;
266  auto aqlmap_iter = activeList.find(al_id);
267  if (aqlmap_iter != activeList.end()) {
268  // If this queue is already mapped
269  if (regdListMap.find(al_id) != regdListMap.end()) {
270  continue;
271  } else {
273  "Next Active ALQ %d (current %d), max ALQ %d\n",
274  al_id, nextALId, MAX_ACTIVE_QUEUES);
275  nextALId = al_id;
276  return true;
277  }
278  }
279  }
280  return false;
281 }
282 
// Round-robin search, starting at nextRLId and wrapping at
// hsaPP->numHWQueues, for a hardware queue slot that isRLQIdle() reports as
// idle. On success stores the slot index in nextRLId and returns true;
// returns false when no slot is idle.
// NOTE(review): listing line 284 (the signature) is absent from this
// rendering; the index at the end of this page maps hw_scheduler.cc:284 to
// HWScheduler::findNextIdleRLQ().
283 bool
285 {
286  for (int regdQId = 0; regdQId < hsaPP->numHWQueues; regdQId++) {
287  uint32_t rl_idx = (nextRLId + regdQId) % hsaPP->numHWQueues;
288  if (isRLQIdle(rl_idx)) {
289  nextRLId = rl_idx;
290  return true;
291  }
292  }
293  return false;
294 }
295 
// Reports whether hardware queue slot rl_idx is idle, i.e. whether it can be
// unmapped: returns false while a DMA is in progress on its descriptor or
// while the ring buffer's read index differs from its dispatch index, and
// true otherwise.
// NOTE(review): listing lines 300 (start of the trace call) and 302 (the
// declaration of qDesc) are absent from this rendering; qDesc is presumably
// the slot's qCntxt.qDesc, by analogy with aql_buf -- confirm against the
// real source. Neither pointer is checked for NULL in the visible code.
296 // This function could be moved to packet processor
297 bool
298 HWScheduler::isRLQIdle(uint32_t rl_idx)
299 {
301  "@ %s, analyzing hw queue %d\n", __FUNCTION__, rl_idx);
303  AQLRingBuffer* aql_buf = hsaPP->getRegdListEntry(rl_idx)->qCntxt.aqlBuf;
304 
305  // If there is a pending DMA to this registered queue
306  // then the queue is not idle
307  if (qDesc->dmaInProgress) {
308  return false;
309  }
310 
311  // Since packet completion stage happens only after kernel completion
312  // we need to keep the queue mapped till all the outstanding kernels
313  // from that queue are finished
314  if (aql_buf->rdIdx() != aql_buf->dispIdx()) {
315  return false;
316  }
317 
318  return true;
319 }
320 
// Handles a doorbell write.
//
// db_addr      : doorbell offset, used as the key into dbMap.
// doorbell_reg : value written; stored as the queue's writeIndex.
//
// Panics if no queue is registered for db_addr.
// NOTE(review): listing line 334 (the body of the final `if`) is absent from
// this rendering; it presumably starts the packet fetch for the mapped
// hardware queue, as the comment says -- confirm against the real source.
// NOTE(review): dbmap_iter->second already holds the value that
// dbMap[db_addr] looks up a second time, and activeList[al_idx] is indexed
// without checking that the entry exists.
321 void
322 HWScheduler::write(Addr db_addr, uint32_t doorbell_reg)
323 {
324  auto dbmap_iter = dbMap.find(db_addr);
325  if (dbmap_iter == dbMap.end()) {
326  panic("Writing to a non-existing queue (db_offset %x)", db_addr);
327  }
328  uint32_t al_idx = dbMap[db_addr];
329  // Modify the write pointer
330  activeList[al_idx].qDesc->writeIndex = doorbell_reg;
331  // If this queue is mapped, then start DMA to fetch the
332  // AQL packet
333  if (regdListMap.find(al_idx) != regdListMap.end()) {
335  }
336 }
337 
// Destroys the queue identified by queue_id.
//
// Recomputes the doorbell offset (sizeof(uint32_t) * queue_id), panics if no
// such doorbell is registered, warns if the queue is not empty, frees the
// queue's descriptor and ring buffer, and removes it from activeList and
// dbMap. If the queue was mapped to a hardware queue, that slot's context
// pointers are cleared, the mapping is erased from regdListMap and
// contextSwitchQ() is called to refill the slot. Always finishes with
// schedWakeup().
//
// NOTE(review): listing lines 339 (signature) and 376-377 are absent from
// this rendering; the index at the end of this page maps hw_scheduler.cc:339
// to HWScheduler::unregisterQueue(uint64_t queue_id).
// NOTE(review): activeList[al_idx] uses std::map::operator[], which inserts
// a default entry when the key is missing, and the hardware slot's pointers
// are cleared only after the objects have been deleted.
338 void
340 {
341  // Pointer arithmetic on a null pointer is undefined behavior. Clang
342  // compilers therefore complain if the following reads:
343  // `(Addr)(VOID_PTR_ADD32(0, queue_id))`
344  //
345  // Originally
346  // #define VOID_PTR_ADD32(ptr,n)
347  // (void*)((uint32_t*)(ptr) + n)/*ptr + offset*/
348  // (Addr)VOID_PTR_ADD32(0, queue_id)
349  Addr db_offset = sizeof(uint32_t)*queue_id;
350  auto dbmap_iter = dbMap.find(db_offset);
351  if (dbmap_iter == dbMap.end()) {
352  panic("Destroying a non-existing queue (db_offset %x)",
353  db_offset);
354  }
355  uint32_t al_idx = dbMap[db_offset];
356  assert(dbMap[db_offset] == dbmap_iter->second);
357  if (!activeList[al_idx].qDesc->isEmpty()) {
358  // The HSA runtime specification says that deleting
359  // a queue before it is fully processed can lead to undefined
360  // behavior and it is the application's responsibility to
361  // avoid this situation.
362  // Even completion signal is not a sufficient indication for a
363  // fully processed queue; for example completion signal may be
364  // asserted when a read pointer update is in progress
365  warn("Destroying a non-empty queue");
366  }
367  delete activeList[al_idx].qDesc;
368  delete activeList[al_idx].aqlBuf;
369  activeList.erase(al_idx);
370  // Unmap doorbell from doorbell map
371  dbMap.erase(db_offset);
372  if (regdListMap.find(al_idx) != regdListMap.end()) {
373  uint32_t rl_idx = regdListMap[al_idx];
374  hsaPP->getRegdListEntry(rl_idx)->qCntxt.aqlBuf = NULL;
375  hsaPP->getRegdListEntry(rl_idx)->qCntxt.qDesc = NULL;
378  assert(!hsaPP->getRegdListEntry(rl_idx)->aqlProcessEvent.scheduled());
379  regdListMap.erase(al_idx);
380  // A registered queue is released, let us try to map
381  // a queue to that slot
382  contextSwitchQ();
383  }
384  schedWakeup();
385 }
HWScheduler::registerNewQueue
void registerNewQueue(uint64_t hostReadIndexPointer, uint64_t basePointer, uint64_t queue_id, uint32_t size)
Definition: hw_scheduler.cc:84
HWScheduler::mapQIfSlotAvlbl
bool mapQIfSlotAvlbl(uint32_t al_idx, AQLRingBuffer *aql_buf, HSAQueueDescriptor *q_desc)
Definition: hw_scheduler.cc:152
HSAPacketProcessor::numHWQueues
int numHWQueues
Definition: hsa_packet_processor.hh:305
Event::scheduled
bool scheduled() const
Determine if the current event is scheduled.
Definition: eventq.hh:460
warn
#define warn(...)
Definition: logging.hh:239
HWScheduler::dbMap
std::map< Addr, uint32_t > dbMap
Definition: hw_scheduler.hh:88
HWScheduler::unregisterQueue
void unregisterQueue(uint64_t queue_id)
Definition: hw_scheduler.cc:339
HWScheduler::scheduleAndWakeupMappedQ
void scheduleAndWakeupMappedQ()
Definition: hw_scheduler.cc:167
HWScheduler::unmapQFromRQ
bool unmapQFromRQ()
Definition: hw_scheduler.cc:230
HWScheduler::schedWakeup
void schedWakeup()
Definition: hw_scheduler.cc:71
HSAQueueDescriptor::dmaInProgress
bool dmaInProgress
Definition: hsa_packet_processor.hh:81
HWScheduler::findNextActiveALQ
bool findNextActiveALQ()
Definition: hw_scheduler.cc:262
hw_scheduler.hh
HSAQueueDescriptor
Definition: hsa_packet_processor.hh:72
AQLRingBuffer::dispIdx
uint64_t dispIdx() const
Definition: hsa_packet_processor.hh:190
HWScheduler::findEmptyHWQ
bool findEmptyHWQ()
Definition: hw_scheduler.cc:130
AQLRingBuffer
Internal ring buffer which is used to prefetch/store copies of the in-memory HSA ring buffer.
Definition: hsa_packet_processor.hh:117
HWSCHDLR_EVENT_DESCRIPTION_GENERATOR
#define HWSCHDLR_EVENT_DESCRIPTION_GENERATOR(XEVENT)
Definition: hw_scheduler.cc:41
HWScheduler::activeList
std::map< uint32_t, QCntxt > activeList
Definition: hw_scheduler.hh:85
HSAPacketProcessor::getCommandsFromHost
void getCommandsFromHost(int pid, uint32_t rl_idx)
Definition: hsa_packet_processor.cc:510
HWScheduler::updateRRVars
void updateRRVars(uint32_t al_idx, uint32_t rl_idx)
Definition: hw_scheduler.cc:223
EventManager::schedule
void schedule(Event &event, Tick when)
Definition: eventq.hh:1005
QueueContext::aqlBuf
AQLRingBuffer * aqlBuf
Definition: hsa_packet_processor.hh:202
HSAPacketProcessor
Definition: hsa_packet_processor.hh:212
DPRINTF
#define DPRINTF(x,...)
Definition: trace.hh:234
HWScheduler::isRLQIdle
bool isRLQIdle(uint32_t rl_idx)
Definition: hw_scheduler.cc:298
HSAPacketProcessor::SignalState::resetSigVals
void resetSigVals()
Definition: hsa_packet_processor.hh:240
AQLRingBuffer::rdIdx
uint64_t rdIdx() const
Definition: hsa_packet_processor.hh:192
HSAPacketProcessor::regdQList
std::vector< class RQLEntry * > regdQList
Definition: hsa_packet_processor.hh:279
HSAPacketProcessor::SignalState::discardRead
bool discardRead
Definition: hsa_packet_processor.hh:236
HWScheduler::wakeupDelay
const Tick wakeupDelay
Definition: hw_scheduler.hh:102
HWScheduler::nextRLId
uint32_t nextRLId
Definition: hw_scheduler.hh:101
NUM_DMA_BUFS
#define NUM_DMA_BUFS
Definition: hsa_packet_processor.hh:50
HWScheduler::SchedulerWakeupEvent::process
virtual void process()
Definition: hw_scheduler.cc:51
HSAQueueDescriptor::doorbellPointer
uint64_t doorbellPointer
Definition: hsa_packet_processor.hh:75
Addr
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition: types.hh:142
QueueContext::qDesc
HSAQueueDescriptor * qDesc
Definition: hsa_packet_processor.hh:201
packet_access.hh
QueueContext
Definition: hsa_packet_processor.hh:200
HSAPacketProcessor::RQLEntry::aqlProcessEvent
QueueProcessEvent aqlProcessEvent
Definition: hsa_packet_processor.hh:270
SimObject::name
virtual const std::string name() const
Definition: sim_object.hh:133
HSAPacketProcessor::getRegdListEntry
class RQLEntry * getRegdListEntry(uint32_t queId)
Definition: hsa_packet_processor.hh:300
HWScheduler::wakeup
void wakeup()
Definition: hw_scheduler.cc:57
HSAPacketProcessor::schedAQLProcessing
void schedAQLProcessing(uint32_t rl_idx)
Definition: hsa_packet_processor.cc:299
HWScheduler::regdListMap
std::map< uint32_t, uint32_t > regdListMap
Definition: hw_scheduler.hh:92
HWScheduler::hsaPP
HSAPacketProcessor * hsaPP
Definition: hw_scheduler.hh:93
HWScheduler::write
void write(Addr db_addr, uint32_t doorbell_reg)
Definition: hw_scheduler.cc:322
HWScheduler::addQCntxt
void addQCntxt(uint32_t al_idx, AQLRingBuffer *aql_buf, HSAQueueDescriptor *q_desc)
Definition: hw_scheduler.cc:178
HWScheduler::SchedulerWakeupEvent::hwSchdlr
HWScheduler * hwSchdlr
Definition: hw_scheduler.hh:63
HWScheduler::contextSwitchQ
bool contextSwitchQ()
Definition: hw_scheduler.cc:193
HWScheduler::removeQCntxt
void removeQCntxt()
Definition: hw_scheduler.cc:242
HWScheduler::findNextIdleRLQ
bool findNextIdleRLQ()
Definition: hw_scheduler.cc:284
HWScheduler::schedWakeupEvent
SchedulerWakeupEvent schedWakeupEvent
Definition: hw_scheduler.hh:103
HSAPacketProcessor::RQLEntry::qCntxt
QCntxt qCntxt
Definition: hsa_packet_processor.hh:266
HWScheduler::nextALId
uint32_t nextALId
Definition: hw_scheduler.hh:100
panic
#define panic(...)
This implements a cprintf based panic() function.
Definition: logging.hh:171
curTick
Tick curTick()
The current simulated tick.
Definition: core.hh:45
HSAPacketProcessor::RQLEntry::depSignalRdState
SignalState depSignalRdState
Definition: hsa_packet_processor.hh:269
MAX_ACTIVE_QUEUES
#define MAX_ACTIVE_QUEUES
Definition: hw_scheduler.hh:43

Generated on Wed Sep 30 2020 14:02:11 for gem5 by doxygen 1.8.17