gem5  v22.0.0.1
hw_scheduler.cc
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2016-2017 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *
8  * 1. Redistributions of source code must retain the above copyright notice,
9  * this list of conditions and the following disclaimer.
10  *
11  * 2. Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation
13  * and/or other materials provided with the distribution.
14  *
15  * 3. Neither the name of the copyright holder nor the names of its
16  * contributors may be used to endorse or promote products derived from this
17  * software without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include "dev/hsa/hw_scheduler.hh"
33 
34 #include "base/compiler.hh"
35 #include "base/trace.hh"
36 #include "debug/HSAPacketProcessor.hh"
37 #include "sim/cur_tick.hh"
38 
// Generates the out-of-line definition of
// HWScheduler::XEVENT::description() const for the event class named by
// XEVENT. The generated function returns the class name itself as a string
// literal (produced with the preprocessor's # stringizing operator).
39 #define HWSCHDLR_EVENT_DESCRIPTION_GENERATOR(XEVENT) \
40  const char* \
41  HWScheduler::XEVENT::description() const \
42  { \
43  return #XEVENT; \
44  }
45 
46 namespace gem5
47 {
48 
// Instantiate description() for SchedulerWakeupEvent; it returns the string
// "SchedulerWakeupEvent".
49 HWSCHDLR_EVENT_DESCRIPTION_GENERATOR(SchedulerWakeupEvent)
50 
// Event handler: forwards to hwSchdlr->wakeup().
// NOTE(review): listing line 52 (the qualified function name) is absent from
// this rendering. The symbol index at the end of this page lists
// HWScheduler::SchedulerWakeupEvent::process() as defined at
// hw_scheduler.cc:52.
51 void
53 {
54  hwSchdlr->wakeup();
55 }
56 
// Periodic scheduler tick: performs the queue switch described in the body
// comment and then calls schedWakeup() so a further tick can be armed.
// NOTE(review): listing line 58 (the qualified function name) is absent from
// this rendering; the symbol index at the end of this page lists
// HWScheduler::wakeup() at hw_scheduler.cc:58.
57 void
59 {
60  // The scheduler unmaps an idle queue from the
61  // registered qList and maps a new queue
62  // to the registered list from the active list.
63  // For this implementation, an idle queue means
64  // a queue that does not have any outstanding dispatch
65  // at the time of this scheduler's wakeup
66 
// NOTE(review): listing line 67 is absent from this rendering. It is
// presumably the call that performs the unmap/map step described above
// (contextSwitchQ() is the routine with that role in this file) -- confirm
// against the upstream source.
68  schedWakeup();
69 }
70 
// Arms the scheduler wakeup event when it is not already scheduled and fewer
// queues are registered (regdListMap) than are active (activeList).
// NOTE(review): listing line 72 (the qualified function name) is absent from
// this rendering; the symbol index at the end of this page lists
// HWScheduler::schedWakeup() at hw_scheduler.cc:72.
71 void
73 {
74  // If there is at least one queue that is not registered
75  // then wakeup again
76  if (!schedWakeupEvent.scheduled() &&
77  regdListMap.size() < activeList.size()) {
// NOTE(review): listing lines 78-79 are absent from this rendering. Judging
// by the surviving trace arguments below, they presumably schedule
// schedWakeupEvent at curTick() + wakeupDelay and open the trace call --
// confirm against the upstream source.
80  "Scheduling wakeup at %lu\n", (curTick() + wakeupDelay));
81  }
82 }
83 
// Registers a new HSA queue with the scheduler.
//
// Steps visible in this listing:
//   1. Derive the doorbell offset (queue_id * doorbellSize) when the caller
//      passes offset == 0.
//   2. Panic if that offset is already present in dbMap.
//   3. Record offset <-> queue_id in dbMap and qidMap.
//   4. Allocate an HSAQueueDescriptor and an AQLRingBuffer, optionally
//      seeding the ring buffer's read index from rd_idx.
//   5. Store the pair in activeList, try to map it to a free HW queue slot,
//      and call schedWakeup().
//
// Parameters:
//   hostReadIndexPointer, basePointer, size, gfxVersion
//                 forwarded unchanged to the HSAQueueDescriptor constructor.
//   queue_id      queue identifier; must be below MAX_ACTIVE_QUEUES.
//   doorbellSize  used only to derive the default doorbell offset.
//   offset        doorbell offset; 0 selects the derived default.
//   rd_idx        initial read index; applied only when greater than zero.
84 void
85 HWScheduler::registerNewQueue(uint64_t hostReadIndexPointer,
86  uint64_t basePointer,
87  uint64_t queue_id,
88  uint32_t size, int doorbellSize,
89  GfxVersion gfxVersion,
90  Addr offset, uint64_t rd_idx)
91 {
92  assert(queue_id < MAX_ACTIVE_QUEUES);
93  // Map queue ID to doorbell.
94  // We are only using offset to pio base address as doorbell
95  // We use the same mapping function used by hsa runtime to do this mapping
96  if (!offset) {
97  offset = queue_id * doorbellSize;
98  }
99  if (dbMap.find(offset) != dbMap.end()) {
100  panic("Creating an already existing queue (queueID %d)", queue_id);
101  }
102 
103  // Populate doorbell map
104  dbMap[offset] = queue_id;
105  qidMap[queue_id] = offset;
106 
// NOTE(review): this repeats the bound already asserted at the top of the
// function, and it runs only after dbMap/qidMap have been updated. It still
// takes effect in builds where assert() is compiled out.
107  if (queue_id >= MAX_ACTIVE_QUEUES) {
108  panic("Attempting to create a queue (queueID %d)" \
109  " beyond PIO range", queue_id);
110  }
111 
112  HSAQueueDescriptor* q_desc =
113  new HSAQueueDescriptor(basePointer, offset,
114  hostReadIndexPointer, size, gfxVersion);
115  AQLRingBuffer* aql_buf =
// NOTE(review): listing line 116 (the AQLRingBuffer initializer) is absent
// from this rendering -- confirm against the upstream source.
117  if (rd_idx > 0) {
118  aql_buf->setRdIdx(rd_idx);
119  }
// This trace is emitted unconditionally, including when rd_idx is 0 and
// setRdIdx() was not called.
120  DPRINTF(HSAPacketProcessor, "Setting read index for %#lx to %ld\n",
121  offset, rd_idx);
122 
123  QCntxt q_cntxt(q_desc, aql_buf);
// dbMap[offset] was set to queue_id above, so this indexes activeList by the
// queue ID.
124  activeList[dbMap[offset]] = q_cntxt;
125 
126  // Check if this newly created queue can be directly mapped
127  // to registered queue list
128  [[maybe_unused]] bool register_q =
129  mapQIfSlotAvlbl(queue_id, aql_buf, q_desc);
130  schedWakeup();
// NOTE(review): listing line 131 (presumably the opening of the trace call
// whose arguments follow) is absent from this rendering. The "AL size" value
// printed below is dbMap.size().
132  "%s: offset = %p, qID = %d, is_regd = %s, AL size %d\n",
133  __FUNCTION__, offset, queue_id,
134  (register_q) ? "true" : "false", dbMap.size());
135 }
136 
// Looks for a free hardware queue slot.
// Returns true when one is found, false when regdListMap already holds
// hsaPP->numHWQueues entries. While searching, nextRLId is advanced (modulo
// numHWQueues) each time the inspected descriptor is non-null. Panics if the
// map claims a slot is free but the loop finds none.
// NOTE(review): listing line 138 (the qualified function name) is absent from
// this rendering; the symbol index at the end of this page lists
// HWScheduler::findEmptyHWQ() at hw_scheduler.cc:138.
137 bool
139 {
// NOTE(review): listing line 140 (presumably the opening of the trace call
// whose arguments follow) is absent from this rendering.
141  "Trying to find empty HW queue, @ %s\n", __FUNCTION__);
142  if (regdListMap.size() < hsaPP->numHWQueues) {
143  for (int emptyQId = 0; emptyQId < hsaPP->numHWQueues; emptyQId++) {
144  HSAQueueDescriptor* qDesc =
// NOTE(review): listing line 145 (the qDesc initializer) is absent from this
// rendering; presumably it reads the descriptor of registered slot nextRLId
// -- confirm against the upstream source.
146  // If qDesc is empty, we find an empty HW queue
147  if (qDesc == NULL) {
148  return true;
149  }
150  nextRLId = (nextRLId + 1) % hsaPP->numHWQueues;
151  }
152  // We should be able to find an empty slot in registered list
153  // So, we should not reach here
154  panic("Cannot find empty queue\n");
155  }
156  return false;
157 }
158 
// Maps the given queue context onto a free hardware queue slot if one exists.
// Returns false (and changes nothing here) when findEmptyHWQ() reports no
// free slot; otherwise adds the context, advances the round-robin variables
// and returns true.
// NOTE(review): listing line 160 (the function name and leading parameters)
// is absent from this rendering. The symbol index at the end of this page
// gives the declaration as
//   bool mapQIfSlotAvlbl(uint32_t al_idx, AQLRingBuffer *aql_buf,
//                        HSAQueueDescriptor *q_desc)
// at hw_scheduler.cc:160; the body below refers to the first parameter as
// q_id, so the definition presumably names it differently.
159 bool
161  HSAQueueDescriptor* q_desc)
162 {
// NOTE(review): listing line 163 (presumably the opening of the trace call
// whose arguments follow) is absent from this rendering.
164  "Trying to map new queue, @ %s\n", __FUNCTION__);
165  if (!findEmptyHWQ()) {
166  return false;
167  }
168  addQCntxt(q_id, aql_buf, q_desc);
// NOTE(review): listing line 169 is absent from this rendering -- confirm
// against the upstream source what runs between addQCntxt() and
// updateRRVars().
170  updateRRVars(q_id, nextRLId);
171  return true;
172 }
173 
// Kicks off work for the queue that was just mapped to slot nextRLId.
// NOTE(review): listing lines 175 (the qualified function name), 179 and 182
// are absent from this rendering, so both actions described by the comments
// below are missing their statements. The symbol index at the end of this
// page lists HWScheduler::scheduleAndWakeupMappedQ() at hw_scheduler.cc:175.
174 void
176 {
177  // There may be AQL packets in the mapped queue waiting
178  // to be fetched. Invoke the logic to fetch AQL packets
180  // Schedule the newly mapped queue
181  if (hsaPP->regdQList[nextRLId]->dispPending())
183 }
184 
// Installs a queue context into registered slot nextRLId and records the
// active-list-index -> slot mapping in regdListMap.
//
// Parameters:
//   al_idx   active list index of the queue being mapped.
//   aql_buf  ring buffer belonging to that queue.
//   q_desc   queue descriptor belonging to that queue.
//
// Precondition (asserted): the target slot currently has neither a
// descriptor nor a ring buffer.
185 void
186 HWScheduler::addQCntxt(uint32_t al_idx, AQLRingBuffer* aql_buf,
187  HSAQueueDescriptor* q_desc)
188 {
189  assert(hsaPP->getRegdListEntry(nextRLId)->qCntxt.qDesc == NULL);
190  assert(hsaPP->getRegdListEntry(nextRLId)->qCntxt.aqlBuf == NULL);
191  // Move the context
// NOTE(review): listing lines 192-193 are absent from this rendering;
// presumably they store q_desc and aql_buf into the slot's qCntxt -- confirm
// against the upstream source.
194  // Add the mapping to registered list map
195  regdListMap[al_idx] = nextRLId;
196  DPRINTF(HSAPacketProcessor, "Mapped HSA queue %d to hw queue %d: @ %s\n",
197  al_idx, nextRLId, __FUNCTION__);
198 }
199 
// Tries to map the next unmapped active queue onto a hardware slot.
// Returns false when there is no unmapped active queue, or when no slot is
// free and no registered queue can be unmapped; returns true once the queue
// has been mapped (either to a free slot or to a freshly vacated one).
// NOTE(review): listing line 201 (the qualified function name) is absent from
// this rendering; the symbol index at the end of this page lists
// HWScheduler::contextSwitchQ() at hw_scheduler.cc:201.
200 bool
202 {
// NOTE(review): listing line 203 (presumably the opening of the trace call
// whose arguments follow) is absent from this rendering.
204  "Trying to map next queue, @ %s\n", __FUNCTION__);
205  // Identify the next queue, if there is nothing to
206  // map, return false
207  if (!findNextActiveALQ()) {
208  return false;
209  }
210  HSAQueueDescriptor* q_desc = activeList[nextALId].qDesc;
211  AQLRingBuffer* aql_buf = activeList[nextALId].aqlBuf;
212  // If there is empty slot available, use that slot
213  if(mapQIfSlotAvlbl(nextALId, aql_buf, q_desc)) {
214  return true;
215  }
216  // There is no empty slot to map this queue. So, we need to
217  // unmap a queue from registered list and find a slot.
218  // If nothing can be unmapped now, return false
219  if (!unmapQFromRQ()) {
220  return false;
221  }
222  // One queue is unmapped from registered list and that queueID
223  // is stored in nextRLId. We will map this queue to that unmapped slot
224  addQCntxt(nextALId, aql_buf, q_desc);
// NOTE(review): listing lines 225-226 are absent from this rendering. By
// analogy with mapQIfSlotAvlbl() they presumably wake the newly mapped queue
// and advance the round-robin variables -- confirm against the upstream
// source.
227  return true;
228 }
229 
// Advances both round-robin cursors past the entries that were just used.
//
// Parameters:
//   al_idx  active list index just serviced; nextALId becomes the following
//           index, wrapping at MAX_ACTIVE_QUEUES.
//   rl_idx  registered slot just serviced; nextRLId becomes the following
//           slot, wrapping at hsaPP->numHWQueues.
230 void
231 HWScheduler::updateRRVars(uint32_t al_idx, uint32_t rl_idx)
232 {
233  nextALId = (al_idx + 1) % MAX_ACTIVE_QUEUES;
234  nextRLId = (rl_idx + 1) % hsaPP->numHWQueues;
235 }
236 
// Unmaps one idle queue from the registered list.
// Returns false when findNextIdleRLQ() finds no idle registered queue;
// otherwise removes the context selected by that search and returns true.
// NOTE(review): listing line 238 (the qualified function name) is absent from
// this rendering; the symbol index at the end of this page lists
// HWScheduler::unmapQFromRQ() at hw_scheduler.cc:238.
237 bool
239 {
240  // Identify the next idle queue, if there is no
241  // idle queue, we cannot unmap
242  if (!findNextIdleRLQ()) {
243  return false;
244  }
245  removeQCntxt();
246  return true;
247 }
248 
// Detaches the queue currently occupying registered slot nextRLId and erases
// its entry from regdListMap.
// NOTE(review): listing lines 250 (the qualified function name), 255-257 and
// 261-262 are absent from this rendering, so the db_offset initializer and
// the statements that clear the slot are not visible. The symbol index at the
// end of this page lists HWScheduler::removeQCntxt() at hw_scheduler.cc:250.
249 void
251 {
252  // The nextRLId gives the registered queue that is to be unmapped.
253  // We can find the corresponding queue_id from the doorbellPointer
254  Addr db_offset =
258  // Here, we are unmapping a queue without waiting for the outstanding
259  // dependency signal reads to complete. We will discard any outstanding
260  // reads and will reset the signal values here.
263  uint32_t al_idx = dbMap[db_offset];
// Sanity check: the map must agree that this queue occupies slot nextRLId.
264  assert(regdListMap[al_idx] == nextRLId);
265  // Unmap from regdListMap.
266  regdListMap.erase(al_idx);
267 }
268 
// Round-robin search, starting at nextALId, for an active queue that is not
// yet mapped to a hardware slot. On success nextALId is set to that queue's
// active list index and true is returned; false means every active queue is
// already mapped (or there are none).
// NOTE(review): listing line 270 (the qualified function name) is absent from
// this rendering; the symbol index at the end of this page lists
// HWScheduler::findNextActiveALQ() at hw_scheduler.cc:270.
269 bool
271 {
272  for (int activeQId = 0; activeQId < MAX_ACTIVE_QUEUES; activeQId++) {
273  uint32_t al_id = (nextALId + activeQId) % MAX_ACTIVE_QUEUES;
274  auto aqlmap_iter = activeList.find(al_id);
275  if (aqlmap_iter != activeList.end()) {
276  // If this queue is already mapped
277  if (regdListMap.find(al_id) != regdListMap.end()) {
278  continue;
279  } else {
// NOTE(review): listing line 280 (presumably the opening of the trace call
// whose arguments follow) is absent from this rendering.
281  "Next Active ALQ %d (current %d), max ALQ %d\n",
282  al_id, nextALId, MAX_ACTIVE_QUEUES);
283  nextALId = al_id;
284  return true;
285  }
286  }
287  }
288  return false;
289 }
290 
// Round-robin search, starting at nextRLId, for a registered slot that
// isRLQIdle() reports as idle. On success nextRLId is set to that slot and
// true is returned; false means no registered slot is idle.
// NOTE(review): listing line 292 (the qualified function name) is absent from
// this rendering; the symbol index at the end of this page lists
// HWScheduler::findNextIdleRLQ() at hw_scheduler.cc:292.
291 bool
293 {
294  for (int regdQId = 0; regdQId < hsaPP->numHWQueues; regdQId++) {
295  uint32_t rl_idx = (nextRLId + regdQId) % hsaPP->numHWQueues;
296  if (isRLQIdle(rl_idx)) {
297  nextRLId = rl_idx;
298  return true;
299  }
300  }
301  return false;
302 }
303 
// Reports whether registered slot rl_idx may be unmapped.
// Returns false while the slot's descriptor has a DMA in progress or while
// hsaPP->inFlightPkts(rl_idx) is non-zero; returns true otherwise.
304 // This function could be moved to packet processor
305 bool
306 HWScheduler::isRLQIdle(uint32_t rl_idx)
307 {
// NOTE(review): listing line 308 (presumably the opening of the trace call
// whose arguments follow) is absent from this rendering.
309  "@ %s, analyzing hw queue %d\n", __FUNCTION__, rl_idx);
// NOTE(review): listing line 310 (the declaration of qDesc used below) is
// absent from this rendering; presumably it fetches the descriptor of
// registered slot rl_idx -- confirm against the upstream source.
311 
312  // If there is a pending DMA to this registered queue
313  // then the queue is not idle
314  if (qDesc->dmaInProgress) {
315  return false;
316  }
317 
318  // Since packet completion stage happens only after kernel completion
319  // we need to keep the queue mapped till all the outstanding kernels
320  // from that queue are finished
321  if (hsaPP->inFlightPkts(rl_idx)) {
322  return false;
323  }
324 
325  return true;
326 }
327 
// Handles a doorbell write.
//
// Parameters:
//   db_addr       doorbell offset; must already be present in dbMap,
//                 otherwise the simulator panics.
//   doorbell_reg  value written to the doorbell; stored as the queue's new
//                 write index.
328 void
329 HWScheduler::write(Addr db_addr, uint64_t doorbell_reg)
330 {
331  auto dbmap_iter = dbMap.find(db_addr);
332  if (dbmap_iter == dbMap.end()) {
333  panic("Writing to a non-existing queue (db_offset %x)", db_addr);
334  }
// Same entry as dbmap_iter->second; the key is known to exist at this point.
335  uint32_t al_idx = dbMap[db_addr];
336  // Modify the write pointer
337  activeList[al_idx].qDesc->writeIndex = doorbell_reg;
338  // If a queue is unmapped and remapped (common in full system) the qDesc
339  // gets reused. Keep the readIndex up to date so that when the HSA packet
340  // processor gets commands from host, the correct entry is read after
341  // remapping.
342  activeList[al_idx].qDesc->readIndex = doorbell_reg - 1;
343  DPRINTF(HSAPacketProcessor, "queue %d qDesc->writeIndex %d\n",
344  al_idx, activeList[al_idx].qDesc->writeIndex);
345  // If this queue is mapped, then start DMA to fetch the
346  // AQL packet
347  if (regdListMap.find(al_idx) != regdListMap.end()) {
// NOTE(review): listing line 348 (the statement that starts the fetch) is
// absent from this rendering -- confirm against the upstream source.
349  }
350 }
351 
// Destroys a previously registered queue.
//
// Looks up the queue's doorbell offset through qidMap, frees its descriptor
// and ring buffer, and removes it from activeList and dbMap. If the queue
// was mapped to a hardware slot, that slot is cleared and contextSwitchQ()
// is invoked to try to reuse it. schedWakeup() is always called at the end.
//
// Parameters:
//   queue_id      identifier passed at registration; asserted to be present
//                 in qidMap.
//   doorbellSize  not referenced in the lines visible in this listing.
//
// NOTE(review): the visible lines never erase qidMap[queue_id], so the
// queue_id -> offset entry outlives the queue. A repeated unregister of the
// same ID would pass the assert and hit the panic below instead. Confirm
// whether the stale entry is intended.
352 void
353 HWScheduler::unregisterQueue(uint64_t queue_id, int doorbellSize)
354 {
355  assert(qidMap.count(queue_id));
356  Addr db_offset = qidMap[queue_id];
357  auto dbmap_iter = dbMap.find(db_offset);
358  if (dbmap_iter == dbMap.end()) {
359  panic("Destroying a non-existing queue (db_offset %x)",
360  db_offset);
361  }
362  uint32_t al_idx = dbMap[db_offset];
363  assert(dbMap[db_offset] == dbmap_iter->second);
364  if (!activeList[al_idx].qDesc->isEmpty()) {
365  // The HSA runtime specification says that deleting
366  // a queue before it is fully processed can lead to undefined
367  // behavior and it is the application's responsibility to
368  // avoid this situation.
369  // Even completion signal is not a sufficient indication for a
370  // fully processed queue; for example completion signal may be
371  // asserted when a read pointer update is in progress
372  warn("Destroying a non-empty queue");
373  }
374  delete activeList[al_idx].qDesc;
375  delete activeList[al_idx].aqlBuf;
376  activeList.erase(al_idx);
377  // Unmap doorbell from doorbell map
378  dbMap.erase(db_offset);
379  if (regdListMap.find(al_idx) != regdListMap.end()) {
380  uint32_t rl_idx = regdListMap[al_idx];
// Clear the slot's pointers; the objects they referred to were deleted above.
381  hsaPP->getRegdListEntry(rl_idx)->qCntxt.aqlBuf = NULL;
382  hsaPP->getRegdListEntry(rl_idx)->qCntxt.qDesc = NULL;
// NOTE(review): listing lines 383-384 are absent from this rendering;
// presumably they reset further per-slot state before the assertion below
// -- confirm against the upstream source.
385  assert(!hsaPP->getRegdListEntry(rl_idx)->aqlProcessEvent.scheduled());
386  regdListMap.erase(al_idx);
387  // A registered queue is released, let us try to map
388  // a queue to that slot
389  contextSwitchQ();
390  }
391  schedWakeup();
392 }
393 
394 } // namespace gem5
gem5::HWScheduler::SchedulerWakeupEvent::process
virtual void process()
Definition: hw_scheduler.cc:52
gem5::HWScheduler::nextRLId
uint32_t nextRLId
Definition: hw_scheduler.hh:110
gem5::curTick
Tick curTick()
The universal simulation clock.
Definition: cur_tick.hh:46
gem5::HWScheduler::removeQCntxt
void removeQCntxt()
Definition: hw_scheduler.cc:250
gem5::HSAPacketProcessor::schedAQLProcessing
void schedAQLProcessing(uint32_t rl_idx)
Definition: hsa_packet_processor.cc:277
warn
#define warn(...)
Definition: logging.hh:246
gem5::HSAPacketProcessor::RQLEntry::qCntxt
QCntxt qCntxt
Definition: hsa_packet_processor.hh:308
gem5::HSAPacketProcessor::SignalState::resetSigVals
void resetSigVals()
Definition: hsa_packet_processor.hh:282
gem5::HWScheduler::dbMap
std::map< Addr, uint32_t > dbMap
Definition: hw_scheduler.hh:95
gem5::HWScheduler::findEmptyHWQ
bool findEmptyHWQ()
Definition: hw_scheduler.cc:138
gem5::HWScheduler::wakeup
void wakeup()
Definition: hw_scheduler.cc:58
gem5::HWScheduler::unmapQFromRQ
bool unmapQFromRQ()
Definition: hw_scheduler.cc:238
gem5::HWScheduler::findNextActiveALQ
bool findNextActiveALQ()
Definition: hw_scheduler.cc:270
cur_tick.hh
gem5::HSAPacketProcessor::inFlightPkts
uint64_t inFlightPkts(uint32_t queId)
Definition: hsa_packet_processor.hh:339
gem5::EventManager::schedule
void schedule(Event &event, Tick when)
Definition: eventq.hh:1019
gem5::HSAPacketProcessor::SignalState::discardRead
bool discardRead
Definition: hsa_packet_processor.hh:278
hw_scheduler.hh
gem5::HSAPacketProcessor::getCommandsFromHost
void getCommandsFromHost(int pid, uint32_t rl_idx)
Definition: hsa_packet_processor.cc:500
gem5::HWScheduler::isRLQIdle
bool isRLQIdle(uint32_t rl_idx)
Definition: hw_scheduler.cc:306
gem5::HWScheduler::contextSwitchQ
bool contextSwitchQ()
Definition: hw_scheduler.cc:201
HWSCHDLR_EVENT_DESCRIPTION_GENERATOR
#define HWSCHDLR_EVENT_DESCRIPTION_GENERATOR(XEVENT)
Definition: hw_scheduler.cc:39
gem5::HSAQueueDescriptor::dmaInProgress
bool dmaInProgress
Definition: hsa_packet_processor.hh:88
gem5::HWScheduler::schedWakeup
void schedWakeup()
Definition: hw_scheduler.cc:72
gem5::Named::name
virtual std::string name() const
Definition: named.hh:47
gem5::HSAPacketProcessor
Definition: hsa_packet_processor.hh:252
DPRINTF
#define DPRINTF(x,...)
Definition: trace.hh:186
gem5::HWScheduler::registerNewQueue
void registerNewQueue(uint64_t hostReadIndexPointer, uint64_t basePointer, uint64_t queue_id, uint32_t size, int doorbellSize, GfxVersion gfxVersion, Addr offset=0, uint64_t rd_idx=0)
Definition: hw_scheduler.cc:85
gem5::HSAPacketProcessor::getRegdListEntry
class RQLEntry * getRegdListEntry(uint32_t queId)
Definition: hsa_packet_processor.hh:333
gem5::QCntxt
Definition: hsa_packet_processor.hh:240
gem5::HSAPacketProcessor::RQLEntry::depSignalRdState
SignalState depSignalRdState
Definition: hsa_packet_processor.hh:311
NUM_DMA_BUFS
#define NUM_DMA_BUFS
Definition: hsa_packet_processor.hh:50
gem5::HSAQueueDescriptor
Definition: hsa_packet_processor.hh:78
gem5::ArmISA::offset
Bitfield< 23, 0 > offset
Definition: types.hh:144
compiler.hh
gem5::HSAPacketProcessor::numHWQueues
int numHWQueues
Definition: hsa_packet_processor.hh:345
gem5::HWScheduler::unregisterQueue
void unregisterQueue(uint64_t queue_id, int doorbellSize)
Definition: hw_scheduler.cc:353
gem5::HWScheduler::mapQIfSlotAvlbl
bool mapQIfSlotAvlbl(uint32_t al_idx, AQLRingBuffer *aql_buf, HSAQueueDescriptor *q_desc)
Definition: hw_scheduler.cc:160
gem5::HSAQueueDescriptor::doorbellPointer
uint64_t doorbellPointer
Definition: hsa_packet_processor.hh:82
gem5::HWScheduler::qidMap
std::map< uint64_t, Addr > qidMap
Definition: hw_scheduler.hh:97
gem5::Addr
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
Definition: types.hh:147
gem5::HWScheduler::findNextIdleRLQ
bool findNextIdleRLQ()
Definition: hw_scheduler.cc:292
gem5::HWScheduler::activeList
std::map< uint32_t, QCntxt > activeList
Definition: hw_scheduler.hh:92
gem5::HWScheduler::addQCntxt
void addQCntxt(uint32_t al_idx, AQLRingBuffer *aql_buf, HSAQueueDescriptor *q_desc)
Definition: hw_scheduler.cc:186
gem5::QCntxt::aqlBuf
AQLRingBuffer * aqlBuf
Definition: hsa_packet_processor.hh:243
gem5::HSAPacketProcessor::regdQList
std::vector< class RQLEntry * > regdQList
Definition: hsa_packet_processor.hh:321
gem5::HWScheduler::nextALId
uint32_t nextALId
Definition: hw_scheduler.hh:109
gem5::HWScheduler::hsaPP
HSAPacketProcessor * hsaPP
Definition: hw_scheduler.hh:102
gem5::HWScheduler::scheduleAndWakeupMappedQ
void scheduleAndWakeupMappedQ()
Definition: hw_scheduler.cc:175
gem5::AQLRingBuffer
Internal ring buffer which is used to prefetch/store copies of the in-memory HSA ring buffer.
Definition: hsa_packet_processor.hh:156
gem5::HWScheduler::wakeupDelay
const Tick wakeupDelay
Definition: hw_scheduler.hh:111
gem5::HWScheduler::updateRRVars
void updateRRVars(uint32_t al_idx, uint32_t rl_idx)
Definition: hw_scheduler.cc:231
gem5::QCntxt::qDesc
HSAQueueDescriptor * qDesc
Definition: hsa_packet_processor.hh:242
gem5::HWScheduler::SchedulerWakeupEvent::hwSchdlr
HWScheduler * hwSchdlr
Definition: hw_scheduler.hh:70
gem5::HWScheduler::schedWakeupEvent
SchedulerWakeupEvent schedWakeupEvent
Definition: hw_scheduler.hh:112
trace.hh
gem5::HWScheduler::write
void write(Addr db_addr, uint64_t doorbell_reg)
Definition: hw_scheduler.cc:329
gem5
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
Definition: gpu_translation_state.hh:37
gem5::HWScheduler::regdListMap
std::map< uint32_t, uint32_t > regdListMap
Definition: hw_scheduler.hh:101
gem5::AQLRingBuffer::setRdIdx
void setRdIdx(uint64_t value)
Definition: hsa_packet_processor.cc:608
gem5::Event::scheduled
bool scheduled() const
Determine if the current event is scheduled.
Definition: eventq.hh:465
panic
#define panic(...)
This implements a cprintf based panic() function.
Definition: logging.hh:178
MAX_ACTIVE_QUEUES
#define MAX_ACTIVE_QUEUES
Definition: hw_scheduler.hh:45
gem5::HSAPacketProcessor::RQLEntry::aqlProcessEvent
QueueProcessEvent aqlProcessEvent
Definition: hsa_packet_processor.hh:312

Generated on Sat Jun 18 2022 08:12:24 for gem5 by doxygen 1.8.17