gem5  v20.1.0.0
hsa_packet_processor.cc
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2015-2018 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * For use for simulation and test purposes only
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright notice,
11  * this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright notice,
14  * this list of conditions and the following disclaimer in the documentation
15  * and/or other materials provided with the distribution.
16  *
17  * 3. Neither the name of the copyright holder nor the names of its
18  * contributors may be used to endorse or promote products derived from this
19  * software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  *
33  * Authors: Eric van Tassell
34  */
35 
37 
38 #include <cstring>
39 
40 #include "base/chunk_generator.hh"
41 #include "base/compiler.hh"
42 #include "debug/HSAPacketProcessor.hh"
43 #include "dev/dma_device.hh"
44 #include "dev/hsa/hsa_device.hh"
45 #include "dev/hsa/hsa_packet.hh"
46 #include "dev/hsa/hw_scheduler.hh"
47 #include "mem/packet_access.hh"
48 #include "mem/page_table.hh"
49 #include "sim/process.hh"
50 #include "sim/proxy_ptr.hh"
51 #include "sim/system.hh"
52 
53 #define HSAPP_EVENT_DESCRIPTION_GENERATOR(XEVENT) \
54  const char* \
55  HSAPacketProcessor::XEVENT::description() const \
56  { \
57  return #XEVENT; \
58  }
59 
// Extracts the packet-type field from an AQL packet header: shift the field
// down to the LSB, then mask it to the field's width. The width constant is
// a number of bits, so the mask is (2^width - 1), not (width - 1). The macro
// argument is parenthesized so pointer expressions can be passed safely.
#define PKT_TYPE(PKT) ((hsa_packet_type_t)((((PKT)->header) >> \
            HSA_PACKET_HEADER_TYPE) & \
            ((1u << HSA_PACKET_HEADER_WIDTH_TYPE) - 1u)))
62 
// checks if the barrier bit is set in the header -- shift the barrier bit
// to LSB, then bitwise "and" to mask off all other bits. The mask is built
// from the field width (in bits) and the macro argument is parenthesized so
// that pointer expressions can be passed safely.
#define IS_BARRIER(PKT) ((hsa_packet_header_t)((((PKT)->header) >> \
            HSA_PACKET_HEADER_BARRIER) & \
            ((1u << HSA_PACKET_HEADER_WIDTH_BARRIER) - 1u)))
67 
68 HSAPP_EVENT_DESCRIPTION_GENERATOR(UpdateReadDispIdDmaEvent)
69 HSAPP_EVENT_DESCRIPTION_GENERATOR(CmdQueueCmdDmaEvent)
70 HSAPP_EVENT_DESCRIPTION_GENERATOR(QueueProcessEvent)
71 HSAPP_EVENT_DESCRIPTION_GENERATOR(DepSignalsReadDmaEvent)
72 
74  : DmaDevice(p), numHWQueues(p->numHWQueues), pioAddr(p->pioAddr),
75  pioSize(PAGE_SIZE), pioDelay(10), pktProcessDelay(p->pktProcessDelay)
76 {
77  DPRINTF(HSAPacketProcessor, "%s:\n", __FUNCTION__);
78  hwSchdlr = new HWScheduler(this, p->wakeupDelay);
79  regdQList.resize(numHWQueues);
80  for (int i = 0; i < numHWQueues; i++) {
81  regdQList[i] = new RQLEntry(this, i);
82  }
83 }
84 
86 {
87  for (auto &queue : regdQList) {
88  delete queue;
89  }
90 }
91 
92 void
94 {
95  hwSchdlr->unregisterQueue(queue_id);
96 }
97 
98 void
99 HSAPacketProcessor::setDeviceQueueDesc(uint64_t hostReadIndexPointer,
100  uint64_t basePointer,
101  uint64_t queue_id,
102  uint32_t size)
103 {
105  "%s:base = %p, qID = %d, ze = %d\n", __FUNCTION__,
106  (void *)basePointer, queue_id, size);
107  hwSchdlr->registerNewQueue(hostReadIndexPointer,
108  basePointer, queue_id, size);
109 }
110 
113 {
114  assert(pioSize != 0);
115 
116  AddrRangeList ranges;
117  ranges.push_back(RangeSize(pioAddr, pioSize));
118 
119  return ranges;
120 }
121 
122 // Basically only processes writes to the queue doorbell register.
123 Tick
125 {
126  assert(pkt->getAddr() >= pioAddr && pkt->getAddr() < pioAddr + pioSize);
127 
128  // TODO: How to get pid??
129  Addr M5_VAR_USED daddr = pkt->getAddr() - pioAddr;
130 
132  "%s: write of size %d to reg-offset %d (0x%x)\n",
133  __FUNCTION__, pkt->getSize(), daddr, daddr);
134 
135  uint32_t doorbell_reg = pkt->getLE<uint32_t>();
136 
138  "%s: write data 0x%x to offset %d (0x%x)\n",
139  __FUNCTION__, doorbell_reg, daddr, daddr);
140  hwSchdlr->write(daddr, doorbell_reg);
141  pkt->makeAtomicResponse();
142  return pioDelay;
143 }
144 
145 Tick
147 {
148  pkt->makeAtomicResponse();
149  pkt->setBadAddress();
150  return pioDelay;
151 }
152 
153 void
155 {
156  // Grab the process and try to translate the virtual address with it; with
157  // new extensions, it will likely be wrong to just arbitrarily grab context
158  // zero.
159  auto process = sys->threads[0]->getProcessPtr();
160 
161  if (!process->pTable->translate(vaddr, paddr))
162  fatal("failed translation: vaddr 0x%x\n", vaddr);
163 }
164 
165 void
166 HSAPacketProcessor::dmaVirt(DmaFnPtr dmaFn, Addr addr, unsigned size,
167  Event *event, void *data, Tick delay)
168 {
169  if (size == 0) {
170  schedule(event, curTick() + delay);
171  return;
172  }
173 
174  // move the buffer data pointer with the chunks
175  uint8_t *loc_data = (uint8_t*)data;
176 
177  for (ChunkGenerator gen(addr, size, PAGE_SIZE); !gen.done(); gen.next()) {
178  Addr phys;
179 
180  // translate pages into their corresponding frames
181  translateOrDie(gen.addr(), phys);
182 
183  // only send event on last transfer; transfers complete in-order
184  Event *ev = gen.last() ? event : NULL;
185 
186  (this->*dmaFn)(phys, gen.size(), ev, loc_data, delay);
187 
188  loc_data += gen.size();
189  }
190 }
191 
192 void
193 HSAPacketProcessor::dmaReadVirt(Addr host_addr, unsigned size,
194  Event *event, void *data, Tick delay)
195 {
197  "%s:host_addr = 0x%lx, size = %d\n", __FUNCTION__, host_addr, size);
198  dmaVirt(&DmaDevice::dmaRead, host_addr, size, event, data, delay);
199 }
200 
201 void
202 HSAPacketProcessor::dmaWriteVirt(Addr host_addr, unsigned size,
203  Event *event, void *data, Tick delay)
204 {
205  dmaVirt(&DmaDevice::dmaWrite, host_addr, size, event, data, delay);
206 }
207 
210  : Event(Default_Pri, AutoDelete)
211 {
212  DPRINTF(HSAPacketProcessor, "%s:\n", __FUNCTION__);
214 }
215 
216 void
217 HSAPacketProcessor::updateReadIndex(int pid, uint32_t rl_idx)
218 {
219  AQLRingBuffer* aqlbuf = regdQList[rl_idx]->qCntxt.aqlBuf;
220  HSAQueueDescriptor* qDesc = regdQList[rl_idx]->qCntxt.qDesc;
221  auto *dmaEvent = new UpdateReadDispIdDmaEvent();
222 
224  "%s: read-pointer offset [0x%x]\n", __FUNCTION__, aqlbuf->rdIdx());
225 
227  sizeof(aqlbuf->rdIdx()),
228  dmaEvent, aqlbuf->rdIdxPtr());
229 
231  "%s: rd-ptr offset [0x%x], wr-ptr offset [0x%x], space used = %d," \
232  " q size = %d, is_empty = %s, active list ID = %d\n", __FUNCTION__,
233  qDesc->readIndex, qDesc->writeIndex, qDesc->spaceUsed(),
234  qDesc->numElts, qDesc->isEmpty()? "true" : "false", rl_idx);
235  if (qDesc->writeIndex != aqlbuf->wrIdx()) {
236  getCommandsFromHost(pid, rl_idx);
237  }
238 }
239 
241 CmdQueueCmdDmaEvent(HSAPacketProcessor *_hsaPP, int _pid, bool _isRead,
242  uint32_t _ix_start, unsigned _num_pkts,
243  dma_series_ctx *_series_ctx, void *_dest_4debug)
244  : Event(Default_Pri, AutoDelete), hsaPP(_hsaPP), pid(_pid), isRead(_isRead),
245  ix_start(_ix_start), num_pkts(_num_pkts), series_ctx(_series_ctx),
246  dest_4debug(_dest_4debug)
247 {
249 
250  DPRINTF(HSAPacketProcessor, "%s, ix = %d, npkts = %d," \
251  "active list ID = %d\n", __FUNCTION__,
252  _ix_start, num_pkts, series_ctx->rl_idx);
253 }
254 
255 void
257 {
258  uint32_t rl_idx = series_ctx->rl_idx;
259  AQLRingBuffer *aqlRingBuffer M5_VAR_USED =
260  hsaPP->regdQList[rl_idx]->qCntxt.aqlBuf;
261  HSAQueueDescriptor* qDesc =
262  hsaPP->regdQList[rl_idx]->qCntxt.qDesc;
263  DPRINTF(HSAPacketProcessor, ">%s, ix = %d, npkts = %d," \
264  " pktsRemaining = %d, active list ID = %d\n", __FUNCTION__,
265  ix_start, num_pkts, series_ctx->pkts_2_go,
266  rl_idx);
267  if (isRead) {
268  series_ctx->pkts_2_go -= num_pkts;
269  if (series_ctx->pkts_2_go == 0) {
270  // Mark DMA as completed
271  qDesc->dmaInProgress = false;
273  "%s: schedule Qwakeup next cycle, rdIdx %d, wrIdx %d," \
274  " dispIdx %d, active list ID = %d\n",
275  __FUNCTION__, aqlRingBuffer->rdIdx(),
276  aqlRingBuffer->wrIdx(), aqlRingBuffer->dispIdx(), rl_idx);
277  // schedule queue wakeup
278  hsaPP->schedAQLProcessing(rl_idx);
279  delete series_ctx;
280  }
281  }
282 }
283 
284 void
286 {
287  RQLEntry *queue = regdQList[rl_idx];
288  if (!queue->aqlProcessEvent.scheduled()) {
289  Tick processingTick = curTick() + delay;
290  schedule(queue->aqlProcessEvent, processingTick);
291  DPRINTF(HSAPacketProcessor, "AQL processing scheduled at tick: %d\n",
292  processingTick);
293  } else {
294  DPRINTF(HSAPacketProcessor, "AQL processing already scheduled\n");
295  }
296 }
297 
298 void
300 {
302 }
303 
304 Q_STATE
305 HSAPacketProcessor::processPkt(void* pkt, uint32_t rl_idx, Addr host_pkt_addr)
306 {
307  Q_STATE is_submitted = BLOCKED_BPKT;
308  SignalState *dep_sgnl_rd_st = &(regdQList[rl_idx]->depSignalRdState);
309  // Dependency signals are not read yet. And this can only be a retry.
310  // The retry logic will schedule the packet processor wakeup
311  if (dep_sgnl_rd_st->pendingReads != 0) {
312  return BLOCKED_BPKT;
313  }
314  // `pkt` can be typecasted to any type of AQL packet since they all
315  // have header information at offset zero
316  auto disp_pkt = (_hsa_dispatch_packet_t *)pkt;
317  hsa_packet_type_t pkt_type = PKT_TYPE(disp_pkt);
318  if (IS_BARRIER(disp_pkt) &&
319  regdQList[rl_idx]->compltnPending() > 0) {
320  // If this packet is using the "barrier bit" to enforce ordering with
321  // previous packets, and if there are outstanding packets, set the
322  // barrier bit for this queue and block the queue.
323  DPRINTF(HSAPacketProcessor, "%s: setting barrier bit for active" \
324  " list ID = %d\n", __FUNCTION__, rl_idx);
325  regdQList[rl_idx]->setBarrierBit(true);
326  return BLOCKED_BBIT;
327  }
328  if (pkt_type == HSA_PACKET_TYPE_VENDOR_SPECIFIC) {
329  DPRINTF(HSAPacketProcessor, "%s: submitting vendor specific pkt" \
330  " active list ID = %d\n", __FUNCTION__, rl_idx);
331  // Submit packet to HSA device (dispatcher)
332  hsa_device->submitVendorPkt((void *)disp_pkt, rl_idx, host_pkt_addr);
333  is_submitted = UNBLOCKED;
334  } else if (pkt_type == HSA_PACKET_TYPE_KERNEL_DISPATCH) {
335  DPRINTF(HSAPacketProcessor, "%s: submitting kernel dispatch pkt" \
336  " active list ID = %d\n", __FUNCTION__, rl_idx);
337  // Submit packet to HSA device (dispatcher)
338  hsa_device->submitDispatchPkt((void *)disp_pkt, rl_idx, host_pkt_addr);
339  is_submitted = UNBLOCKED;
340  } else if (pkt_type == HSA_PACKET_TYPE_BARRIER_AND) {
341  DPRINTF(HSAPacketProcessor, "%s: Processing barrier packet" \
342  " active list ID = %d\n", __FUNCTION__, rl_idx);
343  auto bar_and_pkt = (_hsa_barrier_and_packet_t *)pkt;
344  bool isReady = true;
345  // Loop thorugh all the completion signals to see if this barrier
346  // packet is ready.
347  for (int i = 0; i < NumSignalsPerBarrier; i++) {
348  // dep_signal = zero imply no signal connected
349  if (bar_and_pkt->dep_signal[i]) {
350  // The signal value is aligned 8 bytes from
351  // the actual handle in the runtime
352  uint64_t signal_addr =
353  (uint64_t) (((uint64_t *) bar_and_pkt->dep_signal[i]) + 1);
354  hsa_signal_value_t *signal_val =
355  &(dep_sgnl_rd_st->values[i]);
356  DPRINTF(HSAPacketProcessor, "%s: Barrier pkt dep sgnl[%d]" \
357  " , sig addr %x, value %d active list ID = %d\n",
358  __FUNCTION__, i, signal_addr,
359  *signal_val, rl_idx);
360  // The if condition will be executed everytime except the
361  // very first time this barrier packet is encounteresd.
362  if (dep_sgnl_rd_st->allRead) {
363  if (*signal_val != 0) {
364  // This signal is not yet ready, read it again
365  isReady = false;
366  DepSignalsReadDmaEvent *sgnl_rd_evnt =
367  new DepSignalsReadDmaEvent(dep_sgnl_rd_st);
368  dmaReadVirt(signal_addr, sizeof(hsa_signal_value_t),
369  sgnl_rd_evnt, signal_val);
370  dep_sgnl_rd_st->pendingReads++;
371  DPRINTF(HSAPacketProcessor, "%s: Pending reads %d," \
372  " active list %d\n", __FUNCTION__,
373  dep_sgnl_rd_st->pendingReads, rl_idx);
374  }
375  } else {
376  // This signal is not yet ready, read it again
377  isReady = false;
378  DepSignalsReadDmaEvent *sgnl_rd_evnt =
379  new DepSignalsReadDmaEvent(dep_sgnl_rd_st);
380  dmaReadVirt(signal_addr, sizeof(hsa_signal_value_t),
381  sgnl_rd_evnt, signal_val);
382  dep_sgnl_rd_st->pendingReads++;
383  DPRINTF(HSAPacketProcessor, "%s: Pending reads %d," \
384  " active list %d\n", __FUNCTION__,
385  dep_sgnl_rd_st->pendingReads, rl_idx);
386  }
387  }
388  }
389  if (isReady) {
390  assert(dep_sgnl_rd_st->pendingReads == 0);
391  DPRINTF(HSAPacketProcessor, "%s: Barrier packet completed" \
392  " active list ID = %d\n", __FUNCTION__, rl_idx);
393  // TODO: Completion signal of barrier packet to be
394  // atomically decremented here
395  finishPkt((void*)bar_and_pkt, rl_idx);
396  is_submitted = UNBLOCKED;
397  // Reset signal values
398  dep_sgnl_rd_st->resetSigVals();
399  // The completion signal is connected
400  if (bar_and_pkt->completion_signal != 0) {
401  // The signal value is aligned 8 bytes
402  // from the actual handle in the runtime
403  uint64_t signal_addr =
404  (uint64_t) (((uint64_t *)
405  bar_and_pkt->completion_signal) + 1);
406  DPRINTF(HSAPacketProcessor, "Triggering barrier packet" \
407  " completion signal: %x!\n", signal_addr);
417  VPtr<uint64_t> prev_signal(signal_addr, sys->threads[0]);
418 
419  hsa_signal_value_t *new_signal = new hsa_signal_value_t;
420  *new_signal = (hsa_signal_value_t)*prev_signal - 1;
421 
422  dmaWriteVirt(signal_addr,
423  sizeof(hsa_signal_value_t), NULL, new_signal, 0);
424  }
425  }
426  if (dep_sgnl_rd_st->pendingReads > 0) {
427  // Atleast one DepSignalsReadDmaEvent is scheduled this cycle
428  dep_sgnl_rd_st->allRead = false;
429  dep_sgnl_rd_st->discardRead = false;
430  }
431  } else if (pkt_type == HSA_PACKET_TYPE_BARRIER_OR) {
432  fatal("Unsupported packet type HSA_PACKET_TYPE_BARRIER_OR");
433  } else if (pkt_type == HSA_PACKET_TYPE_INVALID) {
434  fatal("Unsupported packet type HSA_PACKET_TYPE_INVALID");
435  } else {
436  fatal("Unsupported packet type %d\n", pkt_type);
437  }
438  return is_submitted;
439 }
440 
441 // Wakes up every fixed time interval (pktProcessDelay) and processes a single
442 // packet from the queue that scheduled this wakeup. If there are more
443 // packets in that queue, the next wakeup is scheduled.
444 void
446 {
447  AQLRingBuffer *aqlRingBuffer = hsaPP->regdQList[rqIdx]->qCntxt.aqlBuf;
449  "%s: Qwakeup , rdIdx %d, wrIdx %d," \
450  " dispIdx %d, active list ID = %d\n",
451  __FUNCTION__, aqlRingBuffer->rdIdx(),
452  aqlRingBuffer->wrIdx(), aqlRingBuffer->dispIdx(), rqIdx);
453  // If barrier bit is set, then this wakeup is a dummy wakeup
454  // just to model the processing time. Do nothing.
455  if (hsaPP->regdQList[rqIdx]->getBarrierBit()) {
457  "Dummy wakeup with barrier bit for rdIdx %d\n", rqIdx);
458  return;
459  }
460  // In the future, we may support batch processing of packets.
461  // Then, we can just remove the break statements and the code
462  // will support batch processing. That is why we are using a
463  // "while loop" here instead on an "if" condition.
464  while (hsaPP->regdQList[rqIdx]->dispPending()) {
465  void *pkt = aqlRingBuffer->ptr(aqlRingBuffer->dispIdx());
466  DPRINTF(HSAPacketProcessor, "%s: Attempting dispatch @ dispIdx[%d]\n",
467  __FUNCTION__, aqlRingBuffer->dispIdx());
468  Addr host_addr = aqlRingBuffer->hostDispAddr();
469  Q_STATE q_state = hsaPP->processPkt(pkt, rqIdx, host_addr);
470  if (q_state == UNBLOCKED) {
471  aqlRingBuffer->incDispIdx(1);
472  DPRINTF(HSAPacketProcessor, "%s: Increment dispIdx[%d]\n",
473  __FUNCTION__, aqlRingBuffer->dispIdx());
474  if (hsaPP->regdQList[rqIdx]->dispPending()) {
475  hsaPP->schedAQLProcessing(rqIdx);
476  }
477  break;
478  } else if (q_state == BLOCKED_BPKT) {
479  // This queue is blocked by barrier packet,
480  // schedule a processing event
481  hsaPP->schedAQLProcessing(rqIdx);
482  break;
483  } else if (q_state == BLOCKED_BBIT) {
484  // This queue is blocked by barrier bit, and processing event
485  // should be scheduled from finishPkt(). However, to elapse
486  // "pktProcessDelay" processing time, let us schedule a dummy
487  // wakeup once which will just wakeup and will do nothing.
488  hsaPP->schedAQLProcessing(rqIdx);
489  break;
490  } else {
491  panic("Unknown queue state\n");
492  }
493  }
494 }
495 
496 void
498 {
499  assert(pendingReads > 0);
500  pendingReads--;
501  if (pendingReads == 0) {
502  allRead = true;
503  if (discardRead) {
504  resetSigVals();
505  }
506  }
507 }
508 
509 void
511 {
512  HSAQueueDescriptor* qDesc = regdQList[rl_idx]->qCntxt.qDesc;
513  AQLRingBuffer *aqlRingBuffer = regdQList[rl_idx]->qCntxt.aqlBuf;
514 
516  "%s: read-pointer offset[0x%x], write-pointer offset[0x%x]"
517  " doorbell(%d)[0x%x] \n",
518  __FUNCTION__, qDesc->readIndex,
519  qDesc->writeIndex, pid, qDesc->doorbellPointer);
520 
521  if (qDesc->dmaInProgress) {
522  // we'll try again when this dma transfer completes in updateReadIndex
523  return;
524  }
525  uint32_t num_umq = qDesc->spaceUsed();
526  if (num_umq == 0)
527  return; // nothing to be gotten
528  uint32_t umq_nxt = qDesc->readIndex;
529  // Total AQL buffer size
530  uint32_t ttl_aql_buf = aqlRingBuffer->numObjs();
531  // Available AQL buffer size. If the available buffer is less than
532  // demanded, number of available buffer is returned
533  uint32_t got_aql_buf = aqlRingBuffer->allocEntry(num_umq);
534  qDesc->readIndex += got_aql_buf;
535  uint32_t dma_start_ix = (aqlRingBuffer->wrIdx() - got_aql_buf) %
536  ttl_aql_buf;
537  dma_series_ctx *series_ctx = NULL;
538 
539  DPRINTF(HSAPacketProcessor, "%s: umq_nxt = %d, ttl_aql_buf = %d, "
540  "dma_start_ix = %d, num_umq = %d\n", __FUNCTION__, umq_nxt,
541  ttl_aql_buf, dma_start_ix, num_umq);
542 
543  if (got_aql_buf == 0) {
544  // we'll try again when some dma bufs are freed in freeEntry
545  qDesc->stalledOnDmaBufAvailability = true;
546  return;
547  } else {
548  qDesc->stalledOnDmaBufAvailability = false;
549  }
550 
551  uint32_t dma_b4_wrap = ttl_aql_buf - dma_start_ix;
552  while (got_aql_buf != 0 && num_umq != 0) {
553  uint32_t umq_b4_wrap = qDesc->numObjs() -
554  (umq_nxt % qDesc->objSize());
555  uint32_t num_2_xfer
556  = std::min({umq_b4_wrap, dma_b4_wrap, num_umq, got_aql_buf});
557  if (!series_ctx) {
558  qDesc->dmaInProgress = true;
559  series_ctx = new dma_series_ctx(got_aql_buf, got_aql_buf,
560  dma_start_ix, rl_idx);
561  }
562 
563  void *aql_buf = aqlRingBuffer->ptr(dma_start_ix);
564  CmdQueueCmdDmaEvent *dmaEvent
565  = new CmdQueueCmdDmaEvent(this, pid, true, dma_start_ix,
566  num_2_xfer, series_ctx, aql_buf);
568  "%s: aql_buf = %p, umq_nxt = %d, dma_ix = %d, num2xfer = %d\n",
569  __FUNCTION__, aql_buf, umq_nxt, dma_start_ix, num_2_xfer);
570 
571  dmaReadVirt(qDesc->ptr(umq_nxt), num_2_xfer * qDesc->objSize(),
572  dmaEvent, aql_buf);
573 
574  aqlRingBuffer->saveHostDispAddr(qDesc->ptr(umq_nxt), num_2_xfer,
575  dma_start_ix);
576 
577  num_umq -= num_2_xfer;
578  got_aql_buf -= num_2_xfer;
579  dma_start_ix = (dma_start_ix + num_2_xfer) % ttl_aql_buf;
580  umq_nxt = (umq_nxt + num_2_xfer) % qDesc->numObjs();
581  if (got_aql_buf == 0 && num_umq != 0) {
582  // There are more packets in the queue but
583  // not enough DMA buffers. Set the stalledOnDmaBufAvailability,
584  // we will try again in freeEntry
585  qDesc->stalledOnDmaBufAvailability = true;
586  }
587  }
588 }
589 
590 void
592 {
593  HSAQueueDescriptor* M5_VAR_USED qDesc = regdQList[rl_idx]->qCntxt.qDesc;
595  "%s: pid[%d], basePointer[0x%lx], dBPointer[0x%lx], "
596  "writeIndex[0x%x], readIndex[0x%x], size(bytes)[0x%x]\n",
597  __FUNCTION__, pid, qDesc->basePointer,
598  qDesc->doorbellPointer, qDesc->writeIndex,
599  qDesc->readIndex, qDesc->numElts);
600 }
601 
603  const std::string name)
604  : _name(name), _wrIdx(0), _rdIdx(0), _dispIdx(0)
605 {
606  _aqlBuf.resize(size);
607  _aqlComplete.resize(size);
608  _hostDispAddresses.resize(size);
609  // Mark all packets as invalid and incomplete
610  for (auto& it : _aqlBuf)
611  it.header = HSA_PACKET_TYPE_INVALID;
612  std::fill(_aqlComplete.begin(), _aqlComplete.end(), false);
613 }
614 
615 bool
617 {
618  _aqlComplete[(hsa_kernel_dispatch_packet_t *) pkt - _aqlBuf.data()] = true;
619  DPRINTF(HSAPacketProcessor, "%s: pkt_ix = %d; "\
620  " # free entries = %d, wrIdx = %d, rdIdx = %d\n", __FUNCTION__,
621  (hsa_kernel_dispatch_packet_t *) pkt - _aqlBuf.data(),
622  nFree(), wrIdx(), rdIdx());
623  // Packets can complete out-of-order. This code "retires" packets in-order
624  // by updating the read pointer in the MQD when a contiguous chunk of
625  // packets have finished.
626  uint32_t old_rdIdx = rdIdx();
627  while (_aqlComplete[rdIdx() % numObjs()]) {
628  _aqlComplete[rdIdx() % numObjs()] = false;
630  incRdIdx(1);
631  }
632  return (old_rdIdx != rdIdx());
633 }
634 
635 void
637 {
638  this->hsa_device = dev;
639 }
640 
641 int
642 AQLRingBuffer::allocEntry(uint32_t nBufReq)
643 {
644  DPRINTF(HSAPacketProcessor, "%s: nReq = %d\n", __FUNCTION__, nBufReq);
645  if (nFree() == 0) {
646  DPRINTF(HSAPacketProcessor, "%s: return = %d\n", __FUNCTION__, 0);
647  return 0;
648  }
649 
650  if (nBufReq > nFree())
651  nBufReq = nFree();
652 
653  DPRINTF(HSAPacketProcessor, "%s: ix1stFree = %d\n", __FUNCTION__, wrIdx());
654  incWrIdx(nBufReq);
655  DPRINTF(HSAPacketProcessor, "%s: return = %d, wrIdx = %d\n",
656  __FUNCTION__, nBufReq, wrIdx());
657  return nBufReq;
658 }
659 
661 HSAPacketProcessorParams::create()
662 {
663  return new HSAPacketProcessor(this);
664 }
665 
666 void
667 HSAPacketProcessor::finishPkt(void *pvPkt, uint32_t rl_idx)
668 {
669  HSAQueueDescriptor* qDesc = regdQList[rl_idx]->qCntxt.qDesc;
670 
671  // if barrier bit was set and this is the last
672  // outstanding packet from that queue,
673  // unset it here
674  if (regdQList[rl_idx]->getBarrierBit() &&
675  regdQList[rl_idx]->isLastOutstandingPkt()) {
677  "Unset barrier bit for active list ID %d\n", rl_idx);
678  regdQList[rl_idx]->setBarrierBit(false);
679  panic_if(!regdQList[rl_idx]->dispPending(),
680  "There should be pending kernels in this queue\n");
682  "Rescheduling active list ID %d after unsetting barrier "
683  "bit\n", rl_idx);
684  // Try to schedule wakeup in the next cycle. There is a minimum
685  // pktProcessDelay for queue wake up. If that processing delay is
686  // elapsed, schedAQLProcessing will wakeup next tick.
687  schedAQLProcessing(rl_idx, 1);
688  }
689 
690  // If set, then blocked schedule, so need to reschedule
691  if (regdQList[rl_idx]->qCntxt.aqlBuf->freeEntry(pvPkt))
692  updateReadIndex(0, rl_idx);
694  "%s: rd-ptr offset [0x%x], wr-ptr offset [0x%x], space used = %d," \
695  " q size = %d, stalled = %s, empty = %s, active list ID = %d\n",
696  __FUNCTION__, qDesc->readIndex, qDesc->writeIndex,
697  qDesc->spaceUsed(), qDesc->numElts,
698  qDesc->stalledOnDmaBufAvailability? "true" : "false",
699  qDesc->isEmpty()? "true" : "false", rl_idx);
700  // DMA buffer is freed, check the queue to see if there are DMA
701  // accesses blocked becasue of non-availability of DMA buffer
702  if (qDesc->stalledOnDmaBufAvailability) {
703  assert(!qDesc->isEmpty());
704  getCommandsFromHost(0, rl_idx); // TODO:assign correct pid
705  // when implementing
706  // multi-process support
707  }
708 }
HSAPacketProcessor::DepSignalsReadDmaEvent
Definition: hsa_packet_processor.hh:332
HWScheduler::registerNewQueue
void registerNewQueue(uint64_t hostReadIndexPointer, uint64_t basePointer, uint64_t queue_id, uint32_t size)
Definition: hw_scheduler.cc:84
fatal
#define fatal(...)
This implements a cprintf based fatal() function.
Definition: logging.hh:183
Event::scheduled
bool scheduled() const
Determine if the current event is scheduled.
Definition: eventq.hh:460
EventBase::AutoDelete
static const FlagsType AutoDelete
Definition: eventq.hh:102
Packet::makeAtomicResponse
void makeAtomicResponse()
Definition: packet.hh:1016
HSAPacketProcessor::UpdateReadDispIdDmaEvent::UpdateReadDispIdDmaEvent
UpdateReadDispIdDmaEvent()
Definition: hsa_packet_processor.cc:209
HSAPacketProcessor::~HSAPacketProcessor
~HSAPacketProcessor()
Definition: hsa_packet_processor.cc:85
system.hh
HSAPP_EVENT_DESCRIPTION_GENERATOR
#define HSAPP_EVENT_DESCRIPTION_GENERATOR(XEVENT)
Definition: hsa_packet_processor.cc:53
data
const char data[]
Definition: circlebuf.test.cc:42
hsa_kernel_dispatch_packet_s
AQL kernel dispatch packet.
Definition: hsa.h:2867
AQLRingBuffer::freeEntry
bool freeEntry(void *pkt)
Definition: hsa_packet_processor.cc:616
Packet::getAddr
Addr getAddr() const
Definition: packet.hh:754
HWScheduler::unregisterQueue
void unregisterQueue(uint64_t queue_id)
Definition: hw_scheduler.cc:339
ArmISA::i
Bitfield< 7 > i
Definition: miscregs_types.hh:63
BLOCKED_BBIT
@ BLOCKED_BBIT
Definition: hsa_packet_processor.hh:60
hsa_packet_processor.hh
hsa_signal_value_t
int32_t hsa_signal_value_t
Signal value.
Definition: hsa.h:1302
AQLRingBuffer::saveHostDispAddr
void saveHostDispAddr(Addr host_pkt_addr, int num_pkts, int ix)
the kernel may try to read from the dispatch packet, so we need to keep the host address that corresp...
Definition: hsa_packet_processor.hh:144
ProxyPtr
Definition: proxy_ptr.hh:236
HSAPacketProcessor::pktProcessDelay
const Tick pktProcessDelay
Definition: hsa_packet_processor.hh:309
Tick
uint64_t Tick
Tick count type.
Definition: types.hh:63
AQLRingBuffer::_aqlBuf
std::vector< hsa_kernel_dispatch_packet_t > _aqlBuf
Definition: hsa_packet_processor.hh:120
HSAQueueDescriptor::isEmpty
bool isEmpty()
Definition: hsa_packet_processor.hh:96
HSAPacketProcessor::pioDelay
Tick pioDelay
Definition: hsa_packet_processor.hh:308
NumSignalsPerBarrier
#define NumSignalsPerBarrier
Definition: hsa_packet_processor.hh:53
HSAPacketProcessor::finishPkt
void finishPkt(void *pkt, uint32_t rl_idx)
Definition: hsa_packet_processor.cc:667
HSAPacketProcessor::dmaWriteVirt
void dmaWriteVirt(Addr host_addr, unsigned size, Event *event, void *data, Tick delay=0)
Definition: hsa_packet_processor.cc:202
proxy_ptr.hh
HSAQueueDescriptor::dmaInProgress
bool dmaInProgress
Definition: hsa_packet_processor.hh:81
AQLRingBuffer::incWrIdx
void incWrIdx(uint64_t value)
Definition: hsa_packet_processor.hh:195
Packet::getSize
unsigned getSize() const
Definition: packet.hh:764
Q_STATE
Q_STATE
Definition: hsa_packet_processor.hh:58
hw_scheduler.hh
BLOCKED_BPKT
@ BLOCKED_BPKT
Definition: hsa_packet_processor.hh:63
HSAQueueDescriptor
Definition: hsa_packet_processor.hh:72
HSAPacketProcessor::pioSize
Addr pioSize
Definition: hsa_packet_processor.hh:307
AQLRingBuffer::dispIdx
uint64_t dispIdx() const
Definition: hsa_packet_processor.hh:190
HSAPacketProcessor::pioAddr
Addr pioAddr
Definition: hsa_packet_processor.hh:306
HSAPacketProcessor::setDeviceQueueDesc
void setDeviceQueueDesc(uint64_t hostReadIndexPointer, uint64_t basePointer, uint64_t queue_id, uint32_t size)
Definition: hsa_packet_processor.cc:99
AQLRingBuffer::nFree
uint32_t nFree() const
Definition: hsa_packet_processor.hh:186
AQLRingBuffer
Internal ring buffer which is used to prefetch/store copies of the in-memory HSA ring buffer.
Definition: hsa_packet_processor.hh:117
HSAPacketProcessor::setDevice
void setDevice(HSADevice *dev)
Definition: hsa_packet_processor.cc:636
PioDevice::sys
System * sys
Definition: io_device.hh:102
HWScheduler
Definition: hw_scheduler.hh:45
HSAPacketProcessor::getCommandsFromHost
void getCommandsFromHost(int pid, uint32_t rl_idx)
Definition: hsa_packet_processor.cc:510
HSAPacketProcessor::getAddrRanges
virtual AddrRangeList getAddrRanges() const
Every PIO device is obliged to provide an implementation that returns the address ranges the device r...
Definition: hsa_packet_processor.cc:112
AQLRingBuffer::hostDispAddr
Addr hostDispAddr() const
Definition: hsa_packet_processor.hh:153
HSADevice::submitVendorPkt
virtual void submitVendorPkt(void *raw_pkt, uint32_t queue_id, Addr host_pkt_addr)
submitVendorPkt() accepts vendor specific packets from the HSA packet processor.
Definition: hsa_device.hh:79
DmaDevice::dmaRead
void dmaRead(Addr addr, int size, Event *event, uint8_t *data, uint32_t sid, uint32_t ssid, Tick delay=0)
Definition: dma_device.hh:188
HSAQueueDescriptor::hostReadIndexPtr
uint64_t hostReadIndexPtr
Definition: hsa_packet_processor.hh:79
HSA_PACKET_TYPE_KERNEL_DISPATCH
@ HSA_PACKET_TYPE_KERNEL_DISPATCH
Packet used by agents for dispatching jobs to kernel agents.
Definition: hsa.h:2733
UNBLOCKED
@ UNBLOCKED
Definition: hsa_packet_processor.hh:59
dma_device.hh
HSADevice::submitDispatchPkt
virtual void submitDispatchPkt(void *raw_pkt, uint32_t qID, Addr host_pkt_addr)
submitDispatchPkt() accepts AQL dispatch packets from the HSA packet processor.
Definition: hsa_device.hh:65
EventManager::schedule
void schedule(Event &event, Tick when)
Definition: eventq.hh:1005
DmaDevice::dmaWrite
void dmaWrite(Addr addr, int size, Event *event, uint8_t *data, uint32_t sid, uint32_t ssid, Tick delay=0)
Definition: dma_device.hh:175
HSAQueueDescriptor::ptr
uint64_t ptr(uint64_t ix)
Definition: hsa_packet_processor.hh:98
AQLRingBuffer::wrIdx
uint64_t wrIdx() const
Definition: hsa_packet_processor.hh:191
HSAPacketProcessor
Definition: hsa_packet_processor.hh:212
_hsa_barrier_and_packet_s
Definition: hsa_packet.hh:81
AQLRingBuffer::incRdIdx
void incRdIdx(uint64_t value)
Definition: hsa_packet_processor.hh:194
Event
Definition: eventq.hh:246
HSAPacketProcessor::CmdQueueCmdDmaEvent::CmdQueueCmdDmaEvent
CmdQueueCmdDmaEvent(HSAPacketProcessor *hsaPP, int pid, bool isRead, uint32_t dma_buf_ix, uint num_bufs, dma_series_ctx *series_ctx, void *dest_4debug)
Definition: hsa_packet_processor.cc:241
HSAPacketProcessor::processPkt
Q_STATE processPkt(void *pkt, uint32_t rl_idx, Addr host_pkt_addr)
Definition: hsa_packet_processor.cc:305
AQLRingBuffer::AQLRingBuffer
AQLRingBuffer(uint32_t size, const std::string name)
Definition: hsa_packet_processor.cc:602
DPRINTF
#define DPRINTF(x,...)
Definition: trace.hh:234
PAGE_SIZE
#define PAGE_SIZE
Definition: base.cc:60
MipsISA::vaddr
vaddr
Definition: pra_constants.hh:275
HSAPacketProcessor::SignalState::resetSigVals
void resetSigVals()
Definition: hsa_packet_processor.hh:240
HSAPacketProcessor::dmaReadVirt
void dmaReadVirt(Addr host_addr, unsigned size, Event *event, void *data, Tick delay=0)
Definition: hsa_packet_processor.cc:193
MipsISA::event
Bitfield< 10, 5 > event
Definition: pra_constants.hh:297
AQLRingBuffer::rdIdx
uint64_t rdIdx() const
Definition: hsa_packet_processor.hh:192
HSAPacketProcessor::regdQList
std::vector< class RQLEntry * > regdQList
Definition: hsa_packet_processor.hh:279
HSAPacketProcessor::unsetDeviceQueueDesc
void unsetDeviceQueueDesc(uint64_t queue_id)
Definition: hsa_packet_processor.cc:93
HSAPacketProcessor::RQLEntry
Definition: hsa_packet_processor.hh:261
Event::setFlags
void setFlags(Flags _flags)
Definition: eventq.hh:323
process.hh
RangeSize
AddrRange RangeSize(Addr start, Addr size)
Definition: addr_range.hh:638
HSAQueueDescriptor::objSize
uint32_t objSize()
Definition: hsa_packet_processor.hh:93
HSAPacketProcessor::SignalState::discardRead
bool discardRead
Definition: hsa_packet_processor.hh:236
hsa_packet.hh
HSAPacketProcessor::dmaVirt
void dmaVirt(DmaFnPtr, Addr host_addr, unsigned size, Event *event, void *data, Tick delay=0)
Definition: hsa_packet_processor.cc:166
HSAPacketProcessor::SignalState::handleReadDMA
void handleReadDMA()
Definition: hsa_packet_processor.cc:497
AQLRingBuffer::incDispIdx
void incDispIdx(uint64_t value)
Definition: hsa_packet_processor.hh:196
PKT_TYPE
#define PKT_TYPE(PKT)
Definition: hsa_packet_processor.cc:60
HSAQueueDescriptor::numObjs
uint32_t numObjs()
Definition: hsa_packet_processor.hh:94
compiler.hh
HSA_PACKET_TYPE_BARRIER_OR
@ HSA_PACKET_TYPE_BARRIER_OR
Packet used by agents to delay processing of subsequent packets, and to express complex dependencies ...
Definition: hsa.h:2750
HSAQueueDescriptor::doorbellPointer
uint64_t doorbellPointer
Definition: hsa_packet_processor.hh:75
HSAPacketProcessor::updateReadIndex
void updateReadIndex(int, uint32_t)
Definition: hsa_packet_processor.cc:217
AQLRingBuffer::allocEntry
int allocEntry(uint32_t nBufReq)
Definition: hsa_packet_processor.cc:642
HSA_PACKET_TYPE_VENDOR_SPECIFIC
@ HSA_PACKET_TYPE_VENDOR_SPECIFIC
Vendor-specific packet.
Definition: hsa.h:2722
HSAPacketProcessor::SignalState::allRead
bool allRead
Definition: hsa_packet_processor.hh:233
HSAPacketProcessor::SignalState
Definition: hsa_packet_processor.hh:223
Addr
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
Definition: types.hh:142
HSAPacketProcessor::translateOrDie
void translateOrDie(Addr vaddr, Addr &paddr)
Definition: hsa_packet_processor.cc:154
HSAPacketProcessor::hwSchdlr
HWScheduler * hwSchdlr
Definition: hsa_packet_processor.hh:218
ChunkGenerator::done
bool done() const
Are we done? That is, did the last call to next() advance past the end of the region?
Definition: chunk_generator.hh:137
AQLRingBuffer::rdIdxPtr
uint64_t * rdIdxPtr()
Definition: hsa_packet_processor.hh:193
packet_access.hh
HSAPacketProcessor::write
virtual Tick write(Packet *)
Definition: hsa_packet_processor.cc:124
HSAPacketProcessor::dma_series_ctx::rl_idx
uint32_t rl_idx
Definition: hsa_packet_processor.hh:367
HSA_PACKET_TYPE_INVALID
@ HSA_PACKET_TYPE_INVALID
The packet has been processed in the past, but has not been reassigned to the packet processor.
Definition: hsa.h:2728
hsa_packet_type_t
hsa_packet_type_t
Packet type.
Definition: hsa.h:2718
AQLRingBuffer::_aqlComplete
std::vector< bool > _aqlComplete
Definition: hsa_packet_processor.hh:123
HSAQueueDescriptor::stalledOnDmaBufAvailability
bool stalledOnDmaBufAvailability
Definition: hsa_packet_processor.hh:80
HSAPacketProcessor::RQLEntry::aqlProcessEvent
QueueProcessEvent aqlProcessEvent
Definition: hsa_packet_processor.hh:270
MipsISA::fill
fill
Definition: pra_constants.hh:54
HSAPacketProcessor::read
virtual Tick read(Packet *)
Definition: hsa_packet_processor.cc:146
SimObject::name
virtual const std::string name() const
Definition: sim_object.hh:133
HSAPacketProcessor::CmdQueueCmdDmaEvent::process
virtual void process()
Definition: hsa_packet_processor.cc:256
AQLRingBuffer::numObjs
uint32_t numObjs() const
Definition: hsa_packet_processor.hh:188
HSAPacketProcessor::schedAQLProcessing
void schedAQLProcessing(uint32_t rl_idx)
Definition: hsa_packet_processor.cc:299
panic_if
#define panic_if(cond,...)
Conditional panic macro that checks the supplied condition and only panics if the condition is true a...
Definition: logging.hh:197
System::threads
Threads threads
Definition: system.hh:309
_hsa_dispatch_packet_s
Definition: hsa_packet.hh:53
DmaDevice
Definition: dma_device.hh:165
Packet::getLE
T getLE() const
Get the data in the packet byte swapped from little endian to host endian.
Definition: packet_access.hh:75
IS_BARRIER
#define IS_BARRIER(PKT)
Definition: hsa_packet_processor.cc:65
Packet::setBadAddress
void setBadAddress()
Definition: packet.hh:746
HWScheduler::write
void write(Addr db_addr, uint32_t doorbell_reg)
Definition: hw_scheduler.cc:322
HSAPacketProcessor::hsa_device
HSADevice * hsa_device
Definition: hsa_packet_processor.hh:217
HSAPacketProcessor::CmdQueueCmdDmaEvent::num_pkts
uint num_pkts
Definition: hsa_packet_processor.hh:386
Packet
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
Definition: packet.hh:257
addr
ip6_addr_t addr
Definition: inet.hh:423
chunk_generator.hh
HSAPacketProcessor::displayQueueDescriptor
void displayQueueDescriptor(int pid, uint32_t rl_idx)
Definition: hsa_packet_processor.cc:591
HSA_PACKET_TYPE_BARRIER_AND
@ HSA_PACKET_TYPE_BARRIER_AND
Packet used by agents to delay processing of subsequent packets, and to express complex dependencies ...
Definition: hsa.h:2739
AQLRingBuffer::ptr
void * ptr(uint32_t ix)
Definition: hsa_packet_processor.hh:187
ChunkGenerator
This class takes an arbitrary memory region (address/length pair) and generates a series of appropria...
Definition: chunk_generator.hh:55
HSAQueueDescriptor::writeIndex
uint64_t writeIndex
Definition: hsa_packet_processor.hh:76
AQLRingBuffer::_hostDispAddresses
std::vector< Addr > _hostDispAddresses
Definition: hsa_packet_processor.hh:122
MipsISA::p
Bitfield< 0 > p
Definition: pra_constants.hh:323
std::list< AddrRange >
HSAPacketProcessor::SignalState::values
std::vector< hsa_signal_value_t > values
Definition: hsa_packet_processor.hh:238
HSAPacketProcessor::SignalState::pendingReads
int pendingReads
Definition: hsa_packet_processor.hh:232
HSAPacketProcessor::CmdQueueCmdDmaEvent
Definition: hsa_packet_processor.hh:379
HSAPacketProcessor::QueueProcessEvent::process
virtual void process()
Definition: hsa_packet_processor.cc:445
hsa_device.hh
page_table.hh
HSAQueueDescriptor::spaceUsed
uint64_t spaceUsed()
Definition: hsa_packet_processor.hh:92
HSAQueueDescriptor::numElts
uint32_t numElts
Definition: hsa_packet_processor.hh:78
HSADevice
Definition: hsa_device.hh:46
HSAPacketProcessor::CmdQueueCmdDmaEvent::series_ctx
dma_series_ctx * series_ctx
Definition: hsa_packet_processor.hh:387
HSAQueueDescriptor::readIndex
uint64_t readIndex
Definition: hsa_packet_processor.hh:77
HSAPacketProcessor::UpdateReadDispIdDmaEvent
This event is used to update the read_disp_id field (the read pointer) of the MQD,...
Definition: hsa_packet_processor.hh:349
DmaDevice::Params
DmaDeviceParams Params
Definition: dma_device.hh:171
panic
#define panic(...)
This implements a cprintf based panic() function.
Definition: logging.hh:171
curTick
Tick curTick()
The current simulated tick.
Definition: core.hh:45
HSAPacketProcessor::dma_series_ctx
Calls getCurrentEntry once the queueEntry has been dmaRead.
Definition: hsa_packet_processor.hh:362

Generated on Wed Sep 30 2020 14:02:11 for gem5 by doxygen 1.8.17