gem5  v20.0.0.2
hsa_packet_processor.cc
1 /*
2  * Copyright (c) 2015-2018 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * For use for simulation and test purposes only
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright notice,
11  * this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright notice,
14  * this list of conditions and the following disclaimer in the documentation
15  * and/or other materials provided with the distribution.
16  *
17  * 3. Neither the name of the copyright holder nor the names of its
18  * contributors may be used to endorse or promote products derived from this
19  * software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  *
33  * Authors: Eric van Tassell
34  */
35 
36 #include "dev/hsa/hsa_packet_processor.hh"
37 
38 #include <cstring>
39 
40 #include "base/chunk_generator.hh"
41 #include "base/compiler.hh"
42 #include "debug/HSAPacketProcessor.hh"
43 #include "dev/dma_device.hh"
44 #include "dev/hsa/hsa_device.hh"
45 #include "dev/hsa/hsa_packet.hh"
46 #include "dev/hsa/hw_scheduler.hh"
47 #include "mem/packet_access.hh"
48 #include "mem/page_table.hh"
49 #include "sim/process.hh"
50 #include "sim/syscall_emul_buf.hh"
51 #include "sim/system.hh"
52 
53 #define HSAPP_EVENT_DESCRIPTION_GENERATOR(XEVENT) \
54  const char* \
55  HSAPacketProcessor::XEVENT::description() const \
56  { \
57  return #XEVENT; \
58  }
59 
60 #define PKT_TYPE(PKT) ((hsa_packet_type_t)(((PKT->header) >> \
61  HSA_PACKET_HEADER_TYPE) & (HSA_PACKET_HEADER_WIDTH_TYPE - 1)))
62 
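The PKT_TYPE macro above pulls the packet type out of the 16-bit AQL packet header. The stand-alone sketch below is not gem5 code; the offsets, widths, and type values are assumptions mirroring the standard HSA 1.x hsa.h definitions. It shows the same decode as a plain function. Note that the macro masks with (HSA_PACKET_HEADER_WIDTH_TYPE - 1), i.e. 0x7, rather than (1 << width) - 1; that happens to be harmless because every defined packet type fits in three bits.

#include <cstdint>
#include <cstdio>

// Assumed HSA 1.x header layout: type field at bit offset 0, width 8 bits.
constexpr unsigned kHeaderTypeOffset = 0;
constexpr unsigned kHeaderTypeWidth  = 8;

// Packet type values as defined by hsa_packet_type_t in the HSA spec.
enum PacketType : uint16_t {
    kVendorSpecific = 0,
    kInvalid        = 1,
    kKernelDispatch = 2,
    kBarrierAnd     = 3,
    kAgentDispatch  = 4,
    kBarrierOr      = 5,
};

// Equivalent of PKT_TYPE(PKT): shift the type field down and mask it.
static inline uint16_t pktType(uint16_t header)
{
    return (header >> kHeaderTypeOffset) & (kHeaderTypeWidth - 1);
}

int main()
{
    uint16_t header = kKernelDispatch << kHeaderTypeOffset;
    std::printf("decoded type = %u\n", pktType(header));  // prints 2
    return 0;
}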
63 HSAPP_EVENT_DESCRIPTION_GENERATOR(UpdateReadDispIdDmaEvent)
64 HSAPP_EVENT_DESCRIPTION_GENERATOR(CmdQueueCmdDmaEvent)
65 HSAPP_EVENT_DESCRIPTION_GENERATOR(QueueProcessEvent)
66 HSAPP_EVENT_DESCRIPTION_GENERATOR(DepSignalsReadDmaEvent)
67 
68 HSAPacketProcessor::HSAPacketProcessor(const Params *p)
69  : DmaDevice(p), numHWQueues(p->numHWQueues), pioAddr(p->pioAddr),
70  pioSize(PAGE_SIZE), pioDelay(10), pktProcessDelay(p->pktProcessDelay)
71 {
72  DPRINTF(HSAPacketProcessor, "%s:\n", __FUNCTION__);
73  hwSchdlr = new HWScheduler(this, p->wakeupDelay);
74  regdQList.resize(numHWQueues);
75  for (int i = 0; i < numHWQueues; i++) {
76  regdQList[i] = new RQLEntry(this, i);
77  }
78 }
79 
80 HSAPacketProcessor::~HSAPacketProcessor()
81 {
82  for (auto &queue : regdQList) {
83  delete queue;
84  }
85 }
86 
87 void
88 HSAPacketProcessor::unsetDeviceQueueDesc(uint64_t queue_id)
89 {
90  hwSchdlr->unregisterQueue(queue_id);
91 }
92 
93 void
94 HSAPacketProcessor::setDeviceQueueDesc(uint64_t hostReadIndexPointer,
95  uint64_t basePointer,
96  uint64_t queue_id,
97  uint32_t size)
98 {
99  DPRINTF(HSAPacketProcessor,
100  "%s:base = %p, qID = %d, size = %d\n", __FUNCTION__,
101  (void *)basePointer, queue_id, size);
102  hwSchdlr->registerNewQueue(hostReadIndexPointer,
103  basePointer, queue_id, size);
104 }
105 
106 AddrRangeList
107 HSAPacketProcessor::getAddrRanges() const
108 {
109  assert(pioSize != 0);
110 
111  AddrRangeList ranges;
112  ranges.push_back(RangeSize(pioAddr, pioSize));
113 
114  return ranges;
115 }
116 
117 // Basically only processes writes to the queue doorbell register.
118 Tick
119 HSAPacketProcessor::write(Packet *pkt)
120 {
121  assert(pkt->getAddr() >= pioAddr && pkt->getAddr() < pioAddr + pioSize);
122 
123  // TODO: How to get pid??
124  Addr M5_VAR_USED daddr = pkt->getAddr() - pioAddr;
125 
126  DPRINTF(HSAPacketProcessor,
127  "%s: write of size %d to reg-offset %d (0x%x)\n",
128  __FUNCTION__, pkt->getSize(), daddr, daddr);
129 
130  uint32_t doorbell_reg = pkt->getLE<uint32_t>();
131 
132  DPRINTF(HSAPacketProcessor,
133  "%s: write data 0x%x to offset %d (0x%x)\n",
134  __FUNCTION__, doorbell_reg, daddr, daddr);
135  hwSchdlr->write(daddr, doorbell_reg);
136  pkt->makeAtomicResponse();
137  return pioDelay;
138 }
139 
140 Tick
141 HSAPacketProcessor::read(Packet *pkt)
142 {
143  pkt->makeAtomicResponse();
144  pkt->setBadAddress();
145  return pioDelay;
146 }
147 
148 void
149 HSAPacketProcessor::translateOrDie(Addr vaddr, Addr &paddr)
150 {
151  // Grab the process and try to translate the virtual address with it; with
152  // new extensions, it will likely be wrong to just arbitrarily grab context
153  // zero.
154  auto process = sys->getThreadContext(0)->getProcessPtr();
155 
156  if (!process->pTable->translate(vaddr, paddr))
157  fatal("failed translation: vaddr 0x%x\n", vaddr);
158 }
159 
160 void
161 HSAPacketProcessor::dmaVirt(DmaFnPtr dmaFn, Addr addr, unsigned size,
162  Event *event, void *data, Tick delay)
163 {
164  if (size == 0) {
165  schedule(event, curTick() + delay);
166  return;
167  }
168 
169  // move the buffer data pointer with the chunks
170  uint8_t *loc_data = (uint8_t*)data;
171 
172  for (ChunkGenerator gen(addr, size, PAGE_SIZE); !gen.done(); gen.next()) {
173  Addr phys;
174 
175  // translate pages into their corresponding frames
176  translateOrDie(gen.addr(), phys);
177 
178  // only send event on last transfer; transfers complete in-order
179  Event *ev = gen.last() ? event : NULL;
180 
181  (this->*dmaFn)(phys, gen.size(), ev, loc_data, delay);
182 
183  loc_data += gen.size();
184  }
185 }
186 
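dmaVirt() above cannot issue one physically contiguous transfer for a virtual buffer, since consecutive virtual pages may map to scattered physical frames. It therefore cuts the range at page boundaries, translates each piece, and issues one dmaRead/dmaWrite per chunk, attaching the completion event only to the last chunk. The stand-alone sketch below (hypothetical helper, fixed 4 KiB pages; not the ChunkGenerator API itself) illustrates just the splitting step.

#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <functional>

constexpr uint64_t kPageSize = 4096;  // assumption; the real code uses PAGE_SIZE

// Walk [vaddr, vaddr + size) in page-bounded chunks and hand each chunk to
// doChunk, mirroring the ChunkGenerator loop in dmaVirt(). The last chunk is
// flagged so that only it would carry the completion event.
void forEachPageChunk(uint64_t vaddr, uint64_t size,
                      const std::function<void(uint64_t, uint64_t, bool)> &doChunk)
{
    const uint64_t end = vaddr + size;
    uint64_t cur = vaddr;
    while (cur < end) {
        uint64_t pageEnd = (cur / kPageSize + 1) * kPageSize;
        uint64_t len = std::min(end, pageEnd) - cur;
        doChunk(cur, len, cur + len == end);
        cur += len;
    }
}

int main()
{
    // A 10 KiB transfer starting 100 bytes into a page splits into
    // chunks of 3996, 4096 and 2148 bytes.
    forEachPageChunk(3 * kPageSize + 100, 10 * 1024,
                     [](uint64_t va, uint64_t len, bool last) {
                         std::printf("chunk @0x%llx len %llu last=%d\n",
                                     (unsigned long long)va,
                                     (unsigned long long)len, (int)last);
                     });
    return 0;
}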
187 void
188 HSAPacketProcessor::dmaReadVirt(Addr host_addr, unsigned size,
189  Event *event, void *data, Tick delay)
190 {
191  DPRINTF(HSAPacketProcessor,
192  "%s:host_addr = 0x%lx, size = %d\n", __FUNCTION__, host_addr, size);
193  dmaVirt(&DmaDevice::dmaRead, host_addr, size, event, data, delay);
194 }
195 
196 void
197 HSAPacketProcessor::dmaWriteVirt(Addr host_addr, unsigned size,
198  Event *event, void *data, Tick delay)
199 {
200  dmaVirt(&DmaDevice::dmaWrite, host_addr, size, event, data, delay);
201 }
202 
203 HSAPacketProcessor::UpdateReadDispIdDmaEvent::
204 UpdateReadDispIdDmaEvent()
205  : Event(Default_Pri, AutoDelete)
206 {
207  DPRINTF(HSAPacketProcessor, "%s:\n", __FUNCTION__);
208  setFlags(AutoDelete);
209 }
210 
211 void
212 HSAPacketProcessor::updateReadIndex(int pid, uint32_t rl_idx)
213 {
214  AQLRingBuffer* aqlbuf = regdQList[rl_idx]->qCntxt.aqlBuf;
215  HSAQueueDescriptor* qDesc = regdQList[rl_idx]->qCntxt.qDesc;
216  auto *dmaEvent = new UpdateReadDispIdDmaEvent();
217 
218  DPRINTF(HSAPacketProcessor,
219  "%s: read-pointer offset [0x%x]\n", __FUNCTION__, aqlbuf->rdIdx());
220 
221  dmaWriteVirt((Addr)qDesc->hostReadIndexPtr,
222  sizeof(aqlbuf->rdIdx()),
223  dmaEvent, aqlbuf->rdIdxPtr());
224 
225  DPRINTF(HSAPacketProcessor,
226  "%s: rd-ptr offset [0x%x], wr-ptr offset [0x%x], space used = %d," \
227  " q size = %d, is_empty = %s, active list ID = %d\n", __FUNCTION__,
228  qDesc->readIndex, qDesc->writeIndex, qDesc->spaceUsed(),
229  qDesc->numElts, qDesc->isEmpty()? "true" : "false", rl_idx);
230  if (qDesc->writeIndex != aqlbuf->wrIdx()) {
231  getCommandsFromHost(pid, rl_idx);
232  }
233 }
234 
235 HSAPacketProcessor::CmdQueueCmdDmaEvent::
236 CmdQueueCmdDmaEvent(HSAPacketProcessor *_hsaPP, int _pid, bool _isRead,
237  uint32_t _ix_start, unsigned _num_pkts,
238  dma_series_ctx *_series_ctx, void *_dest_4debug)
239  : Event(Default_Pri, AutoDelete), hsaPP(_hsaPP), pid(_pid), isRead(_isRead),
240  ix_start(_ix_start), num_pkts(_num_pkts), series_ctx(_series_ctx),
241  dest_4debug(_dest_4debug)
242 {
243  setFlags(AutoDelete);
244 
245  DPRINTF(HSAPacketProcessor, "%s, ix = %d, npkts = %d," \
246  "active list ID = %d\n", __FUNCTION__,
247  _ix_start, num_pkts, series_ctx->rl_idx);
248 }
249 
250 void
251 HSAPacketProcessor::CmdQueueCmdDmaEvent::process()
252 {
253  uint32_t rl_idx = series_ctx->rl_idx;
254  AQLRingBuffer *aqlRingBuffer M5_VAR_USED =
255  hsaPP->regdQList[rl_idx]->qCntxt.aqlBuf;
256  HSAQueueDescriptor* qDesc =
257  hsaPP->regdQList[rl_idx]->qCntxt.qDesc;
258  DPRINTF(HSAPacketProcessor, ">%s, ix = %d, npkts = %d," \
259  " pktsRemaining = %d, active list ID = %d\n", __FUNCTION__,
260  ix_start, num_pkts, series_ctx->pkts_2_go,
261  rl_idx);
262  if (isRead) {
263  series_ctx->pkts_2_go -= num_pkts;
264  if (series_ctx->pkts_2_go == 0) {
265  // Mark DMA as completed
266  qDesc->dmaInProgress = false;
267  DPRINTF(HSAPacketProcessor,
268  "%s: schedule Qwakeup next cycle, rdIdx %d, wrIdx %d," \
269  " dispIdx %d, active list ID = %d\n",
270  __FUNCTION__, aqlRingBuffer->rdIdx(),
271  aqlRingBuffer->wrIdx(), aqlRingBuffer->dispIdx(), rl_idx);
272  // schedule queue wakeup
273  hsaPP->schedAQLProcessing(rl_idx);
274  delete series_ctx;
275  }
276  }
277 }
278 
279 void
280 HSAPacketProcessor::schedAQLProcessing(uint32_t rl_idx)
281 {
282  RQLEntry *queue = regdQList[rl_idx];
283  if (!queue->aqlProcessEvent.scheduled()) {
284  Tick processingTick = curTick() + pktProcessDelay;
285  schedule(queue->aqlProcessEvent, processingTick);
286  DPRINTF(HSAPacketProcessor, "AQL processing scheduled at tick: %d\n",
287  processingTick);
288  } else {
289  DPRINTF(HSAPacketProcessor, "AQL processing already scheduled\n");
290  }
291 }
292 
293 bool
294 HSAPacketProcessor::processPkt(void* pkt, uint32_t rl_idx, Addr host_pkt_addr)
295 {
296  bool is_submitted = false;
297  SignalState *dep_sgnl_rd_st = &(regdQList[rl_idx]->depSignalRdState);
298  // Dependency signals are not read yet. And this can only be a retry.
299  // The retry logic will schedule the packet processor wakeup
300  if (dep_sgnl_rd_st->pendingReads != 0) {
301  return false;
302  }
303  // `pkt` can be cast to any type of AQL packet since they all
304  // have header information at offset zero
305  auto disp_pkt = (_hsa_dispatch_packet_t *)pkt;
306  hsa_packet_type_t pkt_type = PKT_TYPE(disp_pkt);
307  if (pkt_type == HSA_PACKET_TYPE_VENDOR_SPECIFIC) {
308  DPRINTF(HSAPacketProcessor, "%s: submitting vendor specific pkt" \
309  " active list ID = %d\n", __FUNCTION__, rl_idx);
310  // Submit packet to HSA device (dispatcher)
311  hsa_device->submitVendorPkt((void *)disp_pkt, rl_idx, host_pkt_addr);
312  is_submitted = true;
313  } else if (pkt_type == HSA_PACKET_TYPE_KERNEL_DISPATCH) {
314  DPRINTF(HSAPacketProcessor, "%s: submitting kernel dispatch pkt" \
315  " active list ID = %d\n", __FUNCTION__, rl_idx);
316  // Submit packet to HSA device (dispatcher)
317  hsa_device->submitDispatchPkt((void *)disp_pkt, rl_idx, host_pkt_addr);
318  is_submitted = true;
319  } else if (pkt_type == HSA_PACKET_TYPE_BARRIER_AND) {
320  DPRINTF(HSAPacketProcessor, "%s: Processing barrier packet" \
321  " active list ID = %d\n", __FUNCTION__, rl_idx);
322  auto bar_and_pkt = (_hsa_barrier_and_packet_t *)pkt;
323  bool isReady = true;
324  // Loop through all the dependency signals to see if this barrier
325  // packet is ready.
326  for (int i = 0; i < NumSignalsPerBarrier; i++) {
327  // dep_signal == zero implies no signal is connected
328  if (bar_and_pkt->dep_signal[i]) {
329  // The signal value is aligned 8 bytes from
330  // the actual handle in the runtime
331  uint64_t signal_addr =
332  (uint64_t) (((uint64_t *) bar_and_pkt->dep_signal[i]) + 1);
333  hsa_signal_value_t *signal_val =
334  &(dep_sgnl_rd_st->values[i]);
335  DPRINTF(HSAPacketProcessor, "%s: Barrier pkt dep sgnl[%d]" \
336  " , sig addr %x, value %d active list ID = %d\n",
337  __FUNCTION__, i, signal_addr,
338  *signal_val, rl_idx);
339  // The if condition is executed every time except the
340  // very first time this barrier packet is encountered.
341  if (dep_sgnl_rd_st->allRead) {
342  if (*signal_val != 0) {
343  // This signal is not yet ready, read it again
344  isReady = false;
345  DepSignalsReadDmaEvent *sgnl_rd_evnt =
346  new DepSignalsReadDmaEvent(dep_sgnl_rd_st);
347  dmaReadVirt(signal_addr, sizeof(hsa_signal_value_t),
348  sgnl_rd_evnt, signal_val);
349  dep_sgnl_rd_st->pendingReads++;
350  DPRINTF(HSAPacketProcessor, "%s: Pending reads %d," \
351  " active list %d\n", __FUNCTION__,
352  dep_sgnl_rd_st->pendingReads, rl_idx);
353  }
354  } else {
355  // This signal is not yet ready, read it again
356  isReady = false;
357  DepSignalsReadDmaEvent *sgnl_rd_evnt =
358  new DepSignalsReadDmaEvent(dep_sgnl_rd_st);
359  dmaReadVirt(signal_addr, sizeof(hsa_signal_value_t),
360  sgnl_rd_evnt, signal_val);
361  dep_sgnl_rd_st->pendingReads++;
362  DPRINTF(HSAPacketProcessor, "%s: Pending reads %d," \
363  " active list %d\n", __FUNCTION__,
364  dep_sgnl_rd_st->pendingReads, rl_idx);
365  }
366  }
367  }
368  if (isReady) {
369  assert(dep_sgnl_rd_st->pendingReads == 0);
370  DPRINTF(HSAPacketProcessor, "%s: Barrier packet completed" \
371  " active list ID = %d\n", __FUNCTION__, rl_idx);
372  // TODO: Completion signal of barrier packet to be
373  // atomically decremented here
374  finishPkt((void*)bar_and_pkt, rl_idx);
375  is_submitted = true;
376  // Reset signal values
377  dep_sgnl_rd_st->resetSigVals();
378  // The completion signal is connected
379  if (bar_and_pkt->completion_signal != 0) {
380  // The signal value is aligned 8 bytes
381  // from the actual handle in the runtime
382  uint64_t signal_addr =
383  (uint64_t) (((uint64_t *)
384  bar_and_pkt->completion_signal) + 1);
385  DPRINTF(HSAPacketProcessor, "Triggering barrier packet" \
386  " completion signal: %x!\n", signal_addr);
387  /**
388  * HACK: The semantics of the HSA signal is to
389  * decrement the current signal value.
390  * I'm going to cheat here and read out
391  * the value from main memory using functional
392  * access, and then just DMA the decremented value.
393  * The reason for this is that the DMASequencer does
394  * not support atomic operations.
395  */
396  auto tc = sys->getThreadContext(0);
397  auto &virt_proxy = tc->getVirtProxy();
398  TypedBufferArg<uint64_t> prev_signal(signal_addr);
399  prev_signal.copyIn(virt_proxy);
400 
401  hsa_signal_value_t *new_signal = new hsa_signal_value_t;
402  *new_signal = (hsa_signal_value_t) *prev_signal - 1;
403 
404  dmaWriteVirt(signal_addr,
405  sizeof(hsa_signal_value_t), NULL, new_signal, 0);
406  }
407  }
408  if (dep_sgnl_rd_st->pendingReads > 0) {
409  // At least one DepSignalsReadDmaEvent is scheduled this cycle
410  dep_sgnl_rd_st->allRead = false;
411  dep_sgnl_rd_st->discardRead = false;
412  }
413  } else if (pkt_type == HSA_PACKET_TYPE_BARRIER_OR) {
414  fatal("Unsupported packet type HSA_PACKET_TYPE_BARRIER_OR");
415  } else if (pkt_type == HSA_PACKET_TYPE_INVALID) {
416  fatal("Unsupported packet type HSA_PACKET_TYPE_INVALID");
417  } else {
418  fatal("Unsupported packet type %d\n", pkt_type);
419  }
420  return is_submitted;
421 }
422 
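The barrier-AND branch of processPkt() above polls up to NumSignalsPerBarrier dependency signals: the packet is ready only when every connected signal currently reads zero, and any signal that is still non-zero triggers another DMA read so the packet can be retried on a later wakeup. A condensed, self-contained version of that readiness test is sketched below; the types and names are illustrative only, and where the real code issues dmaReadVirt this sketch merely records which signals need re-reading.

#include <cstdint>
#include <cstdio>
#include <vector>

// A barrier-AND packet is ready only if every dependency signal has been
// observed at zero; signals still non-zero must be read again later.
bool barrierAndReady(const std::vector<int64_t> &depSignalValues,
                     std::vector<int> &needReread)
{
    bool ready = true;
    for (int i = 0; i < (int)depSignalValues.size(); ++i) {
        if (depSignalValues[i] != 0) {
            ready = false;
            needReread.push_back(i);  // real code schedules a DMA re-read here
        }
    }
    return ready;
}

int main()
{
    std::vector<int64_t> values{0, 2, 0, 1, 0};
    std::vector<int> reread;
    std::printf("ready = %d\n", (int)barrierAndReady(values, reread));  // 0
    std::printf("signals to re-read: %zu\n", reread.size());            // 2
    return 0;
}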
423 // Wakes up every fixed time interval (pktProcessDelay) and processes a single
424 // packet from the queue that scheduled this wakeup. If there are more
425 // packets in that queue, the next wakeup is scheduled.
426 void
427 HSAPacketProcessor::QueueProcessEvent::process()
428 {
429  AQLRingBuffer *aqlRingBuffer = hsaPP->regdQList[rqIdx]->qCntxt.aqlBuf;
430  DPRINTF(HSAPacketProcessor,
431  "%s: Qwakeup , rdIdx %d, wrIdx %d," \
432  " dispIdx %d, active list ID = %d\n",
433  __FUNCTION__, aqlRingBuffer->rdIdx(),
434  aqlRingBuffer->wrIdx(), aqlRingBuffer->dispIdx(), rqIdx);
435  // In the future, we may support batch processing of packets.
436  // Then, we can just remove the break statements and the code
437  // will support batch processing. That is why we are using a
438  // "while loop" here instead of an "if" condition.
439  while (hsaPP->regdQList[rqIdx]->dispPending()) {
440  void *pkt = aqlRingBuffer->ptr(aqlRingBuffer->dispIdx());
441  DPRINTF(HSAPacketProcessor, "%s: Attempting dispatch @ dispIdx[%d]\n",
442  __FUNCTION__, aqlRingBuffer->dispIdx());
443  Addr host_addr = aqlRingBuffer->hostDispAddr();
444  if (hsaPP->processPkt(pkt, rqIdx, host_addr)) {
445  aqlRingBuffer->incDispIdx(1);
446  DPRINTF(HSAPacketProcessor, "%s: Increment dispIdx[%d]\n",
447  __FUNCTION__, aqlRingBuffer->dispIdx());
448  if (hsaPP->regdQList[rqIdx]->dispPending()) {
449  hsaPP->schedAQLProcessing(rqIdx);
450  }
451  break;
452  } else {
453  // This queue is blocked, so schedule a processing event
454  hsaPP->schedAQLProcessing(rqIdx);
455  break;
456  }
457  }
458 }
459 
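QueueProcessEvent::process() above dispatches at most one AQL packet per wakeup and re-arms itself only when more packets are pending or the head packet is blocked (for example a barrier still waiting on its dependency signals). The toy model below uses illustrative classes, not gem5 code, to capture that re-arm policy; each call to processOneWakeup stands for one wakeup separated by pktProcessDelay.

#include <cstdio>
#include <deque>

struct Packet { int id; bool blocked; };

// Handle one wakeup: dispatch at most one packet. Returns true if another
// wakeup should be scheduled (more work pending, or head packet blocked).
bool processOneWakeup(std::deque<Packet> &queue)
{
    if (queue.empty())
        return false;                  // nothing pending, stay idle
    Packet &head = queue.front();
    if (head.blocked) {
        std::printf("pkt %d blocked, retry on a later wakeup\n", head.id);
        return true;                   // retry the same packet later
    }
    std::printf("dispatched pkt %d\n", head.id);
    queue.pop_front();                 // counterpart of incDispIdx(1)
    return !queue.empty();             // re-arm only if more work exists
}

int main()
{
    std::deque<Packet> q{{0, false}, {1, true}, {2, false}};
    int wakeups = 0;
    bool rearm = true;
    while (rearm) {
        ++wakeups;
        if (wakeups == 3)
            q.front().blocked = false;  // packet 1 becomes ready later on
        rearm = processOneWakeup(q);
    }
    std::printf("total wakeups: %d\n", wakeups);
    return 0;
}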
460 void
461 HSAPacketProcessor::SignalState::handleReadDMA()
462 {
463  assert(pendingReads > 0);
464  pendingReads--;
465  if (pendingReads == 0) {
466  allRead = true;
467  if (discardRead) {
468  resetSigVals();
469  }
470  }
471 }
472 
473 void
474 HSAPacketProcessor::getCommandsFromHost(int pid, uint32_t rl_idx)
475 {
476  HSAQueueDescriptor* qDesc = regdQList[rl_idx]->qCntxt.qDesc;
477  AQLRingBuffer *aqlRingBuffer = regdQList[rl_idx]->qCntxt.aqlBuf;
478 
479  DPRINTF(HSAPacketProcessor,
480  "%s: read-pointer offset[0x%x], write-pointer offset[0x%x]"
481  " doorbell(%d)[0x%x] \n",
482  __FUNCTION__, qDesc->readIndex,
483  qDesc->writeIndex, pid, qDesc->doorbellPointer);
484 
485  if (qDesc->dmaInProgress) {
486  // we'll try again when this dma transfer completes in updateReadIndex
487  return;
488  }
489  uint32_t num_umq = qDesc->spaceUsed();
490  if (num_umq == 0)
491  return; // nothing to be gotten
492  uint32_t umq_nxt = qDesc->readIndex;
493  // Total AQL buffer size
494  uint32_t ttl_aql_buf = aqlRingBuffer->numObjs();
495  // Available AQL buffer space. If fewer entries are available than
496  // demanded, the number of available entries is returned
497  uint32_t got_aql_buf = aqlRingBuffer->allocEntry(num_umq);
498  qDesc->readIndex += got_aql_buf;
499  uint32_t dma_start_ix = (aqlRingBuffer->wrIdx() - got_aql_buf) %
500  ttl_aql_buf;
501  dma_series_ctx *series_ctx = NULL;
502 
503  DPRINTF(HSAPacketProcessor, "%s: umq_nxt = %d, ttl_aql_buf = %d, "
504  "dma_start_ix = %d, num_umq = %d\n", __FUNCTION__, umq_nxt,
505  ttl_aql_buf, dma_start_ix, num_umq);
506 
507  if (got_aql_buf == 0) {
508  // we'll try again when some dma bufs are freed in freeEntry
509  qDesc->stalledOnDmaBufAvailability = true;
510  return;
511  } else {
512  qDesc->stalledOnDmaBufAvailability = false;
513  }
514 
515  uint32_t dma_b4_wrap = ttl_aql_buf - dma_start_ix;
516  while (got_aql_buf != 0 && num_umq != 0) {
517  uint32_t umq_b4_wrap = qDesc->numObjs() -
518  (umq_nxt % qDesc->objSize());
519  uint32_t num_2_xfer
520  = std::min({umq_b4_wrap, dma_b4_wrap, num_umq, got_aql_buf});
521  if (!series_ctx) {
522  qDesc->dmaInProgress = true;
523  series_ctx = new dma_series_ctx(got_aql_buf, got_aql_buf,
524  dma_start_ix, rl_idx);
525  }
526 
527  void *aql_buf = aqlRingBuffer->ptr(dma_start_ix);
528  CmdQueueCmdDmaEvent *dmaEvent
529  = new CmdQueueCmdDmaEvent(this, pid, true, dma_start_ix,
530  num_2_xfer, series_ctx, aql_buf);
531  DPRINTF(HSAPacketProcessor,
532  "%s: aql_buf = %p, umq_nxt = %d, dma_ix = %d, num2xfer = %d\n",
533  __FUNCTION__, aql_buf, umq_nxt, dma_start_ix, num_2_xfer);
534 
535  dmaReadVirt(qDesc->ptr(umq_nxt), num_2_xfer * qDesc->objSize(),
536  dmaEvent, aql_buf);
537 
538  aqlRingBuffer->saveHostDispAddr(qDesc->ptr(umq_nxt), num_2_xfer,
539  dma_start_ix);
540 
541  num_umq -= num_2_xfer;
542  got_aql_buf -= num_2_xfer;
543  dma_start_ix = (dma_start_ix + num_2_xfer) % ttl_aql_buf;
544  umq_nxt = (umq_nxt + num_2_xfer) % qDesc->numObjs();
545  if (got_aql_buf == 0 && num_umq != 0) {
546  // There are more packets in the queue but
547  // not enough DMA buffers. Set the stalledOnDmaBufAvailability,
548  // we will try again in freeEntry
549  qDesc->stalledOnDmaBufAvailability = true;
550  }
551  }
552 }
553 
554 void
555 HSAPacketProcessor::displayQueueDescriptor(int pid, uint32_t rl_idx)
556 {
557  HSAQueueDescriptor* M5_VAR_USED qDesc = regdQList[rl_idx]->qCntxt.qDesc;
558  DPRINTF(HSAPacketProcessor,
559  "%s: pid[%d], basePointer[0x%lx], dBPointer[0x%lx], "
560  "writeIndex[0x%x], readIndex[0x%x], size(bytes)[0x%x]\n",
561  __FUNCTION__, pid, qDesc->basePointer,
562  qDesc->doorbellPointer, qDesc->writeIndex,
563  qDesc->readIndex, qDesc->numElts);
564 }
565 
566 AQLRingBuffer::AQLRingBuffer(uint32_t size,
567  const std::string name)
568  : _name(name), _wrIdx(0), _rdIdx(0), _dispIdx(0)
569 {
570  _aqlBuf.resize(size);
571  _aqlComplete.resize(size);
572  _hostDispAddresses.resize(size);
573  // Mark all packets as invalid and incomplete
574  for (auto& it : _aqlBuf)
575  it.header = HSA_PACKET_TYPE_INVALID;
576  std::fill(_aqlComplete.begin(), _aqlComplete.end(), false);
577 }
578 
579 bool
580 AQLRingBuffer::freeEntry(void *pkt)
581 {
582  _aqlComplete[(hsa_kernel_dispatch_packet_t *) pkt - _aqlBuf.data()] = true;
583  DPRINTF(HSAPacketProcessor, "%s: pkt_ix = %d; "\
584  " # free entries = %d, wrIdx = %d, rdIdx = %d\n", __FUNCTION__,
585  (hsa_kernel_dispatch_packet_t *) pkt - _aqlBuf.data(),
586  nFree(), wrIdx(), rdIdx());
587  // Packets can complete out-of-order. This code "retires" packets in-order
588  // by updating the read pointer in the MQD when a contiguous chunk of
589  // packets has finished.
590  uint32_t old_rdIdx = rdIdx();
591  while (_aqlComplete[rdIdx() % numObjs()]) {
592  _aqlComplete[rdIdx() % numObjs()] = false;
593  _aqlBuf[rdIdx() % numObjs()].header = HSA_PACKET_TYPE_INVALID;
594  incRdIdx(1);
595  }
596  return (old_rdIdx != rdIdx());
597 }
598 
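freeEntry() above lets packets complete out of order while the read index, and hence the host-visible read pointer that updateReadIndex() later DMAs back, only ever advances over a contiguous run of completed slots. The stand-alone sketch below (illustrative names, no gem5 types) shows just that retirement rule.

#include <cstdint>
#include <cstdio>
#include <vector>

struct MiniRing {
    std::vector<bool> done;            // completion flag per ring slot
    uint64_t rdIdx = 0;                // oldest not-yet-retired slot
    explicit MiniRing(size_t n) : done(n, false) {}

    // Mark one slot complete, then retire every contiguous completed slot
    // starting at rdIdx. Returns true if rdIdx moved, i.e. the host-visible
    // read pointer would need to be written back.
    bool complete(uint64_t slot)
    {
        done[slot % done.size()] = true;
        uint64_t old = rdIdx;
        while (done[rdIdx % done.size()]) {
            done[rdIdx % done.size()] = false;
            ++rdIdx;
        }
        return old != rdIdx;
    }
};

int main()
{
    MiniRing ring(8);
    std::printf("%d\n", (int)ring.complete(1));  // 0: slot 0 still outstanding
    std::printf("%d\n", (int)ring.complete(2));  // 0: still waiting on slot 0
    std::printf("%d\n", (int)ring.complete(0));  // 1: rdIdx jumps from 0 to 3
    return 0;
}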
599 void
600 HSAPacketProcessor::setDevice(HSADevice *dev)
601 {
602  this->hsa_device = dev;
603 }
604 
605 int
606 AQLRingBuffer::allocEntry(uint32_t nBufReq)
607 {
608  DPRINTF(HSAPacketProcessor, "%s: nReq = %d\n", __FUNCTION__, nBufReq);
609  if (nFree() == 0) {
610  DPRINTF(HSAPacketProcessor, "%s: return = %d\n", __FUNCTION__, 0);
611  return 0;
612  }
613 
614  if (nBufReq > nFree())
615  nBufReq = nFree();
616 
617  DPRINTF(HSAPacketProcessor, "%s: ix1stFree = %d\n", __FUNCTION__, wrIdx());
618  incWrIdx(nBufReq);
619  DPRINTF(HSAPacketProcessor, "%s: return = %d, wrIdx = %d\n",
620  __FUNCTION__, nBufReq, wrIdx());
621  return nBufReq;
622 }
623 
624 HSAPacketProcessor *
625 HSAPacketProcessorParams::create()
626 {
627  return new HSAPacketProcessor(this);
628 }
629 
630 void
631 HSAPacketProcessor::finishPkt(void *pvPkt, uint32_t rl_idx)
632 {
633  HSAQueueDescriptor* qDesc = regdQList[rl_idx]->qCntxt.qDesc;
634  if (regdQList[rl_idx]->qCntxt.aqlBuf->freeEntry(pvPkt))
635  updateReadIndex(0, rl_idx);
636  DPRINTF(HSAPacketProcessor,
637  "%s: rd-ptr offset [0x%x], wr-ptr offset [0x%x], space used = %d," \
638  " q size = %d, stalled = %s, empty = %s, active list ID = %d\n",
639  __FUNCTION__, qDesc->readIndex, qDesc->writeIndex,
640  qDesc->spaceUsed(), qDesc->numElts,
641  qDesc->stalledOnDmaBufAvailability? "true" : "false",
642  qDesc->isEmpty()? "true" : "false", rl_idx);
643  // A DMA buffer has been freed; check the queue to see if any DMA
644  // accesses are blocked because no DMA buffer was available
645  if (qDesc->stalledOnDmaBufAvailability) {
646  assert(!qDesc->isEmpty());
647  getCommandsFromHost(0, rl_idx); // TODO: assign correct pid
648  // when implementing
649  // multi-process support
650  }
651 }
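One detail of getCommandsFromHost() worth spelling out is how each DMA is sized: a copy from the in-memory user-mode queue into the AQL ring buffer must stop at whichever limit comes first among the source wrap point, the destination wrap point, the packets still pending, and the ring entries just allocated, which is the std::min over four terms at line 520 above. The stand-alone sketch below (illustrative sizes and positions, not gem5 code) walks one such wrapped copy.

#include <algorithm>
#include <cstdint>
#include <cstdio>

int main()
{
    uint32_t umq_size = 64, aql_size = 40;    // ring sizes, in entries
    uint32_t umq_nxt = 60, dma_ix = 36;       // current read/write positions
    uint32_t num_umq = 10, got_aql_buf = 10;  // entries left to move

    while (num_umq != 0 && got_aql_buf != 0) {
        uint32_t umq_b4_wrap = umq_size - umq_nxt;  // room before source wraps
        uint32_t dma_b4_wrap = aql_size - dma_ix;   // room before dest wraps
        uint32_t n = std::min({umq_b4_wrap, dma_b4_wrap, num_umq, got_aql_buf});
        std::printf("copy %u entries: umq[%u..] -> aql[%u..]\n",
                    n, umq_nxt, dma_ix);
        umq_nxt = (umq_nxt + n) % umq_size;
        dma_ix  = (dma_ix + n) % aql_size;
        num_umq -= n;
        got_aql_buf -= n;
    }
    return 0;
}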