gem5  [DEVELOP-FOR-23.0]
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
pm4_packet_processor.cc
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2021 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *
8  * 1. Redistributions of source code must retain the above copyright notice,
9  * this list of conditions and the following disclaimer.
10  *
11  * 2. Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation
13  * and/or other materials provided with the distribution.
14  *
15  * 3. Neither the name of the copyright holder nor the names of its
16  * contributors may be used to endorse or promote products derived from this
17  * software without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  *
31  */
32 
34 
35 #include "debug/PM4PacketProcessor.hh"
39 #include "dev/amdgpu/pm4_mmio.hh"
41 #include "dev/hsa/hw_scheduler.hh"
42 #include "enums/GfxVersion.hh"
44 #include "gpu-compute/shader.hh"
45 #include "mem/packet.hh"
46 #include "mem/packet_access.hh"
47 
48 namespace gem5
49 {
50 
51 PM4PacketProcessor::PM4PacketProcessor(const PM4PacketProcessorParams &p)
52  : DmaVirtDevice(p)
53 {
54  memset(&kiq, 0, sizeof(QueueDesc));
55  memset(&pq, 0, sizeof(QueueDesc));
56 }
57 
// NOTE(review): the function signature (PM4PacketProcessor::translate per
// gem5's layout) and the TranslationGen constructor arguments were dropped
// by this extraction — confirm against the full source.
{
    // PM4 ring/indirect buffers may live in either the AGP or the GART
    // aperture; choose the translation generator accordingly.
    if (gpuDevice->getVM().inAGP(vaddr)) {
        // Use AGP translation gen
        return TranslationGenPtr(
            // (constructor arguments elided in this view)
    }

    // Assume GART otherwise as this is the only other translation aperture
    // available to the PM4 packet processor.
    return TranslationGenPtr(
        // (constructor arguments elided in this view)
}
77 
// NOTE(review): signature line elided in this view (an AddrRangeList
// getter — presumably getAddrRanges(); confirm against the header).
{
    // This device claims no PIO address ranges of its own; accesses are
    // presumably routed via the owning GPU device — verify against callers.
    AddrRangeList ranges;
    return ranges;
}
84 
void
// NOTE(review): signature line elided (setGPUDevice, taking gpu_device).
{
    // Cache a back-pointer to the owning GPU device; used throughout for
    // VM, VMID, SDMA and command-processor access.
    gpuDevice = gpu_device;
}
90 
Addr
// NOTE(review): signature line elided (getGARTAddr(Addr addr)).
{
    // AGP addresses pass through unchanged; everything else is treated
    // as a GART address.
    if (!gpuDevice->getVM().inAGP(addr)) {
        // Keep the 12-bit page offset but scale the page number by 8 —
        // presumably to index 8-byte GART entries; confirm against the VM.
        Addr low_bits = bits(addr, 11, 0);
        addr = (((addr >> 12) << 3) << 12) | low_bits;
    }
    return addr;
}
100 
PM4Queue *
// NOTE(review): signature line elided (takes a doorbell offset and a
// gfx flag).
{
    // Look up the queue registered at this doorbell offset; on first
    // access lazily map the primary queue (gfx) or the KIQ.
    auto result = queuesMap.find(offset);
    if (result == queuesMap.end()) {
        if (gfx)
            mapPq(offset);
        else
            mapKiq(offset);
        return queuesMap[offset];
    }
    return result->second;
}
114 
void
// Map the Kernel Interface Queue at the given doorbell offset.
// NOTE(review): signature line elided (mapKiq(Addr offset)).
{
    DPRINTF(PM4PacketProcessor, "Mapping KIQ\n");
    // (newQueue(...) registration call elided in this view)
}

void
// Map the Primary Queue at the given doorbell offset.
// NOTE(review): signature line elided (mapPq(Addr offset)).
{
    DPRINTF(PM4PacketProcessor, "Mapping PQ\n");
    // (newQueue(...) registration call elided in this view)
}
128 
void
// Create and register a PM4Queue from an MQD at a doorbell offset.
// NOTE(review): first signature line elided; visible continuation below.
    PM4MapQueues *pkt, int id)
{
    // id == -1 means "allocate the next free slot".
    if (id == -1)
        id = queues.size();

    /* 256 bytes aligned address */
    mqd->base <<= 8;
    PM4Queue *q = new PM4Queue(id, mqd, offset, pkt);

    // Queues are indexed both by doorbell offset and by queue id.
    queuesMap[offset] = q;
    queues[id] = q;

    /* we are assumming only compute queues can be map from MQDs */
    QueueType qt;
    qt = mqd->aql ? QueueType::ComputeAQL
    // (ternary else-branch and doorbell-type registration elided in
    //  this view)

    DPRINTF(PM4PacketProcessor, "New PM4 queue %d, base: %p offset: %p, me: "
            "%d, pipe %d queue: %d size: %d\n", id, q->base(), q->offset(),
            q->me(), q->pipe(), q->queue(), q->size());
}
153 
void
// Doorbell-write entry point: update the queue's write pointer and start
// draining it. NOTE(review): signature line elided (takes q, wptrOffset).
{
    // wptrOffset is in dwords; the queue write pointer is kept in bytes.
    q->wptr(wptrOffset * sizeof(uint32_t));

    // Only kick off decoding if the queue is not already being drained;
    // an in-flight decode will pick up the new wptr by itself.
    if (!q->processing()) {
        q->processing(true);
        decodeNext(q);
    }
}
164 
void
// This method decodes the next packet in a PM4Queue (per the doc index:
// decodeNext(PM4Queue *q); signature line elided in this view).
{
    DPRINTF(PM4PacketProcessor, "PM4 decode queue %d rptr %p, wptr %p\n",
            q->id(), q->rptr(), q->wptr());

    if (q->rptr() < q->wptr()) {
        /* Additional braces here are needed due to a clang compilation bug
           falsely throwing a "suggest braces around initialization of
           subject" error. More info on this bug is available here:
           https://stackoverflow.com/questions/31555584 */
        PM4Header h{{{0, 0, 0, 0, 0, 0}}};
        // Read the 4-byte PM4 header at rptr, then dispatch on it in
        // decodeHeader().
        auto cb = new DmaVirtCallback<PM4Header>(
            [ = ] (PM4Header header)
                { decodeHeader(q, header); }, h);
        dmaReadVirt(getGARTAddr(q->rptr()), sizeof(uint32_t), cb,
                    &cb->dmaBuffer);
    } else {
        // Queue drained. If we were executing an indirect buffer, fall
        // back to the primary ring and continue decoding from there.
        q->processing(false);
        if (q->ib()) {
            q->ib(false);
            decodeNext(q);
        }
    }
}
191 
void
// Dispatch on a PM4 packet header: advance rptr past the header, DMA-read
// the typed payload, and hand it to the matching handler, which resumes
// decoding when done. NOTE(review): signature line elided
// (decodeHeader(PM4Queue *q, PM4Header header), per the decodeNext callback).
{
    DPRINTF(PM4PacketProcessor, "PM4 packet %p\n", header.opcode);

    q->incRptr(sizeof(PM4Header));

    DmaVirtCallback<uint64_t> *cb = nullptr;
    void *dmaBuffer = nullptr;

    switch(header.opcode) {
      case IT_NOP: {
        DPRINTF(PM4PacketProcessor, "PM4 nop, count %p\n", header.count);
        DPRINTF(PM4PacketProcessor, "rptr %p wptr %p\n", q->rptr(), q->wptr());
        // A count of 0x3fff is treated as header-only: no payload to skip.
        if (header.count != 0x3fff) {
            q->incRptr((header.count + 1) * sizeof(uint32_t));
        }
        decodeNext(q);
        } break;
      case IT_WRITE_DATA: {
        dmaBuffer = new PM4WriteData();
        cb = new DmaVirtCallback<uint64_t>(
            [ = ] (const uint64_t &)
                { writeData(q, (PM4WriteData *)dmaBuffer); });
        dmaReadVirt(getGARTAddr(q->rptr()), sizeof(PM4WriteData), cb,
                    dmaBuffer);
        } break;

      case IT_MAP_QUEUES: {
        dmaBuffer = new PM4MapQueues();
        cb = new DmaVirtCallback<uint64_t>(
            [ = ] (const uint64_t &)
                { mapQueues(q, (PM4MapQueues *)dmaBuffer); });
        dmaReadVirt(getGARTAddr(q->rptr()), sizeof(PM4MapQueues), cb,
                    dmaBuffer);
        } break;

      case IT_RELEASE_MEM: {
        dmaBuffer = new PM4ReleaseMem();
        cb = new DmaVirtCallback<uint64_t>(
            [ = ] (const uint64_t &)
                { releaseMem(q, (PM4ReleaseMem *)dmaBuffer); });
        dmaReadVirt(getGARTAddr(q->rptr()), sizeof(PM4ReleaseMem), cb,
                    dmaBuffer);
        } break;

      case IT_INDIRECT_BUFFER: {
        dmaBuffer = new PM4IndirectBuf();
        cb = new DmaVirtCallback<uint64_t>(
            [ = ] (const uint64_t &)
                { indirectBuffer(q, (PM4IndirectBuf *)dmaBuffer); });
        dmaReadVirt(getGARTAddr(q->rptr()), sizeof(PM4IndirectBuf), cb,
                    dmaBuffer);
        } break;

      case IT_SWITCH_BUFFER: {
        dmaBuffer = new PM4SwitchBuf();
        cb = new DmaVirtCallback<uint64_t>(
            [ = ] (const uint64_t &)
                { switchBuffer(q, (PM4SwitchBuf *)dmaBuffer); });
        dmaReadVirt(getGARTAddr(q->rptr()), sizeof(PM4SwitchBuf), cb,
                    dmaBuffer);
        } break;

      case IT_SET_UCONFIG_REG: {
        dmaBuffer = new PM4SetUconfigReg();
        cb = new DmaVirtCallback<uint64_t>(
            [ = ] (const uint64_t &)
                { setUconfigReg(q, (PM4SetUconfigReg *)dmaBuffer); });
        dmaReadVirt(getGARTAddr(q->rptr()), sizeof(PM4SetUconfigReg), cb,
                    dmaBuffer);
        } break;

      case IT_WAIT_REG_MEM: {
        dmaBuffer = new PM4WaitRegMem();
        cb = new DmaVirtCallback<uint64_t>(
            [ = ] (const uint64_t &)
                { waitRegMem(q, (PM4WaitRegMem *)dmaBuffer); });
        dmaReadVirt(getGARTAddr(q->rptr()), sizeof(PM4WaitRegMem), cb,
                    dmaBuffer);
        } break;
      case IT_MAP_PROCESS: {
        // MI200 (gfx90a) uses a larger map_process payload than gfx9.
        if (gpuDevice->getGfxVersion() == GfxVersion::gfx90a) {
            dmaBuffer = new PM4MapProcessMI200();
            cb = new DmaVirtCallback<uint64_t>(
                [ = ] (const uint64_t &)
                    { mapProcessGfx90a(q, (PM4MapProcessMI200 *)dmaBuffer); });
            dmaReadVirt(getGARTAddr(q->rptr()), sizeof(PM4MapProcessMI200),
                        cb, dmaBuffer);
        } else {
            dmaBuffer = new PM4MapProcess();
            cb = new DmaVirtCallback<uint64_t>(
                [ = ] (const uint64_t &)
                    { mapProcessGfx9(q, (PM4MapProcess *)dmaBuffer); });
            dmaReadVirt(getGARTAddr(q->rptr()), sizeof(PM4MapProcess), cb,
                        dmaBuffer);
        }
        } break;

      case IT_UNMAP_QUEUES: {
        dmaBuffer = new PM4UnmapQueues();
        cb = new DmaVirtCallback<uint64_t>(
            [ = ] (const uint64_t &)
                { unmapQueues(q, (PM4UnmapQueues *)dmaBuffer); });
        dmaReadVirt(getGARTAddr(q->rptr()), sizeof(PM4UnmapQueues), cb,
                    dmaBuffer);
        } break;

      case IT_RUN_LIST: {
        dmaBuffer = new PM4RunList();
        cb = new DmaVirtCallback<uint64_t>(
            [ = ] (const uint64_t &)
                { runList(q, (PM4RunList *)dmaBuffer); });
        dmaReadVirt(getGARTAddr(q->rptr()), sizeof(PM4RunList), cb,
                    dmaBuffer);
        } break;

      case IT_QUERY_STATUS: {
        dmaBuffer = new PM4QueryStatus();
        cb = new DmaVirtCallback<uint64_t>(
            [ = ] (const uint64_t &)
                { queryStatus(q, (PM4QueryStatus *)dmaBuffer); });
        dmaReadVirt(getGARTAddr(q->rptr()), sizeof(PM4QueryStatus), cb,
                    dmaBuffer);
        } break;

      case IT_INVALIDATE_TLBS: {
        DPRINTF(PM4PacketProcessor, "Functionaly invalidating all TLBs\n");
        // (TLB invalidation call elided in this view)
        q->incRptr((header.count + 1) * sizeof(uint32_t));
        decodeNext(q);
        } break;

      default: {
        // Unknown opcode: skip the payload and keep decoding so the queue
        // does not wedge.
        warn("PM4 packet opcode 0x%x not supported.\n", header.opcode);
        DPRINTF(PM4PacketProcessor, "PM4 packet opcode 0x%x not supported.\n",
                header.opcode);
        q->incRptr((header.count + 1) * sizeof(uint32_t));
        decodeNext(q);
        } break;
    }
}
334 
void
// Handle IT_WRITE_DATA: DMA-write one dword to the packet's destination.
// NOTE(review): signature line elided (writeData(q, pkt)).
{
    q->incRptr(sizeof(PM4WriteData));

    Addr addr = getGARTAddr(pkt->destAddr);
    DPRINTF(PM4PacketProcessor, "PM4 write addr: %p data: %p.\n", addr,
            pkt->data);
    auto cb = new DmaVirtCallback<uint32_t>(
        [ = ](const uint32_t &) { writeDataDone(q, pkt, addr); });
    //TODO: the specs indicate that pkt->data holds the number of dword that
    //need to be written.
    dmaWriteVirt(addr, sizeof(uint32_t), cb, &pkt->data);

    // Without write confirmation, decoding continues immediately;
    // otherwise writeDataDone() resumes it once the DMA completes.
    if (!pkt->writeConfirm)
        decodeNext(q);
}

void
// Completion callback for writeData(): optionally resume decoding, then
// free the packet. NOTE(review): signature line elided (q, pkt, addr).
{
    DPRINTF(PM4PacketProcessor, "PM4 write completed to %p, %p.\n", addr,
            pkt->data);

    if (pkt->writeConfirm)
        decodeNext(q);

    delete pkt;
}
364 
void
// Handle IT_MAP_QUEUES: read the queue's MQD from memory and register a
// new compute or SDMA queue from it. NOTE(review): signature line elided
// (mapQueues(PM4Queue *q, PM4MapQueues *pkt)).
{
    q->incRptr(sizeof(PM4MapQueues));

    DPRINTF(PM4PacketProcessor, "MAPQueues queueSel: %d, vmid: %d, me: %d, "
            "pipe: %d, queueSlot: %d, queueType: %d, allocFormat: %d, "
            "engineSel: %d, numQueues: %d, checkDisable: %d, doorbellOffset:"
            " %d, mqdAddr: %lx, wptrAddr: %lx\n", pkt->queueSel, pkt->vmid,
            pkt->me, pkt->pipe, pkt->queueSlot, pkt->queueType,
            pkt->allocFormat, pkt->engineSel, pkt->numQueues,
            pkt->checkDisable, pkt->doorbellOffset, pkt->mqdAddr,
            pkt->wptrAddr);

    // Partially reading the mqd with an offset of 96 dwords
    if (pkt->engineSel == 0 || pkt->engineSel == 1 || pkt->engineSel == 4) {
        Addr addr = getGARTAddr(pkt->mqdAddr + 96 * sizeof(uint32_t));

        // (DPRINTF opening line elided in this view)
            "Mapping mqd from %p %p (vmid %d - last vmid %d).\n",
            addr, pkt->mqdAddr, pkt->vmid, gpuDevice->lastVMID());

        // (call opening line elided in this view — uses the last VMID)
            gpuDevice->lastVMID());

        QueueDesc *mqd = new QueueDesc();
        memset(mqd, 0, sizeof(QueueDesc));
        auto cb = new DmaVirtCallback<uint32_t>(
            [ = ] (const uint32_t &) {
                processMQD(pkt, q, addr, mqd, gpuDevice->lastVMID()); });
        dmaReadVirt(addr, sizeof(QueueDesc), cb, mqd);
    } else if (pkt->engineSel == 2 || pkt->engineSel == 3) {
        SDMAQueueDesc *sdmaMQD = new SDMAQueueDesc();
        memset(sdmaMQD, 0, sizeof(SDMAQueueDesc));

        // For SDMA we read the full MQD, so there is no offset calculation.
        Addr addr = getGARTAddr(pkt->mqdAddr);

        auto cb = new DmaVirtCallback<uint32_t>(
            [ = ] (const uint32_t &) {
                processSDMAMQD(pkt, q, addr, sdmaMQD,
                               gpuDevice->lastVMID()); });
        dmaReadVirt(addr, sizeof(SDMAQueueDesc), cb, sdmaMQD);
    } else {
        panic("Unknown engine for MQD: %d\n", pkt->engineSel);
    }

    // Decoding continues in parallel with the MQD read above.
    decodeNext(q);
}
414 
void
// Completion callback for a compute MQD read: create the PM4 queue and,
// for AQL queues, register it with the HSA packet processor.
// NOTE(review): first signature line elided; visible continuation below.
    QueueDesc *mqd, uint16_t vmid)
{
    DPRINTF(PM4PacketProcessor, "MQDbase: %lx, active: %d, vmid: %d, base: "
            "%lx, rptr: %x aqlPtr: %lx\n", mqd->mqdBase, mqd->hqd_active,
            mqd->hqd_vmid, mqd->base, mqd->rptr, mqd->aqlRptr);

    // Doorbell offset: mask off the low two bits of the register value.
    Addr offset = mqd->doorbell & 0x1ffffffc;
    newQueue(mqd, offset, pkt);
    PM4Queue *new_q = queuesMap[offset];
    gpuDevice->insertQId(vmid, new_q->id());

    if (mqd->aql) {
        // The queue size is encoded in the cp_hqd_pq_control field in the
        // kernel driver in the 6 lowest bits as log2(queue_size / 4) - 1
        // number of dwords.
        //
        // https://github.com/RadeonOpenCompute/ROCK-Kernel-Driver/blob/
        // roc-4.3.x/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c#L3561
        //
        // Queue size is then 2^(cp_hqd_pq_control[5:0] + 1) dword. Multiply
        // by 4 to get the number of bytes as HSAPP expects.
        int mqd_size = (1 << ((mqd->hqd_pq_control & 0x3f) + 1)) * 4;
        auto &hsa_pp = gpuDevice->CP()->hsaPacketProc();
        // NOTE(review): GfxVersion::gfx900 is hard-coded here even though
        // decodeHeader distinguishes gfx90a devices — confirm intended.
        hsa_pp.setDeviceQueueDesc(mqd->aqlRptr, mqd->base, new_q->id(),
                                  mqd_size, 8, GfxVersion::gfx900, offset,
                                  mqd->mqdReadIndex);
    }

    DPRINTF(PM4PacketProcessor, "PM4 mqd read completed, base %p, mqd %p, "
            "hqdAQL %d.\n", mqd->base, mqd->mqdBase, mqd->aql);
}
448 
void
// Completion callback for an SDMA MQD read: register the RLC queue and
// its doorbell with the matching SDMA engine.
// NOTE(review): first signature line elided; visible continuation below.
    SDMAQueueDesc *mqd, uint16_t vmid)
{
    // Ring size from rb_cntl bits [6:1] — presumably log2-encoded dwords,
    // scaled to bytes; confirm against the SDMA register spec.
    uint32_t rlc_size = 4UL << bits(mqd->sdmax_rlcx_rb_cntl, 6, 1);
    // Assemble the 64-bit read-pointer writeback address from hi/lo halves.
    Addr rptr_wb_addr = mqd->sdmax_rlcx_rb_rptr_addr_hi;
    rptr_wb_addr <<= 32;
    rptr_wb_addr |= mqd->sdmax_rlcx_rb_rptr_addr_lo;

    DPRINTF(PM4PacketProcessor, "SDMAMQD: rb base: %#lx rptr: %#x/%#x wptr: "
            "%#x/%#x ib: %#x/%#x size: %d ctrl: %#x rptr wb addr: %#lx\n",
            // (several argument lines elided in this view)
            rlc_size, mqd->sdmax_rlcx_rb_cntl, rptr_wb_addr);

    // Engine 2 points to SDMA0 while engine 3 points to SDMA1
    assert(pkt->engineSel == 2 || pkt->engineSel == 3);
    SDMAEngine *sdma_eng = gpuDevice->getSDMAById(pkt->engineSel - 2);

    // Register RLC queue with SDMA
    sdma_eng->registerRLCQueue(pkt->doorbellOffset << 2, addr, mqd);

    // Register doorbell with GPU device
    gpuDevice->setSDMAEngine(pkt->doorbellOffset << 2, sdma_eng);
    // (trailing statement elided in this view)
}
476 
void
// Handle IT_RELEASE_MEM: write the fence value, then (in the done
// callback) optionally raise an interrupt.
// NOTE(review): signature line elided (releaseMem(q, pkt)).
{
    q->incRptr(sizeof(PM4ReleaseMem));

    Addr addr = getGARTAddr(pkt->addr);
    DPRINTF(PM4PacketProcessor, "PM4 release_mem event %d eventIdx %d intSel "
            "%d destSel %d dataSel %d, address %p data %p, intCtx %p\n",
            pkt->event, pkt->eventIdx, pkt->intSelect, pkt->destSelect,
            pkt->dataSelect, addr, pkt->dataLo, pkt->intCtxId);

    // (DPRINTF opening line elided in this view)
            "PM4 release_mem destSel 0 bypasses caches to MC.\n");

    if (pkt->dataSelect == 1) {
        // dataSelect 1: write the 32-bit dataLo value to the fence address.
        auto cb = new DmaVirtCallback<uint32_t>(
            [ = ](const uint32_t &) { releaseMemDone(q, pkt, addr); },
            pkt->dataLo);
        dmaWriteVirt(addr, sizeof(uint32_t), cb, &cb->dmaBuffer);
    } else {
        panic("Unimplemented PM4ReleaseMem.dataSelect");
    }
}

void
// Completion callback for releaseMem(): deliver the interrupt (intSelect
// == 2), free the packet, and resume decoding.
// NOTE(review): signature line elided (q, pkt, addr).
{
    DPRINTF(PM4PacketProcessor, "PM4 release_mem wrote %d to %p\n",
            pkt->dataLo, addr);
    if (pkt->intSelect == 2) {
        DPRINTF(PM4PacketProcessor, "PM4 interrupt, id: %d ctx: %d, me: %d, "
                "pipe: %d, queueSlot:%d\n", q->id(), pkt->intCtxId, q->me(),
                q->pipe(), q->queue());

        // Ring id 0 is reserved for queue 0; other queues encode
        // queue/me/pipe into the ring id.
        uint8_t ringId = 0;
        if (q->id() != 0) {
            ringId = (q->queue() << 4) | (q->me() << 2) | q->pipe();
        }
        // (interrupt delivery call(s) elided in this view)
    }

    delete pkt;
    decodeNext(q);
}
523 
void
// Propagate an updated MQD read index to the queue registered at the
// given doorbell offset. NOTE(review): signature line elided
// (takes offset and rd_idx).
{
    assert(queuesMap.count(offset));
    queuesMap[offset]->getMQD()->mqdReadIndex = rd_idx;
}
530 
void
// Handle IT_UNMAP_QUEUES: tear down queues selected by doorbell, pasid,
// or (queueSel == 3) all non-privileged queues, writing their MQDs back.
// NOTE(review): signature line elided (unmapQueues(q, pkt)).
{
    q->incRptr(sizeof(PM4UnmapQueues));

    DPRINTF(PM4PacketProcessor, "PM4 unmap_queues queueSel: %d numQueues: %d "
            "pasid: %p doorbellOffset0 %p \n",
            pkt->queueSel, pkt->numQueues, pkt->pasid, pkt->doorbellOffset0);

    switch (pkt->queueSel) {
      case 0:
        // Per-doorbell unmap; doorbellOffset0..3 depending on numQueues.
        switch (pkt->numQueues) {
          case 1:
            // (unmap calls elided in this view)
            break;
          case 2:
            // (unmap calls elided in this view)
            break;
          case 3:
            // (unmap calls elided in this view)
            break;
          case 4:
            // (unmap calls elided in this view)
            break;
          default:
            panic("Unrecognized number of queues %d\n", pkt->numQueues);
        }
        break;
      case 1:
        // (pasid-based unmap elided in this view)
        break;
      case 2:
        break;
      case 3: {
        // Unmap every non-privileged queue across all used VMIDs.
        auto &hsa_pp = gpuDevice->CP()->hsaPacketProc();
        for (auto iter : gpuDevice->getUsedVMIDs()) {
            for (auto id : iter.second) {
                assert(queues.count(id));

                // Do not unmap KMD queues
                if (queues[id]->privileged()) {
                    continue;
                }
                QueueDesc *mqd = queues[id]->getMQD();
                DPRINTF(PM4PacketProcessor, "Unmapping queue %d with read "
                        "index %ld\n", id, mqd->mqdReadIndex);
                // Partially writing the mqd with an offset of 96 dwords
                Addr addr = getGARTAddr(queues[id]->mqdBase() +
                                        96 * sizeof(uint32_t));
                Addr mqd_base = queues[id]->mqdBase();
                auto cb = new DmaVirtCallback<uint32_t>(
                    [ = ] (const uint32_t &) {
                        doneMQDWrite(mqd_base, addr);
                    });
                // Undo the 256-byte alignment shift applied when the
                // queue was created (see newQueue) before writing back.
                mqd->base >>= 8;
                dmaWriteVirt(addr, sizeof(QueueDesc), cb, mqd);
                queues.erase(id);
                hsa_pp.unsetDeviceQueueDesc(id, 8);
            }
        }
        // (VMID cleanup elided in this view)
      } break;
      default:
        panic("Unrecognized options\n");
        break;
    }

    delete pkt;
    decodeNext(q);
}
617 
void
// Completion callback for the MQD write-back in unmapQueues(); log only.
// NOTE(review): signature line (and opening brace) elided —
// doneMQDWrite(Addr mqdAddr, Addr addr) per the call site above.
    DPRINTF(PM4PacketProcessor, "PM4 unmap_queues MQD %p wrote to addr %p\n",
            mqdAddr, addr);
}
623 
624 void
625 PM4PacketProcessor::mapProcess(uint32_t pasid, uint64_t ptBase,
626  uint32_t shMemBases)
627 {
628  uint16_t vmid = gpuDevice->allocateVMID(pasid);
629 
630  gpuDevice->getVM().setPageTableBase(vmid, ptBase);
631  gpuDevice->CP()->shader()->setHwReg(HW_REG_SH_MEM_BASES, shMemBases);
632 
633  // Setup the apertures that gem5 uses. These values are bits [63:48].
634  Addr lds_base = (Addr)bits(shMemBases, 31, 16) << 48;
635  Addr scratch_base = (Addr)bits(shMemBases, 15, 0) << 48;
636 
637  // There does not seem to be any register for the limit, but the driver
638  // assumes scratch and LDS have a 4GB aperture, so use that.
639  gpuDevice->CP()->shader()->setLdsApe(lds_base, lds_base + 0xFFFFFFFF);
640  gpuDevice->CP()->shader()->setScratchApe(scratch_base,
641  scratch_base + 0xFFFFFFFF);
642 }
643 
void
// Handle IT_MAP_PROCESS for gfx9-class devices; delegates the VMID /
// page-table / aperture setup to mapProcess().
// NOTE(review): signature line elided (takes q and a PM4MapProcess *pkt).
{
    q->incRptr(sizeof(PM4MapProcess));

    DPRINTF(PM4PacketProcessor, "PM4 map_process pasid: %p quantum: "
            "%d pt: %p signal: %p\n", pkt->pasid, pkt->processQuantum,
            pkt->ptBase, pkt->completionSignal);

    mapProcess(pkt->pasid, pkt->ptBase, pkt->shMemBases);

    delete pkt;
    decodeNext(q);
}

void
// Handle IT_MAP_PROCESS for MI200 (gfx90a); identical to the gfx9 path
// apart from the larger packet layout.
// NOTE(review): signature line elided (takes q and PM4MapProcessMI200 *pkt).
{
    q->incRptr(sizeof(PM4MapProcessMI200));

    DPRINTF(PM4PacketProcessor, "PM4 map_process pasid: %p quantum: "
            "%d pt: %p signal: %p\n", pkt->pasid, pkt->processQuantum,
            pkt->ptBase, pkt->completionSignal);

    mapProcess(pkt->pasid, pkt->ptBase, pkt->shMemBases);

    delete pkt;
    decodeNext(q);
}
673 
void
// Handle IT_RUN_LIST: redirect the queue to the runlist's indirect buffer
// and decode it from the start.
// NOTE(review): signature line elided (runList(q, PM4RunList *pkt)).
{
    DPRINTF(PM4PacketProcessor, "PM4 run_list base: %p size: %d\n",
            pkt->ibBase, pkt->ibSize);

    q->incRptr(sizeof(PM4RunList));

    // Switch to the indirect buffer: decode from offset 0 for ibSize
    // dwords (pointers are kept in bytes).
    q->ib(true);
    q->ibBase(pkt->ibBase);
    q->rptr(0);
    q->wptr(pkt->ibSize * sizeof(uint32_t));

    delete pkt;
    decodeNext(q);
}

void
// Handle IT_INDIRECT_BUFFER: chain into an indirect buffer; decodeNext()
// returns to the primary ring when it drains.
// NOTE(review): signature line elided (indirectBuffer(q, pkt)). Unlike
// runList(), pkt is not freed here — possible leak; confirm.
{
    DPRINTF(PM4PacketProcessor, "PM4 indirect buffer, base: %p.\n",
            pkt->ibBase);

    q->incRptr(sizeof(PM4IndirectBuf));

    q->ib(true);
    q->ibBase(pkt->ibBase);
    q->wptr(pkt->ibSize * sizeof(uint32_t));

    decodeNext(q);
}
705 
void
// Handle IT_SWITCH_BUFFER: flag the queue as executing an indirect buffer
// and keep decoding. NOTE(review): signature line elided
// (switchBuffer(q, PM4SwitchBuf *pkt)); pkt is not freed — confirm.
{
    q->incRptr(sizeof(PM4SwitchBuf));

    q->ib(true);
    DPRINTF(PM4PacketProcessor, "PM4 switching buffer, rptr: %p.\n",
            q->wptr());

    decodeNext(q);
}

void
// Handle IT_SET_UCONFIG_REG: forward a UCONFIG register write to the GPU
// device. NOTE(review): signature line elided (setUconfigReg(q, pkt));
// pkt is not freed — confirm.
{
    q->incRptr(sizeof(PM4SetUconfigReg));

    // SET_UCONFIG_REG_START and pkt->offset are dword addresses
    uint32_t reg_addr = (PACKET3_SET_UCONFIG_REG_START + pkt->offset) * 4;

    gpuDevice->setRegVal(reg_addr, pkt->data);

    decodeNext(q);
}
730 
void
// Handle IT_WAIT_REG_MEM: only logs the request — the wait itself is not
// modelled; decoding continues immediately.
// NOTE(review): signature line elided (waitRegMem(q, pkt)); pkt is not
// freed — confirm.
{
    q->incRptr(sizeof(PM4WaitRegMem));

    DPRINTF(PM4PacketProcessor, "PM4 WAIT_REG_MEM\nfunc: %d memSpace: %d op: "
            "%d\n", pkt->function, pkt->memSpace, pkt->operation);
    DPRINTF(PM4PacketProcessor, "    AddrLo/Reg1: %lx\n", pkt->memAddrLo);
    DPRINTF(PM4PacketProcessor, "    AddrHi/Reg2: %lx\n", pkt->memAddrHi);
    DPRINTF(PM4PacketProcessor, "    Reference: %lx\n", pkt->reference);
    DPRINTF(PM4PacketProcessor, "    Mask: %lx\n", pkt->mask);
    DPRINTF(PM4PacketProcessor, "    Poll Interval: %lx\n", pkt->pollInterval);

    decodeNext(q);
}

void
// Handle IT_QUERY_STATUS: for the only supported combination
// (interruptSel 0, command 2) write the fence data to the given address.
// NOTE(review): signature line elided (queryStatus(q, pkt)).
{
    q->incRptr(sizeof(PM4QueryStatus));

    DPRINTF(PM4PacketProcessor, "PM4 query status contextId: %d, interruptSel:"
            " %d command: %d, pasid: %d, doorbellOffset: %d, engineSel: %d "
            "addr: %lx, data: %lx\n", pkt->contextId, pkt->interruptSel,
            pkt->command, pkt->pasid, pkt->doorbellOffset, pkt->engineSel,
            pkt->addr, pkt->data);

    if (pkt->interruptSel == 0 && pkt->command == 2) {
        // Write data value to fence address
        Addr addr = getGARTAddr(pkt->addr);
        DPRINTF(PM4PacketProcessor, "Using GART addr %lx\n", addr);
        auto cb = new DmaVirtCallback<uint64_t>(
            [ = ] (const uint64_t &) { queryStatusDone(q, pkt); }, pkt->data);
        dmaWriteVirt(addr, sizeof(uint64_t), cb, &cb->dmaBuffer);
    } else {
        // No other combinations used in amdkfd v9
        panic("query_status with interruptSel %d command %d not supported",
              pkt->interruptSel, pkt->command);
    }
}

void
// Completion callback for queryStatus(): free the packet and resume.
// NOTE(review): signature line elided (queryStatusDone(q, pkt)).
{
    DPRINTF(PM4PacketProcessor, "PM4 query status complete\n");

    delete pkt;
    decodeNext(q);
}
780 
void
// MMIO register-write dispatcher: routes HQD (KIQ) and RB (PQ) register
// writes to the matching setter. NOTE(review): signature line elided
// (takes a PacketPtr pkt and an mmio_offset); several case labels were
// dropped by this extraction and are marked below.
{
    switch (mmio_offset) {
    /* Hardware queue descriptor (HQD) registers */
      case mmCP_HQD_VMID:
        setHqdVmid(pkt->getLE<uint32_t>());
        break;
      case mmCP_HQD_ACTIVE:
        setHqdActive(pkt->getLE<uint32_t>());
        break;
      case mmCP_HQD_PQ_BASE:
        setHqdPqBase(pkt->getLE<uint32_t>());
        break;
      case mmCP_HQD_PQ_BASE_HI:
        setHqdPqBaseHi(pkt->getLE<uint32_t>());
        break;
      // (case label elided in this view)
        setHqdPqDoorbellCtrl(pkt->getLE<uint32_t>());
        // (line elided in this view)
        break;
      case mmCP_HQD_PQ_RPTR:
        setHqdPqPtr(pkt->getLE<uint32_t>());
        break;
      case mmCP_HQD_PQ_WPTR_LO:
        setHqdPqWptrLo(pkt->getLE<uint32_t>());
        break;
      case mmCP_HQD_PQ_WPTR_HI:
        setHqdPqWptrHi(pkt->getLE<uint32_t>());
        break;
      // (case label elided in this view)
        setHqdPqRptrReportAddr(pkt->getLE<uint32_t>());
        break;
      // (case label elided in this view)
        setHqdPqRptrReportAddrHi(pkt->getLE<uint32_t>());
        break;
      // (case label elided in this view)
        setHqdPqWptrPollAddr(pkt->getLE<uint32_t>());
        break;
      // (case label elided in this view)
        setHqdPqWptrPollAddrHi(pkt->getLE<uint32_t>());
        break;
      case mmCP_HQD_PQ_CONTROL:
        setHqdPqControl(pkt->getLE<uint32_t>());
        break;
      case mmCP_HQD_IB_CONTROL:
        setHqdIbCtrl(pkt->getLE<uint32_t>());
        break;
    /* Ring buffer registers */
      case mmCP_RB_VMID:
        setRbVmid(pkt->getLE<uint32_t>());
        break;
      case mmCP_RB0_CNTL:
        setRbCntl(pkt->getLE<uint32_t>());
        break;
      case mmCP_RB0_WPTR:
        setRbWptrLo(pkt->getLE<uint32_t>());
        break;
      case mmCP_RB0_WPTR_HI:
        setRbWptrHi(pkt->getLE<uint32_t>());
        break;
      case mmCP_RB0_RPTR_ADDR:
        setRbRptrAddrLo(pkt->getLE<uint32_t>());
        break;
      // (case label elided in this view)
        setRbRptrAddrHi(pkt->getLE<uint32_t>());
        break;
      // (case label elided in this view)
        setRbWptrPollAddrLo(pkt->getLE<uint32_t>());
        break;
      // (case label elided in this view)
        setRbWptrPollAddrHi(pkt->getLE<uint32_t>());
        break;
      case mmCP_RB0_BASE:
        setRbBaseLo(pkt->getLE<uint32_t>());
        break;
      case mmCP_RB0_BASE_HI:
        setRbBaseHi(pkt->getLE<uint32_t>());
        break;
      // (case label elided in this view)
        setRbDoorbellCntrl(pkt->getLE<uint32_t>());
        // (line elided in this view)
        break;
      // (case label elided in this view)
        setRbDoorbellRangeLo(pkt->getLE<uint32_t>());
        break;
      // (case label elided in this view)
        setRbDoorbellRangeHi(pkt->getLE<uint32_t>());
        break;
      default:
        break;
    }
}
874 
// MMIO register setters, invoked from writeMMIO() above. HQD registers
// update the kernel interface queue descriptor (kiq); RB registers update
// the primary queue descriptor (pq). NOTE(review): every signature line
// was dropped by this extraction, and several bodies are partially
// elided; function identities below are inferred from writeMMIO's case
// ordering and the documentation index — confirm against the header.

void
// presumably setHqdVmid — confirm.
{
    kiq.hqd_vmid = data;
}

void
// presumably setHqdActive — confirm.
{
    kiq.hqd_active = data;
}

void
{
    // (body elided in this view)
}

void
{
    // (body elided in this view)
}

void
{
    // (body elided in this view)
}

void
// presumably setHqdPqPtr — confirm.
{
    kiq.rptr = data;
}

void
{
    /* Write pointer communicated through doorbell value. */
}

void
{
    /* Write pointer communicated through doorbell value. */
}

void
{
    // (body elided in this view)
}

void
{
    // (body elided in this view)
}

void
{
    // (body elided in this view)
}

void
{
    // (body elided in this view)
}

void
{
    // (body elided in this view)
}

void
{
    // (body elided in this view)
}

void
// presumably setRbVmid — confirm.
{
    pq.hqd_vmid = data;
}

void
{
    // (body elided in this view)
}

void
// setRbWptrLo per the documentation index.
{
    pq.queueWptrLo = data;
}

void
// presumably setRbWptrHi — confirm.
{
    pq.queueWptrHi = data;
}

void
// setRbRptrAddrLo per the documentation index; body elided in this view.
{
}

void
{
    // (body elided in this view)
}

void
{
    // (body elided in this view)
}

void
{
    // (body elided in this view)
}

void
{
    // (body elided in this view)
}

void
{
    // (body elided in this view)
}

void
// presumably setRbDoorbellCntrl — confirm. Masks off the low two bits of
// the doorbell register, matching the offset mask used in processMQD().
{
    // (preceding statement elided in this view)
    pq.doorbellOffset = data & 0x1ffffffc;
}

void
{
    // (body elided in this view)
}

void
{
    // (body elided in this view)
}
1037 
void
// Checkpoint the PM4 queue state: one parallel array per queue attribute,
// indexed 0..num_queues-1, written with SERIALIZE_ARRAY.
// NOTE(review): signature line elided (serialize(CheckpointOut &cp)).
{
    // Serialize the DmaVirtDevice base class
    // (base-class serialize call elided in this view)

    int num_queues = queues.size();
    // NOTE(review): variable-length arrays are a compiler extension, not
    // standard C++; std::vector would be the portable choice here.
    Addr id[num_queues];
    Addr mqd_base[num_queues];
    Addr base[num_queues];
    Addr rptr[num_queues];
    Addr wptr[num_queues];
    Addr ib_base[num_queues];
    Addr ib_rptr[num_queues];
    Addr ib_wptr[num_queues];
    Addr offset[num_queues];
    bool processing[num_queues];
    bool ib[num_queues];
    uint32_t me[num_queues];
    uint32_t pipe[num_queues];
    uint32_t queue[num_queues];
    bool privileged[num_queues];
    uint32_t hqd_active[num_queues];
    uint32_t hqd_vmid[num_queues];
    Addr aql_rptr[num_queues];
    uint32_t doorbell[num_queues];
    uint32_t hqd_pq_control[num_queues];

    int i = 0;
    for (auto iter : queues) {
        PM4Queue *q = iter.second;
        id[i] = q->id();
        mqd_base[i] = q->mqdBase();
        // Toggle the queue's ib flag so getRptr()/getWptr() report both
        // the ring-buffer and the indirect-buffer pointers, then restore
        // the original state.
        bool cur_state = q->ib();
        q->ib(false);
        base[i] = q->base() >> 8;
        rptr[i] = q->getRptr();
        wptr[i] = q->getWptr();
        q->ib(true);
        ib_base[i] = q->ibBase();
        ib_rptr[i] = q->getRptr();
        ib_wptr[i] = q->getWptr();
        q->ib(cur_state);
        offset[i] = q->offset();
        processing[i] = q->processing();
        ib[i] = q->ib();
        me[i] = q->me();
        pipe[i] = q->pipe();
        queue[i] = q->queue();
        privileged[i] = q->privileged();
        hqd_active[i] = q->getMQD()->hqd_active;
        hqd_vmid[i] = q->getMQD()->hqd_vmid;
        aql_rptr[i] = q->getMQD()->aqlRptr;
        doorbell[i] = q->getMQD()->doorbell;
        hqd_pq_control[i] = q->getMQD()->hqd_pq_control;
        i++;
    }

    SERIALIZE_SCALAR(num_queues);
    SERIALIZE_ARRAY(id, num_queues);
    SERIALIZE_ARRAY(mqd_base, num_queues);
    SERIALIZE_ARRAY(base, num_queues);
    SERIALIZE_ARRAY(rptr, num_queues);
    SERIALIZE_ARRAY(wptr, num_queues);
    SERIALIZE_ARRAY(ib_base, num_queues);
    SERIALIZE_ARRAY(ib_rptr, num_queues);
    SERIALIZE_ARRAY(ib_wptr, num_queues);
    SERIALIZE_ARRAY(offset, num_queues);
    SERIALIZE_ARRAY(processing, num_queues);
    SERIALIZE_ARRAY(ib, num_queues);
    SERIALIZE_ARRAY(me, num_queues);
    SERIALIZE_ARRAY(pipe, num_queues);
    SERIALIZE_ARRAY(queue, num_queues);
    SERIALIZE_ARRAY(privileged, num_queues);
    SERIALIZE_ARRAY(hqd_active, num_queues);
    SERIALIZE_ARRAY(hqd_vmid, num_queues);
    SERIALIZE_ARRAY(aql_rptr, num_queues);
    SERIALIZE_ARRAY(doorbell, num_queues);
    SERIALIZE_ARRAY(hqd_pq_control, num_queues);
}
1118 
void
// Restore PM4 queue state from a checkpoint: read the parallel arrays
// written by serialize(), then re-create each queue via newQueue().
// NOTE(review): signature line elided (unserialize(CheckpointIn &cp)).
{
    // Serialize the DmaVirtDevice base class
    // (base-class unserialize call elided in this view)

    int num_queues = 0;
    UNSERIALIZE_SCALAR(num_queues);

    // NOTE(review): variable-length arrays are a compiler extension, not
    // standard C++; std::vector would be the portable choice here.
    Addr id[num_queues];
    Addr mqd_base[num_queues];
    Addr base[num_queues];
    Addr rptr[num_queues];
    Addr wptr[num_queues];
    Addr ib_base[num_queues];
    Addr ib_rptr[num_queues];
    Addr ib_wptr[num_queues];
    Addr offset[num_queues];
    bool processing[num_queues];
    bool ib[num_queues];
    uint32_t me[num_queues];
    uint32_t pipe[num_queues];
    uint32_t queue[num_queues];
    bool privileged[num_queues];
    uint32_t hqd_active[num_queues];
    uint32_t hqd_vmid[num_queues];
    Addr aql_rptr[num_queues];
    uint32_t doorbell[num_queues];
    uint32_t hqd_pq_control[num_queues];

    UNSERIALIZE_ARRAY(id, num_queues);
    UNSERIALIZE_ARRAY(mqd_base, num_queues);
    UNSERIALIZE_ARRAY(base, num_queues);
    UNSERIALIZE_ARRAY(rptr, num_queues);
    UNSERIALIZE_ARRAY(wptr, num_queues);
    UNSERIALIZE_ARRAY(ib_base, num_queues);
    UNSERIALIZE_ARRAY(ib_rptr, num_queues);
    UNSERIALIZE_ARRAY(ib_wptr, num_queues);
    UNSERIALIZE_ARRAY(offset, num_queues);
    UNSERIALIZE_ARRAY(processing, num_queues);
    UNSERIALIZE_ARRAY(ib, num_queues);
    UNSERIALIZE_ARRAY(me, num_queues);
    UNSERIALIZE_ARRAY(pipe, num_queues);
    UNSERIALIZE_ARRAY(queue, num_queues);
    UNSERIALIZE_ARRAY(privileged, num_queues);
    UNSERIALIZE_ARRAY(hqd_active, num_queues);
    UNSERIALIZE_ARRAY(hqd_vmid, num_queues);
    UNSERIALIZE_ARRAY(aql_rptr, num_queues);
    UNSERIALIZE_ARRAY(doorbell, num_queues);
    UNSERIALIZE_ARRAY(hqd_pq_control, num_queues);

    for (int i = 0; i < num_queues; i++) {
        QueueDesc *mqd = new QueueDesc();
        memset(mqd, 0, sizeof(QueueDesc));

        mqd->mqdBase = mqd_base[i] >> 8;
        mqd->base = base[i];
        mqd->rptr = rptr[i];
        mqd->ibBase = ib_base[i];
        mqd->ibRptr = ib_rptr[i];

        PM4MapQueues* pkt = new PM4MapQueues;
        memset(pkt, 0, sizeof(PM4MapQueues));
        newQueue(mqd, offset[i], pkt, id[i]);

        // Restore both write pointers by toggling the ib flag, then
        // reinstate the checkpointed processing/ib state.
        queues[id[i]]->ib(false);
        queues[id[i]]->wptr(wptr[i]);
        queues[id[i]]->ib(true);
        queues[id[i]]->wptr(ib_wptr[i]);
        queues[id[i]]->offset(offset[i]);
        queues[id[i]]->processing(processing[i]);
        queues[id[i]]->ib(ib[i]);
        queues[id[i]]->setPkt(me[i], pipe[i], queue[i], privileged[i]);
        queues[id[i]]->getMQD()->hqd_active = hqd_active[i];
        queues[id[i]]->getMQD()->hqd_vmid = hqd_vmid[i];
        queues[id[i]]->getMQD()->aqlRptr = aql_rptr[i];
        queues[id[i]]->getMQD()->doorbell = doorbell[i];
        queues[id[i]]->getMQD()->hqd_pq_control = hqd_pq_control[i];

        DPRINTF(PM4PacketProcessor, "PM4 queue %d, rptr: %p wptr: %p\n",
                queues[id[i]]->id(), queues[id[i]]->rptr(),
                queues[id[i]]->wptr());
    }
}
1203 
1204 } // namespace gem5
gem5::GEM5_PACKED::ptBase
uint64_t ptBase
Definition: pm4_defines.hh:240
gem5::PM4RunList
struct gem5::GEM5_PACKED PM4RunList
gem5::PM4PacketProcessor::mapPq
void mapPq(Addr offset)
The first graphics queue, the Primary Queueu a.k.a.
Definition: pm4_packet_processor.cc:123
gem5::PM4PacketProcessor::setRbRptrAddrLo
void setRbRptrAddrLo(uint32_t data)
Definition: pm4_packet_processor.cc:984
gem5::PM4QueryStatus
struct gem5::GEM5_PACKED PM4QueryStatus
gem5::PM4WriteData
struct gem5::GEM5_PACKED PM4WriteData
gem5::PM4PacketProcessor::decodeNext
void decodeNext(PM4Queue *q)
This method decodes the next packet in a PM4Queue.
Definition: pm4_packet_processor.cc:166
mmCP_RB0_BASE
#define mmCP_RB0_BASE
Definition: pm4_mmio.hh:39
gem5::GPUCommandProcessor::shader
Shader * shader()
Definition: gpu_command_processor.cc:460
gem5::PM4PacketProcessor::mapProcess
void mapProcess(uint32_t pasid, uint64_t ptBase, uint32_t shMemBases)
Definition: pm4_packet_processor.cc:625
gem5::IT_RUN_LIST
@ IT_RUN_LIST
Definition: pm4_defines.hh:66
warn
#define warn(...)
Definition: logging.hh:256
gem5::Shader::setLdsApe
void setLdsApe(Addr base, Addr limit)
Definition: shader.hh:140
gem5::GEM5_PACKED
PM4 packets.
Definition: pm4_defines.hh:77
gem5::PM4PacketProcessor::queuesMap
std::unordered_map< uint32_t, PM4Queue * > queuesMap
Definition: pm4_packet_processor.hh:65
gem5::IT_WAIT_REG_MEM
@ IT_WAIT_REG_MEM
Definition: pm4_defines.hh:56
gem5::GEM5_PACKED::doorbellOffset2
uint32_t doorbellOffset2
Definition: pm4_defines.hh:186
gem5::GEM5_PACKED::mqdAddr
uint64_t mqdAddr
Definition: pm4_defines.hh:146
gem5::GEM5_PACKED::rptr
uint32_t rptr
Definition: pm4_queues.hh:114
data
const char data[]
Definition: circlebuf.test.cc:48
shader.hh
UNSERIALIZE_SCALAR
#define UNSERIALIZE_SCALAR(scalar)
Definition: serialize.hh:575
gem5::PM4PacketProcessor::setRbWptrLo
void setRbWptrLo(uint32_t data)
Definition: pm4_packet_processor.cc:972
gem5::IT_INVALIDATE_TLBS
@ IT_INVALIDATE_TLBS
Definition: pm4_defines.hh:61
gem5::PM4PacketProcessor::queryStatusDone
void queryStatusDone(PM4Queue *q, PM4QueryStatus *pkt)
Definition: pm4_packet_processor.cc:773
gem5::GEM5_PACKED::operation
uint32_t operation
Definition: pm4_defines.hh:338
gem5::DmaVirtDevice::DmaVirtCallback
Wraps a std::function object in a DmaCallback.
Definition: dma_virt_device.hh:51
gem5::IT_INDIRECT_BUFFER
@ IT_INDIRECT_BUFFER
Definition: pm4_defines.hh:57
gem5::AMDGPUDevice::setDoorbellType
void setDoorbellType(uint32_t offset, QueueType qt)
Set handles to GPU blocks.
Definition: amdgpu_device.cc:555
gem5::GEM5_PACKED::doorbellOffset1
uint32_t doorbellOffset1
Definition: pm4_defines.hh:183
gem5::GEM5_PACKED::hqd_pq_doorbell_control
uint32_t hqd_pq_doorbell_control
Definition: pm4_queues.hh:129
gem5::GEM5_PACKED::checkDisable
uint32_t checkDisable
Definition: pm4_defines.hh:136
gem5::PM4UnmapQueues
struct gem5::GEM5_PACKED PM4UnmapQueues
gem5::DmaVirtDevice::dmaReadVirt
void dmaReadVirt(Addr host_addr, unsigned size, DmaCallback *cb, void *data, Tick delay=0)
Initiate a DMA read from virtual address host_addr.
Definition: dma_virt_device.cc:38
gem5::QueueDesc
struct gem5::GEM5_PACKED QueueDesc
Queue descriptor with relevant MQD attributes.
gem5::GEM5_PACKED::intSelect
uint32_t intSelect
Definition: pm4_defines.hh:462
gem5::AMDGPUDevice::getSDMAById
SDMAEngine * getSDMAById(int id)
Definition: amdgpu_device.cc:568
gem5::Gfx
@ Gfx
Definition: amdgpu_defines.hh:44
gem5::PM4PacketProcessor::writeMMIO
void writeMMIO(PacketPtr pkt, Addr mmio_offset)
Definition: pm4_packet_processor.cc:782
gem5::AMDGPUDevice::deallocateAllQueues
void deallocateAllQueues()
Definition: amdgpu_device.cc:747
gem5::GEM5_PACKED::reference
uint32_t reference
Definition: pm4_defines.hh:358
gem5::CheckpointIn
Definition: serialize.hh:68
gem5::GEM5_PACKED::sdmax_rlcx_rb_cntl
uint32_t sdmax_rlcx_rb_cntl
Definition: pm4_queues.hh:196
gem5::AMDGPUDevice::lastVMID
uint16_t lastVMID()
Definition: amdgpu_device.hh:204
gem5::GEM5_PACKED::destAddr
uint64_t destAddr
Definition: pm4_defines.hh:114
gem5::PM4PacketProcessor::setRbCntl
void setRbCntl(uint32_t data)
Definition: pm4_packet_processor.cc:966
gem5::IT_QUERY_STATUS
@ IT_QUERY_STATUS
Definition: pm4_defines.hh:65
sdma_engine.hh
gem5::RLC
@ RLC
Definition: amdgpu_defines.hh:49
gem5::PM4Queue
Class defining a PM4 queue.
Definition: pm4_queues.hh:377
mmCP_HQD_ACTIVE
#define mmCP_HQD_ACTIVE
Definition: pm4_mmio.hh:53
gem5::PrimaryQueue::queueWptrLo
uint32_t queueWptrLo
Definition: pm4_queues.hh:364
gem5::GEM5_PACKED::hqd_vmid
uint32_t hqd_vmid
Definition: pm4_queues.hh:97
gem5::PM4SetUconfigReg
struct gem5::GEM5_PACKED PM4SetUconfigReg
header
output header
Definition: nop.cc:36
gem5::PM4PacketProcessor::setRbDoorbellCntrl
void setRbDoorbellCntrl(uint32_t data)
Definition: pm4_packet_processor.cc:1020
gem5::IT_WRITE_DATA
@ IT_WRITE_DATA
Definition: pm4_defines.hh:55
gem5::AMDGPUDevice::getVM
AMDGPUVM & getVM()
Definition: amdgpu_device.hh:180
mmCP_HQD_IB_CONTROL
#define mmCP_HQD_IB_CONTROL
Definition: pm4_mmio.hh:64
gem5::AMDGPUInterruptHandler::submitInterruptCookie
void submitInterruptCookie()
Definition: interrupt_handler.cc:146
gem5::GEM5_PACKED::queueSel
uint32_t queueSel
Definition: pm4_defines.hh:123
mmCP_RB_VMID
#define mmCP_RB_VMID
Definition: pm4_mmio.hh:43
mmCP_HQD_PQ_WPTR_LO
#define mmCP_HQD_PQ_WPTR_LO
Definition: pm4_mmio.hh:65
gem5::GPUCommandProcessor::hsaPacketProc
HSAPacketProcessor & hsaPacketProc()
Definition: gpu_command_processor.cc:65
gem5::GEM5_PACKED::hqd_pq_wptr_poll_addr_hi
uint32_t hqd_pq_wptr_poll_addr_hi
Definition: pm4_queues.hh:126
pm4_mmio.hh
gem5::X86ISA::base
Bitfield< 51, 12 > base
Definition: pagetable.hh:141
gem5::PM4PacketProcessor::setRbDoorbellRangeHi
void setRbDoorbellRangeHi(uint32_t data)
Definition: pm4_packet_processor.cc:1033
gem5::PM4PacketProcessor::serialize
void serialize(CheckpointOut &cp) const override
Serialize an object.
Definition: pm4_packet_processor.cc:1039
gem5::PM4PacketProcessor::setHqdPqRptrReportAddr
void setHqdPqRptrReportAddr(uint32_t data)
Definition: pm4_packet_processor.cc:924
gem5::PM4PacketProcessor::process
void process(PM4Queue *q, Addr wptrOffset)
This method start processing a PM4Queue from the current read pointer to the newly communicated write...
Definition: pm4_packet_processor.cc:155
interrupt_handler.hh
gem5::ArmISA::i
Bitfield< 7 > i
Definition: misc_types.hh:67
hw_scheduler.hh
gem5::AMDGPUVM::inAGP
bool inAGP(Addr vaddr)
Methods for resolving apertures.
Definition: amdgpu_vm.hh:188
mmCP_RB0_BASE_HI
#define mmCP_RB0_BASE_HI
Definition: pm4_mmio.hh:51
gem5::GEM5_PACKED::sdmax_rlcx_rb_rptr_addr_lo
uint32_t sdmax_rlcx_rb_rptr_addr_lo
Definition: pm4_queues.hh:226
gem5::PM4PacketProcessor::setHqdPqControl
void setHqdPqControl(uint32_t data)
Definition: pm4_packet_processor.cc:948
gem5::ClockedObject::unserialize
void unserialize(CheckpointIn &cp) override
Unserialize an object.
Definition: clocked_object.cc:64
gem5::AMDGPUVM::setPageTableBase
void setPageTableBase(uint16_t vmid, Addr ptBase)
Page table base/start accessors for user VMIDs.
Definition: amdgpu_vm.hh:280
gem5::PM4PacketProcessor::getKiqDoorbellOffset
uint32_t getKiqDoorbellOffset()
Definition: pm4_packet_processor.hh:85
gem5::PM4PacketProcessor::newQueue
void newQueue(QueueDesc *q, Addr offset, PM4MapQueues *pkt=nullptr, int id=-1)
This method creates a new PM4Queue based on a queue descriptor and an offset.
Definition: pm4_packet_processor.cc:130
gem5::HSAPacketProcessor::setDeviceQueueDesc
void setDeviceQueueDesc(uint64_t hostReadIndexPointer, uint64_t basePointer, uint64_t queue_id, uint32_t size, int doorbellSize, GfxVersion gfxVersion, Addr offset=0, uint64_t rd_idx=0)
Definition: hsa_packet_processor.cc:112
gem5::AMDGPUDevice::mapDoorbellToVMID
void mapDoorbellToVMID(Addr doorbell, uint16_t vmid)
Definition: amdgpu_device.cc:758
gem5::PM4PacketProcessor::setUconfigReg
void setUconfigReg(PM4Queue *q, PM4SetUconfigReg *pkt)
Definition: pm4_packet_processor.cc:719
gem5::IT_UNMAP_QUEUES
@ IT_UNMAP_QUEUES
Definition: pm4_defines.hh:64
gem5::GEM5_PACKED::hqd_pq_base_lo
uint32_t hqd_pq_base_lo
Definition: pm4_queues.hh:106
gem5::PM4PacketProcessor::releaseMemDone
void releaseMemDone(PM4Queue *q, PM4ReleaseMem *pkt, Addr addr)
Definition: pm4_packet_processor.cc:502
gem5::PM4PacketProcessor::setRbBaseLo
void setRbBaseLo(uint32_t data)
Definition: pm4_packet_processor.cc:1008
gem5::PM4MapQueues
struct gem5::GEM5_PACKED PM4MapQueues
gem5::GEM5_PACKED::data
uint32_t data
Definition: pm4_defines.hh:116
gem5::GEM5_PACKED::shMemBases
uint32_t shMemBases
Definition: pm4_defines.hh:242
gem5::PowerISA::me
Bitfield< 12 > me
Definition: misc.hh:118
packet.hh
gem5::PrimaryQueue::queueRptrAddrHi
uint32_t queueRptrAddrHi
Definition: pm4_queues.hh:356
gem5::AMDGPUVM::invalidateTLBs
void invalidateTLBs()
Definition: amdgpu_vm.cc:174
mmCP_HQD_PQ_DOORBELL_CONTROL
#define mmCP_HQD_PQ_DOORBELL_CONTROL
Definition: pm4_mmio.hh:57
gem5::PM4PacketProcessor::switchBuffer
void switchBuffer(PM4Queue *q, PM4SwitchBuf *pkt)
Definition: pm4_packet_processor.cc:707
gem5::PM4PacketProcessor::releaseMem
void releaseMem(PM4Queue *q, PM4ReleaseMem *pkt)
Definition: pm4_packet_processor.cc:478
gem5::PM4PacketProcessor::mapKiq
void mapKiq(Addr offset)
The first compute queue, the Kernel Interface Queueu a.k.a.
Definition: pm4_packet_processor.cc:116
gem5::GEM5_PACKED::wptrAddr
uint64_t wptrAddr
Definition: pm4_defines.hh:155
gem5::GEM5_PACKED::writeConfirm
uint32_t writeConfirm
Definition: pm4_defines.hh:103
gem5::GEM5_PACKED::hqd_pq_rptr_report_addr_lo
uint32_t hqd_pq_rptr_report_addr_lo
Definition: pm4_queues.hh:120
mmCP_HQD_PQ_WPTR_POLL_ADDR_HI
#define mmCP_HQD_PQ_WPTR_POLL_ADDR_HI
Definition: pm4_mmio.hh:62
gem5::GEM5_PACKED::doorbellOffset3
uint32_t doorbellOffset3
Definition: pm4_defines.hh:189
gem5::PM4PacketProcessor::mapProcessGfx90a
void mapProcessGfx90a(PM4Queue *q, PM4MapProcessMI200 *pkt)
Definition: pm4_packet_processor.cc:660
gem5::GEM5_PACKED::dataLo
uint32_t dataLo
Definition: pm4_defines.hh:485
mmCP_RB_WPTR_POLL_ADDR_HI
#define mmCP_RB_WPTR_POLL_ADDR_HI
Definition: pm4_mmio.hh:42
gem5::IT_SWITCH_BUFFER
@ IT_SWITCH_BUFFER
Definition: pm4_defines.hh:60
gem5::GEM5_PACKED::hqd_pq_rptr_report_addr_hi
uint32_t hqd_pq_rptr_report_addr_hi
Definition: pm4_queues.hh:121
mmCP_HQD_PQ_CONTROL
#define mmCP_HQD_PQ_CONTROL
Definition: pm4_mmio.hh:63
gem5::PM4MapProcess
struct gem5::GEM5_PACKED PM4MapProcess
gem5::AMDGPUDevice::getIH
AMDGPUInterruptHandler * getIH()
Get handles to GPU blocks.
Definition: amdgpu_device.hh:177
gem5::IT_RELEASE_MEM
@ IT_RELEASE_MEM
Definition: pm4_defines.hh:58
gem5::GEM5_PACKED::processQuantum
uint32_t processQuantum
Definition: pm4_defines.hh:232
gem5::PM4SwitchBuf
struct gem5::GEM5_PACKED PM4SwitchBuf
gem5::GEM5_PACKED::mqdReadIndex
uint64_t mqdReadIndex
Definition: pm4_queues.hh:55
gem5::PM4PacketProcessor::setHqdVmid
void setHqdVmid(uint32_t data)
Definition: pm4_packet_processor.cc:876
mmCP_RB0_RPTR_ADDR
#define mmCP_RB0_RPTR_ADDR
Definition: pm4_mmio.hh:44
gem5::GEM5_PACKED::queueType
uint32_t queueType
Definition: pm4_defines.hh:131
gem5::PM4PacketProcessor::setHqdPqRptrReportAddrHi
void setHqdPqRptrReportAddrHi(uint32_t data)
Definition: pm4_packet_processor.cc:930
gem5::GEM5_PACKED::sdmax_rlcx_rb_rptr_hi
uint32_t sdmax_rlcx_rb_rptr_hi
Definition: pm4_queues.hh:211
gem5::VegaISA::p
Bitfield< 54 > p
Definition: pagetable.hh:70
mmCP_HQD_PQ_RPTR
#define mmCP_HQD_PQ_RPTR
Definition: pm4_mmio.hh:58
gem5::GEM5_PACKED::command
uint32_t command
Definition: pm4_defines.hh:435
gem5::PM4PacketProcessor::setRbWptrPollAddrHi
void setRbWptrPollAddrHi(uint32_t data)
Definition: pm4_packet_processor.cc:1002
gem5::PM4PacketProcessor::setGPUDevice
void setGPUDevice(AMDGPUDevice *gpu_device)
Definition: pm4_packet_processor.cc:86
gem5::GEM5_PACKED::memAddrHi
uint32_t memAddrHi
Definition: pm4_defines.hh:356
mmCP_HQD_PQ_BASE
#define mmCP_HQD_PQ_BASE
Definition: pm4_mmio.hh:55
DPRINTF
#define DPRINTF(x,...)
Definition: trace.hh:210
PACKET3_SET_UCONFIG_REG_START
#define PACKET3_SET_UCONFIG_REG_START
Value from vega10/pm4_header.h.
Definition: pm4_defines.hh:72
amdgpu_device.hh
gem5::Packet
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
Definition: packet.hh:294
mmCP_HQD_PQ_WPTR_HI
#define mmCP_HQD_PQ_WPTR_HI
Definition: pm4_mmio.hh:66
mmCP_RB0_WPTR
#define mmCP_RB0_WPTR
Definition: pm4_mmio.hh:46
mmCP_HQD_VMID
#define mmCP_HQD_VMID
Definition: pm4_mmio.hh:54
gem5::GEM5_PACKED::dataSelect
uint32_t dataSelect
Definition: pm4_defines.hh:464
gem5::AMDGPUDevice::setRegVal
void setRegVal(uint32_t addr, uint32_t value)
Definition: amdgpu_device.cc:547
gem5::PM4PacketProcessor
Definition: pm4_packet_processor.hh:52
gem5::GEM5_PACKED::aql
uint32_t aql
Definition: pm4_queues.hh:183
gem5::AMDGPUDevice::allocateVMID
uint16_t allocateVMID(uint16_t pasid)
Definition: amdgpu_device.cc:714
mmCP_RB_DOORBELL_CONTROL
#define mmCP_RB_DOORBELL_CONTROL
Definition: pm4_mmio.hh:48
gem5::PM4PacketProcessor::setHqdPqPtr
void setHqdPqPtr(uint32_t data)
Definition: pm4_packet_processor.cc:906
gem5::PM4PacketProcessor::writeDataDone
void writeDataDone(PM4Queue *q, PM4WriteData *pkt, Addr addr)
Definition: pm4_packet_processor.cc:354
gem5::PM4PacketProcessor::translate
TranslationGenPtr translate(Addr vaddr, Addr size) override
Method for functional translation.
Definition: pm4_packet_processor.cc:64
gem5::PM4PacketProcessor::setHqdPqWptrPollAddrHi
void setHqdPqWptrPollAddrHi(uint32_t data)
Definition: pm4_packet_processor.cc:942
gem5::GEM5_PACKED::pipe
uint32_t pipe
Definition: pm4_defines.hh:128
gem5::GEM5_PACKED::doorbellOffset
uint32_t doorbellOffset
Definition: pm4_defines.hh:137
gem5::PM4PacketProcessor::pq
PrimaryQueue pq
Definition: pm4_packet_processor.hh:56
pm4_packet_processor.hh
gem5::GEM5_PACKED::hqd_pq_base_hi
uint32_t hqd_pq_base_hi
Definition: pm4_queues.hh:107
gem5::ArmISA::offset
Bitfield< 23, 0 > offset
Definition: types.hh:144
gem5::PM4PacketProcessor::doneMQDWrite
void doneMQDWrite(Addr mqdAddr, Addr addr)
Definition: pm4_packet_processor.cc:619
gem5::PM4PacketProcessor::processSDMAMQD
void processSDMAMQD(PM4MapQueues *pkt, PM4Queue *q, Addr addr, SDMAQueueDesc *mqd, uint16_t vmid)
Definition: pm4_packet_processor.cc:450
gem5::GEM5_PACKED::rb_base
uint64_t rb_base
Definition: pm4_queues.hh:204
gem5::AMDGPUDevice
Device model for an AMD GPU.
Definition: amdgpu_device.hh:62
gem5::SDMAEngine::registerRLCQueue
void registerRLCQueue(Addr doorbell, Addr mqdAddr, SDMAQueueDesc *mqd)
Methods for RLC queues.
Definition: sdma_engine.cc:181
gem5::ClockedObject::serialize
void serialize(CheckpointOut &cp) const override
Serialize an object.
Definition: clocked_object.cc:59
mmCP_HQD_PQ_BASE_HI
#define mmCP_HQD_PQ_BASE_HI
Definition: pm4_mmio.hh:56
gem5::GEM5_PACKED::doorbellOffset0
uint32_t doorbellOffset0
Definition: pm4_defines.hh:178
gem5::bits
constexpr T bits(T val, unsigned first, unsigned last)
Extract the bitfield from position 'first' to 'last' (inclusive) from 'val' and right justify it.
Definition: bitfield.hh:76
gem5::GEM5_PACKED::me
uint32_t me
Definition: pm4_defines.hh:129
gem5::GEM5_PACKED::event
uint32_t event
Definition: pm4_defines.hh:441
gpu_command_processor.hh
gem5::GEM5_PACKED::sdmax_rlcx_ib_base_hi
uint32_t sdmax_rlcx_ib_base_hi
Definition: pm4_queues.hh:231
SERIALIZE_ARRAY
#define SERIALIZE_ARRAY(member, size)
Definition: serialize.hh:610
gem5::PM4PacketProcessor::setHqdActive
void setHqdActive(uint32_t data)
Definition: pm4_packet_processor.cc:882
gem5::PM4PacketProcessor::setRbWptrHi
void setRbWptrHi(uint32_t data)
Definition: pm4_packet_processor.cc:978
mmCP_RB0_RPTR_ADDR_HI
#define mmCP_RB0_RPTR_ADDR_HI
Definition: pm4_mmio.hh:45
mmCP_RB_DOORBELL_RANGE_LOWER
#define mmCP_RB_DOORBELL_RANGE_LOWER
Definition: pm4_mmio.hh:49
gem5::DmaVirtDevice::dmaWriteVirt
void dmaWriteVirt(Addr host_addr, unsigned size, DmaCallback *b, void *data, Tick delay=0)
Initiate a DMA write from virtual address host_addr.
Definition: dma_virt_device.cc:45
gem5::GEM5_PACKED::ibRptr
uint32_t ibRptr
Definition: pm4_queues.hh:146
gem5::SDMAEngine
System DMA Engine class for AMD dGPU.
Definition: sdma_engine.hh:48
gem5::GEM5_PACKED::contextId
uint32_t contextId
Definition: pm4_defines.hh:526
gem5::Addr
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition: types.hh:147
gem5::PrimaryQueue::queueRptrAddrLo
uint32_t queueRptrAddrLo
Definition: pm4_queues.hh:355
mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI
#define mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI
Definition: pm4_mmio.hh:60
gem5::Shader::setScratchApe
void setScratchApe(Addr base, Addr limit)
Definition: shader.hh:153
gem5::PM4PacketProcessor::queryStatus
void queryStatus(PM4Queue *q, PM4QueryStatus *pkt)
Definition: pm4_packet_processor.cc:748
gem5::GEM5_PACKED::function
uint32_t function
Definition: pm4_defines.hh:336
gem5::GEM5_PACKED::memAddrLo
uint32_t memAddrLo
Definition: pm4_defines.hh:347
gem5::PrimaryQueue::doorbellOffset
uint32_t doorbellOffset
Definition: pm4_queues.hh:369
gem5::PM4ReleaseMem
struct gem5::GEM5_PACKED PM4ReleaseMem
gem5::PM4PacketProcessor::runList
void runList(PM4Queue *q, PM4RunList *pkt)
Definition: pm4_packet_processor.cc:675
SERIALIZE_SCALAR
#define SERIALIZE_SCALAR(scalar)
Definition: serialize.hh:568
gem5::IT_MAP_QUEUES
@ IT_MAP_QUEUES
Definition: pm4_defines.hh:63
gem5::Compute
@ Compute
Definition: amdgpu_defines.hh:43
gem5::GEM5_PACKED::queueSlot
uint32_t queueSlot
Definition: pm4_defines.hh:127
mmCP_RB0_WPTR_HI
#define mmCP_RB0_WPTR_HI
Definition: pm4_mmio.hh:47
packet_access.hh
gem5::PM4PacketProcessor::setHqdPqWptrLo
void setHqdPqWptrLo(uint32_t data)
Definition: pm4_packet_processor.cc:912
gem5::GEM5_PACKED::sdmax_rlcx_rb_rptr
uint32_t sdmax_rlcx_rb_rptr
Definition: pm4_queues.hh:210
gem5::AMDGPUDevice::getVMID
uint16_t getVMID(Addr doorbell)
Definition: amdgpu_device.hh:210
gem5::PM4PacketProcessor::setHqdPqBase
void setHqdPqBase(uint32_t data)
Definition: pm4_packet_processor.cc:888
gem5::AMDGPUVM::AGPTranslationGen
Translation range generators.
Definition: amdgpu_vm.hh:316
gem5::GEM5_PACKED::doorbell
uint32_t doorbell
Definition: pm4_queues.hh:130
gem5::AMDGPUDevice::getUsedVMIDs
std::unordered_map< uint16_t, std::set< int > > & getUsedVMIDs()
Definition: amdgpu_device.cc:764
gem5::PM4IndirectBuf
struct gem5::GEM5_PACKED PM4IndirectBuf
gem5::PM4PacketProcessor::decodeHeader
void decodeHeader(PM4Queue *q, PM4Header header)
This method calls other PM4 packet processing methods based on the header of a PM4 packet.
Definition: pm4_packet_processor.cc:193
gem5::GEM5_PACKED::hqd_ib_control
uint32_t hqd_ib_control
Definition: pm4_queues.hh:148
gem5::PM4PacketProcessor::processMQD
void processMQD(PM4MapQueues *pkt, PM4Queue *q, Addr addr, QueueDesc *mqd, uint16_t vmid)
Definition: pm4_packet_processor.cc:416
gem5::GEM5_PACKED::ibBase
uint64_t ibBase
Definition: pm4_defines.hh:381
gem5::PM4PacketProcessor::setHqdPqDoorbellCtrl
void setHqdPqDoorbellCtrl(uint32_t data)
Definition: pm4_packet_processor.cc:900
gem5::CP_EOP
@ CP_EOP
Definition: interrupt_handler.hh:72
gem5::PM4PacketProcessor::indirectBuffer
void indirectBuffer(PM4Queue *q, PM4IndirectBuf *pkt)
Definition: pm4_packet_processor.cc:692
gem5::AMDGPUVM::GARTTranslationGen
Definition: amdgpu_vm.hh:329
gem5::GEM5_PACKED::mqdBase
uint64_t mqdBase
Definition: pm4_queues.hh:94
gem5::GEM5_PACKED::pollInterval
uint32_t pollInterval
Definition: pm4_defines.hh:360
gem5::ArmISA::q
Bitfield< 27 > q
Definition: misc_types.hh:55
gem5::PM4PacketProcessor::PM4PacketProcessor
PM4PacketProcessor(const PM4PacketProcessorParams &p)
Definition: pm4_packet_processor.cc:51
gem5::PM4PacketProcessor::setRbWptrPollAddrLo
void setRbWptrPollAddrLo(uint32_t data)
Definition: pm4_packet_processor.cc:996
gem5::PM4PacketProcessor::setHqdPqBaseHi
void setHqdPqBaseHi(uint32_t data)
Definition: pm4_packet_processor.cc:894
gem5::PM4PacketProcessor::setHqdPqWptrHi
void setHqdPqWptrHi(uint32_t data)
Definition: pm4_packet_processor.cc:918
gem5::GEM5_PACKED::hqd_active
uint32_t hqd_active
Definition: pm4_queues.hh:96
UNSERIALIZE_ARRAY
#define UNSERIALIZE_ARRAY(member, size)
Definition: serialize.hh:618
gem5::IT_MAP_PROCESS
@ IT_MAP_PROCESS
Definition: pm4_defines.hh:62
gem5::PM4MapProcessMI200
struct gem5::GEM5_PACKED PM4MapProcessMI200
gem5::GEM5_PACKED::engineSel
uint32_t engineSel
Definition: pm4_defines.hh:133
gem5::GEM5_PACKED::offset
uint32_t offset
Definition: pm4_defines.hh:497
gem5::PM4PacketProcessor::getQueue
PM4Queue * getQueue(Addr offset, bool gfx=false)
Based on an offset communicated through doorbell write, the PM4PacketProcessor identifies which queue...
Definition: pm4_packet_processor.cc:102
gem5::PM4PacketProcessor::queues
std::unordered_map< uint16_t, PM4Queue * > queues
Definition: pm4_packet_processor.hh:63
mmCP_RB0_CNTL
#define mmCP_RB0_CNTL
Definition: pm4_mmio.hh:40
mmCP_HQD_PQ_WPTR_POLL_ADDR
#define mmCP_HQD_PQ_WPTR_POLL_ADDR
Definition: pm4_mmio.hh:61
gem5::GEM5_PACKED::aqlRptr
uint64_t aqlRptr
Definition: pm4_queues.hh:123
gem5::GEM5_PACKED::addr
uint64_t addr
Definition: pm4_defines.hh:472
gem5::PrimaryQueue::queueWptrHi
uint32_t queueWptrHi
Definition: pm4_queues.hh:365
gem5::GEM5_PACKED::sdmax_rlcx_rb_rptr_addr_hi
uint32_t sdmax_rlcx_rb_rptr_addr_hi
Definition: pm4_queues.hh:225
gem5::GEM5_PACKED::pasid
uint32_t pasid
Definition: pm4_defines.hh:172
gem5::PM4PacketProcessor::getAddrRanges
AddrRangeList getAddrRanges() const override
Every PIO device is obliged to provide an implementation that returns the address ranges the device r...
Definition: pm4_packet_processor.cc:79
hwreg_defines.hh
gem5::Shader::setHwReg
void setHwReg(int regIdx, uint32_t val)
Definition: shader.hh:116
gem5::GEM5_PACKED::hqd_pq_wptr_poll_addr_lo
uint32_t hqd_pq_wptr_poll_addr_lo
Definition: pm4_queues.hh:125
gem5::PM4PacketProcessor::mapProcessGfx9
void mapProcessGfx9(PM4Queue *q, PM4MapProcess *pkt)
Definition: pm4_packet_processor.cc:645
gem5::PM4PacketProcessor::updateReadIndex
void updateReadIndex(Addr offset, uint64_t rd_idx)
Update read index on doorbell rings.
Definition: pm4_packet_processor.cc:525
gem5::AMDGPUDevice::insertQId
void insertQId(uint16_t vmid, int id)
Definition: amdgpu_device.cc:770
gem5::Packet::getLE
T getLE() const
Get the data in the packet byte swapped from little endian to host endian.
Definition: packet_access.hh:78
gem5::ArmISA::id
Bitfield< 33 > id
Definition: misc_types.hh:305
gem5::GEM5_PACKED::sdmax_rlcx_rb_wptr
uint32_t sdmax_rlcx_rb_wptr
Definition: pm4_queues.hh:219
gem5::AMDGPUDevice::deallocateVmid
void deallocateVmid(uint16_t vmid)
Definition: amdgpu_device.cc:729
gem5::GEM5_PACKED::vmid
uint32_t vmid
Definition: pm4_defines.hh:125
mmCP_RB_DOORBELL_RANGE_UPPER
#define mmCP_RB_DOORBELL_RANGE_UPPER
Definition: pm4_mmio.hh:50
gem5::AMDGPUDevice::deallocatePasid
void deallocatePasid(uint16_t pasid)
Definition: amdgpu_device.cc:735
gem5::CheckpointOut
std::ostream CheckpointOut
Definition: serialize.hh:66
gem5::GEM5_PACKED::mask
uint32_t mask
Definition: pm4_defines.hh:359
mmCP_HQD_PQ_RPTR_REPORT_ADDR
#define mmCP_HQD_PQ_RPTR_REPORT_ADDR
Definition: pm4_mmio.hh:59
gem5::IT_NOP
@ IT_NOP
Definition: pm4_defines.hh:54
gem5::GEM5_PACKED::base
uint64_t base
Definition: pm4_queues.hh:109
gem5::PM4PacketProcessor::kiq_pkt
PM4MapQueues kiq_pkt
Definition: pm4_packet_processor.hh:60
gem5::DmaVirtDevice
Definition: dma_virt_device.hh:41
gem5::PM4PacketProcessor::writeData
void writeData(PM4Queue *q, PM4WriteData *pkt)
Definition: pm4_packet_processor.cc:336
gem5::PrimaryQueue::doorbellRangeHi
uint32_t doorbellRangeHi
Definition: pm4_queues.hh:371
gem5::MipsISA::vaddr
vaddr
Definition: pra_constants.hh:278
gem5::AMDGPUDevice::CP
GPUCommandProcessor * CP()
Definition: amdgpu_device.hh:182
gem5::PM4Queue::id
int id()
Definition: pm4_queues.hh:406
gem5::GEM5_PACKED::memSpace
uint32_t memSpace
Definition: pm4_defines.hh:337
gem5::GEM5_PACKED::ibSize
uint32_t ibSize
Definition: pm4_defines.hh:383
gem5::GEM5_PACKED::allocFormat
uint32_t allocFormat
Definition: pm4_defines.hh:132
gem5::PM4PacketProcessor::pq_pkt
PM4MapQueues pq_pkt
Definition: pm4_packet_processor.hh:57
std::list< AddrRange >
gem5::HW_REG_SH_MEM_BASES
@ HW_REG_SH_MEM_BASES
Definition: hwreg_defines.hh:58
gem5::GEM5_PACKED::interruptSel
uint32_t interruptSel
Definition: pm4_defines.hh:527
gem5::GEM5_PACKED::sdmax_rlcx_rb_wptr_hi
uint32_t sdmax_rlcx_rb_wptr_hi
Definition: pm4_queues.hh:220
gem5::PM4PacketProcessor::mapQueues
void mapQueues(PM4Queue *q, PM4MapQueues *pkt)
Definition: pm4_packet_processor.cc:366
gem5::GEM5_PACKED::numQueues
uint32_t numQueues
Definition: pm4_defines.hh:134
gem5::PM4PacketProcessor::setRbVmid
void setRbVmid(uint32_t data)
Definition: pm4_packet_processor.cc:960
gem5::PM4PacketProcessor::waitRegMem
void waitRegMem(PM4Queue *q, PM4WaitRegMem *pkt)
Definition: pm4_packet_processor.cc:732
gem5::PM4PacketProcessor::getPqDoorbellOffset
uint32_t getPqDoorbellOffset()
Definition: pm4_packet_processor.hh:86
gem5::GEM5_PACKED::sdmax_rlcx_ib_base_lo
uint32_t sdmax_rlcx_ib_base_lo
Definition: pm4_queues.hh:230
gem5::PM4PacketProcessor::setRbRptrAddrHi
void setRbRptrAddrHi(uint32_t data)
Definition: pm4_packet_processor.cc:990
gem5
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
Definition: gpu_translation_state.hh:37
gem5::QueueType
QueueType
Definition: amdgpu_defines.hh:41
gem5::PrimaryQueue::doorbellRangeLo
uint32_t doorbellRangeLo
Definition: pm4_queues.hh:370
gem5::GEM5_PACKED::eventIdx
uint32_t eventIdx
Definition: pm4_defines.hh:443
gem5::AMDGPUDevice::setSDMAEngine
void setSDMAEngine(Addr offset, SDMAEngine *eng)
Definition: amdgpu_device.cc:562
gem5::PM4PacketProcessor::setHqdPqWptrPollAddr
void setHqdPqWptrPollAddr(uint32_t data)
Definition: pm4_packet_processor.cc:936
gem5::IT_SET_UCONFIG_REG
@ IT_SET_UCONFIG_REG
Definition: pm4_defines.hh:59
gem5::AMDGPUDevice::getGfxVersion
GfxVersion getGfxVersion() const
Definition: amdgpu_device.hh:215
gem5::PM4PacketProcessor::getGARTAddr
Addr getGARTAddr(Addr addr) const
Definition: pm4_packet_processor.cc:92
gem5::PM4PacketProcessor::unserialize
void unserialize(CheckpointIn &cp) override
Unserialize an object.
Definition: pm4_packet_processor.cc:1120
gem5::AMDGPUInterruptHandler::prepareInterruptCookie
void prepareInterruptCookie(ContextID cntxtId, uint32_t ring_id, uint32_t client_id, uint32_t source_id)
Definition: interrupt_handler.cc:75
gem5::GEM5_PACKED::completionSignal
uint64_t completionSignal
Definition: pm4_defines.hh:271
gem5::PM4WaitRegMem
struct gem5::GEM5_PACKED PM4WaitRegMem
gem5::PM4PacketProcessor::setRbBaseHi
void setRbBaseHi(uint32_t data)
Definition: pm4_packet_processor.cc:1014
gem5::GEM5_PACKED::destSelect
uint32_t destSelect
Definition: pm4_defines.hh:460
gem5::PM4PacketProcessor::unmapQueues
void unmapQueues(PM4Queue *q, PM4UnmapQueues *pkt)
Definition: pm4_packet_processor.cc:532
gem5::PM4PacketProcessor::kiq
QueueDesc kiq
Definition: pm4_packet_processor.hh:59
gem5::PM4PacketProcessor::gpuDevice
AMDGPUDevice * gpuDevice
Definition: pm4_packet_processor.hh:54
gem5::SDMAQueueDesc
struct gem5::GEM5_PACKED SDMAQueueDesc
Queue descriptor for SDMA-based user queues (RLC queues).
gem5::TranslationGenPtr
std::unique_ptr< TranslationGen > TranslationGenPtr
Definition: translation_gen.hh:128
gem5::SOC15_IH_CLIENTID_GRBM_CP
@ SOC15_IH_CLIENTID_GRBM_CP
Definition: interrupt_handler.hh:67
mmCP_RB_WPTR_POLL_ADDR_LO
#define mmCP_RB_WPTR_POLL_ADDR_LO
Definition: pm4_mmio.hh:41
gem5::PM4PacketProcessor::setRbDoorbellRangeLo
void setRbDoorbellRangeLo(uint32_t data)
Definition: pm4_packet_processor.cc:1027
panic
#define panic(...)
This implements a cprintf based panic() function.
Definition: logging.hh:188
gem5::GEM5_PACKED::hqd_pq_control
uint32_t hqd_pq_control
Definition: pm4_queues.hh:133
gem5::X86ISA::addr
Bitfield< 3 > addr
Definition: types.hh:84
gem5::GEM5_PACKED::intCtxId
uint32_t intCtxId
Definition: pm4_defines.hh:491
gem5::ComputeAQL
@ ComputeAQL
Definition: amdgpu_defines.hh:47
gem5::PM4PacketProcessor::setHqdIbCtrl
void setHqdIbCtrl(uint32_t data)
Definition: pm4_packet_processor.cc:954

Generated on Sun Jul 30 2023 01:56:54 for gem5 by doxygen 1.8.17