gem5  v22.1.0.0
pm4_packet_processor.cc
/*
 * Copyright (c) 2021 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 */

#include "dev/amdgpu/pm4_packet_processor.hh"

#include "debug/PM4PacketProcessor.hh"
#include "dev/amdgpu/amdgpu_device.hh"
#include "dev/amdgpu/amdgpu_vm.hh"
#include "dev/amdgpu/interrupt_handler.hh"
#include "dev/amdgpu/pm4_mmio.hh"
#include "dev/amdgpu/sdma_engine.hh"
#include "dev/hsa/hw_scheduler.hh"
#include "enums/GfxVersion.hh"
#include "gpu-compute/gpu_command_processor.hh"
#include "gpu-compute/shader.hh"
#include "mem/packet.hh"
#include "mem/packet_access.hh"

namespace gem5
{

PM4PacketProcessor::PM4PacketProcessor(const PM4PacketProcessorParams &p)
    : DmaVirtDevice(p)
{
    // Zero the descriptors of the two statically managed queues: the
    // Kernel Interface Queue (KIQ) and the Primary Queue (PQ).
    memset(&kiq, 0, sizeof(QueueDesc));
    memset(&pq, 0, sizeof(QueueDesc));
}

/**
 * Method for functional translation.
 */
TranslationGenPtr
PM4PacketProcessor::translate(Addr vaddr, Addr size)
{
    if (gpuDevice->getVM().inAGP(vaddr)) {
        // Use AGP translation gen
        return TranslationGenPtr(
            new AMDGPUVM::AGPTranslationGen(&gpuDevice->getVM(), vaddr,
                                            size));
    }

    // Assume GART otherwise as this is the only other translation aperture
    // available to the PM4 packet processor.
    return TranslationGenPtr(
        new AMDGPUVM::GARTTranslationGen(&gpuDevice->getVM(), vaddr, size));
}

AddrRangeList
PM4PacketProcessor::getAddrRanges() const
{
    AddrRangeList ranges;
    return ranges;
}

void
PM4PacketProcessor::setGPUDevice(AMDGPUDevice *gpu_device)
{
    gpuDevice = gpu_device;
}

Addr
PM4PacketProcessor::getGARTAddr(Addr addr) const
{
    if (!gpuDevice->getVM().inAGP(addr)) {
        // Scale the 4KiB page number by 8 while keeping the 12-bit page
        // offset intact.
        Addr low_bits = bits(addr, 11, 0);
        addr = (((addr >> 12) << 3) << 12) | low_bits;
    }
    return addr;
}
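
// Worked example for the transformation above: a non-AGP address 0x3042 has
// page number 3 and page offset 0x42, so the GART address returned is
// ((3 << 3) << 12) | 0x42 = 0x18042.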

PM4Queue *
PM4PacketProcessor::getQueue(Addr offset, bool gfx)
{
    auto result = queuesMap.find(offset);
    if (result == queuesMap.end()) {
        if (gfx)
            mapPq(offset);
        else
            mapKiq(offset);
        return queuesMap[offset];
    }
    return result->second;
}

/* The Kernel Interface Queue (KIQ): the first compute queue. */
void
PM4PacketProcessor::mapKiq(Addr offset)
{
    DPRINTF(PM4PacketProcessor, "Mapping KIQ\n");
    newQueue((QueueDesc *)&kiq, offset);
}

/* The Primary Queue (PQ): the first graphics queue. */
void
PM4PacketProcessor::mapPq(Addr offset)
{
    DPRINTF(PM4PacketProcessor, "Mapping PQ\n");
    newQueue((QueueDesc *)&pq, offset);
}

void
PM4PacketProcessor::newQueue(QueueDesc *mqd, Addr offset,
                             PM4MapQueues *pkt, int id)
{
    if (id == -1)
        id = queues.size();

    /* 256 bytes aligned address */
    mqd->base <<= 8;
    PM4Queue *q = new PM4Queue(id, mqd, offset, pkt);

    queuesMap[offset] = q;
    queues[id] = q;

    /* we are assuming only compute queues can be mapped from MQDs */
    QueueType qt;
    qt = mqd->aql ? QueueType::ComputeAQL
                  : QueueType::Compute;
    gpuDevice->setDoorbellType(offset, qt);

    DPRINTF(PM4PacketProcessor, "New PM4 queue %d, base: %p offset: %p, me: "
            "%d, pipe %d queue: %d size: %d\n", id, q->base(), q->offset(),
            q->me(), q->pipe(), q->queue(), q->size());
}

void
PM4PacketProcessor::process(PM4Queue *q, Addr wptrOffset)
{
    q->wptr(wptrOffset * sizeof(uint32_t));

    if (!q->processing()) {
        q->processing(true);
        decodeNext(q);
    }
}

void
PM4PacketProcessor::decodeNext(PM4Queue *q)
{
    DPRINTF(PM4PacketProcessor, "PM4 decode queue %d rptr %p, wptr %p\n",
            q->id(), q->rptr(), q->wptr());

    if (q->rptr() < q->wptr()) {
        /* Additional braces here are needed due to a clang compilation bug
           falsely throwing a "suggest braces around initialization of
           subject" error. More info on this bug is available here:
           https://stackoverflow.com/questions/31555584
         */
        PM4Header h{{{0, 0, 0, 0, 0, 0}}};
        auto cb = new DmaVirtCallback<PM4Header>(
            [ = ] (PM4Header header)
            { decodeHeader(q, header); }, h);
        dmaReadVirt(getGARTAddr(q->rptr()), sizeof(uint32_t), cb,
                    &cb->dmaBuffer);
    } else {
        q->processing(false);
        if (q->ib()) {
            q->ib(false);
            decodeNext(q);
        }
    }
}

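// decodeHeader() reads the one-dword PM4 header fetched above, then
// DMA-reads the typed payload for the decoded opcode and dispatches to the
// matching handler through a DmaVirtCallback once the payload read
// completes.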
void
PM4PacketProcessor::decodeHeader(PM4Queue *q, PM4Header header)
{
    DPRINTF(PM4PacketProcessor, "PM4 packet %p\n", header.opcode);

    q->incRptr(sizeof(PM4Header));

    DmaVirtCallback<uint64_t> *cb = nullptr;
    void *dmaBuffer = nullptr;

    switch(header.opcode) {
      case IT_NOP: {
        DPRINTF(PM4PacketProcessor, "PM4 nop, count %p\n", header.count);
        DPRINTF(PM4PacketProcessor, "rptr %p wptr %p\n", q->rptr(), q->wptr());
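        // A count of 0x3fff (all ones in the 14-bit count field) denotes a
        // header-only NOP; any other value means count + 1 payload dwords
        // follow and are skipped here.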
        if (header.count != 0x3fff) {
            q->incRptr((header.count + 1) * sizeof(uint32_t));
        }
        decodeNext(q);
      } break;
      case IT_WRITE_DATA: {
        dmaBuffer = new PM4WriteData();
        cb = new DmaVirtCallback<uint64_t>(
            [ = ] (const uint64_t &)
            { writeData(q, (PM4WriteData *)dmaBuffer); });
        dmaReadVirt(getGARTAddr(q->rptr()), sizeof(PM4WriteData), cb,
                    dmaBuffer);
      } break;

      case IT_MAP_QUEUES: {
        dmaBuffer = new PM4MapQueues();
        cb = new DmaVirtCallback<uint64_t>(
            [ = ] (const uint64_t &)
            { mapQueues(q, (PM4MapQueues *)dmaBuffer); });
        dmaReadVirt(getGARTAddr(q->rptr()), sizeof(PM4MapQueues), cb,
                    dmaBuffer);
      } break;

      case IT_RELEASE_MEM: {
        dmaBuffer = new PM4ReleaseMem();
        cb = new DmaVirtCallback<uint64_t>(
            [ = ] (const uint64_t &)
            { releaseMem(q, (PM4ReleaseMem *)dmaBuffer); });
        dmaReadVirt(getGARTAddr(q->rptr()), sizeof(PM4ReleaseMem), cb,
                    dmaBuffer);
      } break;

      case IT_INDIRECT_BUFFER: {
        dmaBuffer = new PM4IndirectBuf();
        cb = new DmaVirtCallback<uint64_t>(
            [ = ] (const uint64_t &)
            { indirectBuffer(q, (PM4IndirectBuf *)dmaBuffer); });
        dmaReadVirt(getGARTAddr(q->rptr()), sizeof(PM4IndirectBuf), cb,
                    dmaBuffer);
      } break;

      case IT_SWITCH_BUFFER: {
        dmaBuffer = new PM4SwitchBuf();
        cb = new DmaVirtCallback<uint64_t>(
            [ = ] (const uint64_t &)
            { switchBuffer(q, (PM4SwitchBuf *)dmaBuffer); });
        dmaReadVirt(getGARTAddr(q->rptr()), sizeof(PM4SwitchBuf), cb,
                    dmaBuffer);
      } break;

      case IT_SET_UCONFIG_REG: {
        dmaBuffer = new PM4SetUconfigReg();
        cb = new DmaVirtCallback<uint64_t>(
            [ = ] (const uint64_t &)
            { setUconfigReg(q, (PM4SetUconfigReg *)dmaBuffer); });
        dmaReadVirt(getGARTAddr(q->rptr()), sizeof(PM4SetUconfigReg), cb,
                    dmaBuffer);
      } break;

      case IT_WAIT_REG_MEM: {
        dmaBuffer = new PM4WaitRegMem();
        cb = new DmaVirtCallback<uint64_t>(
            [ = ] (const uint64_t &)
            { waitRegMem(q, (PM4WaitRegMem *)dmaBuffer); });
        dmaReadVirt(getGARTAddr(q->rptr()), sizeof(PM4WaitRegMem), cb,
                    dmaBuffer);
      } break;
      case IT_MAP_PROCESS: {
        dmaBuffer = new PM4MapProcess();
        cb = new DmaVirtCallback<uint64_t>(
            [ = ] (const uint64_t &)
            { mapProcess(q, (PM4MapProcess *)dmaBuffer); });
        dmaReadVirt(getGARTAddr(q->rptr()), sizeof(PM4MapProcess), cb,
                    dmaBuffer);
      } break;

      case IT_UNMAP_QUEUES: {
        dmaBuffer = new PM4UnmapQueues();
        cb = new DmaVirtCallback<uint64_t>(
            [ = ] (const uint64_t &)
            { unmapQueues(q, (PM4UnmapQueues *)dmaBuffer); });
        dmaReadVirt(getGARTAddr(q->rptr()), sizeof(PM4UnmapQueues), cb,
                    dmaBuffer);
      } break;

      case IT_RUN_LIST: {
        dmaBuffer = new PM4RunList();
        cb = new DmaVirtCallback<uint64_t>(
            [ = ] (const uint64_t &)
            { runList(q, (PM4RunList *)dmaBuffer); });
        dmaReadVirt(getGARTAddr(q->rptr()), sizeof(PM4RunList), cb,
                    dmaBuffer);
      } break;

      case IT_QUERY_STATUS: {
        dmaBuffer = new PM4QueryStatus();
        cb = new DmaVirtCallback<uint64_t>(
            [ = ] (const uint64_t &)
            { queryStatus(q, (PM4QueryStatus *)dmaBuffer); });
        dmaReadVirt(getGARTAddr(q->rptr()), sizeof(PM4QueryStatus), cb,
                    dmaBuffer);
      } break;

      case IT_INVALIDATE_TLBS: {
        DPRINTF(PM4PacketProcessor, "Functionally invalidating all TLBs\n");
        gpuDevice->getVM().invalidateTLBs();
        q->incRptr((header.count + 1) * sizeof(uint32_t));
        decodeNext(q);
      } break;

      default: {
        warn("PM4 packet opcode 0x%x not supported.\n", header.opcode);
        DPRINTF(PM4PacketProcessor, "PM4 packet opcode 0x%x not supported.\n",
                header.opcode);
        q->incRptr((header.count + 1) * sizeof(uint32_t));
        decodeNext(q);
      } break;
    }
}

void
PM4PacketProcessor::writeData(PM4Queue *q, PM4WriteData *pkt)
{
    q->incRptr(sizeof(PM4WriteData));

    Addr addr = getGARTAddr(pkt->destAddr);
    DPRINTF(PM4PacketProcessor, "PM4 write addr: %p data: %p.\n", addr,
            pkt->data);
    auto cb = new DmaVirtCallback<uint32_t>(
        [ = ](const uint32_t &) { writeDataDone(q, pkt, addr); });
    // TODO: the specs indicate that pkt->data holds the number of dwords
    // that need to be written.
    dmaWriteVirt(addr, sizeof(uint32_t), cb, &pkt->data);

    if (!pkt->writeConfirm)
        decodeNext(q);
}
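
// Note: with writeConfirm set, decoding resumes from writeDataDone() below
// once the DMA write completes; otherwise writeData() continues decoding
// immediately without waiting for the write.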
void
PM4PacketProcessor::writeDataDone(PM4Queue *q, PM4WriteData *pkt, Addr addr)
{
    DPRINTF(PM4PacketProcessor, "PM4 write completed to %p, %p.\n", addr,
            pkt->data);

    if (pkt->writeConfirm)
        decodeNext(q);

    delete pkt;
}

void
PM4PacketProcessor::mapQueues(PM4Queue *q, PM4MapQueues *pkt)
{
    q->incRptr(sizeof(PM4MapQueues));

    DPRINTF(PM4PacketProcessor, "MAPQueues queueSel: %d, vmid: %d, me: %d, "
            "pipe: %d, queueSlot: %d, queueType: %d, allocFormat: %d, "
            "engineSel: %d, numQueues: %d, checkDisable: %d, doorbellOffset:"
            " %d, mqdAddr: %lx, wptrAddr: %lx\n", pkt->queueSel, pkt->vmid,
            pkt->me, pkt->pipe, pkt->queueSlot, pkt->queueType,
            pkt->allocFormat, pkt->engineSel, pkt->numQueues,
            pkt->checkDisable, pkt->doorbellOffset, pkt->mqdAddr,
            pkt->wptrAddr);

    // Partially reading the mqd with an offset of 96 dwords
    if (pkt->engineSel == 0 || pkt->engineSel == 1 || pkt->engineSel == 4) {
        Addr addr = getGARTAddr(pkt->mqdAddr + 96 * sizeof(uint32_t));

        DPRINTF(PM4PacketProcessor,
                "Mapping mqd from %p %p (vmid %d - last vmid %d).\n",
                addr, pkt->mqdAddr, pkt->vmid, gpuDevice->lastVMID());

        gpuDevice->mapDoorbellToVMID(pkt->doorbellOffset << 2,
                                     gpuDevice->lastVMID());

        QueueDesc *mqd = new QueueDesc();
        memset(mqd, 0, sizeof(QueueDesc));
        auto cb = new DmaVirtCallback<uint32_t>(
            [ = ] (const uint32_t &) {
                processMQD(pkt, q, addr, mqd, gpuDevice->lastVMID()); });
        dmaReadVirt(addr, sizeof(QueueDesc), cb, mqd);
    } else if (pkt->engineSel == 2 || pkt->engineSel == 3) {
        SDMAQueueDesc *sdmaMQD = new SDMAQueueDesc();
        memset(sdmaMQD, 0, sizeof(SDMAQueueDesc));

        // For SDMA we read the full MQD, so there is no offset calculation.
        Addr addr = getGARTAddr(pkt->mqdAddr);

        auto cb = new DmaVirtCallback<uint32_t>(
            [ = ] (const uint32_t &) {
                processSDMAMQD(pkt, q, addr, sdmaMQD,
                               gpuDevice->lastVMID()); });
        dmaReadVirt(addr, sizeof(SDMAQueueDesc), cb, sdmaMQD);
    } else {
        panic("Unknown engine for MQD: %d\n", pkt->engineSel);
    }

    decodeNext(q);
}

void
PM4PacketProcessor::processMQD(PM4MapQueues *pkt, PM4Queue *q, Addr addr,
                               QueueDesc *mqd, uint16_t vmid)
{
    DPRINTF(PM4PacketProcessor, "MQDbase: %lx, active: %d, vmid: %d, base: "
            "%lx, rptr: %x aqlPtr: %lx\n", mqd->mqdBase, mqd->hqd_active,
            mqd->hqd_vmid, mqd->base, mqd->rptr, mqd->aqlRptr);

    Addr offset = mqd->doorbell & 0x1ffffffc;
    newQueue(mqd, offset, pkt);
    PM4Queue *new_q = queuesMap[offset];
    gpuDevice->insertQId(vmid, new_q->id());

    if (mqd->aql) {
        // The queue size is encoded in the cp_hqd_pq_control field in the
        // kernel driver in the 6 lowest bits as log2(queue_size / 4) - 1
        // number of dwords.
        //
        // https://github.com/RadeonOpenCompute/ROCK-Kernel-Driver/blob/
        // roc-4.3.x/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c#L3561
        //
        // Queue size is then 2^(cp_hqd_pq_control[5:0] + 1) dword. Multiply
        // by 4 to get the number of bytes as HSAPP expects.
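        //
        // Worked example: cp_hqd_pq_control[5:0] == 9 decodes to
        // 2^(9 + 1) = 1024 dwords, i.e. 4096 bytes.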
        int mqd_size = (1 << ((mqd->hqd_pq_control & 0x3f) + 1)) * 4;
        auto &hsa_pp = gpuDevice->CP()->hsaPacketProc();
        hsa_pp.setDeviceQueueDesc(mqd->aqlRptr, mqd->base, new_q->id(),
                                  mqd_size, 8, GfxVersion::gfx900, offset,
                                  mqd->mqdReadIndex);
    }

    DPRINTF(PM4PacketProcessor, "PM4 mqd read completed, base %p, mqd %p, "
            "hqdAQL %d.\n", mqd->base, mqd->mqdBase, mqd->aql);
}

void
PM4PacketProcessor::processSDMAMQD(PM4MapQueues *pkt, PM4Queue *q, Addr addr,
                                   SDMAQueueDesc *mqd, uint16_t vmid)
{
    uint32_t rlc_size = 4UL << bits(mqd->sdmax_rlcx_rb_cntl, 6, 1);
    Addr rptr_wb_addr = mqd->sdmax_rlcx_rb_rptr_addr_hi;
    rptr_wb_addr <<= 32;
    rptr_wb_addr |= mqd->sdmax_rlcx_rb_rptr_addr_lo;

    DPRINTF(PM4PacketProcessor, "SDMAMQD: rb base: %#lx rptr: %#x/%#x wptr: "
            "%#x/%#x ib: %#x/%#x size: %d ctrl: %#x rptr wb addr: %#lx\n",
            mqd->rb_base, mqd->sdmax_rlcx_rb_rptr, mqd->sdmax_rlcx_rb_rptr_hi,
            mqd->sdmax_rlcx_rb_wptr, mqd->sdmax_rlcx_rb_wptr_hi,
            mqd->sdmax_rlcx_ib_base_lo, mqd->sdmax_rlcx_ib_base_hi,
            rlc_size, mqd->sdmax_rlcx_rb_cntl, rptr_wb_addr);

    // Engine 2 points to SDMA0 while engine 3 points to SDMA1
    assert(pkt->engineSel == 2 || pkt->engineSel == 3);
    SDMAEngine *sdma_eng = gpuDevice->getSDMAById(pkt->engineSel - 2);

    // Register RLC queue with SDMA
    sdma_eng->registerRLCQueue(pkt->doorbellOffset << 2,
                               mqd->rb_base << 8, rlc_size,
                               rptr_wb_addr);

    // Register doorbell with GPU device
    gpuDevice->setSDMAEngine(pkt->doorbellOffset << 2, sdma_eng);
    gpuDevice->setDoorbellType(pkt->doorbellOffset << 2, QueueType::SDMAGfx);
}

void
PM4PacketProcessor::releaseMem(PM4Queue *q, PM4ReleaseMem *pkt)
{
    q->incRptr(sizeof(PM4ReleaseMem));

    Addr addr = getGARTAddr(pkt->addr);
    DPRINTF(PM4PacketProcessor, "PM4 release_mem event %d eventIdx %d intSel "
            "%d destSel %d dataSel %d, address %p data %p, intCtx %p\n",
            pkt->event, pkt->eventIdx, pkt->intSelect, pkt->destSelect,
            pkt->dataSelect, addr, pkt->dataLo, pkt->intCtxId);

    DPRINTF(PM4PacketProcessor,
            "PM4 release_mem destSel 0 bypasses caches to MC.\n");

    if (pkt->dataSelect == 1) {
        auto cb = new DmaVirtCallback<uint32_t>(
            [ = ](const uint32_t &) { releaseMemDone(q, pkt, addr); },
            pkt->dataLo);
        dmaWriteVirt(addr, sizeof(uint32_t), cb, &cb->dmaBuffer);
    } else {
        panic("Unimplemented PM4ReleaseMem.dataSelect");
    }
}

void
PM4PacketProcessor::releaseMemDone(PM4Queue *q, PM4ReleaseMem *pkt, Addr addr)
{
    DPRINTF(PM4PacketProcessor, "PM4 release_mem wrote %d to %p\n",
            pkt->dataLo, addr);
    if (pkt->intSelect == 2) {
        DPRINTF(PM4PacketProcessor, "PM4 interrupt, id: %d ctx: %d, me: %d, "
                "pipe: %d, queueSlot:%d\n", q->id(), pkt->intCtxId, q->me(),
                q->pipe(), q->queue());

        uint8_t ringId = 0;
        if (q->id() != 0) {
            ringId = (q->queue() << 4) | (q->me() << 2) | q->pipe();
        }
        gpuDevice->getIH()->prepareInterruptCookie(pkt->intCtxId, ringId,
                SOC15_IH_CLIENTID_GRBM_CP, TRAP_ID);
        gpuDevice->getIH()->submitInterruptCookie();
    }

    delete pkt;
    decodeNext(q);
}

void
PM4PacketProcessor::updateReadIndex(Addr offset, uint64_t rd_idx)
{
    assert(queuesMap.count(offset));
    queuesMap[offset]->getMQD()->mqdReadIndex = rd_idx;
}

void
PM4PacketProcessor::unmapQueues(PM4Queue *q, PM4UnmapQueues *pkt)
{
    q->incRptr(sizeof(PM4UnmapQueues));

    DPRINTF(PM4PacketProcessor, "PM4 unmap_queues queueSel: %d numQueues: %d "
            "pasid: %p doorbellOffset0 %p \n",
            pkt->queueSel, pkt->numQueues, pkt->pasid, pkt->doorbellOffset0);

    switch (pkt->queueSel) {
      case 0:
        switch (pkt->numQueues) {
          case 1:
            gpuDevice->deallocateVmid(
                    gpuDevice->getVMID(pkt->doorbellOffset0));
            gpuDevice->deallocateVmid(
                    gpuDevice->getVMID(pkt->doorbellOffset1));
            gpuDevice->deallocateVmid(
                    gpuDevice->getVMID(pkt->doorbellOffset2));
            gpuDevice->deallocateVmid(
                    gpuDevice->getVMID(pkt->doorbellOffset3));
            break;
          case 2:
            gpuDevice->deallocateVmid(
                    gpuDevice->getVMID(pkt->doorbellOffset0));
            gpuDevice->deallocateVmid(
                    gpuDevice->getVMID(pkt->doorbellOffset1));
            gpuDevice->deallocateVmid(
                    gpuDevice->getVMID(pkt->doorbellOffset2));
            break;
          case 3:
            gpuDevice->deallocateVmid(
                    gpuDevice->getVMID(pkt->doorbellOffset0));
            gpuDevice->deallocateVmid(
                    gpuDevice->getVMID(pkt->doorbellOffset1));
            break;
          case 4:
            gpuDevice->deallocateVmid(
                    gpuDevice->getVMID(pkt->doorbellOffset0));
            break;
          default:
            panic("Unrecognized number of queues %d\n", pkt->numQueues);
        }
        break;
      case 1:
        gpuDevice->deallocatePasid(pkt->pasid);
        break;
      case 2:
        break;
      case 3: {
        auto &hsa_pp = gpuDevice->CP()->hsaPacketProc();
        for (auto iter : gpuDevice->getUsedVMIDs()) {
            for (auto id : iter.second) {
                assert(queues.count(id));

                // Do not unmap KMD queues
                if (queues[id]->privileged()) {
                    continue;
                }
                QueueDesc *mqd = queues[id]->getMQD();
                DPRINTF(PM4PacketProcessor, "Unmapping queue %d with read "
                        "index %ld\n", id, mqd->mqdReadIndex);
                // Partially writing the mqd with an offset of 96 dwords
                Addr addr = getGARTAddr(queues[id]->mqdBase() +
                                        96 * sizeof(uint32_t));
                Addr mqd_base = queues[id]->mqdBase();
                auto cb = new DmaVirtCallback<uint32_t>(
                    [ = ] (const uint32_t &) {
                        doneMQDWrite(mqd_base, addr);
                    });
                mqd->base >>= 8;
                dmaWriteVirt(addr, sizeof(QueueDesc), cb, mqd);
                queues.erase(id);
                hsa_pp.unsetDeviceQueueDesc(id, 8);
            }
        }
        gpuDevice->deallocateAllQueues();
      } break;
      default:
        panic("Unrecognized options\n");
        break;
    }

    delete pkt;
    decodeNext(q);
}

void
PM4PacketProcessor::doneMQDWrite(Addr mqdAddr, Addr addr)
{
    DPRINTF(PM4PacketProcessor, "PM4 unmap_queues MQD %p wrote to addr %p\n",
            mqdAddr, addr);
}

void
PM4PacketProcessor::mapProcess(PM4Queue *q, PM4MapProcess *pkt)
{
    q->incRptr(sizeof(PM4MapProcess));
    uint16_t vmid = gpuDevice->allocateVMID(pkt->pasid);

    DPRINTF(PM4PacketProcessor, "PM4 map_process pasid: %p vmid: %d quantum: "
            "%d pt: %p signal: %p\n", pkt->pasid, vmid, pkt->processQuantum,
            pkt->ptBase, pkt->completionSignal);

    gpuDevice->getVM().setPageTableBase(vmid, pkt->ptBase);
    gpuDevice->CP()->shader()->setHwReg(HW_REG_SH_MEM_BASES, pkt->shMemBases);

    // Setup the apertures that gem5 uses. These values are bits [63:48].
    Addr lds_base = (Addr)bits(pkt->shMemBases, 31, 16) << 48;
    Addr scratch_base = (Addr)bits(pkt->shMemBases, 15, 0) << 48;

    // There does not seem to be any register for the limit, but the driver
    // assumes scratch and LDS have a 4GB aperture, so use that.
    gpuDevice->CP()->shader()->setLdsApe(lds_base, lds_base + 0xFFFFFFFF);
    gpuDevice->CP()->shader()->setScratchApe(scratch_base,
                                             scratch_base + 0xFFFFFFFF);

    delete pkt;
    decodeNext(q);
}

void
PM4PacketProcessor::runList(PM4Queue *q, PM4RunList *pkt)
{
    DPRINTF(PM4PacketProcessor, "PM4 run_list base: %p size: %d\n",
            pkt->ibBase, pkt->ibSize);

    q->incRptr(sizeof(PM4RunList));

    q->ib(true);
    q->ibBase(pkt->ibBase);
    q->rptr(0);
    q->wptr(pkt->ibSize * sizeof(uint32_t));

    delete pkt;
    decodeNext(q);
}

void
PM4PacketProcessor::indirectBuffer(PM4Queue *q, PM4IndirectBuf *pkt)
{
    DPRINTF(PM4PacketProcessor, "PM4 indirect buffer, base: %p.\n",
            pkt->ibBase);

    q->incRptr(sizeof(PM4IndirectBuf));

    q->ib(true);
    q->ibBase(pkt->ibBase);
    q->wptr(pkt->ibSize * sizeof(uint32_t));

    decodeNext(q);
}

void
PM4PacketProcessor::switchBuffer(PM4Queue *q, PM4SwitchBuf *pkt)
{
    q->incRptr(sizeof(PM4SwitchBuf));

    q->ib(true);
    DPRINTF(PM4PacketProcessor, "PM4 switching buffer, rptr: %p.\n",
            q->wptr());

    decodeNext(q);
}

void
PM4PacketProcessor::setUconfigReg(PM4Queue *q, PM4SetUconfigReg *pkt)
{
    q->incRptr(sizeof(PM4SetUconfigReg));

    // SET_UCONFIG_REG_START and pkt->offset are dword addresses
    uint32_t reg_addr = (PACKET3_SET_UCONFIG_REG_START + pkt->offset) * 4;
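    // For reference: PACKET3_SET_UCONFIG_REG_START comes from the vega10
    // pm4 headers (0x0000c000 there), so a pkt->offset of 0 targets byte
    // address 0xc000 * 4 = 0x30000.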

    gpuDevice->setRegVal(reg_addr, pkt->data);

    decodeNext(q);
}

void
PM4PacketProcessor::waitRegMem(PM4Queue *q, PM4WaitRegMem *pkt)
{
    q->incRptr(sizeof(PM4WaitRegMem));

    DPRINTF(PM4PacketProcessor, "PM4 WAIT_REG_MEM\nfunc: %d memSpace: %d op: "
            "%d\n", pkt->function, pkt->memSpace, pkt->operation);
    DPRINTF(PM4PacketProcessor, "    AddrLo/Reg1: %lx\n", pkt->memAddrLo);
    DPRINTF(PM4PacketProcessor, "    AddrHi/Reg2: %lx\n", pkt->memAddrHi);
    DPRINTF(PM4PacketProcessor, "    Reference: %lx\n", pkt->reference);
    DPRINTF(PM4PacketProcessor, "    Mask: %lx\n", pkt->mask);
    DPRINTF(PM4PacketProcessor, "    Poll Interval: %lx\n", pkt->pollInterval);

    decodeNext(q);
}

void
PM4PacketProcessor::queryStatus(PM4Queue *q, PM4QueryStatus *pkt)
{
    q->incRptr(sizeof(PM4QueryStatus));

    DPRINTF(PM4PacketProcessor, "PM4 query status contextId: %d, interruptSel:"
            " %d command: %d, pasid: %d, doorbellOffset: %d, engineSel: %d "
            "addr: %lx, data: %lx\n", pkt->contextId, pkt->interruptSel,
            pkt->command, pkt->pasid, pkt->doorbellOffset, pkt->engineSel,
            pkt->addr, pkt->data);

    if (pkt->interruptSel == 0 && pkt->command == 2) {
        // Write data value to fence address
        Addr addr = getGARTAddr(pkt->addr);
        DPRINTF(PM4PacketProcessor, "Using GART addr %lx\n", addr);
        auto cb = new DmaVirtCallback<uint64_t>(
            [ = ] (const uint64_t &) { queryStatusDone(q, pkt); }, pkt->data);
        dmaWriteVirt(addr, sizeof(uint64_t), cb, &cb->dmaBuffer);
    } else {
        // No other combinations used in amdkfd v9
        panic("query_status with interruptSel %d command %d not supported",
              pkt->interruptSel, pkt->command);
    }
}

void
PM4PacketProcessor::queryStatusDone(PM4Queue *q, PM4QueryStatus *pkt)
{
    DPRINTF(PM4PacketProcessor, "PM4 query status complete\n");

    delete pkt;
    decodeNext(q);
}

void
PM4PacketProcessor::writeMMIO(PacketPtr pkt, Addr mmio_offset)
{
    switch (mmio_offset) {
      /* Hardware queue descriptor (HQD) registers */
      case mmCP_HQD_VMID:
        setHqdVmid(pkt->getLE<uint32_t>());
        break;
      case mmCP_HQD_ACTIVE:
        setHqdActive(pkt->getLE<uint32_t>());
        break;
      case mmCP_HQD_PQ_BASE:
        setHqdPqBase(pkt->getLE<uint32_t>());
        break;
      case mmCP_HQD_PQ_BASE_HI:
        setHqdPqBaseHi(pkt->getLE<uint32_t>());
        break;
      case mmCP_HQD_PQ_DOORBELL_CONTROL:
        setHqdPqDoorbellCtrl(pkt->getLE<uint32_t>());
        gpuDevice->setDoorbellType(kiq.doorbell & 0x1ffffffc,
                                   QueueType::Compute);
        break;
      case mmCP_HQD_PQ_RPTR:
        setHqdPqPtr(pkt->getLE<uint32_t>());
        break;
      case mmCP_HQD_PQ_WPTR_LO:
        setHqdPqWptrLo(pkt->getLE<uint32_t>());
        break;
      case mmCP_HQD_PQ_WPTR_HI:
        setHqdPqWptrHi(pkt->getLE<uint32_t>());
        break;
      case mmCP_HQD_PQ_RPTR_REPORT_ADDR:
        setHqdPqRptrReportAddr(pkt->getLE<uint32_t>());
        break;
      case mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI:
        setHqdPqRptrReportAddrHi(pkt->getLE<uint32_t>());
        break;
      case mmCP_HQD_PQ_WPTR_POLL_ADDR:
        setHqdPqWptrPollAddr(pkt->getLE<uint32_t>());
        break;
      case mmCP_HQD_PQ_WPTR_POLL_ADDR_HI:
        setHqdPqWptrPollAddrHi(pkt->getLE<uint32_t>());
        break;
      case mmCP_HQD_PQ_CONTROL:
        setHqdPqControl(pkt->getLE<uint32_t>());
        break;
      case mmCP_HQD_IB_CONTROL:
        setHqdIbCtrl(pkt->getLE<uint32_t>());
        break;
      /* Ring buffer registers */
      case mmCP_RB_VMID:
        setRbVmid(pkt->getLE<uint32_t>());
        break;
      case mmCP_RB0_CNTL:
        setRbCntl(pkt->getLE<uint32_t>());
        break;
      case mmCP_RB0_WPTR:
        setRbWptrLo(pkt->getLE<uint32_t>());
        break;
      case mmCP_RB0_WPTR_HI:
        setRbWptrHi(pkt->getLE<uint32_t>());
        break;
      case mmCP_RB0_RPTR_ADDR:
        setRbRptrAddrLo(pkt->getLE<uint32_t>());
        break;
      case mmCP_RB0_RPTR_ADDR_HI:
        setRbRptrAddrHi(pkt->getLE<uint32_t>());
        break;
      case mmCP_RB_WPTR_POLL_ADDR_LO:
        setRbWptrPollAddrLo(pkt->getLE<uint32_t>());
        break;
      case mmCP_RB_WPTR_POLL_ADDR_HI:
        setRbWptrPollAddrHi(pkt->getLE<uint32_t>());
        break;
      case mmCP_RB0_BASE:
        setRbBaseLo(pkt->getLE<uint32_t>());
        break;
      case mmCP_RB0_BASE_HI:
        setRbBaseHi(pkt->getLE<uint32_t>());
        break;
      case mmCP_RB_DOORBELL_CONTROL:
        setRbDoorbellCntrl(pkt->getLE<uint32_t>());
        gpuDevice->setDoorbellType(pq.doorbellOffset, QueueType::Gfx);
        break;
      case mmCP_RB_DOORBELL_RANGE_LOWER:
        setRbDoorbellRangeLo(pkt->getLE<uint32_t>());
        break;
      case mmCP_RB_DOORBELL_RANGE_UPPER:
        setRbDoorbellRangeHi(pkt->getLE<uint32_t>());
        break;
      default:
        break;
    }
}

void
PM4PacketProcessor::setHqdVmid(uint32_t data)
{
    kiq.hqd_vmid = data;
}

void
PM4PacketProcessor::setHqdActive(uint32_t data)
{
    kiq.hqd_active = data;
}

void
PM4PacketProcessor::setHqdPqBase(uint32_t data)
{
    kiq.hqd_pq_base_lo = data;
}

void
PM4PacketProcessor::setHqdPqBaseHi(uint32_t data)
{
    kiq.hqd_pq_base_hi = data;
}

void
PM4PacketProcessor::setHqdPqDoorbellCtrl(uint32_t data)
{
    kiq.hqd_pq_doorbell_control = data;
}

void
PM4PacketProcessor::setHqdPqPtr(uint32_t data)
{
    kiq.rptr = data;
}

void
PM4PacketProcessor::setHqdPqWptrLo(uint32_t data)
{
    /* Write pointer communicated through doorbell value. */
}

void
PM4PacketProcessor::setHqdPqWptrHi(uint32_t data)
{
    /* Write pointer communicated through doorbell value. */
}

void
PM4PacketProcessor::setHqdPqRptrReportAddr(uint32_t data)
{
    kiq.hqd_pq_rptr_report_addr_lo = data;
}

void
PM4PacketProcessor::setHqdPqRptrReportAddrHi(uint32_t data)
{
    kiq.hqd_pq_rptr_report_addr_hi = data;
}

void
PM4PacketProcessor::setHqdPqWptrPollAddr(uint32_t data)
{
    kiq.hqd_pq_wptr_poll_addr_lo = data;
}

void
PM4PacketProcessor::setHqdPqWptrPollAddrHi(uint32_t data)
{
    kiq.hqd_pq_wptr_poll_addr_hi = data;
}

void
PM4PacketProcessor::setHqdPqControl(uint32_t data)
{
    kiq.hqd_pq_control = data;
}

void
PM4PacketProcessor::setHqdIbCtrl(uint32_t data)
{
    kiq.hqd_ib_control = data;
}

void
PM4PacketProcessor::setRbVmid(uint32_t data)
{
    pq.hqd_vmid = data;
}

void
PM4PacketProcessor::setRbCntl(uint32_t data)
{
    pq.hqd_pq_control = data;
}

void
PM4PacketProcessor::setRbWptrLo(uint32_t data)
{
    pq.queueWptrLo = data;
}

void
PM4PacketProcessor::setRbWptrHi(uint32_t data)
{
    pq.queueWptrHi = data;
}

void
PM4PacketProcessor::setRbRptrAddrLo(uint32_t data)
{
    pq.queueRptrAddrLo = data;
}

void
PM4PacketProcessor::setRbRptrAddrHi(uint32_t data)
{
    pq.queueRptrAddrHi = data;
}

void
PM4PacketProcessor::setRbWptrPollAddrLo(uint32_t data)
{
    pq.hqd_pq_wptr_poll_addr_lo = data;
}

void
PM4PacketProcessor::setRbWptrPollAddrHi(uint32_t data)
{
    pq.hqd_pq_wptr_poll_addr_hi = data;
}

void
PM4PacketProcessor::setRbBaseLo(uint32_t data)
{
    pq.hqd_pq_base_lo = data;
}

void
PM4PacketProcessor::setRbBaseHi(uint32_t data)
{
    pq.hqd_pq_base_hi = data;
}

void
PM4PacketProcessor::setRbDoorbellCntrl(uint32_t data)
{
    pq.hqd_pq_doorbell_control = data;
    pq.doorbellOffset = data & 0x1ffffffc;
}

void
PM4PacketProcessor::setRbDoorbellRangeLo(uint32_t data)
{
    pq.doorbellRangeLo = data;
}

void
PM4PacketProcessor::setRbDoorbellRangeHi(uint32_t data)
{
    pq.doorbellRangeHi = data;
}

void
PM4PacketProcessor::serialize(CheckpointOut &cp) const
{
    // Serialize the DmaVirtDevice base class
    DmaVirtDevice::serialize(cp);

    int num_queues = queues.size();
    Addr id[num_queues];
    Addr mqd_base[num_queues];
    Addr base[num_queues];
    Addr rptr[num_queues];
    Addr wptr[num_queues];
    Addr ib_base[num_queues];
    Addr ib_rptr[num_queues];
    Addr ib_wptr[num_queues];
    Addr offset[num_queues];
    bool processing[num_queues];
    bool ib[num_queues];

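    // Each queue is sampled twice below, once with ib() forced false and
    // once forced true, so both the primary ring and the indirect buffer
    // (IB) read/write pointers are captured in the checkpoint.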
    int i = 0;
    for (auto iter : queues) {
        PM4Queue *q = iter.second;
        id[i] = q->id();
        mqd_base[i] = q->mqdBase();
        bool cur_state = q->ib();
        q->ib(false);
        base[i] = q->base() >> 8;
        rptr[i] = q->getRptr();
        wptr[i] = q->getWptr();
        q->ib(true);
        ib_base[i] = q->ibBase();
        ib_rptr[i] = q->getRptr();
        ib_wptr[i] = q->getWptr();
        q->ib(cur_state);
        offset[i] = q->offset();
        processing[i] = q->processing();
        ib[i] = q->ib();
        i++;
    }

    SERIALIZE_SCALAR(num_queues);
    SERIALIZE_ARRAY(id, num_queues);
    SERIALIZE_ARRAY(mqd_base, num_queues);
    SERIALIZE_ARRAY(base, num_queues);
    SERIALIZE_ARRAY(rptr, num_queues);
    SERIALIZE_ARRAY(wptr, num_queues);
    SERIALIZE_ARRAY(ib_base, num_queues);
    SERIALIZE_ARRAY(ib_rptr, num_queues);
    SERIALIZE_ARRAY(ib_wptr, num_queues);
    SERIALIZE_ARRAY(offset, num_queues);
    SERIALIZE_ARRAY(processing, num_queues);
    SERIALIZE_ARRAY(ib, num_queues);
}

void
PM4PacketProcessor::unserialize(CheckpointIn &cp)
{
    // Unserialize the DmaVirtDevice base class
    DmaVirtDevice::unserialize(cp);

    int num_queues = 0;
    UNSERIALIZE_SCALAR(num_queues);

    Addr id[num_queues];
    Addr mqd_base[num_queues];
    Addr base[num_queues];
    Addr rptr[num_queues];
    Addr wptr[num_queues];
    Addr ib_base[num_queues];
    Addr ib_rptr[num_queues];
    Addr ib_wptr[num_queues];
    Addr offset[num_queues];
    bool processing[num_queues];
    bool ib[num_queues];

    UNSERIALIZE_ARRAY(id, num_queues);
    UNSERIALIZE_ARRAY(mqd_base, num_queues);
    UNSERIALIZE_ARRAY(base, num_queues);
    UNSERIALIZE_ARRAY(rptr, num_queues);
    UNSERIALIZE_ARRAY(wptr, num_queues);
    UNSERIALIZE_ARRAY(ib_base, num_queues);
    UNSERIALIZE_ARRAY(ib_rptr, num_queues);
    UNSERIALIZE_ARRAY(ib_wptr, num_queues);
    UNSERIALIZE_ARRAY(offset, num_queues);
    UNSERIALIZE_ARRAY(processing, num_queues);
    UNSERIALIZE_ARRAY(ib, num_queues);

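    // Rebuild each queue from its saved MQD fields, then restore the ring
    // and IB write pointers by toggling ib(), mirroring serialize() above.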
    for (int i = 0; i < num_queues; i++) {
        QueueDesc *mqd = new QueueDesc();
        memset(mqd, 0, sizeof(QueueDesc));

        mqd->mqdBase = mqd_base[i] >> 8;
        mqd->base = base[i];
        mqd->rptr = rptr[i];
        mqd->ibBase = ib_base[i];
        mqd->ibRptr = ib_rptr[i];

        newQueue(mqd, offset[i], nullptr, id[i]);

        queues[id[i]]->ib(false);
        queues[id[i]]->wptr(wptr[i]);
        queues[id[i]]->ib(true);
        queues[id[i]]->wptr(ib_wptr[i]);
        queues[id[i]]->offset(offset[i]);
        queues[id[i]]->processing(processing[i]);
        queues[id[i]]->ib(ib[i]);
        DPRINTF(PM4PacketProcessor, "PM4 queue %d, rptr: %p wptr: %p\n",
                queues[id[i]]->id(), queues[id[i]]->rptr(),
                queues[id[i]]->wptr());
    }
}

} // namespace gem5