gem5  v22.0.0.2
pm4_packet_processor.cc
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2021 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *
8  * 1. Redistributions of source code must retain the above copyright notice,
9  * this list of conditions and the following disclaimer.
10  *
11  * 2. Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation
13  * and/or other materials provided with the distribution.
14  *
15  * 3. Neither the name of the copyright holder nor the names of its
16  * contributors may be used to endorse or promote products derived from this
17  * software without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  *
31  */
32 
34 
35 #include "debug/PM4PacketProcessor.hh"
38 #include "dev/amdgpu/pm4_mmio.hh"
40 #include "dev/hsa/hw_scheduler.hh"
41 #include "enums/GfxVersion.hh"
43 #include "mem/packet.hh"
44 #include "mem/packet_access.hh"
45 
46 namespace gem5
47 {
48 
49 PM4PacketProcessor::PM4PacketProcessor(const PM4PacketProcessorParams &p)
50  : DmaVirtDevice(p)
51 {
52  memset(&kiq, 0, sizeof(QueueDesc));
53  memset(&pq, 0, sizeof(QueueDesc));
54 }
55 
63 {
64  if (gpuDevice->getVM().inAGP(vaddr)) {
65  // Use AGP translation gen
66  return TranslationGenPtr(
68  }
69 
70  // Assume GART otherwise as this is the only other translation aperture
71  // available to the PM4 packet processor.
72  return TranslationGenPtr(
74 }
75 
78 {
79  AddrRangeList ranges;
80  return ranges;
81 }
82 
83 void
85 {
86  gpuDevice = gpu_device;
87 }
88 
89 Addr
91 {
92  if (!gpuDevice->getVM().inAGP(addr)) {
93  Addr low_bits = bits(addr, 11, 0);
94  addr = (((addr >> 12) << 3) << 12) | low_bits;
95  }
96  return addr;
97 }
98 
99 PM4Queue *
101 {
102  auto result = queuesMap.find(offset);
103  if (result == queuesMap.end()) {
104  if (gfx)
105  mapPq(offset);
106  else
107  mapKiq(offset);
108  return queuesMap[offset];
109  }
110  return result->second;
111 }
112 
113 void
115 {
116  DPRINTF(PM4PacketProcessor, "Mapping KIQ\n");
118 }
119 
120 void
122 {
123  DPRINTF(PM4PacketProcessor, "Mapping PQ\n");
124  newQueue((QueueDesc *)&pq, offset);
125 }
126 
127 void
129  PM4MapQueues *pkt, int id)
130 {
131  if (id == -1)
132  id = queues.size();
133 
134  /* 256 bytes aligned address */
135  mqd->base <<= 8;
136  PM4Queue *q = new PM4Queue(id, mqd, offset, pkt);
137 
138  queuesMap[offset] = q;
139  queues[id] = q;
140 
141  /* we are assumming only compute queues can be map from MQDs */
142  QueueType qt;
143  qt = mqd->aql ? QueueType::ComputeAQL
146 
147  DPRINTF(PM4PacketProcessor, "New PM4 queue %d, base: %p offset: %p\n",
148  id, q->base(), q->offset());
149 }
150 
151 void
153 {
154  q->wptr(wptrOffset * sizeof(uint32_t));
155 
156  if (!q->processing()) {
157  q->processing(true);
158  decodeNext(q);
159  }
160 }
161 
162 void
164 {
165  DPRINTF(PM4PacketProcessor, "PM4 decode queue %d rptr %p, wptr %p\n",
166  q->id(), q->rptr(), q->wptr());
167 
168  if (q->rptr() < q->wptr()) {
169  /* Additional braces here are needed due to a clang compilation bug
170  falsely throwing a "suggest braces around initialization of
171  subject" error. More info on this bug is available here:
172  https://stackoverflow.com/questions/31555584
173  */
174  PM4Header h{{{0, 0, 0, 0, 0, 0}}};
175  auto cb = new DmaVirtCallback<PM4Header>(
176  [ = ] (PM4Header header)
177  { decodeHeader(q, header); }, h);
178  dmaReadVirt(getGARTAddr(q->rptr()), sizeof(uint32_t), cb,
179  &cb->dmaBuffer);
180  } else {
181  q->processing(false);
182  if (q->ib()) {
183  q->ib(false);
184  decodeNext(q);
185  }
186  }
187 }
188 
189 void
191 {
192  DPRINTF(PM4PacketProcessor, "PM4 packet %p\n", header.opcode);
193 
194  q->incRptr(sizeof(PM4Header));
195 
196  DmaVirtCallback<uint64_t> *cb = nullptr;
197  void *dmaBuffer = nullptr;
198 
199  switch(header.opcode) {
200  case IT_NOP: {
201  DPRINTF(PM4PacketProcessor, "PM4 nop, count %p\n", header.count);
202  DPRINTF(PM4PacketProcessor, "rptr %p wptr %p\n", q->rptr(), q->wptr());
203  if (header.count == 0x3fff) {
204  q->fastforwardRptr();
205  } else {
206  q->incRptr((header.count + 1) * sizeof(uint32_t));
207  }
208  decodeNext(q);
209  } break;
210  case IT_WRITE_DATA: {
211  dmaBuffer = new PM4WriteData();
212  cb = new DmaVirtCallback<uint64_t>(
213  [ = ] (const uint64_t &)
214  { writeData(q, (PM4WriteData *)dmaBuffer); });
215  dmaReadVirt(getGARTAddr(q->rptr()), sizeof(PM4WriteData), cb,
216  dmaBuffer);
217  } break;
218 
219  case IT_MAP_QUEUES: {
220  dmaBuffer = new PM4MapQueues();
221  cb = new DmaVirtCallback<uint64_t>(
222  [ = ] (const uint64_t &)
223  { mapQueues(q, (PM4MapQueues *)dmaBuffer); });
224  dmaReadVirt(getGARTAddr(q->rptr()), sizeof(PM4MapQueues), cb,
225  dmaBuffer);
226  } break;
227 
228  case IT_RELEASE_MEM: {
229  dmaBuffer = new PM4ReleaseMem();
230  cb = new DmaVirtCallback<uint64_t>(
231  [ = ] (const uint64_t &)
232  { releaseMem(q, (PM4ReleaseMem *)dmaBuffer); });
233  dmaReadVirt(getGARTAddr(q->rptr()), sizeof(PM4ReleaseMem), cb,
234  dmaBuffer);
235  } break;
236 
237  case IT_INDIRECT_BUFFER: {
238  dmaBuffer = new PM4IndirectBuf();
239  cb = new DmaVirtCallback<uint64_t>(
240  [ = ] (const uint64_t &)
241  { indirectBuffer(q, (PM4IndirectBuf *)dmaBuffer); });
242  dmaReadVirt(getGARTAddr(q->rptr()), sizeof(PM4IndirectBuf), cb,
243  dmaBuffer);
244  } break;
245 
246  case IT_SWITCH_BUFFER: {
247  dmaBuffer = new PM4SwitchBuf();
248  cb = new DmaVirtCallback<uint64_t>(
249  [ = ] (const uint64_t &)
250  { switchBuffer(q, (PM4SwitchBuf *)dmaBuffer); });
251  dmaReadVirt(getGARTAddr(q->rptr()), sizeof(PM4SwitchBuf), cb,
252  dmaBuffer);
253  } break;
254 
255  case IT_SET_UCONFIG_REG: {
256  dmaBuffer = new PM4SetUconfigReg();
257  cb = new DmaVirtCallback<uint64_t>(
258  [ = ] (const uint64_t &)
259  { setUconfigReg(q, (PM4SetUconfigReg *)dmaBuffer); });
260  dmaReadVirt(getGARTAddr(q->rptr()), sizeof(PM4SetUconfigReg), cb,
261  dmaBuffer);
262  } break;
263 
264  case IT_WAIT_REG_MEM: {
265  dmaBuffer = new PM4WaitRegMem();
266  cb = new DmaVirtCallback<uint64_t>(
267  [ = ] (const uint64_t &)
268  { waitRegMem(q, (PM4WaitRegMem *)dmaBuffer); });
269  dmaReadVirt(getGARTAddr(q->rptr()), sizeof(PM4WaitRegMem), cb,
270  dmaBuffer);
271  } break;
272  case IT_MAP_PROCESS: {
273  dmaBuffer = new PM4MapProcess();
274  cb = new DmaVirtCallback<uint64_t>(
275  [ = ] (const uint64_t &)
276  { mapProcess(q, (PM4MapProcess *)dmaBuffer); });
277  dmaReadVirt(getGARTAddr(q->rptr()), sizeof(PM4MapProcess), cb,
278  dmaBuffer);
279  } break;
280 
281  case IT_UNMAP_QUEUES: {
282  dmaBuffer = new PM4UnmapQueues();
283  cb = new DmaVirtCallback<uint64_t>(
284  [ = ] (const uint64_t &)
285  { unmapQueues(q, (PM4UnmapQueues *)dmaBuffer); });
286  dmaReadVirt(getGARTAddr(q->rptr()), sizeof(PM4UnmapQueues), cb,
287  dmaBuffer);
288  } break;
289 
290  case IT_RUN_LIST: {
291  dmaBuffer = new PM4RunList();
292  cb = new DmaVirtCallback<uint64_t>(
293  [ = ] (const uint64_t &)
294  { runList(q, (PM4RunList *)dmaBuffer); });
295  dmaReadVirt(getGARTAddr(q->rptr()), sizeof(PM4RunList), cb,
296  dmaBuffer);
297  } break;
298 
299  case IT_QUERY_STATUS: {
300  dmaBuffer = new PM4QueryStatus();
301  cb = new DmaVirtCallback<uint64_t>(
302  [ = ] (const uint64_t &)
303  { queryStatus(q, (PM4QueryStatus *)dmaBuffer); });
304  dmaReadVirt(getGARTAddr(q->rptr()), sizeof(PM4QueryStatus), cb,
305  dmaBuffer);
306  } break;
307 
308  case IT_INVALIDATE_TLBS: {
309  DPRINTF(PM4PacketProcessor, "Functionaly invalidating all TLBs\n");
311  q->incRptr((header.count + 1) * sizeof(uint32_t));
312  decodeNext(q);
313  } break;
314 
315  default: {
316  warn("PM4 packet opcode 0x%x not supported.\n", header.opcode);
317  DPRINTF(PM4PacketProcessor, "PM4 packet opcode 0x%x not supported.\n",
318  header.opcode);
319  q->incRptr((header.count + 1) * sizeof(uint32_t));
320  decodeNext(q);
321  } break;
322  }
323 }
324 
325 void
327 {
328  q->incRptr(sizeof(PM4WriteData));
329 
330  Addr addr = getGARTAddr(pkt->destAddr);
331  DPRINTF(PM4PacketProcessor, "PM4 write addr: %p data: %p.\n", addr,
332  pkt->data);
333  auto cb = new DmaVirtCallback<uint32_t>(
334  [ = ](const uint32_t &) { writeDataDone(q, pkt, addr); });
335  //TODO: the specs indicate that pkt->data holds the number of dword that
336  //need to be written.
337  dmaWriteVirt(addr, sizeof(uint32_t), cb, &pkt->data);
338 
339  if (!pkt->writeConfirm)
340  decodeNext(q);
341 }
342 
343 void
345 {
346  DPRINTF(PM4PacketProcessor, "PM4 write completed to %p, %p.\n", addr,
347  pkt->data);
348 
349  if (pkt->writeConfirm)
350  decodeNext(q);
351 
352  delete pkt;
353 }
354 
355 void
357 {
358  q->incRptr(sizeof(PM4MapQueues));
359 
360  DPRINTF(PM4PacketProcessor, "MAPQueues queueSel: %d, vmid: %d, me: %d, "
361  "pipe: %d, queueSlot: %d, queueType: %d, allocFormat: %d, "
362  "engineSel: %d, numQueues: %d, checkDisable: %d, doorbellOffset:"
363  " %d, mqdAddr: %lx, wptrAddr: %lx\n", pkt->queueSel, pkt->vmid,
364  pkt->me, pkt->pipe, pkt->queueSlot, pkt->queueType,
365  pkt->allocFormat, pkt->engineSel, pkt->numQueues,
366  pkt->checkDisable, pkt->doorbellOffset, pkt->mqdAddr,
367  pkt->wptrAddr);
368 
369  // Partially reading the mqd with an offset of 96 dwords
370  if (pkt->engineSel == 0 || pkt->engineSel == 1 || pkt->engineSel == 4) {
371  Addr addr = getGARTAddr(pkt->mqdAddr + 96 * sizeof(uint32_t));
372 
374  "Mapping mqd from %p %p (vmid %d - last vmid %d).\n",
375  addr, pkt->mqdAddr, pkt->vmid, gpuDevice->lastVMID());
376 
378  gpuDevice->lastVMID());
379 
380  QueueDesc *mqd = new QueueDesc();
381  memset(mqd, 0, sizeof(QueueDesc));
382  auto cb = new DmaVirtCallback<uint32_t>(
383  [ = ] (const uint32_t &) {
384  processMQD(pkt, q, addr, mqd, gpuDevice->lastVMID()); });
385  dmaReadVirt(addr, sizeof(QueueDesc), cb, mqd);
386  } else if (pkt->engineSel == 2 || pkt->engineSel == 3) {
387  SDMAQueueDesc *sdmaMQD = new SDMAQueueDesc();
388  memset(sdmaMQD, 0, sizeof(SDMAQueueDesc));
389 
390  Addr addr = pkt->mqdAddr;
391 
392  auto cb = new DmaVirtCallback<uint32_t>(
393  [ = ] (const uint32_t &) {
394  processSDMAMQD(pkt, q, addr, sdmaMQD,
395  gpuDevice->lastVMID()); });
396  dmaReadVirt(addr, sizeof(SDMAQueueDesc), cb, sdmaMQD);
397  } else {
398  panic("Unknown engine for MQD: %d\n", pkt->engineSel);
399  }
400 
401  decodeNext(q);
402 }
403 
404 void
406  QueueDesc *mqd, uint16_t vmid)
407 {
408  DPRINTF(PM4PacketProcessor, "MQDbase: %lx, active: %d, vmid: %d, base: "
409  "%lx, rptr: %x aqlPtr: %lx\n", mqd->mqdBase, mqd->hqd_active,
410  mqd->hqd_vmid, mqd->base, mqd->rptr, mqd->aqlRptr);
411 
412  Addr offset = mqd->doorbell & 0x1ffffffc;
413  newQueue(mqd, offset, pkt);
414  PM4Queue *new_q = queuesMap[offset];
415  gpuDevice->insertQId(vmid, new_q->id());
416 
417  if (mqd->aql) {
418  // The queue size is encoded in the cp_hqd_pq_control field in the
419  // kernel driver in the 6 lowest bits as log2(queue_size / 4) - 1
420  // number of dwords.
421  //
422  // https://github.com/RadeonOpenCompute/ROCK-Kernel-Driver/blob/
423  // roc-4.3.x/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c#L3561
424  //
425  // Queue size is then 2^(cp_hqd_pq_control[5:0] + 1) dword. Multiply
426  // by 4 to get the number of bytes as HSAPP expects.
427  int mqd_size = (1 << ((mqd->hqd_pq_control & 0x3f) + 1)) * 4;
428  auto &hsa_pp = gpuDevice->CP()->hsaPacketProc();
429  hsa_pp.setDeviceQueueDesc(mqd->aqlRptr, mqd->base, new_q->id(),
430  mqd_size, 8, GfxVersion::gfx900, offset,
431  mqd->mqdReadIndex);
432  }
433 
434  DPRINTF(PM4PacketProcessor, "PM4 mqd read completed, base %p, mqd %p, "
435  "hqdAQL %d.\n", mqd->base, mqd->mqdBase, mqd->aql);
436 }
437 
// Completion callback after an SDMA MQD has been DMA'd in: registers the
// ring as an RLC queue with the selected SDMA engine and maps the doorbell
// (dword offset shifted to bytes) to that engine on the GPU device.
//
// NOTE(review): this listing is extraction-garbled — the signature line
// (439), the DPRINTF argument lines (444-447) and line 459 were dropped,
// and each remaining line carries a rendered line-number prefix. Restore
// from upstream gem5 v22 before building.
438 void
440  SDMAQueueDesc *mqd, uint16_t vmid)
441 {
442  DPRINTF(PM4PacketProcessor, "SDMAMQD: rb base: %#lx rptr: %#x/%#x wptr: "
443  "%#x/%#x ib: %#x/%#x size: %d ctrl: %#x\n", mqd->rb_base,
448 
449  // Engine 2 points to SDMA0 while engine 3 points to SDMA1
450  assert(pkt->engineSel == 2 || pkt->engineSel == 3);
451  SDMAEngine *sdma_eng = gpuDevice->getSDMAById(pkt->engineSel - 2);
452 
453  // Register RLC queue with SDMA
454  sdma_eng->registerRLCQueue(pkt->doorbellOffset << 2,
455  mqd->rb_base << 8);
456 
457  // Register doorbell with GPU device
458  gpuDevice->setSDMAEngine(pkt->doorbellOffset << 2, sdma_eng);
460 }
461 
462 void
464 {
465  q->incRptr(sizeof(PM4ReleaseMem));
466 
467  Addr addr = getGARTAddr(pkt->addr);
468  DPRINTF(PM4PacketProcessor, "PM4 release_mem event %d eventIdx %d intSel "
469  "%d destSel %d dataSel %d, address %p data %p, intCtx %p\n",
470  pkt->event, pkt->eventIdx, pkt->intSelect, pkt->destSelect,
471  pkt->dataSelect, addr, pkt->dataLo, pkt->intCtxId);
472 
474  "PM4 release_mem destSel 0 bypasses caches to MC.\n");
475 
476  if (pkt->dataSelect == 1) {
477  auto cb = new DmaVirtCallback<uint32_t>(
478  [ = ](const uint32_t &) { releaseMemDone(q, pkt, addr); },
479  pkt->dataLo);
480  dmaWriteVirt(addr, sizeof(uint32_t), cb, &cb->dmaBuffer);
481  } else {
482  panic("Unimplemented PM4ReleaseMem.dataSelect");
483  }
484 }
485 
// Completion callback for releaseMem(): if the packet selected interrupt
// delivery (intSelect == 2), builds the ring id from the queue's me/pipe/
// queue fields and raises the interrupt, then frees the packet and resumes
// decoding.
//
// NOTE(review): extraction-garbled — the signature line (487) and lines
// 498-500 (presumably the interrupt-handler prepare/submit cookie calls;
// see submitInterruptCookie in the index) were dropped, and each remaining
// line carries a rendered line-number prefix. Restore from upstream gem5.
486 void
488 {
489  DPRINTF(PM4PacketProcessor, "PM4 release_mem wrote %d to %p\n",
490  pkt->dataLo, addr);
491  if (pkt->intSelect == 2) {
492  DPRINTF(PM4PacketProcessor, "PM4 interrupt, ctx: %d, me: %d, pipe: "
493  "%d, queueSlot:%d\n", pkt->intCtxId, q->me(), q->pipe(),
494  q->queue());
495  // Rearranging the queue field of PM4MapQueues as the interrupt RingId
496  // format specified in PM4ReleaseMem pkt.
497  uint32_t ringId = (q->me() << 6) | (q->pipe() << 4) | q->queue();
501  }
502 
503  delete pkt;
504  decodeNext(q);
505 }
506 
// Updates the saved MQD read index for the queue registered at the given
// doorbell offset (used so the read index survives unmap/remap).
//
// NOTE(review): the signature line (508) was dropped in extraction —
// parameters appear to be a doorbell offset and a 64-bit read index;
// confirm the function name against upstream gem5.
507 void
509 {
510  assert(queuesMap.count(offset));
511  queuesMap[offset]->getMQD()->mqdReadIndex = rd_idx;
512 }
513 
// Handles an UNMAP_QUEUES packet. queueSel 0 unmaps specific queues by
// doorbell offset (numQueues selects how many of doorbellOffset0..3 are
// used); queueSel 3 unmaps all non-privileged (non-KMD) queues for every
// in-use VMID, writing each MQD back (base un-shifted by 8, starting 96
// dwords in, mirroring the partial read in mapQueues) and unregistering
// the queue from the HSA packet processor.
//
// NOTE(review): extraction-garbled — the signature line (515) and the
// bodies of several switch cases (527-534, 537-542, 545-548, 551-552,
// 559, 590 — the last presumably gpuDevice->deallocateAllQueues(); see
// index) were dropped, and each remaining line carries a rendered
// line-number prefix. Restore from upstream gem5 v22 before building.
514 void
516 {
517  q->incRptr(sizeof(PM4UnmapQueues));
518 
519  DPRINTF(PM4PacketProcessor, "PM4 unmap_queues queueSel: %d numQueues: %d "
520  "pasid: %p doorbellOffset0 %p \n",
521  pkt->queueSel, pkt->numQueues, pkt->pasid, pkt->doorbellOffset0);
522 
523  switch (pkt->queueSel) {
524  case 0:
525  switch (pkt->numQueues) {
526  case 1:
535  break;
536  case 2:
543  break;
544  case 3:
549  break;
550  case 4:
553  break;
554  default:
555  panic("Unrecognized number of queues %d\n", pkt->numQueues);
556  }
557  break;
558  case 1:
560  break;
561  case 2:
562  break;
563  case 3: {
564  auto &hsa_pp = gpuDevice->CP()->hsaPacketProc();
565  for (auto iter : gpuDevice->getUsedVMIDs()) {
566  for (auto id : iter.second) {
567  assert(queues.count(id));
568 
569  // Do not unmap KMD queues
570  if (queues[id]->privileged()) {
571  continue;
572  }
573  QueueDesc *mqd = queues[id]->getMQD();
574  DPRINTF(PM4PacketProcessor, "Unmapping queue %d with read "
575  "index %ld\n", id, mqd->mqdReadIndex);
576  // Partially writing the mqd with an offset of 96 dwords
577  Addr addr = getGARTAddr(queues[id]->mqdBase() +
578  96 * sizeof(uint32_t));
579  Addr mqd_base = queues[id]->mqdBase();
580  auto cb = new DmaVirtCallback<uint32_t>(
581  [ = ] (const uint32_t &) {
582  doneMQDWrite(mqd_base, addr);
583  });
584  mqd->base >>= 8;
585  dmaWriteVirt(addr, sizeof(QueueDesc), cb, mqd);
586  queues.erase(id);
587  hsa_pp.unsetDeviceQueueDesc(id, 8);
588  }
589  }
591  } break;
592  default:
593  panic("Unrecognized options\n");
594  break;
595  }
596 
597  delete pkt;
598  decodeNext(q);
599 }
600 
601 void
603  DPRINTF(PM4PacketProcessor, "PM4 unmap_queues MQD %p wrote to addr %p\n",
604  mqdAddr, addr);
605 }
606 
607 void
609 {
610  q->incRptr(sizeof(PM4MapProcess));
611  uint16_t vmid = gpuDevice->allocateVMID(pkt->pasid);
612 
613  DPRINTF(PM4PacketProcessor, "PM4 map_process pasid: %p vmid: %d quantum: "
614  "%d pt: %p signal: %p\n", pkt->pasid, vmid, pkt->processQuantum,
615  pkt->ptBase, pkt->completionSignal);
616 
617  gpuDevice->getVM().setPageTableBase(vmid, pkt->ptBase);
618 
619  delete pkt;
620  decodeNext(q);
621 }
622 
623 void
625 {
626  DPRINTF(PM4PacketProcessor, "PM4 run_list base: %p size: %d\n",
627  pkt->ibBase, pkt->ibSize);
628 
629  q->incRptr(sizeof(PM4RunList));
630 
631  q->ib(true);
632  q->ibBase(pkt->ibBase);
633  q->rptr(0);
634  q->wptr(pkt->ibSize * sizeof(uint32_t));
635 
636  delete pkt;
637  decodeNext(q);
638 }
639 
640 void
642 {
643  DPRINTF(PM4PacketProcessor, "PM4 indirect buffer, base: %p.\n",
644  pkt->ibBase);
645 
646  q->incRptr(sizeof(PM4IndirectBuf));
647 
648  q->ib(true);
649  q->ibBase(pkt->ibBase);
650  q->wptr(pkt->ibSize * sizeof(uint32_t));
651 
652  decodeNext(q);
653 }
654 
655 void
657 {
658  q->incRptr(sizeof(PM4SwitchBuf));
659 
660  q->ib(true);
661  DPRINTF(PM4PacketProcessor, "PM4 switching buffer, rptr: %p.\n",
662  q->wptr());
663 
664  decodeNext(q);
665 }
666 
667 void
669 {
670  q->incRptr(sizeof(PM4SetUconfigReg));
671 
672  // SET_UCONFIG_REG_START and pkt->offset are dword addresses
673  uint32_t reg_addr = (PACKET3_SET_UCONFIG_REG_START + pkt->offset) * 4;
674 
675  gpuDevice->setRegVal(reg_addr, pkt->data);
676 
677  decodeNext(q);
678 }
679 
680 void
682 {
683  q->incRptr(sizeof(PM4WaitRegMem));
684 
685  DPRINTF(PM4PacketProcessor, "PM4 WAIT_REG_MEM\nfunc: %d memSpace: %d op: "
686  "%d\n", pkt->function, pkt->memSpace, pkt->operation);
687  DPRINTF(PM4PacketProcessor, " AddrLo/Reg1: %lx\n", pkt->memAddrLo);
688  DPRINTF(PM4PacketProcessor, " AddrHi/Reg2: %lx\n", pkt->memAddrHi);
689  DPRINTF(PM4PacketProcessor, " Reference: %lx\n", pkt->reference);
690  DPRINTF(PM4PacketProcessor, " Mask: %lx\n", pkt->mask);
691  DPRINTF(PM4PacketProcessor, " Poll Interval: %lx\n", pkt->pollInterval);
692 
693  decodeNext(q);
694 }
695 
696 void
698 {
699  q->incRptr(sizeof(PM4QueryStatus));
700 
701  DPRINTF(PM4PacketProcessor, "PM4 query status contextId: %d, interruptSel:"
702  " %d command: %d, pasid: %d, doorbellOffset: %d, engineSel: %d "
703  "addr: %lx, data: %lx\n", pkt->contextId, pkt->interruptSel,
704  pkt->command, pkt->pasid, pkt->doorbellOffset, pkt->engineSel,
705  pkt->addr, pkt->data);
706 
707  if (pkt->interruptSel == 0 && pkt->command == 2) {
708  // Write data value to fence address
709  Addr addr = getGARTAddr(pkt->addr);
710  DPRINTF(PM4PacketProcessor, "Using GART addr %lx\n", addr);
711  auto cb = new DmaVirtCallback<uint64_t>(
712  [ = ] (const uint64_t &) { queryStatusDone(q, pkt); }, pkt->data);
713  dmaWriteVirt(addr, sizeof(uint64_t), cb, &cb->dmaBuffer);
714  } else {
715  // No other combinations used in amdkfd v9
716  panic("query_status with interruptSel %d command %d not supported",
717  pkt->interruptSel, pkt->command);
718  }
719 }
720 
721 void
723 {
724  DPRINTF(PM4PacketProcessor, "PM4 query status complete\n");
725 
726  delete pkt;
727  decodeNext(q);
728 }
729 
// MMIO write dispatcher: routes a register write either to a KIQ hardware
// queue descriptor (HQD) register or to a ring-buffer (RB / primary queue)
// register via the setHqd*/setRb* helpers below.
//
// NOTE(review): extraction-garbled — the signature line (731) and every
// `case mm...:` label that was a hyperlink (747, 760, 763, 766, 769, 791,
// 794, 797, 806, 810, 813) were dropped, along with extra statements at
// 749 and 808 (presumably gpuDevice->setDoorbellType(...) calls — see the
// member index). Each remaining line carries a rendered line-number
// prefix. Restore from upstream gem5 v22 before building.
730 void
732 {
733  switch (mmio_offset) {
734  /* Hardware queue descriptor (HQD) registers */
735  case mmCP_HQD_VMID:
736  setHqdVmid(pkt->getLE<uint32_t>());
737  break;
738  case mmCP_HQD_ACTIVE:
739  setHqdActive(pkt->getLE<uint32_t>());
740  break;
741  case mmCP_HQD_PQ_BASE:
742  setHqdPqBase(pkt->getLE<uint32_t>());
743  break;
744  case mmCP_HQD_PQ_BASE_HI:
745  setHqdPqBaseHi(pkt->getLE<uint32_t>());
746  break;
748  setHqdPqDoorbellCtrl(pkt->getLE<uint32_t>());
750  break;
751  case mmCP_HQD_PQ_RPTR:
752  setHqdPqPtr(pkt->getLE<uint32_t>());
753  break;
754  case mmCP_HQD_PQ_WPTR_LO:
755  setHqdPqWptrLo(pkt->getLE<uint32_t>());
756  break;
757  case mmCP_HQD_PQ_WPTR_HI:
758  setHqdPqWptrHi(pkt->getLE<uint32_t>());
759  break;
761  setHqdPqRptrReportAddr(pkt->getLE<uint32_t>());
762  break;
764  setHqdPqRptrReportAddrHi(pkt->getLE<uint32_t>());
765  break;
767  setHqdPqWptrPollAddr(pkt->getLE<uint32_t>());
768  break;
770  setHqdPqWptrPollAddrHi(pkt->getLE<uint32_t>());
771  break;
772  case mmCP_HQD_IB_CONTROL:
773  setHqdIbCtrl(pkt->getLE<uint32_t>());
774  break;
775  /* Ring buffer registers */
776  case mmCP_RB_VMID:
777  setRbVmid(pkt->getLE<uint32_t>());
778  break;
779  case mmCP_RB0_CNTL:
780  setRbCntl(pkt->getLE<uint32_t>());
781  break;
782  case mmCP_RB0_WPTR:
783  setRbWptrLo(pkt->getLE<uint32_t>());
784  break;
785  case mmCP_RB0_WPTR_HI:
786  setRbWptrHi(pkt->getLE<uint32_t>());
787  break;
788  case mmCP_RB0_RPTR_ADDR:
789  setRbRptrAddrLo(pkt->getLE<uint32_t>());
790  break;
792  setRbRptrAddrHi(pkt->getLE<uint32_t>());
793  break;
795  setRbWptrPollAddrLo(pkt->getLE<uint32_t>());
796  break;
798  setRbWptrPollAddrHi(pkt->getLE<uint32_t>());
799  break;
800  case mmCP_RB0_BASE:
801  setRbBaseLo(pkt->getLE<uint32_t>());
802  break;
803  case mmCP_RB0_BASE_HI:
804  setRbBaseHi(pkt->getLE<uint32_t>());
805  break;
807  setRbDoorbellCntrl(pkt->getLE<uint32_t>());
809  break;
811  setRbDoorbellRangeLo(pkt->getLE<uint32_t>());
812  break;
814  setRbDoorbellRangeHi(pkt->getLE<uint32_t>());
815  break;
816  default:
817  break;
818  }
819 }
820 
821 void
823 {
824  kiq.hqd_vmid = data;
825 }
826 
827 void
829 {
830  kiq.hqd_active = data;
831 }
832 
833 void
835 {
837 }
838 
839 void
841 {
843 }
844 
845 void
847 {
849 }
850 
851 void
853 {
854  kiq.rptr = data;
855 }
856 
857 void
859 {
860  /* Write pointer communicated through doorbell value. */
861 }
862 
863 void
865 {
866  /* Write pointer communicated through doorbell value. */
867 }
868 
869 void
871 {
873 }
874 
875 void
877 {
879 }
880 
881 void
883 {
885 }
886 
887 void
889 {
891 }
892 
893 void
895 {
897 }
898 
899 void
901 {
902  pq.hqd_vmid = data;
903 }
904 
905 void
907 {
909 }
910 
911 void
913 {
914  pq.queueWptrLo = data;
915 }
916 
917 void
919 {
920  pq.queueWptrHi = data;
921 }
922 
923 void
925 {
927 }
928 
929 void
931 {
933 }
934 
935 void
937 {
939 }
940 
941 void
943 {
945 }
946 
947 void
949 {
951 }
952 
953 void
955 {
957 }
958 
// Records the primary queue's doorbell control write; the doorbell offset
// is the dword-aligned value (low control bits masked off).
//
// NOTE(review): extraction-garbled — the signature line (960) and body
// line 962 (presumably storing the raw value to
// pq.hqd_pq_doorbell_control; see the member index) were dropped, and each
// remaining line carries a rendered line-number prefix. Restore from
// upstream gem5.
959 void
961 {
963  pq.doorbellOffset = data & 0x1ffffffc;
964 }
965 
966 void
968 {
970 }
971 
972 void
974 {
976 }
977 
978 void
980 {
981  // Serialize the DmaVirtDevice base class
983 
984  int num_queues = queues.size();
985  Addr id[num_queues];
986  Addr mqd_base[num_queues];
987  Addr base[num_queues];
988  Addr rptr[num_queues];
989  Addr wptr[num_queues];
990  Addr ib_base[num_queues];
991  Addr ib_rptr[num_queues];
992  Addr ib_wptr[num_queues];
993  Addr offset[num_queues];
994  bool processing[num_queues];
995  bool ib[num_queues];
996 
997  int i = 0;
998  for (auto iter : queues) {
999  PM4Queue *q = iter.second;
1000  id[i] = q->id();
1001  mqd_base[i] = q->mqdBase();
1002  bool cur_state = q->ib();
1003  q->ib(false);
1004  base[i] = q->base() >> 8;
1005  rptr[i] = q->getRptr();
1006  wptr[i] = q->getWptr();
1007  q->ib(true);
1008  ib_base[i] = q->ibBase();
1009  ib_rptr[i] = q->getRptr();
1010  ib_wptr[i] = q->getWptr();
1011  q->ib(cur_state);
1012  offset[i] = q->offset();
1013  processing[i] = q->processing();
1014  ib[i] = q->ib();
1015  i++;
1016  }
1017 
1018  SERIALIZE_SCALAR(num_queues);
1019  SERIALIZE_ARRAY(id, num_queues);
1020  SERIALIZE_ARRAY(mqd_base, num_queues);
1021  SERIALIZE_ARRAY(base, num_queues);
1022  SERIALIZE_ARRAY(rptr, num_queues);
1023  SERIALIZE_ARRAY(wptr, num_queues);
1024  SERIALIZE_ARRAY(ib_base, num_queues);
1025  SERIALIZE_ARRAY(ib_rptr, num_queues);
1026  SERIALIZE_ARRAY(ib_wptr, num_queues);
1027  SERIALIZE_ARRAY(offset, num_queues);
1028  SERIALIZE_ARRAY(processing, num_queues);
1029  SERIALIZE_ARRAY(ib, num_queues);
1030 }
1031 
1032 void
1034 {
1035  // Serialize the DmaVirtDevice base class
1037 
1038  int num_queues = 0;
1039  UNSERIALIZE_SCALAR(num_queues);
1040 
1041  Addr id[num_queues];
1042  Addr mqd_base[num_queues];
1043  Addr base[num_queues];
1044  Addr rptr[num_queues];
1045  Addr wptr[num_queues];
1046  Addr ib_base[num_queues];
1047  Addr ib_rptr[num_queues];
1048  Addr ib_wptr[num_queues];
1049  Addr offset[num_queues];
1050  bool processing[num_queues];
1051  bool ib[num_queues];
1052 
1053  UNSERIALIZE_ARRAY(id, num_queues);
1054  UNSERIALIZE_ARRAY(mqd_base, num_queues);
1055  UNSERIALIZE_ARRAY(base, num_queues);
1056  UNSERIALIZE_ARRAY(rptr, num_queues);
1057  UNSERIALIZE_ARRAY(wptr, num_queues);
1058  UNSERIALIZE_ARRAY(ib_base, num_queues);
1059  UNSERIALIZE_ARRAY(ib_rptr, num_queues);
1060  UNSERIALIZE_ARRAY(ib_wptr, num_queues);
1061  UNSERIALIZE_ARRAY(offset, num_queues);
1062  UNSERIALIZE_ARRAY(processing, num_queues);
1063  UNSERIALIZE_ARRAY(ib, num_queues);
1064 
1065  for (int i = 0; i < num_queues; i++) {
1066  QueueDesc *mqd = new QueueDesc();
1067  memset(mqd, 0, sizeof(QueueDesc));
1068 
1069  mqd->mqdBase = mqd_base[i] >> 8;
1070  mqd->base = base[i];
1071  mqd->rptr = rptr[i];
1072  mqd->ibBase = ib_base[i];
1073  mqd->ibRptr = ib_rptr[i];
1074 
1075  newQueue(mqd, offset[i], nullptr, id[i]);
1076 
1077  queues[id[i]]->ib(false);
1078  queues[id[i]]->wptr(wptr[i]);
1079  queues[id[i]]->ib(true);
1080  queues[id[i]]->wptr(ib_wptr[i]);
1081  queues[id[i]]->offset(offset[i]);
1082  queues[id[i]]->processing(processing[i]);
1083  queues[id[i]]->ib(ib[i]);
1084  DPRINTF(PM4PacketProcessor, "PM4 queue %d, rptr: %p wptr: %p\n",
1085  queues[id[i]]->id(), queues[id[i]]->rptr(),
1086  queues[id[i]]->wptr());
1087  }
1088 }
1089 
1090 } // namespace gem5
gem5::GEM5_PACKED::ptBase
uint64_t ptBase
Definition: pm4_defines.hh:240
gem5::PM4RunList
struct gem5::GEM5_PACKED PM4RunList
gem5::PM4PacketProcessor::mapPq
void mapPq(Addr offset)
The first graphics queue, the Primary Queueu a.k.a.
Definition: pm4_packet_processor.cc:121
gem5::PM4PacketProcessor::setRbRptrAddrLo
void setRbRptrAddrLo(uint32_t data)
Definition: pm4_packet_processor.cc:924
gem5::PM4QueryStatus
struct gem5::GEM5_PACKED PM4QueryStatus
gem5::PM4WriteData
struct gem5::GEM5_PACKED PM4WriteData
gem5::PM4PacketProcessor::decodeNext
void decodeNext(PM4Queue *q)
This method decodes the next packet in a PM4Queue.
Definition: pm4_packet_processor.cc:163
mmCP_RB0_BASE
#define mmCP_RB0_BASE
Definition: pm4_mmio.hh:39
gem5::IT_RUN_LIST
@ IT_RUN_LIST
Definition: pm4_defines.hh:66
warn
#define warn(...)
Definition: logging.hh:246
gem5::TRAP_ID
@ TRAP_ID
Definition: interrupt_handler.hh:65
gem5::GEM5_PACKED
PM4 packets.
Definition: pm4_defines.hh:77
gem5::PM4PacketProcessor::queuesMap
std::unordered_map< uint32_t, PM4Queue * > queuesMap
Definition: pm4_packet_processor.hh:63
gem5::IT_WAIT_REG_MEM
@ IT_WAIT_REG_MEM
Definition: pm4_defines.hh:56
gem5::GEM5_PACKED::doorbellOffset2
uint32_t doorbellOffset2
Definition: pm4_defines.hh:186
gem5::PM4PacketProcessor::mapProcess
void mapProcess(PM4Queue *q, PM4MapProcess *pkt)
Definition: pm4_packet_processor.cc:608
gem5::GEM5_PACKED::mqdAddr
uint64_t mqdAddr
Definition: pm4_defines.hh:146
gem5::GEM5_PACKED::rptr
uint32_t rptr
Definition: pm4_queues.hh:112
data
const char data[]
Definition: circlebuf.test.cc:48
UNSERIALIZE_SCALAR
#define UNSERIALIZE_SCALAR(scalar)
Definition: serialize.hh:575
gem5::PM4PacketProcessor::setRbWptrLo
void setRbWptrLo(uint32_t data)
Definition: pm4_packet_processor.cc:912
gem5::IT_INVALIDATE_TLBS
@ IT_INVALIDATE_TLBS
Definition: pm4_defines.hh:61
gem5::PM4PacketProcessor::queryStatusDone
void queryStatusDone(PM4Queue *q, PM4QueryStatus *pkt)
Definition: pm4_packet_processor.cc:722
gem5::GEM5_PACKED::operation
uint32_t operation
Definition: pm4_defines.hh:280
gem5::DmaVirtDevice::DmaVirtCallback
Wraps a std::function object in a DmaCallback.
Definition: dma_virt_device.hh:51
gem5::IT_INDIRECT_BUFFER
@ IT_INDIRECT_BUFFER
Definition: pm4_defines.hh:57
gem5::AMDGPUDevice::setDoorbellType
void setDoorbellType(uint32_t offset, QueueType qt)
Set handles to GPU blocks.
Definition: amdgpu_device.cc:424
gem5::GEM5_PACKED::doorbellOffset1
uint32_t doorbellOffset1
Definition: pm4_defines.hh:183
gem5::GEM5_PACKED::hqd_pq_doorbell_control
uint32_t hqd_pq_doorbell_control
Definition: pm4_queues.hh:127
gem5::GEM5_PACKED::checkDisable
uint32_t checkDisable
Definition: pm4_defines.hh:136
gem5::PM4UnmapQueues
struct gem5::GEM5_PACKED PM4UnmapQueues
gem5::DmaVirtDevice::dmaReadVirt
void dmaReadVirt(Addr host_addr, unsigned size, DmaCallback *cb, void *data, Tick delay=0)
Initiate a DMA read from virtual address host_addr.
Definition: dma_virt_device.cc:38
gem5::QueueDesc
struct gem5::GEM5_PACKED QueueDesc
Queue descriptor with relevant MQD attributes.
gem5::GEM5_PACKED::intSelect
uint32_t intSelect
Definition: pm4_defines.hh:404
gem5::AMDGPUDevice::getSDMAById
SDMAEngine * getSDMAById(int id)
Definition: amdgpu_device.cc:437
gem5::Gfx
@ Gfx
Definition: amdgpu_defines.hh:44
gem5::PM4PacketProcessor::writeMMIO
void writeMMIO(PacketPtr pkt, Addr mmio_offset)
Definition: pm4_packet_processor.cc:731
gem5::AMDGPUDevice::deallocateAllQueues
void deallocateAllQueues()
Definition: amdgpu_device.cc:622
gem5::GEM5_PACKED::reference
uint32_t reference
Definition: pm4_defines.hh:300
gem5::CheckpointIn
Definition: serialize.hh:68
gem5::SDMAEngine::registerRLCQueue
void registerRLCQueue(Addr doorbell, Addr rb_base)
Methods for RLC queues.
Definition: sdma_engine.cc:124
gem5::GEM5_PACKED::sdmax_rlcx_rb_cntl
uint32_t sdmax_rlcx_rb_cntl
Definition: pm4_queues.hh:194
gem5::AMDGPUDevice::lastVMID
uint16_t lastVMID()
Definition: amdgpu_device.hh:190
gem5::GEM5_PACKED::destAddr
uint64_t destAddr
Definition: pm4_defines.hh:114
gem5::PM4PacketProcessor::setRbCntl
void setRbCntl(uint32_t data)
Definition: pm4_packet_processor.cc:906
gem5::IT_QUERY_STATUS
@ IT_QUERY_STATUS
Definition: pm4_defines.hh:65
sdma_engine.hh
gem5::RLC
@ RLC
Definition: amdgpu_defines.hh:49
gem5::PM4Queue
Class defining a PM4 queue.
Definition: pm4_queues.hh:361
mmCP_HQD_ACTIVE
#define mmCP_HQD_ACTIVE
Definition: pm4_mmio.hh:53
gem5::PrimaryQueue::queueWptrLo
uint32_t queueWptrLo
Definition: pm4_queues.hh:348
gem5::GEM5_PACKED::hqd_vmid
uint32_t hqd_vmid
Definition: pm4_queues.hh:95
gem5::PM4SetUconfigReg
struct gem5::GEM5_PACKED PM4SetUconfigReg
header
output header
Definition: nop.cc:36
gem5::PM4PacketProcessor::setRbDoorbellCntrl
void setRbDoorbellCntrl(uint32_t data)
Definition: pm4_packet_processor.cc:960
gem5::IT_WRITE_DATA
@ IT_WRITE_DATA
Definition: pm4_defines.hh:55
gem5::AMDGPUDevice::getVM
AMDGPUVM & getVM()
Definition: amdgpu_device.hh:167
mmCP_HQD_IB_CONTROL
#define mmCP_HQD_IB_CONTROL
Definition: pm4_mmio.hh:63
gem5::AMDGPUInterruptHandler::submitInterruptCookie
void submitInterruptCookie()
Definition: interrupt_handler.cc:126
gem5::GEM5_PACKED::queueSel
uint32_t queueSel
Definition: pm4_defines.hh:123
mmCP_RB_VMID
#define mmCP_RB_VMID
Definition: pm4_mmio.hh:43
mmCP_HQD_PQ_WPTR_LO
#define mmCP_HQD_PQ_WPTR_LO
Definition: pm4_mmio.hh:64
gem5::GPUCommandProcessor::hsaPacketProc
HSAPacketProcessor & hsaPacketProc()
Definition: gpu_command_processor.cc:65
gem5::GEM5_PACKED::hqd_pq_wptr_poll_addr_hi
uint32_t hqd_pq_wptr_poll_addr_hi
Definition: pm4_queues.hh:124
pm4_mmio.hh
gem5::X86ISA::base
Bitfield< 51, 12 > base
Definition: pagetable.hh:141
gem5::PM4PacketProcessor::setRbDoorbellRangeHi
void setRbDoorbellRangeHi(uint32_t data)
Definition: pm4_packet_processor.cc:973
gem5::PM4PacketProcessor::serialize
void serialize(CheckpointOut &cp) const override
Serialize an object.
Definition: pm4_packet_processor.cc:979
gem5::PM4PacketProcessor::setHqdPqRptrReportAddr
void setHqdPqRptrReportAddr(uint32_t data)
Definition: pm4_packet_processor.cc:870
gem5::PM4PacketProcessor::process
void process(PM4Queue *q, Addr wptrOffset)
This method start processing a PM4Queue from the current read pointer to the newly communicated write...
Definition: pm4_packet_processor.cc:152
interrupt_handler.hh
gem5::ArmISA::i
Bitfield< 7 > i
Definition: misc_types.hh:67
hw_scheduler.hh
gem5::AMDGPUVM::inAGP
bool inAGP(Addr vaddr)
Methods for resolving apertures.
Definition: amdgpu_vm.hh:177
mmCP_RB0_BASE_HI
#define mmCP_RB0_BASE_HI
Definition: pm4_mmio.hh:51
gem5::ClockedObject::unserialize
void unserialize(CheckpointIn &cp) override
Unserialize an object.
Definition: clocked_object.cc:64
gem5::AMDGPUVM::setPageTableBase
void setPageTableBase(uint16_t vmid, Addr ptBase)
Page table base/start accessors for user VMIDs.
Definition: amdgpu_vm.hh:266
gem5::PM4PacketProcessor::getKiqDoorbellOffset
uint32_t getKiqDoorbellOffset()
Definition: pm4_packet_processor.hh:83
gem5::PM4PacketProcessor::newQueue
void newQueue(QueueDesc *q, Addr offset, PM4MapQueues *pkt=nullptr, int id=-1)
This method creates a new PM4Queue based on a queue descriptor and an offset.
Definition: pm4_packet_processor.cc:128
gem5::HSAPacketProcessor::setDeviceQueueDesc
void setDeviceQueueDesc(uint64_t hostReadIndexPointer, uint64_t basePointer, uint64_t queue_id, uint32_t size, int doorbellSize, GfxVersion gfxVersion, Addr offset=0, uint64_t rd_idx=0)
Definition: hsa_packet_processor.cc:112
gem5::AMDGPUDevice::mapDoorbellToVMID
void mapDoorbellToVMID(Addr doorbell, uint16_t vmid)
Definition: amdgpu_device.cc:629
gem5::GEM5_PACKED::sdmax_rlcx_ib_size
uint32_t sdmax_rlcx_ib_size
Definition: pm4_queues.hh:216
gem5::PM4PacketProcessor::setUconfigReg
void setUconfigReg(PM4Queue *q, PM4SetUconfigReg *pkt)
Definition: pm4_packet_processor.cc:668
gem5::IT_UNMAP_QUEUES
@ IT_UNMAP_QUEUES
Definition: pm4_defines.hh:64
gem5::GEM5_PACKED::hqd_pq_base_lo
uint32_t hqd_pq_base_lo
Definition: pm4_queues.hh:104
gem5::PM4PacketProcessor::releaseMemDone
void releaseMemDone(PM4Queue *q, PM4ReleaseMem *pkt, Addr addr)
Definition: pm4_packet_processor.cc:487
gem5::PM4PacketProcessor::setRbBaseLo
void setRbBaseLo(uint32_t data)
Definition: pm4_packet_processor.cc:948
gem5::PM4MapQueues
struct gem5::GEM5_PACKED PM4MapQueues
gem5::GEM5_PACKED::data
uint32_t data
Definition: pm4_defines.hh:116
packet.hh
gem5::PrimaryQueue::queueRptrAddrHi
uint32_t queueRptrAddrHi
Definition: pm4_queues.hh:340
gem5::AMDGPUVM::invalidateTLBs
void invalidateTLBs()
Definition: amdgpu_vm.cc:174
mmCP_HQD_PQ_DOORBELL_CONTROL
#define mmCP_HQD_PQ_DOORBELL_CONTROL
Definition: pm4_mmio.hh:57
gem5::PM4PacketProcessor::switchBuffer
void switchBuffer(PM4Queue *q, PM4SwitchBuf *pkt)
Definition: pm4_packet_processor.cc:656
gem5::PM4PacketProcessor::releaseMem
void releaseMem(PM4Queue *q, PM4ReleaseMem *pkt)
Definition: pm4_packet_processor.cc:463
gem5::PM4PacketProcessor::mapKiq
void mapKiq(Addr offset)
The first compute queue, the Kernel Interface Queueu a.k.a.
Definition: pm4_packet_processor.cc:114
gem5::GEM5_PACKED::wptrAddr
uint64_t wptrAddr
Definition: pm4_defines.hh:155
gem5::GEM5_PACKED::writeConfirm
uint32_t writeConfirm
Definition: pm4_defines.hh:103
gem5::GEM5_PACKED::hqd_pq_rptr_report_addr_lo
uint32_t hqd_pq_rptr_report_addr_lo
Definition: pm4_queues.hh:118
mmCP_HQD_PQ_WPTR_POLL_ADDR_HI
#define mmCP_HQD_PQ_WPTR_POLL_ADDR_HI
Definition: pm4_mmio.hh:62
gem5::GEM5_PACKED::doorbellOffset3
uint32_t doorbellOffset3
Definition: pm4_defines.hh:189
gem5::GEM5_PACKED::dataLo
uint32_t dataLo
Definition: pm4_defines.hh:427
mmCP_RB_WPTR_POLL_ADDR_HI
#define mmCP_RB_WPTR_POLL_ADDR_HI
Definition: pm4_mmio.hh:42
gem5::IT_SWITCH_BUFFER
@ IT_SWITCH_BUFFER
Definition: pm4_defines.hh:60
gem5::GEM5_PACKED::hqd_pq_rptr_report_addr_hi
uint32_t hqd_pq_rptr_report_addr_hi
Definition: pm4_queues.hh:119
gem5::PM4MapProcess
struct gem5::GEM5_PACKED PM4MapProcess
gem5::AMDGPUDevice::getIH
AMDGPUInterruptHandler * getIH()
Get handles to GPU blocks.
Definition: amdgpu_device.hh:164
gem5::IT_RELEASE_MEM
@ IT_RELEASE_MEM
Definition: pm4_defines.hh:58
gem5::GEM5_PACKED::processQuantum
uint32_t processQuantum
Definition: pm4_defines.hh:232
gem5::PM4SwitchBuf
struct gem5::GEM5_PACKED PM4SwitchBuf
gem5::GEM5_PACKED::mqdReadIndex
uint64_t mqdReadIndex
Definition: pm4_queues.hh:53
gem5::PM4PacketProcessor::setHqdVmid
void setHqdVmid(uint32_t data)
Definition: pm4_packet_processor.cc:822
mmCP_RB0_RPTR_ADDR
#define mmCP_RB0_RPTR_ADDR
Definition: pm4_mmio.hh:44
gem5::GEM5_PACKED::queueType
uint32_t queueType
Definition: pm4_defines.hh:131
gem5::PM4PacketProcessor::setHqdPqRptrReportAddrHi
void setHqdPqRptrReportAddrHi(uint32_t data)
Definition: pm4_packet_processor.cc:876
gem5::GEM5_PACKED::sdmax_rlcx_rb_rptr_hi
uint32_t sdmax_rlcx_rb_rptr_hi
Definition: pm4_queues.hh:205
gem5::VegaISA::p
Bitfield< 54 > p
Definition: pagetable.hh:70
mmCP_HQD_PQ_RPTR
#define mmCP_HQD_PQ_RPTR
Definition: pm4_mmio.hh:58
gem5::GEM5_PACKED::command
uint32_t command
Definition: pm4_defines.hh:377
gem5::PM4PacketProcessor::setRbWptrPollAddrHi
void setRbWptrPollAddrHi(uint32_t data)
Definition: pm4_packet_processor.cc:942
gem5::PM4PacketProcessor::setGPUDevice
void setGPUDevice(AMDGPUDevice *gpu_device)
Definition: pm4_packet_processor.cc:84
gem5::GEM5_PACKED::memAddrHi
uint32_t memAddrHi
Definition: pm4_defines.hh:298
mmCP_HQD_PQ_BASE
#define mmCP_HQD_PQ_BASE
Definition: pm4_mmio.hh:55
DPRINTF
#define DPRINTF(x,...)
Definition: trace.hh:186
PACKET3_SET_UCONFIG_REG_START
#define PACKET3_SET_UCONFIG_REG_START
Value from vega10/pm4_header.h.
Definition: pm4_defines.hh:72
amdgpu_device.hh
gem5::Packet
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
Definition: packet.hh:291
mmCP_HQD_PQ_WPTR_HI
#define mmCP_HQD_PQ_WPTR_HI
Definition: pm4_mmio.hh:65
mmCP_RB0_WPTR
#define mmCP_RB0_WPTR
Definition: pm4_mmio.hh:46
mmCP_HQD_VMID
#define mmCP_HQD_VMID
Definition: pm4_mmio.hh:54
gem5::GEM5_PACKED::dataSelect
uint32_t dataSelect
Definition: pm4_defines.hh:406
gem5::AMDGPUDevice::setRegVal
void setRegVal(uint32_t addr, uint32_t value)
Definition: amdgpu_device.cc:416
gem5::PM4PacketProcessor
Definition: pm4_packet_processor.hh:52
gem5::GEM5_PACKED::aql
uint32_t aql
Definition: pm4_queues.hh:181
gem5::AMDGPUDevice::allocateVMID
uint16_t allocateVMID(uint16_t pasid)
Definition: amdgpu_device.cc:589
mmCP_RB_DOORBELL_CONTROL
#define mmCP_RB_DOORBELL_CONTROL
Definition: pm4_mmio.hh:48
gem5::PM4PacketProcessor::setHqdPqPtr
void setHqdPqPtr(uint32_t data)
Definition: pm4_packet_processor.cc:852
gem5::PM4PacketProcessor::writeDataDone
void writeDataDone(PM4Queue *q, PM4WriteData *pkt, Addr addr)
Definition: pm4_packet_processor.cc:344
gem5::PM4PacketProcessor::translate
TranslationGenPtr translate(Addr vaddr, Addr size) override
Method for functional translation.
Definition: pm4_packet_processor.cc:62
gem5::PM4PacketProcessor::setHqdPqWptrPollAddrHi
void setHqdPqWptrPollAddrHi(uint32_t data)
Definition: pm4_packet_processor.cc:888
gem5::GEM5_PACKED::pipe
uint32_t pipe
Definition: pm4_defines.hh:128
gem5::GEM5_PACKED::doorbellOffset
uint32_t doorbellOffset
Definition: pm4_defines.hh:137
gem5::PM4PacketProcessor::pq
PrimaryQueue pq
Definition: pm4_packet_processor.hh:56
pm4_packet_processor.hh
gem5::GEM5_PACKED::hqd_pq_base_hi
uint32_t hqd_pq_base_hi
Definition: pm4_queues.hh:105
gem5::ArmISA::offset
Bitfield< 23, 0 > offset
Definition: types.hh:144
gem5::PM4PacketProcessor::doneMQDWrite
void doneMQDWrite(Addr mqdAddr, Addr addr)
Definition: pm4_packet_processor.cc:602
gem5::PM4PacketProcessor::processSDMAMQD
void processSDMAMQD(PM4MapQueues *pkt, PM4Queue *q, Addr addr, SDMAQueueDesc *mqd, uint16_t vmid)
Definition: pm4_packet_processor.cc:439
gem5::GEM5_PACKED::rb_base
uint64_t rb_base
Definition: pm4_queues.hh:202
gem5::AMDGPUDevice
Device model for an AMD GPU.
Definition: amdgpu_device.hh:60
gem5::ClockedObject::serialize
void serialize(CheckpointOut &cp) const override
Serialize an object.
Definition: clocked_object.cc:59
mmCP_HQD_PQ_BASE_HI
#define mmCP_HQD_PQ_BASE_HI
Definition: pm4_mmio.hh:56
gem5::GEM5_PACKED::doorbellOffset0
uint32_t doorbellOffset0
Definition: pm4_defines.hh:178
gem5::bits
constexpr T bits(T val, unsigned first, unsigned last)
Extract the bitfield from position 'first' to 'last' (inclusive) from 'val' and right justify it.
Definition: bitfield.hh:76
gem5::GEM5_PACKED::me
uint32_t me
Definition: pm4_defines.hh:127
gem5::GEM5_PACKED::event
uint32_t event
Definition: pm4_defines.hh:383
gpu_command_processor.hh
gem5::GEM5_PACKED::sdmax_rlcx_ib_base_hi
uint32_t sdmax_rlcx_ib_base_hi
Definition: pm4_queues.hh:215
SERIALIZE_ARRAY
#define SERIALIZE_ARRAY(member, size)
Definition: serialize.hh:610
gem5::PM4PacketProcessor::setHqdActive
void setHqdActive(uint32_t data)
Definition: pm4_packet_processor.cc:828
gem5::PM4PacketProcessor::setRbWptrHi
void setRbWptrHi(uint32_t data)
Definition: pm4_packet_processor.cc:918
mmCP_RB0_RPTR_ADDR_HI
#define mmCP_RB0_RPTR_ADDR_HI
Definition: pm4_mmio.hh:45
mmCP_RB_DOORBELL_RANGE_LOWER
#define mmCP_RB_DOORBELL_RANGE_LOWER
Definition: pm4_mmio.hh:49
gem5::DmaVirtDevice::dmaWriteVirt
void dmaWriteVirt(Addr host_addr, unsigned size, DmaCallback *b, void *data, Tick delay=0)
Initiate a DMA write from virtual address host_addr.
Definition: dma_virt_device.cc:45
gem5::GEM5_PACKED::ibRptr
uint32_t ibRptr
Definition: pm4_queues.hh:144
gem5::SDMAEngine
System DMA Engine class for AMD dGPU.
Definition: sdma_engine.hh:48
gem5::GEM5_PACKED::contextId
uint32_t contextId
Definition: pm4_defines.hh:468
gem5::Addr
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition: types.hh:147
gem5::PrimaryQueue::queueRptrAddrLo
uint32_t queueRptrAddrLo
Definition: pm4_queues.hh:339
mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI
#define mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI
Definition: pm4_mmio.hh:60
gem5::PM4PacketProcessor::queryStatus
void queryStatus(PM4Queue *q, PM4QueryStatus *pkt)
Definition: pm4_packet_processor.cc:697
gem5::GEM5_PACKED::function
uint32_t function
Definition: pm4_defines.hh:278
gem5::GEM5_PACKED::memAddrLo
uint32_t memAddrLo
Definition: pm4_defines.hh:289
gem5::PrimaryQueue::doorbellOffset
uint32_t doorbellOffset
Definition: pm4_queues.hh:353
gem5::SOC15_IH_CLIENTID_RLC
@ SOC15_IH_CLIENTID_RLC
Definition: interrupt_handler.hh:58
gem5::PM4ReleaseMem
struct gem5::GEM5_PACKED PM4ReleaseMem
gem5::PM4PacketProcessor::runList
void runList(PM4Queue *q, PM4RunList *pkt)
Definition: pm4_packet_processor.cc:624
SERIALIZE_SCALAR
#define SERIALIZE_SCALAR(scalar)
Definition: serialize.hh:568
gem5::IT_MAP_QUEUES
@ IT_MAP_QUEUES
Definition: pm4_defines.hh:63
gem5::Compute
@ Compute
Definition: amdgpu_defines.hh:43
gem5::GEM5_PACKED::queueSlot
uint32_t queueSlot
Definition: pm4_defines.hh:129
mmCP_RB0_WPTR_HI
#define mmCP_RB0_WPTR_HI
Definition: pm4_mmio.hh:47
packet_access.hh
gem5::PM4PacketProcessor::setHqdPqWptrLo
void setHqdPqWptrLo(uint32_t data)
Definition: pm4_packet_processor.cc:858
gem5::GEM5_PACKED::sdmax_rlcx_rb_rptr
uint32_t sdmax_rlcx_rb_rptr
Definition: pm4_queues.hh:204
gem5::AMDGPUDevice::getVMID
uint16_t getVMID(Addr doorbell)
Definition: amdgpu_device.hh:196
gem5::PM4PacketProcessor::setHqdPqBase
void setHqdPqBase(uint32_t data)
Definition: pm4_packet_processor.cc:834
gem5::AMDGPUVM::AGPTranslationGen
Translation range generators.
Definition: amdgpu_vm.hh:302
gem5::GEM5_PACKED::doorbell
uint32_t doorbell
Definition: pm4_queues.hh:128
gem5::AMDGPUDevice::getUsedVMIDs
std::unordered_map< uint16_t, std::set< int > > & getUsedVMIDs()
Definition: amdgpu_device.cc:635
gem5::PM4IndirectBuf
struct gem5::GEM5_PACKED PM4IndirectBuf
gem5::PM4PacketProcessor::decodeHeader
void decodeHeader(PM4Queue *q, PM4Header header)
This method calls other PM4 packet processing methods based on the header of a PM4 packet.
Definition: pm4_packet_processor.cc:190
gem5::GEM5_PACKED::hqd_ib_control
uint32_t hqd_ib_control
Definition: pm4_queues.hh:146
gem5::PM4PacketProcessor::processMQD
void processMQD(PM4MapQueues *pkt, PM4Queue *q, Addr addr, QueueDesc *mqd, uint16_t vmid)
Definition: pm4_packet_processor.cc:405
gem5::GEM5_PACKED::ibBase
uint64_t ibBase
Definition: pm4_defines.hh:323
gem5::PM4PacketProcessor::setHqdPqDoorbellCtrl
void setHqdPqDoorbellCtrl(uint32_t data)
Definition: pm4_packet_processor.cc:846
gem5::PM4PacketProcessor::indirectBuffer
void indirectBuffer(PM4Queue *q, PM4IndirectBuf *pkt)
Definition: pm4_packet_processor.cc:641
gem5::AMDGPUVM::GARTTranslationGen
Definition: amdgpu_vm.hh:315
gem5::GEM5_PACKED::mqdBase
uint64_t mqdBase
Definition: pm4_queues.hh:92
gem5::GEM5_PACKED::pollInterval
uint32_t pollInterval
Definition: pm4_defines.hh:302
gem5::ArmISA::q
Bitfield< 27 > q
Definition: misc_types.hh:55
gem5::PM4PacketProcessor::PM4PacketProcessor
PM4PacketProcessor(const PM4PacketProcessorParams &p)
Definition: pm4_packet_processor.cc:49
gem5::PM4PacketProcessor::setRbWptrPollAddrLo
void setRbWptrPollAddrLo(uint32_t data)
Definition: pm4_packet_processor.cc:936
gem5::PM4PacketProcessor::setHqdPqBaseHi
void setHqdPqBaseHi(uint32_t data)
Definition: pm4_packet_processor.cc:840
gem5::PM4PacketProcessor::setHqdPqWptrHi
void setHqdPqWptrHi(uint32_t data)
Definition: pm4_packet_processor.cc:864
gem5::GEM5_PACKED::hqd_active
uint32_t hqd_active
Definition: pm4_queues.hh:94
UNSERIALIZE_ARRAY
#define UNSERIALIZE_ARRAY(member, size)
Definition: serialize.hh:618
gem5::IT_MAP_PROCESS
@ IT_MAP_PROCESS
Definition: pm4_defines.hh:62
gem5::GEM5_PACKED::engineSel
uint32_t engineSel
Definition: pm4_defines.hh:133
gem5::GEM5_PACKED::offset
uint32_t offset
Definition: pm4_defines.hh:439
gem5::PM4PacketProcessor::getQueue
PM4Queue * getQueue(Addr offset, bool gfx=false)
Based on an offset communicated through doorbell write, the PM4PacketProcessor identifies which queue...
Definition: pm4_packet_processor.cc:100
gem5::PM4PacketProcessor::queues
std::unordered_map< uint16_t, PM4Queue * > queues
Definition: pm4_packet_processor.hh:61
mmCP_RB0_CNTL
#define mmCP_RB0_CNTL
Definition: pm4_mmio.hh:40
mmCP_HQD_PQ_WPTR_POLL_ADDR
#define mmCP_HQD_PQ_WPTR_POLL_ADDR
Definition: pm4_mmio.hh:61
gem5::GEM5_PACKED::aqlRptr
uint64_t aqlRptr
Definition: pm4_queues.hh:121
gem5::GEM5_PACKED::addr
uint64_t addr
Definition: pm4_defines.hh:414
gem5::PrimaryQueue::queueWptrHi
uint32_t queueWptrHi
Definition: pm4_queues.hh:349
gem5::GEM5_PACKED::pasid
uint32_t pasid
Definition: pm4_defines.hh:172
gem5::PM4PacketProcessor::getAddrRanges
AddrRangeList getAddrRanges() const override
Every PIO device is obliged to provide an implementation that returns the address ranges the device r...
Definition: pm4_packet_processor.cc:77
gem5::GEM5_PACKED::hqd_pq_wptr_poll_addr_lo
uint32_t hqd_pq_wptr_poll_addr_lo
Definition: pm4_queues.hh:123
gem5::PM4PacketProcessor::updateReadIndex
void updateReadIndex(Addr offset, uint64_t rd_idx)
Update read index on doorbell rings.
Definition: pm4_packet_processor.cc:508
gem5::AMDGPUDevice::insertQId
void insertQId(uint16_t vmid, int id)
Definition: amdgpu_device.cc:641
gem5::Packet::getLE
T getLE() const
Get the data in the packet byte swapped from little endian to host endian.
Definition: packet_access.hh:78
gem5::ArmISA::id
Bitfield< 33 > id
Definition: misc_types.hh:251
gem5::GEM5_PACKED::sdmax_rlcx_rb_wptr
uint32_t sdmax_rlcx_rb_wptr
Definition: pm4_queues.hh:206
gem5::AMDGPUDevice::deallocateVmid
void deallocateVmid(uint16_t vmid)
Definition: amdgpu_device.cc:604
gem5::GEM5_PACKED::vmid
uint32_t vmid
Definition: pm4_defines.hh:125
mmCP_RB_DOORBELL_RANGE_UPPER
#define mmCP_RB_DOORBELL_RANGE_UPPER
Definition: pm4_mmio.hh:50
gem5::AMDGPUDevice::deallocatePasid
void deallocatePasid(uint16_t pasid)
Definition: amdgpu_device.cc:610
gem5::CheckpointOut
std::ostream CheckpointOut
Definition: serialize.hh:66
gem5::GEM5_PACKED::mask
uint32_t mask
Definition: pm4_defines.hh:301
mmCP_HQD_PQ_RPTR_REPORT_ADDR
#define mmCP_HQD_PQ_RPTR_REPORT_ADDR
Definition: pm4_mmio.hh:59
gem5::IT_NOP
@ IT_NOP
Definition: pm4_defines.hh:54
gem5::GEM5_PACKED::base
uint64_t base
Definition: pm4_queues.hh:107
gem5::DmaVirtDevice
Definition: dma_virt_device.hh:41
gem5::PM4PacketProcessor::writeData
void writeData(PM4Queue *q, PM4WriteData *pkt)
Definition: pm4_packet_processor.cc:326
gem5::PrimaryQueue::doorbellRangeHi
uint32_t doorbellRangeHi
Definition: pm4_queues.hh:355
gem5::MipsISA::vaddr
vaddr
Definition: pra_constants.hh:278
gem5::AMDGPUDevice::CP
GPUCommandProcessor * CP()
Definition: amdgpu_device.hh:169
gem5::PM4Queue::id
int id()
Definition: pm4_queues.hh:390
gem5::GEM5_PACKED::memSpace
uint32_t memSpace
Definition: pm4_defines.hh:279
gem5::GEM5_PACKED::ibSize
uint32_t ibSize
Definition: pm4_defines.hh:325
gem5::GEM5_PACKED::allocFormat
uint32_t allocFormat
Definition: pm4_defines.hh:132
std::list< AddrRange >
gem5::GEM5_PACKED::interruptSel
uint32_t interruptSel
Definition: pm4_defines.hh:469
gem5::GEM5_PACKED::sdmax_rlcx_rb_wptr_hi
uint32_t sdmax_rlcx_rb_wptr_hi
Definition: pm4_queues.hh:207
gem5::PM4PacketProcessor::mapQueues
void mapQueues(PM4Queue *q, PM4MapQueues *pkt)
Definition: pm4_packet_processor.cc:356
gem5::GEM5_PACKED::numQueues
uint32_t numQueues
Definition: pm4_defines.hh:134
gem5::PM4PacketProcessor::setRbVmid
void setRbVmid(uint32_t data)
Definition: pm4_packet_processor.cc:900
gem5::PM4PacketProcessor::waitRegMem
void waitRegMem(PM4Queue *q, PM4WaitRegMem *pkt)
Definition: pm4_packet_processor.cc:681
gem5::PM4PacketProcessor::getPqDoorbellOffset
uint32_t getPqDoorbellOffset()
Definition: pm4_packet_processor.hh:84
gem5::GEM5_PACKED::sdmax_rlcx_ib_base_lo
uint32_t sdmax_rlcx_ib_base_lo
Definition: pm4_queues.hh:214
gem5::PM4PacketProcessor::setRbRptrAddrHi
void setRbRptrAddrHi(uint32_t data)
Definition: pm4_packet_processor.cc:930
gem5
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
Definition: gpu_translation_state.hh:37
gem5::QueueType
QueueType
Definition: amdgpu_defines.hh:41
gem5::PrimaryQueue::doorbellRangeLo
uint32_t doorbellRangeLo
Definition: pm4_queues.hh:354
gem5::GEM5_PACKED::eventIdx
uint32_t eventIdx
Definition: pm4_defines.hh:385
gem5::AMDGPUDevice::setSDMAEngine
void setSDMAEngine(Addr offset, SDMAEngine *eng)
Definition: amdgpu_device.cc:431
gem5::PM4PacketProcessor::setHqdPqWptrPollAddr
void setHqdPqWptrPollAddr(uint32_t data)
Definition: pm4_packet_processor.cc:882
gem5::IT_SET_UCONFIG_REG
@ IT_SET_UCONFIG_REG
Definition: pm4_defines.hh:59
gem5::PM4PacketProcessor::getGARTAddr
Addr getGARTAddr(Addr addr) const
Definition: pm4_packet_processor.cc:90
gem5::PM4PacketProcessor::unserialize
void unserialize(CheckpointIn &cp) override
Unserialize an object.
Definition: pm4_packet_processor.cc:1033
gem5::AMDGPUInterruptHandler::prepareInterruptCookie
void prepareInterruptCookie(ContextID cntxtId, uint32_t ring_id, uint32_t client_id, uint32_t source_id)
Definition: interrupt_handler.cc:75
gem5::GEM5_PACKED::completionSignal
uint64_t completionSignal
Definition: pm4_defines.hh:271
gem5::PM4WaitRegMem
struct gem5::GEM5_PACKED PM4WaitRegMem
gem5::PM4PacketProcessor::setRbBaseHi
void setRbBaseHi(uint32_t data)
Definition: pm4_packet_processor.cc:954
gem5::GEM5_PACKED::destSelect
uint32_t destSelect
Definition: pm4_defines.hh:402
gem5::PM4PacketProcessor::unmapQueues
void unmapQueues(PM4Queue *q, PM4UnmapQueues *pkt)
Definition: pm4_packet_processor.cc:515
gem5::PM4PacketProcessor::kiq
QueueDesc kiq
Definition: pm4_packet_processor.hh:58
gem5::PM4PacketProcessor::gpuDevice
AMDGPUDevice * gpuDevice
Definition: pm4_packet_processor.hh:54
gem5::SDMAQueueDesc
struct gem5::GEM5_PACKED SDMAQueueDesc
Queue descriptor for SDMA-based user queues (RLC queues).
gem5::TranslationGenPtr
std::unique_ptr< TranslationGen > TranslationGenPtr
Definition: translation_gen.hh:128
mmCP_RB_WPTR_POLL_ADDR_LO
#define mmCP_RB_WPTR_POLL_ADDR_LO
Definition: pm4_mmio.hh:41
gem5::PM4PacketProcessor::setRbDoorbellRangeLo
void setRbDoorbellRangeLo(uint32_t data)
Definition: pm4_packet_processor.cc:967
panic
#define panic(...)
This implements a cprintf based panic() function.
Definition: logging.hh:178
gem5::GEM5_PACKED::hqd_pq_control
uint32_t hqd_pq_control
Definition: pm4_queues.hh:131
gem5::X86ISA::addr
Bitfield< 3 > addr
Definition: types.hh:84
gem5::GEM5_PACKED::intCtxId
uint32_t intCtxId
Definition: pm4_defines.hh:433
gem5::ComputeAQL
@ ComputeAQL
Definition: amdgpu_defines.hh:47
gem5::PM4PacketProcessor::setHqdIbCtrl
void setHqdIbCtrl(uint32_t data)
Definition: pm4_packet_processor.cc:894

Generated on Thu Jul 28 2022 13:32:30 for gem5 by doxygen 1.8.17