pm4_packet_processor.cc
/*
 * Copyright (c) 2021 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 */

#include "dev/amdgpu/pm4_packet_processor.hh"

#include "debug/PM4PacketProcessor.hh"
#include "dev/amdgpu/amdgpu_device.hh"
#include "dev/amdgpu/amdgpu_vm.hh"
#include "dev/amdgpu/interrupt_handler.hh"
#include "dev/amdgpu/pm4_mmio.hh"
#include "dev/amdgpu/sdma_engine.hh"
#include "dev/hsa/hw_scheduler.hh"
#include "enums/GfxVersion.hh"
#include "gpu-compute/gpu_command_processor.hh"
#include "gpu-compute/shader.hh"
#include "mem/packet.hh"
#include "mem/packet_access.hh"

namespace gem5
{

PM4PacketProcessor::PM4PacketProcessor(const PM4PacketProcessorParams &p)
    : DmaVirtDevice(p)
{
    memset(&kiq, 0, sizeof(QueueDesc));
    memset(&pq, 0, sizeof(QueueDesc));
}

/**
 * Functional translation for addresses the PM4 packet processor reads:
 * addresses are either in the AGP aperture or in the GART aperture.
 */
TranslationGenPtr
PM4PacketProcessor::translate(Addr vaddr, Addr size)
{
    if (gpuDevice->getVM().inAGP(vaddr)) {
        // Use AGP translation gen
        return TranslationGenPtr(
            new AMDGPUVM::AGPTranslationGen(&gpuDevice->getVM(), vaddr, size));
    }

    // Assume GART otherwise as this is the only other translation aperture
    // available to the PM4 packet processor.
    return TranslationGenPtr(
        new AMDGPUVM::GARTTranslationGen(&gpuDevice->getVM(), vaddr, size));
}

AddrRangeList
PM4PacketProcessor::getAddrRanges() const
{
    AddrRangeList ranges;
    return ranges;
}

void
PM4PacketProcessor::setGPUDevice(AMDGPUDevice *gpu_device)
{
    gpuDevice = gpu_device;
}

Addr
PM4PacketProcessor::getGARTAddr(Addr addr) const
{
    if (!gpuDevice->getVM().inAGP(addr)) {
        Addr low_bits = bits(addr, 11, 0);
        addr = (((addr >> 12) << 3) << 12) | low_bits;
    }
    return addr;
}
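
// Worked example of the swizzle above (illustrative values, not from the
// model): addr = 0x1234abc gives low_bits = 0xabc and page number
// addr >> 12 = 0x1234. The page number is scaled by 8 before being
// recombined: ((0x1234 << 3) << 12) | 0xabc = 0x91a0abc. Presumably this
// turns each 4KiB page index into the byte offset of its 8-byte entry in
// the GART table.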

PM4Queue *
PM4PacketProcessor::getQueue(Addr offset, bool gfx)
{
    auto result = queuesMap.find(offset);
    if (result == queuesMap.end()) {
        if (gfx)
            mapPq(offset);
        else
            mapKiq(offset);
        return queuesMap[offset];
    }
    return result->second;
}

void
PM4PacketProcessor::mapKiq(Addr offset)
{
    DPRINTF(PM4PacketProcessor, "Mapping KIQ\n");
    newQueue((QueueDesc *)&kiq, offset);
}

void
PM4PacketProcessor::mapPq(Addr offset)
{
    DPRINTF(PM4PacketProcessor, "Mapping PQ\n");
    newQueue((QueueDesc *)&pq, offset);
}

void
PM4PacketProcessor::newQueue(QueueDesc *mqd, Addr offset,
                             PM4MapQueues *pkt, int id)
{
    if (id == -1)
        id = queues.size();

    /* 256-byte aligned address */
    mqd->base <<= 8;
    PM4Queue *q = new PM4Queue(id, mqd, offset, pkt);

    queuesMap[offset] = q;
    queues[id] = q;

    /* we assume only compute queues can be mapped from MQDs */
    QueueType qt;
    qt = mqd->aql ? QueueType::ComputeAQL
                  : QueueType::Compute;
    gpuDevice->setDoorbellType(offset, qt);

    DPRINTF(PM4PacketProcessor, "New PM4 queue %d, base: %p offset: %p, me: "
            "%d, pipe %d queue: %d size: %d\n", id, q->base(), q->offset(),
            q->me(), q->pipe(), q->queue(), q->size());
}
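
// Example of the 256-byte alignment above (illustrative): a descriptor that
// stores base = 0x1000 describes a ring at GPU virtual address
// 0x1000 << 8 = 0x100000. unmapQueues() applies the inverse shift
// (mqd->base >>= 8) before writing the MQD back to memory.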

void
PM4PacketProcessor::process(PM4Queue *q, Addr wptrOffset)
{
    q->wptr(wptrOffset * sizeof(uint32_t));

    if (!q->processing()) {
        q->processing(true);
        decodeNext(q);
    }
}
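
// Example (illustrative): a doorbell write communicating wptrOffset == 16
// dwords sets the write pointer to 16 * sizeof(uint32_t) = 64 bytes. If the
// queue was idle, decoding starts here; otherwise the active decode loop
// simply sees the larger wptr and keeps consuming packets.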

void
PM4PacketProcessor::decodeNext(PM4Queue *q)
{
    DPRINTF(PM4PacketProcessor, "PM4 decode queue %d rptr %p, wptr %p\n",
            q->id(), q->rptr(), q->wptr());

    if (q->rptr() < q->wptr()) {
        /* Additional braces here are needed due to a clang compilation bug
           falsely throwing a "suggest braces around initialization of
           subject" error. More info on this bug is available here:
           https://stackoverflow.com/questions/31555584
         */
        PM4Header h{{{0, 0, 0, 0, 0, 0}}};
        auto cb = new DmaVirtCallback<PM4Header>(
            [ = ] (PM4Header header)
                { decodeHeader(q, header); }, h);
        dmaReadVirt(getGARTAddr(q->rptr()), sizeof(uint32_t), cb,
                    &cb->dmaBuffer);
    } else {
        q->processing(false);
        if (q->ib()) {
            q->ib(false);
            decodeNext(q);
        }
    }
}

void
PM4PacketProcessor::decodeHeader(PM4Queue *q, PM4Header header)
{
    DPRINTF(PM4PacketProcessor, "PM4 packet %p\n", header.opcode);

    q->incRptr(sizeof(PM4Header));

    DmaVirtCallback<uint64_t> *cb = nullptr;
    void *dmaBuffer = nullptr;

    switch(header.opcode) {
      case IT_NOP: {
        DPRINTF(PM4PacketProcessor, "PM4 nop, count %p\n", header.count);
        DPRINTF(PM4PacketProcessor, "rptr %p wptr %p\n", q->rptr(), q->wptr());
        if (header.count != 0x3fff) {
            q->incRptr((header.count + 1) * sizeof(uint32_t));
        }
        decodeNext(q);
        } break;
      case IT_WRITE_DATA: {
        dmaBuffer = new PM4WriteData();
        cb = new DmaVirtCallback<uint64_t>(
            [ = ] (const uint64_t &)
                { writeData(q, (PM4WriteData *)dmaBuffer); });
        dmaReadVirt(getGARTAddr(q->rptr()), sizeof(PM4WriteData), cb,
                    dmaBuffer);
        } break;

      case IT_MAP_QUEUES: {
        dmaBuffer = new PM4MapQueues();
        cb = new DmaVirtCallback<uint64_t>(
            [ = ] (const uint64_t &)
                { mapQueues(q, (PM4MapQueues *)dmaBuffer); });
        dmaReadVirt(getGARTAddr(q->rptr()), sizeof(PM4MapQueues), cb,
                    dmaBuffer);
        } break;

      case IT_RELEASE_MEM: {
        dmaBuffer = new PM4ReleaseMem();
        cb = new DmaVirtCallback<uint64_t>(
            [ = ] (const uint64_t &)
                { releaseMem(q, (PM4ReleaseMem *)dmaBuffer); });
        dmaReadVirt(getGARTAddr(q->rptr()), sizeof(PM4ReleaseMem), cb,
                    dmaBuffer);
        } break;

      case IT_INDIRECT_BUFFER: {
        dmaBuffer = new PM4IndirectBuf();
        cb = new DmaVirtCallback<uint64_t>(
            [ = ] (const uint64_t &)
                { indirectBuffer(q, (PM4IndirectBuf *)dmaBuffer); });
        dmaReadVirt(getGARTAddr(q->rptr()), sizeof(PM4IndirectBuf), cb,
                    dmaBuffer);
        } break;

      case IT_SWITCH_BUFFER: {
        dmaBuffer = new PM4SwitchBuf();
        cb = new DmaVirtCallback<uint64_t>(
            [ = ] (const uint64_t &)
                { switchBuffer(q, (PM4SwitchBuf *)dmaBuffer); });
        dmaReadVirt(getGARTAddr(q->rptr()), sizeof(PM4SwitchBuf), cb,
                    dmaBuffer);
        } break;

      case IT_SET_UCONFIG_REG: {
        dmaBuffer = new PM4SetUconfigReg();
        cb = new DmaVirtCallback<uint64_t>(
            [ = ] (const uint64_t &)
                { setUconfigReg(q, (PM4SetUconfigReg *)dmaBuffer); });
        dmaReadVirt(getGARTAddr(q->rptr()), sizeof(PM4SetUconfigReg), cb,
                    dmaBuffer);
        } break;

      case IT_WAIT_REG_MEM: {
        dmaBuffer = new PM4WaitRegMem();
        cb = new DmaVirtCallback<uint64_t>(
            [ = ] (const uint64_t &)
                { waitRegMem(q, (PM4WaitRegMem *)dmaBuffer); });
        dmaReadVirt(getGARTAddr(q->rptr()), sizeof(PM4WaitRegMem), cb,
                    dmaBuffer);
        } break;
      case IT_MAP_PROCESS: {
        if (gpuDevice->getGfxVersion() == GfxVersion::gfx90a) {
            dmaBuffer = new PM4MapProcessMI200();
            cb = new DmaVirtCallback<uint64_t>(
                [ = ] (const uint64_t &)
                    { mapProcessGfx90a(q, (PM4MapProcessMI200 *)dmaBuffer); });
            dmaReadVirt(getGARTAddr(q->rptr()), sizeof(PM4MapProcessMI200),
                        cb, dmaBuffer);
        } else {
            dmaBuffer = new PM4MapProcess();
            cb = new DmaVirtCallback<uint64_t>(
                [ = ] (const uint64_t &)
                    { mapProcessGfx9(q, (PM4MapProcess *)dmaBuffer); });
            dmaReadVirt(getGARTAddr(q->rptr()), sizeof(PM4MapProcess), cb,
                        dmaBuffer);
        }
        } break;

      case IT_UNMAP_QUEUES: {
        dmaBuffer = new PM4UnmapQueues();
        cb = new DmaVirtCallback<uint64_t>(
            [ = ] (const uint64_t &)
                { unmapQueues(q, (PM4UnmapQueues *)dmaBuffer); });
        dmaReadVirt(getGARTAddr(q->rptr()), sizeof(PM4UnmapQueues), cb,
                    dmaBuffer);
        } break;

      case IT_RUN_LIST: {
        dmaBuffer = new PM4RunList();
        cb = new DmaVirtCallback<uint64_t>(
            [ = ] (const uint64_t &)
                { runList(q, (PM4RunList *)dmaBuffer); });
        dmaReadVirt(getGARTAddr(q->rptr()), sizeof(PM4RunList), cb,
                    dmaBuffer);
        } break;

      case IT_QUERY_STATUS: {
        dmaBuffer = new PM4QueryStatus();
        cb = new DmaVirtCallback<uint64_t>(
            [ = ] (const uint64_t &)
                { queryStatus(q, (PM4QueryStatus *)dmaBuffer); });
        dmaReadVirt(getGARTAddr(q->rptr()), sizeof(PM4QueryStatus), cb,
                    dmaBuffer);
        } break;

      case IT_INVALIDATE_TLBS: {
        DPRINTF(PM4PacketProcessor, "Functionally invalidating all TLBs\n");
        gpuDevice->getVM().invalidateTLBs();
        q->incRptr((header.count + 1) * sizeof(uint32_t));
        decodeNext(q);
        } break;

      default: {
        warn("PM4 packet opcode 0x%x not supported.\n", header.opcode);
        DPRINTF(PM4PacketProcessor, "PM4 packet opcode 0x%x not supported.\n",
                header.opcode);
        q->incRptr((header.count + 1) * sizeof(uint32_t));
        decodeNext(q);
        } break;
    }
}

void
PM4PacketProcessor::writeData(PM4Queue *q, PM4WriteData *pkt)
{
    q->incRptr(sizeof(PM4WriteData));

    Addr addr = getGARTAddr(pkt->destAddr);
    DPRINTF(PM4PacketProcessor, "PM4 write addr: %p data: %p.\n", addr,
            pkt->data);
    auto cb = new DmaVirtCallback<uint32_t>(
        [ = ](const uint32_t &) { writeDataDone(q, pkt, addr); });
    // TODO: the specs indicate that pkt->data holds the number of dwords
    // that need to be written.
    dmaWriteVirt(addr, sizeof(uint32_t), cb, &pkt->data);

    if (!pkt->writeConfirm)
        decodeNext(q);
}

void
PM4PacketProcessor::writeDataDone(PM4Queue *q, PM4WriteData *pkt, Addr addr)
{
    DPRINTF(PM4PacketProcessor, "PM4 write completed to %p, %p.\n", addr,
            pkt->data);

    if (pkt->writeConfirm)
        decodeNext(q);

    delete pkt;
}

void
PM4PacketProcessor::mapQueues(PM4Queue *q, PM4MapQueues *pkt)
{
    q->incRptr(sizeof(PM4MapQueues));

    DPRINTF(PM4PacketProcessor, "MAPQueues queueSel: %d, vmid: %d, me: %d, "
            "pipe: %d, queueSlot: %d, queueType: %d, allocFormat: %d, "
            "engineSel: %d, numQueues: %d, checkDisable: %d, doorbellOffset:"
            " %d, mqdAddr: %lx, wptrAddr: %lx\n", pkt->queueSel, pkt->vmid,
            pkt->me, pkt->pipe, pkt->queueSlot, pkt->queueType,
            pkt->allocFormat, pkt->engineSel, pkt->numQueues,
            pkt->checkDisable, pkt->doorbellOffset, pkt->mqdAddr,
            pkt->wptrAddr);

    // Partially reading the mqd with an offset of 96 dwords
    if (pkt->engineSel == 0 || pkt->engineSel == 1 || pkt->engineSel == 4) {
        Addr addr = getGARTAddr(pkt->mqdAddr + 96 * sizeof(uint32_t));
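        // In other words (illustrative): the DMA below starts 96 * 4 = 384
        // bytes into the MQD, which presumably skips the driver-private
        // header so only the hardware queue descriptor fields are fetched.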

        DPRINTF(PM4PacketProcessor,
                "Mapping mqd from %p %p (vmid %d - last vmid %d).\n",
                addr, pkt->mqdAddr, pkt->vmid, gpuDevice->lastVMID());

        gpuDevice->mapDoorbellToVMID(pkt->doorbellOffset << 2,
                                     gpuDevice->lastVMID());

        QueueDesc *mqd = new QueueDesc();
        memset(mqd, 0, sizeof(QueueDesc));
        auto cb = new DmaVirtCallback<uint32_t>(
            [ = ] (const uint32_t &) {
                processMQD(pkt, q, addr, mqd, gpuDevice->lastVMID()); });
        dmaReadVirt(addr, sizeof(QueueDesc), cb, mqd);
    } else if (pkt->engineSel == 2 || pkt->engineSel == 3) {
        SDMAQueueDesc *sdmaMQD = new SDMAQueueDesc();
        memset(sdmaMQD, 0, sizeof(SDMAQueueDesc));

        // For SDMA we read the full MQD, so there is no offset calculation.
        Addr addr = getGARTAddr(pkt->mqdAddr);

        auto cb = new DmaVirtCallback<uint32_t>(
            [ = ] (const uint32_t &) {
                processSDMAMQD(pkt, q, addr, sdmaMQD,
                               gpuDevice->lastVMID()); });
        dmaReadVirt(addr, sizeof(SDMAQueueDesc), cb, sdmaMQD);
    } else {
        panic("Unknown engine for MQD: %d\n", pkt->engineSel);
    }

    decodeNext(q);
}

void
PM4PacketProcessor::processMQD(PM4MapQueues *pkt, PM4Queue *q, Addr addr,
                               QueueDesc *mqd, uint16_t vmid)
{
    DPRINTF(PM4PacketProcessor, "MQDbase: %lx, active: %d, vmid: %d, base: "
            "%lx, rptr: %x aqlPtr: %lx\n", mqd->mqdBase, mqd->hqd_active,
            mqd->hqd_vmid, mqd->base, mqd->rptr, mqd->aqlRptr);

    Addr offset = mqd->doorbell & 0x1ffffffc;
    newQueue(mqd, offset, pkt);
    PM4Queue *new_q = queuesMap[offset];
    gpuDevice->insertQId(vmid, new_q->id());

    if (mqd->aql) {
        // The queue size is encoded in the cp_hqd_pq_control field in the
        // kernel driver in the 6 lowest bits as log2(queue_size / 4) - 1
        // number of dwords.
        //
        // https://github.com/RadeonOpenCompute/ROCK-Kernel-Driver/blob/
        // roc-4.3.x/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c#L3561
        //
        // Queue size is then 2^(cp_hqd_pq_control[5:0] + 1) dwords. Multiply
        // by 4 to get the number of bytes as HSAPP expects.
        int mqd_size = (1 << ((mqd->hqd_pq_control & 0x3f) + 1)) * 4;
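        // Worked example (illustrative): hqd_pq_control[5:0] == 9 gives
        // 2^(9 + 1) = 1024 dwords, so mqd_size = 1024 * 4 = 4096 bytes.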
        auto &hsa_pp = gpuDevice->CP()->hsaPacketProc();
        hsa_pp.setDeviceQueueDesc(mqd->aqlRptr, mqd->base, new_q->id(),
                                  mqd_size, 8, GfxVersion::gfx900, offset,
                                  mqd->mqdReadIndex);
    }

    DPRINTF(PM4PacketProcessor, "PM4 mqd read completed, base %p, mqd %p, "
            "hqdAQL %d.\n", mqd->base, mqd->mqdBase, mqd->aql);
}

void
PM4PacketProcessor::processSDMAMQD(PM4MapQueues *pkt, PM4Queue *q, Addr addr,
                                   SDMAQueueDesc *mqd, uint16_t vmid)
{
    uint32_t rlc_size = 4UL << bits(mqd->sdmax_rlcx_rb_cntl, 6, 1);
    Addr rptr_wb_addr = mqd->sdmax_rlcx_rb_rptr_addr_hi;
    rptr_wb_addr <<= 32;
    rptr_wb_addr |= mqd->sdmax_rlcx_rb_rptr_addr_lo;

    DPRINTF(PM4PacketProcessor, "SDMAMQD: rb base: %#lx rptr: %#x/%#x wptr: "
            "%#x/%#x ib: %#x/%#x size: %d ctrl: %#x rptr wb addr: %#lx\n",
            mqd->rb_base(), mqd->sdmax_rlcx_rb_rptr,
            mqd->sdmax_rlcx_rb_rptr_hi, mqd->sdmax_rlcx_rb_wptr,
            mqd->sdmax_rlcx_rb_wptr_hi, mqd->sdmax_rlcx_ib_base_lo,
            mqd->sdmax_rlcx_ib_base_hi, rlc_size, mqd->sdmax_rlcx_rb_cntl,
            rptr_wb_addr);

    // Engine 2 points to SDMA0 while engine 3 points to SDMA1
    assert(pkt->engineSel == 2 || pkt->engineSel == 3);
    SDMAEngine *sdma_eng = gpuDevice->getSDMAById(pkt->engineSel - 2);

    // Register RLC queue with SDMA
    sdma_eng->registerRLCQueue(pkt->doorbellOffset << 2, addr, mqd);

    // Register doorbell with GPU device
    gpuDevice->setSDMAEngine(pkt->doorbellOffset << 2, sdma_eng);
    gpuDevice->setDoorbellType(pkt->doorbellOffset << 2, QueueType::SDMAGfx);
}
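
// Worked example of the rlc_size decode above (illustrative):
// sdmax_rlcx_rb_cntl[6:1] == 10 gives 4 << 10 = 4096 bytes, i.e. a ring
// buffer of 1024 dwords.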

void
PM4PacketProcessor::releaseMem(PM4Queue *q, PM4ReleaseMem *pkt)
{
    q->incRptr(sizeof(PM4ReleaseMem));

    Addr addr = getGARTAddr(pkt->addr);
    DPRINTF(PM4PacketProcessor, "PM4 release_mem event %d eventIdx %d intSel "
            "%d destSel %d dataSel %d, address %p data %p, intCtx %p\n",
            pkt->event, pkt->eventIdx, pkt->intSelect, pkt->destSelect,
            pkt->dataSelect, addr, pkt->dataLo, pkt->intCtxId);

    DPRINTF(PM4PacketProcessor,
            "PM4 release_mem destSel 0 bypasses caches to MC.\n");

    if (pkt->dataSelect == 1) {
        auto cb = new DmaVirtCallback<uint32_t>(
            [ = ](const uint32_t &) { releaseMemDone(q, pkt, addr); },
            pkt->dataLo);
        dmaWriteVirt(addr, sizeof(uint32_t), cb, &cb->dmaBuffer);
    } else {
        panic("Unimplemented PM4ReleaseMem.dataSelect");
    }
}

void
PM4PacketProcessor::releaseMemDone(PM4Queue *q, PM4ReleaseMem *pkt, Addr addr)
{
    DPRINTF(PM4PacketProcessor, "PM4 release_mem wrote %d to %p\n",
            pkt->dataLo, addr);
    if (pkt->intSelect == 2) {
        DPRINTF(PM4PacketProcessor, "PM4 interrupt, id: %d ctx: %d, me: %d, "
                "pipe: %d, queueSlot:%d\n", q->id(), pkt->intCtxId, q->me(),
                q->pipe(), q->queue());

        uint8_t ringId = 0;
        if (q->id() != 0) {
            ringId = (q->queue() << 4) | (q->me() << 2) | q->pipe();
        }
        gpuDevice->getIH()->prepareInterruptCookie(pkt->intCtxId, ringId,
            SOC15_IH_CLIENTID_GRBM_CP, TRAP_ID);
        gpuDevice->getIH()->submitInterruptCookie();
    }

    delete pkt;
    decodeNext(q);
}
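
// Illustrative ringId packing from above: queue == 3, me == 1, pipe == 2
// gives ringId = (3 << 4) | (1 << 2) | 2 = 0x36.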

void
PM4PacketProcessor::updateReadIndex(Addr offset, uint64_t rd_idx)
{
    assert(queuesMap.count(offset));
    queuesMap[offset]->getMQD()->mqdReadIndex = rd_idx;
}

void
PM4PacketProcessor::unmapQueues(PM4Queue *q, PM4UnmapQueues *pkt)
{
    q->incRptr(sizeof(PM4UnmapQueues));

    DPRINTF(PM4PacketProcessor, "PM4 unmap_queues queueSel: %d numQueues: %d "
            "pasid: %p doorbellOffset0 %p\n",
            pkt->queueSel, pkt->numQueues, pkt->pasid, pkt->doorbellOffset0);

    switch (pkt->queueSel) {
      case 0:
        switch (pkt->numQueues) {
          case 1:
            gpuDevice->deallocateVmid(
                    gpuDevice->getVMID(pkt->doorbellOffset0));
            gpuDevice->deallocateVmid(
                    gpuDevice->getVMID(pkt->doorbellOffset1));
            gpuDevice->deallocateVmid(
                    gpuDevice->getVMID(pkt->doorbellOffset2));
            gpuDevice->deallocateVmid(
                    gpuDevice->getVMID(pkt->doorbellOffset3));
            break;
          case 2:
            gpuDevice->deallocateVmid(
                    gpuDevice->getVMID(pkt->doorbellOffset0));
            gpuDevice->deallocateVmid(
                    gpuDevice->getVMID(pkt->doorbellOffset1));
            gpuDevice->deallocateVmid(
                    gpuDevice->getVMID(pkt->doorbellOffset2));
            break;
          case 3:
            gpuDevice->deallocateVmid(
                    gpuDevice->getVMID(pkt->doorbellOffset0));
            gpuDevice->deallocateVmid(
                    gpuDevice->getVMID(pkt->doorbellOffset1));
            break;
          case 4:
            gpuDevice->deallocateVmid(
                    gpuDevice->getVMID(pkt->doorbellOffset0));
            break;
          default:
            panic("Unrecognized number of queues %d\n", pkt->numQueues);
        }
        break;
      case 1:
        gpuDevice->deallocatePasid(pkt->pasid);
        break;
      case 2:
        break;
      case 3: {
        auto &hsa_pp = gpuDevice->CP()->hsaPacketProc();
        for (auto iter : gpuDevice->getUsedVMIDs()) {
            for (auto id : iter.second) {
                assert(queues.count(id));

                // Do not unmap KMD queues
                if (queues[id]->privileged()) {
                    continue;
                }
                QueueDesc *mqd = queues[id]->getMQD();
                DPRINTF(PM4PacketProcessor, "Unmapping queue %d with read "
                        "index %ld\n", id, mqd->mqdReadIndex);
                // Partially writing the mqd with an offset of 96 dwords
                Addr addr = getGARTAddr(queues[id]->mqdBase() +
                                        96 * sizeof(uint32_t));
                Addr mqd_base = queues[id]->mqdBase();
                auto cb = new DmaVirtCallback<uint32_t>(
                    [ = ] (const uint32_t &) {
                        doneMQDWrite(mqd_base, addr);
                    });
                mqd->base >>= 8;
                dmaWriteVirt(addr, sizeof(QueueDesc), cb, mqd);
                queues.erase(id);
                hsa_pp.unsetDeviceQueueDesc(id, 8);
            }
        }
        } break;
      default:
        panic("Unrecognized options\n");
        break;
    }

    delete pkt;
    decodeNext(q);
}

void
PM4PacketProcessor::doneMQDWrite(Addr mqdAddr, Addr addr) {
    DPRINTF(PM4PacketProcessor, "PM4 unmap_queues MQD %p wrote to addr %p\n",
            mqdAddr, addr);
}

void
PM4PacketProcessor::mapProcess(uint32_t pasid, uint64_t ptBase,
                               uint32_t shMemBases)
{
    uint16_t vmid = gpuDevice->allocateVMID(pasid);

    gpuDevice->getVM().setPageTableBase(vmid, ptBase);
    gpuDevice->CP()->shader()->setHwReg(HW_REG_SH_MEM_BASES, shMemBases);

    // Setup the apertures that gem5 uses. These values are bits [63:48].
    Addr lds_base = (Addr)bits(shMemBases, 31, 16) << 48;
    Addr scratch_base = (Addr)bits(shMemBases, 15, 0) << 48;

    // There does not seem to be any register for the limit, but the driver
    // assumes scratch and LDS have a 4GB aperture, so use that.
    gpuDevice->CP()->shader()->setLdsApe(lds_base, lds_base + 0xFFFFFFFF);
    gpuDevice->CP()->shader()->setScratchApe(scratch_base,
                                             scratch_base + 0xFFFFFFFF);
}
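
// Worked example (illustrative): shMemBases == 0x00010002 yields
// lds_base = 0x1ULL << 48 = 0x0001000000000000 and scratch_base =
// 0x2ULL << 48 = 0x0002000000000000, each spanning a 4GB aperture up to
// base + 0xFFFFFFFF.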

void
PM4PacketProcessor::mapProcessGfx9(PM4Queue *q, PM4MapProcess *pkt)
{
    q->incRptr(sizeof(PM4MapProcess));

    DPRINTF(PM4PacketProcessor, "PM4 map_process pasid: %p quantum: "
            "%d pt: %p signal: %p\n", pkt->pasid, pkt->processQuantum,
            pkt->ptBase, pkt->completionSignal);

    mapProcess(pkt->pasid, pkt->ptBase, pkt->shMemBases);

    delete pkt;
    decodeNext(q);
}

void
PM4PacketProcessor::mapProcessGfx90a(PM4Queue *q, PM4MapProcessMI200 *pkt)
{
    q->incRptr(sizeof(PM4MapProcessMI200));

    DPRINTF(PM4PacketProcessor, "PM4 map_process pasid: %p quantum: "
            "%d pt: %p signal: %p\n", pkt->pasid, pkt->processQuantum,
            pkt->ptBase, pkt->completionSignal);

    mapProcess(pkt->pasid, pkt->ptBase, pkt->shMemBases);

    delete pkt;
    decodeNext(q);
}

void
PM4PacketProcessor::runList(PM4Queue *q, PM4RunList *pkt)
{
    DPRINTF(PM4PacketProcessor, "PM4 run_list base: %p size: %d\n",
            pkt->ibBase, pkt->ibSize);

    q->incRptr(sizeof(PM4RunList));

    q->ib(true);
    q->ibBase(pkt->ibBase);
    q->rptr(0);
    q->wptr(pkt->ibSize * sizeof(uint32_t));

    delete pkt;
    decodeNext(q);
}

void
PM4PacketProcessor::indirectBuffer(PM4Queue *q, PM4IndirectBuf *pkt)
{
    DPRINTF(PM4PacketProcessor, "PM4 indirect buffer, base: %p.\n",
            pkt->ibBase);

    q->incRptr(sizeof(PM4IndirectBuf));

    q->ib(true);
    q->ibBase(pkt->ibBase);
    q->wptr(pkt->ibSize * sizeof(uint32_t));

    decodeNext(q);
}

void
PM4PacketProcessor::switchBuffer(PM4Queue *q, PM4SwitchBuf *pkt)
{
    q->incRptr(sizeof(PM4SwitchBuf));

    q->ib(true);
    DPRINTF(PM4PacketProcessor, "PM4 switching buffer, wptr: %p.\n",
            q->wptr());

    decodeNext(q);
}

void
PM4PacketProcessor::setUconfigReg(PM4Queue *q, PM4SetUconfigReg *pkt)
{
    q->incRptr(sizeof(PM4SetUconfigReg));

    // SET_UCONFIG_REG_START and pkt->offset are dword addresses
    uint32_t reg_addr = (PACKET3_SET_UCONFIG_REG_START + pkt->offset) * 4;

    gpuDevice->setRegVal(reg_addr, pkt->data);

    decodeNext(q);
}
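
// Example (illustrative): a packet with offset == 0x40 updates the register
// at byte address (PACKET3_SET_UCONFIG_REG_START + 0x40) * 4, since both the
// register base and the packet offset are dword addresses.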

void
PM4PacketProcessor::waitRegMem(PM4Queue *q, PM4WaitRegMem *pkt)
{
    q->incRptr(sizeof(PM4WaitRegMem));

    DPRINTF(PM4PacketProcessor, "PM4 WAIT_REG_MEM\nfunc: %d memSpace: %d op: "
            "%d\n", pkt->function, pkt->memSpace, pkt->operation);
    DPRINTF(PM4PacketProcessor, "    AddrLo/Reg1: %lx\n", pkt->memAddrLo);
    DPRINTF(PM4PacketProcessor, "    AddrHi/Reg2: %lx\n", pkt->memAddrHi);
    DPRINTF(PM4PacketProcessor, "    Reference: %lx\n", pkt->reference);
    DPRINTF(PM4PacketProcessor, "    Mask: %lx\n", pkt->mask);
    DPRINTF(PM4PacketProcessor, "    Poll Interval: %lx\n", pkt->pollInterval);

    decodeNext(q);
}

void
PM4PacketProcessor::queryStatus(PM4Queue *q, PM4QueryStatus *pkt)
{
    q->incRptr(sizeof(PM4QueryStatus));

    DPRINTF(PM4PacketProcessor, "PM4 query status contextId: %d, interruptSel:"
            " %d command: %d, pasid: %d, doorbellOffset: %d, engineSel: %d "
            "addr: %lx, data: %lx\n", pkt->contextId, pkt->interruptSel,
            pkt->command, pkt->pasid, pkt->doorbellOffset, pkt->engineSel,
            pkt->addr, pkt->data);

    if (pkt->interruptSel == 0 && pkt->command == 2) {
        // Write data value to fence address
        Addr addr = getGARTAddr(pkt->addr);
        DPRINTF(PM4PacketProcessor, "Using GART addr %lx\n", addr);
        auto cb = new DmaVirtCallback<uint64_t>(
            [ = ] (const uint64_t &) { queryStatusDone(q, pkt); }, pkt->data);
        dmaWriteVirt(addr, sizeof(uint64_t), cb, &cb->dmaBuffer);
    } else {
        // No other combinations used in amdkfd v9
        panic("query_status with interruptSel %d command %d not supported",
              pkt->interruptSel, pkt->command);
    }
}

void
PM4PacketProcessor::queryStatusDone(PM4Queue *q, PM4QueryStatus *pkt)
{
    DPRINTF(PM4PacketProcessor, "PM4 query status complete\n");

    delete pkt;
    decodeNext(q);
}

void
PM4PacketProcessor::writeMMIO(PacketPtr pkt, Addr mmio_offset)
{
    switch (mmio_offset) {
      /* Hardware queue descriptor (HQD) registers */
      case mmCP_HQD_VMID:
        setHqdVmid(pkt->getLE<uint32_t>());
        break;
      case mmCP_HQD_ACTIVE:
        setHqdActive(pkt->getLE<uint32_t>());
        break;
      case mmCP_HQD_PQ_BASE:
        setHqdPqBase(pkt->getLE<uint32_t>());
        break;
      case mmCP_HQD_PQ_BASE_HI:
        setHqdPqBaseHi(pkt->getLE<uint32_t>());
        break;
      case mmCP_HQD_PQ_DOORBELL_CONTROL:
        setHqdPqDoorbellCtrl(pkt->getLE<uint32_t>());
        gpuDevice->setDoorbellType(getKiqDoorbellOffset(), QueueType::Compute);
        break;
      case mmCP_HQD_PQ_RPTR:
        setHqdPqPtr(pkt->getLE<uint32_t>());
        break;
      case mmCP_HQD_PQ_WPTR_LO:
        setHqdPqWptrLo(pkt->getLE<uint32_t>());
        break;
      case mmCP_HQD_PQ_WPTR_HI:
        setHqdPqWptrHi(pkt->getLE<uint32_t>());
        break;
      case mmCP_HQD_PQ_RPTR_REPORT_ADDR:
        setHqdPqRptrReportAddr(pkt->getLE<uint32_t>());
        break;
      case mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI:
        setHqdPqRptrReportAddrHi(pkt->getLE<uint32_t>());
        break;
      case mmCP_HQD_PQ_WPTR_POLL_ADDR:
        setHqdPqWptrPollAddr(pkt->getLE<uint32_t>());
        break;
      case mmCP_HQD_PQ_WPTR_POLL_ADDR_HI:
        setHqdPqWptrPollAddrHi(pkt->getLE<uint32_t>());
        break;
      case mmCP_HQD_PQ_CONTROL:
        setHqdPqControl(pkt->getLE<uint32_t>());
        break;
      case mmCP_HQD_IB_CONTROL:
        setHqdIbCtrl(pkt->getLE<uint32_t>());
        break;
      /* Ring buffer registers */
      case mmCP_RB_VMID:
        setRbVmid(pkt->getLE<uint32_t>());
        break;
      case mmCP_RB0_CNTL:
        setRbCntl(pkt->getLE<uint32_t>());
        break;
      case mmCP_RB0_WPTR:
        setRbWptrLo(pkt->getLE<uint32_t>());
        break;
      case mmCP_RB0_WPTR_HI:
        setRbWptrHi(pkt->getLE<uint32_t>());
        break;
      case mmCP_RB0_RPTR_ADDR:
        setRbRptrAddrLo(pkt->getLE<uint32_t>());
        break;
      case mmCP_RB0_RPTR_ADDR_HI:
        setRbRptrAddrHi(pkt->getLE<uint32_t>());
        break;
      case mmCP_RB_WPTR_POLL_ADDR_LO:
        setRbWptrPollAddrLo(pkt->getLE<uint32_t>());
        break;
      case mmCP_RB_WPTR_POLL_ADDR_HI:
        setRbWptrPollAddrHi(pkt->getLE<uint32_t>());
        break;
      case mmCP_RB0_BASE:
        setRbBaseLo(pkt->getLE<uint32_t>());
        break;
      case mmCP_RB0_BASE_HI:
        setRbBaseHi(pkt->getLE<uint32_t>());
        break;
      case mmCP_RB_DOORBELL_CONTROL:
        setRbDoorbellCntrl(pkt->getLE<uint32_t>());
        gpuDevice->setDoorbellType(getPqDoorbellOffset(), QueueType::Gfx);
        break;
      case mmCP_RB_DOORBELL_RANGE_LOWER:
        setRbDoorbellRangeLo(pkt->getLE<uint32_t>());
        break;
      case mmCP_RB_DOORBELL_RANGE_UPPER:
        setRbDoorbellRangeHi(pkt->getLE<uint32_t>());
        break;
      default:
        break;
    }
}

void
PM4PacketProcessor::setHqdVmid(uint32_t data)
{
    kiq.hqd_vmid = data;
}

void
PM4PacketProcessor::setHqdActive(uint32_t data)
{
    kiq.hqd_active = data;
}

void
PM4PacketProcessor::setHqdPqBase(uint32_t data)
{
    kiq.hqd_pq_base_lo = data;
}

void
PM4PacketProcessor::setHqdPqBaseHi(uint32_t data)
{
    kiq.hqd_pq_base_hi = data;
}

void
PM4PacketProcessor::setHqdPqDoorbellCtrl(uint32_t data)
{
    kiq.hqd_pq_doorbell_control = data;
}

void
PM4PacketProcessor::setHqdPqPtr(uint32_t data)
{
    kiq.rptr = data;
}

void
PM4PacketProcessor::setHqdPqWptrLo(uint32_t data)
{
    /* Write pointer communicated through doorbell value. */
}

void
PM4PacketProcessor::setHqdPqWptrHi(uint32_t data)
{
    /* Write pointer communicated through doorbell value. */
}

void
PM4PacketProcessor::setHqdPqRptrReportAddr(uint32_t data)
{
    kiq.hqd_pq_rptr_report_addr_lo = data;
}

void
PM4PacketProcessor::setHqdPqRptrReportAddrHi(uint32_t data)
{
    kiq.hqd_pq_rptr_report_addr_hi = data;
}

void
PM4PacketProcessor::setHqdPqWptrPollAddr(uint32_t data)
{
    kiq.hqd_pq_wptr_poll_addr_lo = data;
}

void
PM4PacketProcessor::setHqdPqWptrPollAddrHi(uint32_t data)
{
    kiq.hqd_pq_wptr_poll_addr_hi = data;
}

void
PM4PacketProcessor::setHqdPqControl(uint32_t data)
{
    kiq.hqd_pq_control = data;
}

void
PM4PacketProcessor::setHqdIbCtrl(uint32_t data)
{
    kiq.hqd_ib_control = data;
}

void
PM4PacketProcessor::setRbVmid(uint32_t data)
{
    pq.hqd_vmid = data;
}

void
PM4PacketProcessor::setRbCntl(uint32_t data)
{
    pq.hqd_pq_control = data;
}

void
PM4PacketProcessor::setRbWptrLo(uint32_t data)
{
    /* Write pointer communicated through doorbell value. */
}

void
PM4PacketProcessor::setRbWptrHi(uint32_t data)
{
    /* Write pointer communicated through doorbell value. */
}

void
PM4PacketProcessor::setRbRptrAddrLo(uint32_t data)
{
    pq.queueRptrAddrLo = data;
}

void
PM4PacketProcessor::setRbRptrAddrHi(uint32_t data)
{
    pq.queueRptrAddrHi = data;
}

void
PM4PacketProcessor::setRbWptrPollAddrLo(uint32_t data)
{
    pq.hqd_pq_wptr_poll_addr_lo = data;
}

void
PM4PacketProcessor::setRbWptrPollAddrHi(uint32_t data)
{
    pq.hqd_pq_wptr_poll_addr_hi = data;
}

void
PM4PacketProcessor::setRbBaseLo(uint32_t data)
{
    pq.hqd_pq_base_lo = data;
}

void
PM4PacketProcessor::setRbBaseHi(uint32_t data)
{
    pq.hqd_pq_base_hi = data;
}

void
PM4PacketProcessor::setRbDoorbellCntrl(uint32_t data)
{
    pq.hqd_pq_doorbell_control = data;
    pq.doorbellOffset = data & 0x1ffffffc;
}

void
PM4PacketProcessor::setRbDoorbellRangeLo(uint32_t data)
{
    pq.doorbellRangeLo = data;
}

void
PM4PacketProcessor::setRbDoorbellRangeHi(uint32_t data)
{
    pq.doorbellRangeHi = data;
}

void
PM4PacketProcessor::serialize(CheckpointOut &cp) const
{
    // Serialize the DmaVirtDevice base class
    DmaVirtDevice::serialize(cp);

    int num_queues = queues.size();
    Addr id[num_queues];
    Addr mqd_base[num_queues];
    Addr base[num_queues];
    Addr rptr[num_queues];
    Addr wptr[num_queues];
    Addr ib_base[num_queues];
    Addr ib_rptr[num_queues];
    Addr ib_wptr[num_queues];
    Addr offset[num_queues];
    bool processing[num_queues];
    bool ib[num_queues];
    uint32_t me[num_queues];
    uint32_t pipe[num_queues];
    uint32_t queue[num_queues];
    bool privileged[num_queues];
    uint32_t hqd_active[num_queues];
    uint32_t hqd_vmid[num_queues];
    Addr aql_rptr[num_queues];
    uint32_t doorbell[num_queues];
    uint32_t hqd_pq_control[num_queues];

    int i = 0;
    for (auto iter : queues) {
        PM4Queue *q = iter.second;
        id[i] = q->id();
        mqd_base[i] = q->mqdBase();
        bool cur_state = q->ib();
        q->ib(false);
        base[i] = q->base() >> 8;
        rptr[i] = q->getRptr();
        wptr[i] = q->getWptr();
        q->ib(true);
        ib_base[i] = q->ibBase();
        ib_rptr[i] = q->getRptr();
        ib_wptr[i] = q->getWptr();
        q->ib(cur_state);
        offset[i] = q->offset();
        processing[i] = q->processing();
        ib[i] = q->ib();
        me[i] = q->me();
        pipe[i] = q->pipe();
        queue[i] = q->queue();
        privileged[i] = q->privileged();
        hqd_active[i] = q->getMQD()->hqd_active;
        hqd_vmid[i] = q->getMQD()->hqd_vmid;
        aql_rptr[i] = q->getMQD()->aqlRptr;
        doorbell[i] = q->getMQD()->doorbell;
        hqd_pq_control[i] = q->getMQD()->hqd_pq_control;
        i++;
    }

    SERIALIZE_SCALAR(num_queues);
    SERIALIZE_ARRAY(id, num_queues);
    SERIALIZE_ARRAY(mqd_base, num_queues);
    SERIALIZE_ARRAY(base, num_queues);
    SERIALIZE_ARRAY(rptr, num_queues);
    SERIALIZE_ARRAY(wptr, num_queues);
    SERIALIZE_ARRAY(ib_base, num_queues);
    SERIALIZE_ARRAY(ib_rptr, num_queues);
    SERIALIZE_ARRAY(ib_wptr, num_queues);
    SERIALIZE_ARRAY(offset, num_queues);
    SERIALIZE_ARRAY(processing, num_queues);
    SERIALIZE_ARRAY(ib, num_queues);
    SERIALIZE_ARRAY(me, num_queues);
    SERIALIZE_ARRAY(pipe, num_queues);
    SERIALIZE_ARRAY(queue, num_queues);
    SERIALIZE_ARRAY(privileged, num_queues);
    SERIALIZE_ARRAY(hqd_active, num_queues);
    SERIALIZE_ARRAY(hqd_vmid, num_queues);
    SERIALIZE_ARRAY(aql_rptr, num_queues);
    SERIALIZE_ARRAY(doorbell, num_queues);
    SERIALIZE_ARRAY(hqd_pq_control, num_queues);
}

void
PM4PacketProcessor::unserialize(CheckpointIn &cp)
{
    // Unserialize the DmaVirtDevice base class
    DmaVirtDevice::unserialize(cp);

    int num_queues = 0;
    UNSERIALIZE_SCALAR(num_queues);

    Addr id[num_queues];
    Addr mqd_base[num_queues];
    Addr base[num_queues];
    Addr rptr[num_queues];
    Addr wptr[num_queues];
    Addr ib_base[num_queues];
    Addr ib_rptr[num_queues];
    Addr ib_wptr[num_queues];
    Addr offset[num_queues];
    bool processing[num_queues];
    bool ib[num_queues];
    uint32_t me[num_queues];
    uint32_t pipe[num_queues];
    uint32_t queue[num_queues];
    bool privileged[num_queues];
    uint32_t hqd_active[num_queues];
    uint32_t hqd_vmid[num_queues];
    Addr aql_rptr[num_queues];
    uint32_t doorbell[num_queues];
    uint32_t hqd_pq_control[num_queues];

    UNSERIALIZE_ARRAY(id, num_queues);
    UNSERIALIZE_ARRAY(mqd_base, num_queues);
    UNSERIALIZE_ARRAY(base, num_queues);
    UNSERIALIZE_ARRAY(rptr, num_queues);
    UNSERIALIZE_ARRAY(wptr, num_queues);
    UNSERIALIZE_ARRAY(ib_base, num_queues);
    UNSERIALIZE_ARRAY(ib_rptr, num_queues);
    UNSERIALIZE_ARRAY(ib_wptr, num_queues);
    UNSERIALIZE_ARRAY(offset, num_queues);
    UNSERIALIZE_ARRAY(processing, num_queues);
    UNSERIALIZE_ARRAY(ib, num_queues);
    UNSERIALIZE_ARRAY(me, num_queues);
    UNSERIALIZE_ARRAY(pipe, num_queues);
    UNSERIALIZE_ARRAY(queue, num_queues);
    UNSERIALIZE_ARRAY(privileged, num_queues);
    UNSERIALIZE_ARRAY(hqd_active, num_queues);
    UNSERIALIZE_ARRAY(hqd_vmid, num_queues);
    UNSERIALIZE_ARRAY(aql_rptr, num_queues);
    UNSERIALIZE_ARRAY(doorbell, num_queues);
    UNSERIALIZE_ARRAY(hqd_pq_control, num_queues);

    for (int i = 0; i < num_queues; i++) {
        QueueDesc *mqd = new QueueDesc();
        memset(mqd, 0, sizeof(QueueDesc));

        mqd->mqdBase = mqd_base[i] >> 8;
        mqd->base = base[i];
        mqd->rptr = rptr[i];
        mqd->ibBase = ib_base[i];
        mqd->ibRptr = ib_rptr[i];

        PM4MapQueues* pkt = new PM4MapQueues;
        memset(pkt, 0, sizeof(PM4MapQueues));
        newQueue(mqd, offset[i], pkt, id[i]);

        queues[id[i]]->ib(false);
        queues[id[i]]->wptr(wptr[i]);
        queues[id[i]]->ib(true);
        queues[id[i]]->wptr(ib_wptr[i]);
        queues[id[i]]->offset(offset[i]);
        queues[id[i]]->processing(processing[i]);
        queues[id[i]]->ib(ib[i]);
        queues[id[i]]->setPkt(me[i], pipe[i], queue[i], privileged[i]);
        queues[id[i]]->getMQD()->hqd_active = hqd_active[i];
        queues[id[i]]->getMQD()->hqd_vmid = hqd_vmid[i];
        queues[id[i]]->getMQD()->aqlRptr = aql_rptr[i];
        queues[id[i]]->getMQD()->doorbell = doorbell[i];
        queues[id[i]]->getMQD()->hqd_pq_control = hqd_pq_control[i];

        DPRINTF(PM4PacketProcessor, "PM4 queue %d, rptr: %p wptr: %p\n",
                queues[id[i]]->id(), queues[id[i]]->rptr(),
                queues[id[i]]->wptr());
    }
}

} // namespace gem5