gem5 v24.0.0.0
pm4_packet_processor.cc
/*
 * Copyright (c) 2021 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 */

#include "dev/amdgpu/pm4_packet_processor.hh"

#include "debug/PM4PacketProcessor.hh"
#include "dev/amdgpu/amdgpu_device.hh"
#include "dev/amdgpu/interrupt_handler.hh"
#include "dev/amdgpu/pm4_mmio.hh"
#include "dev/amdgpu/pm4_queues.hh"
#include "dev/amdgpu/sdma_engine.hh"
#include "dev/hsa/hsa_packet_processor.hh"
#include "enums/GfxVersion.hh"
#include "gpu-compute/gpu_command_processor.hh"
#include "gpu-compute/shader.hh"
#include "mem/packet.hh"
#include "mem/packet_access.hh"

namespace gem5
{

PM4PacketProcessor::PM4PacketProcessor(const PM4PacketProcessorParams &p)
    : DmaVirtDevice(p), _ipId(p.ip_id), _mmioRange(p.mmio_range)
{
    memset(&kiq, 0, sizeof(QueueDesc));
    memset(&pq, 0, sizeof(QueueDesc));
}

/**
 * Method for functional translation.
 */
TranslationGenPtr
PM4PacketProcessor::translate(Addr vaddr, Addr size)
{
    if (gpuDevice->getVM().inAGP(vaddr)) {
        // Use AGP translation gen
        return TranslationGenPtr(
            new AMDGPUVM::AGPTranslationGen(&gpuDevice->getVM(), vaddr,
                                            size));
    }

    // Assume GART otherwise as this is the only other translation aperture
    // available to the PM4 packet processor.
    return TranslationGenPtr(
        new AMDGPUVM::GARTTranslationGen(&gpuDevice->getVM(), vaddr, size));
}

AddrRangeList
PM4PacketProcessor::getAddrRanges() const
{
    AddrRangeList ranges;
    return ranges;
}

void
PM4PacketProcessor::setGPUDevice(AMDGPUDevice *gpu_device)
{
    gpuDevice = gpu_device;
}

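// Map an address into its GART-aperture form: for non-AGP addresses the
// 4 KiB frame number is shifted up by 3 bits (scaled by the 8-byte GART
// entry size) while the low 12 offset bits are preserved.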
Addr
PM4PacketProcessor::getGARTAddr(Addr addr) const
{
    if (!gpuDevice->getVM().inAGP(addr)) {
        Addr low_bits = bits(addr, 11, 0);
        addr = (((addr >> 12) << 3) << 12) | low_bits;
    }
    return addr;
}

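// Find the queue bound to a doorbell offset. A miss means the doorbell
// belongs to one of the two statically created queues, so the offset is
// first mapped to the primary queue (gfx) or the kernel interface queue
// (KIQ) and then returned.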
PM4Queue *
PM4PacketProcessor::getQueue(Addr offset, bool gfx)
{
    auto result = queuesMap.find(offset);
    if (result == queuesMap.end()) {
        if (gfx)
            mapPq(offset);
        else
            mapKiq(offset);
        return queuesMap[offset];
    }
    return result->second;
}

void
PM4PacketProcessor::mapKiq(Addr offset)
{
    DPRINTF(PM4PacketProcessor, "Mapping KIQ\n");
    newQueue(&kiq, offset);
}

void
PM4PacketProcessor::mapPq(Addr offset)
{
    DPRINTF(PM4PacketProcessor, "Mapping PQ\n");
    newQueue(&pq, offset);
}

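// Wrap a memory queue descriptor (MQD) read from driver memory in a
// PM4Queue and register its doorbell offset so later doorbell writes
// can be routed to it.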
void
PM4PacketProcessor::newQueue(QueueDesc *mqd, Addr offset,
                             PM4MapQueues *pkt, int id)
{
    if (id == -1)
        id = queues.size();

    /* 256 bytes aligned address */
    mqd->base <<= 8;
    PM4Queue *q = new PM4Queue(id, mqd, offset, pkt);

    queuesMap[offset] = q;
    queues[id] = q;

    /* we are assuming only compute queues can be mapped from MQDs */
    QueueType qt;
    qt = mqd->aql ? QueueType::ComputeAQL
                  : QueueType::Compute;
    gpuDevice->setDoorbellType(offset, qt, getIpId());

    DPRINTF(PM4PacketProcessor, "New PM4 queue %d, base: %p offset: %p, me: "
            "%d, pipe %d queue: %d size: %d\n", id, q->base(), q->offset(),
            q->me(), q->pipe(), q->queue(), q->size());
}

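// Entry point on a doorbell write: record the new write pointer and
// kick off packet decoding unless the queue is already being drained.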
void
PM4PacketProcessor::process(PM4Queue *q, Addr wptrOffset)
{
    q->wptr(wptrOffset * sizeof(uint32_t));

    if (!q->processing()) {
        q->processing(true);
        decodeNext(q);
    }
}

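// Read the next packet header if the queue still has data. Otherwise
// stop processing, drop back to the main ring when an indirect buffer
// is exhausted, and write back the read pointer for AQL queues.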
void
PM4PacketProcessor::decodeNext(PM4Queue *q)
{
    DPRINTF(PM4PacketProcessor, "PM4 decode queue %d rptr %p, wptr %p\n",
            q->id(), q->rptr(), q->wptr());

    if (q->rptr() != q->wptr()) {
        /* Additional braces here are needed due to a clang compilation bug
           falsely throwing a "suggest braces around initialization of
           subject" error. More info on this bug is available here:
           https://stackoverflow.com/questions/31555584
         */
        PM4Header h{{{0, 0, 0, 0, 0, 0}}};
        auto cb = new DmaVirtCallback<PM4Header>(
            [ = ] (PM4Header header)
                { decodeHeader(q, header); }, h);
        dmaReadVirt(getGARTAddr(q->rptr()), sizeof(uint32_t), cb,
                    &cb->dmaBuffer);
    } else {
        // Reached the end of processable data in the queue. Switch out of IB
        // if this is an indirect buffer.
        assert(q->rptr() == q->wptr());
        q->processing(false);
        if (q->ib()) {
            q->ib(false);
            decodeNext(q);
        }

        // Write back rptr when the queue is empty. For static queues which
        // are not unmapped, this is how the driver knows there is enough
        // space in the queue to continue writing packets to the ring buffer.
        if (q->getMQD()->aqlRptr) {
            Addr addr = getGARTAddr(q->getMQD()->aqlRptr);
            uint32_t *data = new uint32_t;
            // gem5 stores rptr as a byte offset while the driver expects
            // a dword offset. Convert the offset to a dword count.
            *data = q->getRptr() >> 2;
            auto cb = new DmaVirtCallback<uint32_t>(
                [data](const uint32_t &) { delete data; });
            dmaWriteVirt(addr, sizeof(uint32_t), cb, data);
        }
    }
}

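// Dispatch on the PM4 opcode: each case DMAs the packet body into a
// typed buffer and continues in the matching handler once the read
// completes.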
void
PM4PacketProcessor::decodeHeader(PM4Queue *q, PM4Header header)
{
    DPRINTF(PM4PacketProcessor, "PM4 packet %p\n", header.opcode);

    q->incRptr(sizeof(PM4Header));

    DmaVirtCallback<uint64_t> *cb = nullptr;
    void *dmaBuffer = nullptr;

    switch(header.opcode) {
      case IT_NOP: {
        DPRINTF(PM4PacketProcessor, "PM4 nop, count %p\n", header.count);
        DPRINTF(PM4PacketProcessor, "rptr %p wptr %p\n", q->rptr(), q->wptr());
        if (header.count != 0x3fff) {
            q->incRptr((header.count + 1) * sizeof(uint32_t));
        }
        decodeNext(q);
      } break;
      case IT_WRITE_DATA: {
        dmaBuffer = new PM4WriteData();
        DPRINTF(PM4PacketProcessor, "PM4 writeData header: %x, count: %d\n",
                header.ordinal, header.count);
        cb = new DmaVirtCallback<uint64_t>(
            [ = ] (const uint64_t &)
                { writeData(q, (PM4WriteData *)dmaBuffer, header); });
        dmaReadVirt(getGARTAddr(q->rptr()), sizeof(PM4WriteData), cb,
                    dmaBuffer);
      } break;

      case IT_MAP_QUEUES: {
        dmaBuffer = new PM4MapQueues();
        cb = new DmaVirtCallback<uint64_t>(
            [ = ] (const uint64_t &)
                { mapQueues(q, (PM4MapQueues *)dmaBuffer); });
        dmaReadVirt(getGARTAddr(q->rptr()), sizeof(PM4MapQueues), cb,
                    dmaBuffer);
      } break;

      case IT_RELEASE_MEM: {
        dmaBuffer = new PM4ReleaseMem();
        cb = new DmaVirtCallback<uint64_t>(
            [ = ] (const uint64_t &)
                { releaseMem(q, (PM4ReleaseMem *)dmaBuffer); });
        dmaReadVirt(getGARTAddr(q->rptr()), sizeof(PM4ReleaseMem), cb,
                    dmaBuffer);
      } break;

      case IT_INDIRECT_BUFFER: {
        dmaBuffer = new PM4IndirectBuf();
        cb = new DmaVirtCallback<uint64_t>(
            [ = ] (const uint64_t &)
                { indirectBuffer(q, (PM4IndirectBuf *)dmaBuffer); });
        dmaReadVirt(getGARTAddr(q->rptr()), sizeof(PM4IndirectBuf), cb,
                    dmaBuffer);
      } break;

      case IT_SWITCH_BUFFER: {
        dmaBuffer = new PM4SwitchBuf();
        cb = new DmaVirtCallback<uint64_t>(
            [ = ] (const uint64_t &)
                { switchBuffer(q, (PM4SwitchBuf *)dmaBuffer); });
        dmaReadVirt(getGARTAddr(q->rptr()), sizeof(PM4SwitchBuf), cb,
                    dmaBuffer);
      } break;

      case IT_SET_UCONFIG_REG: {
        dmaBuffer = new PM4SetUconfigReg();
        cb = new DmaVirtCallback<uint64_t>(
            [ = ] (const uint64_t &)
                { setUconfigReg(q, (PM4SetUconfigReg *)dmaBuffer); });
        dmaReadVirt(getGARTAddr(q->rptr()), sizeof(PM4SetUconfigReg), cb,
                    dmaBuffer);
      } break;

      case IT_WAIT_REG_MEM: {
        dmaBuffer = new PM4WaitRegMem();
        cb = new DmaVirtCallback<uint64_t>(
            [ = ] (const uint64_t &)
                { waitRegMem(q, (PM4WaitRegMem *)dmaBuffer); });
        dmaReadVirt(getGARTAddr(q->rptr()), sizeof(PM4WaitRegMem), cb,
                    dmaBuffer);
      } break;
      case IT_MAP_PROCESS: {
        if (gpuDevice->getGfxVersion() == GfxVersion::gfx90a ||
            gpuDevice->getGfxVersion() == GfxVersion::gfx942) {
            dmaBuffer = new PM4MapProcessV2();
            cb = new DmaVirtCallback<uint64_t>(
                [ = ] (const uint64_t &)
                    { mapProcessV2(q, (PM4MapProcessV2 *)dmaBuffer); });
            dmaReadVirt(getGARTAddr(q->rptr()), sizeof(PM4MapProcessV2),
                        cb, dmaBuffer);
        } else {
            dmaBuffer = new PM4MapProcess();
            cb = new DmaVirtCallback<uint64_t>(
                [ = ] (const uint64_t &)
                    { mapProcessV1(q, (PM4MapProcess *)dmaBuffer); });
            dmaReadVirt(getGARTAddr(q->rptr()), sizeof(PM4MapProcess), cb,
                        dmaBuffer);
        }
      } break;

      case IT_UNMAP_QUEUES: {
        dmaBuffer = new PM4UnmapQueues();
        cb = new DmaVirtCallback<uint64_t>(
            [ = ] (const uint64_t &)
                { unmapQueues(q, (PM4UnmapQueues *)dmaBuffer); });
        dmaReadVirt(getGARTAddr(q->rptr()), sizeof(PM4UnmapQueues), cb,
                    dmaBuffer);
      } break;

      case IT_RUN_LIST: {
        dmaBuffer = new PM4RunList();
        cb = new DmaVirtCallback<uint64_t>(
            [ = ] (const uint64_t &)
                { runList(q, (PM4RunList *)dmaBuffer); });
        dmaReadVirt(getGARTAddr(q->rptr()), sizeof(PM4RunList), cb,
                    dmaBuffer);
      } break;

      case IT_QUERY_STATUS: {
        dmaBuffer = new PM4QueryStatus();
        cb = new DmaVirtCallback<uint64_t>(
            [ = ] (const uint64_t &)
                { queryStatus(q, (PM4QueryStatus *)dmaBuffer); });
        dmaReadVirt(getGARTAddr(q->rptr()), sizeof(PM4QueryStatus), cb,
                    dmaBuffer);
      } break;

      case IT_INVALIDATE_TLBS: {
        DPRINTF(PM4PacketProcessor, "Functionally invalidating all TLBs\n");
        gpuDevice->getVM().invalidateTLBs();
        q->incRptr((header.count + 1) * sizeof(uint32_t));
        decodeNext(q);
      } break;

      default: {
        warn("PM4 packet opcode 0x%x not supported.\n", header.opcode);
        DPRINTF(PM4PacketProcessor, "PM4 packet opcode 0x%x not supported.\n",
                header.opcode);
        q->incRptr((header.count + 1) * sizeof(uint32_t));
        decodeNext(q);
      } break;
    }
}

void
PM4PacketProcessor::writeData(PM4Queue *q, PM4WriteData *pkt,
                              PM4Header header)
{
    q->incRptr(sizeof(PM4WriteData));

    DPRINTF(PM4PacketProcessor, "PM4 write addr: %p data: %p destSel: %d "
            "addrIncr: %d resume: %d writeConfirm: %d cachePolicy: %d\n",
            pkt->destAddr, pkt->data, pkt->destSel, pkt->addrIncr,
            pkt->resume, pkt->writeConfirm, pkt->cachePolicy);

    if (pkt->destSel == 5) {
        // Memory address destination
        Addr addr = getGARTAddr(pkt->destAddr);

        // This is a variable length packet. The size of the packet is in
        // the header.count field and is set as Number Of Dwords - 1. This
        // packet is minimum 4 dwords, meaning the count is minimum 3. To
        // get the number of dwords of data subtract two from the count.
        unsigned size = (header.count - 2) * sizeof(uint32_t);

        DPRINTF(PM4PacketProcessor, "Writing %d bytes to %p\n", size, addr);
        auto cb = new DmaVirtCallback<uint32_t>(
            [ = ](const uint32_t &) { writeDataDone(q, pkt, addr); });
        dmaWriteVirt(addr, size, cb, &pkt->data);

        if (!pkt->writeConfirm) {
            decodeNext(q);
        }
    } else if (pkt->destSel == 0) {
        // Register dword address destination
        Addr byte_addr = pkt->destAddr << 2;

        gpuDevice->setRegVal(byte_addr, pkt->data);

        // setRegVal is instant on the simulated device so we ignore write
        // confirm.
        delete pkt;
        decodeNext(q);
    } else {
        fatal("Unknown PM4 writeData destination %d\n", pkt->destSel);
    }
}

void
PM4PacketProcessor::writeDataDone(PM4Queue *q, PM4WriteData *pkt, Addr addr)
{
    DPRINTF(PM4PacketProcessor, "PM4 write completed to %p, %p.\n", addr,
            pkt->data);

    if (pkt->writeConfirm) {
        decodeNext(q);
    }

    delete pkt;
}

void
PM4PacketProcessor::mapQueues(PM4Queue *q, PM4MapQueues *pkt)
{
    q->incRptr(sizeof(PM4MapQueues));

    DPRINTF(PM4PacketProcessor, "MAPQueues queueSel: %d, vmid: %d, me: %d, "
            "pipe: %d, queueSlot: %d, queueType: %d, allocFormat: %d, "
            "engineSel: %d, numQueues: %d, checkDisable: %d, doorbellOffset:"
            " %d, mqdAddr: %lx, wptrAddr: %lx\n", pkt->queueSel, pkt->vmid,
            pkt->me, pkt->pipe, pkt->queueSlot, pkt->queueType,
            pkt->allocFormat, pkt->engineSel, pkt->numQueues,
            pkt->checkDisable, pkt->doorbellOffset, pkt->mqdAddr,
            pkt->wptrAddr);

    // Partially reading the mqd with an offset of 96 dwords
    if (pkt->engineSel == 0 || pkt->engineSel == 1 || pkt->engineSel == 4) {
        Addr addr = getGARTAddr(pkt->mqdAddr + 96 * sizeof(uint32_t));

        DPRINTF(PM4PacketProcessor,
                "Mapping mqd from %p %p (vmid %d - last vmid %d).\n",
                addr, pkt->mqdAddr, pkt->vmid, gpuDevice->lastVMID());

        // The doorbellOffset is a dword address. We shift by two / multiply
        // by four to get the byte address to match doorbell addresses in
        // the GPU device.
        gpuDevice->mapDoorbellToVMID(pkt->doorbellOffset << 2,
                                     gpuDevice->lastVMID());

        QueueDesc *mqd = new QueueDesc();
        memset(mqd, 0, sizeof(QueueDesc));
        auto cb = new DmaVirtCallback<uint32_t>(
            [ = ] (const uint32_t &) {
                processMQD(pkt, q, addr, mqd, gpuDevice->lastVMID()); });
        dmaReadVirt(addr, sizeof(QueueDesc), cb, mqd);
    } else if (pkt->engineSel == 2 || pkt->engineSel == 3) {
        SDMAQueueDesc *sdmaMQD = new SDMAQueueDesc();
        memset(sdmaMQD, 0, sizeof(SDMAQueueDesc));

        // For SDMA we read the full MQD, so there is no offset calculation.
        Addr addr = getGARTAddr(pkt->mqdAddr);

        auto cb = new DmaVirtCallback<uint32_t>(
            [ = ] (const uint32_t &) {
                processSDMAMQD(pkt, q, addr, sdmaMQD,
                               gpuDevice->lastVMID()); });
        dmaReadVirt(addr, sizeof(SDMAQueueDesc), cb, sdmaMQD);
    } else {
        panic("Unknown engine for MQD: %d\n", pkt->engineSel);
    }
}

void
PM4PacketProcessor::processMQD(PM4MapQueues *pkt, PM4Queue *q, Addr addr,
                               QueueDesc *mqd, uint16_t vmid)
{
    DPRINTF(PM4PacketProcessor, "MQDbase: %lx, active: %d, vmid: %d, base: "
            "%lx, rptr: %x aqlPtr: %lx\n", mqd->mqdBase, mqd->hqd_active,
            mqd->hqd_vmid, mqd->base, mqd->rptr, mqd->aqlRptr);

    Addr offset = mqd->doorbell & 0x1ffffffc;
    newQueue(mqd, offset, pkt);
    PM4Queue *new_q = queuesMap[offset];
    gpuDevice->insertQId(vmid, new_q->id());

    if (mqd->aql) {
        // The queue size is encoded in the cp_hqd_pq_control field in the
        // kernel driver in the 6 lowest bits as log2(queue_size / 4) - 1
        // number of dwords.
        //
        // https://github.com/RadeonOpenCompute/ROCK-Kernel-Driver/blob/
        // roc-4.3.x/drivers/gpu/drm/amd/amdgpu/gfx_v9_0.c#L3561
        //
        // Queue size is then 2^(cp_hqd_pq_control[5:0] + 1) dwords. Multiply
        // by 4 to get the number of bytes as HSAPP expects.
        int mqd_size = (1 << ((mqd->hqd_pq_control & 0x3f) + 1)) * 4;
        auto &hsa_pp = gpuDevice->CP()->hsaPacketProc();
        hsa_pp.setDeviceQueueDesc(mqd->aqlRptr, mqd->base, new_q->id(),
                                  mqd_size, 8, GfxVersion::gfx900, offset,
                                  mqd->mqdReadIndex);
    }

    DPRINTF(PM4PacketProcessor, "PM4 mqd read completed, base %p, mqd %p, "
            "hqdAQL %d.\n", mqd->base, mqd->mqdBase, mqd->aql);

    gpuDevice->processPendingDoorbells(offset);

    delete pkt;
    decodeNext(q);
}

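// SDMA MQDs describe user-level (RLC) queues serviced by one of the
// SDMA engines rather than by the compute pipeline.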
void
PM4PacketProcessor::processSDMAMQD(PM4MapQueues *pkt, PM4Queue *q, Addr addr,
                                   SDMAQueueDesc *mqd, uint16_t vmid)
{
    uint32_t rlc_size = 4UL << bits(mqd->sdmax_rlcx_rb_cntl, 6, 1);
    Addr rptr_wb_addr = mqd->sdmax_rlcx_rb_rptr_addr_hi;
    rptr_wb_addr <<= 32;
    rptr_wb_addr |= mqd->sdmax_rlcx_rb_rptr_addr_lo;

    DPRINTF(PM4PacketProcessor, "SDMAMQD: rb base: %#lx rptr: %#x/%#x wptr: "
            "%#x/%#x ib: %#x/%#x size: %d ctrl: %#x rptr wb addr: %#lx\n",
            mqd->rb_base(), mqd->sdmax_rlcx_rb_rptr,
            mqd->sdmax_rlcx_rb_rptr_hi, mqd->sdmax_rlcx_rb_wptr,
            mqd->sdmax_rlcx_rb_wptr_hi, mqd->sdmax_rlcx_ib_base_lo,
            mqd->sdmax_rlcx_ib_base_hi, rlc_size, mqd->sdmax_rlcx_rb_cntl,
            rptr_wb_addr);

    // Engine 2 points to SDMA0 while engine 3 points to SDMA1
    assert(pkt->engineSel == 2 || pkt->engineSel == 3);
    SDMAEngine *sdma_eng = gpuDevice->getSDMAById(pkt->engineSel - 2);

    // Register RLC queue with SDMA
    sdma_eng->registerRLCQueue(pkt->doorbellOffset << 2, addr, mqd);

    // Register doorbell with GPU device
    gpuDevice->setSDMAEngine(pkt->doorbellOffset << 2, sdma_eng);
    gpuDevice->setDoorbellType(pkt->doorbellOffset << 2, QueueType::SDMAGfx);

    gpuDevice->processPendingDoorbells(pkt->doorbellOffset << 2);

    delete pkt;
    decodeNext(q);
}

void
PM4PacketProcessor::releaseMem(PM4Queue *q, PM4ReleaseMem *pkt)
{
    q->incRptr(sizeof(PM4ReleaseMem));

    Addr addr = getGARTAddr(pkt->addr);
    DPRINTF(PM4PacketProcessor, "PM4 release_mem event %d eventIdx %d intSel "
            "%d destSel %d dataSel %d, address %p data %p, intCtx %p\n",
            pkt->event, pkt->eventIdx, pkt->intSelect, pkt->destSelect,
            pkt->dataSelect, addr, pkt->dataLo, pkt->intCtxId);

    DPRINTF(PM4PacketProcessor,
            "PM4 release_mem destSel 0 bypasses caches to MC.\n");

    if (pkt->dataSelect == 1) {
        auto cb = new DmaVirtCallback<uint32_t>(
            [ = ](const uint32_t &) { releaseMemDone(q, pkt, addr); },
            pkt->dataLo);
        dmaWriteVirt(addr, sizeof(uint32_t), cb, &cb->dmaBuffer);
    } else {
        panic("Unimplemented PM4ReleaseMem.dataSelect");
    }
}

void
PM4PacketProcessor::releaseMemDone(PM4Queue *q, PM4ReleaseMem *pkt, Addr addr)
{
    DPRINTF(PM4PacketProcessor, "PM4 release_mem wrote %d to %p\n",
            pkt->dataLo, addr);
    if (pkt->intSelect == 2) {
        DPRINTF(PM4PacketProcessor, "PM4 interrupt, id: %d ctx: %d, me: %d, "
                "pipe: %d, queueSlot:%d\n", q->id(), pkt->intCtxId, q->me(),
                q->pipe(), q->queue());

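        // For queues other than the KIQ (id 0), pack the hardware queue
        // slot, micro-engine id, and pipe id into the ring id carried by
        // the interrupt cookie.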
        uint8_t ringId = 0;
        if (q->id() != 0) {
            ringId = (q->queue() << 4) | (q->me() << 2) | q->pipe();
        }
        gpuDevice->getIH()->prepareInterruptCookie(pkt->intCtxId, ringId,
                                                   SOC15_IH_CLIENTID_GRBM_CP,
                                                   CP_EOP, 0);
        gpuDevice->getIH()->submitInterruptCookie();
    }

    delete pkt;
    decodeNext(q);
}

void
PM4PacketProcessor::updateReadIndex(Addr offset, uint64_t rd_idx)
{
    assert(queuesMap.count(offset));
    queuesMap[offset]->getMQD()->mqdReadIndex = rd_idx;
}

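// Unmap one or more queues. queueSel picks how they are identified: by
// explicit doorbell offsets (0), by PASID (1), or every non-privileged
// queue at once (3).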
void
PM4PacketProcessor::unmapQueues(PM4Queue *q, PM4UnmapQueues *pkt)
{
    q->incRptr(sizeof(PM4UnmapQueues));

    DPRINTF(PM4PacketProcessor, "PM4 unmap_queues queueSel: %d numQueues: %d "
            "pasid: %p doorbellOffset0 %p \n",
            pkt->queueSel, pkt->numQueues, pkt->pasid, pkt->doorbellOffset0);

    switch (pkt->queueSel) {
      case 0:
        switch (pkt->numQueues) {
          case 1:
            gpuDevice->deallocateVmid(
                    gpuDevice->getVMID(pkt->doorbellOffset0 << 2));
            gpuDevice->deallocateVmid(
                    gpuDevice->getVMID(pkt->doorbellOffset1 << 2));
            gpuDevice->deallocateVmid(
                    gpuDevice->getVMID(pkt->doorbellOffset2 << 2));
            gpuDevice->deallocateVmid(
                    gpuDevice->getVMID(pkt->doorbellOffset3 << 2));
            break;
          case 2:
            gpuDevice->deallocateVmid(
                    gpuDevice->getVMID(pkt->doorbellOffset1 << 2));
            gpuDevice->deallocateVmid(
                    gpuDevice->getVMID(pkt->doorbellOffset2 << 2));
            gpuDevice->deallocateVmid(
                    gpuDevice->getVMID(pkt->doorbellOffset3 << 2));
            break;
          case 3:
            gpuDevice->deallocateVmid(
                    gpuDevice->getVMID(pkt->doorbellOffset2 << 2));
            gpuDevice->deallocateVmid(
                    gpuDevice->getVMID(pkt->doorbellOffset3 << 2));
            break;
          case 4:
            gpuDevice->deallocateVmid(
                    gpuDevice->getVMID(pkt->doorbellOffset3 << 2));
            break;
          default:
            panic("Unrecognized number of queues %d\n", pkt->numQueues);
        }
        break;
      case 1:
        gpuDevice->deallocatePasid(pkt->pasid);
        break;
      case 2:
        panic("Unmapping queue selection 2 unimplemented\n");
        break;
      case 3: {
        auto &hsa_pp = gpuDevice->CP()->hsaPacketProc();
        for (auto iter : gpuDevice->getUsedVMIDs()) {
            for (auto id : iter.second) {
                assert(queues.count(id));

                // Do not unmap KMD queues
                if (queues[id]->privileged()) {
                    continue;
                }
                QueueDesc *mqd = queues[id]->getMQD();
                DPRINTF(PM4PacketProcessor, "Unmapping queue %d with read "
                        "index %ld\n", id, mqd->mqdReadIndex);
                // Partially writing the mqd with an offset of 96 dwords
                Addr addr = getGARTAddr(queues[id]->mqdBase() +
                                        96 * sizeof(uint32_t));
                Addr mqd_base = queues[id]->mqdBase();
                auto cb = new DmaVirtCallback<uint32_t>(
                    [ = ] (const uint32_t &) {
                        doneMQDWrite(mqd_base, addr);
                    });
                mqd->base >>= 8;
                dmaWriteVirt(addr, sizeof(QueueDesc), cb, mqd);
                queues.erase(id);
                hsa_pp.unsetDeviceQueueDesc(id, 8);
                delete mqd;
            }
        }
        gpuDevice->deallocateAllQueues();
      } break;
      default:
        panic("Unrecognized options\n");
        break;
    }

    delete pkt;
    decodeNext(q);
}

void
PM4PacketProcessor::doneMQDWrite(Addr mqdAddr, Addr addr)
{
    DPRINTF(PM4PacketProcessor, "PM4 unmap_queues MQD %p wrote to addr %p\n",
            mqdAddr, addr);
}

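// Common handling for MAP_PROCESS v1/v2: allocate a VMID for the
// process' PASID, install its page table base, and derive the LDS and
// scratch apertures from the SH_MEM_BASES encoding.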
void
PM4PacketProcessor::mapProcess(uint32_t pasid, uint64_t ptBase,
                               uint32_t shMemBases)
{
    uint16_t vmid = gpuDevice->allocateVMID(pasid);

    gpuDevice->getVM().setPageTableBase(vmid, ptBase);
    gpuDevice->CP()->shader()->setHwReg(HW_REG_SH_MEM_BASES, shMemBases);

    // Setup the apertures that gem5 uses. These values are bits [63:48].
    Addr lds_base = (Addr)bits(shMemBases, 31, 16) << 48;
    Addr scratch_base = (Addr)bits(shMemBases, 15, 0) << 48;

    // There does not seem to be any register for the limit, but the driver
    // assumes scratch and LDS have a 4GB aperture, so use that.
    gpuDevice->CP()->shader()->setLdsApe(lds_base, lds_base + 0xFFFFFFFF);
    gpuDevice->CP()->shader()->setScratchApe(scratch_base,
                                             scratch_base + 0xFFFFFFFF);
}

void
PM4PacketProcessor::mapProcessV1(PM4Queue *q, PM4MapProcess *pkt)
{
    q->incRptr(sizeof(PM4MapProcess));

    DPRINTF(PM4PacketProcessor, "PM4 map_process pasid: %p quantum: "
            "%d pt: %p signal: %p\n", pkt->pasid, pkt->processQuantum,
            pkt->ptBase, pkt->completionSignal);

    mapProcess(pkt->pasid, pkt->ptBase, pkt->shMemBases);

    delete pkt;
    decodeNext(q);
}

void
PM4PacketProcessor::mapProcessV2(PM4Queue *q, PM4MapProcessV2 *pkt)
{
    q->incRptr(sizeof(PM4MapProcessV2));

    DPRINTF(PM4PacketProcessor, "PM4 map_process pasid: %p quantum: "
            "%d pt: %p signal: %p\n", pkt->pasid, pkt->processQuantum,
            pkt->ptBase, pkt->completionSignal);

    mapProcess(pkt->pasid, pkt->ptBase, pkt->shMemBases);

    delete pkt;
    decodeNext(q);
}

void
PM4PacketProcessor::runList(PM4Queue *q, PM4RunList *pkt)
{
    DPRINTF(PM4PacketProcessor, "PM4 run_list base: %p size: %d\n",
            pkt->ibBase, pkt->ibSize);

    q->incRptr(sizeof(PM4RunList));

    q->ib(true);
    q->ibBase(pkt->ibBase);
    q->rptr(0);
    q->wptr(pkt->ibSize * sizeof(uint32_t));

    delete pkt;
    decodeNext(q);
}

void
PM4PacketProcessor::indirectBuffer(PM4Queue *q, PM4IndirectBuf *pkt)
{
    DPRINTF(PM4PacketProcessor, "PM4 indirect buffer, base: %p.\n",
            pkt->ibBase);

    q->incRptr(sizeof(PM4IndirectBuf));

    q->ib(true);
    q->ibBase(pkt->ibBase);
    q->wptr(pkt->ibSize * sizeof(uint32_t));

    delete pkt;
    decodeNext(q);
}

void
PM4PacketProcessor::switchBuffer(PM4Queue *q, PM4SwitchBuf *pkt)
{
    q->incRptr(sizeof(PM4SwitchBuf));

    q->ib(true);
    DPRINTF(PM4PacketProcessor, "PM4 switching buffer, rptr: %p.\n",
            q->wptr());

    delete pkt;
    decodeNext(q);
}

void
PM4PacketProcessor::setUconfigReg(PM4Queue *q, PM4SetUconfigReg *pkt)
{
    q->incRptr(sizeof(PM4SetUconfigReg));

    DPRINTF(PM4PacketProcessor, "SetUconfig offset %x data %x\n",
            pkt->offset, pkt->data);

    // SET_UCONFIG_REG_START and pkt->offset are dword addresses
    uint32_t reg_addr = (PACKET3_SET_UCONFIG_REG_START + pkt->offset) * 4;

    // Additional CPs respond to addresses 0x40000 apart.
    reg_addr += 0x40000 * getIpId();
    gpuDevice->setRegVal(reg_addr, pkt->data);

    delete pkt;
    decodeNext(q);
}

void
PM4PacketProcessor::waitRegMem(PM4Queue *q, PM4WaitRegMem *pkt)
{
    q->incRptr(sizeof(PM4WaitRegMem));

    DPRINTF(PM4PacketProcessor, "PM4 WAIT_REG_MEM\nfunc: %d memSpace: %d op: "
            "%d\n", pkt->function, pkt->memSpace, pkt->operation);
    DPRINTF(PM4PacketProcessor, " AddrLo/Reg1: %lx\n", pkt->memAddrLo);
    DPRINTF(PM4PacketProcessor, " AddrHi/Reg2: %lx\n", pkt->memAddrHi);
    DPRINTF(PM4PacketProcessor, " Reference: %lx\n", pkt->reference);
    DPRINTF(PM4PacketProcessor, " Mask: %lx\n", pkt->mask);
    DPRINTF(PM4PacketProcessor, " Poll Interval: %lx\n", pkt->pollInterval);

    delete pkt;
    decodeNext(q);
}

void
PM4PacketProcessor::queryStatus(PM4Queue *q, PM4QueryStatus *pkt)
{
    q->incRptr(sizeof(PM4QueryStatus));

    DPRINTF(PM4PacketProcessor, "PM4 query status contextId: %d, interruptSel:"
            " %d command: %d, pasid: %d, doorbellOffset: %d, engineSel: %d "
            "addr: %lx, data: %lx\n", pkt->contextId, pkt->interruptSel,
            pkt->command, pkt->pasid, pkt->doorbellOffset, pkt->engineSel,
            pkt->addr, pkt->data);

    if (pkt->interruptSel == 0 && pkt->command == 2) {
        // Write data value to fence address
        Addr addr = getGARTAddr(pkt->addr);
        DPRINTF(PM4PacketProcessor, "Using GART addr %lx\n", addr);
        auto cb = new DmaVirtCallback<uint64_t>(
            [ = ] (const uint64_t &) { queryStatusDone(q, pkt); }, pkt->data);
        dmaWriteVirt(addr, sizeof(uint64_t), cb, &cb->dmaBuffer);
    } else {
        // No other combinations used in amdkfd v9
        panic("query_status with interruptSel %d command %d not supported",
              pkt->interruptSel, pkt->command);
    }
}

void
PM4PacketProcessor::queryStatusDone(PM4Queue *q, PM4QueryStatus *pkt)
{
    DPRINTF(PM4PacketProcessor, "PM4 query status complete\n");

    delete pkt;
    decodeNext(q);
}

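// MMIO writes from the driver program either the KIQ (CP_HQD_* hardware
// queue descriptor registers) or the primary ring (CP_RB* registers)
// one dword at a time; each register maps to a small setter below.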
void
PM4PacketProcessor::writeMMIO(PacketPtr pkt, Addr mmio_offset)
{
    switch (mmio_offset) {
      /* Hardware queue descriptor (HQD) registers */
      case mmCP_HQD_VMID:
        setHqdVmid(pkt->getLE<uint32_t>());
        break;
      case mmCP_HQD_ACTIVE:
        setHqdActive(pkt->getLE<uint32_t>());
        break;
      case mmCP_HQD_PQ_BASE:
        setHqdPqBase(pkt->getLE<uint32_t>());
        break;
      case mmCP_HQD_PQ_BASE_HI:
        setHqdPqBaseHi(pkt->getLE<uint32_t>());
        break;
      case mmCP_HQD_PQ_DOORBELL_CONTROL:
        setHqdPqDoorbellCtrl(pkt->getLE<uint32_t>());
        gpuDevice->setDoorbellType(getKiqDoorbellOffset(),
                                   QueueType::Compute);
        break;
      case mmCP_HQD_PQ_RPTR:
        setHqdPqPtr(pkt->getLE<uint32_t>());
        break;
      case mmCP_HQD_PQ_WPTR_LO:
        setHqdPqWptrLo(pkt->getLE<uint32_t>());
        break;
      case mmCP_HQD_PQ_WPTR_HI:
        setHqdPqWptrHi(pkt->getLE<uint32_t>());
        break;
      case mmCP_HQD_PQ_RPTR_REPORT_ADDR:
        setHqdPqRptrReportAddr(pkt->getLE<uint32_t>());
        break;
      case mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI:
        setHqdPqRptrReportAddrHi(pkt->getLE<uint32_t>());
        break;
      case mmCP_HQD_PQ_WPTR_POLL_ADDR:
        setHqdPqWptrPollAddr(pkt->getLE<uint32_t>());
        break;
      case mmCP_HQD_PQ_WPTR_POLL_ADDR_HI:
        setHqdPqWptrPollAddrHi(pkt->getLE<uint32_t>());
        break;
      case mmCP_HQD_PQ_CONTROL:
        setHqdPqControl(pkt->getLE<uint32_t>());
        break;
      case mmCP_HQD_IB_CONTROL:
        setHqdIbCtrl(pkt->getLE<uint32_t>());
        break;
      /* Ring buffer registers */
      case mmCP_RB_VMID:
        setRbVmid(pkt->getLE<uint32_t>());
        break;
      case mmCP_RB0_CNTL:
        setRbCntl(pkt->getLE<uint32_t>());
        break;
      case mmCP_RB0_WPTR:
        setRbWptrLo(pkt->getLE<uint32_t>());
        break;
      case mmCP_RB0_WPTR_HI:
        setRbWptrHi(pkt->getLE<uint32_t>());
        break;
      case mmCP_RB0_RPTR_ADDR:
        setRbRptrAddrLo(pkt->getLE<uint32_t>());
        break;
      case mmCP_RB0_RPTR_ADDR_HI:
        setRbRptrAddrHi(pkt->getLE<uint32_t>());
        break;
      case mmCP_RB_WPTR_POLL_ADDR_LO:
        setRbWptrPollAddrLo(pkt->getLE<uint32_t>());
        break;
      case mmCP_RB_WPTR_POLL_ADDR_HI:
        setRbWptrPollAddrHi(pkt->getLE<uint32_t>());
        break;
      case mmCP_RB0_BASE:
        setRbBaseLo(pkt->getLE<uint32_t>());
        break;
      case mmCP_RB0_BASE_HI:
        setRbBaseHi(pkt->getLE<uint32_t>());
        break;
      case mmCP_RB_DOORBELL_CONTROL:
        setRbDoorbellCntrl(pkt->getLE<uint32_t>());
        gpuDevice->setDoorbellType(getPqDoorbellOffset(), QueueType::Gfx);
        break;
      case mmCP_RB_DOORBELL_RANGE_LOWER:
        setRbDoorbellRangeLo(pkt->getLE<uint32_t>());
        break;
      case mmCP_RB_DOORBELL_RANGE_UPPER:
        setRbDoorbellRangeHi(pkt->getLE<uint32_t>());
        break;
      default:
        break;
    }
}

void
PM4PacketProcessor::setHqdVmid(uint32_t data)
{
    kiq.hqd_vmid = data;
}

void
PM4PacketProcessor::setHqdActive(uint32_t data)
{
    kiq.hqd_active = data;
}

void
PM4PacketProcessor::setHqdPqBase(uint32_t data)
{
    kiq.hqd_pq_base_lo = data;
}

void
PM4PacketProcessor::setHqdPqBaseHi(uint32_t data)
{
    kiq.hqd_pq_base_hi = data;
}

void
PM4PacketProcessor::setHqdPqDoorbellCtrl(uint32_t data)
{
    kiq.hqd_pq_doorbell_control = data;
}

void
PM4PacketProcessor::setHqdPqPtr(uint32_t data)
{
    kiq.rptr = data;
}

void
PM4PacketProcessor::setHqdPqWptrLo(uint32_t data)
{
    /* Write pointer communicated through doorbell value. */
}

void
PM4PacketProcessor::setHqdPqWptrHi(uint32_t data)
{
    /* Write pointer communicated through doorbell value. */
}

void
PM4PacketProcessor::setHqdPqRptrReportAddr(uint32_t data)
{
    kiq.hqd_pq_rptr_report_addr_lo = data;
}

void
PM4PacketProcessor::setHqdPqRptrReportAddrHi(uint32_t data)
{
    kiq.hqd_pq_rptr_report_addr_hi = data;
}

void
PM4PacketProcessor::setHqdPqWptrPollAddr(uint32_t data)
{
    kiq.hqd_pq_wptr_poll_addr_lo = data;
}

void
PM4PacketProcessor::setHqdPqWptrPollAddrHi(uint32_t data)
{
    kiq.hqd_pq_wptr_poll_addr_hi = data;
}

void
PM4PacketProcessor::setHqdPqControl(uint32_t data)
{
    kiq.hqd_pq_control = data;
}

void
PM4PacketProcessor::setHqdIbCtrl(uint32_t data)
{
    kiq.hqd_ib_control = data;
}

void
PM4PacketProcessor::setRbVmid(uint32_t data)
{
    pq.hqd_vmid = data;
}

void
PM4PacketProcessor::setRbCntl(uint32_t data)
{
    pq.hqd_pq_control = data;
}

void
PM4PacketProcessor::setRbWptrLo(uint32_t data)
{
    /* Write pointer communicated through doorbell value. */
}

void
PM4PacketProcessor::setRbWptrHi(uint32_t data)
{
    /* Write pointer communicated through doorbell value. */
}

void
PM4PacketProcessor::setRbRptrAddrLo(uint32_t data)
{
    pq.queueRptrAddrLo = data;
}

void
PM4PacketProcessor::setRbRptrAddrHi(uint32_t data)
{
    pq.queueRptrAddrHi = data;
}

void
PM4PacketProcessor::setRbWptrPollAddrLo(uint32_t data)
{
    pq.hqd_pq_wptr_poll_addr_lo = data;
}

void
PM4PacketProcessor::setRbWptrPollAddrHi(uint32_t data)
{
    pq.hqd_pq_wptr_poll_addr_hi = data;
}

void
PM4PacketProcessor::setRbBaseLo(uint32_t data)
{
    pq.hqd_pq_base_lo = data;
}

void
PM4PacketProcessor::setRbBaseHi(uint32_t data)
{
    pq.hqd_pq_base_hi = data;
}

void
PM4PacketProcessor::setRbDoorbellCntrl(uint32_t data)
{
    pq.doorbellOffset = data & 0x1ffffffc;
}

void
PM4PacketProcessor::setRbDoorbellRangeLo(uint32_t data)
{
    pq.doorbellRangeLo = data;
}

void
PM4PacketProcessor::setRbDoorbellRangeHi(uint32_t data)
{
    pq.doorbellRangeHi = data;
}

void
PM4PacketProcessor::serialize(CheckpointOut &cp) const
{
    // Serialize the DmaVirtDevice base class
    DmaVirtDevice::serialize(cp);

    int num_queues = queues.size();
    Addr id[num_queues];
    Addr mqd_base[num_queues];
    uint64_t mqd_read_index[num_queues];
    Addr base[num_queues];
    Addr rptr[num_queues];
    Addr wptr[num_queues];
    Addr ib_base[num_queues];
    Addr ib_rptr[num_queues];
    Addr ib_wptr[num_queues];
    Addr offset[num_queues];
    bool processing[num_queues];
    bool ib[num_queues];
    uint32_t me[num_queues];
    uint32_t pipe[num_queues];
    uint32_t queue[num_queues];
    bool privileged[num_queues];
    uint32_t hqd_active[num_queues];
    uint32_t hqd_vmid[num_queues];
    Addr aql_rptr[num_queues];
    uint32_t aql[num_queues];
    uint32_t doorbell[num_queues];
    uint32_t hqd_pq_control[num_queues];

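    // Snapshot per-queue state. Toggling ib() off and on lets the same
    // accessors capture both the main ring and the indirect-buffer
    // pointers for each queue.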
    int i = 0;
    for (auto iter : queues) {
        PM4Queue *q = iter.second;
        id[i] = q->id();
        mqd_base[i] = q->mqdBase();
        mqd_read_index[i] = q->getMQD()->mqdReadIndex;
        bool cur_state = q->ib();
        q->ib(false);
        base[i] = q->base();
        rptr[i] = q->getRptr();
        wptr[i] = q->getWptr();
        q->ib(true);
        ib_base[i] = q->ibBase();
        ib_rptr[i] = q->getRptr();
        ib_wptr[i] = q->getWptr();
        q->ib(cur_state);
        offset[i] = q->offset();
        processing[i] = q->processing();
        ib[i] = q->ib();
        me[i] = q->me();
        pipe[i] = q->pipe();
        queue[i] = q->queue();
        privileged[i] = q->privileged();
        hqd_active[i] = q->getMQD()->hqd_active;
        hqd_vmid[i] = q->getMQD()->hqd_vmid;
        aql_rptr[i] = q->getMQD()->aqlRptr;
        aql[i] = q->getMQD()->aql;
        doorbell[i] = q->getMQD()->doorbell;
        hqd_pq_control[i] = q->getMQD()->hqd_pq_control;
        i++;
    }

    SERIALIZE_SCALAR(num_queues);
    SERIALIZE_ARRAY(id, num_queues);
    SERIALIZE_ARRAY(mqd_base, num_queues);
    SERIALIZE_ARRAY(mqd_read_index, num_queues);
    SERIALIZE_ARRAY(base, num_queues);
    SERIALIZE_ARRAY(rptr, num_queues);
    SERIALIZE_ARRAY(wptr, num_queues);
    SERIALIZE_ARRAY(ib_base, num_queues);
    SERIALIZE_ARRAY(ib_rptr, num_queues);
    SERIALIZE_ARRAY(ib_wptr, num_queues);
    SERIALIZE_ARRAY(offset, num_queues);
    SERIALIZE_ARRAY(processing, num_queues);
    SERIALIZE_ARRAY(ib, num_queues);
    SERIALIZE_ARRAY(me, num_queues);
    SERIALIZE_ARRAY(pipe, num_queues);
    SERIALIZE_ARRAY(queue, num_queues);
    SERIALIZE_ARRAY(privileged, num_queues);
    SERIALIZE_ARRAY(hqd_active, num_queues);
    SERIALIZE_ARRAY(hqd_vmid, num_queues);
    SERIALIZE_ARRAY(aql_rptr, num_queues);
    SERIALIZE_ARRAY(aql, num_queues);
    SERIALIZE_ARRAY(doorbell, num_queues);
    SERIALIZE_ARRAY(hqd_pq_control, num_queues);
}

void
PM4PacketProcessor::unserialize(CheckpointIn &cp)
{
    // Unserialize the DmaVirtDevice base class
    DmaVirtDevice::unserialize(cp);

    int num_queues = 0;
    UNSERIALIZE_SCALAR(num_queues);

    Addr id[num_queues];
    Addr mqd_base[num_queues];
    uint64_t mqd_read_index[num_queues];
    Addr base[num_queues];
    Addr rptr[num_queues];
    Addr wptr[num_queues];
    Addr ib_base[num_queues];
    Addr ib_rptr[num_queues];
    Addr ib_wptr[num_queues];
    Addr offset[num_queues];
    bool processing[num_queues];
    bool ib[num_queues];
    uint32_t me[num_queues];
    uint32_t pipe[num_queues];
    uint32_t queue[num_queues];
    bool privileged[num_queues];
    uint32_t hqd_active[num_queues];
    uint32_t hqd_vmid[num_queues];
    Addr aql_rptr[num_queues];
    uint32_t aql[num_queues];
    uint32_t doorbell[num_queues];
    uint32_t hqd_pq_control[num_queues];

    UNSERIALIZE_ARRAY(id, num_queues);
    UNSERIALIZE_ARRAY(mqd_base, num_queues);
    UNSERIALIZE_ARRAY(mqd_read_index, num_queues);
    UNSERIALIZE_ARRAY(base, num_queues);
    UNSERIALIZE_ARRAY(rptr, num_queues);
    UNSERIALIZE_ARRAY(wptr, num_queues);
    UNSERIALIZE_ARRAY(ib_base, num_queues);
    UNSERIALIZE_ARRAY(ib_rptr, num_queues);
    UNSERIALIZE_ARRAY(ib_wptr, num_queues);
    UNSERIALIZE_ARRAY(offset, num_queues);
    UNSERIALIZE_ARRAY(processing, num_queues);
    UNSERIALIZE_ARRAY(ib, num_queues);
    UNSERIALIZE_ARRAY(me, num_queues);
    UNSERIALIZE_ARRAY(pipe, num_queues);
    UNSERIALIZE_ARRAY(queue, num_queues);
    UNSERIALIZE_ARRAY(privileged, num_queues);
    UNSERIALIZE_ARRAY(hqd_active, num_queues);
    UNSERIALIZE_ARRAY(hqd_vmid, num_queues);
    UNSERIALIZE_ARRAY(aql_rptr, num_queues);
    UNSERIALIZE_ARRAY(aql, num_queues);
    UNSERIALIZE_ARRAY(doorbell, num_queues);
    UNSERIALIZE_ARRAY(hqd_pq_control, num_queues);

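    // Rebuild each queue from its saved MQD fields, then restore either
    // the ring or the indirect-buffer pointers depending on which was
    // active at checkpoint time.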
    for (int i = 0; i < num_queues; i++) {
        QueueDesc *mqd = new QueueDesc();
        memset(mqd, 0, sizeof(QueueDesc));

        mqd->mqdBase = mqd_base[i] >> 8;
        mqd->mqdReadIndex = mqd_read_index[i];
        mqd->base = base[i] >> 8;
        mqd->aql = aql[i];

        PM4MapQueues* pkt = new PM4MapQueues;
        memset(pkt, 0, sizeof(PM4MapQueues));
        newQueue(mqd, offset[i], pkt, id[i]);

        if (ib[i]) {
            queues[id[i]]->wptr(ib_wptr[i]);
            queues[id[i]]->rptr(ib_rptr[i]);
        } else {
            queues[id[i]]->rptr(rptr[i]);
            queues[id[i]]->wptr(wptr[i]);
        }
        queues[id[i]]->ib(ib[i]);
        queues[id[i]]->offset(offset[i]);
        queues[id[i]]->processing(processing[i]);
        queues[id[i]]->setPkt(me[i], pipe[i], queue[i], privileged[i]);
        queues[id[i]]->getMQD()->hqd_active = hqd_active[i];
        queues[id[i]]->getMQD()->hqd_vmid = hqd_vmid[i];
        queues[id[i]]->getMQD()->aqlRptr = aql_rptr[i];
        queues[id[i]]->getMQD()->doorbell = doorbell[i];
        queues[id[i]]->getMQD()->hqd_pq_control = hqd_pq_control[i];

        if (mqd->aql) {
            int mqd_size = (1 << ((hqd_pq_control[i] & 0x3f) + 1)) * 4;
            auto &hsa_pp = gpuDevice->CP()->hsaPacketProc();
            hsa_pp.setDeviceQueueDesc(aql_rptr[i], base[i], id[i],
                                      mqd_size, 8, GfxVersion::gfx900,
                                      offset[i], mqd_read_index[i]);
        }

        DPRINTF(PM4PacketProcessor, "PM4 queue %d, rptr: %p wptr: %p\n",
                queues[id[i]]->id(), queues[id[i]]->rptr(),
                queues[id[i]]->wptr());
    }
}

} // namespace gem5