gem5 [DEVELOP-FOR-25.1]
Loading...
Searching...
No Matches
sdma_engine.cc
Go to the documentation of this file.
1/*
2 * Copyright (c) 2021 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * 3. Neither the name of the copyright holder nor the names of its
16 * contributors may be used to endorse or promote products derived from this
17 * software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
33
35#include "arch/generic/mmu.hh"
36#include "debug/SDMAData.hh"
37#include "debug/SDMAEngine.hh"
42#include "mem/packet.hh"
43#include "mem/packet_access.hh"
44#include "params/SDMAEngine.hh"
45
46namespace gem5
47{
48
// Construct the SDMA engine and wire up its queue pairs: the gfx and
// page queues are enabled immediately and each is linked to a companion
// indirect-buffer (IB) queue. The two RLC queues are only paired with
// their IB queues here; they are not marked valid until
// registerRLCQueue() maps a doorbell to them.
// NOTE(review): the Doxygen extraction dropped two member-initializer
// lines here (source lines 51-52); recover them from upstream gem5
// before compiling.
49SDMAEngine::SDMAEngine(const SDMAEngineParams &p)
50 : DmaVirtDevice(p), id(0), gfxBase(0), gfxRptr(0),
53 pageWptr(0), gpuDevice(nullptr), walker(p.walker),
54 mmioBase(p.mmio_base), mmioSize(p.mmio_size)
55{
56 gfx.ib(&gfxIb);
57 gfxIb.parent(&gfx);
58 gfx.valid(true);
59 gfxIb.valid(true);
60 gfx.queueType(SDMAGfx);
61 gfxIb.queueType(SDMAGfx);
62
63 page.ib(&pageIb);
64 pageIb.parent(&page);
65 page.valid(true);
66 pageIb.valid(true);
67 page.queueType(SDMAPage);
68 pageIb.queueType(SDMAPage);
69
70 rlc0.ib(&rlc0Ib);
71 rlc0Ib.parent(&rlc0);
72
73 rlc1.ib(&rlc1Ib);
74 rlc1Ib.parent(&rlc1);
75}
76
// Late-bind the owning GPU device and point the page-table walker's
// device requestor at that device's VRAM requestor ID.
// NOTE(review): the extraction dropped the signature line (source line
// 78); the parameter is referred to below as gpu_device.
77void
79{
80 gpuDevice = gpu_device;
81 walker->setDevRequestor(gpuDevice->vramRequestorId());
82}
83
// Map an SDMA engine id (0-7) to the interrupt-handler client ID used
// when posting interrupt cookies (see trap()). An unknown id panics.
// NOTE(review): the extraction dropped the signature line and the
// per-case return statements (source lines 85, 89, 91, 93, 95, 97, 99,
// 101, 103) -- each case presumably returns the matching IH client id;
// recover from upstream gem5 before compiling.
84int
86{
87 switch (_id) {
88 case 0:
90 case 1:
92 case 2:
94 case 3:
96 case 4:
98 case 5:
100 case 6:
102 case 7:
104 default:
105 panic("Unknown SDMA id");
106 }
107}
108
// Rewrite an address into the simulator's GART aperture layout unless
// it lies in the AGP aperture (AGP addresses pass through unchanged).
// The page frame number is shifted left by 3 while the low 12 bits
// (the page offset) are preserved.
// NOTE(review): the extraction dropped the signature line (source line
// 110); the parameter is referred to below as addr.
109Addr
111{
112 if (!gpuDevice->getVM().inAGP(addr)) {
113 Addr low_bits = bits(addr, 11, 0);
114 addr = (((addr >> 12) << 3) << 12) | low_bits;
115 }
116 return addr;
117}
118
// Determine whether raw_addr refers to device-local memory and, if so,
// return the device address (MMHUB base subtracted); returns 0 for
// host addresses.
// NOTE(review): the extraction dropped the signature line (source line
// 120); the parameter is referred to below as raw_addr.
119Addr
121{
122 // SDMA packets can access both host and device memory as either a source
123 // or destination address. We don't know which until it is translated, so
124 // we do a dummy functional translation to determine if the address
125 // resides in system memory or not.
126 auto tgen = translate(raw_addr, 64);
127 auto addr_range = *(tgen->begin());
128 Addr tmp_addr = addr_range.paddr;
129 DPRINTF(SDMAEngine, "getDeviceAddress raw_addr %#lx -> %#lx\n",
130 raw_addr, tmp_addr);
131
132 // SDMA packets will access device memory through the MMHUB aperture in
133 // supervisor mode (vmid == 0) and in user mode (vmid > 0). In the case
134 // of vmid == 0 the address is already an MMHUB address in the packet,
135 // so simply subtract the MMHUB base. For vmid > 0 the address is a
136 // virtual address that must first be translated. The translation will
137 // return an MMHUB address, then we can similarly subtract the base to
138 // get the device address. Otherwise, for host, device address is 0.
139 Addr device_addr = 0;
140 if ((gpuDevice->getVM().inMMHUB(raw_addr) && cur_vmid == 0) ||
141 (gpuDevice->getVM().inMMHUB(tmp_addr) && cur_vmid != 0)) {
142 if (cur_vmid == 0) {
143 device_addr = raw_addr - gpuDevice->getVM().getMMHUBBase();
144 } else {
145 device_addr = tmp_addr - gpuDevice->getVM().getMMHUBBase();
146 }
147 }
148
149 return device_addr;
150}
151
// Select the translation generator for a virtual address: user-queue
// (vmid > 0) translation via the page-table walker, otherwise the AGP,
// MMHUB, or (by default) GART aperture translators.
// NOTE(review): the extraction dropped source lines 152-158 (the
// function comment and signature), line 162 (the user-translation
// return expression head) and line 171 (the MMHUB return expression
// head); recover from upstream gem5 before compiling.
159{
160 if (cur_vmid > 0) {
161 // Only user translation is available to user queues (vmid > 0)
163 &gpuDevice->getVM(), walker,
164 cur_vmid, vaddr, size));
165 } else if (gpuDevice->getVM().inAGP(vaddr)) {
166 // Use AGP translation gen
167 return TranslationGenPtr(
168 new AMDGPUVM::AGPTranslationGen(&gpuDevice->getVM(), vaddr, size));
169 } else if (gpuDevice->getVM().inMMHUB(vaddr)) {
170 // Use MMHUB translation gen
172 &gpuDevice->getVM(), vaddr, size));
173 }
174
175 // Assume GART otherwise as this is the only other translation aperture
176 // available to the SDMA engine processor.
177 return TranslationGenPtr(
178 new AMDGPUVM::GARTTranslationGen(&gpuDevice->getVM(), vaddr, size));
179}
180
// Map a doorbell to the first free RLC queue, initializing the queue
// state (base, size, rptr/wptr, writeback address, privilege and
// static flags) from the memory queue descriptor (MQD). Panics if both
// RLC slots are occupied.
// NOTE(review): the extraction dropped the first signature line
// (source line 182); the parameters referred to below are doorbell,
// mqdAddr, mqd and isStatic.
181void
183 bool isStatic)
184{
185 uint32_t rlc_size = 4UL << bits(mqd->sdmax_rlcx_rb_cntl, 6, 1);
186 Addr rptr_wb_addr = mqd->sdmax_rlcx_rb_rptr_addr_hi;
187 rptr_wb_addr <<= 32;
188 rptr_wb_addr |= mqd->sdmax_rlcx_rb_rptr_addr_lo;
189 bool priv = bits(mqd->sdmax_rlcx_rb_cntl, 23, 23);
190
191 // Get first free RLC
192 if (!rlc0.valid()) {
193 DPRINTF(SDMAEngine, "Doorbell %lx mapped to RLC0\n", doorbell);
194 rlcInfo[0] = doorbell;
195 rlc0.valid(true);
196 rlc0.base(mqd->rb_base << 8);
197 rlc0.size(rlc_size);
198 rlc0.rptr(0);
199 rlc0.incRptr(mqd->rptr);
200 rlc0.setWptr(mqd->wptr);
201 rlc0.rptrWbAddr(rptr_wb_addr);
202 rlc0.processing(false);
203 rlc0.setMQD(mqd);
204 rlc0.setMQDAddr(mqdAddr);
205 rlc0.setPriv(priv);
206 rlc0.setStatic(isStatic);
207 } else if (!rlc1.valid()) {
208 DPRINTF(SDMAEngine, "Doorbell %lx mapped to RLC1\n", doorbell);
209 rlcInfo[1] = doorbell;
210 rlc1.valid(true);
211 rlc1.base(mqd->rb_base << 8);
212 rlc1.size(rlc_size);
213 rlc1.rptr(0);
214 rlc1.incRptr(mqd->rptr);
215 rlc1.setWptr(mqd->wptr);
216 rlc1.rptrWbAddr(rptr_wb_addr);
217 rlc1.processing(false);
218 rlc1.setMQD(mqd);
219 rlc1.setMQDAddr(mqdAddr);
220 rlc1.setPriv(priv);
221 rlc1.setStatic(isStatic);
222 } else {
223 panic("No free RLCs. Check they are properly unmapped.");
224 }
225}
226
227void
228SDMAEngine::unregisterRLCQueue(Addr doorbell, bool unmap_static)
229{
230 DPRINTF(SDMAEngine, "Unregistering RLC queue at %#lx\n", doorbell);
231 if (rlcInfo[0] == doorbell) {
232 if (!unmap_static && rlc0.isStatic()) {
233 DPRINTF(SDMAEngine, "RLC0 is static. Will not unregister.\n");
234 return;
235 }
236
237 SDMAQueueDesc *mqd = rlc0.getMQD();
238 if (mqd) {
239 DPRINTF(SDMAEngine, "Writing RLC0 SDMAMQD back to %#lx\n",
240 rlc0.getMQDAddr());
241
242 mqd->rptr = rlc0.globalRptr();
243 mqd->wptr = rlc0.getWptr();
244
245 auto cb = new DmaVirtCallback<uint32_t>(
246 [ = ] (const uint32_t &) { });
247 dmaWriteVirt(rlc0.getMQDAddr(), sizeof(SDMAQueueDesc), cb, mqd);
248 } else {
249 warn("RLC0 SDMAMQD address invalid\n");
250 }
251 rlc0.valid(false);
252 rlcInfo[0] = 0;
253 } else if (rlcInfo[1] == doorbell) {
254 if (!unmap_static && rlc1.isStatic()) {
255 DPRINTF(SDMAEngine, "RLC1 is static. Will not unregister.\n");
256 return;
257 }
258
259 SDMAQueueDesc *mqd = rlc1.getMQD();
260 if (mqd) {
261 DPRINTF(SDMAEngine, "Writing RLC1 SDMAMQD back to %#lx\n",
262 rlc1.getMQDAddr());
263
264 mqd->rptr = rlc1.globalRptr();
265 mqd->wptr = rlc1.getWptr();
266
267 auto cb = new DmaVirtCallback<uint32_t>(
268 [ = ] (const uint32_t &) { });
269 dmaWriteVirt(rlc1.getMQDAddr(), sizeof(SDMAQueueDesc), cb, mqd);
270 } else {
271 warn("RLC1 SDMAMQD address invalid\n");
272 }
273 rlc1.valid(false);
274 rlcInfo[1] = 0;
275 } else {
276 panic("Cannot unregister: no RLC queue at %#lx\n", doorbell);
277 }
278
279 gpuDevice->unsetDoorbell(doorbell);
280}
281
// Unregister every currently-mapped RLC queue (non-zero doorbell in
// rlcInfo), forwarding the unmap_static flag to unregisterRLCQueue().
// NOTE(review): the extraction dropped the signature line (source line
// 283); the parameter is referred to below as unmap_static.
282void
284{
285 for (auto doorbell: rlcInfo) {
286 if (doorbell) {
287 unregisterRLCQueue(doorbell, unmap_static);
288 }
289 }
290}
291
292/* Start decoding packets from the Gfx queue. */
// Update the gfx queue's write pointer and kick off packet decoding if
// the queue is not already being processed.
// NOTE(review): the extraction dropped the signature line (source line
// 294); the parameter is referred to below as wptrOffset.
293void
295{
296 gfx.setWptr(wptrOffset);
297 if (!gfx.processing()) {
298 gfx.processing(true);
299 decodeNext(&gfx);
300 }
301}
302
303/* Start decoding packets from the Page queue. */
// Update the page queue's write pointer and kick off packet decoding
// if the queue is not already being processed.
// NOTE(review): the extraction dropped the signature line (source line
// 305) and line 310, which -- by analogy with processGfx() -- is
// presumably decodeNext(&page); recover from upstream gem5.
304void
306{
307 page.setWptr(wptrOffset);
308 if (!page.processing()) {
309 page.processing(true);
311 }
312}
313
314/* Process RLC queue at given doorbell. */
315void
316SDMAEngine::processRLC(Addr doorbellOffset, Addr wptrOffset)
317{
318 if (rlcInfo[0] == doorbellOffset) {
319 processRLC0(wptrOffset);
320 } else if (rlcInfo[1] == doorbellOffset) {
321 processRLC1(wptrOffset);
322 } else {
323 panic("Cannot process: no RLC queue at %#lx\n", doorbellOffset);
324 }
325}
326
327/* Start decoding packets from the RLC0 queue. */
// Update RLC0's write pointer and start decoding under vmid 1 if the
// queue is idle. The queue must already be registered (valid).
// NOTE(review): the extraction dropped the signature line (source line
// 329) and line 337, which -- by analogy with processGfx() -- is
// presumably decodeNext(&rlc0); recover from upstream gem5.
328void
330{
331 assert(rlc0.valid());
332
333 rlc0.setWptr(wptrOffset);
334 if (!rlc0.processing()) {
335 cur_vmid = 1;
336 rlc0.processing(true);
338 }
339}
340
341/* Start decoding packets from the RLC1 queue. */
// Update RLC1's write pointer and start decoding under vmid 1 if the
// queue is idle. The queue must already be registered (valid).
// NOTE(review): the extraction dropped the signature line (source line
// 343) and line 351, presumably decodeNext(&rlc1); recover from
// upstream gem5.
342void
344{
345 assert(rlc1.valid());
346
347 rlc1.setWptr(wptrOffset);
348 if (!rlc1.processing()) {
349 cur_vmid = 1;
350 rlc1.processing(true);
352 }
353}
354
355/* Decoding next packet in the queue. */
// Main decode loop step: if rptr has not caught up with wptr, read the
// next 4-byte packet header and hand it to decodeHeader(); otherwise
// write the rptr back to the host, mark the queue idle and, for IB
// queues, resume decoding in the parent queue.
// NOTE(review): the extraction dropped the signature line (source line
// 357); the parameter is referred to below as q.
356void
358{
359 DPRINTF(SDMAEngine, "SDMA decode rptr %p wptr %p\n", q->rptr(), q->wptr());
360
361 if (q->rptr() != q->wptr()) {
362 // We are using lambda functions passed to the DmaVirtCallback objects
363 // which will call the actual callback method (e.g., decodeHeader).
364 // The dmaBuffer member of the DmaVirtCallback is passed to the lambda
365 // function as header in this case.
366 auto cb = new DmaVirtCallback<uint32_t>(
367 [ = ] (const uint32_t &header)
368 { decodeHeader(q, header); });
369 dmaReadVirt(q->rptr(), sizeof(uint32_t), cb, &cb->dmaBuffer,
370 sdma_delay);
371 } else {
372 // The driver expects the rptr to be written back to host memory
373 // periodically. In simulation, we writeback rptr after each burst of
374 // packets from a doorbell, rather than using the cycle count which
375 // is not accurate in all simulation settings (e.g., KVM).
376 DPRINTF(SDMAEngine, "Writing rptr %#lx back to host addr %#lx\n",
377 q->globalRptr(), q->rptrWbAddr());
378 if (q->rptrWbAddr()) {
379 auto cb = new DmaVirtCallback<uint64_t>(
380 [ = ](const uint64_t &) { }, q->globalRptr());
381 dmaWriteVirt(q->rptrWbAddr(), sizeof(Addr), cb, &cb->dmaBuffer);
382 }
383 q->processing(false);
384 if (q->parent()) {
385 DPRINTF(SDMAEngine, "SDMA switching queues\n");
386 // If current vmid is non-zero, set it back to 0 before
387 // switching back to parent
388 cur_vmid = 0;
389 decodeNext(q->parent());
390 }
391 cur_vmid = 0;
392 }
393}
394
395/* Decoding the header of a packet. */
// Decode one SDMA packet header: extract the opcode (bits 7:0) and
// sub-opcode (bits 15:8), then either handle the packet inline (NOP,
// SEM, COND_EXE, TIMESTAMP, PRE_EXE, DUMMY_TRAP) or DMA-read the
// packet body from the queue and dispatch to the matching handler
// (copy, write, indirectBuffer, fence, trap, pollRegMem, atomic,
// constFill, ptePde, srbmWrite). Unimplemented sub-opcodes panic.
// NOTE(review): the extraction dropped many lines from this function
// (the signature, the sub-opcode case labels, and most of the
// "cb = new DmaVirtCallback<uint64_t>(" lines preceding the lambdas);
// recover the full body from upstream gem5 before compiling.
// NOTE(review): the SDMA_OP_COND_EXE case warns with the text
// "SDMA_OP_SEM not implemented" -- the message looks copy-pasted and
// should presumably read SDMA_OP_COND_EXE; confirm upstream.
396void
398{
399 q->incRptr(sizeof(header));
400 int opcode = bits(header, 7, 0);
401 int sub_opcode = bits(header, 15, 8);
402
403 DmaVirtCallback<uint64_t> *cb = nullptr;
404 void *dmaBuffer = nullptr;
405
406 DPRINTF(SDMAEngine, "SDMA header %x opcode %x sub-opcode %x\n",
407 header, opcode, sub_opcode);
408
409 switch(opcode) {
410 case SDMA_OP_NOP: {
411 uint32_t NOP_count = (header >> 16) & 0x3FFF;
412 DPRINTF(SDMAEngine, "SDMA NOP packet with count %d\n", NOP_count);
413 if (NOP_count > 0) {
414 for (int i = 0; i < NOP_count; ++i) {
415 if (q->rptr() == q->wptr()) {
416 warn("NOP count is beyond wptr, ignoring remaining NOPs");
417 break;
418 }
419 q->incRptr(4);
420 }
421 }
422 decodeNext(q);
423 } break;
424 case SDMA_OP_COPY: {
425 DPRINTF(SDMAEngine, "SDMA Copy packet\n");
426 switch (sub_opcode) {
428 dmaBuffer = new sdmaCopy();
430 [ = ] (const uint64_t &)
431 { copy(q, (sdmaCopy *)dmaBuffer); });
432 dmaReadVirt(q->rptr(), sizeof(sdmaCopy), cb, dmaBuffer,
433 sdma_delay);
434 } break;
436 panic("SDMA_SUBOP_COPY_LINEAR_SUB_WIND not implemented");
437 } break;
439 panic("SDMA_SUBOP_COPY_TILED not implemented");
440 } break;
442 panic("SDMA_SUBOP_COPY_TILED_SUB_WIND not implemented");
443 } break;
445 panic("SDMA_SUBOP_COPY_T2T_SUB_WIND not implemented");
446 } break;
447 case SDMA_SUBOP_COPY_SOA: {
448 panic("SDMA_SUBOP_COPY_SOA not implemented");
449 } break;
451 panic("SDMA_SUBOP_COPY_DIRTY_PAGE not implemented");
452 } break;
454 panic("SDMA_SUBOP_COPY_LINEAR_PHY not implemented");
455 } break;
456 default: {
457 panic("SDMA unknown copy sub-opcode.");
458 } break;
459 }
460 } break;
461 case SDMA_OP_WRITE: {
462 DPRINTF(SDMAEngine, "SDMA Write packet\n");
463 switch (sub_opcode) {
465 dmaBuffer = new sdmaWrite();
467 [ = ] (const uint64_t &)
468 { write(q, (sdmaWrite *)dmaBuffer); });
469 dmaReadVirt(q->rptr(), sizeof(sdmaWrite), cb, dmaBuffer,
470 sdma_delay);
471 } break;
473 panic("SDMA_SUBOP_WRITE_TILED not implemented.\n");
474 } break;
475 default:
476 break;
477 }
478 } break;
479 case SDMA_OP_INDIRECT: {
480 DPRINTF(SDMAEngine, "SDMA IndirectBuffer packet\n");
481 dmaBuffer = new sdmaIndirectBuffer();
483 [ = ] (const uint64_t &)
484 { indirectBuffer(q, (sdmaIndirectBuffer *)dmaBuffer,
485 header); });
486 dmaReadVirt(q->rptr(), sizeof(sdmaIndirectBuffer), cb, dmaBuffer,
487 sdma_delay);
488 } break;
489 case SDMA_OP_FENCE: {
490 DPRINTF(SDMAEngine, "SDMA Fence packet\n");
491 dmaBuffer = new sdmaFence();
493 [ = ] (const uint64_t &)
494 { fence(q, (sdmaFence *)dmaBuffer); });
495 dmaReadVirt(q->rptr(), sizeof(sdmaFence), cb, dmaBuffer,
496 sdma_delay);
497 } break;
498 case SDMA_OP_TRAP: {
499 DPRINTF(SDMAEngine, "SDMA Trap packet\n");
500 dmaBuffer = new sdmaTrap();
502 [ = ] (const uint64_t &)
503 { trap(q, (sdmaTrap *)dmaBuffer); });
504 dmaReadVirt(q->rptr(), sizeof(sdmaTrap), cb, dmaBuffer,
505 sdma_delay);
506 } break;
507 case SDMA_OP_SEM: {
508 q->incRptr(sizeof(sdmaSemaphore));
509 warn("SDMA_OP_SEM not implemented");
510 decodeNext(q);
511 } break;
512 case SDMA_OP_POLL_REGMEM: {
513 DPRINTF(SDMAEngine, "SDMA PollRegMem packet\n");
514 dmaBuffer = new sdmaPollRegMem();
516 [ = ] (const uint64_t &)
517 { pollRegMem(q, header, (sdmaPollRegMem *)dmaBuffer); });
518 dmaReadVirt(q->rptr(), sizeof(sdmaPollRegMem), cb, dmaBuffer,
519 sdma_delay);
520 switch (sub_opcode) {
522 panic("SDMA_SUBOP_POLL_REG_WRITE_MEM not implemented");
523 } break;
525 panic("SDMA_SUBOP_POLL_DBIT_WRITE_MEM not implemented");
526 } break;
528 panic("SDMA_SUBOP_POLL_MEM_VERIFY not implemented");
529 } break;
530 default:
531 break;
532 }
533 } break;
534 case SDMA_OP_COND_EXE: {
535 q->incRptr(sizeof(sdmaCondExec));
536 warn("SDMA_OP_SEM not implemented");
537 decodeNext(q);
538 } break;
539 case SDMA_OP_ATOMIC: {
540 DPRINTF(SDMAEngine, "SDMA Atomic packet\n");
541 dmaBuffer = new sdmaAtomic();
543 [ = ] (const uint64_t &)
544 { atomic(q, header, (sdmaAtomic *)dmaBuffer); });
545 dmaReadVirt(q->rptr(), sizeof(sdmaAtomic), cb, dmaBuffer,
546 sdma_delay);
547 } break;
548 case SDMA_OP_CONST_FILL: {
549 DPRINTF(SDMAEngine, "SDMA Constant fill packet\n");
550 dmaBuffer = new sdmaConstFill();
552 [ = ] (const uint64_t &)
553 { constFill(q, (sdmaConstFill *)dmaBuffer, header); });
554 dmaReadVirt(q->rptr(), sizeof(sdmaConstFill), cb, dmaBuffer,
555 sdma_delay);
556 } break;
557 case SDMA_OP_PTEPDE: {
558 DPRINTF(SDMAEngine, "SDMA PTEPDE packet\n");
559 switch (sub_opcode) {
561 DPRINTF(SDMAEngine, "SDMA PTEPDE_GEN sub-opcode\n");
562 dmaBuffer = new sdmaPtePde();
564 [ = ] (const uint64_t &)
565 { ptePde(q, (sdmaPtePde *)dmaBuffer); });
566 dmaReadVirt(q->rptr(), sizeof(sdmaPtePde), cb, dmaBuffer,
567 sdma_delay);
568 break;
570 panic("SDMA_SUBOP_PTEPDE_COPY not implemented");
571 break;
573 panic("SDMA_SUBOP_PTEPDE_COPY not implemented");
574 break;
576 panic("SDMA_SUBOP_PTEPDE_RMW not implemented");
577 } break;
578 default:
579 DPRINTF(SDMAEngine, "Unsupported PTEPDE sub-opcode %d\n",
580 sub_opcode);
581 decodeNext(q);
582 break;
583 }
584 } break;
585 case SDMA_OP_TIMESTAMP: {
586 q->incRptr(sizeof(sdmaTimestamp));
587 switch (sub_opcode) {
589 } break;
591 } break;
593 } break;
594 default:
595 break;
596 }
597 warn("SDMA_OP_TIMESTAMP not implemented");
598 decodeNext(q);
599 } break;
600 case SDMA_OP_SRBM_WRITE: {
601 DPRINTF(SDMAEngine, "SDMA SRBMWrite packet\n");
602 dmaBuffer = new sdmaSRBMWrite();
604 [ = ] (const uint64_t &)
605 { srbmWrite(q, header, (sdmaSRBMWrite *)dmaBuffer); });
606 dmaReadVirt(q->rptr(), sizeof(sdmaSRBMWrite), cb, dmaBuffer,
607 sdma_delay);
608 } break;
609 case SDMA_OP_PRE_EXE: {
610 q->incRptr(sizeof(sdmaPredExec));
611 warn("SDMA_OP_PRE_EXE not implemented");
612 decodeNext(q);
613 } break;
614 case SDMA_OP_DUMMY_TRAP: {
615 q->incRptr(sizeof(sdmaDummyTrap));
616 warn("SDMA_OP_DUMMY_TRAP not implemented");
617 decodeNext(q);
618 } break;
619 default: {
620 panic("Invalid SDMA packet.\n");
621 } break;
622 }
623}
624
625/* Implements a write packet. */
// First stage of a write packet: bump count (encoded as dwords - 1),
// allocate a staging buffer, and DMA-read the payload dwords out of
// the queue. writeReadData() runs when the read completes.
// NOTE(review): the extraction dropped the signature line (source line
// 627); the parameters referred to below are q and pkt.
626void
628{
629 q->incRptr(sizeof(sdmaWrite));
630 // count represents the number of dwords - 1 to write
631 pkt->count++;
632 DPRINTF(SDMAEngine, "Write %d dwords to %lx\n", pkt->count, pkt->dest);
633
634 // first we have to read needed data from the SDMA queue
635 uint32_t *dmaBuffer = new uint32_t[pkt->count];
636 auto cb = new DmaVirtCallback<uint64_t>(
637 [ = ] (const uint64_t &) { writeReadData(q, pkt, dmaBuffer); });
638 dmaReadVirt(q->rptr(), sizeof(uint32_t) * pkt->count, cb,
639 (void *)dmaBuffer, sdma_delay);
640}
641
642/* Completion of data reading for a write packet. */
643void
644SDMAEngine::writeReadData(SDMAQueue *q, sdmaWrite *pkt, uint32_t *dmaBuffer)
645{
646 int bufferSize = sizeof(uint32_t) * pkt->count;
647 q->incRptr(bufferSize);
648
649 DPRINTF(SDMAEngine, "Write packet data:\n");
650 for (int i = 0; i < pkt->count; ++i) {
651 DPRINTF(SDMAEngine, "%08x\n", dmaBuffer[i]);
652 }
653
654 // lastly we write read data to the destination address
655 if (gpuDevice->getVM().inMMHUB(pkt->dest)) {
656 Addr mmhub_addr = pkt->dest - gpuDevice->getVM().getMMHUBBase();
657
658 fatal_if(gpuDevice->getVM().inGARTRange(mmhub_addr),
659 "SDMA write to GART not implemented");
660
661 auto cb = new EventFunctionWrapper(
662 [ = ]{ writeDone(q, pkt, dmaBuffer); }, name());
663 gpuDevice->getMemMgr()->writeRequest(mmhub_addr, (uint8_t *)dmaBuffer,
664 bufferSize, 0, cb);
665 } else {
666 if (q->priv() && cur_vmid == 0) {
667 pkt->dest = getGARTAddr(pkt->dest);
668 }
669 auto cb = new DmaVirtCallback<uint32_t>(
670 [ = ] (const uint64_t &) { writeDone(q, pkt, dmaBuffer); });
671 dmaWriteVirt(pkt->dest, bufferSize, cb, (void *)dmaBuffer);
672 }
673}
674
675/* Completion of a write packet. */
676void
677SDMAEngine::writeDone(SDMAQueue *q, sdmaWrite *pkt, uint32_t *dmaBuffer)
678{
679 DPRINTF(SDMAEngine, "Write packet completed to %p, %d dwords\n",
680 pkt->dest, pkt->count);
681
682 auto cleanup_cb = new EventFunctionWrapper(
683 [ = ]{ writeCleanup(dmaBuffer); }, name());
684
685 auto system_ptr = gpuDevice->CP()->system();
686 if (!system_ptr->isAtomicMode()) {
687 warn_once("SDMA cleanup assumes 2000 tick timing for completion."
688 " This has not been tested in timing mode\n");
689 }
690
691 // Only 2000 ticks should be necessary, but add additional padding.
692 schedule(cleanup_cb, curTick() + 10000);
693
694 delete pkt;
695 decodeNext(q);
696}
697
698void
699SDMAEngine::writeCleanup(uint32_t *dmaBuffer)
700{
701 delete [] dmaBuffer;
702}
703
704/* Implements a copy packet. */
// First stage of a copy packet: normalize the byte count and source
// address, then read the source data either from device memory (in
// page-sized chunks via the memory manager) or from host memory via
// virtual DMA. copyReadData() runs when the read completes.
// NOTE(review): the extraction dropped the signature line (source line
// 706) and line 736, which declares the chunk generator `gen` used by
// the loop below (a ChunkGenerator over pkt->source/pkt->count);
// recover from upstream gem5 before compiling.
705void
707{
708 DPRINTF(SDMAEngine, "Copy src: %lx -> dest: %lx count %d\n",
709 pkt->source, pkt->dest, pkt->count);
710 q->incRptr(sizeof(sdmaCopy));
711 // count represents the number of bytes - 1 to be copied
712 // However, when vmid != 0, the sdma copies count number
713 // of bytes
714 if (cur_vmid == 0) {
715 pkt->count++;
716 }
717
718 if (q->priv() && cur_vmid == 0) {
719 if (!gpuDevice->getVM().inMMHUB(pkt->source)) {
720 DPRINTF(SDMAEngine, "Getting GART addr for %lx\n", pkt->source);
721 pkt->source = getGARTAddr(pkt->source);
722 DPRINTF(SDMAEngine, "GART addr %lx\n", pkt->source);
723 }
724 }
725
726 // Read data from the source first, then call the copyReadData method
727 uint8_t *dmaBuffer = new uint8_t[pkt->count];
728 Addr device_addr = getDeviceAddress(pkt->source);
729 if (device_addr) {
730 DPRINTF(SDMAEngine, "Copying from device address %#lx\n", device_addr);
731 auto cb = new EventFunctionWrapper(
732 [ = ]{ copyReadData(q, pkt, dmaBuffer); }, name());
733
734 // Copy the minimum page size at a time in case the physical addresses
735 // are not contiguous.
737 uint8_t *buffer_ptr = dmaBuffer;
738 for (; !gen.done(); gen.next()) {
739 Addr chunk_addr = getDeviceAddress(gen.addr());
740 assert(chunk_addr);
741
742 DPRINTF(SDMAEngine, "Copying chunk of %d bytes from %#lx (%#lx)\n",
743 gen.size(), gen.addr(), chunk_addr);
744
745 gpuDevice->getMemMgr()->readRequest(chunk_addr, buffer_ptr,
746 gen.size(), 0,
747 gen.last() ? cb : nullptr);
748 buffer_ptr += gen.size();
749 }
750 } else {
751 auto cb = new DmaVirtCallback<uint64_t>(
752 [ = ] (const uint64_t &) { copyReadData(q, pkt, dmaBuffer); });
753 dmaReadVirt(pkt->source, pkt->count, cb, (void *)dmaBuffer,
754 sdma_delay);
755 }
756}
757
758/* Completion of data reading for a copy packet. */
// Second stage of a copy packet: write the staged source data to the
// destination (device memory in chunks, or host memory via virtual
// DMA), and shadow-copy any qwords that land in the GART table into
// gem5's gartTable map. copyDone() runs on completion.
// NOTE(review): the extraction dropped line 783, which declares the
// chunk generator `gen` used by the device-write loop (a
// ChunkGenerator over pkt->dest/pkt->count); recover from upstream.
// NOTE(review): dmaBuffer64[(pkt->count/8)-1] below indexes out of
// bounds (underflows) when pkt->count < 8 -- confirm the minimum copy
// size, or guard the debug print.
759void
760SDMAEngine::copyReadData(SDMAQueue *q, sdmaCopy *pkt, uint8_t *dmaBuffer)
761{
762 // lastly we write read data to the destination address
763 uint64_t *dmaBuffer64 = reinterpret_cast<uint64_t *>(dmaBuffer);
764
765 DPRINTF(SDMAEngine, "Copy packet last/first qwords:\n");
766 DPRINTF(SDMAEngine, "First: %016lx\n", dmaBuffer64[0]);
767 DPRINTF(SDMAEngine, "Last: %016lx\n", dmaBuffer64[(pkt->count/8)-1]);
768
769 DPRINTF(SDMAData, "Copy packet data:\n");
770 for (int i = 0; i < pkt->count/8; ++i) {
771 DPRINTF(SDMAData, "%016lx\n", dmaBuffer64[i]);
772 }
773
774 Addr device_addr = getDeviceAddress(pkt->dest);
775 // Write read data to the destination address then call the copyDone method
776 if (device_addr) {
777 DPRINTF(SDMAEngine, "Copying to device address %#lx\n", device_addr);
778 auto cb = new EventFunctionWrapper(
779 [ = ]{ copyDone(q, pkt, dmaBuffer); }, name());
780
781 // Copy the minimum page size at a time in case the physical addresses
782 // are not contiguous.
784 uint8_t *buffer_ptr = dmaBuffer;
785 for (; !gen.done(); gen.next()) {
786 Addr chunk_addr = getDeviceAddress(gen.addr());
787 assert(chunk_addr);
788
789 DPRINTF(SDMAEngine, "Copying chunk of %d bytes to %#lx (%#lx)\n",
790 gen.size(), gen.addr(), chunk_addr);
791
792 gpuDevice->getMemMgr()->writeRequest(chunk_addr, buffer_ptr,
793 gen.size(), 0,
794 gen.last() ? cb : nullptr);
795
796 buffer_ptr += gen.size();
797 }
798 } else {
799 DPRINTF(SDMAEngine, "Copying to host address %#lx\n", pkt->dest);
800 auto cb = new DmaVirtCallback<uint64_t>(
801 [ = ] (const uint64_t &) { copyDone(q, pkt, dmaBuffer); });
802 dmaWriteVirt(pkt->dest, pkt->count, cb, (void *)dmaBuffer);
803 }
804
805 // For destinations in the GART table, gem5 uses a mapping tables instead
806 // of functionally going to device memory, so we need to update that copy.
807 if (gpuDevice->getVM().inGARTRange(device_addr)) {
808 // GART entries are always 8 bytes.
809 assert((pkt->count % 8) == 0);
810 for (int i = 0; i < pkt->count/8; ++i) {
811 Addr gart_addr = device_addr + i*8 - gpuDevice->getVM().gartBase();
812 DPRINTF(SDMAEngine, "Shadow copying to GART table %lx -> %lx\n",
813 gart_addr, dmaBuffer64[i]);
814 gpuDevice->getVM().gartTable[gart_addr] = dmaBuffer64[i];
815 }
816 }
817}
818
819/* Completion of a copy packet. */
820void
821SDMAEngine::copyDone(SDMAQueue *q, sdmaCopy *pkt, uint8_t *dmaBuffer)
822{
823 DPRINTF(SDMAEngine, "Copy completed to %p, %d dwords\n",
824 pkt->dest, pkt->count);
825
826 auto cleanup_cb = new EventFunctionWrapper(
827 [ = ]{ copyCleanup(dmaBuffer); }, name());
828
829 auto system_ptr = gpuDevice->CP()->system();
830 if (!system_ptr->isAtomicMode()) {
831 warn_once("SDMA cleanup assumes 2000 tick timing for completion."
832 " This has not been tested in timing mode\n");
833 }
834
835 // Only 2000 ticks should be necessary, but add additional padding.
836 schedule(cleanup_cb, curTick() + 10000);
837
838 delete pkt;
839 decodeNext(q);
840}
841
842void
843SDMAEngine::copyCleanup(uint8_t *dmaBuffer)
844{
845 delete [] dmaBuffer;
846}
847
848/* Implements an indirect buffer packet. */
// Switch decoding to this queue's indirect-buffer (IB) queue: extract
// the vmid from header bits 19:16, set the IB base (GART-translated
// for privileged supervisor-mode queues), reset its rptr and set its
// size/wptr from the packet, then decode from the IB queue.
// NOTE(review): the extraction dropped the first signature line
// (source line 850); the parameters referred to below are q, pkt and
// header.
// NOTE(review): size is pkt->size * 4 + 1 bytes due to precedence
// while wptr is pkt->size * 4 -- confirm whether
// (pkt->size + 1) * sizeof(uint32_t) was intended.
849void
851 uint32_t header)
852{
853 cur_vmid = (header >> 16) & 0xF;
854 if (q->priv() && cur_vmid == 0) {
855 q->ib()->base(getGARTAddr(pkt->base));
856 } else {
857 q->ib()->base(pkt->base);
858 }
859 q->ib()->rptr(0);
860 q->ib()->size(pkt->size * sizeof(uint32_t) + 1);
861 q->ib()->setWptr(pkt->size * sizeof(uint32_t));
862
863 q->incRptr(sizeof(sdmaIndirectBuffer));
864
865 delete pkt;
866 decodeNext(q->ib());
867}
868
869/* Implements a fence packet. */
// Implement a fence packet: write the packet's data value to the
// destination address (GART-translated for privileged supervisor-mode
// queues). fenceDone() runs when the write completes.
// NOTE(review): the extraction dropped the signature line (source line
// 871); the parameters referred to below are q and pkt.
870void
872{
873 q->incRptr(sizeof(sdmaFence));
874 if (q->priv() && cur_vmid == 0) {
875 pkt->dest = getGARTAddr(pkt->dest);
876 }
877
878 // Writing the data from the fence packet to the destination address.
879 auto cb = new DmaVirtCallback<uint32_t>(
880 [ = ] (const uint32_t &) { fenceDone(q, pkt); }, pkt->data);
881 dmaWriteVirt(pkt->dest, sizeof(pkt->data), cb, &cb->dmaBuffer);
882}
883
884/* Completion of a fence packet. */
// Completion of a fence packet: free the packet and resume decoding.
// NOTE(review): the extraction dropped the signature line (source line
// 886); the parameters referred to below are q and pkt.
885void
887{
888 DPRINTF(SDMAEngine, "Fence completed to %p, data 0x%x\n",
889 pkt->dest, pkt->data);
890 delete pkt;
891 decodeNext(q);
892}
893
894/* Implements a trap packet. */
// Implement a trap packet: post an interrupt cookie to the interrupt
// handler. Page-queue traps use ring 3, others ring 0. On gfx942/
// gfx950 the SDMA id is split into a node id (id / 4, remapped around
// the IP that owns node 2) and a local id (id % 4).
// NOTE(review): the extraction dropped the signature line (source line
// 896); the parameters referred to below are q and pkt.
895void
897{
898 q->incRptr(sizeof(sdmaTrap));
899
900 DPRINTF(SDMAEngine, "Trap contextId: %p\n", pkt->intrContext);
901
902 uint32_t ring_id = (q->queueType() == SDMAPage) ? 3 : 0;
903
904 int node_id = 0;
905 int local_id = getId();
906
907 if (gpuDevice->getGfxVersion() == GfxVersion::gfx942 ||
908 gpuDevice->getGfxVersion() == GfxVersion::gfx950) {
909 node_id = getId() >> 2;
910
911 // For most SDMAs the "node_id" for the interrupt handler is the SDMA
912 // id / 4. node_id of 2 is used by some other IP, so this gets changed
913 // to node_id 4:
914 // SDMA 0-3: node_id 0
915 // SDMA 4-7: node_id 1
916 // SDMA 8-11: node_id 4
917 // SDMA 12-15: node_id 3
918 if (node_id == 2) {
919 node_id += 2;
920 }
921
922 local_id = getId() % 4;
923 }
924 gpuDevice->getIH()->prepareInterruptCookie(pkt->intrContext, ring_id,
925 getIHClientId(local_id),
926 TRAP_ID, 2*node_id);
927 gpuDevice->getIH()->submitInterruptCookie();
928
929 delete pkt;
930 decodeNext(q);
931}
932
933/* Implements a write SRBM packet. */
// Implement an SRBM register-write packet: build a byte mask from the
// header's byteEnable field, mask the payload, and write it to the
// register at regAddr << 2 via the GPU device.
// NOTE(review): the extraction dropped the signature line (source line
// 935); the parameters referred to below are q, header and pkt.
934void
936{
937 q->incRptr(sizeof(sdmaSRBMWrite));
938
939 sdmaSRBMWriteHeader srbm_header;
940 srbm_header.ordinal = header;
941
942 [[maybe_unused]] uint32_t reg_addr = pkt->regAddr << 2;
943 uint32_t reg_mask = 0x00000000;
944
945 if (srbm_header.byteEnable & 0x8) reg_mask |= 0xFF000000;
946 if (srbm_header.byteEnable & 0x4) reg_mask |= 0x00FF0000;
947 if (srbm_header.byteEnable & 0x2) reg_mask |= 0x0000FF00;
948 if (srbm_header.byteEnable & 0x1) reg_mask |= 0x000000FF;
949 pkt->data &= reg_mask;
950
951 DPRINTF(SDMAEngine, "SRBM write to %#x with data %#x\n",
952 reg_addr, pkt->data);
953
954 gpuDevice->setRegVal(reg_addr, pkt->data);
955
956 delete pkt;
957 decodeNext(q);
958}
959
// Implement a POLL_REGMEM packet. Only memory polling (mode == 1,
// op == 0) is modeled: the location is read and pollRegMemRead()
// continues the poll loop. Register polling and other ops are skipped
// with a warn/panic and decoding continues.
// NOTE(review): the extraction dropped source lines 960-964 (a comment
// block) and the signature line (966); the parameters referred to
// below are q, header and pkt.
965void
967{
968 q->incRptr(sizeof(sdmaPollRegMem));
969
970 sdmaPollRegMemHeader prm_header;
971 prm_header.ordinal = header;
972
973 if (q->priv() && cur_vmid == 0) {
974 pkt->address = getGARTAddr(pkt->address);
975 }
976
977 DPRINTF(SDMAEngine, "POLL_REGMEM: M=%d, func=%d, op=%d, addr=%p, ref=%d, "
978 "mask=%p, retry=%d, pinterval=%d\n", prm_header.mode,
979 prm_header.func, prm_header.op, pkt->address, pkt->ref, pkt->mask,
980 pkt->retryCount, pkt->pollInt);
981
982 bool skip = false;
983
984 if (prm_header.mode == 1) {
985 // polling on a memory location
986 if (prm_header.op == 0) {
987 auto cb = new DmaVirtCallback<uint32_t>(
988 [ = ] (const uint32_t &dma_buffer) {
989 pollRegMemRead(q, header, pkt, dma_buffer, 0); });
990 dmaReadVirt(pkt->address, sizeof(uint32_t), cb,
991 (void *)&cb->dmaBuffer, sdma_delay);
992 } else {
993 panic("SDMA poll mem operation not implemented.");
994 skip = true;
995 }
996 } else {
997 warn_once("SDMA poll reg is not implemented. If this is required for "
998 "correctness, an SRBM model needs to be implemented.");
999 skip = true;
1000 }
1001
1002 if (skip) {
1003 delete pkt;
1004 decodeNext(q);
1005 }
1006}
1007
// Poll-loop continuation for POLL_REGMEM: re-read the memory location
// until pollRegMemFunc() is satisfied or the retry budget is exhausted
// (retryCount == 0xfff means retry indefinitely), then free the packet
// and resume decoding.
// NOTE(review): the extraction dropped the first signature line
// (source line 1009); the parameters referred to below are q, header,
// pkt, dma_buffer and count.
1008void
1010 uint32_t dma_buffer, int count)
1011{
1012 sdmaPollRegMemHeader prm_header;
1013 prm_header.ordinal = header;
1014
1015 assert(prm_header.mode == 1 && prm_header.op == 0);
1016
1017 if (!pollRegMemFunc(dma_buffer, pkt->ref, prm_header.func) &&
1018 ((count < (pkt->retryCount + 1) && pkt->retryCount != 0xfff) ||
1019 pkt->retryCount == 0xfff)) {
1020
1021 // continue polling on a memory location until reference value is met,
1022 // retryCount is met or indefinitely if retryCount is 0xfff
1023 DPRINTF(SDMAEngine, "SDMA polling mem addr %p, val %d ref %d.\n",
1024 pkt->address, dma_buffer, pkt->ref);
1025
1026 auto cb = new DmaVirtCallback<uint32_t>(
1027 [ = ] (const uint32_t &dma_buffer) {
1028 pollRegMemRead(q, header, pkt, dma_buffer, count + 1); });
1029 dmaReadVirt(pkt->address, sizeof(uint32_t), cb,
1030 (void *)&cb->dmaBuffer, sdma_delay);
1031 } else {
1032 DPRINTF(SDMAEngine, "SDMA polling mem addr %p, val %d ref %d done.\n",
1033 pkt->address, dma_buffer, pkt->ref);
1034
1035 delete pkt;
1036 decodeNext(q);
1037 }
1038}
1039
1040bool
1041SDMAEngine::pollRegMemFunc(uint32_t value, uint32_t reference, uint32_t func)
1042{
1043 switch (func) {
1044 case 0:
1045 return true;
1046 break;
1047 case 1:
1048 return value < reference;
1049 break;
1050 case 2:
1051 return value <= reference;
1052 break;
1053 case 3:
1054 return value == reference;
1055 break;
1056 case 4:
1057 return value != reference;
1058 break;
1059 case 5:
1060 return value >= reference;
1061 break;
1062 case 6:
1063 return value > reference;
1064 break;
1065 default:
1066 panic("SDMA POLL_REGMEM unknown comparison function.");
1067 break;
1068 }
1069}
1070
1071/* Implements a PTE PDE generation packet. */
// Implement a PTE/PDE generation packet: build count (encoded as
// count - 1) qwords from initValue + i*increment OR'd with mask, then
// write them to the destination (device memory via MMHUB, or host
// memory via virtual DMA). ptePdeDone() runs on completion.
// NOTE(review): the extraction dropped the signature line (source line
// 1073); the parameters referred to below are q and pkt.
1072void
1074{
1075 q->incRptr(sizeof(sdmaPtePde));
1076 pkt->count++;
1077
1078 DPRINTF(SDMAEngine, "PTEPDE init: %d inc: %d count: %d\n",
1079 pkt->initValue, pkt->increment, pkt->count);
1080
1081 // Generating pkt->count double dwords using the initial value, increment
1082 // and a mask.
1083 uint64_t *dmaBuffer = new uint64_t[pkt->count];
1084 for (int i = 0; i < pkt->count; i++) {
1085 dmaBuffer[i] = (pkt->mask | (pkt->initValue + (i * pkt->increment)));
1086 }
1087
1088 // Writing generated data to the destination address.
1089 if (gpuDevice->getVM().inMMHUB(pkt->dest)) {
1090 Addr mmhub_addr = pkt->dest - gpuDevice->getVM().getMMHUBBase();
1091
1092 fatal_if(gpuDevice->getVM().inGARTRange(mmhub_addr),
1093 "SDMA write to GART not implemented");
1094
1095 auto cb = new EventFunctionWrapper(
1096 [ = ]{ ptePdeDone(q, pkt, dmaBuffer); }, name());
1097 gpuDevice->getMemMgr()->writeRequest(mmhub_addr, (uint8_t *)dmaBuffer,
1098 sizeof(uint64_t) * pkt->count, 0,
1099 cb);
1100 } else {
1101 if (q->priv() && cur_vmid == 0) {
1102 pkt->dest = getGARTAddr(pkt->dest);
1103 }
1104 auto cb = new DmaVirtCallback<uint64_t>(
1105 [ = ] (const uint64_t &) { ptePdeDone(q, pkt, dmaBuffer); });
1106 dmaWriteVirt(pkt->dest, sizeof(uint64_t) * pkt->count, cb,
1107 (void *)dmaBuffer);
1108 }
1109}
1110
1111/* Completion of a PTE PDE generation packet. */
1112void
1113SDMAEngine::ptePdeDone(SDMAQueue *q, sdmaPtePde *pkt, uint64_t *dmaBuffer)
1114{
1115 DPRINTF(SDMAEngine, "PtePde packet completed to %p, %d 2dwords\n",
1116 pkt->dest, pkt->count);
1117
1118 auto cleanup_cb = new EventFunctionWrapper(
1119 [ = ]{ ptePdeCleanup(dmaBuffer); }, name());
1120
1121 auto system_ptr = gpuDevice->CP()->system();
1122 if (!system_ptr->isAtomicMode()) {
1123 warn_once("SDMA cleanup assumes 2000 tick timing for completion."
1124 " This has not been tested in timing mode\n");
1125 }
1126
1127 // Only 2000 ticks should be necessary, but add additional padding.
1128 schedule(cleanup_cb, curTick() + 10000);
1129
1130 delete pkt;
1131 decodeNext(q);
1132}
1133
1134void
1135SDMAEngine::ptePdeCleanup(uint64_t *dmaBuffer)
1136{
1137 delete [] dmaBuffer;
1138}
1139
1140void
1142{
1143 q->incRptr(sizeof(sdmaAtomic));
1144
1145 sdmaAtomicHeader at_header;
1146 at_header.ordinal = header;
1147
1148 DPRINTF(SDMAEngine, "Atomic op %d on addr %#lx, src: %ld, cmp: %ld, loop?"
1149 " %d loopInt: %d\n", at_header.opcode, pkt->addr, pkt->srcData,
1150 pkt->cmpData, at_header.loop, pkt->loopInt);
1151
1152 // Read the data at pkt->addr
1153 uint64_t *dmaBuffer = new uint64_t;
1154 auto cb = new DmaVirtCallback<uint64_t>(
1155 [ = ] (const uint64_t &)
1156 { atomicData(q, header, pkt, dmaBuffer); });
1157 dmaReadVirt(pkt->addr, sizeof(uint64_t), cb, (void *)dmaBuffer,
1158 sdma_delay);
1159}
1160
1161void
1163 uint64_t *dmaBuffer)
1164{
1165 sdmaAtomicHeader at_header;
1166 at_header.ordinal = header;
1167
1168 DPRINTF(SDMAEngine, "Atomic op %d on addr %#lx got data %#lx\n",
1169 at_header.opcode, pkt->addr, *dmaBuffer);
1170
1171 if (at_header.opcode == SDMA_ATOMIC_ADD64) {
1172 // Atomic add with return -- dst = dst + src
1173 int64_t dst_data = *dmaBuffer;
1174 int64_t src_data = pkt->srcData;
1175
1176 DPRINTF(SDMAEngine, "Atomic ADD_RTN: %ld + %ld = %ld\n", dst_data,
1177 src_data, dst_data + src_data);
1178
1179 // Reuse the dmaBuffer allocated
1180 *dmaBuffer = dst_data + src_data;
1181
1182 auto cb = new DmaVirtCallback<uint64_t>(
1183 [ = ] (const uint64_t &)
1184 { atomicDone(q, header, pkt, dmaBuffer); });
1185 dmaWriteVirt(pkt->addr, sizeof(uint64_t), cb, (void *)dmaBuffer);
1186 } else {
1187 panic("Unsupported SDMA atomic opcode: %d\n", at_header.opcode);
1188 }
1189}
1190
1191void
1193 uint64_t *dmaBuffer)
1194{
1195 sdmaAtomicHeader at_header;
1196 at_header.ordinal = header;
1197
1198 DPRINTF(SDMAEngine, "Atomic op %d op addr %#lx complete (sent %lx)\n",
1199 at_header.opcode, pkt->addr, *dmaBuffer);
1200
1201 delete dmaBuffer;
1202 delete pkt;
1203 decodeNext(q);
1204}
1205
1206void
1208{
1209 q->incRptr(sizeof(sdmaConstFill));
1210
1211 sdmaConstFillHeader fill_header;
1212 fill_header.ordinal = header;
1213
1214 DPRINTF(SDMAEngine, "ConstFill %lx srcData %x count %d size %d sw %d\n",
1215 pkt->addr, pkt->srcData, pkt->count, fill_header.fillsize,
1216 fill_header.sw);
1217
1218 // Count is number of <size> elements - 1. Size is log2 of byte size.
1219 int fill_bytes = (pkt->count + 1) * (1 << fill_header.fillsize);
1220 uint8_t *fill_data = new uint8_t[fill_bytes];
1221
1222 memset(fill_data, pkt->srcData, fill_bytes);
1223
1224 Addr device_addr = getDeviceAddress(pkt->addr);
1225 if (device_addr) {
1226 DPRINTF(SDMAEngine, "ConstFill %d bytes of %x to device at %lx\n",
1227 fill_bytes, pkt->srcData, pkt->addr);
1228
1229 auto cb = new EventFunctionWrapper(
1230 [ = ]{ constFillDone(q, pkt, fill_data); }, name());
1231
1232 // Copy the minimum page size at a time in case the physical addresses
1233 // are not contiguous.
1234 ChunkGenerator gen(pkt->addr, fill_bytes, AMDGPU_MMHUB_PAGE_SIZE);
1235 uint8_t *fill_data_ptr = fill_data;
1236 for (; !gen.done(); gen.next()) {
1237 Addr chunk_addr = getDeviceAddress(gen.addr());
1238 assert(chunk_addr);
1239
1240 DPRINTF(SDMAEngine, "Copying chunk of %d bytes from %#lx (%#lx)\n",
1241 gen.size(), gen.addr(), chunk_addr);
1242
1243 gpuDevice->getMemMgr()->writeRequest(chunk_addr, fill_data_ptr,
1244 gen.size(), 0,
1245 gen.last() ? cb : nullptr);
1246 fill_data_ptr += gen.size();
1247 }
1248 } else {
1249 DPRINTF(SDMAEngine, "ConstFill %d bytes of %x to host at %lx\n",
1250 fill_bytes, pkt->srcData, pkt->addr);
1251
1252 auto cb = new DmaVirtCallback<uint64_t>(
1253 [ = ] (const uint64_t &)
1254 { constFillDone(q, pkt, fill_data); });
1255 dmaWriteVirt(pkt->addr, fill_bytes, cb, (void *)fill_data);
1256 }
1257}
1258
1259void
1261{
1262 DPRINTF(SDMAEngine, "ConstFill to %lx done\n", pkt->addr);
1263
1264 delete [] fill_data;
1265 delete pkt;
1266 decodeNext(q);
1267}
1268
1271{
1272 AddrRangeList ranges;
1273 return ranges;
1274}
1275
1276void
1278{
1279 // Serialize the DmaVirtDevice base class
1281
1292
1293 int num_queues = 4;
1294
1296 queues.push_back((SDMAQueue *)&gfx);
1297 queues.push_back((SDMAQueue *)&page);
1298 queues.push_back((SDMAQueue *)&gfxIb);
1299 queues.push_back((SDMAQueue *)&pageIb);
1300
1301 auto base = std::make_unique<Addr[]>(num_queues);
1302 auto rptr = std::make_unique<Addr[]>(num_queues);
1303 auto wptr = std::make_unique<Addr[]>(num_queues);
1304 auto size = std::make_unique<Addr[]>(num_queues);
1305 auto processing = std::make_unique<bool[]>(num_queues);
1306
1307 for (int i = 0; i < num_queues; i++) {
1308 base[i] = queues[i]->base();
1309 rptr[i] = queues[i]->getRptr();
1310 wptr[i] = queues[i]->getWptr();
1311 size[i] = queues[i]->size();
1312 processing[i] = queues[i]->processing();
1313 }
1314
1315 SERIALIZE_UNIQUE_PTR_ARRAY(base, num_queues);
1316 SERIALIZE_UNIQUE_PTR_ARRAY(rptr, num_queues);
1317 SERIALIZE_UNIQUE_PTR_ARRAY(wptr, num_queues);
1318 SERIALIZE_UNIQUE_PTR_ARRAY(size, num_queues);
1319 SERIALIZE_UNIQUE_PTR_ARRAY(processing, num_queues);
1320
1321 // Capture RLC queue information in checkpoint
1322 // Only two RLC queues are supported right now
1323 const int num_rlc_queues = 2;
1324 std::vector<SDMAQueue *> rlc_queues;
1325 rlc_queues.push_back((SDMAQueue *)&rlc0);
1326 rlc_queues.push_back((SDMAQueue *)&rlc1);
1327
1328 auto rlc_info = std::make_unique<Addr[]>(num_rlc_queues);
1329 auto rlc_valid = std::make_unique<bool[]>(num_rlc_queues);
1330 auto rlc_base = std::make_unique<Addr[]>(num_rlc_queues);
1331 auto rlc_rptr = std::make_unique<Addr[]>(num_rlc_queues);
1332 auto rlc_global_rptr = std::make_unique<Addr[]>(num_rlc_queues);
1333 auto rlc_wptr = std::make_unique<Addr[]>(num_rlc_queues);
1334 auto rlc_size = std::make_unique<Addr[]>(num_rlc_queues);
1335 auto rlc_rptr_wb_addr = std::make_unique<Addr[]>(num_rlc_queues);
1336 auto rlc_processing = std::make_unique<bool[]>(num_rlc_queues);
1337 auto rlc_mqd_addr = std::make_unique<Addr[]>(num_rlc_queues);
1338 auto rlc_priv = std::make_unique<bool[]>(num_rlc_queues);
1339 auto rlc_static = std::make_unique<bool[]>(num_rlc_queues);
1340 auto rlc_mqd = std::make_unique<uint32_t[]>(num_rlc_queues * 128);
1341
1342 // Save RLC queue information in arrays that
1343 // are easier to serialize
1344 for (int i = 0; i < num_rlc_queues; i++) {
1345 rlc_valid[i] = rlc_queues[i]->valid();
1346 if (rlc_valid[i]) {
1347 rlc_info[i] = rlcInfo[i];
1348 rlc_base[i] = rlc_queues[i]->base();
1349 rlc_rptr[i] = rlc_queues[i]->getRptr();
1350 rlc_global_rptr[i] = rlc_queues[i]->globalRptr();
1351 rlc_wptr[i] = rlc_queues[i]->getWptr();
1352 rlc_size[i] = rlc_queues[i]->size();
1353 rlc_rptr_wb_addr[i] = rlc_queues[i]->rptrWbAddr();
1354 rlc_processing[i] = rlc_queues[i]->processing();
1355 rlc_mqd_addr[i] = rlc_queues[i]->getMQDAddr();
1356 rlc_priv[i] = rlc_queues[i]->priv();
1357 rlc_static[i] = rlc_queues[i]->isStatic();
1358 memcpy(rlc_mqd.get() + 128*i, rlc_queues[i]->getMQD(),
1359 sizeof(SDMAQueueDesc));
1360 }
1361 }
1362
1363 SERIALIZE_UNIQUE_PTR_ARRAY(rlc_info, num_rlc_queues);
1364 SERIALIZE_UNIQUE_PTR_ARRAY(rlc_valid, num_rlc_queues);
1365 SERIALIZE_UNIQUE_PTR_ARRAY(rlc_base, num_rlc_queues);
1366 SERIALIZE_UNIQUE_PTR_ARRAY(rlc_rptr, num_rlc_queues);
1367 SERIALIZE_UNIQUE_PTR_ARRAY(rlc_global_rptr, num_rlc_queues);
1368 SERIALIZE_UNIQUE_PTR_ARRAY(rlc_wptr, num_rlc_queues);
1369 SERIALIZE_UNIQUE_PTR_ARRAY(rlc_size, num_rlc_queues);
1370 SERIALIZE_UNIQUE_PTR_ARRAY(rlc_rptr_wb_addr, num_rlc_queues);
1371 SERIALIZE_UNIQUE_PTR_ARRAY(rlc_processing, num_rlc_queues);
1372 SERIALIZE_UNIQUE_PTR_ARRAY(rlc_mqd_addr, num_rlc_queues);
1373 SERIALIZE_UNIQUE_PTR_ARRAY(rlc_priv, num_rlc_queues);
1374 SERIALIZE_UNIQUE_PTR_ARRAY(rlc_static, num_rlc_queues);
1375 SERIALIZE_UNIQUE_PTR_ARRAY(rlc_mqd, num_rlc_queues * 128);
1376}
1377
1378void
1380{
1381 // Serialize the DmaVirtDevice base class
1383
1394
1395 int num_queues = 4;
1396 auto base = std::make_unique<Addr[]>(num_queues);
1397 auto rptr = std::make_unique<Addr[]>(num_queues);
1398 auto wptr = std::make_unique<Addr[]>(num_queues);
1399 auto size = std::make_unique<Addr[]>(num_queues);
1400 auto processing = std::make_unique<bool[]>(num_queues);
1401
1403 UNSERIALIZE_UNIQUE_PTR_ARRAY(rptr, num_queues);
1404 UNSERIALIZE_UNIQUE_PTR_ARRAY(wptr, num_queues);
1405 UNSERIALIZE_UNIQUE_PTR_ARRAY(size, num_queues);
1406 UNSERIALIZE_UNIQUE_PTR_ARRAY(processing, num_queues);
1407
1409 queues.push_back((SDMAQueue *)&gfx);
1410 queues.push_back((SDMAQueue *)&page);
1411 queues.push_back((SDMAQueue *)&gfxIb);
1412 queues.push_back((SDMAQueue *)&pageIb);
1413
1414 for (int i = 0; i < num_queues; i++) {
1415 queues[i]->base(base[i]);
1416 queues[i]->rptr(rptr[i]);
1417 queues[i]->wptr(wptr[i]);
1418 queues[i]->size(size[i]);
1419 queues[i]->processing(processing[i]);
1420 }
1421
1422 // Restore RLC queue state information from checkpoint
1423 // Only two RLC queues are supported right now
1424 const int num_rlc_queues = 2;
1425 auto rlc_info = std::make_unique<Addr[]>(num_rlc_queues);
1426 auto rlc_valid = std::make_unique<bool[]>(num_rlc_queues);
1427 auto rlc_base = std::make_unique<Addr[]>(num_rlc_queues);
1428 auto rlc_rptr = std::make_unique<Addr[]>(num_rlc_queues);
1429 auto rlc_global_rptr = std::make_unique<Addr[]>(num_rlc_queues);
1430 auto rlc_wptr = std::make_unique<Addr[]>(num_rlc_queues);
1431 auto rlc_size = std::make_unique<Addr[]>(num_rlc_queues);
1432 auto rlc_rptr_wb_addr = std::make_unique<Addr[]>(num_rlc_queues);
1433 auto rlc_processing = std::make_unique<bool[]>(num_rlc_queues);
1434 auto rlc_mqd_addr = std::make_unique<Addr[]>(num_rlc_queues);
1435 auto rlc_priv = std::make_unique<bool[]>(num_rlc_queues);
1436 auto rlc_static = std::make_unique<bool[]>(num_rlc_queues);
1437 auto rlc_mqd = std::make_unique<uint32_t[]>(num_rlc_queues * 128);
1438
1439 UNSERIALIZE_UNIQUE_PTR_ARRAY(rlc_info, num_rlc_queues);
1440 UNSERIALIZE_UNIQUE_PTR_ARRAY(rlc_valid, num_rlc_queues);
1441 UNSERIALIZE_UNIQUE_PTR_ARRAY(rlc_base, num_rlc_queues);
1442 UNSERIALIZE_UNIQUE_PTR_ARRAY(rlc_rptr, num_rlc_queues);
1443 UNSERIALIZE_UNIQUE_PTR_ARRAY(rlc_global_rptr, num_rlc_queues);
1444 UNSERIALIZE_UNIQUE_PTR_ARRAY(rlc_wptr, num_rlc_queues);
1445 UNSERIALIZE_UNIQUE_PTR_ARRAY(rlc_size, num_rlc_queues);
1446 UNSERIALIZE_UNIQUE_PTR_ARRAY(rlc_rptr_wb_addr, num_rlc_queues);
1447 UNSERIALIZE_UNIQUE_PTR_ARRAY(rlc_processing, num_rlc_queues);
1448 UNSERIALIZE_UNIQUE_PTR_ARRAY(rlc_mqd_addr, num_rlc_queues);
1449 UNSERIALIZE_UNIQUE_PTR_ARRAY(rlc_priv, num_rlc_queues);
1450 UNSERIALIZE_UNIQUE_PTR_ARRAY(rlc_static, num_rlc_queues);
1451 UNSERIALIZE_UNIQUE_PTR_ARRAY(rlc_mqd, num_rlc_queues * 128);
1452
1453 // Save RLC queue information into RLC0, RLC1
1454 std::vector<SDMAQueue *> rlc_queues;
1455 rlc_queues.push_back((SDMAQueue *)&rlc0);
1456 rlc_queues.push_back((SDMAQueue *)&rlc1);
1457
1458 for (int i = 0; i < num_rlc_queues; i++) {
1459 rlc_queues[i]->valid(rlc_valid[i]);
1460 if (rlc_valid[i]) {
1461 rlcInfo[i] = rlc_info[i];
1462 rlc_queues[i]->base(rlc_base[i]);
1463 rlc_queues[i]->rptr(rlc_rptr[i]);
1464 rlc_queues[i]->setGlobalRptr(rlc_global_rptr[i]);
1465 rlc_queues[i]->wptr(rlc_wptr[i]);
1466 rlc_queues[i]->size(rlc_size[i]);
1467 rlc_queues[i]->rptrWbAddr(rlc_rptr_wb_addr[i]);
1468 rlc_queues[i]->processing(rlc_processing[i]);
1469 rlc_queues[i]->setMQDAddr(rlc_mqd_addr[i]);
1470 rlc_queues[i]->setPriv(rlc_priv[i]);
1471 rlc_queues[i]->setStatic(rlc_static[i]);
1472 SDMAQueueDesc* mqd = new SDMAQueueDesc();
1473 memcpy(mqd, rlc_mqd.get() + 128*i, sizeof(SDMAQueueDesc));
1474 rlc_queues[i]->setMQD(mqd);
1475 }
1476 }
1477}
1478
1479void
1481{
1482 DPRINTF(SDMAEngine, "Writing offset %#x with data %x\n", mmio_offset,
1483 pkt->getLE<uint32_t>());
1484
1485 // In Vega10 headers, the offsets are the same for both SDMAs
1486 switch (mmio_offset) {
1487 case mmSDMA_GFX_RB_BASE:
1488 setGfxBaseLo(pkt->getLE<uint32_t>());
1489 break;
1491 setGfxBaseHi(pkt->getLE<uint32_t>());
1492 break;
1494 setGfxRptrLo(pkt->getLE<uint32_t>());
1495 break;
1497 setGfxRptrHi(pkt->getLE<uint32_t>());
1498 break;
1500 setGfxDoorbellLo(pkt->getLE<uint32_t>());
1501 break;
1503 setGfxDoorbellOffsetLo(pkt->getLE<uint32_t>());
1504 // Bit 28 of doorbell indicates that doorbell is enabled.
1505 if (bits(getGfxDoorbell(), 28, 28)) {
1506 gpuDevice->setDoorbellType(getGfxDoorbellOffset(),
1508 gpuDevice->setSDMAEngine(getGfxDoorbellOffset(), this);
1509 }
1510 break;
1511 case mmSDMA_GFX_RB_CNTL: {
1512 uint32_t rb_size = bits(pkt->getLE<uint32_t>(), 6, 1);
1513 assert(rb_size >= 6 && rb_size <= 62);
1514 setGfxSize(1 << (rb_size + 2));
1515 } break;
1517 setGfxWptrLo(pkt->getLE<uint32_t>());
1518 break;
1520 setGfxWptrHi(pkt->getLE<uint32_t>());
1521 break;
1523 setPageBaseLo(pkt->getLE<uint32_t>());
1524 break;
1526 setPageRptrLo(pkt->getLE<uint32_t>());
1527 break;
1529 setPageRptrHi(pkt->getLE<uint32_t>());
1530 break;
1532 setPageDoorbellLo(pkt->getLE<uint32_t>());
1533 break;
1535 setPageDoorbellOffsetLo(pkt->getLE<uint32_t>());
1536 // Bit 28 of doorbell indicates that doorbell is enabled.
1537 if (bits(getPageDoorbell(), 28, 28)) {
1538 gpuDevice->setDoorbellType(getPageDoorbellOffset(),
1540 gpuDevice->setSDMAEngine(getPageDoorbellOffset(), this);
1541 }
1542 break;
1543 case mmSDMA_PAGE_RB_CNTL: {
1544 uint32_t rb_size = bits(pkt->getLE<uint32_t>(), 6, 1);
1545 assert(rb_size >= 6 && rb_size <= 62);
1546 setPageSize(1 << (rb_size + 2));
1547 } break;
1549 setPageWptrLo(pkt->getLE<uint32_t>());
1550 break;
1551 default:
1552 DPRINTF(SDMAEngine, "Unknown SDMA MMIO %#x\n", mmio_offset);
1553 break;
1554 }
1555}
1556
1557void
1559{
1560 gfxBase = insertBits(gfxBase, 31, 0, 0);
1561 gfxBase |= data;
1562 gfx.base((gfxBase >> 1) << 12);
1563}
1564
1565void
1567{
1568 gfxBase = insertBits(gfxBase, 63, 32, 0);
1569 gfxBase |= ((uint64_t)data) << 32;
1570 gfx.base((gfxBase >> 1) << 12);
1571}
1572
1573void
1575{
1576 gfxRptr = insertBits(gfxRptr, 31, 0, 0);
1577 gfxRptr |= data;
1578 gfx.rptrWbAddr(getGARTAddr(gfxRptr));
1579}
1580
1581void
1583{
1584 gfxRptr = insertBits(gfxRptr, 63, 32, 0);
1585 gfxRptr |= ((uint64_t)data) << 32;
1586 gfx.rptrWbAddr(getGARTAddr(gfxRptr));
1587}
1588
1589void
1591{
1592 gfxDoorbell = insertBits(gfxDoorbell, 31, 0, 0);
1593 gfxDoorbell |= data;
1594}
1595
1596void
1598{
1599 gfxDoorbell = insertBits(gfxDoorbell, 63, 32, 0);
1600 gfxDoorbell |= ((uint64_t)data) << 32;
1601}
1602
1603void
1605{
1608 if (bits(gfxDoorbell, 28, 28)) {
1610 gpuDevice->setSDMAEngine(gfxDoorbellOffset, this);
1611 }
1612}
1613
1614void
1616{
1618 gfxDoorbellOffset |= ((uint64_t)data) << 32;
1619}
1620
1621void
1623{
1624 uint32_t rb_size = bits(data, 6, 1);
1625 assert(rb_size >= 6 && rb_size <= 62);
1626 gfx.size(1 << (rb_size + 2));
1627}
1628
1629void
1631{
1632 gfxWptr = insertBits(gfxWptr, 31, 0, 0);
1633 gfxWptr |= data;
1634}
1635
1636void
1638{
1639 gfxWptr = insertBits(gfxWptr, 31, 0, 0);
1640 gfxWptr |= ((uint64_t)data) << 32;
1641}
1642
1643void
1645{
1646 pageBase = insertBits(pageBase, 31, 0, 0);
1647 pageBase |= data;
1648 page.base((pageBase >> 1) << 12);
1649}
1650
1651void
1653{
1654 pageBase = insertBits(pageBase, 63, 32, 0);
1655 pageBase |= ((uint64_t)data) << 32;
1656 page.base((pageBase >> 1) << 12);
1657}
1658
1659void
1661{
1662 pageRptr = insertBits(pageRptr, 31, 0, 0);
1663 pageRptr |= data;
1664 page.rptrWbAddr(getGARTAddr(pageRptr));
1665}
1666
1667void
1669{
1670 pageRptr = insertBits(pageRptr, 63, 32, 0);
1671 pageRptr |= ((uint64_t)data) << 32;
1672 page.rptrWbAddr(getGARTAddr(pageRptr));
1673}
1674
1675void
1681
1682void
1684{
1685 pageDoorbell = insertBits(pageDoorbell, 63, 32, 0);
1686 pageDoorbell |= ((uint64_t)data) << 32;
1687}
1688
1689void
1691{
1694 if (bits(pageDoorbell, 28, 28)) {
1696 gpuDevice->setSDMAEngine(pageDoorbellOffset, this);
1697 }
1698}
1699
1700void
1702{
1704 pageDoorbellOffset |= ((uint64_t)data) << 32;
1705}
1706
1707void
1709{
1710 uint32_t rb_size = bits(data, 6, 1);
1711 assert(rb_size >= 6 && rb_size <= 62);
1712 page.size(1 << (rb_size + 2));
1713}
1714
1715void
1717{
1718 pageWptr = insertBits(pageWptr, 31, 0, 0);
1719 pageWptr |= data;
1720}
1721
1722void
1724{
1725 pageWptr = insertBits(pageWptr, 63, 32, 0);
1726 pageWptr |= ((uint64_t)data) << 32;
1727}
1728
1729} // namespace gem5
static constexpr int AMDGPU_MMHUB_PAGE_SIZE
Definition amdgpu_vm.hh:115
#define DPRINTF(x,...)
Definition trace.hh:209
const char data[]
Device model for an AMD GPU.
Translation range generators.
Definition amdgpu_vm.hh:391
This class takes an arbitrary memory region (address/length pair) and generates a series of appropria...
void serialize(CheckpointOut &cp) const override
Serialize an object.
void unserialize(CheckpointIn &cp) override
Unserialize an object.
Wraps a std::function object in a DmaCallback.
void dmaReadVirt(Addr host_addr, unsigned size, DmaCallback *cb, void *data, Tick delay=0)
Initiate a DMA read from virtual address host_addr.
DmaVirtDevice(const Params &p)
void dmaWriteVirt(Addr host_addr, unsigned size, DmaCallback *b, void *data, Tick delay=0)
Initiate a DMA write from virtual address host_addr.
T getLE() const
Get the data in the packet byte swapped from little endian to host endian.
uint64_t pageDoorbell
void setPageRptrLo(uint32_t data)
void unserialize(CheckpointIn &cp) override
Unserialize an object.
uint64_t getPageDoorbellOffset()
SDMAQueue gfx
Each SDMAEngine processes four queues: paging, gfx, rlc0, and rlc1, where RLC stands for Run List Con...
void ptePde(SDMAQueue *q, sdmaPtePde *pkt)
void setGfxRptrLo(uint32_t data)
void ptePdeCleanup(uint64_t *dmaBuffer)
void setGfxWptrLo(uint32_t data)
uint64_t getGfxDoorbellOffset()
void registerRLCQueue(Addr doorbell, Addr mqdAddr, SDMAQueueDesc *mqd, bool isStatic)
Methods for RLC queues.
void pollRegMemRead(SDMAQueue *q, uint32_t header, sdmaPollRegMem *pkt, uint32_t dma_buffer, int count)
void setPageDoorbellHi(uint32_t data)
VegaISA::Walker * walker
void setGfxRptrHi(uint32_t data)
void writeDone(SDMAQueue *q, sdmaWrite *pkt, uint32_t *dmaBuffer)
void processRLC(Addr doorbellOffset, Addr wptrOffset)
void deallocateRLCQueues(bool unmap_static)
void copy(SDMAQueue *q, sdmaCopy *pkt)
Tick write(PacketPtr pkt) override
Inherited methods.
void writeMMIO(PacketPtr pkt, Addr mmio_offset)
Methods for setting the values of SDMA MMIO registers.
void setGfxSize(uint32_t data)
void fenceDone(SDMAQueue *q, sdmaFence *pkt)
void writeReadData(SDMAQueue *q, sdmaWrite *pkt, uint32_t *dmaBuffer)
void setGfxBaseLo(uint32_t data)
void processRLC0(Addr wptrOffset)
void processGfx(Addr wptrOffset)
Given a new write ptr offset, communicated to the GPU through a doorbell write, the SDMA engine proce...
void indirectBuffer(SDMAQueue *q, sdmaIndirectBuffer *pkt, uint32_t header)
void setGfxDoorbellOffsetHi(uint32_t data)
void constFill(SDMAQueue *q, sdmaConstFill *pkt, uint32_t header)
AMDGPUDevice * gpuDevice
Addr getGARTAddr(Addr addr) const
Methods for translation.
void setPageDoorbellOffsetHi(uint32_t data)
void processRLC1(Addr wptrOffset)
void setGfxWptrHi(uint32_t data)
void setGfxDoorbellOffsetLo(uint32_t data)
void atomicData(SDMAQueue *q, uint32_t header, sdmaAtomic *pkt, uint64_t *dmaBuffer)
void processPage(Addr wptrOffset)
uint64_t getGfxDoorbell()
void decodeHeader(SDMAQueue *q, uint32_t data)
Reads the first DW (32 bits) (i.e., header) of an SDMA packet, which encodes the opcode and sub-opcod...
void setPageDoorbellOffsetLo(uint32_t data)
int getIHClientId(int _id)
Returns the client id for the Interrupt Handler.
uint64_t getPageDoorbell()
SDMAEngine(const SDMAEngineParams &p)
void setGPUDevice(AMDGPUDevice *gpu_device)
Addr getDeviceAddress(Addr raw_addr)
Translate an address in an SDMA packet.
void writeCleanup(uint32_t *dmaBuffer)
void constFillDone(SDMAQueue *q, sdmaConstFill *pkt, uint8_t *fill_data)
uint64_t pageDoorbellOffset
void atomicDone(SDMAQueue *q, uint32_t header, sdmaAtomic *pkt, uint64_t *dmaBuffer)
void setPageBaseHi(uint32_t data)
uint64_t gfxDoorbellOffset
bool pollRegMemFunc(uint32_t value, uint32_t reference, uint32_t func)
void setPageWptrHi(uint32_t data)
void unregisterRLCQueue(Addr doorbell, bool unmap_static)
void setPageWptrLo(uint32_t data)
void setGfxDoorbellLo(uint32_t data)
void copyReadData(SDMAQueue *q, sdmaCopy *pkt, uint8_t *dmaBuffer)
void trap(SDMAQueue *q, sdmaTrap *pkt)
AddrRangeList getAddrRanges() const override
Every PIO device is obliged to provide an implementation that returns the address ranges the device r...
void atomic(SDMAQueue *q, uint32_t header, sdmaAtomic *pkt)
void setPageDoorbellLo(uint32_t data)
void setGfxDoorbellHi(uint32_t data)
void setPageSize(uint32_t data)
void setPageBaseLo(uint32_t data)
void copyDone(SDMAQueue *q, sdmaCopy *pkt, uint8_t *dmaBuffer)
void copyCleanup(uint8_t *dmaBuffer)
void setGfxBaseHi(uint32_t data)
void ptePdeDone(SDMAQueue *q, sdmaPtePde *pkt, uint64_t *dmaBuffer)
TranslationGenPtr translate(Addr vaddr, Addr size) override
GPUController will perform DMA operations on VAs, and because page faults are not currently supported...
void setPageRptrHi(uint32_t data)
int getId() const
void decodeNext(SDMAQueue *q)
This method checks read and write pointers and starts decoding packets if the read pointer is less th...
void srbmWrite(SDMAQueue *q, uint32_t header, sdmaSRBMWrite *pkt)
void fence(SDMAQueue *q, sdmaFence *pkt)
void serialize(CheckpointOut &cp) const override
Serialize an object.
std::array< Addr, 2 > rlcInfo
static constexpr Tick sdma_delay
void pollRegMem(SDMAQueue *q, uint32_t header, sdmaPollRegMem *pkt)
Implements a poll reg/mem packet that polls an SRBM register or a memory location,...
STL vector class.
Definition stl.hh:37
The GPUCommandProcessor (CP) is responsible for accepting commands, in the form of HSA AQL packets,...
std::list< AddrRange > AddrRangeList
Convenience typedef for a collection of address ranges.
Definition addr_range.hh:64
constexpr T bits(T val, unsigned first, unsigned last)
Extract the bitfield from position 'first' to 'last' (inclusive) from 'val' and right justify it.
Definition bitfield.hh:79
constexpr T insertBits(T val, unsigned first, unsigned last, B bit_val)
Returns val with bits first to last set to the LSBs of bit_val.
Definition bitfield.hh:185
void schedule(Event &event, Tick when)
Definition eventq.hh:1012
#define panic(...)
This implements a cprintf based panic() function.
Definition logging.hh:220
#define fatal_if(cond,...)
Conditional fatal macro that checks the supplied condition and only causes a fatal error if the condi...
Definition logging.hh:268
#define UNSERIALIZE_UNIQUE_PTR_ARRAY(member, size)
Definition serialize.hh:634
#define SERIALIZE_UNIQUE_PTR_ARRAY(member, size)
Definition serialize.hh:626
#define warn(...)
Definition logging.hh:288
#define warn_once(...)
Definition logging.hh:292
Bitfield< 23, 20 > atomic
Bitfield< 27 > q
Definition misc_types.hh:55
Bitfield< 7 > i
Definition misc_types.hh:67
Bitfield< 24, 21 > opcode
Definition types.hh:92
Bitfield< 0 > p
Bitfield< 2 > priv
Definition misc.hh:131
Bitfield< 3 > addr
Definition types.hh:84
Copyright (c) 2024 Arm Limited All rights reserved.
Definition binary32.hh:36
struct gem5::GEM5_PACKED sdmaFence
struct gem5::GEM5_PACKED sdmaConstFill
struct gem5::GEM5_PACKED sdmaTimestamp
struct gem5::GEM5_PACKED sdmaConstFillHeader
struct gem5::GEM5_PACKED sdmaAtomic
Tick curTick()
The universal simulation clock.
Definition cur_tick.hh:46
std::ostream CheckpointOut
Definition serialize.hh:66
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition types.hh:147
@ SOC15_IH_CLIENTID_SDMA3
@ SOC15_IH_CLIENTID_SDMA4
@ SOC15_IH_CLIENTID_SDMA0
@ SOC15_IH_CLIENTID_SDMA1
@ SOC15_IH_CLIENTID_SDMA5
@ SOC15_IH_CLIENTID_SDMA2
@ SOC15_IH_CLIENTID_SDMA6
@ SOC15_IH_CLIENTID_SDMA7
struct gem5::GEM5_PACKED sdmaPtePde
struct gem5::GEM5_PACKED sdmaPollRegMem
struct gem5::GEM5_PACKED sdmaPollRegMemHeader
struct gem5::GEM5_PACKED sdmaPredExec
Packet * PacketPtr
constexpr unsigned int SDMA_ATOMIC_ADD64
struct gem5::GEM5_PACKED sdmaWrite
struct gem5::GEM5_PACKED SDMAQueueDesc
Queue descriptor for SDMA-based user queues (RLC queues).
struct gem5::GEM5_PACKED sdmaAtomicHeader
struct gem5::GEM5_PACKED sdmaCopy
SDMA packets - see src/core/inc/sdma_registers.h in ROCR-Runtime.
struct gem5::GEM5_PACKED sdmaIndirectBuffer
struct gem5::GEM5_PACKED sdmaTrap
std::unique_ptr< TranslationGen > TranslationGenPtr
struct gem5::GEM5_PACKED sdmaSRBMWrite
struct gem5::GEM5_PACKED sdmaSRBMWriteHeader
struct gem5::GEM5_PACKED sdmaDummyTrap
struct gem5::GEM5_PACKED sdmaSemaphore
struct gem5::GEM5_PACKED sdmaCondExec
output header
Definition nop.cc:36
Declaration of the Packet class.
#define SDMA_SUBOP_COPY_SOA
#define SDMA_OP_SEM
#define SDMA_OP_PTEPDE
#define SDMA_OP_ATOMIC
#define SDMA_OP_DUMMY_TRAP
#define SDMA_SUBOP_PTEPDE_COPY
#define SDMA_SUBOP_COPY_LINEAR
#define SDMA_SUBOP_COPY_T2T_SUB_WIND
#define SDMA_SUBOP_TIMESTAMP_GET
#define SDMA_SUBOP_WRITE_TILED
#define SDMA_OP_PRE_EXE
#define SDMA_OP_TRAP
#define SDMA_SUBOP_PTEPDE_GEN
#define SDMA_OP_WRITE
#define SDMA_SUBOP_COPY_LINEAR_SUB_WIND
#define SDMA_OP_COPY
#define SDMA_SUBOP_COPY_LINEAR_PHY
#define SDMA_OP_POLL_REGMEM
#define SDMA_SUBOP_TIMESTAMP_GET_GLOBAL
#define SDMA_SUBOP_PTEPDE_COPY_BACKWARDS
#define SDMA_SUBOP_TIMESTAMP_SET
#define SDMA_OP_TIMESTAMP
#define SDMA_OP_INDIRECT
#define SDMA_OP_COND_EXE
#define SDMA_OP_CONST_FILL
#define SDMA_SUBOP_COPY_DIRTY_PAGE
#define SDMA_OP_NOP
Commands for the SDMA engine.
#define SDMA_SUBOP_WRITE_LINEAR
#define SDMA_OP_FENCE
#define SDMA_SUBOP_PTEPDE_RMW
#define SDMA_OP_SRBM_WRITE
#define SDMA_SUBOP_POLL_MEM_VERIFY
#define SDMA_SUBOP_POLL_REG_WRITE_MEM
#define SDMA_SUBOP_COPY_TILED_SUB_WIND
#define SDMA_SUBOP_POLL_DBIT_WRITE_MEM
#define SDMA_SUBOP_COPY_TILED
#define mmSDMA_GFX_DOORBELL
Definition sdma_mmio.hh:49
#define mmSDMA_PAGE_RB_RPTR_ADDR_HI
Definition sdma_mmio.hh:55
#define mmSDMA_GFX_RB_WPTR_POLL_ADDR_LO
Definition sdma_mmio.hh:52
#define mmSDMA_PAGE_RB_BASE
Definition sdma_mmio.hh:54
#define mmSDMA_PAGE_RB_WPTR_POLL_ADDR_LO
Definition sdma_mmio.hh:59
#define mmSDMA_PAGE_DOORBELL
Definition sdma_mmio.hh:57
#define mmSDMA_GFX_DOORBELL_OFFSET
Definition sdma_mmio.hh:50
#define mmSDMA_PAGE_DOORBELL_OFFSET
Definition sdma_mmio.hh:58
#define mmSDMA_GFX_RB_CNTL
MMIO offsets for SDMA engine.
Definition sdma_mmio.hh:44
#define mmSDMA_GFX_RB_RPTR_ADDR_HI
Definition sdma_mmio.hh:47
#define mmSDMA_PAGE_RB_RPTR_ADDR_LO
Definition sdma_mmio.hh:56
#define mmSDMA_GFX_RB_RPTR_ADDR_LO
Definition sdma_mmio.hh:48
#define mmSDMA_GFX_RB_WPTR_POLL_ADDR_HI
Definition sdma_mmio.hh:51
#define mmSDMA_GFX_RB_BASE
Definition sdma_mmio.hh:45
#define mmSDMA_PAGE_RB_CNTL
Definition sdma_mmio.hh:53
#define mmSDMA_GFX_RB_BASE_HI
Definition sdma_mmio.hh:46
#define UNSERIALIZE_SCALAR(scalar)
Definition serialize.hh:575
#define SERIALIZE_SCALAR(scalar)
Definition serialize.hh:568
uint32_t sdmax_rlcx_rb_rptr_addr_hi
uint32_t sdmax_rlcx_rb_cntl
uint32_t sdmax_rlcx_rb_rptr_addr_lo
const std::string & name()
Definition trace.cc:48

Generated on Mon Oct 27 2025 04:13:01 for gem5 by doxygen 1.14.0