sdma_engine.cc

/*
 * Copyright (c) 2021 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "dev/amdgpu/sdma_engine.hh"

#include "arch/amdgpu/vega/pagetable_walker.hh"
#include "arch/generic/mmu.hh"
#include "debug/SDMAData.hh"
#include "debug/SDMAEngine.hh"
#include "dev/amdgpu/interrupt_handler.hh"
#include "dev/amdgpu/sdma_commands.hh"
#include "dev/amdgpu/sdma_mmio.hh"
#include "mem/packet.hh"
#include "mem/packet_access.hh"
#include "params/SDMAEngine.hh"

namespace gem5
{

SDMAEngine::SDMAEngine(const SDMAEngineParams &p)
    : DmaVirtDevice(p), id(0), gfxBase(0), gfxRptr(0),
      gfxDoorbell(0), gfxDoorbellOffset(0), gfxWptr(0), pageBase(0),
      pageRptr(0), pageDoorbell(0), pageDoorbellOffset(0),
      pageWptr(0), gpuDevice(nullptr), walker(p.walker),
      mmioBase(p.mmio_base), mmioSize(p.mmio_size)
{
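    // Each SDMAEngine processes four queues: gfx, page, rlc0, and rlc1
    // (RLC = Run List Controller). Each ring queue is paired with an
    // indirect buffer (IB) queue that packets on the ring can chain into.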
    gfx.ib(&gfxIb);
    gfxIb.parent(&gfx);
    gfx.valid(true);
    gfxIb.valid(true);
    gfx.queueType(SDMAGfx);
    gfxIb.queueType(SDMAGfx);

    page.ib(&pageIb);
    pageIb.parent(&page);
    page.valid(true);
    pageIb.valid(true);
    page.queueType(SDMAPage);
    pageIb.queueType(SDMAPage);

    rlc0.ib(&rlc0Ib);
    rlc0Ib.parent(&rlc0);

    rlc1.ib(&rlc1Ib);
    rlc1Ib.parent(&rlc1);
}

void
SDMAEngine::setGPUDevice(AMDGPUDevice *gpu_device)
{
    gpuDevice = gpu_device;
    walker->setDevRequestor(gpuDevice->vramRequestorId());
}

int
SDMAEngine::getIHClientId(int _id)
{
    switch (_id) {
      case 0:
        return SOC15_IH_CLIENTID_SDMA0;
      case 1:
        return SOC15_IH_CLIENTID_SDMA1;
      case 2:
        return SOC15_IH_CLIENTID_SDMA2;
      case 3:
        return SOC15_IH_CLIENTID_SDMA3;
      case 4:
        return SOC15_IH_CLIENTID_SDMA4;
      case 5:
        return SOC15_IH_CLIENTID_SDMA5;
      case 6:
        return SOC15_IH_CLIENTID_SDMA6;
      case 7:
        return SOC15_IH_CLIENTID_SDMA7;
      default:
        panic("Unknown SDMA id");
    }
}

Addr
SDMAEngine::getGARTAddr(Addr addr) const
{
    if (!gpuDevice->getVM().inAGP(addr)) {
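        // The 4KiB page number is scaled by 8 here (GART entries are
        // 8 bytes; see the GART shadow copy in copyReadData) while the
        // 12-bit page offset is preserved.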
        Addr low_bits = bits(addr, 11, 0);
        addr = (((addr >> 12) << 3) << 12) | low_bits;
    }
    return addr;
}

Addr
SDMAEngine::getDeviceAddress(Addr raw_addr)
{
    // SDMA packets can access both host and device memory as either a source
    // or destination address. We don't know which until it is translated, so
    // we do a dummy functional translation to determine if the address
    // resides in system memory or not.
    auto tgen = translate(raw_addr, 64);
    auto addr_range = *(tgen->begin());
    Addr tmp_addr = addr_range.paddr;
    DPRINTF(SDMAEngine, "getDeviceAddress raw_addr %#lx -> %#lx\n",
            raw_addr, tmp_addr);

    // SDMA packets will access device memory through the MMHUB aperture in
    // supervisor mode (vmid == 0) and in user mode (vmid > 0). In the case
    // of vmid == 0 the address is already an MMHUB address in the packet,
    // so simply subtract the MMHUB base. For vmid > 0 the address is a
    // virtual address that must first be translated. The translation will
    // return an MMHUB address, then we can similarly subtract the base to
    // get the device address. Otherwise, for host, the device address is 0.
    Addr device_addr = 0;
    if ((gpuDevice->getVM().inMMHUB(raw_addr) && cur_vmid == 0) ||
        (gpuDevice->getVM().inMMHUB(tmp_addr) && cur_vmid != 0)) {
        if (cur_vmid == 0) {
            device_addr = raw_addr - gpuDevice->getVM().getMMHUBBase();
        } else {
            device_addr = tmp_addr - gpuDevice->getVM().getMMHUBBase();
        }
    }

    return device_addr;
}

/**
 * GPUController will perform DMA operations on VAs, and because page faults
 * are not currently supported for GPUController, we must be able to find
 * the pages mapped for the process.
 */
TranslationGenPtr
SDMAEngine::translate(Addr vaddr, Addr size)
{
    if (cur_vmid > 0) {
        // Only user translation is available to user queues (vmid > 0)
        return TranslationGenPtr(new AMDGPUVM::UserTranslationGen(
                                            &gpuDevice->getVM(), walker,
                                            cur_vmid, vaddr, size));
    } else if (gpuDevice->getVM().inAGP(vaddr)) {
        // Use AGP translation gen
        return TranslationGenPtr(
            new AMDGPUVM::AGPTranslationGen(&gpuDevice->getVM(), vaddr, size));
    } else if (gpuDevice->getVM().inMMHUB(vaddr)) {
        // Use MMHUB translation gen
        return TranslationGenPtr(new AMDGPUVM::MMHUBTranslationGen(
                                            &gpuDevice->getVM(), vaddr, size));
    }

    // Assume GART otherwise as this is the only other translation aperture
    // available to the SDMA engine processor.
    return TranslationGenPtr(
        new AMDGPUVM::GARTTranslationGen(&gpuDevice->getVM(), vaddr, size));
}

void
SDMAEngine::registerRLCQueue(Addr doorbell, Addr mqdAddr, SDMAQueueDesc *mqd,
                             bool isStatic)
{
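    // RB_CNTL[6:1] encodes log2 of the ring size in dwords, so the byte
    // size is 4 << rb_size (cf. setGfxSize/setPageSize below).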
    uint32_t rlc_size = 4UL << bits(mqd->sdmax_rlcx_rb_cntl, 6, 1);
    Addr rptr_wb_addr = mqd->sdmax_rlcx_rb_rptr_addr_hi;
    rptr_wb_addr <<= 32;
    rptr_wb_addr |= mqd->sdmax_rlcx_rb_rptr_addr_lo;
    bool priv = bits(mqd->sdmax_rlcx_rb_cntl, 23, 23);

    // Get first free RLC
    if (!rlc0.valid()) {
        DPRINTF(SDMAEngine, "Doorbell %lx mapped to RLC0\n", doorbell);
        rlcInfo[0] = doorbell;
        rlc0.valid(true);
        rlc0.base(mqd->rb_base << 8);
        rlc0.size(rlc_size);
        rlc0.rptr(0);
        rlc0.incRptr(mqd->rptr);
        rlc0.setWptr(mqd->wptr);
        rlc0.rptrWbAddr(rptr_wb_addr);
        rlc0.processing(false);
        rlc0.setMQD(mqd);
        rlc0.setMQDAddr(mqdAddr);
        rlc0.setPriv(priv);
        rlc0.setStatic(isStatic);
    } else if (!rlc1.valid()) {
        DPRINTF(SDMAEngine, "Doorbell %lx mapped to RLC1\n", doorbell);
        rlcInfo[1] = doorbell;
        rlc1.valid(true);
        rlc1.base(mqd->rb_base << 8);
        rlc1.size(rlc_size);
        rlc1.rptr(0);
        rlc1.incRptr(mqd->rptr);
        rlc1.setWptr(mqd->wptr);
        rlc1.rptrWbAddr(rptr_wb_addr);
        rlc1.processing(false);
        rlc1.setMQD(mqd);
        rlc1.setMQDAddr(mqdAddr);
        rlc1.setPriv(priv);
        rlc1.setStatic(isStatic);
    } else {
        panic("No free RLCs. Check they are properly unmapped.");
    }
}

void
SDMAEngine::unregisterRLCQueue(Addr doorbell, bool unmap_static)
{
    DPRINTF(SDMAEngine, "Unregistering RLC queue at %#lx\n", doorbell);
    if (rlcInfo[0] == doorbell) {
        if (!unmap_static && rlc0.isStatic()) {
            DPRINTF(SDMAEngine, "RLC0 is static. Will not unregister.\n");
            return;
        }

        SDMAQueueDesc *mqd = rlc0.getMQD();
        if (mqd) {
            DPRINTF(SDMAEngine, "Writing RLC0 SDMAMQD back to %#lx\n",
                    rlc0.getMQDAddr());

            mqd->rptr = rlc0.globalRptr();
            mqd->wptr = rlc0.getWptr();

            auto cb = new DmaVirtCallback<uint32_t>(
                [ = ] (const uint32_t &) { });
            dmaWriteVirt(rlc0.getMQDAddr(), sizeof(SDMAQueueDesc), cb, mqd);
        } else {
            warn("RLC0 SDMAMQD address invalid\n");
        }
        rlc0.valid(false);
        rlcInfo[0] = 0;
    } else if (rlcInfo[1] == doorbell) {
        if (!unmap_static && rlc1.isStatic()) {
            DPRINTF(SDMAEngine, "RLC1 is static. Will not unregister.\n");
            return;
        }

        SDMAQueueDesc *mqd = rlc1.getMQD();
        if (mqd) {
            DPRINTF(SDMAEngine, "Writing RLC1 SDMAMQD back to %#lx\n",
                    rlc1.getMQDAddr());

            mqd->rptr = rlc1.globalRptr();
            mqd->wptr = rlc1.getWptr();

            auto cb = new DmaVirtCallback<uint32_t>(
                [ = ] (const uint32_t &) { });
            dmaWriteVirt(rlc1.getMQDAddr(), sizeof(SDMAQueueDesc), cb, mqd);
        } else {
            warn("RLC1 SDMAMQD address invalid\n");
        }
        rlc1.valid(false);
        rlcInfo[1] = 0;
    } else {
        panic("Cannot unregister: no RLC queue at %#lx\n", doorbell);
    }

    gpuDevice->unsetDoorbell(doorbell);
}

void
SDMAEngine::deallocateRLCQueues(bool unmap_static)
{
    for (auto doorbell: rlcInfo) {
        if (doorbell) {
            unregisterRLCQueue(doorbell, unmap_static);
        }
    }
}

/* Start decoding packets from the Gfx queue. */
void
SDMAEngine::processGfx(Addr wptrOffset)
{
    gfx.setWptr(wptrOffset);
    if (!gfx.processing()) {
        gfx.processing(true);
        decodeNext(&gfx);
    }
}

/* Start decoding packets from the Page queue. */
void
SDMAEngine::processPage(Addr wptrOffset)
{
    page.setWptr(wptrOffset);
    if (!page.processing()) {
        page.processing(true);
        decodeNext(&page);
    }
}

/* Process RLC queue at given doorbell. */
void
SDMAEngine::processRLC(Addr doorbellOffset, Addr wptrOffset)
{
    if (rlcInfo[0] == doorbellOffset) {
        processRLC0(wptrOffset);
    } else if (rlcInfo[1] == doorbellOffset) {
        processRLC1(wptrOffset);
    } else {
        panic("Cannot process: no RLC queue at %#lx\n", doorbellOffset);
    }
}

/* Start decoding packets from the RLC0 queue. */
void
SDMAEngine::processRLC0(Addr wptrOffset)
{
    assert(rlc0.valid());

    rlc0.setWptr(wptrOffset);
    if (!rlc0.processing()) {
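        // RLC queues are user-mode queues; a non-zero vmid makes
        // translate() use the user (GPUVM) translation path.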
        cur_vmid = 1;
        rlc0.processing(true);
        decodeNext(&rlc0);
    }
}

/* Start decoding packets from the RLC1 queue. */
void
SDMAEngine::processRLC1(Addr wptrOffset)
{
    assert(rlc1.valid());

    rlc1.setWptr(wptrOffset);
    if (!rlc1.processing()) {
        cur_vmid = 1;
        rlc1.processing(true);
        decodeNext(&rlc1);
    }
}

/* Decoding next packet in the queue. */
void
SDMAEngine::decodeNext(SDMAQueue *q)
{
    DPRINTF(SDMAEngine, "SDMA decode rptr %p wptr %p\n", q->rptr(), q->wptr());

    if (q->rptr() != q->wptr()) {
        // We are using lambda functions passed to the DmaVirtCallback objects
        // which will call the actual callback method (e.g., decodeHeader).
        // The dmaBuffer member of the DmaVirtCallback is passed to the lambda
        // function as header in this case.
        auto cb = new DmaVirtCallback<uint32_t>(
            [ = ] (const uint32_t &header)
                { decodeHeader(q, header); });
        dmaReadVirt(q->rptr(), sizeof(uint32_t), cb, &cb->dmaBuffer);
    } else {
        // The driver expects the rptr to be written back to host memory
        // periodically. In simulation, we writeback rptr after each burst of
        // packets from a doorbell, rather than using the cycle count which
        // is not accurate in all simulation settings (e.g., KVM).
        DPRINTF(SDMAEngine, "Writing rptr %#lx back to host addr %#lx\n",
                q->globalRptr(), q->rptrWbAddr());
        if (q->rptrWbAddr()) {
            auto cb = new DmaVirtCallback<uint64_t>(
                [ = ](const uint64_t &) { }, q->globalRptr());
            dmaWriteVirt(q->rptrWbAddr(), sizeof(Addr), cb, &cb->dmaBuffer);
        }
        q->processing(false);
        if (q->parent()) {
            DPRINTF(SDMAEngine, "SDMA switching queues\n");
            decodeNext(q->parent());
        }
        cur_vmid = 0;
    }
}

/* Decoding the header of a packet. */
void
SDMAEngine::decodeHeader(SDMAQueue *q, uint32_t header)
{
    q->incRptr(sizeof(header));
    int opcode = bits(header, 7, 0);
    int sub_opcode = bits(header, 15, 8);

    DmaVirtCallback<uint64_t> *cb = nullptr;
    void *dmaBuffer = nullptr;

    DPRINTF(SDMAEngine, "SDMA header %x opcode %x sub-opcode %x\n",
            header, opcode, sub_opcode);

    switch (opcode) {
      case SDMA_OP_NOP: {
        uint32_t NOP_count = (header >> 16) & 0x3FFF;
        DPRINTF(SDMAEngine, "SDMA NOP packet with count %d\n", NOP_count);
        if (NOP_count > 0) {
            for (int i = 0; i < NOP_count; ++i) {
                if (q->rptr() == q->wptr()) {
                    warn("NOP count is beyond wptr, ignoring remaining NOPs");
                    break;
                }
                q->incRptr(4);
            }
        }
        decodeNext(q);
      } break;
      case SDMA_OP_COPY: {
        DPRINTF(SDMAEngine, "SDMA Copy packet\n");
        switch (sub_opcode) {
          case SDMA_SUBOP_COPY_LINEAR: {
            dmaBuffer = new sdmaCopy();
            cb = new DmaVirtCallback<uint64_t>(
                [ = ] (const uint64_t &)
                    { copy(q, (sdmaCopy *)dmaBuffer); });
            dmaReadVirt(q->rptr(), sizeof(sdmaCopy), cb, dmaBuffer);
          } break;
          case SDMA_SUBOP_COPY_LINEAR_SUB_WIND: {
            panic("SDMA_SUBOP_COPY_LINEAR_SUB_WIND not implemented");
          } break;
          case SDMA_SUBOP_COPY_TILED: {
            panic("SDMA_SUBOP_COPY_TILED not implemented");
          } break;
          case SDMA_SUBOP_COPY_TILED_SUB_WIND: {
            panic("SDMA_SUBOP_COPY_TILED_SUB_WIND not implemented");
          } break;
          case SDMA_SUBOP_COPY_T2T_SUB_WIND: {
            panic("SDMA_SUBOP_COPY_T2T_SUB_WIND not implemented");
          } break;
          case SDMA_SUBOP_COPY_SOA: {
            panic("SDMA_SUBOP_COPY_SOA not implemented");
          } break;
          case SDMA_SUBOP_COPY_DIRTY_PAGE: {
            panic("SDMA_SUBOP_COPY_DIRTY_PAGE not implemented");
          } break;
          case SDMA_SUBOP_COPY_LINEAR_PHY: {
            panic("SDMA_SUBOP_COPY_LINEAR_PHY not implemented");
          } break;
          default: {
            panic("SDMA unknown copy sub-opcode.");
          } break;
        }
      } break;
      case SDMA_OP_WRITE: {
        DPRINTF(SDMAEngine, "SDMA Write packet\n");
        switch (sub_opcode) {
          case SDMA_SUBOP_WRITE_LINEAR: {
            dmaBuffer = new sdmaWrite();
            cb = new DmaVirtCallback<uint64_t>(
                [ = ] (const uint64_t &)
                    { write(q, (sdmaWrite *)dmaBuffer); });
            dmaReadVirt(q->rptr(), sizeof(sdmaWrite), cb, dmaBuffer);
          } break;
          case SDMA_SUBOP_WRITE_TILED: {
            panic("SDMA_SUBOP_WRITE_TILED not implemented.\n");
          } break;
          default:
            break;
        }
      } break;
      case SDMA_OP_INDIRECT: {
        DPRINTF(SDMAEngine, "SDMA IndirectBuffer packet\n");
        dmaBuffer = new sdmaIndirectBuffer();
        cb = new DmaVirtCallback<uint64_t>(
            [ = ] (const uint64_t &)
                { indirectBuffer(q, (sdmaIndirectBuffer *)dmaBuffer); });
        dmaReadVirt(q->rptr(), sizeof(sdmaIndirectBuffer), cb, dmaBuffer);
      } break;
      case SDMA_OP_FENCE: {
        DPRINTF(SDMAEngine, "SDMA Fence packet\n");
        dmaBuffer = new sdmaFence();
        cb = new DmaVirtCallback<uint64_t>(
            [ = ] (const uint64_t &)
                { fence(q, (sdmaFence *)dmaBuffer); });
        dmaReadVirt(q->rptr(), sizeof(sdmaFence), cb, dmaBuffer);
      } break;
      case SDMA_OP_TRAP: {
        DPRINTF(SDMAEngine, "SDMA Trap packet\n");
        dmaBuffer = new sdmaTrap();
        cb = new DmaVirtCallback<uint64_t>(
            [ = ] (const uint64_t &)
                { trap(q, (sdmaTrap *)dmaBuffer); });
        dmaReadVirt(q->rptr(), sizeof(sdmaTrap), cb, dmaBuffer);
      } break;
      case SDMA_OP_SEM: {
        q->incRptr(sizeof(sdmaSemaphore));
        warn("SDMA_OP_SEM not implemented");
        decodeNext(q);
      } break;
      case SDMA_OP_POLL_REGMEM: {
        DPRINTF(SDMAEngine, "SDMA PollRegMem packet\n");
        dmaBuffer = new sdmaPollRegMem();
        cb = new DmaVirtCallback<uint64_t>(
            [ = ] (const uint64_t &)
                { pollRegMem(q, header, (sdmaPollRegMem *)dmaBuffer); });
        dmaReadVirt(q->rptr(), sizeof(sdmaPollRegMem), cb, dmaBuffer);
        switch (sub_opcode) {
          case SDMA_SUBOP_POLL_REG_WRITE_MEM: {
            panic("SDMA_SUBOP_POLL_REG_WRITE_MEM not implemented");
          } break;
          case SDMA_SUBOP_POLL_DBIT_WRITE_MEM: {
            panic("SDMA_SUBOP_POLL_DBIT_WRITE_MEM not implemented");
          } break;
          case SDMA_SUBOP_POLL_MEM_VERIFY: {
            panic("SDMA_SUBOP_POLL_MEM_VERIFY not implemented");
          } break;
          default:
            break;
        }
      } break;
      case SDMA_OP_COND_EXE: {
        q->incRptr(sizeof(sdmaCondExec));
        warn("SDMA_OP_COND_EXE not implemented");
        decodeNext(q);
      } break;
      case SDMA_OP_ATOMIC: {
        DPRINTF(SDMAEngine, "SDMA Atomic packet\n");
        dmaBuffer = new sdmaAtomic();
        cb = new DmaVirtCallback<uint64_t>(
            [ = ] (const uint64_t &)
                { atomic(q, header, (sdmaAtomic *)dmaBuffer); });
        dmaReadVirt(q->rptr(), sizeof(sdmaAtomic), cb, dmaBuffer);
      } break;
      case SDMA_OP_CONST_FILL: {
        DPRINTF(SDMAEngine, "SDMA Constant fill packet\n");
        dmaBuffer = new sdmaConstFill();
        cb = new DmaVirtCallback<uint64_t>(
            [ = ] (const uint64_t &)
                { constFill(q, (sdmaConstFill *)dmaBuffer, header); });
        dmaReadVirt(q->rptr(), sizeof(sdmaConstFill), cb, dmaBuffer);
      } break;
      case SDMA_OP_PTEPDE: {
        DPRINTF(SDMAEngine, "SDMA PTEPDE packet\n");
        switch (sub_opcode) {
          case SDMA_SUBOP_PTEPDE_GEN:
            DPRINTF(SDMAEngine, "SDMA PTEPDE_GEN sub-opcode\n");
            dmaBuffer = new sdmaPtePde();
            cb = new DmaVirtCallback<uint64_t>(
                [ = ] (const uint64_t &)
                    { ptePde(q, (sdmaPtePde *)dmaBuffer); });
            dmaReadVirt(q->rptr(), sizeof(sdmaPtePde), cb, dmaBuffer);
            break;
          case SDMA_SUBOP_PTEPDE_COPY:
            panic("SDMA_SUBOP_PTEPDE_COPY not implemented");
            break;
          case SDMA_SUBOP_PTEPDE_COPY_BACKWARDS:
            panic("SDMA_SUBOP_PTEPDE_COPY_BACKWARDS not implemented");
            break;
          case SDMA_SUBOP_PTEPDE_RMW: {
            panic("SDMA_SUBOP_PTEPDE_RMW not implemented");
          } break;
          default:
            DPRINTF(SDMAEngine, "Unsupported PTEPDE sub-opcode %d\n",
                    sub_opcode);
            decodeNext(q);
            break;
        }
      } break;
      case SDMA_OP_TIMESTAMP: {
        q->incRptr(sizeof(sdmaTimestamp));
        switch (sub_opcode) {
          case SDMA_SUBOP_TIMESTAMP_SET: {
          } break;
          case SDMA_SUBOP_TIMESTAMP_GET: {
          } break;
          case SDMA_SUBOP_TIMESTAMP_GET_GLOBAL: {
          } break;
          default:
            break;
        }
        warn("SDMA_OP_TIMESTAMP not implemented");
        decodeNext(q);
      } break;
      case SDMA_OP_SRBM_WRITE: {
        DPRINTF(SDMAEngine, "SDMA SRBMWrite packet\n");
        dmaBuffer = new sdmaSRBMWrite();
        cb = new DmaVirtCallback<uint64_t>(
            [ = ] (const uint64_t &)
                { srbmWrite(q, header, (sdmaSRBMWrite *)dmaBuffer); });
        dmaReadVirt(q->rptr(), sizeof(sdmaSRBMWrite), cb, dmaBuffer);
      } break;
      case SDMA_OP_PRE_EXE: {
        q->incRptr(sizeof(sdmaPredExec));
        warn("SDMA_OP_PRE_EXE not implemented");
        decodeNext(q);
      } break;
      case SDMA_OP_DUMMY_TRAP: {
        q->incRptr(sizeof(sdmaDummyTrap));
        warn("SDMA_OP_DUMMY_TRAP not implemented");
        decodeNext(q);
      } break;
      default: {
        panic("Invalid SDMA packet.\n");
      } break;
    }
}

/* Implements a write packet. */
void
SDMAEngine::write(SDMAQueue *q, sdmaWrite *pkt)
{
    q->incRptr(sizeof(sdmaWrite));
    // count represents the number of dwords - 1 to write
    pkt->count++;
    DPRINTF(SDMAEngine, "Write %d dwords to %lx\n", pkt->count, pkt->dest);

    // first we have to read needed data from the SDMA queue
    uint32_t *dmaBuffer = new uint32_t[pkt->count];
    auto cb = new DmaVirtCallback<uint64_t>(
        [ = ] (const uint64_t &) { writeReadData(q, pkt, dmaBuffer); });
    dmaReadVirt(q->rptr(), sizeof(uint32_t) * pkt->count, cb,
                (void *)dmaBuffer);
}

/* Completion of data reading for a write packet. */
void
SDMAEngine::writeReadData(SDMAQueue *q, sdmaWrite *pkt, uint32_t *dmaBuffer)
{
    int bufferSize = sizeof(uint32_t) * pkt->count;
    q->incRptr(bufferSize);

    DPRINTF(SDMAEngine, "Write packet data:\n");
    for (int i = 0; i < pkt->count; ++i) {
        DPRINTF(SDMAEngine, "%08x\n", dmaBuffer[i]);
    }

    // lastly we write read data to the destination address
    if (gpuDevice->getVM().inMMHUB(pkt->dest)) {
        Addr mmhub_addr = pkt->dest - gpuDevice->getVM().getMMHUBBase();

        fatal_if(gpuDevice->getVM().inGARTRange(mmhub_addr),
                 "SDMA write to GART not implemented");

        auto cb = new EventFunctionWrapper(
            [ = ]{ writeDone(q, pkt, dmaBuffer); }, name());
        gpuDevice->getMemMgr()->writeRequest(mmhub_addr, (uint8_t *)dmaBuffer,
                                             bufferSize, 0, cb);
    } else {
        if (q->priv()) {
            pkt->dest = getGARTAddr(pkt->dest);
        }
        auto cb = new DmaVirtCallback<uint32_t>(
            [ = ] (const uint64_t &) { writeDone(q, pkt, dmaBuffer); });
        dmaWriteVirt(pkt->dest, bufferSize, cb, (void *)dmaBuffer);
    }
}

/* Completion of a write packet. */
void
SDMAEngine::writeDone(SDMAQueue *q, sdmaWrite *pkt, uint32_t *dmaBuffer)
{
    DPRINTF(SDMAEngine, "Write packet completed to %p, %d dwords\n",
            pkt->dest, pkt->count);

    auto cleanup_cb = new EventFunctionWrapper(
        [ = ]{ writeCleanup(dmaBuffer); }, name());

    auto system_ptr = gpuDevice->CP()->system();
    if (!system_ptr->isAtomicMode()) {
        warn_once("SDMA cleanup assumes 2000 tick timing for completion."
                  " This has not been tested in timing mode\n");
    }

    // Only 2000 ticks should be necessary, but add additional padding.
    schedule(cleanup_cb, curTick() + 10000);

    delete pkt;
    decodeNext(q);
}

void
SDMAEngine::writeCleanup(uint32_t *dmaBuffer)
{
    delete [] dmaBuffer;
}

/* Implements a copy packet. */
void
SDMAEngine::copy(SDMAQueue *q, sdmaCopy *pkt)
{
    DPRINTF(SDMAEngine, "Copy src: %lx -> dest: %lx count %d\n",
            pkt->source, pkt->dest, pkt->count);
    q->incRptr(sizeof(sdmaCopy));
    // count represents the number of bytes - 1 to be copied
    pkt->count++;
    if (q->priv()) {
        if (!gpuDevice->getVM().inMMHUB(pkt->source)) {
            DPRINTF(SDMAEngine, "Getting GART addr for %lx\n", pkt->source);
            pkt->source = getGARTAddr(pkt->source);
            DPRINTF(SDMAEngine, "GART addr %lx\n", pkt->source);
        }
    }

    // Read data from the source first, then call the copyReadData method
    uint8_t *dmaBuffer = new uint8_t[pkt->count];
    Addr device_addr = getDeviceAddress(pkt->source);
    if (device_addr) {
        DPRINTF(SDMAEngine, "Copying from device address %#lx\n", device_addr);
        auto cb = new EventFunctionWrapper(
            [ = ]{ copyReadData(q, pkt, dmaBuffer); }, name());

        // Copy the minimum page size at a time in case the physical addresses
        // are not contiguous.
        ChunkGenerator gen(pkt->source, pkt->count, AMDGPU_MMHUB_PAGE_SIZE);
        uint8_t *buffer_ptr = dmaBuffer;
        for (; !gen.done(); gen.next()) {
            Addr chunk_addr = getDeviceAddress(gen.addr());
            assert(chunk_addr);

            DPRINTF(SDMAEngine, "Copying chunk of %d bytes from %#lx (%#lx)\n",
                    gen.size(), gen.addr(), chunk_addr);

            gpuDevice->getMemMgr()->readRequest(chunk_addr, buffer_ptr,
                                                gen.size(), 0,
                                                gen.last() ? cb : nullptr);
            buffer_ptr += gen.size();
        }
    } else {
        auto cb = new DmaVirtCallback<uint64_t>(
            [ = ] (const uint64_t &) { copyReadData(q, pkt, dmaBuffer); });
        dmaReadVirt(pkt->source, pkt->count, cb, (void *)dmaBuffer);
    }
}

/* Completion of data reading for a copy packet. */
void
SDMAEngine::copyReadData(SDMAQueue *q, sdmaCopy *pkt, uint8_t *dmaBuffer)
{
    // lastly we write read data to the destination address
    uint64_t *dmaBuffer64 = reinterpret_cast<uint64_t *>(dmaBuffer);

    DPRINTF(SDMAEngine, "Copy packet last/first qwords:\n");
    DPRINTF(SDMAEngine, "First: %016lx\n", dmaBuffer64[0]);
    DPRINTF(SDMAEngine, "Last: %016lx\n", dmaBuffer64[(pkt->count/8)-1]);

    DPRINTF(SDMAData, "Copy packet data:\n");
    for (int i = 0; i < pkt->count/8; ++i) {
        DPRINTF(SDMAData, "%016lx\n", dmaBuffer64[i]);
    }

    Addr device_addr = getDeviceAddress(pkt->dest);
    // Write read data to the destination address then call the copyDone
    // method
    if (device_addr) {
        DPRINTF(SDMAEngine, "Copying to device address %#lx\n", device_addr);
        auto cb = new EventFunctionWrapper(
            [ = ]{ copyDone(q, pkt, dmaBuffer); }, name());

        // Copy the minimum page size at a time in case the physical addresses
        // are not contiguous.
        ChunkGenerator gen(pkt->dest, pkt->count, AMDGPU_MMHUB_PAGE_SIZE);
        uint8_t *buffer_ptr = dmaBuffer;
        for (; !gen.done(); gen.next()) {
            Addr chunk_addr = getDeviceAddress(gen.addr());
            assert(chunk_addr);

            DPRINTF(SDMAEngine, "Copying chunk of %d bytes to %#lx (%#lx)\n",
                    gen.size(), gen.addr(), chunk_addr);

            gpuDevice->getMemMgr()->writeRequest(chunk_addr, buffer_ptr,
                                                 gen.size(), 0,
                                                 gen.last() ? cb : nullptr);

            buffer_ptr += gen.size();
        }
    } else {
        DPRINTF(SDMAEngine, "Copying to host address %#lx\n", pkt->dest);
        auto cb = new DmaVirtCallback<uint64_t>(
            [ = ] (const uint64_t &) { copyDone(q, pkt, dmaBuffer); });
        dmaWriteVirt(pkt->dest, pkt->count, cb, (void *)dmaBuffer);
    }

    // For destinations in the GART table, gem5 uses a mapping table instead
    // of functionally going to device memory, so we need to update that copy.
    if (gpuDevice->getVM().inGARTRange(device_addr)) {
        // GART entries are always 8 bytes.
        assert((pkt->count % 8) == 0);
        for (int i = 0; i < pkt->count/8; ++i) {
            Addr gart_addr = device_addr + i*8 - gpuDevice->getVM().gartBase();
            DPRINTF(SDMAEngine, "Shadow copying to GART table %lx -> %lx\n",
                    gart_addr, dmaBuffer64[i]);
            gpuDevice->getVM().gartTable[gart_addr] = dmaBuffer64[i];
        }
    }
}

/* Completion of a copy packet. */
void
SDMAEngine::copyDone(SDMAQueue *q, sdmaCopy *pkt, uint8_t *dmaBuffer)
{
    DPRINTF(SDMAEngine, "Copy completed to %p, %d bytes\n",
            pkt->dest, pkt->count);

    auto cleanup_cb = new EventFunctionWrapper(
        [ = ]{ copyCleanup(dmaBuffer); }, name());

    auto system_ptr = gpuDevice->CP()->system();
    if (!system_ptr->isAtomicMode()) {
        warn_once("SDMA cleanup assumes 2000 tick timing for completion."
                  " This has not been tested in timing mode\n");
    }

    // Only 2000 ticks should be necessary, but add additional padding.
    schedule(cleanup_cb, curTick() + 10000);

    delete pkt;
    decodeNext(q);
}

void
SDMAEngine::copyCleanup(uint8_t *dmaBuffer)
{
    delete [] dmaBuffer;
}

/* Implements an indirect buffer packet. */
void
SDMAEngine::indirectBuffer(SDMAQueue *q, sdmaIndirectBuffer *pkt)
{
    if (q->priv()) {
        q->ib()->base(getGARTAddr(pkt->base));
    } else {
        q->ib()->base(pkt->base);
    }
    q->ib()->rptr(0);
    q->ib()->size(pkt->size * sizeof(uint32_t) + 1);
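    // The IB size is set one byte past the write pointer, presumably so the
    // read pointer can reach wptr without triggering the queue's wrap-around.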
    q->ib()->setWptr(pkt->size * sizeof(uint32_t));

    q->incRptr(sizeof(sdmaIndirectBuffer));

    delete pkt;
    decodeNext(q->ib());
}

/* Implements a fence packet. */
void
SDMAEngine::fence(SDMAQueue *q, sdmaFence *pkt)
{
    q->incRptr(sizeof(sdmaFence));
    if (q->priv()) {
        pkt->dest = getGARTAddr(pkt->dest);
    }

    // Writing the data from the fence packet to the destination address.
    auto cb = new DmaVirtCallback<uint32_t>(
        [ = ] (const uint32_t &) { fenceDone(q, pkt); }, pkt->data);
    dmaWriteVirt(pkt->dest, sizeof(pkt->data), cb, &cb->dmaBuffer);
}

/* Completion of a fence packet. */
void
SDMAEngine::fenceDone(SDMAQueue *q, sdmaFence *pkt)
{
    DPRINTF(SDMAEngine, "Fence completed to %p, data 0x%x\n",
            pkt->dest, pkt->data);
    delete pkt;
    decodeNext(q);
}

/* Implements a trap packet. */
void
SDMAEngine::trap(SDMAQueue *q, sdmaTrap *pkt)
{
    q->incRptr(sizeof(sdmaTrap));

    DPRINTF(SDMAEngine, "Trap contextId: %p\n", pkt->intrContext);

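    // Page queue traps are reported on IH ring 3; all other queues use
    // ring 0.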
    uint32_t ring_id = (q->queueType() == SDMAPage) ? 3 : 0;

    int node_id = 0;
    int local_id = getId();

    if (gpuDevice->getGfxVersion() == GfxVersion::gfx942) {
        node_id = getId() >> 2;

        // For most SDMAs the "node_id" for the interrupt handler is the SDMA
        // id / 4. node_id of 2 is used by some other IP, so this gets changed
        // to node_id 4:
        // SDMA 0-3: node_id 0
        // SDMA 4-7: node_id 1
        // SDMA 8-11: node_id 4
        // SDMA 12-15: node_id 3
        if (node_id == 2) {
            node_id += 2;
        }

        local_id = getId() % 4;
    }
    gpuDevice->getIH()->prepareInterruptCookie(pkt->intrContext, ring_id,
                                               getIHClientId(local_id),
                                               TRAP_ID, 2*node_id);
    gpuDevice->getIH()->submitInterruptCookie();

    delete pkt;
    decodeNext(q);
}

/* Implements a write SRBM packet. */
void
SDMAEngine::srbmWrite(SDMAQueue *q, uint32_t header, sdmaSRBMWrite *pkt)
{
    q->incRptr(sizeof(sdmaSRBMWrite));

    sdmaSRBMWriteHeader srbm_header;
    srbm_header.ordinal = header;

    [[maybe_unused]] uint32_t reg_addr = pkt->regAddr << 2;
    uint32_t reg_mask = 0x00000000;

    if (srbm_header.byteEnable & 0x8) reg_mask |= 0xFF000000;
    if (srbm_header.byteEnable & 0x4) reg_mask |= 0x00FF0000;
    if (srbm_header.byteEnable & 0x2) reg_mask |= 0x0000FF00;
    if (srbm_header.byteEnable & 0x1) reg_mask |= 0x000000FF;
    pkt->data &= reg_mask;

    DPRINTF(SDMAEngine, "SRBM write to %#x with data %#x\n",
            reg_addr, pkt->data);

    gpuDevice->setRegVal(reg_addr, pkt->data);

    delete pkt;
    decodeNext(q);
}

/**
 * Implements a poll reg/mem packet that polls an SRBM register or a memory
 * location, compares the retrieved value with a reference value and, if
 * unsuccessful, it retries indefinitely or for a limited number of times.
 */
void
SDMAEngine::pollRegMem(SDMAQueue *q, uint32_t header, sdmaPollRegMem *pkt)
{
    q->incRptr(sizeof(sdmaPollRegMem));

    sdmaPollRegMemHeader prm_header;
    prm_header.ordinal = header;

    if (q->priv()) {
        pkt->address = getGARTAddr(pkt->address);
    }

    DPRINTF(SDMAEngine, "POLL_REGMEM: M=%d, func=%d, op=%d, addr=%p, ref=%d, "
            "mask=%p, retry=%d, pinterval=%d\n", prm_header.mode,
            prm_header.func, prm_header.op, pkt->address, pkt->ref, pkt->mask,
            pkt->retryCount, pkt->pollInt);

    bool skip = false;

    if (prm_header.mode == 1) {
        // polling on a memory location
        if (prm_header.op == 0) {
            auto cb = new DmaVirtCallback<uint32_t>(
                [ = ] (const uint32_t &dma_buffer) {
                    pollRegMemRead(q, header, pkt, dma_buffer, 0); });
            dmaReadVirt(pkt->address, sizeof(uint32_t), cb,
                        (void *)&cb->dmaBuffer);
        } else {
            panic("SDMA poll mem operation not implemented.");
            skip = true;
        }
    } else {
        warn_once("SDMA poll reg is not implemented. If this is required for "
                  "correctness, an SRBM model needs to be implemented.");
        skip = true;
    }

    if (skip) {
        delete pkt;
        decodeNext(q);
    }
}

void
SDMAEngine::pollRegMemRead(SDMAQueue *q, uint32_t header, sdmaPollRegMem *pkt,
                           uint32_t dma_buffer, int count)
{
    sdmaPollRegMemHeader prm_header;
    prm_header.ordinal = header;

    assert(prm_header.mode == 1 && prm_header.op == 0);

    if (!pollRegMemFunc(dma_buffer, pkt->ref, prm_header.func) &&
        ((count < (pkt->retryCount + 1) && pkt->retryCount != 0xfff) ||
         pkt->retryCount == 0xfff)) {

        // continue polling on a memory location until reference value is met,
        // retryCount is met or indefinitely if retryCount is 0xfff
        DPRINTF(SDMAEngine, "SDMA polling mem addr %p, val %d ref %d.\n",
                pkt->address, dma_buffer, pkt->ref);

        auto cb = new DmaVirtCallback<uint32_t>(
            [ = ] (const uint32_t &dma_buffer) {
                pollRegMemRead(q, header, pkt, dma_buffer, count + 1); });
        dmaReadVirt(pkt->address, sizeof(uint32_t), cb,
                    (void *)&cb->dmaBuffer);
    } else {
        DPRINTF(SDMAEngine, "SDMA polling mem addr %p, val %d ref %d done.\n",
                pkt->address, dma_buffer, pkt->ref);

        delete pkt;
        decodeNext(q);
    }
}

bool
SDMAEngine::pollRegMemFunc(uint32_t value, uint32_t reference, uint32_t func)
{
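    // POLL_REGMEM comparison functions:
    // 0: always, 1: <, 2: <=, 3: ==, 4: !=, 5: >=, 6: >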
    switch (func) {
      case 0:
        return true;
        break;
      case 1:
        return value < reference;
        break;
      case 2:
        return value <= reference;
        break;
      case 3:
        return value == reference;
        break;
      case 4:
        return value != reference;
        break;
      case 5:
        return value >= reference;
        break;
      case 6:
        return value > reference;
        break;
      default:
        panic("SDMA POLL_REGMEM unknown comparison function.");
        break;
    }
}

/* Implements a PTE PDE generation packet. */
void
SDMAEngine::ptePde(SDMAQueue *q, sdmaPtePde *pkt)
{
    q->incRptr(sizeof(sdmaPtePde));
    pkt->count++;

    DPRINTF(SDMAEngine, "PTEPDE init: %d inc: %d count: %d\n",
            pkt->initValue, pkt->increment, pkt->count);

    // Generating pkt->count double dwords using the initial value, increment
    // and a mask.
    uint64_t *dmaBuffer = new uint64_t[pkt->count];
    for (int i = 0; i < pkt->count; i++) {
        dmaBuffer[i] = (pkt->mask | (pkt->initValue + (i * pkt->increment)));
    }

    // Writing generated data to the destination address.
    if (gpuDevice->getVM().inMMHUB(pkt->dest)) {
        Addr mmhub_addr = pkt->dest - gpuDevice->getVM().getMMHUBBase();

        fatal_if(gpuDevice->getVM().inGARTRange(mmhub_addr),
                 "SDMA write to GART not implemented");

        auto cb = new EventFunctionWrapper(
            [ = ]{ ptePdeDone(q, pkt, dmaBuffer); }, name());
        gpuDevice->getMemMgr()->writeRequest(mmhub_addr, (uint8_t *)dmaBuffer,
                                             sizeof(uint64_t) * pkt->count, 0,
                                             cb);
    } else {
        if (q->priv()) {
            pkt->dest = getGARTAddr(pkt->dest);
        }
        auto cb = new DmaVirtCallback<uint64_t>(
            [ = ] (const uint64_t &) { ptePdeDone(q, pkt, dmaBuffer); });
        dmaWriteVirt(pkt->dest, sizeof(uint64_t) * pkt->count, cb,
                     (void *)dmaBuffer);
    }
}

/* Completion of a PTE PDE generation packet. */
void
SDMAEngine::ptePdeDone(SDMAQueue *q, sdmaPtePde *pkt, uint64_t *dmaBuffer)
{
    DPRINTF(SDMAEngine, "PtePde packet completed to %p, %d 2dwords\n",
            pkt->dest, pkt->count);

    auto cleanup_cb = new EventFunctionWrapper(
        [ = ]{ ptePdeCleanup(dmaBuffer); }, name());

    auto system_ptr = gpuDevice->CP()->system();
    if (!system_ptr->isAtomicMode()) {
        warn_once("SDMA cleanup assumes 2000 tick timing for completion."
                  " This has not been tested in timing mode\n");
    }

    // Only 2000 ticks should be necessary, but add additional padding.
    schedule(cleanup_cb, curTick() + 10000);

    delete pkt;
    decodeNext(q);
}

void
SDMAEngine::ptePdeCleanup(uint64_t *dmaBuffer)
{
    delete [] dmaBuffer;
}

void
SDMAEngine::atomic(SDMAQueue *q, uint32_t header, sdmaAtomic *pkt)
{
    q->incRptr(sizeof(sdmaAtomic));

    sdmaAtomicHeader at_header;
    at_header.ordinal = header;

    DPRINTF(SDMAEngine, "Atomic op %d on addr %#lx, src: %ld, cmp: %ld, loop?"
            " %d loopInt: %d\n", at_header.opcode, pkt->addr, pkt->srcData,
            pkt->cmpData, at_header.loop, pkt->loopInt);

    // Read the data at pkt->addr
    uint64_t *dmaBuffer = new uint64_t;
    auto cb = new DmaVirtCallback<uint64_t>(
        [ = ] (const uint64_t &)
            { atomicData(q, header, pkt, dmaBuffer); });
    dmaReadVirt(pkt->addr, sizeof(uint64_t), cb, (void *)dmaBuffer);
}

void
SDMAEngine::atomicData(SDMAQueue *q, uint32_t header, sdmaAtomic *pkt,
                       uint64_t *dmaBuffer)
{
    sdmaAtomicHeader at_header;
    at_header.ordinal = header;

    DPRINTF(SDMAEngine, "Atomic op %d on addr %#lx got data %#lx\n",
            at_header.opcode, pkt->addr, *dmaBuffer);

    if (at_header.opcode == SDMA_ATOMIC_ADD64) {
        // Atomic add with return -- dst = dst + src
        int64_t dst_data = *dmaBuffer;
        int64_t src_data = pkt->srcData;

        DPRINTF(SDMAEngine, "Atomic ADD_RTN: %ld + %ld = %ld\n", dst_data,
                src_data, dst_data + src_data);

        // Reuse the dmaBuffer allocated
        *dmaBuffer = dst_data + src_data;

        auto cb = new DmaVirtCallback<uint64_t>(
            [ = ] (const uint64_t &)
                { atomicDone(q, header, pkt, dmaBuffer); });
        dmaWriteVirt(pkt->addr, sizeof(uint64_t), cb, (void *)dmaBuffer);
    } else {
        panic("Unsupported SDMA atomic opcode: %d\n", at_header.opcode);
    }
}

void
SDMAEngine::atomicDone(SDMAQueue *q, uint32_t header, sdmaAtomic *pkt,
                       uint64_t *dmaBuffer)
{
    sdmaAtomicHeader at_header;
    at_header.ordinal = header;

    DPRINTF(SDMAEngine, "Atomic op %d on addr %#lx complete (sent %lx)\n",
            at_header.opcode, pkt->addr, *dmaBuffer);

    delete dmaBuffer;
    delete pkt;
    decodeNext(q);
}

void
SDMAEngine::constFill(SDMAQueue *q, sdmaConstFill *pkt, uint32_t header)
{
    q->incRptr(sizeof(sdmaConstFill));

    sdmaConstFillHeader fill_header;
    fill_header.ordinal = header;

    DPRINTF(SDMAEngine, "ConstFill %lx srcData %x count %d size %d sw %d\n",
            pkt->addr, pkt->srcData, pkt->count, fill_header.fillsize,
            fill_header.sw);

    // Count is number of <size> elements - 1. Size is log2 of byte size.
    int fill_bytes = (pkt->count + 1) * (1 << fill_header.fillsize);
    uint8_t *fill_data = new uint8_t[fill_bytes];

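    // Note: memset replicates only the low byte of srcData, which is exact
    // for single-byte fills or fill patterns whose bytes are all equal.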
    memset(fill_data, pkt->srcData, fill_bytes);

    Addr device_addr = getDeviceAddress(pkt->addr);
    if (device_addr) {
        DPRINTF(SDMAEngine, "ConstFill %d bytes of %x to device at %lx\n",
                fill_bytes, pkt->srcData, pkt->addr);

        auto cb = new EventFunctionWrapper(
            [ = ]{ constFillDone(q, pkt, fill_data); }, name());

        // Copy the minimum page size at a time in case the physical addresses
        // are not contiguous.
        ChunkGenerator gen(pkt->addr, fill_bytes, AMDGPU_MMHUB_PAGE_SIZE);
        uint8_t *fill_data_ptr = fill_data;
        for (; !gen.done(); gen.next()) {
            Addr chunk_addr = getDeviceAddress(gen.addr());
            assert(chunk_addr);

            DPRINTF(SDMAEngine, "Copying chunk of %d bytes to %#lx (%#lx)\n",
                    gen.size(), gen.addr(), chunk_addr);

            gpuDevice->getMemMgr()->writeRequest(chunk_addr, fill_data_ptr,
                                                 gen.size(), 0,
                                                 gen.last() ? cb : nullptr);
            fill_data_ptr += gen.size();
        }
    } else {
        DPRINTF(SDMAEngine, "ConstFill %d bytes of %x to host at %lx\n",
                fill_bytes, pkt->srcData, pkt->addr);

        auto cb = new DmaVirtCallback<uint64_t>(
            [ = ] (const uint64_t &)
                { constFillDone(q, pkt, fill_data); });
        dmaWriteVirt(pkt->addr, fill_bytes, cb, (void *)fill_data);
    }
}

void
SDMAEngine::constFillDone(SDMAQueue *q, sdmaConstFill *pkt, uint8_t *fill_data)
{
    DPRINTF(SDMAEngine, "ConstFill to %lx done\n", pkt->addr);

    delete [] fill_data;
    delete pkt;
    decodeNext(q);
}

AddrRangeList
SDMAEngine::getAddrRanges() const
{
    AddrRangeList ranges;
    return ranges;
}

void
SDMAEngine::serialize(CheckpointOut &cp) const
{
    // Serialize the DmaVirtDevice base class
    DmaVirtDevice::serialize(cp);

    SERIALIZE_SCALAR(gfxBase);
    SERIALIZE_SCALAR(gfxRptr);
    SERIALIZE_SCALAR(gfxDoorbell);
    SERIALIZE_SCALAR(gfxDoorbellOffset);
    SERIALIZE_SCALAR(gfxWptr);
    SERIALIZE_SCALAR(pageBase);
    SERIALIZE_SCALAR(pageRptr);
    SERIALIZE_SCALAR(pageDoorbell);
    SERIALIZE_SCALAR(pageDoorbellOffset);
    SERIALIZE_SCALAR(pageWptr);

    int num_queues = 4;

    std::vector<SDMAQueue *> queues;
    queues.push_back((SDMAQueue *)&gfx);
    queues.push_back((SDMAQueue *)&page);
    queues.push_back((SDMAQueue *)&gfxIb);
    queues.push_back((SDMAQueue *)&pageIb);

    auto base = std::make_unique<Addr[]>(num_queues);
    auto rptr = std::make_unique<Addr[]>(num_queues);
    auto wptr = std::make_unique<Addr[]>(num_queues);
    auto size = std::make_unique<Addr[]>(num_queues);
    auto processing = std::make_unique<bool[]>(num_queues);

    for (int i = 0; i < num_queues; i++) {
        base[i] = queues[i]->base();
        rptr[i] = queues[i]->getRptr();
        wptr[i] = queues[i]->getWptr();
        size[i] = queues[i]->size();
        processing[i] = queues[i]->processing();
    }

    SERIALIZE_UNIQUE_PTR_ARRAY(base, num_queues);
    SERIALIZE_UNIQUE_PTR_ARRAY(rptr, num_queues);
    SERIALIZE_UNIQUE_PTR_ARRAY(wptr, num_queues);
    SERIALIZE_UNIQUE_PTR_ARRAY(size, num_queues);
    SERIALIZE_UNIQUE_PTR_ARRAY(processing, num_queues);

    // Capture RLC queue information in checkpoint
    // Only two RLC queues are supported right now
    const int num_rlc_queues = 2;
    std::vector<SDMAQueue *> rlc_queues;
    rlc_queues.push_back((SDMAQueue *)&rlc0);
    rlc_queues.push_back((SDMAQueue *)&rlc1);

    auto rlc_info = std::make_unique<Addr[]>(num_rlc_queues);
    auto rlc_valid = std::make_unique<bool[]>(num_rlc_queues);
    auto rlc_base = std::make_unique<Addr[]>(num_rlc_queues);
    auto rlc_rptr = std::make_unique<Addr[]>(num_rlc_queues);
    auto rlc_global_rptr = std::make_unique<Addr[]>(num_rlc_queues);
    auto rlc_wptr = std::make_unique<Addr[]>(num_rlc_queues);
    auto rlc_size = std::make_unique<Addr[]>(num_rlc_queues);
    auto rlc_rptr_wb_addr = std::make_unique<Addr[]>(num_rlc_queues);
    auto rlc_processing = std::make_unique<bool[]>(num_rlc_queues);
    auto rlc_mqd_addr = std::make_unique<Addr[]>(num_rlc_queues);
    auto rlc_priv = std::make_unique<bool[]>(num_rlc_queues);
    auto rlc_static = std::make_unique<bool[]>(num_rlc_queues);
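    // Each MQD is checkpointed in a fixed 128-dword (512 B) slot, which is
    // assumed to be large enough to hold an SDMAQueueDesc.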
    auto rlc_mqd = std::make_unique<uint32_t[]>(num_rlc_queues * 128);

    // Save RLC queue information in arrays that
    // are easier to serialize
    for (int i = 0; i < num_rlc_queues; i++) {
        rlc_valid[i] = rlc_queues[i]->valid();
        if (rlc_valid[i]) {
            rlc_info[i] = rlcInfo[i];
            rlc_base[i] = rlc_queues[i]->base();
            rlc_rptr[i] = rlc_queues[i]->getRptr();
            rlc_global_rptr[i] = rlc_queues[i]->globalRptr();
            rlc_wptr[i] = rlc_queues[i]->getWptr();
            rlc_size[i] = rlc_queues[i]->size();
            rlc_rptr_wb_addr[i] = rlc_queues[i]->rptrWbAddr();
            rlc_processing[i] = rlc_queues[i]->processing();
            rlc_mqd_addr[i] = rlc_queues[i]->getMQDAddr();
            rlc_priv[i] = rlc_queues[i]->priv();
            rlc_static[i] = rlc_queues[i]->isStatic();
            memcpy(rlc_mqd.get() + 128*i, rlc_queues[i]->getMQD(),
                   sizeof(SDMAQueueDesc));
        }
    }

    SERIALIZE_UNIQUE_PTR_ARRAY(rlc_info, num_rlc_queues);
    SERIALIZE_UNIQUE_PTR_ARRAY(rlc_valid, num_rlc_queues);
    SERIALIZE_UNIQUE_PTR_ARRAY(rlc_base, num_rlc_queues);
    SERIALIZE_UNIQUE_PTR_ARRAY(rlc_rptr, num_rlc_queues);
    SERIALIZE_UNIQUE_PTR_ARRAY(rlc_global_rptr, num_rlc_queues);
    SERIALIZE_UNIQUE_PTR_ARRAY(rlc_wptr, num_rlc_queues);
    SERIALIZE_UNIQUE_PTR_ARRAY(rlc_size, num_rlc_queues);
    SERIALIZE_UNIQUE_PTR_ARRAY(rlc_rptr_wb_addr, num_rlc_queues);
    SERIALIZE_UNIQUE_PTR_ARRAY(rlc_processing, num_rlc_queues);
    SERIALIZE_UNIQUE_PTR_ARRAY(rlc_mqd_addr, num_rlc_queues);
    SERIALIZE_UNIQUE_PTR_ARRAY(rlc_priv, num_rlc_queues);
    SERIALIZE_UNIQUE_PTR_ARRAY(rlc_static, num_rlc_queues);
    SERIALIZE_UNIQUE_PTR_ARRAY(rlc_mqd, num_rlc_queues * 128);
}

void
SDMAEngine::unserialize(CheckpointIn &cp)
{
    // Unserialize the DmaVirtDevice base class
    DmaVirtDevice::unserialize(cp);

    UNSERIALIZE_SCALAR(gfxBase);
    UNSERIALIZE_SCALAR(gfxRptr);
    UNSERIALIZE_SCALAR(gfxDoorbell);
    UNSERIALIZE_SCALAR(gfxDoorbellOffset);
    UNSERIALIZE_SCALAR(gfxWptr);
    UNSERIALIZE_SCALAR(pageBase);
    UNSERIALIZE_SCALAR(pageRptr);
    UNSERIALIZE_SCALAR(pageDoorbell);
    UNSERIALIZE_SCALAR(pageDoorbellOffset);
    UNSERIALIZE_SCALAR(pageWptr);

    int num_queues = 4;
    auto base = std::make_unique<Addr[]>(num_queues);
    auto rptr = std::make_unique<Addr[]>(num_queues);
    auto wptr = std::make_unique<Addr[]>(num_queues);
    auto size = std::make_unique<Addr[]>(num_queues);
    auto processing = std::make_unique<bool[]>(num_queues);

    UNSERIALIZE_UNIQUE_PTR_ARRAY(base, num_queues);
    UNSERIALIZE_UNIQUE_PTR_ARRAY(rptr, num_queues);
    UNSERIALIZE_UNIQUE_PTR_ARRAY(wptr, num_queues);
    UNSERIALIZE_UNIQUE_PTR_ARRAY(size, num_queues);
    UNSERIALIZE_UNIQUE_PTR_ARRAY(processing, num_queues);

    std::vector<SDMAQueue *> queues;
    queues.push_back((SDMAQueue *)&gfx);
    queues.push_back((SDMAQueue *)&page);
    queues.push_back((SDMAQueue *)&gfxIb);
    queues.push_back((SDMAQueue *)&pageIb);

    for (int i = 0; i < num_queues; i++) {
        queues[i]->base(base[i]);
        queues[i]->rptr(rptr[i]);
        queues[i]->wptr(wptr[i]);
        queues[i]->size(size[i]);
        queues[i]->processing(processing[i]);
    }

    // Restore RLC queue state information from checkpoint
    // Only two RLC queues are supported right now
    const int num_rlc_queues = 2;
    auto rlc_info = std::make_unique<Addr[]>(num_rlc_queues);
    auto rlc_valid = std::make_unique<bool[]>(num_rlc_queues);
    auto rlc_base = std::make_unique<Addr[]>(num_rlc_queues);
    auto rlc_rptr = std::make_unique<Addr[]>(num_rlc_queues);
    auto rlc_global_rptr = std::make_unique<Addr[]>(num_rlc_queues);
    auto rlc_wptr = std::make_unique<Addr[]>(num_rlc_queues);
    auto rlc_size = std::make_unique<Addr[]>(num_rlc_queues);
    auto rlc_rptr_wb_addr = std::make_unique<Addr[]>(num_rlc_queues);
    auto rlc_processing = std::make_unique<bool[]>(num_rlc_queues);
    auto rlc_mqd_addr = std::make_unique<Addr[]>(num_rlc_queues);
    auto rlc_priv = std::make_unique<bool[]>(num_rlc_queues);
    auto rlc_static = std::make_unique<bool[]>(num_rlc_queues);
    auto rlc_mqd = std::make_unique<uint32_t[]>(num_rlc_queues * 128);

    UNSERIALIZE_UNIQUE_PTR_ARRAY(rlc_info, num_rlc_queues);
    UNSERIALIZE_UNIQUE_PTR_ARRAY(rlc_valid, num_rlc_queues);
    UNSERIALIZE_UNIQUE_PTR_ARRAY(rlc_base, num_rlc_queues);
    UNSERIALIZE_UNIQUE_PTR_ARRAY(rlc_rptr, num_rlc_queues);
    UNSERIALIZE_UNIQUE_PTR_ARRAY(rlc_global_rptr, num_rlc_queues);
    UNSERIALIZE_UNIQUE_PTR_ARRAY(rlc_wptr, num_rlc_queues);
    UNSERIALIZE_UNIQUE_PTR_ARRAY(rlc_size, num_rlc_queues);
    UNSERIALIZE_UNIQUE_PTR_ARRAY(rlc_rptr_wb_addr, num_rlc_queues);
    UNSERIALIZE_UNIQUE_PTR_ARRAY(rlc_processing, num_rlc_queues);
    UNSERIALIZE_UNIQUE_PTR_ARRAY(rlc_mqd_addr, num_rlc_queues);
    UNSERIALIZE_UNIQUE_PTR_ARRAY(rlc_priv, num_rlc_queues);
    UNSERIALIZE_UNIQUE_PTR_ARRAY(rlc_static, num_rlc_queues);
    UNSERIALIZE_UNIQUE_PTR_ARRAY(rlc_mqd, num_rlc_queues * 128);

    // Restore RLC queue information into RLC0, RLC1
    std::vector<SDMAQueue *> rlc_queues;
    rlc_queues.push_back((SDMAQueue *)&rlc0);
    rlc_queues.push_back((SDMAQueue *)&rlc1);

    for (int i = 0; i < num_rlc_queues; i++) {
        rlc_queues[i]->valid(rlc_valid[i]);
        if (rlc_valid[i]) {
            rlcInfo[i] = rlc_info[i];
            rlc_queues[i]->base(rlc_base[i]);
            rlc_queues[i]->rptr(rlc_rptr[i]);
            rlc_queues[i]->setGlobalRptr(rlc_global_rptr[i]);
            rlc_queues[i]->wptr(rlc_wptr[i]);
            rlc_queues[i]->size(rlc_size[i]);
            rlc_queues[i]->rptrWbAddr(rlc_rptr_wb_addr[i]);
            rlc_queues[i]->processing(rlc_processing[i]);
            rlc_queues[i]->setMQDAddr(rlc_mqd_addr[i]);
            rlc_queues[i]->setPriv(rlc_priv[i]);
            rlc_queues[i]->setStatic(rlc_static[i]);
            SDMAQueueDesc *mqd = new SDMAQueueDesc();
            memcpy(mqd, rlc_mqd.get() + 128*i, sizeof(SDMAQueueDesc));
            rlc_queues[i]->setMQD(mqd);
        }
    }
}

void
SDMAEngine::writeMMIO(PacketPtr pkt, Addr mmio_offset)
{
    DPRINTF(SDMAEngine, "Writing offset %#x with data %x\n", mmio_offset,
            pkt->getLE<uint32_t>());

    // In Vega10 headers, the offsets are the same for both SDMAs
    switch (mmio_offset) {
      case mmSDMA_GFX_RB_BASE:
        setGfxBaseLo(pkt->getLE<uint32_t>());
        break;
      case mmSDMA_GFX_RB_BASE_HI:
        setGfxBaseHi(pkt->getLE<uint32_t>());
        break;
      case mmSDMA_GFX_RB_RPTR_ADDR_LO:
        setGfxRptrLo(pkt->getLE<uint32_t>());
        break;
      case mmSDMA_GFX_RB_RPTR_ADDR_HI:
        setGfxRptrHi(pkt->getLE<uint32_t>());
        break;
      case mmSDMA_GFX_DOORBELL:
        setGfxDoorbellLo(pkt->getLE<uint32_t>());
        break;
      case mmSDMA_GFX_DOORBELL_OFFSET:
        setGfxDoorbellOffsetLo(pkt->getLE<uint32_t>());
        // Bit 28 of doorbell indicates that doorbell is enabled.
        if (bits(getGfxDoorbell(), 28, 28)) {
            gpuDevice->setDoorbellType(getGfxDoorbellOffset(), SDMAGfx);
            gpuDevice->setSDMAEngine(getGfxDoorbellOffset(), this);
        }
        break;
      case mmSDMA_GFX_RB_CNTL: {
        uint32_t rb_size = bits(pkt->getLE<uint32_t>(), 6, 1);
        assert(rb_size >= 6 && rb_size <= 62);
        setGfxSize(1 << (rb_size + 2));
      } break;
      case mmSDMA_GFX_RB_WPTR_POLL_ADDR_LO:
        setGfxWptrLo(pkt->getLE<uint32_t>());
        break;
      case mmSDMA_GFX_RB_WPTR_POLL_ADDR_HI:
        setGfxWptrHi(pkt->getLE<uint32_t>());
        break;
      case mmSDMA_PAGE_RB_BASE:
        setPageBaseLo(pkt->getLE<uint32_t>());
        break;
      case mmSDMA_PAGE_RB_RPTR_ADDR_LO:
        setPageRptrLo(pkt->getLE<uint32_t>());
        break;
      case mmSDMA_PAGE_RB_RPTR_ADDR_HI:
        setPageRptrHi(pkt->getLE<uint32_t>());
        break;
      case mmSDMA_PAGE_DOORBELL:
        setPageDoorbellLo(pkt->getLE<uint32_t>());
        break;
      case mmSDMA_PAGE_DOORBELL_OFFSET:
        setPageDoorbellOffsetLo(pkt->getLE<uint32_t>());
        // Bit 28 of doorbell indicates that doorbell is enabled.
        if (bits(getPageDoorbell(), 28, 28)) {
            gpuDevice->setDoorbellType(getPageDoorbellOffset(), SDMAPage);
            gpuDevice->setSDMAEngine(getPageDoorbellOffset(), this);
        }
        break;
      case mmSDMA_PAGE_RB_CNTL: {
        uint32_t rb_size = bits(pkt->getLE<uint32_t>(), 6, 1);
        assert(rb_size >= 6 && rb_size <= 62);
        setPageSize(1 << (rb_size + 2));
      } break;
      case mmSDMA_PAGE_RB_WPTR_POLL_ADDR_LO:
        setPageWptrLo(pkt->getLE<uint32_t>());
        break;
      default:
        DPRINTF(SDMAEngine, "Unknown SDMA MMIO %#x\n", mmio_offset);
        break;
    }
}
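
/*
 * MMIO register setters. The 64-bit ring base, rptr write-back address,
 * doorbell, and wptr values arrive as separate Lo/Hi dword writes: each
 * *Lo() setter clears and fills bits 31..0 of the corresponding register
 * and each *Hi() setter bits 63..32.
 */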
void
SDMAEngine::setGfxBaseLo(uint32_t data)
{
    gfxBase = insertBits(gfxBase, 31, 0, 0);
    gfxBase |= data;
    gfx.base((gfxBase >> 1) << 12);
}

void
SDMAEngine::setGfxBaseHi(uint32_t data)
{
    gfxBase = insertBits(gfxBase, 63, 32, 0);
    gfxBase |= ((uint64_t)data) << 32;
    gfx.base((gfxBase >> 1) << 12);
}

void
SDMAEngine::setGfxRptrLo(uint32_t data)
{
    gfxRptr = insertBits(gfxRptr, 31, 0, 0);
    gfxRptr |= data;
    gfx.rptrWbAddr(getGARTAddr(gfxRptr));
}

void
SDMAEngine::setGfxRptrHi(uint32_t data)
{
    gfxRptr = insertBits(gfxRptr, 63, 32, 0);
    gfxRptr |= ((uint64_t)data) << 32;
    gfx.rptrWbAddr(getGARTAddr(gfxRptr));
}

void
SDMAEngine::setGfxDoorbellLo(uint32_t data)
{
    gfxDoorbell = insertBits(gfxDoorbell, 31, 0, 0);
    gfxDoorbell |= data;
}

void
SDMAEngine::setGfxDoorbellHi(uint32_t data)
{
    gfxDoorbell = insertBits(gfxDoorbell, 63, 32, 0);
    gfxDoorbell |= ((uint64_t)data) << 32;
}

void
SDMAEngine::setGfxDoorbellOffsetLo(uint32_t data)
{
    gfxDoorbellOffset = insertBits(gfxDoorbellOffset, 31, 0, 0);
    gfxDoorbellOffset |= data;
    if (bits(gfxDoorbell, 28, 28)) {
        gpuDevice->setDoorbellType(gfxDoorbellOffset, SDMAGfx);
        gpuDevice->setSDMAEngine(gfxDoorbellOffset, this);
    }
}

void
SDMAEngine::setGfxDoorbellOffsetHi(uint32_t data)
{
    gfxDoorbellOffset = insertBits(gfxDoorbellOffset, 63, 32, 0);
    gfxDoorbellOffset |= ((uint64_t)data) << 32;
}

void
SDMAEngine::setGfxSize(uint32_t data)
{
    uint32_t rb_size = bits(data, 6, 1);
    assert(rb_size >= 6 && rb_size <= 62);
    gfx.size(1 << (rb_size + 2));
}

void
SDMAEngine::setGfxWptrLo(uint32_t data)
{
    gfxWptr = insertBits(gfxWptr, 31, 0, 0);
    gfxWptr |= data;
}

void
SDMAEngine::setGfxWptrHi(uint32_t data)
{
    gfxWptr = insertBits(gfxWptr, 63, 32, 0);
    gfxWptr |= ((uint64_t)data) << 32;
}

void
SDMAEngine::setPageBaseLo(uint32_t data)
{
    pageBase = insertBits(pageBase, 31, 0, 0);
    pageBase |= data;
    page.base((pageBase >> 1) << 12);
}

void
SDMAEngine::setPageBaseHi(uint32_t data)
{
    pageBase = insertBits(pageBase, 63, 32, 0);
    pageBase |= ((uint64_t)data) << 32;
    page.base((pageBase >> 1) << 12);
}

void
SDMAEngine::setPageRptrLo(uint32_t data)
{
    pageRptr = insertBits(pageRptr, 31, 0, 0);
    pageRptr |= data;
    page.rptrWbAddr(getGARTAddr(pageRptr));
}

void
SDMAEngine::setPageRptrHi(uint32_t data)
{
    pageRptr = insertBits(pageRptr, 63, 32, 0);
    pageRptr |= ((uint64_t)data) << 32;
    page.rptrWbAddr(getGARTAddr(pageRptr));
}

void
SDMAEngine::setPageDoorbellLo(uint32_t data)
{
    pageDoorbell = insertBits(pageDoorbell, 31, 0, 0);
    pageDoorbell |= data;
}

void
SDMAEngine::setPageDoorbellHi(uint32_t data)
{
    pageDoorbell = insertBits(pageDoorbell, 63, 32, 0);
    pageDoorbell |= ((uint64_t)data) << 32;
}

void
SDMAEngine::setPageDoorbellOffsetLo(uint32_t data)
{
    pageDoorbellOffset = insertBits(pageDoorbellOffset, 31, 0, 0);
    pageDoorbellOffset |= data;
    if (bits(pageDoorbell, 28, 28)) {
        gpuDevice->setDoorbellType(pageDoorbellOffset, SDMAPage);
        gpuDevice->setSDMAEngine(pageDoorbellOffset, this);
    }
}

void
SDMAEngine::setPageDoorbellOffsetHi(uint32_t data)
{
    pageDoorbellOffset = insertBits(pageDoorbellOffset, 63, 32, 0);
    pageDoorbellOffset |= ((uint64_t)data) << 32;
}

void
SDMAEngine::setPageSize(uint32_t data)
{
    uint32_t rb_size = bits(data, 6, 1);
    assert(rb_size >= 6 && rb_size <= 62);
    page.size(1 << (rb_size + 2));
}

void
SDMAEngine::setPageWptrLo(uint32_t data)
{
    pageWptr = insertBits(pageWptr, 31, 0, 0);
    pageWptr |= data;
}

void
SDMAEngine::setPageWptrHi(uint32_t data)
{
    pageWptr = insertBits(pageWptr, 63, 32, 0);
    pageWptr |= ((uint64_t)data) << 32;
}

} // namespace gem5