gem5 v24.1.0.1
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
sdma_engine.cc
Go to the documentation of this file.
1/*
2 * Copyright (c) 2021 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * 3. Neither the name of the copyright holder nor the names of its
16 * contributors may be used to endorse or promote products derived from this
17 * software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
33
35#include "arch/generic/mmu.hh"
36#include "debug/SDMAData.hh"
37#include "debug/SDMAEngine.hh"
42#include "mem/packet.hh"
43#include "mem/packet_access.hh"
44#include "params/SDMAEngine.hh"
45
46namespace gem5
47{
48
// Construct the SDMA engine: zero-initialize all ring-buffer bookkeeping
// and wire each primary queue (gfx, page, rlc0, rlc1) to its indirect
// buffer (IB) companion queue. gfx/page start out valid; the RLC (user)
// queues become valid only when registerRLCQueue() maps a doorbell.
// NOTE(review): this Doxygen listing elides several source lines (gaps in
// the embedded numbering, e.g. 57, 60-62, 64, 67-68, 71, 74), so part of
// the per-queue setup is not visible here — consult the real source.
49SDMAEngine::SDMAEngine(const SDMAEngineParams &p)
50 : DmaVirtDevice(p), id(0), gfxBase(0), gfxRptr(0),
51 gfxDoorbell(0), gfxDoorbellOffset(0), gfxWptr(0), pageBase(0),
52 pageRptr(0), pageDoorbell(0), pageDoorbellOffset(0),
53 pageWptr(0), gpuDevice(nullptr), walker(p.walker),
54 mmioBase(p.mmio_base), mmioSize(p.mmio_size)
55{
// Attach each primary ring's indirect-buffer queue and mark the
// always-present gfx/page rings (and their IBs) valid.
56 gfx.ib(&gfxIb);
58 gfx.valid(true);
59 gfxIb.valid(true);
62
63 page.ib(&pageIb);
65 page.valid(true);
66 pageIb.valid(true);
69
70 rlc0.ib(&rlc0Ib);
72
73 rlc1.ib(&rlc1Ib);
75}
76
77void
83
// Map an SDMA engine index (0-7) to its Interrupt Handler (IH) client id,
// used when posting trap interrupts (see trap()).
// NOTE(review): the per-case return statements are elided by this Doxygen
// listing; only the case labels and the default panic are visible.
84int
86{
87 switch (_id) {
88 case 0:
90 case 1:
92 case 2:
94 case 3:
96 case 4:
98 case 5:
100 case 6:
102 case 7:
104 default:
105 panic("Unknown SDMA id");
106 }
107}
108
// Convert an address into the form used to index the GART aperture.
// Non-AGP addresses keep their 12-bit page offset while the page frame
// number is shifted left by 3 — presumably because each GART entry is
// 8 bytes (TODO confirm against gem5's AMDGPUVM). AGP addresses pass
// through unchanged.
// NOTE(review): the function signature line is elided by this listing.
109Addr
111{
112 if (!gpuDevice->getVM().inAGP(addr)) {
113 Addr low_bits = bits(addr, 11, 0);
114 addr = (((addr >> 12) << 3) << 12) | low_bits;
115 }
116 return addr;
117}
118
// Resolve a raw SDMA packet address to a device (framebuffer) address.
// Returns 0 when the access targets host memory instead.
// NOTE(review): the function signature line is elided by this listing;
// the body reads a member `cur_vmid`, set by the RLC processing paths.
119Addr
121{
122 // SDMA packets can access both host and device memory as either a source
123 // or destination address. We don't know which until it is translated, so
124 // we do a dummy functional translation to determine if the address
125 // resides in system memory or not.
126 auto tgen = translate(raw_addr, 64);
127 auto addr_range = *(tgen->begin());
128 Addr tmp_addr = addr_range.paddr;
129 DPRINTF(SDMAEngine, "getDeviceAddress raw_addr %#lx -> %#lx\n",
130 raw_addr, tmp_addr);
131
132 // SDMA packets will access device memory through the MMHUB aperture in
133 // supervisor mode (vmid == 0) and in user mode (vmid > 0). In the case
134 // of vmid == 0 the address is already an MMHUB address in the packet,
135 // so simply subtract the MMHUB base. For vmid > 0 the address is a
136 // virtual address that must first be translated. The translation will
137 // return an MMHUB address, then we can similarly subtract the base to
138 // get the device address. Otherwise, for host, device address is 0.
139 Addr device_addr = 0;
140 if ((gpuDevice->getVM().inMMHUB(raw_addr) && cur_vmid == 0) ||
141 (gpuDevice->getVM().inMMHUB(tmp_addr) && cur_vmid != 0)) {
142 if (cur_vmid == 0) {
143 device_addr = raw_addr - gpuDevice->getVM().getMMHUBBase();
144 } else {
145 device_addr = tmp_addr - gpuDevice->getVM().getMMHUBBase();
146 }
147 }
148
149 return device_addr;
150}
151
// Pick a translation generator for (vaddr, size) based on which aperture
// the address falls in: user GPUVM translation when cur_vmid > 0,
// otherwise AGP, MMHUB, or GART as the fallback.
// NOTE(review): the signature and the concrete TranslationGen
// constructions are partially elided by this Doxygen listing (gaps at
// source lines 152-158, 162-163, 168, 171, 178).
159{
160 if (cur_vmid > 0) {
161 // Only user translation is available to user queues (vmid > 0)
164 cur_vmid, vaddr, size));
165 } else if (gpuDevice->getVM().inAGP(vaddr)) {
166 // Use AGP translation gen
167 return TranslationGenPtr(
169 } else if (gpuDevice->getVM().inMMHUB(vaddr)) {
170 // Use MMHUB translation gen
172 &gpuDevice->getVM(), vaddr, size));
173 }
174
175 // Assume GART otherwise as this is the only other translation aperture
176 // available to the SDMA engine processor.
177 return TranslationGenPtr(
179}
180
// Map a user-mode (RLC) queue, described by its memory queue descriptor
// (MQD), onto the first free RLC slot. Panics if both slots are taken.
// Ring size is decoded from rb_cntl bits [6:1] (4 << field bytes) and the
// rptr writeback address is assembled from its hi/lo halves.
// NOTE(review): this listing elides the first signature line (182) and
// lines 205/220 — likely where the `priv` bit computed below is applied
// to the queue; confirm against the real source.
181void
183 bool isStatic)
184{
185 uint32_t rlc_size = 4UL << bits(mqd->sdmax_rlcx_rb_cntl, 6, 1);
186 Addr rptr_wb_addr = mqd->sdmax_rlcx_rb_rptr_addr_hi;
187 rptr_wb_addr <<= 32;
188 rptr_wb_addr |= mqd->sdmax_rlcx_rb_rptr_addr_lo;
189 bool priv = bits(mqd->sdmax_rlcx_rb_cntl, 23, 23);
190
191 // Get first free RLC
192 if (!rlc0.valid()) {
193 DPRINTF(SDMAEngine, "Doorbell %lx mapped to RLC0\n", doorbell);
194 rlcInfo[0] = doorbell;
195 rlc0.valid(true);
196 rlc0.base(mqd->rb_base << 8);
197 rlc0.size(rlc_size);
198 rlc0.rptr(0);
199 rlc0.incRptr(mqd->rptr);
200 rlc0.setWptr(mqd->wptr);
201 rlc0.rptrWbAddr(rptr_wb_addr);
202 rlc0.processing(false);
203 rlc0.setMQD(mqd);
204 rlc0.setMQDAddr(mqdAddr);
206 rlc0.setStatic(isStatic);
207 } else if (!rlc1.valid()) {
208 DPRINTF(SDMAEngine, "Doorbell %lx mapped to RLC1\n", doorbell);
209 rlcInfo[1] = doorbell;
210 rlc1.valid(true);
211 rlc1.base(mqd->rb_base << 8);
212 rlc1.size(rlc_size);
213 rlc1.rptr(0);
214 rlc1.incRptr(mqd->rptr);
215 rlc1.setWptr(mqd->wptr);
216 rlc1.rptrWbAddr(rptr_wb_addr);
217 rlc1.processing(false);
218 rlc1.setMQD(mqd);
219 rlc1.setMQDAddr(mqdAddr);
221 rlc1.setStatic(isStatic);
222 } else {
223 panic("No free RLCs. Check they are properly unmapped.");
224 }
225}
226
227void
228SDMAEngine::unregisterRLCQueue(Addr doorbell, bool unmap_static)
229{
230 DPRINTF(SDMAEngine, "Unregistering RLC queue at %#lx\n", doorbell);
231 if (rlcInfo[0] == doorbell) {
232 if (!unmap_static && rlc0.isStatic()) {
233 DPRINTF(SDMAEngine, "RLC0 is static. Will not unregister.\n");
234 return;
235 }
236
237 SDMAQueueDesc *mqd = rlc0.getMQD();
238 if (mqd) {
239 DPRINTF(SDMAEngine, "Writing RLC0 SDMAMQD back to %#lx\n",
240 rlc0.getMQDAddr());
241
242 mqd->rptr = rlc0.globalRptr();
243 mqd->wptr = rlc0.getWptr();
244
245 auto cb = new DmaVirtCallback<uint32_t>(
246 [ = ] (const uint32_t &) { });
247 dmaWriteVirt(rlc0.getMQDAddr(), sizeof(SDMAQueueDesc), cb, mqd);
248 } else {
249 warn("RLC0 SDMAMQD address invalid\n");
250 }
251 rlc0.valid(false);
252 rlcInfo[0] = 0;
253 } else if (rlcInfo[1] == doorbell) {
254 if (!unmap_static && rlc1.isStatic()) {
255 DPRINTF(SDMAEngine, "RLC1 is static. Will not unregister.\n");
256 return;
257 }
258
259 SDMAQueueDesc *mqd = rlc1.getMQD();
260 if (mqd) {
261 DPRINTF(SDMAEngine, "Writing RLC1 SDMAMQD back to %#lx\n",
262 rlc1.getMQDAddr());
263
264 mqd->rptr = rlc1.globalRptr();
265 mqd->wptr = rlc1.getWptr();
266
267 auto cb = new DmaVirtCallback<uint32_t>(
268 [ = ] (const uint32_t &) { });
269 dmaWriteVirt(rlc1.getMQDAddr(), sizeof(SDMAQueueDesc), cb, mqd);
270 } else {
271 warn("RLC1 SDMAMQD address invalid\n");
272 }
273 rlc1.valid(false);
274 rlcInfo[1] = 0;
275 } else {
276 panic("Cannot unregister: no RLC queue at %#lx\n", doorbell);
277 }
278
279 gpuDevice->unsetDoorbell(doorbell);
280}
281
// Unregister every currently mapped RLC queue (both slots), honoring the
// same static-queue rules as unregisterRLCQueue().
// NOTE(review): the signature line (283) is elided by this listing; the
// parameter is evidently `bool unmap_static`, forwarded below.
282void
284{
285 for (auto doorbell: rlcInfo) {
286 if (doorbell) {
287 unregisterRLCQueue(doorbell, unmap_static);
288 }
289 }
290}
291
292/* Start decoding packets from the Gfx queue. */
// Kick the gfx ring: record the new write pointer from the doorbell and,
// if the queue is not already being drained, start the decode loop.
// NOTE(review): the signature line (294) is elided by this listing.
293void
295{
296 gfx.setWptr(wptrOffset);
297 if (!gfx.processing()) {
298 gfx.processing(true);
299 decodeNext(&gfx);
300 }
301}
302
303/* Start decoding packets from the Page queue. */
// Kick the page ring: record the new write pointer and start draining if
// idle. NOTE(review): the signature line (305) and line 310 are elided by
// this listing — 310 is presumably the decodeNext(&page) call that
// mirrors processGfx(); confirm against the real source.
304void
306{
307 page.setWptr(wptrOffset);
308 if (!page.processing()) {
309 page.processing(true);
311 }
312}
313
314/* Process RLC queue at given doorbell. */
315void
316SDMAEngine::processRLC(Addr doorbellOffset, Addr wptrOffset)
317{
318 if (rlcInfo[0] == doorbellOffset) {
319 processRLC0(wptrOffset);
320 } else if (rlcInfo[1] == doorbellOffset) {
321 processRLC1(wptrOffset);
322 } else {
323 panic("Cannot process: no RLC queue at %#lx\n", doorbellOffset);
324 }
325}
326
327/* Start decoding packets from the RLC0 queue. */
// Kick RLC slot 0. cur_vmid = 1 switches address translation to the
// user-mode (GPUVM) path for the packets that follow (see translate()).
// NOTE(review): the signature line (329) and line 337 are elided — 337
// is presumably the decodeNext(&rlc0) call; confirm against the source.
328void
330{
331 assert(rlc0.valid());
332
333 rlc0.setWptr(wptrOffset);
334 if (!rlc0.processing()) {
335 cur_vmid = 1;
336 rlc0.processing(true);
338 }
339}
340
341/* Start decoding packets from the RLC1 queue. */
// Kick RLC slot 1; mirrors processRLC0().
// NOTE(review): the signature line (343) and line 351 are elided — 351
// is presumably the decodeNext(&rlc1) call; confirm against the source.
342void
344{
345 assert(rlc1.valid());
346
347 rlc1.setWptr(wptrOffset);
348 if (!rlc1.processing()) {
349 cur_vmid = 1;
350 rlc1.processing(true);
352 }
353}
354
355/* Decoding next packet in the queue. */
// Core decode loop: while the ring is non-empty, DMA-read the next 32-bit
// packet header and hand it to decodeHeader(); when the ring drains,
// write the read pointer back to the host, clear the processing flag,
// resume the parent queue (IB completion returns to its primary ring),
// and drop back to supervisor translation (cur_vmid = 0).
// NOTE(review): the signature line (357) is elided by this listing.
356void
358{
359 DPRINTF(SDMAEngine, "SDMA decode rptr %p wptr %p\n", q->rptr(), q->wptr());
360
361 if (q->rptr() != q->wptr()) {
362 // We are using lambda functions passed to the DmaVirtCallback objects
363 // which will call the actuall callback method (e.g., decodeHeader).
364 // The dmaBuffer member of the DmaVirtCallback is passed to the lambda
365 // function as header in this case.
366 auto cb = new DmaVirtCallback<uint32_t>(
367 [ = ] (const uint32_t &header)
368 { decodeHeader(q, header); });
369 dmaReadVirt(q->rptr(), sizeof(uint32_t), cb, &cb->dmaBuffer);
370 } else {
371 // The driver expects the rptr to be written back to host memory
372 // periodically. In simulation, we writeback rptr after each burst of
373 // packets from a doorbell, rather than using the cycle count which
374 // is not accurate in all simulation settings (e.g., KVM).
375 DPRINTF(SDMAEngine, "Writing rptr %#lx back to host addr %#lx\n",
376 q->globalRptr(), q->rptrWbAddr());
377 if (q->rptrWbAddr()) {
378 auto cb = new DmaVirtCallback<uint64_t>(
379 [ = ](const uint64_t &) { }, q->globalRptr());
380 dmaWriteVirt(q->rptrWbAddr(), sizeof(Addr), cb, &cb->dmaBuffer);
381 }
382 q->processing(false);
383 if (q->parent()) {
384 DPRINTF(SDMAEngine, "SDMA switching queues\n");
385 decodeNext(q->parent());
386 }
387 cur_vmid = 0;
388 }
}
389}
390
391/* Decoding the header of a packet. */
// Dispatch one SDMA packet: extract opcode/sub-opcode from the 32-bit
// header, then either handle the packet inline (NOP, SEM, COND_EXE,
// TIMESTAMP, PRE_EXE, DUMMY_TRAP) or DMA-read the packet body and chain
// to the matching handler via a DmaVirtCallback lambda. Each handler is
// responsible for advancing rptr past its body and re-entering
// decodeNext().
// NOTE(review): this Doxygen listing elides many lines (the signature,
// most sub-opcode case labels, the DmaVirtCallback constructions, and
// header-struct allocations such as the sdmaAtomicHeader at 532), so
// several cases below appear truncated; consult the real source.
392void
394{
395 q->incRptr(sizeof(header));
396 int opcode = bits(header, 7, 0);
397 int sub_opcode = bits(header, 15, 8);
398
399 DmaVirtCallback<uint64_t> *cb = nullptr;
400 void *dmaBuffer = nullptr;
401
402 DPRINTF(SDMAEngine, "SDMA opcode %p sub-opcode %p\n", opcode, sub_opcode);
403
404 switch(opcode) {
405 case SDMA_OP_NOP: {
406 uint32_t NOP_count = (header >> 16) & 0x3FFF;
407 DPRINTF(SDMAEngine, "SDMA NOP packet with count %d\n", NOP_count);
408 if (NOP_count > 0) {
// Skip the NOP payload dwords, but never run past wptr.
409 for (int i = 0; i < NOP_count; ++i) {
410 if (q->rptr() == q->wptr()) {
411 warn("NOP count is beyond wptr, ignoring remaining NOPs");
412 break;
413 }
414 q->incRptr(4);
415 }
416 }
417 decodeNext(q);
418 } break;
419 case SDMA_OP_COPY: {
420 DPRINTF(SDMAEngine, "SDMA Copy packet\n");
421 switch (sub_opcode) {
423 dmaBuffer = new sdmaCopy();
425 [ = ] (const uint64_t &)
426 { copy(q, (sdmaCopy *)dmaBuffer); });
427 dmaReadVirt(q->rptr(), sizeof(sdmaCopy), cb, dmaBuffer);
428 } break;
430 panic("SDMA_SUBOP_COPY_LINEAR_SUB_WIND not implemented");
431 } break;
433 panic("SDMA_SUBOP_COPY_TILED not implemented");
434 } break;
436 panic("SDMA_SUBOP_COPY_TILED_SUB_WIND not implemented");
437 } break;
439 panic("SDMA_SUBOP_COPY_T2T_SUB_WIND not implemented");
440 } break;
441 case SDMA_SUBOP_COPY_SOA: {
442 panic("SDMA_SUBOP_COPY_SOA not implemented");
443 } break;
445 panic("SDMA_SUBOP_COPY_DIRTY_PAGE not implemented");
446 } break;
448 panic("SDMA_SUBOP_COPY_LINEAR_PHY not implemented");
449 } break;
450 default: {
451 panic("SDMA unknown copy sub-opcode.");
452 } break;
453 }
454 } break;
455 case SDMA_OP_WRITE: {
456 DPRINTF(SDMAEngine, "SDMA Write packet\n");
457 switch (sub_opcode) {
459 dmaBuffer = new sdmaWrite();
461 [ = ] (const uint64_t &)
462 { write(q, (sdmaWrite *)dmaBuffer); });
463 dmaReadVirt(q->rptr(), sizeof(sdmaWrite), cb, dmaBuffer);
464 } break;
466 panic("SDMA_SUBOP_WRITE_TILED not implemented.\n");
467 } break;
468 default:
469 break;
470 }
471 } break;
472 case SDMA_OP_INDIRECT: {
473 DPRINTF(SDMAEngine, "SDMA IndirectBuffer packet\n");
474 dmaBuffer = new sdmaIndirectBuffer();
476 [ = ] (const uint64_t &)
477 { indirectBuffer(q, (sdmaIndirectBuffer *)dmaBuffer); });
478 dmaReadVirt(q->rptr(), sizeof(sdmaIndirectBuffer), cb, dmaBuffer);
479 } break;
480 case SDMA_OP_FENCE: {
481 DPRINTF(SDMAEngine, "SDMA Fence packet\n");
482 dmaBuffer = new sdmaFence();
484 [ = ] (const uint64_t &)
485 { fence(q, (sdmaFence *)dmaBuffer); });
486 dmaReadVirt(q->rptr(), sizeof(sdmaFence), cb, dmaBuffer);
487 } break;
488 case SDMA_OP_TRAP: {
489 DPRINTF(SDMAEngine, "SDMA Trap packet\n");
490 dmaBuffer = new sdmaTrap();
492 [ = ] (const uint64_t &)
493 { trap(q, (sdmaTrap *)dmaBuffer); });
494 dmaReadVirt(q->rptr(), sizeof(sdmaTrap), cb, dmaBuffer);
495 } break;
496 case SDMA_OP_SEM: {
// Semaphore packets are skipped (not modelled).
497 q->incRptr(sizeof(sdmaSemaphore));
498 warn("SDMA_OP_SEM not implemented");
499 decodeNext(q);
500 } break;
501 case SDMA_OP_POLL_REGMEM: {
502 DPRINTF(SDMAEngine, "SDMA PollRegMem packet\n");
505 dmaBuffer = new sdmaPollRegMem();
507 [ = ] (const uint64_t &)
508 { pollRegMem(q, h, (sdmaPollRegMem *)dmaBuffer); });
509 dmaReadVirt(q->rptr(), sizeof(sdmaPollRegMem), cb, dmaBuffer);
510 switch (sub_opcode) {
512 panic("SDMA_SUBOP_POLL_REG_WRITE_MEM not implemented");
513 } break;
515 panic("SDMA_SUBOP_POLL_DBIT_WRITE_MEM not implemented");
516 } break;
518 panic("SDMA_SUBOP_POLL_MEM_VERIFY not implemented");
519 } break;
520 default:
521 break;
522 }
523 } break;
524 case SDMA_OP_COND_EXE: {
525 q->incRptr(sizeof(sdmaCondExec));
// NOTE(review): message says SDMA_OP_SEM but this is the COND_EXE
// case — looks like a copy-paste slip in the warning text.
526 warn("SDMA_OP_SEM not implemented");
527 decodeNext(q);
528 } break;
529 case SDMA_OP_ATOMIC: {
530 DPRINTF(SDMAEngine, "SDMA Atomic packet\n");
531 dmaBuffer = new sdmaAtomic();
533 *h = *(sdmaAtomicHeader *)&header;
535 [ = ] (const uint64_t &)
536 { atomic(q, h, (sdmaAtomic *)dmaBuffer); });
537 dmaReadVirt(q->rptr(), sizeof(sdmaAtomic), cb, dmaBuffer);
538 } break;
539 case SDMA_OP_CONST_FILL: {
540 DPRINTF(SDMAEngine, "SDMA Constant fill packet\n");
541 dmaBuffer = new sdmaConstFill();
543 [ = ] (const uint64_t &)
544 { constFill(q, (sdmaConstFill *)dmaBuffer, header); });
545 dmaReadVirt(q->rptr(), sizeof(sdmaConstFill), cb, dmaBuffer);
546 } break;
547 case SDMA_OP_PTEPDE: {
548 DPRINTF(SDMAEngine, "SDMA PTEPDE packet\n");
549 switch (sub_opcode) {
551 DPRINTF(SDMAEngine, "SDMA PTEPDE_GEN sub-opcode\n");
552 dmaBuffer = new sdmaPtePde();
554 [ = ] (const uint64_t &)
555 { ptePde(q, (sdmaPtePde *)dmaBuffer); });
556 dmaReadVirt(q->rptr(), sizeof(sdmaPtePde), cb, dmaBuffer);
557 break;
559 panic("SDMA_SUBOP_PTEPDE_COPY not implemented");
560 break;
562 panic("SDMA_SUBOP_PTEPDE_COPY not implemented");
563 break;
565 panic("SDMA_SUBOP_PTEPDE_RMW not implemented");
566 } break;
567 default:
568 DPRINTF(SDMAEngine, "Unsupported PTEPDE sub-opcode %d\n",
569 sub_opcode);
570 decodeNext(q);
571 break;
572 }
573 } break;
574 case SDMA_OP_TIMESTAMP: {
575 q->incRptr(sizeof(sdmaTimestamp));
576 switch (sub_opcode) {
578 } break;
580 } break;
582 } break;
583 default:
584 break;
585 }
586 warn("SDMA_OP_TIMESTAMP not implemented");
587 decodeNext(q);
588 } break;
589 case SDMA_OP_SRBM_WRITE: {
590 DPRINTF(SDMAEngine, "SDMA SRBMWrite packet\n");
593 dmaBuffer = new sdmaSRBMWrite();
595 [ = ] (const uint64_t &)
596 { srbmWrite(q, header, (sdmaSRBMWrite *)dmaBuffer); });
597 dmaReadVirt(q->rptr(), sizeof(sdmaSRBMWrite), cb, dmaBuffer);
598 } break;
599 case SDMA_OP_PRE_EXE: {
600 q->incRptr(sizeof(sdmaPredExec));
601 warn("SDMA_OP_PRE_EXE not implemented");
602 decodeNext(q);
603 } break;
604 case SDMA_OP_DUMMY_TRAP: {
605 q->incRptr(sizeof(sdmaDummyTrap));
606 warn("SDMA_OP_DUMMY_TRAP not implemented");
607 decodeNext(q);
608 } break;
609 default: {
610 panic("Invalid SDMA packet.\n");
611 } break;
612 }
613}
614
615/* Implements a write packet. */
// Handle a WRITE packet: the payload dwords immediately follow the packet
// in the ring, so DMA-read them into a heap buffer, then continue in
// writeReadData(). The buffer is freed later by writeCleanup().
// NOTE(review): the signature line (617) is elided by this listing.
616void
618{
619 q->incRptr(sizeof(sdmaWrite));
620 // count represents the number of dwords - 1 to write
621 pkt->count++;
622 DPRINTF(SDMAEngine, "Write %d dwords to %lx\n", pkt->count, pkt->dest);
623
624 // first we have to read needed data from the SDMA queue
625 uint32_t *dmaBuffer = new uint32_t[pkt->count];
626 auto cb = new DmaVirtCallback<uint64_t>(
627 [ = ] (const uint64_t &) { writeReadData(q, pkt, dmaBuffer); });
628 dmaReadVirt(q->rptr(), sizeof(uint32_t) * pkt->count, cb,
629 (void *)dmaBuffer);
630}
631
632/* Completion of data reading for a write packet. */
633void
634SDMAEngine::writeReadData(SDMAQueue *q, sdmaWrite *pkt, uint32_t *dmaBuffer)
635{
636 int bufferSize = sizeof(uint32_t) * pkt->count;
637 q->incRptr(bufferSize);
638
639 DPRINTF(SDMAEngine, "Write packet data:\n");
640 for (int i = 0; i < pkt->count; ++i) {
641 DPRINTF(SDMAEngine, "%08x\n", dmaBuffer[i]);
642 }
643
644 // lastly we write read data to the destination address
645 if (gpuDevice->getVM().inMMHUB(pkt->dest)) {
646 Addr mmhub_addr = pkt->dest - gpuDevice->getVM().getMMHUBBase();
647
648 fatal_if(gpuDevice->getVM().inGARTRange(mmhub_addr),
649 "SDMA write to GART not implemented");
650
651 auto cb = new EventFunctionWrapper(
652 [ = ]{ writeDone(q, pkt, dmaBuffer); }, name());
653 gpuDevice->getMemMgr()->writeRequest(mmhub_addr, (uint8_t *)dmaBuffer,
654 bufferSize, 0, cb);
655 } else {
656 if (q->priv()) {
657 pkt->dest = getGARTAddr(pkt->dest);
658 }
659 auto cb = new DmaVirtCallback<uint32_t>(
660 [ = ] (const uint64_t &) { writeDone(q, pkt, dmaBuffer); });
661 dmaWriteVirt(pkt->dest, bufferSize, cb, (void *)dmaBuffer);
662 }
663}
664
665/* Completion of a write packet. */
666void
667SDMAEngine::writeDone(SDMAQueue *q, sdmaWrite *pkt, uint32_t *dmaBuffer)
668{
669 DPRINTF(SDMAEngine, "Write packet completed to %p, %d dwords\n",
670 pkt->dest, pkt->count);
671
672 auto cleanup_cb = new EventFunctionWrapper(
673 [ = ]{ writeCleanup(dmaBuffer); }, name());
674
675 auto system_ptr = gpuDevice->CP()->system();
676 if (!system_ptr->isAtomicMode()) {
677 warn_once("SDMA cleanup assumes 2000 tick timing for completion."
678 " This has not been tested in timing mode\n");
679 }
680
681 // Only 2000 ticks should be necessary, but add additional padding.
682 schedule(cleanup_cb, curTick() + 10000);
683
684 delete pkt;
685 decodeNext(q);
686}
687
688void
689SDMAEngine::writeCleanup(uint32_t *dmaBuffer)
690{
691 delete [] dmaBuffer;
692}
693
694/* Implements a copy packet. */
// Handle a COPY (linear) packet: stage pkt->count bytes from the source
// into a heap buffer, then continue in copyReadData(). Device sources
// are read through the framebuffer memory manager a page at a time;
// host sources use virtual DMA.
// NOTE(review): this listing elides the signature line (696) and line
// 721 — 721 is presumably the ChunkGenerator declaration driving the
// loop below (the parallel loop in constFill() shows the pattern:
// ChunkGenerator gen(addr, bytes, AMDGPU_MMHUB_PAGE_SIZE)).
695void
697{
698 DPRINTF(SDMAEngine, "Copy src: %lx -> dest: %lx count %d\n",
699 pkt->source, pkt->dest, pkt->count);
700 q->incRptr(sizeof(sdmaCopy));
701 // count represents the number of bytes - 1 to be copied
702 pkt->count++;
703 if (q->priv()) {
704 if (!gpuDevice->getVM().inMMHUB(pkt->source)) {
705 DPRINTF(SDMAEngine, "Getting GART addr for %lx\n", pkt->source);
706 pkt->source = getGARTAddr(pkt->source);
707 DPRINTF(SDMAEngine, "GART addr %lx\n", pkt->source);
708 }
709 }
710
711 // Read data from the source first, then call the copyReadData method
712 uint8_t *dmaBuffer = new uint8_t[pkt->count];
713 Addr device_addr = getDeviceAddress(pkt->source);
714 if (device_addr) {
715 DPRINTF(SDMAEngine, "Copying from device address %#lx\n", device_addr);
716 auto cb = new EventFunctionWrapper(
717 [ = ]{ copyReadData(q, pkt, dmaBuffer); }, name());
718
719 // Copy the minimum page size at a time in case the physical addresses
720 // are not contiguous.
722 uint8_t *buffer_ptr = dmaBuffer;
723 for (; !gen.done(); gen.next()) {
724 Addr chunk_addr = getDeviceAddress(gen.addr());
725 assert(chunk_addr);
726
727 DPRINTF(SDMAEngine, "Copying chunk of %d bytes from %#lx (%#lx)\n",
728 gen.size(), gen.addr(), chunk_addr);
729
730 gpuDevice->getMemMgr()->readRequest(chunk_addr, buffer_ptr,
731 gen.size(), 0,
732 gen.last() ? cb : nullptr);
733 buffer_ptr += gen.size();
734 }
735 } else {
736 auto cb = new DmaVirtCallback<uint64_t>(
737 [ = ] (const uint64_t &) { copyReadData(q, pkt, dmaBuffer); });
738 dmaReadVirt(pkt->source, pkt->count, cb, (void *)dmaBuffer);
739 }
740}
741
742/* Completion of data reading for a copy packet. */
743void
744SDMAEngine::copyReadData(SDMAQueue *q, sdmaCopy *pkt, uint8_t *dmaBuffer)
745{
746 // lastly we write read data to the destination address
747 uint64_t *dmaBuffer64 = reinterpret_cast<uint64_t *>(dmaBuffer);
748
749 DPRINTF(SDMAEngine, "Copy packet last/first qwords:\n");
750 DPRINTF(SDMAEngine, "First: %016lx\n", dmaBuffer64[0]);
751 DPRINTF(SDMAEngine, "Last: %016lx\n", dmaBuffer64[(pkt->count/8)-1]);
752
753 DPRINTF(SDMAData, "Copy packet data:\n");
754 for (int i = 0; i < pkt->count/8; ++i) {
755 DPRINTF(SDMAData, "%016lx\n", dmaBuffer64[i]);
756 }
757
758 Addr device_addr = getDeviceAddress(pkt->dest);
759 // Write read data to the destination address then call the copyDone method
760 if (device_addr) {
761 DPRINTF(SDMAEngine, "Copying to device address %#lx\n", device_addr);
762 auto cb = new EventFunctionWrapper(
763 [ = ]{ copyDone(q, pkt, dmaBuffer); }, name());
764
765 // Copy the minimum page size at a time in case the physical addresses
766 // are not contiguous.
768 uint8_t *buffer_ptr = dmaBuffer;
769 for (; !gen.done(); gen.next()) {
770 Addr chunk_addr = getDeviceAddress(gen.addr());
771 assert(chunk_addr);
772
773 DPRINTF(SDMAEngine, "Copying chunk of %d bytes to %#lx (%#lx)\n",
774 gen.size(), gen.addr(), chunk_addr);
775
776 gpuDevice->getMemMgr()->writeRequest(chunk_addr, buffer_ptr,
777 gen.size(), 0,
778 gen.last() ? cb : nullptr);
779
780 buffer_ptr += gen.size();
781 }
782 } else {
783 DPRINTF(SDMAEngine, "Copying to host address %#lx\n", pkt->dest);
784 auto cb = new DmaVirtCallback<uint64_t>(
785 [ = ] (const uint64_t &) { copyDone(q, pkt, dmaBuffer); });
786 dmaWriteVirt(pkt->dest, pkt->count, cb, (void *)dmaBuffer);
787 }
788
789 // For destinations in the GART table, gem5 uses a mapping tables instead
790 // of functionally going to device memory, so we need to update that copy.
791 if (gpuDevice->getVM().inGARTRange(device_addr)) {
792 // GART entries are always 8 bytes.
793 assert((pkt->count % 8) == 0);
794 for (int i = 0; i < pkt->count/8; ++i) {
795 Addr gart_addr = device_addr + i*8 - gpuDevice->getVM().gartBase();
796 DPRINTF(SDMAEngine, "Shadow copying to GART table %lx -> %lx\n",
797 gart_addr, dmaBuffer64[i]);
798 gpuDevice->getVM().gartTable[gart_addr] = dmaBuffer64[i];
799 }
800 }
801}
802
803/* Completion of a copy packet. */
804void
805SDMAEngine::copyDone(SDMAQueue *q, sdmaCopy *pkt, uint8_t *dmaBuffer)
806{
807 DPRINTF(SDMAEngine, "Copy completed to %p, %d dwords\n",
808 pkt->dest, pkt->count);
809
810 auto cleanup_cb = new EventFunctionWrapper(
811 [ = ]{ copyCleanup(dmaBuffer); }, name());
812
813 auto system_ptr = gpuDevice->CP()->system();
814 if (!system_ptr->isAtomicMode()) {
815 warn_once("SDMA cleanup assumes 2000 tick timing for completion."
816 " This has not been tested in timing mode\n");
817 }
818
819 // Only 2000 ticks should be necessary, but add additional padding.
820 schedule(cleanup_cb, curTick() + 10000);
821
822 delete pkt;
823 decodeNext(q);
824}
825
826void
827SDMAEngine::copyCleanup(uint8_t *dmaBuffer)
828{
829 delete [] dmaBuffer;
830}
831
832/* Implements an indirect buffer packet. */
// Handle an INDIRECT_BUFFER packet: point the queue's IB companion at the
// buffer described by the packet (GART-translated for privileged queues)
// and start decoding packets from the IB. decodeNext() returns to the
// parent ring when the IB drains.
// NOTE(review): the signature line (834) is elided by this listing. Also
// note the `+ 1` in size() is applied after the byte multiply while
// setWptr() is not — looks inconsistent at a glance; verify against the
// real source before changing anything.
833void
835{
836 if (q->priv()) {
837 q->ib()->base(getGARTAddr(pkt->base));
838 } else {
839 q->ib()->base(pkt->base);
840 }
841 q->ib()->rptr(0);
842 q->ib()->size(pkt->size * sizeof(uint32_t) + 1);
843 q->ib()->setWptr(pkt->size * sizeof(uint32_t));
844
845 q->incRptr(sizeof(sdmaIndirectBuffer));
846
847 delete pkt;
848 decodeNext(q->ib());
849}
850
851/* Implements a fence packet. */
// Handle a FENCE packet: write the fence value to the destination address
// (GART-translated for privileged queues); completion continues in
// fenceDone(). The value travels via the callback's dmaBuffer member.
// NOTE(review): the signature line (853) is elided by this listing.
852void
854{
855 q->incRptr(sizeof(sdmaFence));
856 if (q->priv()) {
857 pkt->dest = getGARTAddr(pkt->dest);
858 }
859
860 // Writing the data from the fence packet to the destination address.
861 auto cb = new DmaVirtCallback<uint32_t>(
862 [ = ] (const uint32_t &) { fenceDone(q, pkt); }, pkt->data);
863 dmaWriteVirt(pkt->dest, sizeof(pkt->data), cb, &cb->dmaBuffer);
864}
865
866/* Completion of a fence packet. */
// Completion of a FENCE packet: free it and resume decoding.
// NOTE(review): the signature line (868) is elided by this listing.
867void
869{
870 DPRINTF(SDMAEngine, "Fence completed to %p, data 0x%x\n",
871 pkt->dest, pkt->data);
872 delete pkt;
873 decodeNext(q);
874}
875
876/* Implements a trap packet. */
// Handle a TRAP packet: raise an interrupt toward the host. Ring id 3 is
// used for the page queue, 0 otherwise; the IH client id comes from this
// engine's id via getIHClientId().
// NOTE(review): this listing elides the signature (878) and lines
// 889/892 — presumably the interrupt-cookie prepare/submit calls whose
// continuation arguments (getIHClientId, TRAP_ID) are visible below;
// confirm against the real source.
877void
879{
880 q->incRptr(sizeof(sdmaTrap));
881
882 DPRINTF(SDMAEngine, "Trap contextId: %p\n", pkt->intrContext);
883
884 uint32_t ring_id = (q->queueType() == SDMAPage) ? 3 : 0;
885
886 int node_id = 0;
887 int local_id = getId();
888
890 getIHClientId(local_id),
891 TRAP_ID, 2*node_id);
893
894 delete pkt;
895 decodeNext(q);
896}
897
898/* Implements a write SRBM packet. */
// Handle an SRBM_WRITE packet: write pkt->data (masked per the header's
// byte-enable bits) to the MMIO register at regAddr << 2.
// NOTE(review): the first signature line (900) is elided by this listing.
// The [[maybe_unused]] on reg_addr appears redundant since reg_addr is
// used in both the DPRINTF and setRegVal below — verify before removing.
899void
901 sdmaSRBMWrite *pkt)
902{
903 q->incRptr(sizeof(sdmaSRBMWrite));
904
905 [[maybe_unused]] uint32_t reg_addr = pkt->regAddr << 2;
906 uint32_t reg_mask = 0x00000000;
907
// Each byte-enable bit gates one byte lane of the 32-bit write.
908 if (header->byteEnable & 0x8) reg_mask |= 0xFF000000;
909 if (header->byteEnable & 0x4) reg_mask |= 0x00FF0000;
910 if (header->byteEnable & 0x2) reg_mask |= 0x0000FF00;
911 if (header->byteEnable & 0x1) reg_mask |= 0x000000FF;
912 pkt->data &= reg_mask;
913
914 DPRINTF(SDMAEngine, "SRBM write to %#x with data %#x\n",
915 reg_addr, pkt->data);
916
917 gpuDevice->setRegVal(reg_addr, pkt->data);
918
919 delete header;
920 delete pkt;
921 decodeNext(q);
922}
923
// Handle a POLL_REGMEM packet. Only the memory-poll form (mode == 1,
// op == 0) is modelled: it reads the polled location and chains into
// pollRegMemRead() for the retry loop. Register polls and the other ops
// are skipped with a warning/panic.
// NOTE(review): the first signature line (930) is elided by this listing
// (the preceding doc comment, lines 924-928, is also elided).
929void
931 sdmaPollRegMem *pkt)
932{
933 q->incRptr(sizeof(sdmaPollRegMem));
934
935 DPRINTF(SDMAEngine, "POLL_REGMEM: M=%d, func=%d, op=%d, addr=%p, ref=%d, "
936 "mask=%p, retry=%d, pinterval=%d\n", header->mode, header->func,
937 header->op, pkt->address, pkt->ref, pkt->mask, pkt->retryCount,
938 pkt->pollInt);
939
940 bool skip = false;
941
942 if (header->mode == 1) {
943 // polling on a memory location
944 if (header->op == 0) {
945 auto cb = new DmaVirtCallback<uint32_t>(
946 [ = ] (const uint32_t &dma_buffer) {
947 pollRegMemRead(q, header, pkt, dma_buffer, 0); });
948 dmaReadVirt(pkt->address, sizeof(uint32_t), cb,
949 (void *)&cb->dmaBuffer);
950 } else {
951 panic("SDMA poll mem operation not implemented.");
952 skip = true;
953 }
954 } else {
955 warn_once("SDMA poll reg is not implemented. If this is required for "
956 "correctness, an SRBM model needs to be implemented.");
957 skip = true;
958 }
959
// Skipped forms still free their packet and keep the queue moving.
960 if (skip) {
961 delete header;
962 delete pkt;
963 decodeNext(q);
964 }
965}
966
// Retry loop for a memory POLL_REGMEM: keep re-reading the location until
// pollRegMemFunc() says the reference condition is met, or the retry
// budget is exhausted. retryCount == 0xfff means poll indefinitely.
// NOTE(review): the first signature line (968) is elided by this listing.
967void
969 sdmaPollRegMem *pkt, uint32_t dma_buffer, int count)
970{
971 assert(header->mode == 1 && header->op == 0);
972
973 if (!pollRegMemFunc(dma_buffer, pkt->ref, header->func) &&
974 ((count < (pkt->retryCount + 1) && pkt->retryCount != 0xfff) ||
975 pkt->retryCount == 0xfff)) {
976
977 // continue polling on a memory location until reference value is met,
978 // retryCount is met or indefinitelly if retryCount is 0xfff
979 DPRINTF(SDMAEngine, "SDMA polling mem addr %p, val %d ref %d.\n",
980 pkt->address, dma_buffer, pkt->ref);
981
982 auto cb = new DmaVirtCallback<uint32_t>(
983 [ = ] (const uint32_t &dma_buffer) {
984 pollRegMemRead(q, header, pkt, dma_buffer, count + 1); });
985 dmaReadVirt(pkt->address, sizeof(uint32_t), cb,
986 (void *)&cb->dmaBuffer);
987 } else {
988 DPRINTF(SDMAEngine, "SDMA polling mem addr %p, val %d ref %d done.\n",
989 pkt->address, dma_buffer, pkt->ref);
990
991 delete header;
992 delete pkt;
993 decodeNext(q);
994 }
995}
996
997bool
998SDMAEngine::pollRegMemFunc(uint32_t value, uint32_t reference, uint32_t func)
999{
1000 switch (func) {
1001 case 0:
1002 return true;
1003 break;
1004 case 1:
1005 return value < reference;
1006 break;
1007 case 2:
1008 return value <= reference;
1009 break;
1010 case 3:
1011 return value == reference;
1012 break;
1013 case 4:
1014 return value != reference;
1015 break;
1016 case 5:
1017 return value >= reference;
1018 break;
1019 case 6:
1020 return value > reference;
1021 break;
1022 default:
1023 panic("SDMA POLL_REGMEM unknown comparison function.");
1024 break;
1025 }
1026}
1027
1028/* Implements a PTE PDE generation packet. */
// Handle a PTEPDE_GEN packet: generate pkt->count 64-bit entries from an
// initial value, a per-entry increment, and an OR-mask, then write them
// to the destination (framebuffer path for MMHUB addresses, virtual DMA
// with GART translation otherwise). Completion continues in ptePdeDone().
// NOTE(review): the signature line (1030) is elided by this listing.
1029void
1031{
1032 q->incRptr(sizeof(sdmaPtePde));
1033 pkt->count++;
1034
1035 DPRINTF(SDMAEngine, "PTEPDE init: %d inc: %d count: %d\n",
1036 pkt->initValue, pkt->increment, pkt->count);
1037
1038 // Generating pkt->count double dwords using the initial value, increment
1039 // and a mask.
1040 uint64_t *dmaBuffer = new uint64_t[pkt->count];
1041 for (int i = 0; i < pkt->count; i++) {
1042 dmaBuffer[i] = (pkt->mask | (pkt->initValue + (i * pkt->increment)));
1043 }
1044
1045 // Writing generated data to the destination address.
1046 if (gpuDevice->getVM().inMMHUB(pkt->dest)) {
1047 Addr mmhub_addr = pkt->dest - gpuDevice->getVM().getMMHUBBase();
1048
1049 fatal_if(gpuDevice->getVM().inGARTRange(mmhub_addr),
1050 "SDMA write to GART not implemented");
1051
1052 auto cb = new EventFunctionWrapper(
1053 [ = ]{ ptePdeDone(q, pkt, dmaBuffer); }, name());
1054 gpuDevice->getMemMgr()->writeRequest(mmhub_addr, (uint8_t *)dmaBuffer,
1055 sizeof(uint64_t) * pkt->count, 0,
1056 cb);
1057 } else {
1058 if (q->priv()) {
1059 pkt->dest = getGARTAddr(pkt->dest);
1060 }
1061 auto cb = new DmaVirtCallback<uint64_t>(
1062 [ = ] (const uint64_t &) { ptePdeDone(q, pkt, dmaBuffer); });
1063 dmaWriteVirt(pkt->dest, sizeof(uint64_t) * pkt->count, cb,
1064 (void *)dmaBuffer);
1065 }
1066}
1067
1068/* Completion of a PTE PDE generation packet. */
1069void
1070SDMAEngine::ptePdeDone(SDMAQueue *q, sdmaPtePde *pkt, uint64_t *dmaBuffer)
1071{
1072 DPRINTF(SDMAEngine, "PtePde packet completed to %p, %d 2dwords\n",
1073 pkt->dest, pkt->count);
1074
1075 auto cleanup_cb = new EventFunctionWrapper(
1076 [ = ]{ ptePdeCleanup(dmaBuffer); }, name());
1077
1078 auto system_ptr = gpuDevice->CP()->system();
1079 if (!system_ptr->isAtomicMode()) {
1080 warn_once("SDMA cleanup assumes 2000 tick timing for completion."
1081 " This has not been tested in timing mode\n");
1082 }
1083
1084 // Only 2000 ticks should be necessary, but add additional padding.
1085 schedule(cleanup_cb, curTick() + 10000);
1086
1087 delete pkt;
1088 decodeNext(q);
1089}
1090
1091void
1092SDMAEngine::ptePdeCleanup(uint64_t *dmaBuffer)
1093{
1094 delete [] dmaBuffer;
1095}
1096
// Handle an ATOMIC packet: read the current 64-bit value at pkt->addr,
// then continue in atomicData() to apply the operation.
// NOTE(review): the signature line (1098) is elided by this listing.
1097void
1099{
1100 q->incRptr(sizeof(sdmaAtomic));
1101 DPRINTF(SDMAEngine, "Atomic op %d on addr %#lx, src: %ld, cmp: %ld, loop?"
1102 " %d loopInt: %d\n", header->opcode, pkt->addr, pkt->srcData,
1103 pkt->cmpData, header->loop, pkt->loopInt);
1104
1105 // Read the data at pkt->addr
1106 uint64_t *dmaBuffer = new uint64_t;
1107 auto cb = new DmaVirtCallback<uint64_t>(
1108 [ = ] (const uint64_t &)
1109 { atomicData(q, header, pkt, dmaBuffer); });
1110 dmaReadVirt(pkt->addr, sizeof(uint64_t), cb, (void *)dmaBuffer);
1111}
1112
// Apply the atomic operation to the value just read from pkt->addr.
// Only SDMA_ATOMIC_ADD64 (add with return: memory <- old + src) is
// modelled; anything else panics. The sum is written back to the same
// address and completion continues in atomicDone().
// NOTE(review): the first signature line (1114) is elided by this listing.
1113void
1115 uint64_t *dmaBuffer)
1116{
1117 DPRINTF(SDMAEngine, "Atomic op %d on addr %#lx got data %#lx\n",
1118 header->opcode, pkt->addr, *dmaBuffer);
1119
1120 if (header->opcode == SDMA_ATOMIC_ADD64) {
1121 // Atomic add with return -- dst = dst + src
1122 int64_t dst_data = *dmaBuffer;
1123 int64_t src_data = pkt->srcData;
1124
1125 DPRINTF(SDMAEngine, "Atomic ADD_RTN: %ld + %ld = %ld\n", dst_data,
1126 src_data, dst_data + src_data);
1127
1128 // Reuse the dmaBuffer allocated
1129 *dmaBuffer = dst_data + src_data;
1130
1131 auto cb = new DmaVirtCallback<uint64_t>(
1132 [ = ] (const uint64_t &)
1133 { atomicDone(q, header, pkt, dmaBuffer); });
1134 dmaWriteVirt(pkt->addr, sizeof(uint64_t), cb, (void *)dmaBuffer);
1135 } else {
1136 panic("Unsupported SDMA atomic opcode: %d\n", header->opcode);
1137 }
1138}
1139
// Completion of an ATOMIC packet: free all heap state and resume decoding.
// NOTE(review): the first signature line (1141) is elided by this listing.
1140void
1142 uint64_t *dmaBuffer)
1143{
1144 DPRINTF(SDMAEngine, "Atomic op %d op addr %#lx complete (sent %lx)\n",
1145 header->opcode, pkt->addr, *dmaBuffer);
1146
1147 delete dmaBuffer;
1148 delete header;
1149 delete pkt;
1150 decodeNext(q);
1151}
1152
// Handle a CONST_FILL packet: materialize the fill pattern into a heap
// buffer and write it to the destination (chunked framebuffer writes for
// device addresses, virtual DMA for host). Completion continues in
// constFillDone(). Element size is 1 << fillsize bytes; count is
// elements - 1.
// NOTE(review): the signature line (1154) is elided by this listing.
// Also note memset() uses only the low byte of srcData — for fillsize
// > 0 this reproduces a multi-byte pattern correctly only when all its
// bytes are equal; verify intended semantics before relying on it.
1153void
1155{
1156 q->incRptr(sizeof(sdmaConstFill));
1157
1158 sdmaConstFillHeader fill_header;
1159 fill_header.ordinal = header;
1160
1161 DPRINTF(SDMAEngine, "ConstFill %lx srcData %x count %d size %d sw %d\n",
1162 pkt->addr, pkt->srcData, pkt->count, fill_header.fillsize,
1163 fill_header.sw);
1164
1165 // Count is number of <size> elements - 1. Size is log2 of byte size.
1166 int fill_bytes = (pkt->count + 1) * (1 << fill_header.fillsize);
1167 uint8_t *fill_data = new uint8_t[fill_bytes];
1168
1169 memset(fill_data, pkt->srcData, fill_bytes);
1170
1171 Addr device_addr = getDeviceAddress(pkt->addr);
1172 if (device_addr) {
1173 DPRINTF(SDMAEngine, "ConstFill %d bytes of %x to device at %lx\n",
1174 fill_bytes, pkt->srcData, pkt->addr);
1175
1176 auto cb = new EventFunctionWrapper(
1177 [ = ]{ constFillDone(q, pkt, fill_data); }, name());
1178
1179 // Copy the minimum page size at a time in case the physical addresses
1180 // are not contiguous.
1181 ChunkGenerator gen(pkt->addr, fill_bytes, AMDGPU_MMHUB_PAGE_SIZE);
1182 uint8_t *fill_data_ptr = fill_data;
1183 for (; !gen.done(); gen.next()) {
1184 Addr chunk_addr = getDeviceAddress(gen.addr());
1185 assert(chunk_addr);
1186
1187 DPRINTF(SDMAEngine, "Copying chunk of %d bytes from %#lx (%#lx)\n",
1188 gen.size(), gen.addr(), chunk_addr);
1189
1190 gpuDevice->getMemMgr()->writeRequest(chunk_addr, fill_data_ptr,
1191 gen.size(), 0,
1192 gen.last() ? cb : nullptr);
1193 fill_data_ptr += gen.size();
1194 }
1195 } else {
1196 DPRINTF(SDMAEngine, "ConstFill %d bytes of %x to host at %lx\n",
1197 fill_bytes, pkt->srcData, pkt->addr);
1198
1199 auto cb = new DmaVirtCallback<uint64_t>(
1200 [ = ] (const uint64_t &)
1201 { constFillDone(q, pkt, fill_data); });
1202 dmaWriteVirt(pkt->addr, fill_bytes, cb, (void *)fill_data);
1203 }
1204}
1205
1206void
1208{
1209 DPRINTF(SDMAEngine, "ConstFill to %lx done\n", pkt->addr);
1210
1211 delete [] fill_data;
1212 delete pkt;
1213 decodeNext(q);
1214}
1215
1218{
1219 AddrRangeList ranges;
1220 return ranges;
1221}
1222
1223void
// Checkpoint the SDMA engine: per-queue ring state (base, read/write
// pointers, size, in-flight flag) for the gfx, page, and both
// indirect-buffer queues.
// NOTE(review): the extraction dropped lines 1224 and 1227-1238 here --
// presumably the signature `SDMAEngine::serialize(CheckpointOut &cp) const`,
// the DmaVirtDevice base-class serialize call (per the comment below), and
// SERIALIZE_SCALAR calls for the MMIO shadow registers.  Restore them from
// the gem5 tree before compiling.
1225{
1226 // Serialize the DmaVirtDevice base class
1228
1239
1240 int num_queues = 4;
1241
// Collect the four queues so their state can be captured uniformly.
// (The declaration of `queues` sat on dropped line 1242.)
1243 queues.push_back((SDMAQueue *)&gfx);
1244 queues.push_back((SDMAQueue *)&page);
1245 queues.push_back((SDMAQueue *)&gfxIb);
1246 queues.push_back((SDMAQueue *)&pageIb);
1247
// Variable-length arrays sized by a non-constant are a GCC/Clang
// extension used throughout this file.
1248 Addr base[num_queues];
1249 Addr rptr[num_queues];
1250 Addr wptr[num_queues];
1251 Addr size[num_queues];
1252 bool processing[num_queues];
1253
// Snapshot each queue's registers into the parallel arrays.
1254 for (int i = 0; i < num_queues; i++) {
1255 base[i] = queues[i]->base();
1256 rptr[i] = queues[i]->getRptr();
1257 wptr[i] = queues[i]->getWptr();
1258 size[i] = queues[i]->size();
1259 processing[i] = queues[i]->processing();
1260 }
1261
1262 SERIALIZE_ARRAY(base, num_queues);
1263 SERIALIZE_ARRAY(rptr, num_queues);
1264 SERIALIZE_ARRAY(wptr, num_queues);
1265 SERIALIZE_ARRAY(size, num_queues);
1266 SERIALIZE_ARRAY(processing, num_queues);
1267}
1268
1269void
// Restore the SDMA engine from a checkpoint; mirror image of serialize():
// read back the per-queue arrays and push them into the queue objects in
// the same gfx/page/gfxIb/pageIb order.
// NOTE(review): the extraction dropped lines 1270 and 1273-1284 here --
// presumably the signature `SDMAEngine::unserialize(CheckpointIn &cp)`,
// the DmaVirtDevice base-class unserialize call, and UNSERIALIZE_SCALAR
// calls matching serialize()'s scalars.  Restore from the gem5 tree.
1271{
1272 // Unserialize the DmaVirtDevice base class
1274
1285
1286 int num_queues = 4;
1287 Addr base[num_queues];
1288 Addr rptr[num_queues];
1289 Addr wptr[num_queues];
1290 Addr size[num_queues];
1291 bool processing[num_queues];
1292
1293 UNSERIALIZE_ARRAY(base, num_queues);
1294 UNSERIALIZE_ARRAY(rptr, num_queues);
1295 UNSERIALIZE_ARRAY(wptr, num_queues);
1296 UNSERIALIZE_ARRAY(size, num_queues);
1297 UNSERIALIZE_ARRAY(processing, num_queues);
1298
// (The declaration of `queues` sat on dropped line 1299.)
1300 queues.push_back((SDMAQueue *)&gfx);
1301 queues.push_back((SDMAQueue *)&page);
1302 queues.push_back((SDMAQueue *)&gfxIb);
1303 queues.push_back((SDMAQueue *)&pageIb);
1304
1305 for (int i = 0; i < num_queues; i++) {
1306 queues[i]->base(base[i]);
1307 queues[i]->rptr(rptr[i]);
1308 queues[i]->wptr(wptr[i]);
1309 queues[i]->size(size[i]);
1310 queues[i]->processing(processing[i]);
1311 }
1312}
1313
1314void
// Dispatch an MMIO register write to the matching ring-buffer register
// setter; mmio_offset selects the register, the 32-bit payload comes from
// the packet.
// NOTE(review): the extraction dropped the signature line (1315 --
// `SDMAEngine::writeMMIO(PacketPtr pkt, Addr mmio_offset)` per the member
// list) and most `case mmSDMA_*:` labels below.  Each orphaned setter call
// is annotated with the label it presumably sat under; confirm against the
// gem5 tree before compiling.
1316{
1317 DPRINTF(SDMAEngine, "Writing offset %#x with data %x\n", mmio_offset,
1318 pkt->getLE<uint32_t>());
1319
1320 // In Vega10 headers, the offsets are the same for both SDMAs
1321 switch (mmio_offset) {
1322 case mmSDMA_GFX_RB_BASE:
1323 setGfxBaseLo(pkt->getLE<uint32_t>());
1324 break;
// (dropped label 1325 -- presumably case mmSDMA_GFX_RB_BASE_HI:)
1326 setGfxBaseHi(pkt->getLE<uint32_t>());
1327 break;
// (dropped label 1328 -- presumably case mmSDMA_GFX_RB_RPTR_ADDR_LO:)
1329 setGfxRptrLo(pkt->getLE<uint32_t>());
1330 break;
// (dropped label 1331 -- presumably case mmSDMA_GFX_RB_RPTR_ADDR_HI:)
1332 setGfxRptrHi(pkt->getLE<uint32_t>());
1333 break;
// (dropped label 1334 -- presumably case mmSDMA_GFX_DOORBELL:)
1335 setGfxDoorbellLo(pkt->getLE<uint32_t>());
1336 break;
// (dropped label 1337 -- presumably case mmSDMA_GFX_DOORBELL_OFFSET:)
1338 setGfxDoorbellOffsetLo(pkt->getLE<uint32_t>());
1339 // Bit 28 of doorbell indicates that doorbell is enabled.
1340 if (bits(getGfxDoorbell(), 28, 28)) {
// (dropped lines 1341-1343 -- presumably the gpuDevice->setDoorbellType()
// and gpuDevice->setSDMAEngine() registration calls; confirm.)
1344 }
1345 break;
1346 case mmSDMA_GFX_RB_CNTL: {
1347 uint32_t rb_size = bits(pkt->getLE<uint32_t>(), 6, 1);
1348 assert(rb_size >= 6 && rb_size <= 62);
// NOTE(review): setGfxSize() in this file extracts bits 6:1 of its
// argument again, yet here it is handed an already-expanded size; the two
// decodes look inconsistent -- confirm which encoding setGfxSize expects.
// Also, `1 <<` is a 32-bit shift while the assert admits rb_size up to
// 62, so (rb_size + 2) >= 32 is undefined behavior.
1349 setGfxSize(1 << (rb_size + 2));
1350 } break;
// (dropped label 1351 -- presumably case mmSDMA_GFX_RB_WPTR_POLL_ADDR_LO:)
1352 setGfxWptrLo(pkt->getLE<uint32_t>());
1353 break;
// (dropped label 1354 -- presumably case mmSDMA_GFX_RB_WPTR_POLL_ADDR_HI:)
1355 setGfxWptrHi(pkt->getLE<uint32_t>());
1356 break;
// (dropped label 1357 -- presumably case mmSDMA_PAGE_RB_BASE:)
1358 setPageBaseLo(pkt->getLE<uint32_t>());
1359 break;
// (dropped label 1360 -- presumably case mmSDMA_PAGE_RB_RPTR_ADDR_LO:)
1361 setPageRptrLo(pkt->getLE<uint32_t>());
1362 break;
// (dropped label 1363 -- presumably case mmSDMA_PAGE_RB_RPTR_ADDR_HI:)
1364 setPageRptrHi(pkt->getLE<uint32_t>());
1365 break;
// (dropped label 1366 -- presumably case mmSDMA_PAGE_DOORBELL:)
1367 setPageDoorbellLo(pkt->getLE<uint32_t>());
1368 break;
// (dropped label 1369 -- presumably case mmSDMA_PAGE_DOORBELL_OFFSET:)
1370 setPageDoorbellOffsetLo(pkt->getLE<uint32_t>());
1371 // Bit 28 of doorbell indicates that doorbell is enabled.
1372 if (bits(getPageDoorbell(), 28, 28)) {
// (dropped lines 1373-1375 -- presumably the page-queue doorbell
// registration calls, mirroring the gfx case above; confirm.)
1376 }
1377 break;
1378 case mmSDMA_PAGE_RB_CNTL: {
1379 uint32_t rb_size = bits(pkt->getLE<uint32_t>(), 6, 1);
1380 assert(rb_size >= 6 && rb_size <= 62);
// NOTE(review): same double-decode / 32-bit-shift concerns as the
// mmSDMA_GFX_RB_CNTL case above.
1381 setPageSize(1 << (rb_size + 2));
1382 } break;
// (dropped label 1383 -- presumably case mmSDMA_PAGE_RB_WPTR_POLL_ADDR_LO:)
1384 setPageWptrLo(pkt->getLE<uint32_t>());
1385 break;
1386 default:
1387 DPRINTF(SDMAEngine, "Unknown SDMA MMIO %#x\n", mmio_offset);
1388 break;
1389 }
1390}
1391
1392void
1394{
1395 gfxBase = insertBits(gfxBase, 31, 0, 0);
1396 gfxBase |= data;
1397 gfx.base((gfxBase >> 1) << 12);
1398}
1399
1400void
1402{
1403 gfxBase = insertBits(gfxBase, 63, 32, 0);
1404 gfxBase |= ((uint64_t)data) << 32;
1405 gfx.base((gfxBase >> 1) << 12);
1406}
1407
1408void
// Set the low 32 bits of the gfx ring read-pointer address register.
// NOTE(review): the extraction dropped line 1409 (the signature,
// `SDMAEngine::setGfxRptrLo(uint32_t data)` per the member list) and line
// 1413, which presumably propagated the updated gfxRptr to the gfx queue
// -- confirm against the gem5 tree.
1410{
1411 gfxRptr = insertBits(gfxRptr, 31, 0, 0);
1412 gfxRptr |= data;
1414}
1415
1416void
// Set the high 32 bits of the gfx ring read-pointer address register.
// NOTE(review): the extraction dropped line 1417 (the signature,
// `SDMAEngine::setGfxRptrHi(uint32_t data)` per the member list) and line
// 1421, which presumably propagated the updated gfxRptr to the gfx queue
// -- confirm against the gem5 tree.
1418{
1419 gfxRptr = insertBits(gfxRptr, 63, 32, 0);
1420 gfxRptr |= ((uint64_t)data) << 32;
1422}
1423
1424void
1426{
1427 gfxDoorbell = insertBits(gfxDoorbell, 31, 0, 0);
1428 gfxDoorbell |= data;
1429}
1430
1431void
1433{
1434 gfxDoorbell = insertBits(gfxDoorbell, 63, 32, 0);
1435 gfxDoorbell |= ((uint64_t)data) << 32;
1436}
1437
1438void
// NOTE(review): the extraction dropped lines 1439-1447 -- the entire body
// (and signature) of `SDMAEngine::setGfxDoorbellOffsetLo(uint32_t data)`
// per the member list.  Restore from the gem5 tree before compiling.
1448
1449void
1451{
1453 gfxDoorbellOffset |= ((uint64_t)data) << 32;
1454}
1455
1456void
1458{
1459 uint32_t rb_size = bits(data, 6, 1);
1460 assert(rb_size >= 6 && rb_size <= 62);
1461 gfx.size(1 << (rb_size + 2));
1462}
1463
1464void
1466{
1467 gfxWptr = insertBits(gfxWptr, 31, 0, 0);
1468 gfxWptr |= data;
1469}
1470
1471void
1473{
1474 gfxWptr = insertBits(gfxWptr, 31, 0, 0);
1475 gfxWptr |= ((uint64_t)data) << 32;
1476}
1477
1478void
1480{
1481 pageBase = insertBits(pageBase, 31, 0, 0);
1482 pageBase |= data;
1483 page.base((pageBase >> 1) << 12);
1484}
1485
1486void
1488{
1489 pageBase = insertBits(pageBase, 63, 32, 0);
1490 pageBase |= ((uint64_t)data) << 32;
1491 page.base((pageBase >> 1) << 12);
1492}
1493
1494void
// NOTE(review): the extraction dropped lines 1495-1500 -- the entire body
// (and signature) of `SDMAEngine::setPageRptrLo(uint32_t data)` per the
// member list.  Restore from the gem5 tree before compiling.
1501
1502void
// Set the high 32 bits of the page ring read-pointer address register.
// NOTE(review): the extraction dropped line 1503 (the signature,
// `SDMAEngine::setPageRptrHi(uint32_t data)` per the member list) and line
// 1507, which presumably propagated the updated pageRptr to the page queue
// -- confirm against the gem5 tree.
1504{
1505 pageRptr = insertBits(pageRptr, 63, 32, 0);
1506 pageRptr |= ((uint64_t)data) << 32;
1508}
1509
1510void
// NOTE(review): the extraction dropped lines 1511-1515 -- the entire body
// (and signature) of `SDMAEngine::setPageDoorbellLo(uint32_t data)` per
// the member list.  Restore from the gem5 tree before compiling.
1516
1517void
1519{
1520 pageDoorbell = insertBits(pageDoorbell, 63, 32, 0);
1521 pageDoorbell |= ((uint64_t)data) << 32;
1522}
1523
1524void
// NOTE(review): the extraction dropped lines 1525-1533 -- the entire body
// (and signature) of `SDMAEngine::setPageDoorbellOffsetLo(uint32_t data)`
// per the member list.  Restore from the gem5 tree before compiling.
1534
1535void
1537{
1539 pageDoorbellOffset |= ((uint64_t)data) << 32;
1540}
1541
1542void
1544{
1545 uint32_t rb_size = bits(data, 6, 1);
1546 assert(rb_size >= 6 && rb_size <= 62);
1547 page.size(1 << (rb_size + 2));
1548}
1549
1550void
1552{
1553 pageWptr = insertBits(pageWptr, 31, 0, 0);
1554 pageWptr |= data;
1555}
1556
1557void
1559{
1560 pageWptr = insertBits(pageWptr, 63, 32, 0);
1561 pageWptr |= ((uint64_t)data) << 32;
1562}
1563
1564} // namespace gem5
static constexpr int AMDGPU_MMHUB_PAGE_SIZE
Definition amdgpu_vm.hh:94
#define DPRINTF(x,...)
Definition trace.hh:209
const char data[]
Device model for an AMD GPU.
void setDoorbellType(uint32_t offset, QueueType qt, int ip_id=0)
Set handles to GPU blocks.
void unsetDoorbell(uint32_t offset)
void setRegVal(uint64_t addr, uint32_t value)
AMDGPUInterruptHandler * getIH()
Get handles to GPU blocks.
AMDGPUMemoryManager * getMemMgr()
RequestorID vramRequestorId()
Methods related to translations and system/device memory.
void setSDMAEngine(Addr offset, SDMAEngine *eng)
GPUCommandProcessor * CP()
void prepareInterruptCookie(ContextID cntxtId, uint32_t ring_id, uint32_t client_id, uint32_t source_id, unsigned node_id)
void writeRequest(Addr addr, uint8_t *data, int size, Request::Flags flag, Event *callback)
Write size amount of data to device memory at addr using flags and callback.
void readRequest(Addr addr, uint8_t *data, int size, Request::Flags flag, Event *callback)
Read size amount of data from device memory at addr using flags and callback.
Translation range generators.
Definition amdgpu_vm.hh:314
std::unordered_map< uint64_t, uint64_t > gartTable
Copy of GART table.
Definition amdgpu_vm.hh:203
bool inAGP(Addr vaddr)
Methods for resolving apertures.
Definition amdgpu_vm.hh:212
Addr getMMHUBBase()
Definition amdgpu_vm.hh:227
Addr gartBase()
Return base address of GART table in framebuffer.
Definition amdgpu_vm.cc:87
bool inGARTRange(Addr paddr)
Definition amdgpu_vm.hh:194
bool inMMHUB(Addr vaddr)
Definition amdgpu_vm.hh:222
This class takes an arbitrary memory region (address/length pair) and generates a series of appropria...
void serialize(CheckpointOut &cp) const override
Serialize an object.
void unserialize(CheckpointIn &cp) override
Unserialize an object.
Wraps a std::function object in a DmaCallback.
void dmaReadVirt(Addr host_addr, unsigned size, DmaCallback *cb, void *data, Tick delay=0)
Initiate a DMA read from virtual address host_addr.
void dmaWriteVirt(Addr host_addr, unsigned size, DmaCallback *b, void *data, Tick delay=0)
Initiate a DMA write from virtual address host_addr.
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
Definition packet.hh:295
T getLE() const
Get the data in the packet byte swapped from little endian to host endian.
void setMQD(SDMAQueueDesc *mqd)
void setMQDAddr(Addr mqdAddr)
void setStatic(bool isStatic)
SDMAQueueDesc * getMQD()
void incRptr(uint32_t value)
System DMA Engine class for AMD dGPU.
uint64_t pageDoorbell
void setPageRptrLo(uint32_t data)
void unserialize(CheckpointIn &cp) override
Unserialize an object.
uint64_t getPageDoorbellOffset()
SDMAQueue gfx
Each SDMAEngine processes four queues: paging, gfx, rlc0, and rlc1, where RLC stands for Run List Con...
void ptePde(SDMAQueue *q, sdmaPtePde *pkt)
void setGfxRptrLo(uint32_t data)
void ptePdeCleanup(uint64_t *dmaBuffer)
void setGfxWptrLo(uint32_t data)
uint64_t getGfxDoorbellOffset()
void registerRLCQueue(Addr doorbell, Addr mqdAddr, SDMAQueueDesc *mqd, bool isStatic)
Methods for RLC queues.
void setPageDoorbellHi(uint32_t data)
VegaISA::Walker * walker
void setGfxRptrHi(uint32_t data)
void writeDone(SDMAQueue *q, sdmaWrite *pkt, uint32_t *dmaBuffer)
void processRLC(Addr doorbellOffset, Addr wptrOffset)
void deallocateRLCQueues(bool unmap_static)
void copy(SDMAQueue *q, sdmaCopy *pkt)
Tick write(PacketPtr pkt) override
Inherited methods.
void writeMMIO(PacketPtr pkt, Addr mmio_offset)
Methods for setting the values of SDMA MMIO registers.
void setGfxSize(uint32_t data)
void fenceDone(SDMAQueue *q, sdmaFence *pkt)
void writeReadData(SDMAQueue *q, sdmaWrite *pkt, uint32_t *dmaBuffer)
void setGfxBaseLo(uint32_t data)
void processRLC0(Addr wptrOffset)
void processGfx(Addr wptrOffset)
Given a new write ptr offset, communicated to the GPU through a doorbell write, the SDMA engine proce...
void setGfxDoorbellOffsetHi(uint32_t data)
void constFill(SDMAQueue *q, sdmaConstFill *pkt, uint32_t header)
void atomic(SDMAQueue *q, sdmaAtomicHeader *header, sdmaAtomic *pkt)
AMDGPUDevice * gpuDevice
Addr getGARTAddr(Addr addr) const
Methods for translation.
void setPageDoorbellOffsetHi(uint32_t data)
void processRLC1(Addr wptrOffset)
void setGfxWptrHi(uint32_t data)
void setGfxDoorbellOffsetLo(uint32_t data)
void processPage(Addr wptrOffset)
uint64_t getGfxDoorbell()
void decodeHeader(SDMAQueue *q, uint32_t data)
Reads the first DW (32 bits) (i.e., header) of an SDMA packet, which encodes the opcode and sub-opcod...
void setPageDoorbellOffsetLo(uint32_t data)
int getIHClientId(int _id)
Returns the client id for the Interrupt Handler.
uint64_t getPageDoorbell()
SDMAEngine(const SDMAEngineParams &p)
void setGPUDevice(AMDGPUDevice *gpu_device)
Addr getDeviceAddress(Addr raw_addr)
Translate an address in an SDMA packet.
void writeCleanup(uint32_t *dmaBuffer)
void constFillDone(SDMAQueue *q, sdmaConstFill *pkt, uint8_t *fill_data)
uint64_t pageDoorbellOffset
void setPageBaseHi(uint32_t data)
uint64_t gfxDoorbellOffset
bool pollRegMemFunc(uint32_t value, uint32_t reference, uint32_t func)
void setPageWptrHi(uint32_t data)
void unregisterRLCQueue(Addr doorbell, bool unmap_static)
void setPageWptrLo(uint32_t data)
void pollRegMemRead(SDMAQueue *q, sdmaPollRegMemHeader *header, sdmaPollRegMem *pkt, uint32_t dma_buffer, int count)
void setGfxDoorbellLo(uint32_t data)
void copyReadData(SDMAQueue *q, sdmaCopy *pkt, uint8_t *dmaBuffer)
void indirectBuffer(SDMAQueue *q, sdmaIndirectBuffer *pkt)
void srbmWrite(SDMAQueue *q, sdmaSRBMWriteHeader *header, sdmaSRBMWrite *pkt)
void atomicData(SDMAQueue *q, sdmaAtomicHeader *header, sdmaAtomic *pkt, uint64_t *dmaBuffer)
void trap(SDMAQueue *q, sdmaTrap *pkt)
AddrRangeList getAddrRanges() const override
Every PIO device is obliged to provide an implementation that returns the address ranges the device r...
void setPageDoorbellLo(uint32_t data)
void setGfxDoorbellHi(uint32_t data)
void setPageSize(uint32_t data)
void setPageBaseLo(uint32_t data)
void copyDone(SDMAQueue *q, sdmaCopy *pkt, uint8_t *dmaBuffer)
void copyCleanup(uint8_t *dmaBuffer)
void setGfxBaseHi(uint32_t data)
void ptePdeDone(SDMAQueue *q, sdmaPtePde *pkt, uint64_t *dmaBuffer)
TranslationGenPtr translate(Addr vaddr, Addr size) override
GPUController will perform DMA operations on VAs, and because page faults are not currently supported...
void setPageRptrHi(uint32_t data)
int getId() const
void decodeNext(SDMAQueue *q)
This method checks read and write pointers and starts decoding packets if the read pointer is less th...
void fence(SDMAQueue *q, sdmaFence *pkt)
void atomicDone(SDMAQueue *q, sdmaAtomicHeader *header, sdmaAtomic *pkt, uint64_t *dmaBuffer)
void serialize(CheckpointOut &cp) const override
Serialize an object.
std::array< Addr, 2 > rlcInfo
void pollRegMem(SDMAQueue *q, sdmaPollRegMemHeader *header, sdmaPollRegMem *pkt)
Implements a poll reg/mem packet that polls an SRBM register or a memory location,...
void setDevRequestor(RequestorID mid)
STL vector class.
Definition stl.hh:37
The GPUCommandProcessor (CP) is responsible for accepting commands, in the form of HSA AQL packets,...
constexpr T bits(T val, unsigned first, unsigned last)
Extract the bitfield from position 'first' to 'last' (inclusive) from 'val' and right justify it.
Definition bitfield.hh:79
constexpr T insertBits(T val, unsigned first, unsigned last, B bit_val)
Returns val with bits first to last set to the LSBs of bit_val.
Definition bitfield.hh:185
void schedule(Event &event, Tick when)
Definition eventq.hh:1012
#define panic(...)
This implements a cprintf based panic() function.
Definition logging.hh:188
#define fatal_if(cond,...)
Conditional fatal macro that checks the supplied condition and only causes a fatal error if the condi...
Definition logging.hh:236
#define UNSERIALIZE_ARRAY(member, size)
Definition serialize.hh:618
#define SERIALIZE_ARRAY(member, size)
Definition serialize.hh:610
#define warn(...)
Definition logging.hh:256
#define warn_once(...)
Definition logging.hh:260
Bitfield< 23, 20 > atomic
Bitfield< 27 > q
Definition misc_types.hh:55
Bitfield< 7 > i
Definition misc_types.hh:67
Bitfield< 33 > id
Bitfield< 24, 21 > opcode
Definition types.hh:92
Bitfield< 0 > p
Bitfield< 2 > priv
Definition misc.hh:131
Bitfield< 51, 12 > base
Definition pagetable.hh:141
Bitfield< 3 > addr
Definition types.hh:84
Copyright (c) 2024 Arm Limited All rights reserved.
Definition binary32.hh:36
struct gem5::GEM5_PACKED sdmaFence
struct gem5::GEM5_PACKED sdmaConstFill
struct gem5::GEM5_PACKED sdmaAtomic
Tick curTick()
The universal simulation clock.
Definition cur_tick.hh:46
std::ostream CheckpointOut
Definition serialize.hh:66
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition types.hh:147
@ SOC15_IH_CLIENTID_SDMA3
@ SOC15_IH_CLIENTID_SDMA4
@ SOC15_IH_CLIENTID_SDMA0
@ SOC15_IH_CLIENTID_SDMA1
@ SOC15_IH_CLIENTID_SDMA5
@ SOC15_IH_CLIENTID_SDMA2
@ SOC15_IH_CLIENTID_SDMA6
@ SOC15_IH_CLIENTID_SDMA7
struct gem5::GEM5_PACKED sdmaPtePde
struct gem5::GEM5_PACKED sdmaPollRegMem
struct gem5::GEM5_PACKED sdmaPollRegMemHeader
constexpr unsigned int SDMA_ATOMIC_ADD64
struct gem5::GEM5_PACKED sdmaWrite
struct gem5::GEM5_PACKED sdmaAtomicHeader
struct gem5::GEM5_PACKED sdmaCopy
SDMA packets - see src/core/inc/sdma_registers.h in ROCR-Runtime.
struct gem5::GEM5_PACKED sdmaIndirectBuffer
struct gem5::GEM5_PACKED sdmaTrap
struct gem5::GEM5_PACKED sdmaSRBMWrite
struct gem5::GEM5_PACKED sdmaSRBMWriteHeader
std::unique_ptr< TranslationGen > TranslationGenPtr
output header
Definition nop.cc:36
Declaration of the Packet class.
#define SDMA_SUBOP_COPY_SOA
#define SDMA_OP_SEM
#define SDMA_OP_PTEPDE
#define SDMA_OP_ATOMIC
#define SDMA_OP_DUMMY_TRAP
#define SDMA_SUBOP_PTEPDE_COPY
#define SDMA_SUBOP_COPY_LINEAR
#define SDMA_SUBOP_COPY_T2T_SUB_WIND
#define SDMA_SUBOP_TIMESTAMP_GET
#define SDMA_SUBOP_WRITE_TILED
#define SDMA_OP_PRE_EXE
#define SDMA_OP_TRAP
#define SDMA_SUBOP_PTEPDE_GEN
#define SDMA_OP_WRITE
#define SDMA_SUBOP_COPY_LINEAR_SUB_WIND
#define SDMA_OP_COPY
#define SDMA_SUBOP_COPY_LINEAR_PHY
#define SDMA_OP_POLL_REGMEM
#define SDMA_SUBOP_TIMESTAMP_GET_GLOBAL
#define SDMA_SUBOP_PTEPDE_COPY_BACKWARDS
#define SDMA_SUBOP_TIMESTAMP_SET
#define SDMA_OP_TIMESTAMP
#define SDMA_OP_INDIRECT
#define SDMA_OP_COND_EXE
#define SDMA_OP_CONST_FILL
#define SDMA_SUBOP_COPY_DIRTY_PAGE
#define SDMA_OP_NOP
Commands for the SDMA engine.
#define SDMA_SUBOP_WRITE_LINEAR
#define SDMA_OP_FENCE
#define SDMA_SUBOP_PTEPDE_RMW
#define SDMA_OP_SRBM_WRITE
#define SDMA_SUBOP_POLL_MEM_VERIFY
#define SDMA_SUBOP_POLL_REG_WRITE_MEM
#define SDMA_SUBOP_COPY_TILED_SUB_WIND
#define SDMA_SUBOP_POLL_DBIT_WRITE_MEM
#define SDMA_SUBOP_COPY_TILED
#define mmSDMA_GFX_DOORBELL
Definition sdma_mmio.hh:49
#define mmSDMA_PAGE_RB_RPTR_ADDR_HI
Definition sdma_mmio.hh:55
#define mmSDMA_GFX_RB_WPTR_POLL_ADDR_LO
Definition sdma_mmio.hh:52
#define mmSDMA_PAGE_RB_BASE
Definition sdma_mmio.hh:54
#define mmSDMA_PAGE_RB_WPTR_POLL_ADDR_LO
Definition sdma_mmio.hh:59
#define mmSDMA_PAGE_DOORBELL
Definition sdma_mmio.hh:57
#define mmSDMA_GFX_DOORBELL_OFFSET
Definition sdma_mmio.hh:50
#define mmSDMA_PAGE_DOORBELL_OFFSET
Definition sdma_mmio.hh:58
#define mmSDMA_GFX_RB_CNTL
MMIO offsets for SDMA engine.
Definition sdma_mmio.hh:44
#define mmSDMA_GFX_RB_RPTR_ADDR_HI
Definition sdma_mmio.hh:47
#define mmSDMA_PAGE_RB_RPTR_ADDR_LO
Definition sdma_mmio.hh:56
#define mmSDMA_GFX_RB_RPTR_ADDR_LO
Definition sdma_mmio.hh:48
#define mmSDMA_GFX_RB_WPTR_POLL_ADDR_HI
Definition sdma_mmio.hh:51
#define mmSDMA_GFX_RB_BASE
Definition sdma_mmio.hh:45
#define mmSDMA_PAGE_RB_CNTL
Definition sdma_mmio.hh:53
#define mmSDMA_GFX_RB_BASE_HI
Definition sdma_mmio.hh:46
#define UNSERIALIZE_SCALAR(scalar)
Definition serialize.hh:575
#define SERIALIZE_SCALAR(scalar)
Definition serialize.hh:568
PM4 packets.
uint32_t sdmax_rlcx_rb_rptr_addr_hi
uint32_t sdmax_rlcx_rb_cntl
uint32_t sdmax_rlcx_rb_rptr_addr_lo
const std::string & name()
Definition trace.cc:48

Generated on Mon Jan 13 2025 04:28:33 for gem5 by doxygen 1.9.8