sdma_engine.cc
/*
 * Copyright (c) 2021 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
#include "dev/amdgpu/sdma_engine.hh"

#include "arch/amdgpu/vega/pagetable_walker.hh"
#include "arch/generic/mmu.hh"
#include "debug/SDMAData.hh"
#include "debug/SDMAEngine.hh"
#include "dev/amdgpu/interrupt_handler.hh"
#include "dev/amdgpu/sdma_commands.hh"
#include "dev/amdgpu/sdma_mmio.hh"
#include "mem/packet.hh"
#include "mem/packet_access.hh"
#include "params/SDMAEngine.hh"

namespace gem5
{

SDMAEngine::SDMAEngine(const SDMAEngineParams &p)
    : DmaVirtDevice(p), id(0), gfxBase(0), gfxRptr(0),
      gfxDoorbell(0), gfxDoorbellOffset(0), gfxWptr(0), pageBase(0),
      pageRptr(0), pageDoorbell(0), pageDoorbellOffset(0),
      pageWptr(0), gpuDevice(nullptr), walker(p.walker),
      mmioBase(p.mmio_base), mmioSize(p.mmio_size)
{
    gfx.ib(&gfxIb);
    gfxIb.parent(&gfx);
    gfx.valid(true);
    gfxIb.valid(true);
    gfx.queueType(SDMAGfx);
    gfxIb.queueType(SDMAGfx);

    page.ib(&pageIb);
    pageIb.parent(&page);
    page.valid(true);
    pageIb.valid(true);
    page.queueType(SDMAPage);
    pageIb.queueType(SDMAPage);

    rlc0.ib(&rlc0Ib);
    rlc0Ib.parent(&rlc0);

    rlc1.ib(&rlc1Ib);
    rlc1Ib.parent(&rlc1);
}
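/*
 * Queue wiring note: each SDMAEngine drives four rings -- gfx, page, and
 * two RLC (Run List Controller) queues -- and the gfx/page rings each own
 * an indirect-buffer (IB) child queue that points back to its parent so
 * decodeNext() can resume the ring once an IB is drained. The RLC queues
 * are wired up later, when the driver maps them via registerRLCQueue().
 */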
75
void
SDMAEngine::setGPUDevice(AMDGPUDevice *gpu_device)
{
    gpuDevice = gpu_device;
    walker->setDevRequestor(gpuDevice->vramRequestorId());
}

int
SDMAEngine::getIHClientId(int _id)
{
    switch (_id) {
      case 0:
        return SOC15_IH_CLIENTID_SDMA0;
      case 1:
        return SOC15_IH_CLIENTID_SDMA1;
      case 2:
        return SOC15_IH_CLIENTID_SDMA2;
      case 3:
        return SOC15_IH_CLIENTID_SDMA3;
      case 4:
        return SOC15_IH_CLIENTID_SDMA4;
      case 5:
        return SOC15_IH_CLIENTID_SDMA5;
      case 6:
        return SOC15_IH_CLIENTID_SDMA6;
      case 7:
        return SOC15_IH_CLIENTID_SDMA7;
      default:
        panic("Unknown SDMA id");
    }
}
107
Addr
SDMAEngine::getGARTAddr(Addr addr) const
{
    if (!gpuDevice->getVM().inAGP(addr)) {
        Addr low_bits = bits(addr, 11, 0);
        addr = (((addr >> 12) << 3) << 12) | low_bits;
    }
    return addr;
}
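/*
 * Worked example of the transform above (illustrative values): GART
 * entries are 8 bytes, so the 4 KiB page number is scaled by 8 while the
 * page offset is preserved:
 *   addr = 0x2345 -> low_bits = 0x345, page number = 0x2
 *   result = ((0x2 << 3) << 12) | 0x345 = 0x10345
 */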
117
Addr
SDMAEngine::getDeviceAddress(Addr raw_addr)
{
    // SDMA packets can access both host and device memory as either a source
    // or destination address. We don't know which until it is translated, so
    // we do a dummy functional translation to determine if the address
    // resides in system memory or not.
    auto tgen = translate(raw_addr, 64);
    auto addr_range = *(tgen->begin());
    Addr tmp_addr = addr_range.paddr;
    DPRINTF(SDMAEngine, "getDeviceAddress raw_addr %#lx -> %#lx\n",
            raw_addr, tmp_addr);

    // SDMA packets will access device memory through the MMHUB aperture in
    // supervisor mode (vmid == 0) and in user mode (vmid > 0). In the case
    // of vmid == 0 the address is already an MMHUB address in the packet,
    // so simply subtract the MMHUB base. For vmid > 0 the address is a
    // virtual address that must first be translated. The translation will
    // return an MMHUB address, then we can similarly subtract the base to
    // get the device address. Otherwise, for host, the device address is 0.
    Addr device_addr = 0;
    if ((gpuDevice->getVM().inMMHUB(raw_addr) && cur_vmid == 0) ||
        (gpuDevice->getVM().inMMHUB(tmp_addr) && cur_vmid != 0)) {
        if (cur_vmid == 0) {
            device_addr = raw_addr - gpuDevice->getVM().getMMHUBBase();
        } else {
            device_addr = tmp_addr - gpuDevice->getVM().getMMHUBBase();
        }
    }

    return device_addr;
}
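/*
 * Illustration with hypothetical numbers: if the MMHUB aperture began at
 * 0x800000000000, a vmid == 0 packet address of 0x800000001000 would
 * return a device address of 0x1000, while a system-memory address would
 * fall through both checks and return 0 (host).
 */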
150
/**
 * GPUController will perform DMA operations on VAs, and because page
 * faults are not currently supported for the GPU device, we must be able
 * to find the pages mapped for the process.
 */
TranslationGenPtr
SDMAEngine::translate(Addr vaddr, Addr size)
{
    if (cur_vmid > 0) {
        // Only user translation is available to user queues (vmid > 0)
        return TranslationGenPtr(new AMDGPUVM::UserTranslationGen(
            &gpuDevice->getVM(), walker, cur_vmid, vaddr, size));
    } else if (gpuDevice->getVM().inAGP(vaddr)) {
        // Use AGP translation gen
        return TranslationGenPtr(
            new AMDGPUVM::AGPTranslationGen(&gpuDevice->getVM(), vaddr, size));
    } else if (gpuDevice->getVM().inMMHUB(vaddr)) {
        // Use MMHUB translation gen
        return TranslationGenPtr(new AMDGPUVM::MMHUBTranslationGen(
            &gpuDevice->getVM(), vaddr, size));
    }

    // Assume GART otherwise as this is the only other translation aperture
    // available to the SDMA engine processor.
    return TranslationGenPtr(
        new AMDGPUVM::GARTTranslationGen(&gpuDevice->getVM(), vaddr, size));
}
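/*
 * Aperture selection above, in priority order: per-process user VM
 * translation for vmid > 0, then the AGP and MMHUB apertures for
 * system-mode (vmid == 0) addresses, and finally GART as the fallback.
 */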
179
void
SDMAEngine::registerRLCQueue(Addr doorbell, Addr mqdAddr, SDMAQueueDesc *mqd)
{
    uint32_t rlc_size = 4UL << bits(mqd->sdmax_rlcx_rb_cntl, 6, 1);
    Addr rptr_wb_addr = mqd->sdmax_rlcx_rb_rptr_addr_hi;
    rptr_wb_addr <<= 32;
    rptr_wb_addr |= mqd->sdmax_rlcx_rb_rptr_addr_lo;
    bool priv = bits(mqd->sdmax_rlcx_rb_cntl, 23, 23);

    // Get first free RLC
    if (!rlc0.valid()) {
        DPRINTF(SDMAEngine, "Doorbell %lx mapped to RLC0\n", doorbell);
        rlcInfo[0] = doorbell;
        rlc0.valid(true);
        rlc0.base(mqd->rb_base << 8);
        rlc0.size(rlc_size);
        rlc0.rptr(0);
        rlc0.incRptr(mqd->rptr);
        rlc0.setWptr(mqd->wptr);
        rlc0.rptrWbAddr(rptr_wb_addr);
        rlc0.processing(false);
        rlc0.setMQD(mqd);
        rlc0.setMQDAddr(mqdAddr);
        // Assumed setter name for the privilege bit decoded above; the
        // original line was lost in extraction.
        rlc0.setPriv(priv);
    } else if (!rlc1.valid()) {
        DPRINTF(SDMAEngine, "Doorbell %lx mapped to RLC1\n", doorbell);
        rlcInfo[1] = doorbell;
        rlc1.valid(true);
        rlc1.base(mqd->rb_base << 8);
        rlc1.size(rlc_size);
        rlc1.rptr(0);
        rlc1.incRptr(mqd->rptr);
        rlc1.setWptr(mqd->wptr);
        rlc1.rptrWbAddr(rptr_wb_addr);
        rlc1.processing(false);
        rlc1.setMQD(mqd);
        rlc1.setMQDAddr(mqdAddr);
        rlc1.setPriv(priv);
    } else {
        panic("No free RLCs. Check they are properly unmapped.");
    }
}
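/*
 * Ring-size decode example: bits 6:1 of sdmax_rlcx_rb_cntl hold a log2
 * encoding, so a field value of 10 yields rlc_size = 4UL << 10 = 4096
 * bytes (1024 dwords).
 */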
222
void
SDMAEngine::unregisterRLCQueue(Addr doorbell)
{
    DPRINTF(SDMAEngine, "Unregistering RLC queue at %#lx\n", doorbell);
    if (rlcInfo[0] == doorbell) {
        SDMAQueueDesc *mqd = rlc0.getMQD();
        if (mqd) {
            DPRINTF(SDMAEngine, "Writing RLC0 SDMAMQD back to %#lx\n",
                    rlc0.getMQDAddr());

            mqd->rptr = rlc0.globalRptr();
            mqd->wptr = rlc0.getWptr();

            auto cb = new DmaVirtCallback<uint32_t>(
                [ = ] (const uint32_t &) { });
            dmaWriteVirt(rlc0.getMQDAddr(), sizeof(SDMAQueueDesc), cb, mqd);
        } else {
            warn("RLC0 SDMAMQD address invalid\n");
        }
        rlc0.valid(false);
        rlcInfo[0] = 0;
    } else if (rlcInfo[1] == doorbell) {
        SDMAQueueDesc *mqd = rlc1.getMQD();
        if (mqd) {
            DPRINTF(SDMAEngine, "Writing RLC1 SDMAMQD back to %#lx\n",
                    rlc1.getMQDAddr());

            mqd->rptr = rlc1.globalRptr();
            mqd->wptr = rlc1.getWptr();

            auto cb = new DmaVirtCallback<uint32_t>(
                [ = ] (const uint32_t &) { });
            dmaWriteVirt(rlc1.getMQDAddr(), sizeof(SDMAQueueDesc), cb, mqd);
        } else {
            warn("RLC1 SDMAMQD address invalid\n");
        }
        rlc1.valid(false);
        rlcInfo[1] = 0;
    } else {
        panic("Cannot unregister: no RLC queue at %#lx\n", doorbell);
    }
}
265
void
SDMAEngine::deallocateRLCQueues()
{
    for (auto doorbell: rlcInfo) {
        if (doorbell) {
            unregisterRLCQueue(doorbell);
            gpuDevice->unsetDoorbell(doorbell);
        }
    }
}
276
/* Start decoding packets from the Gfx queue. */
void
SDMAEngine::processGfx(Addr wptrOffset)
{
    gfx.setWptr(wptrOffset);
    if (!gfx.processing()) {
        gfx.processing(true);
        decodeNext(&gfx);
    }
}

/* Start decoding packets from the Page queue. */
void
SDMAEngine::processPage(Addr wptrOffset)
{
    page.setWptr(wptrOffset);
    if (!page.processing()) {
        page.processing(true);
        decodeNext(&page);
    }
}
298
/* Process RLC queue at given doorbell. */
void
SDMAEngine::processRLC(Addr doorbellOffset, Addr wptrOffset)
{
    if (rlcInfo[0] == doorbellOffset) {
        processRLC0(wptrOffset);
    } else if (rlcInfo[1] == doorbellOffset) {
        processRLC1(wptrOffset);
    } else {
        panic("Cannot process: no RLC queue at %#lx\n", doorbellOffset);
    }
}

/* Start decoding packets from the RLC0 queue. */
void
SDMAEngine::processRLC0(Addr wptrOffset)
{
    assert(rlc0.valid());

    rlc0.setWptr(wptrOffset);
    if (!rlc0.processing()) {
        cur_vmid = 1;
        rlc0.processing(true);
        decodeNext(&rlc0);
    }
}

/* Start decoding packets from the RLC1 queue. */
void
SDMAEngine::processRLC1(Addr wptrOffset)
{
    assert(rlc1.valid());

    rlc1.setWptr(wptrOffset);
    if (!rlc1.processing()) {
        cur_vmid = 1;
        rlc1.processing(true);
        decodeNext(&rlc1);
    }
}
339
/* Decode the next packet in the queue. */
void
SDMAEngine::decodeNext(SDMAQueue *q)
{
    DPRINTF(SDMAEngine, "SDMA decode rptr %p wptr %p\n", q->rptr(), q->wptr());

    if (q->rptr() != q->wptr()) {
        // We are using lambda functions passed to the DmaVirtCallback objects
        // which will call the actual callback method (e.g., decodeHeader).
        // The dmaBuffer member of the DmaVirtCallback is passed to the lambda
        // function as header in this case.
        auto cb = new DmaVirtCallback<uint32_t>(
            [ = ] (const uint32_t &header)
            { decodeHeader(q, header); });
        dmaReadVirt(q->rptr(), sizeof(uint32_t), cb, &cb->dmaBuffer);
    } else {
        // The driver expects the rptr to be written back to host memory
        // periodically. In simulation, we write back rptr after each burst
        // of packets from a doorbell, rather than using the cycle count,
        // which is not accurate in all simulation settings (e.g., KVM).
        DPRINTF(SDMAEngine, "Writing rptr %#lx back to host addr %#lx\n",
                q->globalRptr(), q->rptrWbAddr());
        if (q->rptrWbAddr()) {
            auto cb = new DmaVirtCallback<uint64_t>(
                [ = ](const uint64_t &) { }, q->globalRptr());
            dmaWriteVirt(q->rptrWbAddr(), sizeof(Addr), cb, &cb->dmaBuffer);
        }
        q->processing(false);
        if (q->parent()) {
            DPRINTF(SDMAEngine, "SDMA switching queues\n");
            decodeNext(q->parent());
        }
        cur_vmid = 0;
    }
}
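/*
 * Decode-loop sketch: a doorbell write advances wptr and kicks this
 * engine; each iteration reads one 32-bit header at rptr, decodeHeader()
 * consumes the packet body and calls back into decodeNext(), and the
 * burst ends when rptr catches up to wptr, at which point rptr is
 * written back to the host and any parent queue is resumed.
 */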
375
/* Decoding the header of a packet. */
void
SDMAEngine::decodeHeader(SDMAQueue *q, uint32_t header)
{
    q->incRptr(sizeof(header));
    int opcode = bits(header, 7, 0);
    int sub_opcode = bits(header, 15, 8);

    DmaVirtCallback<uint64_t> *cb = nullptr;
    void *dmaBuffer = nullptr;

    DPRINTF(SDMAEngine, "SDMA opcode %p sub-opcode %p\n", opcode, sub_opcode);

    switch (opcode) {
      case SDMA_OP_NOP: {
        uint32_t NOP_count = (header >> 16) & 0x3FFF;
        DPRINTF(SDMAEngine, "SDMA NOP packet with count %d\n", NOP_count);
        if (NOP_count > 0) {
            for (int i = 0; i < NOP_count; ++i) {
                if (q->rptr() == q->wptr()) {
                    warn("NOP count is beyond wptr, ignoring remaining NOPs");
                    break;
                }
                q->incRptr(4);
            }
        }
        decodeNext(q);
      } break;
      case SDMA_OP_COPY: {
        DPRINTF(SDMAEngine, "SDMA Copy packet\n");
        switch (sub_opcode) {
          case SDMA_SUBOP_COPY_LINEAR: {
            dmaBuffer = new sdmaCopy();
            cb = new DmaVirtCallback<uint64_t>(
                [ = ] (const uint64_t &)
                { copy(q, (sdmaCopy *)dmaBuffer); });
            dmaReadVirt(q->rptr(), sizeof(sdmaCopy), cb, dmaBuffer);
          } break;
          case SDMA_SUBOP_COPY_LINEAR_SUB_WIND: {
            panic("SDMA_SUBOP_COPY_LINEAR_SUB_WIND not implemented");
          } break;
          case SDMA_SUBOP_COPY_TILED: {
            panic("SDMA_SUBOP_COPY_TILED not implemented");
          } break;
          case SDMA_SUBOP_COPY_TILED_SUB_WIND: {
            panic("SDMA_SUBOP_COPY_TILED_SUB_WIND not implemented");
          } break;
          case SDMA_SUBOP_COPY_T2T_SUB_WIND: {
            panic("SDMA_SUBOP_COPY_T2T_SUB_WIND not implemented");
          } break;
          case SDMA_SUBOP_COPY_SOA: {
            panic("SDMA_SUBOP_COPY_SOA not implemented");
          } break;
          case SDMA_SUBOP_COPY_DIRTY_PAGE: {
            panic("SDMA_SUBOP_COPY_DIRTY_PAGE not implemented");
          } break;
          case SDMA_SUBOP_COPY_LINEAR_PHY: {
            panic("SDMA_SUBOP_COPY_LINEAR_PHY not implemented");
          } break;
          default: {
            panic("SDMA unknown copy sub-opcode.");
          } break;
        }
      } break;
      case SDMA_OP_WRITE: {
        DPRINTF(SDMAEngine, "SDMA Write packet\n");
        switch (sub_opcode) {
          case SDMA_SUBOP_WRITE_LINEAR: {
            dmaBuffer = new sdmaWrite();
            cb = new DmaVirtCallback<uint64_t>(
                [ = ] (const uint64_t &)
                { write(q, (sdmaWrite *)dmaBuffer); });
            dmaReadVirt(q->rptr(), sizeof(sdmaWrite), cb, dmaBuffer);
          } break;
          case SDMA_SUBOP_WRITE_TILED: {
            panic("SDMA_SUBOP_WRITE_TILED not implemented.\n");
          } break;
          default:
            break;
        }
      } break;
      case SDMA_OP_INDIRECT: {
        DPRINTF(SDMAEngine, "SDMA IndirectBuffer packet\n");
        dmaBuffer = new sdmaIndirectBuffer();
        cb = new DmaVirtCallback<uint64_t>(
            [ = ] (const uint64_t &)
            { indirectBuffer(q, (sdmaIndirectBuffer *)dmaBuffer); });
        dmaReadVirt(q->rptr(), sizeof(sdmaIndirectBuffer), cb, dmaBuffer);
      } break;
      case SDMA_OP_FENCE: {
        DPRINTF(SDMAEngine, "SDMA Fence packet\n");
        dmaBuffer = new sdmaFence();
        cb = new DmaVirtCallback<uint64_t>(
            [ = ] (const uint64_t &)
            { fence(q, (sdmaFence *)dmaBuffer); });
        dmaReadVirt(q->rptr(), sizeof(sdmaFence), cb, dmaBuffer);
      } break;
      case SDMA_OP_TRAP: {
        DPRINTF(SDMAEngine, "SDMA Trap packet\n");
        dmaBuffer = new sdmaTrap();
        cb = new DmaVirtCallback<uint64_t>(
            [ = ] (const uint64_t &)
            { trap(q, (sdmaTrap *)dmaBuffer); });
        dmaReadVirt(q->rptr(), sizeof(sdmaTrap), cb, dmaBuffer);
      } break;
      case SDMA_OP_SEM: {
        q->incRptr(sizeof(sdmaSemaphore));
        warn("SDMA_OP_SEM not implemented");
        decodeNext(q);
      } break;
      case SDMA_OP_POLL_REGMEM: {
        DPRINTF(SDMAEngine, "SDMA PollRegMem packet\n");
        sdmaPollRegMemHeader *h = new sdmaPollRegMemHeader();
        *h = *(sdmaPollRegMemHeader *)&header;
        dmaBuffer = new sdmaPollRegMem();
        cb = new DmaVirtCallback<uint64_t>(
            [ = ] (const uint64_t &)
            { pollRegMem(q, h, (sdmaPollRegMem *)dmaBuffer); });
        dmaReadVirt(q->rptr(), sizeof(sdmaPollRegMem), cb, dmaBuffer);
        switch (sub_opcode) {
          case SDMA_SUBOP_POLL_REG_WRITE_MEM: {
            panic("SDMA_SUBOP_POLL_REG_WRITE_MEM not implemented");
          } break;
          case SDMA_SUBOP_POLL_DBIT_WRITE_MEM: {
            panic("SDMA_SUBOP_POLL_DBIT_WRITE_MEM not implemented");
          } break;
          case SDMA_SUBOP_POLL_MEM_VERIFY: {
            panic("SDMA_SUBOP_POLL_MEM_VERIFY not implemented");
          } break;
          default:
            break;
        }
      } break;
      case SDMA_OP_COND_EXE: {
        q->incRptr(sizeof(sdmaCondExec));
        warn("SDMA_OP_COND_EXE not implemented");
        decodeNext(q);
      } break;
      case SDMA_OP_ATOMIC: {
        DPRINTF(SDMAEngine, "SDMA Atomic packet\n");
        dmaBuffer = new sdmaAtomic();
        sdmaAtomicHeader *h = new sdmaAtomicHeader();
        *h = *(sdmaAtomicHeader *)&header;
        cb = new DmaVirtCallback<uint64_t>(
            [ = ] (const uint64_t &)
            { atomic(q, h, (sdmaAtomic *)dmaBuffer); });
        dmaReadVirt(q->rptr(), sizeof(sdmaAtomic), cb, dmaBuffer);
      } break;
      case SDMA_OP_CONST_FILL: {
        DPRINTF(SDMAEngine, "SDMA Constant fill packet\n");
        dmaBuffer = new sdmaConstFill();
        cb = new DmaVirtCallback<uint64_t>(
            [ = ] (const uint64_t &)
            { constFill(q, (sdmaConstFill *)dmaBuffer, header); });
        dmaReadVirt(q->rptr(), sizeof(sdmaConstFill), cb, dmaBuffer);
      } break;
      case SDMA_OP_PTEPDE: {
        DPRINTF(SDMAEngine, "SDMA PTEPDE packet\n");
        switch (sub_opcode) {
          case SDMA_SUBOP_PTEPDE_GEN:
            DPRINTF(SDMAEngine, "SDMA PTEPDE_GEN sub-opcode\n");
            dmaBuffer = new sdmaPtePde();
            cb = new DmaVirtCallback<uint64_t>(
                [ = ] (const uint64_t &)
                { ptePde(q, (sdmaPtePde *)dmaBuffer); });
            dmaReadVirt(q->rptr(), sizeof(sdmaPtePde), cb, dmaBuffer);
            break;
          case SDMA_SUBOP_PTEPDE_COPY:
            panic("SDMA_SUBOP_PTEPDE_COPY not implemented");
            break;
          case SDMA_SUBOP_PTEPDE_COPY_BACKWARDS:
            panic("SDMA_SUBOP_PTEPDE_COPY_BACKWARDS not implemented");
            break;
          case SDMA_SUBOP_PTEPDE_RMW: {
            panic("SDMA_SUBOP_PTEPDE_RMW not implemented");
          } break;
          default:
            DPRINTF(SDMAEngine, "Unsupported PTEPDE sub-opcode %d\n",
                    sub_opcode);
            decodeNext(q);
            break;
        }
      } break;
      case SDMA_OP_TIMESTAMP: {
        q->incRptr(sizeof(sdmaTimestamp));
        switch (sub_opcode) {
          case SDMA_SUBOP_TIMESTAMP_SET: {
          } break;
          case SDMA_SUBOP_TIMESTAMP_GET: {
          } break;
          case SDMA_SUBOP_TIMESTAMP_GET_GLOBAL: {
          } break;
          default:
            break;
        }
        warn("SDMA_OP_TIMESTAMP not implemented");
        decodeNext(q);
      } break;
      case SDMA_OP_SRBM_WRITE: {
        DPRINTF(SDMAEngine, "SDMA SRBMWrite packet\n");
        sdmaSRBMWriteHeader *srbm_header = new sdmaSRBMWriteHeader();
        *srbm_header = *(sdmaSRBMWriteHeader *)&header;
        dmaBuffer = new sdmaSRBMWrite();
        cb = new DmaVirtCallback<uint64_t>(
            [ = ] (const uint64_t &)
            { srbmWrite(q, srbm_header, (sdmaSRBMWrite *)dmaBuffer); });
        dmaReadVirt(q->rptr(), sizeof(sdmaSRBMWrite), cb, dmaBuffer);
      } break;
      case SDMA_OP_PRE_EXE: {
        q->incRptr(sizeof(sdmaPredExec));
        warn("SDMA_OP_PRE_EXE not implemented");
        decodeNext(q);
      } break;
      case SDMA_OP_DUMMY_TRAP: {
        q->incRptr(sizeof(sdmaDummyTrap));
        warn("SDMA_OP_DUMMY_TRAP not implemented");
        decodeNext(q);
      } break;
      default: {
        panic("Invalid SDMA packet.\n");
      } break;
    }
}
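/*
 * Header layout recap (DW0 of every SDMA packet): bits 7:0 hold the
 * opcode and bits 15:8 the sub-opcode; opcode-specific fields occupy the
 * upper half, e.g. the NOP count handled above sits in bits 29:16, which
 * is why it is extracted as (header >> 16) & 0x3FFF.
 */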
599
/* Implements a write packet. */
void
SDMAEngine::write(SDMAQueue *q, sdmaWrite *pkt)
{
    q->incRptr(sizeof(sdmaWrite));
    // count represents the number of dwords - 1 to write
    pkt->count++;
    DPRINTF(SDMAEngine, "Write %d dwords to %lx\n", pkt->count, pkt->dest);

    // first we have to read the needed data from the SDMA queue
    uint32_t *dmaBuffer = new uint32_t[pkt->count];
    auto cb = new DmaVirtCallback<uint64_t>(
        [ = ] (const uint64_t &) { writeReadData(q, pkt, dmaBuffer); });
    dmaReadVirt(q->rptr(), sizeof(uint32_t) * pkt->count, cb,
                (void *)dmaBuffer);
}
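/*
 * Count example: the count field encodes dwords - 1, so a packet that
 * arrives with count == 3 is bumped to 4 above and moves
 * 4 * sizeof(uint32_t) = 16 bytes of payload out of the ring.
 */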
616
/* Completion of data reading for a write packet. */
void
SDMAEngine::writeReadData(SDMAQueue *q, sdmaWrite *pkt, uint32_t *dmaBuffer)
{
    int bufferSize = sizeof(uint32_t) * pkt->count;
    q->incRptr(bufferSize);

    DPRINTF(SDMAEngine, "Write packet data:\n");
    for (int i = 0; i < pkt->count; ++i) {
        DPRINTF(SDMAEngine, "%08x\n", dmaBuffer[i]);
    }

    // lastly we write read data to the destination address
    if (gpuDevice->getVM().inMMHUB(pkt->dest)) {
        Addr mmhub_addr = pkt->dest - gpuDevice->getVM().getMMHUBBase();

        fatal_if(gpuDevice->getVM().inGARTRange(mmhub_addr),
                 "SDMA write to GART not implemented");

        auto cb = new EventFunctionWrapper(
            [ = ]{ writeDone(q, pkt, dmaBuffer); }, name());
        gpuDevice->getMemMgr()->writeRequest(mmhub_addr, (uint8_t *)dmaBuffer,
                                             bufferSize, 0, cb);
    } else {
        if (q->priv()) {
            pkt->dest = getGARTAddr(pkt->dest);
        }
        auto cb = new DmaVirtCallback<uint32_t>(
            [ = ] (const uint64_t &) { writeDone(q, pkt, dmaBuffer); });
        dmaWriteVirt(pkt->dest, bufferSize, cb, (void *)dmaBuffer);
    }
}

/* Completion of a write packet. */
void
SDMAEngine::writeDone(SDMAQueue *q, sdmaWrite *pkt, uint32_t *dmaBuffer)
{
    DPRINTF(SDMAEngine, "Write packet completed to %p, %d dwords\n",
            pkt->dest, pkt->count);
    delete [] dmaBuffer;
    delete pkt;
    decodeNext(q);
}
660
/* Implements a copy packet. */
void
SDMAEngine::copy(SDMAQueue *q, sdmaCopy *pkt)
{
    DPRINTF(SDMAEngine, "Copy src: %lx -> dest: %lx count %d\n",
            pkt->source, pkt->dest, pkt->count);
    q->incRptr(sizeof(sdmaCopy));
    // count represents the number of bytes - 1 to be copied
    pkt->count++;
    if (q->priv()) {
        if (!gpuDevice->getVM().inMMHUB(pkt->source)) {
            DPRINTF(SDMAEngine, "Getting GART addr for %lx\n", pkt->source);
            pkt->source = getGARTAddr(pkt->source);
            DPRINTF(SDMAEngine, "GART addr %lx\n", pkt->source);
        }
    }

    // Read data from the source first, then call the copyReadData method
    uint8_t *dmaBuffer = new uint8_t[pkt->count];
    Addr device_addr = getDeviceAddress(pkt->source);
    if (device_addr) {
        DPRINTF(SDMAEngine, "Copying from device address %#lx\n", device_addr);
        auto cb = new EventFunctionWrapper(
            [ = ]{ copyReadData(q, pkt, dmaBuffer); }, name());

        // Copy the minimum page size at a time in case the physical
        // addresses are not contiguous.
        ChunkGenerator gen(pkt->source, pkt->count, AMDGPU_MMHUB_PAGE_SIZE);
        for (; !gen.done(); gen.next()) {
            Addr chunk_addr = getDeviceAddress(gen.addr());
            assert(chunk_addr);

            DPRINTF(SDMAEngine, "Copying chunk of %d bytes from %#lx (%#lx)\n",
                    gen.size(), gen.addr(), chunk_addr);

            gpuDevice->getMemMgr()->readRequest(chunk_addr, dmaBuffer,
                                                gen.size(), 0,
                                                gen.last() ? cb : nullptr);
            dmaBuffer += gen.size();
        }
    } else {
        auto cb = new DmaVirtCallback<uint64_t>(
            [ = ] (const uint64_t &) { copyReadData(q, pkt, dmaBuffer); });
        dmaReadVirt(pkt->source, pkt->count, cb, (void *)dmaBuffer);
    }
}
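/*
 * Chunking example (assuming a 4096-byte AMDGPU_MMHUB_PAGE_SIZE): a
 * 10000-byte device-side copy is issued as three requests of 4096, 4096,
 * and 1808 bytes, re-translating each chunk since the backing physical
 * pages may not be contiguous.
 */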
707
/* Completion of data reading for a copy packet. */
void
SDMAEngine::copyReadData(SDMAQueue *q, sdmaCopy *pkt, uint8_t *dmaBuffer)
{
    // lastly we write read data to the destination address
    uint64_t *dmaBuffer64 = reinterpret_cast<uint64_t *>(dmaBuffer);

    DPRINTF(SDMAEngine, "Copy packet last/first qwords:\n");
    DPRINTF(SDMAEngine, "First: %016lx\n", dmaBuffer64[0]);
    DPRINTF(SDMAEngine, "Last: %016lx\n", dmaBuffer64[(pkt->count/8)-1]);

    DPRINTF(SDMAData, "Copy packet data:\n");
    for (int i = 0; i < pkt->count/8; ++i) {
        DPRINTF(SDMAData, "%016lx\n", dmaBuffer64[i]);
    }

    Addr device_addr = getDeviceAddress(pkt->dest);
    // Write the read data to the destination address, then call the
    // copyDone method.
    if (device_addr) {
        DPRINTF(SDMAEngine, "Copying to device address %#lx\n", device_addr);
        auto cb = new EventFunctionWrapper(
            [ = ]{ copyDone(q, pkt, dmaBuffer); }, name());

        // Copy the minimum page size at a time in case the physical
        // addresses are not contiguous.
        ChunkGenerator gen(pkt->dest, pkt->count, AMDGPU_MMHUB_PAGE_SIZE);
        for (; !gen.done(); gen.next()) {
            Addr chunk_addr = getDeviceAddress(gen.addr());
            assert(chunk_addr);

            DPRINTF(SDMAEngine, "Copying chunk of %d bytes to %#lx (%#lx)\n",
                    gen.size(), gen.addr(), chunk_addr);

            gpuDevice->getMemMgr()->writeRequest(chunk_addr, dmaBuffer,
                                                 gen.size(), 0,
                                                 gen.last() ? cb : nullptr);

            dmaBuffer += gen.size();
        }
    } else {
        auto cb = new DmaVirtCallback<uint64_t>(
            [ = ] (const uint64_t &) { copyDone(q, pkt, dmaBuffer); });
        dmaWriteVirt(pkt->dest, pkt->count, cb, (void *)dmaBuffer);
    }

    // For destinations in the GART table, gem5 uses a mapping table instead
    // of functionally going to device memory, so we need to update that copy.
    if (gpuDevice->getVM().inGARTRange(device_addr)) {
        // GART entries are always 8 bytes.
        assert((pkt->count % 8) == 0);
        for (int i = 0; i < pkt->count/8; ++i) {
            Addr gart_addr = device_addr + i*8 - gpuDevice->getVM().gartBase();
            DPRINTF(SDMAEngine, "Shadow copying to GART table %lx -> %lx\n",
                    gart_addr, dmaBuffer64[i]);
            gpuDevice->getVM().gartTable[gart_addr] = dmaBuffer64[i];
        }
    }
}

/* Completion of a copy packet. */
void
SDMAEngine::copyDone(SDMAQueue *q, sdmaCopy *pkt, uint8_t *dmaBuffer)
{
    DPRINTF(SDMAEngine, "Copy completed to %p, %d bytes\n",
            pkt->dest, pkt->count);
    delete [] dmaBuffer;
    delete pkt;
    decodeNext(q);
}
777
/* Implements an indirect buffer packet. */
void
SDMAEngine::indirectBuffer(SDMAQueue *q, sdmaIndirectBuffer *pkt)
{
    if (q->priv()) {
        q->ib()->base(getGARTAddr(pkt->base));
    } else {
        q->ib()->base(pkt->base);
    }
    q->ib()->rptr(0);
    q->ib()->size(pkt->size * sizeof(uint32_t) + 1);
    q->ib()->setWptr(pkt->size * sizeof(uint32_t));

    q->incRptr(sizeof(sdmaIndirectBuffer));

    delete pkt;
    decodeNext(q->ib());
}
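/*
 * IB example: an indirect-buffer packet pointing at a 64-dword command
 * buffer sets the child queue's size and wptr to 64 * sizeof(uint32_t) =
 * 256 bytes; decodeNext() then drains the IB queue and, via q->parent(),
 * resumes the ring that issued it.
 */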
796
/* Implements a fence packet. */
void
SDMAEngine::fence(SDMAQueue *q, sdmaFence *pkt)
{
    q->incRptr(sizeof(sdmaFence));
    if (q->priv()) {
        pkt->dest = getGARTAddr(pkt->dest);
    }

    // Writing the data from the fence packet to the destination address.
    auto cb = new DmaVirtCallback<uint32_t>(
        [ = ] (const uint32_t &) { fenceDone(q, pkt); }, pkt->data);
    dmaWriteVirt(pkt->dest, sizeof(pkt->data), cb, &cb->dmaBuffer);
}

/* Completion of a fence packet. */
void
SDMAEngine::fenceDone(SDMAQueue *q, sdmaFence *pkt)
{
    DPRINTF(SDMAEngine, "Fence completed to %p, data 0x%x\n",
            pkt->dest, pkt->data);
    delete pkt;
    decodeNext(q);
}
821
/* Implements a trap packet. */
void
SDMAEngine::trap(SDMAQueue *q, sdmaTrap *pkt)
{
    q->incRptr(sizeof(sdmaTrap));

    DPRINTF(SDMAEngine, "Trap contextId: %p\n", pkt->intrContext);

    uint32_t ring_id = (q->queueType() == SDMAPage) ? 3 : 0;

    int node_id = 0;
    int local_id = getId();

    gpuDevice->getIH()->prepareInterruptCookie(pkt->intrContext, ring_id,
                                               getIHClientId(local_id),
                                               TRAP_ID, 2*node_id);
    // Assumed submit call on the interrupt handler; the original line was
    // lost in extraction.
    gpuDevice->getIH()->submitInterruptCookie();

    delete pkt;
    decodeNext(q);
}
843
/* Implements an SRBM write packet. */
void
SDMAEngine::srbmWrite(SDMAQueue *q, sdmaSRBMWriteHeader *header,
                      sdmaSRBMWrite *pkt)
{
    q->incRptr(sizeof(sdmaSRBMWrite));

    [[maybe_unused]] uint32_t reg_addr = pkt->regAddr << 2;
    uint32_t reg_mask = 0x00000000;

    if (header->byteEnable & 0x8) reg_mask |= 0xFF000000;
    if (header->byteEnable & 0x4) reg_mask |= 0x00FF0000;
    if (header->byteEnable & 0x2) reg_mask |= 0x0000FF00;
    if (header->byteEnable & 0x1) reg_mask |= 0x000000FF;
    pkt->data &= reg_mask;

    DPRINTF(SDMAEngine, "SRBM write to %#x with data %#x\n",
            reg_addr, pkt->data);

    gpuDevice->setRegVal(reg_addr, pkt->data);

    delete header;
    delete pkt;
    decodeNext(q);
}
869
/**
 * Implements a poll reg/mem packet that polls an SRBM register or a
 * memory location, compares the retrieved value with a reference value,
 * and retries if unsuccessful.
 */
void
SDMAEngine::pollRegMem(SDMAQueue *q, sdmaPollRegMemHeader *header,
                       sdmaPollRegMem *pkt)
{
    q->incRptr(sizeof(sdmaPollRegMem));

    DPRINTF(SDMAEngine, "POLL_REGMEM: M=%d, func=%d, op=%d, addr=%p, ref=%d, "
            "mask=%p, retry=%d, pinterval=%d\n", header->mode, header->func,
            header->op, pkt->address, pkt->ref, pkt->mask, pkt->retryCount,
            pkt->pollInt);

    bool skip = false;

    if (header->mode == 1) {
        // polling on a memory location
        if (header->op == 0) {
            auto cb = new DmaVirtCallback<uint32_t>(
                [ = ] (const uint32_t &dma_buffer) {
                    pollRegMemRead(q, header, pkt, dma_buffer, 0); });
            dmaReadVirt(pkt->address, sizeof(uint32_t), cb,
                        (void *)&cb->dmaBuffer);
        } else {
            panic("SDMA poll mem operation not implemented.");
            skip = true;
        }
    } else {
        warn_once("SDMA poll reg is not implemented. If this is required for "
                  "correctness, an SRBM model needs to be implemented.");
        skip = true;
    }

    if (skip) {
        delete header;
        delete pkt;
        decodeNext(q);
    }
}
912
void
SDMAEngine::pollRegMemRead(SDMAQueue *q, sdmaPollRegMemHeader *header,
                           sdmaPollRegMem *pkt, uint32_t dma_buffer, int count)
{
    assert(header->mode == 1 && header->op == 0);

    if (!pollRegMemFunc(dma_buffer, pkt->ref, header->func) &&
        ((count < (pkt->retryCount + 1) && pkt->retryCount != 0xfff) ||
         pkt->retryCount == 0xfff)) {

        // Continue polling on the memory location until the reference value
        // is met, retryCount is reached, or indefinitely if retryCount is
        // 0xfff.
        DPRINTF(SDMAEngine, "SDMA polling mem addr %p, val %d ref %d.\n",
                pkt->address, dma_buffer, pkt->ref);

        auto cb = new DmaVirtCallback<uint32_t>(
            [ = ] (const uint32_t &dma_buffer) {
                pollRegMemRead(q, header, pkt, dma_buffer, count + 1); });
        dmaReadVirt(pkt->address, sizeof(uint32_t), cb,
                    (void *)&cb->dmaBuffer);
    } else {
        DPRINTF(SDMAEngine, "SDMA polling mem addr %p, val %d ref %d done.\n",
                pkt->address, dma_buffer, pkt->ref);

        delete header;
        delete pkt;
        decodeNext(q);
    }
}
942
bool
SDMAEngine::pollRegMemFunc(uint32_t value, uint32_t reference, uint32_t func)
{
    switch (func) {
      case 0:
        return true;
      case 1:
        return value < reference;
      case 2:
        return value <= reference;
      case 3:
        return value == reference;
      case 4:
        return value != reference;
      case 5:
        return value >= reference;
      case 6:
        return value > reference;
      default:
        panic("SDMA POLL_REGMEM unknown comparison function.");
    }
}
973
/* Implements a PTE PDE generation packet. */
void
SDMAEngine::ptePde(SDMAQueue *q, sdmaPtePde *pkt)
{
    q->incRptr(sizeof(sdmaPtePde));
    pkt->count++;

    DPRINTF(SDMAEngine, "PTEPDE init: %d inc: %d count: %d\n",
            pkt->initValue, pkt->increment, pkt->count);

    // Generate pkt->count double dwords using the initial value, the
    // increment, and a mask.
    uint64_t *dmaBuffer = new uint64_t[pkt->count];
    for (int i = 0; i < pkt->count; i++) {
        dmaBuffer[i] = (pkt->mask | (pkt->initValue + (i * pkt->increment)));
    }

    // Write the generated data to the destination address.
    if (gpuDevice->getVM().inMMHUB(pkt->dest)) {
        Addr mmhub_addr = pkt->dest - gpuDevice->getVM().getMMHUBBase();

        fatal_if(gpuDevice->getVM().inGARTRange(mmhub_addr),
                 "SDMA write to GART not implemented");

        auto cb = new EventFunctionWrapper(
            [ = ]{ ptePdeDone(q, pkt, dmaBuffer); }, name());
        gpuDevice->getMemMgr()->writeRequest(mmhub_addr, (uint8_t *)dmaBuffer,
                                             sizeof(uint64_t) * pkt->count, 0,
                                             cb);
    } else {
        if (q->priv()) {
            pkt->dest = getGARTAddr(pkt->dest);
        }
        auto cb = new DmaVirtCallback<uint64_t>(
            [ = ] (const uint64_t &) { ptePdeDone(q, pkt, dmaBuffer); });
        dmaWriteVirt(pkt->dest, sizeof(uint64_t) * pkt->count, cb,
                     (void *)dmaBuffer);
    }
}
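/*
 * Generation example: initValue = 0x1000, increment = 0x1000, and a raw
 * count field of 3 (bumped to 4 above) emit four consecutive 4 KiB page
 * entries: mask | 0x1000, 0x2000, 0x3000, and 0x4000.
 */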
1013
/* Completion of a PTE PDE generation packet. */
void
SDMAEngine::ptePdeDone(SDMAQueue *q, sdmaPtePde *pkt, uint64_t *dmaBuffer)
{
    DPRINTF(SDMAEngine, "PtePde packet completed to %p, %d double dwords\n",
            pkt->dest, pkt->count);

    delete [] dmaBuffer;
    delete pkt;
    decodeNext(q);
}
1025
void
SDMAEngine::atomic(SDMAQueue *q, sdmaAtomicHeader *header, sdmaAtomic *pkt)
{
    q->incRptr(sizeof(sdmaAtomic));
    DPRINTF(SDMAEngine, "Atomic op %d on addr %#lx, src: %ld, cmp: %ld, loop?"
            " %d loopInt: %d\n", header->opcode, pkt->addr, pkt->srcData,
            pkt->cmpData, header->loop, pkt->loopInt);

    // Read the data at pkt->addr
    uint64_t *dmaBuffer = new uint64_t;
    auto cb = new DmaVirtCallback<uint64_t>(
        [ = ] (const uint64_t &)
        { atomicData(q, header, pkt, dmaBuffer); });
    dmaReadVirt(pkt->addr, sizeof(uint64_t), cb, (void *)dmaBuffer);
}

void
SDMAEngine::atomicData(SDMAQueue *q, sdmaAtomicHeader *header, sdmaAtomic *pkt,
                       uint64_t *dmaBuffer)
{
    DPRINTF(SDMAEngine, "Atomic op %d on addr %#lx got data %#lx\n",
            header->opcode, pkt->addr, *dmaBuffer);

    if (header->opcode == SDMA_ATOMIC_ADD64) {
        // Atomic add with return -- dst = dst + src
        int64_t dst_data = *dmaBuffer;
        int64_t src_data = pkt->srcData;

        DPRINTF(SDMAEngine, "Atomic ADD_RTN: %ld + %ld = %ld\n", dst_data,
                src_data, dst_data + src_data);

        // Reuse the dmaBuffer allocated above.
        *dmaBuffer = dst_data + src_data;

        auto cb = new DmaVirtCallback<uint64_t>(
            [ = ] (const uint64_t &)
            { atomicDone(q, header, pkt, dmaBuffer); });
        dmaWriteVirt(pkt->addr, sizeof(uint64_t), cb, (void *)dmaBuffer);
    } else {
        panic("Unsupported SDMA atomic opcode: %d\n", header->opcode);
    }
}

void
SDMAEngine::atomicDone(SDMAQueue *q, sdmaAtomicHeader *header, sdmaAtomic *pkt,
                       uint64_t *dmaBuffer)
{
    DPRINTF(SDMAEngine, "Atomic op %d on addr %#lx complete (sent %lx)\n",
            header->opcode, pkt->addr, *dmaBuffer);

    delete dmaBuffer;
    delete header;
    delete pkt;
    decodeNext(q);
}
1081
void
SDMAEngine::constFill(SDMAQueue *q, sdmaConstFill *pkt, uint32_t header)
{
    q->incRptr(sizeof(sdmaConstFill));

    sdmaConstFillHeader fill_header;
    fill_header.ordinal = header;

    DPRINTF(SDMAEngine, "ConstFill %lx srcData %x count %d size %d sw %d\n",
            pkt->addr, pkt->srcData, pkt->count, fill_header.fillsize,
            fill_header.sw);

    // Count is the number of <size> elements - 1. Size is log2 of the byte
    // size.
    int fill_bytes = (pkt->count + 1) * (1 << fill_header.fillsize);
    uint8_t *fill_data = new uint8_t[fill_bytes];

    memset(fill_data, pkt->srcData, fill_bytes);

    Addr device_addr = getDeviceAddress(pkt->addr);
    if (device_addr) {
        DPRINTF(SDMAEngine, "ConstFill %d bytes of %x to device at %lx\n",
                fill_bytes, pkt->srcData, pkt->addr);

        auto cb = new EventFunctionWrapper(
            [ = ]{ constFillDone(q, pkt, fill_data); }, name());

        // Fill the minimum page size at a time in case the physical
        // addresses are not contiguous.
        ChunkGenerator gen(pkt->addr, fill_bytes, AMDGPU_MMHUB_PAGE_SIZE);
        for (; !gen.done(); gen.next()) {
            Addr chunk_addr = getDeviceAddress(gen.addr());
            assert(chunk_addr);

            DPRINTF(SDMAEngine, "Filling chunk of %d bytes to %#lx (%#lx)\n",
                    gen.size(), gen.addr(), chunk_addr);

            gpuDevice->getMemMgr()->writeRequest(chunk_addr, fill_data,
                                                 gen.size(), 0,
                                                 gen.last() ? cb : nullptr);
            fill_data += gen.size();
        }
    } else {
        DPRINTF(SDMAEngine, "ConstFill %d bytes of %x to host at %lx\n",
                fill_bytes, pkt->srcData, pkt->addr);

        auto cb = new DmaVirtCallback<uint64_t>(
            [ = ] (const uint64_t &)
            { constFillDone(q, pkt, fill_data); });
        dmaWriteVirt(pkt->addr, fill_bytes, cb, (void *)fill_data);
    }
}
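/*
 * Fill-size example: count = 0 with fillsize = 2 fills (0 + 1) *
 * (1 << 2) = 4 bytes. Note the memset above replicates only the low
 * byte of srcData across the destination buffer.
 */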
1133
void
SDMAEngine::constFillDone(SDMAQueue *q, sdmaConstFill *pkt, uint8_t *fill_data)
{
    DPRINTF(SDMAEngine, "ConstFill to %lx done\n", pkt->addr);

    delete [] fill_data;
    delete pkt;
    decodeNext(q);
}
1143
AddrRangeList
SDMAEngine::getAddrRanges() const
{
    AddrRangeList ranges;
    return ranges;
}
1150
void
SDMAEngine::serialize(CheckpointOut &cp) const
{
    // Serialize the DmaVirtDevice base class
    DmaVirtDevice::serialize(cp);

    SERIALIZE_SCALAR(gfxBase);
    SERIALIZE_SCALAR(gfxRptr);
    SERIALIZE_SCALAR(gfxDoorbell);
    SERIALIZE_SCALAR(gfxDoorbellOffset);
    SERIALIZE_SCALAR(gfxWptr);
    SERIALIZE_SCALAR(pageBase);
    SERIALIZE_SCALAR(pageRptr);
    SERIALIZE_SCALAR(pageDoorbell);
    SERIALIZE_SCALAR(pageDoorbellOffset);
    SERIALIZE_SCALAR(pageWptr);

    int num_queues = 4;

    std::vector<SDMAQueue *> queues;
    queues.push_back((SDMAQueue *)&gfx);
    queues.push_back((SDMAQueue *)&page);
    queues.push_back((SDMAQueue *)&gfxIb);
    queues.push_back((SDMAQueue *)&pageIb);

    Addr base[num_queues];
    Addr rptr[num_queues];
    Addr wptr[num_queues];
    Addr size[num_queues];
    bool processing[num_queues];

    for (int i = 0; i < num_queues; i++) {
        base[i] = queues[i]->base();
        rptr[i] = queues[i]->getRptr();
        wptr[i] = queues[i]->getWptr();
        size[i] = queues[i]->size();
        processing[i] = queues[i]->processing();
    }

    SERIALIZE_ARRAY(base, num_queues);
    SERIALIZE_ARRAY(rptr, num_queues);
    SERIALIZE_ARRAY(wptr, num_queues);
    SERIALIZE_ARRAY(size, num_queues);
    SERIALIZE_ARRAY(processing, num_queues);
}
1196
void
SDMAEngine::unserialize(CheckpointIn &cp)
{
    // Unserialize the DmaVirtDevice base class
    DmaVirtDevice::unserialize(cp);

    UNSERIALIZE_SCALAR(gfxBase);
    UNSERIALIZE_SCALAR(gfxRptr);
    UNSERIALIZE_SCALAR(gfxDoorbell);
    UNSERIALIZE_SCALAR(gfxDoorbellOffset);
    UNSERIALIZE_SCALAR(gfxWptr);
    UNSERIALIZE_SCALAR(pageBase);
    UNSERIALIZE_SCALAR(pageRptr);
    UNSERIALIZE_SCALAR(pageDoorbell);
    UNSERIALIZE_SCALAR(pageDoorbellOffset);
    UNSERIALIZE_SCALAR(pageWptr);

    int num_queues = 4;
    Addr base[num_queues];
    Addr rptr[num_queues];
    Addr wptr[num_queues];
    Addr size[num_queues];
    bool processing[num_queues];

    UNSERIALIZE_ARRAY(base, num_queues);
    UNSERIALIZE_ARRAY(rptr, num_queues);
    UNSERIALIZE_ARRAY(wptr, num_queues);
    UNSERIALIZE_ARRAY(size, num_queues);
    UNSERIALIZE_ARRAY(processing, num_queues);

    std::vector<SDMAQueue *> queues;
    queues.push_back((SDMAQueue *)&gfx);
    queues.push_back((SDMAQueue *)&page);
    queues.push_back((SDMAQueue *)&gfxIb);
    queues.push_back((SDMAQueue *)&pageIb);

    for (int i = 0; i < num_queues; i++) {
        queues[i]->base(base[i]);
        queues[i]->rptr(rptr[i]);
        queues[i]->wptr(wptr[i]);
        queues[i]->size(size[i]);
        queues[i]->processing(processing[i]);
    }
}
1241
void
SDMAEngine::writeMMIO(PacketPtr pkt, Addr mmio_offset)
{
    DPRINTF(SDMAEngine, "Writing offset %#x with data %x\n", mmio_offset,
            pkt->getLE<uint32_t>());

    // In Vega10 headers, the offsets are the same for both SDMAs
    switch (mmio_offset) {
      case mmSDMA_GFX_RB_BASE:
        setGfxBaseLo(pkt->getLE<uint32_t>());
        break;
      case mmSDMA_GFX_RB_BASE_HI:
        setGfxBaseHi(pkt->getLE<uint32_t>());
        break;
      case mmSDMA_GFX_RB_RPTR_ADDR_LO:
        setGfxRptrLo(pkt->getLE<uint32_t>());
        break;
      case mmSDMA_GFX_RB_RPTR_ADDR_HI:
        setGfxRptrHi(pkt->getLE<uint32_t>());
        break;
      case mmSDMA_GFX_DOORBELL:
        setGfxDoorbellLo(pkt->getLE<uint32_t>());
        break;
      case mmSDMA_GFX_DOORBELL_OFFSET:
        setGfxDoorbellOffsetLo(pkt->getLE<uint32_t>());
        // Bit 28 of doorbell indicates that doorbell is enabled.
        if (bits(getGfxDoorbell(), 28, 28)) {
            gpuDevice->setDoorbellType(getGfxDoorbellOffset(), SDMAGfx);
            gpuDevice->setSDMAEngine(getGfxDoorbellOffset(), this);
        }
        break;
      case mmSDMA_GFX_RB_CNTL:
        // The ring size is decoded from the raw register value in
        // setGfxSize(), so pass the register through unmodified.
        setGfxSize(pkt->getLE<uint32_t>());
        break;
      case mmSDMA_GFX_RB_WPTR_POLL_ADDR_LO:
        setGfxWptrLo(pkt->getLE<uint32_t>());
        break;
      case mmSDMA_GFX_RB_WPTR_POLL_ADDR_HI:
        setGfxWptrHi(pkt->getLE<uint32_t>());
        break;
      case mmSDMA_PAGE_RB_BASE:
        setPageBaseLo(pkt->getLE<uint32_t>());
        break;
      case mmSDMA_PAGE_RB_RPTR_ADDR_LO:
        setPageRptrLo(pkt->getLE<uint32_t>());
        break;
      case mmSDMA_PAGE_RB_RPTR_ADDR_HI:
        setPageRptrHi(pkt->getLE<uint32_t>());
        break;
      case mmSDMA_PAGE_DOORBELL:
        setPageDoorbellLo(pkt->getLE<uint32_t>());
        break;
      case mmSDMA_PAGE_DOORBELL_OFFSET:
        setPageDoorbellOffsetLo(pkt->getLE<uint32_t>());
        // Bit 28 of doorbell indicates that doorbell is enabled.
        if (bits(getPageDoorbell(), 28, 28)) {
            gpuDevice->setDoorbellType(getPageDoorbellOffset(), SDMAPage);
            gpuDevice->setSDMAEngine(getPageDoorbellOffset(), this);
        }
        break;
      case mmSDMA_PAGE_RB_CNTL:
        setPageSize(pkt->getLE<uint32_t>());
        break;
      case mmSDMA_PAGE_RB_WPTR_POLL_ADDR_LO:
        setPageWptrLo(pkt->getLE<uint32_t>());
        break;
      default:
        DPRINTF(SDMAEngine, "Unknown SDMA MMIO %#x\n", mmio_offset);
        break;
    }
}
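/*
 * Typical programming sequence (a sketch, not prescribed by this file):
 * the driver sizes the ring via mmSDMA_GFX_RB_CNTL, places it with
 * mmSDMA_GFX_RB_BASE/_BASE_HI, sets the rptr writeback address registers,
 * and then writes mmSDMA_GFX_DOORBELL (bit 28 = enable) followed by
 * mmSDMA_GFX_DOORBELL_OFFSET, at which point the doorbell is registered
 * with the device and ring processing can begin.
 */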
1319
void
SDMAEngine::setGfxBaseLo(uint32_t data)
{
    gfxBase = insertBits(gfxBase, 31, 0, 0);
    gfxBase |= data;
    gfx.base((gfxBase >> 1) << 12);
}

void
SDMAEngine::setGfxBaseHi(uint32_t data)
{
    gfxBase = insertBits(gfxBase, 63, 32, 0);
    gfxBase |= ((uint64_t)data) << 32;
    gfx.base((gfxBase >> 1) << 12);
}

void
SDMAEngine::setGfxRptrLo(uint32_t data)
{
    gfxRptr = insertBits(gfxRptr, 31, 0, 0);
    gfxRptr |= data;
    gfx.rptrWbAddr(getGARTAddr(gfxRptr));
}

void
SDMAEngine::setGfxRptrHi(uint32_t data)
{
    gfxRptr = insertBits(gfxRptr, 63, 32, 0);
    gfxRptr |= ((uint64_t)data) << 32;
    gfx.rptrWbAddr(getGARTAddr(gfxRptr));
}

void
SDMAEngine::setGfxDoorbellLo(uint32_t data)
{
    gfxDoorbell = insertBits(gfxDoorbell, 31, 0, 0);
    gfxDoorbell |= data;
}

void
SDMAEngine::setGfxDoorbellHi(uint32_t data)
{
    gfxDoorbell = insertBits(gfxDoorbell, 63, 32, 0);
    gfxDoorbell |= ((uint64_t)data) << 32;
}

void
SDMAEngine::setGfxDoorbellOffsetLo(uint32_t data)
{
    gfxDoorbellOffset = insertBits(gfxDoorbellOffset, 31, 0, 0);
    gfxDoorbellOffset |= data;
    if (bits(gfxDoorbell, 28, 28)) {
        gpuDevice->setDoorbellType(gfxDoorbellOffset, SDMAGfx);
        gpuDevice->setSDMAEngine(gfxDoorbellOffset, this);
    }
}

void
SDMAEngine::setGfxDoorbellOffsetHi(uint32_t data)
{
    gfxDoorbellOffset = insertBits(gfxDoorbellOffset, 63, 32, 0);
    gfxDoorbellOffset |= ((uint64_t)data) << 32;
}

void
SDMAEngine::setGfxSize(uint32_t data)
{
    uint32_t rb_size = bits(data, 6, 1);
    assert(rb_size >= 6 && rb_size <= 62);
    gfx.size(1 << (rb_size + 2));
}

void
SDMAEngine::setGfxWptrLo(uint32_t data)
{
    gfxWptr = insertBits(gfxWptr, 31, 0, 0);
    gfxWptr |= data;
}

void
SDMAEngine::setGfxWptrHi(uint32_t data)
{
    gfxWptr = insertBits(gfxWptr, 63, 32, 0);
    gfxWptr |= ((uint64_t)data) << 32;
}

void
SDMAEngine::setPageBaseLo(uint32_t data)
{
    pageBase = insertBits(pageBase, 31, 0, 0);
    pageBase |= data;
    page.base((pageBase >> 1) << 12);
}

void
SDMAEngine::setPageBaseHi(uint32_t data)
{
    pageBase = insertBits(pageBase, 63, 32, 0);
    pageBase |= ((uint64_t)data) << 32;
    page.base((pageBase >> 1) << 12);
}

void
SDMAEngine::setPageRptrLo(uint32_t data)
{
    pageRptr = insertBits(pageRptr, 31, 0, 0);
    pageRptr |= data;
    page.rptrWbAddr(getGARTAddr(pageRptr));
}

void
SDMAEngine::setPageRptrHi(uint32_t data)
{
    pageRptr = insertBits(pageRptr, 63, 32, 0);
    pageRptr |= ((uint64_t)data) << 32;
    page.rptrWbAddr(getGARTAddr(pageRptr));
}

void
SDMAEngine::setPageDoorbellLo(uint32_t data)
{
    pageDoorbell = insertBits(pageDoorbell, 31, 0, 0);
    pageDoorbell |= data;
}

void
SDMAEngine::setPageDoorbellHi(uint32_t data)
{
    pageDoorbell = insertBits(pageDoorbell, 63, 32, 0);
    pageDoorbell |= ((uint64_t)data) << 32;
}

void
SDMAEngine::setPageDoorbellOffsetLo(uint32_t data)
{
    pageDoorbellOffset = insertBits(pageDoorbellOffset, 31, 0, 0);
    pageDoorbellOffset |= data;
    if (bits(pageDoorbell, 28, 28)) {
        gpuDevice->setDoorbellType(pageDoorbellOffset, SDMAPage);
        gpuDevice->setSDMAEngine(pageDoorbellOffset, this);
    }
}

void
SDMAEngine::setPageDoorbellOffsetHi(uint32_t data)
{
    pageDoorbellOffset = insertBits(pageDoorbellOffset, 63, 32, 0);
    pageDoorbellOffset |= ((uint64_t)data) << 32;
}

void
SDMAEngine::setPageSize(uint32_t data)
{
    uint32_t rb_size = bits(data, 6, 1);
    assert(rb_size >= 6 && rb_size <= 62);
    page.size(1 << (rb_size + 2));
}

void
SDMAEngine::setPageWptrLo(uint32_t data)
{
    pageWptr = insertBits(pageWptr, 31, 0, 0);
    pageWptr |= data;
}

void
SDMAEngine::setPageWptrHi(uint32_t data)
{
    pageWptr = insertBits(pageWptr, 63, 32, 0);
    pageWptr |= ((uint64_t)data) << 32;
}

} // namespace gem5