gem5 v23.0.0.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
sdma_engine.cc
Go to the documentation of this file.
1/*
2 * Copyright (c) 2021 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * 3. Neither the name of the copyright holder nor the names of its
16 * contributors may be used to endorse or promote products derived from this
17 * software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
33
35#include "arch/generic/mmu.hh"
36#include "debug/SDMAData.hh"
37#include "debug/SDMAEngine.hh"
41#include "mem/packet.hh"
42#include "mem/packet_access.hh"
43#include "params/SDMAEngine.hh"
44
45namespace gem5
46{
47
// Constructor: zero-initializes the ring-buffer bookkeeping (base/rptr/
// doorbell/wptr for the gfx and page rings) and wires each ring queue to
// its indirect-buffer (IB) queue. The gfx/page queues are always present
// and marked valid up front; rlc0/rlc1 only become valid when a user queue
// is registered via registerRLCQueue().
// NOTE(review): the doxygen extraction dropped several interior lines here
// (listing nos. 56, 59-60, 63, 66-67, 70, 73), so some queue-setup
// statements (likely parent/queue-type assignments) are missing from this
// view — confirm against the upstream file.
48SDMAEngine::SDMAEngine(const SDMAEngineParams &p)
49 : DmaVirtDevice(p), id(0), gfxBase(0), gfxRptr(0),
50 gfxDoorbell(0), gfxDoorbellOffset(0), gfxWptr(0), pageBase(0),
51 pageRptr(0), pageDoorbell(0), pageDoorbellOffset(0),
52 pageWptr(0), gpuDevice(nullptr), walker(p.walker),
53 mmioBase(p.mmio_base), mmioSize(p.mmio_size)
54{
55 gfx.ib(&gfxIb);
57 gfx.valid(true);
58 gfxIb.valid(true);
61
62 page.ib(&pageIb);
64 page.valid(true);
65 pageIb.valid(true);
68
69 rlc0.ib(&rlc0Ib);
71
72 rlc1.ib(&rlc1Ib);
74}
75
// setGPUDevice: stores the back-pointer to the owning AMDGPUDevice so the
// engine can reach the VM, memory manager and interrupt handler.
// NOTE(review): the signature line (listing no. 77) is missing from this
// extraction.
76void
78{
79 gpuDevice = gpu_device;
81}
82
// getIHClientId: maps this engine's SDMA id (0..7) to an interrupt-handler
// client id; unknown ids are fatal.
// NOTE(review): the signature (listing no. 84) and every case's return
// statement (88, 90, 92, ...) were dropped by the extraction, so the
// actual client-id constants are not visible here.
83int
85{
86 switch (id) {
87 case 0:
89 case 1:
91 case 2:
93 case 3:
95 case 4:
97 case 5:
99 case 6:
101 case 7:
103 default:
104 panic("Unknown SDMA id");
105 }
106}
107
// getGARTAddr: converts an address into GART-aperture form. Addresses in
// the AGP aperture pass through unchanged; otherwise the page-frame bits
// are shifted left by 3 (i.e. the 4 KiB frame number is scaled by 8) while
// the low 12 offset bits are preserved.
// NOTE(review): the signature line (listing no. 109) is missing from this
// extraction.
108Addr
110{
111 if (!gpuDevice->getVM().inAGP(addr)) {
112 Addr low_bits = bits(addr, 11, 0);
113 addr = (((addr >> 12) << 3) << 12) | low_bits;
114 }
115 return addr;
116}
117
// getDeviceAddress: decides whether raw_addr targets device-local memory
// and, if so, returns the MMHUB-relative device address; returns 0 for
// host addresses. A dummy functional translation is performed first since
// for vmid > 0 the MMHUB check must be done on the translated address.
// NOTE(review): the signature line (listing no. 119) is missing from this
// extraction.
118Addr
120{
121 // SDMA packets can access both host and device memory as either a source
122 // or destination address. We don't know which until it is translated, so
123 // we do a dummy functional translation to determine if the address
124 // resides in system memory or not.
125 auto tgen = translate(raw_addr, 64);
126 auto addr_range = *(tgen->begin());
127 Addr tmp_addr = addr_range.paddr;
128 DPRINTF(SDMAEngine, "getDeviceAddress raw_addr %#lx -> %#lx\n",
129 raw_addr, tmp_addr);
130
131 // SDMA packets will access device memory through the MMHUB aperture in
132 // supervisor mode (vmid == 0) and in user mode (vmid > 0). In the case
133 // of vmid == 0 the address is already an MMHUB address in the packet,
134 // so simply subtract the MMHUB base. For vmid > 0 the address is a
135 // virtual address that must first be translated. The translation will
136 // return an MMHUB address, then we can similarly subtract the base to
137 // get the device address. Otherwise, for host, device address is 0.
138 Addr device_addr = 0;
139 if ((gpuDevice->getVM().inMMHUB(raw_addr) && cur_vmid == 0) ||
140 (gpuDevice->getVM().inMMHUB(tmp_addr) && cur_vmid != 0)) {
141 if (cur_vmid == 0) {
142 device_addr = raw_addr - gpuDevice->getVM().getMMHUBBase();
143 } else {
144 device_addr = tmp_addr - gpuDevice->getVM().getMMHUBBase();
145 }
146 }
147
148 return device_addr;
149}
150
// translate: selects the appropriate translation generator for vaddr.
// Priority: user translation for user queues (vmid > 0), then the AGP
// aperture, then the MMHUB aperture, and finally GART as the default.
// NOTE(review): the function signature (listing nos. 151-157) and the
// TranslationGen constructor lines (161-162, 167, 170, 177) are missing
// from this extraction — only the dispatch structure is visible.
158{
159 if (cur_vmid > 0) {
160 // Only user translation is available to user queues (vmid > 0)
163 cur_vmid, vaddr, size));
164 } else if (gpuDevice->getVM().inAGP(vaddr)) {
165 // Use AGP translation gen
166 return TranslationGenPtr(
168 } else if (gpuDevice->getVM().inMMHUB(vaddr)) {
169 // Use MMHUB translation gen
171 &gpuDevice->getVM(), vaddr, size));
172 }
173
174 // Assume GART otherwise as this is the only other translation aperture
175 // available to the SDMA engine processor.
176 return TranslationGenPtr(
178}
179
// registerRLCQueue: binds a doorbell to the first free RLC slot (rlc0 then
// rlc1), initializing the slot's ring state from the memory queue
// descriptor (MQD): ring size decoded from rb_cntl bits 6..1, base from
// rb_base << 8, rptr/wptr, and the 64-bit rptr write-back address
// assembled from its hi/lo halves. Panics if both slots are occupied.
// NOTE(review): the signature line (listing no. 181) is missing from this
// extraction.
180void
182{
183 uint32_t rlc_size = 4UL << bits(mqd->sdmax_rlcx_rb_cntl, 6, 1);
184 Addr rptr_wb_addr = mqd->sdmax_rlcx_rb_rptr_addr_hi;
185 rptr_wb_addr <<= 32;
186 rptr_wb_addr |= mqd->sdmax_rlcx_rb_rptr_addr_lo;
187
188 // Get first free RLC
189 if (!rlc0.valid()) {
190 DPRINTF(SDMAEngine, "Doorbell %lx mapped to RLC0\n", doorbell);
191 rlcInfo[0] = doorbell;
192 rlc0.valid(true);
193 rlc0.base(mqd->rb_base << 8);
194 rlc0.size(rlc_size);
195 rlc0.rptr(0);
196 rlc0.incRptr(mqd->rptr);
197 rlc0.setWptr(mqd->wptr);
198 rlc0.rptrWbAddr(rptr_wb_addr);
199 rlc0.processing(false);
200 rlc0.setMQD(mqd);
201 rlc0.setMQDAddr(mqdAddr);
202 } else if (!rlc1.valid()) {
203 DPRINTF(SDMAEngine, "Doorbell %lx mapped to RLC1\n", doorbell);
204 rlcInfo[1] = doorbell;
205 rlc1.valid(true);
206 rlc1.base(mqd->rb_base << 8);
207 rlc1.size(rlc_size);
208 rlc1.rptr(0);
209 rlc1.incRptr(mqd->rptr);
210 rlc1.setWptr(mqd->wptr);
211 rlc1.rptrWbAddr(rptr_wb_addr);
212 rlc1.processing(false);
213 rlc1.setMQD(mqd);
214 rlc1.setMQDAddr(mqdAddr);
215 } else {
216 panic("No free RLCs. Check they are properly unmapped.");
217 }
218}
219
// unregisterRLCQueue: tears down the RLC slot registered at the given
// doorbell. If the slot still holds an MQD pointer, the current
// rptr/wptr are written back into it and the descriptor is DMA'd back to
// its guest-memory address before the slot is invalidated. Unknown
// doorbells are fatal.
// NOTE(review): the signature line (listing no. 221) is missing from this
// extraction. The write-back callbacks are intentionally empty; the DMA
// completion needs no further action here.
220void
222{
223 DPRINTF(SDMAEngine, "Unregistering RLC queue at %#lx\n", doorbell);
224 if (rlcInfo[0] == doorbell) {
225 SDMAQueueDesc *mqd = rlc0.getMQD();
226 if (mqd) {
227 DPRINTF(SDMAEngine, "Writing RLC0 SDMAMQD back to %#lx\n",
228 rlc0.getMQDAddr());
229
230 mqd->rptr = rlc0.globalRptr();
231 mqd->wptr = rlc0.getWptr();
232
233 auto cb = new DmaVirtCallback<uint32_t>(
234 [ = ] (const uint32_t &) { });
235 dmaWriteVirt(rlc0.getMQDAddr(), sizeof(SDMAQueueDesc), cb, mqd);
236 } else {
237 warn("RLC0 SDMAMQD address invalid\n");
238 }
239 rlc0.valid(false);
240 rlcInfo[0] = 0;
241 } else if (rlcInfo[1] == doorbell) {
242 SDMAQueueDesc *mqd = rlc1.getMQD();
243 if (mqd) {
244 DPRINTF(SDMAEngine, "Writing RLC1 SDMAMQD back to %#lx\n",
245 rlc1.getMQDAddr());
246
247 mqd->rptr = rlc1.globalRptr();
248 mqd->wptr = rlc1.getWptr();
249
250 auto cb = new DmaVirtCallback<uint32_t>(
251 [ = ] (const uint32_t &) { });
252 dmaWriteVirt(rlc1.getMQDAddr(), sizeof(SDMAQueueDesc), cb, mqd);
253 } else {
254 warn("RLC1 SDMAMQD address invalid\n");
255 }
256 rlc1.valid(false);
257 rlcInfo[1] = 0;
258 } else {
259 panic("Cannot unregister: no RLC queue at %#lx\n", doorbell);
260 }
261}
262
// deregisterAllQueues: unregisters every RLC queue that currently has a
// doorbell mapped (a zero entry in rlcInfo means the slot is free).
// NOTE(review): the signature line (listing no. 264) is missing from this
// extraction.
263void
265{
266 for (auto doorbell: rlcInfo) {
267 if (doorbell) {
268 unregisterRLCQueue(doorbell);
269 }
270 }
271}
272
273/* Start decoding packets from the Gfx queue. */
// Updates the gfx write pointer from the doorbell value; if the queue is
// not already being drained, flags it as processing and kicks off packet
// decode. NOTE(review): the signature line (listing no. 275) is missing
// from this extraction.
274void
276{
277 gfx.setWptr(wptrOffset);
278 if (!gfx.processing()) {
279 gfx.processing(true);
280 decodeNext(&gfx);
281 }
282}
283
284/* Start decoding packets from the Page queue. */
// Same pattern as processGfx but for the page ring. NOTE(review): the
// signature (listing no. 286) and the decodeNext(&page) call (no. 291)
// are missing from this extraction.
285void
287{
288 page.setWptr(wptrOffset);
289 if (!page.processing()) {
290 page.processing(true);
292 }
293}
294
295/* Process RLC queue at given doorbell. */
296void
297SDMAEngine::processRLC(Addr doorbellOffset, Addr wptrOffset)
298{
299 if (rlcInfo[0] == doorbellOffset) {
300 processRLC0(wptrOffset);
301 } else if (rlcInfo[1] == doorbellOffset) {
302 processRLC1(wptrOffset);
303 } else {
304 panic("Cannot process: no RLC queue at %#lx\n", doorbellOffset);
305 }
306}
307
308/* Start decoding packets from the RLC0 queue. */
// Sets cur_vmid = 1 before decoding: RLC queues are user queues, so their
// packet addresses go through user translation (see translate()).
// NOTE(review): the decodeNext(&rlc0) call (listing no. 318) is missing
// from this extraction, as is the signature (no. 310).
309void
311{
312 assert(rlc0.valid());
313
314 rlc0.setWptr(wptrOffset);
315 if (!rlc0.processing()) {
316 cur_vmid = 1;
317 rlc0.processing(true);
319 }
320}
321
322/* Start decoding packets from the RLC1 queue. */
// Mirror of processRLC0 for the second RLC slot. NOTE(review): signature
// (listing no. 324) and decodeNext call (no. 332) missing from this
// extraction.
323void
325{
326 assert(rlc1.valid());
327
328 rlc1.setWptr(wptrOffset);
329 if (!rlc1.processing()) {
330 cur_vmid = 1;
331 rlc1.processing(true);
333 }
334}
335
336/* Decoding next packet in the queue. */
// Core decode loop: while rptr != wptr, read the next 4-byte packet header
// and hand it to decodeHeader. When the queue drains, write the global
// rptr back to the host (if a write-back address is configured), clear the
// processing flag, switch back to the parent queue if this was an IB, and
// reset cur_vmid to supervisor (0).
// NOTE(review): the signature line (listing no. 338) is missing from this
// extraction.
337void
339{
340 DPRINTF(SDMAEngine, "SDMA decode rptr %p wptr %p\n", q->rptr(), q->wptr());
341
342 if (q->rptr() != q->wptr()) {
343 // We are using lambda functions passed to the DmaVirtCallback objects
344 // which will call the actuall callback method (e.g., decodeHeader).
345 // The dmaBuffer member of the DmaVirtCallback is passed to the lambda
346 // function as header in this case.
347 auto cb = new DmaVirtCallback<uint32_t>(
348 [ = ] (const uint32_t &header)
349 { decodeHeader(q, header); });
350 dmaReadVirt(q->rptr(), sizeof(uint32_t), cb, &cb->dmaBuffer);
351 } else {
352 // The driver expects the rptr to be written back to host memory
353 // periodically. In simulation, we writeback rptr after each burst of
354 // packets from a doorbell, rather than using the cycle count which
355 // is not accurate in all simulation settings (e.g., KVM).
356 DPRINTF(SDMAEngine, "Writing rptr %#lx back to host addr %#lx\n",
357 q->globalRptr(), q->rptrWbAddr());
358 if (q->rptrWbAddr()) {
359 auto cb = new DmaVirtCallback<uint64_t>(
360 [ = ](const uint64_t &) { }, q->globalRptr());
361 dmaWriteVirt(q->rptrWbAddr(), sizeof(Addr), cb, &cb->dmaBuffer);
362 }
363 q->processing(false);
364 if (q->parent()) {
365 DPRINTF(SDMAEngine, "SDMA switching queues\n");
366 decodeNext(q->parent());
367 }
368 cur_vmid = 0;
369 }
370}
371
372/* Decoding the header of a packet. */
// Dispatches on the 8-bit opcode (header bits 7..0) and 8-bit sub-opcode
// (bits 15..8). For implemented packets, the packet body is DMA-read from
// the queue into a freshly allocated struct and the corresponding handler
// (copy/write/indirectBuffer/fence/trap/pollRegMem/atomic/ptePde/
// srbmWrite) is invoked from the DMA callback. Unimplemented packets
// either panic or advance rptr past the body and continue decoding.
// NOTE(review): this extraction dropped many lines — the function
// signature (listing no. 374), most sub-opcode case labels (e.g. 395, 402,
// 405, 408, ...), the DmaVirtCallback constructor lines (397, 433, 448,
// 456, 464, ...), and the header-struct copies for POLL_REGMEM/SRBM
// (476-477, 505, 561-562, 564) — confirm details upstream.
373void
375{
376 q->incRptr(sizeof(header));
377 int opcode = bits(header, 7, 0);
378 int sub_opcode = bits(header, 15, 8);
379
380 DmaVirtCallback<uint64_t> *cb = nullptr;
381 void *dmaBuffer = nullptr;
382
383 DPRINTF(SDMAEngine, "SDMA opcode %p sub-opcode %p\n", opcode, sub_opcode);
384
385 switch(opcode) {
386 case SDMA_OP_NOP: {
387 uint32_t NOP_count = (header >> 16) & 0x3FFF;
388 DPRINTF(SDMAEngine, "SDMA NOP packet with count %d\n", NOP_count);
389 if (NOP_count > 0) q->incRptr(NOP_count * 4);
390 decodeNext(q);
391 } break;
392 case SDMA_OP_COPY: {
393 DPRINTF(SDMAEngine, "SDMA Copy packet\n");
394 switch (sub_opcode) {
396 dmaBuffer = new sdmaCopy();
398 [ = ] (const uint64_t &)
399 { copy(q, (sdmaCopy *)dmaBuffer); });
400 dmaReadVirt(q->rptr(), sizeof(sdmaCopy), cb, dmaBuffer);
401 } break;
403 panic("SDMA_SUBOP_COPY_LINEAR_SUB_WIND not implemented");
404 } break;
406 panic("SDMA_SUBOP_COPY_TILED not implemented");
407 } break;
409 panic("SDMA_SUBOP_COPY_TILED_SUB_WIND not implemented");
410 } break;
412 panic("SDMA_SUBOP_COPY_T2T_SUB_WIND not implemented");
413 } break;
414 case SDMA_SUBOP_COPY_SOA: {
415 panic("SDMA_SUBOP_COPY_SOA not implemented");
416 } break;
418 panic("SDMA_SUBOP_COPY_DIRTY_PAGE not implemented");
419 } break;
421 panic("SDMA_SUBOP_COPY_LINEAR_PHY not implemented");
422 } break;
423 default: {
424 panic("SDMA unknown copy sub-opcode.");
425 } break;
426 }
427 } break;
428 case SDMA_OP_WRITE: {
429 DPRINTF(SDMAEngine, "SDMA Write packet\n");
430 switch (sub_opcode) {
432 dmaBuffer = new sdmaWrite();
434 [ = ] (const uint64_t &)
435 { write(q, (sdmaWrite *)dmaBuffer); });
436 dmaReadVirt(q->rptr(), sizeof(sdmaWrite), cb, dmaBuffer);
437 } break;
439 panic("SDMA_SUBOP_WRITE_TILED not implemented.\n");
440 } break;
441 default:
442 break;
443 }
444 } break;
445 case SDMA_OP_INDIRECT: {
446 DPRINTF(SDMAEngine, "SDMA IndirectBuffer packet\n");
447 dmaBuffer = new sdmaIndirectBuffer();
449 [ = ] (const uint64_t &)
450 { indirectBuffer(q, (sdmaIndirectBuffer *)dmaBuffer); });
451 dmaReadVirt(q->rptr(), sizeof(sdmaIndirectBuffer), cb, dmaBuffer);
452 } break;
453 case SDMA_OP_FENCE: {
454 DPRINTF(SDMAEngine, "SDMA Fence packet\n");
455 dmaBuffer = new sdmaFence();
457 [ = ] (const uint64_t &)
458 { fence(q, (sdmaFence *)dmaBuffer); });
459 dmaReadVirt(q->rptr(), sizeof(sdmaFence), cb, dmaBuffer);
460 } break;
461 case SDMA_OP_TRAP: {
462 DPRINTF(SDMAEngine, "SDMA Trap packet\n");
463 dmaBuffer = new sdmaTrap();
465 [ = ] (const uint64_t &)
466 { trap(q, (sdmaTrap *)dmaBuffer); });
467 dmaReadVirt(q->rptr(), sizeof(sdmaTrap), cb, dmaBuffer);
468 } break;
469 case SDMA_OP_SEM: {
470 q->incRptr(sizeof(sdmaSemaphore));
471 warn("SDMA_OP_SEM not implemented");
472 decodeNext(q);
473 } break;
474 case SDMA_OP_POLL_REGMEM: {
475 DPRINTF(SDMAEngine, "SDMA PollRegMem packet\n");
478 dmaBuffer = new sdmaPollRegMem();
480 [ = ] (const uint64_t &)
481 { pollRegMem(q, h, (sdmaPollRegMem *)dmaBuffer); });
482 dmaReadVirt(q->rptr(), sizeof(sdmaPollRegMem), cb, dmaBuffer);
483 switch (sub_opcode) {
485 panic("SDMA_SUBOP_POLL_REG_WRITE_MEM not implemented");
486 } break;
488 panic("SDMA_SUBOP_POLL_DBIT_WRITE_MEM not implemented");
489 } break;
491 panic("SDMA_SUBOP_POLL_MEM_VERIFY not implemented");
492 } break;
493 default:
494 break;
495 }
496 } break;
497 case SDMA_OP_COND_EXE: {
498 q->incRptr(sizeof(sdmaCondExec));
499 warn("SDMA_OP_SEM not implemented");
500 decodeNext(q);
501 } break;
502 case SDMA_OP_ATOMIC: {
503 DPRINTF(SDMAEngine, "SDMA Atomic packet\n");
504 dmaBuffer = new sdmaAtomic();
506 *h = *(sdmaAtomicHeader *)&header;
508 [ = ] (const uint64_t &)
509 { atomic(q, h, (sdmaAtomic *)dmaBuffer); });
510 dmaReadVirt(q->rptr(), sizeof(sdmaAtomic), cb, dmaBuffer);
511 } break;
512 case SDMA_OP_CONST_FILL: {
513 q->incRptr(sizeof(sdmaConstFill));
514 warn("SDMA_OP_CONST_FILL not implemented");
515 decodeNext(q);
516 } break;
517 case SDMA_OP_PTEPDE: {
518 DPRINTF(SDMAEngine, "SDMA PTEPDE packet\n");
519 switch (sub_opcode) {
521 DPRINTF(SDMAEngine, "SDMA PTEPDE_GEN sub-opcode\n");
522 dmaBuffer = new sdmaPtePde();
524 [ = ] (const uint64_t &)
525 { ptePde(q, (sdmaPtePde *)dmaBuffer); });
526 dmaReadVirt(q->rptr(), sizeof(sdmaPtePde), cb, dmaBuffer);
527 break;
529 panic("SDMA_SUBOP_PTEPDE_COPY not implemented");
530 break;
532 panic("SDMA_SUBOP_PTEPDE_COPY not implemented");
533 break;
535 panic("SDMA_SUBOP_PTEPDE_RMW not implemented");
536 } break;
537 default:
538 DPRINTF(SDMAEngine, "Unsupported PTEPDE sub-opcode %d\n",
539 sub_opcode);
540 decodeNext(q);
541 break;
542 }
543 } break;
544 case SDMA_OP_TIMESTAMP: {
545 q->incRptr(sizeof(sdmaTimestamp));
546 switch (sub_opcode) {
548 } break;
550 } break;
552 } break;
553 default:
554 break;
555 }
556 warn("SDMA_OP_TIMESTAMP not implemented");
557 decodeNext(q);
558 } break;
559 case SDMA_OP_SRBM_WRITE: {
560 DPRINTF(SDMAEngine, "SDMA SRBMWrite packet\n");
563 dmaBuffer = new sdmaSRBMWrite();
565 [ = ] (const uint64_t &)
566 { srbmWrite(q, header, (sdmaSRBMWrite *)dmaBuffer); });
567 dmaReadVirt(q->rptr(), sizeof(sdmaSRBMWrite), cb, dmaBuffer);
568 } break;
569 case SDMA_OP_PRE_EXE: {
570 q->incRptr(sizeof(sdmaPredExec));
571 warn("SDMA_OP_PRE_EXE not implemented");
572 decodeNext(q);
573 } break;
574 case SDMA_OP_DUMMY_TRAP: {
575 q->incRptr(sizeof(sdmaDummyTrap));
576 warn("SDMA_OP_DUMMY_TRAP not implemented");
577 decodeNext(q);
578 } break;
579 default: {
580 panic("Invalid SDMA packet.\n");
581 } break;
582 }
583}
584
585/* Implements a write packet. */
// Advances rptr past the packet struct, converts the dword count from its
// count-minus-one encoding, then DMA-reads the payload dwords that follow
// the packet in the queue; writeReadData continues once they arrive.
// NOTE(review): the signature line (listing no. 587) is missing from this
// extraction.
586void
588{
589 q->incRptr(sizeof(sdmaWrite));
590 // count represents the number of dwords - 1 to write
591 pkt->count++;
592 DPRINTF(SDMAEngine, "Write %d dwords to %lx\n", pkt->count, pkt->dest);
593
594 // first we have to read needed data from the SDMA queue
595 uint32_t *dmaBuffer = new uint32_t[pkt->count];
596 auto cb = new DmaVirtCallback<uint64_t>(
597 [ = ] (const uint64_t &) { writeReadData(q, pkt, dmaBuffer); });
598 dmaReadVirt(q->rptr(), sizeof(uint32_t) * pkt->count, cb,
599 (void *)dmaBuffer);
600}
601
602/* Completion of data reading for a write packet. */
603void
604SDMAEngine::writeReadData(SDMAQueue *q, sdmaWrite *pkt, uint32_t *dmaBuffer)
605{
606 int bufferSize = sizeof(uint32_t) * pkt->count;
607 q->incRptr(bufferSize);
608
609 DPRINTF(SDMAEngine, "Write packet data:\n");
610 for (int i = 0; i < pkt->count; ++i) {
611 DPRINTF(SDMAEngine, "%08x\n", dmaBuffer[i]);
612 }
613
614 // lastly we write read data to the destination address
615 if (gpuDevice->getVM().inMMHUB(pkt->dest)) {
616 Addr mmhubAddr = pkt->dest - gpuDevice->getVM().getMMHUBBase();
617 auto cb = new EventFunctionWrapper(
618 [ = ]{ writeDone(q, pkt, dmaBuffer); }, name());
619 gpuDevice->getMemMgr()->writeRequest(mmhubAddr, (uint8_t *)dmaBuffer,
620 bufferSize, 0, cb);
621 } else {
622 // TODO: getGARTAddr?
623 pkt->dest = getGARTAddr(pkt->dest);
624 auto cb = new DmaVirtCallback<uint32_t>(
625 [ = ] (const uint64_t &) { writeDone(q, pkt, dmaBuffer); });
626 dmaWriteVirt(pkt->dest, bufferSize, cb, (void *)dmaBuffer);
627 }
628}
629
630/* Completion of a write packet. */
631void
632SDMAEngine::writeDone(SDMAQueue *q, sdmaWrite *pkt, uint32_t *dmaBuffer)
633{
634 DPRINTF(SDMAEngine, "Write packet completed to %p, %d dwords\n",
635 pkt->dest, pkt->count);
636 delete []dmaBuffer;
637 delete pkt;
638 decodeNext(q);
639}
640
641/* Implements a copy packet. */
// Converts the byte count from its count-minus-one encoding, resolves the
// source through the GART mapping, then reads the source data: directly
// from device memory (chunked, since physical pages may be discontiguous)
// when the source resolves to an MMHUB/device address, otherwise via a
// virtual DMA read. copyReadData continues once the data arrives.
// NOTE(review): the signature (listing no. 643) and the ChunkGenerator
// declaration (no. 664, the `gen` used below) are missing from this
// extraction.
642void
644{
645 DPRINTF(SDMAEngine, "Copy src: %lx -> dest: %lx count %d\n",
646 pkt->source, pkt->dest, pkt->count);
647 q->incRptr(sizeof(sdmaCopy));
648 // count represents the number of bytes - 1 to be copied
649 pkt->count++;
650 DPRINTF(SDMAEngine, "Getting GART addr for %lx\n", pkt->source);
651 pkt->source = getGARTAddr(pkt->source);
652 DPRINTF(SDMAEngine, "GART addr %lx\n", pkt->source);
653
654 // Read data from the source first, then call the copyReadData method
655 uint8_t *dmaBuffer = new uint8_t[pkt->count];
656 Addr device_addr = getDeviceAddress(pkt->source);
657 if (device_addr) {
658 DPRINTF(SDMAEngine, "Copying from device address %#lx\n", device_addr);
659 auto cb = new EventFunctionWrapper(
660 [ = ]{ copyReadData(q, pkt, dmaBuffer); }, name());
661
662 // Copy the minimum page size at a time in case the physical addresses
663 // are not contiguous.
665 for (; !gen.done(); gen.next()) {
666 Addr chunk_addr = getDeviceAddress(gen.addr());
667 assert(chunk_addr);
668
669 DPRINTF(SDMAEngine, "Copying chunk of %d bytes from %#lx (%#lx)\n",
670 gen.size(), gen.addr(), chunk_addr);
671
672 gpuDevice->getMemMgr()->readRequest(chunk_addr, dmaBuffer,
673 gen.size(), 0,
674 gen.last() ? cb : nullptr);
675 dmaBuffer += gen.size();
676 }
677 } else {
678 auto cb = new DmaVirtCallback<uint64_t>(
679 [ = ] (const uint64_t &) { copyReadData(q, pkt, dmaBuffer); });
680 dmaReadVirt(pkt->source, pkt->count, cb, (void *)dmaBuffer);
681 }
682}
683
684/* Completion of data reading for a copy packet. */
// Writes the previously-read source data to the destination: chunked
// device-memory writes when the destination resolves to a device address,
// otherwise a virtual DMA write. copyDone runs when the last request
// completes. NOTE(review): the ChunkGenerator declaration (listing
// no. 709, the `gen` used below) is missing from this extraction. Also
// note the first/last-qword debug prints index dmaBuffer64 assuming
// count >= 8 bytes — presumably guaranteed by the packet format; verify.
685void
686SDMAEngine::copyReadData(SDMAQueue *q, sdmaCopy *pkt, uint8_t *dmaBuffer)
687{
688 // lastly we write read data to the destination address
689 uint64_t *dmaBuffer64 = reinterpret_cast<uint64_t *>(dmaBuffer);
690
691 DPRINTF(SDMAEngine, "Copy packet last/first qwords:\n");
692 DPRINTF(SDMAEngine, "First: %016lx\n", dmaBuffer64[0]);
693 DPRINTF(SDMAEngine, "Last: %016lx\n", dmaBuffer64[(pkt->count/8)-1]);
694
695 DPRINTF(SDMAData, "Copy packet data:\n");
696 for (int i = 0; i < pkt->count/8; ++i) {
697 DPRINTF(SDMAData, "%016lx\n", dmaBuffer64[i]);
698 }
699
700 Addr device_addr = getDeviceAddress(pkt->dest);
701 // Write read data to the destination address then call the copyDone method
702 if (device_addr) {
703 DPRINTF(SDMAEngine, "Copying to device address %#lx\n", device_addr);
704 auto cb = new EventFunctionWrapper(
705 [ = ]{ copyDone(q, pkt, dmaBuffer); }, name());
706
707 // Copy the minimum page size at a time in case the physical addresses
708 // are not contiguous.
710 for (; !gen.done(); gen.next()) {
711 Addr chunk_addr = getDeviceAddress(gen.addr());
712 assert(chunk_addr);
713
714 DPRINTF(SDMAEngine, "Copying chunk of %d bytes to %#lx (%#lx)\n",
715 gen.size(), gen.addr(), chunk_addr);
716
717 gpuDevice->getMemMgr()->writeRequest(chunk_addr, dmaBuffer,
718 gen.size(), 0,
719 gen.last() ? cb : nullptr);
720
721 dmaBuffer += gen.size();
722 }
723 } else {
724 auto cb = new DmaVirtCallback<uint64_t>(
725 [ = ] (const uint64_t &) { copyDone(q, pkt, dmaBuffer); });
726 dmaWriteVirt(pkt->dest, pkt->count, cb, (void *)dmaBuffer);
727 }
728}
729
730/* Completion of a copy packet. */
731void
732SDMAEngine::copyDone(SDMAQueue *q, sdmaCopy *pkt, uint8_t *dmaBuffer)
733{
734 DPRINTF(SDMAEngine, "Copy completed to %p, %d dwords\n",
735 pkt->dest, pkt->count);
736 delete []dmaBuffer;
737 delete pkt;
738 decodeNext(q);
739}
740
741/* Implements an indirect buffer packet. */
// Points this queue's IB sub-queue at the indirect buffer described by the
// packet (base through GART, rptr reset, size/wptr from the packet's dword
// count), then switches decoding into the IB queue.
// NOTE(review): the signature line (listing no. 743) is missing from this
// extraction. Also note the asymmetry between size (pkt->size *
// sizeof(uint32_t) + 1) and setWptr (pkt->size * sizeof(uint32_t)) — the
// "+ 1" placement looks suspicious; verify against upstream.
742void
744{
745 q->ib()->base(getGARTAddr(pkt->base));
746 q->ib()->rptr(0);
747 q->ib()->size(pkt->size * sizeof(uint32_t) + 1);
748 q->ib()->setWptr(pkt->size * sizeof(uint32_t));
749
750 q->incRptr(sizeof(sdmaIndirectBuffer));
751
752 delete pkt;
753 decodeNext(q->ib());
754}
755
756/* Implements a fence packet. */
// Writes the packet's fence value to the (GART-resolved) destination
// address; fenceDone fires on completion of the DMA write.
// NOTE(review): the signature line (listing no. 758) is missing from this
// extraction.
757void
759{
760 q->incRptr(sizeof(sdmaFence));
761 pkt->dest = getGARTAddr(pkt->dest);
762
763 // Writing the data from the fence packet to the destination address.
764 auto cb = new DmaVirtCallback<uint32_t>(
765 [ = ] (const uint32_t &) { fenceDone(q, pkt); }, pkt->data);
766 dmaWriteVirt(pkt->dest, sizeof(pkt->data), cb, &cb->dmaBuffer);
767}
768
769/* Completion of a fence packet. */
// Frees the packet and resumes decoding. NOTE(review): the signature line
// (listing no. 771) is missing from this extraction.
770void
772{
773 DPRINTF(SDMAEngine, "Fence completed to %p, data 0x%x\n",
774 pkt->dest, pkt->data);
775 delete pkt;
776 decodeNext(q);
777}
778
779/* Implements a trap packet. */
// Raises an interrupt for the packet's context; page-queue traps use
// ring_id 3, all other queue types use 0.
// NOTE(review): the signature (listing no. 781) and the interrupt-handler
// calls (nos. 789-791, presumably prepareInterruptCookie/submit on
// gpuDevice->getIH()) are missing from this extraction.
780void
782{
783 q->incRptr(sizeof(sdmaTrap));
784
785 DPRINTF(SDMAEngine, "Trap contextId: %p\n", pkt->intrContext);
786
787 uint32_t ring_id = (q->queueType() == SDMAPage) ? 3 : 0;
788
792
793 delete pkt;
794 decodeNext(q);
795}
796
797/* Implements a write SRBM packet. */
// Masks the packet data down to the enabled bytes (byteEnable bits select
// 8-bit lanes) and would write it to the SRBM register, but no SRBM model
// exists — the write is logged and dropped with a one-time warning.
// NOTE(review): the first signature line (listing no. 799) is missing from
// this extraction.
798void
800 sdmaSRBMWrite *pkt)
801{
802 q->incRptr(sizeof(sdmaSRBMWrite));
803
804 [[maybe_unused]] uint32_t reg_addr = pkt->regAddr << 2;
805 uint32_t reg_mask = 0x00000000;
806
807 if (header->byteEnable & 0x8) reg_mask |= 0xFF000000;
808 if (header->byteEnable & 0x4) reg_mask |= 0x00FF0000;
809 if (header->byteEnable & 0x2) reg_mask |= 0x0000FF00;
810 if (header->byteEnable & 0x1) reg_mask |= 0x000000FF;
811 pkt->data &= reg_mask;
812
813 DPRINTF(SDMAEngine, "SRBM write to %#x with data %#x\n",
814 reg_addr, pkt->data);
815
816 warn_once("SRBM write not performed, no SRBM model. This needs to be fixed"
817 " if correct system simulation is relying on SRBM registers.");
818
819 delete header;
820 delete pkt;
821 decodeNext(q);
822}
823
// pollRegMem: implements the POLL_REGMEM packet. Only memory polling
// (mode == 1, op == 0) is modeled: a 4-byte read of pkt->address is
// issued and pollRegMemRead evaluates the comparison/retry loop.
// Register polling and mem-write ops are warned/panicked and skipped.
// NOTE(review): the signature (listing no. 830) and surrounding doc-
// comment lines (824-828) are missing from this extraction.
829void
831 sdmaPollRegMem *pkt)
832{
833 q->incRptr(sizeof(sdmaPollRegMem));
834
835 DPRINTF(SDMAEngine, "POLL_REGMEM: M=%d, func=%d, op=%d, addr=%p, ref=%d, "
836 "mask=%p, retry=%d, pinterval=%d\n", header->mode, header->func,
837 header->op, pkt->address, pkt->ref, pkt->mask, pkt->retryCount,
838 pkt->pollInt);
839
840 bool skip = false;
841
842 if (header->mode == 1) {
843 // polling on a memory location
844 if (header->op == 0) {
845 auto cb = new DmaVirtCallback<uint32_t>(
846 [ = ] (const uint32_t &dma_buffer) {
847 pollRegMemRead(q, header, pkt, dma_buffer, 0); });
848 dmaReadVirt(pkt->address, sizeof(uint32_t), cb,
849 (void *)&cb->dmaBuffer);
850 } else {
851 panic("SDMA poll mem operation not implemented.");
852 skip = true;
853 }
854 } else {
855 warn_once("SDMA poll reg is not implemented. If this is required for "
856 "correctness, an SRBM model needs to be implemented.");
857 skip = true;
858 }
859
860 if (skip) {
861 delete header;
862 delete pkt;
863 decodeNext(q);
864 }
865}
866
// pollRegMemRead: continuation of a POLL_REGMEM memory poll. Re-issues the
// 4-byte read until pollRegMemFunc() is satisfied or the retry budget is
// exhausted; retryCount == 0xfff means poll indefinitely. On completion
// the packet is freed and decoding resumes.
// NOTE(review): the first signature line (listing no. 868) is missing from
// this extraction.
867void
869 sdmaPollRegMem *pkt, uint32_t dma_buffer, int count)
870{
871 assert(header->mode == 1 && header->op == 0);
872
873 if (!pollRegMemFunc(dma_buffer, pkt->ref, header->func) &&
874 ((count < (pkt->retryCount + 1) && pkt->retryCount != 0xfff) ||
875 pkt->retryCount == 0xfff)) {
876
877 // continue polling on a memory location until reference value is met,
878 // retryCount is met or indefinitelly if retryCount is 0xfff
879 DPRINTF(SDMAEngine, "SDMA polling mem addr %p, val %d ref %d.\n",
880 pkt->address, dma_buffer, pkt->ref);
881
882 auto cb = new DmaVirtCallback<uint32_t>(
883 [ = ] (const uint32_t &dma_buffer) {
884 pollRegMemRead(q, header, pkt, dma_buffer, count + 1); });
885 dmaReadVirt(pkt->address, sizeof(uint32_t), cb,
886 (void *)&cb->dmaBuffer);
887 } else {
888 DPRINTF(SDMAEngine, "SDMA polling mem addr %p, val %d ref %d done.\n",
889 pkt->address, dma_buffer, pkt->ref);
890
891 delete header;
892 delete pkt;
893 decodeNext(q);
894 }
895}
896
897bool
898SDMAEngine::pollRegMemFunc(uint32_t value, uint32_t reference, uint32_t func)
899{
900 switch (func) {
901 case 0:
902 return true;
903 break;
904 case 1:
905 return value < reference;
906 break;
907 case 2:
908 return value <= reference;
909 break;
910 case 3:
911 return value == reference;
912 break;
913 case 4:
914 return value != reference;
915 break;
916 case 5:
917 return value >= reference;
918 break;
919 case 6:
920 return value > reference;
921 break;
922 default:
923 panic("SDMA POLL_REGMEM unknown comparison function.");
924 break;
925 }
926}
927
928/* Implements a PTE PDE generation packet. */
// Generates pkt->count 64-bit entries as (mask | (initValue + i *
// increment)) and writes them to the destination: directly into device
// memory when dest is in the MMHUB aperture, otherwise via virtual DMA.
// ptePdeDone frees resources on completion.
// NOTE(review): the signature line (listing no. 930) is missing from this
// extraction.
929void
931{
932 q->incRptr(sizeof(sdmaPtePde));
933 pkt->count++;
934
935 DPRINTF(SDMAEngine, "PTEPDE init: %d inc: %d count: %d\n",
936 pkt->initValue, pkt->increment, pkt->count);
937
938 // Generating pkt->count double dwords using the initial value, increment
939 // and a mask.
940 uint64_t *dmaBuffer = new uint64_t[pkt->count];
941 for (int i = 0; i < pkt->count; i++) {
942 dmaBuffer[i] = (pkt->mask | (pkt->initValue + (i * pkt->increment)));
943 }
944
945 // Writing generated data to the destination address.
946 if (gpuDevice->getVM().inMMHUB(pkt->dest)) {
947 Addr mmhubAddr = pkt->dest - gpuDevice->getVM().getMMHUBBase();
948 auto cb = new EventFunctionWrapper(
949 [ = ]{ ptePdeDone(q, pkt, dmaBuffer); }, name());
950 gpuDevice->getMemMgr()->writeRequest(mmhubAddr, (uint8_t *)dmaBuffer,
951 sizeof(uint64_t) * pkt->count, 0,
952 cb);
953 } else {
954 auto cb = new DmaVirtCallback<uint64_t>(
955 [ = ] (const uint64_t &) { ptePdeDone(q, pkt, dmaBuffer); });
956 dmaWriteVirt(pkt->dest, sizeof(uint64_t) * pkt->count, cb,
957 (void *)dmaBuffer);
958 }
959}
960
961/* Completion of a PTE PDE generation packet. */
962void
963SDMAEngine::ptePdeDone(SDMAQueue *q, sdmaPtePde *pkt, uint64_t *dmaBuffer)
964{
965 DPRINTF(SDMAEngine, "PtePde packet completed to %p, %d 2dwords\n",
966 pkt->dest, pkt->count);
967
968 delete []dmaBuffer;
969 delete pkt;
970 decodeNext(q);
971}
972
// atomic: implements the ATOMIC packet. Reads the current 64-bit value at
// pkt->addr; atomicData performs the operation and writes back.
// NOTE(review): the signature line (listing no. 974) is missing from this
// extraction.
973void
975{
976 q->incRptr(sizeof(sdmaAtomic));
977 DPRINTF(SDMAEngine, "Atomic op %d on addr %#lx, src: %ld, cmp: %ld, loop?"
978 " %d loopInt: %d\n", header->opcode, pkt->addr, pkt->srcData,
979 pkt->cmpData, header->loop, pkt->loopInt);
980
981 // Read the data at pkt->addr
982 uint64_t *dmaBuffer = new uint64_t;
983 auto cb = new DmaVirtCallback<uint64_t>(
984 [ = ] (const uint64_t &)
985 { atomicData(q, header, pkt, dmaBuffer); });
986 dmaReadVirt(pkt->addr, sizeof(uint64_t), cb, (void *)dmaBuffer);
987}
988
// atomicData: applies the atomic operation to the value just read. Only
// SDMA_ATOMIC_ADD64 (fetch-and-add, 64-bit) is modeled; the sum is
// written back to pkt->addr and atomicDone completes the packet.
// NOTE(review): the first signature line (listing no. 990) is missing from
// this extraction.
989void
991 uint64_t *dmaBuffer)
992{
993 DPRINTF(SDMAEngine, "Atomic op %d on addr %#lx got data %#lx\n",
994 header->opcode, pkt->addr, *dmaBuffer);
995
996 if (header->opcode == SDMA_ATOMIC_ADD64) {
997 // Atomic add with return -- dst = dst + src
998 int64_t dst_data = *dmaBuffer;
999 int64_t src_data = pkt->srcData;
1000
1001 DPRINTF(SDMAEngine, "Atomic ADD_RTN: %ld + %ld = %ld\n", dst_data,
1002 src_data, dst_data + src_data);
1003
1004 // Reuse the dmaBuffer allocated
1005 *dmaBuffer = dst_data + src_data;
1006
1007 auto cb = new DmaVirtCallback<uint64_t>(
1008 [ = ] (const uint64_t &)
1009 { atomicDone(q, header, pkt, dmaBuffer); });
1010 dmaWriteVirt(pkt->addr, sizeof(uint64_t), cb, (void *)dmaBuffer);
1011 } else {
1012 panic("Unsupported SDMA atomic opcode: %d\n", header->opcode);
1013 }
1014}
1015
// atomicDone: frees the packet state and resumes decoding.
// NOTE(review): the first signature line (listing no. 1017) is missing
// from this extraction.
1016void
1018 uint64_t *dmaBuffer)
1019{
1020 DPRINTF(SDMAEngine, "Atomic op %d op addr %#lx complete (sent %lx)\n",
1021 header->opcode, pkt->addr, *dmaBuffer);
1022
1023 delete dmaBuffer;
1024 delete header;
1025 delete pkt;
1026 decodeNext(q);
1027}
1028
// getAddrRanges: the SDMA engine claims no PIO address ranges directly
// (MMIO is routed to it via writeMMIO by the owning device), so an empty
// list is returned. NOTE(review): the signature lines (listing
// nos. 1029-1030) are missing from this extraction.
1031{
1032 AddrRangeList ranges;
1033 return ranges;
1034}
1035
// serialize: checkpoints the four always-present queues (gfx, page and
// their IBs) by snapshotting base/rptr/wptr/size/processing into parallel
// arrays. unserialize below restores them in the same order, so the two
// functions must stay in sync.
// NOTE(review): the extraction dropped the signatures (listing nos. 1037,
// 1083), the base-class serialize call (1040/1086) and the SERIALIZE/
// UNSERIALIZE_SCALAR lines for the member registers (1042-1051,
// 1088-1097). Also note `Addr base[num_queues]` with a non-constant bound
// is a variable-length array — a GNU extension, not standard C++.
1036void
1038{
1039 // Serialize the DmaVirtDevice base class
1041
1052
1053 int num_queues = 4;
1054
1056 queues.push_back((SDMAQueue *)&gfx);
1057 queues.push_back((SDMAQueue *)&page);
1058 queues.push_back((SDMAQueue *)&gfxIb);
1059 queues.push_back((SDMAQueue *)&pageIb);
1060
1061 Addr base[num_queues];
1062 Addr rptr[num_queues];
1063 Addr wptr[num_queues];
1064 Addr size[num_queues];
1065 bool processing[num_queues];
1066
1067 for (int i = 0; i < num_queues; i++) {
1068 base[i] = queues[i]->base();
1069 rptr[i] = queues[i]->getRptr();
1070 wptr[i] = queues[i]->getWptr();
1071 size[i] = queues[i]->size();
1072 processing[i] = queues[i]->processing();
1073 }
1074
1075 SERIALIZE_ARRAY(base, num_queues);
1076 SERIALIZE_ARRAY(rptr, num_queues);
1077 SERIALIZE_ARRAY(wptr, num_queues);
1078 SERIALIZE_ARRAY(size, num_queues);
1079 SERIALIZE_ARRAY(processing, num_queues);
1080}
1081
// unserialize: restores the queue state saved by serialize above; queue
// order (gfx, page, gfxIb, pageIb) must match.
1082void
1084{
1085 // Serialize the DmaVirtDevice base class
1087
1098
1099 int num_queues = 4;
1100 Addr base[num_queues];
1101 Addr rptr[num_queues];
1102 Addr wptr[num_queues];
1103 Addr size[num_queues];
1104 bool processing[num_queues];
1105
1106 UNSERIALIZE_ARRAY(base, num_queues);
1107 UNSERIALIZE_ARRAY(rptr, num_queues);
1108 UNSERIALIZE_ARRAY(wptr, num_queues);
1109 UNSERIALIZE_ARRAY(size, num_queues);
1110 UNSERIALIZE_ARRAY(processing, num_queues);
1111
1113 queues.push_back((SDMAQueue *)&gfx);
1114 queues.push_back((SDMAQueue *)&page);
1115 queues.push_back((SDMAQueue *)&gfxIb);
1116 queues.push_back((SDMAQueue *)&pageIb);
1117
1118 for (int i = 0; i < num_queues; i++) {
1119 queues[i]->base(base[i]);
1120 queues[i]->rptr(rptr[i]);
1121 queues[i]->wptr(wptr[i]);
1122 queues[i]->size(size[i]);
1123 queues[i]->processing(processing[i]);
1124 }
1125}
1126
// writeMMIO: decodes a 32-bit register write at mmio_offset into the
// corresponding setGfx*/setPage* setter; ring-buffer size registers are
// decoded inline (size field in bits 6..1, ring bytes = 2^(field+2)).
// Doorbell-offset writes additionally ring the doorbell when doorbell
// bit 28 (enable) is set. Unknown offsets are logged and ignored.
// NOTE(review): the extraction dropped the signature (listing no. 1128)
// and many `case mm...` labels (1138, 1141, 1144, 1147, 1150, 1164, 1167,
// 1170, 1173, 1176, 1179, 1182, 1196) plus the doorbell-processing lines
// (1154-1156, 1186-1188) — the case bodies below are correspondingly
// detached from their labels in this view.
1127void
1129{
1130 DPRINTF(SDMAEngine, "Writing offset %#x with data %x\n", mmio_offset,
1131 pkt->getLE<uint32_t>());
1132
1133 // In Vega10 headers, the offsets are the same for both SDMAs
1134 switch (mmio_offset) {
1135 case mmSDMA_GFX_RB_BASE:
1136 setGfxBaseLo(pkt->getLE<uint32_t>());
1137 break;
1139 setGfxBaseHi(pkt->getLE<uint32_t>());
1140 break;
1142 setGfxRptrLo(pkt->getLE<uint32_t>());
1143 break;
1145 setGfxRptrHi(pkt->getLE<uint32_t>());
1146 break;
1148 setGfxDoorbellLo(pkt->getLE<uint32_t>());
1149 break;
1151 setGfxDoorbellOffsetLo(pkt->getLE<uint32_t>());
1152 // Bit 28 of doorbell indicates that doorbell is enabled.
1153 if (bits(getGfxDoorbell(), 28, 28)) {
1157 }
1158 break;
1159 case mmSDMA_GFX_RB_CNTL: {
1160 uint32_t rb_size = bits(pkt->getLE<uint32_t>(), 6, 1);
1161 assert(rb_size >= 6 && rb_size <= 62);
1162 setGfxSize(1 << (rb_size + 2));
1163 } break;
1165 setGfxWptrLo(pkt->getLE<uint32_t>());
1166 break;
1168 setGfxWptrHi(pkt->getLE<uint32_t>());
1169 break;
1171 setPageBaseLo(pkt->getLE<uint32_t>());
1172 break;
1174 setPageRptrLo(pkt->getLE<uint32_t>());
1175 break;
1177 setPageRptrHi(pkt->getLE<uint32_t>());
1178 break;
1180 setPageDoorbellLo(pkt->getLE<uint32_t>());
1181 break;
1183 setPageDoorbellOffsetLo(pkt->getLE<uint32_t>());
1184 // Bit 28 of doorbell indicates that doorbell is enabled.
1185 if (bits(getPageDoorbell(), 28, 28)) {
1189 }
1190 break;
1191 case mmSDMA_PAGE_RB_CNTL: {
1192 uint32_t rb_size = bits(pkt->getLE<uint32_t>(), 6, 1);
1193 assert(rb_size >= 6 && rb_size <= 62);
1194 setPageSize(1 << (rb_size + 2));
1195 } break;
1197 setPageWptrLo(pkt->getLE<uint32_t>());
1198 break;
1199 default:
1200 DPRINTF(SDMAEngine, "Unknown SDMA MMIO %#x\n", mmio_offset);
1201 break;
1202 }
1203}
1204
// MMIO register setters. Each 64-bit shadow register (gfxBase, gfxRptr,
// gfxDoorbell, gfxDoorbellOffset, gfxWptr and the page-queue equivalents)
// is updated one 32-bit half at a time: the target half is cleared with
// insertBits(reg, hi, lo, 0) and then OR'd in. Base setters also push the
// resulting ring base to the queue as ((base >> 1) << 12); size setters
// decode bits 6..1 as log2(ring dwords).
// NOTE(review): every setter's signature line was dropped by the
// extraction (listing nos. 1206, 1214, 1222, 1230, 1238, 1245, 1252,
// 1263, 1270, 1278, 1285, 1292, 1300, 1308, 1316, 1324, 1331, 1338,
// 1349, 1356, 1364, 1371), along with some body lines (e.g. 1226, 1234,
// 1254-1255, 1257-1258, 1265, 1326, 1340-1341, 1343-1344, 1351) — the
// bodies below are matched to names by position only.
1205void
1207{
1208 gfxBase = insertBits(gfxBase, 31, 0, 0);
1209 gfxBase |= data;
1210 gfx.base((gfxBase >> 1) << 12);
1211}
1212
1213void
1215{
1216 gfxBase = insertBits(gfxBase, 63, 32, 0);
1217 gfxBase |= ((uint64_t)data) << 32;
1218 gfx.base((gfxBase >> 1) << 12);
1219}
1220
1221void
1223{
1224 gfxRptr = insertBits(gfxRptr, 31, 0, 0);
1225 gfxRptr |= data;
1227}
1228
1229void
1231{
1232 gfxRptr = insertBits(gfxRptr, 63, 32, 0);
1233 gfxRptr |= ((uint64_t)data) << 32;
1235}
1236
1237void
1239{
1240 gfxDoorbell = insertBits(gfxDoorbell, 31, 0, 0);
1241 gfxDoorbell |= data;
1242}
1243
1244void
1246{
1247 gfxDoorbell = insertBits(gfxDoorbell, 63, 32, 0);
1248 gfxDoorbell |= ((uint64_t)data) << 32;
1249}
1250
1251void
1253{
1256 if (bits(gfxDoorbell, 28, 28)) {
1259 }
1260}
1261
1262void
1264{
1266 gfxDoorbellOffset |= ((uint64_t)data) << 32;
1267}
1268
1269void
1271{
1272 uint32_t rb_size = bits(data, 6, 1);
1273 assert(rb_size >= 6 && rb_size <= 62);
1274 gfx.size(1 << (rb_size + 2));
1275}
1276
1277void
1279{
1280 gfxWptr = insertBits(gfxWptr, 31, 0, 0);
1281 gfxWptr |= data;
1282}
1283
// NOTE(review): this is the Hi-half wptr setter by position, yet it clears
// bits 31..0 (like the Lo setter) before OR'ing the data into the high
// half — setPageWptrHi below clears 63..32. This looks like a real bug in
// the underlying source; verify against upstream gem5.
1284void
1286{
1287 gfxWptr = insertBits(gfxWptr, 31, 0, 0);
1288 gfxWptr |= ((uint64_t)data) << 32;
1289}
1290
1291void
1293{
1294 pageBase = insertBits(pageBase, 31, 0, 0);
1295 pageBase |= data;
1296 page.base((pageBase >> 1) << 12);
1297}
1298
1299void
1301{
1302 pageBase = insertBits(pageBase, 63, 32, 0);
1303 pageBase |= ((uint64_t)data) << 32;
1304 page.base((pageBase >> 1) << 12);
1305}
1306
1307void
1309{
1310 pageRptr = insertBits(pageRptr, 31, 0, 0);
1311 pageRptr |= data;
1313}
1314
1315void
1317{
1318 pageRptr = insertBits(pageRptr, 63, 32, 0);
1319 pageRptr |= ((uint64_t)data) << 32;
1321}
1322
1323void
1325{
1327 pageDoorbell |= data;
1328}
1329
1330void
1332{
1333 pageDoorbell = insertBits(pageDoorbell, 63, 32, 0);
1334 pageDoorbell |= ((uint64_t)data) << 32;
1335}
1336
1337void
1339{
1342 if (bits(pageDoorbell, 28, 28)) {
1345 }
1346}
1347
1348void
1350{
1352 pageDoorbellOffset |= ((uint64_t)data) << 32;
1353}
1354
1355void
1357{
1358 uint32_t rb_size = bits(data, 6, 1);
1359 assert(rb_size >= 6 && rb_size <= 62);
1360 page.size(1 << (rb_size + 2));
1361}
1362
1363void
1365{
1366 pageWptr = insertBits(pageWptr, 31, 0, 0);
1367 pageWptr |= data;
1368}
1369
1370void
1372{
1373 pageWptr = insertBits(pageWptr, 63, 32, 0);
1374 pageWptr |= ((uint64_t)data) << 32;
1375}
1376
1377} // namespace gem5
static constexpr int AMDGPU_MMHUB_PAGE_SIZE
Definition amdgpu_vm.hh:94
#define DPRINTF(x,...)
Definition trace.hh:210
const char data[]
Device model for an AMD GPU.
void setDoorbellType(uint32_t offset, QueueType qt)
Set handles to GPU blocks.
AMDGPUInterruptHandler * getIH()
Get handles to GPU blocks.
AMDGPUMemoryManager * getMemMgr()
RequestorID vramRequestorId()
Methods related to translations and system/device memory.
void setSDMAEngine(Addr offset, SDMAEngine *eng)
void prepareInterruptCookie(ContextID cntxtId, uint32_t ring_id, uint32_t client_id, uint32_t source_id)
void writeRequest(Addr addr, uint8_t *data, int size, Request::Flags flag, Event *callback)
Write size amount of data to device memory at addr using flags and callback.
void readRequest(Addr addr, uint8_t *data, int size, Request::Flags flag, Event *callback)
Read size amount of data from device memory at addr using flags and callback.
Translation range generators.
Definition amdgpu_vm.hh:317
bool inAGP(Addr vaddr)
Methods for resolving apertures.
Definition amdgpu_vm.hh:188
Addr getMMHUBBase()
Definition amdgpu_vm.hh:203
bool inMMHUB(Addr vaddr)
Definition amdgpu_vm.hh:198
This class takes an arbitrary memory region (address/length pair) and generates a series of appropria...
void serialize(CheckpointOut &cp) const override
Serialize an object.
void unserialize(CheckpointIn &cp) override
Unserialize an object.
Wraps a std::function object in a DmaCallback.
void dmaReadVirt(Addr host_addr, unsigned size, DmaCallback *cb, void *data, Tick delay=0)
Initiate a DMA read from virtual address host_addr.
void dmaWriteVirt(Addr host_addr, unsigned size, DmaCallback *b, void *data, Tick delay=0)
Initiate a DMA write from virtual address host_addr.
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
Definition packet.hh:295
T getLE() const
Get the data in the packet byte swapped from little endian to host endian.
void setMQD(SDMAQueueDesc *mqd)
void setMQDAddr(Addr mqdAddr)
SDMAQueueDesc * getMQD()
void incRptr(uint32_t value)
System DMA Engine class for AMD dGPU.
uint64_t pageDoorbell
void setPageRptrLo(uint32_t data)
void unserialize(CheckpointIn &cp) override
Unserialize an object.
uint64_t getPageDoorbellOffset()
SDMAQueue gfx
Each SDMAEngine processes four queues: paging, gfx, rlc0, and rlc1, where RLC stands for Run List Con...
void ptePde(SDMAQueue *q, sdmaPtePde *pkt)
void setGfxRptrLo(uint32_t data)
void setGfxWptrLo(uint32_t data)
uint64_t getGfxDoorbellOffset()
void setPageDoorbellHi(uint32_t data)
VegaISA::Walker * walker
void setGfxRptrHi(uint32_t data)
void writeDone(SDMAQueue *q, sdmaWrite *pkt, uint32_t *dmaBuffer)
void processRLC(Addr doorbellOffset, Addr wptrOffset)
void copy(SDMAQueue *q, sdmaCopy *pkt)
Tick write(PacketPtr pkt) override
Inherited methods.
void writeMMIO(PacketPtr pkt, Addr mmio_offset)
Methods for setting the values of SDMA MMIO registers.
void setGfxSize(uint32_t data)
void fenceDone(SDMAQueue *q, sdmaFence *pkt)
void writeReadData(SDMAQueue *q, sdmaWrite *pkt, uint32_t *dmaBuffer)
void unregisterRLCQueue(Addr doorbell)
void setGfxBaseLo(uint32_t data)
void processRLC0(Addr wptrOffset)
void processGfx(Addr wptrOffset)
Given a new write ptr offset, communicated to the GPU through a doorbell write, the SDMA engine proce...
void setGfxDoorbellOffsetHi(uint32_t data)
void atomic(SDMAQueue *q, sdmaAtomicHeader *header, sdmaAtomic *pkt)
void deallocateRLCQueues()
AMDGPUDevice * gpuDevice
Addr getGARTAddr(Addr addr) const
Methods for translation.
void setPageDoorbellOffsetHi(uint32_t data)
void processRLC1(Addr wptrOffset)
void setGfxWptrHi(uint32_t data)
void setGfxDoorbellOffsetLo(uint32_t data)
void processPage(Addr wptrOffset)
uint64_t getGfxDoorbell()
void decodeHeader(SDMAQueue *q, uint32_t data)
Reads the first DW (32 bits) (i.e., header) of an SDMA packet, which encodes the opcode and sub-opcod...
void setPageDoorbellOffsetLo(uint32_t data)
uint64_t getPageDoorbell()
SDMAEngine(const SDMAEngineParams &p)
void setGPUDevice(AMDGPUDevice *gpu_device)
Addr getDeviceAddress(Addr raw_addr)
Translate an address in an SDMA packet.
uint64_t pageDoorbellOffset
void setPageBaseHi(uint32_t data)
uint64_t gfxDoorbellOffset
void registerRLCQueue(Addr doorbell, Addr mqdAddr, SDMAQueueDesc *mqd)
Methods for RLC queues.
bool pollRegMemFunc(uint32_t value, uint32_t reference, uint32_t func)
void setPageWptrHi(uint32_t data)
void setPageWptrLo(uint32_t data)
void pollRegMemRead(SDMAQueue *q, sdmaPollRegMemHeader *header, sdmaPollRegMem *pkt, uint32_t dma_buffer, int count)
void setGfxDoorbellLo(uint32_t data)
void copyReadData(SDMAQueue *q, sdmaCopy *pkt, uint8_t *dmaBuffer)
void indirectBuffer(SDMAQueue *q, sdmaIndirectBuffer *pkt)
void srbmWrite(SDMAQueue *q, sdmaSRBMWriteHeader *header, sdmaSRBMWrite *pkt)
void atomicData(SDMAQueue *q, sdmaAtomicHeader *header, sdmaAtomic *pkt, uint64_t *dmaBuffer)
void trap(SDMAQueue *q, sdmaTrap *pkt)
AddrRangeList getAddrRanges() const override
Every PIO device is obliged to provide an implementation that returns the address ranges the device r...
void setPageDoorbellLo(uint32_t data)
void setGfxDoorbellHi(uint32_t data)
void setPageSize(uint32_t data)
void setPageBaseLo(uint32_t data)
void copyDone(SDMAQueue *q, sdmaCopy *pkt, uint8_t *dmaBuffer)
void setGfxBaseHi(uint32_t data)
void ptePdeDone(SDMAQueue *q, sdmaPtePde *pkt, uint64_t *dmaBuffer)
TranslationGenPtr translate(Addr vaddr, Addr size) override
GPUController will perform DMA operations on VAs, and because page faults are not currently supported...
void setPageRptrHi(uint32_t data)
void decodeNext(SDMAQueue *q)
This method checks read and write pointers and starts decoding packets if the read pointer is less th...
void fence(SDMAQueue *q, sdmaFence *pkt)
void atomicDone(SDMAQueue *q, sdmaAtomicHeader *header, sdmaAtomic *pkt, uint64_t *dmaBuffer)
void serialize(CheckpointOut &cp) const override
Serialize an object.
int getIHClientId()
Returns the client id for the Interrupt Handler.
std::array< Addr, 2 > rlcInfo
void pollRegMem(SDMAQueue *q, sdmaPollRegMemHeader *header, sdmaPollRegMem *pkt)
Implements a poll reg/mem packet that polls an SRBM register or a memory location,...
void setDevRequestor(RequestorID mid)
STL vector class.
Definition stl.hh:37
constexpr T bits(T val, unsigned first, unsigned last)
Extract the bitfield from position 'first' to 'last' (inclusive) from 'val' and right justify it.
Definition bitfield.hh:76
constexpr T insertBits(T val, unsigned first, unsigned last, B bit_val)
Returns val with bits first to last set to the LSBs of bit_val.
Definition bitfield.hh:182
#define panic(...)
This implements a cprintf based panic() function.
Definition logging.hh:188
#define UNSERIALIZE_ARRAY(member, size)
Definition serialize.hh:618
#define SERIALIZE_ARRAY(member, size)
Definition serialize.hh:610
#define warn(...)
Definition logging.hh:256
#define warn_once(...)
Definition logging.hh:260
Bitfield< 23, 20 > atomic
Bitfield< 27 > q
Definition misc_types.hh:55
Bitfield< 7 > i
Definition misc_types.hh:67
Bitfield< 33 > id
Bitfield< 24, 21 > opcode
Definition types.hh:92
Bitfield< 0 > p
Bitfield< 51, 12 > base
Definition pagetable.hh:141
Bitfield< 3 > addr
Definition types.hh:84
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
struct gem5::GEM5_PACKED sdmaFence
struct gem5::GEM5_PACKED sdmaAtomic
std::ostream CheckpointOut
Definition serialize.hh:66
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition types.hh:147
@ SOC15_IH_CLIENTID_SDMA3
@ SOC15_IH_CLIENTID_SDMA4
@ SOC15_IH_CLIENTID_SDMA0
@ SOC15_IH_CLIENTID_SDMA1
@ SOC15_IH_CLIENTID_SDMA5
@ SOC15_IH_CLIENTID_SDMA2
@ SOC15_IH_CLIENTID_SDMA6
@ SOC15_IH_CLIENTID_SDMA7
struct gem5::GEM5_PACKED sdmaPtePde
struct gem5::GEM5_PACKED sdmaPollRegMem
struct gem5::GEM5_PACKED sdmaPollRegMemHeader
constexpr unsigned int SDMA_ATOMIC_ADD64
struct gem5::GEM5_PACKED sdmaWrite
struct gem5::GEM5_PACKED sdmaAtomicHeader
struct gem5::GEM5_PACKED sdmaCopy
SDMA packets.
struct gem5::GEM5_PACKED sdmaIndirectBuffer
struct gem5::GEM5_PACKED sdmaTrap
struct gem5::GEM5_PACKED sdmaSRBMWrite
struct gem5::GEM5_PACKED sdmaSRBMWriteHeader
std::unique_ptr< TranslationGen > TranslationGenPtr
output header
Definition nop.cc:36
Declaration of the Packet class.
#define SDMA_SUBOP_COPY_SOA
#define SDMA_OP_SEM
#define SDMA_OP_PTEPDE
#define SDMA_OP_ATOMIC
#define SDMA_OP_DUMMY_TRAP
#define SDMA_SUBOP_PTEPDE_COPY
#define SDMA_SUBOP_COPY_LINEAR
#define SDMA_SUBOP_COPY_T2T_SUB_WIND
#define SDMA_SUBOP_TIMESTAMP_GET
#define SDMA_SUBOP_WRITE_TILED
#define SDMA_OP_PRE_EXE
#define SDMA_OP_TRAP
#define SDMA_SUBOP_PTEPDE_GEN
#define SDMA_OP_WRITE
#define SDMA_SUBOP_COPY_LINEAR_SUB_WIND
#define SDMA_OP_COPY
#define SDMA_SUBOP_COPY_LINEAR_PHY
#define SDMA_OP_POLL_REGMEM
#define SDMA_SUBOP_TIMESTAMP_GET_GLOBAL
#define SDMA_SUBOP_PTEPDE_COPY_BACKWARDS
#define SDMA_SUBOP_TIMESTAMP_SET
#define SDMA_OP_TIMESTAMP
#define SDMA_OP_INDIRECT
#define SDMA_OP_COND_EXE
#define SDMA_OP_CONST_FILL
#define SDMA_SUBOP_COPY_DIRTY_PAGE
#define SDMA_OP_NOP
Commands for the SDMA engine.
#define SDMA_SUBOP_WRITE_LINEAR
#define SDMA_OP_FENCE
#define SDMA_SUBOP_PTEPDE_RMW
#define SDMA_OP_SRBM_WRITE
#define SDMA_SUBOP_POLL_MEM_VERIFY
#define SDMA_SUBOP_POLL_REG_WRITE_MEM
#define SDMA_SUBOP_COPY_TILED_SUB_WIND
#define SDMA_SUBOP_POLL_DBIT_WRITE_MEM
#define SDMA_SUBOP_COPY_TILED
#define mmSDMA_GFX_DOORBELL
Definition sdma_mmio.hh:49
#define mmSDMA_PAGE_RB_RPTR_ADDR_HI
Definition sdma_mmio.hh:55
#define mmSDMA_GFX_RB_WPTR_POLL_ADDR_LO
Definition sdma_mmio.hh:52
#define mmSDMA_PAGE_RB_BASE
Definition sdma_mmio.hh:54
#define mmSDMA_PAGE_RB_WPTR_POLL_ADDR_LO
Definition sdma_mmio.hh:59
#define mmSDMA_PAGE_DOORBELL
Definition sdma_mmio.hh:57
#define mmSDMA_GFX_DOORBELL_OFFSET
Definition sdma_mmio.hh:50
#define mmSDMA_PAGE_DOORBELL_OFFSET
Definition sdma_mmio.hh:58
#define mmSDMA_GFX_RB_CNTL
MMIO offsets for SDMA engine.
Definition sdma_mmio.hh:44
#define mmSDMA_GFX_RB_RPTR_ADDR_HI
Definition sdma_mmio.hh:47
#define mmSDMA_PAGE_RB_RPTR_ADDR_LO
Definition sdma_mmio.hh:56
#define mmSDMA_GFX_RB_RPTR_ADDR_LO
Definition sdma_mmio.hh:48
#define mmSDMA_GFX_RB_WPTR_POLL_ADDR_HI
Definition sdma_mmio.hh:51
#define mmSDMA_GFX_RB_BASE
Definition sdma_mmio.hh:45
#define mmSDMA_PAGE_RB_CNTL
Definition sdma_mmio.hh:53
#define mmSDMA_GFX_RB_BASE_HI
Definition sdma_mmio.hh:46
#define UNSERIALIZE_SCALAR(scalar)
Definition serialize.hh:575
#define SERIALIZE_SCALAR(scalar)
Definition serialize.hh:568
PM4 packets.
uint32_t sdmax_rlcx_rb_rptr_addr_hi
uint32_t sdmax_rlcx_rb_cntl
uint32_t sdmax_rlcx_rb_rptr_addr_lo
const std::string & name()
Definition trace.cc:48

Generated on Mon Jul 10 2023 14:24:30 for gem5 by doxygen 1.9.7