36#include "debug/SDMAData.hh"
37#include "debug/SDMAEngine.hh"
44#include "params/SDMAEngine.hh"
105 panic(
"Unknown SDMA id");
114 addr = (((
addr >> 12) << 3) << 12) | low_bits;
127 auto addr_range = *(tgen->begin());
128 Addr tmp_addr = addr_range.paddr;
139 Addr device_addr = 0;
143 device_addr = raw_addr -
gpuDevice->getVM().getMMHUBBase();
145 device_addr = tmp_addr -
gpuDevice->getVM().getMMHUBBase();
201 rlc0.rptrWbAddr(rptr_wb_addr);
202 rlc0.processing(
false);
204 rlc0.setMQDAddr(mqdAddr);
206 rlc0.setStatic(isStatic);
207 }
else if (!
rlc1.valid()) {
216 rlc1.rptrWbAddr(rptr_wb_addr);
217 rlc1.processing(
false);
219 rlc1.setMQDAddr(mqdAddr);
221 rlc1.setStatic(isStatic);
223 panic(
"No free RLCs. Check they are properly unmapped.");
232 if (!unmap_static &&
rlc0.isStatic()) {
246 [ = ] (
const uint32_t &) { });
249 warn(
"RLC0 SDMAMQD address invalid\n");
253 }
else if (
rlcInfo[1] == doorbell) {
254 if (!unmap_static &&
rlc1.isStatic()) {
268 [ = ] (
const uint32_t &) { });
271 warn(
"RLC1 SDMAMQD address invalid\n");
276 panic(
"Cannot unregister: no RLC queue at %#lx\n", doorbell);
296 gfx.setWptr(wptrOffset);
297 if (!
gfx.processing()) {
298 gfx.processing(
true);
307 page.setWptr(wptrOffset);
308 if (!
page.processing()) {
309 page.processing(
true);
318 if (
rlcInfo[0] == doorbellOffset) {
320 }
else if (
rlcInfo[1] == doorbellOffset) {
323 panic(
"Cannot process: no RLC queue at %#lx\n", doorbellOffset);
331 assert(
rlc0.valid());
333 rlc0.setWptr(wptrOffset);
334 if (!
rlc0.processing()) {
336 rlc0.processing(
true);
345 assert(
rlc1.valid());
347 rlc1.setWptr(wptrOffset);
348 if (!
rlc1.processing()) {
350 rlc1.processing(
true);
361 if (
q->rptr() !=
q->wptr()) {
367 [ = ] (
const uint32_t &
header)
369 dmaReadVirt(
q->rptr(),
sizeof(uint32_t), cb, &cb->dmaBuffer,
377 q->globalRptr(),
q->rptrWbAddr());
378 if (
q->rptrWbAddr()) {
380 [ = ](
const uint64_t &) { },
q->globalRptr());
383 q->processing(
false);
404 void *dmaBuffer =
nullptr;
411 uint32_t NOP_count = (
header >> 16) & 0x3FFF;
414 for (
int i = 0;
i < NOP_count; ++
i) {
415 if (
q->rptr() ==
q->wptr()) {
416 warn(
"NOP count is beyond wptr, ignoring remaining NOPs");
426 switch (sub_opcode) {
430 [ = ] (
const uint64_t &)
436 panic(
"SDMA_SUBOP_COPY_LINEAR_SUB_WIND not implemented");
439 panic(
"SDMA_SUBOP_COPY_TILED not implemented");
442 panic(
"SDMA_SUBOP_COPY_TILED_SUB_WIND not implemented");
445 panic(
"SDMA_SUBOP_COPY_T2T_SUB_WIND not implemented");
448 panic(
"SDMA_SUBOP_COPY_SOA not implemented");
451 panic(
"SDMA_SUBOP_COPY_DIRTY_PAGE not implemented");
454 panic(
"SDMA_SUBOP_COPY_LINEAR_PHY not implemented");
457 panic(
"SDMA unknown copy sub-opcode.");
463 switch (sub_opcode) {
467 [ = ] (
const uint64_t &)
473 panic(
"SDMA_SUBOP_WRITE_TILED not implemented.\n");
483 [ = ] (
const uint64_t &)
493 [ = ] (
const uint64_t &)
502 [ = ] (
const uint64_t &)
509 warn(
"SDMA_OP_SEM not implemented");
516 [ = ] (
const uint64_t &)
520 switch (sub_opcode) {
522 panic(
"SDMA_SUBOP_POLL_REG_WRITE_MEM not implemented");
525 panic(
"SDMA_SUBOP_POLL_DBIT_WRITE_MEM not implemented");
528 panic(
"SDMA_SUBOP_POLL_MEM_VERIFY not implemented");
536 warn(
"SDMA_OP_SEM not implemented");
543 [ = ] (
const uint64_t &)
552 [ = ] (
const uint64_t &)
559 switch (sub_opcode) {
564 [ = ] (
const uint64_t &)
570 panic(
"SDMA_SUBOP_PTEPDE_COPY not implemented");
573 panic(
"SDMA_SUBOP_PTEPDE_COPY not implemented");
576 panic(
"SDMA_SUBOP_PTEPDE_RMW not implemented");
587 switch (sub_opcode) {
597 warn(
"SDMA_OP_TIMESTAMP not implemented");
604 [ = ] (
const uint64_t &)
611 warn(
"SDMA_OP_PRE_EXE not implemented");
616 warn(
"SDMA_OP_DUMMY_TRAP not implemented");
620 panic(
"Invalid SDMA packet.\n");
635 uint32_t *dmaBuffer =
new uint32_t[pkt->
count];
646 int bufferSize =
sizeof(uint32_t) * pkt->
count;
647 q->incRptr(bufferSize);
650 for (
int i = 0;
i < pkt->
count; ++
i) {
659 "SDMA write to GART not implemented");
663 gpuDevice->getMemMgr()->writeRequest(mmhub_addr, (uint8_t *)dmaBuffer,
670 [ = ] (
const uint64_t &) {
writeDone(
q, pkt, dmaBuffer); });
685 auto system_ptr =
gpuDevice->CP()->system();
686 if (!system_ptr->isAtomicMode()) {
687 warn_once(
"SDMA cleanup assumes 2000 tick timing for completion."
688 " This has not been tested in timing mode\n");
727 uint8_t *dmaBuffer =
new uint8_t[pkt->
count];
737 uint8_t *buffer_ptr = dmaBuffer;
738 for (; !gen.done(); gen.next()) {
743 gen.size(), gen.addr(), chunk_addr);
745 gpuDevice->getMemMgr()->readRequest(chunk_addr, buffer_ptr,
747 gen.last() ? cb :
nullptr);
748 buffer_ptr += gen.size();
752 [ = ] (
const uint64_t &) {
copyReadData(
q, pkt, dmaBuffer); });
763 uint64_t *dmaBuffer64 =
reinterpret_cast<uint64_t *
>(dmaBuffer);
769 DPRINTF(SDMAData,
"Copy packet data:\n");
770 for (
int i = 0;
i < pkt->
count/8; ++
i) {
771 DPRINTF(SDMAData,
"%016lx\n", dmaBuffer64[
i]);
784 uint8_t *buffer_ptr = dmaBuffer;
785 for (; !gen.done(); gen.next()) {
790 gen.size(), gen.addr(), chunk_addr);
792 gpuDevice->getMemMgr()->writeRequest(chunk_addr, buffer_ptr,
794 gen.last() ? cb :
nullptr);
796 buffer_ptr += gen.size();
801 [ = ] (
const uint64_t &) {
copyDone(
q, pkt, dmaBuffer); });
807 if (
gpuDevice->getVM().inGARTRange(device_addr)) {
809 assert((pkt->
count % 8) == 0);
810 for (
int i = 0;
i < pkt->
count/8; ++
i) {
811 Addr gart_addr = device_addr +
i*8 -
gpuDevice->getVM().gartBase();
813 gart_addr, dmaBuffer64[
i]);
814 gpuDevice->getVM().gartTable[gart_addr] = dmaBuffer64[
i];
829 auto system_ptr =
gpuDevice->CP()->system();
830 if (!system_ptr->isAtomicMode()) {
831 warn_once(
"SDMA cleanup assumes 2000 tick timing for completion."
832 " This has not been tested in timing mode\n");
857 q->ib()->base(pkt->
base);
860 q->ib()->size(pkt->
size *
sizeof(uint32_t) + 1);
861 q->ib()->setWptr(pkt->
size *
sizeof(uint32_t));
902 uint32_t ring_id = (
q->queueType() ==
SDMAPage) ? 3 : 0;
905 int local_id =
getId();
907 if (
gpuDevice->getGfxVersion() == GfxVersion::gfx942 ||
908 gpuDevice->getGfxVersion() == GfxVersion::gfx950) {
909 node_id =
getId() >> 2;
922 local_id =
getId() % 4;
927 gpuDevice->getIH()->submitInterruptCookie();
942 [[maybe_unused]] uint32_t reg_addr = pkt->
regAddr << 2;
943 uint32_t reg_mask = 0x00000000;
945 if (srbm_header.
byteEnable & 0x8) reg_mask |= 0xFF000000;
946 if (srbm_header.
byteEnable & 0x4) reg_mask |= 0x00FF0000;
947 if (srbm_header.
byteEnable & 0x2) reg_mask |= 0x0000FF00;
948 if (srbm_header.
byteEnable & 0x1) reg_mask |= 0x000000FF;
949 pkt->
data &= reg_mask;
952 reg_addr, pkt->
data);
978 "mask=%p, retry=%d, pinterval=%d\n", prm_header.
mode,
984 if (prm_header.
mode == 1) {
986 if (prm_header.
op == 0) {
988 [ = ] (
const uint32_t &dma_buffer) {
993 panic(
"SDMA poll mem operation not implemented.");
997 warn_once(
"SDMA poll reg is not implemented. If this is required for "
998 "correctness, an SRBM model needs to be implemented.");
1010 uint32_t dma_buffer,
int count)
1015 assert(prm_header.
mode == 1 && prm_header.
op == 0);
1027 [ = ] (
const uint32_t &dma_buffer) {
1048 return value < reference;
1051 return value <= reference;
1054 return value == reference;
1057 return value != reference;
1060 return value >= reference;
1063 return value > reference;
1066 panic(
"SDMA POLL_REGMEM unknown comparison function.");
1083 uint64_t *dmaBuffer =
new uint64_t[pkt->
count];
1084 for (
int i = 0;
i < pkt->
count;
i++) {
1093 "SDMA write to GART not implemented");
1097 gpuDevice->getMemMgr()->writeRequest(mmhub_addr, (uint8_t *)dmaBuffer,
1098 sizeof(uint64_t) * pkt->
count, 0,
1105 [ = ] (
const uint64_t &) {
ptePdeDone(
q, pkt, dmaBuffer); });
1121 auto system_ptr =
gpuDevice->CP()->system();
1122 if (!system_ptr->isAtomicMode()) {
1123 warn_once(
"SDMA cleanup assumes 2000 tick timing for completion."
1124 " This has not been tested in timing mode\n");
1137 delete [] dmaBuffer;
1153 uint64_t *dmaBuffer =
new uint64_t;
1155 [ = ] (
const uint64_t &)
1163 uint64_t *dmaBuffer)
1173 int64_t dst_data = *dmaBuffer;
1174 int64_t src_data = pkt->
srcData;
1177 src_data, dst_data + src_data);
1180 *dmaBuffer = dst_data + src_data;
1183 [ = ] (
const uint64_t &)
1187 panic(
"Unsupported SDMA atomic opcode: %d\n", at_header.
opcode);
1193 uint64_t *dmaBuffer)
1219 int fill_bytes = (pkt->
count + 1) * (1 << fill_header.
fillsize);
1220 uint8_t *fill_data =
new uint8_t[fill_bytes];
1222 memset(fill_data, pkt->
srcData, fill_bytes);
1235 uint8_t *fill_data_ptr = fill_data;
1236 for (; !gen.done(); gen.next()) {
1241 gen.size(), gen.addr(), chunk_addr);
1243 gpuDevice->getMemMgr()->writeRequest(chunk_addr, fill_data_ptr,
1245 gen.last() ? cb :
nullptr);
1246 fill_data_ptr += gen.size();
1253 [ = ] (
const uint64_t &)
1264 delete [] fill_data;
1301 auto base = std::make_unique<Addr[]>(num_queues);
1302 auto rptr = std::make_unique<Addr[]>(num_queues);
1303 auto wptr = std::make_unique<Addr[]>(num_queues);
1304 auto size = std::make_unique<Addr[]>(num_queues);
1305 auto processing = std::make_unique<bool[]>(num_queues);
1307 for (
int i = 0;
i < num_queues;
i++) {
1308 base[
i] = queues[
i]->base();
1309 rptr[
i] = queues[
i]->getRptr();
1310 wptr[
i] = queues[
i]->getWptr();
1311 size[
i] = queues[
i]->size();
1312 processing[
i] = queues[
i]->processing();
1323 const int num_rlc_queues = 2;
1328 auto rlc_info = std::make_unique<Addr[]>(num_rlc_queues);
1329 auto rlc_valid = std::make_unique<bool[]>(num_rlc_queues);
1330 auto rlc_base = std::make_unique<Addr[]>(num_rlc_queues);
1331 auto rlc_rptr = std::make_unique<Addr[]>(num_rlc_queues);
1332 auto rlc_global_rptr = std::make_unique<Addr[]>(num_rlc_queues);
1333 auto rlc_wptr = std::make_unique<Addr[]>(num_rlc_queues);
1334 auto rlc_size = std::make_unique<Addr[]>(num_rlc_queues);
1335 auto rlc_rptr_wb_addr = std::make_unique<Addr[]>(num_rlc_queues);
1336 auto rlc_processing = std::make_unique<bool[]>(num_rlc_queues);
1337 auto rlc_mqd_addr = std::make_unique<Addr[]>(num_rlc_queues);
1338 auto rlc_priv = std::make_unique<bool[]>(num_rlc_queues);
1339 auto rlc_static = std::make_unique<bool[]>(num_rlc_queues);
1340 auto rlc_mqd = std::make_unique<uint32_t[]>(num_rlc_queues * 128);
1344 for (
int i = 0;
i < num_rlc_queues;
i++) {
1345 rlc_valid[
i] = rlc_queues[
i]->valid();
1348 rlc_base[
i] = rlc_queues[
i]->base();
1349 rlc_rptr[
i] = rlc_queues[
i]->getRptr();
1350 rlc_global_rptr[
i] = rlc_queues[
i]->globalRptr();
1351 rlc_wptr[
i] = rlc_queues[
i]->getWptr();
1352 rlc_size[
i] = rlc_queues[
i]->size();
1353 rlc_rptr_wb_addr[
i] = rlc_queues[
i]->rptrWbAddr();
1354 rlc_processing[
i] = rlc_queues[
i]->processing();
1355 rlc_mqd_addr[
i] = rlc_queues[
i]->getMQDAddr();
1356 rlc_priv[
i] = rlc_queues[
i]->priv();
1357 rlc_static[
i] = rlc_queues[
i]->isStatic();
1358 memcpy(rlc_mqd.get() + 128*
i, rlc_queues[
i]->getMQD(),
1396 auto base = std::make_unique<Addr[]>(num_queues);
1397 auto rptr = std::make_unique<Addr[]>(num_queues);
1398 auto wptr = std::make_unique<Addr[]>(num_queues);
1399 auto size = std::make_unique<Addr[]>(num_queues);
1400 auto processing = std::make_unique<bool[]>(num_queues);
1414 for (
int i = 0;
i < num_queues;
i++) {
1415 queues[
i]->base(
base[
i]);
1416 queues[
i]->rptr(rptr[
i]);
1417 queues[
i]->wptr(wptr[
i]);
1418 queues[
i]->size(size[
i]);
1419 queues[
i]->processing(processing[
i]);
1424 const int num_rlc_queues = 2;
1425 auto rlc_info = std::make_unique<Addr[]>(num_rlc_queues);
1426 auto rlc_valid = std::make_unique<bool[]>(num_rlc_queues);
1427 auto rlc_base = std::make_unique<Addr[]>(num_rlc_queues);
1428 auto rlc_rptr = std::make_unique<Addr[]>(num_rlc_queues);
1429 auto rlc_global_rptr = std::make_unique<Addr[]>(num_rlc_queues);
1430 auto rlc_wptr = std::make_unique<Addr[]>(num_rlc_queues);
1431 auto rlc_size = std::make_unique<Addr[]>(num_rlc_queues);
1432 auto rlc_rptr_wb_addr = std::make_unique<Addr[]>(num_rlc_queues);
1433 auto rlc_processing = std::make_unique<bool[]>(num_rlc_queues);
1434 auto rlc_mqd_addr = std::make_unique<Addr[]>(num_rlc_queues);
1435 auto rlc_priv = std::make_unique<bool[]>(num_rlc_queues);
1436 auto rlc_static = std::make_unique<bool[]>(num_rlc_queues);
1437 auto rlc_mqd = std::make_unique<uint32_t[]>(num_rlc_queues * 128);
1458 for (
int i = 0;
i < num_rlc_queues;
i++) {
1459 rlc_queues[
i]->valid(rlc_valid[
i]);
1462 rlc_queues[
i]->base(rlc_base[
i]);
1463 rlc_queues[
i]->rptr(rlc_rptr[
i]);
1464 rlc_queues[
i]->setGlobalRptr(rlc_global_rptr[
i]);
1465 rlc_queues[
i]->wptr(rlc_wptr[
i]);
1466 rlc_queues[
i]->size(rlc_size[
i]);
1467 rlc_queues[
i]->rptrWbAddr(rlc_rptr_wb_addr[
i]);
1468 rlc_queues[
i]->processing(rlc_processing[
i]);
1469 rlc_queues[
i]->setMQDAddr(rlc_mqd_addr[
i]);
1470 rlc_queues[
i]->setPriv(rlc_priv[
i]);
1471 rlc_queues[
i]->setStatic(rlc_static[
i]);
1474 rlc_queues[
i]->setMQD(mqd);
1483 pkt->
getLE<uint32_t>());
1486 switch (mmio_offset) {
1512 uint32_t rb_size =
bits(pkt->
getLE<uint32_t>(), 6, 1);
1513 assert(rb_size >= 6 && rb_size <= 62);
1544 uint32_t rb_size =
bits(pkt->
getLE<uint32_t>(), 6, 1);
1545 assert(rb_size >= 6 && rb_size <= 62);
1624 uint32_t rb_size =
bits(
data, 6, 1);
1625 assert(rb_size >= 6 && rb_size <= 62);
1626 gfx.size(1 << (rb_size + 2));
1710 uint32_t rb_size =
bits(
data, 6, 1);
1711 assert(rb_size >= 6 && rb_size <= 62);
1712 page.size(1 << (rb_size + 2));
static constexpr int AMDGPU_MMHUB_PAGE_SIZE
Device model for an AMD GPU.
Translation range generators.
This class takes an arbitrary memory region (address/length pair) and generates a series of appropria...
void serialize(CheckpointOut &cp) const override
Serialize an object.
void unserialize(CheckpointIn &cp) override
Unserialize an object.
Wraps a std::function object in a DmaCallback.
void dmaReadVirt(Addr host_addr, unsigned size, DmaCallback *cb, void *data, Tick delay=0)
Initiate a DMA read from virtual address host_addr.
DmaVirtDevice(const Params &p)
void dmaWriteVirt(Addr host_addr, unsigned size, DmaCallback *b, void *data, Tick delay=0)
Initiate a DMA write from virtual address host_addr.
T getLE() const
Get the data in the packet byte swapped from little endian to host endian.
void setPageRptrLo(uint32_t data)
void unserialize(CheckpointIn &cp) override
Unserialize an object.
uint64_t getPageDoorbellOffset()
SDMAQueue gfx
Each SDMAEngine processes four queues: paging, gfx, rlc0, and rlc1, where RLC stands for Run List Con...
void ptePde(SDMAQueue *q, sdmaPtePde *pkt)
void setGfxRptrLo(uint32_t data)
void ptePdeCleanup(uint64_t *dmaBuffer)
void setGfxWptrLo(uint32_t data)
uint64_t getGfxDoorbellOffset()
void registerRLCQueue(Addr doorbell, Addr mqdAddr, SDMAQueueDesc *mqd, bool isStatic)
Methods for RLC queues.
void pollRegMemRead(SDMAQueue *q, uint32_t header, sdmaPollRegMem *pkt, uint32_t dma_buffer, int count)
void setPageDoorbellHi(uint32_t data)
void setGfxRptrHi(uint32_t data)
void writeDone(SDMAQueue *q, sdmaWrite *pkt, uint32_t *dmaBuffer)
void processRLC(Addr doorbellOffset, Addr wptrOffset)
void deallocateRLCQueues(bool unmap_static)
void copy(SDMAQueue *q, sdmaCopy *pkt)
Tick write(PacketPtr pkt) override
Inherited methods.
void writeMMIO(PacketPtr pkt, Addr mmio_offset)
Methods for setting the values of SDMA MMIO registers.
void setGfxSize(uint32_t data)
void fenceDone(SDMAQueue *q, sdmaFence *pkt)
void writeReadData(SDMAQueue *q, sdmaWrite *pkt, uint32_t *dmaBuffer)
void setGfxBaseLo(uint32_t data)
void processRLC0(Addr wptrOffset)
void processGfx(Addr wptrOffset)
Given a new write ptr offset, communicated to the GPU through a doorbell write, the SDMA engine proce...
void indirectBuffer(SDMAQueue *q, sdmaIndirectBuffer *pkt, uint32_t header)
void setGfxDoorbellOffsetHi(uint32_t data)
void constFill(SDMAQueue *q, sdmaConstFill *pkt, uint32_t header)
Addr getGARTAddr(Addr addr) const
Methods for translation.
void setPageDoorbellOffsetHi(uint32_t data)
void processRLC1(Addr wptrOffset)
void setGfxWptrHi(uint32_t data)
void setGfxDoorbellOffsetLo(uint32_t data)
void atomicData(SDMAQueue *q, uint32_t header, sdmaAtomic *pkt, uint64_t *dmaBuffer)
void processPage(Addr wptrOffset)
uint64_t getGfxDoorbell()
void decodeHeader(SDMAQueue *q, uint32_t data)
Reads the first DW (32 bits) (i.e., header) of an SDMA packet, which encodes the opcode and sub-opcod...
void setPageDoorbellOffsetLo(uint32_t data)
int getIHClientId(int _id)
Returns the client id for the Interrupt Handler.
uint64_t getPageDoorbell()
SDMAEngine(const SDMAEngineParams &p)
void setGPUDevice(AMDGPUDevice *gpu_device)
Addr getDeviceAddress(Addr raw_addr)
Translate an address in an SDMA packet.
void writeCleanup(uint32_t *dmaBuffer)
void constFillDone(SDMAQueue *q, sdmaConstFill *pkt, uint8_t *fill_data)
uint64_t pageDoorbellOffset
void atomicDone(SDMAQueue *q, uint32_t header, sdmaAtomic *pkt, uint64_t *dmaBuffer)
void setPageBaseHi(uint32_t data)
uint64_t gfxDoorbellOffset
bool pollRegMemFunc(uint32_t value, uint32_t reference, uint32_t func)
void setPageWptrHi(uint32_t data)
void unregisterRLCQueue(Addr doorbell, bool unmap_static)
void setPageWptrLo(uint32_t data)
void setGfxDoorbellLo(uint32_t data)
void copyReadData(SDMAQueue *q, sdmaCopy *pkt, uint8_t *dmaBuffer)
void trap(SDMAQueue *q, sdmaTrap *pkt)
AddrRangeList getAddrRanges() const override
Every PIO device is obliged to provide an implementation that returns the address ranges the device r...
void atomic(SDMAQueue *q, uint32_t header, sdmaAtomic *pkt)
void setPageDoorbellLo(uint32_t data)
void setGfxDoorbellHi(uint32_t data)
void setPageSize(uint32_t data)
void setPageBaseLo(uint32_t data)
void copyDone(SDMAQueue *q, sdmaCopy *pkt, uint8_t *dmaBuffer)
void copyCleanup(uint8_t *dmaBuffer)
void setGfxBaseHi(uint32_t data)
void ptePdeDone(SDMAQueue *q, sdmaPtePde *pkt, uint64_t *dmaBuffer)
TranslationGenPtr translate(Addr vaddr, Addr size) override
GPUController will perform DMA operations on VAs, and because page faults are not currently supported...
void setPageRptrHi(uint32_t data)
void decodeNext(SDMAQueue *q)
This method checks read and write pointers and starts decoding packets if the read pointer is less th...
void srbmWrite(SDMAQueue *q, uint32_t header, sdmaSRBMWrite *pkt)
void fence(SDMAQueue *q, sdmaFence *pkt)
void serialize(CheckpointOut &cp) const override
Serialize an object.
std::array< Addr, 2 > rlcInfo
static constexpr Tick sdma_delay
void pollRegMem(SDMAQueue *q, uint32_t header, sdmaPollRegMem *pkt)
Implements a poll reg/mem packet that polls an SRBM register or a memory location,...
The GPUCommandProcessor (CP) is responsible for accepting commands, in the form of HSA AQL packets,...
std::list< AddrRange > AddrRangeList
Convenience typedef for a collection of address ranges.
constexpr T bits(T val, unsigned first, unsigned last)
Extract the bitfield from position 'first' to 'last' (inclusive) from 'val' and right justify it.
constexpr T insertBits(T val, unsigned first, unsigned last, B bit_val)
Returns val with bits first to last set to the LSBs of bit_val.
void schedule(Event &event, Tick when)
#define panic(...)
This implements a cprintf based panic() function.
#define fatal_if(cond,...)
Conditional fatal macro that checks the supplied condition and only causes a fatal error if the condi...
#define UNSERIALIZE_UNIQUE_PTR_ARRAY(member, size)
#define SERIALIZE_UNIQUE_PTR_ARRAY(member, size)
Bitfield< 23, 20 > atomic
Bitfield< 24, 21 > opcode
Copyright (c) 2024 Arm Limited All rights reserved.
struct gem5::GEM5_PACKED sdmaFence
struct gem5::GEM5_PACKED sdmaConstFill
struct gem5::GEM5_PACKED sdmaTimestamp
struct gem5::GEM5_PACKED sdmaConstFillHeader
struct gem5::GEM5_PACKED sdmaAtomic
Tick curTick()
The universal simulation clock.
std::ostream CheckpointOut
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
@ SOC15_IH_CLIENTID_SDMA3
@ SOC15_IH_CLIENTID_SDMA4
@ SOC15_IH_CLIENTID_SDMA0
@ SOC15_IH_CLIENTID_SDMA1
@ SOC15_IH_CLIENTID_SDMA5
@ SOC15_IH_CLIENTID_SDMA2
@ SOC15_IH_CLIENTID_SDMA6
@ SOC15_IH_CLIENTID_SDMA7
struct gem5::GEM5_PACKED sdmaPtePde
struct gem5::GEM5_PACKED sdmaPollRegMem
struct gem5::GEM5_PACKED sdmaPollRegMemHeader
struct gem5::GEM5_PACKED sdmaPredExec
constexpr unsigned int SDMA_ATOMIC_ADD64
struct gem5::GEM5_PACKED sdmaWrite
struct gem5::GEM5_PACKED SDMAQueueDesc
Queue descriptor for SDMA-based user queues (RLC queues).
struct gem5::GEM5_PACKED sdmaAtomicHeader
struct gem5::GEM5_PACKED sdmaCopy
SDMA packets - see src/core/inc/sdma_registers.h in ROCR-Runtime.
struct gem5::GEM5_PACKED sdmaIndirectBuffer
struct gem5::GEM5_PACKED sdmaTrap
std::unique_ptr< TranslationGen > TranslationGenPtr
struct gem5::GEM5_PACKED sdmaSRBMWrite
struct gem5::GEM5_PACKED sdmaSRBMWriteHeader
struct gem5::GEM5_PACKED sdmaDummyTrap
struct gem5::GEM5_PACKED sdmaSemaphore
struct gem5::GEM5_PACKED sdmaCondExec
Declaration of the Packet class.
#define SDMA_SUBOP_COPY_SOA
#define SDMA_OP_DUMMY_TRAP
#define SDMA_SUBOP_PTEPDE_COPY
#define SDMA_SUBOP_COPY_LINEAR
#define SDMA_SUBOP_COPY_T2T_SUB_WIND
#define SDMA_SUBOP_TIMESTAMP_GET
#define SDMA_SUBOP_WRITE_TILED
#define SDMA_SUBOP_PTEPDE_GEN
#define SDMA_SUBOP_COPY_LINEAR_SUB_WIND
#define SDMA_SUBOP_COPY_LINEAR_PHY
#define SDMA_OP_POLL_REGMEM
#define SDMA_SUBOP_TIMESTAMP_GET_GLOBAL
#define SDMA_SUBOP_PTEPDE_COPY_BACKWARDS
#define SDMA_SUBOP_TIMESTAMP_SET
#define SDMA_OP_TIMESTAMP
#define SDMA_OP_CONST_FILL
#define SDMA_SUBOP_COPY_DIRTY_PAGE
#define SDMA_OP_NOP
Commands for the SDMA engine.
#define SDMA_SUBOP_WRITE_LINEAR
#define SDMA_SUBOP_PTEPDE_RMW
#define SDMA_OP_SRBM_WRITE
#define SDMA_SUBOP_POLL_MEM_VERIFY
#define SDMA_SUBOP_POLL_REG_WRITE_MEM
#define SDMA_SUBOP_COPY_TILED_SUB_WIND
#define SDMA_SUBOP_POLL_DBIT_WRITE_MEM
#define SDMA_SUBOP_COPY_TILED
#define mmSDMA_GFX_DOORBELL
#define mmSDMA_PAGE_RB_RPTR_ADDR_HI
#define mmSDMA_GFX_RB_WPTR_POLL_ADDR_LO
#define mmSDMA_PAGE_RB_BASE
#define mmSDMA_PAGE_RB_WPTR_POLL_ADDR_LO
#define mmSDMA_PAGE_DOORBELL
#define mmSDMA_GFX_DOORBELL_OFFSET
#define mmSDMA_PAGE_DOORBELL_OFFSET
#define mmSDMA_GFX_RB_CNTL
MMIO offsets for SDMA engine.
#define mmSDMA_GFX_RB_RPTR_ADDR_HI
#define mmSDMA_PAGE_RB_RPTR_ADDR_LO
#define mmSDMA_GFX_RB_RPTR_ADDR_LO
#define mmSDMA_GFX_RB_WPTR_POLL_ADDR_HI
#define mmSDMA_GFX_RB_BASE
#define mmSDMA_PAGE_RB_CNTL
#define mmSDMA_GFX_RB_BASE_HI
#define UNSERIALIZE_SCALAR(scalar)
#define SERIALIZE_SCALAR(scalar)
uint32_t sdmax_rlcx_rb_rptr_addr_hi
uint32_t sdmax_rlcx_rb_cntl
uint32_t sdmax_rlcx_rb_rptr_addr_lo
const std::string & name()