36#include "debug/SDMAData.hh"
37#include "debug/SDMAEngine.hh"
44#include "params/SDMAEngine.hh"
105 panic(
"Unknown SDMA id");
114 addr = (((
addr >> 12) << 3) << 12) | low_bits;
127 auto addr_range = *(tgen->begin());
128 Addr tmp_addr = addr_range.paddr;
139 Addr device_addr = 0;
143 device_addr = raw_addr -
gpuDevice->getVM().getMMHUBBase();
145 device_addr = tmp_addr -
gpuDevice->getVM().getMMHUBBase();
201 rlc0.rptrWbAddr(rptr_wb_addr);
202 rlc0.processing(
false);
204 rlc0.setMQDAddr(mqdAddr);
206 rlc0.setStatic(isStatic);
207 }
else if (!
rlc1.valid()) {
216 rlc1.rptrWbAddr(rptr_wb_addr);
217 rlc1.processing(
false);
219 rlc1.setMQDAddr(mqdAddr);
221 rlc1.setStatic(isStatic);
223 panic(
"No free RLCs. Check they are properly unmapped.");
232 if (!unmap_static &&
rlc0.isStatic()) {
246 [ = ] (
const uint32_t &) { });
249 warn(
"RLC0 SDMAMQD address invalid\n");
253 }
else if (
rlcInfo[1] == doorbell) {
254 if (!unmap_static &&
rlc1.isStatic()) {
268 [ = ] (
const uint32_t &) { });
271 warn(
"RLC1 SDMAMQD address invalid\n");
276 panic(
"Cannot unregister: no RLC queue at %#lx\n", doorbell);
296 gfx.setWptr(wptrOffset);
297 if (!
gfx.processing()) {
298 gfx.processing(
true);
307 page.setWptr(wptrOffset);
308 if (!
page.processing()) {
309 page.processing(
true);
318 if (
rlcInfo[0] == doorbellOffset) {
320 }
else if (
rlcInfo[1] == doorbellOffset) {
323 panic(
"Cannot process: no RLC queue at %#lx\n", doorbellOffset);
331 assert(
rlc0.valid());
333 rlc0.setWptr(wptrOffset);
334 if (!
rlc0.processing()) {
336 rlc0.processing(
true);
345 assert(
rlc1.valid());
347 rlc1.setWptr(wptrOffset);
348 if (!
rlc1.processing()) {
350 rlc1.processing(
true);
361 if (
q->rptr() !=
q->wptr()) {
367 [ = ] (
const uint32_t &
header)
369 dmaReadVirt(
q->rptr(),
sizeof(uint32_t), cb, &cb->dmaBuffer);
376 q->globalRptr(),
q->rptrWbAddr());
377 if (
q->rptrWbAddr()) {
379 [ = ](
const uint64_t &) { },
q->globalRptr());
382 q->processing(
false);
400 void *dmaBuffer =
nullptr;
407 uint32_t NOP_count = (
header >> 16) & 0x3FFF;
410 for (
int i = 0;
i < NOP_count; ++
i) {
411 if (
q->rptr() ==
q->wptr()) {
412 warn(
"NOP count is beyond wptr, ignoring remaining NOPs");
422 switch (sub_opcode) {
426 [ = ] (
const uint64_t &)
431 panic(
"SDMA_SUBOP_COPY_LINEAR_SUB_WIND not implemented");
434 panic(
"SDMA_SUBOP_COPY_TILED not implemented");
437 panic(
"SDMA_SUBOP_COPY_TILED_SUB_WIND not implemented");
440 panic(
"SDMA_SUBOP_COPY_T2T_SUB_WIND not implemented");
443 panic(
"SDMA_SUBOP_COPY_SOA not implemented");
446 panic(
"SDMA_SUBOP_COPY_DIRTY_PAGE not implemented");
449 panic(
"SDMA_SUBOP_COPY_LINEAR_PHY not implemented");
452 panic(
"SDMA unknown copy sub-opcode.");
458 switch (sub_opcode) {
462 [ = ] (
const uint64_t &)
467 panic(
"SDMA_SUBOP_WRITE_TILED not implemented.\n");
477 [ = ] (
const uint64_t &)
485 [ = ] (
const uint64_t &)
493 [ = ] (
const uint64_t &)
499 warn(
"SDMA_OP_SEM not implemented");
506 [ = ] (
const uint64_t &)
509 switch (sub_opcode) {
511 panic(
"SDMA_SUBOP_POLL_REG_WRITE_MEM not implemented");
514 panic(
"SDMA_SUBOP_POLL_DBIT_WRITE_MEM not implemented");
517 panic(
"SDMA_SUBOP_POLL_MEM_VERIFY not implemented");
525 warn(
"SDMA_OP_SEM not implemented");
532 [ = ] (
const uint64_t &)
540 [ = ] (
const uint64_t &)
546 switch (sub_opcode) {
551 [ = ] (
const uint64_t &)
556 panic(
"SDMA_SUBOP_PTEPDE_COPY not implemented");
559 panic(
"SDMA_SUBOP_PTEPDE_COPY not implemented");
562 panic(
"SDMA_SUBOP_PTEPDE_RMW not implemented");
573 switch (sub_opcode) {
583 warn(
"SDMA_OP_TIMESTAMP not implemented");
590 [ = ] (
const uint64_t &)
596 warn(
"SDMA_OP_PRE_EXE not implemented");
601 warn(
"SDMA_OP_DUMMY_TRAP not implemented");
605 panic(
"Invalid SDMA packet.\n");
620 uint32_t *dmaBuffer =
new uint32_t[pkt->
count];
631 int bufferSize =
sizeof(uint32_t) * pkt->
count;
632 q->incRptr(bufferSize);
635 for (
int i = 0;
i < pkt->
count; ++
i) {
644 "SDMA write to GART not implemented");
648 gpuDevice->getMemMgr()->writeRequest(mmhub_addr, (uint8_t *)dmaBuffer,
655 [ = ] (
const uint64_t &) {
writeDone(
q, pkt, dmaBuffer); });
670 auto system_ptr =
gpuDevice->CP()->system();
671 if (!system_ptr->isAtomicMode()) {
672 warn_once(
"SDMA cleanup assumes 2000 tick timing for completion."
673 " This has not been tested in timing mode\n");
707 uint8_t *dmaBuffer =
new uint8_t[pkt->
count];
717 uint8_t *buffer_ptr = dmaBuffer;
718 for (; !gen.done(); gen.next()) {
723 gen.size(), gen.addr(), chunk_addr);
725 gpuDevice->getMemMgr()->readRequest(chunk_addr, buffer_ptr,
727 gen.last() ? cb :
nullptr);
728 buffer_ptr += gen.size();
732 [ = ] (
const uint64_t &) {
copyReadData(
q, pkt, dmaBuffer); });
742 uint64_t *dmaBuffer64 =
reinterpret_cast<uint64_t *
>(dmaBuffer);
748 DPRINTF(SDMAData,
"Copy packet data:\n");
749 for (
int i = 0;
i < pkt->
count/8; ++
i) {
750 DPRINTF(SDMAData,
"%016lx\n", dmaBuffer64[
i]);
763 uint8_t *buffer_ptr = dmaBuffer;
764 for (; !gen.done(); gen.next()) {
769 gen.size(), gen.addr(), chunk_addr);
771 gpuDevice->getMemMgr()->writeRequest(chunk_addr, buffer_ptr,
773 gen.last() ? cb :
nullptr);
775 buffer_ptr += gen.size();
780 [ = ] (
const uint64_t &) {
copyDone(
q, pkt, dmaBuffer); });
786 if (
gpuDevice->getVM().inGARTRange(device_addr)) {
788 assert((pkt->
count % 8) == 0);
789 for (
int i = 0;
i < pkt->
count/8; ++
i) {
790 Addr gart_addr = device_addr +
i*8 -
gpuDevice->getVM().gartBase();
792 gart_addr, dmaBuffer64[
i]);
793 gpuDevice->getVM().gartTable[gart_addr] = dmaBuffer64[
i];
808 auto system_ptr =
gpuDevice->CP()->system();
809 if (!system_ptr->isAtomicMode()) {
810 warn_once(
"SDMA cleanup assumes 2000 tick timing for completion."
811 " This has not been tested in timing mode\n");
834 q->ib()->base(pkt->
base);
837 q->ib()->size(pkt->
size *
sizeof(uint32_t) + 1);
838 q->ib()->setWptr(pkt->
size *
sizeof(uint32_t));
879 uint32_t ring_id = (
q->queueType() ==
SDMAPage) ? 3 : 0;
882 int local_id =
getId();
884 if (
gpuDevice->getGfxVersion() == GfxVersion::gfx942) {
885 node_id =
getId() >> 2;
898 local_id =
getId() % 4;
903 gpuDevice->getIH()->submitInterruptCookie();
918 [[maybe_unused]] uint32_t reg_addr = pkt->
regAddr << 2;
919 uint32_t reg_mask = 0x00000000;
921 if (srbm_header.
byteEnable & 0x8) reg_mask |= 0xFF000000;
922 if (srbm_header.
byteEnable & 0x4) reg_mask |= 0x00FF0000;
923 if (srbm_header.
byteEnable & 0x2) reg_mask |= 0x0000FF00;
924 if (srbm_header.
byteEnable & 0x1) reg_mask |= 0x000000FF;
925 pkt->
data &= reg_mask;
928 reg_addr, pkt->
data);
954 "mask=%p, retry=%d, pinterval=%d\n", prm_header.
mode,
960 if (prm_header.
mode == 1) {
962 if (prm_header.
op == 0) {
964 [ = ] (
const uint32_t &dma_buffer) {
967 (
void *)&cb->dmaBuffer);
969 panic(
"SDMA poll mem operation not implemented.");
973 warn_once(
"SDMA poll reg is not implemented. If this is required for "
974 "correctness, an SRBM model needs to be implemented.");
986 uint32_t dma_buffer,
int count)
991 assert(prm_header.
mode == 1 && prm_header.
op == 0);
1003 [ = ] (
const uint32_t &dma_buffer) {
1006 (
void *)&cb->dmaBuffer);
1024 return value < reference;
1027 return value <= reference;
1030 return value == reference;
1033 return value != reference;
1036 return value >= reference;
1039 return value > reference;
1042 panic(
"SDMA POLL_REGMEM unknown comparison function.");
1059 uint64_t *dmaBuffer =
new uint64_t[pkt->
count];
1060 for (
int i = 0;
i < pkt->
count;
i++) {
1069 "SDMA write to GART not implemented");
1073 gpuDevice->getMemMgr()->writeRequest(mmhub_addr, (uint8_t *)dmaBuffer,
1074 sizeof(uint64_t) * pkt->
count, 0,
1081 [ = ] (
const uint64_t &) {
ptePdeDone(
q, pkt, dmaBuffer); });
1097 auto system_ptr =
gpuDevice->CP()->system();
1098 if (!system_ptr->isAtomicMode()) {
1099 warn_once(
"SDMA cleanup assumes 2000 tick timing for completion."
1100 " This has not been tested in timing mode\n");
1113 delete [] dmaBuffer;
1129 uint64_t *dmaBuffer =
new uint64_t;
1131 [ = ] (
const uint64_t &)
1138 uint64_t *dmaBuffer)
1148 int64_t dst_data = *dmaBuffer;
1149 int64_t src_data = pkt->
srcData;
1152 src_data, dst_data + src_data);
1155 *dmaBuffer = dst_data + src_data;
1158 [ = ] (
const uint64_t &)
1162 panic(
"Unsupported SDMA atomic opcode: %d\n", at_header.
opcode);
1168 uint64_t *dmaBuffer)
1194 int fill_bytes = (pkt->
count + 1) * (1 << fill_header.
fillsize);
1195 uint8_t *fill_data =
new uint8_t[fill_bytes];
1197 memset(fill_data, pkt->
srcData, fill_bytes);
1210 uint8_t *fill_data_ptr = fill_data;
1211 for (; !gen.done(); gen.next()) {
1216 gen.size(), gen.addr(), chunk_addr);
1218 gpuDevice->getMemMgr()->writeRequest(chunk_addr, fill_data_ptr,
1220 gen.last() ? cb :
nullptr);
1221 fill_data_ptr += gen.size();
1228 [ = ] (
const uint64_t &)
1239 delete [] fill_data;
1276 auto base = std::make_unique<Addr[]>(num_queues);
1277 auto rptr = std::make_unique<Addr[]>(num_queues);
1278 auto wptr = std::make_unique<Addr[]>(num_queues);
1279 auto size = std::make_unique<Addr[]>(num_queues);
1280 auto processing = std::make_unique<bool[]>(num_queues);
1282 for (
int i = 0;
i < num_queues;
i++) {
1283 base[
i] = queues[
i]->base();
1284 rptr[
i] = queues[
i]->getRptr();
1285 wptr[
i] = queues[
i]->getWptr();
1286 size[
i] = queues[
i]->size();
1287 processing[
i] = queues[
i]->processing();
1298 const int num_rlc_queues = 2;
1303 auto rlc_info = std::make_unique<Addr[]>(num_rlc_queues);
1304 auto rlc_valid = std::make_unique<bool[]>(num_rlc_queues);
1305 auto rlc_base = std::make_unique<Addr[]>(num_rlc_queues);
1306 auto rlc_rptr = std::make_unique<Addr[]>(num_rlc_queues);
1307 auto rlc_global_rptr = std::make_unique<Addr[]>(num_rlc_queues);
1308 auto rlc_wptr = std::make_unique<Addr[]>(num_rlc_queues);
1309 auto rlc_size = std::make_unique<Addr[]>(num_rlc_queues);
1310 auto rlc_rptr_wb_addr = std::make_unique<Addr[]>(num_rlc_queues);
1311 auto rlc_processing = std::make_unique<bool[]>(num_rlc_queues);
1312 auto rlc_mqd_addr = std::make_unique<Addr[]>(num_rlc_queues);
1313 auto rlc_priv = std::make_unique<bool[]>(num_rlc_queues);
1314 auto rlc_static = std::make_unique<bool[]>(num_rlc_queues);
1315 auto rlc_mqd = std::make_unique<uint32_t[]>(num_rlc_queues * 128);
1319 for (
int i = 0;
i < num_rlc_queues;
i++) {
1320 rlc_valid[
i] = rlc_queues[
i]->valid();
1323 rlc_base[
i] = rlc_queues[
i]->base();
1324 rlc_rptr[
i] = rlc_queues[
i]->getRptr();
1325 rlc_global_rptr[
i] = rlc_queues[
i]->globalRptr();
1326 rlc_wptr[
i] = rlc_queues[
i]->getWptr();
1327 rlc_size[
i] = rlc_queues[
i]->size();
1328 rlc_rptr_wb_addr[
i] = rlc_queues[
i]->rptrWbAddr();
1329 rlc_processing[
i] = rlc_queues[
i]->processing();
1330 rlc_mqd_addr[
i] = rlc_queues[
i]->getMQDAddr();
1331 rlc_priv[
i] = rlc_queues[
i]->priv();
1332 rlc_static[
i] = rlc_queues[
i]->isStatic();
1333 memcpy(rlc_mqd.get() + 128*
i, rlc_queues[
i]->getMQD(),
1371 auto base = std::make_unique<Addr[]>(num_queues);
1372 auto rptr = std::make_unique<Addr[]>(num_queues);
1373 auto wptr = std::make_unique<Addr[]>(num_queues);
1374 auto size = std::make_unique<Addr[]>(num_queues);
1375 auto processing = std::make_unique<bool[]>(num_queues);
1389 for (
int i = 0;
i < num_queues;
i++) {
1390 queues[
i]->base(
base[
i]);
1391 queues[
i]->rptr(rptr[
i]);
1392 queues[
i]->wptr(wptr[
i]);
1393 queues[
i]->size(size[
i]);
1394 queues[
i]->processing(processing[
i]);
1399 const int num_rlc_queues = 2;
1400 auto rlc_info = std::make_unique<Addr[]>(num_rlc_queues);
1401 auto rlc_valid = std::make_unique<bool[]>(num_rlc_queues);
1402 auto rlc_base = std::make_unique<Addr[]>(num_rlc_queues);
1403 auto rlc_rptr = std::make_unique<Addr[]>(num_rlc_queues);
1404 auto rlc_global_rptr = std::make_unique<Addr[]>(num_rlc_queues);
1405 auto rlc_wptr = std::make_unique<Addr[]>(num_rlc_queues);
1406 auto rlc_size = std::make_unique<Addr[]>(num_rlc_queues);
1407 auto rlc_rptr_wb_addr = std::make_unique<Addr[]>(num_rlc_queues);
1408 auto rlc_processing = std::make_unique<bool[]>(num_rlc_queues);
1409 auto rlc_mqd_addr = std::make_unique<Addr[]>(num_rlc_queues);
1410 auto rlc_priv = std::make_unique<bool[]>(num_rlc_queues);
1411 auto rlc_static = std::make_unique<bool[]>(num_rlc_queues);
1412 auto rlc_mqd = std::make_unique<uint32_t[]>(num_rlc_queues * 128);
1433 for (
int i = 0;
i < num_rlc_queues;
i++) {
1434 rlc_queues[
i]->valid(rlc_valid[
i]);
1437 rlc_queues[
i]->base(rlc_base[
i]);
1438 rlc_queues[
i]->rptr(rlc_rptr[
i]);
1439 rlc_queues[
i]->setGlobalRptr(rlc_global_rptr[
i]);
1440 rlc_queues[
i]->wptr(rlc_wptr[
i]);
1441 rlc_queues[
i]->size(rlc_size[
i]);
1442 rlc_queues[
i]->rptrWbAddr(rlc_rptr_wb_addr[
i]);
1443 rlc_queues[
i]->processing(rlc_processing[
i]);
1444 rlc_queues[
i]->setMQDAddr(rlc_mqd_addr[
i]);
1445 rlc_queues[
i]->setPriv(rlc_priv[
i]);
1446 rlc_queues[
i]->setStatic(rlc_static[
i]);
1449 rlc_queues[
i]->setMQD(mqd);
1458 pkt->
getLE<uint32_t>());
1461 switch (mmio_offset) {
1487 uint32_t rb_size =
bits(pkt->
getLE<uint32_t>(), 6, 1);
1488 assert(rb_size >= 6 && rb_size <= 62);
1519 uint32_t rb_size =
bits(pkt->
getLE<uint32_t>(), 6, 1);
1520 assert(rb_size >= 6 && rb_size <= 62);
1599 uint32_t rb_size =
bits(
data, 6, 1);
1600 assert(rb_size >= 6 && rb_size <= 62);
1601 gfx.size(1 << (rb_size + 2));
1685 uint32_t rb_size =
bits(
data, 6, 1);
1686 assert(rb_size >= 6 && rb_size <= 62);
1687 page.size(1 << (rb_size + 2));
static constexpr int AMDGPU_MMHUB_PAGE_SIZE
Device model for an AMD GPU.
Translation range generators.
This class takes an arbitrary memory region (address/length pair) and generates a series of appropria...
void serialize(CheckpointOut &cp) const override
Serialize an object.
void unserialize(CheckpointIn &cp) override
Unserialize an object.
Wraps a std::function object in a DmaCallback.
void dmaReadVirt(Addr host_addr, unsigned size, DmaCallback *cb, void *data, Tick delay=0)
Initiate a DMA read from virtual address host_addr.
DmaVirtDevice(const Params &p)
void dmaWriteVirt(Addr host_addr, unsigned size, DmaCallback *b, void *data, Tick delay=0)
Initiate a DMA write from virtual address host_addr.
T getLE() const
Get the data in the packet byte swapped from little endian to host endian.
void setPageRptrLo(uint32_t data)
void unserialize(CheckpointIn &cp) override
Unserialize an object.
uint64_t getPageDoorbellOffset()
SDMAQueue gfx
Each SDMAEngine processes four queues: paging, gfx, rlc0, and rlc1, where RLC stands for Run List Con...
void ptePde(SDMAQueue *q, sdmaPtePde *pkt)
void setGfxRptrLo(uint32_t data)
void ptePdeCleanup(uint64_t *dmaBuffer)
void setGfxWptrLo(uint32_t data)
uint64_t getGfxDoorbellOffset()
void registerRLCQueue(Addr doorbell, Addr mqdAddr, SDMAQueueDesc *mqd, bool isStatic)
Methods for RLC queues.
void pollRegMemRead(SDMAQueue *q, uint32_t header, sdmaPollRegMem *pkt, uint32_t dma_buffer, int count)
void setPageDoorbellHi(uint32_t data)
void setGfxRptrHi(uint32_t data)
void writeDone(SDMAQueue *q, sdmaWrite *pkt, uint32_t *dmaBuffer)
void processRLC(Addr doorbellOffset, Addr wptrOffset)
void deallocateRLCQueues(bool unmap_static)
void copy(SDMAQueue *q, sdmaCopy *pkt)
Tick write(PacketPtr pkt) override
Inherited methods.
void writeMMIO(PacketPtr pkt, Addr mmio_offset)
Methods for setting the values of SDMA MMIO registers.
void setGfxSize(uint32_t data)
void fenceDone(SDMAQueue *q, sdmaFence *pkt)
void writeReadData(SDMAQueue *q, sdmaWrite *pkt, uint32_t *dmaBuffer)
void setGfxBaseLo(uint32_t data)
void processRLC0(Addr wptrOffset)
void processGfx(Addr wptrOffset)
Given a new write ptr offset, communicated to the GPU through a doorbell write, the SDMA engine proce...
void setGfxDoorbellOffsetHi(uint32_t data)
void constFill(SDMAQueue *q, sdmaConstFill *pkt, uint32_t header)
Addr getGARTAddr(Addr addr) const
Methods for translation.
void setPageDoorbellOffsetHi(uint32_t data)
void processRLC1(Addr wptrOffset)
void setGfxWptrHi(uint32_t data)
void setGfxDoorbellOffsetLo(uint32_t data)
void atomicData(SDMAQueue *q, uint32_t header, sdmaAtomic *pkt, uint64_t *dmaBuffer)
void processPage(Addr wptrOffset)
uint64_t getGfxDoorbell()
void decodeHeader(SDMAQueue *q, uint32_t data)
Reads the first DW (32 bits) (i.e., header) of an SDMA packet, which encodes the opcode and sub-opcod...
void setPageDoorbellOffsetLo(uint32_t data)
int getIHClientId(int _id)
Returns the client id for the Interrupt Handler.
uint64_t getPageDoorbell()
SDMAEngine(const SDMAEngineParams &p)
void setGPUDevice(AMDGPUDevice *gpu_device)
Addr getDeviceAddress(Addr raw_addr)
Translate an address in an SDMA packet.
void writeCleanup(uint32_t *dmaBuffer)
void constFillDone(SDMAQueue *q, sdmaConstFill *pkt, uint8_t *fill_data)
uint64_t pageDoorbellOffset
void atomicDone(SDMAQueue *q, uint32_t header, sdmaAtomic *pkt, uint64_t *dmaBuffer)
void setPageBaseHi(uint32_t data)
uint64_t gfxDoorbellOffset
bool pollRegMemFunc(uint32_t value, uint32_t reference, uint32_t func)
void setPageWptrHi(uint32_t data)
void unregisterRLCQueue(Addr doorbell, bool unmap_static)
void setPageWptrLo(uint32_t data)
void setGfxDoorbellLo(uint32_t data)
void copyReadData(SDMAQueue *q, sdmaCopy *pkt, uint8_t *dmaBuffer)
void indirectBuffer(SDMAQueue *q, sdmaIndirectBuffer *pkt)
void trap(SDMAQueue *q, sdmaTrap *pkt)
AddrRangeList getAddrRanges() const override
Every PIO device is obliged to provide an implementation that returns the address ranges the device r...
void atomic(SDMAQueue *q, uint32_t header, sdmaAtomic *pkt)
void setPageDoorbellLo(uint32_t data)
void setGfxDoorbellHi(uint32_t data)
void setPageSize(uint32_t data)
void setPageBaseLo(uint32_t data)
void copyDone(SDMAQueue *q, sdmaCopy *pkt, uint8_t *dmaBuffer)
void copyCleanup(uint8_t *dmaBuffer)
void setGfxBaseHi(uint32_t data)
void ptePdeDone(SDMAQueue *q, sdmaPtePde *pkt, uint64_t *dmaBuffer)
TranslationGenPtr translate(Addr vaddr, Addr size) override
GPUController will perform DMA operations on VAs, and because page faults are not currently supported...
void setPageRptrHi(uint32_t data)
void decodeNext(SDMAQueue *q)
This method checks read and write pointers and starts decoding packets if the read pointer is less th...
void srbmWrite(SDMAQueue *q, uint32_t header, sdmaSRBMWrite *pkt)
void fence(SDMAQueue *q, sdmaFence *pkt)
void serialize(CheckpointOut &cp) const override
Serialize an object.
std::array< Addr, 2 > rlcInfo
void pollRegMem(SDMAQueue *q, uint32_t header, sdmaPollRegMem *pkt)
Implements a poll reg/mem packet that polls an SRBM register or a memory location,...
The GPUCommandProcessor (CP) is responsible for accepting commands, in the form of HSA AQL packets,...
std::list< AddrRange > AddrRangeList
Convenience typedef for a collection of address ranges.
constexpr T bits(T val, unsigned first, unsigned last)
Extract the bitfield from position 'first' to 'last' (inclusive) from 'val' and right justify it.
constexpr T insertBits(T val, unsigned first, unsigned last, B bit_val)
Returns val with bits first to last set to the LSBs of bit_val.
void schedule(Event &event, Tick when)
#define panic(...)
This implements a cprintf based panic() function.
#define fatal_if(cond,...)
Conditional fatal macro that checks the supplied condition and only causes a fatal error if the condi...
#define UNSERIALIZE_UNIQUE_PTR_ARRAY(member, size)
#define SERIALIZE_UNIQUE_PTR_ARRAY(member, size)
Bitfield< 23, 20 > atomic
Bitfield< 24, 21 > opcode
Copyright (c) 2024 Arm Limited All rights reserved.
struct gem5::GEM5_PACKED sdmaFence
struct gem5::GEM5_PACKED sdmaConstFill
struct gem5::GEM5_PACKED sdmaTimestamp
struct gem5::GEM5_PACKED sdmaConstFillHeader
struct gem5::GEM5_PACKED sdmaAtomic
Tick curTick()
The universal simulation clock.
std::ostream CheckpointOut
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
@ SOC15_IH_CLIENTID_SDMA3
@ SOC15_IH_CLIENTID_SDMA4
@ SOC15_IH_CLIENTID_SDMA0
@ SOC15_IH_CLIENTID_SDMA1
@ SOC15_IH_CLIENTID_SDMA5
@ SOC15_IH_CLIENTID_SDMA2
@ SOC15_IH_CLIENTID_SDMA6
@ SOC15_IH_CLIENTID_SDMA7
struct gem5::GEM5_PACKED sdmaPtePde
struct gem5::GEM5_PACKED sdmaPollRegMem
struct gem5::GEM5_PACKED sdmaPollRegMemHeader
struct gem5::GEM5_PACKED sdmaPredExec
constexpr unsigned int SDMA_ATOMIC_ADD64
struct gem5::GEM5_PACKED sdmaWrite
struct gem5::GEM5_PACKED SDMAQueueDesc
Queue descriptor for SDMA-based user queues (RLC queues).
struct gem5::GEM5_PACKED sdmaAtomicHeader
struct gem5::GEM5_PACKED sdmaCopy
SDMA packets - see src/core/inc/sdma_registers.h in ROCR-Runtime.
struct gem5::GEM5_PACKED sdmaIndirectBuffer
struct gem5::GEM5_PACKED sdmaTrap
std::unique_ptr< TranslationGen > TranslationGenPtr
struct gem5::GEM5_PACKED sdmaSRBMWrite
struct gem5::GEM5_PACKED sdmaSRBMWriteHeader
struct gem5::GEM5_PACKED sdmaDummyTrap
struct gem5::GEM5_PACKED sdmaSemaphore
struct gem5::GEM5_PACKED sdmaCondExec
Declaration of the Packet class.
#define SDMA_SUBOP_COPY_SOA
#define SDMA_OP_DUMMY_TRAP
#define SDMA_SUBOP_PTEPDE_COPY
#define SDMA_SUBOP_COPY_LINEAR
#define SDMA_SUBOP_COPY_T2T_SUB_WIND
#define SDMA_SUBOP_TIMESTAMP_GET
#define SDMA_SUBOP_WRITE_TILED
#define SDMA_SUBOP_PTEPDE_GEN
#define SDMA_SUBOP_COPY_LINEAR_SUB_WIND
#define SDMA_SUBOP_COPY_LINEAR_PHY
#define SDMA_OP_POLL_REGMEM
#define SDMA_SUBOP_TIMESTAMP_GET_GLOBAL
#define SDMA_SUBOP_PTEPDE_COPY_BACKWARDS
#define SDMA_SUBOP_TIMESTAMP_SET
#define SDMA_OP_TIMESTAMP
#define SDMA_OP_CONST_FILL
#define SDMA_SUBOP_COPY_DIRTY_PAGE
#define SDMA_OP_NOP
Commands for the SDMA engine.
#define SDMA_SUBOP_WRITE_LINEAR
#define SDMA_SUBOP_PTEPDE_RMW
#define SDMA_OP_SRBM_WRITE
#define SDMA_SUBOP_POLL_MEM_VERIFY
#define SDMA_SUBOP_POLL_REG_WRITE_MEM
#define SDMA_SUBOP_COPY_TILED_SUB_WIND
#define SDMA_SUBOP_POLL_DBIT_WRITE_MEM
#define SDMA_SUBOP_COPY_TILED
#define mmSDMA_GFX_DOORBELL
#define mmSDMA_PAGE_RB_RPTR_ADDR_HI
#define mmSDMA_GFX_RB_WPTR_POLL_ADDR_LO
#define mmSDMA_PAGE_RB_BASE
#define mmSDMA_PAGE_RB_WPTR_POLL_ADDR_LO
#define mmSDMA_PAGE_DOORBELL
#define mmSDMA_GFX_DOORBELL_OFFSET
#define mmSDMA_PAGE_DOORBELL_OFFSET
#define mmSDMA_GFX_RB_CNTL
MMIO offsets for SDMA engine.
#define mmSDMA_GFX_RB_RPTR_ADDR_HI
#define mmSDMA_PAGE_RB_RPTR_ADDR_LO
#define mmSDMA_GFX_RB_RPTR_ADDR_LO
#define mmSDMA_GFX_RB_WPTR_POLL_ADDR_HI
#define mmSDMA_GFX_RB_BASE
#define mmSDMA_PAGE_RB_CNTL
#define mmSDMA_GFX_RB_BASE_HI
#define UNSERIALIZE_SCALAR(scalar)
#define SERIALIZE_SCALAR(scalar)
uint32_t sdmax_rlcx_rb_rptr_addr_hi
uint32_t sdmax_rlcx_rb_cntl
uint32_t sdmax_rlcx_rb_rptr_addr_lo
const std::string & name()