36#include "debug/SDMAData.hh"
37#include "debug/SDMAEngine.hh"
44#include "params/SDMAEngine.hh"
51 gfxDoorbell(0), gfxDoorbellOffset(0), gfxWptr(0), pageBase(0),
52 pageRptr(0), pageDoorbell(0), pageDoorbellOffset(0),
53 pageWptr(0), gpuDevice(nullptr), walker(
p.walker),
54 mmioBase(
p.mmio_base), mmioSize(
p.mmio_size)
105 panic(
"Unknown SDMA id");
114 addr = (((
addr >> 12) << 3) << 12) | low_bits;
127 auto addr_range = *(tgen->begin());
128 Addr tmp_addr = addr_range.paddr;
139 Addr device_addr = 0;
223 panic(
"No free RLCs. Check they are properly unmapped.");
246 [ = ] (
const uint32_t &) { });
249 warn(
"RLC0 SDMAMQD address invalid\n");
253 }
else if (
rlcInfo[1] == doorbell) {
268 [ = ] (
const uint32_t &) { });
271 warn(
"RLC1 SDMAMQD address invalid\n");
276 panic(
"Cannot unregister: no RLC queue at %#lx\n", doorbell);
318 if (
rlcInfo[0] == doorbellOffset) {
320 }
else if (
rlcInfo[1] == doorbellOffset) {
323 panic(
"Cannot process: no RLC queue at %#lx\n", doorbellOffset);
361 if (
q->rptr() !=
q->wptr()) {
367 [ = ] (
const uint32_t &
header)
369 dmaReadVirt(
q->rptr(),
sizeof(uint32_t), cb, &cb->dmaBuffer);
376 q->globalRptr(),
q->rptrWbAddr());
377 if (
q->rptrWbAddr()) {
379 [ = ](
const uint64_t &) { },
q->globalRptr());
382 q->processing(
false);
400 void *dmaBuffer =
nullptr;
406 uint32_t NOP_count = (
header >> 16) & 0x3FFF;
409 for (
int i = 0;
i < NOP_count; ++
i) {
410 if (
q->rptr() ==
q->wptr()) {
411 warn(
"NOP count is beyond wptr, ignoring remaining NOPs");
421 switch (sub_opcode) {
425 [ = ] (
const uint64_t &)
430 panic(
"SDMA_SUBOP_COPY_LINEAR_SUB_WIND not implemented");
433 panic(
"SDMA_SUBOP_COPY_TILED not implemented");
436 panic(
"SDMA_SUBOP_COPY_TILED_SUB_WIND not implemented");
439 panic(
"SDMA_SUBOP_COPY_T2T_SUB_WIND not implemented");
442 panic(
"SDMA_SUBOP_COPY_SOA not implemented");
445 panic(
"SDMA_SUBOP_COPY_DIRTY_PAGE not implemented");
448 panic(
"SDMA_SUBOP_COPY_LINEAR_PHY not implemented");
451 panic(
"SDMA unknown copy sub-opcode.");
457 switch (sub_opcode) {
461 [ = ] (
const uint64_t &)
466 panic(
"SDMA_SUBOP_WRITE_TILED not implemented.\n");
476 [ = ] (
const uint64_t &)
484 [ = ] (
const uint64_t &)
492 [ = ] (
const uint64_t &)
498 warn(
"SDMA_OP_SEM not implemented");
507 [ = ] (
const uint64_t &)
510 switch (sub_opcode) {
512 panic(
"SDMA_SUBOP_POLL_REG_WRITE_MEM not implemented");
515 panic(
"SDMA_SUBOP_POLL_DBIT_WRITE_MEM not implemented");
518 panic(
"SDMA_SUBOP_POLL_MEM_VERIFY not implemented");
526 warn(
"SDMA_OP_SEM not implemented");
535 [ = ] (
const uint64_t &)
543 [ = ] (
const uint64_t &)
549 switch (sub_opcode) {
554 [ = ] (
const uint64_t &)
559 panic(
"SDMA_SUBOP_PTEPDE_COPY not implemented");
562 panic(
"SDMA_SUBOP_PTEPDE_COPY not implemented");
565 panic(
"SDMA_SUBOP_PTEPDE_RMW not implemented");
576 switch (sub_opcode) {
586 warn(
"SDMA_OP_TIMESTAMP not implemented");
595 [ = ] (
const uint64_t &)
601 warn(
"SDMA_OP_PRE_EXE not implemented");
606 warn(
"SDMA_OP_DUMMY_TRAP not implemented");
610 panic(
"Invalid SDMA packet.\n");
625 uint32_t *dmaBuffer =
new uint32_t[pkt->
count];
636 int bufferSize =
sizeof(uint32_t) * pkt->
count;
637 q->incRptr(bufferSize);
640 for (
int i = 0;
i < pkt->
count; ++
i) {
649 "SDMA write to GART not implemented");
660 [ = ] (
const uint64_t &) {
writeDone(
q, pkt, dmaBuffer); });
676 if (!system_ptr->isAtomicMode()) {
677 warn_once(
"SDMA cleanup assumes 2000 tick timing for completion."
678 " This has not been tested in timing mode\n");
712 uint8_t *dmaBuffer =
new uint8_t[pkt->
count];
722 uint8_t *buffer_ptr = dmaBuffer;
723 for (; !gen.done(); gen.next()) {
728 gen.size(), gen.addr(), chunk_addr);
732 gen.last() ? cb :
nullptr);
733 buffer_ptr += gen.size();
737 [ = ] (
const uint64_t &) {
copyReadData(
q, pkt, dmaBuffer); });
747 uint64_t *dmaBuffer64 =
reinterpret_cast<uint64_t *
>(dmaBuffer);
753 DPRINTF(SDMAData,
"Copy packet data:\n");
754 for (
int i = 0;
i < pkt->
count/8; ++
i) {
755 DPRINTF(SDMAData,
"%016lx\n", dmaBuffer64[
i]);
768 uint8_t *buffer_ptr = dmaBuffer;
769 for (; !gen.done(); gen.next()) {
774 gen.size(), gen.addr(), chunk_addr);
778 gen.last() ? cb :
nullptr);
780 buffer_ptr += gen.size();
785 [ = ] (
const uint64_t &) {
copyDone(
q, pkt, dmaBuffer); });
793 assert((pkt->
count % 8) == 0);
794 for (
int i = 0;
i < pkt->
count/8; ++
i) {
797 gart_addr, dmaBuffer64[
i]);
814 if (!system_ptr->isAtomicMode()) {
815 warn_once(
"SDMA cleanup assumes 2000 tick timing for completion."
816 " This has not been tested in timing mode\n");
839 q->ib()->base(pkt->
base);
842 q->ib()->size(pkt->
size *
sizeof(uint32_t) + 1);
843 q->ib()->setWptr(pkt->
size *
sizeof(uint32_t));
884 uint32_t ring_id = (
q->queueType() ==
SDMAPage) ? 3 : 0;
887 int local_id =
getId();
905 [[maybe_unused]] uint32_t reg_addr = pkt->
regAddr << 2;
906 uint32_t reg_mask = 0x00000000;
908 if (
header->byteEnable & 0x8) reg_mask |= 0xFF000000;
909 if (
header->byteEnable & 0x4) reg_mask |= 0x00FF0000;
910 if (
header->byteEnable & 0x2) reg_mask |= 0x0000FF00;
911 if (
header->byteEnable & 0x1) reg_mask |= 0x000000FF;
912 pkt->
data &= reg_mask;
915 reg_addr, pkt->
data);
936 "mask=%p, retry=%d, pinterval=%d\n",
header->mode,
header->func,
946 [ = ] (
const uint32_t &dma_buffer) {
949 (
void *)&cb->dmaBuffer);
951 panic(
"SDMA poll mem operation not implemented.");
955 warn_once(
"SDMA poll reg is not implemented. If this is required for "
956 "correctness, an SRBM model needs to be implemented.");
983 [ = ] (
const uint32_t &dma_buffer) {
986 (
void *)&cb->dmaBuffer);
1005 return value < reference;
1008 return value <= reference;
1011 return value == reference;
1014 return value != reference;
1017 return value >= reference;
1020 return value > reference;
1023 panic(
"SDMA POLL_REGMEM unknown comparison function.");
1040 uint64_t *dmaBuffer =
new uint64_t[pkt->
count];
1041 for (
int i = 0;
i < pkt->
count;
i++) {
1050 "SDMA write to GART not implemented");
1055 sizeof(uint64_t) * pkt->
count, 0,
1062 [ = ] (
const uint64_t &) {
ptePdeDone(
q, pkt, dmaBuffer); });
1079 if (!system_ptr->isAtomicMode()) {
1080 warn_once(
"SDMA cleanup assumes 2000 tick timing for completion."
1081 " This has not been tested in timing mode\n");
1094 delete [] dmaBuffer;
1106 uint64_t *dmaBuffer =
new uint64_t;
1108 [ = ] (
const uint64_t &)
1115 uint64_t *dmaBuffer)
1122 int64_t dst_data = *dmaBuffer;
1123 int64_t src_data = pkt->
srcData;
1126 src_data, dst_data + src_data);
1129 *dmaBuffer = dst_data + src_data;
1132 [ = ] (
const uint64_t &)
1136 panic(
"Unsupported SDMA atomic opcode: %d\n",
header->opcode);
1142 uint64_t *dmaBuffer)
1166 int fill_bytes = (pkt->
count + 1) * (1 << fill_header.
fillsize);
1167 uint8_t *fill_data =
new uint8_t[fill_bytes];
1169 memset(fill_data, pkt->
srcData, fill_bytes);
1182 uint8_t *fill_data_ptr = fill_data;
1183 for (; !gen.done(); gen.next()) {
1188 gen.size(), gen.addr(), chunk_addr);
1192 gen.last() ? cb :
nullptr);
1193 fill_data_ptr += gen.size();
1200 [ = ] (
const uint64_t &)
1211 delete [] fill_data;
1249 Addr rptr[num_queues];
1250 Addr wptr[num_queues];
1251 Addr size[num_queues];
1252 bool processing[num_queues];
1254 for (
int i = 0;
i < num_queues;
i++) {
1255 base[
i] = queues[
i]->base();
1256 rptr[
i] = queues[
i]->getRptr();
1257 wptr[
i] = queues[
i]->getWptr();
1258 size[
i] = queues[
i]->size();
1259 processing[
i] = queues[
i]->processing();
1288 Addr rptr[num_queues];
1289 Addr wptr[num_queues];
1290 Addr size[num_queues];
1291 bool processing[num_queues];
1305 for (
int i = 0;
i < num_queues;
i++) {
1306 queues[
i]->base(
base[
i]);
1307 queues[
i]->rptr(rptr[
i]);
1308 queues[
i]->wptr(wptr[
i]);
1309 queues[
i]->size(size[
i]);
1310 queues[
i]->processing(processing[
i]);
1318 pkt->
getLE<uint32_t>());
1321 switch (mmio_offset) {
1347 uint32_t rb_size =
bits(pkt->
getLE<uint32_t>(), 6, 1);
1348 assert(rb_size >= 6 && rb_size <= 62);
1379 uint32_t rb_size =
bits(pkt->
getLE<uint32_t>(), 6, 1);
1380 assert(rb_size >= 6 && rb_size <= 62);
1459 uint32_t rb_size =
bits(
data, 6, 1);
1460 assert(rb_size >= 6 && rb_size <= 62);
1545 uint32_t rb_size =
bits(
data, 6, 1);
1546 assert(rb_size >= 6 && rb_size <= 62);
static constexpr int AMDGPU_MMHUB_PAGE_SIZE
Device model for an AMD GPU.
void setDoorbellType(uint32_t offset, QueueType qt, int ip_id=0)
Set handles to GPU blocks.
void unsetDoorbell(uint32_t offset)
void setRegVal(uint64_t addr, uint32_t value)
AMDGPUInterruptHandler * getIH()
Get handles to GPU blocks.
AMDGPUMemoryManager * getMemMgr()
RequestorID vramRequestorId()
Methods related to translations and system/device memory.
void setSDMAEngine(Addr offset, SDMAEngine *eng)
GPUCommandProcessor * CP()
void submitInterruptCookie()
void prepareInterruptCookie(ContextID cntxtId, uint32_t ring_id, uint32_t client_id, uint32_t source_id, unsigned node_id)
void writeRequest(Addr addr, uint8_t *data, int size, Request::Flags flag, Event *callback)
Write size amount of data to device memory at addr using flags and callback.
void readRequest(Addr addr, uint8_t *data, int size, Request::Flags flag, Event *callback)
Read size amount of data from device memory at addr using flags and callback.
Translation range generators.
std::unordered_map< uint64_t, uint64_t > gartTable
Copy of GART table.
bool inAGP(Addr vaddr)
Methods for resolving apertures.
Addr gartBase()
Return base address of GART table in framebuffer.
bool inGARTRange(Addr paddr)
This class takes an arbitrary memory region (address/length pair) and generates a series of appropria...
void serialize(CheckpointOut &cp) const override
Serialize an object.
void unserialize(CheckpointIn &cp) override
Unserialize an object.
Wraps a std::function object in a DmaCallback.
void dmaReadVirt(Addr host_addr, unsigned size, DmaCallback *cb, void *data, Tick delay=0)
Initiate a DMA read from virtual address host_addr.
void dmaWriteVirt(Addr host_addr, unsigned size, DmaCallback *b, void *data, Tick delay=0)
Initiate a DMA write to virtual address host_addr.
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
T getLE() const
Get the data in the packet byte swapped from little endian to host endian.
void setMQD(SDMAQueueDesc *mqd)
void setMQDAddr(Addr mqdAddr)
void setStatic(bool isStatic)
void incRptr(uint32_t value)
System DMA Engine class for AMD dGPU.
void setPageRptrLo(uint32_t data)
void unserialize(CheckpointIn &cp) override
Unserialize an object.
uint64_t getPageDoorbellOffset()
SDMAQueue gfx
Each SDMAEngine processes four queues: paging, gfx, rlc0, and rlc1, where RLC stands for Run List Con...
void ptePde(SDMAQueue *q, sdmaPtePde *pkt)
void setGfxRptrLo(uint32_t data)
void ptePdeCleanup(uint64_t *dmaBuffer)
void setGfxWptrLo(uint32_t data)
uint64_t getGfxDoorbellOffset()
void registerRLCQueue(Addr doorbell, Addr mqdAddr, SDMAQueueDesc *mqd, bool isStatic)
Methods for RLC queues.
void setPageDoorbellHi(uint32_t data)
void setGfxRptrHi(uint32_t data)
void writeDone(SDMAQueue *q, sdmaWrite *pkt, uint32_t *dmaBuffer)
void processRLC(Addr doorbellOffset, Addr wptrOffset)
void deallocateRLCQueues(bool unmap_static)
void copy(SDMAQueue *q, sdmaCopy *pkt)
Tick write(PacketPtr pkt) override
Inherited methods.
void writeMMIO(PacketPtr pkt, Addr mmio_offset)
Methods for setting the values of SDMA MMIO registers.
void setGfxSize(uint32_t data)
void fenceDone(SDMAQueue *q, sdmaFence *pkt)
void writeReadData(SDMAQueue *q, sdmaWrite *pkt, uint32_t *dmaBuffer)
void setGfxBaseLo(uint32_t data)
void processRLC0(Addr wptrOffset)
void processGfx(Addr wptrOffset)
Given a new write ptr offset, communicated to the GPU through a doorbell write, the SDMA engine proce...
void setGfxDoorbellOffsetHi(uint32_t data)
void constFill(SDMAQueue *q, sdmaConstFill *pkt, uint32_t header)
void atomic(SDMAQueue *q, sdmaAtomicHeader *header, sdmaAtomic *pkt)
Addr getGARTAddr(Addr addr) const
Methods for translation.
void setPageDoorbellOffsetHi(uint32_t data)
void processRLC1(Addr wptrOffset)
void setGfxWptrHi(uint32_t data)
void setGfxDoorbellOffsetLo(uint32_t data)
void processPage(Addr wptrOffset)
uint64_t getGfxDoorbell()
void decodeHeader(SDMAQueue *q, uint32_t data)
Reads the first DW (32 bits) (i.e., header) of an SDMA packet, which encodes the opcode and sub-opcod...
void setPageDoorbellOffsetLo(uint32_t data)
int getIHClientId(int _id)
Returns the client id for the Interrupt Handler.
uint64_t getPageDoorbell()
SDMAEngine(const SDMAEngineParams &p)
void setGPUDevice(AMDGPUDevice *gpu_device)
Addr getDeviceAddress(Addr raw_addr)
Translate an address in an SDMA packet.
void writeCleanup(uint32_t *dmaBuffer)
void constFillDone(SDMAQueue *q, sdmaConstFill *pkt, uint8_t *fill_data)
uint64_t pageDoorbellOffset
void setPageBaseHi(uint32_t data)
uint64_t gfxDoorbellOffset
bool pollRegMemFunc(uint32_t value, uint32_t reference, uint32_t func)
void setPageWptrHi(uint32_t data)
void unregisterRLCQueue(Addr doorbell, bool unmap_static)
void setPageWptrLo(uint32_t data)
void pollRegMemRead(SDMAQueue *q, sdmaPollRegMemHeader *header, sdmaPollRegMem *pkt, uint32_t dma_buffer, int count)
void setGfxDoorbellLo(uint32_t data)
void copyReadData(SDMAQueue *q, sdmaCopy *pkt, uint8_t *dmaBuffer)
void indirectBuffer(SDMAQueue *q, sdmaIndirectBuffer *pkt)
void srbmWrite(SDMAQueue *q, sdmaSRBMWriteHeader *header, sdmaSRBMWrite *pkt)
void atomicData(SDMAQueue *q, sdmaAtomicHeader *header, sdmaAtomic *pkt, uint64_t *dmaBuffer)
void trap(SDMAQueue *q, sdmaTrap *pkt)
AddrRangeList getAddrRanges() const override
Every PIO device is obliged to provide an implementation that returns the address ranges the device r...
void setPageDoorbellLo(uint32_t data)
void setGfxDoorbellHi(uint32_t data)
void setPageSize(uint32_t data)
void setPageBaseLo(uint32_t data)
void copyDone(SDMAQueue *q, sdmaCopy *pkt, uint8_t *dmaBuffer)
void copyCleanup(uint8_t *dmaBuffer)
void setGfxBaseHi(uint32_t data)
void ptePdeDone(SDMAQueue *q, sdmaPtePde *pkt, uint64_t *dmaBuffer)
TranslationGenPtr translate(Addr vaddr, Addr size) override
GPUController will perform DMA operations on VAs, and because page faults are not currently supported...
void setPageRptrHi(uint32_t data)
void decodeNext(SDMAQueue *q)
This method checks read and write pointers and starts decoding packets if the read pointer is less th...
void fence(SDMAQueue *q, sdmaFence *pkt)
void atomicDone(SDMAQueue *q, sdmaAtomicHeader *header, sdmaAtomic *pkt, uint64_t *dmaBuffer)
void serialize(CheckpointOut &cp) const override
Serialize an object.
std::array< Addr, 2 > rlcInfo
void pollRegMem(SDMAQueue *q, sdmaPollRegMemHeader *header, sdmaPollRegMem *pkt)
Implements a poll reg/mem packet that polls an SRBM register or a memory location,...
void setDevRequestor(RequestorID mid)
The GPUCommandProcessor (CP) is responsible for accepting commands, in the form of HSA AQL packets,...
constexpr T bits(T val, unsigned first, unsigned last)
Extract the bitfield from position 'first' to 'last' (inclusive) from 'val' and right justify it.
constexpr T insertBits(T val, unsigned first, unsigned last, B bit_val)
Returns val with bits first to last set to the LSBs of bit_val.
void schedule(Event &event, Tick when)
#define panic(...)
This implements a cprintf based panic() function.
#define fatal_if(cond,...)
Conditional fatal macro that checks the supplied condition and only causes a fatal error if the condi...
#define UNSERIALIZE_ARRAY(member, size)
#define SERIALIZE_ARRAY(member, size)
Bitfield< 23, 20 > atomic
Bitfield< 24, 21 > opcode
Copyright (c) 2024 Arm Limited All rights reserved.
struct gem5::GEM5_PACKED sdmaFence
struct gem5::GEM5_PACKED sdmaConstFill
struct gem5::GEM5_PACKED sdmaAtomic
Tick curTick()
The universal simulation clock.
std::ostream CheckpointOut
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
@ SOC15_IH_CLIENTID_SDMA3
@ SOC15_IH_CLIENTID_SDMA4
@ SOC15_IH_CLIENTID_SDMA0
@ SOC15_IH_CLIENTID_SDMA1
@ SOC15_IH_CLIENTID_SDMA5
@ SOC15_IH_CLIENTID_SDMA2
@ SOC15_IH_CLIENTID_SDMA6
@ SOC15_IH_CLIENTID_SDMA7
struct gem5::GEM5_PACKED sdmaPtePde
struct gem5::GEM5_PACKED sdmaPollRegMem
struct gem5::GEM5_PACKED sdmaPollRegMemHeader
constexpr unsigned int SDMA_ATOMIC_ADD64
struct gem5::GEM5_PACKED sdmaWrite
struct gem5::GEM5_PACKED sdmaAtomicHeader
struct gem5::GEM5_PACKED sdmaCopy
SDMA packets - see src/core/inc/sdma_registers.h in ROCR-Runtime.
struct gem5::GEM5_PACKED sdmaIndirectBuffer
struct gem5::GEM5_PACKED sdmaTrap
struct gem5::GEM5_PACKED sdmaSRBMWrite
struct gem5::GEM5_PACKED sdmaSRBMWriteHeader
std::unique_ptr< TranslationGen > TranslationGenPtr
Declaration of the Packet class.
#define SDMA_SUBOP_COPY_SOA
#define SDMA_OP_DUMMY_TRAP
#define SDMA_SUBOP_PTEPDE_COPY
#define SDMA_SUBOP_COPY_LINEAR
#define SDMA_SUBOP_COPY_T2T_SUB_WIND
#define SDMA_SUBOP_TIMESTAMP_GET
#define SDMA_SUBOP_WRITE_TILED
#define SDMA_SUBOP_PTEPDE_GEN
#define SDMA_SUBOP_COPY_LINEAR_SUB_WIND
#define SDMA_SUBOP_COPY_LINEAR_PHY
#define SDMA_OP_POLL_REGMEM
#define SDMA_SUBOP_TIMESTAMP_GET_GLOBAL
#define SDMA_SUBOP_PTEPDE_COPY_BACKWARDS
#define SDMA_SUBOP_TIMESTAMP_SET
#define SDMA_OP_TIMESTAMP
#define SDMA_OP_CONST_FILL
#define SDMA_SUBOP_COPY_DIRTY_PAGE
#define SDMA_OP_NOP
Commands for the SDMA engine.
#define SDMA_SUBOP_WRITE_LINEAR
#define SDMA_SUBOP_PTEPDE_RMW
#define SDMA_OP_SRBM_WRITE
#define SDMA_SUBOP_POLL_MEM_VERIFY
#define SDMA_SUBOP_POLL_REG_WRITE_MEM
#define SDMA_SUBOP_COPY_TILED_SUB_WIND
#define SDMA_SUBOP_POLL_DBIT_WRITE_MEM
#define SDMA_SUBOP_COPY_TILED
#define mmSDMA_GFX_DOORBELL
#define mmSDMA_PAGE_RB_RPTR_ADDR_HI
#define mmSDMA_GFX_RB_WPTR_POLL_ADDR_LO
#define mmSDMA_PAGE_RB_BASE
#define mmSDMA_PAGE_RB_WPTR_POLL_ADDR_LO
#define mmSDMA_PAGE_DOORBELL
#define mmSDMA_GFX_DOORBELL_OFFSET
#define mmSDMA_PAGE_DOORBELL_OFFSET
#define mmSDMA_GFX_RB_CNTL
MMIO offsets for SDMA engine.
#define mmSDMA_GFX_RB_RPTR_ADDR_HI
#define mmSDMA_PAGE_RB_RPTR_ADDR_LO
#define mmSDMA_GFX_RB_RPTR_ADDR_LO
#define mmSDMA_GFX_RB_WPTR_POLL_ADDR_HI
#define mmSDMA_GFX_RB_BASE
#define mmSDMA_PAGE_RB_CNTL
#define mmSDMA_GFX_RB_BASE_HI
#define UNSERIALIZE_SCALAR(scalar)
#define SERIALIZE_SCALAR(scalar)
uint32_t sdmax_rlcx_rb_rptr_addr_hi
uint32_t sdmax_rlcx_rb_cntl
uint32_t sdmax_rlcx_rb_rptr_addr_lo
const std::string & name()