36#include "debug/SDMAData.hh"
37#include "debug/SDMAEngine.hh"
43#include "params/SDMAEngine.hh"
50 gfxDoorbell(0), gfxDoorbellOffset(0), gfxWptr(0), pageBase(0),
51 pageRptr(0), pageDoorbell(0), pageDoorbellOffset(0),
52 pageWptr(0), gpuDevice(nullptr), walker(
p.walker),
53 mmioBase(
p.mmio_base), mmioSize(
p.mmio_size)
104 panic(
"Unknown SDMA id");
113 addr = (((
addr >> 12) << 3) << 12) | low_bits;
126 auto addr_range = *(tgen->begin());
127 Addr tmp_addr = addr_range.paddr;
138 Addr device_addr = 0;
219 panic(
"No free RLCs. Check they are properly unmapped.");
237 [ = ] (
const uint32_t &) { });
240 warn(
"RLC0 SDMAMQD address invalid\n");
244 }
else if (
rlcInfo[1] == doorbell) {
254 [ = ] (
const uint32_t &) { });
257 warn(
"RLC1 SDMAMQD address invalid\n");
262 panic(
"Cannot unregister: no RLC queue at %#lx\n", doorbell);
303 if (
rlcInfo[0] == doorbellOffset) {
305 }
else if (
rlcInfo[1] == doorbellOffset) {
308 panic(
"Cannot process: no RLC queue at %#lx\n", doorbellOffset);
346 if (
q->rptr() !=
q->wptr()) {
352 [ = ] (
const uint32_t &
header)
354 dmaReadVirt(
q->rptr(),
sizeof(uint32_t), cb, &cb->dmaBuffer);
361 q->globalRptr(),
q->rptrWbAddr());
362 if (
q->rptrWbAddr()) {
364 [ = ](
const uint64_t &) { },
q->globalRptr());
367 q->processing(
false);
385 void *dmaBuffer =
nullptr;
391 uint32_t NOP_count = (
header >> 16) & 0x3FFF;
394 for (
int i = 0;
i < NOP_count; ++
i) {
395 if (
q->rptr() ==
q->wptr()) {
396 warn(
"NOP count is beyond wptr, ignoring remaining NOPs");
406 switch (sub_opcode) {
410 [ = ] (
const uint64_t &)
415 panic(
"SDMA_SUBOP_COPY_LINEAR_SUB_WIND not implemented");
418 panic(
"SDMA_SUBOP_COPY_TILED not implemented");
421 panic(
"SDMA_SUBOP_COPY_TILED_SUB_WIND not implemented");
424 panic(
"SDMA_SUBOP_COPY_T2T_SUB_WIND not implemented");
427 panic(
"SDMA_SUBOP_COPY_SOA not implemented");
430 panic(
"SDMA_SUBOP_COPY_DIRTY_PAGE not implemented");
433 panic(
"SDMA_SUBOP_COPY_LINEAR_PHY not implemented");
436 panic(
"SDMA unknown copy sub-opcode.");
442 switch (sub_opcode) {
446 [ = ] (
const uint64_t &)
451 panic(
"SDMA_SUBOP_WRITE_TILED not implemented.\n");
461 [ = ] (
const uint64_t &)
469 [ = ] (
const uint64_t &)
477 [ = ] (
const uint64_t &)
483 warn(
"SDMA_OP_SEM not implemented");
492 [ = ] (
const uint64_t &)
495 switch (sub_opcode) {
497 panic(
"SDMA_SUBOP_POLL_REG_WRITE_MEM not implemented");
500 panic(
"SDMA_SUBOP_POLL_DBIT_WRITE_MEM not implemented");
503 panic(
"SDMA_SUBOP_POLL_MEM_VERIFY not implemented");
511 warn(
"SDMA_OP_SEM not implemented");
520 [ = ] (
const uint64_t &)
528 [ = ] (
const uint64_t &)
534 switch (sub_opcode) {
539 [ = ] (
const uint64_t &)
544 panic(
"SDMA_SUBOP_PTEPDE_COPY not implemented");
547 panic(
"SDMA_SUBOP_PTEPDE_COPY not implemented");
550 panic(
"SDMA_SUBOP_PTEPDE_RMW not implemented");
561 switch (sub_opcode) {
571 warn(
"SDMA_OP_TIMESTAMP not implemented");
580 [ = ] (
const uint64_t &)
586 warn(
"SDMA_OP_PRE_EXE not implemented");
591 warn(
"SDMA_OP_DUMMY_TRAP not implemented");
595 panic(
"Invalid SDMA packet.\n");
610 uint32_t *dmaBuffer =
new uint32_t[pkt->
count];
621 int bufferSize =
sizeof(uint32_t) * pkt->
count;
622 q->incRptr(bufferSize);
625 for (
int i = 0;
i < pkt->
count; ++
i) {
634 "SDMA write to GART not implemented");
645 [ = ] (
const uint64_t &) {
writeDone(
q, pkt, dmaBuffer); });
679 uint8_t *dmaBuffer =
new uint8_t[pkt->
count];
689 for (; !gen.done(); gen.next()) {
694 gen.size(), gen.addr(), chunk_addr);
698 gen.last() ? cb :
nullptr);
699 dmaBuffer += gen.size();
703 [ = ] (
const uint64_t &) {
copyReadData(
q, pkt, dmaBuffer); });
713 uint64_t *dmaBuffer64 =
reinterpret_cast<uint64_t *
>(dmaBuffer);
719 DPRINTF(SDMAData,
"Copy packet data:\n");
720 for (
int i = 0;
i < pkt->
count/8; ++
i) {
721 DPRINTF(SDMAData,
"%016lx\n", dmaBuffer64[
i]);
734 for (; !gen.done(); gen.next()) {
739 gen.size(), gen.addr(), chunk_addr);
743 gen.last() ? cb :
nullptr);
745 dmaBuffer += gen.size();
749 [ = ] (
const uint64_t &) {
copyDone(
q, pkt, dmaBuffer); });
757 assert((pkt->
count % 8) == 0);
758 for (
int i = 0;
i < pkt->
count/8; ++
i) {
761 gart_addr, dmaBuffer64[
i]);
785 q->ib()->base(pkt->
base);
788 q->ib()->size(pkt->
size *
sizeof(uint32_t) + 1);
789 q->ib()->setWptr(pkt->
size *
sizeof(uint32_t));
830 uint32_t ring_id = (
q->queueType() ==
SDMAPage) ? 3 : 0;
833 int local_id =
getId();
851 [[maybe_unused]] uint32_t reg_addr = pkt->
regAddr << 2;
852 uint32_t reg_mask = 0x00000000;
854 if (
header->byteEnable & 0x8) reg_mask |= 0xFF000000;
855 if (
header->byteEnable & 0x4) reg_mask |= 0x00FF0000;
856 if (
header->byteEnable & 0x2) reg_mask |= 0x0000FF00;
857 if (
header->byteEnable & 0x1) reg_mask |= 0x000000FF;
858 pkt->
data &= reg_mask;
861 reg_addr, pkt->
data);
882 "mask=%p, retry=%d, pinterval=%d\n",
header->mode,
header->func,
892 [ = ] (
const uint32_t &dma_buffer) {
895 (
void *)&cb->dmaBuffer);
897 panic(
"SDMA poll mem operation not implemented.");
901 warn_once(
"SDMA poll reg is not implemented. If this is required for "
902 "correctness, an SRBM model needs to be implemented.");
929 [ = ] (
const uint32_t &dma_buffer) {
932 (
void *)&cb->dmaBuffer);
951 return value < reference;
954 return value <= reference;
957 return value == reference;
960 return value != reference;
963 return value >= reference;
966 return value > reference;
969 panic(
"SDMA POLL_REGMEM unknown comparison function.");
986 uint64_t *dmaBuffer =
new uint64_t[pkt->
count];
987 for (
int i = 0;
i < pkt->
count;
i++) {
996 "SDMA write to GART not implemented");
1001 sizeof(uint64_t) * pkt->
count, 0,
1008 [ = ] (
const uint64_t &) {
ptePdeDone(
q, pkt, dmaBuffer); });
1035 uint64_t *dmaBuffer =
new uint64_t;
1037 [ = ] (
const uint64_t &)
1044 uint64_t *dmaBuffer)
1051 int64_t dst_data = *dmaBuffer;
1052 int64_t src_data = pkt->
srcData;
1055 src_data, dst_data + src_data);
1058 *dmaBuffer = dst_data + src_data;
1061 [ = ] (
const uint64_t &)
1065 panic(
"Unsupported SDMA atomic opcode: %d\n",
header->opcode);
1071 uint64_t *dmaBuffer)
1095 int fill_bytes = (pkt->
count + 1) * (1 << fill_header.
fillsize);
1096 uint8_t *fill_data =
new uint8_t[fill_bytes];
1098 memset(fill_data, pkt->
srcData, fill_bytes);
1111 for (; !gen.done(); gen.next()) {
1116 gen.size(), gen.addr(), chunk_addr);
1120 gen.last() ? cb :
nullptr);
1121 fill_data += gen.size();
1128 [ = ] (
const uint64_t &)
1139 delete [] fill_data;
1177 Addr rptr[num_queues];
1178 Addr wptr[num_queues];
1179 Addr size[num_queues];
1180 bool processing[num_queues];
1182 for (
int i = 0;
i < num_queues;
i++) {
1183 base[
i] = queues[
i]->base();
1184 rptr[
i] = queues[
i]->getRptr();
1185 wptr[
i] = queues[
i]->getWptr();
1186 size[
i] = queues[
i]->size();
1187 processing[
i] = queues[
i]->processing();
1216 Addr rptr[num_queues];
1217 Addr wptr[num_queues];
1218 Addr size[num_queues];
1219 bool processing[num_queues];
1233 for (
int i = 0;
i < num_queues;
i++) {
1234 queues[
i]->base(
base[
i]);
1235 queues[
i]->rptr(rptr[
i]);
1236 queues[
i]->wptr(wptr[
i]);
1237 queues[
i]->size(size[
i]);
1238 queues[
i]->processing(processing[
i]);
1246 pkt->
getLE<uint32_t>());
1249 switch (mmio_offset) {
1275 uint32_t rb_size =
bits(pkt->
getLE<uint32_t>(), 6, 1);
1276 assert(rb_size >= 6 && rb_size <= 62);
1307 uint32_t rb_size =
bits(pkt->
getLE<uint32_t>(), 6, 1);
1308 assert(rb_size >= 6 && rb_size <= 62);
1387 uint32_t rb_size =
bits(
data, 6, 1);
1388 assert(rb_size >= 6 && rb_size <= 62);
1473 uint32_t rb_size =
bits(
data, 6, 1);
1474 assert(rb_size >= 6 && rb_size <= 62);
static constexpr int AMDGPU_MMHUB_PAGE_SIZE
Device model for an AMD GPU.
void setDoorbellType(uint32_t offset, QueueType qt, int ip_id=0)
Set handles to GPU blocks.
void unsetDoorbell(uint32_t offset)
void setRegVal(uint64_t addr, uint32_t value)
AMDGPUInterruptHandler * getIH()
Get handles to GPU blocks.
AMDGPUMemoryManager * getMemMgr()
RequestorID vramRequestorId()
Methods related to translations and system/device memory.
void setSDMAEngine(Addr offset, SDMAEngine *eng)
void submitInterruptCookie()
void prepareInterruptCookie(ContextID cntxtId, uint32_t ring_id, uint32_t client_id, uint32_t source_id, unsigned node_id)
void writeRequest(Addr addr, uint8_t *data, int size, Request::Flags flag, Event *callback)
Write size amount of data to device memory at addr using flags and callback.
void readRequest(Addr addr, uint8_t *data, int size, Request::Flags flag, Event *callback)
Read size amount of data from device memory at addr using flags and callback.
Translation range generators.
std::unordered_map< uint64_t, uint64_t > gartTable
Copy of GART table.
bool inAGP(Addr vaddr)
Methods for resolving apertures.
Addr gartBase()
Return base address of GART table in framebuffer.
bool inGARTRange(Addr paddr)
This class takes an arbitrary memory region (address/length pair) and generates a series of appropria...
void serialize(CheckpointOut &cp) const override
Serialize an object.
void unserialize(CheckpointIn &cp) override
Unserialize an object.
Wraps a std::function object in a DmaCallback.
void dmaReadVirt(Addr host_addr, unsigned size, DmaCallback *cb, void *data, Tick delay=0)
Initiate a DMA read from virtual address host_addr.
void dmaWriteVirt(Addr host_addr, unsigned size, DmaCallback *b, void *data, Tick delay=0)
Initiate a DMA write from virtual address host_addr.
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
T getLE() const
Get the data in the packet byte swapped from little endian to host endian.
void setMQD(SDMAQueueDesc *mqd)
void setMQDAddr(Addr mqdAddr)
void incRptr(uint32_t value)
System DMA Engine class for AMD dGPU.
void setPageRptrLo(uint32_t data)
void unserialize(CheckpointIn &cp) override
Unserialize an object.
uint64_t getPageDoorbellOffset()
SDMAQueue gfx
Each SDMAEngine processes four queues: paging, gfx, rlc0, and rlc1, where RLC stands for Run List Con...
void ptePde(SDMAQueue *q, sdmaPtePde *pkt)
void setGfxRptrLo(uint32_t data)
void setGfxWptrLo(uint32_t data)
uint64_t getGfxDoorbellOffset()
void setPageDoorbellHi(uint32_t data)
void setGfxRptrHi(uint32_t data)
void writeDone(SDMAQueue *q, sdmaWrite *pkt, uint32_t *dmaBuffer)
void processRLC(Addr doorbellOffset, Addr wptrOffset)
void copy(SDMAQueue *q, sdmaCopy *pkt)
Tick write(PacketPtr pkt) override
Inherited methods.
void writeMMIO(PacketPtr pkt, Addr mmio_offset)
Methods for setting the values of SDMA MMIO registers.
void setGfxSize(uint32_t data)
void fenceDone(SDMAQueue *q, sdmaFence *pkt)
void writeReadData(SDMAQueue *q, sdmaWrite *pkt, uint32_t *dmaBuffer)
void unregisterRLCQueue(Addr doorbell)
void setGfxBaseLo(uint32_t data)
void processRLC0(Addr wptrOffset)
void processGfx(Addr wptrOffset)
Given a new write ptr offset, communicated to the GPU through a doorbell write, the SDMA engine proce...
void setGfxDoorbellOffsetHi(uint32_t data)
void constFill(SDMAQueue *q, sdmaConstFill *pkt, uint32_t header)
void atomic(SDMAQueue *q, sdmaAtomicHeader *header, sdmaAtomic *pkt)
void deallocateRLCQueues()
Addr getGARTAddr(Addr addr) const
Methods for translation.
void setPageDoorbellOffsetHi(uint32_t data)
void processRLC1(Addr wptrOffset)
void setGfxWptrHi(uint32_t data)
void setGfxDoorbellOffsetLo(uint32_t data)
void processPage(Addr wptrOffset)
uint64_t getGfxDoorbell()
void decodeHeader(SDMAQueue *q, uint32_t data)
Reads the first DW (32 bits) (i.e., header) of an SDMA packet, which encodes the opcode and sub-opcod...
void setPageDoorbellOffsetLo(uint32_t data)
int getIHClientId(int _id)
Returns the client id for the Interrupt Handler.
uint64_t getPageDoorbell()
SDMAEngine(const SDMAEngineParams &p)
void setGPUDevice(AMDGPUDevice *gpu_device)
Addr getDeviceAddress(Addr raw_addr)
Translate an address in an SDMA packet.
void constFillDone(SDMAQueue *q, sdmaConstFill *pkt, uint8_t *fill_data)
uint64_t pageDoorbellOffset
void setPageBaseHi(uint32_t data)
uint64_t gfxDoorbellOffset
void registerRLCQueue(Addr doorbell, Addr mqdAddr, SDMAQueueDesc *mqd)
Methods for RLC queues.
bool pollRegMemFunc(uint32_t value, uint32_t reference, uint32_t func)
void setPageWptrHi(uint32_t data)
void setPageWptrLo(uint32_t data)
void pollRegMemRead(SDMAQueue *q, sdmaPollRegMemHeader *header, sdmaPollRegMem *pkt, uint32_t dma_buffer, int count)
void setGfxDoorbellLo(uint32_t data)
void copyReadData(SDMAQueue *q, sdmaCopy *pkt, uint8_t *dmaBuffer)
void indirectBuffer(SDMAQueue *q, sdmaIndirectBuffer *pkt)
void srbmWrite(SDMAQueue *q, sdmaSRBMWriteHeader *header, sdmaSRBMWrite *pkt)
void atomicData(SDMAQueue *q, sdmaAtomicHeader *header, sdmaAtomic *pkt, uint64_t *dmaBuffer)
void trap(SDMAQueue *q, sdmaTrap *pkt)
AddrRangeList getAddrRanges() const override
Every PIO device is obliged to provide an implementation that returns the address ranges the device r...
void setPageDoorbellLo(uint32_t data)
void setGfxDoorbellHi(uint32_t data)
void setPageSize(uint32_t data)
void setPageBaseLo(uint32_t data)
void copyDone(SDMAQueue *q, sdmaCopy *pkt, uint8_t *dmaBuffer)
void setGfxBaseHi(uint32_t data)
void ptePdeDone(SDMAQueue *q, sdmaPtePde *pkt, uint64_t *dmaBuffer)
TranslationGenPtr translate(Addr vaddr, Addr size) override
GPUController will perform DMA operations on VAs, and because page faults are not currently supported...
void setPageRptrHi(uint32_t data)
void decodeNext(SDMAQueue *q)
This method checks read and write pointers and starts decoding packets if the read pointer is less th...
void fence(SDMAQueue *q, sdmaFence *pkt)
void atomicDone(SDMAQueue *q, sdmaAtomicHeader *header, sdmaAtomic *pkt, uint64_t *dmaBuffer)
void serialize(CheckpointOut &cp) const override
Serialize an object.
std::array< Addr, 2 > rlcInfo
void pollRegMem(SDMAQueue *q, sdmaPollRegMemHeader *header, sdmaPollRegMem *pkt)
Implements a poll reg/mem packet that polls an SRBM register or a memory location,...
void setDevRequestor(RequestorID mid)
constexpr T bits(T val, unsigned first, unsigned last)
Extract the bitfield from position 'first' to 'last' (inclusive) from 'val' and right justify it.
constexpr T insertBits(T val, unsigned first, unsigned last, B bit_val)
Returns val with bits first to last set to the LSBs of bit_val.
#define panic(...)
This implements a cprintf based panic() function.
#define fatal_if(cond,...)
Conditional fatal macro that checks the supplied condition and only causes a fatal error if the condi...
#define UNSERIALIZE_ARRAY(member, size)
#define SERIALIZE_ARRAY(member, size)
Bitfield< 23, 20 > atomic
Bitfield< 24, 21 > opcode
Copyright (c) 2024 - Pranith Kumar Copyright (c) 2020 Inria All rights reserved.
struct gem5::GEM5_PACKED sdmaFence
struct gem5::GEM5_PACKED sdmaConstFill
struct gem5::GEM5_PACKED sdmaAtomic
std::ostream CheckpointOut
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
@ SOC15_IH_CLIENTID_SDMA3
@ SOC15_IH_CLIENTID_SDMA4
@ SOC15_IH_CLIENTID_SDMA0
@ SOC15_IH_CLIENTID_SDMA1
@ SOC15_IH_CLIENTID_SDMA5
@ SOC15_IH_CLIENTID_SDMA2
@ SOC15_IH_CLIENTID_SDMA6
@ SOC15_IH_CLIENTID_SDMA7
struct gem5::GEM5_PACKED sdmaPtePde
struct gem5::GEM5_PACKED sdmaPollRegMem
struct gem5::GEM5_PACKED sdmaPollRegMemHeader
constexpr unsigned int SDMA_ATOMIC_ADD64
struct gem5::GEM5_PACKED sdmaWrite
struct gem5::GEM5_PACKED sdmaAtomicHeader
struct gem5::GEM5_PACKED sdmaCopy
SDMA packets - see src/core/inc/sdma_registers.h in ROCR-Runtime.
struct gem5::GEM5_PACKED sdmaIndirectBuffer
struct gem5::GEM5_PACKED sdmaTrap
std::unique_ptr< TranslationGen > TranslationGenPtr
struct gem5::GEM5_PACKED sdmaSRBMWrite
struct gem5::GEM5_PACKED sdmaSRBMWriteHeader
Declaration of the Packet class.
#define SDMA_SUBOP_COPY_SOA
#define SDMA_OP_DUMMY_TRAP
#define SDMA_SUBOP_PTEPDE_COPY
#define SDMA_SUBOP_COPY_LINEAR
#define SDMA_SUBOP_COPY_T2T_SUB_WIND
#define SDMA_SUBOP_TIMESTAMP_GET
#define SDMA_SUBOP_WRITE_TILED
#define SDMA_SUBOP_PTEPDE_GEN
#define SDMA_SUBOP_COPY_LINEAR_SUB_WIND
#define SDMA_SUBOP_COPY_LINEAR_PHY
#define SDMA_OP_POLL_REGMEM
#define SDMA_SUBOP_TIMESTAMP_GET_GLOBAL
#define SDMA_SUBOP_PTEPDE_COPY_BACKWARDS
#define SDMA_SUBOP_TIMESTAMP_SET
#define SDMA_OP_TIMESTAMP
#define SDMA_OP_CONST_FILL
#define SDMA_SUBOP_COPY_DIRTY_PAGE
#define SDMA_OP_NOP
Commands for the SDMA engine.
#define SDMA_SUBOP_WRITE_LINEAR
#define SDMA_SUBOP_PTEPDE_RMW
#define SDMA_OP_SRBM_WRITE
#define SDMA_SUBOP_POLL_MEM_VERIFY
#define SDMA_SUBOP_POLL_REG_WRITE_MEM
#define SDMA_SUBOP_COPY_TILED_SUB_WIND
#define SDMA_SUBOP_POLL_DBIT_WRITE_MEM
#define SDMA_SUBOP_COPY_TILED
#define mmSDMA_GFX_DOORBELL
#define mmSDMA_PAGE_RB_RPTR_ADDR_HI
#define mmSDMA_GFX_RB_WPTR_POLL_ADDR_LO
#define mmSDMA_PAGE_RB_BASE
#define mmSDMA_PAGE_RB_WPTR_POLL_ADDR_LO
#define mmSDMA_PAGE_DOORBELL
#define mmSDMA_GFX_DOORBELL_OFFSET
#define mmSDMA_PAGE_DOORBELL_OFFSET
#define mmSDMA_GFX_RB_CNTL
MMIO offsets for SDMA engine.
#define mmSDMA_GFX_RB_RPTR_ADDR_HI
#define mmSDMA_PAGE_RB_RPTR_ADDR_LO
#define mmSDMA_GFX_RB_RPTR_ADDR_LO
#define mmSDMA_GFX_RB_WPTR_POLL_ADDR_HI
#define mmSDMA_GFX_RB_BASE
#define mmSDMA_PAGE_RB_CNTL
#define mmSDMA_GFX_RB_BASE_HI
#define UNSERIALIZE_SCALAR(scalar)
#define SERIALIZE_SCALAR(scalar)
uint32_t sdmax_rlcx_rb_rptr_addr_hi
uint32_t sdmax_rlcx_rb_cntl
uint32_t sdmax_rlcx_rb_rptr_addr_lo
const std::string & name()