36#include "debug/SDMAData.hh"
37#include "debug/SDMAEngine.hh"
43#include "params/SDMAEngine.hh"
50 gfxDoorbell(0), gfxDoorbellOffset(0), gfxWptr(0), pageBase(0),
51 pageRptr(0), pageDoorbell(0), pageDoorbellOffset(0),
52 pageWptr(0), gpuDevice(nullptr), walker(
p.walker),
53 mmioBase(
p.mmio_base), mmioSize(
p.mmio_size)
104 panic(
"Unknown SDMA id");
113 addr = (((
addr >> 12) << 3) << 12) | low_bits;
126 auto addr_range = *(tgen->begin());
127 Addr tmp_addr = addr_range.paddr;
138 Addr device_addr = 0;
216 panic(
"No free RLCs. Check they are properly unmapped.");
234 [ = ] (
const uint32_t &) { });
237 warn(
"RLC0 SDMAMQD address invalid\n");
241 }
else if (
rlcInfo[1] == doorbell) {
251 [ = ] (
const uint32_t &) { });
254 warn(
"RLC1 SDMAMQD address invalid\n");
259 panic(
"Cannot unregister: no RLC queue at %#lx\n", doorbell);
299 if (
rlcInfo[0] == doorbellOffset) {
301 }
else if (
rlcInfo[1] == doorbellOffset) {
304 panic(
"Cannot process: no RLC queue at %#lx\n", doorbellOffset);
342 if (
q->rptr() !=
q->wptr()) {
348 [ = ] (
const uint32_t &
header)
350 dmaReadVirt(
q->rptr(),
sizeof(uint32_t), cb, &cb->dmaBuffer);
357 q->globalRptr(),
q->rptrWbAddr());
358 if (
q->rptrWbAddr()) {
360 [ = ](
const uint64_t &) { },
q->globalRptr());
363 q->processing(
false);
381 void *dmaBuffer =
nullptr;
387 uint32_t NOP_count = (
header >> 16) & 0x3FFF;
389 if (NOP_count > 0)
q->incRptr(NOP_count * 4);
394 switch (sub_opcode) {
398 [ = ] (
const uint64_t &)
403 panic(
"SDMA_SUBOP_COPY_LINEAR_SUB_WIND not implemented");
406 panic(
"SDMA_SUBOP_COPY_TILED not implemented");
409 panic(
"SDMA_SUBOP_COPY_TILED_SUB_WIND not implemented");
412 panic(
"SDMA_SUBOP_COPY_T2T_SUB_WIND not implemented");
415 panic(
"SDMA_SUBOP_COPY_SOA not implemented");
418 panic(
"SDMA_SUBOP_COPY_DIRTY_PAGE not implemented");
421 panic(
"SDMA_SUBOP_COPY_LINEAR_PHY not implemented");
424 panic(
"SDMA unknown copy sub-opcode.");
430 switch (sub_opcode) {
434 [ = ] (
const uint64_t &)
439 panic(
"SDMA_SUBOP_WRITE_TILED not implemented.\n");
449 [ = ] (
const uint64_t &)
457 [ = ] (
const uint64_t &)
465 [ = ] (
const uint64_t &)
471 warn(
"SDMA_OP_SEM not implemented");
480 [ = ] (
const uint64_t &)
483 switch (sub_opcode) {
485 panic(
"SDMA_SUBOP_POLL_REG_WRITE_MEM not implemented");
488 panic(
"SDMA_SUBOP_POLL_DBIT_WRITE_MEM not implemented");
491 panic(
"SDMA_SUBOP_POLL_MEM_VERIFY not implemented");
499 warn(
"SDMA_OP_SEM not implemented");
508 [ = ] (
const uint64_t &)
514 warn(
"SDMA_OP_CONST_FILL not implemented");
519 switch (sub_opcode) {
524 [ = ] (
const uint64_t &)
529 panic(
"SDMA_SUBOP_PTEPDE_COPY not implemented");
532 panic(
"SDMA_SUBOP_PTEPDE_COPY not implemented");
535 panic(
"SDMA_SUBOP_PTEPDE_RMW not implemented");
546 switch (sub_opcode) {
556 warn(
"SDMA_OP_TIMESTAMP not implemented");
565 [ = ] (
const uint64_t &)
571 warn(
"SDMA_OP_PRE_EXE not implemented");
576 warn(
"SDMA_OP_DUMMY_TRAP not implemented");
580 panic(
"Invalid SDMA packet.\n");
595 uint32_t *dmaBuffer =
new uint32_t[pkt->
count];
606 int bufferSize =
sizeof(uint32_t) * pkt->
count;
607 q->incRptr(bufferSize);
610 for (
int i = 0;
i < pkt->
count; ++
i) {
625 [ = ] (
const uint64_t &) {
writeDone(
q, pkt, dmaBuffer); });
655 uint8_t *dmaBuffer =
new uint8_t[pkt->
count];
665 for (; !gen.done(); gen.next()) {
670 gen.size(), gen.addr(), chunk_addr);
674 gen.last() ? cb :
nullptr);
675 dmaBuffer += gen.size();
679 [ = ] (
const uint64_t &) {
copyReadData(
q, pkt, dmaBuffer); });
689 uint64_t *dmaBuffer64 =
reinterpret_cast<uint64_t *
>(dmaBuffer);
695 DPRINTF(SDMAData,
"Copy packet data:\n");
696 for (
int i = 0;
i < pkt->
count/8; ++
i) {
697 DPRINTF(SDMAData,
"%016lx\n", dmaBuffer64[
i]);
710 for (; !gen.done(); gen.next()) {
715 gen.size(), gen.addr(), chunk_addr);
719 gen.last() ? cb :
nullptr);
721 dmaBuffer += gen.size();
725 [ = ] (
const uint64_t &) {
copyDone(
q, pkt, dmaBuffer); });
747 q->ib()->size(pkt->
size *
sizeof(uint32_t) + 1);
748 q->ib()->setWptr(pkt->
size *
sizeof(uint32_t));
787 uint32_t ring_id = (
q->queueType() ==
SDMAPage) ? 3 : 0;
804 [[maybe_unused]] uint32_t reg_addr = pkt->
regAddr << 2;
805 uint32_t reg_mask = 0x00000000;
807 if (
header->byteEnable & 0x8) reg_mask |= 0xFF000000;
808 if (
header->byteEnable & 0x4) reg_mask |= 0x00FF0000;
809 if (
header->byteEnable & 0x2) reg_mask |= 0x0000FF00;
810 if (
header->byteEnable & 0x1) reg_mask |= 0x000000FF;
811 pkt->
data &= reg_mask;
814 reg_addr, pkt->
data);
816 warn_once(
"SRBM write not performed, no SRBM model. This needs to be fixed"
817 " if correct system simulation is relying on SRBM registers.");
836 "mask=%p, retry=%d, pinterval=%d\n",
header->mode,
header->func,
846 [ = ] (
const uint32_t &dma_buffer) {
849 (
void *)&cb->dmaBuffer);
851 panic(
"SDMA poll mem operation not implemented.");
855 warn_once(
"SDMA poll reg is not implemented. If this is required for "
856 "correctness, an SRBM model needs to be implemented.");
883 [ = ] (
const uint32_t &dma_buffer) {
886 (
void *)&cb->dmaBuffer);
905 return value < reference;
908 return value <= reference;
911 return value == reference;
914 return value != reference;
917 return value >= reference;
920 return value > reference;
923 panic(
"SDMA POLL_REGMEM unknown comparison function.");
940 uint64_t *dmaBuffer =
new uint64_t[pkt->
count];
941 for (
int i = 0;
i < pkt->
count;
i++) {
951 sizeof(uint64_t) * pkt->
count, 0,
955 [ = ] (
const uint64_t &) {
ptePdeDone(
q, pkt, dmaBuffer); });
982 uint64_t *dmaBuffer =
new uint64_t;
984 [ = ] (
const uint64_t &)
998 int64_t dst_data = *dmaBuffer;
999 int64_t src_data = pkt->
srcData;
1002 src_data, dst_data + src_data);
1005 *dmaBuffer = dst_data + src_data;
1008 [ = ] (
const uint64_t &)
1012 panic(
"Unsupported SDMA atomic opcode: %d\n",
header->opcode);
1018 uint64_t *dmaBuffer)
1062 Addr rptr[num_queues];
1063 Addr wptr[num_queues];
1064 Addr size[num_queues];
1065 bool processing[num_queues];
1067 for (
int i = 0;
i < num_queues;
i++) {
1068 base[
i] = queues[
i]->base();
1069 rptr[
i] = queues[
i]->getRptr();
1070 wptr[
i] = queues[
i]->getWptr();
1071 size[
i] = queues[
i]->size();
1072 processing[
i] = queues[
i]->processing();
1101 Addr rptr[num_queues];
1102 Addr wptr[num_queues];
1103 Addr size[num_queues];
1104 bool processing[num_queues];
1118 for (
int i = 0;
i < num_queues;
i++) {
1119 queues[
i]->base(
base[
i]);
1120 queues[
i]->rptr(rptr[
i]);
1121 queues[
i]->wptr(wptr[
i]);
1122 queues[
i]->size(size[
i]);
1123 queues[
i]->processing(processing[
i]);
1131 pkt->
getLE<uint32_t>());
1134 switch (mmio_offset) {
1160 uint32_t rb_size =
bits(pkt->
getLE<uint32_t>(), 6, 1);
1161 assert(rb_size >= 6 && rb_size <= 62);
1192 uint32_t rb_size =
bits(pkt->
getLE<uint32_t>(), 6, 1);
1193 assert(rb_size >= 6 && rb_size <= 62);
1272 uint32_t rb_size =
bits(
data, 6, 1);
1273 assert(rb_size >= 6 && rb_size <= 62);
1358 uint32_t rb_size =
bits(
data, 6, 1);
1359 assert(rb_size >= 6 && rb_size <= 62);
static constexpr int AMDGPU_MMHUB_PAGE_SIZE
Device model for an AMD GPU.
void setDoorbellType(uint32_t offset, QueueType qt)
Set handles to GPU blocks.
AMDGPUInterruptHandler * getIH()
Get handles to GPU blocks.
AMDGPUMemoryManager * getMemMgr()
RequestorID vramRequestorId()
Methods related to translations and system/device memory.
void setSDMAEngine(Addr offset, SDMAEngine *eng)
void prepareInterruptCookie(ContextID cntxtId, uint32_t ring_id, uint32_t client_id, uint32_t source_id)
void submitInterruptCookie()
void writeRequest(Addr addr, uint8_t *data, int size, Request::Flags flag, Event *callback)
Write size amount of data to device memory at addr using flags and callback.
void readRequest(Addr addr, uint8_t *data, int size, Request::Flags flag, Event *callback)
Read size amount of data from device memory at addr using flags and callback.
Translation range generators.
bool inAGP(Addr vaddr)
Methods for resolving apertures.
This class takes an arbitrary memory region (address/length pair) and generates a series of appropria...
void serialize(CheckpointOut &cp) const override
Serialize an object.
void unserialize(CheckpointIn &cp) override
Unserialize an object.
Wraps a std::function object in a DmaCallback.
void dmaReadVirt(Addr host_addr, unsigned size, DmaCallback *cb, void *data, Tick delay=0)
Initiate a DMA read from virtual address host_addr.
void dmaWriteVirt(Addr host_addr, unsigned size, DmaCallback *b, void *data, Tick delay=0)
Initiate a DMA write from virtual address host_addr.
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
T getLE() const
Get the data in the packet byte swapped from little endian to host endian.
void setMQD(SDMAQueueDesc *mqd)
void setMQDAddr(Addr mqdAddr)
void incRptr(uint32_t value)
System DMA Engine class for AMD dGPU.
void setPageRptrLo(uint32_t data)
void unserialize(CheckpointIn &cp) override
Unserialize an object.
uint64_t getPageDoorbellOffset()
SDMAQueue gfx
Each SDMAEngine processes four queues: paging, gfx, rlc0, and rlc1, where RLC stands for Run List Con...
void ptePde(SDMAQueue *q, sdmaPtePde *pkt)
void setGfxRptrLo(uint32_t data)
void setGfxWptrLo(uint32_t data)
uint64_t getGfxDoorbellOffset()
void setPageDoorbellHi(uint32_t data)
void setGfxRptrHi(uint32_t data)
void writeDone(SDMAQueue *q, sdmaWrite *pkt, uint32_t *dmaBuffer)
void processRLC(Addr doorbellOffset, Addr wptrOffset)
void copy(SDMAQueue *q, sdmaCopy *pkt)
Tick write(PacketPtr pkt) override
Inherited methods.
void writeMMIO(PacketPtr pkt, Addr mmio_offset)
Methods for setting the values of SDMA MMIO registers.
void setGfxSize(uint32_t data)
void fenceDone(SDMAQueue *q, sdmaFence *pkt)
void writeReadData(SDMAQueue *q, sdmaWrite *pkt, uint32_t *dmaBuffer)
void unregisterRLCQueue(Addr doorbell)
void setGfxBaseLo(uint32_t data)
void processRLC0(Addr wptrOffset)
void processGfx(Addr wptrOffset)
Given a new write ptr offset, communicated to the GPU through a doorbell write, the SDMA engine proce...
void setGfxDoorbellOffsetHi(uint32_t data)
void atomic(SDMAQueue *q, sdmaAtomicHeader *header, sdmaAtomic *pkt)
void deallocateRLCQueues()
Addr getGARTAddr(Addr addr) const
Methods for translation.
void setPageDoorbellOffsetHi(uint32_t data)
void processRLC1(Addr wptrOffset)
void setGfxWptrHi(uint32_t data)
void setGfxDoorbellOffsetLo(uint32_t data)
void processPage(Addr wptrOffset)
uint64_t getGfxDoorbell()
void decodeHeader(SDMAQueue *q, uint32_t data)
Reads the first DW (32 bits) (i.e., header) of an SDMA packet, which encodes the opcode and sub-opcod...
void setPageDoorbellOffsetLo(uint32_t data)
uint64_t getPageDoorbell()
SDMAEngine(const SDMAEngineParams &p)
void setGPUDevice(AMDGPUDevice *gpu_device)
Addr getDeviceAddress(Addr raw_addr)
Translate an address in an SDMA packet.
uint64_t pageDoorbellOffset
void setPageBaseHi(uint32_t data)
uint64_t gfxDoorbellOffset
void registerRLCQueue(Addr doorbell, Addr mqdAddr, SDMAQueueDesc *mqd)
Methods for RLC queues.
bool pollRegMemFunc(uint32_t value, uint32_t reference, uint32_t func)
void setPageWptrHi(uint32_t data)
void setPageWptrLo(uint32_t data)
void pollRegMemRead(SDMAQueue *q, sdmaPollRegMemHeader *header, sdmaPollRegMem *pkt, uint32_t dma_buffer, int count)
void setGfxDoorbellLo(uint32_t data)
void copyReadData(SDMAQueue *q, sdmaCopy *pkt, uint8_t *dmaBuffer)
void indirectBuffer(SDMAQueue *q, sdmaIndirectBuffer *pkt)
void srbmWrite(SDMAQueue *q, sdmaSRBMWriteHeader *header, sdmaSRBMWrite *pkt)
void atomicData(SDMAQueue *q, sdmaAtomicHeader *header, sdmaAtomic *pkt, uint64_t *dmaBuffer)
void trap(SDMAQueue *q, sdmaTrap *pkt)
AddrRangeList getAddrRanges() const override
Every PIO device is obliged to provide an implementation that returns the address ranges the device r...
void setPageDoorbellLo(uint32_t data)
void setGfxDoorbellHi(uint32_t data)
void setPageSize(uint32_t data)
void setPageBaseLo(uint32_t data)
void copyDone(SDMAQueue *q, sdmaCopy *pkt, uint8_t *dmaBuffer)
void setGfxBaseHi(uint32_t data)
void ptePdeDone(SDMAQueue *q, sdmaPtePde *pkt, uint64_t *dmaBuffer)
TranslationGenPtr translate(Addr vaddr, Addr size) override
GPUController will perform DMA operations on VAs, and because page faults are not currently supported...
void setPageRptrHi(uint32_t data)
void decodeNext(SDMAQueue *q)
This method checks read and write pointers and starts decoding packets if the read pointer is less th...
void fence(SDMAQueue *q, sdmaFence *pkt)
void atomicDone(SDMAQueue *q, sdmaAtomicHeader *header, sdmaAtomic *pkt, uint64_t *dmaBuffer)
void serialize(CheckpointOut &cp) const override
Serialize an object.
int getIHClientId()
Returns the client id for the Interrupt Handler.
std::array< Addr, 2 > rlcInfo
void pollRegMem(SDMAQueue *q, sdmaPollRegMemHeader *header, sdmaPollRegMem *pkt)
Implements a poll reg/mem packet that polls an SRBM register or a memory location,...
void setDevRequestor(RequestorID mid)
constexpr T bits(T val, unsigned first, unsigned last)
Extract the bitfield from position 'first' to 'last' (inclusive) from 'val' and right justify it.
constexpr T insertBits(T val, unsigned first, unsigned last, B bit_val)
Returns val with bits first to last set to the LSBs of bit_val.
#define panic(...)
This implements a cprintf based panic() function.
#define UNSERIALIZE_ARRAY(member, size)
#define SERIALIZE_ARRAY(member, size)
Bitfield< 23, 20 > atomic
Bitfield< 24, 21 > opcode
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
struct gem5::GEM5_PACKED sdmaFence
struct gem5::GEM5_PACKED sdmaAtomic
std::ostream CheckpointOut
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
@ SOC15_IH_CLIENTID_SDMA3
@ SOC15_IH_CLIENTID_SDMA4
@ SOC15_IH_CLIENTID_SDMA0
@ SOC15_IH_CLIENTID_SDMA1
@ SOC15_IH_CLIENTID_SDMA5
@ SOC15_IH_CLIENTID_SDMA2
@ SOC15_IH_CLIENTID_SDMA6
@ SOC15_IH_CLIENTID_SDMA7
struct gem5::GEM5_PACKED sdmaPtePde
struct gem5::GEM5_PACKED sdmaPollRegMem
struct gem5::GEM5_PACKED sdmaPollRegMemHeader
constexpr unsigned int SDMA_ATOMIC_ADD64
struct gem5::GEM5_PACKED sdmaWrite
struct gem5::GEM5_PACKED sdmaAtomicHeader
struct gem5::GEM5_PACKED sdmaCopy
SDMA packets.
struct gem5::GEM5_PACKED sdmaIndirectBuffer
struct gem5::GEM5_PACKED sdmaTrap
struct gem5::GEM5_PACKED sdmaSRBMWrite
struct gem5::GEM5_PACKED sdmaSRBMWriteHeader
std::unique_ptr< TranslationGen > TranslationGenPtr
Declaration of the Packet class.
#define SDMA_SUBOP_COPY_SOA
#define SDMA_OP_DUMMY_TRAP
#define SDMA_SUBOP_PTEPDE_COPY
#define SDMA_SUBOP_COPY_LINEAR
#define SDMA_SUBOP_COPY_T2T_SUB_WIND
#define SDMA_SUBOP_TIMESTAMP_GET
#define SDMA_SUBOP_WRITE_TILED
#define SDMA_SUBOP_PTEPDE_GEN
#define SDMA_SUBOP_COPY_LINEAR_SUB_WIND
#define SDMA_SUBOP_COPY_LINEAR_PHY
#define SDMA_OP_POLL_REGMEM
#define SDMA_SUBOP_TIMESTAMP_GET_GLOBAL
#define SDMA_SUBOP_PTEPDE_COPY_BACKWARDS
#define SDMA_SUBOP_TIMESTAMP_SET
#define SDMA_OP_TIMESTAMP
#define SDMA_OP_CONST_FILL
#define SDMA_SUBOP_COPY_DIRTY_PAGE
#define SDMA_OP_NOP
Commands for the SDMA engine.
#define SDMA_SUBOP_WRITE_LINEAR
#define SDMA_SUBOP_PTEPDE_RMW
#define SDMA_OP_SRBM_WRITE
#define SDMA_SUBOP_POLL_MEM_VERIFY
#define SDMA_SUBOP_POLL_REG_WRITE_MEM
#define SDMA_SUBOP_COPY_TILED_SUB_WIND
#define SDMA_SUBOP_POLL_DBIT_WRITE_MEM
#define SDMA_SUBOP_COPY_TILED
#define mmSDMA_GFX_DOORBELL
#define mmSDMA_PAGE_RB_RPTR_ADDR_HI
#define mmSDMA_GFX_RB_WPTR_POLL_ADDR_LO
#define mmSDMA_PAGE_RB_BASE
#define mmSDMA_PAGE_RB_WPTR_POLL_ADDR_LO
#define mmSDMA_PAGE_DOORBELL
#define mmSDMA_GFX_DOORBELL_OFFSET
#define mmSDMA_PAGE_DOORBELL_OFFSET
#define mmSDMA_GFX_RB_CNTL
MMIO offsets for SDMA engine.
#define mmSDMA_GFX_RB_RPTR_ADDR_HI
#define mmSDMA_PAGE_RB_RPTR_ADDR_LO
#define mmSDMA_GFX_RB_RPTR_ADDR_LO
#define mmSDMA_GFX_RB_WPTR_POLL_ADDR_HI
#define mmSDMA_GFX_RB_BASE
#define mmSDMA_PAGE_RB_CNTL
#define mmSDMA_GFX_RB_BASE_HI
#define UNSERIALIZE_SCALAR(scalar)
#define SERIALIZE_SCALAR(scalar)
uint32_t sdmax_rlcx_rb_rptr_addr_hi
uint32_t sdmax_rlcx_rb_cntl
uint32_t sdmax_rlcx_rb_rptr_addr_lo
const std::string & name()