Go to the documentation of this file.
36 #include "debug/SDMAData.hh"
37 #include "debug/SDMAEngine.hh"
43 #include "params/SDMAEngine.hh"
50 gfxDoorbell(0), gfxDoorbellOffset(0), gfxWptr(0), pageBase(0),
51 pageRptr(0), pageDoorbell(0), pageDoorbellOffset(0),
52 pageWptr(0), gpuDevice(nullptr), walker(
p.walker),
53 mmioBase(
p.mmio_base), mmioSize(
p.mmio_size)
104 panic(
"Unknown SDMA id");
113 addr = (((
addr >> 12) << 3) << 12) | low_bits;
126 auto addr_range = *(tgen->begin());
127 Addr tmp_addr = addr_range.paddr;
138 Addr device_addr = 0;
216 panic(
"No free RLCs. Check they are properly unmapped.");
234 [ = ] (
const uint32_t &) { });
237 warn(
"RLC0 SDMAMQD address invalid\n");
241 }
else if (
rlcInfo[1] == doorbell) {
251 [ = ] (
const uint32_t &) { });
254 warn(
"RLC1 SDMAMQD address invalid\n");
259 panic(
"Cannot unregister: no RLC queue at %#lx\n", doorbell);
299 if (
rlcInfo[0] == doorbellOffset) {
301 }
else if (
rlcInfo[1] == doorbellOffset) {
304 panic(
"Cannot process: no RLC queue at %#lx\n", doorbellOffset);
342 if (
q->rptr() !=
q->wptr()) {
348 [ = ] (
const uint32_t &
header)
350 dmaReadVirt(
q->rptr(),
sizeof(uint32_t), cb, &cb->dmaBuffer);
357 q->globalRptr(),
q->rptrWbAddr());
358 if (
q->rptrWbAddr()) {
360 [ = ](
const uint64_t &) { },
q->globalRptr());
363 q->processing(
false);
381 void *dmaBuffer =
nullptr;
387 uint32_t NOP_count = (
header >> 16) & 0x3FFF;
389 if (NOP_count > 0)
q->incRptr(NOP_count * 4);
394 switch (sub_opcode) {
398 [ = ] (
const uint64_t &)
403 panic(
"SDMA_SUBOP_COPY_LINEAR_SUB_WIND not implemented");
406 panic(
"SDMA_SUBOP_COPY_TILED not implemented");
409 panic(
"SDMA_SUBOP_COPY_TILED_SUB_WIND not implemented");
412 panic(
"SDMA_SUBOP_COPY_T2T_SUB_WIND not implemented");
415 panic(
"SDMA_SUBOP_COPY_SOA not implemented");
418 panic(
"SDMA_SUBOP_COPY_DIRTY_PAGE not implemented");
421 panic(
"SDMA_SUBOP_COPY_LINEAR_PHY not implemented");
424 panic(
"SDMA unknown copy sub-opcode.");
430 switch (sub_opcode) {
434 [ = ] (
const uint64_t &)
439 panic(
"SDMA_SUBOP_WRITE_TILED not implemented.\n");
449 [ = ] (
const uint64_t &)
457 [ = ] (
const uint64_t &)
465 [ = ] (
const uint64_t &)
471 warn(
"SDMA_OP_SEM not implemented");
480 [ = ] (
const uint64_t &)
483 switch (sub_opcode) {
485 panic(
"SDMA_SUBOP_POLL_REG_WRITE_MEM not implemented");
488 panic(
"SDMA_SUBOP_POLL_DBIT_WRITE_MEM not implemented");
491 panic(
"SDMA_SUBOP_POLL_MEM_VERIFY not implemented");
499 warn(
"SDMA_OP_SEM not implemented");
508 [ = ] (
const uint64_t &)
514 warn(
"SDMA_OP_CONST_FILL not implemented");
519 switch (sub_opcode) {
524 [ = ] (
const uint64_t &)
529 panic(
"SDMA_SUBOP_PTEPDE_COPY not implemented");
532 panic(
"SDMA_SUBOP_PTEPDE_COPY not implemented");
535 panic(
"SDMA_SUBOP_PTEPDE_RMW not implemented");
546 switch (sub_opcode) {
556 warn(
"SDMA_OP_TIMESTAMP not implemented");
565 [ = ] (
const uint64_t &)
571 warn(
"SDMA_OP_PRE_EXE not implemented");
576 warn(
"SDMA_OP_DUMMY_TRAP not implemented");
580 panic(
"Invalid SDMA packet.\n");
595 uint32_t *dmaBuffer =
new uint32_t[pkt->
count];
606 int bufferSize =
sizeof(uint32_t) * pkt->
count;
607 q->incRptr(bufferSize);
610 for (
int i = 0;
i < pkt->
count; ++
i) {
625 [ = ] (
const uint64_t &) {
writeDone(
q, pkt, dmaBuffer); });
655 uint8_t *dmaBuffer =
new uint8_t[pkt->
count];
665 for (; !gen.done(); gen.next()) {
670 gen.size(), gen.addr(), chunk_addr);
674 gen.last() ? cb :
nullptr);
675 dmaBuffer += gen.size();
679 [ = ] (
const uint64_t &) {
copyReadData(
q, pkt, dmaBuffer); });
689 uint64_t *dmaBuffer64 =
reinterpret_cast<uint64_t *
>(dmaBuffer);
695 DPRINTF(SDMAData,
"Copy packet data:\n");
696 for (
int i = 0;
i < pkt->
count/8; ++
i) {
697 DPRINTF(SDMAData,
"%016lx\n", dmaBuffer64[
i]);
710 for (; !gen.done(); gen.next()) {
715 gen.size(), gen.addr(), chunk_addr);
719 gen.last() ? cb :
nullptr);
721 dmaBuffer += gen.size();
725 [ = ] (
const uint64_t &) {
copyDone(
q, pkt, dmaBuffer); });
747 q->ib()->size(pkt->
size *
sizeof(uint32_t) + 1);
748 q->ib()->setWptr(pkt->
size *
sizeof(uint32_t));
787 uint32_t ring_id = (
q->queueType() ==
SDMAPage) ? 3 : 0;
804 [[maybe_unused]] uint32_t reg_addr = pkt->
regAddr << 2;
805 uint32_t reg_mask = 0x00000000;
807 if (
header->byteEnable & 0x8) reg_mask |= 0xFF000000;
808 if (
header->byteEnable & 0x4) reg_mask |= 0x00FF0000;
809 if (
header->byteEnable & 0x2) reg_mask |= 0x0000FF00;
810 if (
header->byteEnable & 0x1) reg_mask |= 0x000000FF;
811 pkt->
data &= reg_mask;
814 reg_addr, pkt->
data);
816 warn_once(
"SRBM write not performed, no SRBM model. This needs to be fixed"
817 " if correct system simulation is relying on SRBM registers.");
836 "mask=%p, retry=%d, pinterval=%d\n",
header->mode,
header->func,
846 [ = ] (
const uint32_t &dma_buffer) {
849 (
void *)&cb->dmaBuffer);
851 panic(
"SDMA poll mem operation not implemented.");
855 warn_once(
"SDMA poll reg is not implemented. If this is required for "
856 "correctness, an SRBM model needs to be implemented.");
883 [ = ] (
const uint32_t &dma_buffer) {
886 (
void *)&cb->dmaBuffer);
905 return value < reference;
908 return value <= reference;
911 return value == reference;
914 return value != reference;
917 return value >= reference;
920 return value > reference;
923 panic(
"SDMA POLL_REGMEM unknown comparison function.");
940 uint64_t *dmaBuffer =
new uint64_t[pkt->
count];
941 for (
int i = 0;
i < pkt->
count;
i++) {
951 sizeof(uint64_t) * pkt->
count, 0,
955 [ = ] (
const uint64_t &) {
ptePdeDone(
q, pkt, dmaBuffer); });
982 uint64_t *dmaBuffer =
new uint64_t;
984 [ = ] (
const uint64_t &)
998 int64_t dst_data = *dmaBuffer;
999 int64_t src_data = pkt->
srcData;
1002 src_data, dst_data + src_data);
1005 *dmaBuffer = dst_data + src_data;
1008 [ = ] (
const uint64_t &)
1012 panic(
"Unsupported SDMA atomic opcode: %d\n",
header->opcode);
1018 uint64_t *dmaBuffer)
1062 Addr rptr[num_queues];
1063 Addr wptr[num_queues];
1064 Addr size[num_queues];
1065 bool processing[num_queues];
1067 for (
int i = 0;
i < num_queues;
i++) {
1068 base[
i] = queues[
i]->base();
1069 rptr[
i] = queues[
i]->getRptr();
1070 wptr[
i] = queues[
i]->getWptr();
1071 size[
i] = queues[
i]->size();
1072 processing[
i] = queues[
i]->processing();
1101 Addr rptr[num_queues];
1102 Addr wptr[num_queues];
1103 Addr size[num_queues];
1104 bool processing[num_queues];
1118 for (
int i = 0;
i < num_queues;
i++) {
1119 queues[
i]->base(
base[
i]);
1120 queues[
i]->rptr(rptr[
i]);
1121 queues[
i]->wptr(wptr[
i]);
1122 queues[
i]->size(size[
i]);
1123 queues[
i]->processing(processing[
i]);
1131 pkt->
getLE<uint32_t>());
1134 switch (mmio_offset) {
1160 uint32_t rb_size =
bits(pkt->
getLE<uint32_t>(), 6, 1);
1161 assert(rb_size >= 6 && rb_size <= 62);
1192 uint32_t rb_size =
bits(pkt->
getLE<uint32_t>(), 6, 1);
1193 assert(rb_size >= 6 && rb_size <= 62);
1272 uint32_t rb_size =
bits(
data, 6, 1);
1273 assert(rb_size >= 6 && rb_size <= 62);
1358 uint32_t rb_size =
bits(
data, 6, 1);
1359 assert(rb_size >= 6 && rb_size <= 62);
void setPageDoorbellHi(uint32_t data)
void indirectBuffer(SDMAQueue *q, sdmaIndirectBuffer *pkt)
void incRptr(uint32_t value)
struct gem5::GEM5_PACKED sdmaCopy
SDMA packets.
SDMAEngine(const SDMAEngineParams &p)
struct gem5::GEM5_PACKED sdmaWrite
@ SOC15_IH_CLIENTID_SDMA6
#define mmSDMA_GFX_RB_WPTR_POLL_ADDR_LO
#define mmSDMA_PAGE_RB_CNTL
#define mmSDMA_GFX_RB_RPTR_ADDR_LO
#define UNSERIALIZE_SCALAR(scalar)
struct gem5::GEM5_PACKED sdmaSRBMWriteHeader
Wraps a std::function object in a DmaCallback.
void setDoorbellType(uint32_t offset, QueueType qt)
Set handles to GPU blocks.
struct gem5::GEM5_PACKED sdmaAtomicHeader
int getIHClientId()
Returns the client id for the Interrupt Handler.
#define SDMA_SUBOP_COPY_SOA
void dmaReadVirt(Addr host_addr, unsigned size, DmaCallback *cb, void *data, Tick delay=0)
Initiate a DMA read from virtual address host_addr.
#define SDMA_SUBOP_POLL_MEM_VERIFY
#define mmSDMA_GFX_RB_BASE
SDMAQueue gfx
Each SDMAEngine processes four queues: paging, gfx, rlc0, and rlc1, where RLC stands for Run List Con...
void readRequest(Addr addr, uint8_t *data, int size, Request::Flags flag, Event *callback)
Read size amount of data from device memory at addr using flags and callback.
struct gem5::GEM5_PACKED sdmaTrap
uint64_t getPageDoorbellOffset()
#define SDMA_SUBOP_WRITE_LINEAR
void atomicData(SDMAQueue *q, sdmaAtomicHeader *header, sdmaAtomic *pkt, uint64_t *dmaBuffer)
uint32_t sdmax_rlcx_rb_cntl
uint64_t getGfxDoorbell()
#define SDMA_SUBOP_COPY_TILED
void writeRequest(Addr addr, uint8_t *data, int size, Request::Flags flag, Event *callback)
Write size amount of data to device memory at addr using flags and callback.
void setPageWptrLo(uint32_t data)
@ SOC15_IH_CLIENTID_SDMA0
#define mmSDMA_GFX_DOORBELL_OFFSET
struct gem5::GEM5_PACKED sdmaPollRegMemHeader
Tick write(PacketPtr pkt) override
Inherited methods.
@ SOC15_IH_CLIENTID_SDMA1
void processRLC0(Addr wptrOffset)
void decodeNext(SDMAQueue *q)
This method checks read and write pointers and starts decoding packets if the read pointer is less th...
Addr getGARTAddr(Addr addr) const
Methods for translation.
#define SDMA_SUBOP_COPY_LINEAR_SUB_WIND
static constexpr int AMDGPU_MMHUB_PAGE_SIZE
#define SDMA_SUBOP_PTEPDE_RMW
void submitInterruptCookie()
void setGfxRptrHi(uint32_t data)
void writeDone(SDMAQueue *q, sdmaWrite *pkt, uint32_t *dmaBuffer)
void unserialize(CheckpointIn &cp) override
Unserialize an object.
uint64_t getGfxDoorbellOffset()
void setPageDoorbellOffsetHi(uint32_t data)
void setGfxDoorbellLo(uint32_t data)
void fenceDone(SDMAQueue *q, sdmaFence *pkt)
void setPageSize(uint32_t data)
void writeReadData(SDMAQueue *q, sdmaWrite *pkt, uint32_t *dmaBuffer)
#define mmSDMA_GFX_RB_WPTR_POLL_ADDR_HI
bool inAGP(Addr vaddr)
Methods for resolving apertures.
uint32_t sdmax_rlcx_rb_rptr_addr_lo
void unserialize(CheckpointIn &cp) override
Unserialize an object.
bool pollRegMemFunc(uint32_t value, uint32_t reference, uint32_t func)
#define SDMA_SUBOP_PTEPDE_COPY_BACKWARDS
#define SDMA_OP_NOP
Commands for the SDMA engine.
#define SDMA_OP_TIMESTAMP
#define mmSDMA_PAGE_RB_RPTR_ADDR_HI
void unregisterRLCQueue(Addr doorbell)
struct gem5::GEM5_PACKED sdmaIndirectBuffer
TranslationGenPtr translate(Addr vaddr, Addr size) override
GPUController will perform DMA operations on VAs, and because page faults are not currently supported...
#define SDMA_SUBOP_TIMESTAMP_GET_GLOBAL
void setMQD(SDMAQueueDesc *mqd)
void setPageBaseHi(uint32_t data)
This class takes an arbitrary memory region (address/length pair) and generates a series of appropria...
#define mmSDMA_PAGE_DOORBELL
#define mmSDMA_GFX_RB_BASE_HI
#define SDMA_SUBOP_COPY_LINEAR_PHY
void setGfxBaseLo(uint32_t data)
#define mmSDMA_GFX_RB_RPTR_ADDR_HI
Bitfield< 24, 21 > opcode
void setGfxDoorbellHi(uint32_t data)
AMDGPUInterruptHandler * getIH()
Get handles to GPU blocks.
void setGfxRptrLo(uint32_t data)
#define SDMA_SUBOP_COPY_LINEAR
void processPage(Addr wptrOffset)
virtual std::string name() const
void setMQDAddr(Addr mqdAddr)
#define SDMA_OP_CONST_FILL
void pollRegMemRead(SDMAQueue *q, sdmaPollRegMemHeader *header, sdmaPollRegMem *pkt, uint32_t dma_buffer, int count)
void processRLC(Addr doorbellOffset, Addr wptrOffset)
void ptePdeDone(SDMAQueue *q, sdmaPtePde *pkt, uint64_t *dmaBuffer)
void trap(SDMAQueue *q, sdmaTrap *pkt)
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
void setGfxDoorbellOffsetHi(uint32_t data)
void ptePde(SDMAQueue *q, sdmaPtePde *pkt)
@ SOC15_IH_CLIENTID_SDMA4
struct gem5::GEM5_PACKED sdmaPtePde
uint64_t gfxDoorbellOffset
struct gem5::GEM5_PACKED sdmaAtomic
#define mmSDMA_GFX_RB_CNTL
MMIO offsets for SDMA engine.
void serialize(CheckpointOut &cp) const override
Serialize an object.
#define SDMA_SUBOP_COPY_TILED_SUB_WIND
#define SDMA_SUBOP_TIMESTAMP_SET
void copy(SDMAQueue *q, sdmaCopy *pkt)
#define SDMA_SUBOP_TIMESTAMP_GET
void setPageRptrHi(uint32_t data)
Device model for an AMD GPU.
#define mmSDMA_PAGE_RB_WPTR_POLL_ADDR_LO
void registerRLCQueue(Addr doorbell, Addr mqdAddr, SDMAQueueDesc *mqd)
Methods for RLC queues.
void serialize(CheckpointOut &cp) const override
Serialize an object.
constexpr T insertBits(T val, unsigned first, unsigned last, B bit_val)
Returns val with bits first to last set to the LSBs of bit_val.
constexpr T bits(T val, unsigned first, unsigned last)
Extract the bitfield from position 'first' to 'last' (inclusive) from 'val' and right justify it.
void setPageDoorbellLo(uint32_t data)
void setGfxDoorbellOffsetLo(uint32_t data)
#define SERIALIZE_ARRAY(member, size)
void dmaWriteVirt(Addr host_addr, unsigned size, DmaCallback *b, void *data, Tick delay=0)
Initiate a DMA write from virtual address host_addr.
System DMA Engine class for AMD dGPU.
#define mmSDMA_GFX_DOORBELL
#define SDMA_OP_SRBM_WRITE
@ SOC15_IH_CLIENTID_SDMA2
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
uint64_t pageDoorbellOffset
struct gem5::GEM5_PACKED sdmaSRBMWrite
struct gem5::GEM5_PACKED sdmaFence
#define SERIALIZE_SCALAR(scalar)
Translation range generators.
void atomicDone(SDMAQueue *q, sdmaAtomicHeader *header, sdmaAtomic *pkt, uint64_t *dmaBuffer)
void pollRegMem(SDMAQueue *q, sdmaPollRegMemHeader *header, sdmaPollRegMem *pkt)
Implements a poll reg/mem packet that polls an SRBM register or a memory location,...
void copyReadData(SDMAQueue *q, sdmaCopy *pkt, uint8_t *dmaBuffer)
#define SDMA_SUBOP_COPY_T2T_SUB_WIND
#define SDMA_SUBOP_POLL_REG_WRITE_MEM
void processRLC1(Addr wptrOffset)
@ SOC15_IH_CLIENTID_SDMA3
void decodeHeader(SDMAQueue *q, uint32_t data)
Reads the first DW (32 bits) (i.e., header) of an SDMA packet, which encodes the opcode and sub-opcod...
uint64_t getPageDoorbell()
AMDGPUMemoryManager * getMemMgr()
#define SDMA_SUBOP_WRITE_TILED
#define SDMA_SUBOP_PTEPDE_COPY
#define UNSERIALIZE_ARRAY(member, size)
#define mmSDMA_PAGE_DOORBELL_OFFSET
void setPageRptrLo(uint32_t data)
void deallocateRLCQueues()
#define mmSDMA_PAGE_RB_BASE
void setGfxWptrLo(uint32_t data)
uint32_t sdmax_rlcx_rb_rptr_addr_hi
@ SOC15_IH_CLIENTID_SDMA5
Addr getDeviceAddress(Addr raw_addr)
Translate an address in an SDMA packet.
constexpr unsigned int SDMA_ATOMIC_ADD64
T getLE() const
Get the data in the packet byte swapped from little endian to host endian.
void setGPUDevice(AMDGPUDevice *gpu_device)
struct gem5::GEM5_PACKED sdmaPollRegMem
#define mmSDMA_PAGE_RB_RPTR_ADDR_LO
std::ostream CheckpointOut
void fence(SDMAQueue *q, sdmaFence *pkt)
void srbmWrite(SDMAQueue *q, sdmaSRBMWriteHeader *header, sdmaSRBMWrite *pkt)
void processGfx(Addr wptrOffset)
Given a new write ptr offset, communicated to the GPU through a doorbell write, the SDMA engine proce...
#define SDMA_SUBOP_PTEPDE_GEN
#define SDMA_OP_DUMMY_TRAP
void setPageDoorbellOffsetLo(uint32_t data)
void setDevRequestor(RequestorID mid)
#define SDMA_OP_POLL_REGMEM
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
void setGfxWptrHi(uint32_t data)
#define SDMA_SUBOP_POLL_DBIT_WRITE_MEM
AddrRangeList getAddrRanges() const override
Every PIO device is obliged to provide an implementation that returns the address ranges the device r...
#define SDMA_SUBOP_COPY_DIRTY_PAGE
void setSDMAEngine(Addr offset, SDMAEngine *eng)
void setGfxSize(uint32_t data)
void prepareInterruptCookie(ContextID cntxtId, uint32_t ring_id, uint32_t client_id, uint32_t source_id)
void setPageBaseLo(uint32_t data)
void setGfxBaseHi(uint32_t data)
RequestorID vramRequestorId()
Methods related to translations and system/device memory.
@ SOC15_IH_CLIENTID_SDMA7
void copyDone(SDMAQueue *q, sdmaCopy *pkt, uint8_t *dmaBuffer)
std::unique_ptr< TranslationGen > TranslationGenPtr
void atomic(SDMAQueue *q, sdmaAtomicHeader *header, sdmaAtomic *pkt)
#define panic(...)
This implements a cprintf based panic() function.
std::array< Addr, 2 > rlcInfo
void writeMMIO(PacketPtr pkt, Addr mmio_offset)
Methods for setting the values of SDMA MMIO registers.
void setPageWptrHi(uint32_t data)
Generated on Sun Jul 30 2023 01:56:54 for gem5 by doxygen 1.8.17