36 #include "debug/SDMAData.hh"
37 #include "debug/SDMAEngine.hh"
43 #include "params/SDMAEngine.hh"
50 gfxDoorbell(0), gfxDoorbellOffset(0), gfxWptr(0), pageBase(0),
51 pageRptr(0), pageDoorbell(0), pageDoorbellOffset(0),
52 pageWptr(0), gpuDevice(nullptr), walker(
p.walker)
87 panic(
"Unknown SDMA id");
96 addr = (((
addr >> 12) << 3) << 12) | low_bits;
109 auto addr_range = *(tgen->begin());
110 Addr tmp_addr = addr_range.paddr;
121 Addr device_addr = 0;
189 panic(
"No free RLCs. Check they are properly unmapped.");
200 }
else if (
rlcInfo[1] == doorbell) {
204 panic(
"Cannot unregister: no RLC queue at %#lx\n", doorbell);
242 if (
rlcInfo[0] == doorbellOffset) {
244 }
else if (
rlcInfo[1] == doorbellOffset) {
247 panic(
"Cannot process: no RLC queue at %#lx\n", doorbellOffset);
285 if (
q->rptr() !=
q->wptr()) {
291 [ = ] (
const uint32_t &
header)
293 dmaReadVirt(
q->rptr(),
sizeof(uint32_t), cb, &cb->dmaBuffer);
300 q->globalRptr(),
q->rptrWbAddr());
301 if (
q->rptrWbAddr()) {
303 [ = ](
const uint64_t &) { },
q->globalRptr());
306 q->processing(
false);
324 void *dmaBuffer =
nullptr;
330 uint32_t NOP_count = (
header >> 16) & 0x3FFF;
332 if (NOP_count > 0)
q->incRptr(NOP_count * 4);
337 switch (sub_opcode) {
341 [ = ] (
const uint64_t &)
346 panic(
"SDMA_SUBOP_COPY_LINEAR_SUB_WIND not implemented");
349 panic(
"SDMA_SUBOP_COPY_TILED not implemented");
352 panic(
"SDMA_SUBOP_COPY_TILED_SUB_WIND not implemented");
355 panic(
"SDMA_SUBOP_COPY_T2T_SUB_WIND not implemented");
358 panic(
"SDMA_SUBOP_COPY_SOA not implemented");
361 panic(
"SDMA_SUBOP_COPY_DIRTY_PAGE not implemented");
364 panic(
"SDMA_SUBOP_COPY_LINEAR_PHY not implemented");
367 panic(
"SDMA unknown copy sub-opcode.");
373 switch (sub_opcode) {
377 [ = ] (
const uint64_t &)
382 panic(
"SDMA_SUBOP_WRITE_TILED not implemented.\n");
392 [ = ] (
const uint64_t &)
400 [ = ] (
const uint64_t &)
408 [ = ] (
const uint64_t &)
414 warn(
"SDMA_OP_SEM not implemented");
423 [ = ] (
const uint64_t &)
426 switch (sub_opcode) {
428 panic(
"SDMA_SUBOP_POLL_REG_WRITE_MEM not implemented");
431 panic(
"SDMA_SUBOP_POLL_DBIT_WRITE_MEM not implemented");
434 panic(
"SDMA_SUBOP_POLL_MEM_VERIFY not implemented");
442 warn(
"SDMA_OP_SEM not implemented");
451 [ = ] (
const uint64_t &)
457 warn(
"SDMA_OP_CONST_FILL not implemented");
462 switch (sub_opcode) {
467 [ = ] (
const uint64_t &)
472 panic(
"SDMA_SUBOP_PTEPDE_COPY not implemented");
475 panic(
"SDMA_SUBOP_PTEPDE_COPY not implemented");
478 panic(
"SDMA_SUBOP_PTEPDE_RMW not implemented");
489 switch (sub_opcode) {
499 warn(
"SDMA_OP_TIMESTAMP not implemented");
508 [ = ] (
const uint64_t &)
514 warn(
"SDMA_OP_PRE_EXE not implemented");
519 warn(
"SDMA_OP_DUMMY_TRAP not implemented");
523 panic(
"Invalid SDMA packet.\n");
538 uint32_t *dmaBuffer =
new uint32_t[pkt->
count];
549 int bufferSize =
sizeof(uint32_t) * pkt->
count;
550 q->incRptr(bufferSize);
553 for (
int i = 0;
i < pkt->
count; ++
i) {
568 [ = ] (
const uint64_t &) {
writeDone(
q, pkt, dmaBuffer); });
598 uint8_t *dmaBuffer =
new uint8_t[pkt->
count];
608 for (; !gen.done(); gen.next()) {
613 gen.size(), gen.addr(), chunk_addr);
617 gen.last() ? cb :
nullptr);
618 dmaBuffer += gen.size();
622 [ = ] (
const uint64_t &) {
copyReadData(
q, pkt, dmaBuffer); });
632 uint64_t *dmaBuffer64 =
reinterpret_cast<uint64_t *
>(dmaBuffer);
638 DPRINTF(SDMAData,
"Copy packet data:\n");
639 for (
int i = 0;
i < pkt->
count/8; ++
i) {
640 DPRINTF(SDMAData,
"%016lx\n", dmaBuffer64[
i]);
653 for (; !gen.done(); gen.next()) {
658 gen.size(), gen.addr(), chunk_addr);
662 gen.last() ? cb :
nullptr);
664 dmaBuffer += gen.size();
668 [ = ] (
const uint64_t &) {
copyDone(
q, pkt, dmaBuffer); });
690 q->ib()->size(pkt->
size *
sizeof(uint32_t) + 1);
691 q->ib()->setWptr(pkt->
size *
sizeof(uint32_t));
730 uint32_t ring_id = 0;
751 [[maybe_unused]] uint32_t reg_addr = pkt->
regAddr << 2;
752 uint32_t reg_mask = 0x00000000;
754 if (
header->byteEnable & 0x8) reg_mask |= 0xFF000000;
755 if (
header->byteEnable & 0x4) reg_mask |= 0x00FF0000;
756 if (
header->byteEnable & 0x2) reg_mask |= 0x0000FF00;
757 if (
header->byteEnable & 0x1) reg_mask |= 0x000000FF;
758 pkt->
data &= reg_mask;
761 reg_addr, pkt->
data);
763 warn_once(
"SRBM write not performed, no SRBM model. This needs to be fixed"
764 " if correct system simulation is relying on SRBM registers.");
783 "mask=%p, retry=%d, pinterval=%d\n",
header->mode,
header->func,
793 [ = ] (
const uint32_t &dma_buffer) {
796 (
void *)&cb->dmaBuffer);
798 panic(
"SDMA poll mem operation not implemented.");
802 warn_once(
"SDMA poll reg is not implemented. If this is required for "
803 "correctness, an SRBM model needs to be implemented.");
830 [ = ] (
const uint32_t &dma_buffer) {
833 (
void *)&cb->dmaBuffer);
852 return value < reference;
855 return value <= reference;
858 return value == reference;
861 return value != reference;
864 return value >= reference;
867 return value > reference;
870 panic(
"SDMA POLL_REGMEM unknown comparison function.");
887 uint64_t *dmaBuffer =
new uint64_t[pkt->
count];
888 for (
int i = 0;
i < pkt->
count;
i++) {
898 sizeof(uint64_t) * pkt->
count, 0,
902 [ = ] (
const uint64_t &) {
ptePdeDone(
q, pkt, dmaBuffer); });
929 uint64_t *dmaBuffer =
new uint64_t;
931 [ = ] (
const uint64_t &)
945 int64_t dst_data = *dmaBuffer;
946 int64_t src_data = pkt->
srcData;
949 src_data, dst_data + src_data);
952 *dmaBuffer = dst_data + src_data;
955 [ = ] (
const uint64_t &)
959 panic(
"Unsupported SDMA atomic opcode: %d\n",
header->opcode);
1009 Addr rptr[num_queues];
1010 Addr wptr[num_queues];
1011 Addr size[num_queues];
1012 bool processing[num_queues];
1014 for (
int i = 0;
i < num_queues;
i++) {
1015 base[
i] = queues[
i]->base();
1016 rptr[
i] = queues[
i]->getRptr();
1017 wptr[
i] = queues[
i]->getWptr();
1018 size[
i] = queues[
i]->size();
1019 processing[
i] = queues[
i]->processing();
1048 Addr rptr[num_queues];
1049 Addr wptr[num_queues];
1050 Addr size[num_queues];
1051 bool processing[num_queues];
1065 for (
int i = 0;
i < num_queues;
i++) {
1066 queues[
i]->base(
base[
i]);
1067 queues[
i]->rptr(rptr[
i]);
1068 queues[
i]->wptr(wptr[
i]);
1069 queues[
i]->size(size[
i]);
1070 queues[
i]->processing(processing[
i]);
1078 pkt->
getLE<uint32_t>());
1081 switch (mmio_offset) {
1107 uint32_t rb_size =
bits(pkt->
getLE<uint32_t>(), 6, 1);
1108 assert(rb_size >= 6 && rb_size <= 62);
1139 uint32_t rb_size =
bits(pkt->
getLE<uint32_t>(), 6, 1);
1140 assert(rb_size >= 6 && rb_size <= 62);
static constexpr int AMDGPU_MMHUB_PAGE_SIZE
Device model for an AMD GPU.
void setDoorbellType(uint32_t offset, QueueType qt)
Set handles to GPU blocks.
AMDGPUMemoryManager * getMemMgr()
AMDGPUInterruptHandler * getIH()
Get handles to GPU blocks.
RequestorID vramRequestorId()
Methods related to translations and system/device memory.
void setSDMAEngine(Addr offset, SDMAEngine *eng)
void prepareInterruptCookie(ContextID cntxtId, uint32_t ring_id, uint32_t client_id, uint32_t source_id)
void submitInterruptCookie()
void writeRequest(Addr addr, uint8_t *data, int size, Request::Flags flag, Event *callback)
Write size amount of data to device memory at addr using flags and callback.
void readRequest(Addr addr, uint8_t *data, int size, Request::Flags flag, Event *callback)
Read size amount of data from device memory at addr using flags and callback.
Translation range generators.
bool inAGP(Addr vaddr)
Methods for resolving apertures.
This class takes an arbitrary memory region (address/length pair) and generates a series of appropria...
void serialize(CheckpointOut &cp) const override
Serialize an object.
void unserialize(CheckpointIn &cp) override
Unserialize an object.
Wraps a std::function object in a DmaCallback.
void dmaReadVirt(Addr host_addr, unsigned size, DmaCallback *cb, void *data, Tick delay=0)
Initiate a DMA read from virtual address host_addr.
void dmaWriteVirt(Addr host_addr, unsigned size, DmaCallback *b, void *data, Tick delay=0)
Initiate a DMA write from virtual address host_addr.
virtual std::string name() const
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
T getLE() const
Get the data in the packet byte swapped from little endian to host endian.
System DMA Engine class for AMD dGPU.
void setPageRptrLo(uint32_t data)
void unserialize(CheckpointIn &cp) override
Unserialize an object.
uint64_t getPageDoorbellOffset()
SDMAQueue gfx
Each SDMAEngine processes four queues: paging, gfx, rlc0, and rlc1, where RLC stands for Run List Con...
void ptePde(SDMAQueue *q, sdmaPtePde *pkt)
void setGfxRptrLo(uint32_t data)
void setGfxWptrLo(uint32_t data)
uint64_t getGfxDoorbellOffset()
void setPageDoorbellHi(uint32_t data)
void setGfxRptrHi(uint32_t data)
void writeDone(SDMAQueue *q, sdmaWrite *pkt, uint32_t *dmaBuffer)
void setGfxSize(uint64_t data)
void processRLC(Addr doorbellOffset, Addr wptrOffset)
void registerRLCQueue(Addr doorbell, Addr rb_base, uint32_t size, Addr rptr_wb_addr)
Methods for RLC queues.
void copy(SDMAQueue *q, sdmaCopy *pkt)
Tick write(PacketPtr pkt) override
Inherited methods.
void writeMMIO(PacketPtr pkt, Addr mmio_offset)
Methods for setting the values of SDMA MMIO registers.
void fenceDone(SDMAQueue *q, sdmaFence *pkt)
void writeReadData(SDMAQueue *q, sdmaWrite *pkt, uint32_t *dmaBuffer)
void unregisterRLCQueue(Addr doorbell)
void setGfxBaseLo(uint32_t data)
void processRLC0(Addr wptrOffset)
void processGfx(Addr wptrOffset)
Given a new write ptr offset, communicated to the GPU through a doorbell write, the SDMA engine proce...
void setGfxDoorbellOffsetHi(uint32_t data)
void atomic(SDMAQueue *q, sdmaAtomicHeader *header, sdmaAtomic *pkt)
void deallocateRLCQueues()
Addr getGARTAddr(Addr addr) const
Methods for translation.
void setPageDoorbellOffsetHi(uint32_t data)
void processRLC1(Addr wptrOffset)
void setGfxWptrHi(uint32_t data)
void setGfxDoorbellOffsetLo(uint32_t data)
void processPage(Addr wptrOffset)
uint64_t getGfxDoorbell()
void decodeHeader(SDMAQueue *q, uint32_t data)
Reads the first DW (32 bits) (i.e., header) of an SDMA packet, which encodes the opcode and sub-opcod...
void setPageDoorbellOffsetLo(uint32_t data)
uint64_t getPageDoorbell()
SDMAEngine(const SDMAEngineParams &p)
void setGPUDevice(AMDGPUDevice *gpu_device)
Addr getDeviceAddress(Addr raw_addr)
Translate an address in an SDMA packet.
uint64_t pageDoorbellOffset
void setPageBaseHi(uint32_t data)
uint64_t gfxDoorbellOffset
bool pollRegMemFunc(uint32_t value, uint32_t reference, uint32_t func)
void setPageWptrHi(uint32_t data)
void setPageWptrLo(uint32_t data)
void pollRegMemRead(SDMAQueue *q, sdmaPollRegMemHeader *header, sdmaPollRegMem *pkt, uint32_t dma_buffer, int count)
void setGfxDoorbellLo(uint32_t data)
void copyReadData(SDMAQueue *q, sdmaCopy *pkt, uint8_t *dmaBuffer)
void indirectBuffer(SDMAQueue *q, sdmaIndirectBuffer *pkt)
void srbmWrite(SDMAQueue *q, sdmaSRBMWriteHeader *header, sdmaSRBMWrite *pkt)
void atomicData(SDMAQueue *q, sdmaAtomicHeader *header, sdmaAtomic *pkt, uint64_t *dmaBuffer)
void trap(SDMAQueue *q, sdmaTrap *pkt)
AddrRangeList getAddrRanges() const override
Every PIO device is obliged to provide an implementation that returns the address ranges the device r...
void setPageDoorbellLo(uint32_t data)
void setGfxDoorbellHi(uint32_t data)
void setPageBaseLo(uint32_t data)
void copyDone(SDMAQueue *q, sdmaCopy *pkt, uint8_t *dmaBuffer)
void setGfxBaseHi(uint32_t data)
void setPageSize(uint64_t data)
void ptePdeDone(SDMAQueue *q, sdmaPtePde *pkt, uint64_t *dmaBuffer)
TranslationGenPtr translate(Addr vaddr, Addr size) override
GPUController will perform DMA operations on VAs, and because page faults are not currently supported...
void setPageRptrHi(uint32_t data)
void decodeNext(SDMAQueue *q)
This method checks read and write pointers and starts decoding packets if the read pointer is less th...
void fence(SDMAQueue *q, sdmaFence *pkt)
void atomicDone(SDMAQueue *q, sdmaAtomicHeader *header, sdmaAtomic *pkt, uint64_t *dmaBuffer)
void serialize(CheckpointOut &cp) const override
Serialize an object.
int getIHClientId()
Returns the client id for the Interrupt Handler.
std::array< Addr, 2 > rlcInfo
void pollRegMem(SDMAQueue *q, sdmaPollRegMemHeader *header, sdmaPollRegMem *pkt)
Implements a poll reg/mem packet that polls an SRBM register or a memory location,...
void setDevRequestor(RequestorID mid)
constexpr T bits(T val, unsigned first, unsigned last)
Extract the bitfield from position 'first' to 'last' (inclusive) from 'val' and right justify it.
constexpr T insertBits(T val, unsigned first, unsigned last, B bit_val)
Returns val with bits first to last set to the LSBs of bit_val.
#define panic(...)
This implements a cprintf based panic() function.
#define UNSERIALIZE_ARRAY(member, size)
#define SERIALIZE_ARRAY(member, size)
Bitfield< 24, 21 > opcode
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
struct gem5::GEM5_PACKED sdmaFence
struct gem5::GEM5_PACKED sdmaAtomic
std::ostream CheckpointOut
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
@ SOC15_IH_CLIENTID_SDMA0
@ SOC15_IH_CLIENTID_SDMA1
struct gem5::GEM5_PACKED sdmaPtePde
struct gem5::GEM5_PACKED sdmaPollRegMem
struct gem5::GEM5_PACKED sdmaPollRegMemHeader
constexpr unsigned int SDMA_ATOMIC_ADD64
struct gem5::GEM5_PACKED sdmaWrite
struct gem5::GEM5_PACKED sdmaAtomicHeader
struct gem5::GEM5_PACKED sdmaCopy
SDMA packets.
struct gem5::GEM5_PACKED sdmaIndirectBuffer
struct gem5::GEM5_PACKED sdmaTrap
struct gem5::GEM5_PACKED sdmaSRBMWrite
struct gem5::GEM5_PACKED sdmaSRBMWriteHeader
std::unique_ptr< TranslationGen > TranslationGenPtr
Declaration of the Packet class.
#define SDMA_SUBOP_COPY_SOA
#define SDMA_OP_DUMMY_TRAP
#define SDMA_SUBOP_PTEPDE_COPY
#define SDMA_SUBOP_COPY_LINEAR
#define SDMA_SUBOP_COPY_T2T_SUB_WIND
#define SDMA_SUBOP_TIMESTAMP_GET
#define SDMA_SUBOP_WRITE_TILED
#define SDMA_SUBOP_PTEPDE_GEN
#define SDMA_SUBOP_COPY_LINEAR_SUB_WIND
#define SDMA_SUBOP_COPY_LINEAR_PHY
#define SDMA_OP_POLL_REGMEM
#define SDMA_SUBOP_TIMESTAMP_GET_GLOBAL
#define SDMA_SUBOP_PTEPDE_COPY_BACKWARDS
#define SDMA_SUBOP_TIMESTAMP_SET
#define SDMA_OP_TIMESTAMP
#define SDMA_OP_CONST_FILL
#define SDMA_SUBOP_COPY_DIRTY_PAGE
#define SDMA_OP_NOP
Commands for the SDMA engine.
#define SDMA_SUBOP_WRITE_LINEAR
#define SDMA_SUBOP_PTEPDE_RMW
#define SDMA_OP_SRBM_WRITE
#define SDMA_SUBOP_POLL_MEM_VERIFY
#define SDMA_SUBOP_POLL_REG_WRITE_MEM
#define SDMA_SUBOP_COPY_TILED_SUB_WIND
#define SDMA_SUBOP_POLL_DBIT_WRITE_MEM
#define SDMA_SUBOP_COPY_TILED
#define mmSDMA_GFX_DOORBELL
#define mmSDMA_PAGE_RB_RPTR_ADDR_HI
#define mmSDMA_GFX_RB_WPTR_POLL_ADDR_LO
#define mmSDMA_PAGE_RB_BASE
#define mmSDMA_PAGE_RB_WPTR_POLL_ADDR_LO
#define mmSDMA_PAGE_DOORBELL
#define mmSDMA_GFX_DOORBELL_OFFSET
#define mmSDMA_PAGE_DOORBELL_OFFSET
#define mmSDMA_GFX_RB_CNTL
MMIO offsets for SDMA engine.
#define mmSDMA_GFX_RB_RPTR_ADDR_HI
#define mmSDMA_PAGE_RB_RPTR_ADDR_LO
#define mmSDMA_GFX_RB_RPTR_ADDR_LO
#define mmSDMA_GFX_RB_WPTR_POLL_ADDR_HI
#define mmSDMA_GFX_RB_BASE
#define mmSDMA_PAGE_RB_CNTL
#define mmSDMA_GFX_RB_BASE_HI
#define UNSERIALIZE_SCALAR(scalar)
#define SERIALIZE_SCALAR(scalar)