35#include "debug/PM4PacketProcessor.hh"
42#include "enums/GfxVersion.hh"
96 addr = (((
addr >> 12) << 3) << 12) | low_bits;
112 return result->second;
150 "%d, pipe %d queue: %d size: %d\n",
id,
q->base(),
q->offset(),
151 q->me(),
q->pipe(),
q->queue(),
q->size());
157 q->wptr(wptrOffset *
sizeof(uint32_t));
159 if (!
q->processing()) {
169 q->id(),
q->rptr(),
q->wptr());
171 if (
q->rptr() <
q->wptr()) {
184 q->processing(
false);
200 void *dmaBuffer =
nullptr;
206 if (
header.count != 0x3fff) {
207 q->incRptr((
header.count + 1) *
sizeof(uint32_t));
214 [ = ] (
const uint64_t &)
223 [ = ] (
const uint64_t &)
232 [ = ] (
const uint64_t &)
241 [ = ] (
const uint64_t &)
250 [ = ] (
const uint64_t &)
259 [ = ] (
const uint64_t &)
268 [ = ] (
const uint64_t &)
277 [ = ] (
const uint64_t &)
284 [ = ] (
const uint64_t &)
294 [ = ] (
const uint64_t &)
303 [ = ] (
const uint64_t &)
312 [ = ] (
const uint64_t &)
321 q->incRptr((
header.count + 1) *
sizeof(uint32_t));
326 warn(
"PM4 packet opcode 0x%x not supported.\n",
header.opcode);
329 q->incRptr((
header.count + 1) *
sizeof(uint32_t));
371 "pipe: %d, queueSlot: %d, queueType: %d, allocFormat: %d, "
372 "engineSel: %d, numQueues: %d, checkDisable: %d, doorbellOffset:"
373 " %d, mqdAddr: %lx, wptrAddr: %lx\n", pkt->
queueSel, pkt->
vmid,
384 "Mapping mqd from %p %p (vmid %d - last vmid %d).\n",
393 [ = ] (
const uint32_t &) {
404 [ = ] (
const uint32_t &) {
441 mqd_size, 8, GfxVersion::gfx900,
offset,
459 "%#x/%#x ib: %#x/%#x size: %d ctrl: %#x rptr wb addr: %#lx\n",
484 "%d destSel %d dataSel %d, address %p data %p, intCtx %p\n",
489 "PM4 release_mem destSel 0 bypasses caches to MC.\n");
497 panic(
"Unimplemented PM4ReleaseMem.dataSelect");
508 "pipe: %d, queueSlot:%d\n",
q->id(), pkt->
intCtxId,
q->me(),
509 q->pipe(),
q->queue());
513 ringId = (
q->queue() << 4) | (
q->me() << 2) |
q->pipe();
537 "pasid: %p doorbellOffset0 %p \n",
583 for (
auto id : iter.second) {
587 if (
queues[
id]->privileged()) {
595 96 *
sizeof(uint32_t));
598 [ = ] (
const uint32_t &) {
604 hsa_pp.unsetDeviceQueueDesc(
id, 8);
610 panic(
"Unrecognized options\n");
635 Addr scratch_base = (
Addr)
bits(shMemBases, 15, 0) << 48;
641 scratch_base + 0xFFFFFFFF);
685 q->wptr(pkt->
ibSize *
sizeof(uint32_t));
701 q->wptr(pkt->
ibSize *
sizeof(uint32_t));
753 " %d command: %d, pasid: %d, doorbellOffset: %d, engineSel: %d "
767 panic(
"query_status with interruptSel %d command %d not supported",
784 switch (mmio_offset) {
1044 int num_queues =
queues.size();
1045 Addr id[num_queues];
1046 Addr mqd_base[num_queues];
1048 Addr rptr[num_queues];
1049 Addr wptr[num_queues];
1050 Addr ib_base[num_queues];
1051 Addr ib_rptr[num_queues];
1052 Addr ib_wptr[num_queues];
1054 bool processing[num_queues];
1055 bool ib[num_queues];
1056 uint32_t
me[num_queues];
1057 uint32_t pipe[num_queues];
1058 uint32_t queue[num_queues];
1059 bool privileged[num_queues];
1060 uint32_t hqd_active[num_queues];
1061 uint32_t hqd_vmid[num_queues];
1062 Addr aql_rptr[num_queues];
1063 uint32_t doorbell[num_queues];
1064 uint32_t hqd_pq_control[num_queues];
1067 for (
auto iter :
queues) {
1070 mqd_base[
i] =
q->mqdBase();
1071 bool cur_state =
q->ib();
1073 base[
i] =
q->base() >> 8;
1074 rptr[
i] =
q->getRptr();
1075 wptr[
i] =
q->getWptr();
1077 ib_base[
i] =
q->ibBase();
1078 ib_rptr[
i] =
q->getRptr();
1079 ib_wptr[
i] =
q->getWptr();
1082 processing[
i] =
q->processing();
1085 pipe[
i] =
q->pipe();
1086 queue[
i] =
q->queue();
1087 privileged[
i] =
q->privileged();
1088 hqd_active[
i] =
q->getMQD()->hqd_active;
1089 hqd_vmid[
i] =
q->getMQD()->hqd_vmid;
1090 aql_rptr[
i] =
q->getMQD()->aqlRptr;
1091 doorbell[
i] =
q->getMQD()->doorbell;
1092 hqd_pq_control[
i] =
q->getMQD()->hqd_pq_control;
1128 Addr id[num_queues];
1129 Addr mqd_base[num_queues];
1131 Addr rptr[num_queues];
1132 Addr wptr[num_queues];
1133 Addr ib_base[num_queues];
1134 Addr ib_rptr[num_queues];
1135 Addr ib_wptr[num_queues];
1137 bool processing[num_queues];
1138 bool ib[num_queues];
1139 uint32_t
me[num_queues];
1140 uint32_t pipe[num_queues];
1141 uint32_t queue[num_queues];
1142 bool privileged[num_queues];
1143 uint32_t hqd_active[num_queues];
1144 uint32_t hqd_vmid[num_queues];
1145 Addr aql_rptr[num_queues];
1146 uint32_t doorbell[num_queues];
1147 uint32_t hqd_pq_control[num_queues];
1170 for (
int i = 0;
i < num_queues;
i++) {
1176 mqd->
rptr = rptr[
i];
1189 queues[
id[
i]]->processing(processing[
i]);
1191 queues[
id[
i]]->setPkt(
me[
i], pipe[
i], queue[
i], privileged[
i]);
1192 queues[
id[
i]]->getMQD()->hqd_active = hqd_active[
i];
1193 queues[
id[
i]]->getMQD()->hqd_vmid = hqd_vmid[
i];
1194 queues[
id[
i]]->getMQD()->aqlRptr = aql_rptr[
i];
1195 queues[
id[
i]]->getMQD()->doorbell = doorbell[
i];
1196 queues[
id[
i]]->getMQD()->hqd_pq_control = hqd_pq_control[
i];
Device model for an AMD GPU.
void insertQId(uint16_t vmid, int id)
GfxVersion getGfxVersion() const
void setDoorbellType(uint32_t offset, QueueType qt)
Set handles to GPU blocks.
void deallocateAllQueues()
std::unordered_map< uint16_t, std::set< int > > & getUsedVMIDs()
uint16_t getVMID(Addr doorbell)
AMDGPUInterruptHandler * getIH()
Get handles to GPU blocks.
void deallocateVmid(uint16_t vmid)
void mapDoorbellToVMID(Addr doorbell, uint16_t vmid)
void setRegVal(uint32_t addr, uint32_t value)
uint16_t allocateVMID(uint16_t pasid)
void deallocatePasid(uint16_t pasid)
SDMAEngine * getSDMAById(int id)
void setSDMAEngine(Addr offset, SDMAEngine *eng)
GPUCommandProcessor * CP()
void prepareInterruptCookie(ContextID cntxtId, uint32_t ring_id, uint32_t client_id, uint32_t source_id)
void submitInterruptCookie()
Translation range generators.
bool inAGP(Addr vaddr)
Methods for resolving apertures.
void setPageTableBase(uint16_t vmid, Addr ptBase)
Page table base/start accessors for user VMIDs.
void serialize(CheckpointOut &cp) const override
Serialize an object.
void unserialize(CheckpointIn &cp) override
Unserialize an object.
Wraps a std::function object in a DmaCallback.
void dmaReadVirt(Addr host_addr, unsigned size, DmaCallback *cb, void *data, Tick delay=0)
Initiate a DMA read from virtual address host_addr.
void dmaWriteVirt(Addr host_addr, unsigned size, DmaCallback *b, void *data, Tick delay=0)
Initiate a DMA write from virtual address host_addr.
HSAPacketProcessor & hsaPacketProc()
void setDeviceQueueDesc(uint64_t hostReadIndexPointer, uint64_t basePointer, uint64_t queue_id, uint32_t size, int doorbellSize, GfxVersion gfxVersion, Addr offset=0, uint64_t rd_idx=0)
void writeMMIO(PacketPtr pkt, Addr mmio_offset)
void setRbWptrPollAddrLo(uint32_t data)
void decodeHeader(PM4Queue *q, PM4Header header)
This method calls other PM4 packet processing methods based on the header of a PM4 packet.
void unserialize(CheckpointIn &cp) override
Unserialize an object.
void setRbWptrHi(uint32_t data)
void mapKiq(Addr offset)
The first compute queue, the Kernel Interface Queue a.k.a.
Addr getGARTAddr(Addr addr) const
void writeDataDone(PM4Queue *q, PM4WriteData *pkt, Addr addr)
void setRbWptrLo(uint32_t data)
void switchBuffer(PM4Queue *q, PM4SwitchBuf *pkt)
void setGPUDevice(AMDGPUDevice *gpu_device)
void serialize(CheckpointOut &cp) const override
Serialize an object.
void setRbCntl(uint32_t data)
uint32_t getKiqDoorbellOffset()
void setHqdPqWptrLo(uint32_t data)
std::unordered_map< uint32_t, PM4Queue * > queuesMap
void setUconfigReg(PM4Queue *q, PM4SetUconfigReg *pkt)
void queryStatus(PM4Queue *q, PM4QueryStatus *pkt)
void releaseMem(PM4Queue *q, PM4ReleaseMem *pkt)
void releaseMemDone(PM4Queue *q, PM4ReleaseMem *pkt, Addr addr)
void setHqdPqRptrReportAddr(uint32_t data)
void updateReadIndex(Addr offset, uint64_t rd_idx)
Update read index on doorbell rings.
void writeData(PM4Queue *q, PM4WriteData *pkt)
void setRbBaseHi(uint32_t data)
void setRbVmid(uint32_t data)
void setHqdActive(uint32_t data)
void processSDMAMQD(PM4MapQueues *pkt, PM4Queue *q, Addr addr, SDMAQueueDesc *mqd, uint16_t vmid)
void process(PM4Queue *q, Addr wptrOffset)
This method starts processing a PM4Queue from the current read pointer to the newly communicated write...
void setHqdPqControl(uint32_t data)
void setRbBaseLo(uint32_t data)
void setHqdIbCtrl(uint32_t data)
void setRbRptrAddrHi(uint32_t data)
void setHqdPqWptrPollAddr(uint32_t data)
void newQueue(QueueDesc *q, Addr offset, PM4MapQueues *pkt=nullptr, int id=-1)
This method creates a new PM4Queue based on a queue descriptor and an offset.
void unmapQueues(PM4Queue *q, PM4UnmapQueues *pkt)
void queryStatusDone(PM4Queue *q, PM4QueryStatus *pkt)
void mapProcess(uint32_t pasid, uint64_t ptBase, uint32_t shMemBases)
void setRbDoorbellRangeLo(uint32_t data)
void waitRegMem(PM4Queue *q, PM4WaitRegMem *pkt)
void setHqdPqBaseHi(uint32_t data)
void runList(PM4Queue *q, PM4RunList *pkt)
void decodeNext(PM4Queue *q)
This method decodes the next packet in a PM4Queue.
void mapPq(Addr offset)
The first graphics queue, the Primary Queue a.k.a.
void setHqdVmid(uint32_t data)
void setHqdPqDoorbellCtrl(uint32_t data)
void setHqdPqBase(uint32_t data)
void setRbDoorbellRangeHi(uint32_t data)
uint32_t getPqDoorbellOffset()
void doneMQDWrite(Addr mqdAddr, Addr addr)
std::unordered_map< uint16_t, PM4Queue * > queues
void indirectBuffer(PM4Queue *q, PM4IndirectBuf *pkt)
PM4PacketProcessor(const PM4PacketProcessorParams &p)
void setHqdPqPtr(uint32_t data)
void setHqdPqRptrReportAddrHi(uint32_t data)
void mapQueues(PM4Queue *q, PM4MapQueues *pkt)
TranslationGenPtr translate(Addr vaddr, Addr size) override
Method for functional translation.
void mapProcessGfx90a(PM4Queue *q, PM4MapProcessMI200 *pkt)
void processMQD(PM4MapQueues *pkt, PM4Queue *q, Addr addr, QueueDesc *mqd, uint16_t vmid)
void setRbRptrAddrLo(uint32_t data)
void setRbDoorbellCntrl(uint32_t data)
PM4Queue * getQueue(Addr offset, bool gfx=false)
Based on an offset communicated through doorbell write, the PM4PacketProcessor identifies which queue...
AddrRangeList getAddrRanges() const override
Every PIO device is obliged to provide an implementation that returns the address ranges the device r...
void setHqdPqWptrPollAddrHi(uint32_t data)
void setHqdPqWptrHi(uint32_t data)
void setRbWptrPollAddrHi(uint32_t data)
void mapProcessGfx9(PM4Queue *q, PM4MapProcess *pkt)
Class defining a PM4 queue.
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
T getLE() const
Get the data in the packet byte swapped from little endian to host endian.
System DMA Engine class for AMD dGPU.
void registerRLCQueue(Addr doorbell, Addr mqdAddr, SDMAQueueDesc *mqd)
Methods for RLC queues.
void setLdsApe(Addr base, Addr limit)
void setScratchApe(Addr base, Addr limit)
void setHwReg(int regIdx, uint32_t val)
The GPUCommandProcessor (CP) is responsible for accepting commands, in the form of HSA AQL packets,...
constexpr T bits(T val, unsigned first, unsigned last)
Extract the bitfield from position 'first' to 'last' (inclusive) from 'val' and right justify it.
#define panic(...)
This implements a cprintf based panic() function.
#define UNSERIALIZE_ARRAY(member, size)
#define SERIALIZE_ARRAY(member, size)
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
struct gem5::GEM5_PACKED PM4MapProcessMI200
struct gem5::GEM5_PACKED PM4WriteData
struct gem5::GEM5_PACKED PM4WaitRegMem
std::ostream CheckpointOut
struct gem5::GEM5_PACKED PM4RunList
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
@ SOC15_IH_CLIENTID_GRBM_CP
struct gem5::GEM5_PACKED PM4ReleaseMem
struct gem5::GEM5_PACKED PM4SwitchBuf
struct gem5::GEM5_PACKED PM4MapQueues
struct gem5::GEM5_PACKED PM4MapProcess
struct gem5::GEM5_PACKED SDMAQueueDesc
Queue descriptor for SDMA-based user queues (RLC queues).
struct gem5::GEM5_PACKED PM4UnmapQueues
struct gem5::GEM5_PACKED PM4SetUconfigReg
struct gem5::GEM5_PACKED PM4QueryStatus
struct gem5::GEM5_PACKED QueueDesc
Queue descriptor with relevant MQD attributes.
std::unique_ptr< TranslationGen > TranslationGenPtr
struct gem5::GEM5_PACKED PM4IndirectBuf
Declaration of the Packet class.
#define PACKET3_SET_UCONFIG_REG_START
Value from vega10/pm4_header.h.
#define mmCP_RB_DOORBELL_CONTROL
#define mmCP_RB0_RPTR_ADDR_HI
#define mmCP_HQD_PQ_RPTR_REPORT_ADDR
#define mmCP_HQD_PQ_DOORBELL_CONTROL
#define mmCP_HQD_PQ_WPTR_POLL_ADDR
#define mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI
#define mmCP_RB_DOORBELL_RANGE_UPPER
#define mmCP_HQD_IB_CONTROL
#define mmCP_RB_WPTR_POLL_ADDR_LO
#define mmCP_HQD_PQ_BASE_HI
#define mmCP_HQD_PQ_WPTR_HI
#define mmCP_HQD_PQ_CONTROL
#define mmCP_RB_DOORBELL_RANGE_LOWER
#define mmCP_RB_WPTR_POLL_ADDR_HI
#define mmCP_RB0_RPTR_ADDR
#define mmCP_HQD_PQ_WPTR_POLL_ADDR_HI
#define mmCP_HQD_PQ_WPTR_LO
#define UNSERIALIZE_SCALAR(scalar)
#define SERIALIZE_SCALAR(scalar)
uint32_t sdmax_rlcx_ib_base_lo
uint32_t sdmax_rlcx_rb_rptr
uint32_t sdmax_rlcx_rb_rptr_addr_hi
uint32_t sdmax_rlcx_rb_cntl
uint32_t hqd_pq_wptr_poll_addr_hi
uint32_t sdmax_rlcx_rb_wptr_hi
uint32_t sdmax_rlcx_ib_base_hi
uint32_t hqd_pq_wptr_poll_addr_lo
uint32_t hqd_pq_rptr_report_addr_hi
uint32_t hqd_pq_rptr_report_addr_lo
uint32_t hqd_pq_doorbell_control
uint32_t sdmax_rlcx_rb_rptr_addr_lo
uint32_t sdmax_rlcx_rb_wptr
uint32_t sdmax_rlcx_rb_rptr_hi
uint64_t completionSignal