Go to the documentation of this file.
35 #include "debug/PM4PacketProcessor.hh"
42 #include "enums/GfxVersion.hh"
96 addr = (((
addr >> 12) << 3) << 12) | low_bits;
112 return result->second;
150 "%d, pipe %d queue: %d size: %d\n",
id,
q->base(),
q->offset(),
151 q->me(),
q->pipe(),
q->queue(),
q->size());
157 q->wptr(wptrOffset *
sizeof(uint32_t));
159 if (!
q->processing()) {
169 q->id(),
q->rptr(),
q->wptr());
171 if (
q->rptr() <
q->wptr()) {
184 q->processing(
false);
200 void *dmaBuffer =
nullptr;
206 if (
header.count != 0x3fff) {
207 q->incRptr((
header.count + 1) *
sizeof(uint32_t));
214 [ = ] (
const uint64_t &)
223 [ = ] (
const uint64_t &)
232 [ = ] (
const uint64_t &)
241 [ = ] (
const uint64_t &)
250 [ = ] (
const uint64_t &)
259 [ = ] (
const uint64_t &)
268 [ = ] (
const uint64_t &)
277 [ = ] (
const uint64_t &)
284 [ = ] (
const uint64_t &)
294 [ = ] (
const uint64_t &)
303 [ = ] (
const uint64_t &)
312 [ = ] (
const uint64_t &)
321 q->incRptr((
header.count + 1) *
sizeof(uint32_t));
326 warn(
"PM4 packet opcode 0x%x not supported.\n",
header.opcode);
329 q->incRptr((
header.count + 1) *
sizeof(uint32_t));
371 "pipe: %d, queueSlot: %d, queueType: %d, allocFormat: %d, "
372 "engineSel: %d, numQueues: %d, checkDisable: %d, doorbellOffset:"
373 " %d, mqdAddr: %lx, wptrAddr: %lx\n", pkt->
queueSel, pkt->
vmid,
384 "Mapping mqd from %p %p (vmid %d - last vmid %d).\n",
393 [ = ] (
const uint32_t &) {
404 [ = ] (
const uint32_t &) {
441 mqd_size, 8, GfxVersion::gfx900,
offset,
459 "%#x/%#x ib: %#x/%#x size: %d ctrl: %#x rptr wb addr: %#lx\n",
484 "%d destSel %d dataSel %d, address %p data %p, intCtx %p\n",
489 "PM4 release_mem destSel 0 bypasses caches to MC.\n");
497 panic(
"Unimplemented PM4ReleaseMem.dataSelect");
508 "pipe: %d, queueSlot:%d\n",
q->id(), pkt->
intCtxId,
q->me(),
509 q->pipe(),
q->queue());
513 ringId = (
q->queue() << 4) | (
q->me() << 2) |
q->pipe();
537 "pasid: %p doorbellOffset0 %p \n",
583 for (
auto id : iter.second) {
587 if (
queues[
id]->privileged()) {
595 96 *
sizeof(uint32_t));
598 [ = ] (
const uint32_t &) {
604 hsa_pp.unsetDeviceQueueDesc(
id, 8);
610 panic(
"Unrecognized options\n");
635 Addr scratch_base = (
Addr)
bits(shMemBases, 15, 0) << 48;
641 scratch_base + 0xFFFFFFFF);
685 q->wptr(pkt->
ibSize *
sizeof(uint32_t));
701 q->wptr(pkt->
ibSize *
sizeof(uint32_t));
753 " %d command: %d, pasid: %d, doorbellOffset: %d, engineSel: %d "
767 panic(
"query_status with interruptSel %d command %d not supported",
784 switch (mmio_offset) {
1044 int num_queues =
queues.size();
1045 Addr id[num_queues];
1046 Addr mqd_base[num_queues];
1048 Addr rptr[num_queues];
1049 Addr wptr[num_queues];
1050 Addr ib_base[num_queues];
1051 Addr ib_rptr[num_queues];
1052 Addr ib_wptr[num_queues];
1054 bool processing[num_queues];
1055 bool ib[num_queues];
1056 uint32_t
me[num_queues];
1057 uint32_t pipe[num_queues];
1058 uint32_t queue[num_queues];
1059 bool privileged[num_queues];
1060 uint32_t hqd_active[num_queues];
1061 uint32_t hqd_vmid[num_queues];
1062 Addr aql_rptr[num_queues];
1063 uint32_t doorbell[num_queues];
1064 uint32_t hqd_pq_control[num_queues];
1067 for (
auto iter :
queues) {
1070 mqd_base[
i] =
q->mqdBase();
1071 bool cur_state =
q->ib();
1073 base[
i] =
q->base() >> 8;
1074 rptr[
i] =
q->getRptr();
1075 wptr[
i] =
q->getWptr();
1077 ib_base[
i] =
q->ibBase();
1078 ib_rptr[
i] =
q->getRptr();
1079 ib_wptr[
i] =
q->getWptr();
1082 processing[
i] =
q->processing();
1085 pipe[
i] =
q->pipe();
1086 queue[
i] =
q->queue();
1087 privileged[
i] =
q->privileged();
1088 hqd_active[
i] =
q->getMQD()->hqd_active;
1089 hqd_vmid[
i] =
q->getMQD()->hqd_vmid;
1090 aql_rptr[
i] =
q->getMQD()->aqlRptr;
1091 doorbell[
i] =
q->getMQD()->doorbell;
1092 hqd_pq_control[
i] =
q->getMQD()->hqd_pq_control;
1128 Addr id[num_queues];
1129 Addr mqd_base[num_queues];
1131 Addr rptr[num_queues];
1132 Addr wptr[num_queues];
1133 Addr ib_base[num_queues];
1134 Addr ib_rptr[num_queues];
1135 Addr ib_wptr[num_queues];
1137 bool processing[num_queues];
1138 bool ib[num_queues];
1139 uint32_t
me[num_queues];
1140 uint32_t pipe[num_queues];
1141 uint32_t queue[num_queues];
1142 bool privileged[num_queues];
1143 uint32_t hqd_active[num_queues];
1144 uint32_t hqd_vmid[num_queues];
1145 Addr aql_rptr[num_queues];
1146 uint32_t doorbell[num_queues];
1147 uint32_t hqd_pq_control[num_queues];
1170 for (
int i = 0;
i < num_queues;
i++) {
1176 mqd->
rptr = rptr[
i];
1189 queues[
id[
i]]->processing(processing[
i]);
1191 queues[
id[
i]]->setPkt(
me[
i], pipe[
i], queue[
i], privileged[
i]);
1192 queues[
id[
i]]->getMQD()->hqd_active = hqd_active[
i];
1193 queues[
id[
i]]->getMQD()->hqd_vmid = hqd_vmid[
i];
1194 queues[
id[
i]]->getMQD()->aqlRptr = aql_rptr[
i];
1195 queues[
id[
i]]->getMQD()->doorbell = doorbell[
i];
1196 queues[
id[
i]]->getMQD()->hqd_pq_control = hqd_pq_control[
i];
struct gem5::GEM5_PACKED PM4RunList
void mapPq(Addr offset)
The first graphics queue, the Primary Queue a.k.a.
void setRbRptrAddrLo(uint32_t data)
struct gem5::GEM5_PACKED PM4QueryStatus
struct gem5::GEM5_PACKED PM4WriteData
void decodeNext(PM4Queue *q)
This method decodes the next packet in a PM4Queue.
void mapProcess(uint32_t pasid, uint64_t ptBase, uint32_t shMemBases)
void setLdsApe(Addr base, Addr limit)
std::unordered_map< uint32_t, PM4Queue * > queuesMap
#define UNSERIALIZE_SCALAR(scalar)
void setRbWptrLo(uint32_t data)
void queryStatusDone(PM4Queue *q, PM4QueryStatus *pkt)
Wraps a std::function object in a DmaCallback.
void setDoorbellType(uint32_t offset, QueueType qt)
Set handles to GPU blocks.
uint32_t hqd_pq_doorbell_control
struct gem5::GEM5_PACKED PM4UnmapQueues
void dmaReadVirt(Addr host_addr, unsigned size, DmaCallback *cb, void *data, Tick delay=0)
Initiate a DMA read from virtual address host_addr.
struct gem5::GEM5_PACKED QueueDesc
Queue descriptor with relevant MQD attributes.
SDMAEngine * getSDMAById(int id)
void writeMMIO(PacketPtr pkt, Addr mmio_offset)
void deallocateAllQueues()
uint32_t sdmax_rlcx_rb_cntl
void setRbCntl(uint32_t data)
Class defining a PM4 queue.
struct gem5::GEM5_PACKED PM4SetUconfigReg
void setRbDoorbellCntrl(uint32_t data)
#define mmCP_HQD_IB_CONTROL
void submitInterruptCookie()
#define mmCP_HQD_PQ_WPTR_LO
HSAPacketProcessor & hsaPacketProc()
uint32_t hqd_pq_wptr_poll_addr_hi
void setRbDoorbellRangeHi(uint32_t data)
void serialize(CheckpointOut &cp) const override
Serialize an object.
void setHqdPqRptrReportAddr(uint32_t data)
void process(PM4Queue *q, Addr wptrOffset)
This method starts processing a PM4Queue from the current read pointer to the newly communicated write...
bool inAGP(Addr vaddr)
Methods for resolving apertures.
uint32_t sdmax_rlcx_rb_rptr_addr_lo
void setHqdPqControl(uint32_t data)
void unserialize(CheckpointIn &cp) override
Unserialize an object.
void setPageTableBase(uint16_t vmid, Addr ptBase)
Page table base/start accessors for user VMIDs.
uint32_t getKiqDoorbellOffset()
void newQueue(QueueDesc *q, Addr offset, PM4MapQueues *pkt=nullptr, int id=-1)
This method creates a new PM4Queue based on a queue descriptor and an offset.
void setDeviceQueueDesc(uint64_t hostReadIndexPointer, uint64_t basePointer, uint64_t queue_id, uint32_t size, int doorbellSize, GfxVersion gfxVersion, Addr offset=0, uint64_t rd_idx=0)
void mapDoorbellToVMID(Addr doorbell, uint16_t vmid)
void setUconfigReg(PM4Queue *q, PM4SetUconfigReg *pkt)
void releaseMemDone(PM4Queue *q, PM4ReleaseMem *pkt, Addr addr)
void setRbBaseLo(uint32_t data)
struct gem5::GEM5_PACKED PM4MapQueues
#define mmCP_HQD_PQ_DOORBELL_CONTROL
void switchBuffer(PM4Queue *q, PM4SwitchBuf *pkt)
void releaseMem(PM4Queue *q, PM4ReleaseMem *pkt)
void mapKiq(Addr offset)
The first compute queue, the Kernel Interface Queue a.k.a.
uint32_t hqd_pq_rptr_report_addr_lo
#define mmCP_HQD_PQ_WPTR_POLL_ADDR_HI
void mapProcessGfx90a(PM4Queue *q, PM4MapProcessMI200 *pkt)
#define mmCP_RB_WPTR_POLL_ADDR_HI
uint32_t hqd_pq_rptr_report_addr_hi
#define mmCP_HQD_PQ_CONTROL
struct gem5::GEM5_PACKED PM4MapProcess
AMDGPUInterruptHandler * getIH()
Get handles to GPU blocks.
struct gem5::GEM5_PACKED PM4SwitchBuf
void setHqdVmid(uint32_t data)
#define mmCP_RB0_RPTR_ADDR
void setHqdPqRptrReportAddrHi(uint32_t data)
uint32_t sdmax_rlcx_rb_rptr_hi
void setRbWptrPollAddrHi(uint32_t data)
void setGPUDevice(AMDGPUDevice *gpu_device)
#define PACKET3_SET_UCONFIG_REG_START
Value from vega10/pm4_header.h.
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
#define mmCP_HQD_PQ_WPTR_HI
void setRegVal(uint32_t addr, uint32_t value)
uint16_t allocateVMID(uint16_t pasid)
#define mmCP_RB_DOORBELL_CONTROL
void setHqdPqPtr(uint32_t data)
void writeDataDone(PM4Queue *q, PM4WriteData *pkt, Addr addr)
TranslationGenPtr translate(Addr vaddr, Addr size) override
Method for functional translation.
void setHqdPqWptrPollAddrHi(uint32_t data)
void doneMQDWrite(Addr mqdAddr, Addr addr)
void processSDMAMQD(PM4MapQueues *pkt, PM4Queue *q, Addr addr, SDMAQueueDesc *mqd, uint16_t vmid)
Device model for an AMD GPU.
void registerRLCQueue(Addr doorbell, Addr mqdAddr, SDMAQueueDesc *mqd)
Methods for RLC queues.
void serialize(CheckpointOut &cp) const override
Serialize an object.
#define mmCP_HQD_PQ_BASE_HI
constexpr T bits(T val, unsigned first, unsigned last)
Extract the bitfield from position 'first' to 'last' (inclusive) from 'val' and right justify it.
uint32_t sdmax_rlcx_ib_base_hi
#define SERIALIZE_ARRAY(member, size)
void setHqdActive(uint32_t data)
void setRbWptrHi(uint32_t data)
#define mmCP_RB0_RPTR_ADDR_HI
#define mmCP_RB_DOORBELL_RANGE_LOWER
void dmaWriteVirt(Addr host_addr, unsigned size, DmaCallback *b, void *data, Tick delay=0)
Initiate a DMA write from virtual address host_addr.
System DMA Engine class for AMD dGPU.
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
#define mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI
void setScratchApe(Addr base, Addr limit)
void queryStatus(PM4Queue *q, PM4QueryStatus *pkt)
struct gem5::GEM5_PACKED PM4ReleaseMem
void runList(PM4Queue *q, PM4RunList *pkt)
#define SERIALIZE_SCALAR(scalar)
void setHqdPqWptrLo(uint32_t data)
uint32_t sdmax_rlcx_rb_rptr
uint16_t getVMID(Addr doorbell)
void setHqdPqBase(uint32_t data)
Translation range generators.
std::unordered_map< uint16_t, std::set< int > > & getUsedVMIDs()
struct gem5::GEM5_PACKED PM4IndirectBuf
void decodeHeader(PM4Queue *q, PM4Header header)
This method calls other PM4 packet processing methods based on the header of a PM4 packet.
void processMQD(PM4MapQueues *pkt, PM4Queue *q, Addr addr, QueueDesc *mqd, uint16_t vmid)
void setHqdPqDoorbellCtrl(uint32_t data)
void indirectBuffer(PM4Queue *q, PM4IndirectBuf *pkt)
PM4PacketProcessor(const PM4PacketProcessorParams &p)
void setRbWptrPollAddrLo(uint32_t data)
void setHqdPqBaseHi(uint32_t data)
void setHqdPqWptrHi(uint32_t data)
#define UNSERIALIZE_ARRAY(member, size)
struct gem5::GEM5_PACKED PM4MapProcessMI200
PM4Queue * getQueue(Addr offset, bool gfx=false)
Based on an offset communicated through doorbell write, the PM4PacketProcessor identifies which queue...
std::unordered_map< uint16_t, PM4Queue * > queues
#define mmCP_HQD_PQ_WPTR_POLL_ADDR
uint32_t sdmax_rlcx_rb_rptr_addr_hi
AddrRangeList getAddrRanges() const override
Every PIO device is obliged to provide an implementation that returns the address ranges the device r...
void setHwReg(int regIdx, uint32_t val)
uint32_t hqd_pq_wptr_poll_addr_lo
void mapProcessGfx9(PM4Queue *q, PM4MapProcess *pkt)
void updateReadIndex(Addr offset, uint64_t rd_idx)
Update read index on doorbell rings.
void insertQId(uint16_t vmid, int id)
T getLE() const
Get the data in the packet byte swapped from little endian to host endian.
uint32_t sdmax_rlcx_rb_wptr
void deallocateVmid(uint16_t vmid)
#define mmCP_RB_DOORBELL_RANGE_UPPER
void deallocatePasid(uint16_t pasid)
std::ostream CheckpointOut
#define mmCP_HQD_PQ_RPTR_REPORT_ADDR
void writeData(PM4Queue *q, PM4WriteData *pkt)
GPUCommandProcessor * CP()
uint32_t sdmax_rlcx_rb_wptr_hi
void mapQueues(PM4Queue *q, PM4MapQueues *pkt)
void setRbVmid(uint32_t data)
void waitRegMem(PM4Queue *q, PM4WaitRegMem *pkt)
uint32_t getPqDoorbellOffset()
uint32_t sdmax_rlcx_ib_base_lo
void setRbRptrAddrHi(uint32_t data)
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
void setSDMAEngine(Addr offset, SDMAEngine *eng)
void setHqdPqWptrPollAddr(uint32_t data)
GfxVersion getGfxVersion() const
Addr getGARTAddr(Addr addr) const
void unserialize(CheckpointIn &cp) override
Unserialize an object.
void prepareInterruptCookie(ContextID cntxtId, uint32_t ring_id, uint32_t client_id, uint32_t source_id)
uint64_t completionSignal
struct gem5::GEM5_PACKED PM4WaitRegMem
void setRbBaseHi(uint32_t data)
void unmapQueues(PM4Queue *q, PM4UnmapQueues *pkt)
struct gem5::GEM5_PACKED SDMAQueueDesc
Queue descriptor for SDMA-based user queues (RLC queues).
std::unique_ptr< TranslationGen > TranslationGenPtr
@ SOC15_IH_CLIENTID_GRBM_CP
#define mmCP_RB_WPTR_POLL_ADDR_LO
void setRbDoorbellRangeLo(uint32_t data)
#define panic(...)
This implements a cprintf based panic() function.
void setHqdIbCtrl(uint32_t data)
Generated on Sun Jul 30 2023 01:56:54 for gem5 by doxygen 1.8.17