Go to the documentation of this file.
35 #include "debug/PM4PacketProcessor.hh"
41 #include "enums/GfxVersion.hh"
94 addr = (((
addr >> 12) << 3) << 12) | low_bits;
110 return result->second;
148 id,
q->base(),
q->offset());
154 q->wptr(wptrOffset *
sizeof(uint32_t));
156 if (!
q->processing()) {
166 q->id(),
q->rptr(),
q->wptr());
168 if (
q->rptr() <
q->wptr()) {
181 q->processing(
false);
197 void *dmaBuffer =
nullptr;
203 if (
header.count == 0x3fff) {
204 q->fastforwardRptr();
206 q->incRptr((
header.count + 1) *
sizeof(uint32_t));
213 [ = ] (
const uint64_t &)
222 [ = ] (
const uint64_t &)
231 [ = ] (
const uint64_t &)
240 [ = ] (
const uint64_t &)
249 [ = ] (
const uint64_t &)
258 [ = ] (
const uint64_t &)
267 [ = ] (
const uint64_t &)
275 [ = ] (
const uint64_t &)
284 [ = ] (
const uint64_t &)
293 [ = ] (
const uint64_t &)
302 [ = ] (
const uint64_t &)
311 q->incRptr((
header.count + 1) *
sizeof(uint32_t));
316 warn(
"PM4 packet opcode 0x%x not supported.\n",
header.opcode);
319 q->incRptr((
header.count + 1) *
sizeof(uint32_t));
361 "pipe: %d, queueSlot: %d, queueType: %d, allocFormat: %d, "
362 "engineSel: %d, numQueues: %d, checkDisable: %d, doorbellOffset:"
363 " %d, mqdAddr: %lx, wptrAddr: %lx\n", pkt->
queueSel, pkt->
vmid,
374 "Mapping mqd from %p %p (vmid %d - last vmid %d).\n",
383 [ = ] (
const uint32_t &) {
393 [ = ] (
const uint32_t &) {
430 mqd_size, 8, GfxVersion::gfx900,
offset,
443 "%#x/%#x ib: %#x/%#x size: %d ctrl: %#x\n", mqd->
rb_base,
469 "%d destSel %d dataSel %d, address %p data %p, intCtx %p\n",
474 "PM4 release_mem destSel 0 bypasses caches to MC.\n");
482 panic(
"Unimplemented PM4ReleaseMem.dataSelect");
493 "%d, queueSlot:%d\n", pkt->
intCtxId,
q->me(),
q->pipe(),
497 uint32_t ringId = (
q->me() << 6) | (
q->pipe() << 4) |
q->queue();
520 "pasid: %p doorbellOffset0 %p \n",
566 for (
auto id : iter.second) {
570 if (
queues[
id]->privileged()) {
578 96 *
sizeof(uint32_t));
581 [ = ] (
const uint32_t &) {
587 hsa_pp.unsetDeviceQueueDesc(
id, 8);
593 panic(
"Unrecognized options\n");
634 q->wptr(pkt->
ibSize *
sizeof(uint32_t));
650 q->wptr(pkt->
ibSize *
sizeof(uint32_t));
702 " %d command: %d, pasid: %d, doorbellOffset: %d, engineSel: %d "
716 panic(
"query_status with interruptSel %d command %d not supported",
733 switch (mmio_offset) {
984 int num_queues =
queues.size();
986 Addr mqd_base[num_queues];
988 Addr rptr[num_queues];
989 Addr wptr[num_queues];
990 Addr ib_base[num_queues];
991 Addr ib_rptr[num_queues];
992 Addr ib_wptr[num_queues];
994 bool processing[num_queues];
998 for (
auto iter :
queues) {
1001 mqd_base[
i] =
q->mqdBase();
1002 bool cur_state =
q->ib();
1004 base[
i] =
q->base() >> 8;
1005 rptr[
i] =
q->getRptr();
1006 wptr[
i] =
q->getWptr();
1008 ib_base[
i] =
q->ibBase();
1009 ib_rptr[
i] =
q->getRptr();
1010 ib_wptr[
i] =
q->getWptr();
1013 processing[
i] =
q->processing();
1041 Addr id[num_queues];
1042 Addr mqd_base[num_queues];
1044 Addr rptr[num_queues];
1045 Addr wptr[num_queues];
1046 Addr ib_base[num_queues];
1047 Addr ib_rptr[num_queues];
1048 Addr ib_wptr[num_queues];
1050 bool processing[num_queues];
1051 bool ib[num_queues];
1065 for (
int i = 0;
i < num_queues;
i++) {
1071 mqd->
rptr = rptr[
i];
1082 queues[
id[
i]]->processing(processing[
i]);
struct gem5::GEM5_PACKED PM4RunList
void mapPq(Addr offset)
The first graphics queue, the Primary Queue a.k.a.
void setRbRptrAddrLo(uint32_t data)
struct gem5::GEM5_PACKED PM4QueryStatus
struct gem5::GEM5_PACKED PM4WriteData
void decodeNext(PM4Queue *q)
This method decodes the next packet in a PM4Queue.
std::unordered_map< uint32_t, PM4Queue * > queuesMap
void mapProcess(PM4Queue *q, PM4MapProcess *pkt)
#define UNSERIALIZE_SCALAR(scalar)
void setRbWptrLo(uint32_t data)
void queryStatusDone(PM4Queue *q, PM4QueryStatus *pkt)
Wraps a std::function object in a DmaCallback.
void setDoorbellType(uint32_t offset, QueueType qt)
Set handles to GPU blocks.
uint32_t hqd_pq_doorbell_control
struct gem5::GEM5_PACKED PM4UnmapQueues
void dmaReadVirt(Addr host_addr, unsigned size, DmaCallback *cb, void *data, Tick delay=0)
Initiate a DMA read from virtual address host_addr.
struct gem5::GEM5_PACKED QueueDesc
Queue descriptor with relevant MQD attributes.
SDMAEngine * getSDMAById(int id)
void writeMMIO(PacketPtr pkt, Addr mmio_offset)
void deallocateAllQueues()
void registerRLCQueue(Addr doorbell, Addr rb_base)
Methods for RLC queues.
uint32_t sdmax_rlcx_rb_cntl
void setRbCntl(uint32_t data)
Class defining a PM4 queue.
struct gem5::GEM5_PACKED PM4SetUconfigReg
void setRbDoorbellCntrl(uint32_t data)
#define mmCP_HQD_IB_CONTROL
void submitInterruptCookie()
#define mmCP_HQD_PQ_WPTR_LO
HSAPacketProcessor & hsaPacketProc()
uint32_t hqd_pq_wptr_poll_addr_hi
void setRbDoorbellRangeHi(uint32_t data)
void serialize(CheckpointOut &cp) const override
Serialize an object.
void setHqdPqRptrReportAddr(uint32_t data)
void process(PM4Queue *q, Addr wptrOffset)
This method starts processing a PM4Queue from the current read pointer to the newly communicated write...
bool inAGP(Addr vaddr)
Methods for resolving apertures.
void unserialize(CheckpointIn &cp) override
Unserialize an object.
void setPageTableBase(uint16_t vmid, Addr ptBase)
Page table base/start accessors for user VMIDs.
uint32_t getKiqDoorbellOffset()
void newQueue(QueueDesc *q, Addr offset, PM4MapQueues *pkt=nullptr, int id=-1)
This method creates a new PM4Queue based on a queue descriptor and an offset.
void setDeviceQueueDesc(uint64_t hostReadIndexPointer, uint64_t basePointer, uint64_t queue_id, uint32_t size, int doorbellSize, GfxVersion gfxVersion, Addr offset=0, uint64_t rd_idx=0)
void mapDoorbellToVMID(Addr doorbell, uint16_t vmid)
uint32_t sdmax_rlcx_ib_size
void setUconfigReg(PM4Queue *q, PM4SetUconfigReg *pkt)
void releaseMemDone(PM4Queue *q, PM4ReleaseMem *pkt, Addr addr)
void setRbBaseLo(uint32_t data)
struct gem5::GEM5_PACKED PM4MapQueues
#define mmCP_HQD_PQ_DOORBELL_CONTROL
void switchBuffer(PM4Queue *q, PM4SwitchBuf *pkt)
void releaseMem(PM4Queue *q, PM4ReleaseMem *pkt)
void mapKiq(Addr offset)
The first compute queue, the Kernel Interface Queue a.k.a.
uint32_t hqd_pq_rptr_report_addr_lo
#define mmCP_HQD_PQ_WPTR_POLL_ADDR_HI
#define mmCP_RB_WPTR_POLL_ADDR_HI
uint32_t hqd_pq_rptr_report_addr_hi
struct gem5::GEM5_PACKED PM4MapProcess
AMDGPUInterruptHandler * getIH()
Get handles to GPU blocks.
struct gem5::GEM5_PACKED PM4SwitchBuf
void setHqdVmid(uint32_t data)
#define mmCP_RB0_RPTR_ADDR
void setHqdPqRptrReportAddrHi(uint32_t data)
uint32_t sdmax_rlcx_rb_rptr_hi
void setRbWptrPollAddrHi(uint32_t data)
void setGPUDevice(AMDGPUDevice *gpu_device)
#define PACKET3_SET_UCONFIG_REG_START
Value from vega10/pm4_header.h.
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
#define mmCP_HQD_PQ_WPTR_HI
void setRegVal(uint32_t addr, uint32_t value)
uint16_t allocateVMID(uint16_t pasid)
#define mmCP_RB_DOORBELL_CONTROL
void setHqdPqPtr(uint32_t data)
void writeDataDone(PM4Queue *q, PM4WriteData *pkt, Addr addr)
TranslationGenPtr translate(Addr vaddr, Addr size) override
Method for functional translation.
void setHqdPqWptrPollAddrHi(uint32_t data)
void doneMQDWrite(Addr mqdAddr, Addr addr)
void processSDMAMQD(PM4MapQueues *pkt, PM4Queue *q, Addr addr, SDMAQueueDesc *mqd, uint16_t vmid)
Device model for an AMD GPU.
void serialize(CheckpointOut &cp) const override
Serialize an object.
#define mmCP_HQD_PQ_BASE_HI
constexpr T bits(T val, unsigned first, unsigned last)
Extract the bitfield from position 'first' to 'last' (inclusive) from 'val' and right justify it.
uint32_t sdmax_rlcx_ib_base_hi
#define SERIALIZE_ARRAY(member, size)
void setHqdActive(uint32_t data)
void setRbWptrHi(uint32_t data)
#define mmCP_RB0_RPTR_ADDR_HI
#define mmCP_RB_DOORBELL_RANGE_LOWER
void dmaWriteVirt(Addr host_addr, unsigned size, DmaCallback *b, void *data, Tick delay=0)
Initiate a DMA write from virtual address host_addr.
System DMA Engine class for AMD dGPU.
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
#define mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI
void queryStatus(PM4Queue *q, PM4QueryStatus *pkt)
struct gem5::GEM5_PACKED PM4ReleaseMem
void runList(PM4Queue *q, PM4RunList *pkt)
#define SERIALIZE_SCALAR(scalar)
void setHqdPqWptrLo(uint32_t data)
uint32_t sdmax_rlcx_rb_rptr
uint16_t getVMID(Addr doorbell)
void setHqdPqBase(uint32_t data)
Translation range generators.
std::unordered_map< uint16_t, std::set< int > > & getUsedVMIDs()
struct gem5::GEM5_PACKED PM4IndirectBuf
void decodeHeader(PM4Queue *q, PM4Header header)
This method calls other PM4 packet processing methods based on the header of a PM4 packet.
void processMQD(PM4MapQueues *pkt, PM4Queue *q, Addr addr, QueueDesc *mqd, uint16_t vmid)
void setHqdPqDoorbellCtrl(uint32_t data)
void indirectBuffer(PM4Queue *q, PM4IndirectBuf *pkt)
PM4PacketProcessor(const PM4PacketProcessorParams &p)
void setRbWptrPollAddrLo(uint32_t data)
void setHqdPqBaseHi(uint32_t data)
void setHqdPqWptrHi(uint32_t data)
#define UNSERIALIZE_ARRAY(member, size)
PM4Queue * getQueue(Addr offset, bool gfx=false)
Based on an offset communicated through doorbell write, the PM4PacketProcessor identifies which queue...
std::unordered_map< uint16_t, PM4Queue * > queues
#define mmCP_HQD_PQ_WPTR_POLL_ADDR
AddrRangeList getAddrRanges() const override
Every PIO device is obliged to provide an implementation that returns the address ranges the device r...
uint32_t hqd_pq_wptr_poll_addr_lo
void updateReadIndex(Addr offset, uint64_t rd_idx)
Update read index on doorbell rings.
void insertQId(uint16_t vmid, int id)
T getLE() const
Get the data in the packet byte swapped from little endian to host endian.
uint32_t sdmax_rlcx_rb_wptr
void deallocateVmid(uint16_t vmid)
#define mmCP_RB_DOORBELL_RANGE_UPPER
void deallocatePasid(uint16_t pasid)
std::ostream CheckpointOut
#define mmCP_HQD_PQ_RPTR_REPORT_ADDR
void writeData(PM4Queue *q, PM4WriteData *pkt)
GPUCommandProcessor * CP()
uint32_t sdmax_rlcx_rb_wptr_hi
void mapQueues(PM4Queue *q, PM4MapQueues *pkt)
void setRbVmid(uint32_t data)
void waitRegMem(PM4Queue *q, PM4WaitRegMem *pkt)
uint32_t getPqDoorbellOffset()
uint32_t sdmax_rlcx_ib_base_lo
void setRbRptrAddrHi(uint32_t data)
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
void setSDMAEngine(Addr offset, SDMAEngine *eng)
void setHqdPqWptrPollAddr(uint32_t data)
Addr getGARTAddr(Addr addr) const
void unserialize(CheckpointIn &cp) override
Unserialize an object.
void prepareInterruptCookie(ContextID cntxtId, uint32_t ring_id, uint32_t client_id, uint32_t source_id)
uint64_t completionSignal
struct gem5::GEM5_PACKED PM4WaitRegMem
void setRbBaseHi(uint32_t data)
void unmapQueues(PM4Queue *q, PM4UnmapQueues *pkt)
struct gem5::GEM5_PACKED SDMAQueueDesc
Queue descriptor for SDMA-based user queues (RLC queues).
std::unique_ptr< TranslationGen > TranslationGenPtr
#define mmCP_RB_WPTR_POLL_ADDR_LO
void setRbDoorbellRangeLo(uint32_t data)
#define panic(...)
This implements a cprintf based panic() function.
void setHqdIbCtrl(uint32_t data)
Generated on Thu Jul 28 2022 13:32:30 for gem5 by doxygen 1.8.17