35#include "debug/PM4PacketProcessor.hh"
42#include "enums/GfxVersion.hh"
96 addr = (((
addr >> 12) << 3) << 12) | low_bits;
112 return result->second;
150 "%d, pipe %d queue: %d size: %d\n",
id,
q->base(),
q->offset(),
151 q->me(),
q->pipe(),
q->queue(),
q->size());
157 q->wptr(wptrOffset *
sizeof(uint32_t));
159 if (!
q->processing()) {
169 q->id(),
q->rptr(),
q->wptr());
171 if (
q->rptr() !=
q->wptr()) {
186 assert(
q->rptr() ==
q->wptr());
187 q->processing(
false);
196 if (
q->getMQD()->aqlRptr) {
198 uint32_t *
data =
new uint32_t;
201 *
data =
q->getRptr() >> 2;
203 [
data](
const uint32_t &) {
delete data; });
217 void *dmaBuffer =
nullptr;
223 if (
header.count != 0x3fff) {
224 q->incRptr((
header.count + 1) *
sizeof(uint32_t));
233 [ = ] (
const uint64_t &)
242 [ = ] (
const uint64_t &)
251 [ = ] (
const uint64_t &)
260 [ = ] (
const uint64_t &)
269 [ = ] (
const uint64_t &)
278 [ = ] (
const uint64_t &)
287 [ = ] (
const uint64_t &)
293 if (
gpuDevice->getGfxVersion() == GfxVersion::gfx90a ||
294 gpuDevice->getGfxVersion() == GfxVersion::gfx942) {
297 [ = ] (
const uint64_t &)
304 [ = ] (
const uint64_t &)
314 [ = ] (
const uint64_t &)
323 [ = ] (
const uint64_t &)
332 [ = ] (
const uint64_t &)
341 q->incRptr((
header.count + 1) *
sizeof(uint32_t));
346 warn(
"PM4 packet opcode 0x%x not supported.\n",
header.opcode);
349 q->incRptr((
header.count + 1) *
sizeof(uint32_t));
361 "addrIncr: %d resume: %d writeConfirm: %d cachePolicy: %d\n",
373 unsigned size = (
header.count - 2) *
sizeof(uint32_t);
383 }
else if (pkt->
destSel == 0) {
394 fatal(
"Unknown PM4 writeData destination %d\n", pkt->
destSel);
417 "pipe: %d, queueSlot: %d, queueType: %d, allocFormat: %d, "
418 "engineSel: %d, numQueues: %d, checkDisable: %d, doorbellOffset:"
419 " %d, mqdAddr: %lx, wptrAddr: %lx\n", pkt->
queueSel, pkt->
vmid,
430 "Mapping mqd from %p %p (vmid %d - last vmid %d).\n",
442 [ = ] (
const uint32_t &) {
453 [ = ] (
const uint32_t &) {
486 auto &hsa_pp =
gpuDevice->CP()->hsaPacketProc();
487 hsa_pp.setDeviceQueueDesc(mqd->
aqlRptr, mqd->
base, new_q->
id(),
488 mqd_size, 8, GfxVersion::gfx900,
offset,
511 "%#x/%#x ib: %#x/%#x size: %d ctrl: %#x rptr wb addr: %#lx\n",
544 "%d destSel %d dataSel %d, address %p data %p, intCtx %p\n",
549 "PM4 release_mem destSel 0 bypasses caches to MC.\n");
557 panic(
"Unimplemented PM4ReleaseMem.dataSelect");
568 "pipe: %d, queueSlot:%d\n",
q->id(), pkt->
intCtxId,
q->me(),
569 q->pipe(),
q->queue());
573 ringId = (
q->queue() << 4) | (
q->me() << 2) |
q->pipe();
578 gpuDevice->getIH()->submitInterruptCookie();
595 auto &hsa_pp =
gpuDevice->CP()->hsaPacketProc();
596 for (
auto iter :
gpuDevice->getUsedVMIDs()) {
597 for (
auto id : iter.second) {
601 if (
queues[
id]->privileged()) {
606 if (!unmap_static &&
queues[
id]->isStatic()) {
618 96 *
sizeof(uint32_t));
621 [ = ] (
const uint32_t &) {
627 hsa_pp.unsetDeviceQueueDesc(
id, 8);
639 "pasid: %p doorbellOffset0 %p \n",
689 panic(
"Unrecognized options\n");
709 gpuDevice->getVM().setPageTableBase(vmid, ptBase);
714 Addr scratch_base = (
Addr)
bits(shMemBases, 15, 0) << 48;
718 gpuDevice->CP()->shader()->setLdsApe(lds_base, lds_base + 0xFFFFFFFF);
719 gpuDevice->CP()->shader()->setScratchApe(scratch_base,
720 scratch_base + 0xFFFFFFFF);
764 q->wptr(pkt->
ibSize *
sizeof(uint32_t));
780 q->wptr(pkt->
ibSize *
sizeof(uint32_t));
811 reg_addr += 0x40000 *
getIpId();
841 " %d command: %d, pasid: %d, doorbellOffset: %d, engineSel: %d "
855 panic(
"query_status with interruptSel %d command %d not supported",
872 switch (mmio_offset) {
990 kiq.hqd_pq_doorbell_control =
data;
1014 kiq.hqd_pq_rptr_report_addr_lo =
data;
1020 kiq.hqd_pq_rptr_report_addr_hi =
data;
1026 kiq.hqd_pq_wptr_poll_addr_lo =
data;
1032 kiq.hqd_pq_wptr_poll_addr_hi =
data;
1056 pq.hqd_pq_control =
data;
1074 pq.queueRptrAddrLo =
data;
1080 pq.queueRptrAddrHi =
data;
1086 pq.hqd_pq_wptr_poll_addr_lo =
data;
1092 pq.hqd_pq_wptr_poll_addr_hi =
data;
1098 pq.hqd_pq_base_lo =
data;
1104 pq.hqd_pq_base_hi =
data;
1110 pq.hqd_pq_doorbell_control =
data;
1111 pq.doorbellOffset =
data & 0x1ffffffc;
1117 pq.doorbellRangeLo =
data;
1123 pq.doorbellRangeHi =
data;
1132 int num_queues =
queues.size();
1133 auto id = std::make_unique<Addr[]>(num_queues);
1134 auto mqd_base = std::make_unique<Addr[]>(num_queues);
1135 auto mqd_read_index = std::make_unique<uint64_t[]>(num_queues);
1136 auto base = std::make_unique<Addr[]>(num_queues);
1137 auto rptr = std::make_unique<Addr[]>(num_queues);
1138 auto wptr = std::make_unique<Addr[]>(num_queues);
1139 auto ib_base = std::make_unique<Addr[]>(num_queues);
1140 auto ib_rptr = std::make_unique<Addr[]>(num_queues);
1141 auto ib_wptr = std::make_unique<Addr[]>(num_queues);
1142 auto offset = std::make_unique<Addr[]>(num_queues);
1143 auto processing = std::make_unique<bool[]>(num_queues);
1144 auto ib = std::make_unique<bool[]>(num_queues);
1145 auto me = std::make_unique<uint32_t[]>(num_queues);
1146 auto pipe = std::make_unique<uint32_t[]>(num_queues);
1147 auto queue = std::make_unique<uint32_t[]>(num_queues);
1148 auto privileged = std::make_unique<bool[]>(num_queues);
1149 auto queue_type = std::make_unique<uint32_t[]>(num_queues);
1150 auto hqd_active = std::make_unique<uint32_t[]>(num_queues);
1151 auto hqd_vmid = std::make_unique<uint32_t[]>(num_queues);
1152 auto aql_rptr = std::make_unique<Addr[]>(num_queues);
1153 auto aql = std::make_unique<uint32_t[]>(num_queues);
1154 auto doorbell = std::make_unique<uint32_t[]>(num_queues);
1155 auto hqd_pq_control = std::make_unique<uint32_t[]>(num_queues);
1158 for (
auto iter :
queues) {
1161 mqd_base[
i] =
q->mqdBase();
1162 mqd_read_index[
i] =
q->getMQD()->mqdReadIndex;
1163 bool cur_state =
q->ib();
1166 rptr[
i] =
q->getRptr();
1167 wptr[
i] =
q->getWptr();
1169 ib_base[
i] =
q->ibBase();
1170 ib_rptr[
i] =
q->getRptr();
1171 ib_wptr[
i] =
q->getWptr();
1174 processing[
i] =
q->processing();
1177 pipe[
i] =
q->pipe();
1178 queue[
i] =
q->queue();
1179 privileged[
i] =
q->privileged();
1180 queue_type[
i] =
q->queueType();
1181 hqd_active[
i] =
q->getMQD()->hqd_active;
1182 hqd_vmid[
i] =
q->getMQD()->hqd_vmid;
1183 aql_rptr[
i] =
q->getMQD()->aqlRptr;
1184 aql[
i] =
q->getMQD()->aql;
1185 doorbell[
i] =
q->getMQD()->doorbell;
1186 hqd_pq_control[
i] =
q->getMQD()->hqd_pq_control;
1225 auto id = std::make_unique<Addr[]>(num_queues);
1226 auto mqd_base = std::make_unique<Addr[]>(num_queues);
1227 auto mqd_read_index = std::make_unique<uint64_t[]>(num_queues);
1228 auto base = std::make_unique<Addr[]>(num_queues);
1229 auto rptr = std::make_unique<Addr[]>(num_queues);
1230 auto wptr = std::make_unique<Addr[]>(num_queues);
1231 auto ib_base = std::make_unique<Addr[]>(num_queues);
1232 auto ib_rptr = std::make_unique<Addr[]>(num_queues);
1233 auto ib_wptr = std::make_unique<Addr[]>(num_queues);
1234 auto offset = std::make_unique<Addr[]>(num_queues);
1235 auto processing = std::make_unique<bool[]>(num_queues);
1236 auto ib = std::make_unique<bool[]>(num_queues);
1237 auto me = std::make_unique<uint32_t[]>(num_queues);
1238 auto pipe = std::make_unique<uint32_t[]>(num_queues);
1239 auto queue = std::make_unique<uint32_t[]>(num_queues);
1240 auto privileged = std::make_unique<bool[]>(num_queues);
1241 auto queue_type = std::make_unique<uint32_t[]>(num_queues);
1242 auto hqd_active = std::make_unique<uint32_t[]>(num_queues);
1243 auto hqd_vmid = std::make_unique<uint32_t[]>(num_queues);
1244 auto aql_rptr = std::make_unique<Addr[]>(num_queues);
1245 auto aql = std::make_unique<uint32_t[]>(num_queues);
1246 auto doorbell = std::make_unique<uint32_t[]>(num_queues);
1247 auto hqd_pq_control = std::make_unique<uint32_t[]>(num_queues);
1273 for (
int i = 0;
i < num_queues;
i++) {
1295 queues[
id[
i]]->processing(processing[
i]);
1296 queues[
id[
i]]->setPkt(
me[
i], pipe[
i], queue[
i], privileged[
i],
1298 queues[
id[
i]]->getMQD()->hqd_active = hqd_active[
i];
1299 queues[
id[
i]]->getMQD()->hqd_vmid = hqd_vmid[
i];
1300 queues[
id[
i]]->getMQD()->aqlRptr = aql_rptr[
i];
1301 queues[
id[
i]]->getMQD()->doorbell = doorbell[
i];
1302 queues[
id[
i]]->getMQD()->hqd_pq_control = hqd_pq_control[
i];
1305 int mqd_size = (1 << ((hqd_pq_control[
i] & 0x3f) + 1)) * 4;
1306 auto &hsa_pp =
gpuDevice->CP()->hsaPacketProc();
1307 hsa_pp.setDeviceQueueDesc(aql_rptr[
i],
base[
i],
id[
i],
1308 mqd_size, 8, GfxVersion::gfx900,
offset[
i],
Device model for an AMD GPU.
Translation range generators.
void serialize(CheckpointOut &cp) const override
Serialize an object.
void unserialize(CheckpointIn &cp) override
Unserialize an object.
Wraps a std::function object in a DmaCallback.
void dmaReadVirt(Addr host_addr, unsigned size, DmaCallback *cb, void *data, Tick delay=0)
Initiate a DMA read from virtual address host_addr.
DmaVirtDevice(const Params &p)
void dmaWriteVirt(Addr host_addr, unsigned size, DmaCallback *b, void *data, Tick delay=0)
Initiate a DMA write to virtual address host_addr.
void writeMMIO(PacketPtr pkt, Addr mmio_offset)
void setRbWptrPollAddrLo(uint32_t data)
void decodeHeader(PM4Queue *q, PM4Header header)
This method calls other PM4 packet processing methods based on the header of a PM4 packet.
void unserialize(CheckpointIn &cp) override
Unserialize an object.
void setRbWptrHi(uint32_t data)
void mapKiq(Addr offset)
The first compute queue, the Kernel Interface Queue a.k.a.
Addr getGARTAddr(Addr addr) const
void writeDataDone(PM4Queue *q, PM4WriteData *pkt, Addr addr)
void setRbWptrLo(uint32_t data)
void switchBuffer(PM4Queue *q, PM4SwitchBuf *pkt)
void setGPUDevice(AMDGPUDevice *gpu_device)
void serialize(CheckpointOut &cp) const override
Serialize an object.
void setRbCntl(uint32_t data)
uint32_t getKiqDoorbellOffset()
void setHqdPqWptrLo(uint32_t data)
std::unordered_map< uint32_t, PM4Queue * > queuesMap
void setUconfigReg(PM4Queue *q, PM4SetUconfigReg *pkt)
void queryStatus(PM4Queue *q, PM4QueryStatus *pkt)
void releaseMem(PM4Queue *q, PM4ReleaseMem *pkt)
void releaseMemDone(PM4Queue *q, PM4ReleaseMem *pkt, Addr addr)
void setHqdPqRptrReportAddr(uint32_t data)
void updateReadIndex(Addr offset, uint64_t rd_idx)
Update read index on doorbell rings.
void setRbBaseHi(uint32_t data)
void mapProcessV1(PM4Queue *q, PM4MapProcess *pkt)
void setRbVmid(uint32_t data)
void setHqdActive(uint32_t data)
void processSDMAMQD(PM4MapQueues *pkt, PM4Queue *q, Addr addr, SDMAQueueDesc *mqd, uint16_t vmid)
void process(PM4Queue *q, Addr wptrOffset)
This method starts processing a PM4Queue from the current read pointer to the newly communicated write...
void setHqdPqControl(uint32_t data)
void setRbBaseLo(uint32_t data)
void mapProcessV2(PM4Queue *q, PM4MapProcessV2 *pkt)
void setHqdIbCtrl(uint32_t data)
void setRbRptrAddrHi(uint32_t data)
void setHqdPqWptrPollAddr(uint32_t data)
void newQueue(QueueDesc *q, Addr offset, PM4MapQueues *pkt=nullptr, int id=-1)
This method creates a new PM4Queue based on a queue descriptor and an offset.
void unmapQueues(PM4Queue *q, PM4UnmapQueues *pkt)
void queryStatusDone(PM4Queue *q, PM4QueryStatus *pkt)
void mapProcess(uint32_t pasid, uint64_t ptBase, uint32_t shMemBases)
void setRbDoorbellRangeLo(uint32_t data)
void waitRegMem(PM4Queue *q, PM4WaitRegMem *pkt)
void setHqdPqBaseHi(uint32_t data)
void runList(PM4Queue *q, PM4RunList *pkt)
void decodeNext(PM4Queue *q)
This method decodes the next packet in a PM4Queue.
void mapPq(Addr offset)
The first graphics queue, the Primary Queue a.k.a.
void setHqdVmid(uint32_t data)
void writeData(PM4Queue *q, PM4WriteData *pkt, PM4Header header)
void setHqdPqDoorbellCtrl(uint32_t data)
void setHqdPqBase(uint32_t data)
void setRbDoorbellRangeHi(uint32_t data)
uint32_t getPqDoorbellOffset()
void doneMQDWrite(Addr mqdAddr, Addr addr)
std::unordered_map< uint16_t, PM4Queue * > queues
void indirectBuffer(PM4Queue *q, PM4IndirectBuf *pkt)
PM4PacketProcessor(const PM4PacketProcessorParams &p)
void unmapAllQueues(bool unmap_static)
void setHqdPqPtr(uint32_t data)
void setHqdPqRptrReportAddrHi(uint32_t data)
void mapQueues(PM4Queue *q, PM4MapQueues *pkt)
TranslationGenPtr translate(Addr vaddr, Addr size) override
Method for functional translation.
void processMQD(PM4MapQueues *pkt, PM4Queue *q, Addr addr, QueueDesc *mqd, uint16_t vmid)
void setRbRptrAddrLo(uint32_t data)
void setRbDoorbellCntrl(uint32_t data)
PM4Queue * getQueue(Addr offset, bool gfx=false)
Based on an offset communicated through doorbell write, the PM4PacketProcessor identifies which queue...
AddrRangeList getAddrRanges() const override
Every PIO device is obliged to provide an implementation that returns the address ranges the device r...
void setHqdPqWptrPollAddrHi(uint32_t data)
void setHqdPqWptrHi(uint32_t data)
void setRbWptrPollAddrHi(uint32_t data)
Class defining a PM4 queue.
T getLE() const
Get the data in the packet byte swapped from little endian to host endian.
System DMA Engine class for AMD dGPU.
void registerRLCQueue(Addr doorbell, Addr mqdAddr, SDMAQueueDesc *mqd, bool isStatic)
Methods for RLC queues.
The GPUCommandProcessor (CP) is responsible for accepting commands, in the form of HSA AQL packets,...
std::list< AddrRange > AddrRangeList
Convenience typedef for a collection of address ranges.
constexpr T bits(T val, unsigned first, unsigned last)
Extract the bitfield from position 'first' to 'last' (inclusive) from 'val' and right justify it.
#define panic(...)
This implements a cprintf based panic() function.
#define fatal(...)
This implements a cprintf based fatal() function.
#define UNSERIALIZE_UNIQUE_PTR_ARRAY(member, size)
#define SERIALIZE_UNIQUE_PTR_ARRAY(member, size)
Copyright (c) 2024 Arm Limited All rights reserved.
struct gem5::GEM5_PACKED PM4WriteData
struct gem5::GEM5_PACKED PM4WaitRegMem
std::ostream CheckpointOut
struct gem5::GEM5_PACKED PM4RunList
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
@ SOC15_IH_CLIENTID_GRBM_CP
struct gem5::GEM5_PACKED PM4ReleaseMem
struct gem5::GEM5_PACKED PM4SwitchBuf
struct gem5::GEM5_PACKED PM4Header
PM4 packets.
struct gem5::GEM5_PACKED PM4MapQueues
struct gem5::GEM5_PACKED PM4MapProcess
struct gem5::GEM5_PACKED PM4MapProcessV2
struct gem5::GEM5_PACKED SDMAQueueDesc
Queue descriptor for SDMA-based user queues (RLC queues).
struct gem5::GEM5_PACKED PM4UnmapQueues
std::unique_ptr< TranslationGen > TranslationGenPtr
struct gem5::GEM5_PACKED PM4SetUconfigReg
struct gem5::GEM5_PACKED PM4QueryStatus
struct gem5::GEM5_PACKED QueueDesc
Queue descriptor with relevant MQD attributes.
struct gem5::GEM5_PACKED PM4IndirectBuf
Declaration of the Packet class.
#define PACKET3_SET_UCONFIG_REG_START
Value from vega10/pm4_header.h.
#define mmCP_RB_DOORBELL_CONTROL
#define mmCP_RB0_RPTR_ADDR_HI
#define mmCP_HQD_PQ_RPTR_REPORT_ADDR
#define mmCP_HQD_PQ_DOORBELL_CONTROL
#define mmCP_HQD_PQ_WPTR_POLL_ADDR
#define mmCP_HQD_PQ_RPTR_REPORT_ADDR_HI
#define mmCP_RB_DOORBELL_RANGE_UPPER
#define mmCP_HQD_IB_CONTROL
#define mmCP_RB_WPTR_POLL_ADDR_LO
#define mmCP_HQD_PQ_BASE_HI
#define mmCP_HQD_PQ_WPTR_HI
#define mmCP_HQD_PQ_CONTROL
#define mmCP_RB_DOORBELL_RANGE_LOWER
#define mmCP_RB_WPTR_POLL_ADDR_HI
#define mmCP_RB0_RPTR_ADDR
#define mmCP_HQD_PQ_WPTR_POLL_ADDR_HI
#define mmCP_HQD_PQ_WPTR_LO
#define UNSERIALIZE_SCALAR(scalar)
#define SERIALIZE_SCALAR(scalar)
uint32_t sdmax_rlcx_ib_base_lo
uint32_t sdmax_rlcx_rb_rptr
uint32_t sdmax_rlcx_rb_rptr_addr_hi
uint32_t sdmax_rlcx_rb_cntl
uint32_t sdmax_rlcx_rb_wptr_hi
uint32_t sdmax_rlcx_ib_base_hi
uint32_t sdmax_rlcx_rb_rptr_addr_lo
uint32_t sdmax_rlcx_rb_wptr
uint32_t sdmax_rlcx_rb_rptr_hi
uint64_t completionSignal