Go to the documentation of this file.
41 #include "debug/HSAPacketProcessor.hh"
46 #include "enums/GfxVersion.hh"
55 #define HSAPP_EVENT_DESCRIPTION_GENERATOR(XEVENT) \
57 HSAPacketProcessor::XEVENT::description() const \
62 #define PKT_TYPE(PKT) ((hsa_packet_type_t)(((PKT->header) >> \
63 HSA_PACKET_HEADER_TYPE) & mask(HSA_PACKET_HEADER_WIDTH_TYPE)))
67 #define IS_BARRIER(PKT) ((hsa_packet_header_t)(((PKT->header) >> \
68 HSA_PACKET_HEADER_BARRIER) & \
69 mask(HSA_PACKET_HEADER_WIDTH_BARRIER)))
78 numHWQueues(
p.numHWQueues), pioAddr(
p.pioAddr),
79 pioSize(
PAGE_SIZE), pioDelay(10), pktProcessDelay(
p.pktProcessDelay)
83 regdQList.resize(numHWQueues);
84 for (
int i = 0;
i < numHWQueues;
i++) {
113 uint64_t basePointer,
115 uint32_t size,
int doorbellSize,
116 GfxVersion gfxVersion,
120 "%s:base = %p, qID = %d, ze = %d\n", __FUNCTION__,
121 (
void *)basePointer, queue_id, size);
123 basePointer, queue_id, size, doorbellSize,
124 gfxVersion,
offset, rd_idx);
148 "%s: write of size %d to reg-offset %d (0x%x)\n",
149 __FUNCTION__, pkt->
getSize(), daddr, daddr);
153 uint64_t doorbell_reg(0);
155 doorbell_reg = pkt->
getLE<uint64_t>() + 1;
157 doorbell_reg = pkt->
getLE<uint32_t>();
159 fatal(
"invalid db size");
162 "%s: write data 0x%x to offset %d (0x%x)\n",
163 __FUNCTION__, doorbell_reg, daddr, daddr);
184 auto process =
sys->
threads[0]->getProcessPtr();
186 return process->pTable->translateRange(
vaddr, size);
216 "%s: read-pointer offset [0x%x]\n", __FUNCTION__, aqlbuf->
rdIdx());
222 "%s: rd-ptr offset [0x%x], wr-ptr offset [0x%x], space used = %d," \
223 " q size = %d, is_empty = %s, active list ID = %d\n", __FUNCTION__,
233 bool isRead, uint32_t ix_start,
unsigned num_pkts,
236 uint32_t rl_idx = series_ctx->
rl_idx;
242 " pktsRemaining = %d, active list ID = %d\n", __FUNCTION__,
243 ix_start, num_pkts, series_ctx->
pkts_2_go,
251 "%s: schedule Qwakeup next cycle, rdIdx %d, wrIdx %d," \
252 " dispIdx %d, active list ID = %d\n",
253 __FUNCTION__, aqlRingBuffer->
rdIdx(),
254 aqlRingBuffer->
wrIdx(), aqlRingBuffer->
dispIdx(), rl_idx);
297 regdQList[rl_idx]->compltnPending() > 0) {
302 " list ID = %d\n", __FUNCTION__, rl_idx);
308 " active list ID = %d\n", __FUNCTION__, rl_idx);
314 " active list ID = %d\n", __FUNCTION__, rl_idx);
325 " list ID = %d\n", __FUNCTION__, rl_idx);
330 " active list ID = %d\n", __FUNCTION__, rl_idx);
337 if (bar_and_pkt->dep_signal[
i]) {
340 uint64_t signal_addr =
341 (uint64_t) (((uint64_t *) bar_and_pkt->dep_signal[
i]) + 1);
345 " , sig addr %x, value %d active list ID = %d\n",
346 __FUNCTION__,
i, signal_addr,
347 *signal_val, rl_idx);
351 if (*signal_val != 0) {
356 [ = ] (
const uint32_t &dma_data)
362 " active list %d\n", __FUNCTION__,
369 [ = ] (
const uint32_t &dma_data)
375 " active list %d\n", __FUNCTION__,
383 " active list ID = %d\n", __FUNCTION__, rl_idx);
391 if (bar_and_pkt->completion_signal != 0) {
398 bar_and_pkt->completion_signal);
401 " completion signal! Addr: %x\n",
402 bar_and_pkt->completion_signal);
410 dep_sgnl_rd_st->
allRead =
false;
414 fatal(
"Unsupported packet type HSA_PACKET_TYPE_BARRIER_OR");
416 fatal(
"Unsupported packet type HSA_PACKET_TYPE_INVALID");
419 " active list ID = %d\n", __FUNCTION__, rl_idx);
422 (
void *)disp_pkt, rl_idx, host_pkt_addr);
426 fatal(
"Unsupported packet type %d\n", pkt_type);
439 "%s: Qwakeup , rdIdx %d, wrIdx %d," \
440 " dispIdx %d, active list ID = %d\n",
441 __FUNCTION__, aqlRingBuffer->
rdIdx(),
447 "Dummy wakeup with barrier bit for rdIdx %d\n",
rqIdx);
455 void *pkt = aqlRingBuffer->
ptr(aqlRingBuffer->
dispIdx());
457 __FUNCTION__, aqlRingBuffer->
dispIdx());
463 __FUNCTION__, aqlRingBuffer->
dispIdx());
481 panic(
"Unknown queue state\n");
489 assert(pendingReads > 0);
491 if (pendingReads == 0) {
506 "%s: read-pointer offset[0x%x], write-pointer offset[0x%x]"
507 " doorbell(%d)[0x%x] \n",
520 uint32_t ttl_aql_buf = aqlRingBuffer->
numObjs();
523 uint32_t got_aql_buf = aqlRingBuffer->
allocEntry(num_umq);
525 uint32_t dma_start_ix = (aqlRingBuffer->
wrIdx() - got_aql_buf) %
530 "dma_start_ix = %d, num_umq = %d\n", __FUNCTION__, umq_nxt,
531 ttl_aql_buf, dma_start_ix, num_umq);
533 if (got_aql_buf == 0) {
541 uint32_t dma_b4_wrap = ttl_aql_buf - dma_start_ix;
542 while (got_aql_buf != 0 && num_umq != 0) {
543 uint32_t umq_b4_wrap = qDesc->
numObjs() -
546 = std::min({umq_b4_wrap, dma_b4_wrap, num_umq, got_aql_buf});
550 dma_start_ix, rl_idx);
553 void *aql_buf = aqlRingBuffer->
ptr(dma_start_ix);
555 [ = ] (
const uint32_t &dma_data)
557 num_2_xfer, series_ctx, aql_buf); }, 0);
565 "%s: aql_buf = %p, umq_nxt = %d, dma_ix = %d, num2xfer = %d\n",
566 __FUNCTION__, aql_buf, umq_nxt, dma_start_ix, num_2_xfer);
568 num_umq -= num_2_xfer;
569 got_aql_buf -= num_2_xfer;
570 dma_start_ix = (dma_start_ix + num_2_xfer) % ttl_aql_buf;
571 umq_nxt = (umq_nxt + num_2_xfer) % qDesc->
numObjs();
572 if (got_aql_buf == 0 && num_umq != 0) {
587 "%s: pid[%d], basePointer[0x%lx], dBPointer[0x%lx], "
588 "writeIndex[0x%x], readIndex[0x%x], size(bytes)[0x%x]\n",
595 const std::string
name)
596 :
_name(
name), _wrIdx(0), _rdIdx(0), _dispIdx(0)
630 " # free entries = %d, wrIdx = %d, rdIdx = %d\n", __FUNCTION__,
636 uint32_t old_rdIdx =
rdIdx();
642 return (old_rdIdx !=
rdIdx());
660 if (nBufReq >
nFree())
666 __FUNCTION__, nBufReq,
wrIdx());
678 if (
regdQList[rl_idx]->getBarrierBit() &&
679 regdQList[rl_idx]->isLastOutstandingPkt()) {
681 "Unset barrier bit for active list ID %d\n", rl_idx);
686 "Rescheduling active list ID %d after unsetting barrier "
693 if (
regdQList[rl_idx]->qCntxt.aqlBuf->freeEntry(pvPkt))
696 "%s: rd-ptr offset [0x%x], wr-ptr offset [0x%x], space used = %d," \
697 " q size = %d, stalled = %s, empty = %s, active list ID = %d\n",
701 qDesc->
isEmpty()?
"true" :
"false", rl_idx);
717 uint64_t signal_addr =
718 (uint64_t) (((uint64_t *)agent_pkt->completion_signal) + 1);
720 " completion signal: %x!\n", signal_addr);
745 uint64_t signal_addr = (uint64_t) (((uint64_t *)signal) + 1);
Tick curTick()
The universal simulation clock.
#define fatal(...)
This implements a cprintf based fatal() function.
AQLRingBuffer(uint32_t size, const std::string name)
void schedAQLProcessing(uint32_t rl_idx)
int allocEntry(uint32_t nBufReq)
void setGPUDevice(AMDGPUDevice *gpu_device)
HSAPacketProcessor * hsaPP
Q_STATE processPkt(void *pkt, uint32_t rl_idx, Addr host_pkt_addr)
#define HSAPP_EVENT_DESCRIPTION_GENERATOR(XEVENT)
AQL kernel dispatch packet.
uint64_t hostReadIndexPtr
Wraps a std::function object in a DmaCallback.
void setWrIdx(uint64_t value)
void dmaReadVirt(Addr host_addr, unsigned size, DmaCallback *cb, void *data, Tick delay=0)
Initiate a DMA read from virtual address host_addr.
AddrRange RangeSize(Addr start, Addr size)
int32_t hsa_signal_value_t
Signal value.
void unsetDeviceQueueDesc(uint64_t queue_id, int doorbellSize)
void saveHostDispAddr(Addr host_pkt_addr, int num_pkts, int ix)
the kernel may try to read from the dispatch packet, so we need to keep the host address that corresp...
#define NumSignalsPerBarrier
void schedule(Event &event, Tick when)
uint64_t ptr(uint64_t ix)
TranslationGenPtr translate(Addr vaddr, Addr size) override
Function used to translate a range of addresses from virtual to physical addresses.
void makeAtomicResponse()
Addr hostDispAddr() const
void getCommandsFromHost(int pid, uint32_t rl_idx)
void submitDispatchPkt(void *raw_pkt, uint32_t queue_id, Addr host_pkt_addr)
submitDispatchPkt() is the entry point into the CP from the HSAPP and is only meant to be used with A...
void setDeviceQueueDesc(uint64_t hostReadIndexPointer, uint64_t basePointer, uint64_t queue_id, uint32_t size, int doorbellSize, GfxVersion gfxVersion, Addr offset=0, uint64_t rd_idx=0)
void updateHsaSignal(Addr signal_handle, uint64_t signal_value, HsaSignalCallbackFunction function=[](const uint64_t &) { })
bool freeEntry(void *pkt)
GPUCommandProcessor * gpu_device
void cmdQueueCmdDma(HSAPacketProcessor *hsaPP, int pid, bool isRead, uint32_t ix_start, unsigned num_pkts, dma_series_ctx *series_ctx, void *dest_4debug)
virtual Tick read(Packet *) override
@ HSA_PACKET_TYPE_KERNEL_DISPATCH
Packet used by agents for dispatching jobs to kernel agents.
void finishPkt(void *pkt, uint32_t rl_idx)
virtual std::string name() const
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
void registerNewQueue(uint64_t hostReadIndexPointer, uint64_t basePointer, uint64_t queue_id, uint32_t size, int doorbellSize, GfxVersion gfxVersion, Addr offset=0, uint64_t rd_idx=0)
Calls getCurrentEntry once the queueEntry has been dmaRead.
void incWrIdx(uint64_t value)
std::vector< hsa_kernel_dispatch_packet_t > _aqlBuf
uint64_t Tick
Tick count type.
void submitAgentDispatchPkt(void *raw_pkt, uint32_t queue_id, Addr host_pkt_addr)
submitAgentDispatchPkt() is for accepting agent dispatch packets.
bool stalledOnDmaBufAvailability
Device model for an AMD GPU.
void sendCompletionSignal(hsa_signal_value_t signal)
void unregisterQueue(uint64_t queue_id, int doorbellSize)
@ HSA_PACKET_TYPE_BARRIER_OR
Packet used by agents to delay processing of subsequent packets, and to express complex dependencies ...
void displayQueueDescriptor(int pid, uint32_t rl_idx)
void dmaWriteVirt(Addr host_addr, unsigned size, DmaCallback *b, void *data, Tick delay=0)
Initiate a DMA write from virtual address host_addr.
void updateReadIndex(int, uint32_t)
@ HSA_PACKET_TYPE_VENDOR_SPECIFIC
Vendor-specific packet.
void setDispIdx(uint64_t value)
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
void incDispIdx(uint64_t value)
@ HSA_PACKET_TYPE_INVALID
The packet has been processed in the past, but has not been reassigned to the packet processor.
hsa_packet_type_t
Packet type.
bool FullSystem
The FullSystem variable can be used to determine the current mode of simulation.
@ HSA_PACKET_TYPE_AGENT_DISPATCH
Packet used by agents for dispatching jobs to agents.
void sendAgentDispatchCompletionSignal(void *pkt, hsa_signal_value_t signal)
std::vector< Addr > _hostDispAddresses
std::vector< class RQLEntry * > regdQList
const Tick pktProcessDelay
void setDevice(GPUCommandProcessor *dev)
void incRdIdx(uint64_t value)
Internal ring buffer which is used to prefetch/store copies of the in-memory HSA ring buffer.
void submitVendorPkt(void *raw_pkt, uint32_t queue_id, Addr host_pkt_addr)
submitVendorPkt() is for accepting vendor-specific packets from the HSAPP.
uint64_t functionalReadHsaSignal(Addr signal_handle)
T getLE() const
Get the data in the packet byte swapped from little endian to host endian.
virtual AddrRangeList getAddrRanges() const override
Every PIO device is obliged to provide an implementation that returns the address ranges the device r...
@ HSA_PACKET_TYPE_BARRIER_AND
Packet used by agents to delay processing of subsequent packets, and to express complex dependencies ...
std::vector< hsa_signal_value_t > values
void write(Addr db_addr, uint64_t doorbell_reg)
std::vector< bool > _aqlComplete
void updateReadDispIdDma()
this event is used to update the read_disp_id field (the read pointer) of the MQD,...
void setDevRequestor(RequestorID mid)
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
virtual Tick write(Packet *) override
RequestorID vramRequestorId()
Methods related to translations and system/device memory.
std::unique_ptr< TranslationGen > TranslationGenPtr
void setRdIdx(uint64_t value)
bool scheduled() const
Determine if the current event is scheduled.
#define panic(...)
This implements a cprintf based panic() function.
GPUComputeDriver * driver()
QueueProcessEvent aqlProcessEvent
Generated on Sun Jul 30 2023 01:56:55 for gem5 by doxygen 1.8.17