Go to the documentation of this file.
41 #include "debug/HSAPacketProcessor.hh"
45 #include "enums/GfxVersion.hh"
53 #define HSAPP_EVENT_DESCRIPTION_GENERATOR(XEVENT) \
55 HSAPacketProcessor::XEVENT::description() const \
60 #define PKT_TYPE(PKT) ((hsa_packet_type_t)(((PKT->header) >> \
61 HSA_PACKET_HEADER_TYPE) & (HSA_PACKET_HEADER_WIDTH_TYPE - 1)))
65 #define IS_BARRIER(PKT) ((hsa_packet_header_t)(((PKT->header) >> \
66 HSA_PACKET_HEADER_BARRIER) & HSA_PACKET_HEADER_WIDTH_BARRIER))
75 pioSize(
PAGE_SIZE), pioDelay(10), pktProcessDelay(
p.pktProcessDelay)
79 regdQList.resize(numHWQueues);
80 for (
int i = 0;
i < numHWQueues;
i++) {
100 uint64_t basePointer,
102 uint32_t size,
int doorbellSize,
103 GfxVersion gfxVersion)
106 "%s:base = %p, qID = %d, ze = %d\n", __FUNCTION__,
107 (
void *)basePointer, queue_id, size);
109 basePointer, queue_id, size, doorbellSize,
134 "%s: write of size %d to reg-offset %d (0x%x)\n",
135 __FUNCTION__, pkt->
getSize(), daddr, daddr);
139 uint64_t doorbell_reg(0);
141 doorbell_reg = pkt->
getLE<uint64_t>() + 1;
143 doorbell_reg = pkt->
getLE<uint32_t>();
145 fatal(
"invalid db size");
148 "%s: write data 0x%x to offset %d (0x%x)\n",
149 __FUNCTION__, doorbell_reg, daddr, daddr);
169 auto process =
sys->
threads[0]->getProcessPtr();
171 return process->pTable->translateRange(
vaddr, size);
194 "%s: read-pointer offset [0x%x]\n", __FUNCTION__, aqlbuf->
rdIdx());
200 "%s: rd-ptr offset [0x%x], wr-ptr offset [0x%x], space used = %d," \
201 " q size = %d, is_empty = %s, active list ID = %d\n", __FUNCTION__,
211 bool isRead, uint32_t ix_start,
unsigned num_pkts,
214 uint32_t rl_idx = series_ctx->
rl_idx;
220 " pktsRemaining = %d, active list ID = %d\n", __FUNCTION__,
221 ix_start, num_pkts, series_ctx->
pkts_2_go,
229 "%s: schedule Qwakeup next cycle, rdIdx %d, wrIdx %d," \
230 " dispIdx %d, active list ID = %d\n",
231 __FUNCTION__, aqlRingBuffer->
rdIdx(),
232 aqlRingBuffer->
wrIdx(), aqlRingBuffer->
dispIdx(), rl_idx);
275 regdQList[rl_idx]->compltnPending() > 0) {
280 " list ID = %d\n", __FUNCTION__, rl_idx);
286 " active list ID = %d\n", __FUNCTION__, rl_idx);
292 " active list ID = %d\n", __FUNCTION__, rl_idx);
303 " list ID = %d\n", __FUNCTION__, rl_idx);
308 " active list ID = %d\n", __FUNCTION__, rl_idx);
315 if (bar_and_pkt->dep_signal[
i]) {
318 uint64_t signal_addr =
319 (uint64_t) (((uint64_t *) bar_and_pkt->dep_signal[
i]) + 1);
323 " , sig addr %x, value %d active list ID = %d\n",
324 __FUNCTION__,
i, signal_addr,
325 *signal_val, rl_idx);
329 if (*signal_val != 0) {
334 [ = ] (
const uint32_t &dma_data)
340 " active list %d\n", __FUNCTION__,
347 [ = ] (
const uint32_t &dma_data)
353 " active list %d\n", __FUNCTION__,
361 " active list ID = %d\n", __FUNCTION__, rl_idx);
369 if (bar_and_pkt->completion_signal != 0) {
376 bar_and_pkt->completion_signal);
379 " completion signal! Addr: %x\n",
380 bar_and_pkt->completion_signal);
388 dep_sgnl_rd_st->
allRead =
false;
392 fatal(
"Unsupported packet type HSA_PACKET_TYPE_BARRIER_OR");
394 fatal(
"Unsupported packet type HSA_PACKET_TYPE_INVALID");
397 " active list ID = %d\n", __FUNCTION__, rl_idx);
400 (
void *)disp_pkt, rl_idx, host_pkt_addr);
404 fatal(
"Unsupported packet type %d\n", pkt_type);
417 "%s: Qwakeup , rdIdx %d, wrIdx %d," \
418 " dispIdx %d, active list ID = %d\n",
419 __FUNCTION__, aqlRingBuffer->
rdIdx(),
425 "Dummy wakeup with barrier bit for rdIdx %d\n",
rqIdx);
433 void *pkt = aqlRingBuffer->
ptr(aqlRingBuffer->
dispIdx());
435 __FUNCTION__, aqlRingBuffer->
dispIdx());
441 __FUNCTION__, aqlRingBuffer->
dispIdx());
459 panic(
"Unknown queue state\n");
467 assert(pendingReads > 0);
469 if (pendingReads == 0) {
484 "%s: read-pointer offset[0x%x], write-pointer offset[0x%x]"
485 " doorbell(%d)[0x%x] \n",
498 uint32_t ttl_aql_buf = aqlRingBuffer->
numObjs();
501 uint32_t got_aql_buf = aqlRingBuffer->
allocEntry(num_umq);
503 uint32_t dma_start_ix = (aqlRingBuffer->
wrIdx() - got_aql_buf) %
508 "dma_start_ix = %d, num_umq = %d\n", __FUNCTION__, umq_nxt,
509 ttl_aql_buf, dma_start_ix, num_umq);
511 if (got_aql_buf == 0) {
519 uint32_t dma_b4_wrap = ttl_aql_buf - dma_start_ix;
520 while (got_aql_buf != 0 && num_umq != 0) {
521 uint32_t umq_b4_wrap = qDesc->
numObjs() -
524 = std::min({umq_b4_wrap, dma_b4_wrap, num_umq, got_aql_buf});
528 dma_start_ix, rl_idx);
531 void *aql_buf = aqlRingBuffer->
ptr(dma_start_ix);
533 [ = ] (
const uint32_t &dma_data)
535 num_2_xfer, series_ctx, aql_buf); }, 0);
543 "%s: aql_buf = %p, umq_nxt = %d, dma_ix = %d, num2xfer = %d\n",
544 __FUNCTION__, aql_buf, umq_nxt, dma_start_ix, num_2_xfer);
546 num_umq -= num_2_xfer;
547 got_aql_buf -= num_2_xfer;
548 dma_start_ix = (dma_start_ix + num_2_xfer) % ttl_aql_buf;
549 umq_nxt = (umq_nxt + num_2_xfer) % qDesc->
numObjs();
550 if (got_aql_buf == 0 && num_umq != 0) {
565 "%s: pid[%d], basePointer[0x%lx], dBPointer[0x%lx], "
566 "writeIndex[0x%x], readIndex[0x%x], size(bytes)[0x%x]\n",
573 const std::string
name)
574 :
_name(
name), _wrIdx(0), _rdIdx(0), _dispIdx(0)
590 " # free entries = %d, wrIdx = %d, rdIdx = %d\n", __FUNCTION__,
596 uint32_t old_rdIdx =
rdIdx();
602 return (old_rdIdx !=
rdIdx());
620 if (nBufReq >
nFree())
626 __FUNCTION__, nBufReq,
wrIdx());
638 if (
regdQList[rl_idx]->getBarrierBit() &&
639 regdQList[rl_idx]->isLastOutstandingPkt()) {
641 "Unset barrier bit for active list ID %d\n", rl_idx);
646 "Rescheduling active list ID %d after unsetting barrier "
653 if (
regdQList[rl_idx]->qCntxt.aqlBuf->freeEntry(pvPkt))
656 "%s: rd-ptr offset [0x%x], wr-ptr offset [0x%x], space used = %d," \
657 " q size = %d, stalled = %s, empty = %s, active list ID = %d\n",
661 qDesc->
isEmpty()?
"true" :
"false", rl_idx);
677 uint64_t signal_addr =
678 (uint64_t) (((uint64_t *)agent_pkt->completion_signal) + 1);
680 " completion signal: %x!\n", signal_addr);
705 uint64_t signal_addr = (uint64_t) (((uint64_t *)signal) + 1);
Tick curTick()
The universal simulation clock.
#define fatal(...)
This implements a cprintf based fatal() function.
AQLRingBuffer(uint32_t size, const std::string name)
void schedAQLProcessing(uint32_t rl_idx)
int allocEntry(uint32_t nBufReq)
HSAPacketProcessor * hsaPP
Q_STATE processPkt(void *pkt, uint32_t rl_idx, Addr host_pkt_addr)
#define HSAPP_EVENT_DESCRIPTION_GENERATOR(XEVENT)
AQL kernel dispatch packet.
uint64_t hostReadIndexPtr
Wraps a std::function object in a DmaCallback.
void dmaReadVirt(Addr host_addr, unsigned size, DmaCallback *cb, void *data, Tick delay=0)
Initiate a DMA read from virtual address host_addr.
AddrRange RangeSize(Addr start, Addr size)
int32_t hsa_signal_value_t
Signal value.
void unsetDeviceQueueDesc(uint64_t queue_id, int doorbellSize)
void saveHostDispAddr(Addr host_pkt_addr, int num_pkts, int ix)
the kernel may try to read from the dispatch packet, so we need to keep the host address that corresp...
#define NumSignalsPerBarrier
void schedule(Event &event, Tick when)
uint64_t ptr(uint64_t ix)
TranslationGenPtr translate(Addr vaddr, Addr size) override
Function used to translate a range of addresses from virtual to physical addresses.
void makeAtomicResponse()
Addr hostDispAddr() const
void getCommandsFromHost(int pid, uint32_t rl_idx)
void setDeviceQueueDesc(uint64_t hostReadIndexPointer, uint64_t basePointer, uint64_t queue_id, uint32_t size, int doorbellSize, GfxVersion gfxVersion)
void submitDispatchPkt(void *raw_pkt, uint32_t queue_id, Addr host_pkt_addr)
submitDispatchPkt() is the entry point into the CP from the HSAPP and is only meant to be used with A...
void updateHsaSignal(Addr signal_handle, uint64_t signal_value, HsaSignalCallbackFunction function=[](const uint64_t &) { })
bool freeEntry(void *pkt)
GPUCommandProcessor * gpu_device
void cmdQueueCmdDma(HSAPacketProcessor *hsaPP, int pid, bool isRead, uint32_t ix_start, unsigned num_pkts, dma_series_ctx *series_ctx, void *dest_4debug)
virtual Tick read(Packet *) override
@ HSA_PACKET_TYPE_KERNEL_DISPATCH
Packet used by agents for dispatching jobs to kernel agents.
void finishPkt(void *pkt, uint32_t rl_idx)
virtual std::string name() const
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
Calls getCurrentEntry once the queueEntry has been dmaRead.
void incWrIdx(uint64_t value)
std::vector< hsa_kernel_dispatch_packet_t > _aqlBuf
uint64_t Tick
Tick count type.
void submitAgentDispatchPkt(void *raw_pkt, uint32_t queue_id, Addr host_pkt_addr)
submitAgentDispatchPkt() is for accepting agent dispatch packets.
bool stalledOnDmaBufAvailability
void sendCompletionSignal(hsa_signal_value_t signal)
void unregisterQueue(uint64_t queue_id, int doorbellSize)
@ HSA_PACKET_TYPE_BARRIER_OR
Packet used by agents to delay processing of subsequent packets, and to express complex dependencies ...
void displayQueueDescriptor(int pid, uint32_t rl_idx)
void dmaWriteVirt(Addr host_addr, unsigned size, DmaCallback *b, void *data, Tick delay=0)
Initiate a DMA write to virtual address host_addr.
void updateReadIndex(int, uint32_t)
@ HSA_PACKET_TYPE_VENDOR_SPECIFIC
Vendor-specific packet.
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
void incDispIdx(uint64_t value)
@ HSA_PACKET_TYPE_INVALID
The packet has been processed in the past, but has not been reassigned to the packet processor.
hsa_packet_type_t
Packet type.
void registerNewQueue(uint64_t hostReadIndexPointer, uint64_t basePointer, uint64_t queue_id, uint32_t size, int doorbellSize, GfxVersion gfxVersion)
@ HSA_PACKET_TYPE_AGENT_DISPATCH
Packet used by agents for dispatching jobs to agents.
void sendAgentDispatchCompletionSignal(void *pkt, hsa_signal_value_t signal)
std::vector< Addr > _hostDispAddresses
std::vector< class RQLEntry * > regdQList
const Tick pktProcessDelay
void setDevice(GPUCommandProcessor *dev)
void incRdIdx(uint64_t value)
Internal ring buffer which is used to prefetch/store copies of the in-memory HSA ring buffer.
void submitVendorPkt(void *raw_pkt, uint32_t queue_id, Addr host_pkt_addr)
submitVendorPkt() is for accepting vendor-specific packets from the HSAPP.
uint64_t functionalReadHsaSignal(Addr signal_handle)
T getLE() const
Get the data in the packet byte swapped from little endian to host endian.
virtual AddrRangeList getAddrRanges() const override
Every PIO device is obliged to provide an implementation that returns the address ranges the device r...
@ HSA_PACKET_TYPE_BARRIER_AND
Packet used by agents to delay processing of subsequent packets, and to express complex dependencies ...
std::vector< hsa_signal_value_t > values
void write(Addr db_addr, uint64_t doorbell_reg)
std::vector< bool > _aqlComplete
void updateReadDispIdDma()
this event is used to update the read_disp_id field (the read pointer) of the MQD,...
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
virtual Tick write(Packet *) override
std::unique_ptr< TranslationGen > TranslationGenPtr
bool scheduled() const
Determine if the current event is scheduled.
#define panic(...)
This implements a cprintf based panic() function.
GPUComputeDriver * driver()
QueueProcessEvent aqlProcessEvent
Generated on Tue Feb 8 2022 11:47:07 for gem5 by doxygen 1.8.17