Go to the documentation of this file.
43 #include "debug/HSAPacketProcessor.hh"
54 #define HSAPP_EVENT_DESCRIPTION_GENERATOR(XEVENT) \
56 HSAPacketProcessor::XEVENT::description() const \
61 #define PKT_TYPE(PKT) ((hsa_packet_type_t)(((PKT->header) >> \
62 HSA_PACKET_HEADER_TYPE) & (HSA_PACKET_HEADER_WIDTH_TYPE - 1)))
66 #define IS_BARRIER(PKT) ((hsa_packet_header_t)(((PKT->header) >> \
67 HSA_PACKET_HEADER_BARRIER) & HSA_PACKET_HEADER_WIDTH_BARRIER))
76 pioSize(
PAGE_SIZE), pioDelay(10), pktProcessDelay(
p.pktProcessDelay)
80 regdQList.resize(numHWQueues);
81 for (
int i = 0;
i < numHWQueues;
i++) {
101 uint64_t basePointer,
103 uint32_t size,
int doorbellSize)
106 "%s:base = %p, qID = %d, ze = %d\n", __FUNCTION__,
107 (
void *)basePointer, queue_id, size);
109 basePointer, queue_id, size, doorbellSize);
133 "%s: write of size %d to reg-offset %d (0x%x)\n",
134 __FUNCTION__, pkt->
getSize(), daddr, daddr);
138 uint64_t doorbell_reg(0);
140 doorbell_reg = pkt->
getLE<uint64_t>() + 1;
142 doorbell_reg = pkt->
getLE<uint32_t>();
144 fatal(
"invalid db size");
147 "%s: write data 0x%x to offset %d (0x%x)\n",
148 __FUNCTION__, doorbell_reg, daddr, daddr);
168 auto process =
sys->
threads[0]->getProcessPtr();
170 if (!process->pTable->translate(
vaddr, paddr))
171 fatal(
"failed translation: vaddr 0x%x\n",
vaddr);
194 "%s: read-pointer offset [0x%x]\n", __FUNCTION__, aqlbuf->
rdIdx());
200 "%s: rd-ptr offset [0x%x], wr-ptr offset [0x%x], space used = %d," \
201 " q size = %d, is_empty = %s, active list ID = %d\n", __FUNCTION__,
211 bool isRead, uint32_t ix_start,
unsigned num_pkts,
214 uint32_t rl_idx = series_ctx->
rl_idx;
220 " pktsRemaining = %d, active list ID = %d\n", __FUNCTION__,
221 ix_start, num_pkts, series_ctx->
pkts_2_go,
229 "%s: schedule Qwakeup next cycle, rdIdx %d, wrIdx %d," \
230 " dispIdx %d, active list ID = %d\n",
231 __FUNCTION__, aqlRingBuffer->rdIdx(),
232 aqlRingBuffer->wrIdx(), aqlRingBuffer->dispIdx(), rl_idx);
275 regdQList[rl_idx]->compltnPending() > 0) {
280 " list ID = %d\n", __FUNCTION__, rl_idx);
286 " active list ID = %d\n", __FUNCTION__, rl_idx);
292 " active list ID = %d\n", __FUNCTION__, rl_idx);
303 " list ID = %d\n", __FUNCTION__, rl_idx);
308 " active list ID = %d\n", __FUNCTION__, rl_idx);
315 if (bar_and_pkt->dep_signal[
i]) {
318 uint64_t signal_addr =
319 (uint64_t) (((uint64_t *) bar_and_pkt->dep_signal[
i]) + 1);
323 " , sig addr %x, value %d active list ID = %d\n",
324 __FUNCTION__,
i, signal_addr,
325 *signal_val, rl_idx);
329 if (*signal_val != 0) {
334 [ = ] (
const uint32_t &dma_data)
340 " active list %d\n", __FUNCTION__,
347 [ = ] (
const uint32_t &dma_data)
353 " active list %d\n", __FUNCTION__,
361 " active list ID = %d\n", __FUNCTION__, rl_idx);
369 if (bar_and_pkt->completion_signal != 0) {
376 bar_and_pkt->completion_signal);
379 " completion signal! Addr: %x\n",
380 bar_and_pkt->completion_signal);
388 dep_sgnl_rd_st->
allRead =
false;
392 fatal(
"Unsupported packet type HSA_PACKET_TYPE_BARRIER_OR");
394 fatal(
"Unsupported packet type HSA_PACKET_TYPE_INVALID");
397 " active list ID = %d\n", __FUNCTION__, rl_idx);
400 (
void *)disp_pkt, rl_idx, host_pkt_addr);
404 fatal(
"Unsupported packet type %d\n", pkt_type);
417 "%s: Qwakeup , rdIdx %d, wrIdx %d," \
418 " dispIdx %d, active list ID = %d\n",
419 __FUNCTION__, aqlRingBuffer->
rdIdx(),
425 "Dummy wakeup with barrier bit for rdIdx %d\n",
rqIdx);
433 void *pkt = aqlRingBuffer->
ptr(aqlRingBuffer->
dispIdx());
435 __FUNCTION__, aqlRingBuffer->
dispIdx());
441 __FUNCTION__, aqlRingBuffer->
dispIdx());
459 panic(
"Unknown queue state\n");
467 assert(pendingReads > 0);
469 if (pendingReads == 0) {
484 "%s: read-pointer offset[0x%x], write-pointer offset[0x%x]"
485 " doorbell(%d)[0x%x] \n",
498 uint32_t ttl_aql_buf = aqlRingBuffer->
numObjs();
501 uint32_t got_aql_buf = aqlRingBuffer->
allocEntry(num_umq);
503 uint32_t dma_start_ix = (aqlRingBuffer->
wrIdx() - got_aql_buf) %
508 "dma_start_ix = %d, num_umq = %d\n", __FUNCTION__, umq_nxt,
509 ttl_aql_buf, dma_start_ix, num_umq);
511 if (got_aql_buf == 0) {
519 uint32_t dma_b4_wrap = ttl_aql_buf - dma_start_ix;
520 while (got_aql_buf != 0 && num_umq != 0) {
521 uint32_t umq_b4_wrap = qDesc->
numObjs() -
524 = std::min({umq_b4_wrap, dma_b4_wrap, num_umq, got_aql_buf});
528 dma_start_ix, rl_idx);
531 void *aql_buf = aqlRingBuffer->
ptr(dma_start_ix);
533 [ = ] (
const uint32_t &dma_data)
535 num_2_xfer, series_ctx, aql_buf); }, 0);
543 "%s: aql_buf = %p, umq_nxt = %d, dma_ix = %d, num2xfer = %d\n",
544 __FUNCTION__, aql_buf, umq_nxt, dma_start_ix, num_2_xfer);
546 num_umq -= num_2_xfer;
547 got_aql_buf -= num_2_xfer;
548 dma_start_ix = (dma_start_ix + num_2_xfer) % ttl_aql_buf;
549 umq_nxt = (umq_nxt + num_2_xfer) % qDesc->
numObjs();
550 if (got_aql_buf == 0 && num_umq != 0) {
564 "%s: pid[%d], basePointer[0x%lx], dBPointer[0x%lx], "
565 "writeIndex[0x%x], readIndex[0x%x], size(bytes)[0x%x]\n",
566 __FUNCTION__, pid, qDesc->basePointer,
567 qDesc->doorbellPointer, qDesc->writeIndex,
568 qDesc->readIndex, qDesc->numElts);
572 const std::string
name)
573 :
_name(
name), _wrIdx(0), _rdIdx(0), _dispIdx(0)
589 " # free entries = %d, wrIdx = %d, rdIdx = %d\n", __FUNCTION__,
595 uint32_t old_rdIdx =
rdIdx();
601 return (old_rdIdx !=
rdIdx());
619 if (nBufReq >
nFree())
625 __FUNCTION__, nBufReq,
wrIdx());
637 if (
regdQList[rl_idx]->getBarrierBit() &&
638 regdQList[rl_idx]->isLastOutstandingPkt()) {
640 "Unset barrier bit for active list ID %d\n", rl_idx);
645 "Rescheduling active list ID %d after unsetting barrier "
652 if (
regdQList[rl_idx]->qCntxt.aqlBuf->freeEntry(pvPkt))
655 "%s: rd-ptr offset [0x%x], wr-ptr offset [0x%x], space used = %d," \
656 " q size = %d, stalled = %s, empty = %s, active list ID = %d\n",
660 qDesc->
isEmpty()?
"true" :
"false", rl_idx);
676 uint64_t signal_addr =
677 (uint64_t) (((uint64_t *)agent_pkt->completion_signal) + 1);
679 " completion signal: %x!\n", signal_addr);
704 uint64_t signal_addr = (uint64_t) (((uint64_t *)signal) + 1);
Tick curTick()
The universal simulation clock.
#define fatal(...)
This implements a cprintf based fatal() function.
AQLRingBuffer(uint32_t size, const std::string name)
void schedAQLProcessing(uint32_t rl_idx)
int allocEntry(uint32_t nBufReq)
HSAPacketProcessor * hsaPP
Q_STATE processPkt(void *pkt, uint32_t rl_idx, Addr host_pkt_addr)
#define HSAPP_EVENT_DESCRIPTION_GENERATOR(XEVENT)
AQL kernel dispatch packet.
uint64_t hostReadIndexPtr
Wraps a std::function object in a DmaCallback.
void dmaReadVirt(Addr host_addr, unsigned size, DmaCallback *cb, void *data, Tick delay=0)
Initiate a DMA read from virtual address host_addr.
AddrRange RangeSize(Addr start, Addr size)
int32_t hsa_signal_value_t
Signal value.
void unsetDeviceQueueDesc(uint64_t queue_id, int doorbellSize)
void saveHostDispAddr(Addr host_pkt_addr, int num_pkts, int ix)
The kernel may try to read from the dispatch packet, so we need to keep the host address that corresp...
#define NumSignalsPerBarrier
void schedule(Event &event, Tick when)
uint64_t ptr(uint64_t ix)
void makeAtomicResponse()
Addr hostDispAddr() const
void getCommandsFromHost(int pid, uint32_t rl_idx)
void setDeviceQueueDesc(uint64_t hostReadIndexPointer, uint64_t basePointer, uint64_t queue_id, uint32_t size, int doorbellSize)
void submitDispatchPkt(void *raw_pkt, uint32_t queue_id, Addr host_pkt_addr)
submitDispatchPkt() is the entry point into the CP from the HSAPP and is only meant to be used with A...
void updateHsaSignal(Addr signal_handle, uint64_t signal_value, HsaSignalCallbackFunction function=[](const uint64_t &) { })
bool freeEntry(void *pkt)
GPUCommandProcessor * gpu_device
void cmdQueueCmdDma(HSAPacketProcessor *hsaPP, int pid, bool isRead, uint32_t ix_start, unsigned num_pkts, dma_series_ctx *series_ctx, void *dest_4debug)
virtual Tick read(Packet *) override
@ HSA_PACKET_TYPE_KERNEL_DISPATCH
Packet used by agents for dispatching jobs to kernel agents.
void finishPkt(void *pkt, uint32_t rl_idx)
virtual std::string name() const
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
Calls getCurrentEntry once the queueEntry has been dmaRead.
void incWrIdx(uint64_t value)
std::vector< hsa_kernel_dispatch_packet_t > _aqlBuf
uint64_t Tick
Tick count type.
void registerNewQueue(uint64_t hostReadIndexPointer, uint64_t basePointer, uint64_t queue_id, uint32_t size, int doorbellSize)
void translateOrDie(Addr vaddr, Addr &paddr) override
Function used to translate from virtual to physical addresses.
void submitAgentDispatchPkt(void *raw_pkt, uint32_t queue_id, Addr host_pkt_addr)
submitAgentDispatchPkt() is for accepting agent dispatch packets.
bool stalledOnDmaBufAvailability
void sendCompletionSignal(hsa_signal_value_t signal)
void unregisterQueue(uint64_t queue_id, int doorbellSize)
@ HSA_PACKET_TYPE_BARRIER_OR
Packet used by agents to delay processing of subsequent packets, and to express complex dependencies ...
void displayQueueDescriptor(int pid, uint32_t rl_idx)
void dmaWriteVirt(Addr host_addr, unsigned size, DmaCallback *b, void *data, Tick delay=0)
Initiate a DMA write from virtual address host_addr.
void updateReadIndex(int, uint32_t)
@ HSA_PACKET_TYPE_VENDOR_SPECIFIC
Vendor-specific packet.
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
void incDispIdx(uint64_t value)
@ HSA_PACKET_TYPE_INVALID
The packet has been processed in the past, but has not been reassigned to the packet processor.
hsa_packet_type_t
Packet type.
@ HSA_PACKET_TYPE_AGENT_DISPATCH
Packet used by agents for dispatching jobs to agents.
void sendAgentDispatchCompletionSignal(void *pkt, hsa_signal_value_t signal)
std::vector< Addr > _hostDispAddresses
std::vector< class RQLEntry * > regdQList
const Tick pktProcessDelay
void setDevice(GPUCommandProcessor *dev)
void incRdIdx(uint64_t value)
Internal ring buffer which is used to prefetch/store copies of the in-memory HSA ring buffer.
void submitVendorPkt(void *raw_pkt, uint32_t queue_id, Addr host_pkt_addr)
submitVendorPkt() is for accepting vendor-specific packets from the HSAPP.
uint64_t functionalReadHsaSignal(Addr signal_handle)
T getLE() const
Get the data in the packet byte swapped from little endian to host endian.
virtual AddrRangeList getAddrRanges() const override
Every PIO device is obliged to provide an implementation that returns the address ranges the device r...
@ HSA_PACKET_TYPE_BARRIER_AND
Packet used by agents to delay processing of subsequent packets, and to express complex dependencies ...
std::vector< hsa_signal_value_t > values
void write(Addr db_addr, uint64_t doorbell_reg)
std::vector< bool > _aqlComplete
void updateReadDispIdDma()
This event is used to update the read_disp_id field (the read pointer) of the MQD,...
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
virtual Tick write(Packet *) override
bool scheduled() const
Determine if the current event is scheduled.
#define panic(...)
This implements a cprintf based panic() function.
GPUComputeDriver * driver()
QueueProcessEvent aqlProcessEvent
Generated on Tue Sep 21 2021 12:25:15 for gem5 by doxygen 1.8.17