32#ifndef __DEV_HSA_HSA_PACKET_PROCESSOR__
33#define __DEV_HSA_HSA_PACKET_PROCESSOR__
40#include "debug/HSAPacketProcessor.hh"
44#include "enums/GfxVersion.hh"
45#include "params/HSAPacketProcessor.hh"
48#define AQL_PACKET_SIZE 64
50#define NUM_DMA_BUFS 16
51#define DMA_BUF_SIZE (AQL_PACKET_SIZE * NUM_DMA_BUFS)
53#define NumSignalsPerBarrier 5
74class GPUCommandProcessor;
92 uint64_t hri_ptr, uint32_t size,
119 uint64_t retAddr = 0ll;
122 "index: 0x%x, numElts: 0x%x, objSize: 0x%x, "
170 for (
int i = 0;
i < num_pkts; ++
i) {
317 return regdQList.at(queId)->qCntxt.qDesc;
328 auto aqlBuf =
regdQList.at(queId)->qCntxt.aqlBuf;
329 return aqlBuf->dispIdx() - aqlBuf->rdIdx();
343 uint64_t basePointer,
345 uint32_t size,
int doorbellSize,
346 GfxVersion gfxVersion,
359 void finishPkt(
void *pkt, uint32_t rl_idx);
391 uint32_t ix_start,
unsigned num_pkts,
392 dma_series_ctx *series_ctx,
void *dest_4debug);
Defines global host-dependent types: Counter, Tick, and (indirectly) {int,uint}{8,...
Device model for an AMD GPU.
Internal ring buffer which is used to prefetch/store copies of the in-memory HSA ring buffer.
void setRdIdx(uint64_t value)
std::vector< bool > _aqlComplete
int allocEntry(uint32_t nBufReq)
void incDispIdx(uint64_t value)
void setDispIdx(uint64_t value)
void saveHostDispAddr(Addr host_pkt_addr, int num_pkts, int ix)
The kernel may try to read from the dispatch packet, so we need to keep the host address that corresp...
void setWrIdx(uint64_t value)
bool isLastOutstandingPkt() const
Packets aren't guaranteed to be completed in-order, and we need to know when the last packet is finis...
Addr hostDispAddr() const
uint64_t compltnPending()
AQLRingBuffer(uint32_t size, const std::string name)
std::vector< hsa_kernel_dispatch_packet_t > _aqlBuf
void incWrIdx(uint64_t value)
std::vector< Addr > _hostDispAddresses
bool freeEntry(void *pkt)
void incRdIdx(uint64_t value)
HSAPacketProcessor * hsaPP
QueueProcessEvent(HSAPacketProcessor *_hsaPP, uint32_t _rqIdx)
virtual const char * description() const
Return a C string describing the event.
QueueProcessEvent aqlProcessEvent
void setBarrierBit(bool set_val)
RQLEntry(HSAPacketProcessor *hsaPP, uint32_t rqIdx)
bool getBarrierBit() const
SignalState depSignalRdState
bool isLastOutstandingPkt() const
uint64_t compltnPending()
std::vector< hsa_signal_value_t > values
void sendAgentDispatchCompletionSignal(void *pkt, hsa_signal_value_t signal)
std::vector< class RQLEntry * > regdQList
void updateReadIndex(int, uint32_t)
virtual Tick write(Packet *) override
void cmdQueueCmdDma(HSAPacketProcessor *hsaPP, int pid, bool isRead, uint32_t ix_start, unsigned num_pkts, dma_series_ctx *series_ctx, void *dest_4debug)
void(DmaDevice::* DmaFnPtr)(Addr, int, Event *, uint8_t *, Tick)
uint64_t inFlightPkts(uint32_t queId)
HWScheduler * hwScheduler()
void sendCompletionSignal(hsa_signal_value_t signal)
GPUCommandProcessor * gpu_device
void updateReadDispIdDma()
This event is used to update the read_disp_id field (the read pointer) of the MQD,...
void setGPUDevice(AMDGPUDevice *gpu_device)
HSAPacketProcessorParams Params
void getCommandsFromHost(int pid, uint32_t rl_idx)
TranslationGenPtr translate(Addr vaddr, Addr size) override
Function used to translate a range of addresses from virtual to physical addresses.
void finishPkt(void *pkt)
class RQLEntry * getRegdListEntry(uint32_t queId)
void setDeviceQueueDesc(uint64_t hostReadIndexPointer, uint64_t basePointer, uint64_t queue_id, uint32_t size, int doorbellSize, GfxVersion gfxVersion, Addr offset=0, uint64_t rd_idx=0)
void displayQueueDescriptor(int pid, uint32_t rl_idx)
HSAQueueDescriptor * getQueueDesc(uint32_t queId)
Q_STATE processPkt(void *pkt, uint32_t rl_idx, Addr host_pkt_addr)
void finishPkt(void *pkt, uint32_t rl_idx)
virtual AddrRangeList getAddrRanges() const override
Every PIO device is obliged to provide an implementation that returns the address ranges the device r...
void unsetDeviceQueueDesc(uint64_t queue_id, int doorbellSize)
void schedAQLProcessing(uint32_t rl_idx)
void setDevice(GPUCommandProcessor *dev)
const Tick pktProcessDelay
virtual Tick read(Packet *) override
HSAPacketProcessor(const Params &p)
HSAQueueDescriptor(uint64_t base_ptr, uint64_t db_ptr, uint64_t hri_ptr, uint32_t size, GfxVersion gfxVersion)
uint64_t ptr(uint64_t ix)
uint64_t spaceRemaining()
uint64_t hostReadIndexPtr
bool stalledOnDmaBufAvailability
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
static const Priority Default_Pri
Default is zero for historical reasons.
@ HSA_PACKET_HEADER_TYPE
Packet type.
@ HSA_PACKET_TYPE_INVALID
The packet has been processed in the past, but has not been reassigned to the packet processor.
@ HSA_PACKET_HEADER_WIDTH_TYPE
int32_t hsa_signal_value_t
Signal value.
#define NumSignalsPerBarrier
Copyright (c) 2024 - Pranith Kumar Copyright (c) 2020 Inria All rights reserved.
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
uint64_t Tick
Tick count type.
std::unique_ptr< TranslationGen > TranslationGenPtr
Calls getCurrentEntry once the queueEntry has been dmaRead.
dma_series_ctx(uint32_t _pkts_ttl, uint32_t _pkts_2_go, uint32_t _start_ix, uint32_t _rl_idx)
QCntxt(HSAQueueDescriptor *q_desc, AQLRingBuffer *aql_buf)
HSAQueueDescriptor * qDesc