42 #include "debug/HSAPacketProcessor.hh" 53 #define HSAPP_EVENT_DESCRIPTION_GENERATOR(XEVENT) \ 55 HSAPacketProcessor::XEVENT::description() const \ 60 #define PKT_TYPE(PKT) ((hsa_packet_type_t)(((PKT->header) >> \ 61 HSA_PACKET_HEADER_TYPE) & (HSA_PACKET_HEADER_WIDTH_TYPE - 1))) 69 :
DmaDevice(
p), numHWQueues(p->numHWQueues), pioAddr(p->pioAddr),
70 pioSize(
PAGE_SIZE), pioDelay(10), pktProcessDelay(p->pktProcessDelay)
74 regdQList.resize(numHWQueues);
75 for (
int i = 0;
i < numHWQueues;
i++) {
100 "%s:base = %p, qID = %d, ze = %d\n", __FUNCTION__,
101 (
void *)basePointer, queue_id, size);
103 basePointer, queue_id, size);
127 "%s: write of size %d to reg-offset %d (0x%x)\n",
128 __FUNCTION__, pkt->
getSize(), daddr, daddr);
130 uint32_t doorbell_reg = pkt->
getLE<uint32_t>();
133 "%s: write data 0x%x to offset %d (0x%x)\n",
134 __FUNCTION__, doorbell_reg, daddr, daddr);
156 if (!process->pTable->translate(vaddr, paddr))
157 fatal(
"failed translation: vaddr 0x%x\n", vaddr);
170 uint8_t *loc_data = (uint8_t*)data;
181 (this->*dmaFn)(phys, gen.size(), ev, loc_data, delay);
183 loc_data += gen.size();
192 "%s:host_addr = 0x%lx, size = %d\n", __FUNCTION__, host_addr, size);
205 :
Event(Default_Pri, AutoDelete)
219 "%s: read-pointer offset [0x%x]\n", __FUNCTION__, aqlbuf->
rdIdx());
222 sizeof(aqlbuf->
rdIdx()),
226 "%s: rd-ptr offset [0x%x], wr-ptr offset [0x%x], space used = %d," \
227 " q size = %d, is_empty = %s, active list ID = %d\n", __FUNCTION__,
237 uint32_t _ix_start,
unsigned _num_pkts,
240 ix_start(_ix_start), num_pkts(_num_pkts), series_ctx(_series_ctx),
241 dest_4debug(_dest_4debug)
246 "active list ID = %d\n", __FUNCTION__,
259 " pktsRemaining = %d, active list ID = %d\n", __FUNCTION__,
268 "%s: schedule Qwakeup next cycle, rdIdx %d, wrIdx %d," \
269 " dispIdx %d, active list ID = %d\n",
270 __FUNCTION__, aqlRingBuffer->rdIdx(),
271 aqlRingBuffer->wrIdx(), aqlRingBuffer->dispIdx(), rl_idx);
296 bool is_submitted =
false;
309 " active list ID = %d\n", __FUNCTION__, rl_idx);
315 " active list ID = %d\n", __FUNCTION__, rl_idx);
321 " active list ID = %d\n", __FUNCTION__, rl_idx);
328 if (bar_and_pkt->dep_signal[
i]) {
331 uint64_t signal_addr =
332 (uint64_t) (((uint64_t *) bar_and_pkt->dep_signal[
i]) + 1);
336 " , sig addr %x, value %d active list ID = %d\n",
337 __FUNCTION__,
i, signal_addr,
338 *signal_val, rl_idx);
342 if (*signal_val != 0) {
348 sgnl_rd_evnt, signal_val);
351 " active list %d\n", __FUNCTION__,
360 sgnl_rd_evnt, signal_val);
363 " active list %d\n", __FUNCTION__,
371 " active list ID = %d\n", __FUNCTION__, rl_idx);
379 if (bar_and_pkt->completion_signal != 0) {
382 uint64_t signal_addr =
383 (uint64_t) (((uint64_t *)
384 bar_and_pkt->completion_signal) + 1);
386 " completion signal: %x!\n", signal_addr);
399 prev_signal.
copyIn(virt_proxy);
410 dep_sgnl_rd_st->
allRead =
false;
414 fatal(
"Unsupported packet type HSA_PACKET_TYPE_BARRIER_OR");
416 fatal(
"Unsupported packet type HSA_PACKET_TYPE_INVALID");
418 fatal(
"Unsupported packet type %d\n", pkt_type);
431 "%s: Qwakeup , rdIdx %d, wrIdx %d," \
432 " dispIdx %d, active list ID = %d\n",
433 __FUNCTION__, aqlRingBuffer->
rdIdx(),
434 aqlRingBuffer->
wrIdx(), aqlRingBuffer->
dispIdx(), rqIdx);
440 void *pkt = aqlRingBuffer->
ptr(aqlRingBuffer->
dispIdx());
442 __FUNCTION__, aqlRingBuffer->
dispIdx());
447 __FUNCTION__, aqlRingBuffer->
dispIdx());
463 assert(pendingReads > 0);
465 if (pendingReads == 0) {
480 "%s: read-pointer offset[0x%x], write-pointer offset[0x%x]" 481 " doorbell(%d)[0x%x] \n",
494 uint32_t ttl_aql_buf = aqlRingBuffer->
numObjs();
497 uint32_t got_aql_buf = aqlRingBuffer->
allocEntry(num_umq);
499 uint32_t dma_start_ix = (aqlRingBuffer->
wrIdx() - got_aql_buf) %
504 "dma_start_ix = %d, num_umq = %d\n", __FUNCTION__, umq_nxt,
505 ttl_aql_buf, dma_start_ix, num_umq);
507 if (got_aql_buf == 0) {
515 uint32_t dma_b4_wrap = ttl_aql_buf - dma_start_ix;
516 while (got_aql_buf != 0 && num_umq != 0) {
517 uint32_t umq_b4_wrap = qDesc->
numObjs() -
520 = std::min({umq_b4_wrap, dma_b4_wrap, num_umq, got_aql_buf});
524 dma_start_ix, rl_idx);
527 void *aql_buf = aqlRingBuffer->
ptr(dma_start_ix);
530 num_2_xfer, series_ctx, aql_buf);
532 "%s: aql_buf = %p, umq_nxt = %d, dma_ix = %d, num2xfer = %d\n",
533 __FUNCTION__, aql_buf, umq_nxt, dma_start_ix, num_2_xfer);
541 num_umq -= num_2_xfer;
542 got_aql_buf -= num_2_xfer;
543 dma_start_ix = (dma_start_ix + num_2_xfer) % ttl_aql_buf;
544 umq_nxt = (umq_nxt + num_2_xfer) % qDesc->
numObjs();
545 if (got_aql_buf == 0 && num_umq != 0) {
559 "%s: pid[%d], basePointer[0x%lx], dBPointer[0x%lx], " 560 "writeIndex[0x%x], readIndex[0x%x], size(bytes)[0x%x]\n",
561 __FUNCTION__, pid, qDesc->basePointer,
562 qDesc->doorbellPointer, qDesc->writeIndex,
563 qDesc->readIndex, qDesc->numElts);
567 const std::string
name)
568 : _name(name), _wrIdx(0), _rdIdx(0), _dispIdx(0)
584 " # free entries = %d, wrIdx = %d, rdIdx = %d\n", __FUNCTION__,
590 uint32_t old_rdIdx =
rdIdx();
596 return (old_rdIdx !=
rdIdx());
602 this->hsa_device = dev;
614 if (nBufReq >
nFree())
620 __FUNCTION__, nBufReq,
wrIdx());
625 HSAPacketProcessorParams::create()
634 if (regdQList[rl_idx]->qCntxt.aqlBuf->freeEntry(pvPkt))
635 updateReadIndex(0, rl_idx);
637 "%s: rd-ptr offset [0x%x], wr-ptr offset [0x%x], space used = %d," \
638 " q size = %d, stalled = %s, empty = %s, active list ID = %d\n",
642 qDesc->
isEmpty()?
"true" :
"false", rl_idx);
647 getCommandsFromHost(0, rl_idx);
Packet used by agents to delay processing of subsequent packets, and to express complex dependencies ...
AddrRange RangeSize(Addr start, Addr size)
This file defines buffer classes used to handle pointer arguments in emulated syscalls.
void displayQueueDescriptor(int pid, uint32_t rl_idx)
const Tick pktProcessDelay
void incDispIdx(uint64_t value)
#define fatal(...)
This implements a cprintf based fatal() function.
bool stalledOnDmaBufAvailability
void dmaWrite(Addr addr, int size, Event *event, uint8_t *data, uint32_t sid, uint32_t ssid, Tick delay=0)
static const Priority Default_Pri
Default is zero for historical reasons.
Packet used by agents for dispatching jobs to kernel agents.
bool copyIn(PortProxy &memproxy)
copy data into simulator space (read from target memory)
void unregisterQueue(uint64_t queue_id)
std::vector< bool > _aqlComplete
bool processPkt(void *pkt, uint32_t rl_idx, Addr host_pkt_addr)
#define NumSignalsPerBarrier
HSAPacketProcessor * hsaPP
virtual PortProxy & getVirtProxy()=0
virtual Process * getProcessPtr()=0
virtual AddrRangeList getAddrRanges() const
Every PIO device is obliged to provide an implementation that returns the address ranges the device r...
void saveHostDispAddr(Addr host_pkt_addr, int num_pkts, int ix)
the kernel may try to read from the dispatch packet, so we need to keep the host address that corresp...
TypedBufferArg is a class template; instances of this template represent typed buffers in target user...
int allocEntry(uint32_t nBufReq)
void dmaVirt(DmaFnPtr, Addr host_addr, unsigned size, Event *event, void *data, Tick delay=0)
void dmaReadVirt(Addr host_addr, unsigned size, Event *event, void *data, Tick delay=0)
static const FlagsType AutoDelete
ThreadContext * getThreadContext(ContextID tid) const
int32_t hsa_signal_value_t
Signal value.
std::vector< hsa_signal_value_t > values
void dmaWriteVirt(Addr host_addr, unsigned size, Event *event, void *data, Tick delay=0)
Tick curTick()
The current simulated tick.
virtual const std::string name() const
void finishPkt(void *pkt, uint32_t rl_idx)
virtual Tick write(Packet *)
void makeAtomicResponse()
QueueProcessEvent aqlProcessEvent
uint64_t Tick
Tick count type.
void dmaRead(Addr addr, int size, Event *event, uint8_t *data, uint32_t sid, uint32_t ssid, Tick delay=0)
void translateOrDie(Addr vaddr, Addr &paddr)
void setDeviceQueueDesc(uint64_t hostReadIndexPointer, uint64_t basePointer, uint64_t queue_id, uint32_t size)
This class takes an arbitrary memory region (address/length pair) and generates a series of appropria...
void setDevice(HSADevice *dev)
CmdQueueCmdDmaEvent(HSAPacketProcessor *hsaPP, int pid, bool isRead, uint32_t dma_buf_ix, uint num_bufs, dma_series_ctx *series_ctx, void *dest_4debug)
virtual void submitVendorPkt(void *raw_pkt, uint32_t queue_id, Addr host_pkt_addr)
submitVendorPkt() accepts vendor specific packets from the HSA packet processor.
Addr hostDispAddr() const
The packet has been processed in the past, but has not been reassigned to the packet processor...
void getCommandsFromHost(int pid, uint32_t rl_idx)
void schedule(Event &event, Tick when)
void setFlags(Flags _flags)
hsa_packet_type_t
Packet type.
bool freeEntry(void *pkt)
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
#define HSAPP_EVENT_DESCRIPTION_GENERATOR(XEVENT)
bool done() const
Are we done? That is, did the last call to next() advance past the end of the region?
A Packet is used to encapsulate a transfer between two objects in the memory system (e...
void unsetDeviceQueueDesc(uint64_t queue_id)
bool scheduled() const
Determine if the current event is scheduled.
UpdateReadDispIdDmaEvent()
std::vector< class RQLEntry * > regdQList
Declarations of a non-full system Page Table.
std::vector< hsa_kernel_dispatch_packet_t > _aqlBuf
void incRdIdx(uint64_t value)
void incWrIdx(uint64_t value)
void updateReadIndex(int, uint32_t)
virtual Tick read(Packet *)
T getLE() const
Get the data in the packet byte swapped from little endian to host endian.
Internal ring buffer which is used to prefetch/store copies of the in-memory HSA ring buffer...
void schedAQLProcessing(uint32_t rl_idx)
dma_series_ctx * series_ctx
Declaration and inline definition of ChunkGenerator object.
void write(Addr db_addr, uint32_t doorbell_reg)
virtual void submitDispatchPkt(void *raw_pkt, uint32_t qID, Addr host_pkt_addr)
submitDispatchPkt() accepts AQL dispatch packets from the HSA packet processor.
Calls getCurrentEntry once the queueEntry has been dmaRead.
Packet used by agents to delay processing of subsequent packets, and to express complex dependencies ...
EventQueue * queue
queue to which this event belongs (though it may or may not be scheduled on this queue yet) ...
void(DmaDevice::* DmaFnPtr)(Addr, int, Event *, uint8_t *, Tick)
uint64_t ptr(uint64_t ix)
AQLRingBuffer(uint32_t size, const std::string name)
void registerNewQueue(uint64_t hostReadIndexPointer, uint64_t basePointer, uint64_t queue_id, uint32_t size)
uint64_t hostReadIndexPtr
std::vector< Addr > _hostDispAddresses
AQL kernel dispatch packet.