43 #include "debug/HSAPacketProcessor.hh"
54 #define HSAPP_EVENT_DESCRIPTION_GENERATOR(XEVENT) \
56 HSAPacketProcessor::XEVENT::description() const \
// Extract the packet-type field from the `header` member of the packet that
// PKT points to, and return it as an hsa_packet_type_t.
//
// The field starts at bit HSA_PACKET_HEADER_TYPE. Its mask is built as
// (1 << width) - 1; the previous form used (width - 1) directly as the mask,
// which only covers the whole field when the width happens to be 1 or 2.
// NOTE(review): assumes HSA_PACKET_HEADER_WIDTH_TYPE is a bit count, as in
// the HSA runtime header -- confirm against hsa.h.
//
// PKT is parenthesized so that pointer expressions such as `p + 1` expand
// correctly.
#define PKT_TYPE(PKT) ((hsa_packet_type_t)((((PKT)->header) >> \
            HSA_PACKET_HEADER_TYPE) & \
            ((1u << HSA_PACKET_HEADER_WIDTH_TYPE) - 1u)))
// Extract the barrier field from the `header` member of the packet that PKT
// points to. The result is non-zero when the barrier bit is set.
//
// The field starts at bit HSA_PACKET_HEADER_BARRIER. Its mask is built as
// (1 << width) - 1 rather than using the width constant itself as the mask,
// so the macro stays correct whatever the width value is.
// NOTE(review): assumes HSA_PACKET_HEADER_WIDTH_BARRIER is a bit count, as in
// the HSA runtime header -- confirm against hsa.h.
//
// The result keeps the original hsa_packet_header_t cast for compatibility
// with existing callers. PKT is parenthesized so that pointer expressions
// such as `p + 1` expand correctly.
#define IS_BARRIER(PKT) ((hsa_packet_header_t)((((PKT)->header) >> \
            HSA_PACKET_HEADER_BARRIER) & \
            ((1u << HSA_PACKET_HEADER_WIDTH_BARRIER) - 1u)))
75 :
DmaDevice(
p), numHWQueues(
p.numHWQueues), pioAddr(
p.pioAddr),
76 pioSize(
PAGE_SIZE), pioDelay(10), pktProcessDelay(
p.pktProcessDelay)
80 regdQList.resize(numHWQueues);
81 for (
int i = 0;
i < numHWQueues;
i++) {
101 uint64_t basePointer,
106 "%s:base = %p, qID = %d, ze = %d\n", __FUNCTION__,
107 (
void *)basePointer, queue_id, size);
109 basePointer, queue_id, size);
133 "%s: write of size %d to reg-offset %d (0x%x)\n",
134 __FUNCTION__, pkt->
getSize(), daddr, daddr);
136 uint32_t doorbell_reg = pkt->
getLE<uint32_t>();
139 "%s: write data 0x%x to offset %d (0x%x)\n",
140 __FUNCTION__, doorbell_reg, daddr, daddr);
160 auto process =
sys->
threads[0]->getProcessPtr();
162 if (!process->pTable->translate(
vaddr, paddr))
163 fatal(
"failed translation: vaddr 0x%x\n",
vaddr);
176 uint8_t *loc_data = (uint8_t*)
data;
187 (this->*dmaFn)(phys, gen.size(), ev, loc_data, delay);
189 loc_data += gen.size();
198 "%s:host_addr = 0x%lx, size = %d\n", __FUNCTION__, host_addr, size);
211 :
Event(Default_Pri, AutoDelete)
225 "%s: read-pointer offset [0x%x]\n", __FUNCTION__, aqlbuf->
rdIdx());
228 sizeof(aqlbuf->
rdIdx()),
232 "%s: rd-ptr offset [0x%x], wr-ptr offset [0x%x], space used = %d," \
233 " q size = %d, is_empty = %s, active list ID = %d\n", __FUNCTION__,
243 uint32_t _ix_start,
unsigned _num_pkts,
245 :
Event(Default_Pri, AutoDelete), hsaPP(_hsaPP), pid(_pid), isRead(_isRead),
246 ix_start(_ix_start), num_pkts(_num_pkts), series_ctx(_series_ctx),
247 dest_4debug(_dest_4debug)
252 "active list ID = %d\n", __FUNCTION__,
259 uint32_t rl_idx = series_ctx->rl_idx;
261 hsaPP->regdQList[rl_idx]->qCntxt.aqlBuf;
263 hsaPP->regdQList[rl_idx]->qCntxt.qDesc;
265 " pktsRemaining = %d, active list ID = %d\n", __FUNCTION__,
266 ix_start, num_pkts, series_ctx->pkts_2_go,
269 series_ctx->pkts_2_go -= num_pkts;
270 if (series_ctx->pkts_2_go == 0) {
274 "%s: schedule Qwakeup next cycle, rdIdx %d, wrIdx %d," \
275 " dispIdx %d, active list ID = %d\n",
276 __FUNCTION__, aqlRingBuffer->rdIdx(),
277 aqlRingBuffer->wrIdx(), aqlRingBuffer->dispIdx(), rl_idx);
279 hsaPP->schedAQLProcessing(rl_idx);
320 regdQList[rl_idx]->compltnPending() > 0) {
325 " list ID = %d\n", __FUNCTION__, rl_idx);
331 " active list ID = %d\n", __FUNCTION__, rl_idx);
337 " active list ID = %d\n", __FUNCTION__, rl_idx);
343 " active list ID = %d\n", __FUNCTION__, rl_idx);
350 if (bar_and_pkt->dep_signal[
i]) {
353 uint64_t signal_addr =
354 (uint64_t) (((uint64_t *) bar_and_pkt->dep_signal[
i]) + 1);
358 " , sig addr %x, value %d active list ID = %d\n",
359 __FUNCTION__,
i, signal_addr,
360 *signal_val, rl_idx);
364 if (*signal_val != 0) {
370 sgnl_rd_evnt, signal_val);
373 " active list %d\n", __FUNCTION__,
382 sgnl_rd_evnt, signal_val);
385 " active list %d\n", __FUNCTION__,
393 " active list ID = %d\n", __FUNCTION__, rl_idx);
401 if (bar_and_pkt->completion_signal != 0) {
408 bar_and_pkt->completion_signal);
411 " completion signal! Addr: %x\n",
412 bar_and_pkt->completion_signal);
420 dep_sgnl_rd_st->
allRead =
false;
424 fatal(
"Unsupported packet type HSA_PACKET_TYPE_BARRIER_OR");
426 fatal(
"Unsupported packet type HSA_PACKET_TYPE_INVALID");
429 " active list ID = %d\n", __FUNCTION__, rl_idx);
432 (
void *)disp_pkt, rl_idx, host_pkt_addr);
436 fatal(
"Unsupported packet type %d\n", pkt_type);
447 AQLRingBuffer *aqlRingBuffer = hsaPP->regdQList[rqIdx]->qCntxt.aqlBuf;
449 "%s: Qwakeup , rdIdx %d, wrIdx %d," \
450 " dispIdx %d, active list ID = %d\n",
451 __FUNCTION__, aqlRingBuffer->
rdIdx(),
452 aqlRingBuffer->
wrIdx(), aqlRingBuffer->
dispIdx(), rqIdx);
455 if (hsaPP->regdQList[rqIdx]->getBarrierBit()) {
457 "Dummy wakeup with barrier bit for rdIdx %d\n", rqIdx);
464 while (hsaPP->regdQList[rqIdx]->dispPending()) {
465 void *pkt = aqlRingBuffer->
ptr(aqlRingBuffer->
dispIdx());
467 __FUNCTION__, aqlRingBuffer->
dispIdx());
469 Q_STATE q_state = hsaPP->processPkt(pkt, rqIdx, host_addr);
473 __FUNCTION__, aqlRingBuffer->
dispIdx());
474 if (hsaPP->regdQList[rqIdx]->dispPending()) {
475 hsaPP->schedAQLProcessing(rqIdx);
481 hsaPP->schedAQLProcessing(rqIdx);
488 hsaPP->schedAQLProcessing(rqIdx);
491 panic(
"Unknown queue state\n");
499 assert(pendingReads > 0);
501 if (pendingReads == 0) {
516 "%s: read-pointer offset[0x%x], write-pointer offset[0x%x]"
517 " doorbell(%d)[0x%x] \n",
530 uint32_t ttl_aql_buf = aqlRingBuffer->
numObjs();
533 uint32_t got_aql_buf = aqlRingBuffer->
allocEntry(num_umq);
535 uint32_t dma_start_ix = (aqlRingBuffer->
wrIdx() - got_aql_buf) %
540 "dma_start_ix = %d, num_umq = %d\n", __FUNCTION__, umq_nxt,
541 ttl_aql_buf, dma_start_ix, num_umq);
543 if (got_aql_buf == 0) {
551 uint32_t dma_b4_wrap = ttl_aql_buf - dma_start_ix;
552 while (got_aql_buf != 0 && num_umq != 0) {
553 uint32_t umq_b4_wrap = qDesc->
numObjs() -
556 = std::min({umq_b4_wrap, dma_b4_wrap, num_umq, got_aql_buf});
560 dma_start_ix, rl_idx);
563 void *aql_buf = aqlRingBuffer->
ptr(dma_start_ix);
566 num_2_xfer, series_ctx, aql_buf);
568 "%s: aql_buf = %p, umq_nxt = %d, dma_ix = %d, num2xfer = %d\n",
569 __FUNCTION__, aql_buf, umq_nxt, dma_start_ix, num_2_xfer);
577 num_umq -= num_2_xfer;
578 got_aql_buf -= num_2_xfer;
579 dma_start_ix = (dma_start_ix + num_2_xfer) % ttl_aql_buf;
580 umq_nxt = (umq_nxt + num_2_xfer) % qDesc->
numObjs();
581 if (got_aql_buf == 0 && num_umq != 0) {
595 "%s: pid[%d], basePointer[0x%lx], dBPointer[0x%lx], "
596 "writeIndex[0x%x], readIndex[0x%x], size(bytes)[0x%x]\n",
597 __FUNCTION__, pid, qDesc->basePointer,
598 qDesc->doorbellPointer, qDesc->writeIndex,
599 qDesc->readIndex, qDesc->numElts);
603 const std::string
name)
604 : _name(
name), _wrIdx(0), _rdIdx(0), _dispIdx(0)
620 " # free entries = %d, wrIdx = %d, rdIdx = %d\n", __FUNCTION__,
626 uint32_t old_rdIdx =
rdIdx();
632 return (old_rdIdx !=
rdIdx());
650 if (nBufReq >
nFree())
656 __FUNCTION__, nBufReq,
wrIdx());
668 if (
regdQList[rl_idx]->getBarrierBit() &&
669 regdQList[rl_idx]->isLastOutstandingPkt()) {
671 "Unset barrier bit for active list ID %d\n", rl_idx);
674 "There should be pending kernels in this queue\n");
676 "Rescheduling active list ID %d after unsetting barrier "
685 if (
regdQList[rl_idx]->qCntxt.aqlBuf->freeEntry(pvPkt))
688 "%s: rd-ptr offset [0x%x], wr-ptr offset [0x%x], space used = %d," \
689 " q size = %d, stalled = %s, empty = %s, active list ID = %d\n",
693 qDesc->
isEmpty()?
"true" :
"false", rl_idx);
709 uint64_t signal_addr =
710 (uint64_t) (((uint64_t *)agent_pkt->completion_signal) + 1);
712 " completion signal: %x!\n", signal_addr);
737 uint64_t signal_addr = (uint64_t) (((uint64_t *)signal) + 1);