42 #include "debug/HSAPacketProcessor.hh"
53 #define HSAPP_EVENT_DESCRIPTION_GENERATOR(XEVENT) \
55 HSAPacketProcessor::XEVENT::description() const \
// Extracts the packet-type field from an AQL packet header: shift the field
// down to the LSB, then mask off everything above it.
//
// PKT is a pointer (or pointer-valued expression) to a packet with a
// `header` member; it is parenthesized so expressions such as `&pkt` bind
// correctly.
//
// HSA_PACKET_HEADER_WIDTH_TYPE is the field width in bits (per the HSA
// runtime spec), so the mask is (1 << width) - 1 rather than width - 1.
#define PKT_TYPE(PKT) ((hsa_packet_type_t)((((PKT)->header) >> \
            HSA_PACKET_HEADER_TYPE) & \
            ((1u << HSA_PACKET_HEADER_WIDTH_TYPE) - 1)))
// Checks whether the barrier bit is set in an AQL packet header: shift the
// barrier field down to the LSB, then mask off all other bits.
//
// PKT is a pointer (or pointer-valued expression) to a packet with a
// `header` member; it is parenthesized so expressions such as `&pkt` bind
// correctly.
//
// The mask is derived from the field width in bits as (1 << width) - 1.
// For the 1-bit barrier field this equals the previous mask (the width
// constant itself), so results are unchanged.
#define IS_BARRIER(PKT) ((hsa_packet_header_t)((((PKT)->header) >> \
            HSA_PACKET_HEADER_BARRIER) & \
            ((1u << HSA_PACKET_HEADER_WIDTH_BARRIER) - 1)))
74 :
DmaDevice(
p), numHWQueues(
p->numHWQueues), pioAddr(
p->pioAddr),
75 pioSize(
PAGE_SIZE), pioDelay(10), pktProcessDelay(
p->pktProcessDelay)
79 regdQList.resize(numHWQueues);
80 for (
int i = 0;
i < numHWQueues;
i++) {
100 uint64_t basePointer,
105 "%s:base = %p, qID = %d, ze = %d\n", __FUNCTION__,
106 (
void *)basePointer, queue_id, size);
108 basePointer, queue_id, size);
132 "%s: write of size %d to reg-offset %d (0x%x)\n",
133 __FUNCTION__, pkt->
getSize(), daddr, daddr);
135 uint32_t doorbell_reg = pkt->
getLE<uint32_t>();
138 "%s: write data 0x%x to offset %d (0x%x)\n",
139 __FUNCTION__, doorbell_reg, daddr, daddr);
159 auto process =
sys->
threads[0]->getProcessPtr();
161 if (!process->pTable->translate(
vaddr, paddr))
162 fatal(
"failed translation: vaddr 0x%x\n",
vaddr);
175 uint8_t *loc_data = (uint8_t*)
data;
186 (this->*dmaFn)(phys, gen.size(), ev, loc_data, delay);
188 loc_data += gen.size();
197 "%s:host_addr = 0x%lx, size = %d\n", __FUNCTION__, host_addr, size);
210 :
Event(Default_Pri, AutoDelete)
224 "%s: read-pointer offset [0x%x]\n", __FUNCTION__, aqlbuf->
rdIdx());
227 sizeof(aqlbuf->
rdIdx()),
231 "%s: rd-ptr offset [0x%x], wr-ptr offset [0x%x], space used = %d," \
232 " q size = %d, is_empty = %s, active list ID = %d\n", __FUNCTION__,
242 uint32_t _ix_start,
unsigned _num_pkts,
244 :
Event(Default_Pri, AutoDelete), hsaPP(_hsaPP), pid(_pid), isRead(_isRead),
245 ix_start(_ix_start), num_pkts(_num_pkts), series_ctx(_series_ctx),
246 dest_4debug(_dest_4debug)
251 "active list ID = %d\n", __FUNCTION__,
258 uint32_t rl_idx = series_ctx->rl_idx;
260 hsaPP->regdQList[rl_idx]->qCntxt.aqlBuf;
262 hsaPP->regdQList[rl_idx]->qCntxt.qDesc;
264 " pktsRemaining = %d, active list ID = %d\n", __FUNCTION__,
265 ix_start, num_pkts, series_ctx->pkts_2_go,
268 series_ctx->pkts_2_go -= num_pkts;
269 if (series_ctx->pkts_2_go == 0) {
273 "%s: schedule Qwakeup next cycle, rdIdx %d, wrIdx %d," \
274 " dispIdx %d, active list ID = %d\n",
275 __FUNCTION__, aqlRingBuffer->rdIdx(),
276 aqlRingBuffer->wrIdx(), aqlRingBuffer->dispIdx(), rl_idx);
278 hsaPP->schedAQLProcessing(rl_idx);
319 regdQList[rl_idx]->compltnPending() > 0) {
324 " list ID = %d\n", __FUNCTION__, rl_idx);
330 " active list ID = %d\n", __FUNCTION__, rl_idx);
336 " active list ID = %d\n", __FUNCTION__, rl_idx);
342 " active list ID = %d\n", __FUNCTION__, rl_idx);
349 if (bar_and_pkt->dep_signal[
i]) {
352 uint64_t signal_addr =
353 (uint64_t) (((uint64_t *) bar_and_pkt->dep_signal[
i]) + 1);
357 " , sig addr %x, value %d active list ID = %d\n",
358 __FUNCTION__,
i, signal_addr,
359 *signal_val, rl_idx);
363 if (*signal_val != 0) {
369 sgnl_rd_evnt, signal_val);
372 " active list %d\n", __FUNCTION__,
381 sgnl_rd_evnt, signal_val);
384 " active list %d\n", __FUNCTION__,
392 " active list ID = %d\n", __FUNCTION__, rl_idx);
400 if (bar_and_pkt->completion_signal != 0) {
403 uint64_t signal_addr =
404 (uint64_t) (((uint64_t *)
405 bar_and_pkt->completion_signal) + 1);
407 " completion signal: %x!\n", signal_addr);
428 dep_sgnl_rd_st->
allRead =
false;
432 fatal(
"Unsupported packet type HSA_PACKET_TYPE_BARRIER_OR");
434 fatal(
"Unsupported packet type HSA_PACKET_TYPE_INVALID");
436 fatal(
"Unsupported packet type %d\n", pkt_type);
447 AQLRingBuffer *aqlRingBuffer = hsaPP->regdQList[rqIdx]->qCntxt.aqlBuf;
449 "%s: Qwakeup , rdIdx %d, wrIdx %d," \
450 " dispIdx %d, active list ID = %d\n",
451 __FUNCTION__, aqlRingBuffer->
rdIdx(),
452 aqlRingBuffer->
wrIdx(), aqlRingBuffer->
dispIdx(), rqIdx);
455 if (hsaPP->regdQList[rqIdx]->getBarrierBit()) {
457 "Dummy wakeup with barrier bit for rdIdx %d\n", rqIdx);
464 while (hsaPP->regdQList[rqIdx]->dispPending()) {
465 void *pkt = aqlRingBuffer->
ptr(aqlRingBuffer->
dispIdx());
467 __FUNCTION__, aqlRingBuffer->
dispIdx());
469 Q_STATE q_state = hsaPP->processPkt(pkt, rqIdx, host_addr);
473 __FUNCTION__, aqlRingBuffer->
dispIdx());
474 if (hsaPP->regdQList[rqIdx]->dispPending()) {
475 hsaPP->schedAQLProcessing(rqIdx);
481 hsaPP->schedAQLProcessing(rqIdx);
488 hsaPP->schedAQLProcessing(rqIdx);
491 panic(
"Unknown queue state\n");
499 assert(pendingReads > 0);
501 if (pendingReads == 0) {
516 "%s: read-pointer offset[0x%x], write-pointer offset[0x%x]"
517 " doorbell(%d)[0x%x] \n",
530 uint32_t ttl_aql_buf = aqlRingBuffer->
numObjs();
533 uint32_t got_aql_buf = aqlRingBuffer->
allocEntry(num_umq);
535 uint32_t dma_start_ix = (aqlRingBuffer->
wrIdx() - got_aql_buf) %
540 "dma_start_ix = %d, num_umq = %d\n", __FUNCTION__, umq_nxt,
541 ttl_aql_buf, dma_start_ix, num_umq);
543 if (got_aql_buf == 0) {
551 uint32_t dma_b4_wrap = ttl_aql_buf - dma_start_ix;
552 while (got_aql_buf != 0 && num_umq != 0) {
553 uint32_t umq_b4_wrap = qDesc->
numObjs() -
556 = std::min({umq_b4_wrap, dma_b4_wrap, num_umq, got_aql_buf});
560 dma_start_ix, rl_idx);
563 void *aql_buf = aqlRingBuffer->
ptr(dma_start_ix);
566 num_2_xfer, series_ctx, aql_buf);
568 "%s: aql_buf = %p, umq_nxt = %d, dma_ix = %d, num2xfer = %d\n",
569 __FUNCTION__, aql_buf, umq_nxt, dma_start_ix, num_2_xfer);
577 num_umq -= num_2_xfer;
578 got_aql_buf -= num_2_xfer;
579 dma_start_ix = (dma_start_ix + num_2_xfer) % ttl_aql_buf;
580 umq_nxt = (umq_nxt + num_2_xfer) % qDesc->
numObjs();
581 if (got_aql_buf == 0 && num_umq != 0) {
595 "%s: pid[%d], basePointer[0x%lx], dBPointer[0x%lx], "
596 "writeIndex[0x%x], readIndex[0x%x], size(bytes)[0x%x]\n",
597 __FUNCTION__, pid, qDesc->basePointer,
598 qDesc->doorbellPointer, qDesc->writeIndex,
599 qDesc->readIndex, qDesc->numElts);
603 const std::string
name)
604 : _name(
name), _wrIdx(0), _rdIdx(0), _dispIdx(0)
620 " # free entries = %d, wrIdx = %d, rdIdx = %d\n", __FUNCTION__,
626 uint32_t old_rdIdx =
rdIdx();
632 return (old_rdIdx !=
rdIdx());
650 if (nBufReq >
nFree())
656 __FUNCTION__, nBufReq,
wrIdx());
661 HSAPacketProcessorParams::create()
674 if (
regdQList[rl_idx]->getBarrierBit() &&
675 regdQList[rl_idx]->isLastOutstandingPkt()) {
677 "Unset barrier bit for active list ID %d\n", rl_idx);
680 "There should be pending kernels in this queue\n");
682 "Rescheduling active list ID %d after unsetting barrier "
691 if (
regdQList[rl_idx]->qCntxt.aqlBuf->freeEntry(pvPkt))
694 "%s: rd-ptr offset [0x%x], wr-ptr offset [0x%x], space used = %d," \
695 " q size = %d, stalled = %s, empty = %s, active list ID = %d\n",
699 qDesc->
isEmpty()?
"true" :
"false", rl_idx);