37 #include "debug/GPUFetch.hh"
38 #include "debug/GPUPort.hh"
39 #include "debug/GPUTLB.hh"
53 : timingSim(true), computeUnit(cu), fetchScheduler(
p),
54 waveList(nullptr), fetchDepth(
p.fetch_depth)
95 if (!fetch_buf.hasFreeSpace()) {
96 fetch_buf.checkWaveReleaseBuf();
98 if (fetch_buf.hasFetchDataToProcess()) {
99 fetch_buf.decodeInsts();
157 DPRINTF(GPUFetch,
"CU%d: WF[%d][%d]: Id%d: Initiate fetch "
161 DPRINTF(GPUTLB,
"CU%d: WF[%d][%d]: Initiating fetch translation: %#x\n",
184 DPRINTF(GPUTLB,
"Failed to send TLB req for FETCH addr %#x\n",
195 DPRINTF(GPUTLB,
"Failed to send TLB req for FETCH addr %#x\n",
200 DPRINTF(GPUTLB,
"sent FETCH translation request for %#x\n",
vaddr);
214 if (!pkt->
req->systemReq()) {
225 fetch(pkt, wavefront);
232 assert(pkt->
req->hasPaddr());
233 assert(pkt->
req->hasSize());
235 DPRINTF(GPUFetch,
"CU%d: WF[%d][%d]: Fetch Access: %#x\n",
237 pkt->
req->getPaddr());
266 if (!pkt->
req->systemReq()) {
276 .reservedBuf(pkt->
req->getVaddr()));
284 if (pkt->
req->systemReq()) {
292 DPRINTF(GPUPort,
"CU%d: WF[%d][%d]: Fetch addr %#x failed!\n",
294 pkt->
req->getPaddr());
296 DPRINTF(GPUPort,
"CU%d: WF[%d][%d]: Fetch addr %#x sent!\n",
298 pkt->
req->getPaddr());
310 safe_cast<ComputeUnit::SQCPort::SenderState*>(pkt->
senderState);
314 DPRINTF(GPUFetch,
"CU%d: WF[%d][%d]: Fetch addr %#x returned "
357 "Cache line size should be a power of two.");
372 restartFromBranch =
true;
383 freeList.push_back(bufStart +
i * cacheLineSize);
386 DPRINTF(GPUFetch,
"WF[%d][%d]: Id%d Fetch dropped, flushing fetch "
387 "buffer\n", wavefront->simdId, wavefront->wfSlotId,
396 if (bufferedAndReservedLines()) {
397 Addr last_line_fetched = 0;
398 if (!reservedLines()) {
403 last_line_fetched = bufferedPCs.rbegin()->first;
405 last_line_fetched = reservedPCs.rbegin()->first;
408 next_line = last_line_fetched + cacheLineSize;
414 assert(bufferedPCs.find(next_line) == bufferedPCs.end());
415 assert(reservedPCs.find(next_line) == reservedPCs.end());
434 if (restartFromBranch) {
435 restartFromBranch =
false;
439 readPtr += byte_offset;
451 assert(hasFreeSpace());
452 assert(bufferedPCs.find(
vaddr) == bufferedPCs.end());
453 assert(reservedPCs.find(
vaddr) == reservedPCs.end());
454 assert(bufferedAndReservedLines() <
fetchDepth);
456 DPRINTF(GPUFetch,
"WF[%d][%d]: Id%d reserved fetch buffer entry "
457 "for PC = %#x\n", wavefront->simdId, wavefront->wfSlotId,
458 wavefront->wfDynId,
vaddr);
466 uint8_t *inst_buf = freeList.front();
467 reservedPCs.emplace(
vaddr, inst_buf);
468 freeList.pop_front();
474 assert(bufferedPCs.find(
vaddr) == bufferedPCs.end());
475 DPRINTF(GPUFetch,
"WF[%d][%d]: Id%d done fetching for addr %#x\n",
476 wavefront->simdId, wavefront->wfSlotId,
477 wavefront->wfDynId,
vaddr);
484 auto reserved_pc = reservedPCs.find(
vaddr);
485 assert(reserved_pc != reservedPCs.end());
486 bufferedPCs.emplace(
vaddr, reserved_pc->second);
488 if (readPtr == bufEnd) {
492 reserved_pc->second =
nullptr;
493 reservedPCs.erase(reserved_pc);
506 wavefront->computeUnit->cacheLineSize());
507 if (reservedPCs.find(cur_wave_pc) != reservedPCs.end()) {
508 DPRINTF(GPUFetch,
"WF[%d][%d]: Id%d current wave PC(%#x) still "
509 "being fetched.\n", wavefront->simdId, wavefront->wfSlotId,
510 wavefront->wfDynId, cur_wave_pc);
513 assert(bufferedPCs.find(cur_wave_pc) == bufferedPCs.end());
518 auto current_buffered_pc = bufferedPCs.find(cur_wave_pc);
519 auto oldest_buffered_pc = bufferedPCs.begin();
521 DPRINTF(GPUFetch,
"WF[%d][%d]: Id%d checking if PC block addr = %#x"
522 "(PC = %#x) can be released.\n", wavefront->simdId,
523 wavefront->wfSlotId, wavefront->wfDynId, cur_wave_pc,
528 for (
const auto &buf_pc : bufferedPCs) {
529 DPRINTF(GPUFetch,
"PC[%d] = %#x\n", idx, buf_pc.first);
536 assert(current_buffered_pc != bufferedPCs.end());
544 if (current_buffered_pc != oldest_buffered_pc) {
545 DPRINTF(GPUFetch,
"WF[%d][%d]: Id%d done fetching for PC = %#x, "
546 "removing it from the fetch buffer.\n", wavefront->simdId,
547 wavefront->wfSlotId, wavefront->wfDynId,
548 oldest_buffered_pc->first);
550 freeList.emplace_back(oldest_buffered_pc->second);
551 oldest_buffered_pc->second =
nullptr;
552 bufferedPCs.erase(oldest_buffered_pc);
553 DPRINTF(GPUFetch,
"WF[%d][%d]: Id%d has %d lines buffered.\n",
554 wavefront->simdId, wavefront->wfSlotId, wavefront->wfDynId,
568 while (wavefront->instructionBuffer.size() < maxIbSize
569 && hasFetchDataToProcess()) {
575 GPUStaticInst *gpu_static_inst = _decoder->decode(mach_inst);
576 readPtr += gpu_static_inst->
instSize();
578 assert(readPtr <= bufEnd);
581 = std::make_shared<GPUDynInst>(wavefront->computeUnit,
582 wavefront, gpu_static_inst,
583 wavefront->computeUnit->
585 wavefront->instructionBuffer.push_back(gpu_dyn_inst);
587 DPRINTF(GPUFetch,
"WF[%d][%d]: Id%ld decoded %s (%d bytes). "
588 "%d bytes remain.\n", wavefront->simdId,
589 wavefront->wfSlotId, wavefront->wfDynId,
592 fetchBytesRemaining());
601 int dword_size =
sizeof(uint32_t);
604 for (
int i = 0;
i < num_dwords; ++
i) {
606 *
reinterpret_cast<uint32_t*
>(readPtr));
607 if (readPtr + dword_size >= bufEnd) {
612 assert(readPtr == bufStart);
616 GPUStaticInst *gpu_static_inst = _decoder->decode(mach_inst);
617 readPtr += (gpu_static_inst->
instSize() - dword_size);
618 assert(readPtr < bufEnd);
621 = std::make_shared<GPUDynInst>(wavefront->computeUnit,
622 wavefront, gpu_static_inst,
623 wavefront->computeUnit->
625 wavefront->instructionBuffer.push_back(gpu_dyn_inst);
627 DPRINTF(GPUFetch,
"WF[%d][%d]: Id%d decoded split inst %s (%#x) "
628 "(%d bytes). %d bytes remain in %d buffered lines.\n",
629 wavefront->simdId, wavefront->wfSlotId, wavefront->wfDynId,
631 gpu_static_inst->
instSize(), fetchBytesRemaining(),
650 int bytes_remaining = 0;
652 if (bufferedLines() && readPtr != bufEnd) {
653 auto last_buf_pc = bufferedPCs.rbegin();
654 uint8_t *end_ptr = last_buf_pc->second + cacheLineSize;
655 int byte_diff = end_ptr - readPtr;
657 if (end_ptr > readPtr) {
658 bytes_remaining = byte_diff;
659 }
else if (end_ptr < readPtr) {
660 bytes_remaining = bufferedBytes() + byte_diff;
664 assert(bytes_remaining <= bufferedBytes());
665 return bytes_remaining;
671 reqPkt->makeResponse();
672 fetchUnit->computeUnit.handleSQCReturn(reqPkt);