36 #include "debug/GPUFetch.hh"
37 #include "debug/GPUPort.hh"
38 #include "debug/GPUTLB.hh"
52 : timingSim(true), computeUnit(cu), fetchScheduler(
p),
53 waveList(nullptr), fetchDepth(
p.fetch_depth)
94 if (!fetch_buf.hasFreeSpace()) {
95 fetch_buf.checkWaveReleaseBuf();
97 if (fetch_buf.hasFetchDataToProcess()) {
98 fetch_buf.decodeInsts();
156 DPRINTF(GPUFetch,
"CU%d: WF[%d][%d]: Id%d: Initiate fetch "
160 DPRINTF(GPUTLB,
"CU%d: WF[%d][%d]: Initiating fetch translation: %#x\n",
183 DPRINTF(GPUTLB,
"Failed to send TLB req for FETCH addr %#x\n",
194 DPRINTF(GPUTLB,
"Failed to send TLB req for FETCH addr %#x\n",
199 DPRINTF(GPUTLB,
"sent FETCH translation request for %#x\n",
vaddr);
215 fetch(pkt, wavefront);
222 assert(pkt->
req->hasPaddr());
223 assert(pkt->
req->hasSize());
225 DPRINTF(GPUFetch,
"CU%d: WF[%d][%d]: Fetch Access: %#x\n",
227 pkt->
req->getPaddr());
257 .reservedBuf(pkt->
req->getVaddr()));
269 DPRINTF(GPUPort,
"CU%d: WF[%d][%d]: Fetch addr %#x failed!\n",
271 pkt->
req->getPaddr());
273 DPRINTF(GPUPort,
"CU%d: WF[%d][%d]: Fetch addr %#x sent!\n",
275 pkt->
req->getPaddr());
287 safe_cast<ComputeUnit::SQCPort::SenderState*>(pkt->
senderState);
291 DPRINTF(GPUFetch,
"CU%d: WF[%d][%d]: Fetch addr %#x returned "
334 "Cache line size should be a power of two.");
349 restartFromBranch =
true;
360 freeList.push_back(bufStart +
i * cacheLineSize);
363 DPRINTF(GPUFetch,
"WF[%d][%d]: Id%d Fetch dropped, flushing fetch "
364 "buffer\n", wavefront->simdId, wavefront->wfSlotId,
373 if (bufferedAndReservedLines()) {
374 Addr last_line_fetched = 0;
375 if (!reservedLines()) {
380 last_line_fetched = bufferedPCs.rbegin()->first;
382 last_line_fetched = reservedPCs.rbegin()->first;
385 next_line = last_line_fetched + cacheLineSize;
391 assert(bufferedPCs.find(next_line) == bufferedPCs.end());
392 assert(reservedPCs.find(next_line) == reservedPCs.end());
411 if (restartFromBranch) {
412 restartFromBranch =
false;
416 readPtr += byte_offset;
428 assert(hasFreeSpace());
429 assert(bufferedPCs.find(
vaddr) == bufferedPCs.end());
430 assert(reservedPCs.find(
vaddr) == reservedPCs.end());
431 assert(bufferedAndReservedLines() <
fetchDepth);
433 DPRINTF(GPUFetch,
"WF[%d][%d]: Id%d reserved fetch buffer entry "
434 "for PC = %#x\n", wavefront->simdId, wavefront->wfSlotId,
435 wavefront->wfDynId,
vaddr);
443 uint8_t *inst_buf = freeList.front();
444 reservedPCs.emplace(
vaddr, inst_buf);
445 freeList.pop_front();
451 assert(bufferedPCs.find(
vaddr) == bufferedPCs.end());
452 DPRINTF(GPUFetch,
"WF[%d][%d]: Id%d done fetching for addr %#x\n",
453 wavefront->simdId, wavefront->wfSlotId,
454 wavefront->wfDynId,
vaddr);
461 auto reserved_pc = reservedPCs.find(
vaddr);
462 assert(reserved_pc != reservedPCs.end());
463 bufferedPCs.emplace(
vaddr, reserved_pc->second);
465 if (readPtr == bufEnd) {
469 reserved_pc->second =
nullptr;
470 reservedPCs.erase(reserved_pc);
483 wavefront->computeUnit->cacheLineSize());
484 if (reservedPCs.find(cur_wave_pc) != reservedPCs.end()) {
485 DPRINTF(GPUFetch,
"WF[%d][%d]: Id%d current wave PC(%#x) still "
486 "being fetched.\n", wavefront->simdId, wavefront->wfSlotId,
487 wavefront->wfDynId, cur_wave_pc);
490 assert(bufferedPCs.find(cur_wave_pc) == bufferedPCs.end());
495 auto current_buffered_pc = bufferedPCs.find(cur_wave_pc);
496 auto oldest_buffered_pc = bufferedPCs.begin();
498 DPRINTF(GPUFetch,
"WF[%d][%d]: Id%d checking if PC block addr = %#x"
499 "(PC = %#x) can be released.\n", wavefront->simdId,
500 wavefront->wfSlotId, wavefront->wfDynId, cur_wave_pc,
505 for (
const auto &buf_pc : bufferedPCs) {
506 DPRINTF(GPUFetch,
"PC[%d] = %#x\n", idx, buf_pc.first);
513 assert(current_buffered_pc != bufferedPCs.end());
521 if (current_buffered_pc != oldest_buffered_pc) {
522 DPRINTF(GPUFetch,
"WF[%d][%d]: Id%d done fetching for PC = %#x, "
523 "removing it from the fetch buffer.\n", wavefront->simdId,
524 wavefront->wfSlotId, wavefront->wfDynId,
525 oldest_buffered_pc->first);
527 freeList.emplace_back(oldest_buffered_pc->second);
528 oldest_buffered_pc->second =
nullptr;
529 bufferedPCs.erase(oldest_buffered_pc);
530 DPRINTF(GPUFetch,
"WF[%d][%d]: Id%d has %d lines buffered.\n",
531 wavefront->simdId, wavefront->wfSlotId, wavefront->wfDynId,
545 while (wavefront->instructionBuffer.size() < maxIbSize
546 && hasFetchDataToProcess()) {
552 GPUStaticInst *gpu_static_inst = _decoder->decode(mach_inst);
553 readPtr += gpu_static_inst->
instSize();
555 assert(readPtr <= bufEnd);
558 = std::make_shared<GPUDynInst>(wavefront->computeUnit,
559 wavefront, gpu_static_inst,
560 wavefront->computeUnit->
562 wavefront->instructionBuffer.push_back(gpu_dyn_inst);
564 DPRINTF(GPUFetch,
"WF[%d][%d]: Id%ld decoded %s (%d bytes). "
565 "%d bytes remain.\n", wavefront->simdId,
566 wavefront->wfSlotId, wavefront->wfDynId,
569 fetchBytesRemaining());
578 int dword_size =
sizeof(uint32_t);
581 for (
int i = 0;
i < num_dwords; ++
i) {
583 *
reinterpret_cast<uint32_t*
>(readPtr));
584 if (readPtr + dword_size >= bufEnd) {
589 assert(readPtr == bufStart);
593 GPUStaticInst *gpu_static_inst = _decoder->decode(mach_inst);
594 readPtr += (gpu_static_inst->
instSize() - dword_size);
595 assert(readPtr < bufEnd);
598 = std::make_shared<GPUDynInst>(wavefront->computeUnit,
599 wavefront, gpu_static_inst,
600 wavefront->computeUnit->
602 wavefront->instructionBuffer.push_back(gpu_dyn_inst);
604 DPRINTF(GPUFetch,
"WF[%d][%d]: Id%d decoded split inst %s (%#x) "
605 "(%d bytes). %d bytes remain in %d buffered lines.\n",
606 wavefront->simdId, wavefront->wfSlotId, wavefront->wfDynId,
608 gpu_static_inst->
instSize(), fetchBytesRemaining(),
627 int bytes_remaining = 0;
629 if (bufferedLines() && readPtr != bufEnd) {
630 auto last_buf_pc = bufferedPCs.rbegin();
631 uint8_t *end_ptr = last_buf_pc->second + cacheLineSize;
632 int byte_diff = end_ptr - readPtr;
634 if (end_ptr > readPtr) {
635 bytes_remaining = byte_diff;
636 }
else if (end_ptr < readPtr) {
637 bytes_remaining = bufferedBytes() + byte_diff;
641 assert(bytes_remaining <= bufferedBytes());
642 return bytes_remaining;