36 #include "debug/GPUFetch.hh"
37 #include "debug/GPUPort.hh"
38 #include "debug/GPUTLB.hh"
49 : timingSim(true), computeUnit(cu), fetchScheduler(
p),
50 waveList(nullptr), fetchDepth(
p->fetch_depth)
91 if (!fetch_buf.hasFreeSpace()) {
92 fetch_buf.checkWaveReleaseBuf();
94 if (fetch_buf.hasFetchDataToProcess()) {
95 fetch_buf.decodeInsts();
153 DPRINTF(GPUFetch,
"CU%d: WF[%d][%d]: Id%d: Initiate fetch "
157 DPRINTF(GPUTLB,
"CU%d: WF[%d][%d]: Initiating fetch translation: %#x\n",
180 DPRINTF(GPUTLB,
"Failed to send TLB req for FETCH addr %#x\n",
191 DPRINTF(GPUTLB,
"Failed to send TLB req for FETCH addr %#x\n",
196 DPRINTF(GPUTLB,
"sent FETCH translation request for %#x\n",
vaddr);
205 TheISA::GpuTLB::TranslationState *sender_state =
206 safe_cast<TheISA::GpuTLB::TranslationState*>(pkt->
senderState);
208 delete sender_state->tlbEntry;
212 fetch(pkt, wavefront);
219 assert(pkt->
req->hasPaddr());
220 assert(pkt->
req->hasSize());
222 DPRINTF(GPUFetch,
"CU%d: WF[%d][%d]: Fetch Access: %#x\n",
224 pkt->
req->getPaddr());
252 .reservedBuf(pkt->
req->getVaddr()));
264 DPRINTF(GPUPort,
"CU%d: WF[%d][%d]: Fetch addr %#x failed!\n",
266 pkt->
req->getPaddr());
268 DPRINTF(GPUPort,
"CU%d: WF[%d][%d]: Fetch addr %#x sent!\n",
270 pkt->
req->getPaddr());
282 safe_cast<ComputeUnit::SQCPort::SenderState*>(pkt->
senderState);
286 DPRINTF(GPUFetch,
"CU%d: WF[%d][%d]: Fetch addr %#x returned "
329 "Cache line size should be a power of two.");
344 restartFromBranch =
true;
355 freeList.push_back(bufStart +
i * cacheLineSize);
358 DPRINTF(GPUFetch,
"WF[%d][%d]: Id%d Fetch dropped, flushing fetch "
359 "buffer\n", wavefront->simdId, wavefront->wfSlotId,
368 if (bufferedAndReservedLines()) {
369 Addr last_line_fetched = 0;
370 if (!reservedLines()) {
375 last_line_fetched = bufferedPCs.rbegin()->first;
377 last_line_fetched = reservedPCs.rbegin()->first;
380 next_line = last_line_fetched + cacheLineSize;
386 assert(bufferedPCs.find(next_line) == bufferedPCs.end());
387 assert(reservedPCs.find(next_line) == reservedPCs.end());
406 if (restartFromBranch) {
407 restartFromBranch =
false;
411 readPtr += byte_offset;
423 assert(hasFreeSpace());
424 assert(bufferedPCs.find(
vaddr) == bufferedPCs.end());
425 assert(reservedPCs.find(
vaddr) == reservedPCs.end());
426 assert(bufferedAndReservedLines() <
fetchDepth);
428 DPRINTF(GPUFetch,
"WF[%d][%d]: Id%d reserved fetch buffer entry "
429 "for PC = %#x\n", wavefront->simdId, wavefront->wfSlotId,
430 wavefront->wfDynId,
vaddr);
438 uint8_t *inst_buf = freeList.front();
439 reservedPCs.emplace(
vaddr, inst_buf);
440 freeList.pop_front();
446 assert(bufferedPCs.find(
vaddr) == bufferedPCs.end());
447 DPRINTF(GPUFetch,
"WF[%d][%d]: Id%d done fetching for addr %#x\n",
448 wavefront->simdId, wavefront->wfSlotId,
449 wavefront->wfDynId,
vaddr);
456 auto reserved_pc = reservedPCs.find(
vaddr);
457 assert(reserved_pc != reservedPCs.end());
458 bufferedPCs.emplace(
vaddr, reserved_pc->second);
460 if (readPtr == bufEnd) {
464 reserved_pc->second =
nullptr;
465 reservedPCs.erase(reserved_pc);
478 wavefront->computeUnit->cacheLineSize());
479 if (reservedPCs.find(cur_wave_pc) != reservedPCs.end()) {
480 DPRINTF(GPUFetch,
"WF[%d][%d]: Id%d current wave PC(%#x) still "
481 "being fetched.\n", wavefront->simdId, wavefront->wfSlotId,
482 wavefront->wfDynId, cur_wave_pc);
485 assert(bufferedPCs.find(cur_wave_pc) == bufferedPCs.end());
490 auto current_buffered_pc = bufferedPCs.find(cur_wave_pc);
491 auto oldest_buffered_pc = bufferedPCs.begin();
493 DPRINTF(GPUFetch,
"WF[%d][%d]: Id%d checking if PC block addr = %#x"
494 "(PC = %#x) can be released.\n", wavefront->simdId,
495 wavefront->wfSlotId, wavefront->wfDynId, cur_wave_pc,
500 for (
const auto &buf_pc : bufferedPCs) {
501 DPRINTF(GPUFetch,
"PC[%d] = %#x\n", idx, buf_pc.first);
508 assert(current_buffered_pc != bufferedPCs.end());
516 if (current_buffered_pc != oldest_buffered_pc) {
517 DPRINTF(GPUFetch,
"WF[%d][%d]: Id%d done fetching for PC = %#x, "
518 "removing it from the fetch buffer.\n", wavefront->simdId,
519 wavefront->wfSlotId, wavefront->wfDynId,
520 oldest_buffered_pc->first);
522 freeList.emplace_back(oldest_buffered_pc->second);
523 oldest_buffered_pc->second =
nullptr;
524 bufferedPCs.erase(oldest_buffered_pc);
525 DPRINTF(GPUFetch,
"WF[%d][%d]: Id%d has %d lines buffered.\n",
526 wavefront->simdId, wavefront->wfSlotId, wavefront->wfDynId,
540 while (wavefront->instructionBuffer.size() < maxIbSize
541 && hasFetchDataToProcess()) {
547 GPUStaticInst *gpu_static_inst = _decoder->decode(mach_inst);
548 readPtr += gpu_static_inst->
instSize();
550 assert(readPtr <= bufEnd);
553 = std::make_shared<GPUDynInst>(wavefront->computeUnit,
554 wavefront, gpu_static_inst,
555 wavefront->computeUnit->
557 wavefront->instructionBuffer.push_back(gpu_dyn_inst);
559 DPRINTF(GPUFetch,
"WF[%d][%d]: Id%ld decoded %s (%d bytes). "
560 "%d bytes remain.\n", wavefront->simdId,
561 wavefront->wfSlotId, wavefront->wfDynId,
564 fetchBytesRemaining());
573 int dword_size =
sizeof(uint32_t);
576 for (
int i = 0;
i < num_dwords; ++
i) {
577 ((uint32_t*)(&split_inst))[
i] = *
reinterpret_cast<uint32_t*
>(readPtr);
578 if (readPtr + dword_size >= bufEnd) {
583 assert(readPtr == bufStart);
587 GPUStaticInst *gpu_static_inst = _decoder->decode(mach_inst);
588 readPtr += (gpu_static_inst->
instSize() - dword_size);
589 assert(readPtr < bufEnd);
592 = std::make_shared<GPUDynInst>(wavefront->computeUnit,
593 wavefront, gpu_static_inst,
594 wavefront->computeUnit->
596 wavefront->instructionBuffer.push_back(gpu_dyn_inst);
598 DPRINTF(GPUFetch,
"WF[%d][%d]: Id%d decoded split inst %s (%#x) "
599 "(%d bytes). %d bytes remain in %d buffered lines.\n",
600 wavefront->simdId, wavefront->wfSlotId, wavefront->wfDynId,
602 gpu_static_inst->
instSize(), fetchBytesRemaining(),
621 int bytes_remaining = 0;
623 if (bufferedLines() && readPtr != bufEnd) {
624 auto last_buf_pc = bufferedPCs.rbegin();
625 uint8_t *end_ptr = last_buf_pc->second + cacheLineSize;
626 int byte_diff = end_ptr - readPtr;
628 if (end_ptr > readPtr) {
629 bytes_remaining = byte_diff;
630 }
else if (end_ptr < readPtr) {
631 bytes_remaining = bufferedBytes() + byte_diff;
635 assert(bytes_remaining <= bufferedBytes());
636 return bytes_remaining;