37 #include "debug/GPUFetch.hh"
38 #include "debug/GPUPort.hh"
39 #include "debug/GPUTLB.hh"
53 : timingSim(true), computeUnit(cu), fetchScheduler(
p),
54 waveList(nullptr), fetchDepth(
p.fetch_depth)
95 if (!fetch_buf.hasFreeSpace()) {
96 fetch_buf.checkWaveReleaseBuf();
98 if (fetch_buf.hasFetchDataToProcess()) {
99 fetch_buf.decodeInsts();
157 DPRINTF(GPUFetch,
"CU%d: WF[%d][%d]: Id%d: Initiate fetch "
161 DPRINTF(GPUTLB,
"CU%d: WF[%d][%d]: Initiating fetch translation: %#x\n",
184 DPRINTF(GPUTLB,
"Failed to send TLB req for FETCH addr %#x\n",
195 DPRINTF(GPUTLB,
"Failed to send TLB req for FETCH addr %#x\n",
200 DPRINTF(GPUTLB,
"sent FETCH translation request for %#x\n",
vaddr);
209 TheISA::GpuTLB::TranslationState *sender_state =
210 safe_cast<TheISA::GpuTLB::TranslationState*>(pkt->
senderState);
212 delete sender_state->tlbEntry;
216 fetch(pkt, wavefront);
223 assert(pkt->
req->hasPaddr());
224 assert(pkt->
req->hasSize());
226 DPRINTF(GPUFetch,
"CU%d: WF[%d][%d]: Fetch Access: %#x\n",
228 pkt->
req->getPaddr());
258 .reservedBuf(pkt->
req->getVaddr()));
270 DPRINTF(GPUPort,
"CU%d: WF[%d][%d]: Fetch addr %#x failed!\n",
272 pkt->
req->getPaddr());
274 DPRINTF(GPUPort,
"CU%d: WF[%d][%d]: Fetch addr %#x sent!\n",
276 pkt->
req->getPaddr());
288 safe_cast<ComputeUnit::SQCPort::SenderState*>(pkt->
senderState);
292 DPRINTF(GPUFetch,
"CU%d: WF[%d][%d]: Fetch addr %#x returned "
335 "Cache line size should be a power of two.");
350 restartFromBranch =
true;
361 freeList.push_back(bufStart +
i * cacheLineSize);
364 DPRINTF(GPUFetch,
"WF[%d][%d]: Id%d Fetch dropped, flushing fetch "
365 "buffer\n", wavefront->simdId, wavefront->wfSlotId,
374 if (bufferedAndReservedLines()) {
375 Addr last_line_fetched = 0;
376 if (!reservedLines()) {
381 last_line_fetched = bufferedPCs.rbegin()->first;
383 last_line_fetched = reservedPCs.rbegin()->first;
386 next_line = last_line_fetched + cacheLineSize;
392 assert(bufferedPCs.find(next_line) == bufferedPCs.end());
393 assert(reservedPCs.find(next_line) == reservedPCs.end());
412 if (restartFromBranch) {
413 restartFromBranch =
false;
417 readPtr += byte_offset;
429 assert(hasFreeSpace());
430 assert(bufferedPCs.find(
vaddr) == bufferedPCs.end());
431 assert(reservedPCs.find(
vaddr) == reservedPCs.end());
432 assert(bufferedAndReservedLines() <
fetchDepth);
434 DPRINTF(GPUFetch,
"WF[%d][%d]: Id%d reserved fetch buffer entry "
435 "for PC = %#x\n", wavefront->simdId, wavefront->wfSlotId,
436 wavefront->wfDynId,
vaddr);
444 uint8_t *inst_buf = freeList.front();
445 reservedPCs.emplace(
vaddr, inst_buf);
446 freeList.pop_front();
452 assert(bufferedPCs.find(
vaddr) == bufferedPCs.end());
453 DPRINTF(GPUFetch,
"WF[%d][%d]: Id%d done fetching for addr %#x\n",
454 wavefront->simdId, wavefront->wfSlotId,
455 wavefront->wfDynId,
vaddr);
462 auto reserved_pc = reservedPCs.find(
vaddr);
463 assert(reserved_pc != reservedPCs.end());
464 bufferedPCs.emplace(
vaddr, reserved_pc->second);
466 if (readPtr == bufEnd) {
470 reserved_pc->second =
nullptr;
471 reservedPCs.erase(reserved_pc);
484 wavefront->computeUnit->cacheLineSize());
485 if (reservedPCs.find(cur_wave_pc) != reservedPCs.end()) {
486 DPRINTF(GPUFetch,
"WF[%d][%d]: Id%d current wave PC(%#x) still "
487 "being fetched.\n", wavefront->simdId, wavefront->wfSlotId,
488 wavefront->wfDynId, cur_wave_pc);
491 assert(bufferedPCs.find(cur_wave_pc) == bufferedPCs.end());
496 auto current_buffered_pc = bufferedPCs.find(cur_wave_pc);
497 auto oldest_buffered_pc = bufferedPCs.begin();
499 DPRINTF(GPUFetch,
"WF[%d][%d]: Id%d checking if PC block addr = %#x"
500 "(PC = %#x) can be released.\n", wavefront->simdId,
501 wavefront->wfSlotId, wavefront->wfDynId, cur_wave_pc,
506 for (
const auto &buf_pc : bufferedPCs) {
507 DPRINTF(GPUFetch,
"PC[%d] = %#x\n", idx, buf_pc.first);
514 assert(current_buffered_pc != bufferedPCs.end());
522 if (current_buffered_pc != oldest_buffered_pc) {
523 DPRINTF(GPUFetch,
"WF[%d][%d]: Id%d done fetching for PC = %#x, "
524 "removing it from the fetch buffer.\n", wavefront->simdId,
525 wavefront->wfSlotId, wavefront->wfDynId,
526 oldest_buffered_pc->first);
528 freeList.emplace_back(oldest_buffered_pc->second);
529 oldest_buffered_pc->second =
nullptr;
530 bufferedPCs.erase(oldest_buffered_pc);
531 DPRINTF(GPUFetch,
"WF[%d][%d]: Id%d has %d lines buffered.\n",
532 wavefront->simdId, wavefront->wfSlotId, wavefront->wfDynId,
546 while (wavefront->instructionBuffer.size() < maxIbSize
547 && hasFetchDataToProcess()) {
553 GPUStaticInst *gpu_static_inst = _decoder->decode(mach_inst);
554 readPtr += gpu_static_inst->
instSize();
556 assert(readPtr <= bufEnd);
559 = std::make_shared<GPUDynInst>(wavefront->computeUnit,
560 wavefront, gpu_static_inst,
561 wavefront->computeUnit->
563 wavefront->instructionBuffer.push_back(gpu_dyn_inst);
565 DPRINTF(GPUFetch,
"WF[%d][%d]: Id%ld decoded %s (%d bytes). "
566 "%d bytes remain.\n", wavefront->simdId,
567 wavefront->wfSlotId, wavefront->wfDynId,
570 fetchBytesRemaining());
579 int dword_size =
sizeof(uint32_t);
582 for (
int i = 0;
i < num_dwords; ++
i) {
584 *
reinterpret_cast<uint32_t*
>(readPtr));
585 if (readPtr + dword_size >= bufEnd) {
590 assert(readPtr == bufStart);
594 GPUStaticInst *gpu_static_inst = _decoder->decode(mach_inst);
595 readPtr += (gpu_static_inst->
instSize() - dword_size);
596 assert(readPtr < bufEnd);
599 = std::make_shared<GPUDynInst>(wavefront->computeUnit,
600 wavefront, gpu_static_inst,
601 wavefront->computeUnit->
603 wavefront->instructionBuffer.push_back(gpu_dyn_inst);
605 DPRINTF(GPUFetch,
"WF[%d][%d]: Id%d decoded split inst %s (%#x) "
606 "(%d bytes). %d bytes remain in %d buffered lines.\n",
607 wavefront->simdId, wavefront->wfSlotId, wavefront->wfDynId,
609 gpu_static_inst->
instSize(), fetchBytesRemaining(),
628 int bytes_remaining = 0;
630 if (bufferedLines() && readPtr != bufEnd) {
631 auto last_buf_pc = bufferedPCs.rbegin();
632 uint8_t *end_ptr = last_buf_pc->second + cacheLineSize;
633 int byte_diff = end_ptr - readPtr;
635 if (end_ptr > readPtr) {
636 bytes_remaining = byte_diff;
637 }
else if (end_ptr < readPtr) {
638 bytes_remaining = bufferedBytes() + byte_diff;
642 assert(bytes_remaining <= bufferedBytes());
643 return bytes_remaining;