37 #include "debug/GPUFetch.hh"
38 #include "debug/GPUPort.hh"
39 #include "debug/GPUTLB.hh"
50 : timingSim(true), computeUnit(cu), fetchScheduler(
p),
51 waveList(nullptr), fetchDepth(
p.fetch_depth)
92 if (!fetch_buf.hasFreeSpace()) {
93 fetch_buf.checkWaveReleaseBuf();
95 if (fetch_buf.hasFetchDataToProcess()) {
96 fetch_buf.decodeInsts();
154 DPRINTF(GPUFetch,
"CU%d: WF[%d][%d]: Id%d: Initiate fetch "
158 DPRINTF(GPUTLB,
"CU%d: WF[%d][%d]: Initiating fetch translation: %#x\n",
181 DPRINTF(GPUTLB,
"Failed to send TLB req for FETCH addr %#x\n",
192 DPRINTF(GPUTLB,
"Failed to send TLB req for FETCH addr %#x\n",
197 DPRINTF(GPUTLB,
"sent FETCH translation request for %#x\n",
vaddr);
206 TheISA::GpuTLB::TranslationState *sender_state =
207 safe_cast<TheISA::GpuTLB::TranslationState*>(pkt->
senderState);
209 delete sender_state->tlbEntry;
213 fetch(pkt, wavefront);
220 assert(pkt->
req->hasPaddr());
221 assert(pkt->
req->hasSize());
223 DPRINTF(GPUFetch,
"CU%d: WF[%d][%d]: Fetch Access: %#x\n",
225 pkt->
req->getPaddr());
255 .reservedBuf(pkt->
req->getVaddr()));
267 DPRINTF(GPUPort,
"CU%d: WF[%d][%d]: Fetch addr %#x failed!\n",
269 pkt->
req->getPaddr());
271 DPRINTF(GPUPort,
"CU%d: WF[%d][%d]: Fetch addr %#x sent!\n",
273 pkt->
req->getPaddr());
285 safe_cast<ComputeUnit::SQCPort::SenderState*>(pkt->
senderState);
289 DPRINTF(GPUFetch,
"CU%d: WF[%d][%d]: Fetch addr %#x returned "
332 "Cache line size should be a power of two.");
347 restartFromBranch =
true;
358 freeList.push_back(bufStart +
i * cacheLineSize);
361 DPRINTF(GPUFetch,
"WF[%d][%d]: Id%d Fetch dropped, flushing fetch "
362 "buffer\n", wavefront->simdId, wavefront->wfSlotId,
371 if (bufferedAndReservedLines()) {
372 Addr last_line_fetched = 0;
373 if (!reservedLines()) {
378 last_line_fetched = bufferedPCs.rbegin()->first;
380 last_line_fetched = reservedPCs.rbegin()->first;
383 next_line = last_line_fetched + cacheLineSize;
389 assert(bufferedPCs.find(next_line) == bufferedPCs.end());
390 assert(reservedPCs.find(next_line) == reservedPCs.end());
409 if (restartFromBranch) {
410 restartFromBranch =
false;
414 readPtr += byte_offset;
426 assert(hasFreeSpace());
427 assert(bufferedPCs.find(
vaddr) == bufferedPCs.end());
428 assert(reservedPCs.find(
vaddr) == reservedPCs.end());
429 assert(bufferedAndReservedLines() <
fetchDepth);
431 DPRINTF(GPUFetch,
"WF[%d][%d]: Id%d reserved fetch buffer entry "
432 "for PC = %#x\n", wavefront->simdId, wavefront->wfSlotId,
433 wavefront->wfDynId,
vaddr);
441 uint8_t *inst_buf = freeList.front();
442 reservedPCs.emplace(
vaddr, inst_buf);
443 freeList.pop_front();
449 assert(bufferedPCs.find(
vaddr) == bufferedPCs.end());
450 DPRINTF(GPUFetch,
"WF[%d][%d]: Id%d done fetching for addr %#x\n",
451 wavefront->simdId, wavefront->wfSlotId,
452 wavefront->wfDynId,
vaddr);
459 auto reserved_pc = reservedPCs.find(
vaddr);
460 assert(reserved_pc != reservedPCs.end());
461 bufferedPCs.emplace(
vaddr, reserved_pc->second);
463 if (readPtr == bufEnd) {
467 reserved_pc->second =
nullptr;
468 reservedPCs.erase(reserved_pc);
481 wavefront->computeUnit->cacheLineSize());
482 if (reservedPCs.find(cur_wave_pc) != reservedPCs.end()) {
483 DPRINTF(GPUFetch,
"WF[%d][%d]: Id%d current wave PC(%#x) still "
484 "being fetched.\n", wavefront->simdId, wavefront->wfSlotId,
485 wavefront->wfDynId, cur_wave_pc);
488 assert(bufferedPCs.find(cur_wave_pc) == bufferedPCs.end());
493 auto current_buffered_pc = bufferedPCs.find(cur_wave_pc);
494 auto oldest_buffered_pc = bufferedPCs.begin();
496 DPRINTF(GPUFetch,
"WF[%d][%d]: Id%d checking if PC block addr = %#x"
497 "(PC = %#x) can be released.\n", wavefront->simdId,
498 wavefront->wfSlotId, wavefront->wfDynId, cur_wave_pc,
503 for (
const auto &buf_pc : bufferedPCs) {
504 DPRINTF(GPUFetch,
"PC[%d] = %#x\n", idx, buf_pc.first);
511 assert(current_buffered_pc != bufferedPCs.end());
519 if (current_buffered_pc != oldest_buffered_pc) {
520 DPRINTF(GPUFetch,
"WF[%d][%d]: Id%d done fetching for PC = %#x, "
521 "removing it from the fetch buffer.\n", wavefront->simdId,
522 wavefront->wfSlotId, wavefront->wfDynId,
523 oldest_buffered_pc->first);
525 freeList.emplace_back(oldest_buffered_pc->second);
526 oldest_buffered_pc->second =
nullptr;
527 bufferedPCs.erase(oldest_buffered_pc);
528 DPRINTF(GPUFetch,
"WF[%d][%d]: Id%d has %d lines buffered.\n",
529 wavefront->simdId, wavefront->wfSlotId, wavefront->wfDynId,
543 while (wavefront->instructionBuffer.size() < maxIbSize
544 && hasFetchDataToProcess()) {
550 GPUStaticInst *gpu_static_inst = _decoder->decode(mach_inst);
551 readPtr += gpu_static_inst->
instSize();
553 assert(readPtr <= bufEnd);
556 = std::make_shared<GPUDynInst>(wavefront->computeUnit,
557 wavefront, gpu_static_inst,
558 wavefront->computeUnit->
560 wavefront->instructionBuffer.push_back(gpu_dyn_inst);
562 DPRINTF(GPUFetch,
"WF[%d][%d]: Id%ld decoded %s (%d bytes). "
563 "%d bytes remain.\n", wavefront->simdId,
564 wavefront->wfSlotId, wavefront->wfDynId,
567 fetchBytesRemaining());
576 int dword_size =
sizeof(uint32_t);
579 for (
int i = 0;
i < num_dwords; ++
i) {
581 *
reinterpret_cast<uint32_t*
>(readPtr));
582 if (readPtr + dword_size >= bufEnd) {
587 assert(readPtr == bufStart);
591 GPUStaticInst *gpu_static_inst = _decoder->decode(mach_inst);
592 readPtr += (gpu_static_inst->
instSize() - dword_size);
593 assert(readPtr < bufEnd);
596 = std::make_shared<GPUDynInst>(wavefront->computeUnit,
597 wavefront, gpu_static_inst,
598 wavefront->computeUnit->
600 wavefront->instructionBuffer.push_back(gpu_dyn_inst);
602 DPRINTF(GPUFetch,
"WF[%d][%d]: Id%d decoded split inst %s (%#x) "
603 "(%d bytes). %d bytes remain in %d buffered lines.\n",
604 wavefront->simdId, wavefront->wfSlotId, wavefront->wfDynId,
606 gpu_static_inst->
instSize(), fetchBytesRemaining(),
625 int bytes_remaining = 0;
627 if (bufferedLines() && readPtr != bufEnd) {
628 auto last_buf_pc = bufferedPCs.rbegin();
629 uint8_t *end_ptr = last_buf_pc->second + cacheLineSize;
630 int byte_diff = end_ptr - readPtr;
632 if (end_ptr > readPtr) {
633 bytes_remaining = byte_diff;
634 }
else if (end_ptr < readPtr) {
635 bytes_remaining = bufferedBytes() + byte_diff;
639 assert(bytes_remaining <= bufferedBytes());
640 return bytes_remaining;