37#include "debug/GPUFetch.hh" 
   38#include "debug/GPUPort.hh" 
   39#include "debug/GPUTLB.hh" 
   96        if (!fetch_buf.hasFreeSpace()) {
 
   97            fetch_buf.checkWaveReleaseBuf();
 
   99        if (fetch_buf.hasFetchDataToProcess()) {
 
  100            fetch_buf.decodeInsts();
 
  105    for (
int j = 0; j < 
computeUnit.shader->n_wf; ++j) {
 
 
  158    DPRINTF(GPUFetch, 
"CU%d: WF[%d][%d]: Id%d: Initiate fetch " 
  162    DPRINTF(GPUTLB, 
"CU%d: WF[%d][%d]: Initiating fetch translation: %#x\n",
 
  185            DPRINTF(GPUTLB, 
"Failed to send TLB req for FETCH addr %#x\n",
 
  189        } 
else if (!
computeUnit.sqcTLBPort.sendTimingReq(pkt)) {
 
  196            DPRINTF(GPUTLB, 
"Failed to send TLB req for FETCH addr %#x\n",
 
  201            DPRINTF(GPUTLB, 
"sent FETCH translation request for %#x\n", 
vaddr);
 
  215        if (!pkt->
req->systemReq()) {
 
  226        fetch(pkt, wavefront);
 
 
  233    assert(pkt->
req->hasPaddr());
 
  234    assert(pkt->
req->hasSize());
 
  236    DPRINTF(GPUFetch, 
"CU%d: WF[%d][%d]: Fetch Access: %#x\n",
 
  238            pkt->
req->getPaddr());
 
  267    if (!pkt->
req->systemReq()) {
 
  277                    .reservedBuf(pkt->
req->getVaddr()));
 
  285        if (pkt->
req->systemReq()) {
 
  288            computeUnit.shader->systemHub->sendRequest(pkt, resp_event);
 
  289        } 
else if (!
computeUnit.sqcPort.sendTimingReq(pkt)) {
 
  290            computeUnit.sqcPort.retries.push_back(std::make_pair(pkt,
 
  293            DPRINTF(GPUPort, 
"CU%d: WF[%d][%d]: Fetch addr %#x failed!\n",
 
  295                    pkt->
req->getPaddr());
 
  297            DPRINTF(GPUPort, 
"CU%d: WF[%d][%d]: Fetch addr %#x sent!\n",
 
  299                    pkt->
req->getPaddr());
 
 
  315    DPRINTF(GPUFetch, 
"CU%d: WF[%d][%d]: Fetch addr %#x returned " 
 
  358        "Cache line size should be a power of two.");
 
 
  387    DPRINTF(GPUFetch, 
"WF[%d][%d]: Id%d Fetch dropped, flushing fetch " 
 
  398        Addr last_line_fetched = 0;
 
 
  457    DPRINTF(GPUFetch, 
"WF[%d][%d]: Id%d reserved fetch buffer entry " 
  467    uint8_t *inst_buf = 
freeList.front();
 
 
  491    DPRINTF(GPUFetch, 
"WF[%d][%d]: Id%d done fetching for addr %#x\n",
 
  508    reserved_pc->second = 
nullptr;
 
 
  522                                 wavefront->computeUnit->cacheLineSize());
 
  524        DPRINTF(GPUFetch, 
"WF[%d][%d]: Id%d current wave PC(%#x) still " 
  534    auto current_buffered_pc = 
bufferedPCs.find(cur_wave_pc);
 
  537    DPRINTF(GPUFetch, 
"WF[%d][%d]: Id%d checking if PC block addr = %#x" 
  538            "(PC = %#x) can be released.\n", 
wavefront->simdId,
 
  545        DPRINTF(GPUFetch, 
"PC[%d] = %#x\n", idx, buf_pc.first);
 
  560    if (current_buffered_pc != oldest_buffered_pc) {
 
  561        DPRINTF(GPUFetch, 
"WF[%d][%d]: Id%d done fetching for PC = %#x, " 
  562                "removing it from the fetch buffer.\n", 
wavefront->simdId,
 
  564                oldest_buffered_pc->first);
 
  566        freeList.emplace_back(oldest_buffered_pc->second);
 
  567        oldest_buffered_pc->second = 
nullptr;
 
  569        DPRINTF(GPUFetch, 
"WF[%d][%d]: Id%d has %d lines buffered.\n",
 
 
  589            TheGpuISA::MachInst mach_inst
 
  590                = 
reinterpret_cast<TheGpuISA::MachInst
>(
readPtr);
 
  597                = std::make_shared<GPUDynInst>(
wavefront->computeUnit,
 
  601            wavefront->instructionBuffer.push_back(gpu_dyn_inst);
 
  603            DPRINTF(GPUFetch, 
"WF[%d][%d]: Id%ld decoded %s (%d bytes). " 
 
  616    TheGpuISA::RawMachInst split_inst = 0;
 
  617    int dword_size = 
sizeof(uint32_t);
 
  618    int num_dwords = 
sizeof(TheGpuISA::RawMachInst) / dword_size;
 
  620    for (
int i = 0; 
i < num_dwords; ++
i) {
 
  622            *
reinterpret_cast<uint32_t*
>(
readPtr));
 
  630    TheGpuISA::MachInst mach_inst
 
  631        = 
reinterpret_cast<TheGpuISA::MachInst
>(&split_inst);
 
  637        = std::make_shared<GPUDynInst>(
wavefront->computeUnit,
 
  641    wavefront->instructionBuffer.push_back(gpu_dyn_inst);
 
  643    DPRINTF(GPUFetch, 
"WF[%d][%d]: Id%d decoded split inst %s (%#x) " 
  644            "(%d bytes). %d bytes remain in %d buffered lines.\n",
 
 
  658    bool is_split = (
readPtr + 
sizeof(TheGpuISA::RawMachInst)) > 
bufEnd;
 
 
  666    int bytes_remaining = 0;
 
  671        int byte_diff = end_ptr - 
readPtr;
 
  674            bytes_remaining = byte_diff;
 
  675        } 
else if (end_ptr < 
readPtr) {
 
  681    return bytes_remaining;
 
 
int reservedLines() const
std::map< Addr, uint8_t * > reservedPCs
void reserveBuf(Addr vaddr)
reserve an entry in the fetch buffer for PC = vaddr,
uint8_t * readPtr
pointer that points to the next chunk of inst data to be decoded.
int fetchBytesRemaining() const
calculates the number of fetched bytes that have yet to be decoded.
void checkWaveReleaseBuf()
checks if the wavefront can release any of its fetch buffer entries.
bool hasFetchDataToProcess() const
checks if the buffer contains valid data.
std::map< Addr, uint8_t * > bufferedPCs
the set of PCs (fetch addresses) that are currently buffered.
int bufferedAndReservedLines() const
void allocateBuf(int fetch_depth, int cache_line_size, Wavefront *wf)
allocate the fetch buffer space, and set the fetch depth (number of lines that may be buffered),...
int bufferedLines() const
bool hasFreeSpace() const
void fetchDone(PacketPtr ptr)
bool splitDecode() const
check if the next instruction to be processed out of the fetch buffer is split across the end/beginni...
void decodeInsts()
each time the fetch stage is ticked, we check if there are any data in the fetch buffer that may be d...
uint8_t * bufStart
raw instruction buffer.
TheGpuISA::Decoder * _decoder
int bufferedBytes() const
std::deque< uint8_t * > freeList
represents the fetch buffer free list.
static uint32_t globalFetchUnitID
std::vector< Wavefront * > * waveList
void bindWaveList(std::vector< Wavefront * > *list)
FetchUnit(const ComputeUnitParams &p, ComputeUnit &cu)
void fetch(PacketPtr pkt, Wavefront *wavefront)
std::vector< Wavefront * > fetchQueue
void initiateFetch(Wavefront *wavefront)
int fetchDepth
number of cache lines we can fetch and buffer.
TheGpuISA::Decoder decoder
ComputeUnit & computeUnit
void processFetchReturn(PacketPtr pkt)
std::vector< FetchBufDesc > fetchBuf
void flushBuf(int wfSlotId)
std::vector< std::pair< Wavefront *, bool > > fetchStatusQueue
const std::string & disassemble()
virtual int instSize() const =0
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
void dataStatic(T *p)
Set the data pointer to the following value that should not be freed.
SenderState * senderState
This packet's sender state.
RequestPtr req
A pointer to the original request.
MemCmd cmd
The command field of the packet.
@ INST_FETCH
The request was an instruction fetch.
std::deque< GPUDynInstPtr > instructionBuffer
@ S_WAITCNT
wavefront has unsatisfied wait counts
static constexpr std::enable_if_t< std::is_integral_v< T >, int > floorLog2(T x)
static constexpr bool isPowerOf2(const T &n)
static constexpr T roundDown(const T &val, const U &align)
This function is used to align addresses in memory.
constexpr void replaceBits(T &val, unsigned first, unsigned last, B bit_val)
A convenience function to replace bits first to last of val with bit_val in place.
#define panic_if(cond,...)
Conditional panic macro that checks the supplied condition and only panics if the condition is true a...
Addr makeLineAddress(Addr addr, int cacheLineBits)
Copyright (c) 2024 Arm Limited All rights reserved.
T safe_cast(U &&ref_or_ptr)
std::shared_ptr< Request > RequestPtr
std::shared_ptr< GPUDynInst > GPUDynInstPtr
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
SenderState is information carried along with the packet throughout the TLB hierarchy.
GPU TranslationState: this is currently somewhat of a bastardization of the usage of SenderState,...