32#define __STDC_FORMAT_MACROS
34#include "debug/GPUCoalescer.hh"
35#include "debug/GPUMem.hh"
36#include "debug/GPUReg.hh"
49 : computeUnit(cu), _name(cu.
name() +
".GlobalMemPipeline"),
50 gmQueueSize(
p.global_mem_queue_size),
51 maxWaveRequests(
p.max_wave_requests), inflightStores(0),
52 inflightLoads(0), stats(&cu)
67 assert(!
mp->isSystemReq());
76 DPRINTF(GPUCoalescer,
"Checking for %d tokens\n", token_count);
77 if (!
mp->computeUnit()->getTokenManager()->haveTokens(token_count)) {
78 DPRINTF(GPUCoalescer,
"Stalling inst because coalsr is busy!\n");
92 DPRINTF(GPUCoalescer,
"Acquiring %d token(s)\n", token_count);
93 assert(
mp->computeUnit()->getTokenManager()->haveTokens(token_count));
94 mp->computeUnit()->getTokenManager()->acquireTokens(token_count);
102 if ((
mp->wavefront()->outstandingReqsRdGm
116 bool accessVrf =
true;
121 if (
m && (
m->isLoad() ||
m->isAtomicRet())) {
125 canScheduleWriteOperandsFromLoad(
w,
m);
135 DPRINTF(GPUMem,
"CU%d: WF[%d][%d]: Completing global mem instr %s\n",
136 m->cu_id,
m->simdId,
m->wfSlotId,
m->disassemble());
139 w->decLGKMInstsIssued();
141 w->decVMemInstsIssued();
143 if (
m->isLoad() ||
m->isAtomicRet()) {
144 w->computeUnit->vrf[
w->simdId]->
145 scheduleWriteOperandsFromLoad(
w,
m);
154 if (
m->isStore() ||
m->isAtomic() ||
m->isMemSync()) {
160 if (
m->isLoad() ||
m->isAtomic() ||
m->isMemSync()) {
166 w->validateRequestCounters();
178 w->computeUnit->vectorGlobalMemUnit.set(
m->time);
186 if (
mp->isLoad() ||
mp->isAtomic()) {
192 }
else if (
mp->isStore()) {
200 DPRINTF(GPUCoalescer,
"initiateAcc for %s seqNum %d\n",
201 mp->disassemble(),
mp->seqNum());
204 if (
mp->isStore() &&
mp->isGlobalSeg()) {
205 mp->wavefront()->decExpInstsIssued();
208 if (((
mp->isMemSync() && !
mp->isEndOfKernel()) || !
mp->isMemSync())) {
219 std::make_pair(
mp,
false)));
222 if (!
mp->isMemSync() && !
mp->isEndOfKernel() &&
mp->allLanesZero()) {
238 DPRINTF(GPUMem,
"CU%d: WF[%d][%d] Popping 0 mem_op = \n",
249 if (mem_req->second.second) {
250 return mem_req->second.first;
260 if (gpuDynInst->isLoad() || gpuDynInst->isAtomic()) {
263 }
else if (gpuDynInst->isStore()) {
282 if (gpuDynInst->isLoad()) {
285 }
else if (gpuDynInst->isStore()) {
299 gpuDynInst->setAccessTime(
curTick());
312 mem_req->second.second =
true;
317 : statistics::
Group(parent,
"GlobalMemPipeline"),
318 ADD_STAT(loadVrfBankConflictCycles,
"total number of cycles GM data "
319 "are delayed before updating the VRF")
WaitClass vectorGlobalMemUnit
TokenManager * getTokenManager()
std::vector< VectorRegisterFile * > vrf
GPUDynInstPtr getNextReadyResp()
Find the next ready response to service.
void completeRequest(GPUDynInstPtr gpuDynInst)
once a memory request is finished we remove it from the buffer.
void issueRequest(GPUDynInstPtr gpuDynInst)
Issues a request to the pipeline (i.e., enqueue it in the request buffer).
bool outstandingReqsCheck(GPUDynInstPtr mp) const
void handleResponse(GPUDynInstPtr gpuDynInst)
This method handles responses sent to this GM pipeline by the CU.
void acqCoalescerToken(GPUDynInstPtr mp)
GlobalMemPipeline(const ComputeUnitParams &p, ComputeUnit &cu)
std::map< uint64_t, std::pair< GPUDynInstPtr, bool > > gmOrderedRespBuffer
std::queue< GPUDynInstPtr > gmIssuedRequests
ComputeUnit & computeUnit
bool coalescerReady(GPUDynInstPtr mp) const
void ScheduleAdd(int *val, Tick when, int x)
void sampleLineRoundTrip(const std::map< Addr, std::vector< Tick > > &roundTripTime)
void sampleInstRoundTrip(std::vector< Tick > roundTripTime)
void sampleLoad(const Tick accessTime)
void sampleStore(const Tick accessTime)
void recvTokens(int num_tokens)
Increment the number of available tokens by num_tokens.
bool rdy(Cycles cycles=Cycles(0)) const
void validateRequestCounters()
ComputeUnit * computeUnit
#define ADD_STAT(n,...)
Convenience macro to add a stat to a statistics group.
Copyright (c) 2024 - Pranith Kumar Copyright (c) 2020 Inria All rights reserved.
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Tick curTick()
The universal simulation clock.
uint64_t Tick
Tick count type.
GlobalMemPipelineStats(statistics::Group *parent)
const std::string & name()