#define __STDC_FORMAT_MACROS

#include "debug/GPUCoalescer.hh"
#include "debug/GPUMem.hh"
#include "debug/GPUReg.hh"
GlobalMemPipeline::GlobalMemPipeline(const ComputeUnitParams &p,
                                     ComputeUnit &cu)
    : computeUnit(cu), _name(cu.name() + ".GlobalMemPipeline"),
      gmQueueSize(p.global_mem_queue_size),
      maxWaveRequests(p.max_wave_requests), inflightStores(0),
      inflightLoads(0), stats(&cu)
{
}
// GlobalMemPipeline::coalescerReady(): each request needs one token from
// the coalescer's pool; stall if none is available.
int token_count = 1;
if (!mp->computeUnit()->getTokenManager()->haveTokens(token_count)) {
    return false;
}

// GlobalMemPipeline::acqCoalescerToken(): the caller must already have
// verified, via coalescerReady(), that a token is available.
assert(mp->computeUnit()->getTokenManager()->haveTokens(token_count));
mp->computeUnit()->getTokenManager()->acquireTokens(token_count);
// GlobalMemPipeline::outstandingReqsCheck(): cap the number of in-flight
// global memory requests a single wavefront may have.
if ((mp->wavefront()->outstandingReqsRdGm
     + mp->wavefront()->outstandingReqsWrGm) >= maxWaveRequests) {
    return false;
}
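Together, coalescerReady(), acqCoalescerToken(), and outstandingReqsCheck() form the pipeline's admission control: a request issues only when the coalescer has a free token and its wavefront is below the per-wave in-flight cap. The following is a minimal standalone sketch of that check-then-acquire pattern; TokenPool and Wave are illustrative stand-ins, not gem5 classes.

#include <cassert>

// Illustrative stand-ins for gem5's TokenManager and the Wavefront's
// outstanding-request counters (hypothetical names, not gem5 API).
struct TokenPool {
    int avail;
    bool haveTokens(int n) const { return avail >= n; }
    void acquireTokens(int n) { assert(haveTokens(n)); avail -= n; }
    void recvTokens(int n) { avail += n; }  // tokens returned by the coalescer
};

struct Wave {
    int outstandingReqsRdGm = 0;
    int outstandingReqsWrGm = 0;
};

// A request may issue only if the coalescer has a free slot (token)
// and the wavefront is below its per-wave in-flight request cap.
bool canIssue(const TokenPool &pool, const Wave &w, int maxWaveRequests) {
    if (!pool.haveTokens(1))
        return false;  // coalescer busy: stall this cycle
    return (w.outstandingReqsRdGm + w.outstandingReqsWrGm) < maxWaveRequests;
}

int main() {
    TokenPool pool{4};
    Wave w;
    if (canIssue(pool, w, /*maxWaveRequests=*/8))
        pool.acquireTokens(1);  // mirrors acqCoalescerToken()
    return 0;
}

Checking and acquiring are kept separate so the scheduler can test readiness without committing; the assert in acqCoalescerToken() documents that contract.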
// GlobalMemPipeline::exec(): before completing a load (or the return
// component of an atomic), check that the VRF can accept the result.
GPUDynInstPtr m = getNextReadyResp();
bool accessVrf = true;
Wavefront *w = nullptr;

if (m && (m->isLoad() || m->isAtomicRet())) {
    w = m->wavefront();
    accessVrf = w->computeUnit->vrf[w->simdId]->
        canScheduleWriteOperandsFromLoad(w, m);
}
DPRINTF(GPUMem, "CU%d: WF[%d][%d]: Completing global mem instr %s\n",
        m->cu_id, m->simdId, m->wfSlotId, m->disassemble());
m->completeAcc(m);
if (m->isFlat()) {
    w->decLGKMInstsIssued();
}
w->decVMemInstsIssued();

// write the load return data (or atomic return value) back to the
// vector register file
if (m->isLoad() || m->isAtomicRet()) {
    w->computeUnit->vrf[w->simdId]->
        scheduleWriteOperandsFromLoad(w, m);
}
// update per-wavefront outstanding-request counters; writes and reads
// are tracked separately, and atomics and syncs count toward both
if (m->isStore() || m->isAtomic() || m->isMemSync()) {
    computeUnit.shader->sampleStore(accessTime);
    computeUnit.shader->ScheduleAdd(&w->outstandingReqsWrGm, m->time, -1);
}

if (m->isLoad() || m->isAtomic() || m->isMemSync()) {
    computeUnit.shader->sampleLoad(accessTime);
    computeUnit.shader->ScheduleAdd(&w->outstandingReqsRdGm, m->time, -1);
}

w->validateRequestCounters();

// occupy the vector global memory unit for this instruction's latency
w->computeUnit->vectorGlobalMemUnit.set(m->time);
// issue side: bound the number of in-flight loads and stores by the
// global memory queue size before initiating the access
if (mp->isLoad() || mp->isAtomic()) {
    if (inflightLoads >= gmQueueSize) {
        return;
    } else {
        ++inflightLoads;
    }
} else if (mp->isStore()) {
    if (inflightStores >= gmQueueSize) {
        return;
    } else {
        ++inflightStores;
    }
}

DPRINTF(GPUCoalescer, "initiateAcc for %s seqNum %d\n",
        mp->disassemble(), mp->seqNum());
if (mp->isStore() && mp->isGlobalSeg()) {
    mp->wavefront()->decExpInstsIssued();
}
// every instruction except an end-of-kernel memory sync gets an entry
// in the ordered response buffer, keyed by its unique sequence number
// and initially marked not-yet-done
if ((mp->isMemSync() && !mp->isEndOfKernel()) || !mp->isMemSync()) {
    gmOrderedRespBuffer.insert(std::make_pair(mp->seqNum(),
        std::make_pair(mp, false)));
}
// accesses that generate no memory requests (e.g., all lanes inactive)
// never receive a callback, so mark them complete as soon as they issue
if (!mp->isMemSync() && !mp->isEndOfKernel() && mp->allLanesZero()) {
    handleResponse(mp);
}
DPRINTF(GPUMem, "CU%d: WF[%d][%d] Popping 0 mem_op = \n",
        computeUnit.cu_id, mp->simdId, mp->wfSlotId);
// GlobalMemPipeline::getNextReadyResp(): service the oldest buffered
// response, but only once it has been flagged as done
auto mem_req = gmOrderedRespBuffer.begin();
if (mem_req->second.second) {
    return mem_req->second.first;
}
// GlobalMemPipeline::completeRequest(): retire the instruction and free
// its slot in the in-flight load or store count
if (gpuDynInst->isLoad() || gpuDynInst->isAtomic()) {
    assert(inflightLoads > 0);
    --inflightLoads;
} else if (gpuDynInst->isStore()) {
    assert(inflightStores > 0);
    --inflightStores;
}
// GlobalMemPipeline::issueRequest(): timestamp the access so its
// round-trip latency can be sampled when the response returns
gpuDynInst->setAccessTime(curTick());
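The setAccessTime()/curTick() pairing implements simple round-trip timing: stamp at issue, subtract at completion, and feed the difference to sampleLoad() or sampleStore(). A self-contained sketch of the idea, with the simulation clock stubbed out (not the gem5 implementation):

#include <cstdint>
#include <vector>

using Tick = uint64_t;

static Tick simTick = 0;             // stub for the simulation clock
Tick curTick() { return simTick; }

struct Request { Tick accessTime = 0; };

std::vector<Tick> loadLatencies;     // stand-in for Shader::sampleLoad()

void issue(Request &r) { r.accessTime = curTick(); }  // stamp at issue

void complete(Request &r) {
    // round-trip latency is completion tick minus issue tick
    loadLatencies.push_back(curTick() - r.accessTime);
}

int main() {
    Request r;
    issue(r);
    simTick += 400;                  // pretend memory took 400 ticks
    complete(r);                     // records a 400-tick round trip
    return 0;
}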
// GlobalMemPipeline::handleResponse(): flag the matching buffer entry as
// done; it is serviced once it reaches the head of the ordered buffer
mem_req->second.second = true;
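handleResponse() and getNextReadyResp() together enforce in-order completion over out-of-order responses: entries enter gmOrderedRespBuffer at issue, are flagged done whenever their response happens to arrive, but are only serviced from the head (the lowest sequence number). Below is a self-contained sketch of that data structure; the canWriteback flag plays the role of the accessVrf check, and all names are illustrative rather than gem5's.

#include <cassert>
#include <cstdint>
#include <map>
#include <memory>
#include <utility>

struct Inst { uint64_t seqNum; };
using InstPtr = std::shared_ptr<Inst>;

// seqNum -> (instruction, done?). std::map keeps keys sorted, so
// begin() is always the oldest in-flight instruction.
std::map<uint64_t, std::pair<InstPtr, bool>> respBuffer;

// issue: insert a not-yet-done entry keyed by the unique sequence number
void issue(const InstPtr &mp) {
    respBuffer.emplace(mp->seqNum, std::make_pair(mp, false));
}

// responses may arrive in any order; just flag the entry as done
void markDone(const InstPtr &mp) {
    respBuffer.at(mp->seqNum).second = true;
}

// only the head (oldest) entry may be serviced, and only once it is done
// and the downstream resource (the VRF write port in the real pipeline)
// can accept the data
InstPtr nextReady(bool canWriteback) {
    if (!respBuffer.empty() && canWriteback) {
        auto head = respBuffer.begin();
        if (head->second.second)
            return head->second.first;
    }
    return nullptr;
}

int main() {
    auto a = std::make_shared<Inst>(Inst{1});
    auto b = std::make_shared<Inst>(Inst{2});
    issue(a);
    issue(b);
    markDone(b);                        // younger response returns first...
    assert(nextReady(true) == nullptr); // ...but cannot complete yet
    markDone(a);
    assert(nextReady(true) == a);       // now the head is serviceable
    respBuffer.erase(a->seqNum);        // completeRequest() equivalent
    assert(nextReady(true) == b);
    return 0;
}

Using an ordered map rather than a FIFO lets responses be flagged done in any order while completion still proceeds in program order, which is exactly what the erase-by-seqNum in completeRequest() relies on.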
GlobalMemPipeline::GlobalMemPipelineStats::GlobalMemPipelineStats(
    Stats::Group *parent)
    : Stats::Group(parent, "GlobalMemPipeline"),
      ADD_STAT(loadVrfBankConflictCycles, "total number of cycles GM data "
               "are delayed before updating the VRF")
{
}
Symbol reference:

void issueRequest(GPUDynInstPtr gpuDynInst)
    Issues a request to the pipeline, i.e., enqueues it in the request buffer.
void recvTokens(int num_tokens)
    Increments the number of available tokens by num_tokens.
void acqCoalescerToken(GPUDynInstPtr mp)
    Acquires a token from the coalescer's token manager.
std::queue<GPUDynInstPtr> gmIssuedRequests
    Queue of issued requests awaiting initiation.
uint64_t Tick
    Tick count type.
void sampleLineRoundTrip(const std::map<Addr, std::vector<Tick>> &roundTripTime)
WaitClass vectorGlobalMemUnit
bool rdy(Cycles cycles=Cycles(0)) const
bool outstandingReqsCheck(GPUDynInstPtr mp) const
GPUDynInstPtr getNextReadyResp()
    Finds the next ready response to service.
#define ADD_STAT(n, ...)
    Convenience macro to add a stat to a statistics group.
void sampleInstRoundTrip(std::vector<Tick> roundTripTime)
const std::string &name()
GlobalMemPipeline(const ComputeUnitParams &p, ComputeUnit &cu)
TokenManager *getTokenManager()
GlobalMemPipelineStats(Stats::Group *parent)
void ScheduleAdd(int *val, Tick when, int x)
std::shared_ptr<GPUDynInst> GPUDynInstPtr
ComputeUnit &computeUnit
Tick curTick()
    The universal simulation clock.
void completeRequest(GPUDynInstPtr gpuDynInst)
    Once a memory request is finished, it is removed from the buffer.
void handleResponse(GPUDynInstPtr gpuDynInst)
    Handles responses sent to this GM pipeline by the CU.
bool coalescerReady(GPUDynInstPtr mp) const
void sampleLoad(const Tick accessTime)
std::map<uint64_t, std::pair<GPUDynInstPtr, bool>> gmOrderedRespBuffer
void sampleStore(const Tick accessTime)