32 #define __STDC_FORMAT_MACROS
34 #include "debug/GPUCoalescer.hh"
35 #include "debug/GPUMem.hh"
36 #include "debug/GPUReg.hh"
49 : computeUnit(cu), _name(cu.
name() +
".GlobalMemPipeline"),
50 gmQueueSize(
p.global_mem_queue_size),
51 maxWaveRequests(
p.max_wave_requests), inflightStores(0),
52 inflightLoads(0), stats(&cu)
67 assert(!
mp->isSystemReq());
76 DPRINTF(GPUCoalescer,
"Checking for %d tokens\n", token_count);
77 if (!
mp->computeUnit()->getTokenManager()->haveTokens(token_count)) {
78 DPRINTF(GPUCoalescer,
"Stalling inst because coalsr is busy!\n");
92 DPRINTF(GPUCoalescer,
"Acquiring %d token(s)\n", token_count);
93 assert(
mp->computeUnit()->getTokenManager()->haveTokens(token_count));
94 mp->computeUnit()->getTokenManager()->acquireTokens(token_count);
102 if ((
mp->wavefront()->outstandingReqsRdGm
116 bool accessVrf =
true;
121 if (
m && (
m->isLoad() ||
m->isAtomicRet())) {
124 accessVrf =
w->computeUnit->vrf[
w->simdId]->
125 canScheduleWriteOperandsFromLoad(
w,
m);
135 DPRINTF(GPUMem,
"CU%d: WF[%d][%d]: Completing global mem instr %s\n",
136 m->cu_id,
m->simdId,
m->wfSlotId,
m->disassemble());
139 w->decLGKMInstsIssued();
141 w->decVMemInstsIssued();
143 if (
m->isLoad() ||
m->isAtomicRet()) {
144 w->computeUnit->vrf[
w->simdId]->
145 scheduleWriteOperandsFromLoad(
w,
m);
154 if (
m->isStore() ||
m->isAtomic() ||
m->isMemSync()) {
160 if (
m->isLoad() ||
m->isAtomic() ||
m->isMemSync()) {
166 w->validateRequestCounters();
178 w->computeUnit->vectorGlobalMemUnit.set(
m->time);
186 if (
mp->isLoad() ||
mp->isAtomic()) {
192 }
else if (
mp->isStore()) {
200 DPRINTF(GPUCoalescer,
"initiateAcc for %s seqNum %d\n",
201 mp->disassemble(),
mp->seqNum());
204 if (
mp->isStore() &&
mp->isGlobalSeg()) {
205 mp->wavefront()->decExpInstsIssued();
208 if (((
mp->isMemSync() && !
mp->isEndOfKernel()) || !
mp->isMemSync())) {
219 std::make_pair(
mp,
false)));
222 if (!
mp->isMemSync() && !
mp->isEndOfKernel() &&
mp->allLanesZero()) {
238 DPRINTF(GPUMem,
"CU%d: WF[%d][%d] Popping 0 mem_op = \n",
249 if (mem_req->second.second) {
250 return mem_req->second.first;
260 if (gpuDynInst->isLoad() || gpuDynInst->isAtomic()) {
263 }
else if (gpuDynInst->isStore()) {
282 if (gpuDynInst->isLoad()) {
285 }
else if (gpuDynInst->isStore()) {
299 gpuDynInst->setAccessTime(
curTick());
312 mem_req->second.second =
true;
317 : statistics::
Group(parent,
"GlobalMemPipeline"),
318 ADD_STAT(loadVrfBankConflictCycles,
"total number of cycles GM data "
319 "are delayed before updating the VRF")