32 #define __STDC_FORMAT_MACROS
34 #include "debug/GPUCoalescer.hh"
35 #include "debug/GPUMem.hh"
36 #include "debug/GPUReg.hh"
49 : computeUnit(cu), _name(cu.
name() +
".GlobalMemPipeline"),
50 gmQueueSize(
p.global_mem_queue_size),
51 maxWaveRequests(
p.max_wave_requests), inflightStores(0),
52 inflightLoads(0), stats(&cu)
72 DPRINTF(GPUCoalescer,
"Checking for %d tokens\n", token_count);
73 if (!
mp->computeUnit()->getTokenManager()->haveTokens(token_count)) {
74 DPRINTF(GPUCoalescer,
"Stalling inst because coalsr is busy!\n");
88 DPRINTF(GPUCoalescer,
"Acquiring %d token(s)\n", token_count);
89 assert(
mp->computeUnit()->getTokenManager()->haveTokens(token_count));
90 mp->computeUnit()->getTokenManager()->acquireTokens(token_count);
98 if ((
mp->wavefront()->outstandingReqsRdGm
112 bool accessVrf =
true;
117 if (
m && (
m->isLoad() ||
m->isAtomicRet())) {
120 accessVrf =
w->computeUnit->vrf[
w->simdId]->
121 canScheduleWriteOperandsFromLoad(
w,
m);
131 DPRINTF(GPUMem,
"CU%d: WF[%d][%d]: Completing global mem instr %s\n",
132 m->cu_id,
m->simdId,
m->wfSlotId,
m->disassemble());
135 w->decLGKMInstsIssued();
137 w->decVMemInstsIssued();
139 if (
m->isLoad() ||
m->isAtomicRet()) {
140 w->computeUnit->vrf[
w->simdId]->
141 scheduleWriteOperandsFromLoad(
w,
m);
150 if (
m->isStore() ||
m->isAtomic() ||
m->isMemSync()) {
156 if (
m->isLoad() ||
m->isAtomic() ||
m->isMemSync()) {
162 w->validateRequestCounters();
174 w->computeUnit->vectorGlobalMemUnit.set(
m->time);
182 if (
mp->isLoad() ||
mp->isAtomic()) {
188 }
else if (
mp->isStore()) {
196 DPRINTF(GPUCoalescer,
"initiateAcc for %s seqNum %d\n",
197 mp->disassemble(),
mp->seqNum());
200 if (
mp->isStore() &&
mp->isGlobalSeg()) {
201 mp->wavefront()->decExpInstsIssued();
204 if (((
mp->isMemSync() && !
mp->isEndOfKernel()) || !
mp->isMemSync())) {
215 std::make_pair(
mp,
false)));
218 if (!
mp->isMemSync() && !
mp->isEndOfKernel() &&
mp->allLanesZero()) {
234 DPRINTF(GPUMem,
"CU%d: WF[%d][%d] Popping 0 mem_op = \n",
245 if (mem_req->second.second) {
246 return mem_req->second.first;
256 if (gpuDynInst->isLoad() || gpuDynInst->isAtomic()) {
259 }
else if (gpuDynInst->isStore()) {
278 if (gpuDynInst->isLoad()) {
281 }
else if (gpuDynInst->isStore()) {
295 gpuDynInst->setAccessTime(
curTick());
308 mem_req->second.second =
true;
313 : statistics::
Group(parent,
"GlobalMemPipeline"),
314 ADD_STAT(loadVrfBankConflictCycles,
"total number of cycles GM data "
315 "are delayed before updating the VRF")