#define __STDC_FORMAT_MACROS

#include "debug/GPUCoalescer.hh"
#include "debug/GPUMem.hh"
#include "debug/GPUReg.hh"

    : computeUnit(cu), _name(cu.name() + ".GlobalMemPipeline"),
      gmQueueSize(p.global_mem_queue_size),
      maxWaveRequests(p.max_wave_requests), inflightStores(0),
      inflightLoads(0), stats(&cu)
{
}
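
// The initializer list above captures the pipeline's configuration: a back
// reference to the owning ComputeUnit, the global memory queue size
// (gmQueueSize), a per-wavefront cap on outstanding requests
// (maxWaveRequests), and the in-flight store/load counters, which start at
// zero. (Descriptive comment; the roles of the two limits are inferred from
// their names and the checks later in this excerpt.)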

    DPRINTF(GPUCoalescer, "Checking for %d tokens\n", token_count);
    if (!mp->computeUnit()->getTokenManager()->haveTokens(token_count)) {
        DPRINTF(GPUCoalescer, "Stalling inst because coalescer is busy!\n");
        return false;
    }

    DPRINTF(GPUCoalescer, "Acquiring %d token(s)\n", token_count);
    assert(mp->computeUnit()->getTokenManager()->haveTokens(token_count));
    mp->computeUnit()->getTokenManager()->acquireTokens(token_count);
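
// Token protocol, as shown in the two routines above: availability is first
// checked with haveTokens() to decide whether an instruction may proceed, and
// the same count is later asserted and consumed with acquireTokens() once the
// request is actually handed to the coalescer.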

    if ((mp->wavefront()->outstandingReqsRdGm
         + mp->wavefront()->outstandingReqsWrGm) >= maxWaveRequests) {
        return false;
    }

    bool accessVrf = true;
    // A load, or the returning part of an atomic, can only complete if the
    // VRF is able to schedule the writes of its destination operands.
    if (m && (m->isLoad() || m->isAtomicRet())) {
        accessVrf = w->computeUnit->vrf[w->simdId]->
            canScheduleWriteOperandsFromLoad(w, m);
    }
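
// Note: accessVrf presumably gates the completion path below, so a load whose
// destination writes cannot yet be scheduled into the VRF is held back for
// this cycle (inferred from how accessVrf is computed here).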

    DPRINTF(GPUMem, "CU%d: WF[%d][%d]: Completing global mem instr %s\n",
            m->cu_id, m->simdId, m->wfSlotId, m->disassemble());
    w->decLGKMInstsIssued();
    w->decVMemInstsIssued();

    if (m->isLoad() || m->isAtomicRet()) {
        w->computeUnit->vrf[w->simdId]->
            scheduleWriteOperandsFromLoad(w, m);
    }

    if (m->isStore() || m->isAtomic() || m->isMemSync()) {
        // ...
    }

    if (m->isLoad() || m->isAtomic() || m->isMemSync()) {
        // ...
    }

    w->validateRequestCounters();
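
// Atomics satisfy both of the conditions above, so they pass through both the
// store-side and the load-side bookkeeping before the wavefront's request
// counters are validated.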

    // Occupy the vector global memory unit for this access.
    w->computeUnit->vectorGlobalMemUnit.set(m->time);

    if (mp->isLoad() || mp->isAtomic()) {
        // ...
    } else if (mp->isStore()) {
        // ...
    }

    DPRINTF(GPUCoalescer, "initiateAcc for %s seqNum %d\n",
            mp->disassemble(), mp->seqNum());

    if (mp->isStore() && mp->isGlobalSeg()) {
        mp->wavefront()->decExpInstsIssued();
    }
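
// The EXP issue counter is only decremented for stores to the global segment,
// since the condition above requires both isStore() and isGlobalSeg().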

    // True for everything except an end-of-kernel memory sync: record the
    // request in the ordered response buffer (gmOrderedRespBuffer), keyed by
    // its sequence number, with a flag that stays false until the response
    // comes back.
    if ((mp->isMemSync() && !mp->isEndOfKernel()) || !mp->isMemSync()) {
        gmOrderedRespBuffer.insert(
            std::make_pair(mp->seqNum(),
                std::make_pair(mp, false)));
    }

    // Special case: every lane of this non-sync access is inactive.
    if (!mp->isMemSync() && !mp->isEndOfKernel() && mp->allLanesZero()) {
        // ...
    }

    DPRINTF(GPUMem, "CU%d: WF[%d][%d] Popping 0 mem_op = \n",
            mp->cu_id, mp->simdId, mp->wfSlotId);

    if (mem_req->second.second) {
        return mem_req->second.first;
    }
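
// Each buffered entry pairs the dynamic instruction (second.first) with a
// flag (second.second) that is set once the memory response has arrived (see
// where second.second is assigned true below); only flagged entries are
// returned as ready responses.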

    if (gpuDynInst->isLoad() || gpuDynInst->isAtomic()) {
        // ... (presumably releases an in-flight load slot)
    } else if (gpuDynInst->isStore()) {
        // ... (presumably releases an in-flight store slot)
    }

    if (gpuDynInst->isLoad()) {
        // ...
    } else if (gpuDynInst->isStore()) {
        // ...
    }

    // Record when this access was sent out.
    gpuDynInst->setAccessTime(curTick());

    mem_req->second.second = true;
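
// Producer side of the flag checked earlier: once second.second is set to
// true, the buffered instruction becomes eligible to be picked up as a ready
// response and completed.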

    : statistics::Group(parent, "GlobalMemPipeline"),
      ADD_STAT(loadVrfBankConflictCycles, "total number of cycles GM data "
               "are delayed before updating the VRF")
{
}