32#define __STDC_FORMAT_MACROS
34#include "debug/GPUCoalescer.hh"
35#include "debug/GPUMem.hh"
36#include "debug/GPUReg.hh"
67 assert(!
mp->isSystemReq());
76 DPRINTF(GPUCoalescer,
"Checking for %d tokens\n", token_count);
77 if (!
mp->computeUnit()->getTokenManager()->haveTokens(token_count)) {
78 DPRINTF(GPUCoalescer,
"Stalling inst because coalsr is busy!\n");
92 DPRINTF(GPUCoalescer,
"Acquiring %d token(s)\n", token_count);
93 assert(
mp->computeUnit()->getTokenManager()->haveTokens(token_count));
94 mp->computeUnit()->getTokenManager()->acquireTokens(token_count);
102 if ((
mp->wavefront()->outstandingReqsRdGm
116 bool accessVrf =
true;
121 if (
m && (
m->isLoad() ||
m->isAtomicRet())) {
124 accessVrf =
w->computeUnit->vrf[
w->simdId]->
125 canScheduleWriteOperandsFromLoad(
w,
m);
129 if (
m &&
m->latency.rdy() &&
computeUnit.glbMemToVrfBus.rdy() &&
130 accessVrf && (
computeUnit.shader->coissue_return ||
135 DPRINTF(GPUMem,
"CU%d: WF[%d][%d]: Completing global mem instr %s\n",
136 m->cu_id,
m->simdId,
m->wfSlotId,
m->disassemble());
139 w->decLGKMInstsIssued();
140 w->untrackLGKMInst(
m);
142 w->decVMemInstsIssued();
143 w->untrackVMemInst(
m);
145 if (
m->isLoad() ||
m->isAtomicRet()) {
146 w->computeUnit->vrf[
w->simdId]->
147 scheduleWriteOperandsFromLoad(
w,
m);
155 computeUnit.shader->ScheduleAdd(&
w->outstandingReqs,
m->time, -1);
156 if (
m->isStore() ||
m->isAtomic() ||
m->isMemSync()) {
158 computeUnit.shader->ScheduleAdd(&
w->outstandingReqsWrGm,
162 if (
m->isLoad() ||
m->isAtomic() ||
m->isMemSync()) {
164 computeUnit.shader->ScheduleAdd(&
w->outstandingReqsRdGm,
168 w->validateRequestCounters();
174 computeUnit.shader->sampleInstRoundTrip(
m->getRoundTripTime());
175 computeUnit.shader->sampleLineRoundTrip(
m->getLineAddressTime());
180 w->computeUnit->vectorGlobalMemUnit.set(
m->time);
188 if (
mp->isLoad() ||
mp->isAtomic()) {
194 }
else if (
mp->isStore()) {
202 DPRINTF(GPUCoalescer,
"initiateAcc for %s seqNum %d\n",
203 mp->disassemble(),
mp->seqNum());
206 if (
mp->isStore() &&
mp->isGlobalSeg()) {
207 mp->wavefront()->decExpInstsIssued();
208 mp->wavefront()->untrackExpInst(
mp);
211 if (((
mp->isMemSync() && !
mp->isEndOfKernel()) || !
mp->isMemSync())) {
222 std::make_pair(
mp,
false)));
225 if (!
mp->isMemSync() && !
mp->isEndOfKernel() &&
mp->allLanesZero()) {
241 DPRINTF(GPUMem,
"CU%d: WF[%d][%d] Popping 0 mem_op = \n",
252 if (mem_req->second.second) {
253 return mem_req->second.first;
263 if (gpuDynInst->isLoad() || gpuDynInst->isAtomic()) {
266 }
else if (gpuDynInst->isStore()) {
285 if (gpuDynInst->isLoad()) {
288 }
else if (gpuDynInst->isStore()) {
302 gpuDynInst->setAccessTime(
curTick());
315 mem_req->second.second =
true;
326 auto &inst_pair = pair.second;
327 auto &inst = inst_pair.first;
328 std::cout <<
"\t" << inst->disassemble() <<
" -- " << inst_pair.second
337 "are delayed before updating the VRF")
GPUDynInstPtr getNextReadyResp()
Find the next ready response to service.
void completeRequest(GPUDynInstPtr gpuDynInst)
Once a memory request is finished, we remove it from the buffer.
void issueRequest(GPUDynInstPtr gpuDynInst)
Issues a request to the pipeline (i.e., enqueue it in the request buffer).
bool outstandingReqsCheck(GPUDynInstPtr mp) const
void handleResponse(GPUDynInstPtr gpuDynInst)
This method handles responses sent to this GM pipeline by the CU.
void acqCoalescerToken(GPUDynInstPtr mp)
GlobalMemPipeline(const ComputeUnitParams &p, ComputeUnit &cu)
std::map< uint64_t, std::pair< GPUDynInstPtr, bool > > gmOrderedRespBuffer
std::queue< GPUDynInstPtr > gmIssuedRequests
ComputeUnit & computeUnit
bool coalescerReady(GPUDynInstPtr mp) const
const std::string & name() const
gem5::GlobalMemPipeline::GlobalMemPipelineStats stats
void validateRequestCounters()
#define ADD_STAT(n,...)
Convenience macro to add a stat to a statistics group.
Copyright (c) 2024 Arm Limited All rights reserved.
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Tick curTick()
The universal simulation clock.
uint64_t Tick
Tick count type.
statistics::Scalar loadVrfBankConflictCycles
GlobalMemPipelineStats(statistics::Group *parent)