Go to the documentation of this file.
34 #define __STDC_FORMAT_MACROS
36 #include "debug/GPUCoalescer.hh"
37 #include "debug/GPUMem.hh"
38 #include "debug/GPUReg.hh"
48 : computeUnit(cu), _name(cu.
name() +
".GlobalMemPipeline"),
49 gmQueueSize(
p->global_mem_queue_size),
50 maxWaveRequests(
p->max_wave_requests), inflightStores(0),
72 if (!
mp->computeUnit()->getTokenManager()->haveTokens(token_count)) {
88 assert(
mp->computeUnit()->getTokenManager()->haveTokens(token_count));
89 mp->computeUnit()->getTokenManager()->acquireTokens(token_count);
97 if ((
mp->wavefront()->outstandingReqsRdGm
111 bool accessVrf =
true;
116 if (
m && (
m->isLoad() ||
m->isAtomicRet())) {
119 accessVrf =
w->computeUnit->vrf[
w->simdId]->
120 canScheduleWriteOperandsFromLoad(
w,
m);
130 DPRINTF(GPUMem,
"CU%d: WF[%d][%d]: Completing global mem instr %s\n",
131 m->cu_id,
m->simdId,
m->wfSlotId,
m->disassemble());
133 w->decVMemInstsIssued();
135 if (
m->isLoad() ||
m->isAtomicRet()) {
136 w->computeUnit->vrf[
w->simdId]->
137 scheduleWriteOperandsFromLoad(
w,
m);
146 if (
m->isStore() ||
m->isAtomic() ||
m->isMemSync()) {
152 if (
m->isLoad() ||
m->isAtomic() ||
m->isMemSync()) {
158 w->validateRequestCounters();
170 w->computeUnit->vectorGlobalMemUnit.set(
m->time);
178 if (
mp->isLoad() ||
mp->isAtomic()) {
184 }
else if (
mp->isStore()) {
193 mp->disassemble(),
mp->seqNum());
196 if (((
mp->isMemSync() && !
mp->isEndOfKernel()) || !
mp->isMemSync())) {
207 std::make_pair(
mp,
false)));
210 if (!
mp->isMemSync() && !
mp->isEndOfKernel() &&
mp->allLanesZero()) {
226 DPRINTF(GPUMem,
"CU%d: WF[%d][%d] Popping 0 mem_op = \n",
237 if (mem_req->second.second) {
238 return mem_req->second.first;
248 if (gpuDynInst->isLoad() || gpuDynInst->isAtomic()) {
251 }
else if (gpuDynInst->isStore()) {
269 gpuDynInst->setAccessTime(
curTick());
282 mem_req->second.second =
true;
289 .
name(
name() +
".load_vrf_bank_conflict_cycles")
290 .
desc(
"total number of cycles GM data are delayed before updating "
void issueRequest(GPUDynInstPtr gpuDynInst)
Issues a request to the pipeline (i.e., enqueue it in the request buffer).
void recvTokens(int num_tokens)
Increment the number of available tokens by num_tokens.
void acqCoalescerToken(GPUDynInstPtr mp)
std::queue< GPUDynInstPtr > gmIssuedRequests
uint64_t Tick
Tick count type.
void sampleLineRoundTrip(const std::map< Addr, std::vector< Tick >> &roundTripTime)
WaitClass vectorGlobalMemUnit
bool rdy(Cycles cycles=Cycles(0)) const
Stats::Scalar loadVrfBankConflictCycles
bool outstandingReqsCheck(GPUDynInstPtr mp) const
GPUDynInstPtr getNextReadyResp()
Find the next ready response to service.
GlobalMemPipeline(const ComputeUnitParams *p, ComputeUnit &cu)
void sampleInstRoundTrip(std::vector< Tick > roundTripTime)
Derived & name(const std::string &name)
Sets the name and marks this stat to print at the end of simulation.
const std::string & name()
TokenManager * getTokenManager()
void ScheduleAdd(int *val, Tick when, int x)
std::shared_ptr< GPUDynInst > GPUDynInstPtr
ComputeUnit & computeUnit
const std::string & name() const
void completeRequest(GPUDynInstPtr gpuDynInst)
Once a memory request is finished, we remove it from the buffer.
void handleResponse(GPUDynInstPtr gpuDynInst)
This method handles responses sent to this GM pipeline by the CU.
bool coalescerReady(GPUDynInstPtr mp) const
Derived & desc(const std::string &_desc)
Sets the description and marks this stat to print at the end of simulation.
void sampleLoad(const Tick accessTime)
std::map< uint64_t, std::pair< GPUDynInstPtr, bool > > gmOrderedRespBuffer
void sampleStore(const Tick accessTime)
Tick curTick()
The current simulated tick.
Generated on Wed Sep 30 2020 14:02:12 for gem5 by doxygen 1.8.17