34#include "debug/GPUMem.hh"
35#include "debug/GPUPort.hh"
46 : computeUnit(cu), _name(cu.
name() +
".LocalMemPipeline"),
47 lmQueueSize(
p.local_mem_queue_size), stats(&cu)
58 bool accessVrf =
true;
61 if ((
m) &&
m->latency.rdy() && (
m->isLoad() ||
m->isAtomicRet())) {
65 canScheduleWriteOperandsFromLoad(
w,
m);
77 if (
m->isFlat() && !
m->isMemSync() && !
m->isEndOfKernel()
78 &&
m->allLanesZero()) {
82 DPRINTF(GPUMem,
"CU%d: WF[%d][%d]: Completing local mem instr %s\n",
83 m->cu_id,
m->simdId,
m->wfSlotId,
m->disassemble());
85 w->decLGKMInstsIssued();
87 if (
m->isLoad() ||
m->isAtomicRet()) {
88 w->computeUnit->vrf[
w->simdId]->
89 scheduleWriteOperandsFromLoad(
w,
m);
95 if (
m->isStore() ||
m->isAtomic()) {
100 if (
m->isLoad() ||
m->isAtomic()) {
108 w->computeUnit->vectorSharedMemUnit.set(
m->time);
120 DPRINTF(GPUPort,
"packet was nack'd and put in retry queue");
130 if (gpuDynInst->isLoad()) {
133 }
else if (gpuDynInst->isStore()) {
147 gpuDynInst->setAccessTime(
curTick());
154 : statistics::
Group(parent,
"LocalMemPipeline"),
155 ADD_STAT(loadVrfBankConflictCycles,
"total number of cycles LDS data "
156 "are delayed before updating the VRF")
TokenManager * getTokenManager()
bool sendToLds(GPUDynInstPtr gpuDynInst)
send a general request to the LDS make sure to look at the return value here as your request might be...
WaitClass vectorSharedMemUnit
std::vector< VectorRegisterFile * > vrf
ComputeUnit & computeUnit
std::queue< GPUDynInstPtr > lmReturnedRequests
LocalMemPipeline(const ComputeUnitParams &p, ComputeUnit &cu)
void issueRequest(GPUDynInstPtr gpuDynInst)
std::queue< GPUDynInstPtr > lmIssuedRequests
void ScheduleAdd(int *val, Tick when, int x)
void recvTokens(int num_tokens)
Increment the number of available tokens by num_tokens.
bool rdy(Cycles cycles=Cycles(0)) const
void validateRequestCounters()
ComputeUnit * computeUnit
#define ADD_STAT(n,...)
Convenience macro to add a stat to a statistics group.
Copyright (c) 2024 - Pranith Kumar Copyright (c) 2020 Inria All rights reserved.
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Tick curTick()
The universal simulation clock.
LocalMemPipelineStats(statistics::Group *parent)
const std::string & name()