gem5 v22.1.0.0
local_memory_pipeline.cc
/*
 * Copyright (c) 2014-2015 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "gpu-compute/local_memory_pipeline.hh"

#include "debug/GPUMem.hh"
#include "debug/GPUPort.hh"
#include "gpu-compute/compute_unit.hh"
#include "gpu-compute/gpu_dyn_inst.hh"
#include "gpu-compute/shader.hh"
#include "gpu-compute/vector_register_file.hh"
#include "gpu-compute/wavefront.hh"

namespace gem5
{

LocalMemPipeline::LocalMemPipeline(const ComputeUnitParams &p, ComputeUnit &cu)
    : computeUnit(cu), _name(cu.name() + ".LocalMemPipeline"),
      lmQueueSize(p.local_mem_queue_size), stats(&cu)
{
}

void
LocalMemPipeline::exec()
{
    // apply any returned shared (LDS) memory operations
    GPUDynInstPtr m = !lmReturnedRequests.empty() ?
        lmReturnedRequests.front() : nullptr;

    bool accessVrf = true;
    Wavefront *w = nullptr;

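    // For a returning load or atomic-with-return, check whether the VRF can
    // accept the operand writeback before the instruction is completed below.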
    if ((m) && m->latency.rdy() && (m->isLoad() || m->isAtomicRet())) {
        w = m->wavefront();

        accessVrf = w->computeUnit->vrf[w->simdId]->
            canScheduleWriteOperandsFromLoad(w, m);

    }

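    // Complete the request at the head of the returned queue once its latency
    // has elapsed, the VRF writeback can be scheduled, and the LDS-to-VRF bus
    // (and, unless returns may coissue, the shared memory unit) is ready.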
    if (!lmReturnedRequests.empty() && m->latency.rdy() && accessVrf &&
        computeUnit.locMemToVrfBus.rdy()
        && (computeUnit.shader->coissue_return
        || computeUnit.vectorSharedMemUnit.rdy())) {

        lmReturnedRequests.pop();
        w = m->wavefront();

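        // A flat instruction that was handled entirely by the LDS gives a
        // token back to the compute unit's token manager.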
        if (m->isFlat() && !m->isMemSync() && !m->isEndOfKernel()
            && m->allLanesZero()) {
            computeUnit.getTokenManager()->recvTokens(1);
        }

        DPRINTF(GPUMem, "CU%d: WF[%d][%d]: Completing local mem instr %s\n",
                m->cu_id, m->simdId, m->wfSlotId, m->disassemble());
        m->completeAcc(m);
        w->decLGKMInstsIssued();

        if (m->isLoad() || m->isAtomicRet()) {
            w->computeUnit->vrf[w->simdId]->
                scheduleWriteOperandsFromLoad(w, m);
        }

        // Decrement outstanding request count
        computeUnit.shader->ScheduleAdd(&w->outstandingReqs, m->time, -1);

        if (m->isStore() || m->isAtomic()) {
            computeUnit.shader->ScheduleAdd(&w->outstandingReqsWrLm,
                                            m->time, -1);
        }

        if (m->isLoad() || m->isAtomic()) {
            computeUnit.shader->ScheduleAdd(&w->outstandingReqsRdLm,
                                            m->time, -1);
        }

        // Mark write bus busy for appropriate amount of time
        computeUnit.locMemToVrfBus.set(m->time);
        if (computeUnit.shader->coissue_return == 0)
            w->computeUnit->vectorSharedMemUnit.set(m->time);
    }

    // If there is room in the returned-request queue, take the next issued
    // local memory instruction and send its packets to the LDS.
    if (!lmIssuedRequests.empty() && lmReturnedRequests.size() < lmQueueSize) {

        GPUDynInstPtr m = lmIssuedRequests.front();

        bool returnVal = computeUnit.sendToLds(m);
        if (!returnVal) {
            DPRINTF(GPUPort, "packet was nack'd and put in retry queue");
        }
        lmIssuedRequests.pop();
    }
}

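// Called when a local (LDS) memory instruction is issued: move the request
// from the wavefront's in-pipe counters to its outstanding counters and
// queue it so exec() can forward it to the LDS.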
void
LocalMemPipeline::issueRequest(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    if (gpuDynInst->isLoad()) {
        wf->rdLmReqsInPipe--;
        wf->outstandingReqsRdLm++;
    } else if (gpuDynInst->isStore()) {
        wf->wrLmReqsInPipe--;
        wf->outstandingReqsWrLm++;
    } else {
        // Atomic, both read and write
        wf->rdLmReqsInPipe--;
        wf->outstandingReqsRdLm++;
        wf->wrLmReqsInPipe--;
        wf->outstandingReqsWrLm++;
    }

    wf->outstandingReqs++;
    wf->validateRequestCounters();

    gpuDynInst->setAccessTime(curTick());
    lmIssuedRequests.push(gpuDynInst);
}

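// Statistics for this pipeline, registered as the "LocalMemPipeline" group
// under the parent statistics group (the owning compute unit).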
LocalMemPipeline::
LocalMemPipelineStats::LocalMemPipelineStats(statistics::Group *parent)
    : statistics::Group(parent, "LocalMemPipeline"),
      ADD_STAT(loadVrfBankConflictCycles, "total number of cycles LDS data "
               "are delayed before updating the VRF")
{
}

} // namespace gem5