gem5 v24.0.0.0
local_memory_pipeline.cc
Go to the documentation of this file.
/*
 * Copyright (c) 2014-2015 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "gpu-compute/local_memory_pipeline.hh"

#include "debug/GPUMem.hh"
#include "debug/GPUPort.hh"
#include "gpu-compute/compute_unit.hh"
#include "gpu-compute/gpu_dyn_inst.hh"
#include "gpu-compute/shader.hh"
#include "gpu-compute/vector_register_file.hh"
#include "gpu-compute/wavefront.hh"

namespace gem5
{

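// The pipeline belongs to a single ComputeUnit. lmQueueSize, taken from the
// compute unit's local_mem_queue_size parameter, limits how many requests may
// be outstanding to the LDS at any one time.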
LocalMemPipeline::LocalMemPipeline(const ComputeUnitParams &p, ComputeUnit &cu)
    : computeUnit(cu), _name(cu.name() + ".LocalMemPipeline"),
      lmQueueSize(p.local_mem_queue_size), stats(&cu)
{
}

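// exec() is invoked once per compute-unit cycle and does two independent
// pieces of work: it completes the oldest returned LDS request (writing load
// and atomic results back to the VRF and releasing the wavefront's
// outstanding-request counters), and it forwards the oldest issued request to
// the LDS when the return queue still has room.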
void
LocalMemPipeline::exec()
{
    // apply any returned shared (LDS) memory operations
    GPUDynInstPtr m = !lmReturnedRequests.empty() ?
        lmReturnedRequests.front() : nullptr;

    bool accessVrf = true;
    Wavefront *w = nullptr;

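    // Loads and value-returning atomics can only complete once the vector
    // register file has room to accept the returned operands, so check that
    // before committing to pop the request below.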
    if ((m) && m->latency.rdy() && (m->isLoad() || m->isAtomicRet())) {
        w = m->wavefront();

        accessVrf = w->computeUnit->vrf[w->simdId]->
            canScheduleWriteOperandsFromLoad(w, m);
    }

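    // Complete the oldest returned request only when its modeled latency has
    // elapsed, the VRF can take the result, the LDS-to-VRF return bus is
    // free and, unless the shader allows co-issued returns, the vector
    // shared memory unit is free as well.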
    if (!lmReturnedRequests.empty() && m->latency.rdy() && accessVrf &&
        computeUnit.locMemToVrfBus.rdy() &&
        (computeUnit.shader->coissue_return ||
         computeUnit.vectorSharedMemUnit.rdy())) {

        lmReturnedRequests.pop();
        w = m->wavefront();

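        // FLAT instructions reserve a token for the global memory port when
        // they are issued; one that completed here without needing the
        // global memory path returns that token to the compute unit's
        // token manager.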
        if (m->isFlat() && !m->isMemSync() && !m->isEndOfKernel()
            && m->allLanesZero()) {
            computeUnit.getTokenManager()->recvTokens(1);
        }

        DPRINTF(GPUMem, "CU%d: WF[%d][%d]: Completing local mem instr %s\n",
                m->cu_id, m->simdId, m->wfSlotId, m->disassemble());
        m->completeAcc(m);
        w->decLGKMInstsIssued();

        if (m->isLoad() || m->isAtomicRet()) {
            w->computeUnit->vrf[w->simdId]->
                scheduleWriteOperandsFromLoad(w, m);
        }

        // Decrement outstanding request count
        computeUnit.shader->ScheduleAdd(&w->outstandingReqs, m->time, -1);

        if (m->isStore() || m->isAtomic()) {
            computeUnit.shader->ScheduleAdd(&w->outstandingReqsWrLm,
                                            m->time, -1);
        }

        if (m->isLoad() || m->isAtomic()) {
            computeUnit.shader->ScheduleAdd(&w->outstandingReqsRdLm,
                                            m->time, -1);
        }

        // Mark write bus busy for appropriate amount of time
        computeUnit.locMemToVrfBus.set(m->time);
        if (computeUnit.shader->coissue_return == 0)
            w->computeUnit->vectorSharedMemUnit.set(m->time);
    }

    // If pipeline has executed a local memory instruction
    // execute local memory packet and issue the packets
    // to LDS
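    // At most one request is sent to the LDS per cycle, and only while the
    // return queue has fewer than lmQueueSize entries; otherwise the request
    // simply waits in lmIssuedRequests.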
    if (!lmIssuedRequests.empty() && lmReturnedRequests.size() < lmQueueSize) {

        GPUDynInstPtr m = lmIssuedRequests.front();

        bool returnVal = computeUnit.sendToLds(m);
        if (!returnVal) {
            DPRINTF(GPUPort, "packet was nack'd and put in retry queue");
        }
        lmIssuedRequests.pop();
    }
}

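// issueRequest() is called when a local memory (LDS) instruction is handed to
// this pipeline: it moves the wavefront's "in pipe" counters over to the
// corresponding outstanding-request counters and queues the instruction so a
// later exec() call can forward it to the LDS.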
void
LocalMemPipeline::issueRequest(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    if (gpuDynInst->isLoad()) {
        wf->rdLmReqsInPipe--;
        wf->outstandingReqsRdLm++;
    } else if (gpuDynInst->isStore()) {
        wf->wrLmReqsInPipe--;
        wf->outstandingReqsWrLm++;
    } else {
        // Atomic, both read and write
        wf->rdLmReqsInPipe--;
        wf->outstandingReqsRdLm++;
        wf->wrLmReqsInPipe--;
        wf->outstandingReqsWrLm++;
    }

    wf->outstandingReqs++;
    wf->validateRequestCounters();

    gpuDynInst->setAccessTime(curTick());
    lmIssuedRequests.push(gpuDynInst);
}

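// Statistics are grouped under the owning compute unit as "LocalMemPipeline";
// the single stat counts cycles in which LDS return data had to wait before
// it could be written into the VRF.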
LocalMemPipeline::
LocalMemPipelineStats::LocalMemPipelineStats(statistics::Group *parent)
    : statistics::Group(parent, "LocalMemPipeline"),
      ADD_STAT(loadVrfBankConflictCycles, "total number of cycles LDS data "
               "are delayed before updating the VRF")
{
}

} // namespace gem5

Generated on Tue Jun 18 2024 16:24:04 for gem5 by doxygen 1.11.0