gem5 v24.0.0.0
scalar_memory_pipeline.cc
/*
 * Copyright (c) 2016-2017 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "gpu-compute/scalar_memory_pipeline.hh"

#include "debug/GPUMem.hh"
#include "debug/GPUReg.hh"
#include "gpu-compute/compute_unit.hh"
#include "gpu-compute/gpu_dyn_inst.hh"
#include "gpu-compute/scalar_register_file.hh"
#include "gpu-compute/shader.hh"
#include "gpu-compute/wavefront.hh"

namespace gem5
{

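// The scalar memory pipeline buffers scalar loads, stores, and atomics
// between the wavefronts and the scalar cache. issuedRequests holds
// instructions that have been issued but not yet sent to memory, while
// returnedLoads and returnedStores hold instructions whose responses have
// come back and are waiting to complete (e.g. to write their results into
// the scalar register file, the SRF).
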
ScalarMemPipeline::ScalarMemPipeline(const ComputeUnitParams &p,
                                     ComputeUnit &cu)
    : computeUnit(cu), _name(cu.name() + ".ScalarMemPipeline"),
      queueSize(p.scalar_mem_queue_size),
      inflightStores(0), inflightLoads(0)
{
}

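// exec() is called by the compute unit each cycle. It first tries to
// complete the oldest returned scalar request (writing any load or atomic
// return data back to the SRF), and then tries to initiate the access for
// the next queued scalar memory instruction.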
void
ScalarMemPipeline::exec()
{
    // find the oldest scalar request whose data has arrived
    GPUDynInstPtr m = !returnedLoads.empty() ? returnedLoads.front() :
        !returnedStores.empty() ? returnedStores.front() : nullptr;

    Wavefront *w = nullptr;

    bool accessSrf = true;
    // check the SRF to see if the operands of a load (or load component
    // of an atomic) are accessible
    if ((m) && (m->isLoad() || m->isAtomicRet())) {
        w = m->wavefront();

        accessSrf =
            w->computeUnit->srf[w->simdId]->
            canScheduleWriteOperandsFromLoad(w, m);
    }

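    // A returned request may complete only once its modeled latency has
    // elapsed, the bus from scalar memory to the SRF is free, and (for
    // loads and atomics with return values) the SRF can accept the
    // register writes.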
    if ((!returnedStores.empty() || !returnedLoads.empty()) &&
        m->latency.rdy() && computeUnit.scalarMemToSrfBus.rdy() &&
        accessSrf &&
        (computeUnit.shader->coissue_return ||
         computeUnit.scalarMemUnit.rdy())) {

        w = m->wavefront();

        if (m->isLoad() || m->isAtomicRet()) {
            w->computeUnit->srf[w->simdId]->
                scheduleWriteOperandsFromLoad(w, m);
        }

        m->completeAcc(m);
        w->decLGKMInstsIssued();

        if (m->isLoad() || m->isAtomic()) {
            returnedLoads.pop();
            assert(inflightLoads > 0);
            --inflightLoads;
        } else {
            returnedStores.pop();
            assert(inflightStores > 0);
            --inflightStores;
        }
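
        // These counter updates are not applied immediately; ScheduleAdd
        // defers each -1 adjustment by the instruction's memory time
        // (m->time).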

        // Decrement outstanding register count
        computeUnit.shader->ScheduleAdd(&w->outstandingReqs, m->time, -1);

        if (m->isStore() || m->isAtomic()) {
            computeUnit.shader->ScheduleAdd(&w->scalarOutstandingReqsWrGm,
                                            m->time, -1);
        }

        if (m->isLoad() || m->isAtomic()) {
            computeUnit.shader->ScheduleAdd(&w->scalarOutstandingReqsRdGm,
                                            m->time, -1);
        }

        // Mark write bus busy for appropriate amount of time
        computeUnit.scalarMemToSrfBus.set(m->time);
        if (!computeUnit.shader->coissue_return)
            w->computeUnit->scalarMemUnit.set(m->time);
    }

    // If a scalar memory instruction has been issued to the pipeline,
    // initiate its access and send its packets on to the scalar
    // DTLB and cache.
    if (!issuedRequests.empty()) {
        GPUDynInstPtr mp = issuedRequests.front();
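
        // Respect the scalar memory queue size: the request is only sent
        // this cycle if it keeps the number of in-flight loads (or stores)
        // within queueSize.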
        if (mp->isLoad() || mp->isAtomic()) {

            if (inflightLoads >= queueSize) {
                return;
            } else {
                ++inflightLoads;
            }
        } else {
            if (inflightStores >= queueSize) {
                return;
            } else {
                ++inflightStores;
            }
        }
        mp->initiateAcc(mp);
        issuedRequests.pop();

        DPRINTF(GPUMem, "CU%d: WF[%d][%d] Popping scalar mem_op\n",
                computeUnit.cu_id, mp->simdId, mp->wfSlotId);
    }
}

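// issueRequest() is called when a wavefront hands a scalar memory
// instruction to this pipeline: it updates the wavefront's outstanding
// request counters and enqueues the instruction for exec() to send out.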
void
ScalarMemPipeline::issueRequest(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    if (gpuDynInst->isLoad()) {
        wf->scalarRdGmReqsInPipe--;
        wf->scalarOutstandingReqsRdGm++;
    } else if (gpuDynInst->isStore()) {
        wf->scalarWrGmReqsInPipe--;
        wf->scalarOutstandingReqsWrGm++;
    }

    wf->outstandingReqs++;
    wf->validateRequestCounters();

    issuedRequests.push(gpuDynInst);
}

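// injectScalarMemFence() builds and schedules the invalidation packets for
// both the SQC (scalar instruction cache) and the scalar data cache, either
// for an implicit kernel-launch invalidate or for an explicit S_ICACHE_INV
// instruction.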
void
ScalarMemPipeline::injectScalarMemFence(GPUDynInstPtr gpuDynInst,
                                        bool kernelMemSync,
                                        RequestPtr req)
{
    assert(gpuDynInst->isScalar());

    if (!req) {
        req = std::make_shared<Request>(
            0, 0, 0, computeUnit.requestorId(), 0, gpuDynInst->wfDynId);
    } else {
        req->requestorId(computeUnit.requestorId());
    }

    // When the SQC invalidate instruction is executed, it calls
    // injectScalarMemFence. The instruction does not contain an address
    // as one of its operands. Therefore, set the physical address of the
    // invalidation request to 0 and handle it in the sequencer
    req->setPaddr(0);

    PacketPtr sqc_pkt = nullptr;

    // If kernelMemSync is true, then the invalidation request is from
    // kernel launch and is an implicit invalidation. If false, then it is
    // due to an S_ICACHE_INV instruction
    if (kernelMemSync) {
        req->setCacheCoherenceFlags(Request::INV_L1);
        req->setReqInstSeqNum(gpuDynInst->seqNum());
        req->setFlags(Request::KERNEL);
        sqc_pkt = new Packet(req, MemCmd::MemSyncReq);
        sqc_pkt->pushSenderState(
            new ComputeUnit::SQCPort::SenderState(
                gpuDynInst->wavefront(), nullptr));
    } else {
        gpuDynInst->setRequestFlags(req);

        req->setReqInstSeqNum(gpuDynInst->seqNum());

        sqc_pkt = new Packet(req, MemCmd::MemSyncReq);
        sqc_pkt->pushSenderState(
            new ComputeUnit::SQCPort::SenderState(
                gpuDynInst->wavefront(), nullptr));
    }
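
    // Schedule the invalidate packet to be sent out the SQC port.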
    ComputeUnit::SQCPort::MemReqEvent *sqc_event =
        new ComputeUnit::SQCPort::MemReqEvent
        (computeUnit.sqcPort, sqc_pkt);
    computeUnit.schedule(sqc_event, curTick() +
                         computeUnit.scalar_req_tick_latency);

    // When the SQC is invalidated, perform a scalar cache invalidate as well.
    // The SQC and scalar cache are implemented using the same SLICC state
    // machine, so this invalidate is identical to the SQC invalidate; however,
    // we need to make a new packet and request as they have different cache
    // destinations.
    PacketPtr scalar_pkt = nullptr;
    RequestPtr scalar_req(req);

    if (kernelMemSync) {
        scalar_req->setCacheCoherenceFlags(Request::INV_L1);
        scalar_req->setReqInstSeqNum(gpuDynInst->seqNum());
        scalar_req->setFlags(Request::KERNEL);
        scalar_pkt = new Packet(scalar_req, MemCmd::MemSyncReq);
        scalar_pkt->pushSenderState(
            new ComputeUnit::ScalarDataPort::SenderState(
                gpuDynInst));
    } else {
        gpuDynInst->setRequestFlags(scalar_req);

        scalar_req->setReqInstSeqNum(gpuDynInst->seqNum());

        scalar_pkt = new Packet(scalar_req, MemCmd::MemSyncReq);
        scalar_pkt->pushSenderState(
            new ComputeUnit::ScalarDataPort::SenderState(
                gpuDynInst));
    }
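
    // Likewise, schedule the scalar cache invalidate on the scalar data port.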
    ComputeUnit::ScalarDataPort::MemReqEvent *scalar_event =
        new ComputeUnit::ScalarDataPort::MemReqEvent
        (computeUnit.scalarDataPort, scalar_pkt);
    computeUnit.schedule(scalar_event, curTick() +
                         computeUnit.scalar_req_tick_latency);
}

} // namespace gem5