gem5 [DEVELOP-FOR-25.0]
scalar_memory_pipeline.cc
/*
 * Copyright (c) 2016-2017 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include "gpu-compute/scalar_memory_pipeline.hh"

#include <iostream>

#include "debug/GPUMem.hh"
#include "debug/GPUReg.hh"
#include "gpu-compute/compute_unit.hh"
#include "gpu-compute/gpu_dyn_inst.hh"
#include "gpu-compute/scalar_register_file.hh"
#include "gpu-compute/shader.hh"
#include "gpu-compute/wavefront.hh"

namespace gem5
{

ScalarMemPipeline::ScalarMemPipeline(const ComputeUnitParams &p,
                                     ComputeUnit &cu)
    : computeUnit(cu), _name(cu.name() + ".ScalarMemPipeline"),
      queueSize(p.scalar_mem_queue_size),
      inflightStores(0), inflightLoads(0)
{
}
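
// exec() is ticked every ComputeUnit cycle. Its first half completes the
// oldest returned request, scheduling the result write into the scalar
// register file (SRF) for loads and returning atomics; its second half
// issues the request at the head of issuedRequests to memory, bounded by
// the in-flight queue limits below.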
void
ScalarMemPipeline::exec()
{
    // find oldest scalar request whose data has arrived
    GPUDynInstPtr m = !returnedLoads.empty() ? returnedLoads.front() :
        !returnedStores.empty() ? returnedStores.front() : nullptr;

    Wavefront *w = nullptr;

    bool accessSrf = true;
    // check the SRF to see if the operands of a load (or load component
    // of an atomic) are accessible
    if ((m) && (m->isLoad() || m->isAtomicRet())) {
        w = m->wavefront();

        accessSrf =
            w->computeUnit->srf[w->simdId]->
            canScheduleWriteOperandsFromLoad(w, m);
    }
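
    // The oldest returned request may complete only when its modeled
    // latency has elapsed, the scalar-memory-to-SRF bus is free, the SRF
    // can accept the operand writes, and, unless returns are allowed to
    // co-issue (shader->coissue_return), the scalar memory unit is free.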
    if ((!returnedStores.empty() || !returnedLoads.empty()) &&
        m->latency.rdy() && computeUnit.scalarMemToSrfBus.rdy() &&
        accessSrf &&
        (computeUnit.shader->coissue_return ||
         computeUnit.scalarMemUnit.rdy())) {

        w = m->wavefront();

        if (m->isLoad() || m->isAtomicRet()) {
            w->computeUnit->srf[w->simdId]->
                scheduleWriteOperandsFromLoad(w, m);
        }

        m->completeAcc(m);
        w->decLGKMInstsIssued();
        w->untrackLGKMInst(m);

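        // Atomics are tracked on the loads queue: like loads, a returning
        // atomic carries data back to the wavefront.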
        if (m->isLoad() || m->isAtomic()) {
            returnedLoads.pop();
            assert(inflightLoads > 0);
            --inflightLoads;
        } else {
            returnedStores.pop();
            assert(inflightStores > 0);
            --inflightStores;
        }

        // Decrement outstanding register count
        computeUnit.shader->ScheduleAdd(&w->outstandingReqs, m->time, -1);

        if (m->isStore() || m->isAtomic()) {
            computeUnit.shader->ScheduleAdd(&w->scalarOutstandingReqsWrGm,
                                            m->time, -1);
        }

        if (m->isLoad() || m->isAtomic()) {
            computeUnit.shader->ScheduleAdd(&w->scalarOutstandingReqsRdGm,
                                            m->time, -1);
        }

        // Mark write bus busy for appropriate amount of time
        computeUnit.scalarMemToSrfBus.set(m->time);
        if (!computeUnit.shader->coissue_return)
            w->computeUnit->scalarMemUnit.set(m->time);
    }
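
    // Issue side: at most queueSize loads and queueSize stores may be in
    // flight at once. A request that cannot issue blocks younger requests
    // behind it, since issuedRequests drains strictly in order.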
    // If the pipeline holds an issued scalar memory instruction, initiate
    // its access: form the memory packets and issue them to the DTLB
    if (!issuedRequests.empty()) {
        GPUDynInstPtr mp = issuedRequests.front();
        if (mp->isLoad() || mp->isAtomic()) {

            if (inflightLoads >= queueSize) {
                return;
            } else {
                ++inflightLoads;
            }
        } else {
            if (inflightStores >= queueSize) {
                return;
            } else {
                ++inflightStores;
            }
        }
        mp->initiateAcc(mp);
        issuedRequests.pop();

        DPRINTF(GPUMem, "CU%d: WF[%d][%d] Popping scalar mem_op\n",
                computeUnit.cu_id, mp->simdId, mp->wfSlotId);
    }
}
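
// issueRequest() is called when a scalar memory instruction is executed.
// It updates the wavefront's counters of scalar requests in the pipeline
// and outstanding in memory, then queues the instruction for the issue
// half of exec().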
void
ScalarMemPipeline::issueRequest(GPUDynInstPtr gpuDynInst)
{
    Wavefront *wf = gpuDynInst->wavefront();
    if (gpuDynInst->isLoad()) {
        wf->scalarRdGmReqsInPipe--;
        wf->scalarOutstandingReqsRdGm++;
    } else if (gpuDynInst->isStore()) {
        wf->scalarWrGmReqsInPipe--;
        wf->scalarOutstandingReqsWrGm++;
    }

    wf->outstandingReqs++;
    wf->validateRequestCounters();

    issuedRequests.push(gpuDynInst);
}
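
// injectScalarMemFence() builds cache-maintenance (MemSyncReq) packets for
// a scalar fence or invalidate. Two packets are created from the same
// request: one for the SQC (instruction cache) port and one for the scalar
// data cache, since the two are distinct destinations.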
void
ScalarMemPipeline::injectScalarMemFence(GPUDynInstPtr gpuDynInst,
                                        bool kernelMemSync,
                                        RequestPtr req)
{
    assert(gpuDynInst->isScalar());

    if (!req) {
        req = std::make_shared<Request>(
            0, 0, 0, computeUnit.requestorId(), 0, gpuDynInst->wfDynId);
    } else {
        req->requestorId(computeUnit.requestorId());
    }

    // When the SQC invalidate instruction is executed, it calls
    // injectScalarMemFence. The instruction does not carry an address
    // operand, so set the physical address of the invalidation request
    // to 0 and handle it in the sequencer.
    req->setPaddr(0);

    PacketPtr sqc_pkt = nullptr;

    // If kernelMemSync is true, then the invalidation request is from
    // kernel launch and is an implicit invalidation. If false, then it is
    // due to an S_ICACHE_INV instruction
    if (kernelMemSync) {
        req->setCacheCoherenceFlags(Request::INV_L1);
        req->setReqInstSeqNum(gpuDynInst->seqNum());
        req->setFlags(Request::KERNEL);
        sqc_pkt = new Packet(req, MemCmd::MemSyncReq);
        sqc_pkt->pushSenderState(
            new ComputeUnit::SQCPort::SenderState(
                gpuDynInst->wavefront(), nullptr));
    } else {
        gpuDynInst->setRequestFlags(req);

        req->setReqInstSeqNum(gpuDynInst->seqNum());

        sqc_pkt = new Packet(req, MemCmd::MemSyncReq);
        sqc_pkt->pushSenderState(
            new ComputeUnit::SQCPort::SenderState(
                gpuDynInst->wavefront(), nullptr));
    }

    ComputeUnit::SQCPort::MemReqEvent *sqc_event =
        new ComputeUnit::SQCPort::MemReqEvent
        (computeUnit.sqcPort, sqc_pkt);
    computeUnit.schedule(
        sqc_event, curTick() + computeUnit.scalar_req_tick_latency);

    // When the SQC is invalidated, perform a scalar cache invalidate as
    // well. The SQC and scalar cache are implemented using the same SLICC
    // state machine, so this invalidate is identical to the SQC invalidate;
    // however, we need to make a new packet and request as they have
    // different cache destinations.
    PacketPtr scalar_pkt = nullptr;
    RequestPtr scalar_req(req);

    if (kernelMemSync) {
        scalar_req->setCacheCoherenceFlags(Request::INV_L1);
        scalar_req->setReqInstSeqNum(gpuDynInst->seqNum());
        scalar_req->setFlags(Request::KERNEL);
        scalar_pkt = new Packet(scalar_req, MemCmd::MemSyncReq);
        scalar_pkt->pushSenderState(
            new ComputeUnit::ScalarDataPort::SenderState(
                gpuDynInst));
    } else {
        gpuDynInst->setRequestFlags(scalar_req);

        scalar_req->setReqInstSeqNum(gpuDynInst->seqNum());

        scalar_pkt = new Packet(scalar_req, MemCmd::MemSyncReq);
        scalar_pkt->pushSenderState(
            new ComputeUnit::ScalarDataPort::SenderState(
                gpuDynInst));
    }

    ComputeUnit::ScalarDataPort::MemReqEvent *scalar_event =
        new ComputeUnit::ScalarDataPort::MemReqEvent
        (computeUnit.scalarDataPort, scalar_pkt);
    computeUnit.schedule(
        scalar_event, curTick() + computeUnit.scalar_req_tick_latency);
}
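
// Debug aid: report how many scalar requests are waiting to issue and how
// many returned loads/stores are waiting to complete.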
void
ScalarMemPipeline::printProgress()
{
    std::cout << "Scalar issued: " << issuedRequests.size() << " returned: "
        << returnedLoads.size() << "/" << returnedStores.size() << "\n";
}

} // namespace gem5