gpu_mem_helpers.hh
/*
 * Copyright (c) 2021 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef __ARCH_VEGA_GPU_MEM_HELPERS_HH__
#define __ARCH_VEGA_GPU_MEM_HELPERS_HH__

#include "debug/GPUMem.hh"
#include "gpu-compute/gpu_dyn_inst.hh"

namespace gem5
{

/**
 * Helper function for instructions declared in op_encodings. For each
 * active lane this builds a memory request (split into two requests when
 * the lane's access crosses a cache-line boundary) and hands the resulting
 * packet(s) to the compute unit.
 */
template<typename T, int N>
inline void
initMemReqHelper(GPUDynInstPtr gpuDynInst, MemCmd mem_req_type,
                 bool is_atomic=false)
{
    // local variables
    int req_size = N * sizeof(T);
    int block_size = gpuDynInst->computeUnit()->cacheLineSize();
    Addr vaddr = 0, split_addr = 0;
    bool misaligned_acc = false;
    RequestPtr req = nullptr, req1 = nullptr, req2 = nullptr;
    PacketPtr pkt = nullptr, pkt1 = nullptr, pkt2 = nullptr;

    gpuDynInst->resetEntireStatusVector();
    for (int lane = 0; lane < VegaISA::NumVecElemPerVecReg; ++lane) {
        if (gpuDynInst->exec_mask[lane]) {
            vaddr = gpuDynInst->addr[lane];

            // split_addr is the start of the cache line that holds the last
            // byte of this lane's access. If it lies beyond vaddr, the
            // access straddles two cache lines.
            split_addr = roundDown(vaddr + req_size - 1, block_size);

            assert(split_addr <= vaddr || split_addr - vaddr < block_size);

            // An access is misaligned (for our purposes) when it crosses a
            // cache-line boundary; such accesses are split into two
            // requests below.
            misaligned_acc = split_addr > vaddr;
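            // Illustrative example (values chosen here, not from the
            // original source): with block_size = 64 and a 16-byte access
            // at vaddr = 0x7c, split_addr = roundDown(0x8b, 64) = 0x80.
            // Since 0x80 > 0x7c, the access is misaligned and is split
            // below into req1 = [0x7c, 0x80) and req2 = [0x80, 0x8c).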

            if (is_atomic) {
                // make sure request is word aligned
                assert((vaddr & 0x3) == 0);

                // a given lane's atomic can't cross cache lines
                assert(!misaligned_acc);

                req = std::make_shared<Request>(vaddr, sizeof(T), 0,
                    gpuDynInst->computeUnit()->requestorId(), 0,
                    gpuDynInst->wfDynId,
                    gpuDynInst->makeAtomicOpFunctor<T>(
                        &(reinterpret_cast<T*>(gpuDynInst->a_data))[lane],
                        &(reinterpret_cast<T*>(gpuDynInst->x_data))[lane]));
            } else {
                req = std::make_shared<Request>(vaddr, req_size, 0,
                    gpuDynInst->computeUnit()->requestorId(), 0,
                    gpuDynInst->wfDynId);
            }

            if (misaligned_acc) {
                gpuDynInst->setStatusVector(lane, 2);
                req->splitOnVaddr(split_addr, req1, req2);
                gpuDynInst->setRequestFlags(req1);
                gpuDynInst->setRequestFlags(req2);
                pkt1 = new Packet(req1, mem_req_type);
                pkt2 = new Packet(req2, mem_req_type);
                pkt1->dataStatic(&(reinterpret_cast<T*>(
                    gpuDynInst->d_data))[lane * N]);
                pkt2->dataStatic(&(reinterpret_cast<T*>(
                    gpuDynInst->d_data))[lane * N +
                    req1->getSize()/sizeof(T)]);
                DPRINTF(GPUMem, "CU%d: WF[%d][%d]: index: %d unaligned memory "
                        "request for %#x\n", gpuDynInst->cu_id,
                        gpuDynInst->simdId, gpuDynInst->wfSlotId, lane,
                        split_addr);
                gpuDynInst->computeUnit()->sendRequest(gpuDynInst, lane, pkt1);
                gpuDynInst->computeUnit()->sendRequest(gpuDynInst, lane, pkt2);
            } else {
                gpuDynInst->setStatusVector(lane, 1);
                gpuDynInst->setRequestFlags(req);
                pkt = new Packet(req, mem_req_type);
                pkt->dataStatic(&(reinterpret_cast<T*>(
                    gpuDynInst->d_data))[lane * N]);
                gpuDynInst->computeUnit()->sendRequest(gpuDynInst, lane, pkt);
            }
        } else { // if lane is not active, then no pending requests
            gpuDynInst->setStatusVector(lane, 0);
        }
    }
}

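// Usage sketch (illustrative, not part of the original header): a vector
// memory instruction defined in op_encodings would typically call this
// helper from its initiateAcc() implementation, e.g. for a one-DWORD-per-
// lane read:
//
//     initMemReqHelper<VegaISA::VecElemU32, 1>(gpuDynInst, MemCmd::ReadReq);
//
// or, for a one-DWORD-per-lane atomic, with is_atomic = true so each
// request carries the lane's atomic-op functor:
//
//     initMemReqHelper<VegaISA::VecElemU32, 1>(gpuDynInst,
//                                              MemCmd::SwapReq, true);
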
template<int N>
inline void
initScratchReqHelper(GPUDynInstPtr gpuDynInst, MemCmd mem_req_type)
{
    // This function should be used for 1+ DWORD scratch accesses. 1+ DWORD
    // scratch accesses are special in that they send multiple single DWORD
    // requests in a swizzled manner to memory.
    int req_size = sizeof(VegaISA::VecElemU32);
    int block_size = gpuDynInst->computeUnit()->cacheLineSize();

    gpuDynInst->resetEntireStatusVector();
    for (int lane = 0; lane < VegaISA::NumVecElemPerVecReg; ++lane) {
        if (gpuDynInst->exec_mask[lane]) {
            Addr vaddr[N];

            for (int dword = 0; dword < N; ++dword) {
                // Swizzle stride: one DWORD per lane across the full
                // vector width, so a lane's consecutive DWORDs are one
                // wave-width apart in memory.
                int stride = VegaISA::NumVecElemPerVecReg
                    * sizeof(VegaISA::VecElemU32);
                vaddr[dword] = gpuDynInst->addr[lane] + dword * stride;

                // Do not allow misaligned for simplicity for now.
                Addr split_addr = roundDown(vaddr[dword] + req_size - 1,
                                            block_size);
                panic_if(split_addr > vaddr[dword], "Misaligned swizzled "
                         "scratch access not yet implemented\n");
            }

            gpuDynInst->setStatusVector(lane, N);

            RequestPtr req[N];
            PacketPtr pkt[N];
            for (int dword = 0; dword < N; ++dword) {
                req[dword] = std::make_shared<Request>(vaddr[dword], req_size,
                    0, gpuDynInst->computeUnit()->requestorId(), 0,
                    gpuDynInst->wfDynId);
                gpuDynInst->setRequestFlags(req[dword]);
                pkt[dword] = new Packet(req[dword], mem_req_type);

                int data_elem = lane + dword * VegaISA::NumVecElemPerVecReg;
                pkt[dword]->dataStatic(
                    &(reinterpret_cast<VegaISA::VecElemU32*>(
                        gpuDynInst->d_data))[data_elem]);

                gpuDynInst->computeUnit()->sendRequest(gpuDynInst, lane,
                                                       pkt[dword]);
            }
        } else { // if lane is not active, then no pending requests
            gpuDynInst->setStatusVector(lane, 0);
        }
    }
}

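// Worked example (illustrative values, not from the original source): for a
// two-DWORD scratch access (N = 2) by lane 3 at base address A, the loop
// above issues two single-DWORD requests, one at A and one at A + stride,
// and the returned data is placed at d_data indices 3 and 3 + 64
// (data_elem = lane + dword * NumVecElemPerVecReg), so all lanes' first
// DWORDs precede all lanes' second DWORDs in d_data.
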
/**
 * Helper function for scalar instructions declared in op_encodings.
 * Builds a single scalar memory request (or two requests if the access
 * spans a cache-line boundary) and issues it through sendScalarRequest.
 */
template<typename T, int N>
inline void
initMemReqScalarHelper(GPUDynInstPtr gpuDynInst, MemCmd mem_req_type)
{
    int req_size = N * sizeof(T);
    int block_size = gpuDynInst->computeUnit()->cacheLineSize();
    Addr vaddr = gpuDynInst->scalarAddr;

    // split_addr is the start of the cache line that holds the last byte of
    // the access. If it lies beyond vaddr, the access straddles two cache
    // lines.
    Addr split_addr = roundDown(vaddr + req_size - 1, block_size);

    assert(split_addr <= vaddr || split_addr - vaddr < block_size);

    // Accesses that cross a cache-line boundary are split into two
    // requests. Instructions flagged hasNoAddr() carry no meaningful
    // address and are never treated as misaligned.
    bool misaligned_acc = split_addr > vaddr &&
        !gpuDynInst->staticInstruction()->hasNoAddr();

    Request::Flags flags;
    if (gpuDynInst->staticInstruction()->hasNoAddr()) {
        flags.set(Request::HAS_NO_ADDR);
    }
    RequestPtr req = std::make_shared<Request>(
        vaddr, req_size, std::move(flags),
        gpuDynInst->computeUnit()->requestorId(), 0,
        gpuDynInst->wfDynId);

    if (misaligned_acc) {
        RequestPtr req1, req2;
        req->splitOnVaddr(split_addr, req1, req2);
        gpuDynInst->numScalarReqs = 2;
        gpuDynInst->setRequestFlags(req1);
        gpuDynInst->setRequestFlags(req2);
        PacketPtr pkt1 = new Packet(req1, mem_req_type);
        PacketPtr pkt2 = new Packet(req2, mem_req_type);
        pkt1->dataStatic(gpuDynInst->scalar_data);
        pkt2->dataStatic(gpuDynInst->scalar_data + req1->getSize());
        DPRINTF(GPUMem, "CU%d: WF[%d][%d]: unaligned scalar memory request for"
                " %#x\n", gpuDynInst->cu_id, gpuDynInst->simdId,
                gpuDynInst->wfSlotId, split_addr);
        gpuDynInst->computeUnit()->sendScalarRequest(gpuDynInst, pkt1);
        gpuDynInst->computeUnit()->sendScalarRequest(gpuDynInst, pkt2);
    } else {
        gpuDynInst->numScalarReqs = 1;
        gpuDynInst->setRequestFlags(req);
        PacketPtr pkt = new Packet(req, mem_req_type);
        pkt->dataStatic(gpuDynInst->scalar_data);
        gpuDynInst->computeUnit()->sendScalarRequest(gpuDynInst, pkt);
    }
}

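// Usage sketch (illustrative, not part of the original header): a scalar
// read of two DWORDs declared in op_encodings could invoke this helper as
// shown below, assuming VegaISA::ScalarRegU32 as the 32-bit scalar element
// type; the instruction must set gpuDynInst->scalarAddr before the call.
//
//     initMemReqScalarHelper<VegaISA::ScalarRegU32, 2>(gpuDynInst,
//                                                      MemCmd::ReadReq);
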
} // namespace gem5

#endif // __ARCH_VEGA_GPU_MEM_HELPERS_HH__