gpu_mem_helpers.hh
/*
 * Copyright (c) 2021 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef __ARCH_VEGA_GPU_MEM_HELPERS_HH__
#define __ARCH_VEGA_GPU_MEM_HELPERS_HH__

#include "base/intmath.hh"
#include "base/trace.hh"
#include "debug/GPUMem.hh"
#include "gpu-compute/gpu_dyn_inst.hh"

namespace gem5
{

/**
 * Helper function for instructions declared in op_encodings.  This function
 * takes in all of the arguments for a given memory request and prepares
 * the request and packet, and sends the request to the compute unit.
 */
template<typename T, int N>
inline void
initMemReqHelper(GPUDynInstPtr gpuDynInst, MemCmd mem_req_type,
                 bool is_atomic=false)
{
    // local variables
    int req_size = N * sizeof(T);
    int block_size = gpuDynInst->computeUnit()->cacheLineSize();
    Addr vaddr = 0, split_addr = 0;
    bool misaligned_acc = false;
    RequestPtr req = nullptr, req1 = nullptr, req2 = nullptr;
    PacketPtr pkt = nullptr, pkt1 = nullptr, pkt2 = nullptr;

    gpuDynInst->resetEntireStatusVector();
    for (int lane = 0; lane < VegaISA::NumVecElemPerVecReg; ++lane) {
        if (gpuDynInst->exec_mask[lane]) {
            vaddr = gpuDynInst->addr[lane];

            /**
             * the base address of the cache line where the
             * last byte of the request will be stored.
             */
            split_addr = roundDown(vaddr + req_size - 1, block_size);

            assert(split_addr <= vaddr || split_addr - vaddr < block_size);

            /**
             * if the base cache line address of the last byte is
             * greater than the address of the first byte then we have
             * a misaligned access.
             */
            misaligned_acc = split_addr > vaddr;

            if (is_atomic) {
                // make sure request is word aligned
                assert((vaddr & 0x3) == 0);

                // a given lane's atomic can't cross cache lines
                assert(!misaligned_acc);

                req = std::make_shared<Request>(vaddr, sizeof(T), 0,
                    gpuDynInst->computeUnit()->requestorId(), 0,
                    gpuDynInst->wfDynId,
                    gpuDynInst->makeAtomicOpFunctor<T>(
                        &(reinterpret_cast<T*>(gpuDynInst->a_data))[lane],
                        &(reinterpret_cast<T*>(gpuDynInst->x_data))[lane]));
            } else {
                req = std::make_shared<Request>(vaddr, req_size, 0,
                    gpuDynInst->computeUnit()->requestorId(), 0,
                    gpuDynInst->wfDynId);
            }

            if (misaligned_acc) {
                gpuDynInst->setStatusVector(lane, 2);
                req->splitOnVaddr(split_addr, req1, req2);
                gpuDynInst->setRequestFlags(req1);
                gpuDynInst->setRequestFlags(req2);
                pkt1 = new Packet(req1, mem_req_type);
                pkt2 = new Packet(req2, mem_req_type);
                pkt1->dataStatic(&(reinterpret_cast<T*>(
                    gpuDynInst->d_data))[lane * N]);
                pkt2->dataStatic(&(reinterpret_cast<T*>(
                    gpuDynInst->d_data))[lane * N +
                    req1->getSize()/sizeof(T)]);
                DPRINTF(GPUMem, "CU%d: WF[%d][%d]: index: %d unaligned memory "
                        "request for %#x\n", gpuDynInst->cu_id,
                        gpuDynInst->simdId, gpuDynInst->wfSlotId, lane,
                        split_addr);
                gpuDynInst->computeUnit()->sendRequest(gpuDynInst, lane, pkt1);
                gpuDynInst->computeUnit()->sendRequest(gpuDynInst, lane, pkt2);
            } else {
                gpuDynInst->setStatusVector(lane, 1);
                gpuDynInst->setRequestFlags(req);
                pkt = new Packet(req, mem_req_type);
                pkt->dataStatic(&(reinterpret_cast<T*>(
                    gpuDynInst->d_data))[lane * N]);
                gpuDynInst->computeUnit()->sendRequest(gpuDynInst, lane, pkt);
            }
        } else { // if lane is not active, then no pending requests
            gpuDynInst->setStatusVector(lane, 0);
        }
    }
}
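
/*
 * Illustrative usage sketch, not part of the original header: instruction
 * classes in op_encodings typically pick the element type T and per-lane
 * element count N in initiateAcc() and hand off to initMemReqHelper. The
 * instruction class and the VecElemU32 element type below are assumptions
 * for illustration, following that pattern.
 *
 *     void
 *     Inst_FLAT__FLAT_LOAD_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst)
 *     {
 *         // one 32-bit element per lane, issued as a read
 *         initMemReqHelper<VecElemU32, 1>(gpuDynInst, MemCmd::ReadReq);
 *     }
 *
 * To see the split logic at work: with a 64-byte cache line, a lane with
 * vaddr 0x7c and req_size 8 gives split_addr = roundDown(0x7c + 8 - 1, 64)
 * = 0x80 > vaddr, so the request is split at 0x80 into two packets of four
 * bytes each.
 */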

/**
 * Helper function for scalar instructions declared in op_encodings.  This
 * function takes in all of the arguments for a given memory request and
 * prepares the request and packet, and sends the request to the compute
 * unit.
 */
template<typename T, int N>
inline void
initMemReqScalarHelper(GPUDynInstPtr gpuDynInst, MemCmd mem_req_type)
{
    int req_size = N * sizeof(T);
    int block_size = gpuDynInst->computeUnit()->cacheLineSize();
    Addr vaddr = gpuDynInst->scalarAddr;

    /**
     * the base address of the cache line where the
     * last byte of the request will be stored.
     */
    Addr split_addr = roundDown(vaddr + req_size - 1, block_size);

    assert(split_addr <= vaddr || split_addr - vaddr < block_size);

    /**
     * if the base cache line address of the last byte is
     * greater than the address of the first byte then we have
     * a misaligned access.
     */
    bool misaligned_acc = split_addr > vaddr;

    RequestPtr req = std::make_shared<Request>(vaddr, req_size, 0,
        gpuDynInst->computeUnit()->requestorId(), 0,
        gpuDynInst->wfDynId);

    if (misaligned_acc) {
        RequestPtr req1, req2;
        req->splitOnVaddr(split_addr, req1, req2);
        gpuDynInst->numScalarReqs = 2;
        gpuDynInst->setRequestFlags(req1);
        gpuDynInst->setRequestFlags(req2);
        PacketPtr pkt1 = new Packet(req1, mem_req_type);
        PacketPtr pkt2 = new Packet(req2, mem_req_type);
        pkt1->dataStatic(gpuDynInst->scalar_data);
        pkt2->dataStatic(gpuDynInst->scalar_data + req1->getSize());
        DPRINTF(GPUMem, "CU%d: WF[%d][%d]: unaligned scalar memory request for"
                " %#x\n", gpuDynInst->cu_id, gpuDynInst->simdId,
                gpuDynInst->wfSlotId, split_addr);
        gpuDynInst->computeUnit()->sendScalarRequest(gpuDynInst, pkt1);
        gpuDynInst->computeUnit()->sendScalarRequest(gpuDynInst, pkt2);
    } else {
        gpuDynInst->numScalarReqs = 1;
        gpuDynInst->setRequestFlags(req);
        PacketPtr pkt = new Packet(req, mem_req_type);
        pkt->dataStatic(gpuDynInst->scalar_data);
        gpuDynInst->computeUnit()->sendScalarRequest(gpuDynInst, pkt);
    }
}
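
/*
 * Illustrative usage sketch, not part of the original header: scalar (SMEM)
 * instructions follow the same pattern, selecting T and N for the single
 * wavefront-wide access. The instruction class and the ScalarRegU32 type
 * below are assumptions for illustration.
 *
 *     void
 *     Inst_SMEM__S_LOAD_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst)
 *     {
 *         // read two consecutive dwords into scalar_data
 *         initMemReqScalarHelper<ScalarRegU32, 2>(gpuDynInst,
 *                                                 MemCmd::ReadReq);
 *     }
 */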

} // namespace gem5

#endif // __ARCH_VEGA_GPU_MEM_HELPERS_HH__