gem5  v21.1.0.2
gpu_mem_helpers.hh
/*
 * Copyright (c) 2021 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef __ARCH_VEGA_GPU_MEM_HELPERS_HH__
#define __ARCH_VEGA_GPU_MEM_HELPERS_HH__

#include "arch/amdgpu/vega/insts/gpu_static_inst.hh"
#include "arch/amdgpu/vega/insts/op_encodings.hh"
#include "debug/GPUMem.hh"
#include "gpu-compute/gpu_dyn_inst.hh"

namespace gem5
{

/**
 * Helper function for instructions declared in op_encodings. It builds one
 * memory request per active lane of the wavefront and, when a lane's access
 * crosses a cache-line boundary, splits that request into two packets before
 * handing them to the compute unit.
 */
template<typename T, int N>
inline void
initMemReqHelper(GPUDynInstPtr gpuDynInst, MemCmd mem_req_type,
                 bool is_atomic=false)
{
    // local variables
    int req_size = N * sizeof(T);
    int block_size = gpuDynInst->computeUnit()->cacheLineSize();
    Addr vaddr = 0, split_addr = 0;
    bool misaligned_acc = false;
    RequestPtr req = nullptr, req1 = nullptr, req2 = nullptr;
    PacketPtr pkt = nullptr, pkt1 = nullptr, pkt2 = nullptr;

    gpuDynInst->resetEntireStatusVector();
    for (int lane = 0; lane < VegaISA::NumVecElemPerVecReg; ++lane) {
        if (gpuDynInst->exec_mask[lane]) {
            vaddr = gpuDynInst->addr[lane];

            /**
             * split_addr is the base address of the cache line that holds
             * the last byte of this lane's request.
             */
            split_addr = roundDown(vaddr + req_size - 1, block_size);

            assert(split_addr <= vaddr || split_addr - vaddr < block_size);

            /**
             * if the cache line holding the last byte starts after the
             * address of the first byte, the access straddles a line
             * boundary and must be split.
             */
            misaligned_acc = split_addr > vaddr;

            if (is_atomic) {
                // make sure request is word aligned
                assert((vaddr & 0x3) == 0);

                // a given lane's atomic can't cross cache lines
                assert(!misaligned_acc);

                req = std::make_shared<Request>(vaddr, sizeof(T), 0,
                    gpuDynInst->computeUnit()->requestorId(), 0,
                    gpuDynInst->wfDynId,
                    gpuDynInst->makeAtomicOpFunctor<T>(
                        &(reinterpret_cast<T*>(gpuDynInst->a_data))[lane],
                        &(reinterpret_cast<T*>(gpuDynInst->x_data))[lane]));
            } else {
                req = std::make_shared<Request>(vaddr, req_size, 0,
                    gpuDynInst->computeUnit()->requestorId(), 0,
                    gpuDynInst->wfDynId);
            }

            if (misaligned_acc) {
                gpuDynInst->setStatusVector(lane, 2);
                req->splitOnVaddr(split_addr, req1, req2);
                gpuDynInst->setRequestFlags(req1);
                gpuDynInst->setRequestFlags(req2);
                pkt1 = new Packet(req1, mem_req_type);
                pkt2 = new Packet(req2, mem_req_type);
                pkt1->dataStatic(&(reinterpret_cast<T*>(
                    gpuDynInst->d_data))[lane * N]);
                pkt2->dataStatic(&(reinterpret_cast<T*>(
                    gpuDynInst->d_data))[lane * N + req1->getSize()]);
                DPRINTF(GPUMem, "CU%d: WF[%d][%d]: index: %d unaligned memory "
                        "request for %#x\n", gpuDynInst->cu_id,
                        gpuDynInst->simdId, gpuDynInst->wfSlotId, lane,
                        split_addr);
                gpuDynInst->computeUnit()->sendRequest(gpuDynInst, lane, pkt1);
                gpuDynInst->computeUnit()->sendRequest(gpuDynInst, lane, pkt2);
            } else {
                gpuDynInst->setStatusVector(lane, 1);
                gpuDynInst->setRequestFlags(req);
                pkt = new Packet(req, mem_req_type);
                pkt->dataStatic(&(reinterpret_cast<T*>(
                    gpuDynInst->d_data))[lane * N]);
                gpuDynInst->computeUnit()->sendRequest(gpuDynInst, lane, pkt);
            }
        } else { // if lane is not active, then no pending requests
            gpuDynInst->setStatusVector(lane, 0);
        }
    }
}
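
/*
 * Illustrative sketch of how a vector memory encoding might invoke the
 * helper above. The caller, class name, and element typedef are assumptions
 * for illustration only, not an exact copy of the gem5 sources; the template
 * arguments select the element type T and the number of elements N accessed
 * per lane.
 *
 *     void
 *     Inst_FLAT__FLAT_LOAD_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst)
 *     {
 *         // one 32-bit element per active lane, issued as a read
 *         initMemReqHelper<VegaISA::VecElemU32, 1>(gpuDynInst,
 *                                                  MemCmd::ReadReq);
 *     }
 *
 * Worked example of the split check: with block_size = 64 and req_size = 4,
 * a lane address of 0x7e gives split_addr = roundDown(0x7e + 3, 64) = 0x80,
 * which is greater than vaddr, so that lane's request is split at 0x80 into
 * two packets of 2 bytes each.
 */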

/**
 * Helper function for scalar instructions declared in op_encodings. It
 * builds a single request for the wavefront's scalar address and splits it
 * into two packets if the access crosses a cache-line boundary.
 */
template<typename T, int N>
inline void
initMemReqScalarHelper(GPUDynInstPtr gpuDynInst, MemCmd mem_req_type)
{
    int req_size = N * sizeof(T);
    int block_size = gpuDynInst->computeUnit()->cacheLineSize();
    Addr vaddr = gpuDynInst->scalarAddr;

    /**
     * split_addr is the base address of the cache line that holds the last
     * byte of the request.
     */
    Addr split_addr = roundDown(vaddr + req_size - 1, block_size);

    assert(split_addr <= vaddr || split_addr - vaddr < block_size);

    /**
     * if the cache line holding the last byte starts after the address of
     * the first byte, the access straddles a line boundary and must be
     * split.
     */
    bool misaligned_acc = split_addr > vaddr;

    RequestPtr req = std::make_shared<Request>(vaddr, req_size, 0,
        gpuDynInst->computeUnit()->requestorId(), 0,
        gpuDynInst->wfDynId);

    if (misaligned_acc) {
        RequestPtr req1, req2;
        req->splitOnVaddr(split_addr, req1, req2);
        gpuDynInst->numScalarReqs = 2;
        gpuDynInst->setRequestFlags(req1);
        gpuDynInst->setRequestFlags(req2);
        PacketPtr pkt1 = new Packet(req1, mem_req_type);
        PacketPtr pkt2 = new Packet(req2, mem_req_type);
        pkt1->dataStatic(gpuDynInst->scalar_data);
        pkt2->dataStatic(gpuDynInst->scalar_data + req1->getSize());
        DPRINTF(GPUMem, "CU%d: WF[%d][%d]: unaligned scalar memory request for"
                " %#x\n", gpuDynInst->cu_id, gpuDynInst->simdId,
                gpuDynInst->wfSlotId, split_addr);
        gpuDynInst->computeUnit()->sendScalarRequest(gpuDynInst, pkt1);
        gpuDynInst->computeUnit()->sendScalarRequest(gpuDynInst, pkt2);
    } else {
        gpuDynInst->numScalarReqs = 1;
        gpuDynInst->setRequestFlags(req);
        PacketPtr pkt = new Packet(req, mem_req_type);
        pkt->dataStatic(gpuDynInst->scalar_data);
        gpuDynInst->computeUnit()->sendScalarRequest(gpuDynInst, pkt);
    }
}
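
/*
 * Similarly, a scalar (SMEM) load encoding might invoke the scalar helper
 * as sketched below; the class name and scalar typedef are illustrative
 * assumptions rather than an exact copy of the gem5 sources.
 *
 *     void
 *     Inst_SMEM__S_LOAD_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst)
 *     {
 *         // two 32-bit scalar elements read into gpuDynInst->scalar_data
 *         initMemReqScalarHelper<VegaISA::ScalarRegU32, 2>(gpuDynInst,
 *                                                          MemCmd::ReadReq);
 *     }
 */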

} // namespace gem5

#endif // __ARCH_VEGA_GPU_MEM_HELPERS_HH__