#ifndef __ARCH_VEGA_GPU_MEM_HELPERS_HH__
#define __ARCH_VEGA_GPU_MEM_HELPERS_HH__

#include "base/intmath.hh"
#include "debug/GPUMem.hh"
#include "gpu-compute/gpu_dyn_inst.hh"

/**
 * Helper function for memory instructions declared in op_encodings. Takes
 * the arguments for a given memory request, then issues one request per
 * active lane, or two requests if a lane's access straddles a cache-line
 * boundary.
 */
template<typename T, int N>
inline void
initMemReqHelper(GPUDynInstPtr gpuDynInst, MemCmd mem_req_type,
                 bool is_atomic=false)
{
    // local variables
    int req_size = N * sizeof(T);
    int block_size = gpuDynInst->computeUnit()->cacheLineSize();
    Addr vaddr = 0, split_addr = 0;
    bool misaligned_acc = false;
    RequestPtr req = nullptr, req1 = nullptr, req2 = nullptr;
    PacketPtr pkt = nullptr, pkt1 = nullptr, pkt2 = nullptr;

    gpuDynInst->resetEntireStatusVector();
    for (int lane = 0; lane < VegaISA::NumVecElemPerVecReg; ++lane) {
        if (gpuDynInst->exec_mask[lane]) {
            vaddr = gpuDynInst->addr[lane];

            /**
             * the base address of the cache line where the last byte of
             * this lane's request will be stored.
             */
            split_addr = roundDown(vaddr + req_size - 1, block_size);

            assert(split_addr <= vaddr || split_addr - vaddr < block_size);

            /**
             * if the base cache line address of the last byte is greater
             * than the address of the first byte then we have a
             * misaligned access.
             */
            misaligned_acc = split_addr > vaddr;
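
            // Worked example: with 64-byte cache lines, a 16-byte request
            // at vaddr 0x78 ends at byte 0x87; roundDown(0x87, 64) = 0x80,
            // which is > 0x78, so this access is misaligned and is split
            // at 0x80 below (8 bytes at 0x78, 8 bytes at 0x80).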

            if (is_atomic) {
                // make sure the request is word aligned
                assert((vaddr & 0x3) == 0);

                // a given lane's atomic can't cross cache lines
                assert(!misaligned_acc);

                req = std::make_shared<Request>(vaddr, sizeof(T), 0,
                    gpuDynInst->computeUnit()->requestorId(), 0,
                    gpuDynInst->wfDynId,
                    gpuDynInst->makeAtomicOpFunctor<T>(
                        &(reinterpret_cast<T*>(gpuDynInst->a_data))[lane],
                        &(reinterpret_cast<T*>(gpuDynInst->x_data))[lane]));
            } else {
                req = std::make_shared<Request>(vaddr, req_size, 0,
                    gpuDynInst->computeUnit()->requestorId(), 0,
                    gpuDynInst->wfDynId);
            }

            if (misaligned_acc) {
                // this lane has two pending packets: one per cache line
                gpuDynInst->setStatusVector(lane, 2);
                req->splitOnVaddr(split_addr, req1, req2);
                gpuDynInst->setRequestFlags(req1);
                gpuDynInst->setRequestFlags(req2);
                pkt1 = new Packet(req1, mem_req_type);
                pkt2 = new Packet(req2, mem_req_type);
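                // req1 covers [vaddr, split_addr) and req2 covers
                // [split_addr, vaddr + req_size); each packet is pointed
                // at its own slice of this lane's d_data below.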
                pkt1->dataStatic(&(reinterpret_cast<T*>(
                    gpuDynInst->d_data))[lane * N]);
                pkt2->dataStatic(&(reinterpret_cast<T*>(
                    gpuDynInst->d_data))[lane * N + req1->getSize()]);
                DPRINTF(GPUMem, "CU%d: WF[%d][%d]: index: %d unaligned memory "
                        "request for %#x\n", gpuDynInst->cu_id,
                        gpuDynInst->simdId, gpuDynInst->wfSlotId, lane,
                        split_addr);
                gpuDynInst->computeUnit()->sendRequest(gpuDynInst, lane, pkt1);
                gpuDynInst->computeUnit()->sendRequest(gpuDynInst, lane, pkt2);
            } else {
                gpuDynInst->setStatusVector(lane, 1);
                gpuDynInst->setRequestFlags(req);
                pkt = new Packet(req, mem_req_type);
                pkt->dataStatic(&(reinterpret_cast<T*>(
                    gpuDynInst->d_data))[lane * N]);
                gpuDynInst->computeUnit()->sendRequest(gpuDynInst, lane, pkt);
            }
        } else { // if lane is not active, then no pending requests
            gpuDynInst->setStatusVector(lane, 0);
        }
    }
}
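
// Typical call site, from a vector memory instruction's initiateAcc()
// (illustrative sketch; the actual opcode classes live with the Vega
// instruction definitions, e.g. a FLAT dword load):
//
//     initMemReqHelper<VecElemU32, 1>(gpuDynInst, MemCmd::ReadReq);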

/**
 * Helper function for scalar instructions declared in op_encodings. Issues
 * a single scalar request, or two requests if the access straddles a
 * cache-line boundary.
 */
template<typename T, int N>
inline void
initMemReqScalarHelper(GPUDynInstPtr gpuDynInst, MemCmd mem_req_type)
{
    int req_size = N * sizeof(T);
    int block_size = gpuDynInst->computeUnit()->cacheLineSize();
    Addr vaddr = gpuDynInst->scalarAddr;

    /**
     * the base address of the cache line where the last byte of the
     * request will be stored.
     */
    Addr split_addr = roundDown(vaddr + req_size - 1, block_size);

    assert(split_addr <= vaddr || split_addr - vaddr < block_size);

    /**
     * if the base cache line address of the last byte is greater than
     * the address of the first byte then we have a misaligned access.
     */
    bool misaligned_acc = split_addr > vaddr;

    RequestPtr req = std::make_shared<Request>(vaddr, req_size, 0,
        gpuDynInst->computeUnit()->requestorId(), 0,
        gpuDynInst->wfDynId);

    if (misaligned_acc) {
        RequestPtr req1, req2;
        req->splitOnVaddr(split_addr, req1, req2);
        gpuDynInst->numScalarReqs = 2;
        gpuDynInst->setRequestFlags(req1);
        gpuDynInst->setRequestFlags(req2);
        PacketPtr pkt1 = new Packet(req1, mem_req_type);
        PacketPtr pkt2 = new Packet(req2, mem_req_type);
        pkt1->dataStatic(gpuDynInst->scalar_data);
        pkt2->dataStatic(gpuDynInst->scalar_data + req1->getSize());
        DPRINTF(GPUMem, "CU%d: WF[%d][%d]: unaligned scalar memory request for"
                " %#x\n", gpuDynInst->cu_id, gpuDynInst->simdId,
                gpuDynInst->wfSlotId, split_addr);
        gpuDynInst->computeUnit()->sendScalarRequest(gpuDynInst, pkt1);
        gpuDynInst->computeUnit()->sendScalarRequest(gpuDynInst, pkt2);
    } else {
        gpuDynInst->numScalarReqs = 1;
        gpuDynInst->setRequestFlags(req);
        PacketPtr pkt = new Packet(req, mem_req_type);
        pkt->dataStatic(gpuDynInst->scalar_data);
        gpuDynInst->computeUnit()->sendScalarRequest(gpuDynInst, pkt);
    }
}
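
// Typical call site, from a scalar (SMEM) load's initiateAcc()
// (illustrative sketch; e.g. an s_load_dwordx4 reading four dwords):
//
//     initMemReqScalarHelper<ScalarRegU32, 4>(gpuDynInst, MemCmd::ReadReq);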

#endif // __ARCH_VEGA_GPU_MEM_HELPERS_HH__