34 #ifndef __ARCH_GCN3_GPU_MEM_HELPERS_HH__
35 #define __ARCH_GCN3_GPU_MEM_HELPERS_HH__
39 #include "debug/GPUMem.hh"
// NOTE(review): this excerpt is garbled — the function signature and many
// interior lines are missing (embedded original line numbers jump 48->54,
// 64->72, etc.), and statements are split mid-token. From the visible code
// this appears to be the per-lane vector memory request helper (presumably
// gem5's initMemReqHelper<T, N>): for each active lane it builds one Request
// (or two, if the access straddles a cache line) and sends the resulting
// Packet(s) to the compute unit. Confirm against the upstream file before
// editing.
48 template<
typename T,
int N>
// Total bytes requested per lane: N elements of T.
54 int req_size = N *
sizeof(T);
// Cache line size determines where an access must be split in two.
55 int block_size = gpuDynInst->computeUnit()->cacheLineSize();
57 bool misaligned_acc =
false;
// One request/packet for the aligned case; req1/req2 (pkt1/pkt2) hold the
// two halves when the access crosses a cache-line boundary.
58 RequestPtr req =
nullptr, req1 =
nullptr, req2 =
nullptr;
59 PacketPtr pkt =
nullptr, pkt1 =
nullptr, pkt2 =
nullptr;
// Clear the per-lane outstanding-request bookkeeping before issuing.
61 gpuDynInst->resetEntireStatusVector();
// NOTE(review): the enclosing per-lane loop header is missing from this
// excerpt; exec_mask[lane] gates inactive lanes.
63 if (gpuDynInst->exec_mask[lane]) {
64 vaddr = gpuDynInst->addr[lane];
// Sanity: the split point (next cache-line boundary, computed on a
// missing line) must lie within one block of vaddr.
72 assert(split_addr <=
vaddr || split_addr -
vaddr < block_size);
// The access is misaligned (straddles a line) iff the boundary falls
// strictly after the start address.
78 misaligned_acc = split_addr >
vaddr;
// NOTE(review): this branch appears to be the atomic path — atomics
// must be 4-byte aligned and may not straddle a cache line.
82 assert((
vaddr & 0x3) == 0);
85 assert(!misaligned_acc);
// Atomic request carries a functor combining a_data/x_data for this lane.
87 req = std::make_shared<Request>(
vaddr,
sizeof(T), 0,
88 gpuDynInst->computeUnit()->requestorId(), 0,
90 gpuDynInst->makeAtomicOpFunctor<T>(
91 &(
reinterpret_cast<T*
>(gpuDynInst->a_data))[lane],
92 &(
reinterpret_cast<T*
>(gpuDynInst->x_data))[lane]));
// Non-atomic path: one request covering the full per-lane size.
94 req = std::make_shared<Request>(
vaddr, req_size, 0,
95 gpuDynInst->computeUnit()->requestorId(), 0,
// Misaligned: two outstanding requests for this lane.
100 gpuDynInst->setStatusVector(lane, 2);
101 req->splitOnVaddr(split_addr, req1, req2);
102 gpuDynInst->setRequestFlags(req1);
103 gpuDynInst->setRequestFlags(req2);
104 pkt1 =
new Packet(req1, mem_req_type);
105 pkt2 =
new Packet(req2, mem_req_type);
// Both packets view slices of d_data; the second half starts
// req1->getSize() bytes into this lane's slot.
106 pkt1->dataStatic(&(
reinterpret_cast<T*
>(
107 gpuDynInst->d_data))[lane * N]);
108 pkt2->dataStatic(&(
reinterpret_cast<T*
>(
109 gpuDynInst->d_data))[lane * N + req1->getSize()]);
110 DPRINTF(GPUMem,
"CU%d: WF[%d][%d]: index: %d unaligned memory "
111 "request for %#x\n", gpuDynInst->cu_id,
112 gpuDynInst->simdId, gpuDynInst->wfSlotId, lane,
114 gpuDynInst->computeUnit()->sendRequest(gpuDynInst, lane, pkt1);
115 gpuDynInst->computeUnit()->sendRequest(gpuDynInst, lane, pkt2);
// Aligned: a single outstanding request for this lane.
117 gpuDynInst->setStatusVector(lane, 1);
118 gpuDynInst->setRequestFlags(req);
119 pkt =
new Packet(req, mem_req_type);
// NOTE(review): the dataStatic(...) call opening is missing from this
// excerpt; only its closing argument survives below.
121 gpuDynInst->d_data))[lane * N]);
122 gpuDynInst->computeUnit()->sendRequest(gpuDynInst, lane, pkt);
// Inactive lane: record zero outstanding requests.
125 gpuDynInst->setStatusVector(lane, 0);
// NOTE(review): garbled excerpt — the signature and several lines are
// missing (original numbering jumps 141->150, 167->171, etc.). From the
// visible code this appears to be the scalar memory request helper
// (presumably gem5's initMemReqScalarHelper<T, N>): it builds one scalar
// request, splits it in two if it crosses a cache line, and sends the
// packet(s) via sendScalarRequest. Confirm against the upstream file.
136 template<
typename T,
int N>
// Total bytes requested: N elements of T.
140 int req_size = N *
sizeof(T);
141 int block_size = gpuDynInst->computeUnit()->cacheLineSize();
// Split point (computed on a missing line) must lie within one cache
// line of the start address.
150 assert(split_addr <=
vaddr || split_addr -
vaddr < block_size);
// Access straddles a cache line iff the boundary is past vaddr.
156 bool misaligned_acc = split_addr >
vaddr;
// NOTE(review): the Request construction opening is missing; these are
// its trailing arguments (requestor id, flags, wavefront dynamic id).
159 gpuDynInst->computeUnit()->requestorId(), 0,
160 gpuDynInst->wfDynId);
162 if (misaligned_acc) {
// Misaligned: split into two requests and track both as outstanding.
164 req->splitOnVaddr(split_addr, req1, req2);
165 gpuDynInst->numScalarReqs = 2;
166 gpuDynInst->setRequestFlags(req1);
167 gpuDynInst->setRequestFlags(req2);
// Second packet's data view starts req1->getSize() bytes into the
// scalar data buffer (pkt1's dataStatic call is missing from view).
171 pkt2->
dataStatic(gpuDynInst->scalar_data + req1->getSize());
172 DPRINTF(GPUMem,
"CU%d: WF[%d][%d]: unaligned scalar memory request for"
173 " %#x\n", gpuDynInst->cu_id, gpuDynInst->simdId,
174 gpuDynInst->wfSlotId, split_addr);
175 gpuDynInst->computeUnit()->sendScalarRequest(gpuDynInst, pkt1);
176 gpuDynInst->computeUnit()->sendScalarRequest(gpuDynInst, pkt2);
// Aligned: a single outstanding scalar request.
178 gpuDynInst->numScalarReqs = 1;
179 gpuDynInst->setRequestFlags(req);
182 gpuDynInst->computeUnit()->sendScalarRequest(gpuDynInst, pkt);
186 #endif // __ARCH_GCN3_GPU_MEM_HELPERS_HH__