#ifndef __ARCH_GCN3_GPU_MEM_HELPERS_HH__
#define __ARCH_GCN3_GPU_MEM_HELPERS_HH__

#include <cassert>
#include <memory>

#include "debug/GPUMem.hh"
50 template<
typename T,
int N>
56 int req_size = N *
sizeof(T);
57 int block_size = gpuDynInst->computeUnit()->cacheLineSize();
59 bool misaligned_acc =
false;
60 RequestPtr req =
nullptr, req1 =
nullptr, req2 =
nullptr;
61 PacketPtr pkt =
nullptr, pkt1 =
nullptr, pkt2 =
nullptr;
63 gpuDynInst->resetEntireStatusVector();
65 if (gpuDynInst->exec_mask[lane]) {
66 vaddr = gpuDynInst->addr[lane];
74 assert(split_addr <=
vaddr || split_addr -
vaddr < block_size);
80 misaligned_acc = split_addr >
vaddr;
84 assert((
vaddr & 0x3) == 0);
87 assert(!misaligned_acc);
89 req = std::make_shared<Request>(
vaddr,
sizeof(T), 0,
90 gpuDynInst->computeUnit()->requestorId(), 0,
92 gpuDynInst->makeAtomicOpFunctor<T>(
93 &(
reinterpret_cast<T*
>(gpuDynInst->a_data))[lane],
94 &(
reinterpret_cast<T*
>(gpuDynInst->x_data))[lane]));
96 req = std::make_shared<Request>(
vaddr, req_size, 0,
97 gpuDynInst->computeUnit()->requestorId(), 0,
101 if (misaligned_acc) {
102 gpuDynInst->setStatusVector(lane, 2);
103 req->splitOnVaddr(split_addr, req1, req2);
104 gpuDynInst->setRequestFlags(req1);
105 gpuDynInst->setRequestFlags(req2);
106 pkt1 =
new Packet(req1, mem_req_type);
107 pkt2 =
new Packet(req2, mem_req_type);
108 pkt1->dataStatic(&(
reinterpret_cast<T*
>(
109 gpuDynInst->d_data))[lane * N]);
110 pkt2->dataStatic(&(
reinterpret_cast<T*
>(
111 gpuDynInst->d_data))[lane * N + req1->getSize()]);
112 DPRINTF(GPUMem,
"CU%d: WF[%d][%d]: index: %d unaligned memory "
113 "request for %#x\n", gpuDynInst->cu_id,
114 gpuDynInst->simdId, gpuDynInst->wfSlotId, lane,
116 gpuDynInst->computeUnit()->sendRequest(gpuDynInst, lane, pkt1);
117 gpuDynInst->computeUnit()->sendRequest(gpuDynInst, lane, pkt2);
119 gpuDynInst->setStatusVector(lane, 1);
120 gpuDynInst->setRequestFlags(req);
121 pkt =
new Packet(req, mem_req_type);
123 gpuDynInst->d_data))[lane * N]);
124 gpuDynInst->computeUnit()->sendRequest(gpuDynInst, lane, pkt);
127 gpuDynInst->setStatusVector(lane, 0);
138 template<
typename T,
int N>
142 int req_size = N *
sizeof(T);
143 int block_size = gpuDynInst->computeUnit()->cacheLineSize();
152 assert(split_addr <=
vaddr || split_addr -
vaddr < block_size);
158 bool misaligned_acc = split_addr >
vaddr;
161 gpuDynInst->computeUnit()->requestorId(), 0,
162 gpuDynInst->wfDynId);
164 if (misaligned_acc) {
166 req->splitOnVaddr(split_addr, req1, req2);
167 gpuDynInst->numScalarReqs = 2;
168 gpuDynInst->setRequestFlags(req1);
169 gpuDynInst->setRequestFlags(req2);
173 pkt2->
dataStatic(gpuDynInst->scalar_data + req1->getSize());
174 DPRINTF(GPUMem,
"CU%d: WF[%d][%d]: unaligned scalar memory request for"
175 " %#x\n", gpuDynInst->cu_id, gpuDynInst->simdId,
176 gpuDynInst->wfSlotId, split_addr);
177 gpuDynInst->computeUnit()->sendScalarRequest(gpuDynInst, pkt1);
178 gpuDynInst->computeUnit()->sendScalarRequest(gpuDynInst, pkt2);
180 gpuDynInst->numScalarReqs = 1;
181 gpuDynInst->setRequestFlags(req);
184 gpuDynInst->computeUnit()->sendScalarRequest(gpuDynInst, pkt);
188 #endif // __ARCH_GCN3_GPU_MEM_HELPERS_HH__