55 int req_size = N *
sizeof(T);
56 int block_size = gpuDynInst->computeUnit()->cacheLineSize();
58 bool misaligned_acc =
false;
59 RequestPtr req =
nullptr, req1 =
nullptr, req2 =
nullptr;
60 PacketPtr pkt =
nullptr, pkt1 =
nullptr, pkt2 =
nullptr;
62 gpuDynInst->resetEntireStatusVector();
64 if (gpuDynInst->exec_mask[lane]) {
73 assert(split_addr <=
vaddr || split_addr -
vaddr < block_size);
79 misaligned_acc = split_addr >
vaddr;
83 assert((
vaddr & 0x3) == 0);
86 assert(!misaligned_acc);
88 req = std::make_shared<Request>(
vaddr,
sizeof(T), 0,
89 gpuDynInst->computeUnit()->requestorId(), 0,
91 gpuDynInst->makeAtomicOpFunctor<T>(
92 &(
reinterpret_cast<T*
>(gpuDynInst->a_data))[lane],
93 &(
reinterpret_cast<T*
>(gpuDynInst->x_data))[lane]));
95 req = std::make_shared<Request>(
vaddr, req_size, 0,
96 gpuDynInst->computeUnit()->requestorId(), 0,
100 if (misaligned_acc) {
101 gpuDynInst->setStatusVector(lane, 2);
102 req->splitOnVaddr(split_addr, req1, req2);
103 gpuDynInst->setRequestFlags(req1);
104 gpuDynInst->setRequestFlags(req2);
105 pkt1 =
new Packet(req1, mem_req_type);
106 pkt2 =
new Packet(req2, mem_req_type);
107 pkt1->dataStatic(&(
reinterpret_cast<T*
>(
108 gpuDynInst->d_data))[lane * N]);
109 pkt2->dataStatic(&(
reinterpret_cast<T*
>(
110 gpuDynInst->d_data))[lane * N +
111 req1->getSize()/
sizeof(T)]);
112 DPRINTF(GPUMem,
"CU%d: WF[%d][%d]: index: %d unaligned memory "
113 "request for %#x\n", gpuDynInst->cu_id,
114 gpuDynInst->simdId, gpuDynInst->wfSlotId, lane,
116 gpuDynInst->computeUnit()->sendRequest(gpuDynInst, lane, pkt1);
117 gpuDynInst->computeUnit()->sendRequest(gpuDynInst, lane, pkt2);
119 gpuDynInst->setStatusVector(lane, 1);
120 gpuDynInst->setRequestFlags(req);
121 pkt =
new Packet(req, mem_req_type);
123 gpuDynInst->d_data))[lane * N]);
124 gpuDynInst->computeUnit()->sendRequest(gpuDynInst, lane, pkt);
127 gpuDynInst->setStatusVector(lane, 0);
142 int req_size = N *
sizeof(T);
143 int block_size = gpuDynInst->computeUnit()->cacheLineSize();
152 assert(split_addr <=
vaddr || split_addr -
vaddr < block_size);
158 bool misaligned_acc = split_addr >
vaddr;
161 gpuDynInst->computeUnit()->requestorId(), 0,
162 gpuDynInst->wfDynId);
164 if (misaligned_acc) {
166 req->splitOnVaddr(split_addr, req1, req2);
167 gpuDynInst->numScalarReqs = 2;
168 gpuDynInst->setRequestFlags(req1);
169 gpuDynInst->setRequestFlags(req2);
173 pkt2->
dataStatic(gpuDynInst->scalar_data + req1->getSize());
174 DPRINTF(GPUMem,
"CU%d: WF[%d][%d]: unaligned scalar memory request for"
175 " %#x\n", gpuDynInst->cu_id, gpuDynInst->simdId,
176 gpuDynInst->wfSlotId, split_addr);
177 gpuDynInst->computeUnit()->sendScalarRequest(gpuDynInst, pkt1);
178 gpuDynInst->computeUnit()->sendScalarRequest(gpuDynInst, pkt2);
180 gpuDynInst->numScalarReqs = 1;
181 gpuDynInst->setRequestFlags(req);
184 gpuDynInst->computeUnit()->sendScalarRequest(gpuDynInst, pkt);