#ifndef __ARCH_VEGA_INSTS_OP_ENCODINGS_HH__
#define __ARCH_VEGA_INSTS_OP_ENCODINGS_HH__

#include "debug/GPUExec.hh"
#include "debug/VEGA.hh"
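        // Scalar (SMEM) loads and stores funnel through a common
        // request-init helper; the MemCmd argument is presumably what
        // selects the read path versus the write path.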
        initMemReqScalarHelper<ScalarRegU32, N>(gpuDynInst,
                                                MemCmd::ReadReq);
        initMemReqScalarHelper<ScalarRegU32, N>(gpuDynInst,
                                                MemCmd::WriteReq);
        gpu_dyn_inst->scalarAddr = vaddr;
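        // For the buffer form of the scalar accesses, the SGPR group holds a
        // 128-bit buffer resource descriptor. The descriptor is copied out of
        // the operand's raw storage below, the offset is clamped against the
        // buffer size, and the final address is forced to DWORD alignment.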
        std::memcpy((void*)&rsrc_desc, s_rsrc_desc.rawDataPtr(),
                    sizeof(rsrc_desc));
        if (!rsrc_desc.stride && offset >= rsrc_desc.numRecords) {
            clamped_offset = rsrc_desc.numRecords;
        } else if (rsrc_desc.stride && offset
                   > (rsrc_desc.stride * rsrc_desc.numRecords)) {
            clamped_offset = (rsrc_desc.stride * rsrc_desc.numRecords);
        }
        Addr vaddr = ((rsrc_desc.baseAddr + clamped_offset) & ~0x3);
        gpu_dyn_inst->scalarAddr = vaddr;
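        // Worked example (hypothetical descriptor values): with stride == 0,
        // numRecords == 0x100 and offset == 0x105, the first branch clamps
        // the offset to 0x100, so vaddr = (baseAddr + 0x100) & ~0x3. With a
        // non-zero stride the clamp limit becomes stride * numRecords.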
            if (gpuDynInst->exec_mask[lane]) {
                Addr vaddr = gpuDynInst->addr[lane] + offset;

                (reinterpret_cast<T*>(gpuDynInst->d_data))[lane]
                    = wf->ldsChunk->read<T>(vaddr);
            }
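            // The dual-offset variant below (used by the read2-style DS
            // operations) fetches two values per active lane and packs them
            // into adjacent d_data slots.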
            if (gpuDynInst->exec_mask[lane]) {
                Addr vaddr0 = gpuDynInst->addr[lane] + offset0;
                Addr vaddr1 = gpuDynInst->addr[lane] + offset1;

                (reinterpret_cast<T*>(gpuDynInst->d_data))[lane * 2]
                    = wf->ldsChunk->read<T>(vaddr0);
                (reinterpret_cast<T*>(gpuDynInst->d_data))[lane * 2 + 1]
                    = wf->ldsChunk->read<T>(vaddr1);
            }
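            // The store path mirrors the loads: each active lane writes its
            // d_data element(s) back into the wavefront's LDS chunk at the
            // computed offset(s).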
            if (gpuDynInst->exec_mask[lane]) {
                Addr vaddr = gpuDynInst->addr[lane] + offset;
                wf->ldsChunk->write<T>(vaddr,
                    (reinterpret_cast<T*>(gpuDynInst->d_data))[lane]);
            }
            if (gpuDynInst->exec_mask[lane]) {
                Addr vaddr0 = gpuDynInst->addr[lane] + offset0;
                Addr vaddr1 = gpuDynInst->addr[lane] + offset1;
                wf->ldsChunk->write<T>(vaddr0, (reinterpret_cast<T*>(
                    gpuDynInst->d_data))[lane * 2]);
                wf->ldsChunk->write<T>(vaddr1, (reinterpret_cast<T*>(
                    gpuDynInst->d_data))[lane * 2 + 1]);
            }
            gpuDynInst->addr.at(lane) = (Addr)addr[lane];
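        // For buffer (MUBUF) accesses, lanes whose address fell outside the
        // buffer are recorded in oobMask during address calculation. Each
        // request-init wrapper below temporarily clears those lanes from
        // exec_mask so no memory request is issued for them, then restores
        // the original mask.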
        VectorMask old_exec_mask = gpuDynInst->exec_mask;
        gpuDynInst->exec_mask &= ~oobMask;
        initMemReqHelper<T, 1>(gpuDynInst, MemCmd::ReadReq);
        gpuDynInst->exec_mask = old_exec_mask;
        VectorMask old_exec_mask = gpuDynInst->exec_mask;
        gpuDynInst->exec_mask &= ~oobMask;
        initMemReqHelper<VecElemU32, N>(gpuDynInst, MemCmd::ReadReq);
        gpuDynInst->exec_mask = old_exec_mask;
        VectorMask old_exec_mask = gpuDynInst->exec_mask;
        gpuDynInst->exec_mask &= ~oobMask;
        initMemReqHelper<T, 1>(gpuDynInst, MemCmd::WriteReq);
        gpuDynInst->exec_mask = old_exec_mask;
        VectorMask old_exec_mask = gpuDynInst->exec_mask;
        gpuDynInst->exec_mask &= ~oobMask;
        initMemReqHelper<VecElemU32, N>(gpuDynInst, MemCmd::WriteReq);
        gpuDynInst->exec_mask = old_exec_mask;
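        // The block below (presumably the global-memory-fence path) tracks
        // the fence as a single outstanding request: the status vector is
        // reset to one entry and a zero-sized Request is created, flagged,
        // and handed to the compute unit.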
        gpuDynInst->resetEntireStatusVector();
        gpuDynInst->setStatusVector(0, 1);
        RequestPtr req = std::make_shared<Request>(0, 0, 0,
                                                   gpuDynInst->computeUnit()->
                                                   requestorId(), 0,
                                                   gpuDynInst->wfDynId);
        gpuDynInst->setRequestFlags(req);
        gpuDynInst->computeUnit()->injectGlobalMemFence(gpuDynInst, false,
                                                        req);
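    // Buffer address calculation: the 128-bit resource descriptor supplies
    // the base address, stride and size; each active lane combines the
    // scalar offset, its vector offset/index and the instruction offset,
    // records out-of-bounds lanes (the oobMask consumed by the request
    // helpers above), and optionally applies the hardware swizzle before
    // the per-lane address is stored.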
    template<typename VOFF, typename VIDX, typename SRSRC, typename SOFF>
    void
    calcAddr(GPUDynInstPtr gpuDynInst, VOFF v_off, VIDX v_idx,
             SRSRC s_rsrc_desc, SOFF s_offset, int inst_offset)
    {
        std::memcpy((void*)&rsrc_desc, s_rsrc_desc.rawDataPtr(),
                    sizeof(rsrc_desc));
        base_addr = rsrc_desc.baseAddr;
        stride = rsrc_desc.addTidEn ? ((rsrc_desc.dataFmt << 14)
            + rsrc_desc.stride) : rsrc_desc.stride;
            if (gpuDynInst->exec_mask[lane]) {
                vaddr = base_addr + s_offset.rawData();
                buf_idx = v_idx[lane] + (rsrc_desc.addTidEn ? lane : 0);
                buf_off = v_off[lane] + inst_offset;
                if (stride == 0 || !rsrc_desc.swizzleEn) {
                    if (buf_off + stride * buf_idx >=
                        rsrc_desc.numRecords - s_offset.rawData()) {
                        DPRINTF(VEGA, "mubuf out-of-bounds condition 1: "
                                "lane = %d, buffer_offset = %llx, "
                                "const_stride = %llx, "
                                "const_num_records = %llx\n",
                                lane, buf_off + stride * buf_idx,
                                stride, rsrc_desc.numRecords);
                if (stride != 0 && rsrc_desc.swizzleEn) {
                    if (buf_idx >= rsrc_desc.numRecords ||
                        buf_off >= stride) {
                        DPRINTF(VEGA, "mubuf out-of-bounds condition 2: "
                                "lane = %d, offset = %llx, "
                                "index = %llx, "
                                "const_num_records = %llx\n",
                                lane, buf_off, buf_idx,
                                rsrc_desc.numRecords);
                if (rsrc_desc.swizzleEn) {
                    Addr idx_stride = 8 << rsrc_desc.idxStride;
                    Addr elem_size = 2 << rsrc_desc.elemSize;
                    Addr idx_msb = buf_idx / idx_stride;
                    Addr idx_lsb = buf_idx % idx_stride;
                    Addr off_msb = buf_off / elem_size;
                    Addr off_lsb = buf_off % elem_size;
                    DPRINTF(VEGA, "mubuf swizzled lane %d: "
                            "idx_stride = %llx, elem_size = %llx, "
                            "idx_msb = %llx, idx_lsb = %llx, "
                            "off_msb = %llx, off_lsb = %llx\n",
                            lane, idx_stride, elem_size, idx_msb, idx_lsb,
                            off_msb, off_lsb);

                    vaddr += ((idx_msb * stride + off_msb * elem_size)
                        * idx_stride + idx_lsb * elem_size + off_lsb);
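                    // Worked example (hypothetical field values): an
                    // idxStride field of 1 and an elemSize field of 1 give
                    // idx_stride = 16 and elem_size = 4. A lane with
                    // buf_idx = 21 and buf_off = 6 then splits into
                    // idx_msb = 1, idx_lsb = 5, off_msb = 1, off_lsb = 2,
                    // so the swizzled offset added to vaddr is
                    // (1 * stride + 1 * 4) * 16 + 5 * 4 + 2.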
                DPRINTF(VEGA, "Calculating mubuf address for lane %d: "
                        "vaddr = %llx, base_addr = %llx, "
                        "stride = %llx, buf_idx = %llx, buf_off = %llx\n",
                        lane, vaddr, base_addr, stride, buf_idx, buf_off);
                gpuDynInst->addr.at(lane) = vaddr;
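        // FLAT addressing: once the per-lane addresses are known, the access
        // is resolved to a segment (global or group/LDS here) and handed to
        // the matching memory pipeline; unsupported segments are fatal.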
        gpuDynInst->resolveFlatSegment(gpuDynInst->exec_mask);
        if ((gpuDynInst->executedAs() == enums::SC_GLOBAL && isFlat())
                || isFlatGlobal()) {
            gpuDynInst->computeUnit()->globalMemoryPipe
                .issueRequest(gpuDynInst);
        } else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
            assert(isFlat());
            gpuDynInst->computeUnit()->localMemoryPipe
                .issueRequest(gpuDynInst);
        } else {
            fatal("Unsupported scope for flat instruction.\n");
        }
            if (gpuDynInst->exec_mask[lane]) {
                gpuDynInst->addr.at(lane) =
            if (gpuDynInst->exec_mask[lane]) {
                gpuDynInst->addr.at(lane) = addr[lane] + offset;
            }
#endif // __ARCH_VEGA_INSTS_OP_ENCODINGS_HH__