#ifndef __ARCH_VEGA_INSTS_OP_ENCODINGS_HH__
#define __ARCH_VEGA_INSTS_OP_ENCODINGS_HH__

#include "debug/GPUExec.hh"
#include "debug/VEGA.hh"
        // scalar memory read path: hand the request off to the scalar helper
        initMemReqScalarHelper<ScalarRegU32, N>(gpuDynInst,
                                                MemCmd::ReadReq);
        // scalar memory write path: same helper, issued as a write command
        initMemReqScalarHelper<ScalarRegU32, N>(gpuDynInst,
                                                MemCmd::WriteReq);
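        // N is the number of 32-bit scalar words moved by the instruction, so
        // (as an illustrative assumption, not shown in this excerpt) an
        // s_load_dwordx4 would instantiate these helpers with N = 4 and move
        // four consecutive ScalarRegU32 values.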
        gpu_dyn_inst->scalarAddr = vaddr;
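        // Note: scalar (SMEM) accesses use a single, wavefront-wide address in
        // scalarAddr, in contrast to the per-lane gpuDynInst->addr entries
        // filled in by the vector-memory calcAddr helpers later in this file.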
        std::memcpy((void*)&rsrc_desc, s_rsrc_desc.rawDataPtr(),
                    sizeof(rsrc_desc));
        // Clamp the offset if it would address past the end of the buffer:
        // for an unstrided buffer when offset >= numRecords, and for a
        // strided buffer when offset > stride * numRecords.
        if (!rsrc_desc.stride && offset >= rsrc_desc.numRecords) {
            clamped_offset = rsrc_desc.numRecords;
        } else if (rsrc_desc.stride && offset
                   > (rsrc_desc.stride * rsrc_desc.numRecords)) {
            clamped_offset = (rsrc_desc.stride * rsrc_desc.numRecords);
        }
        Addr vaddr = ((rsrc_desc.baseAddr + clamped_offset) & ~0x3);
        gpu_dyn_inst->scalarAddr = vaddr;
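        // Worked example (illustrative values only): with stride == 0,
        // numRecords == 0x100 and offset == 0x1a2, the offset is clamped to
        // 0x100, and (baseAddr + 0x100) & ~0x3 keeps the final scalar address
        // DWORD-aligned; an in-range offset such as 0x46 passes through
        // unchanged and the sum is likewise rounded down to a multiple of 4.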
            if (gpuDynInst->exec_mask[lane]) {
                Addr vaddr = gpuDynInst->addr[lane] + offset;

                // load this lane's value from the wavefront's LDS allocation
                (reinterpret_cast<T*>(gpuDynInst->d_data))[lane]
                    = wf->ldsChunk->read<T>(vaddr);
            }
            if (gpuDynInst->exec_mask[lane]) {
                Addr vaddr0 = gpuDynInst->addr[lane] + offset0;
                Addr vaddr1 = gpuDynInst->addr[lane] + offset1;

                (reinterpret_cast<T*>(gpuDynInst->d_data))[lane * 2]
                    = wf->ldsChunk->read<T>(vaddr0);
                (reinterpret_cast<T*>(gpuDynInst->d_data))[lane * 2 + 1]
                    = wf->ldsChunk->read<T>(vaddr1);
            }
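            // The two results of a dual read are interleaved per lane: lane 3,
            // for example, lands in d_data slots 6 and 7. (Illustration only;
            // the ds_read2-style opcodes that use this helper are not shown
            // in this excerpt.)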
            if (gpuDynInst->exec_mask[lane]) {
                Addr vaddr = gpuDynInst->addr[lane] + offset;

                // store this lane's value to the wavefront's LDS allocation
                wf->ldsChunk->write<T>(vaddr,
                    (reinterpret_cast<T*>(gpuDynInst->d_data))[lane]);
            }
            if (gpuDynInst->exec_mask[lane]) {
                Addr vaddr0 = gpuDynInst->addr[lane] + offset0;
                Addr vaddr1 = gpuDynInst->addr[lane] + offset1;
                wf->ldsChunk->write<T>(vaddr0, (reinterpret_cast<T*>(
                    gpuDynInst->d_data))[lane * 2]);
                wf->ldsChunk->write<T>(vaddr1, (reinterpret_cast<T*>(
                    gpuDynInst->d_data))[lane * 2 + 1]);
            }
                gpuDynInst->addr.at(lane) = (Addr)addr[lane];
        // Temporarily drop out-of-bounds lanes from the exec mask so that the
        // request is only issued for active, in-bounds lanes, then restore
        // the original mask afterwards.
        VectorMask old_exec_mask = gpuDynInst->exec_mask;
        gpuDynInst->exec_mask &= ~oobMask;
        initMemReqHelper<T, 1>(gpuDynInst, MemCmd::ReadReq);
        gpuDynInst->exec_mask = old_exec_mask;
        // same out-of-bounds masking pattern, for the multi-dword read
        VectorMask old_exec_mask = gpuDynInst->exec_mask;
        gpuDynInst->exec_mask &= ~oobMask;
        initMemReqHelper<VecElemU32, N>(gpuDynInst, MemCmd::ReadReq);
        gpuDynInst->exec_mask = old_exec_mask;
        // same out-of-bounds masking pattern, for the write path
        VectorMask old_exec_mask = gpuDynInst->exec_mask;
        gpuDynInst->exec_mask &= ~oobMask;
        initMemReqHelper<T, 1>(gpuDynInst, MemCmd::WriteReq);
        gpuDynInst->exec_mask = old_exec_mask;
        // same out-of-bounds masking pattern, for the multi-dword write
        VectorMask old_exec_mask = gpuDynInst->exec_mask;
        gpuDynInst->exec_mask &= ~oobMask;
        initMemReqHelper<VecElemU32, N>(gpuDynInst, MemCmd::WriteReq);
        gpuDynInst->exec_mask = old_exec_mask;
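        // Sketch of the masking above (illustrative values only): if
        // exec_mask = 0b1111 and oobMask = 0b0100, the request is issued with
        // mask 0b1011, so lane 2 neither accesses memory nor faults, and the
        // architected exec mask is unchanged once it is restored.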
        // create the (zero-address, zero-size) fence request and set flags
        gpuDynInst->resetEntireStatusVector();
        gpuDynInst->setStatusVector(0, 1);
        RequestPtr req = std::make_shared<Request>(0, 0, 0,
                                                   gpuDynInst->computeUnit()->
                                                   requestorId(), 0,
                                                   gpuDynInst->wfDynId);
        gpuDynInst->setRequestFlags(req);
        gpuDynInst->computeUnit()->
            injectGlobalMemFence(gpuDynInst, false, req);
    // MUBUF address calculation, using the 128-bit buffer resource descriptor
    template<typename VOFF, typename VIDX, typename SRSRC, typename SOFF>
    void
    calcAddr(GPUDynInstPtr gpuDynInst, VOFF v_off, VIDX v_idx,
             SRSRC s_rsrc_desc, SOFF s_offset, int inst_offset)
    {
        Addr vaddr = 0;
        Addr base_addr = 0;
        Addr stride = 0;
        Addr buf_idx = 0;
        Addr buf_off = 0;
        Addr buffer_offset = 0;
        BufferRsrcDescriptor rsrc_desc;
        std::memcpy((void*)&rsrc_desc, s_rsrc_desc.rawDataPtr(),
                    sizeof(rsrc_desc));
        base_addr = rsrc_desc.baseAddr;
        stride = rsrc_desc.addTidEn ? ((rsrc_desc.dataFmt << 14)
                                       + rsrc_desc.stride) : rsrc_desc.stride;
            if (gpuDynInst->exec_mask[lane]) {
                vaddr = base_addr + s_offset.rawData();
                // record index: the per-lane index register, plus the lane ID
                // when the descriptor's add_tid_enable bit is set
                buf_idx = v_idx[lane] + (rsrc_desc.addTidEn ? lane : 0);
                // byte offset within the record: the per-lane offset register
                // plus the instruction's immediate offset
                buf_off = v_off[lane] + inst_offset;
                // Out-of-bounds check for unswizzled (or zero-stride) buffers
                if (stride == 0 || !rsrc_desc.swizzleEn) {
                    if (buf_off + stride * buf_idx >=
                        rsrc_desc.numRecords - s_offset.rawData()) {
                        DPRINTF(VEGA, "mubuf out-of-bounds condition 1: "
                                "lane = %d, buffer_offset = %llx, "
                                "const_stride = %llx, "
                                "const_num_records = %llx\n",
                                lane, buf_off + stride * buf_idx,
                                stride, rsrc_desc.numRecords);
                        oobMask.set(lane);
                        continue;
                    }
                }
                // Out-of-bounds check for swizzled, strided buffers
                if (stride != 0 && rsrc_desc.swizzleEn) {
                    if (buf_idx >= rsrc_desc.numRecords ||
                        buf_off >= stride) {
                        DPRINTF(VEGA, "mubuf out-of-bounds condition 2: "
                                "lane = %d, offset = %llx, "
                                "index = %llx, "
                                "const_num_records = %llx\n",
                                lane, buf_off, buf_idx,
                                rsrc_desc.numRecords);
                        oobMask.set(lane);
                        continue;
                    }
                }
                if (rsrc_desc.swizzleEn) {
                    Addr idx_stride = 8 << rsrc_desc.idxStride;
                    Addr elem_size = 2 << rsrc_desc.elemSize;
                    Addr idx_msb = buf_idx / idx_stride;
                    Addr idx_lsb = buf_idx % idx_stride;
                    Addr off_msb = buf_off / elem_size;
                    Addr off_lsb = buf_off % elem_size;
                    DPRINTF(VEGA, "mubuf swizzled lane %d: "
                            "idx_stride = %llx, elem_size = %llx, "
                            "idx_msb = %llx, idx_lsb = %llx, "
                            "off_msb = %llx, off_lsb = %llx\n",
                            lane, idx_stride, elem_size, idx_msb, idx_lsb,
                            off_msb, off_lsb);

                    buffer_offset = (idx_msb * stride + off_msb * elem_size)
                        * idx_stride + idx_lsb * elem_size + off_lsb;
                } else {
                    buffer_offset = buf_off + stride * buf_idx;
                }
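                // Worked example of the swizzle math above (made-up values,
                // for illustration only): with idx_stride = 16, elem_size = 4,
                // stride = 64, buf_idx = 21 and buf_off = 10:
                //   idx_msb = 1, idx_lsb = 5, off_msb = 2, off_lsb = 2
                //   buffer_offset = (1*64 + 2*4)*16 + 5*4 + 2 = 1174
                // i.e. whole groups of idx_stride records are laid out back to
                // back, and within a group the records' elements are
                // interleaved at elem_size granularity.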
                vaddr += buffer_offset;

                DPRINTF(VEGA, "Calculating mubuf address for lane %d: "
                        "vaddr = %llx, base_addr = %llx, "
                        "stride = %llx, buf_idx = %llx, buf_off = %llx\n",
                        lane, vaddr, base_addr, stride, buf_idx, buf_off);
                gpuDynInst->addr.at(lane) = vaddr;
            if (gpuDynInst->exec_mask[lane]) {
                gpuDynInst->addr.at(lane) = addr[lane] + offset;
            }
        }

        gpuDynInst->resolveFlatSegment(gpuDynInst->exec_mask);
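        // For example (illustrative values), with offset = 16 and
        // addr[lane] = 0x10000, that lane's flat address becomes 0x10010;
        // resolveFlatSegment() then presumably classifies each active lane's
        // address into the proper aperture before the request is issued.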
#endif // __ARCH_VEGA_INSTS_OP_ENCODINGS_HH__