#ifndef __ARCH_GCN3_INSTS_OP_ENCODINGS_HH__
#define __ARCH_GCN3_INSTS_OP_ENCODINGS_HH__

#include "debug/GCN3.hh"
#include "debug/GPUExec.hh"
initMemReqScalarHelper<ScalarRegU32, N>(gpuDynInst, MemCmd::ReadReq);

initMemReqScalarHelper<ScalarRegU32, N>(gpuDynInst, MemCmd::WriteReq);

gpu_dyn_inst->scalarAddr = vaddr;

std::memcpy((void*)&rsrc_desc, s_rsrc_desc.rawDataPtr(),

gpu_dyn_inst->scalarAddr = vaddr;
if (gpuDynInst->exec_mask[lane]) {
    (reinterpret_cast<T*>(gpuDynInst->d_data))[lane]
if (gpuDynInst->exec_mask[lane]) {
    for (int i = 0; i < N; ++i) {
        (reinterpret_cast<T*>(gpuDynInst->d_data))[lane * N + i]
if (gpuDynInst->exec_mask[lane]) {
    Addr vaddr0 = gpuDynInst->addr[lane] + offset0;
    Addr vaddr1 = gpuDynInst->addr[lane] + offset1;
    (reinterpret_cast<T*>(gpuDynInst->d_data))[lane * 2]
    (reinterpret_cast<T*>(gpuDynInst->d_data))[lane * 2 + 1]
if (gpuDynInst->exec_mask[lane]) {
    (reinterpret_cast<T*>(gpuDynInst->d_data))[lane]);
if (gpuDynInst->exec_mask[lane]) {
    for (int i = 0; i < N; ++i) {
        (reinterpret_cast<T*>(gpuDynInst->d_data))[lane * N + i]);
if (gpuDynInst->exec_mask[lane]) {
    Addr vaddr0 = gpuDynInst->addr[lane] + offset0;
    Addr vaddr1 = gpuDynInst->addr[lane] + offset1;
    (reinterpret_cast<T*>(gpuDynInst->d_data))[lane * 2]);
    (reinterpret_cast<T*>(gpuDynInst->d_data))[lane * 2 + 1]);
gpuDynInst->addr.at(lane) = (Addr)addr[lane];
VectorMask old_exec_mask = gpuDynInst->exec_mask;
gpuDynInst->exec_mask &= ~oobMask;
gpuDynInst->exec_mask = old_exec_mask;

VectorMask old_exec_mask = gpuDynInst->exec_mask;
gpuDynInst->exec_mask &= ~oobMask;
gpuDynInst->exec_mask = old_exec_mask;

VectorMask old_exec_mask = gpuDynInst->exec_mask;
gpuDynInst->exec_mask &= ~oobMask;
gpuDynInst->exec_mask = old_exec_mask;

VectorMask old_exec_mask = gpuDynInst->exec_mask;
gpuDynInst->exec_mask &= ~oobMask;
gpuDynInst->exec_mask = old_exec_mask;
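The four repeated save/mask/restore sequences above come from the MUBUF read and write initiators: lanes flagged in oobMask are dropped from the execution mask for the duration of the access and then restored. A minimal standalone sketch of that pattern, using a plain std::bitset and hypothetical mask values:

#include <bitset>
#include <cstdio>

int main()
{
    std::bitset<64> exec_mask(0xffULL);  // lanes 0-7 active
    std::bitset<64> oobMask(0x0cULL);    // lanes 2-3 flagged out of bounds

    std::bitset<64> old_exec_mask = exec_mask;
    exec_mask &= ~oobMask;               // suppress OOB lanes for the access
    std::printf("masked exec_mask   = 0x%llx\n", exec_mask.to_ullong());
    // ... the memory request would be initiated here ...
    exec_mask = old_exec_mask;           // restore the original mask
    std::printf("restored exec_mask = 0x%llx\n", exec_mask.to_ullong());
    return 0;
}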
gpuDynInst->resetEntireStatusVector();
gpuDynInst->setStatusVector(0, 1);
RequestPtr req = std::make_shared<Request>(0, 0, 0,
    gpuDynInst->computeUnit()->
    gpuDynInst->wfDynId);
gpuDynInst->setRequestFlags(req);
gpuDynInst->computeUnit()->
template<typename VOFF, typename VIDX, typename SRSRC, typename SOFF>
void calcAddr(GPUDynInstPtr gpuDynInst, VOFF v_off, VIDX v_idx,
              SRSRC s_rsrc_desc, SOFF s_offset, int inst_offset)
Addr buffer_offset = 0;
std::memcpy((void*)&rsrc_desc, s_rsrc_desc.rawDataPtr(),
if (gpuDynInst->exec_mask[lane]) {
    vaddr = base_addr + s_offset.rawData();
    buf_idx = v_idx[lane] + (rsrc_desc.addTidEn ? lane : 0);
    buf_off = v_off[lane] + inst_offset;

    Addr idx_msb = buf_idx / idx_stride;
    Addr idx_lsb = buf_idx % idx_stride;
    Addr off_msb = buf_off / elem_size;
    Addr off_lsb = buf_off % elem_size;
    DPRINTF(GCN3, "mubuf swizzled lane %d: "
            "idx_stride = %llx, elem_size = %llx, "
            "idx_msb = %llx, idx_lsb = %llx, "
            "off_msb = %llx, off_lsb = %llx\n",
            lane, idx_stride, elem_size, idx_msb, idx_lsb,

    buffer_offset = (idx_msb * stride + off_msb * elem_size)
        * idx_stride + idx_lsb * elem_size + off_lsb;

    buffer_offset = buf_off + stride * buf_idx;
DPRINTF(GCN3, "mubuf out-of-bounds condition 1: "
        "lane = %d, buffer_offset = %llx, "
        "const_stride = %llx, "
        "const_num_records = %llx\n",
        lane, buf_off + stride * buf_idx,

DPRINTF(GCN3, "mubuf out-of-bounds condition 2: "
        "lane = %d, offset = %llx, "
        "const_num_records = %llx\n",
        lane, buf_off, buf_idx,

vaddr += buffer_offset;

DPRINTF(GCN3, "Calculating mubuf address for lane %d: "
        "vaddr = %llx, base_addr = %llx, "
        "stride = %llx, buf_idx = %llx, buf_off = %llx\n",

gpuDynInst->addr.at(lane) = vaddr;
if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
} else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
    if (gpuDynInst->exec_mask[lane]) {
        (reinterpret_cast<T*>(gpuDynInst->d_data))[lane]
if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
} else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
    if (gpuDynInst->exec_mask[lane]) {
        for (int i = 0; i < N; ++i) {
            (reinterpret_cast<T*>(gpuDynInst->d_data))[lane * N + i]
if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
} else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
    if (gpuDynInst->exec_mask[lane]) {
        (reinterpret_cast<T*>(gpuDynInst->d_data))[lane]);
if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
} else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
    if (gpuDynInst->exec_mask[lane]) {
        for (int i = 0; i < N; ++i) {
            (reinterpret_cast<T*>(gpuDynInst->d_data))[lane * N + i]);
if (gpuDynInst->executedAs() == enums::SC_GLOBAL) {
} else if (gpuDynInst->executedAs() == enums::SC_GROUP) {
    if (gpuDynInst->exec_mask[lane]) {
        gpuDynInst->makeAtomicOpFunctor<T>(
            &(reinterpret_cast<T*>(gpuDynInst->a_data))[lane],
            &(reinterpret_cast<T*>(gpuDynInst->x_data))[lane]);

        (*amo_op)(reinterpret_cast<uint8_t *>(&tmp));

        (reinterpret_cast<T*>(gpuDynInst->d_data))[lane] = tmp;
if (gpuDynInst->exec_mask[lane]) {
    gpuDynInst->addr.at(lane) = addr[lane];

gpuDynInst->resolveFlatSegment(gpuDynInst->exec_mask);
const std::string & opcode() const
void initMemWrite(GPUDynInstPtr gpuDynInst, Addr offset)
void generateDisassembly() override
void initOperandInfo() override
void initDualMemRead(GPUDynInstPtr gpuDynInst, Addr offset0, Addr offset1)
void initMemRead(GPUDynInstPtr gpuDynInst, Addr offset)
void initDualMemWrite(GPUDynInstPtr gpuDynInst, Addr offset0, Addr offset1)
int instSize() const override
void calcAddr(GPUDynInstPtr gpuDynInst, ConstVecOperandU32 &addr)
int instSize() const override
void initOperandInfo() override
void initMemRead(GPUDynInstPtr gpuDynInst)
void initMemWrite(GPUDynInstPtr gpuDynInst)
void initAtomicAccess(GPUDynInstPtr gpuDynInst)
void calcAddr(GPUDynInstPtr gpuDynInst, ConstVecOperandU64 &addr)
int instSize() const override
void generateDisassembly() override
void initOperandInfo() override
int instSize() const override
void initOperandInfo() override
void initOperandInfo() override
int instSize() const override
bool hasSecondDword(InFmt_MTBUF *)
void injectGlobalMemFence(GPUDynInstPtr gpuDynInst)
void initMemWrite(GPUDynInstPtr gpuDynInst)
void initOperandInfo() override
int instSize() const override
void initMemRead(GPUDynInstPtr gpuDynInst)
void generateDisassembly() override
void calcAddr(GPUDynInstPtr gpuDynInst, VOFF v_off, VIDX v_idx, SRSRC s_rsrc_desc, SOFF s_offset, int inst_offset)
MUBUF instructions calculate their addresses as follows:
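The descriptive text that originally followed this sentence is not present in the extract; based on the calcAddr fragments above, a hedged, standalone reconstruction of the two offset formulas (swizzled and linear) with purely illustrative values looks like this:

#include <cstdint>
#include <cstdio>

int main()
{
    // Illustrative descriptor/operand values (not taken from a real trace).
    uint64_t stride = 32, idx_stride = 8, elem_size = 4;
    uint64_t buf_idx = 10, buf_off = 6;
    bool swizzle_en = true;

    uint64_t buffer_offset = 0;
    if (swizzle_en) {
        uint64_t idx_msb = buf_idx / idx_stride;
        uint64_t idx_lsb = buf_idx % idx_stride;
        uint64_t off_msb = buf_off / elem_size;
        uint64_t off_lsb = buf_off % elem_size;
        buffer_offset = (idx_msb * stride + off_msb * elem_size)
            * idx_stride + idx_lsb * elem_size + off_lsb;   // 298 here
    } else {
        buffer_offset = buf_off + stride * buf_idx;          // 326 here
    }
    std::printf("buffer_offset = %llu\n",
                (unsigned long long)buffer_offset);
    return 0;
}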
void calcAddr(GPUDynInstPtr gpu_dyn_inst, ConstScalarOperandU64 &addr, ScalarRegU32 offset)
For normal s_load_dword/s_store_dword instruction addresses.
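As a rough illustration of the scalar path (the fragment above only shows the final assignment to scalarAddr), the address is the 64-bit SGPR-pair base plus the instruction's offset; the dword alignment shown below is an assumption, not taken from the extract:

#include <cstdint>
#include <cstdio>

int main()
{
    // Hypothetical values: 64-bit base from an SGPR pair plus a
    // byte offset supplied by the instruction.
    uint64_t sgpr_base = 0x10000040ULL;
    uint32_t offset    = 0x14;

    // Assumed dword alignment of the final scalar address.
    uint64_t vaddr = (sgpr_base + offset) & ~uint64_t(0x3);
    std::printf("scalarAddr = 0x%llx\n", (unsigned long long)vaddr);
    return 0;
}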
void initMemWrite(GPUDynInstPtr gpuDynInst)
initiate a memory write access for N dwords
void generateDisassembly() override
void calcAddr(GPUDynInstPtr gpu_dyn_inst, ConstScalarOperandU128 &s_rsrc_desc, ScalarRegU32 offset)
For s_buffer_load_dword/s_buffer_store_dword instruction addresses.
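A hedged sketch of the descriptor handling suggested by the std::memcpy fragment above: the 128-bit resource descriptor is copied out of consecutive scalar registers and its base address is combined with the offset. The field layout here is illustrative only, not the exact GCN3 V# format:

#include <cstdint>
#include <cstring>
#include <cstdio>

// Illustrative descriptor layout; the real GCN3 V# packs more fields.
struct BufferRsrc
{
    uint64_t baseAddr : 48;
    uint64_t stride   : 14;
    uint64_t pad0     : 2;
    uint32_t numRecords;
    uint32_t pad1;
};

int main()
{
    static_assert(sizeof(BufferRsrc) == 16, "descriptor should be 128 bits");

    uint32_t sgprs[4] = {0x00004000u, 0x00200000u, 0x100u, 0x0u};
    BufferRsrc rsrc;
    std::memcpy(&rsrc, sgprs, sizeof(rsrc));   // same trick as rawDataPtr()

    uint32_t offset = 0x8;
    uint64_t vaddr = rsrc.baseAddr + offset;   // clamping against
                                               // numRecords omitted here
    std::printf("base = 0x%llx, vaddr = 0x%llx\n",
                (unsigned long long)rsrc.baseAddr,
                (unsigned long long)vaddr);
    return 0;
}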
void initMemRead(GPUDynInstPtr gpuDynInst)
initiate a memory read access for N dwords
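Both templated initiators above delegate to initMemReqScalarHelper with the dword count N as a template parameter; the sketch below is a hypothetical stand-in (not the gem5 API) showing how N dwords translate into a byte-sized scalar read or write request:

#include <cstdint>
#include <cstddef>
#include <cstdio>

// Hypothetical stand-in for the scalar request helper: a real
// implementation would build and send a memory request of N dwords
// starting at the already-computed scalar address.
template<typename RegT, int N>
void initMemReqScalarHelper(std::uint64_t scalar_addr, bool is_read)
{
    std::size_t req_size = N * sizeof(RegT);   // N dwords -> bytes
    std::printf("%s %zu bytes @ 0x%llx\n",
                is_read ? "read" : "write", req_size,
                (unsigned long long)scalar_addr);
}

int main()
{
    // e.g. s_load_dwordx4 would use N = 4.
    initMemReqScalarHelper<std::uint32_t, 4>(0x4000, true);
    initMemReqScalarHelper<std::uint32_t, 2>(0x4000, false);
    return 0;
}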
void initOperandInfo() override
int instSize() const override
int instSize() const override
bool hasSecondDword(InFmt_SOP1 *)
void initOperandInfo() override
void generateDisassembly() override
bool hasSecondDword(InFmt_SOP2 *)
void generateDisassembly() override
void initOperandInfo() override
int instSize() const override
bool hasSecondDword(InFmt_SOPC *)
int instSize() const override
void initOperandInfo() override
void generateDisassembly() override
int instSize() const override
void generateDisassembly() override
bool hasSecondDword(InFmt_SOPK *)
void initOperandInfo() override
void initOperandInfo() override
void generateDisassembly() override
int instSize() const override
int instSize() const override
void generateDisassembly() override
int instSize() const override
void initOperandInfo() override
bool hasSecondDword(InFmt_VOP1 *)
int instSize() const override
void initOperandInfo() override
void generateDisassembly() override
bool hasSecondDword(InFmt_VOP2 *)
void initOperandInfo() override
int instSize() const override
InFmt_VOP3_SDST_ENC instData
void generateDisassembly() override
bool hasSecondDword(InFmt_VOP3_SDST_ENC *)
void initOperandInfo() override
const bool sgprDst
the v_cmp and readlane instructions in the VOP3 encoding are unique because they are the only instruc...
int instSize() const override
bool hasSecondDword(InFmt_VOP3 *)
void generateDisassembly() override
bool hasSecondDword(InFmt_VOPC *)
void initOperandInfo() override
void generateDisassembly() override
int instSize() const override
void write(const uint32_t index, const T value)
a write operation
T read(const uint32_t index)
a read operation
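These are the LDS-chunk accessors used by the DS initMemRead/initMemWrite fragments earlier to move each active lane's element of d_data. A standalone sketch of that per-lane pattern, with a plain byte array standing in for the LDS chunk (all names and sizes illustrative):

#include <cstdint>
#include <cstring>
#include <cstdio>
#include <vector>
#include <bitset>

// Plain stand-ins for the LDS chunk accessors: typed read/write at a
// byte address inside a backing store.
struct FakeLds
{
    std::vector<uint8_t> mem = std::vector<uint8_t>(1024, 0);

    template<typename T> T read(uint64_t addr) const
    {
        T val; std::memcpy(&val, &mem[addr], sizeof(T)); return val;
    }
    template<typename T> void write(uint64_t addr, T val)
    {
        std::memcpy(&mem[addr], &val, sizeof(T));
    }
};

int main()
{
    constexpr int NumLanes = 64;
    FakeLds lds;
    std::bitset<NumLanes> exec_mask(0xF);      // only lanes 0-3 active
    uint64_t addr[NumLanes] = {0, 4, 8, 12};   // per-lane LDS addresses
    uint32_t d_data[NumLanes] = {};

    // Per-lane access, as in the DS initMemRead fragment.
    for (int lane = 0; lane < NumLanes; ++lane) {
        if (exec_mask[lane]) {
            lds.write<uint32_t>(addr[lane], 100u + lane);   // seed LDS
            d_data[lane] = lds.read<uint32_t>(addr[lane]);
        }
    }
    std::printf("lane 2 read %u\n", d_data[2]);
    return 0;
}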
constexpr unsigned NumVecElemPerVecReg
std::shared_ptr< Request > RequestPtr
std::shared_ptr< GPUDynInst > GPUDynInstPtr
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
std::bitset< std::numeric_limits< unsigned long long >::digits > VectorMask