32#ifndef __ARCH_VEGA_OPERAND_HH__
33#define __ARCH_VEGA_OPERAND_HH__
97 template<
typename DataType,
bool Const,
size_t NumDwords>
100 template<
typename DataType,
bool Const,
101 size_t NumDwords =
sizeof(DataType) /
sizeof(
VecElemU32)>
105 "Incorrect number of DWORDS for VEGA operand.");
135 ->reservedScalarRegs);
150 assert(_gpuDynInst->wavefront());
151 assert(_gpuDynInst->computeUnit());
152 Wavefront *wf = _gpuDynInst->wavefront();
155 for (
auto i = 0;
i < NumDwords; ++
i) {
157 vrfData[
i] = &cu->
vrf[wf->
simdId]->readWriteable(vgprIdx);
159 DPRINTF(GPUVRF,
"Read v[%d]\n", vgprIdx);
160 cu->
vrf[wf->
simdId]->printReg(wf, vgprIdx);
163 if (NumDwords == 1) {
168 std::memcpy((
void*)&vgpr[lane],
169 (
void*)&reg_file_vgpr[lane],
sizeof(DataType));
171 }
else if (NumDwords == 2) {
180 ((
VecElemU32*)&tmp_val)[0] = reg_file_vgpr0[lane];
181 ((
VecElemU32*)&tmp_val)[1] = reg_file_vgpr1[lane];
182 vgpr[lane] = tmp_val;
202 assert(_gpuDynInst->wavefront());
203 assert(_gpuDynInst->computeUnit());
204 Wavefront *wf = _gpuDynInst->wavefront();
207 ? _gpuDynInst->exec_mask : wf->
execMask();
209 if (NumDwords == 1) {
211 vrfData[0] = &cu->
vrf[wf->
simdId]->readWriteable(vgprIdx);
217 if (exec_mask[lane] || _gpuDynInst->ignoreExec()) {
218 std::memcpy((
void*)&reg_file_vgpr[lane],
219 (
void*)&vgpr[lane],
sizeof(DataType));
223 DPRINTF(GPUVRF,
"Write v[%d]\n", vgprIdx);
224 cu->
vrf[wf->
simdId]->printReg(wf, vgprIdx);
225 }
else if (NumDwords == 2) {
228 vrfData[0] = &cu->
vrf[wf->
simdId]->readWriteable(vgprIdx0);
229 vrfData[1] = &cu->
vrf[wf->
simdId]->readWriteable(vgprIdx1);
237 if (exec_mask[lane] || _gpuDynInst->ignoreExec()) {
238 reg_file_vgpr0[lane] = ((
VecElemU32*)&vgpr[lane])[0];
239 reg_file_vgpr1[lane] = ((
VecElemU32*)&vgpr[lane])[1];
243 DPRINTF(GPUVRF,
"Write v[%d:%d]\n", vgprIdx0, vgprIdx1);
244 cu->
vrf[wf->
simdId]->printReg(wf, vgprIdx0);
245 cu->
vrf[wf->
simdId]->printReg(wf, vgprIdx1);
266 template<
bool Condition = (NumDwords == 1 || NumDwords == 2) && Const>
267 typename std::enable_if<Condition, const DataType>::type
273 DataType ret_val = scRegData.rawData();
276 assert(std::is_floating_point_v<DataType>);
277 ret_val = std::fabs(ret_val);
281 assert(std::is_floating_point_v<DataType>);
288 DataType ret_val = vgpr[idx];
291 assert(std::is_floating_point_v<DataType>);
292 ret_val = std::fabs(ret_val);
296 assert(std::is_floating_point_v<DataType>);
309 template<
bool Condition = (NumDwords == 1 || NumDwords == 2) && !Const>
310 typename std::enable_if<Condition, DataType&>::type
361 std::array<VecRegContainerU32*, NumDwords>
vrfData;
364 template<
typename DataType,
bool Const,
365 size_t NumDwords =
sizeof(DataType) /
sizeof(
ScalarRegU32)>
369 "Incorrect number of DWORDS for VEGA operand.");
376 std::memset(srfData.data(), 0, NumDwords *
sizeof(
ScalarRegU32));
390 template<
bool Condition = NumDwords == 1 || NumDwords == 2>
391 typename std::enable_if<Condition, DataType>::type
394 assert(
sizeof(DataType) <=
sizeof(srfData));
395 DataType raw_data((DataType)0);
396 std::memcpy((
void*)&raw_data, (
void*)srfData.data(),
405 return (
void*)srfData.data();
411 Wavefront *wf = _gpuDynInst->wavefront();
417 for (
auto i = 0;
i < NumDwords; ++
i) {
418 int sgprIdx = regIdx(
i);
419 srfData[
i] = cu->
srf[wf->
simdId]->read(sgprIdx);
420 DPRINTF(GPUSRF,
"Read s[%d]\n", sgprIdx);
421 cu->
srf[wf->
simdId]->printReg(wf, sgprIdx);
429 Wavefront *wf = _gpuDynInst->wavefront();
436 if (NumDwords == 1) {
437 std::memcpy((
void*)&new_exec_mask_val,
439 }
else if (NumDwords == 2) {
440 std::memcpy((
void*)&new_exec_mask_val,
443 panic(
"Trying to write more than 2 DWORDS to EXEC\n");
447 DPRINTF(GPUSRF,
"Write EXEC\n");
448 DPRINTF(GPUSRF,
"EXEC = %#x\n", new_exec_mask_val);
454 assert(NumDwords == 1);
458 std::memcpy((
void*)&new_exec_mask_hi_val,
459 (
void*)srfData.data(),
sizeof(new_exec_mask_hi_val));
461 new_exec_mask_hi_val);
464 DPRINTF(GPUSRF,
"Write EXEC\n");
465 DPRINTF(GPUSRF,
"EXEC = %#x\n", new_exec_mask_val);
467 _gpuDynInst->writeMiscReg(_opIdx, srfData[0]);
470 for (
auto i = 0;
i < NumDwords; ++
i) {
471 int sgprIdx = regIdx(
i);
472 auto &sgpr = cu->
srf[wf->
simdId]->readWriteable(sgprIdx);
473 if (_gpuDynInst->isLoad()) {
476 _gpuDynInst->scalar_data)[
i];
480 DPRINTF(GPUSRF,
"Write s[%d]\n", sgprIdx);
481 cu->
srf[wf->
simdId]->printReg(wf, sgprIdx);
489 template<
bool Condition = NumDwords == 1 || NumDwords == 2>
490 typename std::enable_if<Condition, void>::type
493 GEM5_ALIGNED(8) DataType &sgpr = *((DataType*)srfData.data());
497 template<
bool Condition = (NumDwords == 1 || NumDwords == 2) && !Const>
498 typename std::enable_if<Condition, ScalarOperand&>::type
501 std::memcpy((
void*)srfData.data(), (
void*)&rhs,
sizeof(DataType));
515 assert(NumDwords == 1 || NumDwords == 2);
519 assert(
sizeof(DataType) <=
sizeof(srfData));
520 DataType misc_val(0);
522 misc_val = (DataType)_gpuDynInst
523 ->readConstVal<DataType>(_opIdx);
524 std::memcpy((
void*)srfData.data(), (
void*)&misc_val,
531 assert(
sizeof(DataType) <=
sizeof(srfData));
532 DataType misc_val(0);
533 misc_val = (DataType)_gpuDynInst->readMiscReg(_opIdx);
534 std::memcpy((
void*)srfData.data(), (
void*)&misc_val,
543 if constexpr (NumDwords == 2) {
545 execMask().to_ullong();
546 std::memcpy((
void*)srfData.data(), (
void*)&exec_mask,
548 DPRINTF(GPUSRF,
"Read EXEC\n");
549 DPRINTF(GPUSRF,
"EXEC = %#x\n", exec_mask);
552 execMask().to_ullong();
555 std::memcpy((
void*)srfData.data(),
556 (
void*)&exec_mask_lo,
sizeof(exec_mask_lo));
557 DPRINTF(GPUSRF,
"Read EXEC_LO\n");
558 DPRINTF(GPUSRF,
"EXEC_LO = %#x\n", exec_mask_lo);
568 assert(NumDwords == 1);
570 ->execMask().to_ullong();
573 std::memcpy((
void*)srfData.data(), (
void*)&exec_mask_hi,
574 sizeof(exec_mask_hi));
575 DPRINTF(GPUSRF,
"Read EXEC_HI\n");
576 DPRINTF(GPUSRF,
"EXEC_HI = %#x\n", exec_mask_hi);
589 srfData[0] = _gpuDynInst->srcLiteral();
590 if constexpr (NumDwords == 2) {
591 if constexpr (std::is_integral_v<DataType>) {
592 if constexpr (std::is_signed_v<DataType>) {
593 if (
bits(srfData[0], 31, 31) == 1) {
594 srfData[1] = 0xffffffff;
602 srfData[1] = _gpuDynInst->srcLiteral();
609 assert(NumDwords == 2);
610 if constexpr (NumDwords == 2) {
613 std::memcpy((
void*)srfData.data(), (
void*)&shared_base,
615 DPRINTF(GPUSRF,
"Read SHARED_BASE = %#x\n",
622 assert(NumDwords == 2);
623 if constexpr (NumDwords == 2) {
626 std::memcpy((
void*)srfData.data(),
627 (
void*)&shared_limit,
sizeof(srfData));
628 DPRINTF(GPUSRF,
"Read SHARED_LIMIT = %#x\n",
635 assert(NumDwords == 2);
636 if constexpr (NumDwords == 2) {
639 std::memcpy((
void*)srfData.data(), (
void*)&priv_base,
641 DPRINTF(GPUSRF,
"Read PRIVATE_BASE = %#x\n",
648 assert(NumDwords == 2);
649 if constexpr (NumDwords == 2) {
653 std::memcpy((
void*)srfData.data(), (
void*)&priv_limit,
655 DPRINTF(GPUSRF,
"Read PRIVATE_LIMIT = %#x\n",
663 std::memcpy((
void*)srfData.data(), (
void*)&pos_half,
671 std::memcpy((
void*)srfData.data(), (
void*)&neg_half,
678 std::memcpy(srfData.data(), &pos_one,
sizeof(pos_one));
684 std::memcpy(srfData.data(), &neg_one,
sizeof(neg_one));
690 std::memcpy(srfData.data(), &pos_two,
sizeof(pos_two));
696 std::memcpy(srfData.data(), &neg_two,
sizeof(neg_two));
702 std::memcpy(srfData.data(), &pos_four,
sizeof(pos_four));
708 std::memcpy((
void*)srfData.data(), (
void*)&neg_four ,
721 std::memcpy((
void*)srfData.data(),
722 (
void*)&pi_u64,
sizeof(pi_u64));
724 std::memcpy((
void*)srfData.data(),
725 (
void*)&pi_u32,
sizeof(pi_u32));
730 panic(
"Invalid special register index: %d\n", _opIdx);
743 Wavefront *wf = _gpuDynInst->wavefront();
757 assert(NumDwords == 1);
764 assert(sgprIdx > -1);
842template<
int BITS,
int ELEM_SIZE>
851 static_assert(BITS % 32 == 0);
852 static_assert(BITS % ELEM_SIZE == 0);
853 static_assert(ELEM_SIZE <= 32);
855 static constexpr int NumDwords = BITS / 32;
856 uint32_t dwords[NumDwords] = {};
864 assert(dw < NumDwords);
871 assert(dw < NumDwords);
878 assert(elem < (BITS / ELEM_SIZE));
882 ubit = elem * ELEM_SIZE + (ELEM_SIZE - 1);
883 lbit = elem * ELEM_SIZE;
891 assert(udw == ldw || udw == ldw + 1);
895 int dw_lbit = lbit % 32;
897 uint32_t
elem_mask = (1UL << ELEM_SIZE) - 1;
898 uint32_t rv = (dwords[ldw] >> dw_lbit) &
elem_mask;
906 uint64_t(dwords[udw]) << 32 | uint64_t(dwords[ldw]);
908 int qw_lbit = lbit % 32;
910 uint64_t
elem_mask = (1ULL << ELEM_SIZE) - 1;
911 uint32_t rv = uint32_t((qword >> qw_lbit) &
elem_mask);
919 assert(elem < (BITS / ELEM_SIZE));
923 ubit = elem * ELEM_SIZE + (ELEM_SIZE - 1);
924 lbit = elem * ELEM_SIZE;
932 assert(udw == ldw || udw == ldw + 1);
936 int dw_lbit = lbit % 32;
939 uint32_t
elem_mask = (1UL << ELEM_SIZE) - 1;
944 dwords[ldw] &= ~elem_mask;
947 dwords[ldw] |= value;
955 uint64_t(dwords[udw]) << 32 | uint64_t(dwords[ldw]);
957 int qw_lbit = lbit % 32;
960 uint64_t
elem_mask = (1ULL << ELEM_SIZE) - 1;
969 dwords[udw] = uint32_t(qword >> 32);
970 dwords[ldw] = uint32_t(qword &
mask(32));
std::vector< ScalarRegisterFile * > srf
RegisterManager * registerManager
std::vector< VectorRegisterFile * > vrf
int mapVgpr(Wavefront *w, int vgprIndex)
int mapSgpr(Wavefront *w, int sgprIndex)
const ApertureRegister & scratchApe() const
const ApertureRegister & ldsApe() const
Operand(GPUDynInstPtr gpuDynInst, int opIdx)
GPUDynInstPtr _gpuDynInst
instruction object that owns this operand
int _opIdx
op selector value for this operand.
virtual void read()=0
read from and write to the underlying register(s) that this operand is referring to.
uint32_t getElem(int elem)
void setDword(int dw, uint32_t value)
void setElem(int elem, uint32_t value)
uint32_t getDword(int dw)
int regIdx(int dword) const
for scalars we need to do some extra work to figure out how to map the op selector to the sgpr idx be...
void read() override
read from and write to the underlying register(s) that this operand is referring to.
std::enable_if< Condition, DataType >::type rawData() const
we store scalar data in a std::array, however if we need the full operand data we use this method to ...
void readSpecialVal()
we have determined that we are not reading our scalar operand data from the register file,...
ScalarOperand(GPUDynInstPtr gpuDynInst, int opIdx)
std::enable_if< Condition, ScalarOperand & >::type operator=(DataType rhs)
std::enable_if< Condition, void >::type setBit(int bit, int bit_val)
bit access to scalar data.
VecRegCont vecReg
this holds all the operand data in a single vector register object (i.e., if an operand is 64b,...
bool scalar
whether this operand is a scalar or not.
void read() override
read from the vrf.
void readSrc()
certain vector operands can read from the vrf/srf or constants.
std::array< VecRegContainerU32 *, NumDwords > vrfData
pointers to the underlying registers (i.e., the actual registers in the register file).
std::enable_if< Condition, DataType & >::type operator[](size_t idx)
setter [] operator.
bool absMod
absolute value and negative modifiers.
void write() override
write to the vrf.
std::enable_if< Condition, const DataType >::type operator[](size_t idx) const
getter [] operator.
ScalarOperand< DataType, Const, NumDwords > scRegData
for src operands that read scalars (i.e., scalar regs or a scalar constant).
void readScalar()
if we determine that this operand is a scalar (reg or constant) then we read the scalar data into the...
VecOperand(GPUDynInstPtr gpuDynInst, int opIdx)
constexpr T bits(T val, unsigned first, unsigned last)
Extract the bitfield from position 'first' to 'last' (inclusive) from 'val' and right justify it.
constexpr void replaceBits(T &val, unsigned first, unsigned last, B bit_val)
A convenience function to replace bits first to last of val with bit_val in place.
#define panic(...)
This implements a cprintf based panic() function.
constexpr unsigned NumVecElemPerVecReg
int elem_mask(const T *vs, const int index)
bool isVectorReg(int opIdx)
int opSelectorToRegIdx(int idx, int numScalarRegs)
bool isConstVal(int opIdx)
bool isScalarReg(int opIdx)
constexpr size_t MaxOperandDwords(16)
Copyright (c) 2024 - Pranith Kumar Copyright (c) 2020 Inria All rights reserved.
typedef GEM5_ALIGNED(8) uint64_t uint64_ta
std::shared_ptr< GPUDynInst > GPUDynInstPtr
std::bitset< std::numeric_limits< unsigned long long >::digits > VectorMask
Overload hash function for BasicBlockRange type.
convenience traits so we can automatically infer the correct FP type without looking at the number of...
Vector Registers layout specification.