32#ifndef __ARCH_VEGA_OPERAND_HH__
33#define __ARCH_VEGA_OPERAND_HH__
96 template<
typename DataType,
bool Const,
size_t NumDwords>
99 template<
typename DataType,
bool Const,
100 size_t NumDwords =
sizeof(DataType) /
sizeof(
VecElemU32)>
104 "Incorrect number of DWORDS for VEGA operand.");
134 ->reservedScalarRegs);
149 assert(_gpuDynInst->wavefront());
150 assert(_gpuDynInst->computeUnit());
151 Wavefront *wf = _gpuDynInst->wavefront();
154 for (
auto i = 0;
i < NumDwords; ++
i) {
156 vrfData[
i] = &cu->
vrf[wf->
simdId]->readWriteable(vgprIdx);
158 DPRINTF(GPUVRF,
"Read v[%d]\n", vgprIdx);
159 cu->
vrf[wf->
simdId]->printReg(wf, vgprIdx);
162 if (NumDwords == 1) {
164 auto vgpr = vecReg.template as<DataType>();
165 auto reg_file_vgpr = vrfData[0]->template as<VecElemU32>();
167 std::memcpy((
void*)&vgpr[lane],
168 (
void*)&reg_file_vgpr[lane],
sizeof(DataType));
170 }
else if (NumDwords == 2) {
173 auto vgpr = vecReg.template as<VecElemU64>();
174 auto reg_file_vgpr0 = vrfData[0]->template as<VecElemU32>();
175 auto reg_file_vgpr1 = vrfData[1]->template as<VecElemU32>();
179 ((
VecElemU32*)&tmp_val)[0] = reg_file_vgpr0[lane];
180 ((
VecElemU32*)&tmp_val)[1] = reg_file_vgpr1[lane];
181 vgpr[lane] = tmp_val;
201 assert(_gpuDynInst->wavefront());
202 assert(_gpuDynInst->computeUnit());
203 Wavefront *wf = _gpuDynInst->wavefront();
206 ? _gpuDynInst->exec_mask : wf->
execMask();
208 if (NumDwords == 1) {
210 vrfData[0] = &cu->
vrf[wf->
simdId]->readWriteable(vgprIdx);
212 auto reg_file_vgpr = vrfData[0]->template as<VecElemU32>();
213 auto vgpr = vecReg.template as<DataType>();
216 if (exec_mask[lane] || _gpuDynInst->ignoreExec()) {
217 std::memcpy((
void*)&reg_file_vgpr[lane],
218 (
void*)&vgpr[lane],
sizeof(DataType));
222 DPRINTF(GPUVRF,
"Write v[%d]\n", vgprIdx);
223 cu->
vrf[wf->
simdId]->printReg(wf, vgprIdx);
224 }
else if (NumDwords == 2) {
227 vrfData[0] = &cu->
vrf[wf->
simdId]->readWriteable(vgprIdx0);
228 vrfData[1] = &cu->
vrf[wf->
simdId]->readWriteable(vgprIdx1);
231 auto reg_file_vgpr0 = vrfData[0]->template as<VecElemU32>();
232 auto reg_file_vgpr1 = vrfData[1]->template as<VecElemU32>();
233 auto vgpr = vecReg.template as<VecElemU64>();
236 if (exec_mask[lane] || _gpuDynInst->ignoreExec()) {
237 reg_file_vgpr0[lane] = ((
VecElemU32*)&vgpr[lane])[0];
238 reg_file_vgpr1[lane] = ((
VecElemU32*)&vgpr[lane])[1];
242 DPRINTF(GPUVRF,
"Write v[%d:%d]\n", vgprIdx0, vgprIdx1);
243 cu->
vrf[wf->
simdId]->printReg(wf, vgprIdx0);
244 cu->
vrf[wf->
simdId]->printReg(wf, vgprIdx1);
265 template<
bool Condition = (NumDwords == 1 || NumDwords == 2) && Const>
266 typename std::enable_if<Condition, const DataType>::type
272 DataType ret_val = scRegData.rawData();
275 assert(std::is_floating_point_v<DataType>);
276 ret_val = std::fabs(ret_val);
280 assert(std::is_floating_point_v<DataType>);
286 auto vgpr = vecReg.template as<DataType>();
287 DataType ret_val = vgpr[idx];
290 assert(std::is_floating_point_v<DataType>);
291 ret_val = std::fabs(ret_val);
295 assert(std::is_floating_point_v<DataType>);
308 template<
bool Condition = (NumDwords == 1 || NumDwords == 2) && !Const>
309 typename std::enable_if<Condition, DataType&>::type
315 return vecReg.template as<DataType>()[idx];
360 std::array<VecRegContainerU32*, NumDwords>
vrfData;
363 template<
typename DataType,
bool Const,
364 size_t NumDwords =
sizeof(DataType) /
sizeof(
ScalarRegU32)>
367 static_assert(NumDwords >= 1 && NumDwords <= MaxOperandDwords,
368 "Incorrect number of DWORDS for VEGA operand.");
375 std::memset(srfData.data(), 0, NumDwords *
sizeof(
ScalarRegU32));
389 template<
bool Condition = NumDwords == 1 || NumDwords == 2>
390 typename std::enable_if<Condition, DataType>::type
393 assert(
sizeof(DataType) <=
sizeof(srfData));
394 DataType raw_data((DataType)0);
395 std::memcpy((
void*)&raw_data, (
void*)srfData.data(),
404 return (
void*)srfData.data();
410 Wavefront *wf = _gpuDynInst->wavefront();
413 if (!isScalarReg(_opIdx)) {
416 for (
auto i = 0;
i < NumDwords; ++
i) {
417 int sgprIdx = regIdx(
i);
418 srfData[
i] = cu->
srf[wf->
simdId]->read(sgprIdx);
419 DPRINTF(GPUSRF,
"Read s[%d]\n", sgprIdx);
420 cu->
srf[wf->
simdId]->printReg(wf, sgprIdx);
428 Wavefront *wf = _gpuDynInst->wavefront();
431 if (!isScalarReg(_opIdx)) {
432 if (_opIdx == REG_EXEC_LO) {
435 if (NumDwords == 1) {
436 std::memcpy((
void*)&new_exec_mask_val,
438 }
else if (NumDwords == 2) {
439 std::memcpy((
void*)&new_exec_mask_val,
442 panic(
"Trying to write more than 2 DWORDS to EXEC\n");
446 DPRINTF(GPUSRF,
"Write EXEC\n");
447 DPRINTF(GPUSRF,
"EXEC = %#x\n", new_exec_mask_val);
448 }
else if (_opIdx == REG_EXEC_HI) {
453 assert(NumDwords == 1);
457 std::memcpy((
void*)&new_exec_mask_hi_val,
458 (
void*)srfData.data(),
sizeof(new_exec_mask_hi_val));
460 new_exec_mask_hi_val);
463 DPRINTF(GPUSRF,
"Write EXEC\n");
464 DPRINTF(GPUSRF,
"EXEC = %#x\n", new_exec_mask_val);
466 _gpuDynInst->writeMiscReg(_opIdx, srfData[0]);
469 for (
auto i = 0;
i < NumDwords; ++
i) {
470 int sgprIdx = regIdx(
i);
471 auto &sgpr = cu->
srf[wf->
simdId]->readWriteable(sgprIdx);
472 if (_gpuDynInst->isLoad()) {
475 _gpuDynInst->scalar_data)[
i];
479 DPRINTF(GPUSRF,
"Write s[%d]\n", sgprIdx);
480 cu->
srf[wf->
simdId]->printReg(wf, sgprIdx);
488 template<
bool Condition = NumDwords == 1 || NumDwords == 2>
489 typename std::enable_if<Condition, void>::type
492 DataType &sgpr = *((DataType*)srfData.data());
496 template<
bool Condition = (NumDwords == 1 || NumDwords == 2) && !Const>
497 typename std::enable_if<Condition, ScalarOperand&>::type
500 std::memcpy((
void*)srfData.data(), (
void*)&rhs,
sizeof(DataType));
514 assert(NumDwords == 1 || NumDwords == 2);
520 execMask().to_ullong();
521 std::memcpy((
void*)srfData.data(), (
void*)&exec_mask,
523 DPRINTF(GPUSRF,
"Read EXEC\n");
524 DPRINTF(GPUSRF,
"EXEC = %#x\n", exec_mask);
533 assert(NumDwords == 1);
535 ->execMask().to_ullong();
538 std::memcpy((
void*)srfData.data(), (
void*)&exec_mask_hi,
539 sizeof(exec_mask_hi));
540 DPRINTF(GPUSRF,
"Read EXEC_HI\n");
541 DPRINTF(GPUSRF,
"EXEC_HI = %#x\n", exec_mask_hi);
546 case REG_SRC_LITERAL:
547 assert(NumDwords == 1);
548 srfData[0] = _gpuDynInst->srcLiteral();
553 std::memcpy((
void*)srfData.data(), (
void*)&pos_half,
561 std::memcpy((
void*)srfData.data(), (
void*)&neg_half,
568 std::memcpy(srfData.data(), &pos_one,
sizeof(pos_one));
574 std::memcpy(srfData.data(), &neg_one,
sizeof(neg_one));
580 std::memcpy(srfData.data(), &pos_two,
sizeof(pos_two));
586 std::memcpy(srfData.data(), &neg_two,
sizeof(neg_two));
592 std::memcpy(srfData.data(), &pos_four,
sizeof(pos_four));
598 std::memcpy((
void*)srfData.data(), (
void*)&neg_four ,
611 std::memcpy((
void*)srfData.data(),
612 (
void*)&pi_u64,
sizeof(pi_u64));
614 std::memcpy((
void*)srfData.data(),
615 (
void*)&pi_u32,
sizeof(pi_u32));
621 assert(
sizeof(DataType) <=
sizeof(srfData));
622 DataType misc_val(0);
623 if (isConstVal(_opIdx)) {
624 misc_val = (DataType)_gpuDynInst
625 ->readConstVal<DataType>(_opIdx);
627 misc_val = (DataType)_gpuDynInst->readMiscReg(_opIdx);
629 std::memcpy((
void*)srfData.data(), (
void*)&misc_val,
643 Wavefront *wf = _gpuDynInst->wavefront();
647 if (_opIdx == REG_VCC_HI) {
650 }
else if (_opIdx == REG_VCC_LO) {
653 }
else if (_opIdx == REG_FLAT_SCRATCH_HI) {
656 }
else if (_opIdx == REG_FLAT_SCRATCH_LO) {
657 assert(NumDwords == 1);
664 assert(sgprIdx > -1);
std::vector< ScalarRegisterFile * > srf
RegisterManager * registerManager
std::vector< VectorRegisterFile * > vrf
int mapVgpr(Wavefront *w, int vgprIndex)
int mapSgpr(Wavefront *w, int sgprIndex)
Operand(GPUDynInstPtr gpuDynInst, int opIdx)
GPUDynInstPtr _gpuDynInst
instruction object that owns this operand
int _opIdx
op selector value for this operand.
virtual void read()=0
read from and write to the underlying register(s) that this operand is referring to.
std::array< ScalarRegU32, NumDwords > srfData
in VEGA each register is represented as a 32b unsigned value, however operands may require up to 16 r...
int regIdx(int dword) const
for scalars we need to do some extra work to figure out how to map the op selector to the sgpr idx be...
void read() override
read from and write to the underlying register(s) that this operand is referring to.
std::enable_if< Condition, DataType >::type rawData() const
we store scalar data in a std::array, however if we need the full operand data we use this method to ...
void readSpecialVal()
we have determined that we are not reading our scalar operand data from the register file,...
ScalarOperand(GPUDynInstPtr gpuDynInst, int opIdx)
std::enable_if< Condition, ScalarOperand & >::type operator=(DataType rhs)
std::enable_if< Condition, void >::type setBit(int bit, int bit_val)
bit access to scalar data.
VecRegCont vecReg
this holds all the operand data in a single vector register object (i.e., if an operand is 64b,...
bool scalar
whether this operand is a scalar or not.
void read() override
read from the vrf.
void readSrc()
certain vector operands can read from the vrf/srf or constants.
std::array< VecRegContainerU32 *, NumDwords > vrfData
pointers to the underlying registers (i.e., the actual registers in the register file).
std::enable_if< Condition, DataType & >::type operator[](size_t idx)
setter [] operator.
bool absMod
absolute value and negative modifiers.
void write() override
write to the vrf.
std::enable_if< Condition, const DataType >::type operator[](size_t idx) const
getter [] operator.
ScalarOperand< DataType, Const, NumDwords > scRegData
for src operands that read scalars (i.e., scalar regs or a scalar constant).
void readScalar()
if we determine that this operand is a scalar (reg or constant) then we read the scalar data into the...
VecOperand(GPUDynInstPtr gpuDynInst, int opIdx)
constexpr T bits(T val, unsigned first, unsigned last)
Extract the bitfield from position 'first' to 'last' (inclusive) from 'val' and right justify it.
constexpr void replaceBits(T &val, unsigned first, unsigned last, B bit_val)
A convenience function to replace bits first to last of val with bit_val in place.
#define panic(...)
This implements a cprintf based panic() function.
constexpr unsigned NumVecElemPerVecReg
bool isVectorReg(int opIdx)
int opSelectorToRegIdx(int opIdx, int numScalarRegs)
constexpr size_t MaxOperandDwords(16)
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
std::shared_ptr< GPUDynInst > GPUDynInstPtr
std::bitset< std::numeric_limits< unsigned long long >::digits > VectorMask
convenience traits so we can automatically infer the correct FP type without looking at the number of...
Vector Registers layout specification.