32#ifndef __ARCH_GCN3_OPERAND_HH__
33#define __ARCH_GCN3_OPERAND_HH__
96 template<
typename DataType,
bool Const,
size_t NumDwords>
99 template<
typename DataType,
bool Const,
100 size_t NumDwords =
sizeof(DataType) /
sizeof(
VecElemU32)>
104 "Incorrect number of DWORDS for GCN3 operand.");
134 ->reservedScalarRegs);
149 assert(_gpuDynInst->wavefront());
150 assert(_gpuDynInst->computeUnit());
151 Wavefront *wf = _gpuDynInst->wavefront();
154 for (
auto i = 0;
i < NumDwords; ++
i) {
156 vrfData[
i] = &cu->
vrf[wf->
simdId]->readWriteable(vgprIdx);
158 DPRINTF(GPUVRF,
"Read v[%d]\n", vgprIdx);
159 cu->
vrf[wf->
simdId]->printReg(wf, vgprIdx);
162 if (NumDwords == 1) {
164 auto vgpr = vecReg.template as<DataType>();
165 auto reg_file_vgpr = vrfData[0]->template as<VecElemU32>();
167 std::memcpy((
void*)&vgpr[lane],
168 (
void*)&reg_file_vgpr[lane],
sizeof(DataType));
170 }
else if (NumDwords == 2) {
173 auto vgpr = vecReg.template as<VecElemU64>();
174 auto reg_file_vgpr0 = vrfData[0]->template as<VecElemU32>();
175 auto reg_file_vgpr1 = vrfData[1]->template as<VecElemU32>();
179 ((
VecElemU32*)&tmp_val)[0] = reg_file_vgpr0[lane];
180 ((
VecElemU32*)&tmp_val)[1] = reg_file_vgpr1[lane];
181 vgpr[lane] = tmp_val;
201 assert(_gpuDynInst->wavefront());
202 assert(_gpuDynInst->computeUnit());
203 Wavefront *wf = _gpuDynInst->wavefront();
206 ? _gpuDynInst->exec_mask : wf->
execMask();
208 if (NumDwords == 1) {
210 vrfData[0] = &cu->
vrf[wf->
simdId]->readWriteable(vgprIdx);
212 auto reg_file_vgpr = vrfData[0]->template as<VecElemU32>();
213 auto vgpr = vecReg.template as<DataType>();
216 if (exec_mask[lane] || _gpuDynInst->ignoreExec()) {
217 std::memcpy((
void*)&reg_file_vgpr[lane],
218 (
void*)&vgpr[lane],
sizeof(DataType));
222 DPRINTF(GPUVRF,
"Write v[%d]\n", vgprIdx);
223 cu->
vrf[wf->
simdId]->printReg(wf, vgprIdx);
224 }
else if (NumDwords == 2) {
227 vrfData[0] = &cu->
vrf[wf->
simdId]->readWriteable(vgprIdx0);
228 vrfData[1] = &cu->
vrf[wf->
simdId]->readWriteable(vgprIdx1);
231 auto reg_file_vgpr0 = vrfData[0]->template as<VecElemU32>();
232 auto reg_file_vgpr1 = vrfData[1]->template as<VecElemU32>();
233 auto vgpr = vecReg.template as<VecElemU64>();
236 if (exec_mask[lane] || _gpuDynInst->ignoreExec()) {
237 reg_file_vgpr0[lane] = ((
VecElemU32*)&vgpr[lane])[0];
238 reg_file_vgpr1[lane] = ((
VecElemU32*)&vgpr[lane])[1];
242 DPRINTF(GPUVRF,
"Write v[%d:%d]\n", vgprIdx0, vgprIdx1);
243 cu->
vrf[wf->
simdId]->printReg(wf, vgprIdx0);
244 cu->
vrf[wf->
simdId]->printReg(wf, vgprIdx1);
265 template<
bool Condition = (NumDwords == 1 || NumDwords == 2) && Const>
266 typename std::enable_if_t<Condition, const DataType>
272 DataType ret_val = scRegData.rawData();
275 assert(std::is_floating_point_v<DataType>);
276 ret_val = std::fabs(ret_val);
280 assert(std::is_floating_point_v<DataType>);
286 auto vgpr = vecReg.template as<DataType>();
287 DataType ret_val = vgpr[idx];
290 assert(std::is_floating_point_v<DataType>);
291 ret_val = std::fabs(ret_val);
295 assert(std::is_floating_point_v<DataType>);
308 template<
bool Condition = (NumDwords == 1 || NumDwords == 2) && !Const>
309 typename std::enable_if_t<Condition, DataType&>
315 return vecReg.template as<DataType>()[idx];
360 std::array<VecRegContainerU32*, NumDwords>
vrfData;
363 template<
typename DataType,
bool Const,
364 size_t NumDwords =
sizeof(DataType) /
sizeof(
ScalarRegU32)>
368 "Incorrect number of DWORDS for GCN3 operand.");
375 std::memset(srfData.data(), 0, NumDwords *
sizeof(
ScalarRegU32));
389 template<
bool Condition = NumDwords == 1 || NumDwords == 2>
390 typename std::enable_if_t<Condition, DataType>
393 assert(
sizeof(DataType) <=
sizeof(srfData));
394 DataType raw_data((DataType)0);
395 std::memcpy((
void*)&raw_data, (
void*)srfData.data(),
404 return (
void*)srfData.data();
410 Wavefront *wf = _gpuDynInst->wavefront();
416 for (
auto i = 0;
i < NumDwords; ++
i) {
417 int sgprIdx = regIdx(
i);
418 srfData[
i] = cu->
srf[wf->
simdId]->read(sgprIdx);
419 DPRINTF(GPUSRF,
"Read s[%d]\n", sgprIdx);
420 cu->
srf[wf->
simdId]->printReg(wf, sgprIdx);
428 Wavefront *wf = _gpuDynInst->wavefront();
435 if (NumDwords == 1) {
436 std::memcpy((
void*)&new_exec_mask_val,
438 }
else if (NumDwords == 2) {
439 std::memcpy((
void*)&new_exec_mask_val,
442 panic(
"Trying to write more than 2 DWORDS to EXEC\n");
446 DPRINTF(GPUSRF,
"Write EXEC\n");
447 DPRINTF(GPUSRF,
"EXEC = %#x\n", new_exec_mask_val);
453 assert(NumDwords == 1);
457 std::memcpy((
void*)&new_exec_mask_hi_val,
458 (
void*)srfData.data(),
sizeof(new_exec_mask_hi_val));
460 new_exec_mask_hi_val);
463 DPRINTF(GPUSRF,
"Write EXEC\n");
464 DPRINTF(GPUSRF,
"EXEC = %#x\n", new_exec_mask_val);
466 _gpuDynInst->writeMiscReg(_opIdx, srfData[0]);
469 for (
auto i = 0;
i < NumDwords; ++
i) {
470 int sgprIdx = regIdx(
i);
471 auto &sgpr = cu->
srf[wf->
simdId]->readWriteable(sgprIdx);
472 if (_gpuDynInst->isLoad()) {
475 _gpuDynInst->scalar_data)[
i];
479 DPRINTF(GPUSRF,
"Write s[%d]\n", sgprIdx);
480 cu->
srf[wf->
simdId]->printReg(wf, sgprIdx);
488 template<
bool Condition = NumDwords == 1 || NumDwords == 2>
489 typename std::enable_if_t<Condition, void>
492 DataType &sgpr = *((DataType*)srfData.data());
496 template<
bool Condition = (NumDwords == 1 || NumDwords == 2) && !Const>
497 typename std::enable_if_t<Condition, ScalarOperand&>
500 std::memcpy((
void*)srfData.data(), (
void*)&rhs,
sizeof(DataType));
514 assert(NumDwords == 1 || NumDwords == 2);
519 if (NumDwords == 1) {
521 execMask().to_ulong();
522 std::memcpy((
void*)srfData.data(), (
void*)&exec_mask,
524 DPRINTF(GPUSRF,
"Read EXEC\n");
525 DPRINTF(GPUSRF,
"EXEC = %#x\n", exec_mask);
527 assert(NumDwords == 2);
529 execMask().to_ullong();
530 std::memcpy((
void*)srfData.data(), (
void*)&exec_mask,
532 DPRINTF(GPUSRF,
"Read EXEC\n");
533 DPRINTF(GPUSRF,
"EXEC = %#x\n", exec_mask);
543 assert(NumDwords == 1);
545 ->execMask().to_ullong();
548 std::memcpy((
void*)srfData.data(), (
void*)&exec_mask_hi,
549 sizeof(exec_mask_hi));
550 DPRINTF(GPUSRF,
"Read EXEC_HI\n");
551 DPRINTF(GPUSRF,
"EXEC_HI = %#x\n", exec_mask_hi);
557 assert(NumDwords == 1);
558 srfData[0] = _gpuDynInst->srcLiteral();
563 std::memcpy((
void*)srfData.data(), (
void*)&pos_half,
571 std::memcpy((
void*)srfData.data(), (
void*)&neg_half,
578 std::memcpy(srfData.data(), &pos_one,
sizeof(pos_one));
584 std::memcpy(srfData.data(), &neg_one,
sizeof(neg_one));
590 std::memcpy(srfData.data(), &pos_two,
sizeof(pos_two));
596 std::memcpy(srfData.data(), &neg_two,
sizeof(neg_two));
602 std::memcpy(srfData.data(), &pos_four,
sizeof(pos_four));
608 std::memcpy((
void*)srfData.data(), (
void*)&neg_four ,
621 std::memcpy((
void*)srfData.data(),
622 (
void*)&pi_u64,
sizeof(pi_u64));
624 std::memcpy((
void*)srfData.data(),
625 (
void*)&pi_u32,
sizeof(pi_u32));
631 assert(
sizeof(DataType) <=
sizeof(srfData));
632 DataType misc_val(0);
634 misc_val = (DataType)_gpuDynInst
635 ->readConstVal<DataType>(_opIdx);
637 misc_val = (DataType)_gpuDynInst->readMiscReg(_opIdx);
639 std::memcpy((
void*)srfData.data(), (
void*)&misc_val,
653 Wavefront *wf = _gpuDynInst->wavefront();
667 assert(NumDwords == 1);
674 assert(sgprIdx > -1);
std::vector< ScalarRegisterFile * > srf
RegisterManager * registerManager
std::vector< VectorRegisterFile * > vrf
Operand(GPUDynInstPtr gpuDynInst, int opIdx)
GPUDynInstPtr _gpuDynInst
instruction object that owns this operand
int _opIdx
op selector value for this operand.
virtual void read()=0
read from and write to the underlying register(s) that this operand is referring to.
std::enable_if_t< Condition, void > setBit(int bit, int bit_val)
bit access to scalar data.
std::enable_if_t< Condition, DataType > rawData() const
we store scalar data in a std::array, however if we need the full operand data we use this method to ...
void read() override
read from and write to the underlying register(s) that this operand is referring to.
std::array< ScalarRegU32, NumDwords > srfData
in GCN3 each register is represented as a 32b unsigned value, however operands may require up to 16 r...
void readSpecialVal()
we have determined that we are not reading our scalar operand data from the register file,...
ScalarOperand(GPUDynInstPtr gpuDynInst, int opIdx)
int regIdx(int dword) const
for scalars we need to do some extra work to figure out how to map the op selector to the sgpr idx be...
std::enable_if_t< Condition, ScalarOperand & > operator=(DataType rhs)
void readSrc()
certain vector operands can read from the vrf/srf or constants.
std::enable_if_t< Condition, DataType & > operator[](size_t idx)
setter [] operator.
VecOperand(GPUDynInstPtr gpuDynInst, int opIdx)
void write() override
write to the vrf.
ScalarOperand< DataType, Const, NumDwords > scRegData
for src operands that read scalars (i.e., scalar regs or a scalar constant).
void read() override
read from the vrf.
std::enable_if_t< Condition, const DataType > operator[](size_t idx) const
getter [] operator.
bool absMod
absolute value and negative modifiers.
void readScalar()
if we determine that this operand is a scalar (reg or constant) then we read the scalar data into the...
std::array< VecRegContainerU32 *, NumDwords > vrfData
pointers to the underlying registers (i.e., the actual registers in the register file).
bool scalar
whether this operand is a scalar or not.
VecRegCont vecReg
this holds all the operand data in a single vector register object (i.e., if an operand is 64b,...
int mapVgpr(Wavefront *w, int vgprIndex)
int mapSgpr(Wavefront *w, int sgprIndex)
constexpr T bits(T val, unsigned first, unsigned last)
Extract the bitfield from position 'first' to 'last' (inclusive) from 'val' and right justify it.
constexpr void replaceBits(T &val, unsigned first, unsigned last, B bit_val)
A convenience function to replace bits first to last of val with bit_val in place.
#define panic(...)
This implements a cprintf based panic() function.
constexpr unsigned NumVecElemPerVecReg
int opSelectorToRegIdx(int opIdx, int numScalarRegs)
bool isScalarReg(int opIdx)
constexpr size_t MaxOperandDwords(16)
bool isVectorReg(int opIdx)
bool isConstVal(int opIdx)
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
std::shared_ptr< GPUDynInst > GPUDynInstPtr
std::bitset< std::numeric_limits< unsigned long long >::digits > VectorMask
convenience traits so we can automatically infer the correct FP type without looking at the number of...
Vector Registers layout specification.