36 #ifndef __ARCH_GCN3_OPERAND_HH__ 37 #define __ARCH_GCN3_OPERAND_HH__ 43 #include "gpu-compute/scalar_register_file.hh" 70 : _gpuDynInst(gpuDynInst), _opIdx(opIdx)
80 virtual void read() = 0;
81 virtual void write() = 0;
97 template<
typename DataType,
bool Const,
size_t NumDwords>
100 template<
typename DataType,
bool Const,
101 size_t NumDwords =
sizeof(DataType) /
sizeof(
VecElemU32)>
105 "Incorrect number of DWORDS for GCN3 operand.");
111 :
Operand(gpuDynInst, opIdx), scalar(false), absMod(false),
112 negMod(false), scRegData(gpuDynInst, _opIdx),
135 ->reservedScalarRegs);
150 assert(_gpuDynInst->wavefront());
151 assert(_gpuDynInst->computeUnit());
152 Wavefront *wf = _gpuDynInst->wavefront();
155 for (
auto i = 0;
i < NumDwords; ++
i) {
156 int vgprIdx = cu->registerManager.mapVgpr(wf, _opIdx +
i);
157 vrfData[
i] = &cu->
vrf[wf->simdId]->readWriteable(vgprIdx);
159 DPRINTF(GPUVRF,
"Read v[%d]\n", vgprIdx);
160 cu->
vrf[wf->simdId]->printReg(wf, vgprIdx);
163 if (NumDwords == 1) {
165 auto vgpr = vecReg.template as<DataType>();
166 auto reg_file_vgpr = vrfData[0]->template as<VecElemU32>();
168 std::memcpy((
void*)&vgpr[lane],
169 (
void*)®_file_vgpr[lane],
sizeof(DataType));
171 }
else if (NumDwords == 2) {
174 auto vgpr = vecReg.template as<VecElemU64>();
175 auto reg_file_vgpr0 = vrfData[0]->template as<VecElemU32>();
176 auto reg_file_vgpr1 = vrfData[1]->template as<VecElemU32>();
180 ((
VecElemU32*)&tmp_val)[0] = reg_file_vgpr0[lane];
181 ((
VecElemU32*)&tmp_val)[1] = reg_file_vgpr1[lane];
182 vgpr[lane] = tmp_val;
202 assert(_gpuDynInst->wavefront());
203 assert(_gpuDynInst->computeUnit());
204 Wavefront *wf = _gpuDynInst->wavefront();
207 ? _gpuDynInst->exec_mask : wf->execMask();
209 if (NumDwords == 1) {
210 int vgprIdx = cu->registerManager.mapVgpr(wf, _opIdx);
211 vrfData[0] = &cu->
vrf[wf->simdId]->readWriteable(vgprIdx);
213 auto reg_file_vgpr = vrfData[0]->template as<VecElemU32>();
214 auto vgpr = vecReg.template as<DataType>();
217 if (exec_mask[lane] || _gpuDynInst->ignoreExec()) {
218 std::memcpy((
void*)®_file_vgpr[lane],
219 (
void*)&vgpr[lane],
sizeof(DataType));
223 DPRINTF(GPUVRF,
"Write v[%d]\n", vgprIdx);
224 cu->
vrf[wf->simdId]->printReg(wf, vgprIdx);
225 }
else if (NumDwords == 2) {
226 int vgprIdx0 = cu->registerManager.mapVgpr(wf, _opIdx);
227 int vgprIdx1 = cu->registerManager.mapVgpr(wf, _opIdx + 1);
228 vrfData[0] = &cu->
vrf[wf->simdId]->readWriteable(vgprIdx0);
229 vrfData[1] = &cu->
vrf[wf->simdId]->readWriteable(vgprIdx1);
232 auto reg_file_vgpr0 = vrfData[0]->template as<VecElemU32>();
233 auto reg_file_vgpr1 = vrfData[1]->template as<VecElemU32>();
234 auto vgpr = vecReg.template as<VecElemU64>();
237 if (exec_mask[lane] || _gpuDynInst->ignoreExec()) {
238 reg_file_vgpr0[lane] = ((
VecElemU32*)&vgpr[lane])[0];
239 reg_file_vgpr1[lane] = ((
VecElemU32*)&vgpr[lane])[1];
243 DPRINTF(GPUVRF,
"Write v[%d:%d]\n", vgprIdx0, vgprIdx1);
244 cu->
vrf[wf->simdId]->printReg(wf, vgprIdx0);
245 cu->
vrf[wf->simdId]->printReg(wf, vgprIdx1);
266 template<
bool Condition = (NumDwords == 1 || NumDwords == 2) && Const>
273 DataType ret_val = scRegData.rawData();
276 assert(std::is_floating_point<DataType>::value);
277 ret_val = std::fabs(ret_val);
281 assert(std::is_floating_point<DataType>::value);
287 auto vgpr = vecReg.template as<DataType>();
288 DataType ret_val = vgpr[idx];
291 assert(std::is_floating_point<DataType>::value);
292 ret_val = std::fabs(ret_val);
296 assert(std::is_floating_point<DataType>::value);
309 template<
bool Condition = (NumDwords == 1 || NumDwords == 2) && !Const>
316 return vecReg.template as<DataType>()[idx];
331 using VecRegCont =
typename std::conditional<NumDwords == 2,
334 typename std::conditional<
sizeof(DataType)
365 std::array<VecRegContainerU32*, NumDwords>
vrfData;
368 template<
typename DataType,
bool Const,
369 size_t NumDwords =
sizeof(DataType) /
sizeof(
ScalarRegU32)>
373 "Incorrect number of DWORDS for GCN3 operand.");
380 std::memset(srfData.data(), 0, NumDwords *
sizeof(
ScalarRegU32));
394 template<
bool Condition = NumDwords == 1 || NumDwords == 2>
398 assert(
sizeof(DataType) <=
sizeof(srfData));
399 DataType raw_data((DataType)0);
400 std::memcpy((
void*)&raw_data, (
void*)srfData.data(),
409 return (
void*)srfData.data();
415 Wavefront *wf = _gpuDynInst->wavefront();
421 for (
auto i = 0;
i < NumDwords; ++
i) {
422 int sgprIdx = regIdx(
i);
423 srfData[
i] = cu->srf[wf->
simdId]->read(sgprIdx);
424 DPRINTF(GPUSRF,
"Read s[%d]\n", sgprIdx);
425 cu->srf[wf->
simdId]->printReg(wf, sgprIdx);
433 Wavefront *wf = _gpuDynInst->wavefront();
438 uint64_t new_exec_mask_val(0);
439 std::memcpy((
void*)&new_exec_mask_val,
440 (
void*)srfData.data(),
sizeof(new_exec_mask_val));
443 DPRINTF(GPUSRF,
"Write EXEC\n");
444 DPRINTF(GPUSRF,
"EXEC = %#x\n", new_exec_mask_val);
446 _gpuDynInst->writeMiscReg(_opIdx, srfData[0]);
449 for (
auto i = 0;
i < NumDwords; ++
i) {
450 int sgprIdx = regIdx(
i);
451 auto &sgpr = cu->srf[wf->
simdId]->readWriteable(sgprIdx);
452 if (_gpuDynInst->isLoad()) {
455 _gpuDynInst->scalar_data)[
i];
459 DPRINTF(GPUSRF,
"Write s[%d]\n", sgprIdx);
460 cu->srf[wf->
simdId]->printReg(wf, sgprIdx);
468 template<
bool Condition = NumDwords == 1 || NumDwords == 2>
472 DataType &sgpr = *((DataType*)srfData.data());
476 template<
bool Condition = (NumDwords == 1 || NumDwords == 2) && !Const>
480 std::memcpy((
void*)srfData.data(), (
void*)&rhs,
sizeof(DataType));
494 assert(NumDwords == 1 || NumDwords == 2);
499 assert(NumDwords == 2);
501 execMask().to_ullong();
502 std::memcpy((
void*)srfData.data(), (
void*)&exec_mask,
504 DPRINTF(GPUSRF,
"Read EXEC\n");
505 DPRINTF(GPUSRF,
"EXEC = %#x\n", exec_mask);
511 assert(NumDwords == 1);
512 srfData[0] = _gpuDynInst->srcLiteral();
517 std::memcpy((
void*)srfData.data(), (
void*)&pos_half,
525 std::memcpy((
void*)srfData.data(), (
void*)&neg_half,
532 std::memcpy(srfData.data(), &pos_one,
sizeof(srfData));
538 std::memcpy(srfData.data(), &neg_one,
sizeof(srfData));
544 std::memcpy(srfData.data(), &pos_two,
sizeof(srfData));
550 std::memcpy(srfData.data(), &neg_two,
sizeof(srfData));
556 std::memcpy(srfData.data(), &pos_four,
sizeof(srfData));
562 std::memcpy((
void*)srfData.data(), (
void*)&neg_four ,
575 std::memcpy((
void*)srfData.data(),
576 (
void*)&pi_u64,
sizeof(srfData));
578 std::memcpy((
void*)srfData.data(),
579 (
void*)&pi_u32,
sizeof(srfData));
585 assert(
sizeof(DataType) <=
sizeof(srfData));
587 = (DataType)_gpuDynInst->readMiscReg(_opIdx);
588 std::memcpy((
void*)srfData.data(), (
void*)&misc_val,
602 Wavefront *wf = _gpuDynInst->wavefront();
607 sgprIdx = cu->registerManager
608 .mapSgpr(wf, wf->reservedScalarRegs - 2 + dword);
610 sgprIdx = cu->registerManager
611 .mapSgpr(wf, wf->reservedScalarRegs - 3 + dword);
613 assert(NumDwords == 1);
614 sgprIdx = cu->registerManager
615 .mapSgpr(wf, wf->reservedScalarRegs - 4 + dword);
617 sgprIdx = cu->registerManager.mapSgpr(wf, _opIdx + dword);
620 assert(sgprIdx > -1);
696 #endif // __ARCH_GCN3_OPERAND_HH__
Operand(GPUDynInstPtr gpuDynInst, int opIdx)
bool isScalarReg(int opIdx)
std::bitset< std::numeric_limits< unsigned long long >::digits > VectorMask
VecRegCont vecReg
this holds all the operand data in a single vector register object (i.e., if an operand is 64b...
std::array< ScalarRegU32, NumDwords > srfData
in GCN3 each register is represented as a 32b unsigned value, however operands may require up to 16 r...
ScalarOperand< DataType, Const, NumDwords > scRegData
for src operands that read scalars (i.e., scalar regs or a scalar constant).
std::enable_if< Condition, void >::type setBit(int bit, int bit_val)
bit access to scalar data.
typename std::conditional< NumDwords==2, VecRegContainerU64, typename std::conditional< sizeof(DataType)==sizeof(VecElemU16), VecRegContainerU16, typename std::conditional< sizeof(DataType)==sizeof(VecElemU8), VecRegContainerU8, VecRegContainerU32 >::type >::type >::type VecRegCont
ScalarOperand(GPUDynInstPtr gpuDynInst, int opIdx)
bool isVectorReg(int opIdx)
std::shared_ptr< GPUDynInst > GPUDynInstPtr
std::array< VecRegContainerU32 *, NumDwords > vrfData
pointers to the underlying registers (i.e., the actual registers in the register file)...
void read() override
read from and write to the underlying register(s) that this operand is referring to.
VecRegU64::Container VecRegContainerU64
classes that represent vector/scalar operands in the GCN3 ISA.
void readSpecialVal()
we have determined that we are not reading our scalar operand data from the register file...
void replaceBits(T &val, int first, int last, B bit_val)
A convenience function to replace bits first to last of val with bit_val in place.
int opSelectorToRegIdx(int idx, int numScalarRegs)
void read() override
read from the vrf.
VecRegU16::Container VecRegContainerU16
void write() override
write to the vrf.
bool scalar
whether this operand is a scalar or not.
void readSrc()
certain vector operands can read from the vrf/srf or constants.
GPUDynInstPtr _gpuDynInst
instruction object that owns this operand
std::enable_if< Condition, ScalarOperand & >::type operator=(DataType rhs)
std::vector< VectorRegisterFile * > vrf
Vector Registers layout specification.
VectorMask execMask() const
convenience traits so we can automatically infer the correct FP type without looking at the number of...
std::enable_if< Condition, const DataType >::type operator[](size_t idx) const
getter [] operator.
std::enable_if< Condition, DataType >::type rawData() const
we store scalar data in a std::array, however if we need the full operand data we use this method to ...
int _opIdx
op selector value for this operand.
VecRegU8::Container VecRegContainerU8
VecRegU32::Container VecRegContainerU32
VecOperand(GPUDynInstPtr gpuDynInst, int opIdx)
bool absMod
absolute value and negative modifiers.
const int NumVecElemPerVecReg(64)
int regIdx(int dword) const
for scalars we need to do some extra work to figure out how to map the op selector to the sgpr idx be...
constexpr size_t MaxOperandDwords(16)
std::enable_if< Condition, DataType & >::type operator[](size_t idx)
setter [] operator.
void readScalar()
if we determine that this operand is a scalar (reg or constant) then we read the scalar data into the...