operand.hh
/*
 * Copyright (c) 2017-2021 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#ifndef __ARCH_VEGA_OPERAND_HH__
#define __ARCH_VEGA_OPERAND_HH__

#include <array>
#include <cmath>
#include <cstring>
#include <type_traits>

#include "arch/amdgpu/vega/gpu_registers.hh"
#include "debug/GPUSRF.hh"
#include "debug/GPUTrace.hh"
#include "debug/GPUVRF.hh"
#include "gpu-compute/shader.hh"

namespace gem5
{

/**
 * Classes that represent vector/scalar operands in the VEGA ISA.
 */
namespace VegaISA
{
    /**
     * convenience traits so we can automatically infer the correct
     * floating-point type for an operand without looking at the number
     * of dwords it occupies.
     */
    template<typename T> struct OpTraits { typedef float FloatT; };
    template<> struct OpTraits<ScalarRegF64> { typedef double FloatT; };
    template<> struct OpTraits<ScalarRegU64> { typedef double FloatT; };

    class Operand
    {
      public:
        Operand() = delete;

        Operand(GPUDynInstPtr gpuDynInst, int opIdx)
            : _gpuDynInst(gpuDynInst), _opIdx(opIdx)
        {
            assert(_gpuDynInst);
            assert(_opIdx >= 0);
        }

        /**
         * read from and write to the underlying register(s) that this
         * operand is referring to.
         */
        virtual void read() = 0;
        virtual void write() = 0;

      protected:
        /**
         * instruction object that owns this operand
         */
        GPUDynInstPtr _gpuDynInst;

        /**
         * op selector value for this operand.
         */
        int _opIdx;
    };

    template<typename DataType, bool Const, size_t NumDwords>
    class ScalarOperand;

    template<typename DataType, bool Const,
             size_t NumDwords = sizeof(DataType) / sizeof(VecElemU32)>
    class VecOperand final : public Operand
    {
        static_assert(NumDwords >= 1 && NumDwords <= MaxOperandDwords,
                      "Incorrect number of DWORDS for VEGA operand.");

      public:
        VecOperand() = delete;

        VecOperand(GPUDynInstPtr gpuDynInst, int opIdx)
            : Operand(gpuDynInst, opIdx), scalar(false), absMod(false),
              negMod(false), scRegData(gpuDynInst, _opIdx),
              vrfData{{ nullptr }}
        {
            vecReg.zero();
        }

        ~VecOperand()
        {
        }

        /**
         * certain vector operands can read from the vrf/srf or constants.
         */
        void
        readSrc()
        {
            if (isVectorReg(_opIdx)) {
                _opIdx = opSelectorToRegIdx(_opIdx, _gpuDynInst->wavefront()
                    ->reservedScalarRegs);
                read();
            } else {
                readScalar();
            }
        }

        void
        read() override
        {
            assert(_gpuDynInst);
            assert(_gpuDynInst->wavefront());
            assert(_gpuDynInst->computeUnit());
            Wavefront *wf = _gpuDynInst->wavefront();
            ComputeUnit *cu = _gpuDynInst->computeUnit();

            for (auto i = 0; i < NumDwords; ++i) {
                int vgprIdx = cu->registerManager->mapVgpr(wf, _opIdx + i);
                vrfData[i] = &cu->vrf[wf->simdId]->readWriteable(vgprIdx);

                DPRINTF(GPUVRF, "Read v[%d]\n", vgprIdx);
                DPRINTF(GPUTrace, "wave[%d] Read v[%d] by instruction %s\n",
                        wf->wfDynId, vgprIdx,
                        _gpuDynInst->disassemble().c_str());
                cu->vrf[wf->simdId]->printReg(wf, vgprIdx);
            }

            if (NumDwords == 1) {
                assert(vrfData[0]);
                auto vgpr = vecReg.template as<DataType>();
                auto reg_file_vgpr = vrfData[0]->template as<VecElemU32>();
                for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                    std::memcpy((void*)&vgpr[lane],
                        (void*)&reg_file_vgpr[lane], sizeof(DataType));
                }
            } else if (NumDwords == 2) {
                assert(vrfData[0]);
                assert(vrfData[1]);
                auto vgpr = vecReg.template as<VecElemU64>();
                auto reg_file_vgpr0 = vrfData[0]->template as<VecElemU32>();
                auto reg_file_vgpr1 = vrfData[1]->template as<VecElemU32>();

                for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                    VecElemU64 tmp_val(0);
                    ((VecElemU32*)&tmp_val)[0] = reg_file_vgpr0[lane];
                    ((VecElemU32*)&tmp_val)[1] = reg_file_vgpr1[lane];
                    vgpr[lane] = tmp_val;
                }
            }
        }

        /**
         * write to the vrf. lanes that are not active in the exec mask
         * are left untouched (unless the instruction ignores the exec
         * mask); loads use the mask captured when they were issued.
         */
        void
        write() override
        {
            assert(_gpuDynInst);
            assert(_gpuDynInst->wavefront());
            assert(_gpuDynInst->computeUnit());
            Wavefront *wf = _gpuDynInst->wavefront();
            ComputeUnit *cu = _gpuDynInst->computeUnit();
            VectorMask &exec_mask = _gpuDynInst->isLoad()
                ? _gpuDynInst->exec_mask : wf->execMask();

            if (NumDwords == 1) {
                int vgprIdx = cu->registerManager->mapVgpr(wf, _opIdx);
                vrfData[0] = &cu->vrf[wf->simdId]->readWriteable(vgprIdx);
                assert(vrfData[0]);
                auto reg_file_vgpr = vrfData[0]->template as<VecElemU32>();
                auto vgpr = vecReg.template as<DataType>();

                for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                    if (exec_mask[lane] || _gpuDynInst->ignoreExec()) {
                        std::memcpy((void*)&reg_file_vgpr[lane],
                            (void*)&vgpr[lane], sizeof(DataType));
                    }
                }

                DPRINTF(GPUVRF, "Write v[%d]\n", vgprIdx);
                DPRINTF(GPUTrace, "wave[%d] Write v[%d] by instruction %s\n",
                        wf->wfDynId, vgprIdx,
                        _gpuDynInst->disassemble().c_str());
                cu->vrf[wf->simdId]->printReg(wf, vgprIdx);
            } else if (NumDwords == 2) {
                int vgprIdx0 = cu->registerManager->mapVgpr(wf, _opIdx);
                int vgprIdx1 = cu->registerManager->mapVgpr(wf, _opIdx + 1);
                vrfData[0] = &cu->vrf[wf->simdId]->readWriteable(vgprIdx0);
                vrfData[1] = &cu->vrf[wf->simdId]->readWriteable(vgprIdx1);
                assert(vrfData[0]);
                assert(vrfData[1]);
                auto reg_file_vgpr0 = vrfData[0]->template as<VecElemU32>();
                auto reg_file_vgpr1 = vrfData[1]->template as<VecElemU32>();
                auto vgpr = vecReg.template as<VecElemU64>();

                for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
                    if (exec_mask[lane] || _gpuDynInst->ignoreExec()) {
                        reg_file_vgpr0[lane] = ((VecElemU32*)&vgpr[lane])[0];
                        reg_file_vgpr1[lane] = ((VecElemU32*)&vgpr[lane])[1];
                    }
                }

                DPRINTF(GPUVRF, "Write v[%d:%d]\n", vgprIdx0, vgprIdx1);
                DPRINTF(GPUTrace, "wave[%d] Write v[%d:%d] by instruction "
                        "%s\n", wf->wfDynId, vgprIdx0, vgprIdx1,
                        _gpuDynInst->disassemble().c_str());
                cu->vrf[wf->simdId]->printReg(wf, vgprIdx0);
                cu->vrf[wf->simdId]->printReg(wf, vgprIdx1);
            }
        }

        void
        negModifier()
        {
            negMod = true;
        }

        void
        absModifier()
        {
            absMod = true;
        }

        /**
         * getter [] operator. only enabled if the operand is const
         * (i.e., a source operand) and small enough to be represented
         * by a primitive type (one or two dwords).
         */
        template<bool Condition = (NumDwords == 1 || NumDwords == 2) && Const>
        typename std::enable_if<Condition, const DataType>::type
        operator[](size_t idx) const
        {
            assert(idx < NumVecElemPerVecReg);

            if (scalar) {
                DataType ret_val = scRegData.rawData();

                if (absMod) {
                    assert(std::is_floating_point_v<DataType>);
                    ret_val = std::fabs(ret_val);
                }

                if (negMod) {
                    assert(std::is_floating_point_v<DataType>);
                    ret_val = -ret_val;
                }

                return ret_val;
            } else {
                auto vgpr = vecReg.template as<DataType>();
                DataType ret_val = vgpr[idx];

                if (absMod) {
                    assert(std::is_floating_point_v<DataType>);
                    ret_val = std::fabs(ret_val);
                }

                if (negMod) {
                    assert(std::is_floating_point_v<DataType>);
                    ret_val = -ret_val;
                }

                return ret_val;
            }
        }

        /**
         * setter [] operator. only enabled if the operand is not const
         * (i.e., a destination operand) and fits in one or two dwords.
         */
        template<bool Condition = (NumDwords == 1 || NumDwords == 2) && !Const>
        typename std::enable_if<Condition, DataType&>::type
        operator[](size_t idx)
        {
            assert(!scalar);
            assert(idx < NumVecElemPerVecReg);

            return vecReg.template as<DataType>()[idx];
        }

      private:
        /**
         * if the operand actually refers to a scalar register or an
         * inline constant, read it through the scalar operand component.
         */
        void
        readScalar()
        {
            scalar = true;
            scRegData.read();
        }

        using VecRegCont =
            VecRegContainer<sizeof(DataType) * NumVecElemPerVecReg>;

        /**
         * whether this operand is currently backed by a scalar register
         * or constant rather than a vector register.
         */
        bool scalar;
        /**
         * absolute value and negation modifiers that some instruction
         * encodings may apply to their source operands.
         */
        bool absMod;
        bool negMod;
        /**
         * local copy of the operand data, one element per lane.
         */
        VecRegCont vecReg;
        /**
         * scalar operand used when this vector operand refers to a
         * scalar register or an inline constant.
         */
        ScalarOperand<DataType, Const, NumDwords> scRegData;
        /**
         * pointers to the underlying vector register file registers.
         */
        std::array<VecRegContainerU32*, NumDwords> vrfData;
    };


    template<typename DataType, bool Const,
             size_t NumDwords = sizeof(DataType) / sizeof(ScalarRegU32)>
    class ScalarOperand final : public Operand
    {
        static_assert(NumDwords >= 1 && NumDwords <= MaxOperandDwords,
                      "Incorrect number of DWORDS for VEGA operand.");

      public:
        ScalarOperand() = delete;

        ScalarOperand(GPUDynInstPtr gpuDynInst, int opIdx)
            : Operand(gpuDynInst, opIdx)
        {
            std::memset(srfData.data(), 0, NumDwords * sizeof(ScalarRegU32));
        }

        ~ScalarOperand()
        {
        }

        /**
         * we store scalar data in a std::array, however if we need the
         * full operand data we use this method to copy it into a single
         * primitive (only sensible for 1- or 2-dword operands).
         */
        template<bool Condition = NumDwords == 1 || NumDwords == 2>
        typename std::enable_if<Condition, DataType>::type
        rawData() const
        {
            assert(sizeof(DataType) <= sizeof(srfData));
            DataType raw_data((DataType)0);
            std::memcpy((void*)&raw_data, (void*)srfData.data(),
                        sizeof(DataType));

            return raw_data;
        }

        void*
        rawDataPtr()
        {
            return (void*)srfData.data();
        }

        void
        read() override
        {
            Wavefront *wf = _gpuDynInst->wavefront();
            ComputeUnit *cu = _gpuDynInst->computeUnit();

            if (!isScalarReg(_opIdx)) {
                readSpecialVal();
            } else {
                for (auto i = 0; i < NumDwords; ++i) {
                    int sgprIdx = regIdx(i);
                    srfData[i] = cu->srf[wf->simdId]->read(sgprIdx);
                    DPRINTF(GPUSRF, "Read s[%d]\n", sgprIdx);
                    DPRINTF(GPUTrace, "wave[%d] Read s[%d] by instruction "
                            "%s\n", wf->wfDynId, sgprIdx,
                            _gpuDynInst->disassemble().c_str());
                    cu->srf[wf->simdId]->printReg(wf, sgprIdx);
                }
            }
        }

        void
        write() override
        {
            Wavefront *wf = _gpuDynInst->wavefront();
            ComputeUnit *cu = _gpuDynInst->computeUnit();

            if (!isScalarReg(_opIdx)) {
                if (_opIdx == REG_EXEC_LO) {
                    ScalarRegU64 new_exec_mask_val
                        = wf->execMask().to_ullong();
                    if (NumDwords == 1) {
                        std::memcpy((void*)&new_exec_mask_val,
                            (void*)srfData.data(), sizeof(VecElemU32));
                    } else if (NumDwords == 2) {
                        std::memcpy((void*)&new_exec_mask_val,
                            (void*)srfData.data(), sizeof(VecElemU64));
                    } else {
                        panic("Trying to write more than 2 DWORDS to EXEC\n");
                    }
                    VectorMask new_exec_mask(new_exec_mask_val);
                    wf->execMask() = new_exec_mask;
                    DPRINTF(GPUSRF, "Write EXEC\n");
                    DPRINTF(GPUSRF, "EXEC = %#x\n", new_exec_mask_val);
                } else if (_opIdx == REG_EXEC_HI) {
                    assert(NumDwords == 1);
                    ScalarRegU32 new_exec_mask_hi_val(0);
                    ScalarRegU64 new_exec_mask_val
                        = wf->execMask().to_ullong();
                    std::memcpy((void*)&new_exec_mask_hi_val,
                        (void*)srfData.data(), sizeof(new_exec_mask_hi_val));
                    replaceBits(new_exec_mask_val, 63, 32,
                                new_exec_mask_hi_val);
                    VectorMask new_exec_mask(new_exec_mask_val);
                    wf->execMask() = new_exec_mask;
                    DPRINTF(GPUSRF, "Write EXEC\n");
                    DPRINTF(GPUSRF, "EXEC = %#x\n", new_exec_mask_val);
                } else {
                    _gpuDynInst->writeMiscReg(_opIdx, srfData[0]);
                }
            } else {
                for (auto i = 0; i < NumDwords; ++i) {
                    int sgprIdx = regIdx(i);
                    auto &sgpr = cu->srf[wf->simdId]->readWriteable(sgprIdx);
                    if (_gpuDynInst->isLoad()) {
                        assert(sizeof(DataType) <= sizeof(ScalarRegU64));
                        sgpr = reinterpret_cast<ScalarRegU32*>(
                            _gpuDynInst->scalar_data)[i];
                    } else {
                        sgpr = srfData[i];
                    }
                    DPRINTF(GPUSRF, "Write s[%d]\n", sgprIdx);
                    DPRINTF(GPUTrace, "wave[%d] Write s[%d] by instruction "
                            "%s\n", wf->wfDynId, sgprIdx,
                            _gpuDynInst->disassemble().c_str());
                    cu->srf[wf->simdId]->printReg(wf, sgprIdx);
                }
            }
        }

        /**
         * bit access to scalar data, primarily used for setting VCC/SCC
         * style condition bits.
         */
        template<bool Condition = NumDwords == 1 || NumDwords == 2>
        typename std::enable_if<Condition, void>::type
        setBit(int bit, int bit_val)
        {
            GEM5_ALIGNED(8) DataType &sgpr = *((DataType*)srfData.data());
            replaceBits(sgpr, bit, bit_val);
        }
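
        // Illustrative sketch (assumed, not taken from this file): a
        // lane-wise compare typically deposits one result bit per lane
        // into a 64-bit scalar condition operand, where `vcc`, `src0`,
        // and `src1` are hypothetical operands of the instruction:
        //
        //     for (int lane = 0; lane < NumVecElemPerVecReg; ++lane)
        //         vcc.setBit(lane, src0[lane] < src1[lane] ? 1 : 0);
        //     vcc.write();
        //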

        template<bool Condition = (NumDwords == 1 || NumDwords == 2) && !Const>
        typename std::enable_if<Condition, ScalarOperand&>::type
        operator=(DataType rhs)
        {
            std::memcpy((void*)srfData.data(), (void*)&rhs, sizeof(DataType));
            return *this;
        }

      private:
        /**
         * read operands that do not live in the scalar register file:
         * inline constants, hardware registers (EXEC, M0, SCC, ...),
         * aperture values, and literals.
         */
        void
        readSpecialVal()
        {
            assert(NumDwords == 1 || NumDwords == 2);

            if (isConstVal(_opIdx)) {
                assert(sizeof(DataType) <= sizeof(srfData));
                DataType misc_val(0);
                assert(isConstVal(_opIdx));
                misc_val = (DataType)_gpuDynInst
                    ->readConstVal<DataType>(_opIdx);
                std::memcpy((void*)srfData.data(), (void*)&misc_val,
                            sizeof(DataType));

                return;
            }

            if (_opIdx == REG_M0 || _opIdx == REG_ZERO || _opIdx == REG_SCC) {
                assert(sizeof(DataType) <= sizeof(srfData));
                DataType misc_val(0);
                misc_val = (DataType)_gpuDynInst->readMiscReg(_opIdx);
                std::memcpy((void*)srfData.data(), (void*)&misc_val,
                            sizeof(DataType));

                return;
            }

            switch(_opIdx) {
              case REG_EXEC_LO:
                {
                    if constexpr (NumDwords == 2) {
                        ScalarRegU64 exec_mask = _gpuDynInst->wavefront()->
                            execMask().to_ullong();
                        std::memcpy((void*)srfData.data(), (void*)&exec_mask,
                                    sizeof(exec_mask));
                        DPRINTF(GPUSRF, "Read EXEC\n");
                        DPRINTF(GPUSRF, "EXEC = %#x\n", exec_mask);
                    } else {
                        ScalarRegU64 exec_mask = _gpuDynInst->wavefront()->
                            execMask().to_ullong();

                        ScalarRegU32 exec_mask_lo = bits(exec_mask, 31, 0);
                        std::memcpy((void*)srfData.data(),
                                    (void*)&exec_mask_lo,
                                    sizeof(exec_mask_lo));
                        DPRINTF(GPUSRF, "Read EXEC_LO\n");
                        DPRINTF(GPUSRF, "EXEC_LO = %#x\n", exec_mask_lo);
                    }
                }
                break;
              case REG_EXEC_HI:
                {
                    assert(NumDwords == 1);
                    ScalarRegU64 exec_mask = _gpuDynInst->wavefront()
                        ->execMask().to_ullong();

                    ScalarRegU32 exec_mask_hi = bits(exec_mask, 63, 32);
                    std::memcpy((void*)srfData.data(), (void*)&exec_mask_hi,
                                sizeof(exec_mask_hi));
                    DPRINTF(GPUSRF, "Read EXEC_HI\n");
                    DPRINTF(GPUSRF, "EXEC_HI = %#x\n", exec_mask_hi);
                }
                break;
              case REG_SRC_SWDA:
              case REG_SRC_DPP:
              case REG_SRC_LITERAL:
                // SDWA/DPP sources and literal constants take their value
                // from the 32-bit literal carried in the instruction stream
                // rather than from the SRF.
                srfData[0] = _gpuDynInst->srcLiteral();
                if constexpr (NumDwords == 2) {
                    if constexpr (std::is_integral_v<DataType>) {
                        if constexpr (std::is_signed_v<DataType>) {
                            // sign-extend a signed 32-bit literal into the
                            // upper dword
                            if (bits(srfData[0], 31, 31) == 1) {
                                srfData[1] = 0xffffffff;
                            } else {
                                srfData[1] = 0;
                            }
                        } else {
                            srfData[1] = 0;
                        }
                    } else {
                        // 64-bit FP literals carry the value in the upper
                        // dword
                        srfData[1] = _gpuDynInst->srcLiteral();
                        srfData[0] = 0;
                    }
                }
                break;
              case REG_SHARED_BASE:
                {
                    assert(NumDwords == 2);
                    if constexpr (NumDwords == 2) {
                        ComputeUnit *cu = _gpuDynInst->computeUnit();
                        ScalarRegU64 shared_base = cu->shader->ldsApe().base;
                        std::memcpy((void*)srfData.data(),
                                    (void*)&shared_base, sizeof(srfData));
                        DPRINTF(GPUSRF, "Read SHARED_BASE = %#x\n",
                                shared_base);
                    }
                }
                break;
              case REG_SHARED_LIMIT:
                {
                    assert(NumDwords == 2);
                    if constexpr (NumDwords == 2) {
                        ComputeUnit *cu = _gpuDynInst->computeUnit();
                        ScalarRegU64 shared_limit = cu->shader->ldsApe().limit;
                        std::memcpy((void*)srfData.data(),
                                    (void*)&shared_limit, sizeof(srfData));
                        DPRINTF(GPUSRF, "Read SHARED_LIMIT = %#x\n",
                                shared_limit);
                    }
                }
                break;
              case REG_PRIVATE_BASE:
                {
                    assert(NumDwords == 2);
                    if constexpr (NumDwords == 2) {
                        ComputeUnit *cu = _gpuDynInst->computeUnit();
                        ScalarRegU64 priv_base = cu->shader->scratchApe().base;
                        std::memcpy((void*)srfData.data(), (void*)&priv_base,
                                    sizeof(srfData));
                        DPRINTF(GPUSRF, "Read PRIVATE_BASE = %#x\n",
                                priv_base);
                    }
                }
                break;
              case REG_PRIVATE_LIMIT:
                {
                    assert(NumDwords == 2);
                    if constexpr (NumDwords == 2) {
                        ComputeUnit *cu = _gpuDynInst->computeUnit();
                        ScalarRegU64 priv_limit =
                            cu->shader->scratchApe().limit;
                        std::memcpy((void*)srfData.data(), (void*)&priv_limit,
                                    sizeof(srfData));
                        DPRINTF(GPUSRF, "Read PRIVATE_LIMIT = %#x\n",
                                priv_limit);
                    }
                }
                break;
              case REG_POS_HALF:
                {
                    typename OpTraits<DataType>::FloatT pos_half = 0.5;
                    std::memcpy((void*)srfData.data(), (void*)&pos_half,
                                sizeof(pos_half));
                }
                break;
              case REG_NEG_HALF:
                {
                    typename OpTraits<DataType>::FloatT neg_half = -0.5;
                    std::memcpy((void*)srfData.data(), (void*)&neg_half,
                                sizeof(neg_half));
                }
                break;
              case REG_POS_ONE:
                {
                    typename OpTraits<DataType>::FloatT pos_one = 1.0;
                    std::memcpy(srfData.data(), &pos_one, sizeof(pos_one));
                }
                break;
              case REG_NEG_ONE:
                {
                    typename OpTraits<DataType>::FloatT neg_one = -1.0;
                    std::memcpy(srfData.data(), &neg_one, sizeof(neg_one));
                }
                break;
              case REG_POS_TWO:
                {
                    typename OpTraits<DataType>::FloatT pos_two = 2.0;
                    std::memcpy(srfData.data(), &pos_two, sizeof(pos_two));
                }
                break;
              case REG_NEG_TWO:
                {
                    typename OpTraits<DataType>::FloatT neg_two = -2.0;
                    std::memcpy(srfData.data(), &neg_two, sizeof(neg_two));
                }
                break;
              case REG_POS_FOUR:
                {
                    typename OpTraits<DataType>::FloatT pos_four = 4.0;
                    std::memcpy(srfData.data(), &pos_four, sizeof(pos_four));
                }
                break;
              case REG_NEG_FOUR:
                {
                    typename OpTraits<DataType>::FloatT neg_four = -4.0;
                    std::memcpy((void*)srfData.data(), (void*)&neg_four,
                                sizeof(neg_four));
                }
                break;
              case REG_PI:
                {
                    assert(sizeof(DataType) == sizeof(ScalarRegF64)
                           || sizeof(DataType) == sizeof(ScalarRegF32));

                    const ScalarRegU32 pi_u32(0x3e22f983UL);
                    const ScalarRegU64 pi_u64(0x3fc45f306dc9c882ULL);

                    if (sizeof(DataType) == sizeof(ScalarRegF64)) {
                        std::memcpy((void*)srfData.data(),
                                    (void*)&pi_u64, sizeof(pi_u64));
                    } else {
                        std::memcpy((void*)srfData.data(),
                                    (void*)&pi_u32, sizeof(pi_u32));
                    }
                }
                break;
              default:
                panic("Invalid special register index: %d\n", _opIdx);
                break;
            }
        }

        /**
         * map the dword-th dword of this operand onto a physical SGPR
         * index, accounting for the special registers (VCC, FLAT_SCRATCH)
         * that live at the top of the wave's reserved scalar registers.
         */
        int
        regIdx(int dword) const
        {
            Wavefront *wf = _gpuDynInst->wavefront();
            ComputeUnit *cu = _gpuDynInst->computeUnit();
            int sgprIdx(-1);

            if (_opIdx == REG_VCC_HI) {
                sgprIdx = cu->registerManager
                    ->mapSgpr(wf, wf->reservedScalarRegs - 1 + dword);
            } else if (_opIdx == REG_VCC_LO) {
                sgprIdx = cu->registerManager
                    ->mapSgpr(wf, wf->reservedScalarRegs - 2 + dword);
            } else if (_opIdx == REG_FLAT_SCRATCH_HI) {
                sgprIdx = cu->registerManager
                    ->mapSgpr(wf, wf->reservedScalarRegs - 3 + dword);
            } else if (_opIdx == REG_FLAT_SCRATCH_LO) {
                assert(NumDwords == 1);
                sgprIdx = cu->registerManager
                    ->mapSgpr(wf, wf->reservedScalarRegs - 4 + dword);
            } else {
                sgprIdx = cu->registerManager->mapSgpr(wf, _opIdx + dword);
            }

            assert(sgprIdx > -1);

            return sgprIdx;
        }

        /**
         * backing storage for the scalar operand data, one 32-bit dword
         * per register.
         */
        GEM5_ALIGNED(8) std::array<ScalarRegU32, NumDwords> srfData;
    };

    // typedefs for the various sizes/types of scalar operands
    using ScalarOperandU8 = ScalarOperand<ScalarRegU8, false, 1>;
    using ScalarOperandI8 = ScalarOperand<ScalarRegI8, false, 1>;
    using ScalarOperandU16 = ScalarOperand<ScalarRegU16, false, 1>;
    using ScalarOperandI16 = ScalarOperand<ScalarRegI16, false, 1>;
    using ScalarOperandU32 = ScalarOperand<ScalarRegU32, false>;
    using ScalarOperandI32 = ScalarOperand<ScalarRegI32, false>;
    using ScalarOperandF32 = ScalarOperand<ScalarRegF32, false>;
    using ScalarOperandU64 = ScalarOperand<ScalarRegU64, false>;
    using ScalarOperandI64 = ScalarOperand<ScalarRegI64, false>;
    using ScalarOperandF64 = ScalarOperand<ScalarRegF64, false>;
    using ScalarOperandU128 = ScalarOperand<ScalarRegU32, false, 4>;
    using ScalarOperandU256 = ScalarOperand<ScalarRegU32, false, 8>;
    using ScalarOperandU512 = ScalarOperand<ScalarRegU32, false, 16>;
    // non-writeable versions of scalar operands
    using ConstScalarOperandU8 = ScalarOperand<ScalarRegU8, true, 1>;
    using ConstScalarOperandI8 = ScalarOperand<ScalarRegI8, true, 1>;
    using ConstScalarOperandU16 = ScalarOperand<ScalarRegU16, true, 1>;
    using ConstScalarOperandI16 = ScalarOperand<ScalarRegI16, true, 1>;
    using ConstScalarOperandU32 = ScalarOperand<ScalarRegU32, true>;
    using ConstScalarOperandI32 = ScalarOperand<ScalarRegI32, true>;
    using ConstScalarOperandF32 = ScalarOperand<ScalarRegF32, true>;
    using ConstScalarOperandU64 = ScalarOperand<ScalarRegU64, true>;
    using ConstScalarOperandI64 = ScalarOperand<ScalarRegI64, true>;
    using ConstScalarOperandF64 = ScalarOperand<ScalarRegF64, true>;
    using ConstScalarOperandU128 = ScalarOperand<ScalarRegU32, true, 4>;
    using ConstScalarOperandU256 = ScalarOperand<ScalarRegU32, true, 8>;
    using ConstScalarOperandU512 = ScalarOperand<ScalarRegU32, true, 16>;
    // typedefs for the various sizes/types of vector operands
    using VecOperandU8 = VecOperand<VecElemU8, false, 1>;
    using VecOperandI8 = VecOperand<VecElemI8, false, 1>;
    using VecOperandU16 = VecOperand<VecElemU16, false, 1>;
    using VecOperandI16 = VecOperand<VecElemI16, false, 1>;
    using VecOperandU32 = VecOperand<VecElemU32, false>;
    using VecOperandI32 = VecOperand<VecElemI32, false>;
    using VecOperandF32 = VecOperand<VecElemF32, false>;
    using VecOperandU64 = VecOperand<VecElemU64, false>;
    using VecOperandI64 = VecOperand<VecElemI64, false>;
    using VecOperandF64 = VecOperand<VecElemF64, false>;
    using VecOperandU96 = VecOperand<VecElemU32, false, 3>;
    using VecOperandU128 = VecOperand<VecElemU32, false, 4>;
    using VecOperandU256 = VecOperand<VecElemU32, false, 8>;
    using VecOperandU512 = VecOperand<VecElemU32, false, 16>;
    // non-writeable versions of vector operands
    using ConstVecOperandU8 = VecOperand<VecElemU8, true, 1>;
    using ConstVecOperandI8 = VecOperand<VecElemI8, true, 1>;
    using ConstVecOperandU16 = VecOperand<VecElemU16, true, 1>;
    using ConstVecOperandI16 = VecOperand<VecElemI16, true, 1>;
    using ConstVecOperandU32 = VecOperand<VecElemU32, true>;
    using ConstVecOperandI32 = VecOperand<VecElemI32, true>;
    using ConstVecOperandF32 = VecOperand<VecElemF32, true>;
    using ConstVecOperandU64 = VecOperand<VecElemU64, true>;
    using ConstVecOperandI64 = VecOperand<VecElemI64, true>;
    using ConstVecOperandF64 = VecOperand<VecElemF64, true>;
    using ConstVecOperandU96 = VecOperand<VecElemU32, true, 3>;
    using ConstVecOperandU128 = VecOperand<VecElemU32, true, 4>;
    using ConstVecOperandU256 = VecOperand<VecElemU32, true, 8>;
    using ConstVecOperandU512 = VecOperand<VecElemU32, true, 16>;

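    // A minimal usage sketch (assumed, not taken from this file) of how an
    // instruction's execute() typically drives these aliases. The names
    // gpuDynInst, srcIdx, and dstIdx are hypothetical stand-ins for the
    // decoded operand selectors an instruction would pass in.
    //
    //     ConstVecOperandF32 src(gpuDynInst, srcIdx);
    //     VecOperandF32 dst(gpuDynInst, dstIdx);
    //     src.readSrc();    // reads from the VRF, SRF, or an inline constant
    //     for (int lane = 0; lane < NumVecElemPerVecReg; ++lane)
    //         dst[lane] = src[lane] + 1.0f;
    //     dst.write();      // only lanes active in EXEC are committed
    //
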
// Helper class for using multiple VecElemU32 to represent data types which
// do not divide a dword evenly.
template<int BITS, int ELEM_SIZE>
class PackedReg
{
    // Logical view is:
    //   dword N, dword N - 1, ..., dword 1, dword 0.
    // Within each dword, the element starts at [ELEM_SIZE-1:0]. For example,
    // for ELEM_SIZE = 6 for fp6 types, [5:0] is the first value, [11:6] is
    // the second, and so forth. For 6 bits specifically, the 6th element
    // spans dword 0 and dword 1.
    static_assert(BITS % 32 == 0);
    static_assert(BITS % ELEM_SIZE == 0);
    static_assert(ELEM_SIZE <= 32);

    static constexpr int NumDwords = BITS / 32;
    uint32_t dwords[NumDwords] = {};

  public:
    PackedReg() = default;

    void
    setDword(int dw, uint32_t value)
    {
        assert(dw < NumDwords);
        dwords[dw] = value;
    }

    uint32_t
    getDword(int dw)
    {
        assert(dw < NumDwords);
        return dwords[dw];
    }

    uint32_t
    getElem(int elem)
    {
        assert(elem < (BITS / ELEM_SIZE));

        // Get the upper/lower *bit* location of the element.
        int ubit, lbit;
        ubit = elem * ELEM_SIZE + (ELEM_SIZE - 1);
        lbit = elem * ELEM_SIZE;

        // Convert the bit locations to upper/lower dwords. It is possible
        // to span two dwords but this does not have to support spanning
        // more than two dwords.
        int udw, ldw;
        udw = ubit / 32;
        ldw = lbit / 32;
        assert(udw == ldw || udw == ldw + 1);

        if (udw == ldw) {
            // Easy case, just shift the dword value and mask to get value.
            int dw_lbit = lbit % 32;

            uint32_t elem_mask = (1UL << ELEM_SIZE) - 1;
            uint32_t rv = (dwords[ldw] >> dw_lbit) & elem_mask;

            return rv;
        }

        // Harder case. To make it easier put into a quad word and shift
        // that variable instead of trying to work with two.
        uint64_t qword =
            uint64_t(dwords[udw]) << 32 | uint64_t(dwords[ldw]);

        int qw_lbit = lbit % 32;

        uint64_t elem_mask = (1ULL << ELEM_SIZE) - 1;
        uint32_t rv = uint32_t((qword >> qw_lbit) & elem_mask);

        return rv;
    }

    void
    setElem(int elem, uint32_t value)
    {
        assert(elem < (BITS / ELEM_SIZE));

        // Get the upper/lower *bit* location of the element.
        int ubit, lbit;
        ubit = elem * ELEM_SIZE + (ELEM_SIZE - 1);
        lbit = elem * ELEM_SIZE;

        // Convert the bit locations to upper/lower dwords. It is possible
        // to span two dwords but this does not have to support spanning
        // more than two dwords.
        int udw, ldw;
        udw = ubit / 32;
        ldw = lbit / 32;
        assert(udw == ldw || udw == ldw + 1);

        if (udw == ldw) {
            // Easy case, just shift the dword value and mask to get value.
            int dw_lbit = lbit % 32;

            // Make sure the value is not going to clobber another element.
            uint32_t elem_mask = (1UL << ELEM_SIZE) - 1;
            value &= elem_mask;

            // Clear the bits we are setting.
            elem_mask <<= dw_lbit;
            dwords[ldw] &= ~elem_mask;

            value <<= dw_lbit;
            dwords[ldw] |= value;

            return;
        }

        // Harder case. Put the two dwords in a quad word and manipulate that.
        // Then place the two new dwords back into the storage.
        uint64_t qword =
            uint64_t(dwords[udw]) << 32 | uint64_t(dwords[ldw]);

        int qw_lbit = lbit % 32;

        // Make sure the value is not going to clobber another element.
        uint64_t elem_mask = (1ULL << ELEM_SIZE) - 1;
        value &= elem_mask;

        // Clear the bits where the value goes so that operator| can be used.
        elem_mask <<= qw_lbit;
        qword &= ~elem_mask;

        // Promote to 64-bit to prevent shifting out of range
        uint64_t value64 = value;
        value64 <<= qw_lbit;
        qword |= value64;

        dwords[udw] = uint32_t(qword >> 32);
        dwords[ldw] = uint32_t(qword & mask(32));
    }
};
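
// A small, assumed usage sketch (not part of the ISA code): packing 6-bit
// elements into a 192-bit container. Element 5 occupies bits [35:30] and
// therefore spans dword 0 and dword 1, exercising the two-dword path above.
//
//     PackedReg<192, 6> fp6;
//     fp6.setElem(5, 0x2a);
//     assert(fp6.getElem(5) == 0x2a);
//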

} // namespace VegaISA

} // namespace gem5

#endif // __ARCH_VEGA_OPERAND_HH__