gem5 v24.0.0.0
operand.hh
1/*
2 * Copyright (c) 2017-2021 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * 3. Neither the name of the copyright holder nor the names of its
16 * contributors may be used to endorse or promote products derived from this
17 * software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
32#ifndef __ARCH_VEGA_OPERAND_HH__
33#define __ARCH_VEGA_OPERAND_HH__
34
35#include <array>
36
40#include "gpu-compute/shader.hh"
43
44namespace gem5
45{
46
53namespace VegaISA
54{
60 template<typename T> struct OpTraits { typedef float FloatT; };
61 template<> struct OpTraits<ScalarRegF64> { typedef double FloatT; };
62 template<> struct OpTraits<ScalarRegU64> { typedef double FloatT; };
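// Note (descriptive, not in the generated listing's comments): OpTraits is
// used by readSpecialVal() below to materialize the inline FP constants
// (0.5, 1.0, 2.0, 4.0, ...) at the width implied by the operand's register
// type: the 64-bit scalar types map to double, everything else to float.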
63
64 class Operand
65 {
66 public:
67 Operand() = delete;
68
69 Operand(GPUDynInstPtr gpuDynInst, int opIdx)
70 : _gpuDynInst(gpuDynInst), _opIdx(opIdx)
71 {
72 assert(_gpuDynInst);
73 assert(_opIdx >= 0);
74 }
75
80 virtual void read() = 0;
81 virtual void write() = 0;
82
83 protected:
87 GPUDynInstPtr _gpuDynInst;
94 int _opIdx;
95 };
96
97 template<typename DataType, bool Const, size_t NumDwords>
98 class ScalarOperand;
99
100 template<typename DataType, bool Const,
101 size_t NumDwords = sizeof(DataType) / sizeof(VecElemU32)>
102 class VecOperand final : public Operand
103 {
104 static_assert(NumDwords >= 1 && NumDwords <= MaxOperandDwords,
105 "Incorrect number of DWORDS for VEGA operand.");
106
107 public:
108 VecOperand() = delete;
109
110 VecOperand(GPUDynInstPtr gpuDynInst, int opIdx)
111 : Operand(gpuDynInst, opIdx), scalar(false), absMod(false),
112 negMod(false), scRegData(gpuDynInst, _opIdx),
113 vrfData{{ nullptr }}
114 {
115 vecReg.zero();
116 }
117
118 ~VecOperand()
119 {
120 }
121
130 void
131 readSrc()
132 {
133 if (isVectorReg(_opIdx)) {
134 _opIdx = opSelectorToRegIdx(_opIdx, _gpuDynInst->wavefront()
135 ->reservedScalarRegs);
136 read();
137 } else {
138 readScalar();
139 }
140 }
141
146 void
147 read() override
148 {
149 assert(_gpuDynInst);
150 assert(_gpuDynInst->wavefront());
151 assert(_gpuDynInst->computeUnit());
152 Wavefront *wf = _gpuDynInst->wavefront();
153 ComputeUnit *cu = _gpuDynInst->computeUnit();
154
155 for (auto i = 0; i < NumDwords; ++i) {
156 int vgprIdx = cu->registerManager->mapVgpr(wf, _opIdx + i);
157 vrfData[i] = &cu->vrf[wf->simdId]->readWriteable(vgprIdx);
158
159 DPRINTF(GPUVRF, "Read v[%d]\n", vgprIdx);
160 cu->vrf[wf->simdId]->printReg(wf, vgprIdx);
161 }
162
163 if (NumDwords == 1) {
164 assert(vrfData[0]);
165 auto vgpr = vecReg.template as<DataType>();
166 auto reg_file_vgpr = vrfData[0]->template as<VecElemU32>();
167 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
168 std::memcpy((void*)&vgpr[lane],
169 (void*)&reg_file_vgpr[lane], sizeof(DataType));
170 }
171 } else if (NumDwords == 2) {
172 assert(vrfData[0]);
173 assert(vrfData[1]);
174 auto vgpr = vecReg.template as<VecElemU64>();
175 auto reg_file_vgpr0 = vrfData[0]->template as<VecElemU32>();
176 auto reg_file_vgpr1 = vrfData[1]->template as<VecElemU32>();
177
178 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
179 VecElemU64 tmp_val(0);
180 ((VecElemU32*)&tmp_val)[0] = reg_file_vgpr0[lane];
181 ((VecElemU32*)&tmp_val)[1] = reg_file_vgpr1[lane];
182 vgpr[lane] = tmp_val;
183 }
184 }
185 }
186
198 void
199 write() override
200 {
201 assert(_gpuDynInst);
202 assert(_gpuDynInst->wavefront());
203 assert(_gpuDynInst->computeUnit());
204 Wavefront *wf = _gpuDynInst->wavefront();
205 ComputeUnit *cu = _gpuDynInst->computeUnit();
206 VectorMask &exec_mask = _gpuDynInst->isLoad()
207 ? _gpuDynInst->exec_mask : wf->execMask();
208
209 if (NumDwords == 1) {
210 int vgprIdx = cu->registerManager->mapVgpr(wf, _opIdx);
211 vrfData[0] = &cu->vrf[wf->simdId]->readWriteable(vgprIdx);
212 assert(vrfData[0]);
213 auto reg_file_vgpr = vrfData[0]->template as<VecElemU32>();
214 auto vgpr = vecReg.template as<DataType>();
215
216 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
217 if (exec_mask[lane] || _gpuDynInst->ignoreExec()) {
218 std::memcpy((void*)&reg_file_vgpr[lane],
219 (void*)&vgpr[lane], sizeof(DataType));
220 }
221 }
222
223 DPRINTF(GPUVRF, "Write v[%d]\n", vgprIdx);
224 cu->vrf[wf->simdId]->printReg(wf, vgprIdx);
225 } else if (NumDwords == 2) {
226 int vgprIdx0 = cu->registerManager->mapVgpr(wf, _opIdx);
227 int vgprIdx1 = cu->registerManager->mapVgpr(wf, _opIdx + 1);
228 vrfData[0] = &cu->vrf[wf->simdId]->readWriteable(vgprIdx0);
229 vrfData[1] = &cu->vrf[wf->simdId]->readWriteable(vgprIdx1);
230 assert(vrfData[0]);
231 assert(vrfData[1]);
232 auto reg_file_vgpr0 = vrfData[0]->template as<VecElemU32>();
233 auto reg_file_vgpr1 = vrfData[1]->template as<VecElemU32>();
234 auto vgpr = vecReg.template as<VecElemU64>();
235
236 for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
237 if (exec_mask[lane] || _gpuDynInst->ignoreExec()) {
238 reg_file_vgpr0[lane] = ((VecElemU32*)&vgpr[lane])[0];
239 reg_file_vgpr1[lane] = ((VecElemU32*)&vgpr[lane])[1];
240 }
241 }
242
243 DPRINTF(GPUVRF, "Write v[%d:%d]\n", vgprIdx0, vgprIdx1);
244 cu->vrf[wf->simdId]->printReg(wf, vgprIdx0);
245 cu->vrf[wf->simdId]->printReg(wf, vgprIdx1);
246 }
247 }
248
249 void
250 negModifier()
251 {
252 negMod = true;
253 }
254
255 void
256 absModifier()
257 {
258 absMod = true;
259 }
260
266 template<bool Condition = (NumDwords == 1 || NumDwords == 2) && Const>
267 typename std::enable_if<Condition, const DataType>::type
268 operator[](size_t idx) const
269 {
270 assert(idx < NumVecElemPerVecReg);
271
272 if (scalar) {
273 DataType ret_val = scRegData.rawData();
274
275 if (absMod) {
276 assert(std::is_floating_point_v<DataType>);
277 ret_val = std::fabs(ret_val);
278 }
279
280 if (negMod) {
281 assert(std::is_floating_point_v<DataType>);
282 ret_val = -ret_val;
283 }
284
285 return ret_val;
286 } else {
287 auto vgpr = vecReg.template as<DataType>();
288 DataType ret_val = vgpr[idx];
289
290 if (absMod) {
291 assert(std::is_floating_point_v<DataType>);
292 ret_val = std::fabs(ret_val);
293 }
294
295 if (negMod) {
296 assert(std::is_floating_point_v<DataType>);
297 ret_val = -ret_val;
298 }
299
300 return ret_val;
301 }
302 }
303
309 template<bool Condition = (NumDwords == 1 || NumDwords == 2) && !Const>
310 typename std::enable_if<Condition, DataType&>::type
311 operator[](size_t idx)
312 {
313 assert(!scalar);
314 assert(idx < NumVecElemPerVecReg);
315
316 return vecReg.template as<DataType>()[idx];
317 }
318
319 private:
324 void
325 readScalar()
326 {
327 scalar = true;
328 scRegData.read();
329 }
330
331 using VecRegCont =
332 VecRegContainer<sizeof(DataType) * NumVecElemPerVecReg>;
333
337 bool scalar;
344 bool absMod;
345 bool negMod;
351 VecRegCont vecReg;
356 ScalarOperand<DataType, Const, NumDwords> scRegData;
361 std::array<VecRegContainerU32*, NumDwords> vrfData;
362 };
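// A rough usage sketch of how an instruction's execute() typically drives
// VecOperand (assuming the ConstVecOperandF32 / VecOperandF32 aliases
// declared near the end of this file and a VOP-style instData with
// SRC0/VDST selectors):
//
//     ConstVecOperandF32 src0(gpuDynInst, instData.SRC0);
//     VecOperandF32 vdst(gpuDynInst, instData.VDST);
//     src0.readSrc();                // VGPR, SGPR, or inline constant
//     for (int lane = 0; lane < NumVecElemPerVecReg; ++lane) {
//         if (gpuDynInst->wavefront()->execMask()[lane]) {
//             vdst[lane] = src0[lane];
//         }
//     }
//     vdst.write();                  // commit the result to the VRF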
363
364 template<typename DataType, bool Const,
365 size_t NumDwords = sizeof(DataType) / sizeof(ScalarRegU32)>
366 class ScalarOperand final : public Operand
367 {
368 static_assert(NumDwords >= 1 && NumDwords <= MaxOperandDwords,
369 "Incorrect number of DWORDS for VEGA operand.");
370 public:
371 ScalarOperand() = delete;
372
373 ScalarOperand(GPUDynInstPtr gpuDynInst, int opIdx)
374 : Operand(gpuDynInst, opIdx)
375 {
376 std::memset(srfData.data(), 0, NumDwords * sizeof(ScalarRegU32));
377 }
378
379 ~ScalarOperand()
380 {
381 }
382
390 template<bool Condition = NumDwords == 1 || NumDwords == 2>
391 typename std::enable_if<Condition, DataType>::type
392 rawData() const
393 {
394 assert(sizeof(DataType) <= sizeof(srfData));
395 DataType raw_data((DataType)0);
396 std::memcpy((void*)&raw_data, (void*)srfData.data(),
397 sizeof(DataType));
398
399 return raw_data;
400 }
401
402 void*
403 rawDataPtr()
404 {
405 return (void*)srfData.data();
406 }
407
408 void
409 read() override
410 {
411 Wavefront *wf = _gpuDynInst->wavefront();
412 ComputeUnit *cu = _gpuDynInst->computeUnit();
413
414 if (!isScalarReg(_opIdx)) {
415 readSpecialVal();
416 } else {
417 for (auto i = 0; i < NumDwords; ++i) {
418 int sgprIdx = regIdx(i);
419 srfData[i] = cu->srf[wf->simdId]->read(sgprIdx);
420 DPRINTF(GPUSRF, "Read s[%d]\n", sgprIdx);
421 cu->srf[wf->simdId]->printReg(wf, sgprIdx);
422 }
423 }
424 }
425
426 void
427 write() override
428 {
429 Wavefront *wf = _gpuDynInst->wavefront();
430 ComputeUnit *cu = _gpuDynInst->computeUnit();
431
432 if (!isScalarReg(_opIdx)) {
433 if (_opIdx == REG_EXEC_LO) {
434 ScalarRegU64 new_exec_mask_val
435 = wf->execMask().to_ullong();
436 if (NumDwords == 1) {
437 std::memcpy((void*)&new_exec_mask_val,
438 (void*)srfData.data(), sizeof(VecElemU32));
439 } else if (NumDwords == 2) {
440 std::memcpy((void*)&new_exec_mask_val,
441 (void*)srfData.data(), sizeof(VecElemU64));
442 } else {
443 panic("Trying to write more than 2 DWORDS to EXEC\n");
444 }
445 VectorMask new_exec_mask(new_exec_mask_val);
446 wf->execMask() = new_exec_mask;
447 DPRINTF(GPUSRF, "Write EXEC\n");
448 DPRINTF(GPUSRF, "EXEC = %#x\n", new_exec_mask_val);
449 } else if (_opIdx == REG_EXEC_HI) {
454 assert(NumDwords == 1);
455 ScalarRegU32 new_exec_mask_hi_val(0);
456 ScalarRegU64 new_exec_mask_val
457 = wf->execMask().to_ullong();
458 std::memcpy((void*)&new_exec_mask_hi_val,
459 (void*)srfData.data(), sizeof(new_exec_mask_hi_val));
460 replaceBits(new_exec_mask_val, 63, 32,
461 new_exec_mask_hi_val);
462 VectorMask new_exec_mask(new_exec_mask_val);
463 wf->execMask() = new_exec_mask;
464 DPRINTF(GPUSRF, "Write EXEC\n");
465 DPRINTF(GPUSRF, "EXEC = %#x\n", new_exec_mask_val);
466 } else {
467 _gpuDynInst->writeMiscReg(_opIdx, srfData[0]);
468 }
469 } else {
470 for (auto i = 0; i < NumDwords; ++i) {
471 int sgprIdx = regIdx(i);
472 auto &sgpr = cu->srf[wf->simdId]->readWriteable(sgprIdx);
473 if (_gpuDynInst->isLoad()) {
474 assert(sizeof(DataType) <= sizeof(ScalarRegU64));
475 sgpr = reinterpret_cast<ScalarRegU32*>(
476 _gpuDynInst->scalar_data)[i];
477 } else {
478 sgpr = srfData[i];
479 }
480 DPRINTF(GPUSRF, "Write s[%d]\n", sgprIdx);
481 cu->srf[wf->simdId]->printReg(wf, sgprIdx);
482 }
483 }
484 }
485
489 template<bool Condition = NumDwords == 1 || NumDwords == 2>
490 typename std::enable_if<Condition, void>::type
491 setBit(int bit, int bit_val)
492 {
493 GEM5_ALIGNED(8) DataType &sgpr = *((DataType*)srfData.data());
494 replaceBits(sgpr, bit, bit_val);
495 }
496
497 template<bool Condition = (NumDwords == 1 || NumDwords == 2) && !Const>
498 typename std::enable_if<Condition, ScalarOperand&>::type
499 operator=(DataType rhs)
500 {
501 std::memcpy((void*)srfData.data(), (void*)&rhs, sizeof(DataType));
502 return *this;
503 }
504
505 private:
512 void
513 readSpecialVal()
514 {
515 assert(NumDwords == 1 || NumDwords == 2);
516
517 if (_opIdx >= REG_INT_CONST_POS_MIN &&
518 _opIdx <= REG_INT_CONST_NEG_MAX) {
519 assert(sizeof(DataType) <= sizeof(srfData));
520 DataType misc_val(0);
521 assert(isConstVal(_opIdx));
522 misc_val = (DataType)_gpuDynInst
523 ->readConstVal<DataType>(_opIdx);
524 std::memcpy((void*)srfData.data(), (void*)&misc_val,
525 sizeof(DataType));
526
527 return;
528 }
529
530 if (_opIdx == REG_M0 || _opIdx == REG_ZERO || _opIdx == REG_SCC) {
531 assert(sizeof(DataType) <= sizeof(srfData));
532 DataType misc_val(0);
533 misc_val = (DataType)_gpuDynInst->readMiscReg(_opIdx);
534 std::memcpy((void*)srfData.data(), (void*)&misc_val,
535 sizeof(DataType));
536
537 return;
538 }
539
540 switch(_opIdx) {
541 case REG_EXEC_LO:
542 {
543 if constexpr (NumDwords == 2) {
544 ScalarRegU64 exec_mask = _gpuDynInst->wavefront()->
545 execMask().to_ullong();
546 std::memcpy((void*)srfData.data(), (void*)&exec_mask,
547 sizeof(exec_mask));
548 DPRINTF(GPUSRF, "Read EXEC\n");
549 DPRINTF(GPUSRF, "EXEC = %#x\n", exec_mask);
550 } else {
551 ScalarRegU64 exec_mask = _gpuDynInst->wavefront()->
552 execMask().to_ullong();
553
554 ScalarRegU32 exec_mask_lo = bits(exec_mask, 31, 0);
555 std::memcpy((void*)srfData.data(),
556 (void*)&exec_mask_lo, sizeof(exec_mask_lo));
557 DPRINTF(GPUSRF, "Read EXEC_LO\n");
558 DPRINTF(GPUSRF, "EXEC_LO = %#x\n", exec_mask_lo);
559 }
560 }
561 break;
562 case REG_EXEC_HI:
563 {
568 assert(NumDwords == 1);
569 ScalarRegU64 exec_mask = _gpuDynInst->wavefront()
570 ->execMask().to_ullong();
571
572 ScalarRegU32 exec_mask_hi = bits(exec_mask, 63, 32);
573 std::memcpy((void*)srfData.data(), (void*)&exec_mask_hi,
574 sizeof(exec_mask_hi));
575 DPRINTF(GPUSRF, "Read EXEC_HI\n");
576 DPRINTF(GPUSRF, "EXEC_HI = %#x\n", exec_mask_hi);
577 }
578 break;
579 case REG_SRC_SWDA:
580 case REG_SRC_DPP:
581 case REG_SRC_LITERAL:
589 srfData[0] = _gpuDynInst->srcLiteral();
590 if constexpr (NumDwords == 2) {
591 if constexpr (std::is_integral_v<DataType>) {
592 if constexpr (std::is_signed_v<DataType>) {
593 if (bits(srfData[0], 31, 31) == 1) {
594 srfData[1] = 0xffffffff;
595 } else {
596 srfData[1] = 0;
597 }
598 } else {
599 srfData[1] = 0;
600 }
601 } else {
602 srfData[1] = _gpuDynInst->srcLiteral();
603 srfData[0] = 0;
604 }
605 }
606 break;
607 case REG_SHARED_BASE:
608 {
609 assert(NumDwords == 2);
610 if constexpr (NumDwords == 2) {
611 ComputeUnit *cu = _gpuDynInst->computeUnit();
612 ScalarRegU64 shared_base = cu->shader->ldsApe().base;
613 std::memcpy((void*)srfData.data(), (void*)&shared_base,
614 sizeof(srfData));
615 DPRINTF(GPUSRF, "Read SHARED_BASE = %#x\n",
616 shared_base);
617 }
618 }
619 break;
620 case REG_SHARED_LIMIT:
621 {
622 assert(NumDwords == 2);
623 if constexpr (NumDwords == 2) {
624 ComputeUnit *cu = _gpuDynInst->computeUnit();
625 ScalarRegU64 shared_limit = cu->shader->ldsApe().limit;
626 std::memcpy((void*)srfData.data(),
627 (void*)&shared_limit, sizeof(srfData));
628 DPRINTF(GPUSRF, "Read SHARED_LIMIT = %#x\n",
629 shared_limit);
630 }
631 }
632 break;
633 case REG_PRIVATE_BASE:
634 {
635 assert(NumDwords == 2);
636 if constexpr (NumDwords == 2) {
637 ComputeUnit *cu = _gpuDynInst->computeUnit();
638 ScalarRegU64 priv_base = cu->shader->scratchApe().base;
639 std::memcpy((void*)srfData.data(), (void*)&priv_base,
640 sizeof(srfData));
641 DPRINTF(GPUSRF, "Read PRIVATE_BASE = %#x\n",
642 priv_base);
643 }
644 }
645 break;
646 case REG_PRIVATE_LIMIT:
647 {
648 assert(NumDwords == 2);
649 if constexpr (NumDwords == 2) {
650 ComputeUnit *cu = _gpuDynInst->computeUnit();
651 ScalarRegU64 priv_limit =
652 cu->shader->scratchApe().limit;
653 std::memcpy((void*)srfData.data(), (void*)&priv_limit,
654 sizeof(srfData));
655 DPRINTF(GPUSRF, "Read PRIVATE_LIMIT = %#x\n",
656 priv_limit);
657 }
658 }
659 break;
660 case REG_POS_HALF:
661 {
662 typename OpTraits<DataType>::FloatT pos_half = 0.5;
663 std::memcpy((void*)srfData.data(), (void*)&pos_half,
664 sizeof(pos_half));
665
666 }
667 break;
668 case REG_NEG_HALF:
669 {
670 typename OpTraits<DataType>::FloatT neg_half = -0.5;
671 std::memcpy((void*)srfData.data(), (void*)&neg_half,
672 sizeof(neg_half));
673 }
674 break;
675 case REG_POS_ONE:
676 {
677 typename OpTraits<DataType>::FloatT pos_one = 1.0;
678 std::memcpy(srfData.data(), &pos_one, sizeof(pos_one));
679 }
680 break;
681 case REG_NEG_ONE:
682 {
683 typename OpTraits<DataType>::FloatT neg_one = -1.0;
684 std::memcpy(srfData.data(), &neg_one, sizeof(neg_one));
685 }
686 break;
687 case REG_POS_TWO:
688 {
689 typename OpTraits<DataType>::FloatT pos_two = 2.0;
690 std::memcpy(srfData.data(), &pos_two, sizeof(pos_two));
691 }
692 break;
693 case REG_NEG_TWO:
694 {
695 typename OpTraits<DataType>::FloatT neg_two = -2.0;
696 std::memcpy(srfData.data(), &neg_two, sizeof(neg_two));
697 }
698 break;
699 case REG_POS_FOUR:
700 {
701 typename OpTraits<DataType>::FloatT pos_four = 4.0;
702 std::memcpy(srfData.data(), &pos_four, sizeof(pos_four));
703 }
704 break;
705 case REG_NEG_FOUR:
706 {
707 typename OpTraits<DataType>::FloatT neg_four = -4.0;
708 std::memcpy((void*)srfData.data(), (void*)&neg_four,
709 sizeof(neg_four));
710 }
711 break;
712 case REG_PI:
713 {
714 assert(sizeof(DataType) == sizeof(ScalarRegF64)
715 || sizeof(DataType) == sizeof(ScalarRegF32));
716
717 const ScalarRegU32 pi_u32(0x3e22f983UL);
718 const ScalarRegU64 pi_u64(0x3fc45f306dc9c882ULL);
719
720 if (sizeof(DataType) == sizeof(ScalarRegF64)) {
721 std::memcpy((void*)srfData.data(),
722 (void*)&pi_u64, sizeof(pi_u64));
723 } else {
724 std::memcpy((void*)srfData.data(),
725 (void*)&pi_u32, sizeof(pi_u32));
726 }
727 }
728 break;
729 default:
730 panic("Invalid special register index: %d\n", _opIdx);
731 break;
732 }
733 }
734
740 int
741 regIdx(int dword) const
742 {
743 Wavefront *wf = _gpuDynInst->wavefront();
744 ComputeUnit *cu = _gpuDynInst->computeUnit();
745 int sgprIdx(-1);
746
747 if (_opIdx == REG_VCC_HI) {
748 sgprIdx = cu->registerManager
749 ->mapSgpr(wf, wf->reservedScalarRegs - 1 + dword);
750 } else if (_opIdx == REG_VCC_LO) {
751 sgprIdx = cu->registerManager
752 ->mapSgpr(wf, wf->reservedScalarRegs - 2 + dword);
753 } else if (_opIdx == REG_FLAT_SCRATCH_HI) {
754 sgprIdx = cu->registerManager
755 ->mapSgpr(wf, wf->reservedScalarRegs - 3 + dword);
756 } else if (_opIdx == REG_FLAT_SCRATCH_LO) {
757 assert(NumDwords == 1);
758 sgprIdx = cu->registerManager
759 ->mapSgpr(wf, wf->reservedScalarRegs - 4 + dword);
760 } else {
761 sgprIdx = cu->registerManager->mapSgpr(wf, _opIdx + dword);
762 }
763
764 assert(sgprIdx > -1);
765
766 return sgprIdx;
767 }
768
777 GEM5_ALIGNED(8) std::array<ScalarRegU32, NumDwords> srfData;
778 };
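// A rough usage sketch for the scalar side (assuming the ScalarOperand
// aliases declared below, e.g. ConstScalarOperandU64 and ScalarOperandU32,
// and SOP-style SSRC0/SDST selectors in instData): read() fills srfData from
// the SRF or a special value, rawData() reassembles it as the operand's
// DataType, and operator= followed by write() commits a result.
//
//     ConstScalarOperandU64 ssrc0(gpuDynInst, instData.SSRC0);
//     ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
//     ssrc0.read();
//     sdst = ScalarRegU32(bits(ssrc0.rawData(), 31, 0));
//     sdst.write();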
779
780 // typedefs for the various sizes/types of scalar operands
794 // non-writeable versions of scalar operands
808 // typedefs for the various sizes/types of vector operands
823 // non-writeable versions of vector operands
838
839
840// Helper class for using multiple VecElemU32 to represent data types which
841// do not divide a dword evenly.
842template<int BITS, int ELEM_SIZE>
843class PackedReg
844{
845 // Logical view is:
846 // dword N, dword N - 1, ..., dword 1, dword 0.
847 // Within each dword, the element starts at [ELEM_SIZE:0]. For example,
848 // for ELEM_SIZE = 6 for fp6 types, [5:0] is the first value, [11:6] is
849 // the second, and so forth. For 6 bits specifically, the 6th element
850 // spans dword 0 and dword 1.
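 // Worked example for ELEM_SIZE = 6: element 5 occupies bits [35:30] of the
 // packed view, i.e. bits [31:30] of dword 0 hold its low two bits and bits
 // [3:0] of dword 1 hold its upper four bits; this is the dword-spanning
 // case handled by getElem()/setElem() below.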
851 static_assert(BITS % 32 == 0);
852 static_assert(BITS % ELEM_SIZE == 0);
853 static_assert(ELEM_SIZE <= 32);
854
855 static constexpr int NumDwords = BITS / 32;
856 uint32_t dwords[NumDwords] = {};
857
858 public:
859 PackedReg() = default;
860
861 void
862 setDword(int dw, uint32_t value)
863 {
864 assert(dw < NumDwords);
865 dwords[dw] = value;
866 }
867
868 uint32_t
869 getDword(int dw)
870 {
871 assert(dw < NumDwords);
872 return dwords[dw];
873 }
874
875 uint32_t
876 getElem(int elem)
877 {
878 assert(elem < (BITS / ELEM_SIZE));
879
880 // Get the upper/lower *bit* location of the element.
881 int ubit, lbit;
882 ubit = elem * ELEM_SIZE + (ELEM_SIZE - 1);
883 lbit = elem * ELEM_SIZE;
884
885 // Convert the bit locations to upper/lower dwords. It is possible
886 // to span two dwords but this does not have to support spanning
887 // more than two dwords.
888 int udw, ldw;
889 udw = ubit / 32;
890 ldw = lbit / 32;
891 assert(udw == ldw || udw == ldw + 1);
892
893 if (udw == ldw) {
894 // Easy case, just shift the dword value and mask to get value.
895 int dw_lbit = lbit % 32;
896
897 uint32_t elem_mask = (1UL << ELEM_SIZE) - 1;
898 uint32_t rv = (dwords[ldw] >> dw_lbit) & elem_mask;
899
900 return rv;
901 }
902
903 // Harder case. To make it easier put into a quad word and shift
904 // that variable instead of trying to work with two.
905 uint64_t qword =
906 uint64_t(dwords[udw]) << 32 | uint64_t(dwords[ldw]);
907
908 int qw_lbit = lbit % 32;
909
910 uint64_t elem_mask = (1ULL << ELEM_SIZE) - 1;
911 uint32_t rv = uint32_t((qword >> qw_lbit) & elem_mask);
912
913 return rv;
914 }
915
916 void
917 setElem(int elem, uint32_t value)
918 {
919 assert(elem < (BITS / ELEM_SIZE));
920
921 // Get the upper/lower *bit* location of the element.
922 int ubit, lbit;
923 ubit = elem * ELEM_SIZE + (ELEM_SIZE - 1);
924 lbit = elem * ELEM_SIZE;
925
926 // Convert the bit locations to upper/lower dwords. It is possible
927 // to span two dwords but this does not have to support spanning
928 // more than two dwords.
929 int udw, ldw;
930 udw = ubit / 32;
931 ldw = lbit / 32;
932 assert(udw == ldw || udw == ldw + 1);
933
934 if (udw == ldw) {
935 // Easy case, just shift the dword value and mask to get value.
936 int dw_lbit = lbit % 32;
937
938 // Make sure the value is not going to clobber another element.
939 uint32_t elem_mask = (1UL << ELEM_SIZE) - 1;
940 value &= elem_mask;
941
942 // Clear the bits we are setting.
943 elem_mask <<= dw_lbit;
944 dwords[ldw] &= ~elem_mask;
945
946 value <<= dw_lbit;
947 dwords[ldw] |= value;
948
949 return;
950 }
951
952 // Harder case. Put the two dwords in a quad word and manipulate that.
953 // Then place the two new dwords back into the storage.
954 uint64_t qword =
955 uint64_t(dwords[udw]) << 32 | uint64_t(dwords[ldw]);
956
957 int qw_lbit = lbit % 32;
958
959 // Make sure the value is not going to clobber another element.
960 uint64_t elem_mask = (1ULL << ELEM_SIZE) - 1;
961 value &= elem_mask;
962
963 elem_mask <<= qw_lbit;
964 qword &= ~elem_mask;
965
966 value <<= qw_lbit;
967 qword |= value;
968
969 dwords[udw] = uint32_t(qword >> 32);
970 dwords[ldw] = uint32_t(qword & mask(32));
971 }
972};
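// Illustrative (hypothetical) use of PackedReg: BITS must be a multiple of
// both 32 and ELEM_SIZE, so e.g. PackedReg<96, 6> packs sixteen 6-bit
// elements into three dwords.
//
//     PackedReg<96, 6> pr;
//     pr.setDword(0, raw_dw0);       // raw_dw0: some packed source dword
//     pr.setElem(5, 0x2a);           // element 5 spans dwords 0 and 1
//     uint32_t e5 = pr.getElem(5);   // yields 0x2a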
973
974} // namespace VegaISA
975
976} // namespace gem5
977
978#endif // __ARCH_VEGA_OPERAND_HH__
Generated on Tue Jun 18 2024 16:23:55 for gem5 by doxygen 1.11.0