gem5 v24.0.0.0
Loading...
Searching...
No Matches
sopk.cc
Go to the documentation of this file.
1/*
2 * Copyright (c) 2024 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * 3. Neither the name of the copyright holder nor the names of its
16 * contributors may be used to endorse or promote products derived from this
17 * software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
34#include "gpu-compute/shader.hh"
35
36namespace gem5
37{
38
39namespace VegaISA
40{
41 // --- Inst_SOPK__S_MOVK_I32 class methods ---
42
44 : Inst_SOPK(iFmt, "s_movk_i32")
45 {
46 setFlag(ALU);
47 } // Inst_SOPK__S_MOVK_I32
48
50 {
51 } // ~Inst_SOPK__S_MOVK_I32
52
53 // --- description from .arch file ---
54 // D.i = signext(SIMM16) (sign extension).
55 void
57 {
59 ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
60
61 sdst = simm16;
62
63 sdst.write();
64 } // execute
65 // --- Inst_SOPK__S_CMOVK_I32 class methods ---
66
68 : Inst_SOPK(iFmt, "s_cmovk_i32")
69 {
70 setFlag(ALU);
71 } // Inst_SOPK__S_CMOVK_I32
72
74 {
75 } // ~Inst_SOPK__S_CMOVK_I32
76
77 // --- description from .arch file ---
78 // if (SCC) then D.i = signext(SIMM16);
79 // else NOP.
80 // Conditional move with sign extension.
81 void
83 {
85 ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
86 ConstScalarOperandU32 scc(gpuDynInst, REG_SCC);
87
88 scc.read();
89
90 if (scc.rawData()) {
91 sdst = simm16;
92 sdst.write();
93 }
94 } // execute
95 // --- Inst_SOPK__S_CMPK_EQ_I32 class methods ---
96
98 : Inst_SOPK(iFmt, "s_cmpk_eq_i32")
99 {
100 setFlag(ALU);
101 } // Inst_SOPK__S_CMPK_EQ_I32
102
104 {
105 } // ~Inst_SOPK__S_CMPK_EQ_I32
106
107 // --- description from .arch file ---
108 // SCC = (S0.i == signext(SIMM16)).
109 void
111 {
113 ConstScalarOperandI32 src(gpuDynInst, instData.SDST);
114 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
115
116 src.read();
117
118 scc = (src.rawData() == simm16) ? 1 : 0;
119
120 scc.write();
121 } // execute
122 // --- Inst_SOPK__S_CMPK_LG_I32 class methods ---
123
125 : Inst_SOPK(iFmt, "s_cmpk_lg_i32")
126 {
127 setFlag(ALU);
128 } // Inst_SOPK__S_CMPK_LG_I32
129
131 {
132 } // ~Inst_SOPK__S_CMPK_LG_I32
133
134 // --- description from .arch file ---
135 // SCC = (S0.i != signext(SIMM16)).
136 void
138 {
140 ConstScalarOperandI32 src(gpuDynInst, instData.SDST);
141 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
142
143 src.read();
144
145 scc = (src.rawData() != simm16) ? 1 : 0;
146
147 scc.write();
148 } // execute
149 // --- Inst_SOPK__S_CMPK_GT_I32 class methods ---
150
152 : Inst_SOPK(iFmt, "s_cmpk_gt_i32")
153 {
154 setFlag(ALU);
155 } // Inst_SOPK__S_CMPK_GT_I32
156
158 {
159 } // ~Inst_SOPK__S_CMPK_GT_I32
160
161 // --- description from .arch file ---
162 // SCC = (S0.i > signext(SIMM16)).
163 void
165 {
167 ConstScalarOperandI32 src(gpuDynInst, instData.SDST);
168 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
169
170 src.read();
171
172 scc = (src.rawData() > simm16) ? 1 : 0;
173
174 scc.write();
175 } // execute
176 // --- Inst_SOPK__S_CMPK_GE_I32 class methods ---
177
179 : Inst_SOPK(iFmt, "s_cmpk_ge_i32")
180 {
181 setFlag(ALU);
182 } // Inst_SOPK__S_CMPK_GE_I32
183
185 {
186 } // ~Inst_SOPK__S_CMPK_GE_I32
187
188 // --- description from .arch file ---
189 // SCC = (S0.i >= signext(SIMM16)).
190 void
192 {
194 ConstScalarOperandI32 src(gpuDynInst, instData.SDST);
195 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
196
197 src.read();
198
199 scc = (src.rawData() >= simm16) ? 1 : 0;
200
201 scc.write();
202 } // execute
203 // --- Inst_SOPK__S_CMPK_LT_I32 class methods ---
204
206 : Inst_SOPK(iFmt, "s_cmpk_lt_i32")
207 {
208 setFlag(ALU);
209 } // Inst_SOPK__S_CMPK_LT_I32
210
212 {
213 } // ~Inst_SOPK__S_CMPK_LT_I32
214
215 // --- description from .arch file ---
216 // SCC = (S0.i < signext(SIMM16)).
217 void
219 {
221 ConstScalarOperandI32 src(gpuDynInst, instData.SDST);
222 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
223
224 src.read();
225
226 scc = (src.rawData() < simm16) ? 1 : 0;
227
228 scc.write();
229 } // execute
230 // --- Inst_SOPK__S_CMPK_LE_I32 class methods ---
231
233 : Inst_SOPK(iFmt, "s_cmpk_le_i32")
234 {
235 setFlag(ALU);
236 } // Inst_SOPK__S_CMPK_LE_I32
237
239 {
240 } // ~Inst_SOPK__S_CMPK_LE_I32
241
242 // --- description from .arch file ---
243 // SCC = (S0.i <= signext(SIMM16)).
244 void
246 {
248 ConstScalarOperandI32 src(gpuDynInst, instData.SDST);
249 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
250
251 src.read();
252
253 scc = (src.rawData() <= simm16) ? 1 : 0;
254
255 scc.write();
256 } // execute
257 // --- Inst_SOPK__S_CMPK_EQ_U32 class methods ---
258
260 : Inst_SOPK(iFmt, "s_cmpk_eq_u32")
261 {
262 setFlag(ALU);
263 } // Inst_SOPK__S_CMPK_EQ_U32
264
266 {
267 } // ~Inst_SOPK__S_CMPK_EQ_U32
268
269 // --- description from .arch file ---
270 // SCC = (S0.u == SIMM16).
271 void
273 {
275 ConstScalarOperandU32 src(gpuDynInst, instData.SDST);
276 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
277
278 src.read();
279
280 scc = (src.rawData() == simm16) ? 1 : 0;
281
282 scc.write();
283 } // execute
284 // --- Inst_SOPK__S_CMPK_LG_U32 class methods ---
285
287 : Inst_SOPK(iFmt, "s_cmpk_lg_u32")
288 {
289 setFlag(ALU);
290 } // Inst_SOPK__S_CMPK_LG_U32
291
293 {
294 } // ~Inst_SOPK__S_CMPK_LG_U32
295
296 // --- description from .arch file ---
297 // SCC = (S0.u != SIMM16).
298 void
300 {
302 ConstScalarOperandU32 src(gpuDynInst, instData.SDST);
303 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
304
305 src.read();
306
307 scc = (src.rawData() != simm16) ? 1 : 0;
308
309 scc.write();
310 } // execute
311 // --- Inst_SOPK__S_CMPK_GT_U32 class methods ---
312
314 : Inst_SOPK(iFmt, "s_cmpk_gt_u32")
315 {
316 setFlag(ALU);
317 } // Inst_SOPK__S_CMPK_GT_U32
318
320 {
321 } // ~Inst_SOPK__S_CMPK_GT_U32
322
323 // --- description from .arch file ---
324 // SCC = (S0.u > SIMM16).
325 void
327 {
329 ConstScalarOperandU32 src(gpuDynInst, instData.SDST);
330 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
331
332 src.read();
333
334 scc = (src.rawData() > simm16) ? 1 : 0;
335
336 scc.write();
337 } // execute
338 // --- Inst_SOPK__S_CMPK_GE_U32 class methods ---
339
341 : Inst_SOPK(iFmt, "s_cmpk_ge_u32")
342 {
343 setFlag(ALU);
344 } // Inst_SOPK__S_CMPK_GE_U32
345
347 {
348 } // ~Inst_SOPK__S_CMPK_GE_U32
349
350 // --- description from .arch file ---
351 // SCC = (S0.u >= SIMM16).
352 void
354 {
356 ConstScalarOperandU32 src(gpuDynInst, instData.SDST);
357 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
358
359 src.read();
360
361 scc = (src.rawData() >= simm16) ? 1 : 0;
362
363 scc.write();
364 } // execute
365 // --- Inst_SOPK__S_CMPK_LT_U32 class methods ---
366
368 : Inst_SOPK(iFmt, "s_cmpk_lt_u32")
369 {
370 setFlag(ALU);
371 } // Inst_SOPK__S_CMPK_LT_U32
372
374 {
375 } // ~Inst_SOPK__S_CMPK_LT_U32
376
377 // --- description from .arch file ---
378 // SCC = (S0.u < SIMM16).
379 void
381 {
383 ConstScalarOperandU32 src(gpuDynInst, instData.SDST);
384 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
385
386 src.read();
387
388 scc = (src.rawData() < simm16) ? 1 : 0;
389
390 scc.write();
391 } // execute
392 // --- Inst_SOPK__S_CMPK_LE_U32 class methods ---
393
395 : Inst_SOPK(iFmt, "s_cmpk_le_u32")
396 {
397 setFlag(ALU);
398 } // Inst_SOPK__S_CMPK_LE_U32
399
401 {
402 } // ~Inst_SOPK__S_CMPK_LE_U32
403
404 // --- description from .arch file ---
405 // SCC = (S0.u <= SIMM16).
406 void
408 {
410 ConstScalarOperandU32 src(gpuDynInst, instData.SDST);
411 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
412
413 src.read();
414
415 scc = (src.rawData() <= simm16) ? 1 : 0;
416
417 scc.write();
418 } // execute
419 // --- Inst_SOPK__S_ADDK_I32 class methods ---
420
422 : Inst_SOPK(iFmt, "s_addk_i32")
423 {
424 setFlag(ALU);
425 } // Inst_SOPK__S_ADDK_I32
426
428 {
429 } // ~Inst_SOPK__S_ADDK_I32
430
431 // --- description from .arch file ---
432 // D.i = D.i + signext(SIMM16);
433 // SCC = overflow.
434 void
436 {
438 ConstScalarOperandI32 src(gpuDynInst, instData.SDST);
439 ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
440 ScalarOperandU32 scc(gpuDynInst, REG_SCC);
441
442 src.read();
443
444 sdst = src.rawData() + (ScalarRegI32)sext<16>(simm16);
445 scc = (bits(src.rawData(), 31) == bits(simm16, 15)
446 && bits(src.rawData(), 31) != bits(sdst.rawData(), 31)) ? 1 : 0;
447
448 sdst.write();
449 scc.write();
450 } // execute
451 // --- Inst_SOPK__S_MULK_I32 class methods ---
452
454 : Inst_SOPK(iFmt, "s_mulk_i32")
455 {
456 setFlag(ALU);
457 } // Inst_SOPK__S_MULK_I32
458
460 {
461 } // ~Inst_SOPK__S_MULK_I32
462
463 // --- description from .arch file ---
464 // D.i = D.i * signext(SIMM16).
465 void
467 {
469 ConstScalarOperandI32 src(gpuDynInst, instData.SDST);
470 ScalarOperandI32 sdst(gpuDynInst, instData.SDST);
471
472 src.read();
473
474 sdst = src.rawData() * (ScalarRegI32)sext<16>(simm16);
475
476 sdst.write();
477 } // execute
478 // --- Inst_SOPK__S_CBRANCH_I_FORK class methods ---
479
481 : Inst_SOPK(iFmt, "s_cbranch_i_fork")
482 {
484 } // Inst_SOPK__S_CBRANCH_I_FORK
485
487 {
488 } // ~Inst_SOPK__S_CBRANCH_I_FORK
489
490 // --- description from .arch file ---
491 // mask_pass = S0.u64 & EXEC;
492 // mask_fail = ~S0.u64 & EXEC;
493 // target_addr = PC + signext(SIMM16 * 4) + 4;
494 // if (mask_pass == EXEC)
495 // PC = target_addr;
496 // elsif (mask_fail == EXEC)
497 // PC += 4;
498 // elsif (bitcount(mask_fail) < bitcount(mask_pass))
499 // EXEC = mask_fail;
500 // SGPR[CSP*4] = { target_addr, mask_pass };
501 // CSP++;
502 // PC += 4;
503 // else
504 // EXEC = mask_pass;
505 // SGPR[CSP*4] = { PC + 4, mask_fail };
506 // CSP++;
507 // PC = target_addr;
508 // end.
509 // Conditional branch using branch-stack.
510 // S0 = compare mask(vcc or any sgpr), and
511 // SIMM16 = signed DWORD branch offset relative to next instruction.
512 // See also S_CBRANCH_JOIN.
513 void
518 // --- Inst_SOPK__S_GETREG_B32 class methods ---
519
521 : Inst_SOPK(iFmt, "s_getreg_b32")
522 {
523 setFlag(ALU);
524 } // Inst_SOPK__S_GETREG_B32
525
527 {
528 } // ~Inst_SOPK__S_GETREG_B32
529
530 // --- description from .arch file ---
531 // D.u = hardware-reg. Read some or all of a hardware register into the
532 // LSBs of D.
533 // SIMM16 = {size[4:0], offset[4:0], hwRegId[5:0]}; offset is 0..31, size
534 // is 1..32.
535 void
537 {
539 ScalarRegU32 hwregId = simm16 & 0x3f;
540 ScalarRegU32 offset = (simm16 >> 6) & 31;
541 ScalarRegU32 size = ((simm16 >> 11) & 31) + 1;
542
543 ScalarRegU32 hwreg =
544 gpuDynInst->computeUnit()->shader->getHwReg(hwregId);
545 ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
546 sdst.read();
547
548 // Store the selected hardware register field in SDST (size can be 32).
549 ScalarRegU32 mask = ((size < 32 ? (1U << size) : 0U) - 1U) << offset;
550 sdst = (hwreg & mask) >> offset;
551 sdst.write();
552 } // execute
553 // --- Inst_SOPK__S_SETREG_B32 class methods ---
554
556 : Inst_SOPK(iFmt, "s_setreg_b32")
557 {
558 setFlag(ALU);
559 } // Inst_SOPK__S_SETREG_B32
560
562 {
563 } // ~Inst_SOPK__S_SETREG_B32
564
565 // --- description from .arch file ---
566 // hardware-reg = S0.u. Write some or all of the LSBs of D into a hardware
567 // register.
568 // SIMM16 = {size[4:0], offset[4:0], hwRegId[5:0]}; offset is 0..31, size
569 // is 1..32.
570 void
572 {
574 ScalarRegU32 hwregId = simm16 & 0x3f;
575 ScalarRegU32 offset = (simm16 >> 6) & 31;
576 ScalarRegU32 size = ((simm16 >> 11) & 31) + 1;
577
578 ScalarRegU32 hwreg =
579 gpuDynInst->computeUnit()->shader->getHwReg(hwregId);
580 ScalarOperandU32 sdst(gpuDynInst, instData.SDST);
581 sdst.read();
582
583 // Store SDST bits in the hardware register field (size can be 32).
584 ScalarRegU32 mask = ((size < 32 ? (1U << size) : 0U) - 1U) << offset;
585 hwreg = ((hwreg & ~mask) | ((sdst.rawData() << offset) & mask));
586 gpuDynInst->computeUnit()->shader->setHwReg(hwregId, hwreg);
587
588 // set MODE register to control the behavior of single precision
589 // floating-point numbers: denormal mode or round mode
590 if (hwregId==HW_REG_MODE && size==2
591 && (offset==4 || offset==0)) {
592 warn_once("Be cautious that s_setreg_b32 has no real effect "
593 "on FP modes: %s\n", gpuDynInst->disassemble());
594 return;
595 }
596
597 // panic if not changing MODE of floating-point numbers
599 } // execute
600 // --- Inst_SOPK__S_SETREG_IMM32_B32 class methods ---
601
603 InFmt_SOPK *iFmt)
604 : Inst_SOPK(iFmt, "s_setreg_imm32_b32")
605 {
606 setFlag(ALU);
607 } // Inst_SOPK__S_SETREG_IMM32_B32
608
610 {
611 } // ~Inst_SOPK__S_SETREG_IMM32_B32
612
613 // --- description from .arch file ---
614 // Write some or all of the LSBs of IMM32 into a hardware register; this
615 // instruction requires a 32-bit literal constant.
616 // SIMM16 = {size[4:0], offset[4:0], hwRegId[5:0]}; offset is 0..31, size
617 // is 1..32.
618 void
620 {
622 ScalarRegU32 hwregId = simm16 & 0x3f;
623 ScalarRegU32 offset = (simm16 >> 6) & 31;
624 ScalarRegU32 size = ((simm16 >> 11) & 31) + 1;
625
626 ScalarRegU32 hwreg =
627 gpuDynInst->computeUnit()->shader->getHwReg(hwregId);
629
630 // Store SIMM32 bits in the hardware register field (size can be 32).
631 ScalarRegU32 mask = ((size < 32 ? (1U << size) : 0U) - 1U) << offset;
632 hwreg = ((hwreg & ~mask) | ((simm32 << offset) & mask));
633 gpuDynInst->computeUnit()->shader->setHwReg(hwregId, hwreg);
634
635 // set MODE register to control the behavior of single precision
636 // floating-point numbers: denormal mode or round mode
637 if (hwregId==HW_REG_MODE && size==2
638 && (offset==4 || offset==0)) {
639 warn_once("Be cautious that s_setreg_imm32_b32 has no real effect "
640 "on FP modes: %s\n", gpuDynInst->disassemble());
641 return;
642 }
643
644 // panic if not changing modes of single-precision FPs
646 } // execute
647} // namespace VegaISA
648} // namespace gem5
void setFlag(Flags flag)
Base class for branch operations.
Definition branch.hh:49
void execute(GPUDynInstPtr) override
Definition sopk.cc:435
void execute(GPUDynInstPtr) override
Definition sopk.cc:514
void execute(GPUDynInstPtr) override
Definition sopk.cc:82
void execute(GPUDynInstPtr) override
Definition sopk.cc:110
void execute(GPUDynInstPtr) override
Definition sopk.cc:272
void execute(GPUDynInstPtr) override
Definition sopk.cc:191
void execute(GPUDynInstPtr) override
Definition sopk.cc:353
void execute(GPUDynInstPtr) override
Definition sopk.cc:164
void execute(GPUDynInstPtr) override
Definition sopk.cc:326
void execute(GPUDynInstPtr) override
Definition sopk.cc:245
void execute(GPUDynInstPtr) override
Definition sopk.cc:407
void execute(GPUDynInstPtr) override
Definition sopk.cc:137
void execute(GPUDynInstPtr) override
Definition sopk.cc:299
void execute(GPUDynInstPtr) override
Definition sopk.cc:218
void execute(GPUDynInstPtr) override
Definition sopk.cc:380
void execute(GPUDynInstPtr) override
Definition sopk.cc:536
Inst_SOPK__S_MOVK_I32(InFmt_SOPK *)
Definition sopk.cc:43
void execute(GPUDynInstPtr) override
Definition sopk.cc:56
void execute(GPUDynInstPtr) override
Definition sopk.cc:466
void execute(GPUDynInstPtr) override
Definition sopk.cc:571
void execute(GPUDynInstPtr) override
Definition sopk.cc:619
void read() override
read from and write to the underlying register(s) that this operand is referring to.
Definition operand.hh:409
std::enable_if< Condition, DataType >::type rawData() const
we store scalar data in a std::array, however if we need the full operand data we use this method to ...
Definition operand.hh:392
constexpr T bits(T val, unsigned first, unsigned last)
Extract the bitfield from position 'first' to 'last' (inclusive) from 'val' and right justify it.
Definition bitfield.hh:79
constexpr uint64_t sext(uint64_t val)
Sign-extend an N-bit value to 64 bits.
Definition bitfield.hh:129
#define warn_once(...)
Definition logging.hh:260
Bitfield< 3, 0 > mask
Definition pcstate.hh:63
Bitfield< 23, 0 > offset
Definition types.hh:144
uint32_t ScalarRegU32
Copyright (c) 2024 - Pranith Kumar Copyright (c) 2020 Inria All rights reserved.
Definition binary32.hh:36
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Definition misc.hh:49
@ HW_REG_MODE

Generated on Tue Jun 18 2024 16:23:48 for gem5 by doxygen 1.11.0