gem5 [DEVELOP-FOR-25.0]
Loading...
Searching...
No Matches
macromem.cc
Go to the documentation of this file.
1/*
2 * Copyright (c) 2010-2014, 2020 ARM Limited
3 * All rights reserved
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder. You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Copyright (c) 2007-2008 The Florida State University
15 * All rights reserved.
16 *
17 * Redistribution and use in source and binary forms, with or without
18 * modification, are permitted provided that the following conditions are
19 * met: redistributions of source code must retain the above copyright
20 * notice, this list of conditions and the following disclaimer;
21 * redistributions in binary form must reproduce the above copyright
22 * notice, this list of conditions and the following disclaimer in the
23 * documentation and/or other materials provided with the distribution;
24 * neither the name of the copyright holders nor the names of its
25 * contributors may be used to endorse or promote products derived from
26 * this software without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
29 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
31 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
32 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
33 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
34 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
35 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
36 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
37 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
38 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39 */
40
42
43#include <sstream>
44
45#include "arch/arm/generated/decoder.hh"
47#include "base/compiler.hh"
48
49namespace gem5
50{
51
52using namespace ArmISAInst;
53
54namespace ArmISA
55{
56
// Constructor for a load/store-multiple macro-op (base class PredMacroOp).
// It expands the register bitmask `reglist` into a micro-op sequence: an
// optional copy of the base register into Ureg0, the memory micro-ops
// (loads are issued in pairs where possible, stores one register at a
// time), an optional writeback of the base register, and an optional final
// move of a staged PC value.
//
// NOTE(review): this listing has the documentation viewer's line numbers
// fused to the front of every line, and lines 57, 86, 96 and 206 of that
// numbering are absent. The line holding the constructor's qualified name
// and first parameters is one of them, and `uop` is used below without a
// visible declaration. Confirm against the real file.
//
// Parameters visible here:
//   rn        - base address register.
//   index     - when false the starting offset is increased by 4.
//   up        - true: offsets start at 0 and grow; false: offsets start at
//               ones * 4 - 4 and shrink. Also forwarded to each micro-op.
//   user      - with bit 15 of reglist clear, registers are remapped to
//               their MODE_USER versions; with bit 15 set, the transfer is
//               treated as an exception return.
//   writeback - append a micro-op that adds/subtracts ones * 4 to/from rn.
//   load      - true for loads, false for stores.
//   reglist   - bitmask of the registers to transfer.
58 OpClass __opClass, RegIndex rn,
59 bool index, bool up, bool user, bool writeback,
60 bool load, uint32_t reglist) :
61 PredMacroOp(mnem, machInst, __opClass)
62{
63 uint32_t regs = reglist;
64 uint32_t ones = number_of_ones(reglist);
65 uint32_t mem_ops = ones;
66
67 // Copy the base address register if we overwrite it, or if this instruction
68 // is basically a no-op (we have to do something)
69 bool copy_base = (bits(reglist, rn) && load) || !ones;
// force_user: `user` set and bit 15 (the PC) not in the list.
// exception_ret: `user` set and bit 15 in the list.
70 bool force_user = user & !bits(reglist, 15);
71 bool exception_ret = user & bits(reglist, 15);
// pc_temp: a load with writeback whose list includes the PC. The PC value
// is loaded into Ureg1 and moved into place by the final micro-op.
72 bool pc_temp = load && writeback && bits(reglist, 15);
73
// Micro-op count. Loads are paired, hence (ones + 1) / 2; one more is
// needed when an even-sized exception return leaves its last two registers
// unpaired (see the pairing condition in the loop below).
74 if (!ones) {
75 numMicroops = 1;
76 } else if (load) {
77 numMicroops = ((ones + 1) / 2)
78 + ((ones % 2 == 0 && exception_ret) ? 1 : 0)
79 + (copy_base ? 1 : 0)
80 + (writeback? 1 : 0)
81 + (pc_temp ? 1 : 0);
82 } else {
83 numMicroops = ones + (writeback ? 1 : 0);
84 }
85
87
// Starting offset handed to the first memory micro-op.
88 uint32_t addr = 0;
89
90 if (!up)
91 addr = (ones << 2) - 4;
92
93 if (!index)
94 addr += 4;
95
97
98 // Add 0 to Rn and stick it in ureg0.
99 // This is equivalent to a move.
100 if (copy_base)
101 *uop++ = new MicroAddiUop(machInst, int_reg::Ureg0, rn, 0);
102
// `reg` scans the bitmask upward; each found bit is cleared in `regs` so
// the next scan continues from the following register.
103 unsigned reg = 0;
104 while (mem_ops != 0) {
105 // Do load operations in pairs if possible
106 if (load && mem_ops >= 2 &&
107 !(mem_ops == 2 && bits(regs, int_reg::Pc) && exception_ret)) {
108 // 64-bit memory operation
109 // Find 2 set register bits (clear them after finding)
110 unsigned reg_idx1;
111 unsigned reg_idx2;
112
113 // Find the first register
114 while (!bits(regs, reg)) reg++;
115 replaceBits(regs, reg, 0);
116 reg_idx1 = force_user ? int_reg::regInMode(MODE_USER, reg) : reg;
117
118 // Find the second register
119 while (!bits(regs, reg)) reg++;
120 replaceBits(regs, reg, 0);
121 reg_idx2 = force_user ? int_reg::regInMode(MODE_USER, reg) : reg;
122
123 // Load into temp reg if necessary
124 if (reg_idx2 == int_reg::Pc && pc_temp)
125 reg_idx2 = int_reg::Ureg1;
126
127 // Actually load both registers from memory
128 *uop = new MicroLdr2Uop(machInst, reg_idx1, reg_idx2,
129 copy_base ? int_reg::Ureg0 : rn, up, addr);
130
131 if (!writeback && reg_idx2 == int_reg::Pc) {
132 // No writeback if idx==pc, set appropriate flags
133 (*uop)->setFlag(StaticInst::IsControl);
134 (*uop)->setFlag(StaticInst::IsIndirectControl);
135
136 if (!(condCode == COND_AL || condCode == COND_UC))
137 (*uop)->setFlag(StaticInst::IsCondControl);
138 else
139 (*uop)->setFlag(StaticInst::IsUncondControl);
140 }
141
// Two 4-byte registers were consumed by this micro-op.
142 if (up) addr += 8;
143 else addr -= 8;
144 mem_ops -= 2;
145 } else {
146 // 32-bit memory operation
147 // Find register for operation
148 unsigned reg_idx;
149 while (!bits(regs, reg)) reg++;
150 replaceBits(regs, reg, 0);
151 reg_idx = force_user ? int_reg::regInMode(MODE_USER, reg) : reg;
152
153 if (load) {
154 if (writeback && reg_idx == int_reg::Pc) {
155 // If this instruction changes the PC and performs a
156 // writeback, ensure the pc load/branch is the last uop.
157 // Load into a temp reg here.
158 *uop = new MicroLdrUop(machInst, int_reg::Ureg1,
159 copy_base ? int_reg::Ureg0 : rn, up, addr);
160 } else if (reg_idx == int_reg::Pc && exception_ret) {
161 // Special handling for exception return
162 *uop = new MicroLdrRetUop(machInst, reg_idx,
163 copy_base ? int_reg::Ureg0 : rn, up, addr);
164 } else {
165 // standard single load uop
166 *uop = new MicroLdrUop(machInst, reg_idx,
167 copy_base ? int_reg::Ureg0 : rn, up, addr);
168 }
169
170 // Loading pc as last operation? Set appropriate flags.
171 if (!writeback && reg_idx == int_reg::Pc) {
172 (*uop)->setFlag(StaticInst::IsControl);
173 (*uop)->setFlag(StaticInst::IsIndirectControl);
174
175 if (!(condCode == COND_AL || condCode == COND_UC))
176 (*uop)->setFlag(StaticInst::IsCondControl);
177 else
178 (*uop)->setFlag(StaticInst::IsUncondControl);
179 }
180 } else {
// Stores always address through rn itself, never the Ureg0 copy.
181 *uop = new MicroStrUop(machInst, reg_idx, rn, up, addr);
182 }
183
184 if (up) addr += 4;
185 else addr -= 4;
186 --mem_ops;
187 }
188
189 // Load/store micro-op generated, go to next uop
190 ++uop;
191 }
192
193 if (writeback && ones) {
194 // Perform writeback uop operation
195 if (up)
196 *uop++ = new MicroAddiUop(machInst, rn, rn, ones * 4);
197 else
198 *uop++ = new MicroSubiUop(machInst, rn, rn, ones * 4);
199
200 // Write PC after address writeback?
201 if (pc_temp) {
202 if (exception_ret) {
203 *uop = new MicroUopRegMovRet(machInst, 0, int_reg::Ureg1);
204 } else {
// NOTE(review): the argument list of this call (listing line 206) is
// missing from this extract.
205 *uop = new MicroUopRegMov(
207 }
208 (*uop)->setFlag(StaticInst::IsControl);
209 (*uop)->setFlag(StaticInst::IsIndirectControl);
210
211 if (!(condCode == COND_AL || condCode == COND_UC))
212 (*uop)->setFlag(StaticInst::IsCondControl);
213 else
214 (*uop)->setFlag(StaticInst::IsUncondControl);
215
// A PC-writing load multiple whose base is the stack pointer is flagged
// as a return.
216 if (rn == int_reg::Sp)
217 (*uop)->setFlag(StaticInst::IsReturn);
218
219 ++uop;
220 }
221 }
222
// `uop` points one past the last micro-op emitted; step back onto it.
223 --uop;
224 (*uop)->setLastMicroop();
225 microOps[0]->setFirstMicroop();
226
227 /* Take the control flags from the last microop for the macroop */
228 if ((*uop)->isControl())
229 setFlag(StaticInst::IsControl);
230 if ((*uop)->isCondCtrl())
231 setFlag(StaticInst::IsCondControl);
232 if ((*uop)->isUncondCtrl())
233 setFlag(StaticInst::IsUncondControl);
234 if ((*uop)->isIndirectCtrl())
235 setFlag(StaticInst::IsIndirectControl);
236 if ((*uop)->isReturn())
237 setFlag(StaticInst::IsReturn);
238
// Every micro-op except the last is marked delayed-commit.
239 for (StaticInstPtr *uop = microOps; !(*uop)->isLastMicroop(); uop++) {
240 (*uop)->setDelayedCommit();
241 }
242}
243
// Builds the micro-op sequence for a register-pair load or store.
//
// Sequence: unless the mode is post-index, an address micro-op targeting
// Ureg0 comes first; then the memory micro-op(s) selected by fp/size/load;
// then, for any mode other than AddrMd_Offset, a micro-op updating rn.
//
// NOTE(review): this listing has viewer line numbers fused to each line
// and is missing lines 248 and 260 of that numbering. `rn`, `rt` and `rt2`
// are used below but their declarations are not visible (presumably the
// missing parameter line) - confirm against the real file.
//
// Parameters visible here:
//   size      - access size selector; 4, 8 and 16 are handled for fp,
//               4 and 8 otherwise. Other values emit no memory micro-op.
//   fp        - choose the floating-point micro-op variants.
//   load      - true for loads, false for stores.
//   noAlloc, exclusive, acrel - forwarded unchanged to the memory micro-ops.
//   signExt   - for 4-byte integer loads, selects MicroLdrDSXImmUop instead
//               of MicroLdrDUXImmUop.
//   imm       - immediate offset.
//   mode      - addressing mode (AddrMd_PostIndex / AddrMd_Offset are the
//               values tested here).
244PairMemOp::PairMemOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
245 uint32_t size, bool fp, bool load, bool noAlloc,
246 bool signExt, bool exclusive, bool acrel,
247 int64_t imm, AddrMode mode,
249 PredMacroOp(mnem, machInst, __opClass)
250{
251 bool post = (mode == AddrMd_PostIndex);
252 bool writeback = (mode != AddrMd_Offset);
253
// Count: address micro-op (non-post only) + memory micro-ops + writeback.
254 if (load) {
255 // Use integer rounding to round up loads of size 4
256 numMicroops = (post ? 0 : 1) + ((size + 4) / 8) + (writeback ? 1 : 0);
257 } else {
258 numMicroops = (post ? 0 : 1) + (size / 4) + (writeback ? 1 : 0);
259 }
261
262 StaticInstPtr *uop = microOps;
263
264 rn = makeSP(rn);
265
// Non-post-index forms address through Ureg0, which this micro-op writes
// from rn and imm (the `post ? 0 : imm` is always imm inside this branch).
266 if (!post) {
267 *uop++ = new MicroAddXiSpAlignUop(machInst, int_reg::Ureg0, rn,
268 post ? 0 : imm);
269 }
270
// In every memory micro-op below the base is rn for post-index and Ureg0
// otherwise.
271 if (fp) {
272 if (size == 16) {
273 if (load) {
274 *uop++ = new MicroLdFp16Uop(machInst, rt,
275 post ? rn : int_reg::Ureg0, 0, noAlloc, exclusive,
276 acrel);
277 *uop++ = new MicroLdFp16Uop(machInst, rt2,
278 post ? rn : int_reg::Ureg0, 16, noAlloc, exclusive,
279 acrel);
280 } else {
// 16-byte stores take two micro-ops per register (the QB and QT
// variants) at the same offset - presumably the two halves; verify.
281 *uop++ = new MicroStrQBFpXImmUop(machInst, rt,
282 post ? rn : int_reg::Ureg0, 0, noAlloc, exclusive,
283 acrel);
284 *uop++ = new MicroStrQTFpXImmUop(machInst, rt,
285 post ? rn : int_reg::Ureg0, 0, noAlloc, exclusive,
286 acrel);
287 *uop++ = new MicroStrQBFpXImmUop(machInst, rt2,
288 post ? rn : int_reg::Ureg0, 16, noAlloc, exclusive,
289 acrel);
290 *uop++ = new MicroStrQTFpXImmUop(machInst, rt2,
291 post ? rn : int_reg::Ureg0, 16, noAlloc, exclusive,
292 acrel);
293 }
294 } else if (size == 8) {
295 if (load) {
296 *uop++ = new MicroLdPairFp8Uop(machInst, rt, rt2,
297 post ? rn : int_reg::Ureg0, 0, noAlloc, exclusive,
298 acrel);
299 } else {
300 *uop++ = new MicroStrFpXImmUop(machInst, rt,
301 post ? rn : int_reg::Ureg0, 0, noAlloc, exclusive,
302 acrel);
303 *uop++ = new MicroStrFpXImmUop(machInst, rt2,
304 post ? rn : int_reg::Ureg0, 8, noAlloc, exclusive,
305 acrel);
306 }
307 } else if (size == 4) {
308 if (load) {
309 *uop++ = new MicroLdrDFpXImmUop(machInst, rt, rt2,
310 post ? rn : int_reg::Ureg0, 0, noAlloc, exclusive,
311 acrel);
312 } else {
313 *uop++ = new MicroStrDFpXImmUop(machInst, rt, rt2,
314 post ? rn : int_reg::Ureg0, 0, noAlloc, exclusive,
315 acrel);
316 }
317 }
318 } else {
319 if (size == 8) {
320 if (load) {
321 *uop++ = new MicroLdPairUop(machInst, rt, rt2,
322 post ? rn : int_reg::Ureg0, 0, noAlloc, exclusive,
323 acrel);
324 } else {
325 *uop++ = new MicroStrXImmUop(machInst, rt,
326 post ? rn : int_reg::Ureg0, 0, noAlloc, exclusive,
327 acrel);
// `size` is 8 in this branch, so the second store lands at offset 8.
328 *uop++ = new MicroStrXImmUop(machInst, rt2,
329 post ? rn : int_reg::Ureg0, size, noAlloc, exclusive,
330 acrel);
331 }
332 } else if (size == 4) {
333 if (load) {
334 if (signExt) {
335 *uop++ = new MicroLdrDSXImmUop(machInst, rt, rt2,
336 post ? rn : int_reg::Ureg0, 0, noAlloc, exclusive,
337 acrel);
338 } else {
339 *uop++ = new MicroLdrDUXImmUop(machInst, rt, rt2,
340 post ? rn : int_reg::Ureg0, 0, noAlloc, exclusive,
341 acrel);
342 }
343 } else {
344 *uop++ = new MicroStrDXImmUop(machInst, rt, rt2,
345 post ? rn : int_reg::Ureg0, 0, noAlloc, exclusive,
346 acrel);
347 }
348 }
349 }
350
// Writeback: post-index writes rn from rn and imm; other writeback modes
// write rn from Ureg0 with an immediate of 0.
351 if (writeback) {
352 *uop++ = new MicroAddXiUop(machInst, rn, post ? rn : int_reg::Ureg0,
353 post ? imm : 0);
354 }
355
// Exactly numMicroops micro-ops must have been emitted.
356 assert(uop == &microOps[numMicroops]);
357 (*--uop)->setLastMicroop();
358 microOps[0]->setFirstMicroop();
359
// Every micro-op except the last is marked delayed-commit.
360 for (StaticInstPtr *curUop = microOps;
361 !(*curUop)->isLastMicroop(); curUop++) {
362 (*curUop)->setDelayedCommit();
363 }
364}
365
// Constructor for a macro-op that loads or stores `dest` at `base` + `imm`
// using the 16-byte FP micro-ops: one MicroLdFp16Uop for a load, or a
// MicroStrQBFpXImmUop / MicroStrQTFpXImmUop pair for a store. The base
// register is not updated.
//
// NOTE(review): this listing has viewer line numbers fused to each line
// and is missing lines 366 and 372 of that numbering, including the line
// with the constructor's qualified name. Confirm against the real file.
367 OpClass __opClass, bool load, RegIndex dest,
368 RegIndex base, int64_t imm) :
369 PredMacroOp(mnem, machInst, __opClass)
370{
371 numMicroops = load ? 1 : 2;
373
374 StaticInstPtr *uop = microOps;
375
376 if (load) {
377 *uop = new MicroLdFp16Uop(machInst, dest, base, imm);
378 } else {
379 *uop = new MicroStrQBFpXImmUop(machInst, dest, base, imm);
// Only the first of the two store micro-ops is marked delayed-commit.
380 (*uop)->setDelayedCommit();
381 *++uop = new MicroStrQTFpXImmUop(machInst, dest, base, imm);
382 }
// `uop` now points at the final micro-op in either case.
383 (*uop)->setLastMicroop();
384 microOps[0]->setFirstMicroop();
385}
386
// Constructor for the post-indexed form of the 16-byte FP load/store
// macro-op: the memory micro-op(s) access `base` with offset 0, then a
// MicroAddXiUop writes `base` from `base` and `imm`.
//
// NOTE(review): this listing has viewer line numbers fused to each line
// and is missing lines 387 and 393 of that numbering, including the line
// with the constructor's qualified name. Confirm against the real file.
388 OpClass __opClass, bool load, RegIndex dest,
389 RegIndex base, int64_t imm) :
390 PredMacroOp(mnem, machInst, __opClass)
391{
// One load or two stores, plus the base-update micro-op.
392 numMicroops = load ? 2 : 3;
394
395 StaticInstPtr *uop = microOps;
396
397 if (load) {
398 *uop++ = new MicroLdFp16Uop(machInst, dest, base, 0);
399 } else {
400 *uop++= new MicroStrQBFpXImmUop(machInst, dest, base, 0);
401 *uop++ = new MicroStrQTFpXImmUop(machInst, dest, base, 0);
402 }
403 *uop = new MicroAddXiUop(machInst, base, base, imm);
404 (*uop)->setLastMicroop();
405 microOps[0]->setFirstMicroop();
406
// Every micro-op except the last is marked delayed-commit.
407 for (StaticInstPtr *curUop = microOps;
408 !(*curUop)->isLastMicroop(); curUop++) {
409 (*curUop)->setDelayedCommit();
410 }
411}
412
// Constructor for the pre-indexed form of the 16-byte FP load/store
// macro-op: the memory micro-op(s) access `base` with offset `imm`, then a
// MicroAddXiUop writes `base` from `base` and `imm`.
//
// NOTE(review): this listing has viewer line numbers fused to each line
// and is missing lines 413 and 419 of that numbering, including the line
// with the constructor's qualified name. Confirm against the real file.
414 OpClass __opClass, bool load, RegIndex dest,
415 RegIndex base, int64_t imm) :
416 PredMacroOp(mnem, machInst, __opClass)
417{
// One load or two stores, plus the base-update micro-op.
418 numMicroops = load ? 2 : 3;
420
421 StaticInstPtr *uop = microOps;
422
423 if (load) {
424 *uop++ = new MicroLdFp16Uop(machInst, dest, base, imm);
425 } else {
426 *uop++ = new MicroStrQBFpXImmUop(machInst, dest, base, imm);
427 *uop++ = new MicroStrQTFpXImmUop(machInst, dest, base, imm);
428 }
429 *uop = new MicroAddXiUop(machInst, base, base, imm);
430 (*uop)->setLastMicroop();
431 microOps[0]->setFirstMicroop();
432
// Every micro-op except the last is marked delayed-commit.
433 for (StaticInstPtr *curUop = microOps;
434 !(*curUop)->isLastMicroop(); curUop++) {
435 (*curUop)->setDelayedCommit();
436 }
437}
438
// Constructor for the register-offset form of the 16-byte FP load/store
// macro-op: one MicroLdFp16RegUop for a load, or a MicroStrQBFpXRegUop /
// MicroStrQTFpXRegUop pair for a store. `base`, `offset`, `type` and `imm`
// are forwarded unchanged to the micro-ops.
//
// NOTE(review): this listing has viewer line numbers fused to each line
// and is missing lines 439, 441 and 446 of that numbering. `base` and
// `offset` are used below but their declarations are not visible
// (presumably the missing parameter line) - confirm against the real file.
440 OpClass __opClass, bool load, RegIndex dest,
442 ArmExtendType type, int64_t imm) :
443 PredMacroOp(mnem, machInst, __opClass)
444{
445 numMicroops = load ? 1 : 2;
447
448 StaticInstPtr *uop = microOps;
449
450 if (load) {
451 *uop = new MicroLdFp16RegUop(machInst, dest, base,
452 offset, type, imm);
453 } else {
454 *uop = new MicroStrQBFpXRegUop(machInst, dest, base,
455 offset, type, imm);
// Only the first of the two store micro-ops is marked delayed-commit.
456 (*uop)->setDelayedCommit();
457 *++uop = new MicroStrQTFpXRegUop(machInst, dest, base,
458 offset, type, imm);
459 }
460
// `uop` now points at the final micro-op in either case.
461 (*uop)->setLastMicroop();
462 microOps[0]->setFirstMicroop();
463}
464
// Constructor for a macro-op consisting of a single MicroLdFp16LitUop that
// takes `dest` and `imm`; that micro-op is both the first and the last.
//
// NOTE(review): this listing has viewer line numbers fused to each line
// and is missing lines 465 and 471 of that numbering, including the line
// with the constructor's qualified name. Confirm against the real file.
466 OpClass __opClass, RegIndex dest,
467 int64_t imm) :
468 PredMacroOp(mnem, machInst, __opClass)
469{
470 numMicroops = 1;
472
473 microOps[0] = new MicroLdFp16LitUop(machInst, dest, imm);
474 microOps[0]->setLastMicroop();
475 microOps[0]->setFirstMicroop();
476}
477
// Builds the micro-op sequence for a multi-register NEON load: one or two
// memory loads, an optional base-register writeback, and - when elems > 1 -
// de-interleave micro-ops that move data from the staging registers into
// the destination registers.
//
// NOTE(review): this listing has viewer line numbers fused to each line
// and is missing line 492 of that numbering. Confirm against the real file.
//
// Parameters:
//   elems - elements per structure; values above 1 enable de-interleaving.
//   rn    - base address register.
//   vd    - first destination register (used as vd * 2 below).
//   regs  - number of registers, asserted to be 1..4 and a multiple of
//           elems.
//   inc   - register increment, passed to the de-interleave micro-ops as
//           inc * 2.
//   size  - element size selector forwarded to the newNeon*Inst helpers.
//   align - alignment for the first memory micro-op; a second one, if any,
//           gets 0.
//   rm    - 15: no writeback; 13: writeback by the immediate regs * 8;
//           anything else: writeback by adding register rm.
478VldMultOp::VldMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
479 unsigned elems, RegIndex rn, RegIndex vd, unsigned regs,
480 unsigned inc, uint32_t size, uint32_t align, RegIndex rm) :
481 PredMacroOp(mnem, machInst, __opClass)
482{
483 assert(regs > 0 && regs <= 4);
484 assert(regs % elems == 0);
485
// Three or four registers need two memory micro-ops, otherwise one.
486 numMicroops = (regs > 2) ? 2 : 1;
487 bool wb = (rm != 15);
488 bool deinterleave = (elems > 1);
489
490 if (wb) numMicroops++;
491 if (deinterleave) numMicroops += (regs / elems);
493
// Loads land in VecSpecialElem when a de-interleave step follows,
// otherwise directly in the destination.
494 RegIndex rMid = deinterleave ? VecSpecialElem : vd * 2;
495
496 uint32_t noAlign = 0;
497
498 unsigned uopIdx = 0;
499 switch (regs) {
500 case 4:
501 microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
502 size, machInst, rMid, rn, 0, align);
503 microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
504 size, machInst, rMid + 4, rn, 16, noAlign);
505 break;
506 case 3:
507 microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
508 size, machInst, rMid, rn, 0, align);
509 microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon8Uop>(
510 size, machInst, rMid + 4, rn, 16, noAlign);
511 break;
512 case 2:
513 microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
514 size, machInst, rMid, rn, 0, align);
515 break;
516 case 1:
517 microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon8Uop>(
518 size, machInst, rMid, rn, 0, align);
519 break;
520 default:
521 // Unknown number of registers
522 microOps[uopIdx++] = new Unknown(machInst);
523 }
524 if (wb) {
// rm != 15 already holds here (wb), so this effectively tests rm != 13.
525 if (rm != 15 && rm != 13) {
526 microOps[uopIdx++] =
527 new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
528 } else {
529 microOps[uopIdx++] =
530 new MicroAddiUop(machInst, rn, rn, regs * 8);
531 }
532 }
533 if (deinterleave) {
534 switch (elems) {
535 case 4:
536 assert(regs == 4);
537 microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon8Uop>(
538 size, machInst, vd * 2, rMid, inc * 2);
539 break;
540 case 3:
541 assert(regs == 3);
542 microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon6Uop>(
543 size, machInst, vd * 2, rMid, inc * 2);
544 break;
545 case 2:
546 assert(regs == 4 || regs == 2);
547 if (regs == 4) {
548 microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
549 size, machInst, vd * 2, rMid, inc * 2);
550 microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
551 size, machInst, vd * 2 + 2, rMid + 4, inc * 2);
552 } else {
553 microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
554 size, machInst, vd * 2, rMid, inc * 2);
555 }
556 break;
557 default:
558 // Bad number of elements to deinterleave
559 microOps[uopIdx++] = new Unknown(machInst);
560 }
561 }
// The count computed up front must match what was actually emitted.
562 assert(uopIdx == numMicroops);
563
// Every micro-op except the last is marked delayed-commit.
564 for (unsigned i = 0; i < numMicroops - 1; i++) {
565 MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
566 assert(uopPtr);
567 uopPtr->setDelayedCommit();
568 }
569 microOps[0]->setFirstMicroop();
570 microOps[numMicroops - 1]->setLastMicroop();
571}
572
// Constructor for a NEON single-structure load macro-op. It emits one
// memory micro-op that loads loadSize bytes into `ufp0`, an optional
// base-register writeback, and then one or more unpack micro-ops that move
// the data from `ufp0` into the destination registers - either into a
// single lane (`lane`) or, when `all` is set, using the "UnpackAll"
// micro-op variants.
//
// NOTE(review): this listing has viewer line numbers fused to each line
// and is missing lines 573, 595 and 597 of that numbering, including the
// line with the constructor's qualified name. `ufp0` is used below without
// a visible declaration. Confirm against the real file.
//
// Parameters visible here:
//   all   - select the "UnpackAll" micro-ops instead of the per-lane ones.
//   elems - elements per structure (1..4 handled).
//   rn    - base address register.
//   vd    - first destination register (used as vd * 2 below).
//   regs  - number of registers, asserted to be 1..4 and a multiple of
//           elems.
//   inc   - register increment, passed on as inc * 2.
//   size  - log2 of the element size in bytes (0, 1 or 2 handled).
//   align - alignment passed to the memory micro-op.
//   rm    - 15: no writeback; 13: writeback by the immediate loadSize;
//           anything else: writeback by adding register rm.
//   lane  - lane index for the non-`all` unpack micro-ops.
574 OpClass __opClass, bool all, unsigned elems,
575 RegIndex rn, RegIndex vd, unsigned regs,
576 unsigned inc, uint32_t size, uint32_t align,
577 RegIndex rm, unsigned lane) :
578 PredMacroOp(mnem, machInst, __opClass)
579{
580 assert(regs > 0 && regs <= 4);
581 assert(regs % elems == 0);
582
// eBytes: bytes per element; loadSize: bytes per structure; loadRegs:
// 32-bit words needed to hold loadSize (rounded up), used only in asserts.
583 unsigned eBytes = (1 << size);
584 unsigned loadSize = eBytes * elems;
585 [[maybe_unused]] unsigned loadRegs =
586 (loadSize + sizeof(uint32_t) - 1) / sizeof(uint32_t);
587
588 assert(loadRegs > 0 && loadRegs <= 4);
589
// One memory micro-op, optional writeback, regs / elems unpack micro-ops.
590 numMicroops = 1;
591 bool wb = (rm != 15);
592
593 if (wb) numMicroops++;
594 numMicroops += (regs / elems);
596
598
// Pick the load micro-op from the total byte count and the element width.
// NOTE(review): the nested eBytes switches have no default, so an
// unexpected eBytes emits nothing there; the uopIdx assert below would
// then fail.
599 unsigned uopIdx = 0;
600 switch (loadSize) {
601 case 1:
602 microOps[uopIdx++] = new MicroLdrNeon1Uop<uint8_t>(
603 machInst, ufp0, rn, 0, align);
604 break;
605 case 2:
606 if (eBytes == 2) {
607 microOps[uopIdx++] = new MicroLdrNeon2Uop<uint16_t>(
608 machInst, ufp0, rn, 0, align);
609 } else {
610 microOps[uopIdx++] = new MicroLdrNeon2Uop<uint8_t>(
611 machInst, ufp0, rn, 0, align);
612 }
613 break;
614 case 3:
615 microOps[uopIdx++] = new MicroLdrNeon3Uop<uint8_t>(
616 machInst, ufp0, rn, 0, align);
617 break;
618 case 4:
619 switch (eBytes) {
620 case 1:
621 microOps[uopIdx++] = new MicroLdrNeon4Uop<uint8_t>(
622 machInst, ufp0, rn, 0, align);
623 break;
624 case 2:
625 microOps[uopIdx++] = new MicroLdrNeon4Uop<uint16_t>(
626 machInst, ufp0, rn, 0, align);
627 break;
628 case 4:
629 microOps[uopIdx++] = new MicroLdrNeon4Uop<uint32_t>(
630 machInst, ufp0, rn, 0, align);
631 break;
632 }
633 break;
634 case 6:
635 microOps[uopIdx++] = new MicroLdrNeon6Uop<uint16_t>(
636 machInst, ufp0, rn, 0, align);
637 break;
638 case 8:
639 switch (eBytes) {
640 case 2:
641 microOps[uopIdx++] = new MicroLdrNeon8Uop<uint16_t>(
642 machInst, ufp0, rn, 0, align);
643 break;
644 case 4:
645 microOps[uopIdx++] = new MicroLdrNeon8Uop<uint32_t>(
646 machInst, ufp0, rn, 0, align);
647 break;
648 }
649 break;
650 case 12:
651 microOps[uopIdx++] = new MicroLdrNeon12Uop<uint32_t>(
652 machInst, ufp0, rn, 0, align);
653 break;
654 case 16:
655 microOps[uopIdx++] = new MicroLdrNeon16Uop<uint32_t>(
656 machInst, ufp0, rn, 0, align);
657 break;
658 default:
659 // Unrecognized load size
660 microOps[uopIdx++] = new Unknown(machInst);
661 }
662 if (wb) {
// rm != 15 already holds here (wb), so this effectively tests rm != 13.
663 if (rm != 15 && rm != 13) {
664 microOps[uopIdx++] =
665 new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
666 } else {
667 microOps[uopIdx++] =
668 new MicroAddiUop(machInst, rn, rn, loadSize);
669 }
670 }
// Unpack step: the outer switch selects by elements per structure, the
// inner one by element size; `all` picks the UnpackAll variant.
671 switch (elems) {
672 case 4:
673 assert(regs == 4);
674 switch (size) {
675 case 0:
676 if (all) {
677 microOps[uopIdx++] = new MicroUnpackAllNeon2to8Uop<uint8_t>(
678 machInst, vd * 2, ufp0, inc * 2);
679 } else {
680 microOps[uopIdx++] = new MicroUnpackNeon2to8Uop<uint8_t>(
681 machInst, vd * 2, ufp0, inc * 2, lane);
682 }
683 break;
684 case 1:
685 if (all) {
686 microOps[uopIdx++] = new MicroUnpackAllNeon2to8Uop<uint16_t>(
687 machInst, vd * 2, ufp0, inc * 2);
688 } else {
689 microOps[uopIdx++] = new MicroUnpackNeon2to8Uop<uint16_t>(
690 machInst, vd * 2, ufp0, inc * 2, lane);
691 }
692 break;
693 case 2:
694 if (all) {
695 microOps[uopIdx++] = new MicroUnpackAllNeon4to8Uop<uint32_t>(
696 machInst, vd * 2, ufp0, inc * 2);
697 } else {
698 microOps[uopIdx++] = new MicroUnpackNeon4to8Uop<uint32_t>(
699 machInst, vd * 2, ufp0, inc * 2, lane);
700 }
701 break;
702 default:
703 // Bad size
704 microOps[uopIdx++] = new Unknown(machInst);
705 break;
706 }
707 break;
708 case 3:
709 assert(regs == 3);
710 switch (size) {
711 case 0:
712 if (all) {
713 microOps[uopIdx++] = new MicroUnpackAllNeon2to6Uop<uint8_t>(
714 machInst, vd * 2, ufp0, inc * 2);
715 } else {
716 microOps[uopIdx++] = new MicroUnpackNeon2to6Uop<uint8_t>(
717 machInst, vd * 2, ufp0, inc * 2, lane);
718 }
719 break;
720 case 1:
721 if (all) {
722 microOps[uopIdx++] = new MicroUnpackAllNeon2to6Uop<uint16_t>(
723 machInst, vd * 2, ufp0, inc * 2);
724 } else {
725 microOps[uopIdx++] = new MicroUnpackNeon2to6Uop<uint16_t>(
726 machInst, vd * 2, ufp0, inc * 2, lane);
727 }
728 break;
729 case 2:
730 if (all) {
731 microOps[uopIdx++] = new MicroUnpackAllNeon4to6Uop<uint32_t>(
732 machInst, vd * 2, ufp0, inc * 2);
733 } else {
734 microOps[uopIdx++] = new MicroUnpackNeon4to6Uop<uint32_t>(
735 machInst, vd * 2, ufp0, inc * 2, lane);
736 }
737 break;
738 default:
739 // Bad size
740 microOps[uopIdx++] = new Unknown(machInst);
741 break;
742 }
743 break;
744 case 2:
745 assert(regs == 2);
746 assert(loadRegs <= 2);
747 switch (size) {
748 case 0:
749 if (all) {
750 microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint8_t>(
751 machInst, vd * 2, ufp0, inc * 2);
752 } else {
753 microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint8_t>(
754 machInst, vd * 2, ufp0, inc * 2, lane);
755 }
756 break;
757 case 1:
758 if (all) {
759 microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint16_t>(
760 machInst, vd * 2, ufp0, inc * 2);
761 } else {
762 microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint16_t>(
763 machInst, vd * 2, ufp0, inc * 2, lane);
764 }
765 break;
766 case 2:
767 if (all) {
768 microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint32_t>(
769 machInst, vd * 2, ufp0, inc * 2);
770 } else {
771 microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint32_t>(
772 machInst, vd * 2, ufp0, inc * 2, lane);
773 }
774 break;
775 default:
776 // Bad size
777 microOps[uopIdx++] = new Unknown(machInst);
778 break;
779 }
780 break;
781 case 1:
782 assert(regs == 1 || (all && regs == 2));
783 assert(loadRegs <= 2);
// One unpack micro-op per destination register, each reading ufp0.
784 for (unsigned offset = 0; offset < regs; offset++) {
785 switch (size) {
786 case 0:
787 if (all) {
788 microOps[uopIdx++] =
789 new MicroUnpackAllNeon2to2Uop<uint8_t>(
790 machInst, (vd + offset) * 2, ufp0, inc * 2);
791 } else {
792 microOps[uopIdx++] =
793 new MicroUnpackNeon2to2Uop<uint8_t>(
794 machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
795 }
796 break;
797 case 1:
798 if (all) {
799 microOps[uopIdx++] =
800 new MicroUnpackAllNeon2to2Uop<uint16_t>(
801 machInst, (vd + offset) * 2, ufp0, inc * 2);
802 } else {
803 microOps[uopIdx++] =
804 new MicroUnpackNeon2to2Uop<uint16_t>(
805 machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
806 }
807 break;
808 case 2:
809 if (all) {
810 microOps[uopIdx++] =
811 new MicroUnpackAllNeon2to2Uop<uint32_t>(
812 machInst, (vd + offset) * 2, ufp0, inc * 2);
813 } else {
814 microOps[uopIdx++] =
815 new MicroUnpackNeon2to2Uop<uint32_t>(
816 machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
817 }
818 break;
819 default:
820 // Bad size
821 microOps[uopIdx++] = new Unknown(machInst);
822 break;
823 }
824 }
825 break;
826 default:
827 // Bad number of elements to unpack
828 microOps[uopIdx++] = new Unknown(machInst);
829 }
// The count computed up front must match what was actually emitted.
830 assert(uopIdx == numMicroops);
831
// Every micro-op except the last is marked delayed-commit.
832 for (unsigned i = 0; i < numMicroops - 1; i++) {
833 MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
834 assert(uopPtr);
835 uopPtr->setDelayedCommit();
836 }
837 microOps[0]->setFirstMicroop();
838 microOps[numMicroops - 1]->setLastMicroop();
839}
840
// Builds the micro-op sequence for a multi-register NEON store: when
// elems > 1, interleave micro-ops first gather the source registers into
// the staging registers; then one or two memory stores; then an optional
// base-register writeback.
//
// NOTE(review): this listing has viewer line numbers fused to each line
// and is missing line 855 of that numbering. Confirm against the real file.
//
// Parameters:
//   elems - elements per structure; values above 1 enable interleaving.
//   rn    - base address register.
//   vd    - first source register (used as vd * 2 below).
//   regs  - number of registers, asserted to be 1..4 and a multiple of
//           elems.
//   inc   - register increment, passed to the interleave micro-ops as
//           inc * 2.
//   size  - element size selector forwarded to the newNeon*Inst helpers.
//   align - alignment for the first memory micro-op; a second one, if any,
//           gets 0.
//   rm    - 15: no writeback; 13: writeback by the immediate regs * 8;
//           anything else: writeback by adding register rm.
841VstMultOp::VstMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
842 unsigned elems, RegIndex rn, RegIndex vd, unsigned regs,
843 unsigned inc, uint32_t size, uint32_t align, RegIndex rm) :
844 PredMacroOp(mnem, machInst, __opClass)
845{
846 assert(regs > 0 && regs <= 4);
847 assert(regs % elems == 0);
848
// Three or four registers need two memory micro-ops, otherwise one.
849 numMicroops = (regs > 2) ? 2 : 1;
850 bool wb = (rm != 15);
851 bool interleave = (elems > 1);
852
853 if (wb) numMicroops++;
854 if (interleave) numMicroops += (regs / elems);
856
857 uint32_t noAlign = 0;
858
// Stores read from VecSpecialElem when an interleave step precedes them,
// otherwise directly from the source registers.
859 RegIndex rMid = interleave ? VecSpecialElem : vd * 2;
860
861 unsigned uopIdx = 0;
862 if (interleave) {
863 switch (elems) {
864 case 4:
865 assert(regs == 4);
866 microOps[uopIdx++] = newNeonMixInst<MicroInterNeon8Uop>(
867 size, machInst, rMid, vd * 2, inc * 2);
868 break;
869 case 3:
870 assert(regs == 3);
871 microOps[uopIdx++] = newNeonMixInst<MicroInterNeon6Uop>(
872 size, machInst, rMid, vd * 2, inc * 2);
873 break;
874 case 2:
875 assert(regs == 4 || regs == 2);
876 if (regs == 4) {
877 microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
878 size, machInst, rMid, vd * 2, inc * 2);
879 microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
880 size, machInst, rMid + 4, vd * 2 + 2, inc * 2);
881 } else {
882 microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
883 size, machInst, rMid, vd * 2, inc * 2);
884 }
885 break;
886 default:
887 // Bad number of elements to interleave
888 microOps[uopIdx++] = new Unknown(machInst);
889 }
890 }
891 switch (regs) {
892 case 4:
893 microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
894 size, machInst, rMid, rn, 0, align);
895 microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
896 size, machInst, rMid + 4, rn, 16, noAlign);
897 break;
898 case 3:
899 microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
900 size, machInst, rMid, rn, 0, align);
901 microOps[uopIdx++] = newNeonMemInst<MicroStrNeon8Uop>(
902 size, machInst, rMid + 4, rn, 16, noAlign);
903 break;
904 case 2:
905 microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
906 size, machInst, rMid, rn, 0, align);
907 break;
908 case 1:
909 microOps[uopIdx++] = newNeonMemInst<MicroStrNeon8Uop>(
910 size, machInst, rMid, rn, 0, align);
911 break;
912 default:
913 // Unknown number of registers
914 microOps[uopIdx++] = new Unknown(machInst);
915 }
916 if (wb) {
// rm != 15 already holds here (wb), so this effectively tests rm != 13.
917 if (rm != 15 && rm != 13) {
918 microOps[uopIdx++] =
919 new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
920 } else {
921 microOps[uopIdx++] =
922 new MicroAddiUop(machInst, rn, rn, regs * 8);
923 }
924 }
// The count computed up front must match what was actually emitted.
925 assert(uopIdx == numMicroops);
926
// Every micro-op except the last is marked delayed-commit.
927 for (unsigned i = 0; i < numMicroops - 1; i++) {
928 MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
929 assert(uopPtr);
930 uopPtr->setDelayedCommit();
931 }
932 microOps[0]->setFirstMicroop();
933 microOps[numMicroops - 1]->setLastMicroop();
934}
935
// Constructor for a NEON single-structure store macro-op. It emits pack
// micro-op(s) that gather lane `lane` of the source registers into `ufp0`,
// one memory micro-op that stores storeSize bytes from `ufp0`, and an
// optional base-register writeback.
//
// NOTE(review): this listing has viewer line numbers fused to each line
// and is missing lines 936, 959 and 961 of that numbering, including the
// line with the constructor's qualified name. `ufp0` is used below without
// a visible declaration. Confirm against the real file.
//
// Parameters visible here:
//   all   - asserted to be false.
//   elems - elements per structure (1..4 handled).
//   rn    - base address register.
//   vd    - first source register (used as vd * 2 below).
//   regs  - number of registers, asserted to be 1..4 and a multiple of
//           elems.
//   inc   - register increment, passed on as inc * 2.
//   size  - log2 of the element size in bytes (0, 1 or 2 handled).
//   align - alignment passed to the memory micro-op.
//   rm    - 15: no writeback; 13: writeback by the immediate storeSize;
//           anything else: writeback by adding register rm.
//   lane  - lane index passed to the pack micro-ops.
937 OpClass __opClass, bool all, unsigned elems,
938 RegIndex rn, RegIndex vd, unsigned regs,
939 unsigned inc, uint32_t size, uint32_t align,
940 RegIndex rm, unsigned lane) :
941 PredMacroOp(mnem, machInst, __opClass)
942{
943 assert(!all);
944 assert(regs > 0 && regs <= 4);
945 assert(regs % elems == 0);
946
// eBytes: bytes per element; storeSize: bytes per structure; storeRegs:
// 32-bit words needed to hold storeSize (rounded up), used only in asserts.
947 unsigned eBytes = (1 << size);
948 unsigned storeSize = eBytes * elems;
949 [[maybe_unused]] unsigned storeRegs =
950 (storeSize + sizeof(uint32_t) - 1) / sizeof(uint32_t);
951
952 assert(storeRegs > 0 && storeRegs <= 4);
953
// One memory micro-op, optional writeback, regs / elems pack micro-ops.
954 numMicroops = 1;
955 bool wb = (rm != 15);
956
957 if (wb) numMicroops++;
958 numMicroops += (regs / elems);
960
962
// Pack step: the outer switch selects by elements per structure, the
// inner one by element size.
963 unsigned uopIdx = 0;
964 switch (elems) {
965 case 4:
966 assert(regs == 4);
967 switch (size) {
968 case 0:
969 microOps[uopIdx++] = new MicroPackNeon8to2Uop<uint8_t>(
970 machInst, ufp0, vd * 2, inc * 2, lane);
971 break;
972 case 1:
973 microOps[uopIdx++] = new MicroPackNeon8to2Uop<uint16_t>(
974 machInst, ufp0, vd * 2, inc * 2, lane);
975 break;
976 case 2:
977 microOps[uopIdx++] = new MicroPackNeon8to4Uop<uint32_t>(
978 machInst, ufp0, vd * 2, inc * 2, lane);
979 break;
980 default:
981 // Bad size
982 microOps[uopIdx++] = new Unknown(machInst);
983 break;
984 }
985 break;
986 case 3:
987 assert(regs == 3);
988 switch (size) {
989 case 0:
990 microOps[uopIdx++] = new MicroPackNeon6to2Uop<uint8_t>(
991 machInst, ufp0, vd * 2, inc * 2, lane);
992 break;
993 case 1:
994 microOps[uopIdx++] = new MicroPackNeon6to2Uop<uint16_t>(
995 machInst, ufp0, vd * 2, inc * 2, lane);
996 break;
997 case 2:
998 microOps[uopIdx++] = new MicroPackNeon6to4Uop<uint32_t>(
999 machInst, ufp0, vd * 2, inc * 2, lane);
1000 break;
1001 default:
1002 // Bad size
1003 microOps[uopIdx++] = new Unknown(machInst);
1004 break;
1005 }
1006 break;
1007 case 2:
1008 assert(regs == 2);
1009 assert(storeRegs <= 2);
1010 switch (size) {
1011 case 0:
1012 microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint8_t>(
1013 machInst, ufp0, vd * 2, inc * 2, lane);
1014 break;
1015 case 1:
1016 microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint16_t>(
1017 machInst, ufp0, vd * 2, inc * 2, lane);
1018 break;
1019 case 2:
1020 microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint32_t>(
1021 machInst, ufp0, vd * 2, inc * 2, lane);
1022 break;
1023 default:
1024 // Bad size
1025 microOps[uopIdx++] = new Unknown(machInst);
1026 break;
1027 }
1028 break;
1029 case 1:
// `all` was asserted false at the top, so only regs == 1 can satisfy this.
1030 assert(regs == 1 || (all && regs == 2));
1031 assert(storeRegs <= 2);
1032 for (unsigned offset = 0; offset < regs; offset++) {
1033 switch (size) {
1034 case 0:
1035 microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint8_t>(
1036 machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
1037 break;
1038 case 1:
1039 microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint16_t>(
1040 machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
1041 break;
1042 case 2:
1043 microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint32_t>(
1044 machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
1045 break;
1046 default:
1047 // Bad size
1048 microOps[uopIdx++] = new Unknown(machInst);
1049 break;
1050 }
1051 }
1052 break;
1053 default:
1054 // Bad number of elements to unpack
1055 microOps[uopIdx++] = new Unknown(machInst);
1056 }
// Pick the store micro-op from the total byte count and the element width.
// NOTE(review): the nested eBytes switches have no default, so an
// unexpected eBytes emits nothing there; the uopIdx assert below would
// then fail.
1057 switch (storeSize) {
1058 case 1:
1059 microOps[uopIdx++] = new MicroStrNeon1Uop<uint8_t>(
1060 machInst, ufp0, rn, 0, align);
1061 break;
1062 case 2:
1063 if (eBytes == 2) {
1064 microOps[uopIdx++] = new MicroStrNeon2Uop<uint16_t>(
1065 machInst, ufp0, rn, 0, align);
1066 } else {
1067 microOps[uopIdx++] = new MicroStrNeon2Uop<uint8_t>(
1068 machInst, ufp0, rn, 0, align);
1069 }
1070 break;
1071 case 3:
1072 microOps[uopIdx++] = new MicroStrNeon3Uop<uint8_t>(
1073 machInst, ufp0, rn, 0, align);
1074 break;
1075 case 4:
1076 switch (eBytes) {
1077 case 1:
1078 microOps[uopIdx++] = new MicroStrNeon4Uop<uint8_t>(
1079 machInst, ufp0, rn, 0, align);
1080 break;
1081 case 2:
1082 microOps[uopIdx++] = new MicroStrNeon4Uop<uint16_t>(
1083 machInst, ufp0, rn, 0, align);
1084 break;
1085 case 4:
1086 microOps[uopIdx++] = new MicroStrNeon4Uop<uint32_t>(
1087 machInst, ufp0, rn, 0, align);
1088 break;
1089 }
1090 break;
1091 case 6:
1092 microOps[uopIdx++] = new MicroStrNeon6Uop<uint16_t>(
1093 machInst, ufp0, rn, 0, align);
1094 break;
1095 case 8:
1096 switch (eBytes) {
1097 case 2:
1098 microOps[uopIdx++] = new MicroStrNeon8Uop<uint16_t>(
1099 machInst, ufp0, rn, 0, align);
1100 break;
1101 case 4:
1102 microOps[uopIdx++] = new MicroStrNeon8Uop<uint32_t>(
1103 machInst, ufp0, rn, 0, align);
1104 break;
1105 }
1106 break;
1107 case 12:
1108 microOps[uopIdx++] = new MicroStrNeon12Uop<uint32_t>(
1109 machInst, ufp0, rn, 0, align);
1110 break;
1111 case 16:
1112 microOps[uopIdx++] = new MicroStrNeon16Uop<uint32_t>(
1113 machInst, ufp0, rn, 0, align);
1114 break;
1115 default:
1116 // Bad store size
1117 microOps[uopIdx++] = new Unknown(machInst);
1118 }
1119 if (wb) {
// rm != 15 already holds here (wb), so this effectively tests rm != 13.
1120 if (rm != 15 && rm != 13) {
1121 microOps[uopIdx++] =
1122 new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
1123 } else {
1124 microOps[uopIdx++] =
1125 new MicroAddiUop(machInst, rn, rn, storeSize);
1126 }
1127 }
// The count computed up front must match what was actually emitted.
1128 assert(uopIdx == numMicroops);
1129
// Every micro-op except the last is marked delayed-commit.
1130 for (unsigned i = 0; i < numMicroops - 1; i++) {
1131 MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
1132 assert(uopPtr);
1133 uopPtr->setDelayedCommit();
1134 }
1135 microOps[0]->setFirstMicroop();
1136 microOps[numMicroops - 1]->setLastMicroop();
1137}
1138
// VldMultOp64 constructor (this listing starts on the second line of the
// signature). It fills microOps with, in order:
//   1. numMemMicroops MicroNeonLoad64 micro-ops using register vx + i,
//      each with an access size of at most 16 bytes;
//   2. one add micro-op updating rnsp, emitted only when wb is true;
//   3. numMarshalMicroops MicroDeintNeon64_<N>Reg micro-ops, selected by
//      numRegs, using vd + 2 * i together with vx.
// NOTE(review): the declaration of vx and the allocation of microOps are
// not visible in this listing.
1140 OpClass __opClass, RegIndex rn, RegIndex vd,
1141 RegIndex rm, uint8_t eSize, uint8_t dataSize,
1142 uint8_t numStructElems, uint8_t numRegs, bool wb) :
1143 PredMacroOp(mnem, machInst, __opClass)
1144{
// rn is passed through makeSP(); baseIsSP records whether the result is SP.
1146 RegIndex rnsp = (RegIndex) makeSP((RegIndex) rn);
1147 bool baseIsSP = isSP((RegIndex) rnsp);
1148
// Start the micro-op count with the optional writeback micro-op.
1149 numMicroops = wb ? 1 : 0;
1150
// dataSize is divided by 8 to obtain bytes, so it is presumably a bit
// count -- TODO confirm against the decoder.
1151 int totNumBytes = numRegs * dataSize / 8;
1152 assert(totNumBytes <= 64);
1153
1154 // The guiding principle here is that no more than 16 bytes can be
1155 // transferred at a time
// Round up: a trailing partial chunk (residuum) needs its own access.
1156 int numMemMicroops = totNumBytes / 16;
1157 int residuum = totNumBytes % 16;
1158 if (residuum)
1159 ++numMemMicroops;
1160 numMicroops += numMemMicroops;
1161
// ceil(numRegs / 2) marshalling micro-ops.
1162 int numMarshalMicroops = numRegs / 2 + (numRegs % 2 ? 1 : 0);
1163 numMicroops += numMarshalMicroops;
1164
1166 unsigned uopIdx = 0;
1167 uint32_t memaccessFlags = (MMU::ArmFlags)eSize | MMU::AllowUnaligned;
1168
// All accesses but the last are full 16-byte transfers; i is kept after
// the loop so the final access continues from the same index.
1169 int i = 0;
1170 for (; i < numMemMicroops - 1; ++i) {
1171 microOps[uopIdx++] = new MicroNeonLoad64(
1172 machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
1173 baseIsSP, 16 /* accSize */, eSize);
1174 }
// Final access: residuum bytes, or 16 when the total is a multiple of 16.
1175 microOps[uopIdx++] = new MicroNeonLoad64(
1176 machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
1177 residuum ? residuum : 16 /* accSize */, eSize);
1178
1179 // Writeback microop: the post-increment amount is encoded in "Rm": a
1180 // 64-bit general register OR as '11111' for an immediate value equal to
1181 // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
1182 if (wb) {
1183 if (rm != int_reg::X31) {
1184 microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
1185 UXTX, 0);
1186 } else {
1187 microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
1188 totNumBytes);
1189 }
1190 }
1191
// Marshalling micro-ops: the class is chosen by numRegs and the loop
// index is passed as the step argument; any other numRegs value panics.
1192 for (int i = 0; i < numMarshalMicroops; ++i) {
1193 switch(numRegs) {
1194 case 1: microOps[uopIdx++] = new MicroDeintNeon64_1Reg(
1195 machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
1196 numStructElems, 1, i /* step */);
1197 break;
1198 case 2: microOps[uopIdx++] = new MicroDeintNeon64_2Reg(
1199 machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
1200 numStructElems, 2, i /* step */);
1201 break;
1202 case 3: microOps[uopIdx++] = new MicroDeintNeon64_3Reg(
1203 machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
1204 numStructElems, 3, i /* step */);
1205 break;
1206 case 4: microOps[uopIdx++] = new MicroDeintNeon64_4Reg(
1207 machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
1208 numStructElems, 4, i /* step */);
1209 break;
1210 default: panic("Invalid number of registers");
1211 }
1212
1213 }
1214
// The number of micro-ops emitted must match the count computed above.
1215 assert(uopIdx == numMicroops);
1216
// Every micro-op except the last is flagged delayed-commit; the first and
// last are additionally tagged as such.
1217 for (int i = 0; i < numMicroops - 1; ++i) {
1218 microOps[i]->setDelayedCommit();
1219 }
1220 microOps[0]->setFirstMicroop();
1221 microOps[numMicroops - 1]->setLastMicroop();
1222}
1223
// VstMultOp64 constructor (this listing starts on the second line of the
// signature). It fills microOps with, in order:
//   1. numMarshalMicroops MicroIntNeon64_<N>Reg micro-ops, selected by
//      numRegs, using vx + 2 * i together with vd;
//   2. numMemMicroops MicroNeonStore64 micro-ops using register vx + i,
//      each with an access size of at most 16 bytes;
//   3. one add micro-op updating rnsp, emitted only when wb is true.
// NOTE(review): the declaration of vx and the allocation of microOps are
// not visible in this listing.
1225 OpClass __opClass, RegIndex rn, RegIndex vd,
1226 RegIndex rm, uint8_t eSize, uint8_t dataSize,
1227 uint8_t numStructElems, uint8_t numRegs, bool wb) :
1228 PredMacroOp(mnem, machInst, __opClass)
1229{
// rn is passed through makeSP(); baseIsSP records whether the result is SP.
1231 RegIndex rnsp = (RegIndex) makeSP((RegIndex) rn);
1232 bool baseIsSP = isSP((RegIndex) rnsp);
1233
// Start the micro-op count with the optional writeback micro-op.
1234 numMicroops = wb ? 1 : 0;
1235
// dataSize is divided by 8 to obtain bytes, so it is presumably a bit
// count -- TODO confirm against the decoder.
1236 int totNumBytes = numRegs * dataSize / 8;
1237 assert(totNumBytes <= 64);
1238
1239 // The guiding principle here is that no more than 16 bytes can be
1240 // transferred at a time
// Round up: a trailing partial chunk (residuum) needs its own access.
1241 int numMemMicroops = totNumBytes / 16;
1242 int residuum = totNumBytes % 16;
1243 if (residuum)
1244 ++numMemMicroops;
1245 numMicroops += numMemMicroops;
1246
// Two marshalling micro-ops when more than 32 bytes are stored, else one.
1247 int numMarshalMicroops = totNumBytes > 32 ? 2 : 1;
1248 numMicroops += numMarshalMicroops;
1249
1251 unsigned uopIdx = 0;
1252
// Marshalling micro-ops come first: the class is chosen by numRegs and
// the loop index is passed as the step argument; any other numRegs value
// panics.
1253 for (int i = 0; i < numMarshalMicroops; ++i) {
1254 switch (numRegs) {
1255 case 1: microOps[uopIdx++] = new MicroIntNeon64_1Reg(
1256 machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
1257 numStructElems, 1, i /* step */);
1258 break;
1259 case 2: microOps[uopIdx++] = new MicroIntNeon64_2Reg(
1260 machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
1261 numStructElems, 2, i /* step */);
1262 break;
1263 case 3: microOps[uopIdx++] = new MicroIntNeon64_3Reg(
1264 machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
1265 numStructElems, 3, i /* step */);
1266 break;
1267 case 4: microOps[uopIdx++] = new MicroIntNeon64_4Reg(
1268 machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
1269 numStructElems, 4, i /* step */);
1270 break;
1271 default: panic("Invalid number of registers");
1272 }
1273 }
1274
1275 uint32_t memaccessFlags = (MMU::ArmFlags)eSize | MMU::AllowUnaligned;
1276
// All accesses but the last are full 16-byte transfers; i is kept after
// the loop so the final access continues from the same index.
1277 int i = 0;
1278 for (; i < numMemMicroops - 1; ++i) {
1279 microOps[uopIdx++] = new MicroNeonStore64(
1280 machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
1281 baseIsSP, 16 /* accSize */, eSize);
1282 }
// Final access: residuum bytes, or 16 when the total is a multiple of 16.
1283 microOps[uopIdx++] = new MicroNeonStore64(
1284 machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
1285 residuum ? residuum : 16 /* accSize */, eSize);
1286
1287 // Writeback microop: the post-increment amount is encoded in "Rm": a
1288 // 64-bit general register OR as '11111' for an immediate value equal to
1289 // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
1290 if (wb) {
1291 if (rm != int_reg::X31) {
1292 microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
1293 UXTX, 0);
1294 } else {
1295 microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
1296 totNumBytes);
1297 }
1298 }
1299
// The number of micro-ops emitted must match the count computed above.
1300 assert(uopIdx == numMicroops);
1301
// Every micro-op except the last is flagged delayed-commit; the first and
// last are additionally tagged as such.
1302 for (int i = 0; i < numMicroops - 1; i++) {
1303 microOps[i]->setDelayedCommit();
1304 }
1305 microOps[0]->setFirstMicroop();
1306 microOps[numMicroops - 1]->setLastMicroop();
1307}
1308
// VldSingleOp64 constructor (this listing starts on the second line of the
// signature). It fills microOps with, in order:
//   1. numMemMicroops MicroNeonLoad64 micro-ops using register vx + i,
//      each with an access size of at most 16 bytes;
//   2. one add micro-op updating rnsp, emitted only when wb is true;
//   3. numMarshalMicroops MicroUnpackNeon64 micro-ops using vd + 2 * i
//      together with vx, receiving index and replicate.
// NOTE(review): the declaration of vx and the allocation of microOps are
// not visible in this listing.
1310 OpClass __opClass, RegIndex rn, RegIndex vd,
1311 RegIndex rm, uint8_t eSize, uint8_t dataSize,
1312 uint8_t numStructElems, uint8_t index, bool wb,
1313 bool replicate) :
1314 PredMacroOp(mnem, machInst, __opClass),
// The mem-initializers below set the same-named members to 0/false. Inside
// the body the names eSize, dataSize, numStructElems, index, wb and
// replicate resolve to the constructor parameters, which shadow them.
// NOTE(review): the members are therefore never given the argument values
// in the code visible here -- confirm this is intended.
1315 eSize(0), dataSize(0), numStructElems(0), index(0),
1316 wb(false), replicate(false)
1317
1318{
// rn is passed through makeSP(); baseIsSP records whether the result is SP.
1320 RegIndex rnsp = (RegIndex) makeSP((RegIndex) rn);
1321 bool baseIsSP = isSP((RegIndex) rnsp);
1322
// Start the micro-op count with the optional writeback micro-op.
1323 numMicroops = wb ? 1 : 0;
1324
// eSize is used as a shift amount: element size in bytes is 2^eSize.
1325 int eSizeBytes = 1 << eSize;
1326 int totNumBytes = numStructElems * eSizeBytes;
1327 assert(totNumBytes <= 64);
1328
1329 // The guiding principle here is that no more than 16 bytes can be
1330 // transferred at a time
// Round up: a trailing partial chunk (residuum) needs its own access.
1331 int numMemMicroops = totNumBytes / 16;
1332 int residuum = totNumBytes % 16;
1333 if (residuum)
1334 ++numMemMicroops;
1335 numMicroops += numMemMicroops;
1336
// ceil(numStructElems / 2) marshalling micro-ops.
1337 int numMarshalMicroops = numStructElems / 2 + (numStructElems % 2 ? 1 : 0);
1338 numMicroops += numMarshalMicroops;
1339
1341 unsigned uopIdx = 0;
1342
1343 uint32_t memaccessFlags = (MMU::ArmFlags)eSize | MMU::AllowUnaligned;
1344
// All accesses but the last are full 16-byte transfers; i is kept after
// the loop so the final access continues from the same index.
1345 int i = 0;
1346 for (; i < numMemMicroops - 1; ++i) {
1347 microOps[uopIdx++] = new MicroNeonLoad64(
1348 machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
1349 baseIsSP, 16 /* accSize */, eSize);
1350 }
// Final access: residuum bytes, or 16 when the total is a multiple of 16.
1351 microOps[uopIdx++] = new MicroNeonLoad64(
1352 machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
1353 residuum ? residuum : 16 /* accSize */, eSize);
1354
1355 // Writeback microop: the post-increment amount is encoded in "Rm": a
1356 // 64-bit general register OR as '11111' for an immediate value equal to
1357 // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
1358 if (wb) {
1359 if (rm != int_reg::X31) {
1360 microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
1361 UXTX, 0);
1362 } else {
1363 microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
1364 totNumBytes);
1365 }
1366 }
1367
// Marshalling micro-ops: the loop index is passed as the step argument.
1368 for (int i = 0; i < numMarshalMicroops; ++i) {
1369 microOps[uopIdx++] = new MicroUnpackNeon64(
1370 machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
1371 numStructElems, index, i /* step */, replicate);
1372 }
1373
// The number of micro-ops emitted must match the count computed above.
1374 assert(uopIdx == numMicroops);
1375
// Every micro-op except the last is flagged delayed-commit; the first and
// last are additionally tagged as such.
1376 for (int i = 0; i < numMicroops - 1; i++) {
1377 microOps[i]->setDelayedCommit();
1378 }
1379 microOps[0]->setFirstMicroop();
1380 microOps[numMicroops - 1]->setLastMicroop();
1381}
1382
// VstSingleOp64 constructor (this listing starts on the second line of the
// signature). It fills microOps with, in order:
//   1. numMarshalMicroops MicroPackNeon64 micro-ops using vx + 2 * i
//      together with vd, receiving index and replicate;
//   2. numMemMicroops MicroNeonStore64 micro-ops using register vx + i,
//      each with an access size of at most 16 bytes;
//   3. one add micro-op updating rnsp, emitted only when wb is true.
// NOTE(review): the declaration of vx and the allocation of microOps are
// not visible in this listing.
1384 OpClass __opClass, RegIndex rn, RegIndex vd,
1385 RegIndex rm, uint8_t eSize, uint8_t dataSize,
1386 uint8_t numStructElems, uint8_t index, bool wb,
1387 bool replicate) :
1388 PredMacroOp(mnem, machInst, __opClass),
// The mem-initializers below set the same-named members to 0/false. Inside
// the body the names eSize, dataSize, numStructElems, index, wb and
// replicate resolve to the constructor parameters, which shadow them.
// NOTE(review): the members are therefore never given the argument values
// in the code visible here -- confirm this is intended.
1389 eSize(0), dataSize(0), numStructElems(0), index(0),
1390 wb(false), replicate(false)
1391{
// rn is passed through makeSP(); baseIsSP records whether the result is SP.
1393 RegIndex rnsp = (RegIndex) makeSP((RegIndex) rn);
1394 bool baseIsSP = isSP((RegIndex) rnsp);
1395
// Start the micro-op count with the optional writeback micro-op.
1396 numMicroops = wb ? 1 : 0;
1397
// eSize is used as a shift amount: element size in bytes is 2^eSize.
1398 int eSizeBytes = 1 << eSize;
1399 int totNumBytes = numStructElems * eSizeBytes;
1400 assert(totNumBytes <= 64);
1401
1402 // The guiding principle here is that no more than 16 bytes can be
1403 // transferred at a time
// Round up: a trailing partial chunk (residuum) needs its own access.
1404 int numMemMicroops = totNumBytes / 16;
1405 int residuum = totNumBytes % 16;
1406 if (residuum)
1407 ++numMemMicroops;
1408 numMicroops += numMemMicroops;
1409
// Two marshalling micro-ops when more than 32 bytes are stored, else one.
1410 int numMarshalMicroops = totNumBytes > 32 ? 2 : 1;
1411 numMicroops += numMarshalMicroops;
1412
1414 unsigned uopIdx = 0;
1415
// Marshalling micro-ops come first; the loop index is passed as the step
// argument.
1416 for (int i = 0; i < numMarshalMicroops; ++i) {
1417 microOps[uopIdx++] = new MicroPackNeon64(
1418 machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
1419 numStructElems, index, i /* step */, replicate);
1420 }
1421
1422 uint32_t memaccessFlags = (MMU::ArmFlags)eSize | MMU::AllowUnaligned;
1423
// All accesses but the last are full 16-byte transfers; i is kept after
// the loop so the final access continues from the same index.
1424 int i = 0;
1425 for (; i < numMemMicroops - 1; ++i) {
1426 microOps[uopIdx++] = new MicroNeonStore64(
1427 machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
1428 baseIsSP, 16 /* accSize */, eSize);
1429 }
// Final access: residuum bytes, or 16 when the total is a multiple of 16.
1430 microOps[uopIdx++] = new MicroNeonStore64(
1431 machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
1432 residuum ? residuum : 16 /* accSize */, eSize);
1433
1434 // Writeback microop: the post-increment amount is encoded in "Rm": a
1435 // 64-bit general register OR as '11111' for an immediate value equal to
1436 // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
1437 if (wb) {
1438 if (rm != int_reg::X31) {
1439 microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
1440 UXTX, 0);
1441 } else {
1442 microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
1443 totNumBytes);
1444 }
1445 }
1446
// The number of micro-ops emitted must match the count computed above.
1447 assert(uopIdx == numMicroops);
1448
// Every micro-op except the last is flagged delayed-commit; the first and
// last are additionally tagged as such.
1449 for (int i = 0; i < numMicroops - 1; i++) {
1450 microOps[i]->setDelayedCommit();
1451 }
1452 microOps[0]->setFirstMicroop();
1453 microOps[numMicroops - 1]->setLastMicroop();
1454}
1455
// MacroVFPMemOp constructor (this listing starts on the second line of the
// signature). It emits `count` register transfers -- one micro-op each when
// single is true, otherwise a DB/DT pair each -- choosing load or store
// micro-ops from the load flag, followed by an optional add/sub on rn when
// writeback is true.
// NOTE(review): the allocation of microOps is not visible in this listing.
1457 OpClass __opClass, RegIndex rn,
1458 RegIndex vd, bool single, bool up,
1459 bool writeback, bool load, uint32_t offset) :
1460 PredMacroOp(mnem, machInst, __opClass)
1461{
// i indexes the next free slot in microOps.
1462 int i = 0;
1463
1464 // The lowest order bit selects fldmx (set) or fldmd (clear). These seem
1465 // to be functionally identical except that fldmx is deprecated. For now
1466 // we'll assume they're otherwise interchangeable.
// Integer division drops the low bit of offset in the non-single case.
1467 int count = (single ? offset : (offset / 2));
1468 numMicroops = count * (single ? 1 : 2) + (writeback ? 1 : 0);
1470
// addr is the displacement handed to each micro-op; when counting down it
// starts at 4 * offset.
1471 int64_t addr = 0;
1472
1473 if (!up)
1474 addr = 4 * offset;
1475
// tempUp is the direction flag actually passed to the micro-ops; it may
// be flipped inside the loop, whereas up itself is left unchanged.
1476 bool tempUp = up;
1477 for (int j = 0; j < count; j++) {
1478 if (load) {
1479 if (single) {
1480 microOps[i++] = new MicroLdrFpUop(machInst, vd++, rn,
1481 tempUp, addr);
1482 } else {
// NOTE(review): the second micro-op's displacement adjustment is chosen
// from `up`, while the direction flag passed is `tempUp`; the two differ
// only after the polarity flip below -- confirm this is intended.
1483 microOps[i++] = new MicroLdrDBFpUop(machInst, vd++, rn,
1484 tempUp, addr);
1485 microOps[i++] = new MicroLdrDTFpUop(machInst, vd++, rn, tempUp,
1486 addr + (up ? 4 : -4));
1487 }
1488 } else {
1489 if (single) {
1490 microOps[i++] = new MicroStrFpUop(machInst, vd++, rn,
1491 tempUp, addr);
1492 } else {
1493 microOps[i++] = new MicroStrDBFpUop(machInst, vd++, rn,
1494 tempUp, addr);
1495 microOps[i++] = new MicroStrDTFpUop(machInst, vd++, rn, tempUp,
1496 addr + (up ? 4 : -4));
1497 }
1498 }
// Step the displacement by 4 (single) or 8 (pair) in the current direction.
1499 if (!tempUp) {
1500 addr -= (single ? 4 : 8);
1501 // The microops don't handle negative displacement, so if we
1502 // hit zero, flip polarity and start adding.
1503 if (addr <= 0) {
1504 tempUp = true;
1505 addr = -addr;
1506 }
1507 } else {
1508 addr += (single ? 4 : 8);
1509 }
1510 }
1511
// Writeback adjusts rn by 4 * offset, added or subtracted according to up.
1512 if (writeback) {
1513 if (up) {
1514 microOps[i++] =
1515 new MicroAddiUop(machInst, rn, rn, 4 * offset);
1516 } else {
1517 microOps[i++] =
1518 new MicroSubiUop(machInst, rn, rn, 4 * offset);
1519 }
1520 }
1521
// The number of micro-ops emitted must match the count computed above.
1522 assert(numMicroops == i);
1523 microOps[0]->setFirstMicroop();
1524 microOps[numMicroops - 1]->setLastMicroop();
1525
// Walk the array up to (not including) the micro-op flagged last, marking
// each one delayed-commit; each entry must be a MicroOp.
1526 for (StaticInstPtr *curUop = microOps;
1527 !(*curUop)->isLastMicroop(); curUop++) {
1528 MicroOp * uopPtr = dynamic_cast<MicroOp *>(curUop->get());
1529 assert(uopPtr);
1530 uopPtr->setDelayedCommit();
1531 }
1532}
1533
// Builds and returns a disassembly string containing the integer registers
// ura and urb followed by the immediate formatted as "#<imm>", separated
// by ", ".
// NOTE(review): the class-qualified name line and the statement between
// the stream declaration and the first printIntReg are not visible in this
// listing.
1534std::string
1536 Addr pc, const loader::SymbolTable *symtab) const
1537{
1538 std::stringstream ss;
1540 printIntReg(ss, ura);
1541 ss << ", ";
1542 printIntReg(ss, urb);
1543 ss << ", ";
1544 ccprintf(ss, "#%d", imm);
1545 return ss.str();
1546}
1547
// Builds and returns a disassembly string containing the integer registers
// ura and urb followed by the immediate formatted as "#<imm>", separated
// by ", ". The visible body matches the preceding generateDisassembly
// definition statement for statement.
// NOTE(review): the class-qualified name line and the statement between
// the stream declaration and the first printIntReg are not visible in this
// listing.
1548std::string
1550 Addr pc, const loader::SymbolTable *symtab) const
1551{
1552 std::stringstream ss;
1554 printIntReg(ss, ura);
1555 ss << ", ";
1556 printIntReg(ss, urb);
1557 ss << ", ";
1558 ccprintf(ss, "#%d", imm);
1559 return ss.str();
1560}
1561
// Builds and returns a disassembly string whose visible operand text is
// the fixed literal "[PC,CPSR]"; no register fields are printed here.
// NOTE(review): the class-qualified name line and the statement preceding
// the literal are not visible in this listing.
1562std::string
1564 Addr pc, const loader::SymbolTable *symtab) const
1565{
1566 std::stringstream ss;
1568 ss << "[PC,CPSR]";
1569 return ss.str();
1570}
1571
// Builds and returns a disassembly string containing the integer registers
// ura and urb separated by ", ". The separator is written with ccprintf
// here rather than operator<<.
// NOTE(review): the class-qualified name line, the statement before the
// first printIntReg and the statement before the return are not visible in
// this listing.
1572std::string
1574 Addr pc, const loader::SymbolTable *symtab) const
1575{
1576 std::stringstream ss;
1578 printIntReg(ss, ura);
1579 ccprintf(ss, ", ");
1580 printIntReg(ss, urb);
1582 return ss.str();
1583}
1584
// Builds and returns a disassembly string containing the integer registers
// ura and urb separated by ", ".
// NOTE(review): the class-qualified name line and the statement between
// the stream declaration and the first printIntReg are not visible in this
// listing.
1585std::string
1587 Addr pc, const loader::SymbolTable *symtab) const
1588{
1589 std::stringstream ss;
1591 printIntReg(ss, ura);
1592 ss << ", ";
1593 printIntReg(ss, urb);
1594 return ss.str();
1595}
1596
// Builds and returns a disassembly string containing the three integer
// registers ura, urb and urc, separated by ", ".
// NOTE(review): the class-qualified name line and the statement between
// the stream declaration and the first printIntReg are not visible in this
// listing.
1597std::string
1599 Addr pc, const loader::SymbolTable *symtab) const
1600{
1601 std::stringstream ss;
1603 printIntReg(ss, ura);
1604 ss << ", ";
1605 printIntReg(ss, urb);
1606 ss << ", ";
1607 printIntReg(ss, urc);
1608 return ss.str();
1609}
1610
// Builds and returns a disassembly string of the form
// "<reg>, [<urb>, #<imm>]". The first register is printed with
// printIntReg(ura) unless isFloating() is true.
// NOTE(review): the class-qualified name line, the statement before the
// isFloating() test and the statement executed when isFloating() is true
// are not visible in this listing.
1611std::string
1613 Addr pc, const loader::SymbolTable *symtab) const
1614{
1615 std::stringstream ss;
1617 if (isFloating())
1619 else
1620 printIntReg(ss, ura);
// Base register and immediate are wrapped in square brackets.
1621 ss << ", [";
1622 printIntReg(ss, urb);
1623 ss << ", ";
1624 ccprintf(ss, "#%d", imm);
1625 ss << "]";
1626 return ss.str();
1627}
1628
// Builds and returns a disassembly string that ends with
// ", [<urb>, #<imm>]"; a "," is written between two earlier items.
// NOTE(review): the class-qualified name line and the statements that
// print the items on either side of the "," are not visible in this
// listing.
1629std::string
1631 Addr pc, const loader::SymbolTable *symtab) const
1632{
1633 std::stringstream ss;
1636 ss << ",";
// Base register and immediate are wrapped in square brackets.
1638 ss << ", [";
1639 printIntReg(ss, urb);
1640 ss << ", ";
1641 ccprintf(ss, "#%d", imm);
1642 ss << "]";
1643 return ss.str();
1644}
1645
1646} // namespace ArmISA
1647} // namespace gem5
void printExtendOperand(bool firstOperand, std::ostream &os, RegIndex rm, ArmExtendType type, int64_t shiftAmt) const
void printMnemonic(std::ostream &os, const std::string &suffix="", bool withPred=true, bool withCond64=false, ConditionCode cond64=COND_UC) const
void printIntReg(std::ostream &os, RegIndex reg_idx, uint8_t opWidth=0) const
Print a register name for disassembly given the unique dependence tag number (FP or int).
void printFloatReg(std::ostream &os, RegIndex reg_idx) const
BigFpMemImmOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, bool load, RegIndex dest, RegIndex base, int64_t imm)
Definition macromem.cc:366
BigFpMemLitOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, RegIndex dest, int64_t imm)
Definition macromem.cc:465
BigFpMemPostOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, bool load, RegIndex dest, RegIndex base, int64_t imm)
Definition macromem.cc:387
BigFpMemPreOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, bool load, RegIndex dest, RegIndex base, int64_t imm)
Definition macromem.cc:413
BigFpMemRegOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, bool load, RegIndex dest, RegIndex base, RegIndex offset, ArmExtendType type, int64_t imm)
Definition macromem.cc:439
MacroMemOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, RegIndex rn, bool index, bool up, bool user, bool writeback, bool load, uint32_t reglist)
Definition macromem.cc:57
MacroVFPMemOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, RegIndex rn, RegIndex vd, bool single, bool up, bool writeback, bool load, uint32_t offset)
Definition macromem.cc:1456
std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const override
Internal function to generate disassembly string.
Definition macromem.cc:1535
std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const override
Internal function to generate disassembly string.
Definition macromem.cc:1549
std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const override
Internal function to generate disassembly string.
Definition macromem.cc:1586
std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const override
Internal function to generate disassembly string.
Definition macromem.cc:1598
std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const override
Internal function to generate disassembly string.
Definition macromem.cc:1573
std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const override
Internal function to generate disassembly string.
Definition macromem.cc:1612
std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const override
Internal function to generate disassembly string.
Definition macromem.cc:1630
Base class for Memory microops.
Definition macromem.hh:71
std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const override
Internal function to generate disassembly string.
Definition macromem.cc:1563
PairMemOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, uint32_t size, bool fp, bool load, bool noAlloc, bool signExt, bool exclusive, bool acrel, int64_t imm, AddrMode mode, RegIndex rn, RegIndex rt, RegIndex rt2)
Definition macromem.cc:244
void size(size_t newSize) override
Definition pred_inst.hh:383
StaticInstPtr * microOps
Definition pred_inst.hh:347
PredMacroOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass)
Constructor.
Definition pred_inst.hh:350
ConditionCode condCode
Definition pred_inst.hh:220
VldMultOp64(const char *mnem, ExtMachInst machInst, OpClass __opClass, RegIndex rn, RegIndex vd, RegIndex rm, uint8_t eSize, uint8_t dataSize, uint8_t numStructElems, uint8_t numRegs, bool wb)
Definition macromem.cc:1139
VldMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, unsigned elems, RegIndex rn, RegIndex vd, unsigned regs, unsigned inc, uint32_t size, uint32_t align, RegIndex rm)
Definition macromem.cc:478
VldSingleOp64(const char *mnem, ExtMachInst machInst, OpClass __opClass, RegIndex rn, RegIndex vd, RegIndex rm, uint8_t eSize, uint8_t dataSize, uint8_t numStructElems, uint8_t index, bool wb, bool replicate=false)
Definition macromem.cc:1309
VldSingleOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, bool all, unsigned elems, RegIndex rn, RegIndex vd, unsigned regs, unsigned inc, uint32_t size, uint32_t align, RegIndex rm, unsigned lane)
Definition macromem.cc:573
VstMultOp64(const char *mnem, ExtMachInst machInst, OpClass __opClass, RegIndex rn, RegIndex vd, RegIndex rm, uint8_t eSize, uint8_t dataSize, uint8_t numStructElems, uint8_t numRegs, bool wb)
Definition macromem.cc:1224
VstMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, unsigned width, RegIndex rn, RegIndex vd, unsigned regs, unsigned inc, uint32_t size, uint32_t align, RegIndex rm)
Definition macromem.cc:841
VstSingleOp64(const char *mnem, ExtMachInst machInst, OpClass __opClass, RegIndex rn, RegIndex vd, RegIndex rm, uint8_t eSize, uint8_t dataSize, uint8_t numStructElems, uint8_t index, bool wb, bool replicate=false)
Definition macromem.cc:1383
VstSingleOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, bool all, unsigned elems, RegIndex rn, RegIndex vd, unsigned regs, unsigned inc, uint32_t size, uint32_t align, RegIndex rm, unsigned lane)
Definition macromem.cc:936
Static instruction class for unknown (illegal) instructions.
Definition unknown.hh:53
bool isFloating() const
void setFlag(Flags f)
constexpr T bits(T val, unsigned first, unsigned last)
Extract the bitfield from position 'first' to 'last' (inclusive) from 'val' and right justify it.
Definition bitfield.hh:79
constexpr void replaceBits(T &val, unsigned first, unsigned last, B bit_val)
A convenience function to replace bits first to last of val with bit_val in place.
Definition bitfield.hh:216
#define panic(...)
This implements a cprintf based panic() function.
Definition logging.hh:220
constexpr auto & Sp
Definition int.hh:274
constexpr RegId Ureg0
Definition int.hh:229
constexpr auto & Pc
Definition int.hh:276
constexpr RegId Ureg1
Definition int.hh:230
constexpr RegId X31
Definition int.hh:271
static int regInMode(OperatingMode mode, int reg)
Definition int.hh:566
static unsigned int number_of_ones(int32_t val)
Definition macromem.hh:56
static bool isSP(RegIndex reg)
Definition int.hh:619
const int NumVecV8ArchRegs
Definition vec.hh:80
Bitfield< 15, 12 > rt
Definition types.hh:115
Bitfield< 4, 0 > mode
Definition misc_types.hh:74
const int VecSpecialElem
Definition vec.hh:87
Bitfield< 3, 0 > rm
Definition types.hh:118
Bitfield< 7, 0 > imm
Definition types.hh:132
Bitfield< 21 > writeback
Definition types.hh:126
Bitfield< 7 > i
Definition misc_types.hh:67
Bitfield< 23, 0 > offset
Definition types.hh:144
Bitfield< 23 > up
Definition types.hh:124
Bitfield< 19, 16 > rn
Definition types.hh:113
static RegIndex makeSP(RegIndex reg)
Definition int.hh:605
@ COND_UC
Definition cc.hh:120
@ COND_AL
Definition cc.hh:119
Bitfield< 19, 16 > fp
Bitfield< 21 > ss
Definition misc_types.hh:60
Bitfield< 4 > pc
Bitfield< 30, 0 > index
Bitfield< 29 > vx
Definition misc.hh:75
Bitfield< 31, 0 > all
Definition types.hh:77
Bitfield< 11, 7 > vd
Definition types.hh:169
Bitfield< 5, 3 > reg
Definition types.hh:92
Bitfield< 3 > addr
Definition types.hh:84
Copyright (c) 2024 Arm Limited All rights reserved.
Definition binary32.hh:36
uint16_t RegIndex
Definition types.hh:176
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
Definition types.hh:147
RefCountingPtr< StaticInst > StaticInstPtr
void ccprintf(cp::Print &print)
Definition cprintf.hh:130
Utility functions and datatypes used by AArch64 NEON memory instructions.

Generated on Mon May 26 2025 09:18:57 for gem5 by doxygen 1.13.2