// macromem.cc — gem5 (DEVELOP-FOR-25.1) ARM macro memory operation
// definitions. (Doxygen page navigation header removed.)
1/*
2 * Copyright (c) 2010-2014, 2020 ARM Limited
3 * All rights reserved
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder. You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Copyright (c) 2007-2008 The Florida State University
15 * All rights reserved.
16 *
17 * Redistribution and use in source and binary forms, with or without
18 * modification, are permitted provided that the following conditions are
19 * met: redistributions of source code must retain the above copyright
20 * notice, this list of conditions and the following disclaimer;
21 * redistributions in binary form must reproduce the above copyright
22 * notice, this list of conditions and the following disclaimer in the
23 * documentation and/or other materials provided with the distribution;
24 * neither the name of the copyright holders nor the names of its
25 * contributors may be used to endorse or promote products derived from
26 * this software without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
29 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
31 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
32 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
33 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
34 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
35 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
36 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
37 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
38 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39 */
40
#include "arch/arm/insts/macromem.hh"

#include <sstream>

#include "arch/arm/generated/decoder.hh"
#include "base/compiler.hh"

49namespace gem5
50{
51
52using namespace ArmISAInst;
53
54namespace ArmISA
55{
56
58 OpClass __opClass, RegIndex rn,
59 bool index, bool up, bool user, bool writeback,
60 bool load, uint32_t reglist) :
61 PredMacroOp(mnem, machInst, __opClass)
62{
63 uint32_t regs = reglist;
64 uint32_t ones = number_of_ones(reglist);
65 uint32_t mem_ops = ones;
66
67 // Copy the base address register if we overwrite it, or if this instruction
68 // is basically a no-op (we have to do something)
69 bool copy_base = (bits(reglist, rn) && load) || !ones;
70 bool force_user = user & !bits(reglist, 15);
71 bool exception_ret = user & bits(reglist, 15);
72 bool pc_temp = load && writeback && bits(reglist, 15);
73
74 if (!ones) {
75 numMicroops = 1;
76 } else if (load) {
77 numMicroops = ((ones + 1) / 2)
78 + ((ones % 2 == 0 && exception_ret) ? 1 : 0)
79 + (copy_base ? 1 : 0)
80 + (writeback? 1 : 0)
81 + (pc_temp ? 1 : 0);
82 } else {
83 numMicroops = ones + (writeback ? 1 : 0);
84 }
85
87
88 uint32_t addr = 0;
89
90 if (!up)
91 addr = (ones << 2) - 4;
92
93 if (!index)
94 addr += 4;
95
97
98 // Add 0 to Rn and stick it in ureg0.
99 // This is equivalent to a move.
100 if (copy_base)
101 *uop++ = new MicroAddiUop(machInst, int_reg::Ureg0, rn, 0);
102
103 unsigned reg = 0;
104 while (mem_ops != 0) {
105 // Do load operations in pairs if possible
106 if (load && mem_ops >= 2 &&
107 !(mem_ops == 2 && bits(regs, int_reg::Pc) && exception_ret)) {
108 // 64-bit memory operation
109 // Find 2 set register bits (clear them after finding)
110 unsigned reg_idx1;
111 unsigned reg_idx2;
112
113 // Find the first register
114 while (!bits(regs, reg)) reg++;
115 replaceBits(regs, reg, 0);
116 reg_idx1 = force_user ? int_reg::regInMode(MODE_USER, reg) : reg;
117
118 // Find the second register
119 while (!bits(regs, reg)) reg++;
120 replaceBits(regs, reg, 0);
121 reg_idx2 = force_user ? int_reg::regInMode(MODE_USER, reg) : reg;
122
123 // Load into temp reg if necessary
124 if (reg_idx2 == int_reg::Pc && pc_temp)
125 reg_idx2 = int_reg::Ureg1;
126
127 // Actually load both registers from memory
128 *uop = new MicroLdr2Uop(machInst, reg_idx1, reg_idx2,
129 copy_base ? int_reg::Ureg0 : rn, up, addr);
130
131 if (!writeback && reg_idx2 == int_reg::Pc) {
132 // No writeback if idx==pc, set appropriate flags
133 (*uop)->setFlag(StaticInst::IsControl);
134 (*uop)->setFlag(StaticInst::IsIndirectControl);
135
136 if (!(condCode == COND_AL || condCode == COND_UC))
137 (*uop)->setFlag(StaticInst::IsCondControl);
138 else
139 (*uop)->setFlag(StaticInst::IsUncondControl);
140 }
141
142 if (up) addr += 8;
143 else addr -= 8;
144 mem_ops -= 2;
145 } else {
146 // 32-bit memory operation
147 // Find register for operation
148 unsigned reg_idx;
149 while (!bits(regs, reg)) reg++;
150 replaceBits(regs, reg, 0);
151 reg_idx = force_user ? int_reg::regInMode(MODE_USER, reg) : reg;
152
153 if (load) {
154 if (writeback && reg_idx == int_reg::Pc) {
155 // If this instruction changes the PC and performs a
156 // writeback, ensure the pc load/branch is the last uop.
157 // Load into a temp reg here.
158 *uop = new MicroLdrUop(machInst, int_reg::Ureg1,
159 copy_base ? int_reg::Ureg0 : rn, up, addr);
160 } else if (reg_idx == int_reg::Pc && exception_ret) {
161 // Special handling for exception return
162 *uop = new MicroLdrRetUop(machInst, reg_idx,
163 copy_base ? int_reg::Ureg0 : rn, up, addr);
164 } else {
165 // standard single load uop
166 *uop = new MicroLdrUop(machInst, reg_idx,
167 copy_base ? int_reg::Ureg0 : rn, up, addr);
168 }
169
170 // Loading pc as last operation? Set appropriate flags.
171 if (!writeback && reg_idx == int_reg::Pc) {
172 (*uop)->setFlag(StaticInst::IsControl);
173 (*uop)->setFlag(StaticInst::IsIndirectControl);
174
175 if (!(condCode == COND_AL || condCode == COND_UC))
176 (*uop)->setFlag(StaticInst::IsCondControl);
177 else
178 (*uop)->setFlag(StaticInst::IsUncondControl);
179 }
180 } else {
181 *uop = new MicroStrUop(machInst, reg_idx, rn, up, addr);
182 }
183
184 if (up) addr += 4;
185 else addr -= 4;
186 --mem_ops;
187 }
188
189 // Load/store micro-op generated, go to next uop
190 ++uop;
191 }
192
193 if (writeback && ones) {
194 // Perform writeback uop operation
195 if (up)
196 *uop++ = new MicroAddiUop(machInst, rn, rn, ones * 4);
197 else
198 *uop++ = new MicroSubiUop(machInst, rn, rn, ones * 4);
199
200 // Write PC after address writeback?
201 if (pc_temp) {
202 if (exception_ret) {
203 *uop = new MicroUopRegMovRet(machInst, 0, int_reg::Ureg1);
204 } else {
205 *uop = new MicroUopRegMov(
207 }
208 (*uop)->setFlag(StaticInst::IsControl);
209 (*uop)->setFlag(StaticInst::IsIndirectControl);
210
211 if (!(condCode == COND_AL || condCode == COND_UC))
212 (*uop)->setFlag(StaticInst::IsCondControl);
213 else
214 (*uop)->setFlag(StaticInst::IsUncondControl);
215
216 if (rn == int_reg::Sp)
217 (*uop)->setFlag(StaticInst::IsReturn);
218
219 ++uop;
220 }
221 }
222
223 --uop;
224 (*uop)->setLastMicroop();
225 microOps[0]->setFirstMicroop();
226
227 /* Take the control flags from the last microop for the macroop */
228 if ((*uop)->isControl())
229 setFlag(StaticInst::IsControl);
230 if ((*uop)->isCondCtrl())
231 setFlag(StaticInst::IsCondControl);
232 if ((*uop)->isUncondCtrl())
233 setFlag(StaticInst::IsUncondControl);
234 if ((*uop)->isIndirectCtrl())
235 setFlag(StaticInst::IsIndirectControl);
236 if ((*uop)->isReturn())
237 setFlag(StaticInst::IsReturn);
238
239 for (StaticInstPtr *uop = microOps; !(*uop)->isLastMicroop(); uop++) {
240 (*uop)->setDelayedCommit();
241 }
242}
243
244PairMemOp::PairMemOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
245 uint32_t size, bool fp, bool load, bool noAlloc,
246 bool signExt, bool exclusive, bool acrel,
247 int64_t imm, AddrMode mode,
249 PredMacroOp(mnem, machInst, __opClass),
250 mode(mode),
251 rn(rn),
252 rt(rt),
253 rt2(rt2),
254 imm(imm)
255{
256 bool post = (mode == AddrMd_PostIndex);
257 bool writeback = (mode != AddrMd_Offset);
258
259 if (load) {
260 // Use integer rounding to round up loads of size 4
261 numMicroops = (post ? 0 : 1) + ((size + 4) / 8) + (writeback ? 1 : 0);
262 } else {
263 numMicroops = (post ? 0 : 1) + (size / 4) + (writeback ? 1 : 0);
264 }
266
267 StaticInstPtr *uop = microOps;
268
269 rn = makeSP(rn);
270
271 if (!post) {
272 *uop++ = new MicroAddXiSpAlignUop(machInst, int_reg::Ureg0, rn,
273 post ? 0 : imm);
274 }
275
276 if (fp) {
277 if (size == 16) {
278 if (load) {
279 *uop++ = new MicroLdFp16Uop(machInst, rt,
280 post ? rn : int_reg::Ureg0, 0, noAlloc, exclusive,
281 acrel);
282 *uop++ = new MicroLdFp16Uop(machInst, rt2,
283 post ? rn : int_reg::Ureg0, 16, noAlloc, exclusive,
284 acrel);
285 } else {
286 *uop++ = new MicroStrQBFpXImmUop(machInst, rt,
287 post ? rn : int_reg::Ureg0, 0, noAlloc, exclusive,
288 acrel);
289 *uop++ = new MicroStrQTFpXImmUop(machInst, rt,
290 post ? rn : int_reg::Ureg0, 0, noAlloc, exclusive,
291 acrel);
292 *uop++ = new MicroStrQBFpXImmUop(machInst, rt2,
293 post ? rn : int_reg::Ureg0, 16, noAlloc, exclusive,
294 acrel);
295 *uop++ = new MicroStrQTFpXImmUop(machInst, rt2,
296 post ? rn : int_reg::Ureg0, 16, noAlloc, exclusive,
297 acrel);
298 }
299 } else if (size == 8) {
300 if (load) {
301 *uop++ = new MicroLdPairFp8Uop(machInst, rt, rt2,
302 post ? rn : int_reg::Ureg0, 0, noAlloc, exclusive,
303 acrel);
304 } else {
305 *uop++ = new MicroStrFpXImmUop(machInst, rt,
306 post ? rn : int_reg::Ureg0, 0, noAlloc, exclusive,
307 acrel);
308 *uop++ = new MicroStrFpXImmUop(machInst, rt2,
309 post ? rn : int_reg::Ureg0, 8, noAlloc, exclusive,
310 acrel);
311 }
312 } else if (size == 4) {
313 if (load) {
314 *uop++ = new MicroLdrDFpXImmUop(machInst, rt, rt2,
315 post ? rn : int_reg::Ureg0, 0, noAlloc, exclusive,
316 acrel);
317 } else {
318 *uop++ = new MicroStrDFpXImmUop(machInst, rt, rt2,
319 post ? rn : int_reg::Ureg0, 0, noAlloc, exclusive,
320 acrel);
321 }
322 }
323 } else {
324 if (size == 8) {
325 if (load) {
326 *uop++ = new MicroLdPairUop(machInst, rt, rt2,
327 post ? rn : int_reg::Ureg0, 0, noAlloc, exclusive,
328 acrel);
329 } else {
330 *uop++ = new MicroStrXImmUop(machInst, rt,
331 post ? rn : int_reg::Ureg0, 0, noAlloc, exclusive,
332 acrel);
333 *uop++ = new MicroStrXImmUop(machInst, rt2,
334 post ? rn : int_reg::Ureg0, size, noAlloc, exclusive,
335 acrel);
336 }
337 } else if (size == 4) {
338 if (load) {
339 if (signExt) {
340 *uop++ = new MicroLdrDSXImmUop(machInst, rt, rt2,
341 post ? rn : int_reg::Ureg0, 0, noAlloc, exclusive,
342 acrel);
343 } else {
344 *uop++ = new MicroLdrDUXImmUop(machInst, rt, rt2,
345 post ? rn : int_reg::Ureg0, 0, noAlloc, exclusive,
346 acrel);
347 }
348 } else {
349 *uop++ = new MicroStrDXImmUop(machInst, rt, rt2,
350 post ? rn : int_reg::Ureg0, 0, noAlloc, exclusive,
351 acrel);
352 }
353 }
354 }
355
356 if (writeback) {
357 *uop++ = new MicroAddXiUop(machInst, rn, post ? rn : int_reg::Ureg0,
358 post ? imm : 0);
359 }
360
361 assert(uop == &microOps[numMicroops]);
362 (*--uop)->setLastMicroop();
363 microOps[0]->setFirstMicroop();
364
365 for (StaticInstPtr *curUop = microOps;
366 !(*curUop)->isLastMicroop(); curUop++) {
367 (*curUop)->setDelayedCommit();
368 }
369}
370
372 OpClass __opClass, bool load, RegIndex dest,
373 RegIndex base, int64_t imm) :
374 PredMacroOp(mnem, machInst, __opClass)
375{
376 numMicroops = load ? 1 : 2;
378
379 StaticInstPtr *uop = microOps;
380
381 if (load) {
382 *uop = new MicroLdFp16Uop(machInst, dest, base, imm);
383 } else {
384 *uop = new MicroStrQBFpXImmUop(machInst, dest, base, imm);
385 (*uop)->setDelayedCommit();
386 *++uop = new MicroStrQTFpXImmUop(machInst, dest, base, imm);
387 }
388 (*uop)->setLastMicroop();
389 microOps[0]->setFirstMicroop();
390}
391
393 OpClass __opClass, bool load, RegIndex dest,
394 RegIndex base, int64_t imm) :
395 PredMacroOp(mnem, machInst, __opClass)
396{
397 numMicroops = load ? 2 : 3;
399
400 StaticInstPtr *uop = microOps;
401
402 if (load) {
403 *uop++ = new MicroLdFp16Uop(machInst, dest, base, 0);
404 } else {
405 *uop++= new MicroStrQBFpXImmUop(machInst, dest, base, 0);
406 *uop++ = new MicroStrQTFpXImmUop(machInst, dest, base, 0);
407 }
408 *uop = new MicroAddXiUop(machInst, base, base, imm);
409 (*uop)->setLastMicroop();
410 microOps[0]->setFirstMicroop();
411
412 for (StaticInstPtr *curUop = microOps;
413 !(*curUop)->isLastMicroop(); curUop++) {
414 (*curUop)->setDelayedCommit();
415 }
416}
417
419 OpClass __opClass, bool load, RegIndex dest,
420 RegIndex base, int64_t imm) :
421 PredMacroOp(mnem, machInst, __opClass)
422{
423 numMicroops = load ? 2 : 3;
425
426 StaticInstPtr *uop = microOps;
427
428 if (load) {
429 *uop++ = new MicroLdFp16Uop(machInst, dest, base, imm);
430 } else {
431 *uop++ = new MicroStrQBFpXImmUop(machInst, dest, base, imm);
432 *uop++ = new MicroStrQTFpXImmUop(machInst, dest, base, imm);
433 }
434 *uop = new MicroAddXiUop(machInst, base, base, imm);
435 (*uop)->setLastMicroop();
436 microOps[0]->setFirstMicroop();
437
438 for (StaticInstPtr *curUop = microOps;
439 !(*curUop)->isLastMicroop(); curUop++) {
440 (*curUop)->setDelayedCommit();
441 }
442}
443
445 OpClass __opClass, bool load, RegIndex dest,
447 ArmExtendType type, int64_t imm) :
448 PredMacroOp(mnem, machInst, __opClass)
449{
450 numMicroops = load ? 1 : 2;
452
453 StaticInstPtr *uop = microOps;
454
455 if (load) {
456 *uop = new MicroLdFp16RegUop(machInst, dest, base,
457 offset, type, imm);
458 } else {
459 *uop = new MicroStrQBFpXRegUop(machInst, dest, base,
460 offset, type, imm);
461 (*uop)->setDelayedCommit();
462 *++uop = new MicroStrQTFpXRegUop(machInst, dest, base,
463 offset, type, imm);
464 }
465
466 (*uop)->setLastMicroop();
467 microOps[0]->setFirstMicroop();
468}
469
471 OpClass __opClass, RegIndex dest,
472 int64_t imm) :
473 PredMacroOp(mnem, machInst, __opClass)
474{
475 numMicroops = 1;
477
478 microOps[0] = new MicroLdFp16LitUop(machInst, dest, imm);
479 microOps[0]->setLastMicroop();
480 microOps[0]->setFirstMicroop();
481}
482
483VldMultOp::VldMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
484 unsigned elems, RegIndex rn, RegIndex vd, unsigned regs,
485 unsigned inc, uint32_t size, uint32_t align, RegIndex rm) :
486 PredMacroOp(mnem, machInst, __opClass)
487{
488 assert(regs > 0 && regs <= 4);
489 assert(regs % elems == 0);
490
491 numMicroops = (regs > 2) ? 2 : 1;
492 bool wb = (rm != 15);
493 bool deinterleave = (elems > 1);
494
495 if (wb) numMicroops++;
496 if (deinterleave) numMicroops += (regs / elems);
498
499 RegIndex rMid = deinterleave ? VecSpecialElem : vd * 2;
500
501 uint32_t noAlign = 0;
502
503 unsigned uopIdx = 0;
504 switch (regs) {
505 case 4:
506 microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
507 size, machInst, rMid, rn, 0, align);
508 microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
509 size, machInst, rMid + 4, rn, 16, noAlign);
510 break;
511 case 3:
512 microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
513 size, machInst, rMid, rn, 0, align);
514 microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon8Uop>(
515 size, machInst, rMid + 4, rn, 16, noAlign);
516 break;
517 case 2:
518 microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
519 size, machInst, rMid, rn, 0, align);
520 break;
521 case 1:
522 microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon8Uop>(
523 size, machInst, rMid, rn, 0, align);
524 break;
525 default:
526 // Unknown number of registers
527 microOps[uopIdx++] = new Unknown(machInst);
528 }
529 if (wb) {
530 if (rm != 15 && rm != 13) {
531 microOps[uopIdx++] =
532 new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
533 } else {
534 microOps[uopIdx++] =
535 new MicroAddiUop(machInst, rn, rn, regs * 8);
536 }
537 }
538 if (deinterleave) {
539 switch (elems) {
540 case 4:
541 assert(regs == 4);
542 microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon8Uop>(
543 size, machInst, vd * 2, rMid, inc * 2);
544 break;
545 case 3:
546 assert(regs == 3);
547 microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon6Uop>(
548 size, machInst, vd * 2, rMid, inc * 2);
549 break;
550 case 2:
551 assert(regs == 4 || regs == 2);
552 if (regs == 4) {
553 microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
554 size, machInst, vd * 2, rMid, inc * 2);
555 microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
556 size, machInst, vd * 2 + 2, rMid + 4, inc * 2);
557 } else {
558 microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
559 size, machInst, vd * 2, rMid, inc * 2);
560 }
561 break;
562 default:
563 // Bad number of elements to deinterleave
564 microOps[uopIdx++] = new Unknown(machInst);
565 }
566 }
567 assert(uopIdx == numMicroops);
568
569 for (unsigned i = 0; i < numMicroops - 1; i++) {
570 MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
571 assert(uopPtr);
572 uopPtr->setDelayedCommit();
573 }
574 microOps[0]->setFirstMicroop();
575 microOps[numMicroops - 1]->setLastMicroop();
576}
577
579 OpClass __opClass, bool all, unsigned elems,
580 RegIndex rn, RegIndex vd, unsigned regs,
581 unsigned inc, uint32_t size, uint32_t align,
582 RegIndex rm, unsigned lane) :
583 PredMacroOp(mnem, machInst, __opClass)
584{
585 assert(regs > 0 && regs <= 4);
586 assert(regs % elems == 0);
587
588 unsigned eBytes = (1 << size);
589 unsigned loadSize = eBytes * elems;
590 [[maybe_unused]] unsigned loadRegs =
591 (loadSize + sizeof(uint32_t) - 1) / sizeof(uint32_t);
592
593 assert(loadRegs > 0 && loadRegs <= 4);
594
595 numMicroops = 1;
596 bool wb = (rm != 15);
597
598 if (wb) numMicroops++;
599 numMicroops += (regs / elems);
601
603
604 unsigned uopIdx = 0;
605 switch (loadSize) {
606 case 1:
607 microOps[uopIdx++] = new MicroLdrNeon1Uop<uint8_t>(
608 machInst, ufp0, rn, 0, align);
609 break;
610 case 2:
611 if (eBytes == 2) {
612 microOps[uopIdx++] = new MicroLdrNeon2Uop<uint16_t>(
613 machInst, ufp0, rn, 0, align);
614 } else {
615 microOps[uopIdx++] = new MicroLdrNeon2Uop<uint8_t>(
616 machInst, ufp0, rn, 0, align);
617 }
618 break;
619 case 3:
620 microOps[uopIdx++] = new MicroLdrNeon3Uop<uint8_t>(
621 machInst, ufp0, rn, 0, align);
622 break;
623 case 4:
624 switch (eBytes) {
625 case 1:
626 microOps[uopIdx++] = new MicroLdrNeon4Uop<uint8_t>(
627 machInst, ufp0, rn, 0, align);
628 break;
629 case 2:
630 microOps[uopIdx++] = new MicroLdrNeon4Uop<uint16_t>(
631 machInst, ufp0, rn, 0, align);
632 break;
633 case 4:
634 microOps[uopIdx++] = new MicroLdrNeon4Uop<uint32_t>(
635 machInst, ufp0, rn, 0, align);
636 break;
637 }
638 break;
639 case 6:
640 microOps[uopIdx++] = new MicroLdrNeon6Uop<uint16_t>(
641 machInst, ufp0, rn, 0, align);
642 break;
643 case 8:
644 switch (eBytes) {
645 case 2:
646 microOps[uopIdx++] = new MicroLdrNeon8Uop<uint16_t>(
647 machInst, ufp0, rn, 0, align);
648 break;
649 case 4:
650 microOps[uopIdx++] = new MicroLdrNeon8Uop<uint32_t>(
651 machInst, ufp0, rn, 0, align);
652 break;
653 }
654 break;
655 case 12:
656 microOps[uopIdx++] = new MicroLdrNeon12Uop<uint32_t>(
657 machInst, ufp0, rn, 0, align);
658 break;
659 case 16:
660 microOps[uopIdx++] = new MicroLdrNeon16Uop<uint32_t>(
661 machInst, ufp0, rn, 0, align);
662 break;
663 default:
664 // Unrecognized load size
665 microOps[uopIdx++] = new Unknown(machInst);
666 }
667 if (wb) {
668 if (rm != 15 && rm != 13) {
669 microOps[uopIdx++] =
670 new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
671 } else {
672 microOps[uopIdx++] =
673 new MicroAddiUop(machInst, rn, rn, loadSize);
674 }
675 }
676 switch (elems) {
677 case 4:
678 assert(regs == 4);
679 switch (size) {
680 case 0:
681 if (all) {
682 microOps[uopIdx++] = new MicroUnpackAllNeon2to8Uop<uint8_t>(
683 machInst, vd * 2, ufp0, inc * 2);
684 } else {
685 microOps[uopIdx++] = new MicroUnpackNeon2to8Uop<uint8_t>(
686 machInst, vd * 2, ufp0, inc * 2, lane);
687 }
688 break;
689 case 1:
690 if (all) {
691 microOps[uopIdx++] = new MicroUnpackAllNeon2to8Uop<uint16_t>(
692 machInst, vd * 2, ufp0, inc * 2);
693 } else {
694 microOps[uopIdx++] = new MicroUnpackNeon2to8Uop<uint16_t>(
695 machInst, vd * 2, ufp0, inc * 2, lane);
696 }
697 break;
698 case 2:
699 if (all) {
700 microOps[uopIdx++] = new MicroUnpackAllNeon4to8Uop<uint32_t>(
701 machInst, vd * 2, ufp0, inc * 2);
702 } else {
703 microOps[uopIdx++] = new MicroUnpackNeon4to8Uop<uint32_t>(
704 machInst, vd * 2, ufp0, inc * 2, lane);
705 }
706 break;
707 default:
708 // Bad size
709 microOps[uopIdx++] = new Unknown(machInst);
710 break;
711 }
712 break;
713 case 3:
714 assert(regs == 3);
715 switch (size) {
716 case 0:
717 if (all) {
718 microOps[uopIdx++] = new MicroUnpackAllNeon2to6Uop<uint8_t>(
719 machInst, vd * 2, ufp0, inc * 2);
720 } else {
721 microOps[uopIdx++] = new MicroUnpackNeon2to6Uop<uint8_t>(
722 machInst, vd * 2, ufp0, inc * 2, lane);
723 }
724 break;
725 case 1:
726 if (all) {
727 microOps[uopIdx++] = new MicroUnpackAllNeon2to6Uop<uint16_t>(
728 machInst, vd * 2, ufp0, inc * 2);
729 } else {
730 microOps[uopIdx++] = new MicroUnpackNeon2to6Uop<uint16_t>(
731 machInst, vd * 2, ufp0, inc * 2, lane);
732 }
733 break;
734 case 2:
735 if (all) {
736 microOps[uopIdx++] = new MicroUnpackAllNeon4to6Uop<uint32_t>(
737 machInst, vd * 2, ufp0, inc * 2);
738 } else {
739 microOps[uopIdx++] = new MicroUnpackNeon4to6Uop<uint32_t>(
740 machInst, vd * 2, ufp0, inc * 2, lane);
741 }
742 break;
743 default:
744 // Bad size
745 microOps[uopIdx++] = new Unknown(machInst);
746 break;
747 }
748 break;
749 case 2:
750 assert(regs == 2);
751 assert(loadRegs <= 2);
752 switch (size) {
753 case 0:
754 if (all) {
755 microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint8_t>(
756 machInst, vd * 2, ufp0, inc * 2);
757 } else {
758 microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint8_t>(
759 machInst, vd * 2, ufp0, inc * 2, lane);
760 }
761 break;
762 case 1:
763 if (all) {
764 microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint16_t>(
765 machInst, vd * 2, ufp0, inc * 2);
766 } else {
767 microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint16_t>(
768 machInst, vd * 2, ufp0, inc * 2, lane);
769 }
770 break;
771 case 2:
772 if (all) {
773 microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint32_t>(
774 machInst, vd * 2, ufp0, inc * 2);
775 } else {
776 microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint32_t>(
777 machInst, vd * 2, ufp0, inc * 2, lane);
778 }
779 break;
780 default:
781 // Bad size
782 microOps[uopIdx++] = new Unknown(machInst);
783 break;
784 }
785 break;
786 case 1:
787 assert(regs == 1 || (all && regs == 2));
788 assert(loadRegs <= 2);
789 for (unsigned offset = 0; offset < regs; offset++) {
790 switch (size) {
791 case 0:
792 if (all) {
793 microOps[uopIdx++] =
794 new MicroUnpackAllNeon2to2Uop<uint8_t>(
795 machInst, (vd + offset) * 2, ufp0, inc * 2);
796 } else {
797 microOps[uopIdx++] =
798 new MicroUnpackNeon2to2Uop<uint8_t>(
799 machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
800 }
801 break;
802 case 1:
803 if (all) {
804 microOps[uopIdx++] =
805 new MicroUnpackAllNeon2to2Uop<uint16_t>(
806 machInst, (vd + offset) * 2, ufp0, inc * 2);
807 } else {
808 microOps[uopIdx++] =
809 new MicroUnpackNeon2to2Uop<uint16_t>(
810 machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
811 }
812 break;
813 case 2:
814 if (all) {
815 microOps[uopIdx++] =
816 new MicroUnpackAllNeon2to2Uop<uint32_t>(
817 machInst, (vd + offset) * 2, ufp0, inc * 2);
818 } else {
819 microOps[uopIdx++] =
820 new MicroUnpackNeon2to2Uop<uint32_t>(
821 machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
822 }
823 break;
824 default:
825 // Bad size
826 microOps[uopIdx++] = new Unknown(machInst);
827 break;
828 }
829 }
830 break;
831 default:
832 // Bad number of elements to unpack
833 microOps[uopIdx++] = new Unknown(machInst);
834 }
835 assert(uopIdx == numMicroops);
836
837 for (unsigned i = 0; i < numMicroops - 1; i++) {
838 MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
839 assert(uopPtr);
840 uopPtr->setDelayedCommit();
841 }
842 microOps[0]->setFirstMicroop();
843 microOps[numMicroops - 1]->setLastMicroop();
844}
845
846VstMultOp::VstMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
847 unsigned elems, RegIndex rn, RegIndex vd, unsigned regs,
848 unsigned inc, uint32_t size, uint32_t align, RegIndex rm) :
849 PredMacroOp(mnem, machInst, __opClass)
850{
851 assert(regs > 0 && regs <= 4);
852 assert(regs % elems == 0);
853
854 numMicroops = (regs > 2) ? 2 : 1;
855 bool wb = (rm != 15);
856 bool interleave = (elems > 1);
857
858 if (wb) numMicroops++;
859 if (interleave) numMicroops += (regs / elems);
861
862 uint32_t noAlign = 0;
863
864 RegIndex rMid = interleave ? VecSpecialElem : vd * 2;
865
866 unsigned uopIdx = 0;
867 if (interleave) {
868 switch (elems) {
869 case 4:
870 assert(regs == 4);
871 microOps[uopIdx++] = newNeonMixInst<MicroInterNeon8Uop>(
872 size, machInst, rMid, vd * 2, inc * 2);
873 break;
874 case 3:
875 assert(regs == 3);
876 microOps[uopIdx++] = newNeonMixInst<MicroInterNeon6Uop>(
877 size, machInst, rMid, vd * 2, inc * 2);
878 break;
879 case 2:
880 assert(regs == 4 || regs == 2);
881 if (regs == 4) {
882 microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
883 size, machInst, rMid, vd * 2, inc * 2);
884 microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
885 size, machInst, rMid + 4, vd * 2 + 2, inc * 2);
886 } else {
887 microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
888 size, machInst, rMid, vd * 2, inc * 2);
889 }
890 break;
891 default:
892 // Bad number of elements to interleave
893 microOps[uopIdx++] = new Unknown(machInst);
894 }
895 }
896 switch (regs) {
897 case 4:
898 microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
899 size, machInst, rMid, rn, 0, align);
900 microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
901 size, machInst, rMid + 4, rn, 16, noAlign);
902 break;
903 case 3:
904 microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
905 size, machInst, rMid, rn, 0, align);
906 microOps[uopIdx++] = newNeonMemInst<MicroStrNeon8Uop>(
907 size, machInst, rMid + 4, rn, 16, noAlign);
908 break;
909 case 2:
910 microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
911 size, machInst, rMid, rn, 0, align);
912 break;
913 case 1:
914 microOps[uopIdx++] = newNeonMemInst<MicroStrNeon8Uop>(
915 size, machInst, rMid, rn, 0, align);
916 break;
917 default:
918 // Unknown number of registers
919 microOps[uopIdx++] = new Unknown(machInst);
920 }
921 if (wb) {
922 if (rm != 15 && rm != 13) {
923 microOps[uopIdx++] =
924 new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
925 } else {
926 microOps[uopIdx++] =
927 new MicroAddiUop(machInst, rn, rn, regs * 8);
928 }
929 }
930 assert(uopIdx == numMicroops);
931
932 for (unsigned i = 0; i < numMicroops - 1; i++) {
933 MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
934 assert(uopPtr);
935 uopPtr->setDelayedCommit();
936 }
937 microOps[0]->setFirstMicroop();
938 microOps[numMicroops - 1]->setLastMicroop();
939}
940
942 OpClass __opClass, bool all, unsigned elems,
943 RegIndex rn, RegIndex vd, unsigned regs,
944 unsigned inc, uint32_t size, uint32_t align,
945 RegIndex rm, unsigned lane) :
946 PredMacroOp(mnem, machInst, __opClass)
947{
948 assert(!all);
949 assert(regs > 0 && regs <= 4);
950 assert(regs % elems == 0);
951
952 unsigned eBytes = (1 << size);
953 unsigned storeSize = eBytes * elems;
954 [[maybe_unused]] unsigned storeRegs =
955 (storeSize + sizeof(uint32_t) - 1) / sizeof(uint32_t);
956
957 assert(storeRegs > 0 && storeRegs <= 4);
958
959 numMicroops = 1;
960 bool wb = (rm != 15);
961
962 if (wb) numMicroops++;
963 numMicroops += (regs / elems);
965
967
968 unsigned uopIdx = 0;
969 switch (elems) {
970 case 4:
971 assert(regs == 4);
972 switch (size) {
973 case 0:
974 microOps[uopIdx++] = new MicroPackNeon8to2Uop<uint8_t>(
975 machInst, ufp0, vd * 2, inc * 2, lane);
976 break;
977 case 1:
978 microOps[uopIdx++] = new MicroPackNeon8to2Uop<uint16_t>(
979 machInst, ufp0, vd * 2, inc * 2, lane);
980 break;
981 case 2:
982 microOps[uopIdx++] = new MicroPackNeon8to4Uop<uint32_t>(
983 machInst, ufp0, vd * 2, inc * 2, lane);
984 break;
985 default:
986 // Bad size
987 microOps[uopIdx++] = new Unknown(machInst);
988 break;
989 }
990 break;
991 case 3:
992 assert(regs == 3);
993 switch (size) {
994 case 0:
995 microOps[uopIdx++] = new MicroPackNeon6to2Uop<uint8_t>(
996 machInst, ufp0, vd * 2, inc * 2, lane);
997 break;
998 case 1:
999 microOps[uopIdx++] = new MicroPackNeon6to2Uop<uint16_t>(
1000 machInst, ufp0, vd * 2, inc * 2, lane);
1001 break;
1002 case 2:
1003 microOps[uopIdx++] = new MicroPackNeon6to4Uop<uint32_t>(
1004 machInst, ufp0, vd * 2, inc * 2, lane);
1005 break;
1006 default:
1007 // Bad size
1008 microOps[uopIdx++] = new Unknown(machInst);
1009 break;
1010 }
1011 break;
1012 case 2:
1013 assert(regs == 2);
1014 assert(storeRegs <= 2);
1015 switch (size) {
1016 case 0:
1017 microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint8_t>(
1018 machInst, ufp0, vd * 2, inc * 2, lane);
1019 break;
1020 case 1:
1021 microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint16_t>(
1022 machInst, ufp0, vd * 2, inc * 2, lane);
1023 break;
1024 case 2:
1025 microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint32_t>(
1026 machInst, ufp0, vd * 2, inc * 2, lane);
1027 break;
1028 default:
1029 // Bad size
1030 microOps[uopIdx++] = new Unknown(machInst);
1031 break;
1032 }
1033 break;
1034 case 1:
1035 assert(regs == 1 || (all && regs == 2));
1036 assert(storeRegs <= 2);
1037 for (unsigned offset = 0; offset < regs; offset++) {
1038 switch (size) {
1039 case 0:
1040 microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint8_t>(
1041 machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
1042 break;
1043 case 1:
1044 microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint16_t>(
1045 machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
1046 break;
1047 case 2:
1048 microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint32_t>(
1049 machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
1050 break;
1051 default:
1052 // Bad size
1053 microOps[uopIdx++] = new Unknown(machInst);
1054 break;
1055 }
1056 }
1057 break;
1058 default:
1059 // Bad number of elements to unpack
1060 microOps[uopIdx++] = new Unknown(machInst);
1061 }
1062 switch (storeSize) {
1063 case 1:
1064 microOps[uopIdx++] = new MicroStrNeon1Uop<uint8_t>(
1065 machInst, ufp0, rn, 0, align);
1066 break;
1067 case 2:
1068 if (eBytes == 2) {
1069 microOps[uopIdx++] = new MicroStrNeon2Uop<uint16_t>(
1070 machInst, ufp0, rn, 0, align);
1071 } else {
1072 microOps[uopIdx++] = new MicroStrNeon2Uop<uint8_t>(
1073 machInst, ufp0, rn, 0, align);
1074 }
1075 break;
1076 case 3:
1077 microOps[uopIdx++] = new MicroStrNeon3Uop<uint8_t>(
1078 machInst, ufp0, rn, 0, align);
1079 break;
1080 case 4:
1081 switch (eBytes) {
1082 case 1:
1083 microOps[uopIdx++] = new MicroStrNeon4Uop<uint8_t>(
1084 machInst, ufp0, rn, 0, align);
1085 break;
1086 case 2:
1087 microOps[uopIdx++] = new MicroStrNeon4Uop<uint16_t>(
1088 machInst, ufp0, rn, 0, align);
1089 break;
1090 case 4:
1091 microOps[uopIdx++] = new MicroStrNeon4Uop<uint32_t>(
1092 machInst, ufp0, rn, 0, align);
1093 break;
1094 }
1095 break;
1096 case 6:
1097 microOps[uopIdx++] = new MicroStrNeon6Uop<uint16_t>(
1098 machInst, ufp0, rn, 0, align);
1099 break;
1100 case 8:
1101 switch (eBytes) {
1102 case 2:
1103 microOps[uopIdx++] = new MicroStrNeon8Uop<uint16_t>(
1104 machInst, ufp0, rn, 0, align);
1105 break;
1106 case 4:
1107 microOps[uopIdx++] = new MicroStrNeon8Uop<uint32_t>(
1108 machInst, ufp0, rn, 0, align);
1109 break;
1110 }
1111 break;
1112 case 12:
1113 microOps[uopIdx++] = new MicroStrNeon12Uop<uint32_t>(
1114 machInst, ufp0, rn, 0, align);
1115 break;
1116 case 16:
1117 microOps[uopIdx++] = new MicroStrNeon16Uop<uint32_t>(
1118 machInst, ufp0, rn, 0, align);
1119 break;
1120 default:
1121 // Bad store size
1122 microOps[uopIdx++] = new Unknown(machInst);
1123 }
1124 if (wb) {
1125 if (rm != 15 && rm != 13) {
1126 microOps[uopIdx++] =
1127 new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
1128 } else {
1129 microOps[uopIdx++] =
1130 new MicroAddiUop(machInst, rn, rn, storeSize);
1131 }
1132 }
1133 assert(uopIdx == numMicroops);
1134
1135 for (unsigned i = 0; i < numMicroops - 1; i++) {
1136 MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
1137 assert(uopPtr);
1138 uopPtr->setDelayedCommit();
1139 }
1140 microOps[0]->setFirstMicroop();
1141 microOps[numMicroops - 1]->setLastMicroop();
1142}
1143
1145 OpClass __opClass, RegIndex rn, RegIndex vd,
1146 RegIndex rm, uint8_t eSize, uint8_t dataSize,
1147 uint8_t numStructElems, uint8_t numRegs, bool wb) :
1148 PredMacroOp(mnem, machInst, __opClass)
1149{
1151 RegIndex rnsp = (RegIndex) makeSP((RegIndex) rn);
1152 bool baseIsSP = isSP((RegIndex) rnsp);
1153
1154 numMicroops = wb ? 1 : 0;
1155
1156 int totNumBytes = numRegs * dataSize / 8;
1157 assert(totNumBytes <= 64);
1158
1159 // The guiding principle here is that no more than 16 bytes can be
1160 // transferred at a time
1161 int numMemMicroops = totNumBytes / 16;
1162 int residuum = totNumBytes % 16;
1163 if (residuum)
1164 ++numMemMicroops;
1165 numMicroops += numMemMicroops;
1166
1167 int numMarshalMicroops = numRegs / 2 + (numRegs % 2 ? 1 : 0);
1168 numMicroops += numMarshalMicroops;
1169
1171 unsigned uopIdx = 0;
1172 uint32_t memaccessFlags = (MMU::ArmFlags)eSize | MMU::AllowUnaligned;
1173
1174 int i = 0;
1175 for (; i < numMemMicroops - 1; ++i) {
1176 microOps[uopIdx++] = new MicroNeonLoad64(
1177 machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
1178 baseIsSP, 16 /* accSize */, eSize);
1179 }
1180 microOps[uopIdx++] = new MicroNeonLoad64(
1181 machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
1182 residuum ? residuum : 16 /* accSize */, eSize);
1183
1184 // Writeback microop: the post-increment amount is encoded in "Rm": a
1185 // 64-bit general register OR as '11111' for an immediate value equal to
1186 // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
1187 if (wb) {
1188 if (rm != int_reg::X31) {
1189 microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
1190 UXTX, 0);
1191 } else {
1192 microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
1193 totNumBytes);
1194 }
1195 }
1196
1197 for (int i = 0; i < numMarshalMicroops; ++i) {
1198 switch(numRegs) {
1199 case 1: microOps[uopIdx++] = new MicroDeintNeon64_1Reg(
1200 machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
1201 numStructElems, 1, i /* step */);
1202 break;
1203 case 2: microOps[uopIdx++] = new MicroDeintNeon64_2Reg(
1204 machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
1205 numStructElems, 2, i /* step */);
1206 break;
1207 case 3: microOps[uopIdx++] = new MicroDeintNeon64_3Reg(
1208 machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
1209 numStructElems, 3, i /* step */);
1210 break;
1211 case 4: microOps[uopIdx++] = new MicroDeintNeon64_4Reg(
1212 machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
1213 numStructElems, 4, i /* step */);
1214 break;
1215 default: panic("Invalid number of registers");
1216 }
1217
1218 }
1219
1220 assert(uopIdx == numMicroops);
1221
1222 for (int i = 0; i < numMicroops - 1; ++i) {
1223 microOps[i]->setDelayedCommit();
1224 }
1225 microOps[0]->setFirstMicroop();
1226 microOps[numMicroops - 1]->setLastMicroop();
1227}
1228
1230 OpClass __opClass, RegIndex rn, RegIndex vd,
1231 RegIndex rm, uint8_t eSize, uint8_t dataSize,
1232 uint8_t numStructElems, uint8_t numRegs, bool wb) :
1233 PredMacroOp(mnem, machInst, __opClass)
1234{
1236 RegIndex rnsp = (RegIndex) makeSP((RegIndex) rn);
1237 bool baseIsSP = isSP((RegIndex) rnsp);
1238
1239 numMicroops = wb ? 1 : 0;
1240
1241 int totNumBytes = numRegs * dataSize / 8;
1242 assert(totNumBytes <= 64);
1243
1244 // The guiding principle here is that no more than 16 bytes can be
1245 // transferred at a time
1246 int numMemMicroops = totNumBytes / 16;
1247 int residuum = totNumBytes % 16;
1248 if (residuum)
1249 ++numMemMicroops;
1250 numMicroops += numMemMicroops;
1251
1252 int numMarshalMicroops = totNumBytes > 32 ? 2 : 1;
1253 numMicroops += numMarshalMicroops;
1254
1256 unsigned uopIdx = 0;
1257
1258 for (int i = 0; i < numMarshalMicroops; ++i) {
1259 switch (numRegs) {
1260 case 1: microOps[uopIdx++] = new MicroIntNeon64_1Reg(
1261 machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
1262 numStructElems, 1, i /* step */);
1263 break;
1264 case 2: microOps[uopIdx++] = new MicroIntNeon64_2Reg(
1265 machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
1266 numStructElems, 2, i /* step */);
1267 break;
1268 case 3: microOps[uopIdx++] = new MicroIntNeon64_3Reg(
1269 machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
1270 numStructElems, 3, i /* step */);
1271 break;
1272 case 4: microOps[uopIdx++] = new MicroIntNeon64_4Reg(
1273 machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
1274 numStructElems, 4, i /* step */);
1275 break;
1276 default: panic("Invalid number of registers");
1277 }
1278 }
1279
1280 uint32_t memaccessFlags = (MMU::ArmFlags)eSize | MMU::AllowUnaligned;
1281
1282 int i = 0;
1283 for (; i < numMemMicroops - 1; ++i) {
1284 microOps[uopIdx++] = new MicroNeonStore64(
1285 machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
1286 baseIsSP, 16 /* accSize */, eSize);
1287 }
1288 microOps[uopIdx++] = new MicroNeonStore64(
1289 machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
1290 residuum ? residuum : 16 /* accSize */, eSize);
1291
1292 // Writeback microop: the post-increment amount is encoded in "Rm": a
1293 // 64-bit general register OR as '11111' for an immediate value equal to
1294 // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
1295 if (wb) {
1296 if (rm != int_reg::X31) {
1297 microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
1298 UXTX, 0);
1299 } else {
1300 microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
1301 totNumBytes);
1302 }
1303 }
1304
1305 assert(uopIdx == numMicroops);
1306
1307 for (int i = 0; i < numMicroops - 1; i++) {
1308 microOps[i]->setDelayedCommit();
1309 }
1310 microOps[0]->setFirstMicroop();
1311 microOps[numMicroops - 1]->setLastMicroop();
1312}
1313
1315 OpClass __opClass, RegIndex rn, RegIndex vd,
1316 RegIndex rm, uint8_t eSize, uint8_t dataSize,
1317 uint8_t numStructElems, uint8_t index, bool wb,
1318 bool replicate) :
1319 PredMacroOp(mnem, machInst, __opClass),
1320 eSize(0), dataSize(0), numStructElems(0), index(0),
1321 wb(false), replicate(false)
1322
1323{
1325 RegIndex rnsp = (RegIndex) makeSP((RegIndex) rn);
1326 bool baseIsSP = isSP((RegIndex) rnsp);
1327
1328 numMicroops = wb ? 1 : 0;
1329
1330 int eSizeBytes = 1 << eSize;
1331 int totNumBytes = numStructElems * eSizeBytes;
1332 assert(totNumBytes <= 64);
1333
1334 // The guiding principle here is that no more than 16 bytes can be
1335 // transferred at a time
1336 int numMemMicroops = totNumBytes / 16;
1337 int residuum = totNumBytes % 16;
1338 if (residuum)
1339 ++numMemMicroops;
1340 numMicroops += numMemMicroops;
1341
1342 int numMarshalMicroops = numStructElems / 2 + (numStructElems % 2 ? 1 : 0);
1343 numMicroops += numMarshalMicroops;
1344
1346 unsigned uopIdx = 0;
1347
1348 uint32_t memaccessFlags = (MMU::ArmFlags)eSize | MMU::AllowUnaligned;
1349
1350 int i = 0;
1351 for (; i < numMemMicroops - 1; ++i) {
1352 microOps[uopIdx++] = new MicroNeonLoad64(
1353 machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
1354 baseIsSP, 16 /* accSize */, eSize);
1355 }
1356 microOps[uopIdx++] = new MicroNeonLoad64(
1357 machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
1358 residuum ? residuum : 16 /* accSize */, eSize);
1359
1360 // Writeback microop: the post-increment amount is encoded in "Rm": a
1361 // 64-bit general register OR as '11111' for an immediate value equal to
1362 // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
1363 if (wb) {
1364 if (rm != int_reg::X31) {
1365 microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
1366 UXTX, 0);
1367 } else {
1368 microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
1369 totNumBytes);
1370 }
1371 }
1372
1373 for (int i = 0; i < numMarshalMicroops; ++i) {
1374 microOps[uopIdx++] = new MicroUnpackNeon64(
1375 machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
1376 numStructElems, index, i /* step */, replicate);
1377 }
1378
1379 assert(uopIdx == numMicroops);
1380
1381 for (int i = 0; i < numMicroops - 1; i++) {
1382 microOps[i]->setDelayedCommit();
1383 }
1384 microOps[0]->setFirstMicroop();
1385 microOps[numMicroops - 1]->setLastMicroop();
1386}
1387
1389 OpClass __opClass, RegIndex rn, RegIndex vd,
1390 RegIndex rm, uint8_t eSize, uint8_t dataSize,
1391 uint8_t numStructElems, uint8_t index, bool wb,
1392 bool replicate) :
1393 PredMacroOp(mnem, machInst, __opClass),
1394 eSize(0), dataSize(0), numStructElems(0), index(0),
1395 wb(false), replicate(false)
1396{
1398 RegIndex rnsp = (RegIndex) makeSP((RegIndex) rn);
1399 bool baseIsSP = isSP((RegIndex) rnsp);
1400
1401 numMicroops = wb ? 1 : 0;
1402
1403 int eSizeBytes = 1 << eSize;
1404 int totNumBytes = numStructElems * eSizeBytes;
1405 assert(totNumBytes <= 64);
1406
1407 // The guiding principle here is that no more than 16 bytes can be
1408 // transferred at a time
1409 int numMemMicroops = totNumBytes / 16;
1410 int residuum = totNumBytes % 16;
1411 if (residuum)
1412 ++numMemMicroops;
1413 numMicroops += numMemMicroops;
1414
1415 int numMarshalMicroops = totNumBytes > 32 ? 2 : 1;
1416 numMicroops += numMarshalMicroops;
1417
1419 unsigned uopIdx = 0;
1420
1421 for (int i = 0; i < numMarshalMicroops; ++i) {
1422 microOps[uopIdx++] = new MicroPackNeon64(
1423 machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
1424 numStructElems, index, i /* step */, replicate);
1425 }
1426
1427 uint32_t memaccessFlags = (MMU::ArmFlags)eSize | MMU::AllowUnaligned;
1428
1429 int i = 0;
1430 for (; i < numMemMicroops - 1; ++i) {
1431 microOps[uopIdx++] = new MicroNeonStore64(
1432 machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
1433 baseIsSP, 16 /* accsize */, eSize);
1434 }
1435 microOps[uopIdx++] = new MicroNeonStore64(
1436 machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
1437 residuum ? residuum : 16 /* accSize */, eSize);
1438
1439 // Writeback microop: the post-increment amount is encoded in "Rm": a
1440 // 64-bit general register OR as '11111' for an immediate value equal to
1441 // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
1442 if (wb) {
1443 if (rm != int_reg::X31) {
1444 microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
1445 UXTX, 0);
1446 } else {
1447 microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
1448 totNumBytes);
1449 }
1450 }
1451
1452 assert(uopIdx == numMicroops);
1453
1454 for (int i = 0; i < numMicroops - 1; i++) {
1455 microOps[i]->setDelayedCommit();
1456 }
1457 microOps[0]->setFirstMicroop();
1458 microOps[numMicroops - 1]->setLastMicroop();
1459}
1460
1462 OpClass __opClass, RegIndex rn,
1463 RegIndex vd, bool single, bool up,
1464 bool writeback, bool load, uint32_t offset) :
1465 PredMacroOp(mnem, machInst, __opClass)
1466{
1467 int i = 0;
1468
1469 // The lowest order bit selects fldmx (set) or fldmd (clear). These seem
1470 // to be functionally identical except that fldmx is deprecated. For now
1471 // we'll assume they're otherwise interchangable.
1472 int count = (single ? offset : (offset / 2));
1473 numMicroops = count * (single ? 1 : 2) + (writeback ? 1 : 0);
1475
1476 int64_t addr = 0;
1477
1478 if (!up)
1479 addr = 4 * offset;
1480
1481 bool tempUp = up;
1482 for (int j = 0; j < count; j++) {
1483 if (load) {
1484 if (single) {
1485 microOps[i++] = new MicroLdrFpUop(machInst, vd++, rn,
1486 tempUp, addr);
1487 } else {
1488 microOps[i++] = new MicroLdrDBFpUop(machInst, vd++, rn,
1489 tempUp, addr);
1490 microOps[i++] = new MicroLdrDTFpUop(machInst, vd++, rn, tempUp,
1491 addr + (up ? 4 : -4));
1492 }
1493 } else {
1494 if (single) {
1495 microOps[i++] = new MicroStrFpUop(machInst, vd++, rn,
1496 tempUp, addr);
1497 } else {
1498 microOps[i++] = new MicroStrDBFpUop(machInst, vd++, rn,
1499 tempUp, addr);
1500 microOps[i++] = new MicroStrDTFpUop(machInst, vd++, rn, tempUp,
1501 addr + (up ? 4 : -4));
1502 }
1503 }
1504 if (!tempUp) {
1505 addr -= (single ? 4 : 8);
1506 // The microops don't handle negative displacement, so turn if we
1507 // hit zero, flip polarity and start adding.
1508 if (addr <= 0) {
1509 tempUp = true;
1510 addr = -addr;
1511 }
1512 } else {
1513 addr += (single ? 4 : 8);
1514 }
1515 }
1516
1517 if (writeback) {
1518 if (up) {
1519 microOps[i++] =
1520 new MicroAddiUop(machInst, rn, rn, 4 * offset);
1521 } else {
1522 microOps[i++] =
1523 new MicroSubiUop(machInst, rn, rn, 4 * offset);
1524 }
1525 }
1526
1527 assert(numMicroops == i);
1528 microOps[0]->setFirstMicroop();
1529 microOps[numMicroops - 1]->setLastMicroop();
1530
1531 for (StaticInstPtr *curUop = microOps;
1532 !(*curUop)->isLastMicroop(); curUop++) {
1533 MicroOp * uopPtr = dynamic_cast<MicroOp *>(curUop->get());
1534 assert(uopPtr);
1535 uopPtr->setDelayedCommit();
1536 }
1537}
1538
1539std::string
1541 Addr pc, const loader::SymbolTable *symtab) const
1542{
1543 std::stringstream ss;
1545 printIntReg(ss, ura);
1546 ss << ", ";
1547 printIntReg(ss, urb);
1548 ss << ", ";
1549 ccprintf(ss, "#%d", imm);
1550 return ss.str();
1551}
1552
1553std::string
1555 Addr pc, const loader::SymbolTable *symtab) const
1556{
1557 std::stringstream ss;
1559 printIntReg(ss, ura);
1560 ss << ", ";
1561 printIntReg(ss, urb);
1562 ss << ", ";
1563 ccprintf(ss, "#%d", imm);
1564 return ss.str();
1565}
1566
1567std::string
1569 Addr pc, const loader::SymbolTable *symtab) const
1570{
1571 std::stringstream ss;
1573 ss << "[PC,CPSR]";
1574 return ss.str();
1575}
1576
1577std::string
1579 Addr pc, const loader::SymbolTable *symtab) const
1580{
1581 std::stringstream ss;
1583 printIntReg(ss, ura);
1584 ccprintf(ss, ", ");
1585 printIntReg(ss, urb);
1587 return ss.str();
1588}
1589
1590std::string
1592 Addr pc, const loader::SymbolTable *symtab) const
1593{
1594 std::stringstream ss;
1596 printIntReg(ss, ura);
1597 ss << ", ";
1598 printIntReg(ss, urb);
1599 return ss.str();
1600}
1601
1602std::string
1604 Addr pc, const loader::SymbolTable *symtab) const
1605{
1606 std::stringstream ss;
1608 printIntReg(ss, ura);
1609 ss << ", ";
1610 printIntReg(ss, urb);
1611 ss << ", ";
1612 printIntReg(ss, urc);
1613 return ss.str();
1614}
1615
1616std::string
1618 Addr pc, const loader::SymbolTable *symtab) const
1619{
1620 std::stringstream ss;
1622 if (isFloating())
1624 else
1625 printIntReg(ss, ura);
1626 ss << ", [";
1627 printIntReg(ss, urb);
1628 ss << ", ";
1629 ccprintf(ss, "#%d", imm);
1630 ss << "]";
1631 return ss.str();
1632}
1633
1634std::string
1636 Addr pc, const loader::SymbolTable *symtab) const
1637{
1638 std::stringstream ss;
1641 ss << ",";
1643 ss << ", [";
1644 printIntReg(ss, urb);
1645 ss << ", ";
1646 ccprintf(ss, "#%d", imm);
1647 ss << "]";
1648 return ss.str();
1649}
1650
1651std::string
1653 Addr pc, const loader::SymbolTable *symtab) const
1654{
1655 std::stringstream ss;
1657 printIntReg(ss, rt);
1658 ss << ", ";
1659 printIntReg(ss, rt2);
1660 ss << ", [";
1661 printIntReg(ss, rn, 64);
1662 if (mode == AddrMd_PostIndex) {
1663 ss << "]";
1664 }
1665 if (imm) {
1666 ccprintf(ss, ", #%d", imm);
1667 }
1668 if (mode != AddrMd_PostIndex) {
1669 ss << "]";
1670 }
1671 if (mode == AddrMd_PreIndex) {
1672 ss << "!";
1673 }
1674 return ss.str();
1675}
1676
1677} // namespace ArmISA
1678} // namespace gem5
void printExtendOperand(bool firstOperand, std::ostream &os, RegIndex rm, ArmExtendType type, int64_t shiftAmt) const
void printMnemonic(std::ostream &os, const std::string &suffix="", bool withPred=true, bool withCond64=false, ConditionCode cond64=COND_UC) const
void printIntReg(std::ostream &os, RegIndex reg_idx, uint8_t opWidth=0) const
Print a register name for disassembly given the unique dependence tag number (FP or int).
void printFloatReg(std::ostream &os, RegIndex reg_idx) const
BigFpMemImmOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, bool load, RegIndex dest, RegIndex base, int64_t imm)
Definition macromem.cc:371
BigFpMemLitOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, RegIndex dest, int64_t imm)
Definition macromem.cc:470
BigFpMemPostOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, bool load, RegIndex dest, RegIndex base, int64_t imm)
Definition macromem.cc:392
BigFpMemPreOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, bool load, RegIndex dest, RegIndex base, int64_t imm)
Definition macromem.cc:418
BigFpMemRegOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, bool load, RegIndex dest, RegIndex base, RegIndex offset, ArmExtendType type, int64_t imm)
Definition macromem.cc:444
MacroMemOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, RegIndex rn, bool index, bool up, bool user, bool writeback, bool load, uint32_t reglist)
Definition macromem.cc:57
MacroVFPMemOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, RegIndex rn, RegIndex vd, bool single, bool up, bool writeback, bool load, uint32_t offset)
Definition macromem.cc:1461
std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const override
Internal function to generate disassembly string.
Definition macromem.cc:1540
std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const override
Internal function to generate disassembly string.
Definition macromem.cc:1554
std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const override
Internal function to generate disassembly string.
Definition macromem.cc:1591
std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const override
Internal function to generate disassembly string.
Definition macromem.cc:1603
std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const override
Internal function to generate disassembly string.
Definition macromem.cc:1578
std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const override
Internal function to generate disassembly string.
Definition macromem.cc:1617
std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const override
Internal function to generate disassembly string.
Definition macromem.cc:1635
Base class for Memory microops.
Definition macromem.hh:71
std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const override
Internal function to generate disassembly string.
Definition macromem.cc:1568
std::string generateDisassembly(Addr pc, const loader::SymbolTable *symtab) const override
Internal function to generate disassembly string.
Definition macromem.cc:1652
PairMemOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, uint32_t size, bool fp, bool load, bool noAlloc, bool signExt, bool exclusive, bool acrel, int64_t imm, AddrMode mode, RegIndex rn, RegIndex rt, RegIndex rt2)
Definition macromem.cc:244
void size(size_t newSize) override
Definition pred_inst.hh:383
StaticInstPtr * microOps
Definition pred_inst.hh:347
PredMacroOp(const char *mnem, ExtMachInst _machInst, OpClass __opClass)
Constructor.
Definition pred_inst.hh:350
ConditionCode condCode
Definition pred_inst.hh:220
VldMultOp64(const char *mnem, ExtMachInst machInst, OpClass __opClass, RegIndex rn, RegIndex vd, RegIndex rm, uint8_t eSize, uint8_t dataSize, uint8_t numStructElems, uint8_t numRegs, bool wb)
Definition macromem.cc:1144
VldMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, unsigned elems, RegIndex rn, RegIndex vd, unsigned regs, unsigned inc, uint32_t size, uint32_t align, RegIndex rm)
Definition macromem.cc:483
VldSingleOp64(const char *mnem, ExtMachInst machInst, OpClass __opClass, RegIndex rn, RegIndex vd, RegIndex rm, uint8_t eSize, uint8_t dataSize, uint8_t numStructElems, uint8_t index, bool wb, bool replicate=false)
Definition macromem.cc:1314
VldSingleOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, bool all, unsigned elems, RegIndex rn, RegIndex vd, unsigned regs, unsigned inc, uint32_t size, uint32_t align, RegIndex rm, unsigned lane)
Definition macromem.cc:578
VstMultOp64(const char *mnem, ExtMachInst machInst, OpClass __opClass, RegIndex rn, RegIndex vd, RegIndex rm, uint8_t eSize, uint8_t dataSize, uint8_t numStructElems, uint8_t numRegs, bool wb)
Definition macromem.cc:1229
VstMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, unsigned width, RegIndex rn, RegIndex vd, unsigned regs, unsigned inc, uint32_t size, uint32_t align, RegIndex rm)
Definition macromem.cc:846
VstSingleOp64(const char *mnem, ExtMachInst machInst, OpClass __opClass, RegIndex rn, RegIndex vd, RegIndex rm, uint8_t eSize, uint8_t dataSize, uint8_t numStructElems, uint8_t index, bool wb, bool replicate=false)
Definition macromem.cc:1388
VstSingleOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, bool all, unsigned elems, RegIndex rn, RegIndex vd, unsigned regs, unsigned inc, uint32_t size, uint32_t align, RegIndex rm, unsigned lane)
Definition macromem.cc:941
Static instruction class for unknown (illegal) instructions.
Definition unknown.hh:53
bool isFloating() const
void setFlag(Flags f)
constexpr T bits(T val, unsigned first, unsigned last)
Extract the bitfield from position 'first' to 'last' (inclusive) from 'val' and right justify it.
Definition bitfield.hh:79
constexpr void replaceBits(T &val, unsigned first, unsigned last, B bit_val)
A convenience function to replace bits first to last of val with bit_val in place.
Definition bitfield.hh:216
#define panic(...)
This implements a cprintf based panic() function.
Definition logging.hh:220
constexpr auto & Sp
Definition int.hh:274
constexpr RegId Ureg0
Definition int.hh:229
constexpr auto & Pc
Definition int.hh:276
constexpr RegId Ureg1
Definition int.hh:230
constexpr RegId X31
Definition int.hh:271
static int regInMode(OperatingMode mode, int reg)
Definition int.hh:566
static unsigned int number_of_ones(int32_t val)
Definition macromem.hh:56
static bool isSP(RegIndex reg)
Definition int.hh:619
const int NumVecV8ArchRegs
Definition vec.hh:80
const int VecSpecialElem
Definition vec.hh:87
Bitfield< 3, 0 > rm
Definition types.hh:118
Bitfield< 7, 0 > imm
Definition types.hh:132
Bitfield< 21 > writeback
Definition types.hh:126
Bitfield< 7 > i
Definition misc_types.hh:67
Bitfield< 23, 0 > offset
Definition types.hh:144
Bitfield< 23 > up
Definition types.hh:124
Bitfield< 19, 16 > rn
Definition types.hh:113
static RegIndex makeSP(RegIndex reg)
Definition int.hh:605
@ COND_UC
Definition cc.hh:120
@ COND_AL
Definition cc.hh:119
Bitfield< 19, 16 > fp
Bitfield< 21 > ss
Definition misc_types.hh:60
Bitfield< 4 > pc
Bitfield< 30, 0 > index
Bitfield< 29 > vx
Definition misc.hh:75
Bitfield< 31, 0 > all
Definition types.hh:77
Bitfield< 11, 7 > vd
Definition types.hh:169
Bitfield< 5, 3 > reg
Definition types.hh:92
Bitfield< 3 > addr
Definition types.hh:84
Copyright (c) 2024 Arm Limited All rights reserved.
Definition binary32.hh:36
uint16_t RegIndex
Definition types.hh:176
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition types.hh:147
RefCountingPtr< StaticInst > StaticInstPtr
void ccprintf(cp::Print &print)
Definition cprintf.hh:130
Utility functions and datatypes used by AArch64 NEON memory instructions.

Generated on Mon Oct 27 2025 04:12:54 for gem5 by doxygen 1.14.0