/*
 * Copyright (c) 2010-2014, 2020 ARM Limited
 * All rights reserved
 *
 * The license below extends only to copyright in the software and shall
 * not be construed as granting a license to any other intellectual
 * property including but not limited to intellectual property relating
 * to a hardware implementation of the functionality of the software
 * licensed hereunder. You may use the software subject to the license
 * terms below provided that you ensure that this notice is replicated
 * unmodified and in its entirety in all distributions of the software,
 * modified or unmodified, in source code or in binary form.
 *
 * Copyright (c) 2007-2008 The Florida State University
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met: redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer;
 * redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution;
 * neither the name of the copyright holders nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include "arch/arm/insts/macromem.hh"

#include <sstream>

#include "arch/arm/generated/decoder.hh"
#include "arch/arm/insts/neon64_mem.hh"
#include "base/compiler.hh"

namespace gem5
{

using namespace ArmISAInst;

namespace ArmISA
{

MacroMemOp::MacroMemOp(const char *mnem, ExtMachInst machInst,
                       OpClass __opClass, RegIndex rn,
                       bool index, bool up, bool user, bool writeback,
                       bool load, uint32_t reglist) :
    PredMacroOp(mnem, machInst, __opClass)
{
    uint32_t regs = reglist;
    uint32_t ones = number_of_ones(reglist);
    uint32_t mem_ops = ones;

    // Copy the base address register if we overwrite it, or if this
    // instruction is basically a no-op (we have to do something)
    bool copy_base = (bits(reglist, rn) && load) || !ones;
    bool force_user = user & !bits(reglist, 15);
    bool exception_ret = user & bits(reglist, 15);
    bool pc_temp = load && writeback && bits(reglist, 15);

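    // Micro-op budget: loads are paired into 64-bit accesses, hence the
    // (ones + 1) / 2 memory ops; an exception return with an even register
    // count needs one extra op so the PC can be loaded on its own, and the
    // base copy, writeback, and deferred PC move each add one more.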
    if (!ones) {
        numMicroops = 1;
    } else if (load) {
        numMicroops = ((ones + 1) / 2)
                    + ((ones % 2 == 0 && exception_ret) ? 1 : 0)
                    + (copy_base ? 1 : 0)
                    + (writeback ? 1 : 0)
                    + (pc_temp ? 1 : 0);
    } else {
        numMicroops = ones + (writeback ? 1 : 0);
    }

    microOps = new StaticInstPtr[numMicroops];

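    // Compute the offset of the first access: decrementing transfers (!up)
    // start at the top word of the block, and the index flag shifts every
    // access by one word.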
    uint32_t addr = 0;

    if (!up)
        addr = (ones << 2) - 4;

    if (!index)
        addr += 4;

    StaticInstPtr *uop = microOps;

    // Add 0 to Rn and stick it in ureg0.
    // This is equivalent to a move.
    if (copy_base)
        *uop++ = new MicroAddiUop(machInst, int_reg::Ureg0, rn, 0);

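    // The register list is consumed from the LSB upward; each set bit is
    // cleared once it has been used, so 'reg' never has to rewind.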
    unsigned reg = 0;
    while (mem_ops != 0) {
        // Do load operations in pairs if possible
        if (load && mem_ops >= 2 &&
            !(mem_ops == 2 && bits(regs, int_reg::Pc) && exception_ret)) {
            // 64-bit memory operation
            // Find 2 set register bits (clear them after finding)
            unsigned reg_idx1;
            unsigned reg_idx2;

            // Find the first register
            while (!bits(regs, reg)) reg++;
            replaceBits(regs, reg, 0);
            reg_idx1 = force_user ? int_reg::regInMode(MODE_USER, reg) : reg;

            // Find the second register
            while (!bits(regs, reg)) reg++;
            replaceBits(regs, reg, 0);
            reg_idx2 = force_user ? int_reg::regInMode(MODE_USER, reg) : reg;

            // Load into temp reg if necessary
            if (reg_idx2 == int_reg::Pc && pc_temp)
                reg_idx2 = int_reg::Ureg1;

            // Actually load both registers from memory
            *uop = new MicroLdr2Uop(machInst, reg_idx1, reg_idx2,
                    copy_base ? int_reg::Ureg0 : rn, up, addr);

            if (!writeback && reg_idx2 == int_reg::Pc) {
                // No writeback if idx == pc, set appropriate flags
                (*uop)->setFlag(StaticInst::IsControl);
                (*uop)->setFlag(StaticInst::IsIndirectControl);

                if (!(condCode == COND_AL || condCode == COND_UC))
                    (*uop)->setFlag(StaticInst::IsCondControl);
                else
                    (*uop)->setFlag(StaticInst::IsUncondControl);
            }

            if (up) addr += 8;
            else addr -= 8;
            mem_ops -= 2;
        } else {
            // 32-bit memory operation
            // Find register for operation
            unsigned reg_idx;
            while (!bits(regs, reg)) reg++;
            replaceBits(regs, reg, 0);
            reg_idx = force_user ? int_reg::regInMode(MODE_USER, reg) : reg;

            if (load) {
                if (writeback && reg_idx == int_reg::Pc) {
                    // If this instruction changes the PC and performs a
                    // writeback, ensure the pc load/branch is the last uop.
                    // Load into a temp reg here.
                    *uop = new MicroLdrUop(machInst, int_reg::Ureg1,
                            copy_base ? int_reg::Ureg0 : rn, up, addr);
                } else if (reg_idx == int_reg::Pc && exception_ret) {
                    // Special handling for exception return
                    *uop = new MicroLdrRetUop(machInst, reg_idx,
                            copy_base ? int_reg::Ureg0 : rn, up, addr);
                } else {
                    // Standard single load uop
                    *uop = new MicroLdrUop(machInst, reg_idx,
                            copy_base ? int_reg::Ureg0 : rn, up, addr);
                }

                // Loading pc as last operation? Set appropriate flags.
                if (!writeback && reg_idx == int_reg::Pc) {
                    (*uop)->setFlag(StaticInst::IsControl);
                    (*uop)->setFlag(StaticInst::IsIndirectControl);

                    if (!(condCode == COND_AL || condCode == COND_UC))
                        (*uop)->setFlag(StaticInst::IsCondControl);
                    else
                        (*uop)->setFlag(StaticInst::IsUncondControl);
                }
            } else {
                *uop = new MicroStrUop(machInst, reg_idx, rn, up, addr);
            }

            if (up) addr += 4;
            else addr -= 4;
            --mem_ops;
        }

        // Load/store micro-op generated, go to next uop
        ++uop;
    }

    if (writeback && ones) {
        // Perform writeback uop operation
        if (up)
            *uop++ = new MicroAddiUop(machInst, rn, rn, ones * 4);
        else
            *uop++ = new MicroSubiUop(machInst, rn, rn, ones * 4);

        // Write PC after address writeback?
        if (pc_temp) {
            if (exception_ret) {
                *uop = new MicroUopRegMovRet(machInst, 0, int_reg::Ureg1);
            } else {
                *uop = new MicroUopRegMov(machInst, int_reg::Pc,
                                          int_reg::Ureg1);
            }
            (*uop)->setFlag(StaticInst::IsControl);
            (*uop)->setFlag(StaticInst::IsIndirectControl);

            if (!(condCode == COND_AL || condCode == COND_UC))
                (*uop)->setFlag(StaticInst::IsCondControl);
            else
                (*uop)->setFlag(StaticInst::IsUncondControl);

            if (rn == int_reg::Sp)
                (*uop)->setFlag(StaticInst::IsReturn);

            ++uop;
        }
    }

    --uop;
    (*uop)->setLastMicroop();
    microOps[0]->setFirstMicroop();

    /* Take the control flags from the last microop for the macroop */
    if ((*uop)->isControl())
        setFlag(StaticInst::IsControl);
    if ((*uop)->isCondCtrl())
        setFlag(StaticInst::IsCondControl);
    if ((*uop)->isUncondCtrl())
        setFlag(StaticInst::IsUncondControl);
    if ((*uop)->isIndirectCtrl())
        setFlag(StaticInst::IsIndirectControl);
    if ((*uop)->isReturn())
        setFlag(StaticInst::IsReturn);

    for (StaticInstPtr *uop = microOps; !(*uop)->isLastMicroop(); uop++) {
        (*uop)->setDelayedCommit();
    }
}

PairMemOp::PairMemOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
                     uint32_t size, bool fp, bool load, bool noAlloc,
                     bool signExt, bool exclusive, bool acrel,
                     int64_t imm, AddrMode mode,
                     RegIndex rn, RegIndex rt, RegIndex rt2) :
    PredMacroOp(mnem, machInst, __opClass)
{
    bool post = (mode == AddrMd_PostIndex);
    bool writeback = (mode != AddrMd_Offset);

    if (load) {
        // Use integer rounding to round up loads of size 4
        numMicroops = (post ? 0 : 1) + ((size + 4) / 8) + (writeback ? 1 : 0);
    } else {
        numMicroops = (post ? 0 : 1) + (size / 4) + (writeback ? 1 : 0);
    }
    microOps = new StaticInstPtr[numMicroops];

    StaticInstPtr *uop = microOps;

    rn = makeSP(rn);

    if (!post) {
        *uop++ = new MicroAddXiSpAlignUop(machInst, int_reg::Ureg0, rn,
                                          post ? 0 : imm);
    }

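    // From here the pair is broken into micro-ops: 128-bit loads move a
    // whole quadword per uop, while 128-bit stores are split into bottom
    // ("QB") and top ("QT") 64-bit halves.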
    if (fp) {
        if (size == 16) {
            if (load) {
                *uop++ = new MicroLdFp16Uop(machInst, rt,
                        post ? rn : int_reg::Ureg0, 0, noAlloc, exclusive,
                        acrel);
                *uop++ = new MicroLdFp16Uop(machInst, rt2,
                        post ? rn : int_reg::Ureg0, 16, noAlloc, exclusive,
                        acrel);
            } else {
                *uop++ = new MicroStrQBFpXImmUop(machInst, rt,
                        post ? rn : int_reg::Ureg0, 0, noAlloc, exclusive,
                        acrel);
                *uop++ = new MicroStrQTFpXImmUop(machInst, rt,
                        post ? rn : int_reg::Ureg0, 0, noAlloc, exclusive,
                        acrel);
                *uop++ = new MicroStrQBFpXImmUop(machInst, rt2,
                        post ? rn : int_reg::Ureg0, 16, noAlloc, exclusive,
                        acrel);
                *uop++ = new MicroStrQTFpXImmUop(machInst, rt2,
                        post ? rn : int_reg::Ureg0, 16, noAlloc, exclusive,
                        acrel);
            }
        } else if (size == 8) {
            if (load) {
                *uop++ = new MicroLdPairFp8Uop(machInst, rt, rt2,
                        post ? rn : int_reg::Ureg0, 0, noAlloc, exclusive,
                        acrel);
            } else {
                *uop++ = new MicroStrFpXImmUop(machInst, rt,
                        post ? rn : int_reg::Ureg0, 0, noAlloc, exclusive,
                        acrel);
                *uop++ = new MicroStrFpXImmUop(machInst, rt2,
                        post ? rn : int_reg::Ureg0, 8, noAlloc, exclusive,
                        acrel);
            }
        } else if (size == 4) {
            if (load) {
                *uop++ = new MicroLdrDFpXImmUop(machInst, rt, rt2,
                        post ? rn : int_reg::Ureg0, 0, noAlloc, exclusive,
                        acrel);
            } else {
                *uop++ = new MicroStrDFpXImmUop(machInst, rt, rt2,
                        post ? rn : int_reg::Ureg0, 0, noAlloc, exclusive,
                        acrel);
            }
        }
    } else {
        if (size == 8) {
            if (load) {
                *uop++ = new MicroLdPairUop(machInst, rt, rt2,
                        post ? rn : int_reg::Ureg0, 0, noAlloc, exclusive,
                        acrel);
            } else {
                *uop++ = new MicroStrXImmUop(machInst, rt,
                        post ? rn : int_reg::Ureg0, 0, noAlloc, exclusive,
                        acrel);
                *uop++ = new MicroStrXImmUop(machInst, rt2,
                        post ? rn : int_reg::Ureg0, size, noAlloc, exclusive,
                        acrel);
            }
        } else if (size == 4) {
            if (load) {
                if (signExt) {
                    *uop++ = new MicroLdrDSXImmUop(machInst, rt, rt2,
                            post ? rn : int_reg::Ureg0, 0, noAlloc, exclusive,
                            acrel);
                } else {
                    *uop++ = new MicroLdrDUXImmUop(machInst, rt, rt2,
                            post ? rn : int_reg::Ureg0, 0, noAlloc, exclusive,
                            acrel);
                }
            } else {
                *uop++ = new MicroStrDXImmUop(machInst, rt, rt2,
                        post ? rn : int_reg::Ureg0, 0, noAlloc, exclusive,
                        acrel);
            }
        }
    }

    if (writeback) {
        *uop++ = new MicroAddXiUop(machInst, rn, post ? rn : int_reg::Ureg0,
                                   post ? imm : 0);
    }

    assert(uop == &microOps[numMicroops]);
    (*--uop)->setLastMicroop();
    microOps[0]->setFirstMicroop();

    for (StaticInstPtr *curUop = microOps;
         !(*curUop)->isLastMicroop(); curUop++) {
        (*curUop)->setDelayedCommit();
    }
}

BigFpMemImmOp::BigFpMemImmOp(const char *mnem, ExtMachInst machInst,
                             OpClass __opClass, bool load, RegIndex dest,
                             RegIndex base, int64_t imm) :
    PredMacroOp(mnem, machInst, __opClass)
{
    numMicroops = load ? 1 : 2;
    microOps = new StaticInstPtr[numMicroops];

    StaticInstPtr *uop = microOps;

    if (load) {
        *uop = new MicroLdFp16Uop(machInst, dest, base, imm);
    } else {
        *uop = new MicroStrQBFpXImmUop(machInst, dest, base, imm);
        (*uop)->setDelayedCommit();
        *++uop = new MicroStrQTFpXImmUop(machInst, dest, base, imm);
    }
    (*uop)->setLastMicroop();
    microOps[0]->setFirstMicroop();
}

BigFpMemPostOp::BigFpMemPostOp(const char *mnem, ExtMachInst machInst,
                               OpClass __opClass, bool load, RegIndex dest,
                               RegIndex base, int64_t imm) :
    PredMacroOp(mnem, machInst, __opClass)
{
    numMicroops = load ? 2 : 3;
    microOps = new StaticInstPtr[numMicroops];

    StaticInstPtr *uop = microOps;

    if (load) {
        *uop++ = new MicroLdFp16Uop(machInst, dest, base, 0);
    } else {
        *uop++ = new MicroStrQBFpXImmUop(machInst, dest, base, 0);
        *uop++ = new MicroStrQTFpXImmUop(machInst, dest, base, 0);
    }
    *uop = new MicroAddXiUop(machInst, base, base, imm);
    (*uop)->setLastMicroop();
    microOps[0]->setFirstMicroop();

    for (StaticInstPtr *curUop = microOps;
         !(*curUop)->isLastMicroop(); curUop++) {
        (*curUop)->setDelayedCommit();
    }
}

BigFpMemPreOp::BigFpMemPreOp(const char *mnem, ExtMachInst machInst,
                             OpClass __opClass, bool load, RegIndex dest,
                             RegIndex base, int64_t imm) :
    PredMacroOp(mnem, machInst, __opClass)
{
    numMicroops = load ? 2 : 3;
    microOps = new StaticInstPtr[numMicroops];

    StaticInstPtr *uop = microOps;

    if (load) {
        *uop++ = new MicroLdFp16Uop(machInst, dest, base, imm);
    } else {
        *uop++ = new MicroStrQBFpXImmUop(machInst, dest, base, imm);
        *uop++ = new MicroStrQTFpXImmUop(machInst, dest, base, imm);
    }
    *uop = new MicroAddXiUop(machInst, base, base, imm);
    (*uop)->setLastMicroop();
    microOps[0]->setFirstMicroop();

    for (StaticInstPtr *curUop = microOps;
         !(*curUop)->isLastMicroop(); curUop++) {
        (*curUop)->setDelayedCommit();
    }
}

BigFpMemRegOp::BigFpMemRegOp(const char *mnem, ExtMachInst machInst,
                             OpClass __opClass, bool load, RegIndex dest,
                             RegIndex base, RegIndex offset,
                             ArmExtendType type, int64_t imm) :
    PredMacroOp(mnem, machInst, __opClass)
{
    numMicroops = load ? 1 : 2;
    microOps = new StaticInstPtr[numMicroops];

    StaticInstPtr *uop = microOps;

    if (load) {
        *uop = new MicroLdFp16RegUop(machInst, dest, base,
                                     offset, type, imm);
    } else {
        *uop = new MicroStrQBFpXRegUop(machInst, dest, base,
                                       offset, type, imm);
        (*uop)->setDelayedCommit();
        *++uop = new MicroStrQTFpXRegUop(machInst, dest, base,
                                         offset, type, imm);
    }

    (*uop)->setLastMicroop();
    microOps[0]->setFirstMicroop();
}

BigFpMemLitOp::BigFpMemLitOp(const char *mnem, ExtMachInst machInst,
                             OpClass __opClass, RegIndex dest,
                             int64_t imm) :
    PredMacroOp(mnem, machInst, __opClass)
{
    numMicroops = 1;
    microOps = new StaticInstPtr[numMicroops];

    microOps[0] = new MicroLdFp16LitUop(machInst, dest, imm);
    microOps[0]->setLastMicroop();
    microOps[0]->setFirstMicroop();
}

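// VLDn multi-register loads: memory uops first pull the raw bytes into the
// special scratch vector registers, an optional add uop performs writeback,
// and deinterleave uops then scatter the elements into the architectural
// destination registers.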
VldMultOp::VldMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
                     unsigned elems, RegIndex rn, RegIndex vd, unsigned regs,
                     unsigned inc, uint32_t size, uint32_t align, RegIndex rm) :
    PredMacroOp(mnem, machInst, __opClass)
{
    assert(regs > 0 && regs <= 4);
    assert(regs % elems == 0);

    numMicroops = (regs > 2) ? 2 : 1;
    bool wb = (rm != 15);
    bool deinterleave = (elems > 1);

    if (wb) numMicroops++;
    if (deinterleave) numMicroops += (regs / elems);
    microOps = new StaticInstPtr[numMicroops];

    RegIndex rMid = deinterleave ? VecSpecialElem : vd * 2;

    uint32_t noAlign = 0;

    unsigned uopIdx = 0;
    switch (regs) {
      case 4:
        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
                size, machInst, rMid, rn, 0, align);
        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
                size, machInst, rMid + 4, rn, 16, noAlign);
        break;
      case 3:
        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
                size, machInst, rMid, rn, 0, align);
        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon8Uop>(
                size, machInst, rMid + 4, rn, 16, noAlign);
        break;
      case 2:
        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
                size, machInst, rMid, rn, 0, align);
        break;
      case 1:
        microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon8Uop>(
                size, machInst, rMid, rn, 0, align);
        break;
      default:
        // Unknown number of registers
        microOps[uopIdx++] = new Unknown(machInst);
    }
    if (wb) {
        if (rm != 15 && rm != 13) {
            microOps[uopIdx++] =
                new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
        } else {
            microOps[uopIdx++] =
                new MicroAddiUop(machInst, rn, rn, regs * 8);
        }
    }
    if (deinterleave) {
        switch (elems) {
          case 4:
            assert(regs == 4);
            microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon8Uop>(
                    size, machInst, vd * 2, rMid, inc * 2);
            break;
          case 3:
            assert(regs == 3);
            microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon6Uop>(
                    size, machInst, vd * 2, rMid, inc * 2);
            break;
          case 2:
            assert(regs == 4 || regs == 2);
            if (regs == 4) {
                microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
                        size, machInst, vd * 2, rMid, inc * 2);
                microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
                        size, machInst, vd * 2 + 2, rMid + 4, inc * 2);
            } else {
                microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
                        size, machInst, vd * 2, rMid, inc * 2);
            }
            break;
          default:
            // Bad number of elements to deinterleave
            microOps[uopIdx++] = new Unknown(machInst);
        }
    }
    assert(uopIdx == numMicroops);

    for (unsigned i = 0; i < numMicroops - 1; i++) {
        MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
        assert(uopPtr);
        uopPtr->setDelayedCommit();
    }
    microOps[0]->setFirstMicroop();
    microOps[numMicroops - 1]->setLastMicroop();
}

VldSingleOp::VldSingleOp(const char *mnem, ExtMachInst machInst,
                         OpClass __opClass, bool all, unsigned elems,
                         RegIndex rn, RegIndex vd, unsigned regs,
                         unsigned inc, uint32_t size, uint32_t align,
                         RegIndex rm, unsigned lane) :
    PredMacroOp(mnem, machInst, __opClass)
{
    assert(regs > 0 && regs <= 4);
    assert(regs % elems == 0);

    unsigned eBytes = (1 << size);
    unsigned loadSize = eBytes * elems;
    [[maybe_unused]] unsigned loadRegs =
        (loadSize + sizeof(uint32_t) - 1) / sizeof(uint32_t);

    assert(loadRegs > 0 && loadRegs <= 4);

    numMicroops = 1;
    bool wb = (rm != 15);

    if (wb) numMicroops++;
    numMicroops += (regs / elems);
    microOps = new StaticInstPtr[numMicroops];

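    // Single-structure loads: one memory uop of loadSize bytes into the
    // scratch register, then unpack uops either broadcast the element to
    // all lanes ("all") or insert it into the selected lane.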
    RegIndex ufp0 = VecSpecialElem;

    unsigned uopIdx = 0;
    switch (loadSize) {
      case 1:
        microOps[uopIdx++] = new MicroLdrNeon1Uop<uint8_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 2:
        if (eBytes == 2) {
            microOps[uopIdx++] = new MicroLdrNeon2Uop<uint16_t>(
                    machInst, ufp0, rn, 0, align);
        } else {
            microOps[uopIdx++] = new MicroLdrNeon2Uop<uint8_t>(
                    machInst, ufp0, rn, 0, align);
        }
        break;
      case 3:
        microOps[uopIdx++] = new MicroLdrNeon3Uop<uint8_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 4:
        switch (eBytes) {
          case 1:
            microOps[uopIdx++] = new MicroLdrNeon4Uop<uint8_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 2:
            microOps[uopIdx++] = new MicroLdrNeon4Uop<uint16_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 4:
            microOps[uopIdx++] = new MicroLdrNeon4Uop<uint32_t>(
                    machInst, ufp0, rn, 0, align);
            break;
        }
        break;
      case 6:
        microOps[uopIdx++] = new MicroLdrNeon6Uop<uint16_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 8:
        switch (eBytes) {
          case 2:
            microOps[uopIdx++] = new MicroLdrNeon8Uop<uint16_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 4:
            microOps[uopIdx++] = new MicroLdrNeon8Uop<uint32_t>(
                    machInst, ufp0, rn, 0, align);
            break;
        }
        break;
      case 12:
        microOps[uopIdx++] = new MicroLdrNeon12Uop<uint32_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 16:
        microOps[uopIdx++] = new MicroLdrNeon16Uop<uint32_t>(
                machInst, ufp0, rn, 0, align);
        break;
      default:
        // Unrecognized load size
        microOps[uopIdx++] = new Unknown(machInst);
    }
    if (wb) {
        if (rm != 15 && rm != 13) {
            microOps[uopIdx++] =
                new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
        } else {
            microOps[uopIdx++] =
                new MicroAddiUop(machInst, rn, rn, loadSize);
        }
    }
    switch (elems) {
      case 4:
        assert(regs == 4);
        switch (size) {
          case 0:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to8Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to8Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 1:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to8Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to8Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 2:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon4to8Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon4to8Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          default:
            // Bad size
            microOps[uopIdx++] = new Unknown(machInst);
            break;
        }
        break;
      case 3:
        assert(regs == 3);
        switch (size) {
          case 0:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to6Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to6Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 1:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to6Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to6Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 2:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon4to6Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon4to6Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          default:
            // Bad size
            microOps[uopIdx++] = new Unknown(machInst);
            break;
        }
        break;
      case 2:
        assert(regs == 2);
        assert(loadRegs <= 2);
        switch (size) {
          case 0:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint8_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 1:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint16_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          case 2:
            if (all) {
                microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2);
            } else {
                microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint32_t>(
                        machInst, vd * 2, ufp0, inc * 2, lane);
            }
            break;
          default:
            // Bad size
            microOps[uopIdx++] = new Unknown(machInst);
            break;
        }
        break;
      case 1:
        assert(regs == 1 || (all && regs == 2));
        assert(loadRegs <= 2);
        for (unsigned offset = 0; offset < regs; offset++) {
            switch (size) {
              case 0:
                if (all) {
                    microOps[uopIdx++] =
                        new MicroUnpackAllNeon2to2Uop<uint8_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2);
                } else {
                    microOps[uopIdx++] =
                        new MicroUnpackNeon2to2Uop<uint8_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
                }
                break;
              case 1:
                if (all) {
                    microOps[uopIdx++] =
                        new MicroUnpackAllNeon2to2Uop<uint16_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2);
                } else {
                    microOps[uopIdx++] =
                        new MicroUnpackNeon2to2Uop<uint16_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
                }
                break;
              case 2:
                if (all) {
                    microOps[uopIdx++] =
                        new MicroUnpackAllNeon2to2Uop<uint32_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2);
                } else {
                    microOps[uopIdx++] =
                        new MicroUnpackNeon2to2Uop<uint32_t>(
                            machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
                }
                break;
              default:
                // Bad size
                microOps[uopIdx++] = new Unknown(machInst);
                break;
            }
        }
        break;
      default:
        // Bad number of elements to unpack
        microOps[uopIdx++] = new Unknown(machInst);
    }
    assert(uopIdx == numMicroops);

    for (unsigned i = 0; i < numMicroops - 1; i++) {
        MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
        assert(uopPtr);
        uopPtr->setDelayedCommit();
    }
    microOps[0]->setFirstMicroop();
    microOps[numMicroops - 1]->setLastMicroop();
}

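// VSTn runs the load flow in reverse: interleave uops gather the
// architectural registers into the scratch registers first, and the memory
// uops then write them out, followed by optional writeback.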
VstMultOp::VstMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
                     unsigned elems, RegIndex rn, RegIndex vd, unsigned regs,
                     unsigned inc, uint32_t size, uint32_t align, RegIndex rm) :
    PredMacroOp(mnem, machInst, __opClass)
{
    assert(regs > 0 && regs <= 4);
    assert(regs % elems == 0);

    numMicroops = (regs > 2) ? 2 : 1;
    bool wb = (rm != 15);
    bool interleave = (elems > 1);

    if (wb) numMicroops++;
    if (interleave) numMicroops += (regs / elems);
    microOps = new StaticInstPtr[numMicroops];

    uint32_t noAlign = 0;

    RegIndex rMid = interleave ? VecSpecialElem : vd * 2;

    unsigned uopIdx = 0;
    if (interleave) {
        switch (elems) {
          case 4:
            assert(regs == 4);
            microOps[uopIdx++] = newNeonMixInst<MicroInterNeon8Uop>(
                    size, machInst, rMid, vd * 2, inc * 2);
            break;
          case 3:
            assert(regs == 3);
            microOps[uopIdx++] = newNeonMixInst<MicroInterNeon6Uop>(
                    size, machInst, rMid, vd * 2, inc * 2);
            break;
          case 2:
            assert(regs == 4 || regs == 2);
            if (regs == 4) {
                microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
                        size, machInst, rMid, vd * 2, inc * 2);
                microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
                        size, machInst, rMid + 4, vd * 2 + 2, inc * 2);
            } else {
                microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
                        size, machInst, rMid, vd * 2, inc * 2);
            }
            break;
          default:
            // Bad number of elements to interleave
            microOps[uopIdx++] = new Unknown(machInst);
        }
    }
    switch (regs) {
      case 4:
        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
                size, machInst, rMid, rn, 0, align);
        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
                size, machInst, rMid + 4, rn, 16, noAlign);
        break;
      case 3:
        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
                size, machInst, rMid, rn, 0, align);
        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon8Uop>(
                size, machInst, rMid + 4, rn, 16, noAlign);
        break;
      case 2:
        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
                size, machInst, rMid, rn, 0, align);
        break;
      case 1:
        microOps[uopIdx++] = newNeonMemInst<MicroStrNeon8Uop>(
                size, machInst, rMid, rn, 0, align);
        break;
      default:
        // Unknown number of registers
        microOps[uopIdx++] = new Unknown(machInst);
    }
    if (wb) {
        if (rm != 15 && rm != 13) {
            microOps[uopIdx++] =
                new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
        } else {
            microOps[uopIdx++] =
                new MicroAddiUop(machInst, rn, rn, regs * 8);
        }
    }
    assert(uopIdx == numMicroops);

    for (unsigned i = 0; i < numMicroops - 1; i++) {
        MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
        assert(uopPtr);
        uopPtr->setDelayedCommit();
    }
    microOps[0]->setFirstMicroop();
    microOps[numMicroops - 1]->setLastMicroop();
}

VstSingleOp::VstSingleOp(const char *mnem, ExtMachInst machInst,
                         OpClass __opClass, bool all, unsigned elems,
                         RegIndex rn, RegIndex vd, unsigned regs,
                         unsigned inc, uint32_t size, uint32_t align,
                         RegIndex rm, unsigned lane) :
    PredMacroOp(mnem, machInst, __opClass)
{
    assert(!all);
    assert(regs > 0 && regs <= 4);
    assert(regs % elems == 0);

    unsigned eBytes = (1 << size);
    unsigned storeSize = eBytes * elems;
    [[maybe_unused]] unsigned storeRegs =
        (storeSize + sizeof(uint32_t) - 1) / sizeof(uint32_t);

    assert(storeRegs > 0 && storeRegs <= 4);

    numMicroops = 1;
    bool wb = (rm != 15);

    if (wb) numMicroops++;
    numMicroops += (regs / elems);
    microOps = new StaticInstPtr[numMicroops];

    RegIndex ufp0 = VecSpecialElem;

    unsigned uopIdx = 0;
    switch (elems) {
      case 4:
        assert(regs == 4);
        switch (size) {
          case 0:
            microOps[uopIdx++] = new MicroPackNeon8to2Uop<uint8_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 1:
            microOps[uopIdx++] = new MicroPackNeon8to2Uop<uint16_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 2:
            microOps[uopIdx++] = new MicroPackNeon8to4Uop<uint32_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          default:
            // Bad size
            microOps[uopIdx++] = new Unknown(machInst);
            break;
        }
        break;
      case 3:
        assert(regs == 3);
        switch (size) {
          case 0:
            microOps[uopIdx++] = new MicroPackNeon6to2Uop<uint8_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 1:
            microOps[uopIdx++] = new MicroPackNeon6to2Uop<uint16_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 2:
            microOps[uopIdx++] = new MicroPackNeon6to4Uop<uint32_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          default:
            // Bad size
            microOps[uopIdx++] = new Unknown(machInst);
            break;
        }
        break;
      case 2:
        assert(regs == 2);
        assert(storeRegs <= 2);
        switch (size) {
          case 0:
            microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint8_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 1:
            microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint16_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          case 2:
            microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint32_t>(
                    machInst, ufp0, vd * 2, inc * 2, lane);
            break;
          default:
            // Bad size
            microOps[uopIdx++] = new Unknown(machInst);
            break;
        }
        break;
      case 1:
        assert(regs == 1 || (all && regs == 2));
        assert(storeRegs <= 2);
        for (unsigned offset = 0; offset < regs; offset++) {
            switch (size) {
              case 0:
                microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint8_t>(
                        machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
                break;
              case 1:
                microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint16_t>(
                        machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
                break;
              case 2:
                microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint32_t>(
                        machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
                break;
              default:
                // Bad size
                microOps[uopIdx++] = new Unknown(machInst);
                break;
            }
        }
        break;
      default:
        // Bad number of elements to unpack
        microOps[uopIdx++] = new Unknown(machInst);
    }
    switch (storeSize) {
      case 1:
        microOps[uopIdx++] = new MicroStrNeon1Uop<uint8_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 2:
        if (eBytes == 2) {
            microOps[uopIdx++] = new MicroStrNeon2Uop<uint16_t>(
                    machInst, ufp0, rn, 0, align);
        } else {
            microOps[uopIdx++] = new MicroStrNeon2Uop<uint8_t>(
                    machInst, ufp0, rn, 0, align);
        }
        break;
      case 3:
        microOps[uopIdx++] = new MicroStrNeon3Uop<uint8_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 4:
        switch (eBytes) {
          case 1:
            microOps[uopIdx++] = new MicroStrNeon4Uop<uint8_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 2:
            microOps[uopIdx++] = new MicroStrNeon4Uop<uint16_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 4:
            microOps[uopIdx++] = new MicroStrNeon4Uop<uint32_t>(
                    machInst, ufp0, rn, 0, align);
            break;
        }
        break;
      case 6:
        microOps[uopIdx++] = new MicroStrNeon6Uop<uint16_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 8:
        switch (eBytes) {
          case 2:
            microOps[uopIdx++] = new MicroStrNeon8Uop<uint16_t>(
                    machInst, ufp0, rn, 0, align);
            break;
          case 4:
            microOps[uopIdx++] = new MicroStrNeon8Uop<uint32_t>(
                    machInst, ufp0, rn, 0, align);
            break;
        }
        break;
      case 12:
        microOps[uopIdx++] = new MicroStrNeon12Uop<uint32_t>(
                machInst, ufp0, rn, 0, align);
        break;
      case 16:
        microOps[uopIdx++] = new MicroStrNeon16Uop<uint32_t>(
                machInst, ufp0, rn, 0, align);
        break;
      default:
        // Bad store size
        microOps[uopIdx++] = new Unknown(machInst);
    }
    if (wb) {
        if (rm != 15 && rm != 13) {
            microOps[uopIdx++] =
                new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
        } else {
            microOps[uopIdx++] =
                new MicroAddiUop(machInst, rn, rn, storeSize);
        }
    }
    assert(uopIdx == numMicroops);

    for (unsigned i = 0; i < numMicroops - 1; i++) {
        MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
        assert(uopPtr);
        uopPtr->setDelayedCommit();
    }
    microOps[0]->setFirstMicroop();
    microOps[numMicroops - 1]->setLastMicroop();
}

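// AArch64 structure loads: data moves in chunks of at most 16 bytes through
// scratch registers placed above the architectural vector file
// (vx = NumVecV8ArchRegs), and marshal uops then deinterleave it into the
// destination registers.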
VldMultOp64::VldMultOp64(const char *mnem, ExtMachInst machInst,
                         OpClass __opClass, RegIndex rn, RegIndex vd,
                         RegIndex rm, uint8_t eSize, uint8_t dataSize,
                         uint8_t numStructElems, uint8_t numRegs, bool wb) :
    PredMacroOp(mnem, machInst, __opClass)
{
    RegIndex vx = NumVecV8ArchRegs;
    RegIndex rnsp = (RegIndex) makeSP((RegIndex) rn);
    bool baseIsSP = isSP((RegIndex) rnsp);

    numMicroops = wb ? 1 : 0;

    int totNumBytes = numRegs * dataSize / 8;
    assert(totNumBytes <= 64);

    // The guiding principle here is that no more than 16 bytes can be
    // transferred at a time
    int numMemMicroops = totNumBytes / 16;
    int residuum = totNumBytes % 16;
    if (residuum)
        ++numMemMicroops;
    numMicroops += numMemMicroops;

    int numMarshalMicroops = numRegs / 2 + (numRegs % 2 ? 1 : 0);
    numMicroops += numMarshalMicroops;

    microOps = new StaticInstPtr[numMicroops];
    unsigned uopIdx = 0;
    uint32_t memaccessFlags = (MMU::ArmFlags)eSize | MMU::AllowUnaligned;

    int i = 0;
    for (; i < numMemMicroops - 1; ++i) {
        microOps[uopIdx++] = new MicroNeonLoad64(
            machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
            baseIsSP, 16 /* accSize */, eSize);
    }
    microOps[uopIdx++] = new MicroNeonLoad64(
        machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
        residuum ? residuum : 16 /* accSize */, eSize);

    // Writeback microop: the post-increment amount is encoded in "Rm": a
    // 64-bit general register OR as '11111' for an immediate value equal to
    // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
    if (wb) {
        if (rm != int_reg::X31) {
            microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
                                                      UXTX, 0);
        } else {
            microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
                                                   totNumBytes);
        }
    }

    for (int i = 0; i < numMarshalMicroops; ++i) {
        switch (numRegs) {
          case 1: microOps[uopIdx++] = new MicroDeintNeon64_1Reg(
                      machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
                      numStructElems, 1, i /* step */);
                  break;
          case 2: microOps[uopIdx++] = new MicroDeintNeon64_2Reg(
                      machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
                      numStructElems, 2, i /* step */);
                  break;
          case 3: microOps[uopIdx++] = new MicroDeintNeon64_3Reg(
                      machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
                      numStructElems, 3, i /* step */);
                  break;
          case 4: microOps[uopIdx++] = new MicroDeintNeon64_4Reg(
                      machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
                      numStructElems, 4, i /* step */);
                  break;
          default: panic("Invalid number of registers");
        }
    }

    assert(uopIdx == numMicroops);

    for (int i = 0; i < numMicroops - 1; ++i) {
        microOps[i]->setDelayedCommit();
    }
    microOps[0]->setFirstMicroop();
    microOps[numMicroops - 1]->setLastMicroop();
}

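// The AArch64 store variant mirrors the load: marshal (interleave) uops run
// first, then the memory uops drain the scratch registers in 16-byte chunks.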
VstMultOp64::VstMultOp64(const char *mnem, ExtMachInst machInst,
                         OpClass __opClass, RegIndex rn, RegIndex vd,
                         RegIndex rm, uint8_t eSize, uint8_t dataSize,
                         uint8_t numStructElems, uint8_t numRegs, bool wb) :
    PredMacroOp(mnem, machInst, __opClass)
{
    RegIndex vx = NumVecV8ArchRegs;
    RegIndex rnsp = (RegIndex) makeSP((RegIndex) rn);
    bool baseIsSP = isSP((RegIndex) rnsp);

    numMicroops = wb ? 1 : 0;

    int totNumBytes = numRegs * dataSize / 8;
    assert(totNumBytes <= 64);

    // The guiding principle here is that no more than 16 bytes can be
    // transferred at a time
    int numMemMicroops = totNumBytes / 16;
    int residuum = totNumBytes % 16;
    if (residuum)
        ++numMemMicroops;
    numMicroops += numMemMicroops;

    int numMarshalMicroops = totNumBytes > 32 ? 2 : 1;
    numMicroops += numMarshalMicroops;

    microOps = new StaticInstPtr[numMicroops];
    unsigned uopIdx = 0;

    for (int i = 0; i < numMarshalMicroops; ++i) {
        switch (numRegs) {
          case 1: microOps[uopIdx++] = new MicroIntNeon64_1Reg(
                      machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
                      numStructElems, 1, i /* step */);
                  break;
          case 2: microOps[uopIdx++] = new MicroIntNeon64_2Reg(
                      machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
                      numStructElems, 2, i /* step */);
                  break;
          case 3: microOps[uopIdx++] = new MicroIntNeon64_3Reg(
                      machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
                      numStructElems, 3, i /* step */);
                  break;
          case 4: microOps[uopIdx++] = new MicroIntNeon64_4Reg(
                      machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
                      numStructElems, 4, i /* step */);
                  break;
          default: panic("Invalid number of registers");
        }
    }

    uint32_t memaccessFlags = (MMU::ArmFlags)eSize | MMU::AllowUnaligned;

    int i = 0;
    for (; i < numMemMicroops - 1; ++i) {
        microOps[uopIdx++] = new MicroNeonStore64(
            machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
            baseIsSP, 16 /* accSize */, eSize);
    }
    microOps[uopIdx++] = new MicroNeonStore64(
        machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
        residuum ? residuum : 16 /* accSize */, eSize);

    // Writeback microop: the post-increment amount is encoded in "Rm": a
    // 64-bit general register OR as '11111' for an immediate value equal to
    // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
    if (wb) {
        if (rm != int_reg::X31) {
            microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
                                                      UXTX, 0);
        } else {
            microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
                                                   totNumBytes);
        }
    }

    assert(uopIdx == numMicroops);

    for (int i = 0; i < numMicroops - 1; i++) {
        microOps[i]->setDelayedCommit();
    }
    microOps[0]->setFirstMicroop();
    microOps[numMicroops - 1]->setLastMicroop();
}

VldSingleOp64::VldSingleOp64(const char *mnem, ExtMachInst machInst,
                             OpClass __opClass, RegIndex rn, RegIndex vd,
                             RegIndex rm, uint8_t eSize, uint8_t dataSize,
                             uint8_t numStructElems, uint8_t index, bool wb,
                             bool replicate) :
    PredMacroOp(mnem, machInst, __opClass),
    eSize(0), dataSize(0), numStructElems(0), index(0),
    wb(false), replicate(false)
{
    RegIndex vx = NumVecV8ArchRegs;
    RegIndex rnsp = (RegIndex) makeSP((RegIndex) rn);
    bool baseIsSP = isSP((RegIndex) rnsp);

    numMicroops = wb ? 1 : 0;

    int eSizeBytes = 1 << eSize;
    int totNumBytes = numStructElems * eSizeBytes;
    assert(totNumBytes <= 64);

    // The guiding principle here is that no more than 16 bytes can be
    // transferred at a time
    int numMemMicroops = totNumBytes / 16;
    int residuum = totNumBytes % 16;
    if (residuum)
        ++numMemMicroops;
    numMicroops += numMemMicroops;

    int numMarshalMicroops = numStructElems / 2 + (numStructElems % 2 ? 1 : 0);
    numMicroops += numMarshalMicroops;

    microOps = new StaticInstPtr[numMicroops];
    unsigned uopIdx = 0;

    uint32_t memaccessFlags = (MMU::ArmFlags)eSize | MMU::AllowUnaligned;

    int i = 0;
    for (; i < numMemMicroops - 1; ++i) {
        microOps[uopIdx++] = new MicroNeonLoad64(
            machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
            baseIsSP, 16 /* accSize */, eSize);
    }
    microOps[uopIdx++] = new MicroNeonLoad64(
        machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
        residuum ? residuum : 16 /* accSize */, eSize);

    // Writeback microop: the post-increment amount is encoded in "Rm": a
    // 64-bit general register OR as '11111' for an immediate value equal to
    // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
    if (wb) {
        if (rm != int_reg::X31) {
            microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
                                                      UXTX, 0);
        } else {
            microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
                                                   totNumBytes);
        }
    }

    for (int i = 0; i < numMarshalMicroops; ++i) {
        microOps[uopIdx++] = new MicroUnpackNeon64(
            machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
            numStructElems, index, i /* step */, replicate);
    }

    assert(uopIdx == numMicroops);

    for (int i = 0; i < numMicroops - 1; i++) {
        microOps[i]->setDelayedCommit();
    }
    microOps[0]->setFirstMicroop();
    microOps[numMicroops - 1]->setLastMicroop();
}

VstSingleOp64::VstSingleOp64(const char *mnem, ExtMachInst machInst,
                             OpClass __opClass, RegIndex rn, RegIndex vd,
                             RegIndex rm, uint8_t eSize, uint8_t dataSize,
                             uint8_t numStructElems, uint8_t index, bool wb,
                             bool replicate) :
    PredMacroOp(mnem, machInst, __opClass),
    eSize(0), dataSize(0), numStructElems(0), index(0),
    wb(false), replicate(false)
{
    RegIndex vx = NumVecV8ArchRegs;
    RegIndex rnsp = (RegIndex) makeSP((RegIndex) rn);
    bool baseIsSP = isSP((RegIndex) rnsp);

    numMicroops = wb ? 1 : 0;

    int eSizeBytes = 1 << eSize;
    int totNumBytes = numStructElems * eSizeBytes;
    assert(totNumBytes <= 64);

    // The guiding principle here is that no more than 16 bytes can be
    // transferred at a time
    int numMemMicroops = totNumBytes / 16;
    int residuum = totNumBytes % 16;
    if (residuum)
        ++numMemMicroops;
    numMicroops += numMemMicroops;

    int numMarshalMicroops = totNumBytes > 32 ? 2 : 1;
    numMicroops += numMarshalMicroops;

    microOps = new StaticInstPtr[numMicroops];
    unsigned uopIdx = 0;

    for (int i = 0; i < numMarshalMicroops; ++i) {
        microOps[uopIdx++] = new MicroPackNeon64(
            machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
            numStructElems, index, i /* step */, replicate);
    }

    uint32_t memaccessFlags = (MMU::ArmFlags)eSize | MMU::AllowUnaligned;

    int i = 0;
    for (; i < numMemMicroops - 1; ++i) {
        microOps[uopIdx++] = new MicroNeonStore64(
            machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
            baseIsSP, 16 /* accSize */, eSize);
    }
    microOps[uopIdx++] = new MicroNeonStore64(
        machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
        residuum ? residuum : 16 /* accSize */, eSize);

    // Writeback microop: the post-increment amount is encoded in "Rm": a
    // 64-bit general register OR as '11111' for an immediate value equal to
    // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
    if (wb) {
        if (rm != int_reg::X31) {
            microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
                                                      UXTX, 0);
        } else {
            microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
                                                   totNumBytes);
        }
    }

    assert(uopIdx == numMicroops);

    for (int i = 0; i < numMicroops - 1; i++) {
        microOps[i]->setDelayedCommit();
    }
    microOps[0]->setFirstMicroop();
    microOps[numMicroops - 1]->setLastMicroop();
}

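// VLDM/VSTM-style VFP transfers: one uop per single register, or a
// bottom/top pair of uops per double register, stepping the address a word
// or a doubleword at a time.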
MacroVFPMemOp::MacroVFPMemOp(const char *mnem, ExtMachInst machInst,
                             OpClass __opClass, RegIndex rn,
                             RegIndex vd, bool single, bool up,
                             bool writeback, bool load, uint32_t offset) :
    PredMacroOp(mnem, machInst, __opClass)
{
    int i = 0;

    // The lowest order bit selects fldmx (set) or fldmd (clear). These seem
    // to be functionally identical except that fldmx is deprecated. For now
    // we'll assume they're otherwise interchangeable.
    int count = (single ? offset : (offset / 2));
    numMicroops = count * (single ? 1 : 2) + (writeback ? 1 : 0);
    microOps = new StaticInstPtr[numMicroops];

    int64_t addr = 0;

    if (!up)
        addr = 4 * offset;

    bool tempUp = up;
    for (int j = 0; j < count; j++) {
        if (load) {
            if (single) {
                microOps[i++] = new MicroLdrFpUop(machInst, vd++, rn,
                                                  tempUp, addr);
            } else {
                microOps[i++] = new MicroLdrDBFpUop(machInst, vd++, rn,
                                                    tempUp, addr);
                microOps[i++] = new MicroLdrDTFpUop(machInst, vd++, rn, tempUp,
                                                    addr + (up ? 4 : -4));
            }
        } else {
            if (single) {
                microOps[i++] = new MicroStrFpUop(machInst, vd++, rn,
                                                  tempUp, addr);
            } else {
                microOps[i++] = new MicroStrDBFpUop(machInst, vd++, rn,
                                                    tempUp, addr);
                microOps[i++] = new MicroStrDTFpUop(machInst, vd++, rn, tempUp,
                                                    addr + (up ? 4 : -4));
            }
        }
        if (!tempUp) {
            addr -= (single ? 4 : 8);
            // The microops don't handle negative displacement, so once the
            // displacement reaches zero, flip polarity and start adding.
            if (addr <= 0) {
                tempUp = true;
                addr = -addr;
            }
        } else {
            addr += (single ? 4 : 8);
        }
    }

    if (writeback) {
        if (up) {
            microOps[i++] =
                new MicroAddiUop(machInst, rn, rn, 4 * offset);
        } else {
            microOps[i++] =
                new MicroSubiUop(machInst, rn, rn, 4 * offset);
        }
    }

    assert(numMicroops == i);
    microOps[0]->setFirstMicroop();
    microOps[numMicroops - 1]->setLastMicroop();

    for (StaticInstPtr *curUop = microOps;
         !(*curUop)->isLastMicroop(); curUop++) {
        MicroOp * uopPtr = dynamic_cast<MicroOp *>(curUop->get());
        assert(uopPtr);
        uopPtr->setDelayedCommit();
    }
}

std::string
MicroIntImmOp::generateDisassembly(
        Addr pc, const loader::SymbolTable *symtab) const
{
    std::stringstream ss;
    printMnemonic(ss);
    printIntReg(ss, ura);
    ss << ", ";
    printIntReg(ss, urb);
    ss << ", ";
    ccprintf(ss, "#%d", imm);
    return ss.str();
}

std::string
MicroIntImmXOp::generateDisassembly(
        Addr pc, const loader::SymbolTable *symtab) const
{
    std::stringstream ss;
    printMnemonic(ss);
    printIntReg(ss, ura);
    ss << ", ";
    printIntReg(ss, urb);
    ss << ", ";
    ccprintf(ss, "#%d", imm);
    return ss.str();
}

std::string
MicroSetPCCPSR::generateDisassembly(
        Addr pc, const loader::SymbolTable *symtab) const
{
    std::stringstream ss;
    printMnemonic(ss);
    ss << "[PC,CPSR]";
    return ss.str();
}

std::string
MicroIntRegXOp::generateDisassembly(
        Addr pc, const loader::SymbolTable *symtab) const
{
    std::stringstream ss;
    printMnemonic(ss);
    printIntReg(ss, ura);
    ccprintf(ss, ", ");
    printIntReg(ss, urb);
    printExtendOperand(false, ss, urc, type, shiftAmt);
    return ss.str();
}

std::string
MicroIntMov::generateDisassembly(
        Addr pc, const loader::SymbolTable *symtab) const
{
    std::stringstream ss;
    printMnemonic(ss);
    printIntReg(ss, ura);
    ss << ", ";
    printIntReg(ss, urb);
    return ss.str();
}

std::string
MicroIntOp::generateDisassembly(
        Addr pc, const loader::SymbolTable *symtab) const
{
    std::stringstream ss;
    printMnemonic(ss);
    printIntReg(ss, ura);
    ss << ", ";
    printIntReg(ss, urb);
    ss << ", ";
    printIntReg(ss, urc);
    return ss.str();
}

std::string
MicroMemOp::generateDisassembly(
        Addr pc, const loader::SymbolTable *symtab) const
{
    std::stringstream ss;
    printMnemonic(ss);
    if (isFloating())
        printFloatReg(ss, ura);
    else
        printIntReg(ss, ura);
    ss << ", [";
    printIntReg(ss, urb);
    ss << ", ";
    ccprintf(ss, "#%d", imm);
    ss << "]";
    return ss.str();
}

std::string
MicroMemPairOp::generateDisassembly(
        Addr pc, const loader::SymbolTable *symtab) const
{
    std::stringstream ss;
    printMnemonic(ss);
    printIntReg(ss, dest);
    ss << ",";
    printIntReg(ss, dest2);
    ss << ", [";
    printIntReg(ss, urb);
    ss << ", ";
    ccprintf(ss, "#%d", imm);
    ss << "]";
    return ss.str();
}

} // namespace ArmISA
} // namespace gem5