gem5  v20.0.0.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
macromem.cc
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2010-2014 ARM Limited
3  * All rights reserved
4  *
5  * The license below extends only to copyright in the software and shall
6  * not be construed as granting a license to any other intellectual
7  * property including but not limited to intellectual property relating
8  * to a hardware implementation of the functionality of the software
9  * licensed hereunder. You may use the software subject to the license
10  * terms below provided that you ensure that this notice is replicated
11  * unmodified and in its entirety in all distributions of the software,
12  * modified or unmodified, in source code or in binary form.
13  *
14  * Copyright (c) 2007-2008 The Florida State University
15  * All rights reserved.
16  *
17  * Redistribution and use in source and binary forms, with or without
18  * modification, are permitted provided that the following conditions are
19  * met: redistributions of source code must retain the above copyright
20  * notice, this list of conditions and the following disclaimer;
21  * redistributions in binary form must reproduce the above copyright
22  * notice, this list of conditions and the following disclaimer in the
23  * documentation and/or other materials provided with the distribution;
24  * neither the name of the copyright holders nor the names of its
25  * contributors may be used to endorse or promote products derived from
26  * this software without specific prior written permission.
27  *
28  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
29  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
30  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
31  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
32  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
33  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
34  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
35  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
36  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
37  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
38  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39  */
40 
42 
43 #include <sstream>
44 
45 #include "arch/arm/generated/decoder.hh"
47 
48 using namespace std;
49 using namespace ArmISAInst;
50 
51 namespace ArmISA
52 {
53 
// LDM/STM (load/store multiple) macro-op constructor: expands an ARM
// register-list memory instruction into a chain of load/store micro-ops.
// Loads are paired into 64-bit micro-ops where possible; optional extra
// micro-ops handle base-register copy, base writeback, and a deferred PC
// update.  Micro-op emission order is significant: when writeback is
// performed, the PC-writing micro-op must come last.
54 MacroMemOp::MacroMemOp(const char *mnem, ExtMachInst machInst,
55  OpClass __opClass, IntRegIndex rn,
56  bool index, bool up, bool user, bool writeback,
57  bool load, uint32_t reglist) :
58  PredMacroOp(mnem, machInst, __opClass)
59 {
60  uint32_t regs = reglist;
61  uint32_t ones = number_of_ones(reglist);
62  uint32_t mem_ops = ones;
63 
64  // Copy the base address register if we overwrite it, or if this instruction
65  // is basically a no-op (we have to do something)
66  bool copy_base = (bits(reglist, rn) && load) || !ones;
 // force_user: user-mode register banking requested and PC not in the list.
 // exception_ret: S-bit set with PC in the list (exception-return form).
 // pc_temp: PC must first be loaded into a temp register so the branch can
 // be issued after the base writeback micro-op.
67  bool force_user = user & !bits(reglist, 15);
68  bool exception_ret = user & bits(reglist, 15);
69  bool pc_temp = load && writeback && bits(reglist, 15);
70 
 // Micro-op count: loads go two registers per micro-op (rounded up); an
 // even count with exception return needs one extra because the PC cannot
 // share a paired load there.  Empty list still emits one micro-op.
71  if (!ones) {
72  numMicroops = 1;
73  } else if (load) {
74  numMicroops = ((ones + 1) / 2)
75  + ((ones % 2 == 0 && exception_ret) ? 1 : 0)
76  + (copy_base ? 1 : 0)
77  + (writeback? 1 : 0)
78  + (pc_temp ? 1 : 0);
79  } else {
80  numMicroops = ones + (writeback ? 1 : 0);
81  }
82 
84 
 // Starting offset from the base: decrementing forms start at the top of
 // the block; non-indexed forms are shifted by one word.
85  uint32_t addr = 0;
86 
87  if (!up)
88  addr = (ones << 2) - 4;
89 
90  if (!index)
91  addr += 4;
92 
93  StaticInstPtr *uop = microOps;
94 
95  // Add 0 to Rn and stick it in ureg0.
96  // This is equivalent to a move.
97  if (copy_base)
98  *uop++ = new MicroAddiUop(machInst, INTREG_UREG0, rn, 0);
99 
100  unsigned reg = 0;
101  while (mem_ops != 0) {
102  // Do load operations in pairs if possible
103  if (load && mem_ops >= 2 &&
104  !(mem_ops == 2 && bits(regs,INTREG_PC) && exception_ret)) {
105  // 64-bit memory operation
106  // Find 2 set register bits (clear them after finding)
107  unsigned reg_idx1;
108  unsigned reg_idx2;
109 
110  // Find the first register
111  while (!bits(regs, reg)) reg++;
112  replaceBits(regs, reg, 0);
113  reg_idx1 = force_user ? intRegInMode(MODE_USER, reg) : reg;
114 
115  // Find the second register
116  while (!bits(regs, reg)) reg++;
117  replaceBits(regs, reg, 0);
118  reg_idx2 = force_user ? intRegInMode(MODE_USER, reg) : reg;
119 
120  // Load into temp reg if necessary
121  if (reg_idx2 == INTREG_PC && pc_temp)
122  reg_idx2 = INTREG_UREG1;
123 
124  // Actually load both registers from memory
125  *uop = new MicroLdr2Uop(machInst, reg_idx1, reg_idx2,
126  copy_base ? INTREG_UREG0 : rn, up, addr);
127 
128  if (!writeback && reg_idx2 == INTREG_PC) {
129  // No writeback if idx==pc, set appropriate flags
130  (*uop)->setFlag(StaticInst::IsControl);
131  (*uop)->setFlag(StaticInst::IsIndirectControl);
132 
133  if (!(condCode == COND_AL || condCode == COND_UC))
134  (*uop)->setFlag(StaticInst::IsCondControl);
135  else
136  (*uop)->setFlag(StaticInst::IsUncondControl);
137  }
138 
139  if (up) addr += 8;
140  else addr -= 8;
141  mem_ops -= 2;
142  } else {
143  // 32-bit memory operation
144  // Find register for operation
145  unsigned reg_idx;
146  while (!bits(regs, reg)) reg++;
147  replaceBits(regs, reg, 0);
148  reg_idx = force_user ? intRegInMode(MODE_USER, reg) : reg;
149 
150  if (load) {
151  if (writeback && reg_idx == INTREG_PC) {
152  // If this instruction changes the PC and performs a
153  // writeback, ensure the pc load/branch is the last uop.
154  // Load into a temp reg here.
155  *uop = new MicroLdrUop(machInst, INTREG_UREG1,
156  copy_base ? INTREG_UREG0 : rn, up, addr);
157  } else if (reg_idx == INTREG_PC && exception_ret) {
158  // Special handling for exception return
159  *uop = new MicroLdrRetUop(machInst, reg_idx,
160  copy_base ? INTREG_UREG0 : rn, up, addr);
161  } else {
162  // standard single load uop
163  *uop = new MicroLdrUop(machInst, reg_idx,
164  copy_base ? INTREG_UREG0 : rn, up, addr);
165  }
166 
167  // Loading pc as last operation? Set appropriate flags.
168  if (!writeback && reg_idx == INTREG_PC) {
169  (*uop)->setFlag(StaticInst::IsControl);
170  (*uop)->setFlag(StaticInst::IsIndirectControl);
171 
172  if (!(condCode == COND_AL || condCode == COND_UC))
173  (*uop)->setFlag(StaticInst::IsCondControl);
174  else
175  (*uop)->setFlag(StaticInst::IsUncondControl);
176  }
177  } else {
178  *uop = new MicroStrUop(machInst, reg_idx, rn, up, addr);
179  }
180 
181  if (up) addr += 4;
182  else addr -= 4;
183  --mem_ops;
184  }
185 
186  // Load/store micro-op generated, go to next uop
187  ++uop;
188  }
189 
190  if (writeback && ones) {
191  // Perform writeback uop operation
192  if (up)
193  *uop++ = new MicroAddiUop(machInst, rn, rn, ones * 4);
194  else
195  *uop++ = new MicroSubiUop(machInst, rn, rn, ones * 4);
196 
197  // Write PC after address writeback?
198  if (pc_temp) {
 // Move the temp value into the PC now that the base is updated;
 // the return-form micro-op also restores processor state.
199  if (exception_ret) {
200  *uop = new MicroUopRegMovRet(machInst, 0, INTREG_UREG1);
201  } else {
202  *uop = new MicroUopRegMov(machInst, INTREG_PC, INTREG_UREG1);
203  }
204  (*uop)->setFlag(StaticInst::IsControl);
205  (*uop)->setFlag(StaticInst::IsIndirectControl);
206 
207  if (!(condCode == COND_AL || condCode == COND_UC))
208  (*uop)->setFlag(StaticInst::IsCondControl);
209  else
210  (*uop)->setFlag(StaticInst::IsUncondControl);
211 
 // Popping the PC off the stack is treated as a function return.
212  if (rn == INTREG_SP)
213  (*uop)->setFlag(StaticInst::IsReturn);
214 
215  ++uop;
216  }
217  }
218 
219  --uop;
220  (*uop)->setLastMicroop();
221  microOps[0]->setFirstMicroop();
222 
223  /* Take the control flags from the last microop for the macroop */
224  if ((*uop)->isControl())
225  setFlag(StaticInst::IsControl);
226  if ((*uop)->isCondCtrl())
227  setFlag(StaticInst::IsCondControl);
228  if ((*uop)->isUncondCtrl())
229  setFlag(StaticInst::IsUncondControl);
230  if ((*uop)->isIndirectCtrl())
231  setFlag(StaticInst::IsIndirectControl);
232  if ((*uop)->isReturn())
233  setFlag(StaticInst::IsReturn);
234 
 // Every micro-op except the last commits with a delay so the macro-op
 // can be squashed as a unit.
235  for (StaticInstPtr *uop = microOps; !(*uop)->isLastMicroop(); uop++) {
236  (*uop)->setDelayedCommit();
237  }
238 }
239 
// AArch64 LDP/STP (load/store pair) macro-op constructor.  For non-post
// addressing modes the effective address is first computed into UREG0
// (with SP-alignment checking); post-index accesses go through the base
// register directly and the writeback micro-op adds the immediate last.
// NOTE(review): the tail of the parameter list (rn/rt/rt2) is on a line
// not visible in this extract — confirm against the full source.
240 PairMemOp::PairMemOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
241  uint32_t size, bool fp, bool load, bool noAlloc,
242  bool signExt, bool exclusive, bool acrel,
243  int64_t imm, AddrMode mode,
245  PredMacroOp(mnem, machInst, __opClass)
246 {
247  bool post = (mode == AddrMd_PostIndex);
248  bool writeback = (mode != AddrMd_Offset);
249 
250  if (load) {
251  // Use integer rounding to round up loads of size 4
252  numMicroops = (post ? 0 : 1) + ((size + 4) / 8) + (writeback ? 1 : 0);
253  } else {
254  numMicroops = (post ? 0 : 1) + (size / 4) + (writeback ? 1 : 0);
255  }
257 
258  StaticInstPtr *uop = microOps;
259 
 // Register 31 is the stack pointer in this addressing context.
260  rn = makeSP(rn);
261 
 // Pre-index/offset forms: compute base + imm into UREG0 up front.
262  if (!post) {
263  *uop++ = new MicroAddXiSpAlignUop(machInst, INTREG_UREG0, rn,
264  post ? 0 : imm);
265  }
266 
267  if (fp) {
268  if (size == 16) {
269  if (load) {
270  *uop++ = new MicroLdFp16Uop(machInst, rt,
271  post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
272  *uop++ = new MicroLdFp16Uop(machInst, rt2,
273  post ? rn : INTREG_UREG0, 16, noAlloc, exclusive, acrel);
274  } else {
 // 128-bit stores are split into bottom (QB) and top (QT)
 // 64-bit halves for each source register.
275  *uop++ = new MicroStrQBFpXImmUop(machInst, rt,
276  post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
277  *uop++ = new MicroStrQTFpXImmUop(machInst, rt,
278  post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
279  *uop++ = new MicroStrQBFpXImmUop(machInst, rt2,
280  post ? rn : INTREG_UREG0, 16, noAlloc, exclusive, acrel);
281  *uop++ = new MicroStrQTFpXImmUop(machInst, rt2,
282  post ? rn : INTREG_UREG0, 16, noAlloc, exclusive, acrel);
283  }
284  } else if (size == 8) {
285  if (load) {
286  *uop++ = new MicroLdPairFp8Uop(machInst, rt, rt2,
287  post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
288  } else {
289  *uop++ = new MicroStrFpXImmUop(machInst, rt,
290  post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
291  *uop++ = new MicroStrFpXImmUop(machInst, rt2,
292  post ? rn : INTREG_UREG0, 8, noAlloc, exclusive, acrel);
293  }
294  } else if (size == 4) {
295  if (load) {
296  *uop++ = new MicroLdrDFpXImmUop(machInst, rt, rt2,
297  post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
298  } else {
299  *uop++ = new MicroStrDFpXImmUop(machInst, rt, rt2,
300  post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
301  }
302  }
303  } else {
304  if (size == 8) {
305  if (load) {
306  *uop++ = new MicroLdPairUop(machInst, rt, rt2,
307  post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
308  } else {
309  *uop++ = new MicroStrXImmUop(machInst, rt, post ? rn : INTREG_UREG0,
310  0, noAlloc, exclusive, acrel);
311  *uop++ = new MicroStrXImmUop(machInst, rt2, post ? rn : INTREG_UREG0,
312  size, noAlloc, exclusive, acrel);
313  }
314  } else if (size == 4) {
315  if (load) {
 // 32-bit pair loads come in sign-extending (LDPSW-style)
 // and zero-extending variants.
316  if (signExt) {
317  *uop++ = new MicroLdrDSXImmUop(machInst, rt, rt2,
318  post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
319  } else {
320  *uop++ = new MicroLdrDUXImmUop(machInst, rt, rt2,
321  post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
322  }
323  } else {
324  *uop++ = new MicroStrDXImmUop(machInst, rt, rt2,
325  post ? rn : INTREG_UREG0, 0, noAlloc, exclusive, acrel);
326  }
327  }
328  }
329 
 // Base writeback: post-index adds imm now; pre-index copies the
 // already-computed UREG0 value back into the base register.
330  if (writeback) {
331  *uop++ = new MicroAddXiUop(machInst, rn, post ? rn : INTREG_UREG0,
332  post ? imm : 0);
333  }
334 
335  assert(uop == &microOps[numMicroops]);
336  (*--uop)->setLastMicroop();
338 
339  for (StaticInstPtr *curUop = microOps;
340  !(*curUop)->isLastMicroop(); curUop++) {
341  (*curUop)->setDelayedCommit();
342  }
343 }
344 
// 128-bit FP load/store with immediate offset, no writeback: one load
// micro-op, or a bottom-half (QB) plus top-half (QT) store pair.
// NOTE(review): the constructor-name line is not visible in this extract;
// presumably BigFpMemImmOp — confirm against the full source.
346  OpClass __opClass, bool load, IntRegIndex dest,
347  IntRegIndex base, int64_t imm) :
348  PredMacroOp(mnem, machInst, __opClass)
349 {
350  numMicroops = load ? 1 : 2;
352 
353  StaticInstPtr *uop = microOps;
354 
355  if (load) {
356  *uop = new MicroLdFp16Uop(machInst, dest, base, imm);
357  } else {
 // The first half-store must not commit before the second is issued.
358  *uop = new MicroStrQBFpXImmUop(machInst, dest, base, imm);
359  (*uop)->setDelayedCommit();
360  *++uop = new MicroStrQTFpXImmUop(machInst, dest, base, imm);
361  }
362  (*uop)->setLastMicroop();
364 }
365 
// 128-bit FP load/store, post-indexed: access memory at offset 0, then a
// final MicroAddXiUop writes base + imm back into the base register.
// NOTE(review): constructor-name line not visible in this extract;
// presumably BigFpMemPostOp — confirm against the full source.
367  OpClass __opClass, bool load, IntRegIndex dest,
368  IntRegIndex base, int64_t imm) :
369  PredMacroOp(mnem, machInst, __opClass)
370 {
371  numMicroops = load ? 2 : 3;
373 
374  StaticInstPtr *uop = microOps;
375 
376  if (load) {
377  *uop++ = new MicroLdFp16Uop(machInst, dest, base, 0);
378  } else {
379  *uop++= new MicroStrQBFpXImmUop(machInst, dest, base, 0);
380  *uop++ = new MicroStrQTFpXImmUop(machInst, dest, base, 0);
381  }
 // Writeback is the last (and committing) micro-op.
382  *uop = new MicroAddXiUop(machInst, base, base, imm);
383  (*uop)->setLastMicroop();
385 
386  for (StaticInstPtr *curUop = microOps;
387  !(*curUop)->isLastMicroop(); curUop++) {
388  (*curUop)->setDelayedCommit();
389  }
390 }
391 
// 128-bit FP load/store, pre-indexed: access memory at base + imm, then
// write the same updated address back into the base register.
// NOTE(review): constructor-name line not visible in this extract;
// presumably BigFpMemPreOp — confirm against the full source.
393  OpClass __opClass, bool load, IntRegIndex dest,
394  IntRegIndex base, int64_t imm) :
395  PredMacroOp(mnem, machInst, __opClass)
396 {
397  numMicroops = load ? 2 : 3;
399 
400  StaticInstPtr *uop = microOps;
401 
402  if (load) {
403  *uop++ = new MicroLdFp16Uop(machInst, dest, base, imm);
404  } else {
405  *uop++ = new MicroStrQBFpXImmUop(machInst, dest, base, imm);
406  *uop++ = new MicroStrQTFpXImmUop(machInst, dest, base, imm);
407  }
 // Writeback is the last (and committing) micro-op.
408  *uop = new MicroAddXiUop(machInst, base, base, imm);
409  (*uop)->setLastMicroop();
411 
412  for (StaticInstPtr *curUop = microOps;
413  !(*curUop)->isLastMicroop(); curUop++) {
414  (*curUop)->setDelayedCommit();
415  }
416 }
417 
// 128-bit FP load/store with extended register offset (offset register,
// extend type and shift amount), no writeback.
// NOTE(review): the constructor-name line and the base/offset parameter
// line are not visible in this extract; presumably BigFpMemRegOp —
// confirm against the full source.
419  OpClass __opClass, bool load, IntRegIndex dest,
421  ArmExtendType type, int64_t imm) :
422  PredMacroOp(mnem, machInst, __opClass)
423 {
424  numMicroops = load ? 1 : 2;
426 
427  StaticInstPtr *uop = microOps;
428 
429  if (load) {
430  *uop = new MicroLdFp16RegUop(machInst, dest, base,
431  offset, type, imm);
432  } else {
 // Bottom-half store must not commit before the top-half store.
433  *uop = new MicroStrQBFpXRegUop(machInst, dest, base,
434  offset, type, imm);
435  (*uop)->setDelayedCommit();
436  *++uop = new MicroStrQTFpXRegUop(machInst, dest, base,
437  offset, type, imm);
438  }
439 
440  (*uop)->setLastMicroop();
442 }
443 
// 128-bit FP PC-relative literal load: a single literal-load micro-op.
// NOTE(review): constructor-name line not visible in this extract;
// presumably BigFpMemLitOp — confirm against the full source.
445  OpClass __opClass, IntRegIndex dest,
446  int64_t imm) :
447  PredMacroOp(mnem, machInst, __opClass)
448 {
449  numMicroops = 1;
451 
452  microOps[0] = new MicroLdFp16LitUop(machInst, dest, imm);
453  microOps[0]->setLastMicroop();
455 }
456 
// NEON VLDn (multiple structures) macro-op: loads up to 4 double
// registers in one or two bulk load micro-ops, optionally writes the
// base back, then deinterleaves element groups from the special-purpose
// temporary vector registers into the destination registers.
457 VldMultOp::VldMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
458  unsigned elems, RegIndex rn, RegIndex vd, unsigned regs,
459  unsigned inc, uint32_t size, uint32_t align, RegIndex rm) :
460  PredMacroOp(mnem, machInst, __opClass)
461 {
462  assert(regs > 0 && regs <= 4);
463  assert(regs % elems == 0);
464 
 // Base count: >2 registers need two load micro-ops, else one.
465  numMicroops = (regs > 2) ? 2 : 1;
 // rm == 15 encodes "no writeback"; rm == 13 encodes writeback by the
 // access size (see the wb block below).
466  bool wb = (rm != 15);
467  bool deinterleave = (elems > 1);
468 
469  if (wb) numMicroops++;
470  if (deinterleave) numMicroops += (regs / elems);
472 
 // Interleaved data first lands in the special temp element registers;
 // single-element loads go straight to the destination.
473  RegIndex rMid = deinterleave ? VecSpecialElem : vd * 2;
474 
475  uint32_t noAlign = 0;
476 
477  unsigned uopIdx = 0;
478  switch (regs) {
479  case 4:
480  microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
481  size, machInst, rMid, rn, 0, align);
482  microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
483  size, machInst, rMid + 4, rn, 16, noAlign);
484  break;
485  case 3:
486  microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
487  size, machInst, rMid, rn, 0, align);
488  microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon8Uop>(
489  size, machInst, rMid + 4, rn, 16, noAlign);
490  break;
491  case 2:
492  microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon16Uop>(
493  size, machInst, rMid, rn, 0, align);
494  break;
495  case 1:
496  microOps[uopIdx++] = newNeonMemInst<MicroLdrNeon8Uop>(
497  size, machInst, rMid, rn, 0, align);
498  break;
499  default:
500  // Unknown number of registers
501  microOps[uopIdx++] = new Unknown(machInst);
502  }
 // Writeback: a general register (not 13/15) adds rm; otherwise add the
 // fixed transfer size (regs * 8 bytes).
503  if (wb) {
504  if (rm != 15 && rm != 13) {
505  microOps[uopIdx++] =
506  new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
507  } else {
508  microOps[uopIdx++] =
509  new MicroAddiUop(machInst, rn, rn, regs * 8);
510  }
511  }
512  if (deinterleave) {
513  switch (elems) {
514  case 4:
515  assert(regs == 4);
516  microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon8Uop>(
517  size, machInst, vd * 2, rMid, inc * 2);
518  break;
519  case 3:
520  assert(regs == 3);
521  microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon6Uop>(
522  size, machInst, vd * 2, rMid, inc * 2);
523  break;
524  case 2:
525  assert(regs == 4 || regs == 2);
526  if (regs == 4) {
527  microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
528  size, machInst, vd * 2, rMid, inc * 2);
529  microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
530  size, machInst, vd * 2 + 2, rMid + 4, inc * 2);
531  } else {
532  microOps[uopIdx++] = newNeonMixInst<MicroDeintNeon4Uop>(
533  size, machInst, vd * 2, rMid, inc * 2);
534  }
535  break;
536  default:
537  // Bad number of elements to deinterleave
538  microOps[uopIdx++] = new Unknown(machInst);
539  }
540  }
541  assert(uopIdx == numMicroops);
542 
 // All but the final micro-op commit with a delay.
543  for (unsigned i = 0; i < numMicroops - 1; i++) {
544  MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
545  assert(uopPtr);
546  uopPtr->setDelayedCommit();
547  }
549  microOps[numMicroops - 1]->setLastMicroop();
550 }
551 
// NEON VLDn single-structure (to one lane, or to all lanes when 'all' is
// set) macro-op: one bulk load of loadSize bytes into the special temp
// vector registers, optional base writeback, then unpack micro-ops that
// scatter the loaded element(s) into the destination register lanes.
// NOTE(review): the constructor-name line is not visible in this extract;
// presumably VldSingleOp — confirm against the full source.
553  OpClass __opClass, bool all, unsigned elems,
554  RegIndex rn, RegIndex vd, unsigned regs,
555  unsigned inc, uint32_t size, uint32_t align,
556  RegIndex rm, unsigned lane) :
557  PredMacroOp(mnem, machInst, __opClass)
558 {
559  assert(regs > 0 && regs <= 4);
560  assert(regs % elems == 0);
561 
 // Bytes moved per structure: element size times element count.
562  unsigned eBytes = (1 << size);
563  unsigned loadSize = eBytes * elems;
564  unsigned loadRegs M5_VAR_USED =
565  (loadSize + sizeof(uint32_t) - 1) / sizeof(uint32_t);
566 
567  assert(loadRegs > 0 && loadRegs <= 4);
568 
569  numMicroops = 1;
 // rm == 15 encodes "no writeback".
570  bool wb = (rm != 15);
571 
572  if (wb) numMicroops++;
573  numMicroops += (regs / elems);
575 
576  RegIndex ufp0 = VecSpecialElem;
577 
 // Pick the load micro-op matching the total transfer size and the
 // element width.
578  unsigned uopIdx = 0;
579  switch (loadSize) {
580  case 1:
581  microOps[uopIdx++] = new MicroLdrNeon1Uop<uint8_t>(
582  machInst, ufp0, rn, 0, align);
583  break;
584  case 2:
585  if (eBytes == 2) {
586  microOps[uopIdx++] = new MicroLdrNeon2Uop<uint16_t>(
587  machInst, ufp0, rn, 0, align);
588  } else {
589  microOps[uopIdx++] = new MicroLdrNeon2Uop<uint8_t>(
590  machInst, ufp0, rn, 0, align);
591  }
592  break;
593  case 3:
594  microOps[uopIdx++] = new MicroLdrNeon3Uop<uint8_t>(
595  machInst, ufp0, rn, 0, align);
596  break;
597  case 4:
598  switch (eBytes) {
599  case 1:
600  microOps[uopIdx++] = new MicroLdrNeon4Uop<uint8_t>(
601  machInst, ufp0, rn, 0, align);
602  break;
603  case 2:
604  microOps[uopIdx++] = new MicroLdrNeon4Uop<uint16_t>(
605  machInst, ufp0, rn, 0, align);
606  break;
607  case 4:
608  microOps[uopIdx++] = new MicroLdrNeon4Uop<uint32_t>(
609  machInst, ufp0, rn, 0, align);
610  break;
611  }
612  break;
613  case 6:
614  microOps[uopIdx++] = new MicroLdrNeon6Uop<uint16_t>(
615  machInst, ufp0, rn, 0, align);
616  break;
617  case 8:
618  switch (eBytes) {
619  case 2:
620  microOps[uopIdx++] = new MicroLdrNeon8Uop<uint16_t>(
621  machInst, ufp0, rn, 0, align);
622  break;
623  case 4:
624  microOps[uopIdx++] = new MicroLdrNeon8Uop<uint32_t>(
625  machInst, ufp0, rn, 0, align);
626  break;
627  }
628  break;
629  case 12:
630  microOps[uopIdx++] = new MicroLdrNeon12Uop<uint32_t>(
631  machInst, ufp0, rn, 0, align);
632  break;
633  case 16:
634  microOps[uopIdx++] = new MicroLdrNeon16Uop<uint32_t>(
635  machInst, ufp0, rn, 0, align);
636  break;
637  default:
638  // Unrecognized load size
639  microOps[uopIdx++] = new Unknown(machInst);
640  }
 // Writeback: add rm for a general register, else the access size.
641  if (wb) {
642  if (rm != 15 && rm != 13) {
643  microOps[uopIdx++] =
644  new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
645  } else {
646  microOps[uopIdx++] =
647  new MicroAddiUop(machInst, rn, rn, loadSize);
648  }
649  }
 // Unpack: replicate to all lanes ('all') or insert into one lane,
 // selecting the micro-op by element count and element size.
650  switch (elems) {
651  case 4:
652  assert(regs == 4);
653  switch (size) {
654  case 0:
655  if (all) {
656  microOps[uopIdx++] = new MicroUnpackAllNeon2to8Uop<uint8_t>(
657  machInst, vd * 2, ufp0, inc * 2);
658  } else {
659  microOps[uopIdx++] = new MicroUnpackNeon2to8Uop<uint8_t>(
660  machInst, vd * 2, ufp0, inc * 2, lane);
661  }
662  break;
663  case 1:
664  if (all) {
665  microOps[uopIdx++] = new MicroUnpackAllNeon2to8Uop<uint16_t>(
666  machInst, vd * 2, ufp0, inc * 2);
667  } else {
668  microOps[uopIdx++] = new MicroUnpackNeon2to8Uop<uint16_t>(
669  machInst, vd * 2, ufp0, inc * 2, lane);
670  }
671  break;
672  case 2:
673  if (all) {
674  microOps[uopIdx++] = new MicroUnpackAllNeon4to8Uop<uint32_t>(
675  machInst, vd * 2, ufp0, inc * 2);
676  } else {
677  microOps[uopIdx++] = new MicroUnpackNeon4to8Uop<uint32_t>(
678  machInst, vd * 2, ufp0, inc * 2, lane);
679  }
680  break;
681  default:
682  // Bad size
683  microOps[uopIdx++] = new Unknown(machInst);
684  break;
685  }
686  break;
687  case 3:
688  assert(regs == 3);
689  switch (size) {
690  case 0:
691  if (all) {
692  microOps[uopIdx++] = new MicroUnpackAllNeon2to6Uop<uint8_t>(
693  machInst, vd * 2, ufp0, inc * 2);
694  } else {
695  microOps[uopIdx++] = new MicroUnpackNeon2to6Uop<uint8_t>(
696  machInst, vd * 2, ufp0, inc * 2, lane);
697  }
698  break;
699  case 1:
700  if (all) {
701  microOps[uopIdx++] = new MicroUnpackAllNeon2to6Uop<uint16_t>(
702  machInst, vd * 2, ufp0, inc * 2);
703  } else {
704  microOps[uopIdx++] = new MicroUnpackNeon2to6Uop<uint16_t>(
705  machInst, vd * 2, ufp0, inc * 2, lane);
706  }
707  break;
708  case 2:
709  if (all) {
710  microOps[uopIdx++] = new MicroUnpackAllNeon4to6Uop<uint32_t>(
711  machInst, vd * 2, ufp0, inc * 2);
712  } else {
713  microOps[uopIdx++] = new MicroUnpackNeon4to6Uop<uint32_t>(
714  machInst, vd * 2, ufp0, inc * 2, lane);
715  }
716  break;
717  default:
718  // Bad size
719  microOps[uopIdx++] = new Unknown(machInst);
720  break;
721  }
722  break;
723  case 2:
724  assert(regs == 2);
725  assert(loadRegs <= 2);
726  switch (size) {
727  case 0:
728  if (all) {
729  microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint8_t>(
730  machInst, vd * 2, ufp0, inc * 2);
731  } else {
732  microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint8_t>(
733  machInst, vd * 2, ufp0, inc * 2, lane);
734  }
735  break;
736  case 1:
737  if (all) {
738  microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint16_t>(
739  machInst, vd * 2, ufp0, inc * 2);
740  } else {
741  microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint16_t>(
742  machInst, vd * 2, ufp0, inc * 2, lane);
743  }
744  break;
745  case 2:
746  if (all) {
747  microOps[uopIdx++] = new MicroUnpackAllNeon2to4Uop<uint32_t>(
748  machInst, vd * 2, ufp0, inc * 2);
749  } else {
750  microOps[uopIdx++] = new MicroUnpackNeon2to4Uop<uint32_t>(
751  machInst, vd * 2, ufp0, inc * 2, lane);
752  }
753  break;
754  default:
755  // Bad size
756  microOps[uopIdx++] = new Unknown(machInst);
757  break;
758  }
759  break;
760  case 1:
761  assert(regs == 1 || (all && regs == 2));
762  assert(loadRegs <= 2);
763  for (unsigned offset = 0; offset < regs; offset++) {
764  switch (size) {
765  case 0:
766  if (all) {
767  microOps[uopIdx++] =
768  new MicroUnpackAllNeon2to2Uop<uint8_t>(
769  machInst, (vd + offset) * 2, ufp0, inc * 2);
770  } else {
771  microOps[uopIdx++] =
772  new MicroUnpackNeon2to2Uop<uint8_t>(
773  machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
774  }
775  break;
776  case 1:
777  if (all) {
778  microOps[uopIdx++] =
779  new MicroUnpackAllNeon2to2Uop<uint16_t>(
780  machInst, (vd + offset) * 2, ufp0, inc * 2);
781  } else {
782  microOps[uopIdx++] =
783  new MicroUnpackNeon2to2Uop<uint16_t>(
784  machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
785  }
786  break;
787  case 2:
788  if (all) {
789  microOps[uopIdx++] =
790  new MicroUnpackAllNeon2to2Uop<uint32_t>(
791  machInst, (vd + offset) * 2, ufp0, inc * 2);
792  } else {
793  microOps[uopIdx++] =
794  new MicroUnpackNeon2to2Uop<uint32_t>(
795  machInst, (vd + offset) * 2, ufp0, inc * 2, lane);
796  }
797  break;
798  default:
799  // Bad size
800  microOps[uopIdx++] = new Unknown(machInst);
801  break;
802  }
803  }
804  break;
805  default:
806  // Bad number of elements to unpack
807  microOps[uopIdx++] = new Unknown(machInst);
808  }
809  assert(uopIdx == numMicroops);
810 
 // All but the final micro-op commit with a delay.
811  for (unsigned i = 0; i < numMicroops - 1; i++) {
812  MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
813  assert(uopPtr);
814  uopPtr->setDelayedCommit();
815  }
817  microOps[numMicroops - 1]->setLastMicroop();
818 }
819 
// NEON VSTn (multiple structures) macro-op: the mirror of VldMultOp —
// first interleave the source registers into the special temp vector
// registers (when storing multiple elements), then store them with one
// or two bulk store micro-ops, then optionally write the base back.
820 VstMultOp::VstMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass,
821  unsigned elems, RegIndex rn, RegIndex vd, unsigned regs,
822  unsigned inc, uint32_t size, uint32_t align, RegIndex rm) :
823  PredMacroOp(mnem, machInst, __opClass)
824 {
825  assert(regs > 0 && regs <= 4);
826  assert(regs % elems == 0);
827 
 // Base count: >2 registers need two store micro-ops, else one.
828  numMicroops = (regs > 2) ? 2 : 1;
 // rm == 15 encodes "no writeback".
829  bool wb = (rm != 15);
830  bool interleave = (elems > 1);
831 
832  if (wb) numMicroops++;
833  if (interleave) numMicroops += (regs / elems);
835 
836  uint32_t noAlign = 0;
837 
 // Interleaved data is staged in the special temp element registers;
 // otherwise the stores read the source registers directly.
838  RegIndex rMid = interleave ? VecSpecialElem : vd * 2;
839 
840  unsigned uopIdx = 0;
841  if (interleave) {
842  switch (elems) {
843  case 4:
844  assert(regs == 4);
845  microOps[uopIdx++] = newNeonMixInst<MicroInterNeon8Uop>(
846  size, machInst, rMid, vd * 2, inc * 2);
847  break;
848  case 3:
849  assert(regs == 3);
850  microOps[uopIdx++] = newNeonMixInst<MicroInterNeon6Uop>(
851  size, machInst, rMid, vd * 2, inc * 2);
852  break;
853  case 2:
854  assert(regs == 4 || regs == 2);
855  if (regs == 4) {
856  microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
857  size, machInst, rMid, vd * 2, inc * 2);
858  microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
859  size, machInst, rMid + 4, vd * 2 + 2, inc * 2);
860  } else {
861  microOps[uopIdx++] = newNeonMixInst<MicroInterNeon4Uop>(
862  size, machInst, rMid, vd * 2, inc * 2);
863  }
864  break;
865  default:
866  // Bad number of elements to interleave
867  microOps[uopIdx++] = new Unknown(machInst);
868  }
869  }
870  switch (regs) {
871  case 4:
872  microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
873  size, machInst, rMid, rn, 0, align);
874  microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
875  size, machInst, rMid + 4, rn, 16, noAlign);
876  break;
877  case 3:
878  microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
879  size, machInst, rMid, rn, 0, align);
880  microOps[uopIdx++] = newNeonMemInst<MicroStrNeon8Uop>(
881  size, machInst, rMid + 4, rn, 16, noAlign);
882  break;
883  case 2:
884  microOps[uopIdx++] = newNeonMemInst<MicroStrNeon16Uop>(
885  size, machInst, rMid, rn, 0, align);
886  break;
887  case 1:
888  microOps[uopIdx++] = newNeonMemInst<MicroStrNeon8Uop>(
889  size, machInst, rMid, rn, 0, align);
890  break;
891  default:
892  // Unknown number of registers
893  microOps[uopIdx++] = new Unknown(machInst);
894  }
 // Writeback: add rm for a general register, else the fixed transfer
 // size (regs * 8 bytes).
895  if (wb) {
896  if (rm != 15 && rm != 13) {
897  microOps[uopIdx++] =
898  new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
899  } else {
900  microOps[uopIdx++] =
901  new MicroAddiUop(machInst, rn, rn, regs * 8);
902  }
903  }
904  assert(uopIdx == numMicroops);
905 
 // All but the final micro-op commit with a delay.
906  for (unsigned i = 0; i < numMicroops - 1; i++) {
907  MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
908  assert(uopPtr);
909  uopPtr->setDelayedCommit();
910  }
912  microOps[numMicroops - 1]->setLastMicroop();
913 }
914 
// NEON VSTn single-structure (one lane) macro-op: pack the selected lane
// of each source register into the special temp vector registers, store
// storeSize bytes with one bulk store micro-op, then optionally write
// the base back.  The 'all' form is not supported here (asserted out).
// NOTE(review): the constructor-name line is not visible in this extract;
// presumably VstSingleOp — confirm against the full source.
916  OpClass __opClass, bool all, unsigned elems,
917  RegIndex rn, RegIndex vd, unsigned regs,
918  unsigned inc, uint32_t size, uint32_t align,
919  RegIndex rm, unsigned lane) :
920  PredMacroOp(mnem, machInst, __opClass)
921 {
922  assert(!all);
923  assert(regs > 0 && regs <= 4);
924  assert(regs % elems == 0);
925 
 // Bytes moved per structure: element size times element count.
926  unsigned eBytes = (1 << size);
927  unsigned storeSize = eBytes * elems;
928  unsigned storeRegs M5_VAR_USED =
929  (storeSize + sizeof(uint32_t) - 1) / sizeof(uint32_t);
930 
931  assert(storeRegs > 0 && storeRegs <= 4);
932 
933  numMicroops = 1;
 // rm == 15 encodes "no writeback".
934  bool wb = (rm != 15);
935 
936  if (wb) numMicroops++;
937  numMicroops += (regs / elems);
939 
940  RegIndex ufp0 = VecSpecialElem;
941 
 // Pack the lane data into the temp registers, selecting the micro-op
 // by element count and element size.
942  unsigned uopIdx = 0;
943  switch (elems) {
944  case 4:
945  assert(regs == 4);
946  switch (size) {
947  case 0:
948  microOps[uopIdx++] = new MicroPackNeon8to2Uop<uint8_t>(
949  machInst, ufp0, vd * 2, inc * 2, lane);
950  break;
951  case 1:
952  microOps[uopIdx++] = new MicroPackNeon8to2Uop<uint16_t>(
953  machInst, ufp0, vd * 2, inc * 2, lane);
954  break;
955  case 2:
956  microOps[uopIdx++] = new MicroPackNeon8to4Uop<uint32_t>(
957  machInst, ufp0, vd * 2, inc * 2, lane);
958  break;
959  default:
960  // Bad size
961  microOps[uopIdx++] = new Unknown(machInst);
962  break;
963  }
964  break;
965  case 3:
966  assert(regs == 3);
967  switch (size) {
968  case 0:
969  microOps[uopIdx++] = new MicroPackNeon6to2Uop<uint8_t>(
970  machInst, ufp0, vd * 2, inc * 2, lane);
971  break;
972  case 1:
973  microOps[uopIdx++] = new MicroPackNeon6to2Uop<uint16_t>(
974  machInst, ufp0, vd * 2, inc * 2, lane);
975  break;
976  case 2:
977  microOps[uopIdx++] = new MicroPackNeon6to4Uop<uint32_t>(
978  machInst, ufp0, vd * 2, inc * 2, lane);
979  break;
980  default:
981  // Bad size
982  microOps[uopIdx++] = new Unknown(machInst);
983  break;
984  }
985  break;
986  case 2:
987  assert(regs == 2);
988  assert(storeRegs <= 2);
989  switch (size) {
990  case 0:
991  microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint8_t>(
992  machInst, ufp0, vd * 2, inc * 2, lane);
993  break;
994  case 1:
995  microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint16_t>(
996  machInst, ufp0, vd * 2, inc * 2, lane);
997  break;
998  case 2:
999  microOps[uopIdx++] = new MicroPackNeon4to2Uop<uint32_t>(
1000  machInst, ufp0, vd * 2, inc * 2, lane);
1001  break;
1002  default:
1003  // Bad size
1004  microOps[uopIdx++] = new Unknown(machInst);
1005  break;
1006  }
1007  break;
1008  case 1:
1009  assert(regs == 1 || (all && regs == 2));
1010  assert(storeRegs <= 2);
1011  for (unsigned offset = 0; offset < regs; offset++) {
1012  switch (size) {
1013  case 0:
1014  microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint8_t>(
1015  machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
1016  break;
1017  case 1:
1018  microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint16_t>(
1019  machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
1020  break;
1021  case 2:
1022  microOps[uopIdx++] = new MicroPackNeon2to2Uop<uint32_t>(
1023  machInst, ufp0, (vd + offset) * 2, inc * 2, lane);
1024  break;
1025  default:
1026  // Bad size
1027  microOps[uopIdx++] = new Unknown(machInst);
1028  break;
1029  }
1030  }
1031  break;
1032  default:
1033  // Bad number of elements to unpack
1034  microOps[uopIdx++] = new Unknown(machInst);
1035  }
 // Pick the store micro-op matching the total transfer size and the
 // element width.
1036  switch (storeSize) {
1037  case 1:
1038  microOps[uopIdx++] = new MicroStrNeon1Uop<uint8_t>(
1039  machInst, ufp0, rn, 0, align);
1040  break;
1041  case 2:
1042  if (eBytes == 2) {
1043  microOps[uopIdx++] = new MicroStrNeon2Uop<uint16_t>(
1044  machInst, ufp0, rn, 0, align);
1045  } else {
1046  microOps[uopIdx++] = new MicroStrNeon2Uop<uint8_t>(
1047  machInst, ufp0, rn, 0, align);
1048  }
1049  break;
1050  case 3:
1051  microOps[uopIdx++] = new MicroStrNeon3Uop<uint8_t>(
1052  machInst, ufp0, rn, 0, align);
1053  break;
1054  case 4:
1055  switch (eBytes) {
1056  case 1:
1057  microOps[uopIdx++] = new MicroStrNeon4Uop<uint8_t>(
1058  machInst, ufp0, rn, 0, align);
1059  break;
1060  case 2:
1061  microOps[uopIdx++] = new MicroStrNeon4Uop<uint16_t>(
1062  machInst, ufp0, rn, 0, align);
1063  break;
1064  case 4:
1065  microOps[uopIdx++] = new MicroStrNeon4Uop<uint32_t>(
1066  machInst, ufp0, rn, 0, align);
1067  break;
1068  }
1069  break;
1070  case 6:
1071  microOps[uopIdx++] = new MicroStrNeon6Uop<uint16_t>(
1072  machInst, ufp0, rn, 0, align);
1073  break;
1074  case 8:
1075  switch (eBytes) {
1076  case 2:
1077  microOps[uopIdx++] = new MicroStrNeon8Uop<uint16_t>(
1078  machInst, ufp0, rn, 0, align);
1079  break;
1080  case 4:
1081  microOps[uopIdx++] = new MicroStrNeon8Uop<uint32_t>(
1082  machInst, ufp0, rn, 0, align);
1083  break;
1084  }
1085  break;
1086  case 12:
1087  microOps[uopIdx++] = new MicroStrNeon12Uop<uint32_t>(
1088  machInst, ufp0, rn, 0, align);
1089  break;
1090  case 16:
1091  microOps[uopIdx++] = new MicroStrNeon16Uop<uint32_t>(
1092  machInst, ufp0, rn, 0, align);
1093  break;
1094  default:
1095  // Bad store size
1096  microOps[uopIdx++] = new Unknown(machInst);
1097  }
 // Writeback: add rm for a general register, else the access size.
1098  if (wb) {
1099  if (rm != 15 && rm != 13) {
1100  microOps[uopIdx++] =
1101  new MicroAddUop(machInst, rn, rn, rm, 0, ArmISA::LSL);
1102  } else {
1103  microOps[uopIdx++] =
1104  new MicroAddiUop(machInst, rn, rn, storeSize);
1105  }
1106  }
1107  assert(uopIdx == numMicroops);
1108 
 // All but the final micro-op commit with a delay; mark the macro-op
 // boundaries explicitly.
1109  for (unsigned i = 0; i < numMicroops - 1; i++) {
1110  MicroOp * uopPtr = dynamic_cast<MicroOp *>(microOps[i].get());
1111  assert(uopPtr);
1112  uopPtr->setDelayedCommit();
1113  }
1114  microOps[0]->setFirstMicroop();
1115  microOps[numMicroops - 1]->setLastMicroop();
1116 }
1117 
1119  OpClass __opClass, RegIndex rn, RegIndex vd,
1120  RegIndex rm, uint8_t eSize, uint8_t dataSize,
1121  uint8_t numStructElems, uint8_t numRegs, bool wb) :
1122  PredMacroOp(mnem, machInst, __opClass)
1123 {
1125  RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
1126  bool baseIsSP = isSP((IntRegIndex) rnsp);
1127 
1128  numMicroops = wb ? 1 : 0;
1129 
1130  int totNumBytes = numRegs * dataSize / 8;
1131  assert(totNumBytes <= 64);
1132 
1133  // The guiding principle here is that no more than 16 bytes can be
1134  // transferred at a time
1135  int numMemMicroops = totNumBytes / 16;
1136  int residuum = totNumBytes % 16;
1137  if (residuum)
1138  ++numMemMicroops;
1139  numMicroops += numMemMicroops;
1140 
1141  int numMarshalMicroops = numRegs / 2 + (numRegs % 2 ? 1 : 0);
1142  numMicroops += numMarshalMicroops;
1143 
1145  unsigned uopIdx = 0;
1146  uint32_t memaccessFlags = (TLB::ArmFlags)eSize | TLB::AllowUnaligned;
1147 
1148  int i = 0;
1149  for (; i < numMemMicroops - 1; ++i) {
1150  microOps[uopIdx++] = new MicroNeonLoad64(
1151  machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
1152  baseIsSP, 16 /* accSize */, eSize);
1153  }
1154  microOps[uopIdx++] = new MicroNeonLoad64(
1155  machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
1156  residuum ? residuum : 16 /* accSize */, eSize);
1157 
1158  // Writeback microop: the post-increment amount is encoded in "Rm": a
1159  // 64-bit general register OR as '11111' for an immediate value equal to
1160  // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
1161  if (wb) {
1162  if (rm != ((RegIndex) INTREG_X31)) {
1163  microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
1164  UXTX, 0);
1165  } else {
1166  microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
1167  totNumBytes);
1168  }
1169  }
1170 
1171  for (int i = 0; i < numMarshalMicroops; ++i) {
1172  switch(numRegs) {
1173  case 1: microOps[uopIdx++] = new MicroDeintNeon64_1Reg(
1174  machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
1175  numStructElems, 1, i /* step */);
1176  break;
1177  case 2: microOps[uopIdx++] = new MicroDeintNeon64_2Reg(
1178  machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
1179  numStructElems, 2, i /* step */);
1180  break;
1181  case 3: microOps[uopIdx++] = new MicroDeintNeon64_3Reg(
1182  machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
1183  numStructElems, 3, i /* step */);
1184  break;
1185  case 4: microOps[uopIdx++] = new MicroDeintNeon64_4Reg(
1186  machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
1187  numStructElems, 4, i /* step */);
1188  break;
1189  default: panic("Invalid number of registers");
1190  }
1191 
1192  }
1193 
1194  assert(uopIdx == numMicroops);
1195 
1196  for (int i = 0; i < numMicroops - 1; ++i) {
1198  }
1199  microOps[numMicroops - 1]->setLastMicroop();
1200 }
1201 
1203  OpClass __opClass, RegIndex rn, RegIndex vd,
1204  RegIndex rm, uint8_t eSize, uint8_t dataSize,
1205  uint8_t numStructElems, uint8_t numRegs, bool wb) :
1206  PredMacroOp(mnem, machInst, __opClass)
1207 {
1209  RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
1210  bool baseIsSP = isSP((IntRegIndex) rnsp);
1211 
1212  numMicroops = wb ? 1 : 0;
1213 
1214  int totNumBytes = numRegs * dataSize / 8;
1215  assert(totNumBytes <= 64);
1216 
1217  // The guiding principle here is that no more than 16 bytes can be
1218  // transferred at a time
1219  int numMemMicroops = totNumBytes / 16;
1220  int residuum = totNumBytes % 16;
1221  if (residuum)
1222  ++numMemMicroops;
1223  numMicroops += numMemMicroops;
1224 
1225  int numMarshalMicroops = totNumBytes > 32 ? 2 : 1;
1226  numMicroops += numMarshalMicroops;
1227 
1229  unsigned uopIdx = 0;
1230 
1231  for (int i = 0; i < numMarshalMicroops; ++i) {
1232  switch (numRegs) {
1233  case 1: microOps[uopIdx++] = new MicroIntNeon64_1Reg(
1234  machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
1235  numStructElems, 1, i /* step */);
1236  break;
1237  case 2: microOps[uopIdx++] = new MicroIntNeon64_2Reg(
1238  machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
1239  numStructElems, 2, i /* step */);
1240  break;
1241  case 3: microOps[uopIdx++] = new MicroIntNeon64_3Reg(
1242  machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
1243  numStructElems, 3, i /* step */);
1244  break;
1245  case 4: microOps[uopIdx++] = new MicroIntNeon64_4Reg(
1246  machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
1247  numStructElems, 4, i /* step */);
1248  break;
1249  default: panic("Invalid number of registers");
1250  }
1251  }
1252 
1253  uint32_t memaccessFlags = (TLB::ArmFlags)eSize | TLB::AllowUnaligned;
1254 
1255  int i = 0;
1256  for (; i < numMemMicroops - 1; ++i) {
1257  microOps[uopIdx++] = new MicroNeonStore64(
1258  machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
1259  baseIsSP, 16 /* accSize */, eSize);
1260  }
1261  microOps[uopIdx++] = new MicroNeonStore64(
1262  machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
1263  residuum ? residuum : 16 /* accSize */, eSize);
1264 
1265  // Writeback microop: the post-increment amount is encoded in "Rm": a
1266  // 64-bit general register OR as '11111' for an immediate value equal to
1267  // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
1268  if (wb) {
1269  if (rm != ((RegIndex) INTREG_X31)) {
1270  microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
1271  UXTX, 0);
1272  } else {
1273  microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
1274  totNumBytes);
1275  }
1276  }
1277 
1278  assert(uopIdx == numMicroops);
1279 
1280  for (int i = 0; i < numMicroops - 1; i++) {
1282  }
1283  microOps[numMicroops - 1]->setLastMicroop();
1284 }
1285 
1287  OpClass __opClass, RegIndex rn, RegIndex vd,
1288  RegIndex rm, uint8_t eSize, uint8_t dataSize,
1289  uint8_t numStructElems, uint8_t index, bool wb,
1290  bool replicate) :
1291  PredMacroOp(mnem, machInst, __opClass),
1292  eSize(0), dataSize(0), numStructElems(0), index(0),
1293  wb(false), replicate(false)
1294 
1295 {
1297  RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
1298  bool baseIsSP = isSP((IntRegIndex) rnsp);
1299 
1300  numMicroops = wb ? 1 : 0;
1301 
1302  int eSizeBytes = 1 << eSize;
1303  int totNumBytes = numStructElems * eSizeBytes;
1304  assert(totNumBytes <= 64);
1305 
1306  // The guiding principle here is that no more than 16 bytes can be
1307  // transferred at a time
1308  int numMemMicroops = totNumBytes / 16;
1309  int residuum = totNumBytes % 16;
1310  if (residuum)
1311  ++numMemMicroops;
1312  numMicroops += numMemMicroops;
1313 
1314  int numMarshalMicroops = numStructElems / 2 + (numStructElems % 2 ? 1 : 0);
1315  numMicroops += numMarshalMicroops;
1316 
1318  unsigned uopIdx = 0;
1319 
1320  uint32_t memaccessFlags = (TLB::ArmFlags)eSize | TLB::AllowUnaligned;
1321 
1322  int i = 0;
1323  for (; i < numMemMicroops - 1; ++i) {
1324  microOps[uopIdx++] = new MicroNeonLoad64(
1325  machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
1326  baseIsSP, 16 /* accSize */, eSize);
1327  }
1328  microOps[uopIdx++] = new MicroNeonLoad64(
1329  machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
1330  residuum ? residuum : 16 /* accSize */, eSize);
1331 
1332  // Writeback microop: the post-increment amount is encoded in "Rm": a
1333  // 64-bit general register OR as '11111' for an immediate value equal to
1334  // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
1335  if (wb) {
1336  if (rm != ((RegIndex) INTREG_X31)) {
1337  microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
1338  UXTX, 0);
1339  } else {
1340  microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
1341  totNumBytes);
1342  }
1343  }
1344 
1345  for (int i = 0; i < numMarshalMicroops; ++i) {
1346  microOps[uopIdx++] = new MicroUnpackNeon64(
1347  machInst, vd + (RegIndex) (2 * i), vx, eSize, dataSize,
1348  numStructElems, index, i /* step */, replicate);
1349  }
1350 
1351  assert(uopIdx == numMicroops);
1352 
1353  for (int i = 0; i < numMicroops - 1; i++) {
1355  }
1356  microOps[numMicroops - 1]->setLastMicroop();
1357 }
1358 
1360  OpClass __opClass, RegIndex rn, RegIndex vd,
1361  RegIndex rm, uint8_t eSize, uint8_t dataSize,
1362  uint8_t numStructElems, uint8_t index, bool wb,
1363  bool replicate) :
1364  PredMacroOp(mnem, machInst, __opClass),
1365  eSize(0), dataSize(0), numStructElems(0), index(0),
1366  wb(false), replicate(false)
1367 {
1369  RegIndex rnsp = (RegIndex) makeSP((IntRegIndex) rn);
1370  bool baseIsSP = isSP((IntRegIndex) rnsp);
1371 
1372  numMicroops = wb ? 1 : 0;
1373 
1374  int eSizeBytes = 1 << eSize;
1375  int totNumBytes = numStructElems * eSizeBytes;
1376  assert(totNumBytes <= 64);
1377 
1378  // The guiding principle here is that no more than 16 bytes can be
1379  // transferred at a time
1380  int numMemMicroops = totNumBytes / 16;
1381  int residuum = totNumBytes % 16;
1382  if (residuum)
1383  ++numMemMicroops;
1384  numMicroops += numMemMicroops;
1385 
1386  int numMarshalMicroops = totNumBytes > 32 ? 2 : 1;
1387  numMicroops += numMarshalMicroops;
1388 
1390  unsigned uopIdx = 0;
1391 
1392  for (int i = 0; i < numMarshalMicroops; ++i) {
1393  microOps[uopIdx++] = new MicroPackNeon64(
1394  machInst, vx + (RegIndex) (2 * i), vd, eSize, dataSize,
1395  numStructElems, index, i /* step */, replicate);
1396  }
1397 
1398  uint32_t memaccessFlags = (TLB::ArmFlags)eSize | TLB::AllowUnaligned;
1399 
1400  int i = 0;
1401  for (; i < numMemMicroops - 1; ++i) {
1402  microOps[uopIdx++] = new MicroNeonStore64(
1403  machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags,
1404  baseIsSP, 16 /* accsize */, eSize);
1405  }
1406  microOps[uopIdx++] = new MicroNeonStore64(
1407  machInst, vx + (RegIndex) i, rnsp, 16 * i, memaccessFlags, baseIsSP,
1408  residuum ? residuum : 16 /* accSize */, eSize);
1409 
1410  // Writeback microop: the post-increment amount is encoded in "Rm": a
1411  // 64-bit general register OR as '11111' for an immediate value equal to
1412  // the total number of bytes transferred (i.e. 8, 16, 24, 32, 48 or 64)
1413  if (wb) {
1414  if (rm != ((RegIndex) INTREG_X31)) {
1415  microOps[uopIdx++] = new MicroAddXERegUop(machInst, rnsp, rnsp, rm,
1416  UXTX, 0);
1417  } else {
1418  microOps[uopIdx++] = new MicroAddXiUop(machInst, rnsp, rnsp,
1419  totNumBytes);
1420  }
1421  }
1422 
1423  assert(uopIdx == numMicroops);
1424 
1425  for (int i = 0; i < numMicroops - 1; i++) {
1427  }
1428  microOps[numMicroops - 1]->setLastMicroop();
1429 }
1430 
1432  OpClass __opClass, IntRegIndex rn,
1433  RegIndex vd, bool single, bool up,
1434  bool writeback, bool load, uint32_t offset) :
1435  PredMacroOp(mnem, machInst, __opClass)
1436 {
1437  int i = 0;
1438 
1439  // The lowest order bit selects fldmx (set) or fldmd (clear). These seem
1440  // to be functionally identical except that fldmx is deprecated. For now
1441  // we'll assume they're otherwise interchangable.
1442  int count = (single ? offset : (offset / 2));
1443  numMicroops = count * (single ? 1 : 2) + (writeback ? 1 : 0);
1445 
1446  int64_t addr = 0;
1447 
1448  if (!up)
1449  addr = 4 * offset;
1450 
1451  bool tempUp = up;
1452  for (int j = 0; j < count; j++) {
1453  if (load) {
1454  if (single) {
1455  microOps[i++] = new MicroLdrFpUop(machInst, vd++, rn,
1456  tempUp, addr);
1457  } else {
1458  microOps[i++] = new MicroLdrDBFpUop(machInst, vd++, rn,
1459  tempUp, addr);
1460  microOps[i++] = new MicroLdrDTFpUop(machInst, vd++, rn, tempUp,
1461  addr + (up ? 4 : -4));
1462  }
1463  } else {
1464  if (single) {
1465  microOps[i++] = new MicroStrFpUop(machInst, vd++, rn,
1466  tempUp, addr);
1467  } else {
1468  microOps[i++] = new MicroStrDBFpUop(machInst, vd++, rn,
1469  tempUp, addr);
1470  microOps[i++] = new MicroStrDTFpUop(machInst, vd++, rn, tempUp,
1471  addr + (up ? 4 : -4));
1472  }
1473  }
1474  if (!tempUp) {
1475  addr -= (single ? 4 : 8);
1476  // The microops don't handle negative displacement, so turn if we
1477  // hit zero, flip polarity and start adding.
1478  if (addr <= 0) {
1479  tempUp = true;
1480  addr = -addr;
1481  }
1482  } else {
1483  addr += (single ? 4 : 8);
1484  }
1485  }
1486 
1487  if (writeback) {
1488  if (up) {
1489  microOps[i++] =
1490  new MicroAddiUop(machInst, rn, rn, 4 * offset);
1491  } else {
1492  microOps[i++] =
1493  new MicroSubiUop(machInst, rn, rn, 4 * offset);
1494  }
1495  }
1496 
1497  assert(numMicroops == i);
1499 
1500  for (StaticInstPtr *curUop = microOps;
1501  !(*curUop)->isLastMicroop(); curUop++) {
1502  MicroOp * uopPtr = dynamic_cast<MicroOp *>(curUop->get());
1503  assert(uopPtr);
1504  uopPtr->setDelayedCommit();
1505  }
1506 }
1507 
1508 std::string
1510  Addr pc, const Loader::SymbolTable *symtab) const
1511 {
1512  std::stringstream ss;
1513  printMnemonic(ss);
1514  printIntReg(ss, ura);
1515  ss << ", ";
1516  printIntReg(ss, urb);
1517  ss << ", ";
1518  ccprintf(ss, "#%d", imm);
1519  return ss.str();
1520 }
1521 
1522 std::string
1524  Addr pc, const Loader::SymbolTable *symtab) const
1525 {
1526  std::stringstream ss;
1527  printMnemonic(ss);
1528  printIntReg(ss, ura);
1529  ss << ", ";
1530  printIntReg(ss, urb);
1531  ss << ", ";
1532  ccprintf(ss, "#%d", imm);
1533  return ss.str();
1534 }
1535 
1536 std::string
1538  Addr pc, const Loader::SymbolTable *symtab) const
1539 {
1540  std::stringstream ss;
1541  printMnemonic(ss);
1542  ss << "[PC,CPSR]";
1543  return ss.str();
1544 }
1545 
1546 std::string
1548  Addr pc, const Loader::SymbolTable *symtab) const
1549 {
1550  std::stringstream ss;
1551  printMnemonic(ss);
1552  printIntReg(ss, ura);
1553  ccprintf(ss, ", ");
1554  printIntReg(ss, urb);
1555  printExtendOperand(false, ss, (IntRegIndex)urc, type, shiftAmt);
1556  return ss.str();
1557 }
1558 
1559 std::string
1561  Addr pc, const Loader::SymbolTable *symtab) const
1562 {
1563  std::stringstream ss;
1564  printMnemonic(ss);
1565  printIntReg(ss, ura);
1566  ss << ", ";
1567  printIntReg(ss, urb);
1568  return ss.str();
1569 }
1570 
1571 std::string
1573  Addr pc, const Loader::SymbolTable *symtab) const
1574 {
1575  std::stringstream ss;
1576  printMnemonic(ss);
1577  printIntReg(ss, ura);
1578  ss << ", ";
1579  printIntReg(ss, urb);
1580  ss << ", ";
1581  printIntReg(ss, urc);
1582  return ss.str();
1583 }
1584 
1585 std::string
1587  Addr pc, const Loader::SymbolTable *symtab) const
1588 {
1589  std::stringstream ss;
1590  printMnemonic(ss);
1591  if (isFloating())
1592  printFloatReg(ss, ura);
1593  else
1594  printIntReg(ss, ura);
1595  ss << ", [";
1596  printIntReg(ss, urb);
1597  ss << ", ";
1598  ccprintf(ss, "#%d", imm);
1599  ss << "]";
1600  return ss.str();
1601 }
1602 
1603 std::string
1605  Addr pc, const Loader::SymbolTable *symtab) const
1606 {
1607  std::stringstream ss;
1608  printMnemonic(ss);
1609  printIntReg(ss, dest);
1610  ss << ",";
1611  printIntReg(ss, dest2);
1612  ss << ", [";
1613  printIntReg(ss, urb);
1614  ss << ", ";
1615  ccprintf(ss, "#%d", imm);
1616  ss << "]";
1617  return ss.str();
1618 }
1619 
1620 }
#define panic(...)
This implements a cprintf based panic() function.
Definition: logging.hh:163
void ccprintf(cp::Print &print)
Definition: cprintf.hh:127
static IntRegIndex makeSP(IntRegIndex reg)
Definition: intregs.hh:499
void setDelayedCommit()
Definition: static_inst.hh:209
std::string generateDisassembly(Addr pc, const Loader::SymbolTable *symtab) const override
Internal function to generate disassembly string.
Definition: macromem.cc:1604
void printMnemonic(std::ostream &os, const std::string &suffix="", bool withPred=true, bool withCond64=false, ConditionCode cond64=COND_UC) const
Definition: static_inst.cc:372
Bitfield< 30, 0 > index
Bitfield< 5, 3 > reg
Definition: types.hh:87
BigFpMemPostOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, bool load, IntRegIndex dest, IntRegIndex base, int64_t imm)
Definition: macromem.cc:366
std::string generateDisassembly(Addr pc, const Loader::SymbolTable *symtab) const override
Internal function to generate disassembly string.
Definition: macromem.cc:1523
Bitfield< 15, 12 > rt
Definition: types.hh:123
IntRegIndex
Definition: intregs.hh:51
Bitfield< 7 > i
void printExtendOperand(bool firstOperand, std::ostream &os, IntRegIndex rm, ArmExtendType type, int64_t shiftAmt) const
Definition: static_inst.cc:555
VldSingleOp64(const char *mnem, ExtMachInst machInst, OpClass __opClass, RegIndex rn, RegIndex vd, RegIndex rm, uint8_t eSize, uint8_t dataSize, uint8_t numStructElems, uint8_t index, bool wb, bool replicate=false)
Definition: macromem.cc:1286
VstSingleOp64(const char *mnem, ExtMachInst machInst, OpClass __opClass, RegIndex rn, RegIndex vd, RegIndex rm, uint8_t eSize, uint8_t dataSize, uint8_t numStructElems, uint8_t index, bool wb, bool replicate=false)
Definition: macromem.cc:1359
ip6_addr_t addr
Definition: inet.hh:330
bool isFloating() const
Definition: static_inst.hh:171
MacroVFPMemOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, IntRegIndex rn, RegIndex vd, bool single, bool up, bool writeback, bool load, uint32_t offset)
Definition: macromem.cc:1431
Bitfield< 23, 0 > offset
Definition: types.hh:152
Overload hash function for BasicBlockRange type.
Definition: vec_reg.hh:587
Definition: ccregs.hh:41
Bitfield< 4, 0 > mode
BigFpMemLitOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, IntRegIndex dest, int64_t imm)
Definition: macromem.cc:444
BigFpMemPreOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, bool load, IntRegIndex dest, IntRegIndex base, int64_t imm)
Definition: macromem.cc:392
VstMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, unsigned width, RegIndex rn, RegIndex vd, unsigned regs, unsigned inc, uint32_t size, uint32_t align, RegIndex rm)
Definition: macromem.cc:820
VldMultOp64(const char *mnem, ExtMachInst machInst, OpClass __opClass, RegIndex rn, RegIndex vd, RegIndex rm, uint8_t eSize, uint8_t dataSize, uint8_t numStructElems, uint8_t numRegs, bool wb)
Definition: macromem.cc:1118
static unsigned int number_of_ones(int32_t val)
Definition: macromem.hh:51
std::string generateDisassembly(Addr pc, const Loader::SymbolTable *symtab) const override
Internal function to generate disassembly string.
Definition: macromem.cc:1537
uint8_t numStructElems
Definition: macromem.hh:214
Bitfield< 3, 0 > rm
Definition: types.hh:126
PairMemOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, uint32_t size, bool fp, bool load, bool noAlloc, bool signExt, bool exclusive, bool acrel, int64_t imm, AddrMode mode, IntRegIndex rn, IntRegIndex rt, IntRegIndex rt2)
Definition: macromem.cc:240
Utility functions and datatypes used by AArch64 NEON memory instructions.
BigFpMemImmOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, bool load, IntRegIndex dest, IntRegIndex base, int64_t imm)
Definition: macromem.cc:345
const ExtMachInst machInst
The binary machine instruction.
Definition: static_inst.hh:231
std::string generateDisassembly(Addr pc, const Loader::SymbolTable *symtab) const override
Internal function to generate disassembly string.
Definition: macromem.cc:1560
uint8_t type
Definition: inet.hh:328
uint8_t numStructElems
Definition: macromem.hh:202
Bitfield< 4 > pc
uint16_t RegIndex
Definition: types.hh:40
void inc(scfx_mant &mant)
Definition: scfx_mant.hh:309
void align(const scfx_rep &lhs, const scfx_rep &rhs, int &new_wp, int &len_mant, scfx_mant_ref &lhs_mant, scfx_mant_ref &rhs_mant)
Definition: scfx_rep.cc:2051
ConditionCode condCode
Definition: pred_inst.hh:214
void replaceBits(T &val, int first, int last, B bit_val)
A convenience function to replace bits first to last of val with bit_val in place.
Definition: bitfield.hh:156
std::string generateDisassembly(Addr pc, const Loader::SymbolTable *symtab) const override
Internal function to generate disassembly string.
Definition: macromem.cc:1547
void printIntReg(std::ostream &os, RegIndex reg_idx, uint8_t opWidth=0) const
Print a register name for disassembly given the unique dependence tag number (FP or int)...
Definition: static_inst.cc:294
Bitfield< 51, 12 > base
Definition: pagetable.hh:141
Bitfield< 19, 16 > rn
Definition: types.hh:121
const int NumVecV8ArchRegs
Definition: registers.hh:94
uint32_t numMicroops
Definition: pred_inst.hh:340
std::string generateDisassembly(Addr pc, const Loader::SymbolTable *symtab) const override
Internal function to generate disassembly string.
Definition: macromem.cc:1586
VldMultOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, unsigned elems, RegIndex rn, RegIndex vd, unsigned regs, unsigned inc, uint32_t size, uint32_t align, RegIndex rm)
Definition: macromem.cc:457
Bitfield< 21 > ss
void setFirstMicroop()
Definition: static_inst.hh:207
void setLastMicroop()
Definition: static_inst.hh:208
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition: types.hh:140
Bitfield< 7, 0 > imm
Definition: types.hh:140
std::string generateDisassembly(Addr pc, const Loader::SymbolTable *symtab) const override
Internal function to generate disassembly string.
Definition: macromem.cc:1509
VstMultOp64(const char *mnem, ExtMachInst machInst, OpClass __opClass, RegIndex rn, RegIndex vd, RegIndex rm, uint8_t eSize, uint8_t dataSize, uint8_t numStructElems, uint8_t numRegs, bool wb)
Definition: macromem.cc:1202
Bitfield< 21 > writeback
Definition: types.hh:134
VldSingleOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, bool all, unsigned elems, RegIndex rn, RegIndex vd, unsigned regs, unsigned inc, uint32_t size, uint32_t align, RegIndex rm, unsigned lane)
Definition: macromem.cc:552
Bitfield< 24 > j
VstSingleOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, bool all, unsigned elems, RegIndex rn, RegIndex vd, unsigned regs, unsigned inc, uint32_t size, uint32_t align, RegIndex rm, unsigned lane)
Definition: macromem.cc:915
Bitfield< 23 > up
Definition: types.hh:132
Base class for predicated macro-operations.
Definition: pred_inst.hh:336
Bitfield< 29 > vx
Definition: miscregs.hh:63
Base class for Memory microops.
Definition: macromem.hh:65
void setFlag(Flags f)
Definition: static_inst.hh:210
const int VecSpecialElem
Definition: registers.hh:121
ArmExtendType
Definition: types.hh:537
TheISA::ExtMachInst ExtMachInst
Binary extended machine instruction type.
Definition: static_inst.hh:89
void printFloatReg(std::ostream &os, RegIndex reg_idx) const
Definition: static_inst.cc:340
BigFpMemRegOp(const char *mnem, ExtMachInst machInst, OpClass __opClass, bool load, IntRegIndex dest, IntRegIndex base, IntRegIndex offset, ArmExtendType type, int64_t imm)
Definition: macromem.cc:418
StaticInstPtr * microOps
Definition: pred_inst.hh:341
static bool isSP(IntRegIndex reg)
Definition: intregs.hh:515
T bits(T val, int first, int last)
Extract the bitfield from position &#39;first&#39; to &#39;last&#39; (inclusive) from &#39;val&#39; and right justify it...
Definition: bitfield.hh:71
Bitfield< 19, 16 > fp
T * get() const
Directly access the pointer itself without taking a reference.
Definition: refcnt.hh:219
int count
Definition: refcnt.hh:64
std::string generateDisassembly(Addr pc, const Loader::SymbolTable *symtab) const override
Internal function to generate disassembly string.
Definition: macromem.cc:1572
static int intRegInMode(OperatingMode mode, int reg)
Definition: intregs.hh:462
bool isLastMicroop() const
Definition: static_inst.hh:201

Generated on Thu May 28 2020 16:11:01 for gem5 by doxygen 1.8.13