gem5  v22.0.0.1
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
sdma_engine.cc
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2021 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *
8  * 1. Redistributions of source code must retain the above copyright notice,
9  * this list of conditions and the following disclaimer.
10  *
11  * 2. Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation
13  * and/or other materials provided with the distribution.
14  *
15  * 3. Neither the name of the copyright holder nor the names of its
16  * contributors may be used to endorse or promote products derived from this
17  * software without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
33 
35 #include "arch/generic/mmu.hh"
38 #include "dev/amdgpu/sdma_mmio.hh"
39 #include "mem/packet.hh"
40 #include "mem/packet_access.hh"
41 #include "params/SDMAEngine.hh"
42 
43 namespace gem5
44 {
45 
46 SDMAEngine::SDMAEngine(const SDMAEngineParams &p)
47  : DmaVirtDevice(p), id(0), gfxBase(0), gfxRptr(0),
48  gfxDoorbell(0), gfxDoorbellOffset(0), gfxWptr(0), pageBase(0),
49  pageRptr(0), pageDoorbell(0), pageDoorbellOffset(0),
50  pageWptr(0), gpuDevice(nullptr), walker(p.walker)
51 {
52  gfx.ib(&gfxIb);
53  gfxIb.parent(&gfx);
54  gfx.valid(true);
55  gfxIb.valid(true);
56 
57  page.ib(&pageIb);
58  pageIb.parent(&page);
59  page.valid(true);
60  pageIb.valid(true);
61 
62  rlc0.ib(&rlc0Ib);
63  rlc0Ib.parent(&rlc0);
64 
65  rlc1.ib(&rlc1Ib);
66  rlc1Ib.parent(&rlc1);
67 }
68 
69 void
71 {
72  gpuDevice = gpu_device;
74 }
75 
76 int
78 {
79  switch (id) {
80  case 0:
82  case 1:
84  default:
85  panic("Unknown SDMA id");
86  }
87 }
88 
89 Addr
91 {
92  if (!gpuDevice->getVM().inAGP(addr)) {
93  Addr low_bits = bits(addr, 11, 0);
94  addr = (((addr >> 12) << 3) << 12) | low_bits;
95  }
96  return addr;
97 }
98 
106 {
107  if (gpuDevice->getVM().inAGP(vaddr)) {
108  // Use AGP translation gen
109  return TranslationGenPtr(
111  } else if (gpuDevice->getVM().inMMHUB(vaddr)) {
112  // Use MMHUB translation gen
114  &gpuDevice->getVM(), vaddr, size));
115  }
116 
117  // Assume GART otherwise as this is the only other translation aperture
118  // available to the SDMA engine processor.
119  return TranslationGenPtr(
121 }
122 
123 void
125 {
126  // Get first free RLC
127  if (!rlc0.valid()) {
128  DPRINTF(SDMAEngine, "Doorbell %lx mapped to RLC0\n", doorbell);
129  rlcMap.insert(std::make_pair(doorbell, 0));
130  rlc0.valid(true);
131  rlc0.base(rb_base);
132  rlc0.rptr(0);
133  rlc0.wptr(0);
134  rlc0.processing(false);
135  // TODO: size - I think pull from MQD 2^rb_cntrl[6:1]-1
136  rlc0.size(1024*1024);
137  } else if (!rlc1.valid()) {
138  DPRINTF(SDMAEngine, "Doorbell %lx mapped to RLC1\n", doorbell);
139  rlcMap.insert(std::make_pair(doorbell, 1));
140  rlc1.valid(true);
141  rlc1.base(rb_base);
142  rlc1.rptr(1);
143  rlc1.wptr(1);
144  rlc1.processing(false);
145  // TODO: size - I think pull from MQD 2^rb_cntrl[6:1]-1
146  rlc1.size(1024*1024);
147  } else {
148  panic("No free RLCs. Check they are properly unmapped.");
149  }
150 }
151 
152 void
154 {
155  assert(rlcMap.find(doorbell) != rlcMap.end());
156 
157  if (rlcMap[doorbell] == 0) {
158  rlc0.valid(false);
159  rlcMap.erase(doorbell);
160  } else if (rlcMap[doorbell] == 1) {
161  rlc1.valid(false);
162  rlcMap.erase(doorbell);
163  } else {
164  panic("Cannot unregister unknown RLC queue: %d\n", rlcMap[doorbell]);
165  }
166 }
167 
168 /* Start decoding packets from the Gfx queue. */
169 void
171 {
172  gfx.setWptr(wptrOffset);
173  if (!gfx.processing()) {
174  gfx.processing(true);
175  decodeNext(&gfx);
176  }
177 }
178 
179 /* Start decoding packets from the Page queue. */
180 void
182 {
183  page.setWptr(wptrOffset);
184  if (!page.processing()) {
185  page.processing(true);
186  decodeNext(&page);
187  }
188 }
189 
190 /* Process RLC queue at given doorbell. */
191 void
192 SDMAEngine::processRLC(Addr doorbellOffset, Addr wptrOffset)
193 {
194  assert(rlcMap.find(doorbellOffset) != rlcMap.end());
195 
196  if (rlcMap[doorbellOffset] == 0) {
197  processRLC0(wptrOffset);
198  } else if (rlcMap[doorbellOffset] == 1) {
199  processRLC1(wptrOffset);
200  } else {
201  panic("Cannot process unknown RLC queue: %d\n",
202  rlcMap[doorbellOffset]);
203  }
204 }
205 
206 /* Start decoding packets from the RLC0 queue. */
207 void
209 {
210  assert(rlc0.valid());
211 
212  rlc0.setWptr(wptrOffset);
213  if (!rlc0.processing()) {
214  cur_vmid = 1;
215  rlc0.processing(true);
216  decodeNext(&rlc0);
217  }
218 }
219 
220 /* Start decoding packets from the RLC1 queue. */
221 void
223 {
224  assert(rlc1.valid());
225 
226  rlc1.setWptr(wptrOffset);
227  if (!rlc1.processing()) {
228  cur_vmid = 1;
229  rlc1.processing(true);
230  decodeNext(&rlc1);
231  }
232 }
233 
234 /* Decoding next packet in the queue. */
235 void
237 {
238  DPRINTF(SDMAEngine, "SDMA decode rptr %p wptr %p\n", q->rptr(), q->wptr());
239 
240  if (q->rptr() != q->wptr()) {
241  // We are using lambda functions passed to the DmaVirtCallback objects
242  // which will call the actual callback method (e.g., decodeHeader).
243  // The dmaBuffer member of the DmaVirtCallback is passed to the lambda
244  // function as header in this case.
245  auto cb = new DmaVirtCallback<uint32_t>(
246  [ = ] (const uint32_t &header)
247  { decodeHeader(q, header); });
248  dmaReadVirt(q->rptr(), sizeof(uint32_t), cb, &cb->dmaBuffer);
249  } else {
250  q->processing(false);
251  if (q->parent()) {
252  DPRINTF(SDMAEngine, "SDMA switching queues\n");
253  decodeNext(q->parent());
254  }
255  cur_vmid = 0;
256  }
257 }
258 
259 /* Decoding the header of a packet. */
260 void
262 {
263  q->incRptr(sizeof(header));
264  int opcode = bits(header, 7, 0);
265  int sub_opcode = bits(header, 15, 8);
266 
267  DmaVirtCallback<uint64_t> *cb = nullptr;
268  void *dmaBuffer = nullptr;
269 
270  DPRINTF(SDMAEngine, "SDMA opcode %p sub-opcode %p\n", opcode, sub_opcode);
271 
272  switch(opcode) {
273  case SDMA_OP_NOP: {
274  uint32_t NOP_count = (header >> 16) & 0x3FFF;
275  DPRINTF(SDMAEngine, "SDMA NOP packet with count %d\n", NOP_count);
276  if (NOP_count > 0) q->incRptr(NOP_count * 4);
277  decodeNext(q);
278  } break;
279  case SDMA_OP_COPY: {
280  DPRINTF(SDMAEngine, "SDMA Copy packet\n");
281  switch (sub_opcode) {
282  case SDMA_SUBOP_COPY_LINEAR: {
283  dmaBuffer = new sdmaCopy();
284  cb = new DmaVirtCallback<uint64_t>(
285  [ = ] (const uint64_t &)
286  { copy(q, (sdmaCopy *)dmaBuffer); });
287  dmaReadVirt(q->rptr(), sizeof(sdmaCopy), cb, dmaBuffer);
288  } break;
290  panic("SDMA_SUBOP_COPY_LINEAR_SUB_WIND not implemented");
291  } break;
292  case SDMA_SUBOP_COPY_TILED: {
293  panic("SDMA_SUBOP_COPY_TILED not implemented");
294  } break;
296  panic("SDMA_SUBOP_COPY_TILED_SUB_WIND not implemented");
297  } break;
299  panic("SDMA_SUBOP_COPY_T2T_SUB_WIND not implemented");
300  } break;
301  case SDMA_SUBOP_COPY_SOA: {
302  panic("SDMA_SUBOP_COPY_SOA not implemented");
303  } break;
305  panic("SDMA_SUBOP_COPY_DIRTY_PAGE not implemented");
306  } break;
308  panic("SDMA_SUBOP_COPY_LINEAR_PHY not implemented");
309  } break;
310  default: {
311  panic("SDMA unknown copy sub-opcode.");
312  } break;
313  }
314  } break;
315  case SDMA_OP_WRITE: {
316  DPRINTF(SDMAEngine, "SDMA Write packet\n");
317  switch (sub_opcode) {
319  dmaBuffer = new sdmaWrite();
320  cb = new DmaVirtCallback<uint64_t>(
321  [ = ] (const uint64_t &)
322  { write(q, (sdmaWrite *)dmaBuffer); });
323  dmaReadVirt(q->rptr(), sizeof(sdmaWrite), cb, dmaBuffer);
324  } break;
325  case SDMA_SUBOP_WRITE_TILED: {
326  panic("SDMA_SUBOP_WRITE_TILED not implemented.\n");
327  } break;
328  default:
329  break;
330  }
331  } break;
332  case SDMA_OP_INDIRECT: {
333  DPRINTF(SDMAEngine, "SDMA IndirectBuffer packet\n");
334  dmaBuffer = new sdmaIndirectBuffer();
335  cb = new DmaVirtCallback<uint64_t>(
336  [ = ] (const uint64_t &)
337  { indirectBuffer(q, (sdmaIndirectBuffer *)dmaBuffer); });
338  dmaReadVirt(q->rptr(), sizeof(sdmaIndirectBuffer), cb, dmaBuffer);
339  } break;
340  case SDMA_OP_FENCE: {
341  DPRINTF(SDMAEngine, "SDMA Fence packet\n");
342  dmaBuffer = new sdmaFence();
343  cb = new DmaVirtCallback<uint64_t>(
344  [ = ] (const uint64_t &)
345  { fence(q, (sdmaFence *)dmaBuffer); });
346  dmaReadVirt(q->rptr(), sizeof(sdmaFence), cb, dmaBuffer);
347  } break;
348  case SDMA_OP_TRAP: {
349  DPRINTF(SDMAEngine, "SDMA Trap packet\n");
350  dmaBuffer = new sdmaTrap();
351  cb = new DmaVirtCallback<uint64_t>(
352  [ = ] (const uint64_t &)
353  { trap(q, (sdmaTrap *)dmaBuffer); });
354  dmaReadVirt(q->rptr(), sizeof(sdmaTrap), cb, dmaBuffer);
355  } break;
356  case SDMA_OP_SEM: {
357  q->incRptr(sizeof(sdmaSemaphore));
358  warn("SDMA_OP_SEM not implemented");
359  decodeNext(q);
360  } break;
361  case SDMA_OP_POLL_REGMEM: {
362  DPRINTF(SDMAEngine, "SDMA PollRegMem packet\n");
364  *h = *(sdmaPollRegMemHeader *)&header;
365  dmaBuffer = new sdmaPollRegMem();
366  cb = new DmaVirtCallback<uint64_t>(
367  [ = ] (const uint64_t &)
368  { pollRegMem(q, h, (sdmaPollRegMem *)dmaBuffer); });
369  dmaReadVirt(q->rptr(), sizeof(sdmaPollRegMem), cb, dmaBuffer);
370  switch (sub_opcode) {
372  panic("SDMA_SUBOP_POLL_REG_WRITE_MEM not implemented");
373  } break;
375  panic("SDMA_SUBOP_POLL_DBIT_WRITE_MEM not implemented");
376  } break;
378  panic("SDMA_SUBOP_POLL_MEM_VERIFY not implemented");
379  } break;
380  default:
381  break;
382  }
383  } break;
384  case SDMA_OP_COND_EXE: {
385  q->incRptr(sizeof(sdmaCondExec));
386  warn("SDMA_OP_SEM not implemented");
387  decodeNext(q);
388  } break;
389  case SDMA_OP_ATOMIC: {
390  q->incRptr(sizeof(sdmaAtomic));
391  warn("SDMA_OP_ATOMIC not implemented");
392  decodeNext(q);
393  } break;
394  case SDMA_OP_CONST_FILL: {
395  q->incRptr(sizeof(sdmaConstFill));
396  warn("SDMA_OP_CONST_FILL not implemented");
397  decodeNext(q);
398  } break;
399  case SDMA_OP_PTEPDE: {
400  DPRINTF(SDMAEngine, "SDMA PTEPDE packet\n");
401  switch (sub_opcode) {
403  DPRINTF(SDMAEngine, "SDMA PTEPDE_GEN sub-opcode\n");
404  dmaBuffer = new sdmaPtePde();
405  cb = new DmaVirtCallback<uint64_t>(
406  [ = ] (const uint64_t &)
407  { ptePde(q, (sdmaPtePde *)dmaBuffer); });
408  dmaReadVirt(q->rptr(), sizeof(sdmaPtePde), cb, dmaBuffer);
409  break;
411  panic("SDMA_SUBOP_PTEPDE_COPY not implemented");
412  break;
414  panic("SDMA_SUBOP_PTEPDE_COPY not implemented");
415  break;
416  case SDMA_SUBOP_PTEPDE_RMW: {
417  panic("SDMA_SUBOP_PTEPDE_RMW not implemented");
418  } break;
419  default:
420  DPRINTF(SDMAEngine, "Unsupported PTEPDE sub-opcode %d\n",
421  sub_opcode);
422  decodeNext(q);
423  break;
424  }
425  } break;
426  case SDMA_OP_TIMESTAMP: {
427  q->incRptr(sizeof(sdmaTimestamp));
428  switch (sub_opcode) {
430  } break;
432  } break;
434  } break;
435  default:
436  break;
437  }
438  warn("SDMA_OP_TIMESTAMP not implemented");
439  decodeNext(q);
440  } break;
441  case SDMA_OP_SRBM_WRITE: {
442  DPRINTF(SDMAEngine, "SDMA SRBMWrite packet\n");
445  dmaBuffer = new sdmaSRBMWrite();
446  cb = new DmaVirtCallback<uint64_t>(
447  [ = ] (const uint64_t &)
448  { srbmWrite(q, header, (sdmaSRBMWrite *)dmaBuffer); });
449  dmaReadVirt(q->rptr(), sizeof(sdmaSRBMWrite), cb, dmaBuffer);
450  } break;
451  case SDMA_OP_PRE_EXE: {
452  q->incRptr(sizeof(sdmaPredExec));
453  warn("SDMA_OP_PRE_EXE not implemented");
454  decodeNext(q);
455  } break;
456  case SDMA_OP_DUMMY_TRAP: {
457  q->incRptr(sizeof(sdmaDummyTrap));
458  warn("SDMA_OP_DUMMY_TRAP not implemented");
459  decodeNext(q);
460  } break;
461  default: {
462  panic("Invalid SDMA packet.\n");
463  } break;
464  }
465 }
466 
467 /* Implements a write packet. */
468 void
470 {
471  q->incRptr(sizeof(sdmaWrite));
472  // count represents the number of dwords - 1 to write
473  pkt->count++;
474  DPRINTF(SDMAEngine, "Write %d dwords to %lx\n", pkt->count, pkt->dest);
475 
476  // first we have to read needed data from the SDMA queue
477  uint32_t *dmaBuffer = new uint32_t[pkt->count];
478  auto cb = new DmaVirtCallback<uint64_t>(
479  [ = ] (const uint64_t &) { writeReadData(q, pkt, dmaBuffer); });
480  dmaReadVirt(q->rptr(), sizeof(uint32_t) * pkt->count, cb,
481  (void *)dmaBuffer);
482 }
483 
484 /* Completion of data reading for a write packet. */
485 void
486 SDMAEngine::writeReadData(SDMAQueue *q, sdmaWrite *pkt, uint32_t *dmaBuffer)
487 {
488  int bufferSize = sizeof(uint32_t) * pkt->count;
489  q->incRptr(bufferSize);
490 
491  DPRINTF(SDMAEngine, "Write packet data:\n");
492  for (int i = 0; i < pkt->count; ++i) {
493  DPRINTF(SDMAEngine, "%08x\n", dmaBuffer[i]);
494  }
495 
496  // lastly we write read data to the destination address
497  if (gpuDevice->getVM().inMMHUB(pkt->dest)) {
498  Addr mmhubAddr = pkt->dest - gpuDevice->getVM().getMMHUBBase();
499  gpuDevice->getMemMgr()->writeRequest(mmhubAddr, (uint8_t *)dmaBuffer,
500  bufferSize);
501 
502  delete []dmaBuffer;
503  delete pkt;
504  decodeNext(q);
505  } else {
506  // TODO: getGARTAddr?
507  pkt->dest = getGARTAddr(pkt->dest);
508  auto cb = new DmaVirtCallback<uint32_t>(
509  [ = ] (const uint64_t &) { writeDone(q, pkt, dmaBuffer); });
510  dmaWriteVirt(pkt->dest, bufferSize, cb, (void *)dmaBuffer);
511  }
512 }
513 
514 /* Completion of a write packet. */
515 void
516 SDMAEngine::writeDone(SDMAQueue *q, sdmaWrite *pkt, uint32_t *dmaBuffer)
517 {
518  DPRINTF(SDMAEngine, "Write packet completed to %p, %d dwords\n",
519  pkt->dest, pkt->count);
520  delete []dmaBuffer;
521  delete pkt;
522  decodeNext(q);
523 }
524 
525 /* Implements a copy packet. */
526 void
528 {
529  DPRINTF(SDMAEngine, "Copy src: %lx -> dest: %lx count %d\n",
530  pkt->source, pkt->dest, pkt->count);
531  q->incRptr(sizeof(sdmaCopy));
532  // count represents the number of bytes - 1 to be copied
533  pkt->count++;
534  DPRINTF(SDMAEngine, "Getting GART addr for %lx\n", pkt->source);
535  pkt->source = getGARTAddr(pkt->source);
536  DPRINTF(SDMAEngine, "GART addr %lx\n", pkt->source);
537 
538  // first we have to read needed data from the source address
539  uint8_t *dmaBuffer = new uint8_t[pkt->count];
540  auto cb = new DmaVirtCallback<uint64_t>(
541  [ = ] (const uint64_t &) { copyReadData(q, pkt, dmaBuffer); });
542  dmaReadVirt(pkt->source, pkt->count, cb, (void *)dmaBuffer);
543 }
544 
545 /* Completion of data reading for a copy packet. */
546 void
547 SDMAEngine::copyReadData(SDMAQueue *q, sdmaCopy *pkt, uint8_t *dmaBuffer)
548 {
549  // lastly we write read data to the destination address
550  DPRINTF(SDMAEngine, "Copy packet data:\n");
551  uint64_t *dmaBuffer64 = new uint64_t[pkt->count/8];
552  memcpy(dmaBuffer64, dmaBuffer, pkt->count);
553  for (int i = 0; i < pkt->count/8; ++i) {
554  DPRINTF(SDMAEngine, "%016lx\n", dmaBuffer64[i]);
555  }
556  delete [] dmaBuffer64;
557 
558  // Aperture is unknown until translating. Do a dummy translation.
559  auto tgen = translate(pkt->dest, 64);
560  auto addr_range = *(tgen->begin());
561  Addr tmp_addr = addr_range.paddr;
562  DPRINTF(SDMAEngine, "Tmp addr %#lx -> %#lx\n", pkt->dest, tmp_addr);
563 
564  // Writing generated data to the destination address.
565  if ((gpuDevice->getVM().inMMHUB(pkt->dest) && cur_vmid == 0) ||
566  (gpuDevice->getVM().inMMHUB(tmp_addr) && cur_vmid != 0)) {
567  Addr mmhubAddr = 0;
568  if (cur_vmid == 0) {
569  mmhubAddr = pkt->dest - gpuDevice->getVM().getMMHUBBase();
570  } else {
571  mmhubAddr = tmp_addr - gpuDevice->getVM().getMMHUBBase();
572  }
573  DPRINTF(SDMAEngine, "Copying to MMHUB address %#lx\n", mmhubAddr);
574  gpuDevice->getMemMgr()->writeRequest(mmhubAddr, dmaBuffer, pkt->count);
575 
576  delete pkt;
577  decodeNext(q);
578  } else {
579  auto cb = new DmaVirtCallback<uint64_t>(
580  [ = ] (const uint64_t &) { copyDone(q, pkt, dmaBuffer); });
581  dmaWriteVirt(pkt->dest, pkt->count, cb, (void *)dmaBuffer);
582  }
583 }
584 
585 /* Completion of a copy packet. */
586 void
587 SDMAEngine::copyDone(SDMAQueue *q, sdmaCopy *pkt, uint8_t *dmaBuffer)
588 {
589  DPRINTF(SDMAEngine, "Copy completed to %p, %d dwords\n",
590  pkt->dest, pkt->count);
591  delete []dmaBuffer;
592  delete pkt;
593  decodeNext(q);
594 }
595 
596 /* Implements an indirect buffer packet. */
597 void
599 {
600  q->ib()->base(getGARTAddr(pkt->base));
601  q->ib()->rptr(0);
602  q->ib()->size(pkt->size * sizeof(uint32_t) + 1);
603  q->ib()->setWptr(pkt->size * sizeof(uint32_t));
604 
605  q->incRptr(sizeof(sdmaIndirectBuffer));
606 
607  delete pkt;
608  decodeNext(q->ib());
609 }
610 
611 /* Implements a fence packet. */
612 void
614 {
615  q->incRptr(sizeof(sdmaFence));
616  pkt->dest = getGARTAddr(pkt->dest);
617 
618  // Writing the data from the fence packet to the destination address.
619  auto cb = new DmaVirtCallback<uint32_t>(
620  [ = ] (const uint32_t &) { fenceDone(q, pkt); }, pkt->data);
621  dmaWriteVirt(pkt->dest, sizeof(pkt->data), cb, &cb->dmaBuffer);
622 }
623 
624 /* Completion of a fence packet. */
625 void
627 {
628  DPRINTF(SDMAEngine, "Fence completed to %p, data 0x%x\n",
629  pkt->dest, pkt->data);
630  delete pkt;
631  decodeNext(q);
632 }
633 
634 /* Implements a trap packet. */
635 void
637 {
638  q->incRptr(sizeof(sdmaTrap));
639 
640  DPRINTF(SDMAEngine, "Trap contextId: %p rbRptr: %p ibOffset: %p\n",
641  pkt->contextId, pkt->rbRptr, pkt->ibOffset);
642 
646 
647  delete pkt;
648  decodeNext(q);
649 }
650 
651 /* Implements a write SRBM packet. */
652 void
654  sdmaSRBMWrite *pkt)
655 {
656  q->incRptr(sizeof(sdmaSRBMWrite));
657 
658  [[maybe_unused]] uint32_t reg_addr = pkt->regAddr << 2;
659  uint32_t reg_mask = 0x00000000;
660 
661  if (header->byteEnable & 0x8) reg_mask |= 0xFF000000;
662  if (header->byteEnable & 0x4) reg_mask |= 0x00FF0000;
663  if (header->byteEnable & 0x2) reg_mask |= 0x0000FF00;
664  if (header->byteEnable & 0x1) reg_mask |= 0x000000FF;
665  pkt->data &= reg_mask;
666 
667  DPRINTF(SDMAEngine, "SRBM write to %#x with data %#x\n",
668  reg_addr, pkt->data);
669 
670  warn_once("SRBM write not performed, no SRBM model. This needs to be fixed"
671  " if correct system simulation is relying on SRBM registers.");
672 
673  delete header;
674  delete pkt;
675  decodeNext(q);
676 }
677 
683 void
685  sdmaPollRegMem *pkt)
686 {
687  q->incRptr(sizeof(sdmaPollRegMem));
688 
689  DPRINTF(SDMAEngine, "POLL_REGMEM: M=%d, func=%d, op=%d, addr=%p, ref=%d, "
690  "mask=%p, retry=%d, pinterval=%d\n", header->mode, header->func,
691  header->op, pkt->address, pkt->ref, pkt->mask, pkt->retryCount,
692  pkt->pollInt);
693 
694  bool skip = false;
695 
696  if (header->mode == 1) {
697  // polling on a memory location
698  if (header->op == 0) {
699  auto cb = new DmaVirtCallback<uint32_t>(
700  [ = ] (const uint32_t &dma_buffer) {
701  pollRegMemRead(q, header, pkt, dma_buffer, 0); });
702  dmaReadVirt(pkt->address >> 3, sizeof(uint32_t), cb,
703  (void *)&cb->dmaBuffer);
704  } else {
705  panic("SDMA poll mem operation not implemented.");
706  skip = true;
707  }
708  } else {
709  warn_once("SDMA poll reg is not implemented. If this is required for "
710  "correctness, an SRBM model needs to be implemented.");
711  skip = true;
712  }
713 
714  if (skip) {
715  delete header;
716  delete pkt;
717  decodeNext(q);
718  }
719 }
720 
721 void
723  sdmaPollRegMem *pkt, uint32_t dma_buffer, int count)
724 {
725  assert(header->mode == 1 && header->op == 0);
726 
727  if (!pollRegMemFunc(dma_buffer, pkt->ref, header->func) &&
728  ((count < (pkt->retryCount + 1) && pkt->retryCount != 0xfff) ||
729  pkt->retryCount == 0xfff)) {
730 
731  // continue polling on a memory location until reference value is met,
732  // retryCount is met or indefinitely if retryCount is 0xfff
733  DPRINTF(SDMAEngine, "SDMA polling mem addr %p, val %d ref %d.\n",
734  pkt->address, dma_buffer, pkt->ref);
735 
736  auto cb = new DmaVirtCallback<uint32_t>(
737  [ = ] (const uint32_t &dma_buffer) {
738  pollRegMemRead(q, header, pkt, dma_buffer, count + 1); });
739  dmaReadVirt(pkt->address, sizeof(uint32_t), cb,
740  (void *)&cb->dmaBuffer);
741  } else {
742  DPRINTF(SDMAEngine, "SDMA polling mem addr %p, val %d ref %d done.\n",
743  pkt->address, dma_buffer, pkt->ref);
744 
745  delete header;
746  delete pkt;
747  decodeNext(q);
748  }
749 }
750 
751 bool
752 SDMAEngine::pollRegMemFunc(uint32_t value, uint32_t reference, uint32_t func)
753 {
754  switch (func) {
755  case 0:
756  return true;
757  break;
758  case 1:
759  return value < reference;
760  break;
761  case 2:
762  return value <= reference;
763  break;
764  case 3:
765  return value == reference;
766  break;
767  case 4:
768  return value != reference;
769  break;
770  case 5:
771  return value >= reference;
772  break;
773  case 6:
774  return value > reference;
775  break;
776  default:
777  panic("SDMA POLL_REGMEM unknown comparison function.");
778  break;
779  }
780 }
781 
782 /* Implements a PTE PDE generation packet. */
783 void
785 {
786  q->incRptr(sizeof(sdmaPtePde));
787  pkt->count++;
788 
789  DPRINTF(SDMAEngine, "PTEPDE init: %d inc: %d count: %d\n",
790  pkt->initValue, pkt->increment, pkt->count);
791 
792  // Generating pkt->count double dwords using the initial value, increment
793  // and a mask.
794  uint64_t *dmaBuffer = new uint64_t[pkt->count];
795  for (int i = 0; i < pkt->count; i++) {
796  dmaBuffer[i] = (pkt->mask | (pkt->initValue + (i * pkt->increment)));
797  }
798 
799  // Writing generated data to the destination address.
800  if (gpuDevice->getVM().inMMHUB(pkt->dest)) {
801  Addr mmhubAddr = pkt->dest - gpuDevice->getVM().getMMHUBBase();
802  gpuDevice->getMemMgr()->writeRequest(mmhubAddr, (uint8_t *)dmaBuffer,
803  sizeof(uint64_t) * pkt->count);
804 
805  decodeNext(q);
806  } else {
807  auto cb = new DmaVirtCallback<uint64_t>(
808  [ = ] (const uint64_t &) { ptePdeDone(q, pkt, dmaBuffer); });
809  dmaWriteVirt(pkt->dest, sizeof(uint64_t) * pkt->count, cb,
810  (void *)dmaBuffer);
811  }
812 }
813 
814 /* Completion of a PTE PDE generation packet. */
815 void
816 SDMAEngine::ptePdeDone(SDMAQueue *q, sdmaPtePde *pkt, uint64_t *dmaBuffer)
817 {
818  DPRINTF(SDMAEngine, "PtePde packet completed to %p, %d 2dwords\n",
819  pkt->dest, pkt->count);
820 
821  delete []dmaBuffer;
822  delete pkt;
823  decodeNext(q);
824 }
825 
828 {
829  AddrRangeList ranges;
830  return ranges;
831 }
832 
833 void
835 {
836  // Serialize the DmaVirtDevice base class
838 
849 
850  int num_queues = 4;
851 
853  queues.push_back((SDMAQueue *)&gfx);
854  queues.push_back((SDMAQueue *)&page);
855  queues.push_back((SDMAQueue *)&gfxIb);
856  queues.push_back((SDMAQueue *)&pageIb);
857 
858  Addr base[num_queues];
859  Addr rptr[num_queues];
860  Addr wptr[num_queues];
861  Addr size[num_queues];
862  bool processing[num_queues];
863 
864  for (int i = 0; i < num_queues; i++) {
865  base[i] = queues[i]->base();
866  rptr[i] = queues[i]->getRptr();
867  wptr[i] = queues[i]->getWptr();
868  size[i] = queues[i]->size();
869  processing[i] = queues[i]->processing();
870  }
871 
872  SERIALIZE_ARRAY(base, num_queues);
873  SERIALIZE_ARRAY(rptr, num_queues);
874  SERIALIZE_ARRAY(wptr, num_queues);
875  SERIALIZE_ARRAY(size, num_queues);
876  SERIALIZE_ARRAY(processing, num_queues);
877 }
878 
879 void
881 {
882  // Unserialize the DmaVirtDevice base class
884 
895 
896  int num_queues = 4;
897  Addr base[num_queues];
898  Addr rptr[num_queues];
899  Addr wptr[num_queues];
900  Addr size[num_queues];
901  bool processing[num_queues];
902 
903  UNSERIALIZE_ARRAY(base, num_queues);
904  UNSERIALIZE_ARRAY(rptr, num_queues);
905  UNSERIALIZE_ARRAY(wptr, num_queues);
906  UNSERIALIZE_ARRAY(size, num_queues);
907  UNSERIALIZE_ARRAY(processing, num_queues);
908 
910  queues.push_back((SDMAQueue *)&gfx);
911  queues.push_back((SDMAQueue *)&page);
912  queues.push_back((SDMAQueue *)&gfxIb);
913  queues.push_back((SDMAQueue *)&pageIb);
914 
915  for (int i = 0; i < num_queues; i++) {
916  queues[i]->base(base[i]);
917  queues[i]->rptr(rptr[i]);
918  queues[i]->wptr(wptr[i]);
919  queues[i]->size(size[i]);
920  queues[i]->processing(processing[i]);
921  }
922 }
923 
924 void
926 {
927  DPRINTF(SDMAEngine, "Writing offset %#x with data %x\n", mmio_offset,
928  pkt->getLE<uint32_t>());
929 
930  // In Vega10 headers, the offsets are the same for both SDMAs
931  switch (mmio_offset) {
932  case mmSDMA_GFX_RB_BASE:
933  setGfxBaseLo(pkt->getLE<uint32_t>());
934  break;
936  setGfxBaseHi(pkt->getLE<uint32_t>());
937  break;
939  setGfxRptrLo(pkt->getLE<uint32_t>());
940  break;
942  setGfxRptrHi(pkt->getLE<uint32_t>());
943  break;
944  case mmSDMA_GFX_DOORBELL:
945  setGfxDoorbellLo(pkt->getLE<uint32_t>());
946  break;
948  setGfxDoorbellOffsetLo(pkt->getLE<uint32_t>());
949  // Bit 28 of doorbell indicates that doorbell is enabled.
950  if (bits(getGfxDoorbell(), 28, 28)) {
954  }
955  break;
956  case mmSDMA_GFX_RB_CNTL: {
957  uint32_t rb_size = bits(pkt->getLE<uint32_t>(), 6, 1);
958  assert(rb_size >= 6 && rb_size <= 62);
959  setGfxSize(1 << (rb_size + 2));
960  } break;
962  setGfxWptrLo(pkt->getLE<uint32_t>());
963  break;
965  setGfxWptrHi(pkt->getLE<uint32_t>());
966  break;
967  case mmSDMA_PAGE_RB_BASE:
968  setPageBaseLo(pkt->getLE<uint32_t>());
969  break;
971  setPageRptrLo(pkt->getLE<uint32_t>());
972  break;
974  setPageRptrHi(pkt->getLE<uint32_t>());
975  break;
977  setPageDoorbellLo(pkt->getLE<uint32_t>());
978  break;
980  setPageDoorbellOffsetLo(pkt->getLE<uint32_t>());
981  // Bit 28 of doorbell indicates that doorbell is enabled.
982  if (bits(getPageDoorbell(), 28, 28)) {
986  }
987  break;
988  case mmSDMA_PAGE_RB_CNTL: {
989  uint32_t rb_size = bits(pkt->getLE<uint32_t>(), 6, 1);
990  assert(rb_size >= 6 && rb_size <= 62);
991  setPageSize(1 << (rb_size + 2));
992  } break;
994  setPageWptrLo(pkt->getLE<uint32_t>());
995  break;
996  default:
997  DPRINTF(SDMAEngine, "Unknown SDMA MMIO %#x\n", mmio_offset);
998  break;
999  }
1000 }
1001 
1002 void
1004 {
1005  gfxBase = insertBits(gfxBase, 31, 0, 0);
1006  gfxBase |= data;
1007  gfx.base((gfxBase >> 1) << 12);
1008 }
1009 
1010 void
1012 {
1013  gfxBase = insertBits(gfxBase, 63, 32, 0);
1014  gfxBase |= ((uint64_t)data) << 32;
1015  gfx.base((gfxBase >> 1) << 12);
1016 }
1017 
1018 void
1020 {
1021  gfxRptr = insertBits(gfxRptr, 31, 0, 0);
1022  gfxRptr |= data;
1023 }
1024 
1025 void
1027 {
1028  gfxRptr = insertBits(gfxRptr, 63, 32, 0);
1029  gfxRptr |= ((uint64_t)data) << 32;
1030 }
1031 
1032 void
1034 {
1035  gfxDoorbell = insertBits(gfxDoorbell, 31, 0, 0);
1036  gfxDoorbell |= data;
1037 }
1038 
1039 void
1041 {
1042  gfxDoorbell = insertBits(gfxDoorbell, 63, 32, 0);
1043  gfxDoorbell |= ((uint64_t)data) << 32;
1044 }
1045 
1046 void
1048 {
1051 }
1052 
1053 void
1055 {
1057  gfxDoorbellOffset |= ((uint64_t)data) << 32;
1058 }
1059 
1060 void
1062 {
1063  gfx.size(data);
1064 }
1065 
1066 void
1068 {
1069  gfxWptr = insertBits(gfxWptr, 31, 0, 0);
1070  gfxWptr |= data;
1071 }
1072 
1073 void
1075 {
1076  gfxWptr = insertBits(gfxWptr, 31, 0, 0);
1077  gfxWptr |= ((uint64_t)data) << 32;
1078 }
1079 
1080 void
1082 {
1083  pageBase = insertBits(pageBase, 31, 0, 0);
1084  pageBase |= data;
1085  page.base((pageBase >> 1) << 12);
1086 }
1087 
1088 void
1090 {
1091  pageBase = insertBits(pageBase, 63, 32, 0);
1092  pageBase |= ((uint64_t)data) << 32;
1093  page.base((pageBase >> 1) << 12);
1094 }
1095 
1096 void
1098 {
1099  pageRptr = insertBits(pageRptr, 31, 0, 0);
1100  pageRptr |= data;
1101 }
1102 
1103 void
1105 {
1106  pageRptr = insertBits(pageRptr, 63, 32, 0);
1107  pageRptr |= ((uint64_t)data) << 32;
1108 }
1109 
1110 void
1112 {
1113  pageDoorbell = insertBits(pageDoorbell, 31, 0, 0);
1114  pageDoorbell |= data;
1115 }
1116 
1117 void
1119 {
1120  pageDoorbell = insertBits(pageDoorbell, 63, 32, 0);
1121  pageDoorbell |= ((uint64_t)data) << 32;
1122 }
1123 
1124 void
1126 {
1129 }
1130 
1131 void
1133 {
1135  pageDoorbellOffset |= ((uint64_t)data) << 32;
1136 }
1137 
1138 void
1140 {
1141  page.size(data);
1142 }
1143 
1144 void
1146 {
1147  pageWptr = insertBits(pageWptr, 31, 0, 0);
1148  pageWptr |= data;
1149 }
1150 
1151 void
1153 {
1154  pageWptr = insertBits(pageWptr, 63, 32, 0);
1155  pageWptr |= ((uint64_t)data) << 32;
1156 }
1157 
1158 } // namespace gem5
gem5::SDMAEngine::setPageDoorbellHi
void setPageDoorbellHi(uint32_t data)
Definition: sdma_engine.cc:1118
gem5::SDMAEngine::indirectBuffer
void indirectBuffer(SDMAQueue *q, sdmaIndirectBuffer *pkt)
Definition: sdma_engine.cc:598
gem5::sdmaCopy
struct gem5::GEM5_PACKED sdmaCopy
SDMA packets.
gem5::SDMAEngine::SDMAEngine
SDMAEngine(const SDMAEngineParams &p)
Definition: sdma_engine.cc:46
gem5::sdmaWrite
struct gem5::GEM5_PACKED sdmaWrite
gem5::GEM5_PACKED::rbRptr
uint32_t rbRptr
Definition: sdma_packets.hh:153
warn
#define warn(...)
Definition: logging.hh:246
gem5::TRAP_ID
@ TRAP_ID
Definition: interrupt_handler.hh:65
gem5::GEM5_PACKED
PM4 packets.
Definition: pm4_defines.hh:77
mmSDMA_GFX_RB_WPTR_POLL_ADDR_LO
#define mmSDMA_GFX_RB_WPTR_POLL_ADDR_LO
Definition: sdma_mmio.hh:52
SDMA_OP_FENCE
#define SDMA_OP_FENCE
Definition: sdma_commands.hh:45
mmSDMA_PAGE_RB_CNTL
#define mmSDMA_PAGE_RB_CNTL
Definition: sdma_mmio.hh:53
data
const char data[]
Definition: circlebuf.test.cc:48
mmSDMA_GFX_RB_RPTR_ADDR_LO
#define mmSDMA_GFX_RB_RPTR_ADDR_LO
Definition: sdma_mmio.hh:48
UNSERIALIZE_SCALAR
#define UNSERIALIZE_SCALAR(scalar)
Definition: serialize.hh:575
gem5::sdmaSRBMWriteHeader
struct gem5::GEM5_PACKED sdmaSRBMWriteHeader
gem5::DmaVirtDevice::DmaVirtCallback
Wraps a std::function object in a DmaCallback.
Definition: dma_virt_device.hh:51
gem5::AMDGPUDevice::setDoorbellType
void setDoorbellType(uint32_t offset, QueueType qt)
Set handles to GPU blocks.
Definition: amdgpu_device.cc:424
gem5::SDMAEngine::getIHClientId
int getIHClientId()
Returns the client id for the Interrupt Handler.
Definition: sdma_engine.cc:77
gem5::SDMAEngine::rlc1Ib
SDMAQueue rlc1Ib
Definition: sdma_engine.hh:118
SDMA_SUBOP_COPY_SOA
#define SDMA_SUBOP_COPY_SOA
Definition: sdma_commands.hh:65
gem5::DmaVirtDevice::dmaReadVirt
void dmaReadVirt(Addr host_addr, unsigned size, DmaCallback *cb, void *data, Tick delay=0)
Initiate a DMA read from virtual address host_addr.
Definition: dma_virt_device.cc:38
SDMA_SUBOP_POLL_MEM_VERIFY
#define SDMA_SUBOP_POLL_MEM_VERIFY
Definition: sdma_commands.hh:77
warn_once
#define warn_once(...)
Definition: logging.hh:250
gem5::SDMAEngine::walker
VegaISA::Walker * walker
Definition: sdma_engine.hh:134
SDMA_OP_WRITE
#define SDMA_OP_WRITE
Definition: sdma_commands.hh:43
mmSDMA_GFX_RB_BASE
#define mmSDMA_GFX_RB_BASE
Definition: sdma_mmio.hh:45
gem5::SDMAEngine::SDMAQueue::wptr
Addr wptr()
Definition: sdma_engine.hh:73
gem5::SDMAEngine::gfx
SDMAQueue gfx
Each SDMAEngine processes four queues: paging, gfx, rlc0, and rlc1, where RLC stands for Run List Con...
Definition: sdma_engine.hh:117
gem5::GEM5_PACKED::source
uint64_t source
Definition: sdma_packets.hh:51
gem5::SDMAEngine::gfxWptr
uint64_t gfxWptr
Definition: sdma_engine.hh:125
gem5::SDMAEngine::gfxDoorbell
uint64_t gfxDoorbell
Definition: sdma_engine.hh:123
gem5::sdmaTrap
struct gem5::GEM5_PACKED sdmaTrap
gem5::CheckpointIn
Definition: serialize.hh:68
gem5::SDMAEngine::SDMAQueue::size
Addr size()
Definition: sdma_engine.hh:75
gem5::SDMAEngine::registerRLCQueue
void registerRLCQueue(Addr doorbell, Addr rb_base)
Methods for RLC queues.
Definition: sdma_engine.cc:124
gem5::SDMAEngine::getPageDoorbellOffset
uint64_t getPageDoorbellOffset()
Definition: sdma_engine.hh:226
SDMA_SUBOP_WRITE_LINEAR
#define SDMA_SUBOP_WRITE_LINEAR
Definition: sdma_commands.hh:68
gem5::SDMAEngine::getGfxDoorbell
uint64_t getGfxDoorbell()
Definition: sdma_engine.hh:220
SDMA_SUBOP_COPY_TILED
#define SDMA_SUBOP_COPY_TILED
Definition: sdma_commands.hh:62
gem5::GEM5_PACKED::pollInt
uint32_t pollInt
Definition: sdma_packets.hh:213
gem5::SDMAEngine::setPageWptrLo
void setPageWptrLo(uint32_t data)
Definition: sdma_engine.cc:1145
sdma_engine.hh
gem5::SOC15_IH_CLIENTID_SDMA0
@ SOC15_IH_CLIENTID_SDMA0
Definition: interrupt_handler.hh:59
mmSDMA_GFX_DOORBELL_OFFSET
#define mmSDMA_GFX_DOORBELL_OFFSET
Definition: sdma_mmio.hh:50
gem5::sdmaPollRegMemHeader
struct gem5::GEM5_PACKED sdmaPollRegMemHeader
gem5::SDMAEngine::write
Tick write(PacketPtr pkt) override
Inherited methods.
Definition: sdma_engine.hh:161
gem5::SOC15_IH_CLIENTID_SDMA1
@ SOC15_IH_CLIENTID_SDMA1
Definition: interrupt_handler.hh:60
gem5::SDMAEngine::processRLC0
void processRLC0(Addr wptrOffset)
Definition: sdma_engine.cc:208
SDMA_OP_COPY
#define SDMA_OP_COPY
Definition: sdma_commands.hh:42
gem5::GEM5_PACKED::dest
uint64_t dest
Definition: sdma_packets.hh:52
gem5::SDMAEngine::decodeNext
void decodeNext(SDMAQueue *q)
This method checks read and write pointers and starts decoding packets if the read pointer is less th...
Definition: sdma_engine.cc:236
gem5::SDMAEngine::getGARTAddr
Addr getGARTAddr(Addr addr) const
Methods for translation.
Definition: sdma_engine.cc:90
SDMA_SUBOP_COPY_LINEAR_SUB_WIND
#define SDMA_SUBOP_COPY_LINEAR_SUB_WIND
Definition: sdma_commands.hh:61
SDMA_OP_SEM
#define SDMA_OP_SEM
Definition: sdma_commands.hh:47
SDMA_SUBOP_PTEPDE_RMW
#define SDMA_SUBOP_PTEPDE_RMW
Definition: sdma_commands.hh:72
header
output header
Definition: nop.cc:36
gem5::SDMAEngine::SDMAQueue::setWptr
void setWptr(Addr value)
Definition: sdma_engine.hh:93
gem5::AMDGPUDevice::getVM
AMDGPUVM & getVM()
Definition: amdgpu_device.hh:167
gem5::AMDGPUVM::inMMHUB
bool inMMHUB(Addr vaddr)
Definition: amdgpu_vm.hh:187
gem5::AMDGPUInterruptHandler::submitInterruptCookie
void submitInterruptCookie()
Definition: interrupt_handler.cc:126
gem5::SDMAEngine::setGfxRptrHi
void setGfxRptrHi(uint32_t data)
Definition: sdma_engine.cc:1026
std::vector
STL vector class.
Definition: stl.hh:37
gem5::SDMAEngine::pageBase
uint64_t pageBase
Definition: sdma_engine.hh:127
gem5::SDMAEngine::writeDone
void writeDone(SDMAQueue *q, sdmaWrite *pkt, uint32_t *dmaBuffer)
Definition: sdma_engine.cc:516
gem5::SDMAEngine::unserialize
void unserialize(CheckpointIn &cp) override
Unserialize an object.
Definition: sdma_engine.cc:880
gem5::SDMAEngine::getGfxDoorbellOffset
uint64_t getGfxDoorbellOffset()
Definition: sdma_engine.hh:221
gem5::SDMAEngine::setPageDoorbellOffsetHi
void setPageDoorbellOffsetHi(uint32_t data)
Definition: sdma_engine.cc:1132
gem5::AMDGPUVM::getMMHUBBase
Addr getMMHUBBase()
Definition: amdgpu_vm.hh:192
gem5::SDMAEngine::setGfxDoorbellLo
void setGfxDoorbellLo(uint32_t data)
Definition: sdma_engine.cc:1033
gem5::SDMAEngine::fenceDone
void fenceDone(SDMAQueue *q, sdmaFence *pkt)
Definition: sdma_engine.cc:626
gem5::SDMAEngine::setPageSize
void setPageSize(uint64_t data)
Definition: sdma_engine.cc:1139
gem5::X86ISA::base
Bitfield< 51, 12 > base
Definition: pagetable.hh:141
interrupt_handler.hh
gem5::SDMAEngine::writeReadData
void writeReadData(SDMAQueue *q, sdmaWrite *pkt, uint32_t *dmaBuffer)
Definition: sdma_engine.cc:486
mmSDMA_GFX_RB_WPTR_POLL_ADDR_HI
#define mmSDMA_GFX_RB_WPTR_POLL_ADDR_HI
Definition: sdma_mmio.hh:51
gem5::ArmISA::i
Bitfield< 7 > i
Definition: misc_types.hh:67
gem5::SDMAEngine::gpuDevice
AMDGPUDevice * gpuDevice
Definition: sdma_engine.hh:133
gem5::AMDGPUVM::inAGP
bool inAGP(Addr vaddr)
Methods for resolving apertures.
Definition: amdgpu_vm.hh:177
gem5::ClockedObject::unserialize
void unserialize(CheckpointIn &cp) override
Unserialize an object.
Definition: clocked_object.cc:64
gem5::SDMAEngine::pollRegMemFunc
bool pollRegMemFunc(uint32_t value, uint32_t reference, uint32_t func)
Definition: sdma_engine.cc:752
SDMA_SUBOP_PTEPDE_COPY_BACKWARDS
#define SDMA_SUBOP_PTEPDE_COPY_BACKWARDS
Definition: sdma_commands.hh:73
SDMA_OP_NOP
#define SDMA_OP_NOP
Commands for the SDMA engine.
Definition: sdma_commands.hh:41
SDMA_OP_TIMESTAMP
#define SDMA_OP_TIMESTAMP
Definition: sdma_commands.hh:53
mmSDMA_PAGE_RB_RPTR_ADDR_HI
#define mmSDMA_PAGE_RB_RPTR_ADDR_HI
Definition: sdma_mmio.hh:55
gem5::SDMAEngine::unregisterRLCQueue
void unregisterRLCQueue(Addr doorbell)
Definition: sdma_engine.cc:153
gem5::SDMAPage
@ SDMAPage
Definition: amdgpu_defines.hh:46
gem5::SDMAEngine::pageRptr
uint64_t pageRptr
Definition: sdma_engine.hh:128
gem5::sdmaIndirectBuffer
struct gem5::GEM5_PACKED sdmaIndirectBuffer
sdma_commands.hh
gem5::GEM5_PACKED::ref
uint32_t ref
Definition: sdma_packets.hh:211
gem5::SDMAEngine::translate
TranslationGenPtr translate(Addr vaddr, Addr size) override
GPUController will perform DMA operations on VAs, and because page faults are not currently supported...
Definition: sdma_engine.cc:105
gem5::GEM5_PACKED::data
uint32_t data
Definition: pm4_defines.hh:116
SDMA_SUBOP_TIMESTAMP_GET_GLOBAL
#define SDMA_SUBOP_TIMESTAMP_GET_GLOBAL
Definition: sdma_commands.hh:59
gem5::SDMAEngine::setPageBaseHi
void setPageBaseHi(uint32_t data)
Definition: sdma_engine.cc:1089
packet.hh
mmSDMA_PAGE_DOORBELL
#define mmSDMA_PAGE_DOORBELL
Definition: sdma_mmio.hh:57
mmSDMA_GFX_RB_BASE_HI
#define mmSDMA_GFX_RB_BASE_HI
Definition: sdma_mmio.hh:46
SDMA_OP_COND_EXE
#define SDMA_OP_COND_EXE
Definition: sdma_commands.hh:49
gem5::SDMAEngine::cur_vmid
int cur_vmid
Definition: sdma_engine.hh:263
SDMA_SUBOP_COPY_LINEAR_PHY
#define SDMA_SUBOP_COPY_LINEAR_PHY
Definition: sdma_commands.hh:67
SDMA_OP_TRAP
#define SDMA_OP_TRAP
Definition: sdma_commands.hh:46
gem5::SDMAEngine::setGfxBaseLo
void setGfxBaseLo(uint32_t data)
Definition: sdma_engine.cc:1003
mmSDMA_GFX_RB_RPTR_ADDR_HI
#define mmSDMA_GFX_RB_RPTR_ADDR_HI
Definition: sdma_mmio.hh:47
gem5::ArmISA::opcode
Bitfield< 24, 21 > opcode
Definition: types.hh:92
gem5::SDMAEngine::setGfxDoorbellHi
void setGfxDoorbellHi(uint32_t data)
Definition: sdma_engine.cc:1040
gem5::AMDGPUDevice::getIH
AMDGPUInterruptHandler * getIH()
Get handles to GPU blocks.
Definition: amdgpu_device.hh:164
gem5::SDMAEngine::setGfxRptrLo
void setGfxRptrLo(uint32_t data)
Definition: sdma_engine.cc:1019
SDMA_SUBOP_COPY_LINEAR
#define SDMA_SUBOP_COPY_LINEAR
Definition: sdma_commands.hh:60
gem5::SDMAEngine::processPage
void processPage(Addr wptrOffset)
Definition: sdma_engine.cc:181
gem5::SDMAEngine::rlc1
SDMAQueue rlc1
Definition: sdma_engine.hh:118
gem5::VegaISA::p
Bitfield< 54 > p
Definition: pagetable.hh:70
gem5::GEM5_PACKED::regAddr
uint32_t regAddr
Definition: sdma_packets.hh:194
SDMA_OP_CONST_FILL
#define SDMA_OP_CONST_FILL
Definition: sdma_commands.hh:51
gem5::SDMAEngine::pollRegMemRead
void pollRegMemRead(SDMAQueue *q, sdmaPollRegMemHeader *header, sdmaPollRegMem *pkt, uint32_t dma_buffer, int count)
Definition: sdma_engine.cc:722
DPRINTF
#define DPRINTF(x,...)
Definition: trace.hh:186
gem5::SDMAEngine::processRLC
void processRLC(Addr doorbellOffset, Addr wptrOffset)
Definition: sdma_engine.cc:192
gem5::SDMAEngine::ptePdeDone
void ptePdeDone(SDMAQueue *q, sdmaPtePde *pkt, uint64_t *dmaBuffer)
Definition: sdma_engine.cc:816
gem5::AMDGPUVM::MMHUBTranslationGen
Definition: amdgpu_vm.hh:328
gem5::SDMAEngine::trap
void trap(SDMAQueue *q, sdmaTrap *pkt)
Definition: sdma_engine.cc:636
gem5::X86ISA::count
count
Definition: misc.hh:703
gem5::Packet
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
Definition: packet.hh:291
gem5::SDMAEngine::setGfxDoorbellOffsetHi
void setGfxDoorbellOffsetHi(uint32_t data)
Definition: sdma_engine.cc:1054
gem5::SDMAEngine::ptePde
void ptePde(SDMAQueue *q, sdmaPtePde *pkt)
Definition: sdma_engine.cc:784
gem5::sdmaPtePde
struct gem5::GEM5_PACKED sdmaPtePde
gem5::SDMAEngine::gfxDoorbellOffset
uint64_t gfxDoorbellOffset
Definition: sdma_engine.hh:124
SDMA_OP_PRE_EXE
#define SDMA_OP_PRE_EXE
Definition: sdma_commands.hh:55
gem5::SDMAEngine::gfxIb
SDMAQueue gfxIb
Definition: sdma_engine.hh:117
mmSDMA_GFX_RB_CNTL
#define mmSDMA_GFX_RB_CNTL
MMIO offsets for SDMA engine.
Definition: sdma_mmio.hh:44
pagetable_walker.hh
gem5::SDMAEngine::serialize
void serialize(CheckpointOut &cp) const override
Serialize an object.
Definition: sdma_engine.cc:834
SDMA_SUBOP_COPY_TILED_SUB_WIND
#define SDMA_SUBOP_COPY_TILED_SUB_WIND
Definition: sdma_commands.hh:63
mmu.hh
SDMA_SUBOP_TIMESTAMP_SET
#define SDMA_SUBOP_TIMESTAMP_SET
Definition: sdma_commands.hh:57
gem5::GEM5_PACKED::initValue
uint64_t initValue
Definition: sdma_packets.hh:290
gem5::SDMAEngine::copy
void copy(SDMAQueue *q, sdmaCopy *pkt)
Definition: sdma_engine.cc:527
SDMA_SUBOP_TIMESTAMP_GET
#define SDMA_SUBOP_TIMESTAMP_GET
Definition: sdma_commands.hh:58
gem5::SDMAEngine::setPageRptrHi
void setPageRptrHi(uint32_t data)
Definition: sdma_engine.cc:1104
gem5::SDMAEngine::pageWptr
uint64_t pageWptr
Definition: sdma_engine.hh:131
gem5::AMDGPUDevice
Device model for an AMD GPU.
Definition: amdgpu_device.hh:60
mmSDMA_PAGE_RB_WPTR_POLL_ADDR_LO
#define mmSDMA_PAGE_RB_WPTR_POLL_ADDR_LO
Definition: sdma_mmio.hh:59
gem5::ClockedObject::serialize
void serialize(CheckpointOut &cp) const override
Serialize an object.
Definition: clocked_object.cc:59
gem5::SDMAEngine::pageIb
SDMAQueue pageIb
Definition: sdma_engine.hh:117
gem5::insertBits
constexpr T insertBits(T val, unsigned first, unsigned last, B bit_val)
Returns val with bits first to last set to the LSBs of bit_val.
Definition: bitfield.hh:166
gem5::bits
constexpr T bits(T val, unsigned first, unsigned last)
Extract the bitfield from position 'first' to 'last' (inclusive) from 'val' and right justify it.
Definition: bitfield.hh:76
gem5::SDMAEngine::setPageDoorbellLo
void setPageDoorbellLo(uint32_t data)
Definition: sdma_engine.cc:1111
gem5::SDMAEngine::rlc0
SDMAQueue rlc0
Definition: sdma_engine.hh:118
gem5::GEM5_PACKED::retryCount
uint32_t retryCount
Definition: sdma_packets.hh:214
gem5::SDMAEngine::setGfxDoorbellOffsetLo
void setGfxDoorbellOffsetLo(uint32_t data)
Definition: sdma_engine.cc:1047
SERIALIZE_ARRAY
#define SERIALIZE_ARRAY(member, size)
Definition: serialize.hh:610
gem5::DmaVirtDevice::dmaWriteVirt
void dmaWriteVirt(Addr host_addr, unsigned size, DmaCallback *b, void *data, Tick delay=0)
Initiate a DMA write from virtual address host_addr.
Definition: dma_virt_device.cc:45
gem5::SDMAEngine
System DMA Engine class for AMD dGPU.
Definition: sdma_engine.hh:48
mmSDMA_GFX_DOORBELL
#define mmSDMA_GFX_DOORBELL
Definition: sdma_mmio.hh:49
SDMA_OP_SRBM_WRITE
#define SDMA_OP_SRBM_WRITE
Definition: sdma_commands.hh:54
gem5::GEM5_PACKED::contextId
uint32_t contextId
Definition: pm4_defines.hh:468
gem5::Addr
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition: types.hh:147
gem5::SDMAEngine::pageDoorbellOffset
uint64_t pageDoorbellOffset
Definition: sdma_engine.hh:130
gem5::sdmaSRBMWrite
struct gem5::GEM5_PACKED sdmaSRBMWrite
gem5::SDMAGfx
@ SDMAGfx
Definition: amdgpu_defines.hh:45
gem5::sdmaFence
struct gem5::GEM5_PACKED sdmaFence
SERIALIZE_SCALAR
#define SERIALIZE_SCALAR(scalar)
Definition: serialize.hh:568
gem5::SDMAEngine::SDMAQueue::ib
SDMAQueue * ib()
Definition: sdma_engine.hh:79
packet_access.hh
gem5::SDMAEngine::SDMAQueue::parent
SDMAQueue * parent()
Definition: sdma_engine.hh:78
sdma_mmio.hh
gem5::AMDGPUVM::AGPTranslationGen
Translation range generators.
Definition: amdgpu_vm.hh:302
gem5::SDMAEngine::pollRegMem
void pollRegMem(SDMAQueue *q, sdmaPollRegMemHeader *header, sdmaPollRegMem *pkt)
Implements a poll reg/mem packet that polls an SRBM register or a memory location,...
Definition: sdma_engine.cc:684
gem5::SDMAEngine::copyReadData
void copyReadData(SDMAQueue *q, sdmaCopy *pkt, uint8_t *dmaBuffer)
Definition: sdma_engine.cc:547
SDMA_SUBOP_COPY_T2T_SUB_WIND
#define SDMA_SUBOP_COPY_T2T_SUB_WIND
Definition: sdma_commands.hh:64
gem5::SDMAEngine::setGfxSize
void setGfxSize(uint64_t data)
Definition: sdma_engine.cc:1061
SDMA_SUBOP_POLL_REG_WRITE_MEM
#define SDMA_SUBOP_POLL_REG_WRITE_MEM
Definition: sdma_commands.hh:75
gem5::SDMAEngine::SDMAQueue::rptr
Addr rptr()
Definition: sdma_engine.hh:71
gem5::SDMAEngine::processRLC1
void processRLC1(Addr wptrOffset)
Definition: sdma_engine.cc:222
gem5::SDMAEngine::decodeHeader
void decodeHeader(SDMAQueue *q, uint32_t data)
Reads the first DW (32 bits) (i.e., header) of an SDMA packet, which encodes the opcode and sub-opcod...
Definition: sdma_engine.cc:261
gem5::AMDGPUVM::GARTTranslationGen
Definition: amdgpu_vm.hh:315
gem5::SDMAEngine::getPageDoorbell
uint64_t getPageDoorbell()
Definition: sdma_engine.hh:225
gem5::AMDGPUDevice::getMemMgr
AMDGPUMemoryManager * getMemMgr()
Definition: amdgpu_device.hh:168
SDMA_SUBOP_WRITE_TILED
#define SDMA_SUBOP_WRITE_TILED
Definition: sdma_commands.hh:69
gem5::SDMAEngine::pageDoorbell
uint64_t pageDoorbell
Definition: sdma_engine.hh:129
gem5::ArmISA::q
Bitfield< 27 > q
Definition: misc_types.hh:55
gem5::AMDGPUMemoryManager::writeRequest
void writeRequest(Addr addr, uint8_t *data, int size, Request::Flags flag=0, Event *callback=nullptr)
Write size amount of data to device memory at addr using flags and callback.
Definition: memory_manager.cc:53
gem5::SDMAEngine::rlcMap
std::unordered_map< Addr, int > rlcMap
Definition: sdma_engine.hh:137
gem5::SDMAEngine::gfxRptr
uint64_t gfxRptr
Definition: sdma_engine.hh:122
SDMA_SUBOP_PTEPDE_COPY
#define SDMA_SUBOP_PTEPDE_COPY
Definition: sdma_commands.hh:71
SDMA_OP_INDIRECT
#define SDMA_OP_INDIRECT
Definition: sdma_commands.hh:44
UNSERIALIZE_ARRAY
#define UNSERIALIZE_ARRAY(member, size)
Definition: serialize.hh:618
mmSDMA_PAGE_DOORBELL_OFFSET
#define mmSDMA_PAGE_DOORBELL_OFFSET
Definition: sdma_mmio.hh:58
gem5::SDMAEngine::page
SDMAQueue page
Definition: sdma_engine.hh:117
gem5::SDMAEngine::SDMAQueue::base
Addr base()
Definition: sdma_engine.hh:70
gem5::SDMAEngine::setPageRptrLo
void setPageRptrLo(uint32_t data)
Definition: sdma_engine.cc:1097
mmSDMA_PAGE_RB_BASE
#define mmSDMA_PAGE_RB_BASE
Definition: sdma_mmio.hh:54
gem5::SDMAEngine::setGfxWptrLo
void setGfxWptrLo(uint32_t data)
Definition: sdma_engine.cc:1067
gem5::GEM5_PACKED::ibOffset
uint32_t ibOffset
Definition: sdma_packets.hh:154
gem5::SDMAEngine::SDMAQueue
Definition: sdma_engine.hh:56
gem5::SDMAEngine::gfxBase
uint64_t gfxBase
Definition: sdma_engine.hh:121
gem5::Packet::getLE
T getLE() const
Get the data in the packet byte swapped from little endian to host endian.
Definition: packet_access.hh:78
gem5::ArmISA::id
Bitfield< 33 > id
Definition: misc_types.hh:251
gem5::SDMAEngine::setGPUDevice
void setGPUDevice(AMDGPUDevice *gpu_device)
Definition: sdma_engine.cc:70
gem5::sdmaPollRegMem
struct gem5::GEM5_PACKED sdmaPollRegMem
gem5::GEM5_PACKED::increment
uint64_t increment
Definition: sdma_packets.hh:291
gem5::GEM5_PACKED::address
uint64_t address
Definition: sdma_packets.hh:210
mmSDMA_PAGE_RB_RPTR_ADDR_LO
#define mmSDMA_PAGE_RB_RPTR_ADDR_LO
Definition: sdma_mmio.hh:56
gem5::CheckpointOut
std::ostream CheckpointOut
Definition: serialize.hh:66
gem5::GEM5_PACKED::mask
uint32_t mask
Definition: pm4_defines.hh:301
gem5::SDMAEngine::SDMAQueue::valid
bool valid()
Definition: sdma_engine.hh:76
gem5::SDMAEngine::fence
void fence(SDMAQueue *q, sdmaFence *pkt)
Definition: sdma_engine.cc:613
gem5::GEM5_PACKED::base
uint64_t base
Definition: pm4_queues.hh:107
gem5::DmaVirtDevice
Definition: dma_virt_device.hh:41
gem5::SDMAEngine::srbmWrite
void srbmWrite(SDMAQueue *q, sdmaSRBMWriteHeader *header, sdmaSRBMWrite *pkt)
Definition: sdma_engine.cc:653
gem5::MipsISA::vaddr
vaddr
Definition: pra_constants.hh:278
gem5::SDMAEngine::SDMAQueue::processing
bool processing()
Definition: sdma_engine.hh:77
gem5::GEM5_PACKED::count
uint16_t count
Definition: pm4_defines.hh:87
SDMA_OP_PTEPDE
#define SDMA_OP_PTEPDE
Definition: sdma_commands.hh:52
std::list< AddrRange >
gem5::SDMAEngine::processGfx
void processGfx(Addr wptrOffset)
Given a new write ptr offset, communicated to the GPU through a doorbell write, the SDMA engine proce...
Definition: sdma_engine.cc:170
SDMA_SUBOP_PTEPDE_GEN
#define SDMA_SUBOP_PTEPDE_GEN
Definition: sdma_commands.hh:70
SDMA_OP_DUMMY_TRAP
#define SDMA_OP_DUMMY_TRAP
Definition: sdma_commands.hh:56
gem5::SDMAEngine::setPageDoorbellOffsetLo
void setPageDoorbellOffsetLo(uint32_t data)
Definition: sdma_engine.cc:1125
gem5::VegaISA::Walker::setDevRequestor
void setDevRequestor(RequestorID mid)
Definition: pagetable_walker.hh:162
SDMA_OP_POLL_REGMEM
#define SDMA_OP_POLL_REGMEM
Definition: sdma_commands.hh:48
gem5
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
Definition: gpu_translation_state.hh:37
gem5::SDMAEngine::setGfxWptrHi
void setGfxWptrHi(uint32_t data)
Definition: sdma_engine.cc:1074
SDMA_SUBOP_POLL_DBIT_WRITE_MEM
#define SDMA_SUBOP_POLL_DBIT_WRITE_MEM
Definition: sdma_commands.hh:76
gem5::SDMAEngine::getAddrRanges
AddrRangeList getAddrRanges() const override
Every PIO device is obliged to provide an implementation that returns the address ranges the device r...
Definition: sdma_engine.cc:827
SDMA_SUBOP_COPY_DIRTY_PAGE
#define SDMA_SUBOP_COPY_DIRTY_PAGE
Definition: sdma_commands.hh:66
gem5::AMDGPUDevice::setSDMAEngine
void setSDMAEngine(Addr offset, SDMAEngine *eng)
Definition: amdgpu_device.cc:431
gem5::SDMAEngine::rlc0Ib
SDMAQueue rlc0Ib
Definition: sdma_engine.hh:118
gem5::AMDGPUInterruptHandler::prepareInterruptCookie
void prepareInterruptCookie(ContextID cntxtId, uint32_t ring_id, uint32_t client_id, uint32_t source_id)
Definition: interrupt_handler.cc:75
gem5::SDMAEngine::setPageBaseLo
void setPageBaseLo(uint32_t data)
Definition: sdma_engine.cc:1081
gem5::SDMAEngine::setGfxBaseHi
void setGfxBaseHi(uint32_t data)
Definition: sdma_engine.cc:1011
gem5::AMDGPUDevice::vramRequestorId
RequestorID vramRequestorId()
Methods related to translations and system/device memory.
Definition: amdgpu_device.hh:187
gem5::SDMAEngine::copyDone
void copyDone(SDMAQueue *q, sdmaCopy *pkt, uint8_t *dmaBuffer)
Definition: sdma_engine.cc:587
gem5::TranslationGenPtr
std::unique_ptr< TranslationGen > TranslationGenPtr
Definition: translation_gen.hh:128
gem5::GEM5_PACKED::size
uint32_t size
Definition: sdma_packets.hh:124
panic
#define panic(...)
This implements a cprintf based panic() function.
Definition: logging.hh:178
SDMA_OP_ATOMIC
#define SDMA_OP_ATOMIC
Definition: sdma_commands.hh:50
gem5::X86ISA::addr
Bitfield< 3 > addr
Definition: types.hh:84
gem5::SDMAEngine::writeMMIO
void writeMMIO(PacketPtr pkt, Addr mmio_offset)
Methods for setting the values of SDMA MMIO registers.
Definition: sdma_engine.cc:925
gem5::SDMAEngine::setPageWptrHi
void setPageWptrHi(uint32_t data)
Definition: sdma_engine.cc:1152

Generated on Wed Jul 13 2022 10:39:18 for gem5 by doxygen 1.8.17