gem5  v22.1.0.0
sdma_engine.cc
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2021 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *
8  * 1. Redistributions of source code must retain the above copyright notice,
9  * this list of conditions and the following disclaimer.
10  *
11  * 2. Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation
13  * and/or other materials provided with the distribution.
14  *
15  * 3. Neither the name of the copyright holder nor the names of its
16  * contributors may be used to endorse or promote products derived from this
17  * software without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
33 
35 #include "arch/generic/mmu.hh"
36 #include "debug/SDMAData.hh"
37 #include "debug/SDMAEngine.hh"
40 #include "dev/amdgpu/sdma_mmio.hh"
41 #include "mem/packet.hh"
42 #include "mem/packet_access.hh"
43 #include "params/SDMAEngine.hh"
44 
45 namespace gem5
46 {
47 
// Construct the SDMA engine.  Each ring-buffer queue (gfx, page, rlc0,
// rlc1) is paired with an indirect-buffer (IB) queue, and each IB queue
// points back at its parent ring so packet decoding can fall back to the
// ring once an IB drains (see decodeNext).  The gfx and page rings are
// always valid; the RLC rings only become valid when a user queue is
// registered via registerRLCQueue().
48 SDMAEngine::SDMAEngine(const SDMAEngineParams &p)
49  : DmaVirtDevice(p), id(0), gfxBase(0), gfxRptr(0),
50  gfxDoorbell(0), gfxDoorbellOffset(0), gfxWptr(0), pageBase(0),
51  pageRptr(0), pageDoorbell(0), pageDoorbellOffset(0),
52  pageWptr(0), gpuDevice(nullptr), walker(p.walker)
53 {
54  gfx.ib(&gfxIb);
55  gfxIb.parent(&gfx);
56  gfx.valid(true);
57  gfxIb.valid(true);
58 
59  page.ib(&pageIb);
60  pageIb.parent(&page);
61  page.valid(true);
62  pageIb.valid(true);
63 
64  rlc0.ib(&rlc0Ib);
65  rlc0Ib.parent(&rlc0);
66 
67  rlc1.ib(&rlc1Ib);
68  rlc1Ib.parent(&rlc1);
69 }
70 
// Wire this engine to its parent GPU device (back-pointer used for VM,
// memory-manager and interrupt-handler access throughout this file).
// NOTE(review): the function signature line (and at least one more body
// line, per the gap in numbering) was dropped from this excerpt.
71 void
73 {
74  gpuDevice = gpu_device;
76 }
77 
// Map this engine's id (0 or 1) to its interrupt-handler client id;
// unknown ids are fatal.
// NOTE(review): the signature line and the return statements for the
// id == 0 and id == 1 cases were dropped from this excerpt — only the
// switch skeleton and the panic survive here.
78 int
80 {
81  switch (id) {
82  case 0:
84  case 1:
86  default:
87  panic("Unknown SDMA id");
88  }
89 }
90 
// Convert a driver-provided address for use with the GART aperture.
// Addresses already inside the AGP aperture pass through untouched;
// otherwise the page-frame number is shifted left by 3 while the 12-bit
// page offset is preserved (presumably scaling the frame number to index
// 8-byte GART page-table entries -- TODO confirm against the GART model).
// NOTE(review): the signature line was dropped from this excerpt; the
// body takes and returns a single Addr parameter named `addr`.
91 Addr
93 {
94  if (!gpuDevice->getVM().inAGP(addr)) {
95  Addr low_bits = bits(addr, 11, 0);
96  addr = (((addr >> 12) << 3) << 12) | low_bits;
97  }
98  return addr;
99 }
100 
// Resolve a packet address to a device-local (VRAM) address, or 0 when
// the address targets host memory.
// NOTE(review): the signature line was dropped from this excerpt; the
// body reads a single Addr parameter named `raw_addr`.
101 Addr
103 {
104  // SDMA packets can access both host and device memory as either a source
105  // or destination address. We don't know which until it is translated, so
106  // we do a dummy functional translation to determine if the address
107  // resides in system memory or not.
108  auto tgen = translate(raw_addr, 64);
109  auto addr_range = *(tgen->begin());
110  Addr tmp_addr = addr_range.paddr;
111  DPRINTF(SDMAEngine, "getDeviceAddress raw_addr %#lx -> %#lx\n",
112  raw_addr, tmp_addr);
113 
114  // SDMA packets will access device memory through the MMHUB aperture in
115  // supervisor mode (vmid == 0) and in user mode (vmid > 0). In the case
116  // of vmid == 0 the address is already an MMHUB address in the packet,
117  // so simply subtract the MMHUB base. For vmid > 0 the address is a
118  // virtual address that must first be translated. The translation will
119  // return an MMHUB address, then we can similarly subtract the base to
120  // get the device address. Otherwise, for host, device address is 0.
121  Addr device_addr = 0;
122  if ((gpuDevice->getVM().inMMHUB(raw_addr) && cur_vmid == 0) ||
123  (gpuDevice->getVM().inMMHUB(tmp_addr) && cur_vmid != 0)) {
124  if (cur_vmid == 0) {
125  device_addr = raw_addr - gpuDevice->getVM().getMMHUBBase();
126  } else {
127  device_addr = tmp_addr - gpuDevice->getVM().getMMHUBBase();
128  }
129  }
130 
131  return device_addr;
132 }
133 
// Build a translation generator for [vaddr, vaddr + size), selecting the
// aperture in priority order: per-process user translation when a user
// queue is active (cur_vmid > 0), then AGP, then MMHUB, and finally GART
// as the default.
// NOTE(review): the signature line and the concrete TranslationGen
// class names inside each `return TranslationGenPtr(...)` were dropped
// from this excerpt (see the gaps in the embedded numbering).
141 {
142  if (cur_vmid > 0) {
143  // Only user translation is available to user queues (vmid > 0)
145  &gpuDevice->getVM(), walker,
146  cur_vmid, vaddr, size));
147  } else if (gpuDevice->getVM().inAGP(vaddr)) {
148  // Use AGP translation gen
149  return TranslationGenPtr(
151  } else if (gpuDevice->getVM().inMMHUB(vaddr)) {
152  // Use MMHUB translation gen
154  &gpuDevice->getVM(), vaddr, size));
155  }
156 
157  // Assume GART otherwise as this is the only other translation aperture
158  // available to the SDMA engine processor.
159  return TranslationGenPtr(
161 }
162 
163 void
164 SDMAEngine::registerRLCQueue(Addr doorbell, Addr rb_base, uint32_t size,
165  Addr rptr_wb_addr)
166 {
167  // Get first free RLC
168  if (!rlc0.valid()) {
169  DPRINTF(SDMAEngine, "Doorbell %lx mapped to RLC0\n", doorbell);
170  rlcInfo[0] = doorbell;
171  rlc0.valid(true);
172  rlc0.base(rb_base);
173  rlc0.rptr(0);
174  rlc0.wptr(0);
175  rlc0.rptrWbAddr(rptr_wb_addr);
176  rlc0.processing(false);
177  rlc0.size(size);
178  } else if (!rlc1.valid()) {
179  DPRINTF(SDMAEngine, "Doorbell %lx mapped to RLC1\n", doorbell);
180  rlcInfo[1] = doorbell;
181  rlc1.valid(true);
182  rlc1.base(rb_base);
183  rlc1.rptr(0);
184  rlc1.wptr(0);
185  rlc1.rptrWbAddr(rptr_wb_addr);
186  rlc1.processing(false);
187  rlc1.size(size);
188  } else {
189  panic("No free RLCs. Check they are properly unmapped.");
190  }
191 }
192 
// Release the RLC slot previously mapped at `doorbell` (invalidate the
// queue and clear its rlcInfo entry); panics if no slot matches.
// NOTE(review): the signature line was dropped from this excerpt; the
// body reads a single Addr parameter named `doorbell`.
193 void
195 {
196  DPRINTF(SDMAEngine, "Unregistering RLC queue at %#lx\n", doorbell);
197  if (rlcInfo[0] == doorbell) {
198  rlc0.valid(false);
199  rlcInfo[0] = 0;
200  } else if (rlcInfo[1] == doorbell) {
201  rlc1.valid(false);
202  rlcInfo[1] = 0;
203  } else {
204  panic("Cannot unregister: no RLC queue at %#lx\n", doorbell);
205  }
206 }
207 
// Unregister every doorbell recorded in rlcInfo.  Unmapped slots hold 0
// and match rlcInfo[0] == 0 inside unregisterRLCQueue, so this also
// (harmlessly) re-invalidates an already-free slot 0.
// NOTE(review): the signature line was dropped from this excerpt.
208 void
210 {
211  for (auto doorbell: rlcInfo) {
212  unregisterRLCQueue(doorbell);
213  }
214 }
215 
216 /* Start decoding packets from the Gfx queue. */
// Advance the gfx ring's write pointer and kick off decoding if the ring
// is not already being processed (decodeNext re-arms itself until the
// ring drains).
// NOTE(review): the signature line was dropped from this excerpt; the
// body reads a single parameter named `wptrOffset`.
217 void
219 {
220  gfx.setWptr(wptrOffset);
221  if (!gfx.processing()) {
222  gfx.processing(true);
223  decodeNext(&gfx);
224  }
225 }
226 
227 /* Start decoding packets from the Page queue. */
// Same pattern as processGfx, but for the page ring.
// NOTE(review): the signature line was dropped from this excerpt.
228 void
230 {
231  page.setWptr(wptrOffset);
232  if (!page.processing()) {
233  page.processing(true);
234  decodeNext(&page);
235  }
236 }
237 
238 /* Process RLC queue at given doorbell. */
239 void
240 SDMAEngine::processRLC(Addr doorbellOffset, Addr wptrOffset)
241 {
242  if (rlcInfo[0] == doorbellOffset) {
243  processRLC0(wptrOffset);
244  } else if (rlcInfo[1] == doorbellOffset) {
245  processRLC1(wptrOffset);
246  } else {
247  panic("Cannot process: no RLC queue at %#lx\n", doorbellOffset);
248  }
249 }
250 
251 /* Start decoding packets from the RLC0 queue. */
// RLC queues are user-mode queues, so decoding switches to vmid 1 for
// address translation (decodeNext resets cur_vmid to 0 when done).
// NOTE(review): the signature line was dropped from this excerpt.
252 void
254 {
255  assert(rlc0.valid());
256 
257  rlc0.setWptr(wptrOffset);
258  if (!rlc0.processing()) {
259  cur_vmid = 1;
260  rlc0.processing(true);
261  decodeNext(&rlc0);
262  }
263 }
264 
265 /* Start decoding packets from the RLC1 queue. */
// Mirror of processRLC0 for the second RLC slot.
// NOTE(review): the signature line was dropped from this excerpt.
266 void
268 {
269  assert(rlc1.valid());
270 
271  rlc1.setWptr(wptrOffset);
272  if (!rlc1.processing()) {
273  cur_vmid = 1;
274  rlc1.processing(true);
275  decodeNext(&rlc1);
276  }
277 }
278 
279 /* Decoding next packet in the queue. */
// Core decode loop: while rptr != wptr, fetch the next 32-bit packet
// header and hand it to decodeHeader (which re-enters decodeNext when
// its packet completes).  When the queue drains, write the read pointer
// back to host memory, resume the parent queue if this was an IB, and
// drop back to supervisor vmid.
// NOTE(review): the signature line was dropped from this excerpt; the
// body operates on a queue pointer parameter named `q`.
280 void
282 {
283  DPRINTF(SDMAEngine, "SDMA decode rptr %p wptr %p\n", q->rptr(), q->wptr());
284 
285  if (q->rptr() != q->wptr()) {
286  // We are using lambda functions passed to the DmaVirtCallback objects
287  // which will call the actuall callback method (e.g., decodeHeader).
288  // The dmaBuffer member of the DmaVirtCallback is passed to the lambda
289  // function as header in this case.
290  auto cb = new DmaVirtCallback<uint32_t>(
291  [ = ] (const uint32_t &header)
292  { decodeHeader(q, header); });
293  dmaReadVirt(q->rptr(), sizeof(uint32_t), cb, &cb->dmaBuffer);
294  } else {
295  // The driver expects the rptr to be written back to host memory
296  // periodically. In simulation, we writeback rptr after each burst of
297  // packets from a doorbell, rather than using the cycle count which
298  // is not accurate in all simulation settings (e.g., KVM).
299  DPRINTF(SDMAEngine, "Writing rptr %#lx back to host addr %#lx\n",
300  q->globalRptr(), q->rptrWbAddr());
301  if (q->rptrWbAddr()) {
302  auto cb = new DmaVirtCallback<uint64_t>(
303  [ = ](const uint64_t &) { }, q->globalRptr());
304  dmaWriteVirt(q->rptrWbAddr(), sizeof(Addr), cb, &cb->dmaBuffer);
305  }
306  q->processing(false);
307  if (q->parent()) {
308  DPRINTF(SDMAEngine, "SDMA switching queues\n");
309  decodeNext(q->parent());
310  }
311  cur_vmid = 0;
312  }
313 }
314 
315 /* Decoding the header of a packet. */
// Dispatch one packet by opcode/sub-opcode (low 16 bits of the header).
// Packets with a body schedule a dmaReadVirt of the packet struct with a
// callback into the matching handler; each handler is responsible for
// advancing rptr past its body and re-entering decodeNext.  Unimplemented
// packets either warn, skip their body, and continue, or panic.
// NOTE(review): the signature line and a number of `case` labels inside
// the nested switches were dropped from this excerpt -- the orphaned
// panic(...) bodies below mark where those labels were.
316 void
318 {
319  q->incRptr(sizeof(header));
320  int opcode = bits(header, 7, 0);
321  int sub_opcode = bits(header, 15, 8);
322 
323  DmaVirtCallback<uint64_t> *cb = nullptr;
324  void *dmaBuffer = nullptr;
325 
326  DPRINTF(SDMAEngine, "SDMA opcode %p sub-opcode %p\n", opcode, sub_opcode);
327 
328  switch(opcode) {
329  case SDMA_OP_NOP: {
// NOP carries an optional skip count in bits 29:16 (dwords to discard).
330  uint32_t NOP_count = (header >> 16) & 0x3FFF;
331  DPRINTF(SDMAEngine, "SDMA NOP packet with count %d\n", NOP_count);
332  if (NOP_count > 0) q->incRptr(NOP_count * 4);
333  decodeNext(q);
334  } break;
335  case SDMA_OP_COPY: {
336  DPRINTF(SDMAEngine, "SDMA Copy packet\n");
337  switch (sub_opcode) {
338  case SDMA_SUBOP_COPY_LINEAR: {
339  dmaBuffer = new sdmaCopy();
340  cb = new DmaVirtCallback<uint64_t>(
341  [ = ] (const uint64_t &)
342  { copy(q, (sdmaCopy *)dmaBuffer); });
343  dmaReadVirt(q->rptr(), sizeof(sdmaCopy), cb, dmaBuffer);
344  } break;
346  panic("SDMA_SUBOP_COPY_LINEAR_SUB_WIND not implemented");
347  } break;
348  case SDMA_SUBOP_COPY_TILED: {
349  panic("SDMA_SUBOP_COPY_TILED not implemented");
350  } break;
352  panic("SDMA_SUBOP_COPY_TILED_SUB_WIND not implemented");
353  } break;
355  panic("SDMA_SUBOP_COPY_T2T_SUB_WIND not implemented");
356  } break;
357  case SDMA_SUBOP_COPY_SOA: {
358  panic("SDMA_SUBOP_COPY_SOA not implemented");
359  } break;
361  panic("SDMA_SUBOP_COPY_DIRTY_PAGE not implemented");
362  } break;
364  panic("SDMA_SUBOP_COPY_LINEAR_PHY not implemented");
365  } break;
366  default: {
367  panic("SDMA unknown copy sub-opcode.");
368  } break;
369  }
370  } break;
371  case SDMA_OP_WRITE: {
372  DPRINTF(SDMAEngine, "SDMA Write packet\n");
373  switch (sub_opcode) {
// NOTE(review): the SDMA_SUBOP_WRITE_LINEAR case label was dropped here.
375  dmaBuffer = new sdmaWrite();
376  cb = new DmaVirtCallback<uint64_t>(
377  [ = ] (const uint64_t &)
378  { write(q, (sdmaWrite *)dmaBuffer); });
379  dmaReadVirt(q->rptr(), sizeof(sdmaWrite), cb, dmaBuffer);
380  } break;
381  case SDMA_SUBOP_WRITE_TILED: {
382  panic("SDMA_SUBOP_WRITE_TILED not implemented.\n");
383  } break;
384  default:
385  break;
386  }
387  } break;
388  case SDMA_OP_INDIRECT: {
389  DPRINTF(SDMAEngine, "SDMA IndirectBuffer packet\n");
390  dmaBuffer = new sdmaIndirectBuffer();
391  cb = new DmaVirtCallback<uint64_t>(
392  [ = ] (const uint64_t &)
393  { indirectBuffer(q, (sdmaIndirectBuffer *)dmaBuffer); });
394  dmaReadVirt(q->rptr(), sizeof(sdmaIndirectBuffer), cb, dmaBuffer);
395  } break;
396  case SDMA_OP_FENCE: {
397  DPRINTF(SDMAEngine, "SDMA Fence packet\n");
398  dmaBuffer = new sdmaFence();
399  cb = new DmaVirtCallback<uint64_t>(
400  [ = ] (const uint64_t &)
401  { fence(q, (sdmaFence *)dmaBuffer); });
402  dmaReadVirt(q->rptr(), sizeof(sdmaFence), cb, dmaBuffer);
403  } break;
404  case SDMA_OP_TRAP: {
405  DPRINTF(SDMAEngine, "SDMA Trap packet\n");
406  dmaBuffer = new sdmaTrap();
407  cb = new DmaVirtCallback<uint64_t>(
408  [ = ] (const uint64_t &)
409  { trap(q, (sdmaTrap *)dmaBuffer); });
410  dmaReadVirt(q->rptr(), sizeof(sdmaTrap), cb, dmaBuffer);
411  } break;
412  case SDMA_OP_SEM: {
// Unimplemented: skip the packet body and keep decoding.
413  q->incRptr(sizeof(sdmaSemaphore));
414  warn("SDMA_OP_SEM not implemented");
415  decodeNext(q);
416  } break;
417  case SDMA_OP_POLL_REGMEM: {
418  DPRINTF(SDMAEngine, "SDMA PollRegMem packet\n");
// NOTE(review): the sdmaPollRegMemHeader allocation line was dropped
// here; `h` below is the heap-copied header passed to pollRegMem.
420  *h = *(sdmaPollRegMemHeader *)&header;
421  dmaBuffer = new sdmaPollRegMem();
422  cb = new DmaVirtCallback<uint64_t>(
423  [ = ] (const uint64_t &)
424  { pollRegMem(q, h, (sdmaPollRegMem *)dmaBuffer); });
425  dmaReadVirt(q->rptr(), sizeof(sdmaPollRegMem), cb, dmaBuffer);
426  switch (sub_opcode) {
428  panic("SDMA_SUBOP_POLL_REG_WRITE_MEM not implemented");
429  } break;
431  panic("SDMA_SUBOP_POLL_DBIT_WRITE_MEM not implemented");
432  } break;
434  panic("SDMA_SUBOP_POLL_MEM_VERIFY not implemented");
435  } break;
436  default:
437  break;
438  }
439  } break;
440  case SDMA_OP_COND_EXE: {
441  q->incRptr(sizeof(sdmaCondExec));
// NOTE(review): warning text says SEM but this is the COND_EXE opcode;
// looks like a copy/paste slip in the message -- verify upstream.
442  warn("SDMA_OP_SEM not implemented");
443  decodeNext(q);
444  } break;
445  case SDMA_OP_ATOMIC: {
446  DPRINTF(SDMAEngine, "SDMA Atomic packet\n");
447  dmaBuffer = new sdmaAtomic();
// NOTE(review): the sdmaAtomicHeader allocation line was dropped here.
449  *h = *(sdmaAtomicHeader *)&header;
450  cb = new DmaVirtCallback<uint64_t>(
451  [ = ] (const uint64_t &)
452  { atomic(q, h, (sdmaAtomic *)dmaBuffer); });
453  dmaReadVirt(q->rptr(), sizeof(sdmaAtomic), cb, dmaBuffer);
454  } break;
455  case SDMA_OP_CONST_FILL: {
456  q->incRptr(sizeof(sdmaConstFill));
457  warn("SDMA_OP_CONST_FILL not implemented");
458  decodeNext(q);
459  } break;
460  case SDMA_OP_PTEPDE: {
461  DPRINTF(SDMAEngine, "SDMA PTEPDE packet\n");
462  switch (sub_opcode) {
// NOTE(review): the SDMA_SUBOP_PTEPDE_GEN case label was dropped here.
464  DPRINTF(SDMAEngine, "SDMA PTEPDE_GEN sub-opcode\n");
465  dmaBuffer = new sdmaPtePde();
466  cb = new DmaVirtCallback<uint64_t>(
467  [ = ] (const uint64_t &)
468  { ptePde(q, (sdmaPtePde *)dmaBuffer); });
469  dmaReadVirt(q->rptr(), sizeof(sdmaPtePde), cb, dmaBuffer);
470  break;
472  panic("SDMA_SUBOP_PTEPDE_COPY not implemented");
473  break;
475  panic("SDMA_SUBOP_PTEPDE_COPY not implemented");
476  break;
477  case SDMA_SUBOP_PTEPDE_RMW: {
478  panic("SDMA_SUBOP_PTEPDE_RMW not implemented");
479  } break;
480  default:
481  DPRINTF(SDMAEngine, "Unsupported PTEPDE sub-opcode %d\n",
482  sub_opcode);
483  decodeNext(q);
484  break;
485  }
486  } break;
487  case SDMA_OP_TIMESTAMP: {
488  q->incRptr(sizeof(sdmaTimestamp));
// NOTE(review): all sub-opcode case labels here were dropped from this
// excerpt; every path falls through to the warn + decodeNext below.
489  switch (sub_opcode) {
491  } break;
493  } break;
495  } break;
496  default:
497  break;
498  }
499  warn("SDMA_OP_TIMESTAMP not implemented");
500  decodeNext(q);
501  } break;
502  case SDMA_OP_SRBM_WRITE: {
503  DPRINTF(SDMAEngine, "SDMA SRBMWrite packet\n");
// NOTE(review): the sdmaSRBMWriteHeader setup lines were dropped here.
506  dmaBuffer = new sdmaSRBMWrite();
507  cb = new DmaVirtCallback<uint64_t>(
508  [ = ] (const uint64_t &)
509  { srbmWrite(q, header, (sdmaSRBMWrite *)dmaBuffer); });
510  dmaReadVirt(q->rptr(), sizeof(sdmaSRBMWrite), cb, dmaBuffer);
511  } break;
512  case SDMA_OP_PRE_EXE: {
513  q->incRptr(sizeof(sdmaPredExec));
514  warn("SDMA_OP_PRE_EXE not implemented");
515  decodeNext(q);
516  } break;
517  case SDMA_OP_DUMMY_TRAP: {
518  q->incRptr(sizeof(sdmaDummyTrap));
519  warn("SDMA_OP_DUMMY_TRAP not implemented");
520  decodeNext(q);
521  } break;
522  default: {
523  panic("Invalid SDMA packet.\n");
524  } break;
525  }
526 }
527 
528 /* Implements a write packet. */
// Stage 1 of a write packet: pull the dword payload that follows the
// packet struct out of the queue, then hand off to writeReadData.
// NOTE(review): the signature line was dropped from this excerpt; the
// body operates on parameters named `q` and `pkt`.
529 void
531 {
532  q->incRptr(sizeof(sdmaWrite));
533  // count represents the number of dwords - 1 to write
534  pkt->count++;
535  DPRINTF(SDMAEngine, "Write %d dwords to %lx\n", pkt->count, pkt->dest);
536 
537  // first we have to read needed data from the SDMA queue
538  uint32_t *dmaBuffer = new uint32_t[pkt->count];
539  auto cb = new DmaVirtCallback<uint64_t>(
540  [ = ] (const uint64_t &) { writeReadData(q, pkt, dmaBuffer); });
541  dmaReadVirt(q->rptr(), sizeof(uint32_t) * pkt->count, cb,
542  (void *)dmaBuffer);
543 }
544 
545 /* Completion of data reading for a write packet. */
546 void
547 SDMAEngine::writeReadData(SDMAQueue *q, sdmaWrite *pkt, uint32_t *dmaBuffer)
548 {
549  int bufferSize = sizeof(uint32_t) * pkt->count;
550  q->incRptr(bufferSize);
551 
552  DPRINTF(SDMAEngine, "Write packet data:\n");
553  for (int i = 0; i < pkt->count; ++i) {
554  DPRINTF(SDMAEngine, "%08x\n", dmaBuffer[i]);
555  }
556 
557  // lastly we write read data to the destination address
558  if (gpuDevice->getVM().inMMHUB(pkt->dest)) {
559  Addr mmhubAddr = pkt->dest - gpuDevice->getVM().getMMHUBBase();
560  auto cb = new EventFunctionWrapper(
561  [ = ]{ writeDone(q, pkt, dmaBuffer); }, name());
562  gpuDevice->getMemMgr()->writeRequest(mmhubAddr, (uint8_t *)dmaBuffer,
563  bufferSize, 0, cb);
564  } else {
565  // TODO: getGARTAddr?
566  pkt->dest = getGARTAddr(pkt->dest);
567  auto cb = new DmaVirtCallback<uint32_t>(
568  [ = ] (const uint64_t &) { writeDone(q, pkt, dmaBuffer); });
569  dmaWriteVirt(pkt->dest, bufferSize, cb, (void *)dmaBuffer);
570  }
571 }
572 
573 /* Completion of a write packet. */
574 void
575 SDMAEngine::writeDone(SDMAQueue *q, sdmaWrite *pkt, uint32_t *dmaBuffer)
576 {
577  DPRINTF(SDMAEngine, "Write packet completed to %p, %d dwords\n",
578  pkt->dest, pkt->count);
579  delete []dmaBuffer;
580  delete pkt;
581  decodeNext(q);
582 }
583 
584 /* Implements a copy packet. */
// Stage 1 of a copy: read pkt->count bytes from the source (device
// memory via the memory manager, or host via virtual DMA) into a staging
// buffer, then continue in copyReadData.
// NOTE(review): the signature line and the ChunkGenerator declaration
// (the `gen` used below) were dropped from this excerpt.
585 void
587 {
588  DPRINTF(SDMAEngine, "Copy src: %lx -> dest: %lx count %d\n",
589  pkt->source, pkt->dest, pkt->count);
590  q->incRptr(sizeof(sdmaCopy));
591  // count represents the number of bytes - 1 to be copied
592  pkt->count++;
593  DPRINTF(SDMAEngine, "Getting GART addr for %lx\n", pkt->source);
594  pkt->source = getGARTAddr(pkt->source);
595  DPRINTF(SDMAEngine, "GART addr %lx\n", pkt->source);
596 
597  // Read data from the source first, then call the copyReadData method
598  uint8_t *dmaBuffer = new uint8_t[pkt->count];
599  Addr device_addr = getDeviceAddress(pkt->source);
600  if (device_addr) {
601  DPRINTF(SDMAEngine, "Copying from device address %#lx\n", device_addr);
602  auto cb = new EventFunctionWrapper(
603  [ = ]{ copyReadData(q, pkt, dmaBuffer); }, name());
604 
605  // Copy the minimum page size at a time in case the physical addresses
606  // are not contiguous.
608  for (; !gen.done(); gen.next()) {
609  Addr chunk_addr = getDeviceAddress(gen.addr());
610  assert(chunk_addr);
611 
612  DPRINTF(SDMAEngine, "Copying chunk of %d bytes from %#lx (%#lx)\n",
613  gen.size(), gen.addr(), chunk_addr);
614 
// Only the final chunk carries the completion callback.
615  gpuDevice->getMemMgr()->readRequest(chunk_addr, dmaBuffer,
616  gen.size(), 0,
617  gen.last() ? cb : nullptr);
618  dmaBuffer += gen.size();
619  }
620  } else {
621  auto cb = new DmaVirtCallback<uint64_t>(
622  [ = ] (const uint64_t &) { copyReadData(q, pkt, dmaBuffer); });
623  dmaReadVirt(pkt->source, pkt->count, cb, (void *)dmaBuffer);
624  }
625 }
626 
627 /* Completion of data reading for a copy packet. */
// Stage 2 of a copy: the source bytes are staged in dmaBuffer; write
// them to the destination (device memory via the memory manager, host
// via virtual DMA), completing in copyDone.
// NOTE(review): the ChunkGenerator declaration (the `gen` used below)
// was dropped from this excerpt.
628 void
629 SDMAEngine::copyReadData(SDMAQueue *q, sdmaCopy *pkt, uint8_t *dmaBuffer)
630 {
631  // lastly we write read data to the destination address
632  uint64_t *dmaBuffer64 = reinterpret_cast<uint64_t *>(dmaBuffer);
633 
634  DPRINTF(SDMAEngine, "Copy packet last/first qwords:\n");
635  DPRINTF(SDMAEngine, "First: %016lx\n", dmaBuffer64[0]);
636  DPRINTF(SDMAEngine, "Last: %016lx\n", dmaBuffer64[(pkt->count/8)-1]);
637 
638  DPRINTF(SDMAData, "Copy packet data:\n");
639  for (int i = 0; i < pkt->count/8; ++i) {
640  DPRINTF(SDMAData, "%016lx\n", dmaBuffer64[i]);
641  }
642 
643  Addr device_addr = getDeviceAddress(pkt->dest);
644  // Write read data to the destination address then call the copyDone method
645  if (device_addr) {
646  DPRINTF(SDMAEngine, "Copying to device address %#lx\n", device_addr);
647  auto cb = new EventFunctionWrapper(
648  [ = ]{ copyDone(q, pkt, dmaBuffer); }, name());
649 
650  // Copy the minimum page size at a time in case the physical addresses
651  // are not contiguous.
653  for (; !gen.done(); gen.next()) {
654  Addr chunk_addr = getDeviceAddress(gen.addr());
655  assert(chunk_addr);
656 
657  DPRINTF(SDMAEngine, "Copying chunk of %d bytes to %#lx (%#lx)\n",
658  gen.size(), gen.addr(), chunk_addr);
659 
// Only the final chunk carries the completion callback.
660  gpuDevice->getMemMgr()->writeRequest(chunk_addr, dmaBuffer,
661  gen.size(), 0,
662  gen.last() ? cb : nullptr);
663 
664  dmaBuffer += gen.size();
665  }
666  } else {
667  auto cb = new DmaVirtCallback<uint64_t>(
668  [ = ] (const uint64_t &) { copyDone(q, pkt, dmaBuffer); });
669  dmaWriteVirt(pkt->dest, pkt->count, cb, (void *)dmaBuffer);
670  }
671 }
672 
673 /* Completion of a copy packet. */
674 void
675 SDMAEngine::copyDone(SDMAQueue *q, sdmaCopy *pkt, uint8_t *dmaBuffer)
676 {
677  DPRINTF(SDMAEngine, "Copy completed to %p, %d dwords\n",
678  pkt->dest, pkt->count);
679  delete []dmaBuffer;
680  delete pkt;
681  decodeNext(q);
682 }
683 
684 /* Implements an indirect buffer packet. */
// Point the queue's IB at the command buffer described by the packet and
// start decoding it; decodeNext returns to the parent ring when the IB
// drains.
// NOTE(review): the signature line was dropped from this excerpt.  Also
// note the size/wptr expressions differ by where the +1 lands
// (`size * 4 + 1` vs `size * 4`) -- verify against upstream.
685 void
687 {
688  q->ib()->base(getGARTAddr(pkt->base));
689  q->ib()->rptr(0);
690  q->ib()->size(pkt->size * sizeof(uint32_t) + 1);
691  q->ib()->setWptr(pkt->size * sizeof(uint32_t));
692 
693  q->incRptr(sizeof(sdmaIndirectBuffer));
694 
695  delete pkt;
696  decodeNext(q->ib());
697 }
698 
699 /* Implements a fence packet. */
// Write the fence value to its (GART-translated) destination; completion
// continues in fenceDone.
// NOTE(review): the signature line was dropped from this excerpt.
700 void
702 {
703  q->incRptr(sizeof(sdmaFence));
704  pkt->dest = getGARTAddr(pkt->dest);
705 
706  // Writing the data from the fence packet to the destination address.
707  auto cb = new DmaVirtCallback<uint32_t>(
708  [ = ] (const uint32_t &) { fenceDone(q, pkt); }, pkt->data);
709  dmaWriteVirt(pkt->dest, sizeof(pkt->data), cb, &cb->dmaBuffer);
710 }
711 
712 /* Completion of a fence packet. */
// Fence value landed; free the packet and resume decoding.
// NOTE(review): the signature line was dropped from this excerpt.
713 void
715 {
716  DPRINTF(SDMAEngine, "Fence completed to %p, data 0x%x\n",
717  pkt->dest, pkt->data);
718  delete pkt;
719  decodeNext(q);
720 }
721 
722 /* Implements a trap packet. */
// Raise an interrupt for the packet's context.  ring_id 3 is used when
// the page queue triggered the trap, 0 otherwise; exactly one of the two
// rings must be processing.
// NOTE(review): the signature line and the interrupt-handler calls
// (prepareInterruptCookie / submission, per the gap in numbering) were
// dropped from this excerpt.
723 void
725 {
726  q->incRptr(sizeof(sdmaTrap));
727 
728  DPRINTF(SDMAEngine, "Trap contextId: %p\n", pkt->intrContext);
729 
730  uint32_t ring_id = 0;
731  assert(page.processing() ^ gfx.processing());
732  if (page.processing()) {
733  ring_id = 3;
734  }
735 
739 
740  delete pkt;
741  decodeNext(q);
742 }
743 
744 /* Implements a write SRBM packet. */
// Mask the payload by the header's byte-enable bits and log the intended
// register write.  No SRBM model exists, so the write is dropped with a
// one-time warning.
// NOTE(review): the first signature line was dropped from this excerpt.
745 void
747  sdmaSRBMWrite *pkt)
748 {
749  q->incRptr(sizeof(sdmaSRBMWrite));
750 
751  [[maybe_unused]] uint32_t reg_addr = pkt->regAddr << 2;
752  uint32_t reg_mask = 0x00000000;
753 
754  if (header->byteEnable & 0x8) reg_mask |= 0xFF000000;
755  if (header->byteEnable & 0x4) reg_mask |= 0x00FF0000;
756  if (header->byteEnable & 0x2) reg_mask |= 0x0000FF00;
757  if (header->byteEnable & 0x1) reg_mask |= 0x000000FF;
758  pkt->data &= reg_mask;
759 
760  DPRINTF(SDMAEngine, "SRBM write to %#x with data %#x\n",
761  reg_addr, pkt->data);
762 
763  warn_once("SRBM write not performed, no SRBM model. This needs to be fixed"
764  " if correct system simulation is relying on SRBM registers.");
765 
766  delete header;
767  delete pkt;
768  decodeNext(q);
769 }
770 
// POLL_REGMEM: mode 1 / op 0 polls a memory location until the reference
// comparison passes (continued in pollRegMemRead); register polling and
// other ops are unimplemented and skipped.
// NOTE(review): the first signature line was dropped from this excerpt.
// NOTE(review): the initial read uses `pkt->address >> 3` while the
// retry path in pollRegMemRead reads `pkt->address` unshifted -- the two
// should presumably agree; verify against upstream.
776 void
778  sdmaPollRegMem *pkt)
779 {
780  q->incRptr(sizeof(sdmaPollRegMem));
781 
782  DPRINTF(SDMAEngine, "POLL_REGMEM: M=%d, func=%d, op=%d, addr=%p, ref=%d, "
783  "mask=%p, retry=%d, pinterval=%d\n", header->mode, header->func,
784  header->op, pkt->address, pkt->ref, pkt->mask, pkt->retryCount,
785  pkt->pollInt);
786 
787  bool skip = false;
788 
789  if (header->mode == 1) {
790  // polling on a memory location
791  if (header->op == 0) {
792  auto cb = new DmaVirtCallback<uint32_t>(
793  [ = ] (const uint32_t &dma_buffer) {
794  pollRegMemRead(q, header, pkt, dma_buffer, 0); });
795  dmaReadVirt(pkt->address >> 3, sizeof(uint32_t), cb,
796  (void *)&cb->dmaBuffer);
797  } else {
798  panic("SDMA poll mem operation not implemented.");
799  skip = true;
800  }
801  } else {
802  warn_once("SDMA poll reg is not implemented. If this is required for "
803  "correctness, an SRBM model needs to be implemented.");
804  skip = true;
805  }
806 
807  if (skip) {
808  delete header;
809  delete pkt;
810  decodeNext(q);
811  }
812 }
813 
// One poll iteration: if the comparison (pollRegMemFunc) fails and the
// retry budget is not exhausted (0xfff == retry forever), re-read the
// location; otherwise finish the packet and resume decoding.
// NOTE(review): the first signature line was dropped from this excerpt.
814 void
816  sdmaPollRegMem *pkt, uint32_t dma_buffer, int count)
817 {
818  assert(header->mode == 1 && header->op == 0);
819 
820  if (!pollRegMemFunc(dma_buffer, pkt->ref, header->func) &&
821  ((count < (pkt->retryCount + 1) && pkt->retryCount != 0xfff) ||
822  pkt->retryCount == 0xfff)) {
823 
824  // continue polling on a memory location until reference value is met,
825  // retryCount is met or indefinitelly if retryCount is 0xfff
826  DPRINTF(SDMAEngine, "SDMA polling mem addr %p, val %d ref %d.\n",
827  pkt->address, dma_buffer, pkt->ref);
828 
829  auto cb = new DmaVirtCallback<uint32_t>(
830  [ = ] (const uint32_t &dma_buffer) {
831  pollRegMemRead(q, header, pkt, dma_buffer, count + 1); });
832  dmaReadVirt(pkt->address, sizeof(uint32_t), cb,
833  (void *)&cb->dmaBuffer);
834  } else {
835  DPRINTF(SDMAEngine, "SDMA polling mem addr %p, val %d ref %d done.\n",
836  pkt->address, dma_buffer, pkt->ref);
837 
838  delete header;
839  delete pkt;
840  decodeNext(q);
841  }
842 }
843 
844 bool
845 SDMAEngine::pollRegMemFunc(uint32_t value, uint32_t reference, uint32_t func)
846 {
847  switch (func) {
848  case 0:
849  return true;
850  break;
851  case 1:
852  return value < reference;
853  break;
854  case 2:
855  return value <= reference;
856  break;
857  case 3:
858  return value == reference;
859  break;
860  case 4:
861  return value != reference;
862  break;
863  case 5:
864  return value >= reference;
865  break;
866  case 6:
867  return value > reference;
868  break;
869  default:
870  panic("SDMA POLL_REGMEM unknown comparison function.");
871  break;
872  }
873 }
874 
875 /* Implements a PTE PDE generation packet. */
// Generate pkt->count quad-words from initValue + i*increment OR'd with
// mask, then write them to the destination (device memory via the
// memory manager, host via virtual DMA); completes in ptePdeDone.
// NOTE(review): the signature line was dropped from this excerpt.
876 void
878 {
879  q->incRptr(sizeof(sdmaPtePde));
880  pkt->count++;
881 
882  DPRINTF(SDMAEngine, "PTEPDE init: %d inc: %d count: %d\n",
883  pkt->initValue, pkt->increment, pkt->count);
884 
885  // Generating pkt->count double dwords using the initial value, increment
886  // and a mask.
887  uint64_t *dmaBuffer = new uint64_t[pkt->count];
888  for (int i = 0; i < pkt->count; i++) {
889  dmaBuffer[i] = (pkt->mask | (pkt->initValue + (i * pkt->increment)));
890  }
891 
892  // Writing generated data to the destination address.
893  if (gpuDevice->getVM().inMMHUB(pkt->dest)) {
894  Addr mmhubAddr = pkt->dest - gpuDevice->getVM().getMMHUBBase();
895  auto cb = new EventFunctionWrapper(
896  [ = ]{ ptePdeDone(q, pkt, dmaBuffer); }, name());
897  gpuDevice->getMemMgr()->writeRequest(mmhubAddr, (uint8_t *)dmaBuffer,
898  sizeof(uint64_t) * pkt->count, 0,
899  cb);
900  } else {
901  auto cb = new DmaVirtCallback<uint64_t>(
902  [ = ] (const uint64_t &) { ptePdeDone(q, pkt, dmaBuffer); });
903  dmaWriteVirt(pkt->dest, sizeof(uint64_t) * pkt->count, cb,
904  (void *)dmaBuffer);
905  }
906 }
907 
908 /* Completion of a PTE PDE generation packet. */
909 void
910 SDMAEngine::ptePdeDone(SDMAQueue *q, sdmaPtePde *pkt, uint64_t *dmaBuffer)
911 {
912  DPRINTF(SDMAEngine, "PtePde packet completed to %p, %d 2dwords\n",
913  pkt->dest, pkt->count);
914 
915  delete []dmaBuffer;
916  delete pkt;
917  decodeNext(q);
918 }
919 
// Stage 1 of an atomic packet: fetch the current 64-bit value at
// pkt->addr, then continue in atomicData.
// NOTE(review): the signature line was dropped from this excerpt.
920 void
922 {
923  q->incRptr(sizeof(sdmaAtomic));
924  DPRINTF(SDMAEngine, "Atomic op %d on addr %#lx, src: %ld, cmp: %ld, loop?"
925  " %d loopInt: %d\n", header->opcode, pkt->addr, pkt->srcData,
926  pkt->cmpData, header->loop, pkt->loopInt);
927 
928  // Read the data at pkt->addr
929  uint64_t *dmaBuffer = new uint64_t;
930  auto cb = new DmaVirtCallback<uint64_t>(
931  [ = ] (const uint64_t &)
932  { atomicData(q, header, pkt, dmaBuffer); });
933  dmaReadVirt(pkt->addr, sizeof(uint64_t), cb, (void *)dmaBuffer);
934 }
935 
// Stage 2 of an atomic packet: apply the operation to the fetched value
// and write the result back.  Only ADD64 (add-with-return) is modeled;
// all other atomic opcodes panic.
// NOTE(review): the first signature line was dropped from this excerpt.
936 void
938  uint64_t *dmaBuffer)
939 {
940  DPRINTF(SDMAEngine, "Atomic op %d on addr %#lx got data %#lx\n",
941  header->opcode, pkt->addr, *dmaBuffer);
942 
943  if (header->opcode == SDMA_ATOMIC_ADD64) {
944  // Atomic add with return -- dst = dst + src
945  int64_t dst_data = *dmaBuffer;
946  int64_t src_data = pkt->srcData;
947 
948  DPRINTF(SDMAEngine, "Atomic ADD_RTN: %ld + %ld = %ld\n", dst_data,
949  src_data, dst_data + src_data);
950 
951  // Reuse the dmaBuffer allocated
952  *dmaBuffer = dst_data + src_data;
953 
954  auto cb = new DmaVirtCallback<uint64_t>(
955  [ = ] (const uint64_t &)
956  { atomicDone(q, header, pkt, dmaBuffer); });
957  dmaWriteVirt(pkt->addr, sizeof(uint64_t), cb, (void *)dmaBuffer);
958  } else {
959  panic("Unsupported SDMA atomic opcode: %d\n", header->opcode);
960  }
961 }
962 
// Final stage of an atomic packet: result written back; free the scratch
// value, header and packet, then resume decoding.
// NOTE(review): the first signature line was dropped from this excerpt.
963 void
965  uint64_t *dmaBuffer)
966 {
967  DPRINTF(SDMAEngine, "Atomic op %d op addr %#lx complete (sent %lx)\n",
968  header->opcode, pkt->addr, *dmaBuffer);
969 
970  delete dmaBuffer;
971  delete header;
972  delete pkt;
973  decodeNext(q);
974 }
975 
// The SDMA engine exposes no PIO address ranges of its own (MMIO is
// routed here by the parent GPU device); return an empty list.
// NOTE(review): the signature/return-type lines were dropped from this
// excerpt.
978 {
979  AddrRangeList ranges;
980  return ranges;
981 }
982 
// Checkpoint the engine: scalar MMIO shadow state plus per-queue ring
// state (base/rptr/wptr/size/processing) for gfx, page and their IBs.
// RLC queues are not checkpointed here.
// NOTE(review): the signature line, the DmaVirtDevice base-class
// serialize call and the SERIALIZE_SCALAR lines were dropped from this
// excerpt (gaps in the embedded numbering).
983 void
985 {
986  // Serialize the DmaVirtDevice base class
988 
999 
1000  int num_queues = 4;
1001 
1002  std::vector<SDMAQueue *> queues;
1003  queues.push_back((SDMAQueue *)&gfx);
1004  queues.push_back((SDMAQueue *)&page);
1005  queues.push_back((SDMAQueue *)&gfxIb);
1006  queues.push_back((SDMAQueue *)&pageIb);
1007 
1008  Addr base[num_queues];
1009  Addr rptr[num_queues];
1010  Addr wptr[num_queues];
1011  Addr size[num_queues];
1012  bool processing[num_queues];
1013 
1014  for (int i = 0; i < num_queues; i++) {
1015  base[i] = queues[i]->base();
1016  rptr[i] = queues[i]->getRptr();
1017  wptr[i] = queues[i]->getWptr();
1018  size[i] = queues[i]->size();
1019  processing[i] = queues[i]->processing();
1020  }
1021 
1022  SERIALIZE_ARRAY(base, num_queues);
1023  SERIALIZE_ARRAY(rptr, num_queues);
1024  SERIALIZE_ARRAY(wptr, num_queues);
1025  SERIALIZE_ARRAY(size, num_queues);
1026  SERIALIZE_ARRAY(processing, num_queues);
1027 }
1028 
// Restore the engine from a checkpoint; mirror of serialize() -- arrays
// are read back in the same gfx/page/gfxIb/pageIb order.
// NOTE(review): the signature line, the DmaVirtDevice base-class
// unserialize call and the UNSERIALIZE_SCALAR lines were dropped from
// this excerpt.
1029 void
1031 {
1032  // Serialize the DmaVirtDevice base class
1034 
1045 
1046  int num_queues = 4;
1047  Addr base[num_queues];
1048  Addr rptr[num_queues];
1049  Addr wptr[num_queues];
1050  Addr size[num_queues];
1051  bool processing[num_queues];
1052 
1053  UNSERIALIZE_ARRAY(base, num_queues);
1054  UNSERIALIZE_ARRAY(rptr, num_queues);
1055  UNSERIALIZE_ARRAY(wptr, num_queues);
1056  UNSERIALIZE_ARRAY(size, num_queues);
1057  UNSERIALIZE_ARRAY(processing, num_queues);
1058 
1059  std::vector<SDMAQueue *> queues;
1060  queues.push_back((SDMAQueue *)&gfx);
1061  queues.push_back((SDMAQueue *)&page);
1062  queues.push_back((SDMAQueue *)&gfxIb);
1063  queues.push_back((SDMAQueue *)&pageIb);
1064 
1065  for (int i = 0; i < num_queues; i++) {
1066  queues[i]->base(base[i]);
1067  queues[i]->rptr(rptr[i]);
1068  queues[i]->wptr(wptr[i]);
1069  queues[i]->size(size[i]);
1070  queues[i]->processing(processing[i]);
1071  }
1072 }
1073 
// Decode an MMIO register write routed here by the GPU device and update
// the matching shadow register / queue state via the setter helpers
// below.  Doorbell-offset writes additionally (re)arm the doorbell when
// its enable bit (bit 28) is set.
// NOTE(review): the signature line and several `case` labels (the *_HI
// and doorbell-offset registers, plus the doorbell-arming statements)
// were dropped from this excerpt -- see the gaps in the numbering.
1074 void
1076 {
1077  DPRINTF(SDMAEngine, "Writing offset %#x with data %x\n", mmio_offset,
1078  pkt->getLE<uint32_t>());
1079 
1080  // In Vega10 headers, the offsets are the same for both SDMAs
1081  switch (mmio_offset) {
1082  case mmSDMA_GFX_RB_BASE:
1083  setGfxBaseLo(pkt->getLE<uint32_t>());
1084  break;
1085  case mmSDMA_GFX_RB_BASE_HI:
1086  setGfxBaseHi(pkt->getLE<uint32_t>());
1087  break;
1089  setGfxRptrLo(pkt->getLE<uint32_t>());
1090  break;
1092  setGfxRptrHi(pkt->getLE<uint32_t>());
1093  break;
1094  case mmSDMA_GFX_DOORBELL:
1095  setGfxDoorbellLo(pkt->getLE<uint32_t>());
1096  break;
1098  setGfxDoorbellOffsetLo(pkt->getLE<uint32_t>());
1099  // Bit 28 of doorbell indicates that doorbell is enabled.
1100  if (bits(getGfxDoorbell(), 28, 28)) {
1104  }
1105  break;
1106  case mmSDMA_GFX_RB_CNTL: {
// RB_CNTL bits 6:1 encode log2(ring size in dwords); convert to bytes.
1107  uint32_t rb_size = bits(pkt->getLE<uint32_t>(), 6, 1);
1108  assert(rb_size >= 6 && rb_size <= 62);
1109  setGfxSize(1 << (rb_size + 2));
1110  } break;
1112  setGfxWptrLo(pkt->getLE<uint32_t>());
1113  break;
1115  setGfxWptrHi(pkt->getLE<uint32_t>());
1116  break;
1117  case mmSDMA_PAGE_RB_BASE:
1118  setPageBaseLo(pkt->getLE<uint32_t>());
1119  break;
1121  setPageRptrLo(pkt->getLE<uint32_t>());
1122  break;
1124  setPageRptrHi(pkt->getLE<uint32_t>());
1125  break;
1126  case mmSDMA_PAGE_DOORBELL:
1127  setPageDoorbellLo(pkt->getLE<uint32_t>());
1128  break;
1130  setPageDoorbellOffsetLo(pkt->getLE<uint32_t>());
1131  // Bit 28 of doorbell indicates that doorbell is enabled.
1132  if (bits(getPageDoorbell(), 28, 28)) {
1136  }
1137  break;
1138  case mmSDMA_PAGE_RB_CNTL: {
1139  uint32_t rb_size = bits(pkt->getLE<uint32_t>(), 6, 1);
1140  assert(rb_size >= 6 && rb_size <= 62);
1141  setPageSize(1 << (rb_size + 2));
1142  } break;
1144  setPageWptrLo(pkt->getLE<uint32_t>());
1145  break;
1146  default:
1147  DPRINTF(SDMAEngine, "Unknown SDMA MMIO %#x\n", mmio_offset);
1148  break;
1149  }
1150 }
1151 
// --- Gfx queue MMIO setter helpers -----------------------------------
// Each Lo/Hi pair merges one 32-bit MMIO write into its 64-bit shadow
// register: clear the target half with insertBits, OR in the new data,
// and (for base registers) refresh the queue's byte address
// ((base >> 1) << 12, per the RB_BASE register layout).
// NOTE(review): every signature line in this run of setters was dropped
// from the excerpt; the order matches the writeMMIO dispatch above
// (BaseLo/Hi, RptrLo/Hi, DoorbellLo/Hi, DoorbellOffsetLo/Hi, Size,
// WptrLo).  The DoorbellOffsetLo body lines are also missing.
1152 void
1154 {
1155  gfxBase = insertBits(gfxBase, 31, 0, 0);
1156  gfxBase |= data;
1157  gfx.base((gfxBase >> 1) << 12);
1158 }
1159 
1160 void
1162 {
1163  gfxBase = insertBits(gfxBase, 63, 32, 0);
1164  gfxBase |= ((uint64_t)data) << 32;
1165  gfx.base((gfxBase >> 1) << 12);
1166 }
1167 
1168 void
1170 {
1171  gfxRptr = insertBits(gfxRptr, 31, 0, 0);
1172  gfxRptr |= data;
1174 }
1175 
1176 void
1178 {
1179  gfxRptr = insertBits(gfxRptr, 63, 32, 0);
1180  gfxRptr |= ((uint64_t)data) << 32;
1182 }
1183 
1184 void
1186 {
1187  gfxDoorbell = insertBits(gfxDoorbell, 31, 0, 0);
1188  gfxDoorbell |= data;
1189 }
1190 
1191 void
1193 {
1194  gfxDoorbell = insertBits(gfxDoorbell, 63, 32, 0);
1195  gfxDoorbell |= ((uint64_t)data) << 32;
1196 }
1197 
1198 void
1200 {
1203 }
1204 
1205 void
1207 {
1209  gfxDoorbellOffset |= ((uint64_t)data) << 32;
1210 }
1211 
1212 void
1214 {
1215  gfx.size(data);
1216 }
1217 
1218 void
1220 {
1221  gfxWptr = insertBits(gfxWptr, 31, 0, 0);
1222  gfxWptr |= data;
1223 }
1225 void
1227 {
1228  gfxWptr = insertBits(gfxWptr, 31, 0, 0);
1229  gfxWptr |= ((uint64_t)data) << 32;
1230 }
1231 
// --- Page queue MMIO setter helpers ----------------------------------
// Mirror of the gfx setters above for the page queue's shadow registers.
// NOTE(review): every signature line in this run was dropped from the
// excerpt; the order matches the writeMMIO dispatch (BaseLo/Hi,
// RptrLo/Hi, DoorbellLo/Hi, DoorbellOffsetLo/Hi, Size, WptrLo/Hi).
// The DoorbellOffsetLo body lines are also missing.
1232 void
1234 {
1235  pageBase = insertBits(pageBase, 31, 0, 0);
1236  pageBase |= data;
1237  page.base((pageBase >> 1) << 12);
1238 }
1239 
1240 void
1242 {
1243  pageBase = insertBits(pageBase, 63, 32, 0);
1244  pageBase |= ((uint64_t)data) << 32;
1245  page.base((pageBase >> 1) << 12);
1246 }
1247 
1248 void
1250 {
1251  pageRptr = insertBits(pageRptr, 31, 0, 0);
1252  pageRptr |= data;
1254 }
1255 
1256 void
1258 {
1259  pageRptr = insertBits(pageRptr, 63, 32, 0);
1260  pageRptr |= ((uint64_t)data) << 32;
1262 }
1263 
1264 void
1266 {
1267  pageDoorbell = insertBits(pageDoorbell, 31, 0, 0);
1268  pageDoorbell |= data;
1269 }
1270 
1271 void
1273 {
1274  pageDoorbell = insertBits(pageDoorbell, 63, 32, 0);
1275  pageDoorbell |= ((uint64_t)data) << 32;
1276 }
1277 
1278 void
1280 {
1283 }
1284 
1285 void
1287 {
1289  pageDoorbellOffset |= ((uint64_t)data) << 32;
1290 }
1291 
1292 void
1294 {
1295  page.size(data);
1296 }
1297 
1298 void
1300 {
1301  pageWptr = insertBits(pageWptr, 31, 0, 0);
1302  pageWptr |= data;
1303 }
1304 
1305 void
1307 {
1308  pageWptr = insertBits(pageWptr, 63, 32, 0);
1309  pageWptr |= ((uint64_t)data) << 32;
1310 }
1311 
1312 } // namespace gem5
static constexpr int AMDGPU_MMHUB_PAGE_SIZE
Definition: amdgpu_vm.hh:83
#define DPRINTF(x,...)
Definition: trace.hh:186
const char data[]
Device model for an AMD GPU.
void setDoorbellType(uint32_t offset, QueueType qt)
Set handles to GPU blocks.
AMDGPUMemoryManager * getMemMgr()
AMDGPUVM & getVM()
AMDGPUInterruptHandler * getIH()
Get handles to GPU blocks.
RequestorID vramRequestorId()
Methods related to translations and system/device memory.
void setSDMAEngine(Addr offset, SDMAEngine *eng)
void prepareInterruptCookie(ContextID cntxtId, uint32_t ring_id, uint32_t client_id, uint32_t source_id)
void writeRequest(Addr addr, uint8_t *data, int size, Request::Flags flag, Event *callback)
Write size amount of data to device memory at addr using flags and callback.
void readRequest(Addr addr, uint8_t *data, int size, Request::Flags flag, Event *callback)
Read size amount of data from device memory at addr using flags and callback.
Translation range generators.
Definition: amdgpu_vm.hh:303
bool inAGP(Addr vaddr)
Methods for resolving apertures.
Definition: amdgpu_vm.hh:177
Addr getMMHUBBase()
Definition: amdgpu_vm.hh:192
bool inMMHUB(Addr vaddr)
Definition: amdgpu_vm.hh:187
This class takes an arbitrary memory region (address/length pair) and generates a series of appropria...
void serialize(CheckpointOut &cp) const override
Serialize an object.
void unserialize(CheckpointIn &cp) override
Unserialize an object.
Wraps a std::function object in a DmaCallback.
void dmaReadVirt(Addr host_addr, unsigned size, DmaCallback *cb, void *data, Tick delay=0)
Initiate a DMA read from virtual address host_addr.
void dmaWriteVirt(Addr host_addr, unsigned size, DmaCallback *b, void *data, Tick delay=0)
Initiate a DMA write from virtual address host_addr.
virtual std::string name() const
Definition: named.hh:47
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
Definition: packet.hh:294
T getLE() const
Get the data in the packet byte swapped from little endian to host endian.
void setWptr(Addr value)
Definition: sdma_engine.hh:101
System DMA Engine class for AMD dGPU.
Definition: sdma_engine.hh:48
uint64_t pageDoorbell
Definition: sdma_engine.hh:137
void setPageRptrLo(uint32_t data)
void unserialize(CheckpointIn &cp) override
Unserialize an object.
uint64_t getPageDoorbellOffset()
Definition: sdma_engine.hh:246
SDMAQueue gfx
Each SDMAEngine processes four queues: paging, gfx, rlc0, and rlc1, where RLC stands for Run List Con...
Definition: sdma_engine.hh:125
void ptePde(SDMAQueue *q, sdmaPtePde *pkt)
Definition: sdma_engine.cc:877
void setGfxRptrLo(uint32_t data)
void setGfxWptrLo(uint32_t data)
uint64_t getGfxDoorbellOffset()
Definition: sdma_engine.hh:241
void setPageDoorbellHi(uint32_t data)
VegaISA::Walker * walker
Definition: sdma_engine.hh:142
void setGfxRptrHi(uint32_t data)
void writeDone(SDMAQueue *q, sdmaWrite *pkt, uint32_t *dmaBuffer)
Definition: sdma_engine.cc:575
void setGfxSize(uint64_t data)
void processRLC(Addr doorbellOffset, Addr wptrOffset)
Definition: sdma_engine.cc:240
void registerRLCQueue(Addr doorbell, Addr rb_base, uint32_t size, Addr rptr_wb_addr)
Methods for RLC queues.
Definition: sdma_engine.cc:164
SDMAQueue rlc0Ib
Definition: sdma_engine.hh:126
void copy(SDMAQueue *q, sdmaCopy *pkt)
Definition: sdma_engine.cc:586
Tick write(PacketPtr pkt) override
Inherited methods.
Definition: sdma_engine.hh:176
void writeMMIO(PacketPtr pkt, Addr mmio_offset)
Methods for setting the values of SDMA MMIO registers.
void fenceDone(SDMAQueue *q, sdmaFence *pkt)
Definition: sdma_engine.cc:714
void writeReadData(SDMAQueue *q, sdmaWrite *pkt, uint32_t *dmaBuffer)
Definition: sdma_engine.cc:547
void unregisterRLCQueue(Addr doorbell)
Definition: sdma_engine.cc:194
void setGfxBaseLo(uint32_t data)
void processRLC0(Addr wptrOffset)
Definition: sdma_engine.cc:253
void processGfx(Addr wptrOffset)
Given a new write ptr offset, communicated to the GPU through a doorbell write, the SDMA engine proce...
Definition: sdma_engine.cc:218
void setGfxDoorbellOffsetHi(uint32_t data)
void atomic(SDMAQueue *q, sdmaAtomicHeader *header, sdmaAtomic *pkt)
Definition: sdma_engine.cc:921
void deallocateRLCQueues()
Definition: sdma_engine.cc:209
AMDGPUDevice * gpuDevice
Definition: sdma_engine.hh:141
Addr getGARTAddr(Addr addr) const
Methods for translation.
Definition: sdma_engine.cc:92
void setPageDoorbellOffsetHi(uint32_t data)
void processRLC1(Addr wptrOffset)
Definition: sdma_engine.cc:267
void setGfxWptrHi(uint32_t data)
void setGfxDoorbellOffsetLo(uint32_t data)
void processPage(Addr wptrOffset)
Definition: sdma_engine.cc:229
uint64_t getGfxDoorbell()
Definition: sdma_engine.hh:240
void decodeHeader(SDMAQueue *q, uint32_t data)
Reads the first DW (32 bits) (i.e., header) of an SDMA packet, which encodes the opcode and sub-opcod...
Definition: sdma_engine.cc:317
void setPageDoorbellOffsetLo(uint32_t data)
uint64_t getPageDoorbell()
Definition: sdma_engine.hh:245
SDMAQueue pageIb
Definition: sdma_engine.hh:125
SDMAEngine(const SDMAEngineParams &p)
Definition: sdma_engine.cc:48
void setGPUDevice(AMDGPUDevice *gpu_device)
Definition: sdma_engine.cc:72
uint64_t gfxDoorbell
Definition: sdma_engine.hh:131
Addr getDeviceAddress(Addr raw_addr)
Translate an address in an SDMA packet.
Definition: sdma_engine.cc:102
uint64_t pageDoorbellOffset
Definition: sdma_engine.hh:138
void setPageBaseHi(uint32_t data)
uint64_t gfxDoorbellOffset
Definition: sdma_engine.hh:132
bool pollRegMemFunc(uint32_t value, uint32_t reference, uint32_t func)
Definition: sdma_engine.cc:845
void setPageWptrHi(uint32_t data)
void setPageWptrLo(uint32_t data)
void pollRegMemRead(SDMAQueue *q, sdmaPollRegMemHeader *header, sdmaPollRegMem *pkt, uint32_t dma_buffer, int count)
Definition: sdma_engine.cc:815
void setGfxDoorbellLo(uint32_t data)
void copyReadData(SDMAQueue *q, sdmaCopy *pkt, uint8_t *dmaBuffer)
Definition: sdma_engine.cc:629
void indirectBuffer(SDMAQueue *q, sdmaIndirectBuffer *pkt)
Definition: sdma_engine.cc:686
void srbmWrite(SDMAQueue *q, sdmaSRBMWriteHeader *header, sdmaSRBMWrite *pkt)
Definition: sdma_engine.cc:746
void atomicData(SDMAQueue *q, sdmaAtomicHeader *header, sdmaAtomic *pkt, uint64_t *dmaBuffer)
Definition: sdma_engine.cc:937
void trap(SDMAQueue *q, sdmaTrap *pkt)
Definition: sdma_engine.cc:724
AddrRangeList getAddrRanges() const override
Every PIO device is obliged to provide an implementation that returns the address ranges the device r...
Definition: sdma_engine.cc:977
void setPageDoorbellLo(uint32_t data)
void setGfxDoorbellHi(uint32_t data)
void setPageBaseLo(uint32_t data)
void copyDone(SDMAQueue *q, sdmaCopy *pkt, uint8_t *dmaBuffer)
Definition: sdma_engine.cc:675
void setGfxBaseHi(uint32_t data)
void setPageSize(uint64_t data)
void ptePdeDone(SDMAQueue *q, sdmaPtePde *pkt, uint64_t *dmaBuffer)
Definition: sdma_engine.cc:910
TranslationGenPtr translate(Addr vaddr, Addr size) override
GPUController will perform DMA operations on VAs, and because page faults are not currently supported...
Definition: sdma_engine.cc:140
void setPageRptrHi(uint32_t data)
void decodeNext(SDMAQueue *q)
This method checks read and write pointers and starts decoding packets if the read pointer is less th...
Definition: sdma_engine.cc:281
void fence(SDMAQueue *q, sdmaFence *pkt)
Definition: sdma_engine.cc:701
void atomicDone(SDMAQueue *q, sdmaAtomicHeader *header, sdmaAtomic *pkt, uint64_t *dmaBuffer)
Definition: sdma_engine.cc:964
void serialize(CheckpointOut &cp) const override
Serialize an object.
Definition: sdma_engine.cc:984
int getIHClientId()
Returns the client id for the Interrupt Handler.
Definition: sdma_engine.cc:79
std::array< Addr, 2 > rlcInfo
Definition: sdma_engine.hh:145
void pollRegMem(SDMAQueue *q, sdmaPollRegMemHeader *header, sdmaPollRegMem *pkt)
Implements a poll reg/mem packet that polls an SRBM register or a memory location,...
Definition: sdma_engine.cc:777
SDMAQueue rlc1Ib
Definition: sdma_engine.hh:126
void setDevRequestor(RequestorID mid)
STL vector class.
Definition: stl.hh:37
constexpr T bits(T val, unsigned first, unsigned last)
Extract the bitfield from position 'first' to 'last' (inclusive) from 'val' and right justify it.
Definition: bitfield.hh:76
constexpr T insertBits(T val, unsigned first, unsigned last, B bit_val)
Returns val with bits first to last set to the LSBs of bit_val.
Definition: bitfield.hh:166
#define panic(...)
This implements a cprintf based panic() function.
Definition: logging.hh:178
#define UNSERIALIZE_ARRAY(member, size)
Definition: serialize.hh:618
#define SERIALIZE_ARRAY(member, size)
Definition: serialize.hh:610
#define warn(...)
Definition: logging.hh:246
#define warn_once(...)
Definition: logging.hh:250
Bitfield< 27 > q
Definition: misc_types.hh:55
Bitfield< 7 > i
Definition: misc_types.hh:67
Bitfield< 33 > id
Definition: misc_types.hh:257
Bitfield< 24, 21 > opcode
Definition: types.hh:92
Bitfield< 54 > p
Definition: pagetable.hh:70
Bitfield< 51, 12 > base
Definition: pagetable.hh:141
Bitfield< 3 > addr
Definition: types.hh:84
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
struct gem5::GEM5_PACKED sdmaFence
struct gem5::GEM5_PACKED sdmaAtomic
std::ostream CheckpointOut
Definition: serialize.hh:66
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition: types.hh:147
@ SOC15_IH_CLIENTID_SDMA0
@ SOC15_IH_CLIENTID_SDMA1
struct gem5::GEM5_PACKED sdmaPtePde
struct gem5::GEM5_PACKED sdmaPollRegMem
struct gem5::GEM5_PACKED sdmaPollRegMemHeader
constexpr unsigned int SDMA_ATOMIC_ADD64
struct gem5::GEM5_PACKED sdmaWrite
struct gem5::GEM5_PACKED sdmaAtomicHeader
struct gem5::GEM5_PACKED sdmaCopy
SDMA packets.
struct gem5::GEM5_PACKED sdmaIndirectBuffer
struct gem5::GEM5_PACKED sdmaTrap
struct gem5::GEM5_PACKED sdmaSRBMWrite
struct gem5::GEM5_PACKED sdmaSRBMWriteHeader
std::unique_ptr< TranslationGen > TranslationGenPtr
output header
Definition: nop.cc:36
Declaration of the Packet class.
#define SDMA_SUBOP_COPY_SOA
#define SDMA_OP_SEM
#define SDMA_OP_PTEPDE
#define SDMA_OP_ATOMIC
#define SDMA_OP_DUMMY_TRAP
#define SDMA_SUBOP_PTEPDE_COPY
#define SDMA_SUBOP_COPY_LINEAR
#define SDMA_SUBOP_COPY_T2T_SUB_WIND
#define SDMA_SUBOP_TIMESTAMP_GET
#define SDMA_SUBOP_WRITE_TILED
#define SDMA_OP_PRE_EXE
#define SDMA_OP_TRAP
#define SDMA_SUBOP_PTEPDE_GEN
#define SDMA_OP_WRITE
#define SDMA_SUBOP_COPY_LINEAR_SUB_WIND
#define SDMA_OP_COPY
#define SDMA_SUBOP_COPY_LINEAR_PHY
#define SDMA_OP_POLL_REGMEM
#define SDMA_SUBOP_TIMESTAMP_GET_GLOBAL
#define SDMA_SUBOP_PTEPDE_COPY_BACKWARDS
#define SDMA_SUBOP_TIMESTAMP_SET
#define SDMA_OP_TIMESTAMP
#define SDMA_OP_INDIRECT
#define SDMA_OP_COND_EXE
#define SDMA_OP_CONST_FILL
#define SDMA_SUBOP_COPY_DIRTY_PAGE
#define SDMA_OP_NOP
Commands for the SDMA engine.
#define SDMA_SUBOP_WRITE_LINEAR
#define SDMA_OP_FENCE
#define SDMA_SUBOP_PTEPDE_RMW
#define SDMA_OP_SRBM_WRITE
#define SDMA_SUBOP_POLL_MEM_VERIFY
#define SDMA_SUBOP_POLL_REG_WRITE_MEM
#define SDMA_SUBOP_COPY_TILED_SUB_WIND
#define SDMA_SUBOP_POLL_DBIT_WRITE_MEM
#define SDMA_SUBOP_COPY_TILED
#define mmSDMA_GFX_DOORBELL
Definition: sdma_mmio.hh:49
#define mmSDMA_PAGE_RB_RPTR_ADDR_HI
Definition: sdma_mmio.hh:55
#define mmSDMA_GFX_RB_WPTR_POLL_ADDR_LO
Definition: sdma_mmio.hh:52
#define mmSDMA_PAGE_RB_BASE
Definition: sdma_mmio.hh:54
#define mmSDMA_PAGE_RB_WPTR_POLL_ADDR_LO
Definition: sdma_mmio.hh:59
#define mmSDMA_PAGE_DOORBELL
Definition: sdma_mmio.hh:57
#define mmSDMA_GFX_DOORBELL_OFFSET
Definition: sdma_mmio.hh:50
#define mmSDMA_PAGE_DOORBELL_OFFSET
Definition: sdma_mmio.hh:58
#define mmSDMA_GFX_RB_CNTL
MMIO offsets for SDMA engine.
Definition: sdma_mmio.hh:44
#define mmSDMA_GFX_RB_RPTR_ADDR_HI
Definition: sdma_mmio.hh:47
#define mmSDMA_PAGE_RB_RPTR_ADDR_LO
Definition: sdma_mmio.hh:56
#define mmSDMA_GFX_RB_RPTR_ADDR_LO
Definition: sdma_mmio.hh:48
#define mmSDMA_GFX_RB_WPTR_POLL_ADDR_HI
Definition: sdma_mmio.hh:51
#define mmSDMA_GFX_RB_BASE
Definition: sdma_mmio.hh:45
#define mmSDMA_PAGE_RB_CNTL
Definition: sdma_mmio.hh:53
#define mmSDMA_GFX_RB_BASE_HI
Definition: sdma_mmio.hh:46
#define UNSERIALIZE_SCALAR(scalar)
Definition: serialize.hh:575
#define SERIALIZE_SCALAR(scalar)
Definition: serialize.hh:568
PM4 packets.
Definition: pm4_defines.hh:78

Generated on Wed Dec 21 2022 10:22:32 for gem5 by doxygen 1.9.1