gem5  v22.0.0.2
gpu_command_processor.cc
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2018 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *
8  * 1. Redistributions of source code must retain the above copyright notice,
9  * this list of conditions and the following disclaimer.
10  *
11  * 2. Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation
13  * and/or other materials provided with the distribution.
14  *
15  * 3. Neither the name of the copyright holder nor the names of its
16  * contributors may be used to endorse or promote products derived from this
17  * software without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
33 
34 #include <cassert>
35 
37 #include "base/chunk_generator.hh"
38 #include "debug/GPUCommandProc.hh"
39 #include "debug/GPUKernelInfo.hh"
42 #include "mem/abstract_mem.hh"
43 #include "mem/packet_access.hh"
46 #include "params/GPUCommandProcessor.hh"
47 #include "sim/full_system.hh"
48 #include "sim/process.hh"
49 #include "sim/proxy_ptr.hh"
50 #include "sim/syscall_emul_buf.hh"
51 
52 namespace gem5
53 {
54 
56  : DmaVirtDevice(p), dispatcher(*p.dispatcher), _driver(nullptr),
57  walker(p.walker), hsaPP(p.hsapp)
58 {
59  assert(hsaPP);
60  hsaPP->setDevice(this);
62 }
63 
66 {
67  return *hsaPP;
68 }
69 
75 {
76  return gpuDevice->vramRequestorId();
77 }
78 
81 {
82  if (!FullSystem) {
83  // Grab the process and try to translate the virtual address with it;
84  // with new extensions, it will likely be wrong to just arbitrarily
85  // grab context zero.
86  auto process = sys->threads[0]->getProcessPtr();
87 
88  return process->pTable->translateRange(vaddr, size);
89  }
90 
91  // In full system use the page tables setup by the kernel driver rather
92  // than the CPU page tables.
93  return TranslationGenPtr(
95  1 /* vmid */, vaddr, size));
96 }
97 
115 void
116 GPUCommandProcessor::submitDispatchPkt(void *raw_pkt, uint32_t queue_id,
117  Addr host_pkt_addr)
118 {
119  static int dynamic_task_id = 0;
120  _hsa_dispatch_packet_t *disp_pkt = (_hsa_dispatch_packet_t*)raw_pkt;
121 
126  auto *tc = sys->threads[0];
127 
128  TranslatingPortProxy fs_proxy(tc);
129  SETranslatingPortProxy se_proxy(tc);
130  PortProxy &virt_proxy = FullSystem ? fs_proxy : se_proxy;
131 
137  bool is_system_page = true;
138  Addr phys_addr = disp_pkt->kernel_object;
139  if (FullSystem) {
146  int vmid = 1;
147  unsigned tmp_bytes;
149  phys_addr, tmp_bytes, BaseMMU::Mode::Read,
150  is_system_page);
151  }
152 
162  AMDKernelCode akc;
163  if (is_system_page) {
164  DPRINTF(GPUCommandProc, "kernel_object in system, using proxy\n");
165  virt_proxy.readBlob(disp_pkt->kernel_object, (uint8_t*)&akc,
166  sizeof(AMDKernelCode));
167  } else {
168  assert(FullSystem);
169  DPRINTF(GPUCommandProc, "kernel_object in device, using device mem\n");
170  // Read from GPU memory manager
171  uint8_t raw_akc[sizeof(AMDKernelCode)];
172  for (int i = 0; i < sizeof(AMDKernelCode) / sizeof(uint8_t); ++i) {
173  Addr mmhubAddr = phys_addr + i*sizeof(uint8_t);
175  RequestPtr request = std::make_shared<Request>(
176  mmhubAddr, sizeof(uint8_t), flags, walker->getDevRequestor());
177  Packet *readPkt = new Packet(request, MemCmd::ReadReq);
178  readPkt->allocate();
179  system()->getDeviceMemory(readPkt)->access(readPkt);
180  raw_akc[i] = readPkt->getLE<uint8_t>();
181  delete readPkt;
182  }
183  memcpy(&akc, &raw_akc, sizeof(AMDKernelCode));
184  }
185 
186  DPRINTF(GPUCommandProc, "GPU machine code is %lli bytes from start of the "
187  "kernel object\n", akc.kernel_code_entry_byte_offset);
188 
189  DPRINTF(GPUCommandProc,"GPUCommandProc: Sending dispatch pkt to %lu\n",
190  (uint64_t)tc->cpuId());
191 
192 
193  Addr machine_code_addr = (Addr)disp_pkt->kernel_object
195 
196  DPRINTF(GPUCommandProc, "Machine code starts at addr: %#x\n",
197  machine_code_addr);
198 
199  std::string kernel_name;
200 
211  kernel_name = "Some kernel";
212  } else {
213  kernel_name = "Blit kernel";
214  }
215 
216  DPRINTF(GPUKernelInfo, "Kernel name: %s\n", kernel_name.c_str());
217 
218  HSAQueueEntry *task = new HSAQueueEntry(kernel_name, queue_id,
219  dynamic_task_id, raw_pkt, &akc, host_pkt_addr, machine_code_addr);
220 
221  DPRINTF(GPUCommandProc, "Task ID: %i Got AQL: wg size (%dx%dx%d), "
222  "grid size (%dx%dx%d) kernarg addr: %#x, completion "
223  "signal addr:%#x\n", dynamic_task_id, disp_pkt->workgroup_size_x,
224  disp_pkt->workgroup_size_y, disp_pkt->workgroup_size_z,
225  disp_pkt->grid_size_x, disp_pkt->grid_size_y,
226  disp_pkt->grid_size_z, disp_pkt->kernarg_address,
227  disp_pkt->completion_signal);
228 
229  DPRINTF(GPUCommandProc, "Extracted code object: %s (num vector regs: %d, "
230  "num scalar regs: %d, code addr: %#x, kernarg size: %d, "
231  "LDS size: %d)\n", kernel_name, task->numVectorRegs(),
232  task->numScalarRegs(), task->codeAddr(), 0, 0);
233 
234  initABI(task);
235  ++dynamic_task_id;
236 }
237 
238 uint64_t
240 {
241  Addr value_addr = getHsaSignalValueAddr(signal_handle);
242  auto tc = system()->threads[0];
243  ConstVPtr<Addr> prev_value(value_addr, tc);
244  return *prev_value;
245 }
246 
247 void
248 GPUCommandProcessor::updateHsaSignal(Addr signal_handle, uint64_t signal_value,
249  HsaSignalCallbackFunction function)
250 {
251  // The signal value is aligned 8 bytes from
252  // the actual handle in the runtime
253  Addr value_addr = getHsaSignalValueAddr(signal_handle);
254  Addr mailbox_addr = getHsaSignalMailboxAddr(signal_handle);
255  Addr event_addr = getHsaSignalEventAddr(signal_handle);
256  DPRINTF(GPUCommandProc, "Triggering completion signal: %x!\n", value_addr);
257 
258  auto cb = new DmaVirtCallback<uint64_t>(function, signal_value);
259 
260  dmaWriteVirt(value_addr, sizeof(Addr), cb, &cb->dmaBuffer, 0);
261 
262  auto tc = system()->threads[0];
263  ConstVPtr<uint64_t> mailbox_ptr(mailbox_addr, tc);
264 
265  // Notifying an event with its mailbox pointer is
266  // not supported in the current implementation. Just use
267  // mailbox pointer to distinguish between interruptible
268  // and default signal. Interruptible signal will have
269  // a valid mailbox pointer.
270  if (*mailbox_ptr != 0) {
271  // This is an interruptible signal. Now, read the
272  // event ID and directly communicate with the driver
273  // about that event notification.
274  ConstVPtr<uint32_t> event_val(event_addr, tc);
275 
276  DPRINTF(GPUCommandProc, "Calling signal wakeup event on "
277  "signal event value %d\n", *event_val);
278 
279  // The mailbox/wakeup signal uses the SE mode proxy port to write
280  // the event value. This is not available in full system mode so
281  // instead we need to issue a DMA write to the address. The value of
282  // *event_val clears the event.
283  if (FullSystem) {
284  auto cb = new DmaVirtCallback<uint64_t>(function, *event_val);
285  dmaWriteVirt(mailbox_addr, sizeof(Addr), cb, &cb->dmaBuffer, 0);
286  } else {
287  signalWakeupEvent(*event_val);
288  }
289  }
290 }
291 
292 void
294 {
295  fatal_if(_driver, "Should not overwrite driver.");
296  // TODO: GPU Driver inheritance hierarchy doesn't really make sense.
297  // Should get rid of the base class.
298  _driver = gpu_driver;
299  assert(_driver);
300 }
301 
304 {
305  return _driver;
306 }
307 
325 void
326 GPUCommandProcessor::submitVendorPkt(void *raw_pkt, uint32_t queue_id,
327  Addr host_pkt_addr)
328 {
329  hsaPP->finishPkt(raw_pkt, queue_id);
330 }
331 
339 void
340 GPUCommandProcessor::submitAgentDispatchPkt(void *raw_pkt, uint32_t queue_id,
341  Addr host_pkt_addr)
342 {
343  //Parse the Packet, see what it wants us to do
344  _hsa_agent_dispatch_packet_t * agent_pkt =
345  (_hsa_agent_dispatch_packet_t *)raw_pkt;
346 
347  if (agent_pkt->type == AgentCmd::Nop) {
348  DPRINTF(GPUCommandProc, "Agent Dispatch Packet NOP\n");
349  } else if (agent_pkt->type == AgentCmd::Steal) {
350  //This is where we steal the HSA Task's completion signal
351  int kid = agent_pkt->arg[0];
352  DPRINTF(GPUCommandProc,
353  "Agent Dispatch Packet Stealing signal handle for kernel %d\n",
354  kid);
355 
356  HSAQueueEntry *task = dispatcher.hsaTask(kid);
357  uint64_t signal_addr = task->completionSignal();// + sizeof(uint64_t);
358 
359  uint64_t return_address = agent_pkt->return_address;
360  DPRINTF(GPUCommandProc, "Return Addr: %p\n",return_address);
361  //*return_address = signal_addr;
362  Addr *new_signal_addr = new Addr;
363  *new_signal_addr = (Addr)signal_addr;
364  dmaWriteVirt(return_address, sizeof(Addr), nullptr, new_signal_addr, 0);
365 
366  DPRINTF(GPUCommandProc,
367  "Agent Dispatch Packet Stealing signal handle from kid %d :" \
368  "(%x:%x) writing into %x\n",
369  kid,signal_addr,new_signal_addr,return_address);
370 
371  } else
372  {
373  panic("The agent dispatch packet provided an unknown argument in" \
374  "arg[0],currently only 0(nop) or 1(return kernel signal) is accepted");
375  }
376 
377  hsaPP->finishPkt(raw_pkt, queue_id);
378 }
379 
386 void
388 {
389  dispatcher.dispatch(task);
390 }
391 
392 void
394 {
395  _driver->signalWakeupEvent(event_id);
396 }
397 
404 void
406 {
407  auto cb = new DmaVirtCallback<uint32_t>(
408  [ = ] (const uint32_t &readDispIdOffset)
409  { ReadDispIdOffsetDmaEvent(task, readDispIdOffset); }, 0);
410 
411  Addr hostReadIdxPtr
412  = hsaPP->getQueueDesc(task->queueId())->hostReadIndexPtr;
413 
414  dmaReadVirt(hostReadIdxPtr + sizeof(hostReadIdxPtr),
415  sizeof(uint32_t), cb, &cb->dmaBuffer);
416 }
417 
418 System*
420 {
421  return sys;
422 }
423 
426 {
427  AddrRangeList ranges;
428  return ranges;
429 }
430 
431 void
433 {
434  gpuDevice = gpu_device;
436 }
437 
438 void
440 {
441  _shader = shader;
442 }
443 
444 Shader*
446 {
447  return _shader;
448 }
449 
450 } // namespace gem5
gem5::_hsa_agent_dispatch_packet_t
Definition: hsa_packet.hh:72
gem5::_hsa_agent_dispatch_packet_t::arg
uint64_t arg[4]
Definition: hsa_packet.hh:78
gem5::GPUCommandProcessor::getHsaSignalEventAddr
Addr getHsaSignalEventAddr(Addr signal_handle)
Definition: gpu_command_processor.hh:125
gem5::GPUComputeDriver::signalWakeupEvent
virtual void signalWakeupEvent(uint32_t event_id)
Definition: gpu_compute_driver.cc:191
gem5::SETranslatingPortProxy
Definition: se_translating_port_proxy.hh:49
gem5::AMDKernelCode::runtime_loader_kernel_symbol
uint64_t runtime_loader_kernel_symbol
Definition: kernel_code.hh:187
gem5::ConstProxyPtr
Definition: proxy_ptr.hh:109
gem5::GPUCommandProcessor::shader
Shader * shader()
Definition: gpu_command_processor.cc:445
gem5::GPUCommandProcessor::hsaPP
HSAPacketProcessor * hsaPP
Definition: gpu_command_processor.hh:140
gem5::_hsa_dispatch_packet_t::workgroup_size_x
uint16_t workgroup_size_x
Definition: hsa_packet.hh:57
gem5::GPUCommandProcessor::signalWakeupEvent
void signalWakeupEvent(uint32_t event_id)
Definition: gpu_command_processor.cc:393
gem5::GPUCommandProcessor::attachDriver
void attachDriver(GPUComputeDriver *driver)
Definition: gpu_command_processor.cc:293
gem5::DmaVirtDevice::DmaVirtCallback
Wraps a std::function object in a DmaCallback.
Definition: dma_virt_device.hh:51
gem5::HSAQueueEntry::completionSignal
Addr completionSignal() const
Definition: hsa_queue_entry.hh:171
gem5::DmaVirtDevice::dmaReadVirt
void dmaReadVirt(Addr host_addr, unsigned size, DmaCallback *cb, void *data, Tick delay=0)
Initiate a DMA read from virtual address host_addr.
Definition: dma_virt_device.cc:38
abstract_mem.hh
gem5::Request::PHYSICAL
@ PHYSICAL
The virtual address is also the physical address.
Definition: request.hh:117
gem5::HSAQueueEntry
Definition: hsa_queue_entry.hh:59
gem5::HSAPacketProcessor::getQueueDesc
HSAQueueDescriptor * getQueueDesc(uint32_t queId)
Definition: hsa_packet_processor.hh:328
gem5::HSAQueueEntry::queueId
uint32_t queueId() const
Definition: hsa_queue_entry.hh:147
translating_port_proxy.hh
gem5::GPUDispatcher::hsaTask
HSAQueueEntry * hsaTask(int disp_id)
Definition: dispatcher.cc:63
gem5::GPUCommandProcessor::getAddrRanges
AddrRangeList getAddrRanges() const override
Every PIO device is obliged to provide an implementation that returns the address ranges the device r...
Definition: gpu_command_processor.cc:425
gem5::GPUCommandProcessor::GPUCommandProcessor
GPUCommandProcessor()=delete
gem5::AMDGPUDevice::getVM
AMDGPUVM & getVM()
Definition: amdgpu_device.hh:167
proxy_ptr.hh
gem5::GPUCommandProcessor::hsaPacketProc
HSAPacketProcessor & hsaPacketProc()
Definition: gpu_command_processor.cc:65
syscall_emul_buf.hh
gem5::System::getDeviceMemory
memory::AbstractMemory * getDeviceMemory(const PacketPtr &pkt) const
Return a pointer to the device memory.
Definition: system.cc:322
gem5::GPUCommandProcessor::getHsaSignalMailboxAddr
Addr getHsaSignalMailboxAddr(Addr signal_handle)
Definition: gpu_command_processor.hh:120
gem5::ArmISA::i
Bitfield< 7 > i
Definition: misc_types.hh:67
gem5::HSAQueueEntry::numScalarRegs
int numScalarRegs() const
Definition: hsa_queue_entry.hh:141
gem5::GPUCommandProcessor::submitDispatchPkt
void submitDispatchPkt(void *raw_pkt, uint32_t queue_id, Addr host_pkt_addr)
submitDispatchPkt() is the entry point into the CP from the HSAPP and is only meant to be used with A...
Definition: gpu_command_processor.cc:116
gem5::DmaVirtDevice::DmaVirtCallback::dmaBuffer
T dmaBuffer
Definition: dma_virt_device.hh:62
gem5::GPUCommandProcessor::updateHsaSignal
void updateHsaSignal(Addr signal_handle, uint64_t signal_value, HsaSignalCallbackFunction function=[](const uint64_t &) { })
Definition: gpu_command_processor.cc:248
gem5::TranslatingPortProxy
This proxy attempts to translate virtual addresses using the TLBs.
Definition: translating_port_proxy.hh:60
Nop
def format Nop(code, *opt_flags)
Definition: nop.cc:82
gem5::VegaISA::Walker::startFunctional
Fault startFunctional(Addr base, Addr vaddr, PageTableEntry &pte, unsigned &logBytes, BaseMMU::Mode mode)
Definition: pagetable_walker.cc:63
gem5::Flags< FlagsType >
gem5::System
Definition: system.hh:75
gem5::GPUDispatcher::dispatch
void dispatch(HSAQueueEntry *task)
After all relevant HSA data structures have been traversed/extracted from memory by the CP,...
Definition: dispatcher.cc:114
gem5::HSAQueueEntry::numVectorRegs
int numVectorRegs() const
Definition: hsa_queue_entry.hh:135
gem5::HSAPacketProcessor::finishPkt
void finishPkt(void *pkt, uint32_t rl_idx)
Definition: hsa_packet_processor.cc:667
gem5::GPUCommandProcessor::dispatchPkt
void dispatchPkt(HSAQueueEntry *task)
Once the CP has finished extracting all relevant information about a task and has initialized the ABI...
Definition: gpu_command_processor.cc:387
gem5::GPUCommandProcessor::setShader
void setShader(Shader *shader)
Definition: gpu_command_processor.cc:439
gem5::VegaISA::p
Bitfield< 54 > p
Definition: pagetable.hh:70
gem5::HSAPacketProcessor
Definition: hsa_packet_processor.hh:252
DPRINTF
#define DPRINTF(x,...)
Definition: trace.hh:186
amdgpu_device.hh
gem5::Packet
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
Definition: packet.hh:291
gem5::probing::Packet
ProbePointArg< PacketInfo > Packet
Packet probe point.
Definition: mem.hh:109
gem5::_hsa_dispatch_packet_t::grid_size_x
uint32_t grid_size_x
Definition: hsa_packet.hh:61
gem5::PioDevice::sys
System * sys
Definition: io_device.hh:105
gem5::_hsa_dispatch_packet_t::kernel_object
uint64_t kernel_object
Definition: hsa_packet.hh:66
gem5::RequestPtr
std::shared_ptr< Request > RequestPtr
Definition: request.hh:92
pagetable_walker.hh
gem5::MemCmd::ReadReq
@ ReadReq
Definition: packet.hh:86
gem5::GPUCommandProcessor::_driver
GPUComputeDriver * _driver
Definition: gpu_command_processor.hh:133
process.hh
gem5::GPUCommandProcessor::walker
VegaISA::Walker * walker
Definition: gpu_command_processor.hh:135
gem5::GPUCommandProcessor::submitAgentDispatchPkt
void submitAgentDispatchPkt(void *raw_pkt, uint32_t queue_id, Addr host_pkt_addr)
submitAgentDispatchPkt() is for accepting agent dispatch packets.
Definition: gpu_command_processor.cc:340
gem5::PortProxy
This object is a proxy for a port or other object which implements the functional response protocol,...
Definition: port_proxy.hh:86
gem5::_hsa_dispatch_packet_t::workgroup_size_z
uint16_t workgroup_size_z
Definition: hsa_packet.hh:59
gem5::DmaDevice::Params
DmaDeviceParams Params
Definition: dma_device.hh:209
gem5::AMDGPUDevice
Device model for an AMD GPU.
Definition: amdgpu_device.hh:60
gem5::HSAQueueEntry::codeAddr
Addr codeAddr() const
Definition: hsa_queue_entry.hh:177
flags
uint8_t flags
Definition: helpers.cc:66
gem5::VegaISA::Walker::getDevRequestor
RequestorID getDevRequestor() const
Definition: pagetable_walker.hh:163
gpu_command_processor.hh
gem5::_hsa_dispatch_packet_t
Definition: hsa_packet.hh:53
gem5::GPUCommandProcessor::gpuDevice
AMDGPUDevice * gpuDevice
Definition: gpu_command_processor.hh:134
gem5::PortProxy::readBlob
void readBlob(Addr addr, void *p, int size) const
Higher level interfaces based on the above.
Definition: port_proxy.hh:182
gem5::GPUCommandProcessor::getHsaSignalValueAddr
Addr getHsaSignalValueAddr(Addr signal_handle)
Definition: gpu_command_processor.hh:115
gem5::GPUDispatcher::setCommandProcessor
void setCommandProcessor(GPUCommandProcessor *gpu_cmd_proc)
Definition: dispatcher.cc:70
gem5::DmaVirtDevice::dmaWriteVirt
void dmaWriteVirt(Addr host_addr, unsigned size, DmaCallback *b, void *data, Tick delay=0)
Initiate a DMA write from virtual address host_addr.
Definition: dma_virt_device.cc:45
gem5::_hsa_agent_dispatch_packet_t::type
uint16_t type
Definition: hsa_packet.hh:75
gem5::AMDGPUVM::UserTranslationGen
Definition: amdgpu_vm.hh:341
gem5::GPUCommandProcessor::setGPUDevice
void setGPUDevice(AMDGPUDevice *gpu_device)
Definition: gpu_command_processor.cc:432
gem5::_hsa_dispatch_packet_t::kernarg_address
uint64_t kernarg_address
Definition: hsa_packet.hh:67
gem5::Addr
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition: types.hh:147
gem5::GPUCommandProcessor::system
System * system()
Definition: gpu_command_processor.cc:419
gem5::_hsa_dispatch_packet_t::grid_size_y
uint32_t grid_size_y
Definition: hsa_packet.hh:62
gem5::_hsa_dispatch_packet_t::grid_size_z
uint32_t grid_size_z
Definition: hsa_packet.hh:63
packet_access.hh
full_system.hh
gem5::AMDGPUVM::getPageTableBase
Addr getPageTableBase(uint16_t vmid)
Definition: amdgpu_vm.hh:272
gem5::FullSystem
bool FullSystem
The FullSystem variable can be used to determine the current mode of simulation.
Definition: root.cc:220
gem5::_hsa_dispatch_packet_t::workgroup_size_y
uint16_t workgroup_size_y
Definition: hsa_packet.hh:58
gem5::GPUCommandProcessor::_shader
Shader * _shader
Definition: gpu_command_processor.hh:131
gem5::Packet::allocate
void allocate()
Allocate memory for the packet.
Definition: packet.hh:1339
gem5::HSAPacketProcessor::setDevice
void setDevice(GPUCommandProcessor *dev)
Definition: hsa_packet_processor.cc:642
gem5::System::threads
Threads threads
Definition: system.hh:314
gem5::GPUCommandProcessor::ReadDispIdOffsetDmaEvent
void ReadDispIdOffsetDmaEvent(HSAQueueEntry *task, const uint32_t &readDispIdOffset)
Perform a DMA read of the read_dispatch_id_field_base_byte_offset field, which follows directly after...
Definition: gpu_command_processor.hh:156
gem5::GPUCommandProcessor::submitVendorPkt
void submitVendorPkt(void *raw_pkt, uint32_t queue_id, Addr host_pkt_addr)
submitVendorPkt() is for accepting vendor-specific packets from the HSAPP.
Definition: gpu_command_processor.cc:326
gem5::GPUCommandProcessor::functionalReadHsaSignal
uint64_t functionalReadHsaSignal(Addr signal_handle)
Definition: gpu_command_processor.cc:239
gem5::GPUComputeDriver
Definition: gpu_compute_driver.hh:62
gem5::GPUCommandProcessor::vramRequestorId
RequestorID vramRequestorId()
Forward the VRAM requestor ID needed for device memory from GPU device.
Definition: gpu_command_processor.cc:74
gem5::Packet::getLE
T getLE() const
Get the data in the packet byte swapped from little endian to host endian.
Definition: packet_access.hh:78
chunk_generator.hh
gem5::AMDKernelCode::kernel_code_entry_byte_offset
int64_t kernel_code_entry_byte_offset
Definition: kernel_code.hh:90
se_translating_port_proxy.hh
gem5::RequestorID
uint16_t RequestorID
Definition: request.hh:95
dispatcher.hh
gem5::DmaVirtDevice
Definition: dma_virt_device.hh:41
gem5::MipsISA::vaddr
vaddr
Definition: pra_constants.hh:278
std::list< AddrRange >
gem5::VegaISA::Walker::setDevRequestor
void setDevRequestor(RequestorID mid)
Definition: pagetable_walker.hh:162
fatal_if
#define fatal_if(cond,...)
Conditional fatal macro that checks the supplied condition and only causes a fatal error if the condi...
Definition: logging.hh:226
gem5::memory::AbstractMemory::access
void access(PacketPtr pkt)
Perform an untimed memory access and update all the state (e.g.
Definition: abstract_mem.cc:379
gem5
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
Definition: gpu_translation_state.hh:37
gem5::_hsa_agent_dispatch_packet_t::return_address
uint64_t return_address
Definition: hsa_packet.hh:77
gem5::GPUCommandProcessor::dispatcher
GPUDispatcher & dispatcher
Definition: gpu_command_processor.hh:132
gem5::_hsa_dispatch_packet_t::completion_signal
uint64_t completion_signal
Definition: hsa_packet.hh:69
gem5::GPUCommandProcessor::HsaSignalCallbackFunction
std::function< void(const uint64_t &)> HsaSignalCallbackFunction
Definition: gpu_command_processor.hh:74
gem5::AMDGPUDevice::vramRequestorId
RequestorID vramRequestorId()
Methods related to translations and system/device memory.
Definition: amdgpu_device.hh:187
gem5::GPUCommandProcessor::translate
TranslationGenPtr translate(Addr vaddr, Addr size) override
Function used to translate a range of addresses from virtual to physical addresses.
Definition: gpu_command_processor.cc:80
gem5::TranslationGenPtr
std::unique_ptr< TranslationGen > TranslationGenPtr
Definition: translation_gen.hh:128
panic
#define panic(...)
This implements a cprintf based panic() function.
Definition: logging.hh:178
gem5::GPUCommandProcessor::driver
GPUComputeDriver * driver()
Definition: gpu_command_processor.cc:303
gem5::AMDKernelCode
Definition: kernel_code.hh:82
gem5::Shader
Definition: shader.hh:83
gem5::GPUCommandProcessor::initABI
void initABI(HSAQueueEntry *task)
The CP is responsible for traversing all HSA-ABI-related data structures from memory and initializing...
Definition: gpu_command_processor.cc:405

Generated on Thu Jul 28 2022 13:32:33 for gem5 by doxygen 1.8.17