gem5  [DEVELOP-FOR-23.0]
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
gpu_command_processor.cc
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2018 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *
8  * 1. Redistributions of source code must retain the above copyright notice,
9  * this list of conditions and the following disclaimer.
10  *
11  * 2. Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation
13  * and/or other materials provided with the distribution.
14  *
15  * 3. Neither the name of the copyright holder nor the names of its
16  * contributors may be used to endorse or promote products derived from this
17  * software without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
33 
34 #include <cassert>
35 
37 #include "base/chunk_generator.hh"
38 #include "debug/GPUCommandProc.hh"
39 #include "debug/GPUKernelInfo.hh"
42 #include "mem/abstract_mem.hh"
43 #include "mem/packet_access.hh"
46 #include "params/GPUCommandProcessor.hh"
47 #include "sim/full_system.hh"
48 #include "sim/process.hh"
49 #include "sim/proxy_ptr.hh"
50 #include "sim/syscall_emul_buf.hh"
51 
52 namespace gem5
53 {
54 
56  : DmaVirtDevice(p), dispatcher(*p.dispatcher), _driver(nullptr),
57  walker(p.walker), hsaPP(p.hsapp)
58 {
59  assert(hsaPP);
60  hsaPP->setDevice(this);
62 }
63 
66 {
67  return *hsaPP;
68 }
69 
75 {
76  return gpuDevice->vramRequestorId();
77 }
78 
81 {
82  if (!FullSystem) {
83  // Grab the process and try to translate the virtual address with it;
84  // with new extensions, it will likely be wrong to just arbitrarily
85  // grab context zero.
86  auto process = sys->threads[0]->getProcessPtr();
87 
88  return process->pTable->translateRange(vaddr, size);
89  }
90 
91  // In full system use the page tables setup by the kernel driver rather
92  // than the CPU page tables.
93  return TranslationGenPtr(
95  1 /* vmid */, vaddr, size));
96 }
97 
115 void
116 GPUCommandProcessor::submitDispatchPkt(void *raw_pkt, uint32_t queue_id,
117  Addr host_pkt_addr)
118 {
119  static int dynamic_task_id = 0;
120  _hsa_dispatch_packet_t *disp_pkt = (_hsa_dispatch_packet_t*)raw_pkt;
121  assert(!(disp_pkt->kernel_object & (system()->cacheLineSize() - 1)));
122 
127  auto *tc = sys->threads[0];
128 
129  TranslatingPortProxy fs_proxy(tc);
130  SETranslatingPortProxy se_proxy(tc);
131  PortProxy &virt_proxy = FullSystem ? fs_proxy : se_proxy;
132 
138  bool is_system_page = true;
139  Addr phys_addr = disp_pkt->kernel_object;
140  if (FullSystem) {
147  int vmid = 1;
148  unsigned tmp_bytes;
150  phys_addr, tmp_bytes, BaseMMU::Mode::Read,
151  is_system_page);
152  }
153 
154  DPRINTF(GPUCommandProc, "kernobj vaddr %#lx paddr %#lx size %d s:%d\n",
155  disp_pkt->kernel_object, phys_addr, sizeof(AMDKernelCode),
156  is_system_page);
157 
167  AMDKernelCode akc;
168  if (is_system_page) {
169  DPRINTF(GPUCommandProc, "kernel_object in system, using proxy\n");
170  virt_proxy.readBlob(disp_pkt->kernel_object, (uint8_t*)&akc,
171  sizeof(AMDKernelCode));
172  } else {
173  assert(FullSystem);
174  DPRINTF(GPUCommandProc, "kernel_object in device, using device mem\n");
175 
176  // Read from GPU memory manager one cache line at a time to prevent
177  // rare cases where the AKC spans two memory pages.
178  ChunkGenerator gen(disp_pkt->kernel_object, sizeof(AMDKernelCode),
179  system()->cacheLineSize());
180  for (; !gen.done(); gen.next()) {
181  Addr chunk_addr = gen.addr();
182  int vmid = 1;
183  unsigned dummy;
185  chunk_addr, dummy, BaseMMU::Mode::Read,
186  is_system_page);
187 
189  RequestPtr request = std::make_shared<Request>(chunk_addr,
190  system()->cacheLineSize(), flags, walker->getDevRequestor());
191  Packet *readPkt = new Packet(request, MemCmd::ReadReq);
192  readPkt->dataStatic((uint8_t *)&akc + gen.complete());
193  system()->getDeviceMemory(readPkt)->access(readPkt);
194  delete readPkt;
195  }
196  }
197 
198  DPRINTF(GPUCommandProc, "GPU machine code is %lli bytes from start of the "
199  "kernel object\n", akc.kernel_code_entry_byte_offset);
200 
201  DPRINTF(GPUCommandProc,"GPUCommandProc: Sending dispatch pkt to %lu\n",
202  (uint64_t)tc->cpuId());
203 
204 
205  Addr machine_code_addr = (Addr)disp_pkt->kernel_object
207 
208  DPRINTF(GPUCommandProc, "Machine code starts at addr: %#x\n",
209  machine_code_addr);
210 
211  std::string kernel_name;
212 
223  kernel_name = "Some kernel";
224  } else {
225  kernel_name = "Blit kernel";
226  }
227 
228  DPRINTF(GPUKernelInfo, "Kernel name: %s\n", kernel_name.c_str());
229 
230  GfxVersion gfxVersion = FullSystem ? gpuDevice->getGfxVersion()
231  : driver()->getGfxVersion();
232  HSAQueueEntry *task = new HSAQueueEntry(kernel_name, queue_id,
233  dynamic_task_id, raw_pkt, &akc, host_pkt_addr, machine_code_addr,
234  gfxVersion);
235 
236  DPRINTF(GPUCommandProc, "Task ID: %i Got AQL: wg size (%dx%dx%d), "
237  "grid size (%dx%dx%d) kernarg addr: %#x, completion "
238  "signal addr:%#x\n", dynamic_task_id, disp_pkt->workgroup_size_x,
239  disp_pkt->workgroup_size_y, disp_pkt->workgroup_size_z,
240  disp_pkt->grid_size_x, disp_pkt->grid_size_y,
241  disp_pkt->grid_size_z, disp_pkt->kernarg_address,
242  disp_pkt->completion_signal);
243 
244  DPRINTF(GPUCommandProc, "Extracted code object: %s (num vector regs: %d, "
245  "num scalar regs: %d, code addr: %#x, kernarg size: %d, "
246  "LDS size: %d)\n", kernel_name, task->numVectorRegs(),
247  task->numScalarRegs(), task->codeAddr(), 0, 0);
248 
249  initABI(task);
250  ++dynamic_task_id;
251 }
252 
253 uint64_t
255 {
256  Addr value_addr = getHsaSignalValueAddr(signal_handle);
257  auto tc = system()->threads[0];
258  ConstVPtr<Addr> prev_value(value_addr, tc);
259  return *prev_value;
260 }
261 
void
GPUCommandProcessor::updateHsaSignal(Addr signal_handle, uint64_t signal_value,
                                     HsaSignalCallbackFunction function)
{
    // Write signal_value into the HSA signal identified by signal_handle,
    // then, if the signal is interruptible, notify the waiter.
    //
    // The signal value is aligned 8 bytes from
    // the actual handle in the runtime
    Addr value_addr = getHsaSignalValueAddr(signal_handle);
    Addr mailbox_addr = getHsaSignalMailboxAddr(signal_handle);
    Addr event_addr = getHsaSignalEventAddr(signal_handle);
    DPRINTF(GPUCommandProc, "Triggering completion signal: %x!\n", value_addr);

    // DmaVirtCallback owns the staging buffer (dmaBuffer) for the write and
    // invokes 'function' when the DMA completes.
    auto cb = new DmaVirtCallback<uint64_t>(function, signal_value);

    dmaWriteVirt(value_addr, sizeof(Addr), cb, &cb->dmaBuffer, 0);

    // Functional (proxy) read of the mailbox pointer through CPU thread 0's
    // context. NOTE(review): this proxy read is issued regardless of
    // FullSystem, while the write below distinguishes modes — confirm the
    // proxy read is valid in full system here.
    auto tc = system()->threads[0];
    ConstVPtr<uint64_t> mailbox_ptr(mailbox_addr, tc);

    // Notifying an event with its mailbox pointer is
    // not supported in the current implementation. Just use
    // mailbox pointer to distinguish between interruptible
    // and default signal. Interruptible signal will have
    // a valid mailbox pointer.
    if (*mailbox_ptr != 0) {
        // This is an interruptible signal. Now, read the
        // event ID and directly communicate with the driver
        // about that event notification.
        ConstVPtr<uint32_t> event_val(event_addr, tc);

        DPRINTF(GPUCommandProc, "Calling signal wakeup event on "
                "signal event value %d\n", *event_val);

        // The mailbox/wakeup signal uses the SE mode proxy port to write
        // the event value. This is not available in full system mode so
        // instead we need to issue a DMA write to the address. The value of
        // *event_val clears the event.
        if (FullSystem) {
            auto cb = new DmaVirtCallback<uint64_t>(function, *event_val);
            dmaWriteVirt(mailbox_addr, sizeof(Addr), cb, &cb->dmaBuffer, 0);
        } else {
            signalWakeupEvent(*event_val);
        }
    }
}
306 
307 void
309 {
310  fatal_if(_driver, "Should not overwrite driver.");
311  // TODO: GPU Driver inheritance hierarchy doesn't really make sense.
312  // Should get rid of the base class.
313  _driver = gpu_driver;
314  assert(_driver);
315 }
316 
319 {
320  return _driver;
321 }
322 
void
GPUCommandProcessor::submitVendorPkt(void *raw_pkt, uint32_t queue_id,
                                     Addr host_pkt_addr)
{
    // Vendor-specific packets are not interpreted here: the packet is
    // immediately acknowledged back to the packet processor so the queue
    // can make forward progress. host_pkt_addr is unused.
    hsaPP->finishPkt(raw_pkt, queue_id);
}
346 
354 void
355 GPUCommandProcessor::submitAgentDispatchPkt(void *raw_pkt, uint32_t queue_id,
356  Addr host_pkt_addr)
357 {
358  //Parse the Packet, see what it wants us to do
359  _hsa_agent_dispatch_packet_t * agent_pkt =
360  (_hsa_agent_dispatch_packet_t *)raw_pkt;
361 
362  if (agent_pkt->type == AgentCmd::Nop) {
363  DPRINTF(GPUCommandProc, "Agent Dispatch Packet NOP\n");
364  } else if (agent_pkt->type == AgentCmd::Steal) {
365  //This is where we steal the HSA Task's completion signal
366  int kid = agent_pkt->arg[0];
367  DPRINTF(GPUCommandProc,
368  "Agent Dispatch Packet Stealing signal handle for kernel %d\n",
369  kid);
370 
371  HSAQueueEntry *task = dispatcher.hsaTask(kid);
372  uint64_t signal_addr = task->completionSignal();// + sizeof(uint64_t);
373 
374  uint64_t return_address = agent_pkt->return_address;
375  DPRINTF(GPUCommandProc, "Return Addr: %p\n",return_address);
376  //*return_address = signal_addr;
377  Addr *new_signal_addr = new Addr;
378  *new_signal_addr = (Addr)signal_addr;
379  dmaWriteVirt(return_address, sizeof(Addr), nullptr, new_signal_addr, 0);
380 
381  DPRINTF(GPUCommandProc,
382  "Agent Dispatch Packet Stealing signal handle from kid %d :" \
383  "(%x:%x) writing into %x\n",
384  kid,signal_addr,new_signal_addr,return_address);
385 
386  } else
387  {
388  panic("The agent dispatch packet provided an unknown argument in" \
389  "arg[0],currently only 0(nop) or 1(return kernel signal) is accepted");
390  }
391 
392  hsaPP->finishPkt(raw_pkt, queue_id);
393 }
394 
401 void
403 {
404  dispatcher.dispatch(task);
405 }
406 
407 void
409 {
410  _driver->signalWakeupEvent(event_id);
411 }
412 
419 void
421 {
422  auto cb = new DmaVirtCallback<uint32_t>(
423  [ = ] (const uint32_t &readDispIdOffset)
424  { ReadDispIdOffsetDmaEvent(task, readDispIdOffset); }, 0);
425 
426  Addr hostReadIdxPtr
427  = hsaPP->getQueueDesc(task->queueId())->hostReadIndexPtr;
428 
429  dmaReadVirt(hostReadIdxPtr + sizeof(hostReadIdxPtr),
430  sizeof(uint32_t), cb, &cb->dmaBuffer);
431 }
432 
433 System*
435 {
436  return sys;
437 }
438 
441 {
442  AddrRangeList ranges;
443  return ranges;
444 }
445 
446 void
448 {
449  gpuDevice = gpu_device;
451 }
452 
453 void
455 {
456  _shader = shader;
457 }
458 
459 Shader*
461 {
462  return _shader;
463 }
464 
465 } // namespace gem5
gem5::_hsa_agent_dispatch_packet_t
Definition: hsa_packet.hh:72
gem5::_hsa_agent_dispatch_packet_t::arg
uint64_t arg[4]
Definition: hsa_packet.hh:78
gem5::GPUCommandProcessor::getHsaSignalEventAddr
Addr getHsaSignalEventAddr(Addr signal_handle)
Definition: gpu_command_processor.hh:125
gem5::GPUComputeDriver::signalWakeupEvent
virtual void signalWakeupEvent(uint32_t event_id)
Definition: gpu_compute_driver.cc:191
gem5::SETranslatingPortProxy
Definition: se_translating_port_proxy.hh:49
gem5::AMDKernelCode::runtime_loader_kernel_symbol
uint64_t runtime_loader_kernel_symbol
Definition: kernel_code.hh:187
gem5::ConstProxyPtr
Definition: proxy_ptr.hh:109
gem5::GPUCommandProcessor::shader
Shader * shader()
Definition: gpu_command_processor.cc:460
gem5::GPUCommandProcessor::hsaPP
HSAPacketProcessor * hsaPP
Definition: gpu_command_processor.hh:140
gem5::GPUComputeDriver::getGfxVersion
GfxVersion getGfxVersion() const
Definition: gpu_compute_driver.hh:145
gem5::_hsa_dispatch_packet_t::workgroup_size_x
uint16_t workgroup_size_x
Definition: hsa_packet.hh:57
gem5::GPUCommandProcessor::signalWakeupEvent
void signalWakeupEvent(uint32_t event_id)
Definition: gpu_command_processor.cc:408
gem5::GPUCommandProcessor::attachDriver
void attachDriver(GPUComputeDriver *driver)
Definition: gpu_command_processor.cc:308
gem5::DmaVirtDevice::DmaVirtCallback
Wraps a std::function object in a DmaCallback.
Definition: dma_virt_device.hh:51
gem5::HSAQueueEntry::completionSignal
Addr completionSignal() const
Definition: hsa_queue_entry.hh:195
gem5::DmaVirtDevice::dmaReadVirt
void dmaReadVirt(Addr host_addr, unsigned size, DmaCallback *cb, void *data, Tick delay=0)
Initiate a DMA read from virtual address host_addr.
Definition: dma_virt_device.cc:38
abstract_mem.hh
gem5::ChunkGenerator::next
bool next()
Advance generator to next chunk.
Definition: chunk_generator.hh:185
gem5::ChunkGenerator::complete
Addr complete() const
Number of bytes we have already chunked up.
Definition: chunk_generator.hh:132
gem5::HSAQueueEntry
Definition: hsa_queue_entry.hh:60
gem5::HSAPacketProcessor::getQueueDesc
HSAQueueDescriptor * getQueueDesc(uint32_t queId)
Definition: hsa_packet_processor.hh:330
gem5::HSAQueueEntry::queueId
uint32_t queueId() const
Definition: hsa_queue_entry.hh:171
translating_port_proxy.hh
gem5::GPUDispatcher::hsaTask
HSAQueueEntry * hsaTask(int disp_id)
Definition: dispatcher.cc:65
gem5::GPUCommandProcessor::getAddrRanges
AddrRangeList getAddrRanges() const override
Every PIO device is obliged to provide an implementation that returns the address ranges the device r...
Definition: gpu_command_processor.cc:440
gem5::GPUCommandProcessor::GPUCommandProcessor
GPUCommandProcessor()=delete
gem5::AMDGPUDevice::getVM
AMDGPUVM & getVM()
Definition: amdgpu_device.hh:180
proxy_ptr.hh
gem5::GPUCommandProcessor::hsaPacketProc
HSAPacketProcessor & hsaPacketProc()
Definition: gpu_command_processor.cc:65
syscall_emul_buf.hh
gem5::System::getDeviceMemory
memory::AbstractMemory * getDeviceMemory(const PacketPtr &pkt) const
Return a pointer to the device memory.
Definition: system.cc:311
gem5::GPUCommandProcessor::getHsaSignalMailboxAddr
Addr getHsaSignalMailboxAddr(Addr signal_handle)
Definition: gpu_command_processor.hh:120
gem5::HSAQueueEntry::numScalarRegs
int numScalarRegs() const
Definition: hsa_queue_entry.hh:165
gem5::GPUCommandProcessor::submitDispatchPkt
void submitDispatchPkt(void *raw_pkt, uint32_t queue_id, Addr host_pkt_addr)
submitDispatchPkt() is the entry point into the CP from the HSAPP and is only meant to be used with A...
Definition: gpu_command_processor.cc:116
gem5::DmaVirtDevice::DmaVirtCallback::dmaBuffer
T dmaBuffer
Definition: dma_virt_device.hh:62
gem5::GPUCommandProcessor::updateHsaSignal
void updateHsaSignal(Addr signal_handle, uint64_t signal_value, HsaSignalCallbackFunction function=[](const uint64_t &) { })
Definition: gpu_command_processor.cc:263
gem5::TranslatingPortProxy
This proxy attempts to translate virtual addresses using the TLBs.
Definition: translating_port_proxy.hh:60
Nop
def format Nop(code, *opt_flags)
Definition: nop.cc:82
gem5::ChunkGenerator
This class takes an arbitrary memory region (address/length pair) and generates a series of appropria...
Definition: chunk_generator.hh:59
gem5::VegaISA::Walker::startFunctional
Fault startFunctional(Addr base, Addr vaddr, PageTableEntry &pte, unsigned &logBytes, BaseMMU::Mode mode)
Definition: pagetable_walker.cc:62
gem5::Packet::dataStatic
void dataStatic(T *p)
Set the data pointer to the following value that should not be freed.
Definition: packet.hh:1175
gem5::Flags< FlagsType >
gem5::System
Definition: system.hh:74
gem5::GPUDispatcher::dispatch
void dispatch(HSAQueueEntry *task)
After all relevant HSA data structures have been traversed/extracted from memory by the CP,...
Definition: dispatcher.cc:116
gem5::ChunkGenerator::done
bool done() const
Are we done? That is, did the last call to next() advance past the end of the region?
Definition: chunk_generator.hh:141
gem5::HSAQueueEntry::numVectorRegs
int numVectorRegs() const
Definition: hsa_queue_entry.hh:159
gem5::HSAPacketProcessor::finishPkt
void finishPkt(void *pkt, uint32_t rl_idx)
Definition: hsa_packet_processor.cc:671
gem5::GPUCommandProcessor::dispatchPkt
void dispatchPkt(HSAQueueEntry *task)
Once the CP has finished extracting all relevant information about a task and has initialized the ABI...
Definition: gpu_command_processor.cc:402
gem5::GPUCommandProcessor::setShader
void setShader(Shader *shader)
Definition: gpu_command_processor.cc:454
gem5::VegaISA::p
Bitfield< 54 > p
Definition: pagetable.hh:70
gem5::HSAPacketProcessor
Definition: hsa_packet_processor.hh:254
DPRINTF
#define DPRINTF(x,...)
Definition: trace.hh:210
amdgpu_device.hh
gem5::Packet
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
Definition: packet.hh:294
gem5::probing::Packet
ProbePointArg< PacketInfo > Packet
Packet probe point.
Definition: mem.hh:108
gem5::_hsa_dispatch_packet_t::grid_size_x
uint32_t grid_size_x
Definition: hsa_packet.hh:61
gem5::PioDevice::sys
System * sys
Definition: io_device.hh:105
gem5::_hsa_dispatch_packet_t::kernel_object
uint64_t kernel_object
Definition: hsa_packet.hh:66
gem5::RequestPtr
std::shared_ptr< Request > RequestPtr
Definition: request.hh:92
pagetable_walker.hh
gem5::MemCmd::ReadReq
@ ReadReq
Definition: packet.hh:87
gem5::GPUCommandProcessor::_driver
GPUComputeDriver * _driver
Definition: gpu_command_processor.hh:133
process.hh
gem5::GPUCommandProcessor::walker
VegaISA::Walker * walker
Definition: gpu_command_processor.hh:135
gem5::GPUCommandProcessor::submitAgentDispatchPkt
void submitAgentDispatchPkt(void *raw_pkt, uint32_t queue_id, Addr host_pkt_addr)
submitAgentDispatchPkt() is for accepting agent dispatch packets.
Definition: gpu_command_processor.cc:355
gem5::PortProxy
This object is a proxy for a port or other object which implements the functional response protocol,...
Definition: port_proxy.hh:86
gem5::_hsa_dispatch_packet_t::workgroup_size_z
uint16_t workgroup_size_z
Definition: hsa_packet.hh:59
gem5::DmaDevice::Params
DmaDeviceParams Params
Definition: dma_device.hh:224
gem5::AMDGPUDevice
Device model for an AMD GPU.
Definition: amdgpu_device.hh:62
gem5::HSAQueueEntry::codeAddr
Addr codeAddr() const
Definition: hsa_queue_entry.hh:201
flags
uint8_t flags
Definition: helpers.cc:66
gem5::VegaISA::Walker::getDevRequestor
RequestorID getDevRequestor() const
Definition: pagetable_walker.hh:163
gpu_command_processor.hh
gem5::_hsa_dispatch_packet_t
Definition: hsa_packet.hh:53
gem5::GPUCommandProcessor::gpuDevice
AMDGPUDevice * gpuDevice
Definition: gpu_command_processor.hh:134
gem5::PortProxy::readBlob
void readBlob(Addr addr, void *p, int size) const
Higher level interfaces based on the above.
Definition: port_proxy.hh:182
gem5::GPUCommandProcessor::getHsaSignalValueAddr
Addr getHsaSignalValueAddr(Addr signal_handle)
Definition: gpu_command_processor.hh:115
gem5::GPUDispatcher::setCommandProcessor
void setCommandProcessor(GPUCommandProcessor *gpu_cmd_proc)
Definition: dispatcher.cc:72
gem5::DmaVirtDevice::dmaWriteVirt
void dmaWriteVirt(Addr host_addr, unsigned size, DmaCallback *b, void *data, Tick delay=0)
Initiate a DMA write from virtual address host_addr.
Definition: dma_virt_device.cc:45
gem5::_hsa_agent_dispatch_packet_t::type
uint16_t type
Definition: hsa_packet.hh:75
gem5::AMDGPUVM::UserTranslationGen
Definition: amdgpu_vm.hh:355
gem5::GPUCommandProcessor::setGPUDevice
void setGPUDevice(AMDGPUDevice *gpu_device)
Definition: gpu_command_processor.cc:447
gem5::_hsa_dispatch_packet_t::kernarg_address
uint64_t kernarg_address
Definition: hsa_packet.hh:67
gem5::Addr
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition: types.hh:147
gem5::GPUCommandProcessor::system
System * system()
Definition: gpu_command_processor.cc:434
gem5::_hsa_dispatch_packet_t::grid_size_y
uint32_t grid_size_y
Definition: hsa_packet.hh:62
gem5::_hsa_dispatch_packet_t::grid_size_z
uint32_t grid_size_z
Definition: hsa_packet.hh:63
packet_access.hh
full_system.hh
gem5::AMDGPUVM::getPageTableBase
Addr getPageTableBase(uint16_t vmid)
Definition: amdgpu_vm.hh:286
gem5::FullSystem
bool FullSystem
The FullSystem variable can be used to determine the current mode of simulation.
Definition: root.cc:220
gem5::_hsa_dispatch_packet_t::workgroup_size_y
uint16_t workgroup_size_y
Definition: hsa_packet.hh:58
gem5::GPUCommandProcessor::_shader
Shader * _shader
Definition: gpu_command_processor.hh:131
gem5::HSAPacketProcessor::setDevice
void setDevice(GPUCommandProcessor *dev)
Definition: hsa_packet_processor.cc:646
gem5::System::threads
Threads threads
Definition: system.hh:310
gem5::ChunkGenerator::addr
Addr addr() const
Return starting address of current chunk.
Definition: chunk_generator.hh:119
gem5::GPUCommandProcessor::ReadDispIdOffsetDmaEvent
void ReadDispIdOffsetDmaEvent(HSAQueueEntry *task, const uint32_t &readDispIdOffset)
Perform a DMA read of the read_dispatch_id_field_base_byte_offset field, which follows directly after...
Definition: gpu_command_processor.hh:156
gem5::GPUCommandProcessor::submitVendorPkt
void submitVendorPkt(void *raw_pkt, uint32_t queue_id, Addr host_pkt_addr)
submitVendorPkt() is for accepting vendor-specific packets from the HSAPP.
Definition: gpu_command_processor.cc:341
gem5::GPUCommandProcessor::functionalReadHsaSignal
uint64_t functionalReadHsaSignal(Addr signal_handle)
Definition: gpu_command_processor.cc:254
gem5::GPUComputeDriver
Definition: gpu_compute_driver.hh:62
gem5::GPUCommandProcessor::vramRequestorId
RequestorID vramRequestorId()
Forward the VRAM requestor ID needed for device memory from GPU device.
Definition: gpu_command_processor.cc:74
chunk_generator.hh
gem5::AMDKernelCode::kernel_code_entry_byte_offset
int64_t kernel_code_entry_byte_offset
Definition: kernel_code.hh:90
se_translating_port_proxy.hh
gem5::RequestorID
uint16_t RequestorID
Definition: request.hh:95
dispatcher.hh
gem5::DmaVirtDevice
Definition: dma_virt_device.hh:41
gem5::Request::PHYSICAL
@ PHYSICAL
The virtual address is also the physical address.
Definition: request.hh:117
gem5::MipsISA::vaddr
vaddr
Definition: pra_constants.hh:278
std::list< AddrRange >
gem5::VegaISA::Walker::setDevRequestor
void setDevRequestor(RequestorID mid)
Definition: pagetable_walker.hh:162
fatal_if
#define fatal_if(cond,...)
Conditional fatal macro that checks the supplied condition and only causes a fatal error if the condi...
Definition: logging.hh:236
gem5::memory::AbstractMemory::access
void access(PacketPtr pkt)
Perform an untimed memory access and update all the state (e.g.
Definition: abstract_mem.cc:380
gem5
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
Definition: gpu_translation_state.hh:37
gem5::_hsa_agent_dispatch_packet_t::return_address
uint64_t return_address
Definition: hsa_packet.hh:77
gem5::GPUCommandProcessor::dispatcher
GPUDispatcher & dispatcher
Definition: gpu_command_processor.hh:132
gem5::AMDGPUDevice::getGfxVersion
GfxVersion getGfxVersion() const
Definition: amdgpu_device.hh:215
gem5::_hsa_dispatch_packet_t::completion_signal
uint64_t completion_signal
Definition: hsa_packet.hh:69
gem5::GPUCommandProcessor::HsaSignalCallbackFunction
std::function< void(const uint64_t &)> HsaSignalCallbackFunction
Definition: gpu_command_processor.hh:74
gem5::AMDGPUDevice::vramRequestorId
RequestorID vramRequestorId()
Methods related to translations and system/device memory.
Definition: amdgpu_device.hh:201
gem5::GPUCommandProcessor::translate
TranslationGenPtr translate(Addr vaddr, Addr size) override
Function used to translate a range of addresses from virtual to physical addresses.
Definition: gpu_command_processor.cc:80
gem5::TranslationGenPtr
std::unique_ptr< TranslationGen > TranslationGenPtr
Definition: translation_gen.hh:128
panic
#define panic(...)
This implements a cprintf based panic() function.
Definition: logging.hh:188
gem5::GPUCommandProcessor::driver
GPUComputeDriver * driver()
Definition: gpu_command_processor.cc:318
gem5::AMDKernelCode
Definition: kernel_code.hh:82
gem5::Shader
Definition: shader.hh:83
gem5::GPUCommandProcessor::initABI
void initABI(HSAQueueEntry *task)
The CP is responsible for traversing all HSA-ABI-related data structures from memory and initializing...
Definition: gpu_command_processor.cc:420

Generated on Sun Jul 30 2023 01:56:57 for gem5 by doxygen 1.8.17