36 #include "debug/AMDGPUDevice.hh"
47 #include "params/AMDGPUDevice.hh"
55 :
PciDevice(
p), gpuMemMgr(
p.memory_manager), deviceIH(
p.device_ih),
56 sdma0(
p.sdma0), sdma1(
p.sdma1), pm4PktProc(
p.pm4_pkt_proc), cp(
p.cp),
57 checkpoint_before_mmios(
p.checkpoint_before_mmios),
58 init_interrupt_count(0), _lastVMID(0),
59 deviceMem(
name() +
".deviceMem",
p.memories, false,
"", false)
63 romBin.open(
p.rom_binary, std::ios::binary);
70 for (
auto&
m :
p.memories) {
83 if (
p.trace_file !=
"") {
101 uint64_t rom_data = 0;
103 memcpy(&rom_data,
rom.data() + rom_offset, pkt->
getSize());
104 pkt->
setUintX(rom_data, ByteOrder::little);
107 pkt->
getAddr(), rom_offset, rom_data);
120 for (
auto &
r : ranges) {
121 if (
r.start() != 0) {
122 ret_ranges.push_back(
r);
209 uint8_t *dataPtr =
new uint8_t[pkt->
getSize()];
213 system->getDeviceMemory(readPkt)->access(readPkt);
239 switch (aperture_offset) {
247 uint32_t value = pkt->
getLE<uint32_t>() | 0x1;
249 pkt->
setLE<uint32_t>(value);
291 pkt->
getLE<uint64_t>());
295 pkt->
getLE<uint64_t>());
307 pkt->
getLE<uint64_t>() + 1);
318 panic(
"Write to unkown queue type!");
321 warn(
"Unknown doorbell offset: %lx\n",
offset);
378 panic(
"Request with address out of mapped range!");
406 panic(
"Request with address out of mapped range!");
465 panic(
"No SDMA with id %d\n",
id);
490 uint64_t regs_size =
regs.size();
491 uint64_t doorbells_size =
doorbells.size();
492 uint64_t sdma_engs_size =
sdmaEngs.size();
499 uint32_t reg_addrs[regs_size];
500 uint64_t reg_values[regs_size];
501 uint32_t doorbells_offset[doorbells_size];
502 QueueType doorbells_queues[doorbells_size];
503 uint32_t sdma_engs_offset[sdma_engs_size];
504 int sdma_engs[sdma_engs_size];
507 for (
auto & it :
regs) {
508 reg_addrs[idx] = it.first;
509 reg_values[idx] = it.second;
515 doorbells_offset[idx] = it.first;
516 doorbells_queues[idx] = it.second;
522 sdma_engs_offset[idx] = it.first;
523 sdma_engs[idx] = it.second ==
sdma0 ? 0 : 1;
530 sizeof(doorbells_offset[0]));
532 sizeof(doorbells_queues[0]));
534 sizeof(sdma_engs_offset[0]));
547 uint64_t regs_size = 0;
548 uint64_t doorbells_size = 0;
549 uint64_t sdma_engs_size = 0;
556 uint32_t reg_addrs[regs_size];
557 uint64_t reg_values[regs_size];
561 sizeof(reg_values)/
sizeof(reg_values[0]));
563 for (
int idx = 0; idx < regs_size; ++idx) {
564 regs.insert(std::make_pair(reg_addrs[idx], reg_values[idx]));
568 if (doorbells_size > 0) {
569 uint32_t doorbells_offset[doorbells_size];
570 QueueType doorbells_queues[doorbells_size];
573 sizeof(doorbells_offset[0]));
575 sizeof(doorbells_queues[0]));
577 for (
int idx = 0; idx < doorbells_size; ++idx) {
578 regs.insert(std::make_pair(doorbells_offset[idx],
579 doorbells_queues[idx]));
580 doorbells[doorbells_offset[idx]] = doorbells_queues[idx];
584 if (sdma_engs_size > 0) {
585 uint32_t sdma_engs_offset[sdma_engs_size];
586 int sdma_engs[sdma_engs_size];
589 sizeof(sdma_engs_offset[0]));
592 for (
int idx = 0; idx < sdma_engs_size; ++idx) {
594 sdmaEngs.insert(std::make_pair(sdma_engs_offset[idx], sdma));
608 idMap.insert(std::make_pair(pasid, vmid));
614 panic(
"All VMIDs have been assigned");
626 auto result =
idMap.find(pasid);
627 assert(result !=
idMap.end());
628 if (result ==
idMap.end())
return;
629 uint16_t vmid = result->second;
651 std::unordered_map<uint16_t, std::set<int>>&
AbstractMemory declaration.
Device model for an AMD GPU.
void insertQId(uint16_t vmid, int id)
uint32_t getRegVal(uint32_t addr)
Register value getter/setter.
std::unordered_map< Addr, uint16_t > doorbellVMIDMap
std::unordered_map< uint16_t, uint16_t > idMap
void readMMIO(PacketPtr pkt, Addr offset)
void serialize(CheckpointOut &cp) const override
Checkpoint support.
AddrRangeList getAddrRanges() const override
Every PIO device is obliged to provide an implementation that returns the address ranges the device r...
void unserialize(CheckpointIn &cp) override
Unserialize an object.
void writeMMIO(PacketPtr pkt, Addr offset)
Tick write(PacketPtr pkt) override
Pure virtual function that the device must implement.
PM4PacketProcessor * pm4PktProc
void setDoorbellType(uint32_t offset, QueueType qt)
Set handles to GPU blocks.
void deallocateAllQueues()
void readROM(PacketPtr pkt)
AddrRange romRange
VGA ROM methods.
std::unordered_map< uint16_t, std::set< int > > & getUsedVMIDs()
std::array< uint8_t, ROM_SIZE > rom
bool isROM(Addr addr) const
SDMAEngine * getSDMAEngine(Addr offset)
AMDGPUMemoryManager * gpuMemMgr
Blocks of the GPU.
AMDGPUDevice(const AMDGPUDeviceParams &p)
void readDoorbell(PacketPtr pkt, Addr offset)
Tick readConfig(PacketPtr pkt) override
Read from the PCI config space data that is stored locally.
std::unordered_map< uint16_t, std::set< int > > usedVMIDs
AMDGPUInterruptHandler * deviceIH
Tick writeConfig(PacketPtr pkt) override
Write to the PCI config space data that is stored locally.
AMDMMIOReader mmioReader
MMIO reader to populate device registers map.
Tick read(PacketPtr pkt) override
Pure virtual function that the device must implement.
bool checkpoint_before_mmios
Initial checkpoint support variables.
void dispatchAccess(PacketPtr pkt, bool read)
Convert a PCI packet into a response.
void deallocateVmid(uint16_t vmid)
void mapDoorbellToVMID(Addr doorbell, uint16_t vmid)
void intrPost()
Methods inherited from PciDevice.
void readFrame(PacketPtr pkt, Addr offset)
Helper methods to handle specific BAR read/writes.
void writeDoorbell(PacketPtr pkt, Addr offset)
RequestorID vramRequestorId()
Methods related to translations and system/device memory.
void setRegVal(uint32_t addr, uint32_t value)
uint16_t allocateVMID(uint16_t pasid)
void deallocatePasid(uint16_t pasid)
SDMAEngine * getSDMAById(int id)
void writeFrame(PacketPtr pkt, Addr offset)
void setSDMAEngine(Addr offset, SDMAEngine *eng)
memory::PhysicalMemory deviceMem
std::unordered_map< uint32_t, QueueType > doorbells
std::unordered_map< uint32_t, SDMAEngine * > sdmaEngs
void setGPUDevice(AMDGPUDevice *gpu_device)
void updateRptr(const uint32_t &data)
void writeMMIO(PacketPtr pkt, Addr mmio_offset)
Methods for setting the values of interrupt handler MMIO registers.
RequestorID getRequestorID() const
Get the requestorID for the memory manager.
void writeRequest(Addr addr, uint8_t *data, int size, Request::Flags flag, Event *callback)
Write size amount of data to device memory at addr using flags and callback.
std::unordered_map< uint64_t, uint64_t > gartTable
Copy of GART table.
void readMMIO(PacketPtr pkt, Addr offset)
Addr getMmioAperture(Addr addr)
void writeMMIO(PacketPtr pkt, Addr offset)
Addr getFrameAperture(Addr addr)
Addr gartBase()
Return base address of GART table in framebuffer.
void readMMIOTrace(std::string trace_file)
Read an MMIO trace gathered from a real system and place the MMIO values read and written into the MM...
void readFromTrace(PacketPtr pkt, int barnum, Addr offset)
Get the next MMIO read from the trace file to an offset in a BAR and write the value to the packet pr...
void setGPUDevice(AMDGPUDevice *gpu_device)
HSAPacketProcessor & hsaPacketProc()
HWScheduler * hwScheduler()
void setGPUDevice(AMDGPUDevice *gpu_device)
void write(Addr db_addr, uint64_t doorbell_reg)
void writeMMIO(PacketPtr pkt, Addr mmio_offset)
void setGPUDevice(AMDGPUDevice *gpu_device)
void updateReadIndex(Addr offset, uint64_t rd_idx)
Update read index on doorbell rings.
void process(PM4Queue *q, Addr wptrOffset)
This method starts processing a PM4Queue from the current read pointer to the newly communicated write...
PM4Queue * getQueue(Addr offset, bool gfx=false)
Based on an offset communicated through doorbell write, the PM4PacketProcessor identifies which queue...
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
T * getPtr()
get a pointer to the data ptr.
void setUintX(uint64_t w, ByteOrder endian)
Set the value in the word w after truncating it to the length of the packet and then byteswapping it ...
void setLE(T v)
Set the value in the data pointer to v as little endian.
static PacketPtr createRead(const RequestPtr &req)
Constructor-like methods that return Packets based on Request objects.
uint64_t getUintX(ByteOrder endian) const
Get the data in the packet byte swapped from the specified endianness and zero-extended to 64 bits.
void dataDynamic(T *p)
Set the data pointer to a value that should have delete [] called on it.
void makeAtomicResponse()
T getLE() const
Get the data in the packet byte swapped from little endian to host endian.
PCI device, base implementation is only config space.
PCIConfig config
The current config space.
void unserialize(CheckpointIn &cp) override
Reconstruct the state of this object from a checkpoint.
void serialize(CheckpointOut &cp) const override
Serialize this object to the given output stream.
bool getBAR(Addr addr, int &num, Addr &offs)
Which base address register (if any) maps the given address?
AddrRangeList getAddrRanges() const override
Determine the address ranges that this device responds to.
virtual Tick readConfig(PacketPtr pkt)
Read from the PCI config space data that is stored locally.
virtual Tick writeConfig(PacketPtr pkt)
Write to the PCI config space data that is stored locally.
virtual Tick read(PacketPtr pkt)=0
Pure virtual function that the device must implement.
System DMA Engine class for AMD dGPU.
void processRLC(Addr doorbellOffset, Addr wptrOffset)
void writeMMIO(PacketPtr pkt, Addr mmio_offset)
Methods for setting the values of SDMA MMIO registers.
void processGfx(Addr wptrOffset)
Given a new write ptr offset, communicated to the GPU through a doorbell write, the SDMA engine proce...
void deallocateRLCQueues()
void processPage(Addr wptrOffset)
void setGPUDevice(AMDGPUDevice *gpu_device)
GPUCommandProcessor & gpuCmdProc
The GPUCommandProcessor (CP) is responsible for accepting commands, in the form of HSA AQL packets,...
AddrRange RangeSize(Addr start, Addr size)
#define panic(...)
This implements a cprintf based panic() function.
void serializeSection(CheckpointOut &cp, const char *name) const
Serialize an object into a new section.
#define UNSERIALIZE_ARRAY(member, size)
#define SERIALIZE_ARRAY(member, size)
void unserializeSection(CheckpointIn &cp, const char *name)
Unserialize a child object.
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
std::shared_ptr< Request > RequestPtr
Tick curTick()
The universal simulation clock.
std::ostream CheckpointOut
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
uint64_t Tick
Tick count type.
constexpr uint32_t ROM_SIZE
void exitSimLoop(const std::string &message, int exit_code, Tick when, Tick repeat, bool serialize)
Schedule an event to exit the simulation loop (returning to Python) at the end of the current cycle (...
static constexpr uint32_t IH_OFFSET_SHIFT
static constexpr uint32_t MMHUB_BASE
static constexpr uint32_t SDMA0_BASE
static constexpr uint32_t GRBM_BASE
static constexpr uint32_t SDMA1_BASE
static constexpr uint32_t MMHUB_OFFSET_SHIFT
static constexpr int AMDGPU_VM_COUNT
static constexpr uint32_t SDMA_OFFSET_SHIFT
constexpr int FRAMEBUFFER_BAR
static constexpr uint32_t IH_BASE
constexpr int DOORBELL_BAR
static constexpr uint32_t NBIO_BASE
constexpr uint32_t VGA_ROM_DEFAULT
static constexpr uint32_t GRBM_OFFSET_SHIFT
Declaration of the Packet class.
#define PCI0_INTERRUPT_PIN
#define UNSERIALIZE_SCALAR(scalar)
#define SERIALIZE_SCALAR(scalar)
const std::string & name()