36#include "debug/AMDGPUDevice.hh"
48#include "params/AMDGPUDevice.hh"
56 :
PciDevice(
p), gpuMemMgr(
p.memory_manager), deviceIH(
p.device_ih),
57 pm4PktProc(
p.pm4_pkt_proc), cp(
p.cp),
58 checkpoint_before_mmios(
p.checkpoint_before_mmios),
59 init_interrupt_count(0), _lastVMID(0),
60 deviceMem(
name() +
".deviceMem",
p.memories, false,
"", false)
64 romBin.open(
p.rom_binary, std::ios::binary);
71 for (
auto&
m :
p.memories) {
84 if (
p.trace_file !=
"") {
89 for (
auto&
s :
p.sdmas) {
90 s->setGPUDevice(
this);
110 if (
p.device_name ==
"Vega10") {
118 }
else if (
p.device_name ==
"MI100" ||
p.device_name ==
"MI200") {
126 panic(
"Unknown GPU device %s\n",
p.device_name);
136 uint64_t mmhubBase = 0x8000ULL << 24;
137 uint64_t mmhubTop = 0x83ffULL << 24;
144 if (
p.device_name ==
"Vega10") {
148 }
else if (
p.device_name ==
"MI100") {
153 }
else if (
p.device_name ==
"MI200") {
161 panic(
"Unknown GPU device %s\n",
p.device_name);
174 uint64_t rom_data = 0;
176 memcpy(&rom_data,
rom.data() + rom_offset, pkt->
getSize());
177 pkt->
setUintX(rom_data, ByteOrder::little);
180 pkt->
getAddr(), rom_offset, rom_data);
189 uint64_t rom_data = pkt->
getUintX(ByteOrder::little);
191 memcpy(
rom.data() + rom_offset, &rom_data, pkt->
getSize());
194 pkt->
getAddr(), rom_offset, rom_data);
207 for (
auto &
r : ranges) {
208 if (
r.start() != 0) {
209 ret_ranges.push_back(
r);
287 uint8_t *dataPtr =
new uint8_t[pkt->
getSize()];
291 system->getDeviceMemory(readPkt)->access(readPkt);
318 pkt->
setUintX(value, ByteOrder::little);
361 uint8_t *dataPtr =
new uint8_t[pkt->
getSize()];
362 std::memcpy(dataPtr, pkt->
getPtr<uint8_t>(),
363 pkt->
getSize() *
sizeof(uint8_t));
367 system->getDeviceMemory(writePkt)->access(writePkt);
382 pkt->
getLE<uint64_t>());
386 pkt->
getLE<uint64_t>());
398 pkt->
getLE<uint64_t>() + 1);
409 panic(
"Write to unkown queue type!");
412 warn(
"Unknown doorbell offset: %lx\n",
offset);
425 for (
int idx = 0; idx <
sdmaIds.size(); ++idx) {
483 panic(
"Request with address out of mapped range!");
517 panic(
"Request with address out of mapped range!");
597 uint64_t regs_size =
regs.size();
598 uint64_t doorbells_size =
doorbells.size();
599 uint64_t sdma_engs_size =
sdmaEngs.size();
606 uint32_t reg_addrs[regs_size];
607 uint64_t reg_values[regs_size];
608 uint32_t doorbells_offset[doorbells_size];
609 QueueType doorbells_queues[doorbells_size];
610 uint32_t sdma_engs_offset[sdma_engs_size];
611 int sdma_engs[sdma_engs_size];
614 for (
auto & it :
regs) {
615 reg_addrs[idx] = it.first;
616 reg_values[idx] = it.second;
622 doorbells_offset[idx] = it.first;
623 doorbells_queues[idx] = it.second;
629 sdma_engs_offset[idx] = it.first;
630 sdma_engs[idx] = it.second->getId();
637 sizeof(doorbells_offset[0]));
639 sizeof(doorbells_queues[0]));
641 sizeof(sdma_engs_offset[0]));
655 uint64_t regs_size = 0;
656 uint64_t doorbells_size = 0;
657 uint64_t sdma_engs_size = 0;
664 uint32_t reg_addrs[regs_size];
665 uint64_t reg_values[regs_size];
669 sizeof(reg_values)/
sizeof(reg_values[0]));
671 for (
int idx = 0; idx < regs_size; ++idx) {
672 regs.insert(std::make_pair(reg_addrs[idx], reg_values[idx]));
676 if (doorbells_size > 0) {
677 uint32_t doorbells_offset[doorbells_size];
678 QueueType doorbells_queues[doorbells_size];
681 sizeof(doorbells_offset[0]));
683 sizeof(doorbells_queues[0]));
685 for (
int idx = 0; idx < doorbells_size; ++idx) {
686 regs.insert(std::make_pair(doorbells_offset[idx],
687 doorbells_queues[idx]));
688 doorbells[doorbells_offset[idx]] = doorbells_queues[idx];
692 if (sdma_engs_size > 0) {
693 uint32_t sdma_engs_offset[sdma_engs_size];
694 int sdma_engs[sdma_engs_size];
697 sizeof(sdma_engs_offset[0]));
700 for (
int idx = 0; idx < sdma_engs_size; ++idx) {
701 int sdma_id = sdma_engs[idx];
702 assert(
sdmaIds.count(sdma_id));
704 sdmaEngs.insert(std::make_pair(sdma_engs_offset[idx], sdma));
719 idMap.insert(std::make_pair(pasid, vmid));
725 panic(
"All VMIDs have been assigned");
737 auto result =
idMap.find(pasid);
738 assert(result !=
idMap.end());
739 if (result ==
idMap.end())
return;
740 uint16_t vmid = result->second;
753 it.second->deallocateRLCQueues();
763std::unordered_map<uint16_t, std::set<int>>&
AbstractMemory declaration.
#define AMDGPU_MP0_SMN_C2PMSG_33
#define VEGA10_FB_LOCATION_BASE
#define VEGA10_FB_LOCATION_TOP
#define MI200_MEM_SIZE_REG
#define MI200_FB_LOCATION_TOP
#define MI100_FB_LOCATION_BASE
#define MI200_FB_LOCATION_BASE
#define MI100_FB_LOCATION_TOP
#define MI100_MEM_SIZE_REG
Device model for an AMD GPU.
void insertQId(uint16_t vmid, int id)
uint32_t getRegVal(uint32_t addr)
std::unordered_map< Addr, uint16_t > doorbellVMIDMap
std::unordered_map< uint16_t, uint16_t > idMap
void readMMIO(PacketPtr pkt, Addr offset)
void serialize(CheckpointOut &cp) const override
Checkpoint support.
AddrRangeList getAddrRanges() const override
Every PIO device is obliged to provide an implementation that returns the address ranges the device r...
void unserialize(CheckpointIn &cp) override
Unserialize an object.
void writeMMIO(PacketPtr pkt, Addr offset)
Tick write(PacketPtr pkt) override
Pure virtual function that the device must implement.
PM4PacketProcessor * pm4PktProc
void setDoorbellType(uint32_t offset, QueueType qt)
Set handles to GPU blocks.
void deallocateAllQueues()
void readROM(PacketPtr pkt)
AddrRange romRange
VGA ROM methods.
std::unordered_map< uint16_t, std::set< int > > & getUsedVMIDs()
std::array< uint8_t, ROM_SIZE > rom
bool isROM(Addr addr) const
std::unordered_map< uint32_t, AddrRange > sdmaMmios
void(SDMAEngine::* sdmaFuncPtr)(uint32_t)
SDMAEngine * getSDMAEngine(Addr offset)
AMDGPUMemoryManager * gpuMemMgr
AMDGPUDevice(const AMDGPUDeviceParams &p)
void readDoorbell(PacketPtr pkt, Addr offset)
AMDGPUNbio nbio
Blocks of the GPU.
Tick readConfig(PacketPtr pkt) override
Read from the PCI config space data that is stored locally.
std::unordered_map< uint32_t, sdmaFuncPtr > sdmaFunc
std::unordered_map< uint16_t, std::set< int > > usedVMIDs
AMDGPUInterruptHandler * deviceIH
Tick writeConfig(PacketPtr pkt) override
Write to the PCI config space data that is stored locally.
AMDMMIOReader mmioReader
MMIO reader to populate device registers map.
Tick read(PacketPtr pkt) override
Pure virtual function that the device must implement.
bool checkpoint_before_mmios
Initial checkpoint support variables.
bool haveRegVal(uint32_t addr)
Register value getter/setter.
void dispatchAccess(PacketPtr pkt, bool read)
Convert a PCI packet into a response.
void deallocateVmid(uint16_t vmid)
void mapDoorbellToVMID(Addr doorbell, uint16_t vmid)
void intrPost()
Methods inherited from PciDevice.
void readFrame(PacketPtr pkt, Addr offset)
Helper methods to handle specific BAR read/writes.
void writeROM(PacketPtr pkt)
void writeDoorbell(PacketPtr pkt, Addr offset)
RequestorID vramRequestorId()
Methods related to translations and system/device memory.
std::unordered_map< uint32_t, SDMAEngine * > sdmaIds
void setRegVal(uint32_t addr, uint32_t value)
uint16_t allocateVMID(uint16_t pasid)
void deallocatePasid(uint16_t pasid)
SDMAEngine * getSDMAById(int id)
void writeFrame(PacketPtr pkt, Addr offset)
void setSDMAEngine(Addr offset, SDMAEngine *eng)
memory::PhysicalMemory deviceMem
std::unordered_map< uint32_t, QueueType > doorbells
std::unordered_map< uint32_t, SDMAEngine * > sdmaEngs
void setGPUDevice(AMDGPUDevice *gpu_device)
void updateRptr(const uint32_t &data)
void writeMMIO(PacketPtr pkt, Addr mmio_offset)
Methods for setting the values of interrupt handler MMIO registers.
RequestorID getRequestorID() const
Get the requestorID for the memory manager.
void readMMIO(PacketPtr pkt, Addr offset)
void writeMMIO(PacketPtr pkt, Addr offset)
bool readFrame(PacketPtr pkt, Addr offset)
void writeFrame(PacketPtr pkt, Addr offset)
void setGPUDevice(AMDGPUDevice *gpu_device)
void setMMHUBBase(Addr base)
std::unordered_map< uint64_t, uint64_t > gartTable
Copy of GART table.
void readMMIO(PacketPtr pkt, Addr offset)
Addr getMmioAperture(Addr addr)
void writeMMIO(PacketPtr pkt, Addr offset)
Addr getFrameAperture(Addr addr)
Addr gartBase()
Return base address of GART table in framebuffer.
void setMMHUBTop(Addr top)
void readMMIOTrace(std::string trace_file)
Read an MMIO trace gathered from a real system and place the MMIO values read and written into the MM...
void readFromTrace(PacketPtr pkt, int barnum, Addr offset)
Get the next MMIO read from the trace file to an offset in a BAR and write the value to the packet pr...
void setGPUDevice(AMDGPUDevice *gpu_device)
HSAPacketProcessor & hsaPacketProc()
HWScheduler * hwScheduler()
void setGPUDevice(AMDGPUDevice *gpu_device)
void write(Addr db_addr, uint64_t doorbell_reg)
void writeMMIO(PacketPtr pkt, Addr mmio_offset)
void setGPUDevice(AMDGPUDevice *gpu_device)
void updateReadIndex(Addr offset, uint64_t rd_idx)
Update read index on doorbell rings.
void process(PM4Queue *q, Addr wptrOffset)
This method starts processing a PM4Queue from the current read pointer to the newly communicated write...
PM4Queue * getQueue(Addr offset, bool gfx=false)
Based on an offset communicated through doorbell write, the PM4PacketProcessor identifies which queue...
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
void setUintX(uint64_t w, ByteOrder endian)
Set the value in the word w after truncating it to the length of the packet and then byteswapping it ...
static PacketPtr createWrite(const RequestPtr &req)
T * getPtr()
get a pointer to the data ptr.
static PacketPtr createRead(const RequestPtr &req)
Constructor-like methods that return Packets based on Request objects.
uint64_t getUintX(ByteOrder endian) const
Get the data in the packet byte swapped from the specified endianness and zero-extended to 64 bits.
void dataDynamic(T *p)
Set the data pointer to a value that should have delete [] called on it.
void makeAtomicResponse()
T getLE() const
Get the data in the packet byte swapped from little endian to host endian.
PCI device, base implementation is only config space.
PCIConfig config
The current config space.
void unserialize(CheckpointIn &cp) override
Reconstruct the state of this object from a checkpoint.
void serialize(CheckpointOut &cp) const override
Serialize this object to the given output stream.
bool getBAR(Addr addr, int &num, Addr &offs)
Which base address register (if any) maps the given address?
AddrRangeList getAddrRanges() const override
Determine the address ranges that this device responds to.
virtual Tick readConfig(PacketPtr pkt)
Read from the PCI config space data that is stored locally.
virtual Tick writeConfig(PacketPtr pkt)
Write to the PCI config space data that is stored locally.
virtual Tick read(PacketPtr pkt)=0
Pure virtual function that the device must implement.
System DMA Engine class for AMD dGPU.
void setPageRptrLo(uint32_t data)
void setGfxRptrLo(uint32_t data)
void setGfxWptrLo(uint32_t data)
void setGfxRptrHi(uint32_t data)
void processRLC(Addr doorbellOffset, Addr wptrOffset)
void setGfxSize(uint32_t data)
void setGfxBaseLo(uint32_t data)
void processGfx(Addr wptrOffset)
Given a new write ptr offset, communicated to the GPU through a doorbell write, the SDMA engine proce...
void setGfxWptrHi(uint32_t data)
void setGfxDoorbellOffsetLo(uint32_t data)
void processPage(Addr wptrOffset)
void setPageDoorbellOffsetLo(uint32_t data)
void setPageWptrLo(uint32_t data)
void setGfxDoorbellLo(uint32_t data)
void setPageDoorbellLo(uint32_t data)
void setPageSize(uint32_t data)
void setPageBaseLo(uint32_t data)
void setGfxBaseHi(uint32_t data)
void setPageRptrHi(uint32_t data)
GPUCommandProcessor & gpuCmdProc
The GPUCommandProcessor (CP) is responsible for accepting commands, in the form of HSA AQL packets,...
AddrRange RangeSize(Addr start, Addr size)
Addr start() const
Get the start address of the range.
#define panic(...)
This implements a cprintf based panic() function.
void serializeSection(CheckpointOut &cp, const char *name) const
Serialize an object into a new section.
#define UNSERIALIZE_ARRAY(member, size)
#define SERIALIZE_ARRAY(member, size)
void unserializeSection(CheckpointIn &cp, const char *name)
Unserialize a child object.
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
std::shared_ptr< Request > RequestPtr
Tick curTick()
The universal simulation clock.
std::ostream CheckpointOut
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
uint64_t Tick
Tick count type.
constexpr uint32_t ROM_SIZE
void exitSimLoop(const std::string &message, int exit_code, Tick when, Tick repeat, bool serialize)
Schedule an event to exit the simulation loop (returning to Python) at the end of the current cycle (...
static constexpr uint32_t IH_OFFSET_SHIFT
static constexpr uint32_t MMHUB_BASE
static constexpr uint32_t GRBM_BASE
static constexpr uint32_t MMHUB_OFFSET_SHIFT
static constexpr int AMDGPU_VM_COUNT
constexpr int FRAMEBUFFER_BAR
static constexpr uint32_t IH_BASE
constexpr int DOORBELL_BAR
static constexpr uint32_t NBIO_BASE
constexpr uint32_t VGA_ROM_DEFAULT
static constexpr uint32_t GRBM_OFFSET_SHIFT
Declaration of the Packet class.
#define PCI0_INTERRUPT_PIN
#define UNSERIALIZE_SCALAR(scalar)
#define SERIALIZE_SCALAR(scalar)
const std::string & name()