Go to the documentation of this file.
36 #include "debug/AMDGPUDevice.hh"
48 #include "params/AMDGPUDevice.hh"
56 :
PciDevice(
p), gpuMemMgr(
p.memory_manager), deviceIH(
p.device_ih),
57 pm4PktProc(
p.pm4_pkt_proc), cp(
p.cp),
58 checkpoint_before_mmios(
p.checkpoint_before_mmios),
59 init_interrupt_count(0), _lastVMID(0),
60 deviceMem(
name() +
".deviceMem",
p.memories, false,
"", false)
64 romBin.open(
p.rom_binary, std::ios::binary);
71 for (
auto&
m :
p.memories) {
84 if (
p.trace_file !=
"") {
89 for (
auto&
s :
p.sdmas) {
90 s->setGPUDevice(
this);
110 if (
p.device_name ==
"Vega10") {
118 }
else if (
p.device_name ==
"MI100" ||
p.device_name ==
"MI200") {
126 panic(
"Unknown GPU device %s\n",
p.device_name);
136 uint64_t mmhubBase = 0x8000ULL << 24;
137 uint64_t mmhubTop = 0x83ffULL << 24;
144 if (
p.device_name ==
"Vega10") {
148 }
else if (
p.device_name ==
"MI100") {
153 }
else if (
p.device_name ==
"MI200") {
161 panic(
"Unknown GPU device %s\n",
p.device_name);
174 uint64_t rom_data = 0;
176 memcpy(&rom_data,
rom.data() + rom_offset, pkt->
getSize());
177 pkt->
setUintX(rom_data, ByteOrder::little);
180 pkt->
getAddr(), rom_offset, rom_data);
189 uint64_t rom_data = pkt->
getUintX(ByteOrder::little);
191 memcpy(
rom.data() + rom_offset, &rom_data, pkt->
getSize());
194 pkt->
getAddr(), rom_offset, rom_data);
207 for (
auto &
r : ranges) {
208 if (
r.start() != 0) {
209 ret_ranges.push_back(
r);
287 uint8_t *dataPtr =
new uint8_t[pkt->
getSize()];
291 system->getDeviceMemory(readPkt)->access(readPkt);
318 pkt->
setUintX(value, ByteOrder::little);
361 uint8_t *dataPtr =
new uint8_t[pkt->
getSize()];
362 std::memcpy(dataPtr, pkt->
getPtr<uint8_t>(),
363 pkt->
getSize() *
sizeof(uint8_t));
367 system->getDeviceMemory(writePkt)->access(writePkt);
382 pkt->
getLE<uint64_t>());
386 pkt->
getLE<uint64_t>());
398 pkt->
getLE<uint64_t>() + 1);
409 panic(
"Write to unkown queue type!");
412 warn(
"Unknown doorbell offset: %lx\n",
offset);
425 for (
int idx = 0; idx <
sdmaIds.size(); ++idx) {
483 panic(
"Request with address out of mapped range!");
517 panic(
"Request with address out of mapped range!");
597 uint64_t regs_size =
regs.size();
598 uint64_t doorbells_size =
doorbells.size();
599 uint64_t sdma_engs_size =
sdmaEngs.size();
606 uint32_t reg_addrs[regs_size];
607 uint64_t reg_values[regs_size];
608 uint32_t doorbells_offset[doorbells_size];
609 QueueType doorbells_queues[doorbells_size];
610 uint32_t sdma_engs_offset[sdma_engs_size];
611 int sdma_engs[sdma_engs_size];
614 for (
auto & it :
regs) {
615 reg_addrs[idx] = it.first;
616 reg_values[idx] = it.second;
622 doorbells_offset[idx] = it.first;
623 doorbells_queues[idx] = it.second;
629 sdma_engs_offset[idx] = it.first;
630 sdma_engs[idx] = it.second->getId();
637 sizeof(doorbells_offset[0]));
639 sizeof(doorbells_queues[0]));
641 sizeof(sdma_engs_offset[0]));
655 uint64_t regs_size = 0;
656 uint64_t doorbells_size = 0;
657 uint64_t sdma_engs_size = 0;
664 uint32_t reg_addrs[regs_size];
665 uint64_t reg_values[regs_size];
669 sizeof(reg_values)/
sizeof(reg_values[0]));
671 for (
int idx = 0; idx < regs_size; ++idx) {
672 regs.insert(std::make_pair(reg_addrs[idx], reg_values[idx]));
676 if (doorbells_size > 0) {
677 uint32_t doorbells_offset[doorbells_size];
678 QueueType doorbells_queues[doorbells_size];
681 sizeof(doorbells_offset[0]));
683 sizeof(doorbells_queues[0]));
685 for (
int idx = 0; idx < doorbells_size; ++idx) {
686 regs.insert(std::make_pair(doorbells_offset[idx],
687 doorbells_queues[idx]));
688 doorbells[doorbells_offset[idx]] = doorbells_queues[idx];
692 if (sdma_engs_size > 0) {
693 uint32_t sdma_engs_offset[sdma_engs_size];
694 int sdma_engs[sdma_engs_size];
697 sizeof(sdma_engs_offset[0]));
700 for (
int idx = 0; idx < sdma_engs_size; ++idx) {
701 int sdma_id = sdma_engs[idx];
702 assert(
sdmaIds.count(sdma_id));
704 sdmaEngs.insert(std::make_pair(sdma_engs_offset[idx], sdma));
719 idMap.insert(std::make_pair(pasid, vmid));
725 panic(
"All VMIDs have been assigned");
737 auto result =
idMap.find(pasid);
738 assert(result !=
idMap.end());
739 if (result ==
idMap.end())
return;
740 uint16_t vmid = result->second;
753 it.second->deallocateRLCQueues();
763 std::unordered_map<uint16_t, std::set<int>>&
void readMMIO(PacketPtr pkt, Addr offset)
Tick curTick()
The universal simulation clock.
Tick read(PacketPtr pkt) override
Pure virtual function that the device must implement.
bool isROM(Addr addr) const
#define MI200_MEM_SIZE_REG
#define MI200_FB_LOCATION_TOP
Addr start() const
Get the start address of the range.
void setGPUDevice(AMDGPUDevice *gpu_device)
AddrRangeList getAddrRanges() const override
Every PIO device is obliged to provide an implementation that returns the address ranges the device r...
PCIConfig config
The current config space.
uint64_t getUintX(ByteOrder endian) const
Get the data in the packet byte swapped from the specified endianness and zero-extended to 64 bits.
AMDGPUMemoryManager * gpuMemMgr
#define UNSERIALIZE_SCALAR(scalar)
void serialize(CheckpointOut &cp) const override
Serialize this object to the given output stream.
void writeROM(PacketPtr pkt)
void setDoorbellType(uint32_t offset, QueueType qt)
Set handles to GPU blocks.
static constexpr uint32_t NBIO_BASE
AddrRange RangeSize(Addr start, Addr size)
SDMAEngine * getSDMAEngine(Addr offset)
void unserialize(CheckpointIn &cp) override
Reconstruct the state of this object from a checkpoint.
static constexpr uint32_t IH_OFFSET_SHIFT
Addr getMmioAperture(Addr addr)
SDMAEngine * getSDMAById(int id)
static constexpr uint32_t MMHUB_OFFSET_SHIFT
void writeMMIO(PacketPtr pkt, Addr mmio_offset)
Methods for setting the values of interrupt handler MMIO registers.
void writeMMIO(PacketPtr pkt, Addr mmio_offset)
void deallocateAllQueues()
void intrPost()
Methods inherited from PciDevice.
void serializeSection(CheckpointOut &cp, const char *name) const
Serialize an object into a new section.
const std::string to_string(sc_enc enc)
void setPageWptrLo(uint32_t data)
constexpr int DOORBELL_BAR
AMDGPUNbio nbio
Blocks of the GPU.
std::unordered_map< uint16_t, uint16_t > idMap
std::unordered_map< uint32_t, AddrRange > sdmaMmios
std::unordered_map< Addr, uint16_t > doorbellVMIDMap
static PacketPtr createWrite(const RequestPtr &req)
Addr gartBase()
Return base address of GART table in framebuffer.
void setGfxRptrHi(uint32_t data)
void readMMIO(PacketPtr pkt, Addr offset)
constexpr int FRAMEBUFFER_BAR
#define MI100_FB_LOCATION_BASE
void unserialize(CheckpointIn &cp) override
Unserialize an object.
void setGPUDevice(AMDGPUDevice *gpu_device)
HSAPacketProcessor & hsaPacketProc()
void readMMIOTrace(std::string trace_file)
Read an MMIO trace gathered from a real system and place the MMIO values read and written into the MM...
void makeAtomicResponse()
void setGfxDoorbellLo(uint32_t data)
Addr getFrameAperture(Addr addr)
void setPageSize(uint32_t data)
void process(PM4Queue *q, Addr wptrOffset)
This method starts processing a PM4Queue from the current read pointer to the newly communicated write...
Tick readConfig(PacketPtr pkt) override
Read from the PCI config space data that is stored locally.
void mapDoorbellToVMID(Addr doorbell, uint16_t vmid)
void writeMMIO(PacketPtr pkt, Addr offset)
void exitSimLoop(const std::string &message, int exit_code, Tick when, Tick repeat, bool serialize)
Schedule an event to exit the simulation loop (returning to Python) at the end of the current cycle (...
std::unordered_map< uint32_t, SDMAEngine * > sdmaEngs
virtual Tick writeConfig(PacketPtr pkt)
Write to the PCI config space data that is stored locally.
void setGfxBaseLo(uint32_t data)
AMDGPUDevice(const AMDGPUDeviceParams &p)
#define VEGA10_FB_LOCATION_TOP
static constexpr uint32_t GRBM_OFFSET_SHIFT
void setGfxRptrLo(uint32_t data)
void updateRptr(const uint32_t &data)
#define MI100_FB_LOCATION_TOP
void processPage(Addr wptrOffset)
Tick writeConfig(PacketPtr pkt) override
Write to the PCI config space data that is stored locally.
void unserializeSection(CheckpointIn &cp, const char *name)
Unserialize a child object.
void setGPUDevice(AMDGPUDevice *gpu_device)
void setMMHUBTop(Addr top)
void processRLC(Addr doorbellOffset, Addr wptrOffset)
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
uint64_t Tick
Tick count type.
void setRegVal(uint32_t addr, uint32_t value)
std::shared_ptr< Request > RequestPtr
uint16_t allocateVMID(uint16_t pasid)
AMDMMIOReader mmioReader
MMIO reader to populate device registers map.
void setGPUDevice(AMDGPUDevice *gpu_device)
PCI device, base implementation is only config space.
bool checkpoint_before_mmios
Initial checkpoint support variables.
std::unordered_map< uint32_t, sdmaFuncPtr > sdmaFunc
void readROM(PacketPtr pkt)
bool getBAR(Addr addr, int &num, Addr &offs)
Which base address register (if any) maps the given address?
void setPageRptrHi(uint32_t data)
Device model for an AMD GPU.
void serialize(CheckpointOut &cp) const override
Checkpoint support.
void dispatchAccess(PacketPtr pkt, bool read)
Convert a PCI packet into a response.
void setPageDoorbellLo(uint32_t data)
void setGfxDoorbellOffsetLo(uint32_t data)
#define SERIALIZE_ARRAY(member, size)
void readFrame(PacketPtr pkt, Addr offset)
Helper methods to handle specific BAR read/writes.
void readDoorbell(PacketPtr pkt, Addr offset)
System DMA Engine class for AMD dGPU.
void setMMHUBBase(Addr base)
void setGPUDevice(AMDGPUDevice *gpu_device)
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
std::unordered_map< uint64_t, uint64_t > gartTable
Copy of GART table.
bool readFrame(PacketPtr pkt, Addr offset)
constexpr uint32_t ROM_SIZE
const std::string & name()
#define SERIALIZE_SCALAR(scalar)
Tick write(PacketPtr pkt) override
Pure virtual function that the device must implement.
constexpr uint32_t VGA_ROM_DEFAULT
std::unordered_map< uint16_t, std::set< int > > & getUsedVMIDs()
bool haveRegVal(uint32_t addr)
Register value getter/setter.
#define VEGA10_FB_LOCATION_BASE
void readFromTrace(PacketPtr pkt, int barnum, Addr offset)
Get the next MMIO read from the trace file to an offset in a BAR and write the value to the packet pr...
static constexpr uint32_t IH_BASE
RequestorID getRequestorID() const
Get the requestorID for the memory manager.
void writeMMIO(PacketPtr pkt, Addr offset)
virtual Tick readConfig(PacketPtr pkt)
Read from the PCI config space data that is stored locally.
void readMMIO(PacketPtr pkt, Addr offset)
std::unordered_map< uint32_t, SDMAEngine * > sdmaIds
#define UNSERIALIZE_ARRAY(member, size)
PM4Queue * getQueue(Addr offset, bool gfx=false)
Based on an offset communicated through doorbell write, the PM4PacketProcessor identifies which queue...
HWScheduler * hwScheduler()
void setPageRptrLo(uint32_t data)
AddrRange romRange
VGA ROM methods.
static constexpr uint32_t MMHUB_BASE
void dataDynamic(T *p)
Set the data pointer to a value that should have delete [] called on it.
void setGfxWptrLo(uint32_t data)
GPUCommandProcessor & gpuCmdProc
#define MI100_MEM_SIZE_REG
std::unordered_map< uint16_t, std::set< int > > usedVMIDs
void updateReadIndex(Addr offset, uint64_t rd_idx)
Update read index on doorbell rings.
void insertQId(uint16_t vmid, int id)
T getLE() const
Get the data in the packet byte swapped from little endian to host endian.
void deallocateVmid(uint16_t vmid)
virtual Tick read(PacketPtr pkt)=0
Pure virtual function that the device must implement.
void deallocatePasid(uint16_t pasid)
std::array< uint8_t, ROM_SIZE > rom
std::ostream CheckpointOut
static constexpr int AMDGPU_VM_COUNT
memory::PhysicalMemory deviceMem
uint32_t getRegVal(uint32_t addr)
void write(Addr db_addr, uint64_t doorbell_reg)
void writeFrame(PacketPtr pkt, Addr offset)
void writeFrame(PacketPtr pkt, Addr offset)
void processGfx(Addr wptrOffset)
Given a new write ptr offset, communicated to the GPU through a doorbell write, the SDMA engine proce...
void setPageDoorbellOffsetLo(uint32_t data)
#define AMDGPU_MP0_SMN_C2PMSG_33
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
void(SDMAEngine::* sdmaFuncPtr)(uint32_t)
void setGfxWptrHi(uint32_t data)
void writeMMIO(PacketPtr pkt, Addr offset)
static PacketPtr createRead(const RequestPtr &req)
Constructor-like methods that return Packets based on Request objects.
PM4PacketProcessor * pm4PktProc
void setSDMAEngine(Addr offset, SDMAEngine *eng)
void setGfxSize(uint32_t data)
void setUintX(uint64_t w, ByteOrder endian)
Set the value in the word w after truncating it to the length of the packet and then byteswapping it ...
AddrRangeList getAddrRanges() const override
Determine the address ranges that this device responds to.
void setPageBaseLo(uint32_t data)
std::unordered_map< uint32_t, QueueType > doorbells
static constexpr uint32_t GRBM_BASE
#define MI200_FB_LOCATION_BASE
void setGfxBaseHi(uint32_t data)
RequestorID vramRequestorId()
Methods related to translations and system/device memory.
AMDGPUInterruptHandler * deviceIH
#define panic(...)
This implements a cprintf based panic() function.
T * getPtr()
get a pointer to the data ptr.
#define PCI0_INTERRUPT_PIN
void writeDoorbell(PacketPtr pkt, Addr offset)
Generated on Sun Jul 30 2023 01:56:54 for gem5 by doxygen 1.8.17