34 #ifndef __MEM_RUBY_SYSTEM_GPU_COALESCER_HH__ 35 #define __MEM_RUBY_SYSTEM_GPU_COALESCER_HH__ 38 #include <unordered_map> 44 #include "mem/ruby/protocol/HSAScope.hh" 45 #include "mem/ruby/protocol/HSASegment.hh" 46 #include "mem/ruby/protocol/PrefetchBit.hh" 47 #include "mem/ruby/protocol/RubyAccessMode.hh" 48 #include "mem/ruby/protocol/RubyRequestType.hh" 49 #include "mem/ruby/protocol/SequencerRequestType.hh" 58 class RubyGPUCoalescerParams;
93 std::map<uint64_t, PerInstPackets>
instMap;
100 : seqNum(_seqNum), issueTime(
Cycles(0)),
101 rubyType(RubyRequestType_NULL)
150 Port &getPort(
const std::string &if_name,
157 void printProgress(std::ostream& out)
const;
158 void resetStats()
override;
160 void regStats()
override;
164 void writeCallback(
Addr address,
168 void writeCallback(
Addr address,
171 Cycles initialRequestTime,
172 Cycles forwardRequestTime,
176 void writeCallback(
Addr address,
179 Cycles initialRequestTime,
180 Cycles forwardRequestTime,
181 Cycles firstResponseTime);
185 void readCallback(
Addr address,
189 void readCallback(
Addr address,
192 Cycles initialRequestTime,
193 Cycles forwardRequestTime,
194 Cycles firstResponseTime);
196 void readCallback(
Addr address,
199 Cycles initialRequestTime,
200 Cycles forwardRequestTime,
205 void atomicCallback(
Addr address,
213 virtual RequestStatus makeRequest(
PacketPtr pkt)
override;
220 return deadlockCheckEvent.scheduled();
226 deschedule(deadlockCheckEvent);
231 void print(std::ostream& out)
const;
233 void evictionCallback(
Addr address);
234 void completeIssue();
236 void insertKernel(
int wavefront_id,
PacketPtr pkt);
240 void recordRequestType(SequencerRequestType requestType);
245 {
return *m_typeLatencyHist[
t]; }
248 {
return m_missLatencyHist; }
250 {
return *m_missTypeLatencyHist[
t]; }
253 {
return *m_missMachLatencyHist[
t]; }
257 {
return *m_missTypeMachLatencyHist[
r][
t]; }
260 {
return *m_IssueToInitialDelayHist[
t]; }
264 {
return *m_InitialToForwardDelayHist[
t]; }
268 {
return *m_ForwardToFirstResponseDelayHist[
t]; }
272 {
return *m_FirstResponseToCompletionDelayHist[
t]; }
277 Addr pc, RubyAccessMode access_mode,
282 void kernelCallback(
int wavfront_id);
288 Cycles initialRequestTime,
289 Cycles forwardRequestTime,
294 Cycles initialRequestTime,
295 Cycles forwardRequestTime,
297 bool success,
bool isRegion);
301 virtual RubyRequestType getRequestType(
PacketPtr pkt);
416 #endif // __MEM_RUBY_SYSTEM_GPU_COALESCER_HH__ GMTokenPort & getGMTokenPort()
Stats::Histogram & getLatencyHist()
CoalescedRequest(uint64_t _seqNum)
Ports are used to interface objects to each other.
Stats::Scalar CP_TCCStHits
EventFunctionWrapper issueEvent
Stats::Scalar GPU_TCPStHits
void setSeqNum(uint64_t _seqNum)
Cycles is a wrapper class for representing cycle counts, i.e. a relative difference between two points in time, expressed in a number of clock cycles.
bool recvTimingReq(PacketPtr)
Receive a timing request from the peer.
UncoalescedTable(GPUCoalescer *gc)
const std::string & name()
Stats::Histogram m_missLatencyHist
Histogram for holding latency profile of all requests that miss in the controller connected to this sequencer.
std::vector< Stats::Histogram * > m_ForwardToFirstResponseDelayHist
const PortID InvalidPortID
Stats::Histogram & getIssueToInitialDelayHist(uint32_t t) const
Stats::Histogram & getFirstResponseToCompletionDelayHist(const MachineType t) const
GMTokenPort(const std::string &name, ClockedObject *owner, PortID id=InvalidPortID)
Declaration of a request, the overall memory request consisting of the parts of the request that are persistent throughout the transaction.
HSASegment reqSegmentToHSASegment(const RequestPtr &req)
std::shared_ptr< Request > RequestPtr
Stats::Scalar GPU_TCPLdHits
Stats::Scalar GPU_TCCStHits
EventFunctionWrapper deadlockCheckEvent
Stats::Histogram & getMissTypeLatencyHist(uint32_t t)
uint64_t getSeqNum() const
Stats::Scalar GPU_TCPLdTransfers
Stats::Histogram & getForwardRequestToFirstResponseHist(const MachineType t) const
Stats::Scalar CP_TCPLdTransfers
std::vector< Stats::Histogram * > m_missMachLatencyHist
Histograms for profiling the latencies for requests that required external messages.
int m_max_outstanding_requests
Stats::Histogram & getMissTypeMachLatencyHist(uint32_t r, uint32_t t) const
Stats::Histogram m_latencyHist
Histogram for holding latency profile of all requests.
void checkDeadlock(Tick threshold)
std::vector< PacketPtr > & getPackets()
Declaration of Statistics objects.
This is a simple scalar statistic, like a counter.
Stats::Scalar CP_TCPStTransfers
RubyGPUCoalescerParams Params
Stats::Histogram & getMissMachLatencyHist(uint32_t t) const
void setRubyType(RubyRequestType type)
Cycles getIssueTime() const
std::map< uint64_t, PerInstPackets > instMap
void setIssueTime(Cycles _issueTime)
CacheMemory * m_dataCache_ptr
uint64_t Tick
Tick count type.
The ClockedObject class extends the SimObject with a clock and accessor functions to relate ticks to the cycles of the object.
bool assumingRfOCoherence
PerInstPackets * getInstPackets(int offset)
Stats::Scalar GPU_TCCLdHits
Tick recvAtomic(PacketPtr)
Receive an atomic request packet from the peer.
void descheduleDeadlockEvent() override
int m_store_waiting_on_load_cycles
Stats::Histogram m_outstandReqHist
Histogram for number of outstanding requests per cycle.
std::vector< std::vector< Stats::Histogram * > > m_missTypeMachLatencyHist
bool isDeadlockEventScheduled() const override
std::vector< Stats::Histogram * > m_InitialToForwardDelayHist
uint64_t Addr
Address type. This will probably be moved somewhere else in the near future.
int m_load_waiting_on_load_cycles
A Packet is used to encapsulate a transfer between two objects in the memory system (e.g., the L1 and L2 cache).
std::unordered_map< int, PacketPtr > kernelEndList
std::vector< Stats::Histogram * > m_IssueToInitialDelayHist
Histograms for recording the breakdown of miss latency.
Stats::Histogram & getOutstandReqHist()
Stats::Scalar CP_TCPLdHits
RubyRequestType getRubyType() const
std::vector< PacketPtr > pkts
void insertPacket(PacketPtr pkt)
std::vector< Stats::Histogram * > m_typeLatencyHist
void insertPacket(PacketPtr pkt)
std::list< PacketPtr > PerInstPackets
std::ostream & operator<<(std::ostream &out, const GPUCoalescer &obj)
Stats::Histogram & getMissLatencyHist()
Stats::Histogram & getTypeLatencyHist(uint32_t t)
std::vector< int > newKernelEnds
PacketPtr getFirstPkt() const
HSAScope reqScopeToHSAScope(const RequestPtr &req)
AddrRangeList getAddrRanges() const
Get a list of the non-overlapping address ranges the owner is responsible for.
int outstandingCount() const override
Stats::Scalar CP_TCPStHits
int m_load_waiting_on_store_cycles
void printRequestTable(std::stringstream &ss)
Stats::Histogram & getInitialToForwardDelayHist(const MachineType t) const
void recvFunctional(PacketPtr)
Receive a functional request packet from the peer.
CacheMemory * m_instCache_ptr
UncoalescedTable uncoalescedTable
int16_t PortID
Port index/ID type, and a symbolic name for an invalid port id.
Cycles m_deadlock_threshold
void print(std::ostream &out) const
bool m_runningGarnetStandalone
std::map< Addr, std::deque< CoalescedRequest * > > coalescedTable
std::vector< Stats::Histogram * > m_FirstResponseToCompletionDelayHist
std::vector< Stats::Histogram * > m_missTypeLatencyHist
int m_store_waiting_on_store_cycles
Stats::Scalar GPU_TCPStTransfers
bool m_deadlock_check_scheduled
Stats::Scalar CP_TCCLdHits