45#include "debug/ElasticTrace.hh"
59 depWindowSize(params.depWindowSize),
60 dataTraceStream(
nullptr),
61 instTraceStream(
nullptr),
62 startTraceInst(params.startTraceInst),
64 traceVirtAddr(params.traceVirtAddr),
67 cpu =
dynamic_cast<CPU *
>(params.manager);
69 fatal_if(!cpu,
"Manager of %s is not of type O3CPU and thus does not "\
70 "support dependency tracing.\n",
name());
72 fatal_if(depWindowSize == 0,
"depWindowSize parameter must be non-zero. "\
73 "Recommended size is 3x ROB size in the O3CPU.\n");
75 fatal_if(cpu->numThreads > 1,
"numThreads = %i, %s supports tracing for"\
76 "single-threaded workload only", cpu->numThreads,
name());
78 fatal_if(params.instFetchTraceFile ==
"",
"Assign instruction fetch "\
79 "trace file path to instFetchTraceFile");
80 fatal_if(params.dataDepTraceFile ==
"",
"Assign data dependency "\
81 "trace file path to dataDepTraceFile");
82 std::string filename =
simout.resolve(
name() +
"." +
83 params.instFetchTraceFile);
85 filename =
simout.resolve(
name() +
"." + params.dataDepTraceFile);
88 ProtoMessage::PacketHeader inst_pkt_header;
89 inst_pkt_header.set_obj_id(
name());
91 instTraceStream->write(inst_pkt_header);
94 ProtoMessage::InstDepRecordHeader data_rec_header;
95 data_rec_header.set_obj_id(
name());
97 data_rec_header.set_window_size(depWindowSize);
98 dataTraceStream->write(data_rec_header);
106 inform(
"@%llu: regProbeListeners() called, startTraceInst = %llu",
115 cpu->getContext(0)->scheduleInstCountEvent(
124 inform(
"@%llu: No. of instructions committed = %llu, registering elastic"
125 " probe listeners",
curTick(),
cpu->totalNumSimulatedInsts());
151 req->getPC(), req->getVaddr(), req->getPaddr(),
152 req->getFlags(), req->getSize(),
curTick());
156 ProtoMessage::Packet inst_fetch_pkt;
157 inst_fetch_pkt.set_tick(
curTick());
159 inst_fetch_pkt.set_pc(req->getPC());
160 inst_fetch_pkt.set_flags(req->getFlags());
161 inst_fetch_pkt.set_addr(req->getPaddr());
162 inst_fetch_pkt.set_size(req->getSize());
178 has already retired (mostly squashed)", dyn_inst->seqNum);
191 auto itr_exec_info =
tempStore.find(dyn_inst->seqNum);
193 exec_info_ptr = itr_exec_info->second;
196 tempStore[dyn_inst->seqNum] = exec_info_ptr;
201 (std::size_t)
stats.maxTempStoreSize.value());
211 auto itr_exec_info =
tempStore.find(dyn_inst->seqNum);
214 " skipping.\n", dyn_inst->seqNum);
242 int8_t max_regs = dyn_inst->numSrcRegs();
243 for (
int src_idx = 0; src_idx < max_regs; src_idx++) {
245 const RegId& src_reg = dyn_inst->srcRegIdx(src_idx);
248 PhysRegIdPtr phys_src_reg = dyn_inst->renamedSrcIdx(src_idx);
250 " %i (%s)\n", seq_num,
272 max_regs = dyn_inst->numDestRegs();
273 for (
int dest_idx = 0; dest_idx < max_regs; dest_idx++) {
276 const RegId& dest_reg = dyn_inst->destRegIdx(dest_idx);
281 dyn_inst->renamedDestIdx(dest_idx);
283 " %i (%s)\n", seq_num, phys_dest_reg->
flatIndex(),
289 (std::size_t)
stats.maxPhysRegDepMapSize.value());
296 inst_reg_pair.second);
297 auto itr_regdep_map =
physRegDepMap.find(inst_reg_pair.second);
308 auto itr_exec_info =
tempStore.find(head_inst->seqNum);
320 head_inst->hasRequest() &&
321 head_inst->getFault() ==
NoFault) {
337 if (!head_inst->isNop()) {
343 auto itr_temp_store =
tempStore.find(head_inst->seqNum);
346 "store, skipping.\n", head_inst->seqNum);
366 if (head_inst->getFault() !=
NoFault) {
368 "skip adding it to the trace\n",
369 (head_inst->isMemRef() ?
"Load/store" :
"Comp inst."),
371 }
else if (head_inst->isMemRef() && !head_inst->hasRequest()) {
373 "skip adding it to the trace\n", head_inst->seqNum);
374 }
else if (!head_inst->readPredicate()) {
376 "skip adding it to the trace\n",
377 (head_inst->isMemRef() ?
"Load/store" :
"Comp inst."),
399 new_record->
instNum = head_inst->seqNum;
400 new_record->
commit = commit;
401 new_record->
type = head_inst->isLoad() ? Record::LOAD :
402 (head_inst->isStore() ? Record::STORE :
406 new_record->
reqFlags = head_inst->memReqFlags;
407 new_record->
virtAddr = head_inst->effAddr;
408 new_record->
physAddr = head_inst->physEffAddr;
410 new_record->
size = head_inst->effSize;
411 new_record->
pc = head_inst->pcState().instAddr();
437 if (head_inst->isLoad() && !commit) {
442 std::set<InstSeqNum>::const_iterator dep_set_it;
452 "%lli\n", new_record->
instNum, *dep_set_it);
453 TraceInfo* reg_dep = trace_info_itr->second;
469 "%lli is skipped\n",new_record->
instNum, *dep_set_it);
477 if (head_inst->isStore()) {
498 (commit ?
"committed" :
"squashed"), new_record->
instNum);
519 bool find_load_not_store)
527 uint32_t num_go_back = 0;
532 while (num_go_back <
depWindowSize && from_itr != until_itr) {
533 if (find_load_not_store) {
553 past_record = *from_itr;
567 uint32_t num_go_back = 0;
568 Tick execute_tick = 0;
570 if (new_record->
isLoad()) {
573 ++
stats.numIssueOrderDepLoads;
574 }
else if (new_record->
isStore()) {
577 ++
stats.numIssueOrderDepStores;
581 ++
stats.numIssueOrderDepOther;
587 while (num_go_back <
depWindowSize && from_itr != until_itr) {
599 past_record = *from_itr;
617 (uint32_t)
stats.maxNumDependents.value());
622 Tick execute_tick)
const
629 Tick execute_tick)
const
637 Tick execute_tick)
const
646 Tick execute_tick)
const
659 auto itr_exec_info =
tempStore.find(temp_sn);
663 delete exec_info_ptr;
679 int64_t comp_delay = -1;
680 Tick execution_tick = 0, completion_tick = 0;
689 if (past_record->
isLoad()) {
695 }
else if (past_record->
isStore()) {
697 }
else if (past_record->
isComp()){
700 assert(execution_tick >= completion_tick);
701 comp_delay = execution_tick - completion_tick;
704 execution_tick, completion_tick, comp_delay);
723 int64_t comp_delay = -1;
724 Tick execution_tick = 0, completion_tick = 0;
742 assert(execution_tick >= completion_tick);
743 comp_delay = execution_tick - completion_tick;
745 execution_tick, completion_tick, comp_delay);
789 uint16_t num_filtered_nodes = 0;
792 while (num_to_write > 0) {
794 assert(temp_ptr->
type != Record::INVALID);
801 "is as follows:\n", temp_ptr->
instNum);
805 "size %i, flags %i\n", temp_ptr->
physAddr,
813 }
else if (temp_ptr->
isStore()) {
824 ProtoMessage::InstDepRecord dep_pkt;
825 dep_pkt.set_seq_num(temp_ptr->
instNum);
826 dep_pkt.set_type(temp_ptr->
type);
827 dep_pkt.set_pc(temp_ptr->
pc);
829 dep_pkt.set_flags(temp_ptr->
reqFlags);
830 dep_pkt.set_p_addr(temp_ptr->
physAddr);
834 dep_pkt.set_v_addr(temp_ptr->
virtAddr);
835 dep_pkt.set_size(temp_ptr->
size);
837 dep_pkt.set_comp_delay(temp_ptr->
compDelay);
844 dep_pkt.add_rob_dep(temp_ptr->
robDepList.front());
856 if (num_filtered_nodes != 0) {
861 dep_pkt.set_weight(num_filtered_nodes);
862 num_filtered_nodes = 0;
869 ++
stats.numFilteredNodes;
870 ++num_filtered_nodes;
877 depTrace.erase(dep_trace_itr_start, dep_trace_itr);
883 "Number of register dependencies recorded during tracing"),
885 "Number of commit order (rob) dependencies for a store "
886 "recorded on a past load/store during tracing"),
888 "Number of loads that got assigned issue order dependency "
889 "because they were dependency-free"),
891 "Number of stores that got assigned issue order dependency "
892 "because they were dependency-free"),
894 "Number of non load/store insts that got assigned issue order "
895 "dependency because they were dependency-free"),
897 "No. of nodes filtered out before writing the output trace"),
899 "Maximum number or dependents on any instruction"),
901 "Maximum size of the temporary store during the run"),
903 "Maximum size of register dependency map")
910 return Record::RecordType_Name(
type);
A ProtoOutputStream wraps a coded stream, potentially with compression, based on looking at the file ...
const RegIndex & flatIndex() const
Flat index accessor.
constexpr const char * className() const
Return a const char* with the register class name.
ProbeListenerObject(const ProbeListenerObjectParams &params)
void connectListener(Args &&...args)
Register ID: describe an architectural register with its class and index.
constexpr bool is(RegClassType reg_class) const
constexpr const char * className() const
Return a const char* with the register class name.
O3CPU class, has each of the stages (fetch through commit) within it, as well as all of the time buff...
void compDelayRob(TraceInfo *past_record, TraceInfo *new_record)
Calculate the computational delay between an instruction and a subsequent instruction that has an ROB...
const bool traceVirtAddr
Whether to trace virtual addresses for memory requests.
bool hasCompCompleted(TraceInfo *past_record, Tick execute_tick) const
Check if past record is a comp node that completed earlier than the execute tick.
void updateIssueOrderDep(TraceInfo *new_record)
Reverse iterate through the graph, search for an issue order dependency for a new node and update the...
bool allProbesReg
Whether the elastic trace listener has been registered for all probes.
std::vector< TraceInfo * >::iterator depTraceItr
Typedef of iterator to the instruction dependency trace.
void addCommittedInst(const DynInstConstPtr &head_inst)
Add an instruction that is at the head of the ROB and is committed.
bool hasLoadCompleted(TraceInfo *past_record, Tick execute_tick) const
Check if past record is a load that completed earlier than the execute tick.
ProtoOutputStream * dataTraceStream
Protobuf output stream for data dependency trace.
void recordToCommTick(const DynInstConstPtr &dyn_inst)
Populate the timestamp field in an InstExecInfo object for an instruction in flight when it is execut...
std::unordered_map< InstSeqNum, InstExecInfo * > tempStore
Temporary store of InstExecInfo objects.
void writeDepTrace(uint32_t num_to_write)
Write out given number of records to the trace starting with the first record in depTrace and iterati...
std::vector< TraceInfo * > depTrace
The instruction dependency trace containing TraceInfo objects.
void clearTempStoreUntil(const DynInstConstPtr &head_inst)
Clear entries in the temporary store of execution info objects to free allocated memory until the pre...
uint32_t depWindowSize
The maximum distance for a dependency; it is set by a top-level parameter.
void assignRobDep(TraceInfo *past_record, TraceInfo *new_record)
The new_record has an order dependency on a past_record, thus update the new record's Rob dependency ...
ElasticTrace(const ElasticTraceParams &params)
Constructor.
CPU * cpu
Pointer to the O3CPU that is this listener's parent, a.k.a. its manager.
bool firstWin
Used for checking the first window for processing and writing of dependency trace.
std::reverse_iterator< depTraceItr > depTraceRevItr
Typedef of the reverse iterator to the instruction dependency trace.
void addDepTraceRecord(const DynInstConstPtr &head_inst, InstExecInfo *exec_info_ptr, bool commit)
Add a record to the dependency trace depTrace which is a sequential container.
void compDelayPhysRegDep(TraceInfo *past_record, TraceInfo *new_record)
Calculate the computational delay between an instruction and a subsequent instruction that has a Phys...
void removeRegDepMapEntry(const SeqNumRegPair &inst_reg_pair)
When an instruction gets squashed the destination register mapped to it is freed up in the rename sta...
EventFunctionWrapper regEtraceListenersEvent
Event to trigger registering this listener for all probe points.
void regEtraceListeners()
Register all listeners.
std::pair< InstSeqNum, RegIndex > SeqNumRegPair
void recordExecTick(const DynInstConstPtr &dyn_inst)
Populate the execute timestamp field in an InstExecInfo object for an instruction in flight.
void regProbeListeners()
Register the probe listeners, i.e. the methods that are called on a probe point notify() call.
std::unordered_map< RegIndex, InstSeqNum > physRegDepMap
Map for recording the producer of a physical register to check Read After Write dependencies.
std::unordered_map< InstSeqNum, TraceInfo * > traceInfoMap
Map where the instruction sequence number is mapped to the pointer to the TraceInfo object.
void addSquashedInst(const DynInstConstPtr &head_inst)
Add an instruction that is at the head of the ROB and is squashed only if it is a load and a request ...
bool hasLoadBeenSent(TraceInfo *past_record, Tick execute_tick) const
Check if past record is a load sent earlier than the execute tick.
void fetchReqTrace(const RequestPtr &req)
Take the fields of the request class object that are relevant to create an instruction fetch request.
InstSeqNum lastClearedSeqNum
The last cleared instruction sequence number used to free up the memory allocated in the temporary st...
gem5::o3::ElasticTrace::ElasticTraceStats stats
ProtoOutputStream * instTraceStream
Protobuf output stream for instruction fetch trace.
void updateRegDep(const DynInstConstPtr &dyn_inst)
Record a Read After Write physical register dependency if there has been a write to the source regist...
void flushTraces()
Process any outstanding trace records, flush them out to the protobuf output streams and delete the s...
void updateCommitOrderDep(TraceInfo *new_record, bool find_load_not_store)
Reverse iterate through the graph, search for a store-after-store or store-after-load dependency and ...
const InstSeqNum startTraceInst
Number of instructions after which to enable tracing.
bool hasStoreCommitted(TraceInfo *past_record, Tick execute_tick) const
Check if past record is a store sent earlier than the execute tick.
#define ADD_STAT(n,...)
Convenience macro to add a stat to a statistics group.
Tick executeTick
Timestamp when instruction was first processed by execute stage.
Tick toCommitTick
Timestamp when instruction execution is completed in execute stage and instruction is marked as ready...
std::set< InstSeqNum > physRegDepSet
Set of instruction sequence numbers that this instruction depends on due to Read After Write data dep...
const std::string & typeToStr() const
Return string specifying the type of the node.
Request::FlagsType reqFlags
bool isLoad() const
Is the record a load.
std::list< InstSeqNum > robDepList
std::list< InstSeqNum > physRegDepList
int64_t compDelay
Computational delay after the last dependent inst.
bool isComp() const
Is the record a comp node (presumably a computation, i.e. non load/store, instruction).
RecordType type
The type of trace record for the instruction node.
bool isStore() const
Is the record a store.
#define fatal_if(cond,...)
Conditional fatal macro that checks the supplied condition and only causes a fatal error if the condi...
const Params & params() const
RefCountingPtr< const DynInst > DynInstConstPtr
Tick Frequency
The simulated frequency of curTick(). (In ticks per second)
Copyright (c) 2024 Arm Limited All rights reserved.
std::shared_ptr< Request > RequestPtr
Tick curTick()
The universal simulation clock.
uint64_t Tick
Tick count type.
void registerExitCallback(const std::function< void()> &callback)
Register an exit callback.
constexpr decltype(nullptr) NoFault
@ MiscRegClass
Control (misc) register.
Declaration of the Packet class.
statistics::Scalar maxNumDependents
Maximum number of dependents on any instruction.
statistics::Scalar maxTempStoreSize
Maximum size of the temporary store mostly useful as a check that it is not growing.
statistics::Scalar numIssueOrderDepLoads
Number of load insts that got assigned an issue order dependency because they were dependency-free.
statistics::Scalar numIssueOrderDepStores
Number of store insts that got assigned an issue order dependency because they were dependency-free.
ElasticTraceStats(statistics::Group *parent)
statistics::Scalar numRegDep
Number of register dependencies recorded during tracing.
statistics::Scalar numFilteredNodes
Number of filtered nodes.
statistics::Scalar maxPhysRegDepMapSize
Maximum size of the map that holds the last writer to a physical register.
statistics::Scalar numOrderDepStores
Number of stores that got assigned a commit order dependency on a past load/store.
statistics::Scalar numIssueOrderDepOther
Number of non load/store insts that got assigned an issue order dependency because they were dependen...
Tick getExecuteTick() const
Get the execute tick of the instruction.
const std::string & name()