gem5 [DEVELOP-FOR-25.0]
elastic_trace.cc
1/*
2 * Copyright (c) 2013 - 2015 ARM Limited
3 * All rights reserved
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder. You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions are
16 * met: redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer;
18 * redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution;
21 * neither the name of the copyright holders nor the names of its
22 * contributors may be used to endorse or promote products derived from
23 * this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 */
37
38#include "cpu/o3/probe/elastic_trace.hh"
39
40#include "base/callback.hh"
41#include "base/output.hh"
42#include "base/trace.hh"
43#include "cpu/o3/dyn_inst.hh"
44#include "cpu/reg_class.hh"
45#include "debug/ElasticTrace.hh"
46#include "mem/packet.hh"
47
48namespace gem5
49{
50
51namespace o3
52{
53
54ElasticTrace::ElasticTrace(const ElasticTraceParams &params)
55 : ProbeListenerObject(params),
56 regEtraceListenersEvent([this]{ regEtraceListeners(); }, name()),
57 firstWin(true),
58 lastClearedSeqNum(0),
59 depWindowSize(params.depWindowSize),
60 dataTraceStream(nullptr),
61 instTraceStream(nullptr),
62 startTraceInst(params.startTraceInst),
63 allProbesReg(false),
64 traceVirtAddr(params.traceVirtAddr),
65 stats(this)
66{
67 cpu = dynamic_cast<CPU *>(params.manager);
68
69 fatal_if(!cpu, "Manager of %s is not of type O3CPU and thus does not "\
70 "support dependency tracing.\n", name());
71
72 fatal_if(depWindowSize == 0, "depWindowSize parameter must be non-zero. "\
73 "Recommended size is 3x ROB size in the O3CPU.\n");
74
 75 fatal_if(cpu->numThreads > 1, "numThreads = %i, %s supports tracing for "\
76 "single-threaded workload only", cpu->numThreads, name());
77 // Initialize the protobuf output stream
78 fatal_if(params.instFetchTraceFile == "", "Assign instruction fetch "\
79 "trace file path to instFetchTraceFile");
80 fatal_if(params.dataDepTraceFile == "", "Assign data dependency "\
81 "trace file path to dataDepTraceFile");
82 std::string filename = simout.resolve(name() + "." +
83 params.instFetchTraceFile);
84 instTraceStream = new ProtoOutputStream(filename);
85 filename = simout.resolve(name() + "." + params.dataDepTraceFile);
86 dataTraceStream = new ProtoOutputStream(filename);
87 // Create a protobuf message for the header and write it to the stream
88 ProtoMessage::PacketHeader inst_pkt_header;
89 inst_pkt_header.set_obj_id(name());
90 inst_pkt_header.set_tick_freq(sim_clock::Frequency);
91 instTraceStream->write(inst_pkt_header);
92 // Create a protobuf message for the header and write it to
93 // the stream
94 ProtoMessage::InstDepRecordHeader data_rec_header;
95 data_rec_header.set_obj_id(name());
96 data_rec_header.set_tick_freq(sim_clock::Frequency);
97 data_rec_header.set_window_size(depWindowSize);
98 dataTraceStream->write(data_rec_header);
99 // Register a callback to flush trace records and close the output streams.
100 registerExitCallback([this]() { flushTraces(); });
101}
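// Illustrative note (hypothetical names): with this listener instantiated as
// "system.cpu.elastic_trace", instFetchTraceFile = "fetchtrace.proto.gz" and
// dataDepTraceFile = "deptrace.proto.gz", the two streams created above land
// in the simulation output directory as
// system.cpu.elastic_trace.fetchtrace.proto.gz and
// system.cpu.elastic_trace.deptrace.proto.gz; a ".gz" suffix should let
// ProtoOutputStream add compression to the coded stream.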
102
103void
104ElasticTrace::regProbeListeners()
105{
106 inform("@%llu: regProbeListeners() called, startTraceInst = %llu",
107 curTick(), startTraceInst);
108 if (startTraceInst == 0) {
109 // If we want to start tracing from the start of the simulation,
110 // register all elastic trace probes now.
111 regEtraceListeners();
112 } else {
113 // Schedule an event to register all elastic trace probes when
114 // specified no. of instructions are committed.
115 cpu->getContext(0)->scheduleInstCountEvent(
116 &regEtraceListenersEvent, startTraceInst);
117 }
118}
119
120void
121ElasticTrace::regEtraceListeners()
122{
123 assert(!allProbesReg);
124 inform("@%llu: No. of instructions committed = %llu, registering elastic"
125 " probe listeners", curTick(), cpu->totalNumSimulatedInsts());
126 // Create new listeners: provide method to be called upon a notify() for
127 // each probe point.
129 this, "FetchRequest", &ElasticTrace::fetchReqTrace);
131 this, "Execute", &ElasticTrace::recordExecTick);
133 this, "ToCommit", &ElasticTrace::recordToCommTick);
135 this, "Rename", &ElasticTrace::updateRegDep);
137 this, "SquashInRename", &ElasticTrace::removeRegDepMapEntry);
139 this, "Squash", &ElasticTrace::addSquashedInst);
141 this, "Commit", &ElasticTrace::addCommittedInst);
142 allProbesReg = true;
143}
144
145void
146ElasticTrace::fetchReqTrace(const RequestPtr &req)
147{
148
149 DPRINTFR(ElasticTrace, "Fetch Req %i,(%lli,%lli,%lli),%i,%i,%lli\n",
150 (MemCmd::ReadReq),
151 req->getPC(), req->getVaddr(), req->getPaddr(),
152 req->getFlags(), req->getSize(), curTick());
153
154 // Create a protobuf message including the request fields necessary to
155 // recreate the request in the TraceCPU.
156 ProtoMessage::Packet inst_fetch_pkt;
157 inst_fetch_pkt.set_tick(curTick());
158 inst_fetch_pkt.set_cmd(MemCmd::ReadReq);
159 inst_fetch_pkt.set_pc(req->getPC());
160 inst_fetch_pkt.set_flags(req->getFlags());
161 inst_fetch_pkt.set_addr(req->getPaddr());
162 inst_fetch_pkt.set_size(req->getSize());
163 // Write the message to the stream.
164 instTraceStream->write(inst_fetch_pkt);
165}
166
167void
168ElasticTrace::recordExecTick(const DynInstConstPtr &dyn_inst)
169{
170
171 // In a corner case, a retired instruction is propagated backward to the
172 // IEW instruction queue to handle some side-channel information. But we
173 // must not process an instruction again. So we test the sequence number
174 // against the lastClearedSeqNum and skip adding the instruction for such
175 // corner cases.
176 if (dyn_inst->seqNum <= lastClearedSeqNum) {
177 DPRINTFR(ElasticTrace, "[sn:%lli] Ignoring in execute as instruction \
178 has already retired (mostly squashed)", dyn_inst->seqNum);
179 // Do nothing as program has proceeded and this inst has been
180 // propagated backwards to handle something.
181 return;
182 }
183
184 DPRINTFR(ElasticTrace, "[sn:%lli] Execute Tick = %i\n", dyn_inst->seqNum,
185 curTick());
186 // Either the execution info object will already exist if this
187 // instruction had a register dependency recorded in the rename probe
188 // listener before entering execute stage or it will not exist and will
189 // need to be created here.
190 InstExecInfo* exec_info_ptr;
191 auto itr_exec_info = tempStore.find(dyn_inst->seqNum);
192 if (itr_exec_info != tempStore.end()) {
193 exec_info_ptr = itr_exec_info->second;
194 } else {
195 exec_info_ptr = new InstExecInfo;
196 tempStore[dyn_inst->seqNum] = exec_info_ptr;
197 }
198
199 exec_info_ptr->executeTick = curTick();
200 stats.maxTempStoreSize = std::max(tempStore.size(),
201 (std::size_t)stats.maxTempStoreSize.value());
202}
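// Worked example (hypothetical values): an "Execute" notify() for [sn:42] at
// tick 1000 creates (or reuses) its InstExecInfo entry and sets
// executeTick = 1000; the entry then stays in tempStore until
// clearTempStoreUntil() runs when the instruction commits or is squashed.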
203
204void
205ElasticTrace::recordToCommTick(const DynInstConstPtr &dyn_inst)
206{
207 // If tracing has just been enabled then the instruction at this stage of
208 // execution is far enough that we cannot gather info about its past like
209 // the tick it started execution. Simply return until we see an instruction
210 // that is found in the tempStore.
211 auto itr_exec_info = tempStore.find(dyn_inst->seqNum);
212 if (itr_exec_info == tempStore.end()) {
213 DPRINTFR(ElasticTrace, "recordToCommTick: [sn:%lli] Not in temp store,"
214 " skipping.\n", dyn_inst->seqNum);
215 return;
216 }
217
218 DPRINTFR(ElasticTrace, "[sn:%lli] To Commit Tick = %i\n", dyn_inst->seqNum,
219 curTick());
220 InstExecInfo* exec_info_ptr = itr_exec_info->second;
221 exec_info_ptr->toCommitTick = curTick();
222
223}
224
225void
226ElasticTrace::updateRegDep(const DynInstConstPtr &dyn_inst)
227{
228 // Get the sequence number of the instruction
229 InstSeqNum seq_num = dyn_inst->seqNum;
230
231 assert(dyn_inst->seqNum > lastClearedSeqNum);
232
233 // Since this is the first probe activated in the pipeline, create
234 // a new execution info object to track this instruction as it
235 // progresses through the pipeline.
236 InstExecInfo* exec_info_ptr = new InstExecInfo;
237 tempStore[seq_num] = exec_info_ptr;
238
239 // Loop through the source registers and look up the dependency map. If
240 // the source register entry is found in the dependency map, add a
241 // dependency on the last writer.
242 int8_t max_regs = dyn_inst->numSrcRegs();
243 for (int src_idx = 0; src_idx < max_regs; src_idx++) {
244
245 const RegId& src_reg = dyn_inst->srcRegIdx(src_idx);
246 if (!src_reg.is(MiscRegClass) && !src_reg.is(InvalidRegClass)) {
247 // Get the physical register index of the i'th source register.
248 PhysRegIdPtr phys_src_reg = dyn_inst->renamedSrcIdx(src_idx);
249 DPRINTFR(ElasticTrace, "[sn:%lli] Check map for src reg"
250 " %i (%s)\n", seq_num,
251 phys_src_reg->flatIndex(), phys_src_reg->className());
252 auto itr_writer = physRegDepMap.find(phys_src_reg->flatIndex());
253 if (itr_writer != physRegDepMap.end()) {
254 InstSeqNum last_writer = itr_writer->second;
255 // Additionally the dependency distance is kept less than the
256 // window size parameter to limit the memory allocation to
257 // nodes in the graph. If the window were tending to infinite
258 // we would have to load a large number of node objects during
259 // replay.
260 if (seq_num - last_writer < depWindowSize) {
261 // Record a physical register dependency.
262 exec_info_ptr->physRegDepSet.insert(last_writer);
263 }
264 }
265
266 }
267
268 }
269
270 // Loop through the destination registers of this instruction and update
271 // the physical register dependency map for last writers to registers.
272 max_regs = dyn_inst->numDestRegs();
273 for (int dest_idx = 0; dest_idx < max_regs; dest_idx++) {
274 // For data dependency tracking the register must be an int, float or
275 // CC register and not a Misc register.
276 const RegId& dest_reg = dyn_inst->destRegIdx(dest_idx);
277 if (!dest_reg.is(MiscRegClass) && !dest_reg.is(InvalidRegClass)) {
278 // Get the physical register index of the i'th destination
279 // register.
280 PhysRegIdPtr phys_dest_reg =
281 dyn_inst->renamedDestIdx(dest_idx);
282 DPRINTFR(ElasticTrace, "[sn:%lli] Update map for dest reg"
283 " %i (%s)\n", seq_num, phys_dest_reg->flatIndex(),
284 dest_reg.className());
285 physRegDepMap[phys_dest_reg->flatIndex()] = seq_num;
286 }
287 }
288 stats.maxPhysRegDepMapSize = std::max(physRegDepMap.size(),
289 (std::size_t)stats.maxPhysRegDepMapSize.value());
290}
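// Worked example (hypothetical values): with depWindowSize = 300, an
// instruction with seq. num 1000 whose source register was last written by
// seq. num 750 records a RAW dependency (1000 - 750 = 250 < 300), while a
// last writer at seq. num 600 is ignored (400 >= 300) to bound the number of
// graph nodes that must be kept live during replay.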
291
292void
293ElasticTrace::removeRegDepMapEntry(const SeqNumRegPair &inst_reg_pair)
294{
295 DPRINTFR(ElasticTrace, "Remove Map entry for Reg %i\n",
296 inst_reg_pair.second);
297 auto itr_regdep_map = physRegDepMap.find(inst_reg_pair.second);
298 if (itr_regdep_map != physRegDepMap.end())
299 physRegDepMap.erase(itr_regdep_map);
300}
301
302void
303ElasticTrace::addSquashedInst(const DynInstConstPtr &head_inst)
304{
305 // If the squashed instruction was squashed before being processed by
306 // execute stage then it will not be in the temporary store. In this case
307 // do nothing and return.
308 auto itr_exec_info = tempStore.find(head_inst->seqNum);
309 if (itr_exec_info == tempStore.end())
310 return;
311
312 // If there is a squashed load for which a read request was
313 // sent before it got squashed then add it to the trace.
314 DPRINTFR(ElasticTrace, "Attempt to add squashed inst [sn:%lli]\n",
315 head_inst->seqNum);
316 // Get pointer to the execution info object corresponding to the inst.
317 InstExecInfo* exec_info_ptr = itr_exec_info->second;
318 if (head_inst->isLoad() && exec_info_ptr->executeTick != MaxTick &&
319 exec_info_ptr->toCommitTick != MaxTick &&
320 head_inst->hasRequest() &&
321 head_inst->getFault() == NoFault) {
322 // Add record to depTrace with commit parameter as false.
323 addDepTraceRecord(head_inst, exec_info_ptr, false);
324 }
325 // As the information contained is no longer needed, remove the execution
326 // info object from the temporary store.
327 clearTempStoreUntil(head_inst);
328}
329
330void
331ElasticTrace::addCommittedInst(const DynInstConstPtr &head_inst)
332{
333 DPRINTFR(ElasticTrace, "Attempt to add committed inst [sn:%lli]\n",
334 head_inst->seqNum);
335
336 // Add the instruction to the depTrace.
337 if (!head_inst->isNop()) {
338
339 // If tracing has just been enabled then the instruction at this stage
340 // of execution is far enough that we cannot gather info about its past
341 // like the tick it started execution. Simply return until we see an
342 // instruction that is found in the tempStore.
343 auto itr_temp_store = tempStore.find(head_inst->seqNum);
344 if (itr_temp_store == tempStore.end()) {
345 DPRINTFR(ElasticTrace, "addCommittedInst: [sn:%lli] Not in temp "
346 "store, skipping.\n", head_inst->seqNum);
347 return;
348 }
349
350 // Get pointer to the execution info object corresponding to the inst.
351 InstExecInfo* exec_info_ptr = itr_temp_store->second;
352 assert(exec_info_ptr->executeTick != MaxTick);
353 assert(exec_info_ptr->toCommitTick != MaxTick);
354
355 // Check if the instruction had a fault, if it predicated false and
356 // thus previous register values were restored or if it was a
357 // load/store that did not have a request (e.g. when the size of the
358 // request is zero). In all these cases the instruction is set as
359 // executed and is picked up by the commit probe listener. But a
360 // request is not issued and registers are not written. So practically,
361 // skipping these should not hurt as execution would not stall on them.
362 // Alternatively, these could be included merely as a compute node in
363 // the graph. Removing these for now. If correlation accuracy needs to
364 // be improved in future these can be turned into comp nodes at the
365 // cost of bigger traces.
366 if (head_inst->getFault() != NoFault) {
367 DPRINTF(ElasticTrace, "%s [sn:%lli] has faulted so "
368 "skip adding it to the trace\n",
369 (head_inst->isMemRef() ? "Load/store" : "Comp inst."),
370 head_inst->seqNum);
371 } else if (head_inst->isMemRef() && !head_inst->hasRequest()) {
372 DPRINTF(ElasticTrace, "Load/store [sn:%lli] has no request so "
373 "skip adding it to the trace\n", head_inst->seqNum);
374 } else if (!head_inst->readPredicate()) {
375 DPRINTF(ElasticTrace, "%s [sn:%lli] is predicated false so "
376 "skip adding it to the trace\n",
377 (head_inst->isMemRef() ? "Load/store" : "Comp inst."),
378 head_inst->seqNum);
379 } else {
380 // Add record to depTrace with commit parameter as true.
381 addDepTraceRecord(head_inst, exec_info_ptr, true);
382 }
383 }
384 // As the information contained is no longer needed, remove the execution
385 // info object from the temporary store.
386 clearTempStoreUntil(head_inst);
387}
388
389void
390ElasticTrace::addDepTraceRecord(const DynInstConstPtr &head_inst,
391 InstExecInfo* exec_info_ptr, bool commit)
392{
393 // Create a record to assign dynamic instruction related fields.
394 TraceInfo* new_record = new TraceInfo;
395 // Add to map for sequence number look up to retrieve the TraceInfo pointer
396 traceInfoMap[head_inst->seqNum] = new_record;
397
398 // Assign fields from the instruction
399 new_record->instNum = head_inst->seqNum;
400 new_record->commit = commit;
401 new_record->type = head_inst->isLoad() ? Record::LOAD :
402 (head_inst->isStore() ? Record::STORE :
403 Record::COMP);
404
405 // Assign fields for creating a request in case of a load/store
406 new_record->reqFlags = head_inst->memReqFlags;
407 new_record->virtAddr = head_inst->effAddr;
408 new_record->physAddr = head_inst->physEffAddr;
409 // Currently the tracing does not support split requests.
410 new_record->size = head_inst->effSize;
411 new_record->pc = head_inst->pcState().instAddr();
412
413 // Assign the timing information stored in the execution info object
414 new_record->executeTick = exec_info_ptr->executeTick;
415 new_record->toCommitTick = exec_info_ptr->toCommitTick;
416 new_record->commitTick = curTick();
417
418 // Assign initial values for number of dependents and computational delay
419 new_record->numDepts = 0;
420 new_record->compDelay = -1;
421
422 // The physical register dependency set of the first instruction is
423 // empty. Since there are no records in the depTrace at this point, the
424 // case of adding an ROB dependency by using a reverse iterator is not
425 // applicable. Thus, populate the fields of the record corresponding to the
426 // first instruction and return.
427 if (depTrace.empty()) {
428 // Store the record in depTrace.
429 depTrace.push_back(new_record);
430 DPRINTF(ElasticTrace, "Added first inst record %lli to DepTrace.\n",
431 new_record->instNum);
432 return;
433 }
434
435 // Clear register dependencies for squashed loads as they may be dependent
436 // on squashed instructions and we do not add those to the trace.
437 if (head_inst->isLoad() && !commit) {
438 (exec_info_ptr->physRegDepSet).clear();
439 }
440
441 // Assign the register dependencies stored in the execution info object
442 std::set<InstSeqNum>::const_iterator dep_set_it;
443 for (dep_set_it = (exec_info_ptr->physRegDepSet).begin();
444 dep_set_it != (exec_info_ptr->physRegDepSet).end();
445 ++dep_set_it) {
446 auto trace_info_itr = traceInfoMap.find(*dep_set_it);
447 if (trace_info_itr != traceInfoMap.end()) {
448 // The register dependency is valid. Assign it and calculate
449 // computational delay
450 new_record->physRegDepList.push_back(*dep_set_it);
451 DPRINTF(ElasticTrace, "Inst %lli has register dependency on "
452 "%lli\n", new_record->instNum, *dep_set_it);
453 TraceInfo* reg_dep = trace_info_itr->second;
454 reg_dep->numDepts++;
455 compDelayPhysRegDep(reg_dep, new_record);
456 ++stats.numRegDep;
457 } else {
458 // The instruction that this has a register dependency on was
459 // not added to the trace because of one of the following
460 // 1. it was an instruction that had a fault
461 // 2. it was an instruction that was predicated false and
462 // previous register values were restored
463 // 3. it was load/store that did not have a request (e.g. when
464 // the size of the request is zero but this may not be a fault)
465 // In all these cases the instruction is set as executed and is
466 // picked up by the commit probe listener. But a request is not
467 // issued and registers are not written to in these cases.
468 DPRINTF(ElasticTrace, "Inst %lli has register dependency on "
469 "%lli is skipped\n",new_record->instNum, *dep_set_it);
470 }
471 }
472
473 // Check for and assign an ROB dependency in addition to register
474 // dependency before adding the record to the trace.
475 // As stores have to commit in order a store is dependent on the last
476 // committed load/store. This is recorded in the ROB dependency.
477 if (head_inst->isStore()) {
478 // Look up store-after-store order dependency
479 updateCommitOrderDep(new_record, false);
480 // Look up store-after-load order dependency
481 updateCommitOrderDep(new_record, true);
482 }
483
484 // In case a node is dependency-free or its dependency got discarded
485 // because it was outside the window, it is marked ready in the ROB at the
486 // time of issue. A request is sent as soon as possible. To model this, a
487 // node is assigned an issue order dependency on a committed instruction
488 // that completed earlier than it. This is done to avoid the problem of
489 // determining the issue times of such dependency-free nodes during replay
490 // which could lead to too much parallelism, thinking conservatively.
491 if (new_record->robDepList.empty() && new_record->physRegDepList.empty()) {
492 updateIssueOrderDep(new_record);
493 }
494
495 // Store the record in depTrace.
496 depTrace.push_back(new_record);
497 DPRINTF(ElasticTrace, "Added %s inst %lli to DepTrace.\n",
498 (commit ? "committed" : "squashed"), new_record->instNum);
499
500 // To process the number of records specified by depWindowSize in the
501 // forward direction, the depTrace must have twice as many records
502 // to check for dependencies.
503 if (depTrace.size() == 2 * depWindowSize) {
504
505 DPRINTF(ElasticTrace, "Writing out trace...\n");
506
507 // Write out the records which have been processed to the trace
508 // and remove them from the depTrace.
509 writeDepTrace(depWindowSize);
510
511 // After the first window, writeDepTrace() must check for valid
512 // compDelay.
513 firstWin = false;
514 }
515}
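// Worked example (hypothetical value): with depWindowSize = 300 the check
// above fires when depTrace holds 600 records; the oldest window of 300
// records is written out and erased, and the remaining 300 stay behind so
// that later records can still resolve dependencies reaching one full
// window back.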
516
517void
518ElasticTrace::updateCommitOrderDep(TraceInfo* new_record,
519 bool find_load_not_store)
520{
521 assert(new_record->isStore());
522 // Iterate in reverse direction to search for the last committed
523 // load/store that completed earlier than the new record
524 depTraceRevItr from_itr(depTrace.end());
525 depTraceRevItr until_itr(depTrace.begin());
526 TraceInfo* past_record = *from_itr;
527 uint32_t num_go_back = 0;
528
529 // The execution time of this store is when it is sent, that is committed
530 Tick execute_tick = curTick();
531 // Search for store-after-load or store-after-store order dependency
532 while (num_go_back < depWindowSize && from_itr != until_itr) {
533 if (find_load_not_store) {
534 // Check if previous inst is a load completed earlier by comparing
535 // with execute tick
536 if (hasLoadCompleted(past_record, execute_tick)) {
537 // Assign rob dependency and calculate the computational delay
538 assignRobDep(past_record, new_record);
539 ++stats.numOrderDepStores;
540 return;
541 }
542 } else {
543 // Check if previous inst is a store sent earlier by comparing with
544 // execute tick
545 if (hasStoreCommitted(past_record, execute_tick)) {
546 // Assign rob dependency and calculate the computational delay
547 assignRobDep(past_record, new_record);
548 ++stats.numOrderDepStores;
549 return;
550 }
551 }
552 ++from_itr;
553 past_record = *from_itr;
554 ++num_go_back;
555 }
556}
557
558void
559ElasticTrace::updateIssueOrderDep(TraceInfo* new_record)
560{
561 // Iterate in reverse direction to search for the last committed
562 // record that completed earlier than the new record
563 depTraceRevItr from_itr(depTrace.end());
564 depTraceRevItr until_itr(depTrace.begin());
565 TraceInfo* past_record = *from_itr;
566
567 uint32_t num_go_back = 0;
568 Tick execute_tick = 0;
569
570 if (new_record->isLoad()) {
571 // The execution time of a load is when a request is sent
572 execute_tick = new_record->executeTick;
573 ++stats.numIssueOrderDepLoads;
574 } else if (new_record->isStore()) {
575 // The execution time of a store is when it is sent, i.e. committed
576 execute_tick = curTick();
577 ++stats.numIssueOrderDepStores;
578 } else {
579 // The execution time of a non load/store is when it completes
580 execute_tick = new_record->toCommitTick;
581 ++stats.numIssueOrderDepOther;
582 }
583
584 // We search if this record has an issue order dependency on a past record.
585 // Once we find it, we update both the new record and the record it depends
586 // on and return.
587 while (num_go_back < depWindowSize && from_itr != until_itr) {
588 // Check if a previous inst is a load sent earlier, or a store sent
589 // earlier, or a comp inst completed earlier by comparing with execute
590 // tick
591 if (hasLoadBeenSent(past_record, execute_tick) ||
592 hasStoreCommitted(past_record, execute_tick) ||
593 hasCompCompleted(past_record, execute_tick)) {
594 // Assign rob dependency and calculate the computational delay
595 assignRobDep(past_record, new_record);
596 return;
597 }
598 ++from_itr;
599 past_record = *from_itr;
600 ++num_go_back;
601 }
602}
603
604void
605ElasticTrace::assignRobDep(TraceInfo* past_record, TraceInfo* new_record) {
606 DPRINTF(ElasticTrace, "%s %lli has ROB dependency on %lli\n",
607 new_record->typeToStr(), new_record->instNum,
608 past_record->instNum);
609 // Add dependency on past record
610 new_record->robDepList.push_back(past_record->instNum);
611 // Update new_record's compute delay with respect to the past record
612 compDelayRob(past_record, new_record);
613 // Increment number of dependents of the past record
614 ++(past_record->numDepts);
615 // Update stat to log max number of dependents
616 stats.maxNumDependents = std::max(past_record->numDepts,
617 (uint32_t)stats.maxNumDependents.value());
618}
619
620bool
621ElasticTrace::hasStoreCommitted(TraceInfo* past_record,
622 Tick execute_tick) const
623{
624 return (past_record->isStore() && past_record->commitTick <= execute_tick);
625}
626
627bool
628ElasticTrace::hasLoadCompleted(TraceInfo* past_record,
629 Tick execute_tick) const
630{
631 return(past_record->isLoad() && past_record->commit &&
632 past_record->toCommitTick <= execute_tick);
633}
634
635bool
636ElasticTrace::hasLoadBeenSent(TraceInfo* past_record,
637 Tick execute_tick) const
638{
639 // Check if previous inst is a load sent earlier than this
640 return (past_record->isLoad() && past_record->commit &&
641 past_record->executeTick <= execute_tick);
642}
643
644bool
645ElasticTrace::hasCompCompleted(TraceInfo* past_record,
646 Tick execute_tick) const
647{
648 return(past_record->isComp() && past_record->toCommitTick <= execute_tick);
649}
650
651void
652ElasticTrace::clearTempStoreUntil(const DynInstConstPtr &head_inst)
653{
654 // Clear from temp store starting with the execution info object
655 // corresponding to the head_inst and continue clearing by decrementing the
656 // sequence number until the last cleared sequence number.
657 InstSeqNum temp_sn = (head_inst->seqNum);
658 while (temp_sn > lastClearedSeqNum) {
659 auto itr_exec_info = tempStore.find(temp_sn);
660 if (itr_exec_info != tempStore.end()) {
661 InstExecInfo* exec_info_ptr = itr_exec_info->second;
662 // Free allocated memory for the info object
663 delete exec_info_ptr;
664 // Remove entry from temporary store
665 tempStore.erase(itr_exec_info);
666 }
667 temp_sn--;
668 }
669 // Update the last cleared sequence number to that of the head_inst
670 lastClearedSeqNum = head_inst->seqNum;
671}
672
673void
674ElasticTrace::compDelayRob(TraceInfo* past_record, TraceInfo* new_record)
675{
676 // The computation delay is the delay between the completion tick of the
677 // inst. pointed to by past_record and the execution tick of its dependent
678 // inst. pointed to by new_record.
679 int64_t comp_delay = -1;
680 Tick execution_tick = 0, completion_tick = 0;
681
682 DPRINTF(ElasticTrace, "Seq num %lli has ROB dependency on seq num %lli.\n",
683 new_record->instNum, past_record->instNum);
684
685 // Get the tick when the node is executed as per the modelling of
686 // computation delay
687 execution_tick = new_record->getExecuteTick();
688
689 if (past_record->isLoad()) {
690 if (new_record->isStore()) {
691 completion_tick = past_record->toCommitTick;
692 } else {
693 completion_tick = past_record->executeTick;
694 }
695 } else if (past_record->isStore()) {
696 completion_tick = past_record->commitTick;
697 } else if (past_record->isComp()){
698 completion_tick = past_record->toCommitTick;
699 }
700 assert(execution_tick >= completion_tick);
701 comp_delay = execution_tick - completion_tick;
702
703 DPRINTF(ElasticTrace, "Computational delay is %lli - %lli = %lli\n",
704 execution_tick, completion_tick, comp_delay);
705
706 // Assign the computational delay with respect to the dependency which
707 // completes the latest.
708 if (new_record->compDelay == -1)
709 new_record->compDelay = comp_delay;
710 else
711 new_record->compDelay = std::min(comp_delay, new_record->compDelay);
712 DPRINTF(ElasticTrace, "Final computational delay = %lli.\n",
713 new_record->compDelay);
714}
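// Worked example (hypothetical ticks): a past load that completed
// (toCommitTick) at tick 2000 and a dependent store that executes, i.e.
// commits, at tick 2500 give an ROB computational delay of
// 2500 - 2000 = 500; if a register dependency later yields 300, compDelay
// keeps the minimum, 300.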
715
716void
717ElasticTrace::compDelayPhysRegDep(TraceInfo* past_record,
718 TraceInfo* new_record)
719{
720 // The computation delay is the delay between the completion tick of the
721 // inst. pointed to by past_record and the execution tick of its dependent
722 // inst. pointed to by new_record.
723 int64_t comp_delay = -1;
724 Tick execution_tick = 0, completion_tick = 0;
725
726 DPRINTF(ElasticTrace, "Seq. num %lli has register dependency on seq. num"
727 " %lli.\n", new_record->instNum, past_record->instNum);
728
729 // Get the tick when the node is executed as per the modelling of
730 // computation delay
731 execution_tick = new_record->getExecuteTick();
732
733 // When there is a physical register dependency on an instruction, the
734 // completion tick of that instruction is when it wrote to the register,
735 // register, this is approximated to commitTick instead. In case of a store
736 // register, this is approximated to commitTick instead
737 if (past_record->isStore()) {
738 completion_tick = past_record->commitTick;
739 } else {
740 completion_tick = past_record->toCommitTick;
741 }
742 assert(execution_tick >= completion_tick);
743 comp_delay = execution_tick - completion_tick;
744 DPRINTF(ElasticTrace, "Computational delay is %lli - %lli = %lli\n",
745 execution_tick, completion_tick, comp_delay);
746
747 // Assign the computational delay with respect to the dependency which
748 // completes the latest.
749 if (new_record->compDelay == -1)
750 new_record->compDelay = comp_delay;
751 else
752 new_record->compDelay = std::min(comp_delay, new_record->compDelay);
753 DPRINTF(ElasticTrace, "Final computational delay = %lli.\n",
754 new_record->compDelay);
755}
756
757Tick
758ElasticTrace::TraceInfo::getExecuteTick() const
759{
760 if (isLoad()) {
761 // Execution tick for a load instruction is when the request was sent,
762 // that is executeTick.
763 return executeTick;
764 } else if (isStore()) {
765 // Execution tick for a store instruction is when the request was sent,
766 // that is commitTick.
767 return commitTick;
768 } else {
769 // Execution tick for a non load/store instruction is when the register
770 // value was written to, that is toCommitTick.
771 return toCommitTick;
772 }
773}
774
775void
776ElasticTrace::writeDepTrace(uint32_t num_to_write)
777{
778 // Write the trace with fields as follows:
779 // Instruction sequence number
780 // If instruction was a load
781 // If instruction was a store
782 // If instruction has addr
783 // If instruction has size
784 // If instruction has flags
785 // List of order dependencies - optional, repeated
786 // Computational delay with respect to last completed dependency
787 // List of physical register RAW dependencies - optional, repeated
788 // Weight of a node equal to no. of filtered nodes before it - optional
789 uint16_t num_filtered_nodes = 0;
790 depTraceItr dep_trace_itr(depTrace.begin());
791 depTraceItr dep_trace_itr_start = dep_trace_itr;
792 while (num_to_write > 0) {
793 TraceInfo* temp_ptr = *dep_trace_itr;
794 assert(temp_ptr->type != Record::INVALID);
795 // If no node depends on a comp node then there is no reason to
796 // track the comp node in the dependency graph. We filter out such
797 // nodes but count them and add a weight field to the subsequent node
798 // that we do include in the trace.
799 if (!temp_ptr->isComp() || temp_ptr->numDepts != 0) {
800 DPRINTFR(ElasticTrace, "Instruction with seq. num %lli "
801 "is as follows:\n", temp_ptr->instNum);
802 if (temp_ptr->isLoad() || temp_ptr->isStore()) {
803 DPRINTFR(ElasticTrace, "\tis a %s\n", temp_ptr->typeToStr());
804 DPRINTFR(ElasticTrace, "\thas a request with phys addr %i, "
805 "size %i, flags %i\n", temp_ptr->physAddr,
806 temp_ptr->size, temp_ptr->reqFlags);
807 } else {
808 DPRINTFR(ElasticTrace, "\tis a %s\n", temp_ptr->typeToStr());
809 }
810 if (firstWin && temp_ptr->compDelay == -1) {
811 if (temp_ptr->isLoad()) {
812 temp_ptr->compDelay = temp_ptr->executeTick;
813 } else if (temp_ptr->isStore()) {
814 temp_ptr->compDelay = temp_ptr->commitTick;
815 } else {
816 temp_ptr->compDelay = temp_ptr->toCommitTick;
817 }
818 }
819 assert(temp_ptr->compDelay != -1);
820 DPRINTFR(ElasticTrace, "\thas computational delay %lli\n",
821 temp_ptr->compDelay);
822
823 // Create a protobuf message for the dependency record
824 ProtoMessage::InstDepRecord dep_pkt;
825 dep_pkt.set_seq_num(temp_ptr->instNum);
826 dep_pkt.set_type(temp_ptr->type);
827 dep_pkt.set_pc(temp_ptr->pc);
828 if (temp_ptr->isLoad() || temp_ptr->isStore()) {
829 dep_pkt.set_flags(temp_ptr->reqFlags);
830 dep_pkt.set_p_addr(temp_ptr->physAddr);
831 // If tracing of virtual addresses is enabled, set the optional
832 // field for it
833 if (traceVirtAddr)
834 dep_pkt.set_v_addr(temp_ptr->virtAddr);
835 dep_pkt.set_size(temp_ptr->size);
836 }
837 dep_pkt.set_comp_delay(temp_ptr->compDelay);
838 if (temp_ptr->robDepList.empty()) {
839 DPRINTFR(ElasticTrace, "\thas no order (rob) dependencies\n");
840 }
841 while (!temp_ptr->robDepList.empty()) {
842 DPRINTFR(ElasticTrace, "\thas order (rob) dependency on %lli\n",
843 temp_ptr->robDepList.front());
844 dep_pkt.add_rob_dep(temp_ptr->robDepList.front());
845 temp_ptr->robDepList.pop_front();
846 }
847 if (temp_ptr->physRegDepList.empty()) {
848 DPRINTFR(ElasticTrace, "\thas no register dependencies\n");
849 }
850 while (!temp_ptr->physRegDepList.empty()) {
851 DPRINTFR(ElasticTrace, "\thas register dependency on %lli\n",
852 temp_ptr->physRegDepList.front());
853 dep_pkt.add_reg_dep(temp_ptr->physRegDepList.front());
854 temp_ptr->physRegDepList.pop_front();
855 }
856 if (num_filtered_nodes != 0) {
857 // Set the weight of this node as the no. of filtered nodes
858 // between this node and the last node that we wrote to output
859 // stream. The weight will be used during replay to model ROB
860 // occupancy of filtered nodes.
861 dep_pkt.set_weight(num_filtered_nodes);
862 num_filtered_nodes = 0;
863 }
864 // Write the message to the protobuf output stream
865 dataTraceStream->write(dep_pkt);
866 } else {
867 // Don't write the node to the trace but note that we have filtered
868 // out a node.
869 ++stats.numFilteredNodes;
870 ++num_filtered_nodes;
871 }
872 dep_trace_itr++;
873 traceInfoMap.erase(temp_ptr->instNum);
874 delete temp_ptr;
875 num_to_write--;
876 }
877 depTrace.erase(dep_trace_itr_start, dep_trace_itr);
878}
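// Worked example: if three dependency-free comp nodes are filtered out
// between two emitted records, the next record written to the stream carries
// weight = 3 so that replay can still account for the ROB occupancy of the
// filtered nodes.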
879
880ElasticTrace::ElasticTraceStats::ElasticTraceStats(statistics::Group *parent)
881 : statistics::Group(parent),
882 ADD_STAT(numRegDep, statistics::units::Count::get(),
883 "Number of register dependencies recorded during tracing"),
884 ADD_STAT(numOrderDepStores, statistics::units::Count::get(),
885 "Number of commit order (rob) dependencies for a store "
886 "recorded on a past load/store during tracing"),
887 ADD_STAT(numIssueOrderDepLoads, statistics::units::Count::get(),
888 "Number of loads that got assigned issue order dependency "
889 "because they were dependency-free"),
890 ADD_STAT(numIssueOrderDepStores, statistics::units::Count::get(),
891 "Number of stores that got assigned issue order dependency "
892 "because they were dependency-free"),
893 ADD_STAT(numIssueOrderDepOther, statistics::units::Count::get(),
894 "Number of non load/store insts that got assigned issue order "
895 "dependency because they were dependency-free"),
896 ADD_STAT(numFilteredNodes, statistics::units::Count::get(),
897 "No. of nodes filtered out before writing the output trace"),
898 ADD_STAT(maxNumDependents, statistics::units::Count::get(),
899 "Maximum number of dependents on any instruction"),
900 ADD_STAT(maxTempStoreSize, statistics::units::Count::get(),
901 "Maximum size of the temporary store during the run"),
902 ADD_STAT(maxPhysRegDepMapSize, statistics::units::Count::get(),
903 "Maximum size of register dependency map")
904{
905}
906
907const std::string&
908ElasticTrace::TraceInfo::typeToStr() const
909{
910 return Record::RecordType_Name(type);
911}
912
913void
914ElasticTrace::flushTraces()
915{
916 // Write to trace all records in the depTrace.
917 writeDepTrace(depTrace.size());
918 // Delete the stream objects
919 delete dataTraceStream;
920 delete instTraceStream;
921}
922
923} // namespace o3
924} // namespace gem5
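
A minimal sketch of reading back the data dependency trace produced above, assuming gem5's ProtoInputStream (proto/protoio.hh) and the generated ProtoMessage classes from proto/inst_dep_record.proto are available; the accessors mirror the set_*() calls in ElasticTrace::writeDepTrace(), while the function name dumpDepTrace is hypothetical and error handling is omitted.

#include <iostream>
#include <string>

#include "proto/inst_dep_record.pb.h"
#include "proto/protoio.hh"

void
dumpDepTrace(const std::string &filename)
{
    ProtoInputStream trace(filename);

    // The stream starts with a header record carrying the SimObject name,
    // the tick frequency and the dependency window size.
    ProtoMessage::InstDepRecordHeader header;
    if (!trace.read(header))
        return;

    // Each subsequent message is one instruction node of the elastic trace.
    ProtoMessage::InstDepRecord record;
    while (trace.read(record)) {
        std::cout << record.seq_num()
                  << " comp_delay " << record.comp_delay()
                  << " rob_deps " << record.rob_dep_size()
                  << " reg_deps " << record.reg_dep_size() << "\n";
    }
}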
#define DPRINTFR(x,...)
Definition trace.hh:223
#define DPRINTF(x,...)
Definition trace.hh:209
A ProtoOutputStream wraps a coded stream, potentially with compression, based on looking at the file ...
Definition protoio.hh:91
const RegIndex & flatIndex() const
Flat index accessor.
Definition reg_class.hh:472
constexpr const char * className() const
Return a const char* with the register class name.
Definition reg_class.hh:281
ProbeListenerObject(const ProbeListenerObjectParams &params)
void connectListener(Args &&...args)
Register ID: describe an architectural register with its class and index.
Definition reg_class.hh:94
constexpr bool is(RegClassType reg_class) const
Definition reg_class.hh:275
O3CPU class, has each of the stages (fetch through commit) within it, as well as all of the time buff...
Definition cpu.hh:94
void compDelayRob(TraceInfo *past_record, TraceInfo *new_record)
Calculate the computational delay between an instruction and a subsequent instruction that has an ROB...
const bool traceVirtAddr
Whether to trace virtual addresses for memory requests.
bool hasCompCompleted(TraceInfo *past_record, Tick execute_tick) const
Check if past record is a comp node that completed earlier than the execute tick.
void updateIssueOrderDep(TraceInfo *new_record)
Reverse iterate through the graph, search for an issue order dependency for a new node and update the...
bool allProbesReg
Whether the elastic trace listener has been registered for all probes.
std::vector< TraceInfo * >::iterator depTraceItr
Typedef of iterator to the instruction dependency trace.
void addCommittedInst(const DynInstConstPtr &head_inst)
Add an instruction that is at the head of the ROB and is committed.
bool hasLoadCompleted(TraceInfo *past_record, Tick execute_tick) const
Check if past record is a load that completed earlier than the execute tick.
ProtoOutputStream * dataTraceStream
Protobuf output stream for data dependency trace.
void recordToCommTick(const DynInstConstPtr &dyn_inst)
Populate the timestamp field in an InstExecInfo object for an instruction in flight when it is execut...
std::unordered_map< InstSeqNum, InstExecInfo * > tempStore
Temporary store of InstExecInfo objects.
void writeDepTrace(uint32_t num_to_write)
Write out given number of records to the trace starting with the first record in depTrace and iterati...
std::vector< TraceInfo * > depTrace
The instruction dependency trace containing TraceInfo objects.
void clearTempStoreUntil(const DynInstConstPtr &head_inst)
Clear entries in the temporary store of execution info objects to free allocated memory until the pre...
uint32_t depWindowSize
The maximum distance for a dependency, set by a top-level parameter.
void assignRobDep(TraceInfo *past_record, TraceInfo *new_record)
The new_record has an order dependency on a past_record, thus update the new record's Rob dependency ...
ElasticTrace(const ElasticTraceParams &params)
Constructor.
CPU * cpu
Pointer to the O3CPU that is this listener's parent a.k.a.
bool firstWin
Used for checking the first window for processing and writing of dependency trace.
std::reverse_iterator< depTraceItr > depTraceRevItr
Typedef of the reverse iterator to the instruction dependency trace.
void addDepTraceRecord(const DynInstConstPtr &head_inst, InstExecInfo *exec_info_ptr, bool commit)
Add a record to the dependency trace depTrace which is a sequential container.
void compDelayPhysRegDep(TraceInfo *past_record, TraceInfo *new_record)
Calculate the computational delay between an instruction and a subsequent instruction that has a Phys...
void removeRegDepMapEntry(const SeqNumRegPair &inst_reg_pair)
When an instruction gets squashed the destination register mapped to it is freed up in the rename sta...
EventFunctionWrapper regEtraceListenersEvent
Event to trigger registering this listener for all probe points.
void regEtraceListeners()
Register all listeners.
std::pair< InstSeqNum, RegIndex > SeqNumRegPair
void recordExecTick(const DynInstConstPtr &dyn_inst)
Populate the execute timestamp field in an InstExecInfo object for an instruction in flight.
void regProbeListeners()
Register the probe listeners, that is, the methods called on a probe point notify() call.
std::unordered_map< RegIndex, InstSeqNum > physRegDepMap
Map for recording the producer of a physical register to check Read After Write dependencies.
std::unordered_map< InstSeqNum, TraceInfo * > traceInfoMap
Map where the instruction sequence number is mapped to the pointer to the TraceInfo object.
void addSquashedInst(const DynInstConstPtr &head_inst)
Add an instruction that is at the head of the ROB and is squashed only if it is a load and a request ...
bool hasLoadBeenSent(TraceInfo *past_record, Tick execute_tick) const
Check if past record is a load sent earlier than the execute tick.
void fetchReqTrace(const RequestPtr &req)
Take the fields of the request class object that are relevant to create an instruction fetch request.
InstSeqNum lastClearedSeqNum
The last cleared instruction sequence number used to free up the memory allocated in the temporary st...
gem5::o3::ElasticTrace::ElasticTraceStats stats
ProtoOutputStream * instTraceStream
Protobuf output stream for instruction fetch trace.
void updateRegDep(const DynInstConstPtr &dyn_inst)
Record a Read After Write physical register dependency if there has been a write to the source regist...
void flushTraces()
Process any outstanding trace records, flush them out to the protobuf output streams and delete the s...
void updateCommitOrderDep(TraceInfo *new_record, bool find_load_not_store)
Reverse iterate through the graph, search for a store-after-store or store-after-load dependency and ...
const InstSeqNum startTraceInst
Number of instructions after which to enable tracing.
bool hasStoreCommitted(TraceInfo *past_record, Tick execute_tick) const
Check if past record is a store sent earlier than the execute tick.
Statistics container.
Definition group.hh:93
#define ADD_STAT(n,...)
Convenience macro to add a stat to a statistics group.
Definition group.hh:75
Tick executeTick
Timestamp when instruction was first processed by execute stage.
Tick toCommitTick
Timestamp when instruction execution is completed in execute stage and instruction is marked as ready...
std::set< InstSeqNum > physRegDepSet
Set of instruction sequence numbers that this instruction depends on due to Read After Write data dep...
const std::string & typeToStr() const
Return string specifying the type of the node.
bool isLoad() const
Is the record a load.
std::list< InstSeqNum > robDepList
std::list< InstSeqNum > physRegDepList
int64_t compDelay
Computational delay after the last dependent inst.
bool isComp() const
Is the record a computation (non load/store) node.
RecordType type
The type of trace record for the instruction node.
bool isStore() const
Is the record a store.
#define fatal_if(cond,...)
Conditional fatal macro that checks the supplied condition and only causes a fatal error if the condi...
Definition logging.hh:268
const Params & params() const
#define inform(...)
Definition logging.hh:289
RefCountingPtr< const DynInst > DynInstConstPtr
Tick Frequency
The simulated frequency of curTick(). (In ticks per second)
Definition core.cc:47
Units for Stats.
Definition units.hh:113
Copyright (c) 2024 Arm Limited All rights reserved.
Definition binary32.hh:36
std::shared_ptr< Request > RequestPtr
Definition request.hh:94
Tick curTick()
The universal simulation clock.
Definition cur_tick.hh:46
PhysRegId * PhysRegIdPtr
Definition reg_class.hh:510
OutputDirectory simout
Definition output.cc:62
uint64_t Tick
Tick count type.
Definition types.hh:58
const Tick MaxTick
Definition types.hh:60
void registerExitCallback(const std::function< void()> &callback)
Register an exit callback.
Definition core.cc:143
constexpr decltype(nullptr) NoFault
Definition types.hh:253
uint64_t InstSeqNum
Definition inst_seq.hh:40
@ InvalidRegClass
Definition reg_class.hh:71
@ MiscRegClass
Control (misc) register.
Definition reg_class.hh:70
Declaration of the Packet class.
statistics::Scalar maxNumDependents
Maximum number of dependents on any instruction.
statistics::Scalar maxTempStoreSize
Maximum size of the temporary store, mostly useful as a check that it is not growing.
statistics::Scalar numIssueOrderDepLoads
Number of load insts that got assigned an issue order dependency because they were dependency-free.
statistics::Scalar numIssueOrderDepStores
Number of store insts that got assigned an issue order dependency because they were dependency-free.
ElasticTraceStats(statistics::Group *parent)
statistics::Scalar numRegDep
Number of register dependencies recorded during tracing.
statistics::Scalar numFilteredNodes
Number of filtered nodes.
statistics::Scalar maxPhysRegDepMapSize
Maximum size of the map that holds the last writer to a physical register.
statistics::Scalar numOrderDepStores
Number of stores that got assigned a commit order dependency on a past load/store.
statistics::Scalar numIssueOrderDepOther
Number of non load/store insts that got assigned an issue order dependency because they were dependen...
Tick getExecuteTick() const
Get the execute tick of the instruction.
const std::string & name()
Definition trace.cc:48
