gem5 v24.0.0.0
Loading...
Searching...
No Matches
elastic_trace.cc
Go to the documentation of this file.
1/*
2 * Copyright (c) 2013 - 2015 ARM Limited
3 * All rights reserved
4 *
5 * The license below extends only to copyright in the software and shall
6 * not be construed as granting a license to any other intellectual
7 * property including but not limited to intellectual property relating
8 * to a hardware implementation of the functionality of the software
9 * licensed hereunder. You may use the software subject to the license
10 * terms below provided that you ensure that this notice is replicated
11 * unmodified and in its entirety in all distributions of the software,
12 * modified or unmodified, in source code or in binary form.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions are
16 * met: redistributions of source code must retain the above copyright
17 * notice, this list of conditions and the following disclaimer;
18 * redistributions in binary form must reproduce the above copyright
19 * notice, this list of conditions and the following disclaimer in the
20 * documentation and/or other materials provided with the distribution;
21 * neither the name of the copyright holders nor the names of its
22 * contributors may be used to endorse or promote products derived from
23 * this software without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 */
37
39
40#include "base/callback.hh"
41#include "base/output.hh"
42#include "base/trace.hh"
43#include "cpu/o3/dyn_inst.hh"
44#include "cpu/reg_class.hh"
45#include "debug/ElasticTrace.hh"
46#include "mem/packet.hh"
47
48namespace gem5
49{
50
51namespace o3
52{
53
54ElasticTrace::ElasticTrace(const ElasticTraceParams &params)
55 : ProbeListenerObject(params),
56 regEtraceListenersEvent([this]{ regEtraceListeners(); }, name()),
57 firstWin(true),
58 lastClearedSeqNum(0),
59 depWindowSize(params.depWindowSize),
60 dataTraceStream(nullptr),
61 instTraceStream(nullptr),
62 startTraceInst(params.startTraceInst),
63 allProbesReg(false),
64 traceVirtAddr(params.traceVirtAddr),
65 stats(this)
66{
67 cpu = dynamic_cast<CPU *>(params.manager);
68
69 fatal_if(!cpu, "Manager of %s is not of type O3CPU and thus does not "\
70 "support dependency tracing.\n", name());
71
72 fatal_if(depWindowSize == 0, "depWindowSize parameter must be non-zero. "\
73 "Recommended size is 3x ROB size in the O3CPU.\n");
74
75 fatal_if(cpu->numThreads > 1, "numThreads = %i, %s supports tracing for"\
76 "single-threaded workload only", cpu->numThreads, name());
77 // Initialize the protobuf output stream
78 fatal_if(params.instFetchTraceFile == "", "Assign instruction fetch "\
79 "trace file path to instFetchTraceFile");
80 fatal_if(params.dataDepTraceFile == "", "Assign data dependency "\
81 "trace file path to dataDepTraceFile");
82 std::string filename = simout.resolve(name() + "." +
83 params.instFetchTraceFile);
84 instTraceStream = new ProtoOutputStream(filename);
85 filename = simout.resolve(name() + "." + params.dataDepTraceFile);
86 dataTraceStream = new ProtoOutputStream(filename);
87 // Create a protobuf message for the header and write it to the stream
88 ProtoMessage::PacketHeader inst_pkt_header;
89 inst_pkt_header.set_obj_id(name());
90 inst_pkt_header.set_tick_freq(sim_clock::Frequency);
91 instTraceStream->write(inst_pkt_header);
92 // Create a protobuf message for the header and write it to
93 // the stream
94 ProtoMessage::InstDepRecordHeader data_rec_header;
95 data_rec_header.set_obj_id(name());
96 data_rec_header.set_tick_freq(sim_clock::Frequency);
97 data_rec_header.set_window_size(depWindowSize);
98 dataTraceStream->write(data_rec_header);
99 // Register a callback to flush trace records and close the output streams.
100 registerExitCallback([this]() { flushTraces(); });
101}
102
// Decide when to attach the elastic-trace probe listeners: immediately when
// startTraceInst == 0, otherwise via a scheduled instruction-count event.
// NOTE(review): this is a Doxygen extraction; the hyperlinked original lines
// 104, 107, 111 and 115-116 were dropped — presumably the function signature,
// the inform() argument line, and the call made on each branch. Not
// compilable as shown; confirm against the upstream source.
103void
105{
106    inform("@%llu: regProbeListeners() called, startTraceInst = %llu",
108    if (startTraceInst == 0) {
109        // If we want to start tracing from the start of the simulation,
110        // register all elastic trace probes now.
112    } else {
113        // Schedule an event to register all elastic trace probes when
114        // specified no. of instructions are committed.
117    }
118}
119
// Attach one probe listener per O3 pipeline probe point (FetchRequest,
// Execute, ToCommit, Rename, SquashInRename, Squash, Commit) and mark all
// probes as registered.
// NOTE(review): Doxygen dropped original lines 121 (signature) and 128, 130,
// 132-133, 135-136, 138-139, 141, 143-144, 146 — the listener-construction
// lines that named hyperlinked types. Only fragments of each
// listeners.push_back(new ProbeListenerArg<...>) call remain visible.
120void
122{
123    assert(!allProbesReg);
124    inform("@%llu: No. of instructions committed = %llu, registering elastic"
125           " probe listeners", curTick(), cpu->numSimulatedInsts());
126    // Create new listeners: provide method to be called upon a notify() for
127    // each probe point.
129            "FetchRequest", &ElasticTrace::fetchReqTrace));
131                                  DynInstConstPtr>(this, "Execute",
134                                  DynInstConstPtr>(this, "ToCommit",
137                                  DynInstConstPtr>(this, "Rename",
140        "SquashInRename", &ElasticTrace::removeRegDepMapEntry));
142                                  DynInstConstPtr>(this, "Squash",
145                                  DynInstConstPtr>(this, "Commit",
147    allProbesReg = true;
148}
149
// Serialize one instruction fetch request (PC, flags, physical address, size,
// current tick) as a protobuf Packet and append it to the instruction trace
// stream, so the TraceCPU can later replay the fetch.
// NOTE(review): Doxygen dropped original lines 151 (signature — takes the
// fetch RequestPtr, judging by the uses of `req` below) and 155.
150void
152{
153
154    DPRINTFR(ElasticTrace, "Fetch Req %i,(%lli,%lli,%lli),%i,%i,%lli\n",
156             req->getPC(), req->getVaddr(), req->getPaddr(),
157             req->getFlags(), req->getSize(), curTick());
158
159    // Create a protobuf message including the request fields necessary to
160    // recreate the request in the TraceCPU.
161    ProtoMessage::Packet inst_fetch_pkt;
162    inst_fetch_pkt.set_tick(curTick());
163    inst_fetch_pkt.set_cmd(MemCmd::ReadReq);
164    inst_fetch_pkt.set_pc(req->getPC());
165    inst_fetch_pkt.set_flags(req->getFlags());
166    inst_fetch_pkt.set_addr(req->getPaddr());
167    inst_fetch_pkt.set_size(req->getSize());
168    // Write the message to the stream.
169    instTraceStream->write(inst_fetch_pkt);
170}
171
// Record the tick at which an instruction executes: create (or reuse) the
// instruction's InstExecInfo entry in tempStore and stamp executeTick.
// Instructions at or below lastClearedSeqNum are ignored (already retired).
// NOTE(review): Doxygen dropped original line 173 (the signature; the body
// uses `dyn_inst`, consistent with a DynInstConstPtr parameter).
172void
174{
175
176    // In a corner case, a retired instruction is propagated backward to the
177    // IEW instruction queue to handle some side-channel information. But we
178    // must not process an instruction again. So we test the sequence number
179    // against the lastClearedSeqNum and skip adding the instruction for such
180    // corner cases.
181    if (dyn_inst->seqNum <= lastClearedSeqNum) {
182        DPRINTFR(ElasticTrace, "[sn:%lli] Ignoring in execute as instruction \
183        has already retired (mostly squashed)", dyn_inst->seqNum);
184        // Do nothing as program has proceeded and this inst has been
185        // propagated backwards to handle something.
186        return;
187    }
188
189    DPRINTFR(ElasticTrace, "[sn:%lli] Execute Tick = %i\n", dyn_inst->seqNum,
190             curTick());
191    // Either the execution info object will already exist if this
192    // instruction had a register dependency recorded in the rename probe
193    // listener before entering execute stage or it will not exist and will
194    // need to be created here.
195    InstExecInfo* exec_info_ptr;
196    auto itr_exec_info = tempStore.find(dyn_inst->seqNum);
197    if (itr_exec_info != tempStore.end()) {
198        exec_info_ptr = itr_exec_info->second;
199    } else {
200        exec_info_ptr = new InstExecInfo;
201        tempStore[dyn_inst->seqNum] = exec_info_ptr;
202    }
203
204    exec_info_ptr->executeTick = curTick();
205    stats.maxTempStoreSize = std::max(tempStore.size(),
206                                    (std::size_t)stats.maxTempStoreSize.value());
207}
208
// Record the tick at which an instruction is sent to commit: stamp
// toCommitTick in the instruction's tempStore entry. If the instruction is
// not in tempStore (tracing enabled mid-run), silently skip it.
// NOTE(review): Doxygen dropped original line 210 (the signature; the body
// uses `dyn_inst`).
209void
211{
212    // If tracing has just been enabled then the instruction at this stage of
213    // execution is far enough that we cannot gather info about its past like
214    // the tick it started execution. Simply return until we see an instruction
215    // that is found in the tempStore.
216    auto itr_exec_info = tempStore.find(dyn_inst->seqNum);
217    if (itr_exec_info == tempStore.end()) {
218        DPRINTFR(ElasticTrace, "recordToCommTick: [sn:%lli] Not in temp store,"
219                 " skipping.\n", dyn_inst->seqNum);
220        return;
221    }
222
223    DPRINTFR(ElasticTrace, "[sn:%lli] To Commit Tick = %i\n", dyn_inst->seqNum,
224             curTick());
225    InstExecInfo* exec_info_ptr = itr_exec_info->second;
226    exec_info_ptr->toCommitTick = curTick();
227
228}
229
// Rename-stage probe handler: create the instruction's InstExecInfo entry,
// record a RAW dependency on the last writer of each source physical register
// (if within depWindowSize), then update physRegDepMap so this instruction
// becomes the last writer of each of its destination registers.
// NOTE(review): Doxygen dropped original line 231 (the signature; the body
// uses `dyn_inst`).
230void
232{
233    // Get the sequence number of the instruction
234    InstSeqNum seq_num = dyn_inst->seqNum;
235
236    assert(dyn_inst->seqNum > lastClearedSeqNum);
237
238    // Since this is the first probe activated in the pipeline, create
239    // a new execution info object to track this instruction as it
240    // progresses through the pipeline.
241    InstExecInfo* exec_info_ptr = new InstExecInfo;
242    tempStore[seq_num] = exec_info_ptr;
243
244    // Loop through the source registers and look up the dependency map. If
245    // the source register entry is found in the dependency map, add a
246    // dependency on the last writer.
    // NOTE(review): max_regs is int8_t but numSrcRegs()/numDestRegs() could in
    // principle exceed 127 — looks like a latent narrowing issue; confirm the
    // ISA's register-count bounds before relying on it.
247    int8_t max_regs = dyn_inst->numSrcRegs();
248    for (int src_idx = 0; src_idx < max_regs; src_idx++) {
249
250        const RegId& src_reg = dyn_inst->srcRegIdx(src_idx);
251        if (!src_reg.is(MiscRegClass) && !src_reg.is(InvalidRegClass)) {
252            // Get the physical register index of the i'th source register.
253            PhysRegIdPtr phys_src_reg = dyn_inst->renamedSrcIdx(src_idx);
254            DPRINTFR(ElasticTrace, "[sn:%lli] Check map for src reg"
255                     " %i (%s)\n", seq_num,
256                     phys_src_reg->flatIndex(), phys_src_reg->className());
257            auto itr_writer = physRegDepMap.find(phys_src_reg->flatIndex());
258            if (itr_writer != physRegDepMap.end()) {
259                InstSeqNum last_writer = itr_writer->second;
260                // Additionally the dependency distance is kept less than the
261                // window size parameter to limit the memory allocation to
262                // nodes in the graph. If the window were tending to infinite
263                // we would have to load a large number of node objects during
264                // replay.
265                if (seq_num - last_writer < depWindowSize) {
266                    // Record a physical register dependency.
267                    exec_info_ptr->physRegDepSet.insert(last_writer);
268                }
269            }
270
271        }
272
273    }
274
275    // Loop through the destination registers of this instruction and update
276    // the physical register dependency map for last writers to registers.
277    max_regs = dyn_inst->numDestRegs();
278    for (int dest_idx = 0; dest_idx < max_regs; dest_idx++) {
279        // For data dependency tracking the register must be an int, float or
280        // CC register and not a Misc register.
281        const RegId& dest_reg = dyn_inst->destRegIdx(dest_idx);
282        if (!dest_reg.is(MiscRegClass) && !dest_reg.is(InvalidRegClass)) {
283            // Get the physical register index of the i'th destination
284            // register.
285            PhysRegIdPtr phys_dest_reg =
286                dyn_inst->renamedDestIdx(dest_idx);
287            DPRINTFR(ElasticTrace, "[sn:%lli] Update map for dest reg"
288                     " %i (%s)\n", seq_num, phys_dest_reg->flatIndex(),
289                     dest_reg.className());
290            physRegDepMap[phys_dest_reg->flatIndex()] = seq_num;
291        }
292    }
293    stats.maxPhysRegDepMapSize = std::max(physRegDepMap.size(),
294        (std::size_t)stats.maxPhysRegDepMapSize.value());
295}
296
// SquashInRename probe handler: drop the squashed instruction's last-writer
// entry from physRegDepMap so later readers do not record a dependency on it.
// NOTE(review): Doxygen dropped original line 298 (the signature; the body
// uses `inst_reg_pair`, a seq-num/register pair).
297void
299{
300    DPRINTFR(ElasticTrace, "Remove Map entry for Reg %i\n",
301            inst_reg_pair.second);
302    auto itr_regdep_map = physRegDepMap.find(inst_reg_pair.second);
303    if (itr_regdep_map != physRegDepMap.end())
304        physRegDepMap.erase(itr_regdep_map);
305}
306
// Squash probe handler: a squashed load that already issued its read request
// (executeTick/toCommitTick stamped, has a request, no fault) is still added
// to the trace (commit = false); everything else is just cleaned out of the
// temporary store.
// NOTE(review): Doxygen dropped original line 308 (the signature; the body
// uses `head_inst`).
307void
309{
310    // If the squashed instruction was squashed before being processed by
311    // execute stage then it will not be in the temporary store. In this case
312    // do nothing and return.
313    auto itr_exec_info = tempStore.find(head_inst->seqNum);
314    if (itr_exec_info == tempStore.end())
315        return;
316
317    // If there is a squashed load for which a read request was
318    // sent before it got squashed then add it to the trace.
319    DPRINTFR(ElasticTrace, "Attempt to add squashed inst [sn:%lli]\n",
320            head_inst->seqNum);
321    // Get pointer to the execution info object corresponding to the inst.
322    InstExecInfo* exec_info_ptr = itr_exec_info->second;
323    if (head_inst->isLoad() && exec_info_ptr->executeTick != MaxTick &&
324        exec_info_ptr->toCommitTick != MaxTick &&
325        head_inst->hasRequest() &&
326        head_inst->getFault() == NoFault) {
327        // Add record to depTrace with commit parameter as false.
328        addDepTraceRecord(head_inst, exec_info_ptr, false);
329    }
330    // As the information contained is no longer needed, remove the execution
331    // info object from the temporary store.
332    clearTempStoreUntil(head_inst);
333}
334
// Commit probe handler: add a committed, non-NOP instruction to the
// dependency trace (commit = true), skipping faulted instructions, mem refs
// without a request, and predicated-false instructions; always clears the
// instruction's tempStore entries afterwards.
// NOTE(review): Doxygen dropped original line 336 (the signature; the body
// uses `head_inst`).
335void
337{
338    DPRINTFR(ElasticTrace, "Attempt to add committed inst [sn:%lli]\n",
339            head_inst->seqNum);
340
341    // Add the instruction to the depTrace.
342    if (!head_inst->isNop()) {
343
344        // If tracing has just been enabled then the instruction at this stage
345        // of execution is far enough that we cannot gather info about its past
346        // like the tick it started execution. Simply return until we see an
347        // instruction that is found in the tempStore.
348        auto itr_temp_store = tempStore.find(head_inst->seqNum);
349        if (itr_temp_store == tempStore.end()) {
350            DPRINTFR(ElasticTrace, "addCommittedInst: [sn:%lli] Not in temp "
351                     "store, skipping.\n", head_inst->seqNum);
352            return;
353        }
354
355        // Get pointer to the execution info object corresponding to the inst.
356        InstExecInfo* exec_info_ptr = itr_temp_store->second;
357        assert(exec_info_ptr->executeTick != MaxTick);
358        assert(exec_info_ptr->toCommitTick != MaxTick);
359
360        // Check if the instruction had a fault, if it predicated false and
361        // thus previous register values were restored or if it was a
362        // load/store that did not have a request (e.g. when the size of the
363        // request is zero). In all these cases the instruction is set as
364        // executed and is picked up by the commit probe listener. But a
365        // request is not issued and registers are not written. So practically,
366        // skipping these should not hurt as execution would not stall on them.
367        // Alternatively, these could be included merely as a compute node in
368        // the graph. Removing these for now. If correlation accuracy needs to
369        // be improved in future these can be turned into comp nodes at the
370        // cost of bigger traces.
371        if (head_inst->getFault() != NoFault) {
372            DPRINTF(ElasticTrace, "%s [sn:%lli] has faulted so "
373                    "skip adding it to the trace\n",
374                    (head_inst->isMemRef() ? "Load/store" : "Comp inst."),
375                    head_inst->seqNum);
376        } else if (head_inst->isMemRef() && !head_inst->hasRequest()) {
377            DPRINTF(ElasticTrace, "Load/store [sn:%lli] has no request so "
378                    "skip adding it to the trace\n", head_inst->seqNum);
379        } else if (!head_inst->readPredicate()) {
380            DPRINTF(ElasticTrace, "%s [sn:%lli] is predicated false so "
381                    "skip adding it to the trace\n",
382                    (head_inst->isMemRef() ? "Load/store" : "Comp inst."),
383                    head_inst->seqNum);
384        } else {
385            // Add record to depTrace with commit parameter as true.
386            addDepTraceRecord(head_inst, exec_info_ptr, true);
387        }
388    }
389    // As the information contained is no longer needed, remove the execution
390    // info object from the temporary store.
391    clearTempStoreUntil(head_inst);
392}
393
// Build a TraceInfo node for an instruction (type, addresses, timing, flags),
// attach its register and ROB/order dependencies, and append it to depTrace.
// When depTrace reaches twice depWindowSize, the older half is written out.
// NOTE(review): Doxygen dropped original lines 395 (first line of the
// signature — the head_inst parameter), 461 (likely a stat increment for the
// recorded register dependency) and 514 (the writeDepTrace call inside the
// final if — its effect is described by the surrounding comments). Confirm
// against the upstream source.
394void
396                                InstExecInfo* exec_info_ptr, bool commit)
397{
398    // Create a record to assign dynamic instruction related fields.
399    TraceInfo* new_record = new TraceInfo;
400    // Add to map for sequence number look up to retrieve the TraceInfo pointer
401    traceInfoMap[head_inst->seqNum] = new_record;
402
403    // Assign fields from the instruction
404    new_record->instNum = head_inst->seqNum;
405    new_record->commit = commit;
406    new_record->type = head_inst->isLoad() ? Record::LOAD :
407                        (head_inst->isStore() ? Record::STORE :
408                        Record::COMP);
409
410    // Assign fields for creating a request in case of a load/store
411    new_record->reqFlags = head_inst->memReqFlags;
412    new_record->virtAddr = head_inst->effAddr;
413    new_record->physAddr = head_inst->physEffAddr;
414    // Currently the tracing does not support split requests.
415    new_record->size = head_inst->effSize;
416    new_record->pc = head_inst->pcState().instAddr();
417
418    // Assign the timing information stored in the execution info object
419    new_record->executeTick = exec_info_ptr->executeTick;
420    new_record->toCommitTick = exec_info_ptr->toCommitTick;
421    new_record->commitTick = curTick();
422
423    // Assign initial values for number of dependents and computational delay
424    new_record->numDepts = 0;
425    new_record->compDelay = -1;
426
427    // The physical register dependency set of the first instruction is
428    // empty. Since there are no records in the depTrace at this point, the
429    // case of adding an ROB dependency by using a reverse iterator is not
430    // applicable. Thus, populate the fields of the record corresponding to the
431    // first instruction and return.
432    if (depTrace.empty()) {
433        // Store the record in depTrace.
434        depTrace.push_back(new_record);
435        DPRINTF(ElasticTrace, "Added first inst record %lli to DepTrace.\n",
436                new_record->instNum);
437        return;
438    }
439
440    // Clear register dependencies for squashed loads as they may be dependent
441    // on squashed instructions and we do not add those to the trace.
442    if (head_inst->isLoad() && !commit) {
443        (exec_info_ptr->physRegDepSet).clear();
444    }
445
446    // Assign the register dependencies stored in the execution info object
447    std::set<InstSeqNum>::const_iterator dep_set_it;
448    for (dep_set_it = (exec_info_ptr->physRegDepSet).begin();
449         dep_set_it != (exec_info_ptr->physRegDepSet).end();
450         ++dep_set_it) {
451        auto trace_info_itr = traceInfoMap.find(*dep_set_it);
452        if (trace_info_itr != traceInfoMap.end()) {
453            // The register dependency is valid. Assign it and calculate
454            // computational delay
455            new_record->physRegDepList.push_back(*dep_set_it);
456            DPRINTF(ElasticTrace, "Inst %lli has register dependency on "
457                    "%lli\n", new_record->instNum, *dep_set_it);
458            TraceInfo* reg_dep = trace_info_itr->second;
459            reg_dep->numDepts++;
460            compDelayPhysRegDep(reg_dep, new_record);
462        } else {
463            // The instruction that this has a register dependency on was
464            // not added to the trace because of one of the following
465            // 1. it was an instruction that had a fault
466            // 2. it was an instruction that was predicated false and
467            // previous register values were restored
468            // 3. it was load/store that did not have a request (e.g. when
469            // the size of the request is zero but this may not be a fault)
470            // In all these cases the instruction is set as executed and is
471            // picked up by the commit probe listener. But a request is not
472            // issued and registers are not written to in these cases.
473            DPRINTF(ElasticTrace, "Inst %lli has register dependency on "
474                    "%lli is skipped\n",new_record->instNum, *dep_set_it);
475        }
476    }
477
478    // Check for and assign an ROB dependency in addition to register
479    // dependency before adding the record to the trace.
480    // As stores have to commit in order a store is dependent on the last
481    // committed load/store. This is recorded in the ROB dependency.
482    if (head_inst->isStore()) {
483        // Look up store-after-store order dependency
484        updateCommitOrderDep(new_record, false);
485        // Look up store-after-load order dependency
486        updateCommitOrderDep(new_record, true);
487    }
488
489    // In case a node is dependency-free or its dependency got discarded
490    // because it was outside the window, it is marked ready in the ROB at the
491    // time of issue. A request is sent as soon as possible. To model this, a
492    // node is assigned an issue order dependency on a committed instruction
493    // that completed earlier than it. This is done to avoid the problem of
494    // determining the issue times of such dependency-free nodes during replay
495    // which could lead to too much parallelism, thinking conservatively.
496    if (new_record->robDepList.empty() && new_record->physRegDepList.empty()) {
497        updateIssueOrderDep(new_record);
498    }
499
500    // Store the record in depTrace.
501    depTrace.push_back(new_record);
502    DPRINTF(ElasticTrace, "Added %s inst %lli to DepTrace.\n",
503            (commit ? "committed" : "squashed"), new_record->instNum);
504
505    // To process the number of records specified by depWindowSize in the
506    // forward direction, the depTrace must have twice as many records
507    // to check for dependencies.
508    if (depTrace.size() == 2 * depWindowSize) {
509
510        DPRINTF(ElasticTrace, "Writing out trace...\n");
511
512        // Write out the records which have been processed to the trace
513        // and remove them from the depTrace.
515
516        // After the first window, writeDepTrace() must check for valid
517        // compDelay.
518        firstWin = false;
519    }
520}
521
// Walk depTrace backwards (up to depWindowSize records) looking for the last
// committed store (find_load_not_store == false) or completed load (true)
// earlier than this store's commit tick, and record an ROB order dependency
// on it.
// NOTE(review): Doxygen dropped original lines 523 (first line of the
// signature — the new_record parameter), 544 and 553 (likely stat increments
// after each assignRobDep call). Confirm against the upstream source.
522void
524                                   bool find_load_not_store)
525{
526    assert(new_record->isStore());
527    // Iterate in reverse direction to search for the last committed
528    // load/store that completed earlier than the new record
529    depTraceRevItr from_itr(depTrace.end());
530    depTraceRevItr until_itr(depTrace.begin());
531    TraceInfo* past_record = *from_itr;
532    uint32_t num_go_back = 0;

533
534    // The execution time of this store is when it is sent, that is committed
535    Tick execute_tick = curTick();
536    // Search for store-after-load or store-after-store order dependency
537    while (num_go_back < depWindowSize && from_itr != until_itr) {
538        if (find_load_not_store) {
539            // Check if previous inst is a load completed earlier by comparing
540            // with execute tick
541            if (hasLoadCompleted(past_record, execute_tick)) {
542                // Assign rob dependency and calculate the computational delay
543                assignRobDep(past_record, new_record);
545                return;
546            }
547        } else {
548            // Check if previous inst is a store sent earlier by comparing with
549            // execute tick
550            if (hasStoreCommitted(past_record, execute_tick)) {
551                // Assign rob dependency and calculate the computational delay
552                assignRobDep(past_record, new_record);
554                return;
555            }
556        }
557        ++from_itr;
558        past_record = *from_itr;
559        ++num_go_back;
560    }
561}
562
// For a dependency-free record, walk depTrace backwards (up to depWindowSize
// records) for the most recent committed record that completed before this
// record's execute tick and record an issue-order ROB dependency on it.
// NOTE(review): Doxygen dropped original lines 564 (the signature — takes
// new_record), 578, 582 and 586 (likely the per-type issue-order stat
// increments). Confirm against the upstream source.
563void
565{
566    // Iterate in reverse direction to search for the last committed
567    // record that completed earlier than the new record
568    depTraceRevItr from_itr(depTrace.end());
569    depTraceRevItr until_itr(depTrace.begin());
570    TraceInfo* past_record = *from_itr;
571
572    uint32_t num_go_back = 0;
573    Tick execute_tick = 0;
574
575    if (new_record->isLoad()) {
576        // The execution time of a load is when a request is sent
577        execute_tick = new_record->executeTick;
579    } else if (new_record->isStore()) {
580        // The execution time of a store is when it is sent, i.e. committed
581        execute_tick = curTick();
583    } else {
584        // The execution time of a non load/store is when it completes
585        execute_tick = new_record->toCommitTick;
587    }
588
589    // We search if this record has an issue order dependency on a past record.
590    // Once we find it, we update both the new record and the record it depends
591    // on and return.
592    while (num_go_back < depWindowSize && from_itr != until_itr) {
593        // Check if a previous inst is a load sent earlier, or a store sent
594        // earlier, or a comp inst completed earlier by comparing with execute
595        // tick
596        if (hasLoadBeenSent(past_record, execute_tick) ||
597            hasStoreCommitted(past_record, execute_tick) ||
598            hasCompCompleted(past_record, execute_tick)) {
599            // Assign rob dependency and calculate the computational delay
600            assignRobDep(past_record, new_record);
601            return;
602        }
603        ++from_itr;
604        past_record = *from_itr;
605        ++num_go_back;
606    }
607}
608
// Record an ROB (order) dependency of new_record on past_record: append to
// the robDepList, update the computational delay, bump the past record's
// dependent count, and track the max-dependents stat.
// NOTE(review): Doxygen dropped original line 610 (the signature and opening
// brace; parameters are past_record and new_record per the body).
609void
611    DPRINTF(ElasticTrace, "%s %lli has ROB dependency on %lli\n",
612            new_record->typeToStr(), new_record->instNum,
613            past_record->instNum);
614    // Add dependency on past record
615    new_record->robDepList.push_back(past_record->instNum);
616    // Update new_record's compute delay with respect to the past record
617    compDelayRob(past_record, new_record);
618    // Increment number of dependents of the past record
619    ++(past_record->numDepts);
620    // Update stat to log max number of dependents
621    stats.maxNumDependents = std::max(past_record->numDepts,
622                                        (uint32_t)stats.maxNumDependents.value());
623}
624
// Predicate: past_record is a store that committed at or before execute_tick.
// NOTE(review): Doxygen dropped original line 626 (first line of the
// signature — the past_record parameter).
625bool
627                                 Tick execute_tick) const
628{
629    return (past_record->isStore() && past_record->commitTick <= execute_tick);
630}
631
// Predicate: past_record is a committed load whose completion (toCommitTick)
// is at or before execute_tick.
// NOTE(review): Doxygen dropped original line 633 (first line of the
// signature — the past_record parameter).
632bool
634                                Tick execute_tick) const
635{
636    return(past_record->isLoad() && past_record->commit &&
637        past_record->toCommitTick <= execute_tick);
638}
639
// Predicate: past_record is a committed load whose request was sent
// (executeTick) at or before execute_tick.
// NOTE(review): Doxygen dropped original line 641 (first line of the
// signature — the past_record parameter).
640bool
642                               Tick execute_tick) const
643{
644    // Check if previous inst is a load sent earlier than this
645    return (past_record->isLoad() && past_record->commit &&
646        past_record->executeTick <= execute_tick);
647}
648
// Predicate: past_record is a compute (non load/store) record that completed
// (toCommitTick) at or before execute_tick.
// NOTE(review): Doxygen dropped original line 650 (first line of the
// signature — the past_record parameter).
649bool
651                                Tick execute_tick) const
652{
653    return(past_record->isComp() && past_record->toCommitTick <= execute_tick);
654}
655
// Free and erase every tempStore entry with sequence number in
// (lastClearedSeqNum, head_inst->seqNum], then advance lastClearedSeqNum.
// NOTE(review): Doxygen dropped original line 657 (the signature; the body
// uses `head_inst`).
656void
658{
659    // Clear from temp store starting with the execution info object
660    // corresponding the head_inst and continue clearing by decrementing the
661    // sequence number until the last cleared sequence number.
662    InstSeqNum temp_sn = (head_inst->seqNum);
663    while (temp_sn > lastClearedSeqNum) {
664        auto itr_exec_info = tempStore.find(temp_sn);
665        if (itr_exec_info != tempStore.end()) {
666            InstExecInfo* exec_info_ptr = itr_exec_info->second;
667            // Free allocated memory for the info object
668            delete exec_info_ptr;
669            // Remove entry from temporary store
670            tempStore.erase(itr_exec_info);
671        }
672        temp_sn--;
673    }
674    // Update the last cleared sequence number to that of the head_inst
675    lastClearedSeqNum = head_inst->seqNum;
676}
677
// Compute the delay between past_record's completion tick (which tick counts
// as "completion" depends on the record types) and new_record's execute tick,
// and fold it into new_record->compDelay (kept as the minimum over all
// dependencies).
// NOTE(review): Doxygen dropped original line 679 (the signature; parameters
// are past_record and new_record per the body).
678void
680{
681    // The computation delay is the delay between the completion tick of the
682    // inst. pointed to by past_record and the execution tick of its dependent
683    // inst. pointed to by new_record.
684    int64_t comp_delay = -1;
685    Tick execution_tick = 0, completion_tick = 0;
686
687    DPRINTF(ElasticTrace, "Seq num %lli has ROB dependency on seq num %lli.\n",
688            new_record->instNum, past_record->instNum);
689
690    // Get the tick when the node is executed as per the modelling of
691    // computation delay
692    execution_tick = new_record->getExecuteTick();
693
694    if (past_record->isLoad()) {
695        if (new_record->isStore()) {
696            completion_tick = past_record->toCommitTick;
697        } else {
698            completion_tick = past_record->executeTick;
699        }
700    } else if (past_record->isStore()) {
701        completion_tick = past_record->commitTick;
702    } else if (past_record->isComp()){
703        completion_tick = past_record->toCommitTick;
704    }
705    assert(execution_tick >= completion_tick);
706    comp_delay = execution_tick - completion_tick;
707
708    DPRINTF(ElasticTrace, "Computational delay is %lli - %lli = %lli\n",
709            execution_tick, completion_tick, comp_delay);
710
711    // Assign the computational delay with respect to the dependency which
712    // completes the latest.
713    if (new_record->compDelay == -1)
714        new_record->compDelay = comp_delay;
715    else
716        new_record->compDelay = std::min(comp_delay, new_record->compDelay);
717    DPRINTF(ElasticTrace, "Final computational delay = %lli.\n",
718            new_record->compDelay);
719}
720
// Same as compDelayRob but for a physical register (RAW) dependency: the
// writer's completion is toCommitTick (commitTick for stores); fold the delay
// into new_record->compDelay as the minimum over all dependencies.
// NOTE(review): Doxygen dropped original line 722 (first line of the
// signature — the past_record parameter).
721void
723                                  TraceInfo* new_record)
724{
725    // The computation delay is the delay between the completion tick of the
726    // inst. pointed to by past_record and the execution tick of its dependent
727    // inst. pointed to by new_record.
728    int64_t comp_delay = -1;
729    Tick execution_tick = 0, completion_tick = 0;
730
731    DPRINTF(ElasticTrace, "Seq. num %lli has register dependency on seq. num"
732            " %lli.\n", new_record->instNum, past_record->instNum);
733
734    // Get the tick when the node is executed as per the modelling of
735    // computation delay
736    execution_tick = new_record->getExecuteTick();
737
738    // When there is a physical register dependency on an instruction, the
739    // completion tick of that instruction is when it wrote to the register,
740    // that is toCommitTick. In case, of a store updating a destination
741    // register, this is approximated to commitTick instead
742    if (past_record->isStore()) {
743        completion_tick = past_record->commitTick;
744    } else {
745        completion_tick = past_record->toCommitTick;
746    }
747    assert(execution_tick >= completion_tick);
748    comp_delay = execution_tick - completion_tick;
749    DPRINTF(ElasticTrace, "Computational delay is %lli - %lli = %lli\n",
750            execution_tick, completion_tick, comp_delay);
751
752    // Assign the computational delay with respect to the dependency which
753    // completes the latest.
754    if (new_record->compDelay == -1)
755        new_record->compDelay = comp_delay;
756    else
757        new_record->compDelay = std::min(comp_delay, new_record->compDelay);
758    DPRINTF(ElasticTrace, "Final computational delay = %lli.\n",
759            new_record->compDelay);
760}
761
// Return the tick that counts as this record's "execution" for delay
// modelling: executeTick for loads, commitTick for stores, toCommitTick
// otherwise.
// NOTE(review): Doxygen dropped original line 763 (the signature).
762Tick
764{
765    if (isLoad()) {
766        // Execution tick for a load instruction is when the request was sent,
767        // that is executeTick.
768        return executeTick;
769    } else if (isStore()) {
770        // Execution tick for a store instruction is when the request was sent,
771        // that is commitTick.
772        return commitTick;
773    } else {
774        // Execution tick for a non load/store instruction is when the register
775        // value was written to, that is toCommitTick.
776        return toCommitTick;
777    }
778}
779
// Serialize the first num_to_write records of depTrace as protobuf
// InstDepRecord messages, filtering out compute nodes with no dependents
// (their count becomes the next written node's weight), then erase and free
// the written records.
// NOTE(review): Doxygen dropped original line 874 in the else-branch —
// presumably the filtered-nodes stat increment. Confirm upstream.
780void
781ElasticTrace::writeDepTrace(uint32_t num_to_write)
782{
783    // Write the trace with fields as follows:
784    // Instruction sequence number
785    // If instruction was a load
786    // If instruction was a store
787    // If instruction has addr
788    // If instruction has size
789    // If instruction has flags
790    // List of order dependencies - optional, repeated
791    // Computational delay with respect to last completed dependency
792    // List of physical register RAW dependencies - optional, repeated
793    // Weight of a node equal to no. of filtered nodes before it - optional
794    uint16_t num_filtered_nodes = 0;
795    depTraceItr dep_trace_itr(depTrace.begin());
796    depTraceItr dep_trace_itr_start = dep_trace_itr;
797    while (num_to_write > 0) {
798        TraceInfo* temp_ptr = *dep_trace_itr;
799        assert(temp_ptr->type != Record::INVALID);
800        // If no node depends on a comp node then there is no reason to
801        // track the comp node in the dependency graph. We filter out such
802        // nodes but count them and add a weight field to the subsequent node
803        // that we do include in the trace.
804        if (!temp_ptr->isComp() || temp_ptr->numDepts != 0) {
805            DPRINTFR(ElasticTrace, "Instruction with seq. num %lli "
806                     "is as follows:\n", temp_ptr->instNum);
807            if (temp_ptr->isLoad() || temp_ptr->isStore()) {
808                DPRINTFR(ElasticTrace, "\tis a %s\n", temp_ptr->typeToStr());
809                DPRINTFR(ElasticTrace, "\thas a request with phys addr %i, "
810                         "size %i, flags %i\n", temp_ptr->physAddr,
811                         temp_ptr->size, temp_ptr->reqFlags);
812            } else {
813                 DPRINTFR(ElasticTrace, "\tis a %s\n", temp_ptr->typeToStr());
814            }
815            if (firstWin && temp_ptr->compDelay == -1) {
816                if (temp_ptr->isLoad()) {
817                    temp_ptr->compDelay = temp_ptr->executeTick;
818                } else if (temp_ptr->isStore()) {
819                    temp_ptr->compDelay = temp_ptr->commitTick;
820                } else {
821                    temp_ptr->compDelay = temp_ptr->toCommitTick;
822                }
823            }
824            assert(temp_ptr->compDelay != -1);
825            DPRINTFR(ElasticTrace, "\thas computational delay %lli\n",
826                     temp_ptr->compDelay);
827
828            // Create a protobuf message for the dependency record
829            ProtoMessage::InstDepRecord dep_pkt;
830            dep_pkt.set_seq_num(temp_ptr->instNum);
831            dep_pkt.set_type(temp_ptr->type);
832            dep_pkt.set_pc(temp_ptr->pc);
833            if (temp_ptr->isLoad() || temp_ptr->isStore()) {
834                dep_pkt.set_flags(temp_ptr->reqFlags);
835                dep_pkt.set_p_addr(temp_ptr->physAddr);
836                // If tracing of virtual addresses is enabled, set the optional
837                // field for it
838                if (traceVirtAddr)
839                    dep_pkt.set_v_addr(temp_ptr->virtAddr);
840                dep_pkt.set_size(temp_ptr->size);
841            }
842            dep_pkt.set_comp_delay(temp_ptr->compDelay);
843            if (temp_ptr->robDepList.empty()) {
844                DPRINTFR(ElasticTrace, "\thas no order (rob) dependencies\n");
845            }
846            while (!temp_ptr->robDepList.empty()) {
847                DPRINTFR(ElasticTrace, "\thas order (rob) dependency on %lli\n",
848                         temp_ptr->robDepList.front());
849                dep_pkt.add_rob_dep(temp_ptr->robDepList.front());
850                temp_ptr->robDepList.pop_front();
851            }
852            if (temp_ptr->physRegDepList.empty()) {
853                DPRINTFR(ElasticTrace, "\thas no register dependencies\n");
854            }
855            while (!temp_ptr->physRegDepList.empty()) {
856                DPRINTFR(ElasticTrace, "\thas register dependency on %lli\n",
857                         temp_ptr->physRegDepList.front());
858                dep_pkt.add_reg_dep(temp_ptr->physRegDepList.front());
859                temp_ptr->physRegDepList.pop_front();
860            }
861            if (num_filtered_nodes != 0) {
862                // Set the weight of this node as the no. of filtered nodes
863                // between this node and the last node that we wrote to output
864                // stream. The weight will be used during replay to model ROB
865                // occupancy of filtered nodes.
866                dep_pkt.set_weight(num_filtered_nodes);
867                num_filtered_nodes = 0;
868            }
869            // Write the message to the protobuf output stream
870            dataTraceStream->write(dep_pkt);
871        } else {
872            // Don't write the node to the trace but note that we have filtered
873            // out a node.
875            ++num_filtered_nodes;
876        }
877        dep_trace_itr++;
878        traceInfoMap.erase(temp_ptr->instNum);
879        delete temp_ptr;
880        num_to_write--;
881    }
882    depTrace.erase(dep_trace_itr_start, dep_trace_itr);
883}
884
// Statistics group constructor: registers all elastic-trace counters.
// NOTE(review): Doxygen dropped original line 885 (the constructor signature
// taking the parent statistics::Group). Also note the description string
// "Maximum number or dependents..." below reads oddly — likely intended as
// "number of dependents"; it is a user-visible stat description, flagged here
// rather than changed.
886    : statistics::Group(parent),
887      ADD_STAT(numRegDep, statistics::units::Count::get(),
888               "Number of register dependencies recorded during tracing"),
889      ADD_STAT(numOrderDepStores, statistics::units::Count::get(),
890               "Number of commit order (rob) dependencies for a store "
891               "recorded on a past load/store during tracing"),
892      ADD_STAT(numIssueOrderDepLoads, statistics::units::Count::get(),
893               "Number of loads that got assigned issue order dependency "
894               "because they were dependency-free"),
895      ADD_STAT(numIssueOrderDepStores, statistics::units::Count::get(),
896               "Number of stores that got assigned issue order dependency "
897               "because they were dependency-free"),
898      ADD_STAT(numIssueOrderDepOther, statistics::units::Count::get(),
899               "Number of non load/store insts that got assigned issue order "
900               "dependency because they were dependency-free"),
901      ADD_STAT(numFilteredNodes, statistics::units::Count::get(),
902               "No. of nodes filtered out before writing the output trace"),
903      ADD_STAT(maxNumDependents, statistics::units::Count::get(),
904               "Maximum number or dependents on any instruction"),
905      ADD_STAT(maxTempStoreSize, statistics::units::Count::get(),
906               "Maximum size of the temporary store during the run"),
907      ADD_STAT(maxPhysRegDepMapSize, statistics::units::Count::get(),
908               "Maximum size of register dependency map")
909{
910}
911
// Return the protobuf-generated name of this record's type (LOAD/STORE/COMP).
// NOTE(review): Doxygen dropped original line 913 (the signature).
912const std::string&
914{
915    return Record::RecordType_Name(type);
916}
917
// Exit callback (registered in the constructor): flush all remaining
// dependency records, then delete the protobuf streams, which closes the
// output files.
// NOTE(review): Doxygen dropped original line 919 (the signature).
918void
920{
921    // Write to trace all records in the depTrace.
922    writeDepTrace(depTrace.size());
923    // Delete the stream objects
924    delete dataTraceStream;
925    delete instTraceStream;
926}
927
928} // namespace o3
929} // namespace gem5
#define DPRINTFR(x,...)
Definition trace.hh:224
#define DPRINTF(x,...)
Definition trace.hh:210
A ProtoOutputStream wraps a coded stream, potentially with compression, based on looking at the file ...
Definition protoio.hh:91
void write(const google::protobuf::Message &msg)
Write a message to the stream, prepending it with the message size.
Definition protoio.cc:84
virtual ThreadContext * getContext(int tn)
Given a thread num get the thread context for it.
Definition base.hh:288
static Counter numSimulatedInsts()
Definition base.hh:609
std::string resolve(const std::string &name) const
Returns relative file names prepended with name of this directory.
Definition output.cc:204
Physical register ID.
Definition reg_class.hh:415
const RegIndex & flatIndex() const
Flat index accessor.
Definition reg_class.hh:472
constexpr const char * className() const
Return a const char* with the register class name.
Definition reg_class.hh:281
ProbeListenerArg generates a listener for the class of Arg and the class type T which is the class co...
Definition probe.hh:229
This class is a minimal wrapper around SimObject.
Definition probe.hh:108
std::vector< ProbeListener * > listeners
Definition probe.hh:111
If you want a reference counting pointer to a mutable object, create it like this:
Definition refcnt.hh:127
Register ID: describe an architectural register with its class and index.
Definition reg_class.hh:94
constexpr bool is(RegClassType reg_class) const
Definition reg_class.hh:275
constexpr const char * className() const
Return a const char* with the register class name.
Definition reg_class.hh:281
virtual void scheduleInstCountEvent(Event *event, Tick count)=0
O3CPU class, has each of the stages (fetch through commit) within it, as well as all of the time buff...
Definition cpu.hh:94
The elastic trace is a type of probe listener and listens to probe points in multiple stages of the O...
void compDelayRob(TraceInfo *past_record, TraceInfo *new_record)
Calculate the computational delay between an instruction and a subsequent instruction that has an ROB...
const bool traceVirtAddr
Whether to trace virtual addresses for memory requests.
bool hasCompCompleted(TraceInfo *past_record, Tick execute_tick) const
Check if past record is a comp node that completed earlier than the execute tick.
void updateIssueOrderDep(TraceInfo *new_record)
Reverse iterate through the graph, search for an issue order dependency for a new node and update the...
bool allProbesReg
Whether the elastic trace listener has been registered for all probes.
std::vector< TraceInfo * >::iterator depTraceItr
Typedef of iterator to the instruction dependency trace.
void addCommittedInst(const DynInstConstPtr &head_inst)
Add an instruction that is at the head of the ROB and is committed.
bool hasLoadCompleted(TraceInfo *past_record, Tick execute_tick) const
Check if past record is a load that completed earlier than the execute tick.
ProtoOutputStream * dataTraceStream
Protobuf output stream for data dependency trace.
void recordToCommTick(const DynInstConstPtr &dyn_inst)
Populate the timestamp field in an InstExecInfo object for an instruction in flight when it is execut...
std::unordered_map< InstSeqNum, InstExecInfo * > tempStore
Temporary store of InstExecInfo objects.
void writeDepTrace(uint32_t num_to_write)
Write out given number of records to the trace starting with the first record in depTrace and iterati...
std::vector< TraceInfo * > depTrace
The instruction dependency trace containing TraceInfo objects.
void clearTempStoreUntil(const DynInstConstPtr &head_inst)
Clear entries in the temporary store of execution info objects to free allocated memory until the pre...
uint32_t depWindowSize
The maximum distance for a dependency and is set by a top-level parameter.
void assignRobDep(TraceInfo *past_record, TraceInfo *new_record)
The new_record has an order dependency on a past_record, thus update the new record's Rob dependency ...
ElasticTrace(const ElasticTraceParams &params)
Constructor.
CPU * cpu
Pointer to the O3CPU that is this listener's parent a.k.a.
bool firstWin
Used for checking the first window for processing and writing of dependency trace.
std::reverse_iterator< depTraceItr > depTraceRevItr
Typedef of the reverse iterator to the instruction dependency trace.
void addDepTraceRecord(const DynInstConstPtr &head_inst, InstExecInfo *exec_info_ptr, bool commit)
Add a record to the dependency trace depTrace which is a sequential container.
void compDelayPhysRegDep(TraceInfo *past_record, TraceInfo *new_record)
Calculate the computational delay between an instruction and a subsequent instruction that has a Phys...
void removeRegDepMapEntry(const SeqNumRegPair &inst_reg_pair)
When an instruction gets squashed the destination register mapped to it is freed up in the rename sta...
EventFunctionWrapper regEtraceListenersEvent
Event to trigger registering this listener for all probe points.
void regEtraceListeners()
Register all listeners.
void recordExecTick(const DynInstConstPtr &dyn_inst)
Populate the execute timestamp field in an InstExecInfo object for an instruction in flight.
void regProbeListeners()
Register the probe listeners that is the methods called on a probe point notify() call.
std::unordered_map< RegIndex, InstSeqNum > physRegDepMap
Map for recording the producer of a physical register to check Read After Write dependencies.
std::unordered_map< InstSeqNum, TraceInfo * > traceInfoMap
Map where the instruction sequence number is mapped to the pointer to the TraceInfo object.
void addSquashedInst(const DynInstConstPtr &head_inst)
Add an instruction that is at the head of the ROB and is squashed only if it is a load and a request ...
bool hasLoadBeenSent(TraceInfo *past_record, Tick execute_tick) const
Check if past record is a load sent earlier than the execute tick.
void fetchReqTrace(const RequestPtr &req)
Take the fields of the request class object that are relevant to create an instruction fetch request.
InstSeqNum lastClearedSeqNum
The last cleared instruction sequence number used to free up the memory allocated in the temporary st...
gem5::o3::ElasticTrace::ElasticTraceStats stats
ProtoOutputStream * instTraceStream
Protobuf output stream for instruction fetch trace.
void updateRegDep(const DynInstConstPtr &dyn_inst)
Record a Read After Write physical register dependency if there has been a write to the source regist...
void flushTraces()
Process any outstanding trace records, flush them out to the protobuf output streams and delete the s...
void updateCommitOrderDep(TraceInfo *new_record, bool find_load_not_store)
Reverse iterate through the graph, search for a store-after-store or store-after-load dependency and ...
const InstSeqNum startTraceInst
Number of instructions after which to enable tracing.
bool hasStoreCommitted(TraceInfo *past_record, Tick execute_tick) const
Check if past record is a store sent earlier than the execute tick.
Statistics container.
Definition group.hh:93
Counter value() const
Return the current value of this stat as its base type.
STL pair class.
Definition stl.hh:58
#define ADD_STAT(n,...)
Convenience macro to add a stat to a statistics group.
Definition group.hh:75
Tick executeTick
Timestamp when instruction was first processed by execute stage.
Tick toCommitTick
Timestamp when instruction execution is completed in execute stage and instruction is marked as ready...
std::set< InstSeqNum > physRegDepSet
Set of instruction sequence numbers that this instruction depends on due to Read After Write data dep...
const std::string & typeToStr() const
Return string specifying the type of the node.
bool isLoad() const
Is the record a load.
std::list< InstSeqNum > robDepList
std::list< InstSeqNum > physRegDepList
int64_t compDelay
Computational delay after the last dependent inst.
bool isComp() const
Is the record a fetch triggering an Icache request.
RecordType type
The type of trace record for the instruction node.
bool isStore() const
Is the record a store.
#define fatal_if(cond,...)
Conditional fatal macro that checks the supplied condition and only causes a fatal error if the condi...
Definition logging.hh:236
#define inform(...)
Definition logging.hh:257
Tick Frequency
The simulated frequency of curTick(). (In ticks per second)
Definition core.cc:47
Copyright (c) 2024 - Pranith Kumar Copyright (c) 2020 Inria All rights reserved.
Definition binary32.hh:36
std::shared_ptr< Request > RequestPtr
Definition request.hh:94
Tick curTick()
The universal simulation clock.
Definition cur_tick.hh:46
OutputDirectory simout
Definition output.cc:62
uint64_t Tick
Tick count type.
Definition types.hh:58
const Tick MaxTick
Definition types.hh:60
void registerExitCallback(const std::function< void()> &callback)
Register an exit callback.
Definition core.cc:143
constexpr decltype(nullptr) NoFault
Definition types.hh:253
uint64_t InstSeqNum
Definition inst_seq.hh:40
@ InvalidRegClass
Definition reg_class.hh:71
@ MiscRegClass
Control (misc) register.
Definition reg_class.hh:70
Declaration of the Packet class.
statistics::Scalar maxNumDependents
Maximum number of dependents on any instruction.
statistics::Scalar maxTempStoreSize
Maximum size of the temporary store mostly useful as a check that it is not growing.
statistics::Scalar numIssueOrderDepLoads
Number of load insts that got assigned an issue order dependency because they were dependency-free.
statistics::Scalar numIssueOrderDepStores
Number of store insts that got assigned an issue order dependency because they were dependency-free.
ElasticTraceStats(statistics::Group *parent)
statistics::Scalar numRegDep
Number of register dependencies recorded during tracing.
statistics::Scalar numFilteredNodes
Number of filtered nodes.
statistics::Scalar maxPhysRegDepMapSize
Maximum size of the map that holds the last writer to a physical register.
statistics::Scalar numIssueOrderDepOther
Number of non load/store insts that got assigned an issue order dependency because they were dependen...
Tick getExecuteTick() const
Get the execute tick of the instruction.
const std::string & name()
Definition trace.cc:48

Generated on Tue Jun 18 2024 16:24:02 for gem5 by doxygen 1.11.0