gem5 v24.1.0.1
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
protocol_tester.cc
Go to the documentation of this file.
1/*
2 * Copyright (c) 2017-2021 Advanced Micro Devices, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 *
8 * 1. Redistributions of source code must retain the above copyright notice,
9 * this list of conditions and the following disclaimer.
10 *
11 * 2. Redistributions in binary form must reproduce the above copyright notice,
12 * this list of conditions and the following disclaimer in the documentation
13 * and/or other materials provided with the distribution.
14 *
15 * 3. Neither the name of the copyright holder nor the names of its
16 * contributors may be used to endorse or promote products derived from this
17 * software without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 * POSSIBILITY OF SUCH DAMAGE.
30 */
31
33
34#include <algorithm>
35#include <ctime>
36#include <fstream>
37
42#include "debug/ProtocolTest.hh"
43#include "mem/request.hh"
44#include "sim/sim_exit.hh"
45#include "sim/system.hh"
46
47namespace gem5
48{
49
52 _requestorId(p.system->getRequestorId(this)),
53 numCpuPorts(p.port_cpu_ports_connection_count),
54 numDmaPorts(p.port_dma_ports_connection_count),
55 numVectorPorts(p.port_cu_vector_ports_connection_count),
56 numSqcPorts(p.port_cu_sqc_ports_connection_count),
57 numScalarPorts(p.port_cu_scalar_ports_connection_count),
58 numTokenPorts(p.port_cu_token_ports_connection_count),
59 numCusPerSqc(p.cus_per_sqc),
60 numCusPerScalar(p.cus_per_scalar),
61 numWfsPerCu(p.wavefronts_per_cu),
62 numWisPerWf(p.workitems_per_wavefront),
63 numCuTokens(p.max_cu_tokens),
64 numAtomicLocs(p.num_atomic_locations),
65 numNormalLocsPerAtomic(p.num_normal_locs_per_atomic),
66 episodeLength(p.episode_length),
67 maxNumEpisodes(p.max_num_episodes),
68 debugTester(p.debug_tester),
69 cpuThreads(p.cpu_threads),
70 dmaThreads(p.dma_threads),
71 wfs(p.wavefronts)
72{
73 int idx = 0; // global port index
74
75 numCpus = numCpuPorts; // 1 cpu port per CPU
76 numDmas = numDmaPorts; // 1 dma port per DMA
77 numCus = numVectorPorts; // 1 vector port per CU
78
79 // create all physical cpu's data ports
80 for (int i = 0; i < numCpuPorts; ++i) {
81 DPRINTF(ProtocolTest, "Creating %s\n",
82 csprintf("%s-cpuPort%d", name(), i));
83 cpuPorts.push_back(new SeqPort(csprintf("%s-cpuPort%d", name(), i),
84 this, i, idx));
85 idx++;
86 }
87
88 // create all physical DMA data ports
89 for (int i = 0; i < numDmaPorts; ++i) {
90 DPRINTF(ProtocolTest, "Creating %s\n",
91 csprintf("%s-dmaPort%d", name(), i));
92 dmaPorts.push_back(new SeqPort(csprintf("%s-dmaPort%d", name(), i),
93 this, i, idx));
94 idx++;
95 }
96
97 // create all physical gpu's data ports
98 for (int i = 0; i < numVectorPorts; ++i) {
99 DPRINTF(ProtocolTest, "Creating %s\n",
100 csprintf("%s-cuVectorPort%d", name(), i));
101 cuVectorPorts.push_back(new SeqPort(csprintf("%s-cuVectorPort%d",
102 name(), i),
103 this, i, idx));
104 idx++;
105 }
106
107 for (int i = 0; i < numScalarPorts; ++i) {
108 DPRINTF(ProtocolTest, "Creating %s\n",
109 csprintf("%s-cuScalarPort%d", name(), i));
110 cuScalarPorts.push_back(new SeqPort(csprintf("%s-cuScalarPort%d",
111 name(), i),
112 this, i, idx));
113 idx++;
114 }
115
116 for (int i = 0; i < numSqcPorts; ++i) {
117 DPRINTF(ProtocolTest, "Creating %s\n",
118 csprintf("%s-cuSqcPort%d", name(), i));
119 cuSqcPorts.push_back(new SeqPort(csprintf("%s-cuSqcPort%d",
120 name(), i),
121 this, i, idx));
122 idx++;
123 }
124
125 for (int i = 0; i < numTokenPorts; ++i) {
126 cuTokenPorts.push_back(new GMTokenPort(csprintf("%s-cuTokenPort%d",
127 name(), i),
128 this, i));
130 cuTokenPorts[i]->setTokenManager(cuTokenManagers[i]);
131 }
132
133 // create an address manager
136 nextEpisodeId = 0;
137
138 if (!debugTester)
139 warn("Data race check is not enabled\n");
140
141 sentExitSignal = false;
142
143 // set random seed number, if specified.
144 // Note: random_m5 will use a fixed key if random_seed is not set.
145 // This ensures reproducible results across runs.
146 if (p.random_seed != 0) {
147 rng->init(p.random_seed);
148 } else {
149 warn(
150 "If `random_seed == 0` (or `random_seed` is unset) "
151 "ProtocolTester does not seed the RNG. This will NOT result in "
152 "the RNG generating different results each run. In this case the "
153 "RNG is seeded by a default value. This differs from behavior in "
154 "previous versions of gem5. Setting `random_seed` to a non-zero "
155 "value is strongly recommended."
156 );
157 }
158
159 actionCount = 0;
160
161 // create a new log file
162 logFile = simout.create(p.log_file);
163 assert(logFile);
164
165 // print test configs
166 std::stringstream ss;
167 ss << "GPU Ruby test's configurations" << std::endl
168 << "\tNumber of CPUs: " << numCpus << std::endl
169 << "\tNumber of DMAs: " << numDmas << std::endl
170 << "\tNumber of CUs: " << numCus << std::endl
171 << "\tNumber of wavefronts per CU: " << numWfsPerCu << std::endl
172 << "\tWavefront size: " << numWisPerWf << std::endl
173 << "\tNumber of atomic locations: " << numAtomicLocs << std::endl
174 << "\tNumber of non-atomic locations: "
175 << numNormalLocsPerAtomic * numAtomicLocs << std::endl
176 << "\tEpisode length: " << episodeLength << std::endl
177 << "\tTest length (max number of episodes): " << maxNumEpisodes
178 << std::endl
179 << "\tRandom seed: " << p.random_seed
180 << std::endl;
181
182 ccprintf(*(logFile->stream()), "%s", ss.str());
183 logFile->stream()->flush();
184}
185
187{
188 for (int i = 0; i < cpuPorts.size(); ++i)
189 delete cpuPorts[i];
190 for (int i = 0; i < dmaPorts.size(); ++i)
191 delete dmaPorts[i];
192 for (int i = 0; i < cuVectorPorts.size(); ++i)
193 delete cuVectorPorts[i];
194 for (int i = 0; i < cuScalarPorts.size(); ++i)
195 delete cuScalarPorts[i];
196 for (int i = 0; i < cuSqcPorts.size(); ++i)
197 delete cuSqcPorts[i];
198 delete addrManager;
199
200 // close the log file
202}
203
204void
206{
207 DPRINTF(ProtocolTest, "Attach threads to ports\n");
208
209 // connect cpu threads to cpu's ports
210 for (int cpu_id = 0; cpu_id < numCpus; ++cpu_id) {
211 cpuThreads[cpu_id]->attachTesterThreadToPorts(this,
212 static_cast<SeqPort*>(cpuPorts[cpu_id]));
213 cpuThreads[cpu_id]->scheduleWakeup();
214 cpuThreads[cpu_id]->scheduleDeadlockCheckEvent();
215 }
216
217 // connect dma threads to dma's ports
218 for (int dma_id = 0; dma_id < numDmas; ++dma_id) {
219 dmaThreads[dma_id]->attachTesterThreadToPorts(this,
220 static_cast<SeqPort*>(dmaPorts[dma_id]));
221 dmaThreads[dma_id]->scheduleWakeup();
222 dmaThreads[dma_id]->scheduleDeadlockCheckEvent();
223 }
224
225 // connect gpu wavefronts to gpu's ports
226 int wfId = 0;
227 int vectorPortId = 0;
228 int sqcPortId = 0;
229 int scalarPortId = 0;
230
231 for (int cu_id = 0; cu_id < numCus; ++cu_id) {
232 vectorPortId = cu_id;
233 sqcPortId = cu_id/numCusPerSqc;
234 scalarPortId = cu_id/numCusPerScalar;
235
236 for (int i = 0; i < numWfsPerCu; ++i) {
237 wfId = cu_id * numWfsPerCu + i;
238 wfs[wfId]->attachTesterThreadToPorts(this,
239 static_cast<SeqPort*>(cuVectorPorts[vectorPortId]),
240 cuTokenPorts[vectorPortId],
241 static_cast<SeqPort*>(cuSqcPorts[sqcPortId]),
242 static_cast<SeqPort*>(cuScalarPorts[scalarPortId]));
243 wfs[wfId]->scheduleWakeup();
244 wfs[wfId]->scheduleDeadlockCheckEvent();
245 }
246 }
247}
248
249Port&
250ProtocolTester::getPort(const std::string &if_name, PortID idx)
251{
252 if (if_name != "cpu_ports" && if_name != "dma_ports" &&
253 if_name != "cu_vector_ports" && if_name != "cu_sqc_ports" &&
254 if_name != "cu_scalar_ports" && if_name != "cu_token_ports") {
255 // pass along to super class
256 return ClockedObject::getPort(if_name, idx);
257 } else {
258 if (if_name == "cpu_ports") {
259 if (idx > numCpuPorts)
260 panic("ProtocolTester: unknown cpu port %d\n", idx);
261 return *cpuPorts[idx];
262 } else if (if_name == "dma_ports") {
263 if (idx > numDmaPorts)
264 panic("ProtocolTester: unknown dma port %d\n", idx);
265 return *dmaPorts[idx];
266 } else if (if_name == "cu_vector_ports") {
267 if (idx > numVectorPorts)
268 panic("ProtocolTester: unknown cu vect port %d\n", idx);
269 return *cuVectorPorts[idx];
270 } else if (if_name == "cu_sqc_ports") {
271 if (idx > numSqcPorts)
272 panic("ProtocolTester: unknown cu sqc port %d\n", idx);
273 return *cuSqcPorts[idx];
274 } else if (if_name == "cu_token_ports") {
275 if (idx > numTokenPorts)
276 panic("ProtocolTester: unknown cu token port %d\n", idx);
277 return *cuTokenPorts[idx];
278 } else {
279 assert(if_name == "cu_scalar_ports");
280 if (idx > numScalarPorts)
281 panic("ProtocolTester: unknown cu scal port %d\n", idx);
282 return *cuScalarPorts[idx];
283 }
284 }
285
286 assert(false);
287}
288
289bool
291{
293 if (!sentExitSignal) {
294 // all done
295 inform("Total completed episodes: %d\n", nextEpisodeId - 1);
296 exitSimLoop("GPU Ruby Tester: Passed!");
297 sentExitSignal = true;
298 }
299 return true;
300 }
301 return false;
302}
303
304bool
306 Location loc, bool isStore) const
307{
308 if (debugTester) {
309 // go through all active episodes in all threads
310 for (const TesterThread* th : wfs) {
311 if (!th->checkDRF(atomic_loc, loc, isStore))
312 return false;
313 }
314
315 for (const TesterThread* th : cpuThreads) {
316 if (!th->checkDRF(atomic_loc, loc, isStore))
317 return false;
318 }
319
320 for (const TesterThread* th : dmaThreads) {
321 if (!th->checkDRF(atomic_loc, loc, isStore))
322 return false;
323 }
324 }
325
326 return true;
327}
328
329void
331{
332 if (!sentExitSignal) {
333 // go through all threads and dump their outstanding requests
334 for (auto t : cpuThreads) {
335 t->printAllOutstandingReqs(ss);
336 }
337
338 for (auto t : dmaThreads) {
339 t->printAllOutstandingReqs(ss);
340 }
341
342 for (auto t : wfs) {
343 t->printAllOutstandingReqs(ss);
344 }
345
346 // dump error log into a file
347 assert(logFile);
348 ccprintf(*(logFile->stream()), "%s", ss.str());
349 logFile->stream()->flush();
350
351 sentExitSignal = true;
352 // terminate the simulation
353 panic("GPU Ruby Tester: Failed!\n");
354 }
355}
356
357bool
359{
360 // get the requesting thread from the original sender state
361 ProtocolTester::SenderState* senderState =
362 safe_cast<ProtocolTester::SenderState*>(pkt->senderState);
363 TesterThread *th = senderState->th;
364
365 th->hitCallback(pkt);
366
367 return true;
368}
369
370} // namespace gem5
#define DPRINTF(x,...)
Definition trace.hh:209
The ClockedObject class extends the SimObject with a clock and accessor functions to relate ticks to ...
virtual std::string name() const
Definition named.hh:47
void close(OutputStream *file)
Closes an output file and frees the corresponding OutputFile.
Definition output.cc:147
OutputStream * create(const std::string &name, bool binary=false, bool no_gz=false)
Creates a file in this directory (optionally compressed).
Definition output.cc:210
std::ostream * stream() const
Get the underlying output stream.
Definition output.hh:62
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
Definition packet.hh:295
SenderState * senderState
This packet's sender state.
Definition packet.hh:545
Ports are used to interface objects to each other.
Definition port.hh:62
virtual bool recvTimingResp(PacketPtr pkt)
Receive a timing response from the peer.
ProtocolTester(const Params &p)
std::vector< GpuWavefront * > wfs
void init() override
init() is called after all C++ SimObjects have been created and all ports are connected.
std::vector< RequestPort * > cuVectorPorts
AddressManager * addrManager
AddressManager::Location Location
std::vector< RequestPort * > cuSqcPorts
ProtocolTesterParams Params
std::vector< DmaThread * > dmaThreads
bool checkDRF(Location atomic_loc, Location loc, bool isStore) const
std::vector< GMTokenPort * > cuTokenPorts
std::vector< CpuThread * > cpuThreads
std::vector< RequestPort * > cuScalarPorts
std::vector< RequestPort * > dmaPorts
Random::RandomPtr rng
Port & getPort(const std::string &if_name, PortID idx=InvalidPortID) override
Get a port with a given name and index.
std::vector< TokenManager * > cuTokenManagers
std::vector< RequestPort * > cpuPorts
void dumpErrorLog(std::stringstream &ss)
virtual void hitCallback(PacketPtr pkt)=0
#define panic(...)
This implements a cprintf based panic() function.
Definition logging.hh:188
virtual Port & getPort(const std::string &if_name, PortID idx=InvalidPortID)
Get a port with a given name and index.
#define warn(...)
Definition logging.hh:256
#define inform(...)
Definition logging.hh:257
Bitfield< 5 > t
Definition misc_types.hh:71
Bitfield< 7 > i
Definition misc_types.hh:67
Bitfield< 21 > ss
Definition misc_types.hh:60
Bitfield< 0 > p
Bitfield< 15 > system
Definition misc.hh:1032
Copyright (c) 2024 Arm Limited All rights reserved.
Definition binary32.hh:36
int16_t PortID
Port index/ID type, and a symbolic name for an invalid port id.
Definition types.hh:245
OutputDirectory simout
Definition output.cc:62
void exitSimLoop(const std::string &message, int exit_code, Tick when, Tick repeat, bool serialize)
Schedule an event to exit the simulation loop (returning to Python) at the end of the current cycle (...
Definition sim_events.cc:88
std::string csprintf(const char *format, const Args &...args)
Definition cprintf.hh:161
void ccprintf(cp::Print &print)
Definition cprintf.hh:130
Declaration of a request, the overall memory request consisting of the parts of the request that are ...

Generated on Mon Jan 13 2025 04:28:32 for gem5 by doxygen 1.9.8