gem5  v22.0.0.2
fetch_unit.cc
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2014-2017 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *
8  * 1. Redistributions of source code must retain the above copyright notice,
9  * this list of conditions and the following disclaimer.
10  *
11  * 2. Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation
13  * and/or other materials provided with the distribution.
14  *
15  * 3. Neither the name of the copyright holder nor the names of its
16  * contributors may be used to endorse or promote products derived from this
17  * software without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 
33 
36 #include "base/bitfield.hh"
37 #include "debug/GPUFetch.hh"
38 #include "debug/GPUPort.hh"
39 #include "debug/GPUTLB.hh"
43 #include "gpu-compute/shader.hh"
44 #include "gpu-compute/wavefront.hh"
46 
47 namespace gem5
48 {
49 
51 
// Construct the fetch unit that belongs to compute unit `cu`.
//   p  - compute-unit parameters; used to build fetchScheduler and to
//        read fetch_depth.
//   cu - owning compute unit, stored by reference in computeUnit.
// timingSim is initialised to true, and waveList starts as nullptr
// (no wave list has been bound yet at construction time).
52 FetchUnit::FetchUnit(const ComputeUnitParams &p, ComputeUnit &cu)
53  : timingSim(true), computeUnit(cu), fetchScheduler(p),
54  waveList(nullptr), fetchDepth(p.fetch_depth)
55 {
56 }
57 
59 {
60  fetchQueue.clear();
61  fetchStatusQueue.clear();
62 }
63 
64 void
66 {
68  fetchQueue.clear();
71 
72  for (int i = 0; i < computeUnit.shader->n_wf; ++i) {
73  Wavefront *wf = waveList->at(i);
74  assert(wf->wfSlotId == i);
75  fetchStatusQueue[i] = std::make_pair(wf, false);
76  fetchBuf[i].allocateBuf(fetchDepth, computeUnit.cacheLineSize(), wf);
77  fetchBuf[i].decoder(&decoder);
78  }
79 
81 }
82 
83 void
85 {
94  for (auto &fetch_buf : fetchBuf) {
95  if (!fetch_buf.hasFreeSpace()) {
96  fetch_buf.checkWaveReleaseBuf();
97  }
98  if (fetch_buf.hasFetchDataToProcess()) {
99  fetch_buf.decodeInsts();
100  }
101  }
102 
103  // re-evaluate waves which are marked as not ready for fetch
104  for (int j = 0; j < computeUnit.shader->n_wf; ++j) {
105  // Following code assumes 64-bit operation and all insts are
106  // represented by 64-bit pointers to inst objects.
107  Wavefront *curWave = fetchStatusQueue[j].first;
108  assert (curWave);
109 
110  // The wavefront has to be active, the IB occupancy has to be
111  // 4 or less instructions and it can not have any branches to
112  // prevent speculative instruction fetches
113  if (!fetchStatusQueue[j].second) {
114  if ((curWave->getStatus() == Wavefront::S_RUNNING ||
115  curWave->getStatus() == Wavefront::S_WAITCNT) &&
116  fetchBuf[j].hasFreeSpace() &&
117  !curWave->stopFetch() &&
118  !curWave->pendingFetch) {
119  fetchQueue.push_back(curWave);
120  fetchStatusQueue[j].second = true;
121  }
122  }
123  }
124 
125  // Fetch only if there is some wave ready to be fetched
126  // An empty fetchQueue will cause the scheduler to panic
127  if (fetchQueue.size()) {
128  Wavefront *waveToBeFetched = fetchScheduler.chooseWave();
129  waveToBeFetched->pendingFetch = true;
130  fetchStatusQueue[waveToBeFetched->wfSlotId].second = false;
131  initiateFetch(waveToBeFetched);
132  }
133 }
134 
135 void
137 {
138  assert(fetchBuf.at(wavefront->wfSlotId).hasFreeSpace());
139 
146  Addr vaddr = fetchBuf.at(wavefront->wfSlotId).nextFetchAddr();
147 
148  // this should already be aligned to a cache line
151 
152  // shouldn't be fetching a line that is already buffered
153  assert(!fetchBuf.at(wavefront->wfSlotId).pcBuffered(vaddr));
154 
155  fetchBuf.at(wavefront->wfSlotId).reserveBuf(vaddr);
156 
157  DPRINTF(GPUFetch, "CU%d: WF[%d][%d]: Id%d: Initiate fetch "
158  "from pc: %d %#x\n", computeUnit.cu_id, wavefront->simdId,
159  wavefront->wfSlotId, wavefront->wfDynId, wavefront->pc(), vaddr);
160 
161  DPRINTF(GPUTLB, "CU%d: WF[%d][%d]: Initiating fetch translation: %#x\n",
162  computeUnit.cu_id, wavefront->simdId, wavefront->wfSlotId, vaddr);
163 
164  // set up virtual request
165  RequestPtr req = std::make_shared<Request>(
167  computeUnit.requestorId(), 0, 0, nullptr);
168 
169  PacketPtr pkt = new Packet(req, MemCmd::ReadReq);
170 
171  if (timingSim) {
172  // SenderState needed on Return
173  pkt->senderState = new ComputeUnit::ITLBPort::SenderState(wavefront);
174 
175  // Sender State needed by TLB hierarchy
176  pkt->senderState =
179  false, pkt->senderState);
180 
182  assert(computeUnit.sqcTLBPort.retries.size() > 0);
183 
184  DPRINTF(GPUTLB, "Failed to send TLB req for FETCH addr %#x\n",
185  vaddr);
186 
187  computeUnit.sqcTLBPort.retries.push_back(pkt);
188  } else if (!computeUnit.sqcTLBPort.sendTimingReq(pkt)) {
189  // Stall the data port;
190  // No more packet is issued till
191  // ruby indicates resources are freed by
192  // a recvReqRetry() call back on this port.
194 
195  DPRINTF(GPUTLB, "Failed to send TLB req for FETCH addr %#x\n",
196  vaddr);
197 
198  computeUnit.sqcTLBPort.retries.push_back(pkt);
199  } else {
200  DPRINTF(GPUTLB, "sent FETCH translation request for %#x\n", vaddr);
201  }
202  } else {
203  pkt->senderState =
206 
208 
214  if (!pkt->req->systemReq()) {
215  pkt->req->requestorId(computeUnit.vramRequestorId());
216  }
217 
218  GpuTranslationState *sender_state =
219  safe_cast<GpuTranslationState*>(pkt->senderState);
220 
221  delete sender_state->tlbEntry;
222  delete sender_state;
223  // fetch the instructions from the SQC when we operate in
224  // functional mode only
225  fetch(pkt, wavefront);
226  }
227 }
228 
229 void
231 {
232  assert(pkt->req->hasPaddr());
233  assert(pkt->req->hasSize());
234 
235  DPRINTF(GPUFetch, "CU%d: WF[%d][%d]: Fetch Access: %#x\n",
236  computeUnit.cu_id, wavefront->simdId, wavefront->wfSlotId,
237  pkt->req->getPaddr());
238 
245  PacketPtr oldPkt = pkt;
246  pkt = new Packet(oldPkt->req, oldPkt->cmd);
247  delete oldPkt;
248 
255  if (!fetchBuf.at(wavefront->wfSlotId).isReserved(pkt->req->getVaddr())) {
256  wavefront->dropFetch = false;
257  wavefront->pendingFetch = false;
258  return;
259  }
260 
266  if (!pkt->req->systemReq()) {
267  pkt->req->requestorId(computeUnit.vramRequestorId());
268  }
269 
275  pkt->dataStatic(fetchBuf.at(wavefront->wfSlotId)
276  .reservedBuf(pkt->req->getVaddr()));
277 
278  // New SenderState for the memory access
279  pkt->senderState = new ComputeUnit::SQCPort::SenderState(wavefront);
280 
281  if (timingSim) {
282  // translation is done. Send the appropriate timing memory request.
283 
284  if (pkt->req->systemReq()) {
285  SystemHubEvent *resp_event = new SystemHubEvent(pkt, this);
286  assert(computeUnit.shader->systemHub);
287  computeUnit.shader->systemHub->sendRequest(pkt, resp_event);
288  } else if (!computeUnit.sqcPort.sendTimingReq(pkt)) {
289  computeUnit.sqcPort.retries.push_back(std::make_pair(pkt,
290  wavefront));
291 
292  DPRINTF(GPUPort, "CU%d: WF[%d][%d]: Fetch addr %#x failed!\n",
293  computeUnit.cu_id, wavefront->simdId, wavefront->wfSlotId,
294  pkt->req->getPaddr());
295  } else {
296  DPRINTF(GPUPort, "CU%d: WF[%d][%d]: Fetch addr %#x sent!\n",
297  computeUnit.cu_id, wavefront->simdId, wavefront->wfSlotId,
298  pkt->req->getPaddr());
299  }
300  } else {
302  processFetchReturn(pkt);
303  }
304 }
305 
306 void
308 {
309  ComputeUnit::SQCPort::SenderState *sender_state =
310  safe_cast<ComputeUnit::SQCPort::SenderState*>(pkt->senderState);
311 
312  Wavefront *wavefront = sender_state->wavefront;
313 
314  DPRINTF(GPUFetch, "CU%d: WF[%d][%d]: Fetch addr %#x returned "
315  "%d bytes!\n", computeUnit.cu_id, wavefront->simdId,
316  wavefront->wfSlotId, pkt->req->getPaddr(), pkt->req->getSize());
317 
318  if (wavefront->dropFetch) {
319  assert(wavefront->instructionBuffer.empty());
320  assert(!fetchBuf.at(wavefront->wfSlotId).hasFetchDataToProcess());
321  wavefront->dropFetch = false;
322  } else {
323  fetchBuf.at(wavefront->wfSlotId).fetchDone(pkt->req->getVaddr());
324  }
325 
326  wavefront->pendingFetch = false;
327 
328  delete pkt->senderState;
329  delete pkt;
330 }
331 
// Flush the fetch buffer descriptor of the wavefront slot `wfSlotId`.
// The slot is looked up with the bounds-checked at() accessor on
// fetchBuf, and the work is delegated to that descriptor's flushBuf().
332 void
333 FetchUnit::flushBuf(int wfSlotId)
334 {
335  fetchBuf.at(wfSlotId).flushBuf();
336 }
337 
338 void
340 {
341  waveList = wave_list;
342 }
343 
// Set up this fetch buffer descriptor for wavefront `wf`.
//   fetch_depth     - stored in fetchDepth; also the number of entries
//                     pushed onto freeList below.
//   cache_line_size - stored in cacheLineSize; used as the stride
//                     between consecutive free-list entries.
//   wf              - wavefront this descriptor is associated with.
// NOTE(review): several lines of this function are absent from this
// listing (the embedded numbering skips 351, 353, 356, 358 and 362), so
// the initialisation of maxFbSize, cacheLineBits and bufEnd is not
// visible here -- confirm against the complete source.
345 void
346 FetchUnit::FetchBufDesc::allocateBuf(int fetch_depth, int cache_line_size,
347  Wavefront *wf)
348 {
349  wavefront = wf;
350  fetchDepth = fetch_depth;
352  cacheLineSize = cache_line_size;
354 
355  // Calculate the number of bits to address a cache line
// NOTE(review): the line that opens the statement ending below is
// missing from this listing; presumably a power-of-two check on the
// cache line size -- TODO confirm.
357  "Cache line size should be a power of two.");
359 
// Allocate the raw instruction buffer (maxFbSize bytes) and start
// reading from its beginning.
360  bufStart = new uint8_t[maxFbSize];
361  readPtr = bufStart;
363 
// Every slot is initially free: add one pointer per line, spaced
// cacheLineSize bytes apart, starting at the buffer start.
364  for (int i = 0; i < fetchDepth; ++i) {
365  freeList.emplace_back(readPtr + i * cacheLineSize);
366  }
367 }
368 
369 void
371 {
372  restartFromBranch = true;
377  freeList.clear();
378  bufferedPCs.clear();
379  reservedPCs.clear();
380  readPtr = bufStart;
381 
382  for (int i = 0; i < fetchDepth; ++i) {
383  freeList.push_back(bufStart + i * cacheLineSize);
384  }
385 
386  DPRINTF(GPUFetch, "WF[%d][%d]: Id%d Fetch dropped, flushing fetch "
387  "buffer\n", wavefront->simdId, wavefront->wfSlotId,
388  wavefront->wfDynId);
389 }
390 
391 Addr
393 {
394  Addr next_line = 0;
395 
396  if (bufferedAndReservedLines()) {
397  Addr last_line_fetched = 0;
398  if (!reservedLines()) {
403  last_line_fetched = bufferedPCs.rbegin()->first;
404  } else {
405  last_line_fetched = reservedPCs.rbegin()->first;
406  }
407 
408  next_line = last_line_fetched + cacheLineSize;
409 
414  assert(bufferedPCs.find(next_line) == bufferedPCs.end());
415  assert(reservedPCs.find(next_line) == reservedPCs.end());
416  } else {
425  next_line = ruby::makeLineAddress(wavefront->pc(), cacheLineBits);
426  readPtr = bufStart;
427 
434  if (restartFromBranch) {
435  restartFromBranch = false;
436  int byte_offset
437  = wavefront->pc() - ruby::makeLineAddress(wavefront->pc(),
438  cacheLineBits);
439  readPtr += byte_offset;
440  }
441  }
442 
443  return next_line;
444 }
445 
446 void
448 {
449  // we should have free buffer space, and the line
450  // at vaddr should not already be cached.
451  assert(hasFreeSpace());
452  assert(bufferedPCs.find(vaddr) == bufferedPCs.end());
453  assert(reservedPCs.find(vaddr) == reservedPCs.end());
454  assert(bufferedAndReservedLines() < fetchDepth);
455 
456  DPRINTF(GPUFetch, "WF[%d][%d]: Id%d reserved fetch buffer entry "
457  "for PC = %#x\n", wavefront->simdId, wavefront->wfSlotId,
458  wavefront->wfDynId, vaddr);
459 
466  uint8_t *inst_buf = freeList.front();
467  reservedPCs.emplace(vaddr, inst_buf);
468  freeList.pop_front();
469 }
470 
471 void
473 {
474  assert(bufferedPCs.find(vaddr) == bufferedPCs.end());
475  DPRINTF(GPUFetch, "WF[%d][%d]: Id%d done fetching for addr %#x\n",
476  wavefront->simdId, wavefront->wfSlotId,
477  wavefront->wfDynId, vaddr);
478 
484  auto reserved_pc = reservedPCs.find(vaddr);
485  assert(reserved_pc != reservedPCs.end());
486  bufferedPCs.emplace(vaddr, reserved_pc->second);
487 
488  if (readPtr == bufEnd) {
489  readPtr = bufStart;
490  }
491 
492  reserved_pc->second = nullptr;
493  reservedPCs.erase(reserved_pc);
494 }
495 
496 bool
498 {
499  return fetchBytesRemaining() >= sizeof(TheGpuISA::RawMachInst);
500 }
501 
502 void
504 {
505  Addr cur_wave_pc = roundDown(wavefront->pc(),
506  wavefront->computeUnit->cacheLineSize());
507  if (reservedPCs.find(cur_wave_pc) != reservedPCs.end()) {
508  DPRINTF(GPUFetch, "WF[%d][%d]: Id%d current wave PC(%#x) still "
509  "being fetched.\n", wavefront->simdId, wavefront->wfSlotId,
510  wavefront->wfDynId, cur_wave_pc);
511 
512  // should be reserved, but not buffered yet
513  assert(bufferedPCs.find(cur_wave_pc) == bufferedPCs.end());
514 
515  return;
516  }
517 
518  auto current_buffered_pc = bufferedPCs.find(cur_wave_pc);
519  auto oldest_buffered_pc = bufferedPCs.begin();
520 
521  DPRINTF(GPUFetch, "WF[%d][%d]: Id%d checking if PC block addr = %#x"
522  "(PC = %#x) can be released.\n", wavefront->simdId,
523  wavefront->wfSlotId, wavefront->wfDynId, cur_wave_pc,
524  wavefront->pc());
525 
526 #ifdef DEBUG
527  int idx = 0;
528  for (const auto &buf_pc : bufferedPCs) {
529  DPRINTF(GPUFetch, "PC[%d] = %#x\n", idx, buf_pc.first);
530  ++idx;
531  }
532 #endif
533 
534  // if we haven't buffered data for this PC, we shouldn't
535  // be fetching from it.
536  assert(current_buffered_pc != bufferedPCs.end());
537 
544  if (current_buffered_pc != oldest_buffered_pc) {
545  DPRINTF(GPUFetch, "WF[%d][%d]: Id%d done fetching for PC = %#x, "
546  "removing it from the fetch buffer.\n", wavefront->simdId,
547  wavefront->wfSlotId, wavefront->wfDynId,
548  oldest_buffered_pc->first);
549 
550  freeList.emplace_back(oldest_buffered_pc->second);
551  oldest_buffered_pc->second = nullptr;
552  bufferedPCs.erase(oldest_buffered_pc);
553  DPRINTF(GPUFetch, "WF[%d][%d]: Id%d has %d lines buffered.\n",
554  wavefront->simdId, wavefront->wfSlotId, wavefront->wfDynId,
555  bufferedLines());
556  }
557 }
558 
559 void
561 {
562  assert(readPtr);
563 
564  if (splitDecode()) {
565  decodeSplitInst();
566  }
567 
568  while (wavefront->instructionBuffer.size() < maxIbSize
569  && hasFetchDataToProcess()) {
570  if (splitDecode()) {
571  decodeSplitInst();
572  } else {
573  TheGpuISA::MachInst mach_inst
574  = reinterpret_cast<TheGpuISA::MachInst>(readPtr);
575  GPUStaticInst *gpu_static_inst = _decoder->decode(mach_inst);
576  readPtr += gpu_static_inst->instSize();
577 
578  assert(readPtr <= bufEnd);
579 
580  GPUDynInstPtr gpu_dyn_inst
581  = std::make_shared<GPUDynInst>(wavefront->computeUnit,
582  wavefront, gpu_static_inst,
583  wavefront->computeUnit->
584  getAndIncSeqNum());
585  wavefront->instructionBuffer.push_back(gpu_dyn_inst);
586 
587  DPRINTF(GPUFetch, "WF[%d][%d]: Id%ld decoded %s (%d bytes). "
588  "%d bytes remain.\n", wavefront->simdId,
589  wavefront->wfSlotId, wavefront->wfDynId,
590  gpu_static_inst->disassemble(),
591  gpu_static_inst->instSize(),
592  fetchBytesRemaining());
593  }
594  }
595 }
596 
597 void
599 {
600  TheGpuISA::RawMachInst split_inst = 0;
601  int dword_size = sizeof(uint32_t);
602  int num_dwords = sizeof(TheGpuISA::RawMachInst) / dword_size;
603 
604  for (int i = 0; i < num_dwords; ++i) {
605  replaceBits(split_inst, 32*(i+1)-1, 32*i,
606  *reinterpret_cast<uint32_t*>(readPtr));
607  if (readPtr + dword_size >= bufEnd) {
608  readPtr = bufStart;
609  }
610  }
611 
612  assert(readPtr == bufStart);
613 
614  TheGpuISA::MachInst mach_inst
615  = reinterpret_cast<TheGpuISA::MachInst>(&split_inst);
616  GPUStaticInst *gpu_static_inst = _decoder->decode(mach_inst);
617  readPtr += (gpu_static_inst->instSize() - dword_size);
618  assert(readPtr < bufEnd);
619 
620  GPUDynInstPtr gpu_dyn_inst
621  = std::make_shared<GPUDynInst>(wavefront->computeUnit,
622  wavefront, gpu_static_inst,
623  wavefront->computeUnit->
624  getAndIncSeqNum());
625  wavefront->instructionBuffer.push_back(gpu_dyn_inst);
626 
627  DPRINTF(GPUFetch, "WF[%d][%d]: Id%d decoded split inst %s (%#x) "
628  "(%d bytes). %d bytes remain in %d buffered lines.\n",
629  wavefront->simdId, wavefront->wfSlotId, wavefront->wfDynId,
630  gpu_static_inst->disassemble(), split_inst,
631  gpu_static_inst->instSize(), fetchBytesRemaining(),
632  bufferedLines());
633 }
634 
635 bool
637 {
642  bool is_split = (readPtr + sizeof(TheGpuISA::RawMachInst)) > bufEnd;
643 
644  return is_split;
645 }
646 
647 int
649 {
650  int bytes_remaining = 0;
651 
652  if (bufferedLines() && readPtr != bufEnd) {
653  auto last_buf_pc = bufferedPCs.rbegin();
654  uint8_t *end_ptr = last_buf_pc->second + cacheLineSize;
655  int byte_diff = end_ptr - readPtr;
656 
657  if (end_ptr > readPtr) {
658  bytes_remaining = byte_diff;
659  } else if (end_ptr < readPtr) {
660  bytes_remaining = bufferedBytes() + byte_diff;
661  }
662  }
663 
664  assert(bytes_remaining <= bufferedBytes());
665  return bytes_remaining;
666 }
667 
668 void
670 {
671  reqPkt->makeResponse();
672  fetchUnit->computeUnit.handleSQCReturn(reqPkt);
673 }
674 
675 } // namespace gem5
gem5::ComputeUnit::SQCPort::SenderState
Definition: compute_unit.hh:662
gem5::RequestPort::sendTimingReq
bool sendTimingReq(PacketPtr pkt)
Attempt to send a timing request to the responder port by calling its corresponding receive function.
Definition: port.hh:495
gem5::Shader::gpuTc
ThreadContext * gpuTc
Definition: shader.hh:109
gem5::Wavefront::S_RUNNING
@ S_RUNNING
Definition: wavefront.hh:70
gem5::Gcn3ISA::MachInst
InstFormat * MachInst
used to represent the encoding of a GCN3 inst.
Definition: gpu_types.hh:61
gem5::FetchUnit::~FetchUnit
~FetchUnit()
Definition: fetch_unit.cc:58
gem5::FetchUnit::flushBuf
void flushBuf(int wfSlotId)
Definition: fetch_unit.cc:333
gem5::ComputeUnit::ITLBPort::isStalled
bool isStalled()
Definition: compute_unit.hh:777
shader.hh
gem5::FetchUnit::FetchBufDesc::maxIbSize
int maxIbSize
Definition: fetch_unit.hh:231
gem5::GpuTranslationState
GPU TranslationState: this currently is a somewhat bastardization of the usage of SenderState,...
Definition: gpu_translation_state.hh:58
gem5::GPUStaticInst::disassemble
const std::string & disassemble()
Definition: gpu_static_inst.cc:47
gem5::FetchUnit::FetchBufDesc::cacheLineSize
int cacheLineSize
Definition: fetch_unit.hh:234
gem5::Packet::req
RequestPtr req
A pointer to the original request.
Definition: packet.hh:374
gem5::Wavefront
Definition: wavefront.hh:60
gem5::FetchUnit::FetchBufDesc::fetchDepth
int fetchDepth
Definition: fetch_unit.hh:229
gem5::Request::INST_FETCH
@ INST_FETCH
The request was an instruction fetch.
Definition: request.hh:115
gem5::FetchUnit::SystemHubEvent
Definition: fetch_unit.hh:242
compute_unit.hh
gem5::replaceBits
constexpr void replaceBits(T &val, unsigned first, unsigned last, B bit_val)
A convenience function to replace bits first to last of val with bit_val in place.
Definition: bitfield.hh:197
gem5::Wavefront::stopFetch
bool stopFetch()
Definition: wavefront.cc:727
gem5::ComputeUnit::ITLBPort::retries
std::deque< PacketPtr > retries
here we queue all the translation requests that were not successfully sent.
Definition: compute_unit.hh:785
gpu_static_inst.hh
gem5::FetchUnit::timingSim
bool timingSim
Definition: fetch_unit.hh:257
gem5::floorLog2
static constexpr std::enable_if_t< std::is_integral_v< T >, int > floorLog2(T x)
Definition: intmath.hh:59
gem5::Wavefront::pendingFetch
bool pendingFetch
Definition: wavefront.hh:111
gem5::ComputeUnit::SQCPort::SenderState::wavefront
Wavefront * wavefront
Definition: compute_unit.hh:664
gem5::FetchUnit::FetchBufDesc::splitDecode
bool splitDecode() const
check if the next instruction to be processed out of the fetch buffer is split across the end/beginni...
Definition: fetch_unit.cc:636
gem5::AMDGPUSystemHub::sendRequest
void sendRequest(PacketPtr pkt, Event *callback)
Definition: system_hub.cc:40
gem5::Wavefront::pc
Addr pc() const
Definition: wavefront.cc:1363
gem5::FetchUnit::fetchDepth
int fetchDepth
number of cache lines we can fetch and buffer.
Definition: fetch_unit.hh:286
std::vector
STL vector class.
Definition: stl.hh:37
fetch_unit.hh
gem5::FetchUnit::fetchBuf
std::vector< FetchBufDesc > fetchBuf
Definition: fetch_unit.hh:279
gem5::ComputeUnit::SQCPort::retries
std::deque< std::pair< PacketPtr, Wavefront * > > retries
Definition: compute_unit.hh:675
gem5::FetchUnit::init
void init()
Definition: fetch_unit.cc:65
gem5::GPUStaticInst
Definition: gpu_static_inst.hh:61
gem5::ArmISA::i
Bitfield< 7 > i
Definition: misc_types.hh:67
gem5::FetchUnit::FetchBufDesc::maxFbSize
int maxFbSize
Definition: fetch_unit.hh:233
gem5::ComputeUnit::shader
Shader * shader
Definition: compute_unit.hh:353
gem5::ruby::makeLineAddress
Addr makeLineAddress(Addr addr)
Definition: Address.cc:60
gem5::isPowerOf2
static constexpr bool isPowerOf2(const T &n)
Definition: intmath.hh:98
gem5::ComputeUnit::ITLBPort::stallPort
void stallPort()
Definition: compute_unit.hh:778
gem5::ComputeUnit::cu_id
int cu_id
Definition: compute_unit.hh:292
wavefront.hh
gem5::BaseMMU::Execute
@ Execute
Definition: mmu.hh:56
gem5::Shader::systemHub
AMDGPUSystemHub * systemHub
Definition: shader.hh:229
gem5::Packet::dataStatic
void dataStatic(T *p)
Set the data pointer to the following value that should not be freed.
Definition: packet.hh:1147
gem5::FetchUnit::fetchQueue
std::vector< Wavefront * > fetchQueue
Definition: fetch_unit.hh:269
gem5::FetchUnit::FetchBufDesc::fetchDone
void fetchDone(Addr vaddr)
Definition: fetch_unit.cc:472
gem5::Wavefront::maxIbSize
int maxIbSize
Definition: wavefront.hh:107
gem5::ArmISA::j
Bitfield< 24 > j
Definition: misc_types.hh:57
gem5::FetchUnit::FetchBufDesc::wavefront
Wavefront * wavefront
Definition: fetch_unit.hh:238
gem5::ComputeUnit
Definition: compute_unit.hh:201
gem5::FetchUnit::decoder
TheGpuISA::Decoder decoder
Definition: fetch_unit.hh:259
gem5::RequestPort::sendFunctional
void sendFunctional(PacketPtr pkt) const
Send a functional request packet, where the data is instantly updated everywhere in the memory system...
Definition: port.hh:485
gem5::FetchUnit::FetchBufDesc::readPtr
uint8_t * readPtr
pointer that points to the next chunk of inst data to be decoded.
Definition: fetch_unit.hh:227
gem5::FetchUnit::FetchBufDesc::nextFetchAddr
Addr nextFetchAddr()
Definition: fetch_unit.cc:392
gem5::ComputeUnit::sqcTLBPort
ITLBPort sqcTLBPort
Definition: compute_unit.hh:906
gem5::FetchUnit::FetchBufDesc::flushBuf
void flushBuf()
Definition: fetch_unit.cc:370
bitfield.hh
gem5::VegaISA::p
Bitfield< 54 > p
Definition: pagetable.hh:70
DPRINTF
#define DPRINTF(x,...)
Definition: trace.hh:186
gem5::Packet
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
Definition: packet.hh:291
gem5::probing::Packet
ProbePointArg< PacketInfo > Packet
Packet probe point.
Definition: mem.hh:109
gem5::FetchUnit::initiateFetch
void initiateFetch(Wavefront *wavefront)
Definition: fetch_unit.cc:136
gem5::Wavefront::wfSlotId
const int wfSlotId
Definition: wavefront.hh:96
gem5::RequestPtr
std::shared_ptr< Request > RequestPtr
Definition: request.hh:92
gem5::FetchUnit::FetchBufDesc::decodeSplitInst
void decodeSplitInst()
Definition: fetch_unit.cc:598
gem5::MemCmd::ReadReq
@ ReadReq
Definition: packet.hh:86
gem5::ComputeUnit::vramRequestorId
RequestorID vramRequestorId()
Forward the VRAM requestor ID needed for device memory from shader.
Definition: compute_unit.cc:2089
gem5::GpuTranslationState::tlbEntry
Serializable * tlbEntry
Definition: gpu_translation_state.hh:73
gem5::FetchUnit::FetchBufDesc
fetch buffer descriptor.
Definition: fetch_unit.hh:74
gem5::FetchUnit::FetchBufDesc::bufEnd
uint8_t * bufEnd
Definition: fetch_unit.hh:222
gem5::FetchUnit::FetchUnit
FetchUnit(const ComputeUnitParams &p, ComputeUnit &cu)
Definition: fetch_unit.cc:52
gpu_dyn_inst.hh
gem5::FetchUnit::FetchBufDesc::fetchBytesRemaining
int fetchBytesRemaining() const
calculates the number of fetched bytes that have yet to be decoded.
Definition: fetch_unit.cc:648
gem5::ComputeUnit::cacheLineSize
int cacheLineSize() const
Definition: compute_unit.hh:411
gem5::GPUStaticInst::instSize
virtual int instSize() const =0
gem5::roundDown
static constexpr T roundDown(const T &val, const U &align)
This function is used to align addresses in memory.
Definition: intmath.hh:279
gpu_translation_state.hh
gem5::Packet::cmd
MemCmd cmd
The command field of the packet.
Definition: packet.hh:369
gem5::Wavefront::wfDynId
uint64_t wfDynId
Definition: wavefront.hh:226
RubySystem.hh
gem5::FetchUnit::FetchBufDesc::decodeInsts
void decodeInsts()
each time the fetch stage is ticked, we check if there are any data in the fetch buffer that may be d...
Definition: fetch_unit.cc:560
gem5::FetchUnit::SystemHubEvent::process
void process()
Definition: fetch_unit.cc:669
gem5::Addr
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition: types.hh:147
tlb.hh
gem5::Packet::senderState
SenderState * senderState
This packet's sender state.
Definition: packet.hh:542
gem5::FetchUnit::bindWaveList
void bindWaveList(std::vector< Wavefront * > *list)
Definition: fetch_unit.cc:339
gem5::GPUDynInstPtr
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Definition: misc.hh:49
gem5::FetchUnit::FetchBufDesc::allocateBuf
void allocateBuf(int fetch_depth, int cache_line_size, Wavefront *wf)
allocate the fetch buffer space, and set the fetch depth (number of lines that may be buffered),...
Definition: fetch_unit.cc:346
gem5::Shader::timingSim
bool timingSim
Definition: shader.hh:192
gem5::ComputeUnit::getCacheLineBits
int getCacheLineBits() const
Definition: compute_unit.hh:412
panic_if
#define panic_if(cond,...)
Conditional panic macro that checks the supplied condition and only panics if the condition is true a...
Definition: logging.hh:204
gem5::FetchUnit::FetchBufDesc::bufStart
uint8_t * bufStart
raw instruction buffer.
Definition: fetch_unit.hh:221
gem5::FetchUnit::FetchBufDesc::reserveBuf
void reserveBuf(Addr vaddr)
reserve an entry in the fetch buffer for PC = vaddr,
Definition: fetch_unit.cc:447
gem5::FetchUnit::fetchScheduler
Scheduler fetchScheduler
Definition: fetch_unit.hh:265
gem5::FetchUnit::FetchBufDesc::checkWaveReleaseBuf
void checkWaveReleaseBuf()
checks if the wavefront can release any of its fetch buffer entries.
Definition: fetch_unit.cc:503
gem5::Wavefront::S_WAITCNT
@ S_WAITCNT
wavefront has unsatisfied wait counts
Definition: wavefront.hh:88
gem5::FetchUnit::globalFetchUnitID
static uint32_t globalFetchUnitID
Definition: fetch_unit.hh:67
gem5::Scheduler::chooseWave
Wavefront * chooseWave()
Definition: scheduler.cc:53
gem5::Shader::n_wf
int n_wf
Definition: shader.hh:206
gem5::FetchUnit::FetchBufDesc::cacheLineBits
int cacheLineBits
Definition: fetch_unit.hh:235
gem5::FetchUnit::waveList
std::vector< Wavefront * > * waveList
Definition: fetch_unit.hh:277
gem5::Wavefront::instructionBuffer
std::deque< GPUDynInstPtr > instructionBuffer
Definition: wavefront.hh:109
gem5::FetchUnit::fetchStatusQueue
std::vector< std::pair< Wavefront *, bool > > fetchStatusQueue
Definition: fetch_unit.hh:274
gem5::Scheduler::bindList
void bindList(std::vector< Wavefront * > *sched_list)
Definition: scheduler.cc:59
gem5::MipsISA::vaddr
vaddr
Definition: pra_constants.hh:278
gem5::Wavefront::getStatus
status_e getStatus()
Definition: wavefront.hh:137
gem5
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
Definition: gpu_translation_state.hh:37
gem5::FetchUnit::computeUnit
ComputeUnit & computeUnit
Definition: fetch_unit.hh:258
gem5::ComputeUnit::sqcPort
SQCPort sqcPort
Definition: compute_unit.hh:904
gem5::FetchUnit::exec
void exec()
Definition: fetch_unit.cc:84
gem5::FetchUnit::FetchBufDesc::hasFetchDataToProcess
bool hasFetchDataToProcess() const
checks if the buffer contains valid data.
Definition: fetch_unit.cc:497
gem5::FetchUnit::FetchBufDesc::freeList
std::deque< uint8_t * > freeList
represents the fetch buffer free list.
Definition: fetch_unit.hh:215
gem5::Wavefront::dropFetch
bool dropFetch
Definition: wavefront.hh:112
gem5::Gcn3ISA::RawMachInst
uint64_t RawMachInst
used to represent a GPU inst in its raw format.
Definition: gpu_types.hh:42
gem5::FetchUnit::fetch
void fetch(PacketPtr pkt, Wavefront *wavefront)
Definition: fetch_unit.cc:230
gem5::ComputeUnit::requestorId
RequestorID requestorId()
Definition: compute_unit.hh:460
gem5::SenderState
RubyTester::SenderState SenderState
Definition: Check.cc:40
gem5::FetchUnit::processFetchReturn
void processFetchReturn(PacketPtr pkt)
Definition: fetch_unit.cc:307
gem5::Wavefront::simdId
const int simdId
Definition: wavefront.hh:99

Generated on Thu Jul 28 2022 13:32:33 for gem5 by doxygen 1.8.17