gem5  v19.0.0.0
global_memory_pipeline.cc
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2014-2015 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * For use for simulation and test purposes only
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright notice,
11  * this list of conditions and the following disclaimer.
12  *
13  * 2. Redistributions in binary form must reproduce the above copyright notice,
14  * this list of conditions and the following disclaimer in the documentation
15  * and/or other materials provided with the distribution.
16  *
17  * 3. Neither the name of the copyright holder nor the names of its
18  * contributors may be used to endorse or promote products derived from this
19  * software without specific prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  *
33  * Authors: John Kalamatianos,
34  * Sooraj Puthoor
35  */
36 
38 
39 #include "debug/GPUMem.hh"
40 #include "debug/GPUReg.hh"
43 #include "gpu-compute/shader.hh"
45 #include "gpu-compute/wavefront.hh"
46 
// Build the pipeline from the compute unit parameters: the queue size and
// the out-of-order delivery flag are read from p, both in-flight counters
// start at zero, and computeUnit is left null by this constructor.
47 GlobalMemPipeline::GlobalMemPipeline(const ComputeUnitParams* p) :
48  computeUnit(nullptr), gmQueueSize(p->global_mem_queue_size),
49  outOfOrderDataDelivery(p->out_of_order_data_delivery), inflightStores(0),
50  inflightLoads(0)
51 {
52 }
53 
// Attach this pipeline to a compute unit and derive the pipeline's name
// from the compute unit's name plus the ".GlobalMemPipeline" suffix.
// NOTE(review): listing lines 55 and 58 are absent from this extract; the
// signature is presumably init(ComputeUnit *cu) -- confirm against the file.
54 void
56 {
57  computeUnit = cu;
59  _name = computeUnit->name() + ".GlobalMemPipeline";
60 }
61 
// One step of the global memory pipeline as visible in this listing:
//  1. if a response m is ready and the VRF, the memory-to-VRF bus and the
//     wavefront wait slot all allow it, complete it and retire it;
//  2. if gmIssuedRequests is non-empty, start the access for one request,
//     subject to the gmQueueSize limit on in-flight loads and stores.
// NOTE(review): listing lines 63, 66, 83, 96, 101, 107, 115 and 133-141 are
// absent from this extract (including the signature and the definitions of
// m and mp), so the code below is not complete as shown. The function name
// itself is not visible here -- confirm against the file.
62 void
64 {
65  // apply any returned global memory operations
67 
68  bool accessVrf = true;
69  Wavefront *w = nullptr;
70 
71  // check the VRF to see if the operands of a load (or load component
72  // of an atomic) are accessible
// NOTE(review): m is presumably the next ready response; its definition is
// on a line missing from this listing.
73  if ((m) && (m->isLoad() || m->isAtomicRet())) {
74  w = m->wavefront();
75 
76  accessVrf =
77  w->computeUnit->vrf[w->simdId]->
78  vrfOperandAccessReady(m->seqNum(), w, m, VrfAccessType::WRITE);
79  }
80 
// Retire m only when its latency has elapsed, the memory-to-VRF bus is
// ready, the VRF access check passed and no status bits remain set. One
// further sub-condition (listing line 83) is missing, which is why the
// parentheses below look unbalanced.
81  if (m && m->latency.rdy() && computeUnit->glbMemToVrfBus.rdy() &&
82  accessVrf && m->statusBitVector == VectorMask(0) &&
84  computeUnit->wfWait.at(m->pipeId).rdy())) {
85 
86  w = m->wavefront();
87 
88  m->completeAcc(m);
89 
// Remove the finished request from the response bookkeeping.
90  completeRequest(m);
91 
92  // Schedule a decrement of the wavefront's outstanding request count
93  computeUnit->shader->ScheduleAdd(&w->outstandingReqs, m->time, -1);
94 
// NOTE(review): the first line of each ScheduleAdd call below (listing
// lines 96 and 101) is missing; only the trailing arguments remain.
95  if (m->isStore() || m->isAtomic()) {
97  m->time, -1);
98  }
99 
100  if (m->isLoad() || m->isAtomic()) {
102  m->time, -1);
103  }
104 
105  // Mark write bus busy for appropriate amount of time
106  computeUnit->glbMemToVrfBus.set(m->time);
108  w->computeUnit->wfWait.at(m->pipeId).set(m->time);
109  }
110 
111  // If pipeline has executed a global memory instruction
112  // execute global memory packets and issue global
113  // memory packets to DTLB
114  if (!gmIssuedRequests.empty()) {
// NOTE(review): mp is presumably the front of gmIssuedRequests; its
// definition (listing line 115) is missing.
// Loads and atomics count against inflightLoads, stores against
// inflightStores; when the relevant counter has reached gmQueueSize the
// function returns without issuing, leaving the request queued.
116  if (mp->isLoad() || mp->isAtomic()) {
117  if (inflightLoads >= gmQueueSize) {
118  return;
119  } else {
120  ++inflightLoads;
121  }
122  } else if (mp->isStore()) {
123  if (inflightStores >= gmQueueSize) {
124  return;
125  } else {
126  ++inflightStores;
127  }
128  }
129 
130  mp->initiateAcc(mp);
131 
// With in-order delivery, every non-fence request is recorded in
// gmOrderedRespBuffer keyed by its sequence number, with its done flag
// initially false. (Listing lines 133-141 are missing here.)
132  if (!outOfOrderDataDelivery && !mp->isMemFence()) {
142  gmOrderedRespBuffer.insert(std::make_pair(mp->seqNum(),
143  std::make_pair(mp, false)));
144  }
145 
146  gmIssuedRequests.pop();
147 
148  DPRINTF(GPUMem, "CU%d: WF[%d][%d] Popping 0 mem_op = \n",
149  computeUnit->cu_id, mp->simdId, mp->wfSlotId);
150  }
151 }
152 
// Return the next response that is ready to be serviced, or nullptr when
// there is none.
// First branch: a returned load is preferred over a returned store.
// Else branch: only the first entry of gmOrderedRespBuffer (a std::map, so
// the lowest key) is considered, and only if its done flag is set.
// NOTE(review): listing lines 153-154 and 156 are absent from this extract;
// the return type/signature is presumably GPUDynInstPtr getNextReadyResp()
// and the missing condition is presumably outOfOrderDataDelivery -- confirm.
155 {
157  if (!gmReturnedLoads.empty()) {
158  return gmReturnedLoads.front();
159  } else if (!gmReturnedStores.empty()) {
160  return gmReturnedStores.front();
161  }
162  } else {
163  if (!gmOrderedRespBuffer.empty()) {
164  auto mem_req = gmOrderedRespBuffer.begin();
165 
// mem_req->second is the (instruction, done) pair stored for this key.
166  if (mem_req->second.second) {
167  return mem_req->second.first;
168  }
169  }
170  }
171 
172  return nullptr;
173 }
174 
// Retire a finished memory request: decrement the matching in-flight
// counter, then drop the request from the response bookkeeping (the
// returned-load/returned-store queue in the first branch, or
// gmOrderedRespBuffer in the else branch).
// NOTE(review): listing lines 176 and 186 are absent from this extract; the
// signature is presumably completeRequest(GPUDynInstPtr gpuDynInst) and the
// missing condition is presumably outOfOrderDataDelivery -- confirm.
175 void
177 {
// Loads and atomics are counted as loads; only plain stores are counted as
// stores.
178  if (gpuDynInst->isLoad() || gpuDynInst->isAtomic()) {
179  assert(inflightLoads > 0);
180  --inflightLoads;
181  } else if (gpuDynInst->isStore()) {
182  assert(inflightStores > 0);
183  --inflightStores;
184  }
185 
187  if (gpuDynInst->isLoad() || gpuDynInst->isAtomic()) {
188  assert(!gmReturnedLoads.empty());
189  gmReturnedLoads.pop();
190  } else if (gpuDynInst->isStore()) {
191  assert(!gmReturnedStores.empty());
192  gmReturnedStores.pop();
193  }
194  } else {
195  // we should only pop the oldest request, and it
196  // should be marked as done if we are here
197  assert(gmOrderedRespBuffer.begin()->first == gpuDynInst->seqNum());
198  assert(gmOrderedRespBuffer.begin()->second.first == gpuDynInst);
199  assert(gmOrderedRespBuffer.begin()->second.second);
200  // remove this instruction from the buffer by its
201  // unique seq ID
202  gmOrderedRespBuffer.erase(gpuDynInst->seqNum());
203  }
204 }
205 
// Enqueue a request at the back of gmIssuedRequests.
// NOTE(review): listing line 207 (the signature) is absent from this
// extract; presumably issueRequest(GPUDynInstPtr gpuDynInst) -- confirm.
206 void
208 {
209  gmIssuedRequests.push(gpuDynInst);
210 }
211 
// Record a response for a memory request.
// First branch: push the instruction onto gmReturnedLoads (loads and
// atomics) or gmReturnedStores (everything else), asserting that the
// corresponding FIFO is ready for a write.
// Else branch: look the request up in gmOrderedRespBuffer by sequence
// number and set its done flag.
// NOTE(review): listing lines 213 and 215 are absent from this extract; the
// signature is presumably handleResponse(GPUDynInstPtr gpuDynInst) and the
// missing condition is presumably outOfOrderDataDelivery -- confirm.
212 void
214 {
216  if (gpuDynInst->isLoad() || gpuDynInst->isAtomic()) {
217  assert(isGMLdRespFIFOWrRdy());
218  gmReturnedLoads.push(gpuDynInst);
219  } else {
220  assert(isGMStRespFIFOWrRdy());
221  gmReturnedStores.push(gpuDynInst);
222  }
223  } else {
224  auto mem_req = gmOrderedRespBuffer.find(gpuDynInst->seqNum());
225  // if we are getting a response for this mem request,
226  // then it ought to already be in the ordered response
227  // buffer
228  assert(mem_req != gmOrderedRespBuffer.end());
229  mem_req->second.second = true;
230  }
231 }
232 
// Give a statistic its name ("<pipeline name>.load_vrf_bank_conflict_cycles")
// and its description.
// NOTE(review): listing lines 234 and 236 are absent from this extract, so
// neither the function signature nor the stat object is visible; the object
// is presumably loadVrfBankConflictCycles -- confirm against the file.
233 void
235 {
237  .name(name() + ".load_vrf_bank_conflict_cycles")
238  .desc("total number of cycles GM data are delayed before updating "
239  "the VRF")
240  ;
241 }
int coissue_return
Definition: shader.hh:115
#define DPRINTF(x,...)
Definition: trace.hh:229
std::queue< GPUDynInstPtr > gmIssuedRequests
void handleResponse(GPUDynInstPtr gpuDynInst)
this method handles responses sent to this GM pipeline by the CU.
WaitClass glbMemToVrfBus
Bitfield< 0 > m
void completeRequest(GPUDynInstPtr gpuDynInst)
once a memory request is finished we remove it from the buffer.
GPUDynInstPtr getNextReadyResp()
find the next ready response to service.
std::bitset< std::numeric_limits< unsigned long long >::digits > VectorMask
Definition: misc.hh:45
std::map< uint64_t, std::pair< GPUDynInstPtr, bool > > gmOrderedRespBuffer
GlobalMemPipeline(const ComputeUnitParams *params)
int simdId
Definition: wavefront.hh:165
std::queue< GPUDynInstPtr > gmReturnedStores
bool rdy() const
Definition: misc.hh:70
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Definition: misc.hh:48
Stats::Scalar loadVrfBankConflictCycles
ComputeUnit * computeUnit
Definition: wavefront.hh:167
Bitfield< 1 > mp
Definition: misc.hh:605
uint32_t outstandingReqsRdGm
Definition: wavefront.hh:219
const std::string & name() const
Bitfield< 0 > w
virtual const std::string name() const
Definition: sim_object.hh:120
uint32_t outstandingReqs
Definition: wavefront.hh:210
int globalMemSize
Definition: shader.hh:123
uint32_t outstandingReqsWrGm
Definition: wavefront.hh:215
Derived & name(const std::string &name)
Set the name and marks this stat to print at the end of simulation.
Definition: statistics.hh:279
Shader * shader
void ScheduleAdd(uint32_t *val, Tick when, int x)
Definition: shader.cc:315
void issueRequest(GPUDynInstPtr gpuDynInst)
issues a request to the pipeline - i.e., enqueue it in the request buffer.
std::vector< VectorRegisterFile * > vrf
std::queue< GPUDynInstPtr > gmReturnedLoads
void init(ComputeUnit *cu)
Derived & desc(const std::string &_desc)
Set the description and marks this stat to print at the end of simulation.
Definition: statistics.hh:312
std::vector< WaitClass > wfWait
Bitfield< 0 > p
void set(uint32_t i)
Definition: misc.hh:60

Generated on Fri Feb 28 2020 16:27:01 for gem5 by doxygen 1.8.13