gem5  v22.1.0.0
memory_manager.cc
Go to the documentation of this file.
1 /*
2  * Copyright (c) 2021 Advanced Micro Devices, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions are met:
7  *
8  * 1. Redistributions of source code must retain the above copyright notice,
9  * this list of conditions and the following disclaimer.
10  *
11  * 2. Redistributions in binary form must reproduce the above copyright notice,
12  * this list of conditions and the following disclaimer in the documentation
13  * and/or other materials provided with the distribution.
14  *
15  * 3. Neither the name of the copyright holder nor the names of its
16  * contributors may be used to endorse or promote products derived from this
17  * software without specific prior written permission.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
23  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  *
31  */
32 
34 
35 #include <memory>
36 
37 #include "base/chunk_generator.hh"
38 #include "debug/AMDGPUMem.hh"
39 #include "params/AMDGPUMemoryManager.hh"
40 #include "sim/system.hh"
41 
42 namespace gem5
43 {
44 
45 AMDGPUMemoryManager::AMDGPUMemoryManager(const AMDGPUMemoryManagerParams &p)
46  : ClockedObject(p), _gpuMemPort(csprintf("%s-port", name()), *this),
47  cacheLineSize(p.system->cacheLineSize()),
48  _requestorId(p.system->getRequestorId(this))
49 {
50 }
51 
52 void
54  Request::Flags flag, Event *callback)
55 {
56  assert(data);
57 
58  // Requests may return out of order, so we should track how many chunks
59  // are outstanding and if the last chunk was sent. Give each status struct
60  // a unique ID so that DMAs to the same address may occur at the same time
61  requestStatus.emplace(std::piecewise_construct,
62  std::forward_as_tuple(requestId), std::tuple<>{});
63 
64  DPRINTF(AMDGPUMem, "Created status for write request %ld\n", requestId);
65 
66  ChunkGenerator gen(addr, size, cacheLineSize);
67  for (; !gen.done(); gen.next()) {
68  RequestPtr req = std::make_shared<Request>(gen.addr(), gen.size(),
69  flag, _requestorId);
70 
71  PacketPtr pkt = Packet::createWrite(req);
72  uint8_t *dataPtr = new uint8_t[gen.size()];
73  std::memcpy(dataPtr, data + (gen.complete()/sizeof(uint8_t)),
74  gen.size());
75  pkt->dataDynamic<uint8_t>(dataPtr);
76 
77  pkt->pushSenderState(
78  new GPUMemPort::SenderState(callback, addr, requestId));
79  requestStatus.at(requestId).outstandingChunks++;
80  if (gen.last()) {
81  requestStatus.at(requestId).sentLastChunk = true;
82  }
83 
84  if (!_gpuMemPort.sendTimingReq(pkt)) {
85  DPRINTF(AMDGPUMem, "Request to %#lx needs retry\n", gen.addr());
86  _gpuMemPort.retries.push_back(pkt);
87  } else {
88  DPRINTF(AMDGPUMem, "Write request to %#lx sent\n", gen.addr());
89  }
90  }
91 
92  requestId++;
93 }
94 
95 void
97  Request::Flags flag, Event *callback)
98 {
99  assert(data);
100  uint8_t *dataPtr = data;
101 
102  // Requests may return out of order, so we should track how many chunks
103  // are outstanding and if the last chunk was sent. Give each status struct
104  // a unique ID so that DMAs to the same address may occur at the same time
105  requestStatus.emplace(std::piecewise_construct,
106  std::forward_as_tuple(requestId), std::tuple<>{});
107 
108  DPRINTF(AMDGPUMem, "Created status for read request %ld\n", requestId);
109 
110  ChunkGenerator gen(addr, size, cacheLineSize);
111  for (; !gen.done(); gen.next()) {
112  RequestPtr req = std::make_shared<Request>(gen.addr(), gen.size(),
113  flag, _requestorId);
114 
115  PacketPtr pkt = Packet::createRead(req);
116  pkt->dataStatic<uint8_t>(dataPtr);
117  dataPtr += gen.size();
118 
119  pkt->pushSenderState(
120  new GPUMemPort::SenderState(callback, addr, requestId));
121  requestStatus.at(requestId).outstandingChunks++;
122  if (gen.last()) {
123  requestStatus.at(requestId).sentLastChunk = true;
124  }
125 
126  if (!_gpuMemPort.sendTimingReq(pkt)) {
127  DPRINTF(AMDGPUMem, "Request to %#lx needs retry\n", gen.addr());
128  _gpuMemPort.retries.push_back(pkt);
129  } else {
130  DPRINTF(AMDGPUMem, "Read request to %#lx sent\n", gen.addr());
131  }
132  }
133 
134  requestId++;
135 }
136 
137 bool
139 {
140  // Retrieve sender state
141  [[maybe_unused]] SenderState *sender_state =
142  safe_cast<SenderState*>(pkt->senderState);
143 
144  // Check if all chunks have completed, the last chunk was sent, and there
145  // is a callback, call the callback now.
146  assert(gpu_mem.requestStatus.count(sender_state->_requestId));
147  auto& status = gpu_mem.requestStatus.at(sender_state->_requestId);
148 
149  assert(status.outstandingChunks != 0);
150  status.outstandingChunks--;
151  DPRINTF(AMDGPUMem, "Received Response for %#x. %d chunks remain, sent "
152  "last = %d, requestId = %ld\n", sender_state->_addr,
153  status.outstandingChunks, status.sentLastChunk,
154  sender_state->_requestId);
155 
156  if (!status.outstandingChunks && status.sentLastChunk) {
157  // Call and free the callback if there is one
158  if (sender_state->_callback) {
159  DPRINTF(AMDGPUMem, "Calling callback for request %ld\n",
160  sender_state->_requestId);
161  sender_state->_callback->process();
162  delete sender_state->_callback;
163  }
164  DPRINTF(AMDGPUMem, "Deleting status for request %ld\n",
165  sender_state->_requestId);
166  gpu_mem.requestStatus.erase(sender_state->_requestId);
167  }
168 
169  delete pkt->senderState;
170  delete pkt;
171  return true;
172 }
173 
174 void
176 {
177  for (const auto &pkt : retries) {
178  if (!sendTimingReq(pkt)) {
179  break;
180  } else {
181  DPRINTF(AMDGPUMem, "Retry for %#lx sent\n", pkt->getAddr());
182  retries.pop_front();
183  }
184  }
185 }
186 
187 } // namespace gem5
#define DPRINTF(x,...)
Definition: trace.hh:186
Declaration and inline definition of ChunkGenerator object.
const char data[]
bool recvTimingResp(PacketPtr pkt) override
Receive a timing response from the peer.
void recvReqRetry() override
Called by the peer if sendTimingReq was called on this peer (causing recvTimingReq to be called on th...
std::unordered_map< uint64_t, RequestStatus > requestStatus
AMDGPUMemoryManager(const AMDGPUMemoryManagerParams &p)
const RequestorID _requestorId
void writeRequest(Addr addr, uint8_t *data, int size, Request::Flags flag, Event *callback)
Write size amount of data to device memory at addr using flags and callback.
void readRequest(Addr addr, uint8_t *data, int size, Request::Flags flag, Event *callback)
Read size amount of data from device memory at addr using flags and callback.
This class takes an arbitrary memory region (address/length pair) and generates a series of appropria...
The ClockedObject class extends the SimObject with a clock and accessor functions to relate ticks to ...
virtual void process()=0
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
Definition: packet.hh:294
static PacketPtr createWrite(const RequestPtr &req)
Definition: packet.hh:1041
void dataStatic(T *p)
Set the data pointer to the following value that should not be freed.
Definition: packet.hh:1162
SenderState * senderState
This packet's sender state.
Definition: packet.hh:544
void pushSenderState(SenderState *sender_state)
Push a new sender state to the packet and make the current sender state the predecessor of the new on...
Definition: packet.cc:334
static PacketPtr createRead(const RequestPtr &req)
Constructor-like methods that return Packets based on Request objects.
Definition: packet.hh:1035
void dataDynamic(T *p)
Set the data pointer to a value that should have delete [] called on it.
Definition: packet.hh:1200
bool sendTimingReq(PacketPtr pkt)
Attempt to send a timing request to the responder port by calling its corresponding receive function.
Definition: port.hh:495
bool last() const
Is this the last chunk?
Addr addr() const
Return starting address of current chunk.
Addr complete() const
Number of bytes we have already chunked up.
bool done() const
Are we done? That is, did the last call to next() advance past the end of the region?
Addr size() const
Return size in bytes of current chunk.
bool next()
Advance generator to next chunk.
Bitfield< 5, 0 > status
Definition: misc_types.hh:429
Bitfield< 54 > p
Definition: pagetable.hh:70
Bitfield< 15 > system
Definition: misc.hh:1004
Bitfield< 3 > addr
Definition: types.hh:84
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
std::shared_ptr< Request > RequestPtr
Definition: request.hh:92
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition: types.hh:147
std::string csprintf(const char *format, const Args &...args)
Definition: cprintf.hh:161
const std::string & name()
Definition: trace.cc:49

Generated on Wed Dec 21 2022 10:22:32 for gem5 by doxygen 1.9.1