release/current/gcn3_2gpu__mem__helpers_8hh_source.html

 /*

  * Copyright (c) 2021 Advanced Micro Devices, Inc.

  * All rights reserved.

  *

  * Redistribution and use in source and binary forms, with or without

  * modification, are permitted provided that the following conditions are met:

  *

  * 1. Redistributions of source code must retain the above copyright notice,

  * this list of conditions and the following disclaimer.

  *

  * 2. Redistributions in binary form must reproduce the above copyright notice,

  * this list of conditions and the following disclaimer in the documentation

  * and/or other materials provided with the distribution.

  *

  * 3. Neither the name of the copyright holder nor the names of its

  * contributors may be used to endorse or promote products derived from this

  * software without specific prior written permission.

  *

  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"

  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE

  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE

  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE

  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR

  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF

  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS

  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN

  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)

  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE

  * POSSIBILITY OF SUCH DAMAGE.

  */


 #ifndef __ARCH_GCN3_GPU_MEM_HELPERS_HH__

 #define __ARCH_GCN3_GPU_MEM_HELPERS_HH__


 #include "arch/amdgpu/gcn3/insts/gpu_static_inst.hh"

 #include "arch/amdgpu/gcn3/insts/op_encodings.hh"

 #include "debug/GPUMem.hh"

 #include "gpu-compute/gpu_dyn_inst.hh"


 namespace gem5

 {


 /**
  * Helper function for instructions declared in op_encodings.
  *
  * Walks every vector lane of the instruction and, for each lane enabled in
  * the execution mask, builds a memory request of N elements of type T at
  * that lane's address and hands the resulting packet(s) to the compute
  * unit. A lane whose access crosses a cache-line boundary is split into
  * two requests. The status vector entry of each lane is set to the number
  * of requests issued for it (0 for inactive lanes, otherwise 1 or 2).
  *
  * @tparam T element type moved per lane
  * @tparam N number of T elements moved per lane
  * @param gpuDynInst dynamic instruction providing the per-lane addresses,
  *                   execution mask and data buffers
  * @param mem_req_type memory command used for every packet created
  * @param is_atomic when true, build an atomic request carrying an atomic
  *                  op functor; such a request must be word aligned and
  *                  must not cross a cache line (both are asserted)
  */
 template<typename T, int N>
 inline void
 initMemReqHelper(GPUDynInstPtr gpuDynInst, MemCmd mem_req_type,
                  bool is_atomic=false)
 {
     const int req_size = N * sizeof(T);
     const int block_size = gpuDynInst->computeUnit()->cacheLineSize();

     gpuDynInst->resetEntireStatusVector();
     for (int lane = 0; lane < Gcn3ISA::NumVecElemPerVecReg; ++lane) {
         if (!gpuDynInst->exec_mask[lane]) {
             // if lane is not active, then no pending requests
             gpuDynInst->setStatusVector(lane, 0);
             continue;
         }

         const Addr vaddr = gpuDynInst->addr[lane];

         // Start of the cache line that holds the last byte of the access.
         // If it lies above vaddr, the access straddles two lines and has
         // to be split at this address.
         const Addr split_addr = roundDown(vaddr + req_size - 1, block_size);

         assert(split_addr <= vaddr || split_addr - vaddr < block_size);
         const bool misaligned_acc = split_addr > vaddr;

         RequestPtr req = nullptr;
         if (is_atomic) {
             // make sure request is word aligned
             assert((vaddr & 0x3) == 0);

             // a given lane's atomic can't cross cache lines
             assert(!misaligned_acc);

             req = std::make_shared<Request>(vaddr, sizeof(T), 0,
                 gpuDynInst->computeUnit()->requestorId(), 0,
                 gpuDynInst->wfDynId,
                 gpuDynInst->makeAtomicOpFunctor<T>(
                     &(reinterpret_cast<T*>(gpuDynInst->a_data))[lane],
                     &(reinterpret_cast<T*>(gpuDynInst->x_data))[lane]));
         } else {
             req = std::make_shared<Request>(vaddr, req_size, 0,
                               gpuDynInst->computeUnit()->requestorId(), 0,
                               gpuDynInst->wfDynId);
         }

         // First element of this lane's slice of the data buffer.
         T *lane_data = &(reinterpret_cast<T*>(gpuDynInst->d_data))[lane * N];

         if (misaligned_acc) {
             RequestPtr req1 = nullptr, req2 = nullptr;

             gpuDynInst->setStatusVector(lane, 2);
             req->splitOnVaddr(split_addr, req1, req2);
             gpuDynInst->setRequestFlags(req1);
             gpuDynInst->setRequestFlags(req2);
             PacketPtr pkt1 = new Packet(req1, mem_req_type);
             PacketPtr pkt2 = new Packet(req2, mem_req_type);
             pkt1->dataStatic(lane_data);
             // The second half starts req1->getSize() BYTES into the lane's
             // data. Use a byte offset rather than an element index: the
             // split point need not be a multiple of sizeof(T), and an
             // element index (size / sizeof(T)) would truncate and make the
             // second packet overlap the first one's bytes.
             pkt2->dataStatic(reinterpret_cast<char*>(lane_data) +
                              req1->getSize());
             DPRINTF(GPUMem, "CU%d: WF[%d][%d]: index: %d unaligned memory "
                     "request for %#x\n", gpuDynInst->cu_id,
                     gpuDynInst->simdId, gpuDynInst->wfSlotId, lane,
                     split_addr);
             gpuDynInst->computeUnit()->sendRequest(gpuDynInst, lane, pkt1);
             gpuDynInst->computeUnit()->sendRequest(gpuDynInst, lane, pkt2);
         } else {
             gpuDynInst->setStatusVector(lane, 1);
             gpuDynInst->setRequestFlags(req);
             PacketPtr pkt = new Packet(req, mem_req_type);
             pkt->dataStatic(lane_data);
             gpuDynInst->computeUnit()->sendRequest(gpuDynInst, lane, pkt);
         }
     }
 }


 /**
  * Helper function for scalar instructions declared in op_encodings.
  *
  * Builds a request of N elements of type T at the instruction's scalar
  * address and sends it to the compute unit as a scalar request. When the
  * access crosses a cache-line boundary the request is split in two, with
  * the second packet's data placed directly after the first packet's bytes
  * in the scalar data buffer. numScalarReqs is set to the number of
  * requests sent (1 or 2).
  *
  * @tparam T element type being transferred
  * @tparam N number of T elements being transferred
  * @param gpuDynInst dynamic instruction providing the scalar address and
  *                   the scalar data buffer
  * @param mem_req_type memory command used for every packet created
  */
 template<typename T, int N>
 inline void
 initMemReqScalarHelper(GPUDynInstPtr gpuDynInst, MemCmd mem_req_type)
 {
     const int total_bytes = N * sizeof(T);
     const int line_bytes = gpuDynInst->computeUnit()->cacheLineSize();
     const Addr base_addr = gpuDynInst->scalarAddr;

     // Start of the cache line containing the last byte of the access.
     const Addr boundary = roundDown(base_addr + total_bytes - 1, line_bytes);

     assert(boundary <= base_addr || boundary - base_addr < line_bytes);
     const bool crosses_line = boundary > base_addr;

     RequestPtr whole_req = std::make_shared<Request>(
         base_addr, total_bytes, 0,
         gpuDynInst->computeUnit()->requestorId(), 0,
         gpuDynInst->wfDynId);

     if (!crosses_line) {
         // Common case: the access fits in one cache line, one request.
         gpuDynInst->numScalarReqs = 1;
         gpuDynInst->setRequestFlags(whole_req);
         PacketPtr whole_pkt = new Packet(whole_req, mem_req_type);
         whole_pkt->dataStatic(gpuDynInst->scalar_data);
         gpuDynInst->computeUnit()->sendScalarRequest(gpuDynInst, whole_pkt);
         return;
     }

     // The access straddles two cache lines: split it at the boundary.
     RequestPtr low_req, high_req;
     whole_req->splitOnVaddr(boundary, low_req, high_req);
     gpuDynInst->numScalarReqs = 2;
     gpuDynInst->setRequestFlags(low_req);
     gpuDynInst->setRequestFlags(high_req);

     PacketPtr low_pkt = new Packet(low_req, mem_req_type);
     PacketPtr high_pkt = new Packet(high_req, mem_req_type);
     low_pkt->dataStatic(gpuDynInst->scalar_data);
     // The upper half's data follows the lower half's bytes in the buffer.
     high_pkt->dataStatic(gpuDynInst->scalar_data + low_req->getSize());

     DPRINTF(GPUMem, "CU%d: WF[%d][%d]: unaligned scalar memory request for"
             " %#x\n", gpuDynInst->cu_id, gpuDynInst->simdId,
             gpuDynInst->wfSlotId, boundary);

     gpuDynInst->computeUnit()->sendScalarRequest(gpuDynInst, low_pkt);
     gpuDynInst->computeUnit()->sendScalarRequest(gpuDynInst, high_pkt);
 }


 } // namespace gem5


 #endif // __ARCH_GCN3_GPU_MEM_HELPERS_HH__

gpu_static_inst.hh

DPRINTF
#define DPRINTF(x,...)
Definition: trace.hh:186

gem5::MemCmd
Definition: packet.hh:76

gem5::Packet
A Packet is used to encapsulate a transfer between two objects in the memory system (e....
Definition: packet.hh:294

gem5::Packet::dataStatic
void dataStatic(T *p)
Set the data pointer to the following value that should not be freed.
Definition: packet.hh:1162

op_encodings.hh

gpu_dyn_inst.hh

gem5::roundDown
static constexpr T roundDown(const T &val, const U &align)
This function is used to align addresses in memory.
Definition: intmath.hh:279

gem5::Gcn3ISA::NumVecElemPerVecReg
const int NumVecElemPerVecReg(64)

gem5::MipsISA::vaddr
vaddr
Definition: pra_constants.hh:278

gem5::probing::Packet
ProbePointArg< PacketInfo > Packet
Packet probe point.
Definition: mem.hh:109

gem5
Reference material can be found at the JEDEC website: UFS standard http://www.jedec....
Definition: gpu_translation_state.hh:38

gem5::RequestPtr
std::shared_ptr< Request > RequestPtr
Definition: request.hh:92

gem5::GPUDynInstPtr
std::shared_ptr< GPUDynInst > GPUDynInstPtr
Definition: misc.hh:49

gem5::Addr
uint64_t Addr
Address type This will probably be moved somewhere else in the near future.
Definition: types.hh:147

gem5::initMemReqHelper
void initMemReqHelper(GPUDynInstPtr gpuDynInst, MemCmd mem_req_type, bool is_atomic=false)
Helper function for instructions declared in op_encodings.
Definition: gpu_mem_helpers.hh:51

gem5::initMemReqScalarHelper
void initMemReqScalarHelper(GPUDynInstPtr gpuDynInst, MemCmd mem_req_type)
Helper function for scalar instructions declared in op_encodings.
Definition: gpu_mem_helpers.hh:140