Make test for paged mem

CyprienBosserelle committed Oct 11, 2024
1 parent 9cb83cb commit 1b6c499
Showing 4 changed files with 100 additions and 7 deletions.
1 change: 1 addition & 0 deletions src/Arrays.h
@@ -89,6 +89,7 @@ struct RiverInfo
    int nribmax; // size of (max number of) rivers in one block
    int* Xbidir; // array of block id for each river, size (nburmax, nribmax)
    int* Xridib; // array of river id in each block, size (nburmax, nribmax)
+   float* qnow;

};

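The diff does not document qnow; judging by the name and the neighbouring per-river arrays, it plausibly holds the current discharge (q) for each river, which the mapped-memory helpers added below would let the CPU update in place for the GPU. A hypothetical allocation sketch under that assumption (the sizing and the pairing with AllocateMappedMemCPU are guesses, not shown in the commit):

    // Hypothetical usage -- not in the commit. XRiver and XParam stand in for
    // the real model objects; sizing mirrors Xbidir/Xridib above.
    RiverInfo XRiver;
    AllocateMappedMemCPU(XRiver.nburmax, XRiver.nribmax, XParam.GPUDEVICE, XRiver.qnow);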
20 changes: 13 additions & 7 deletions src/MemManagement.cu
@@ -396,39 +396,45 @@ template <class T> void AllocateMappedMemCPU(int nx, int ny, int gpudevice, T*& z
        fprintf(stderr, "Device %d does not support mapping CPU host memory!\n", gpudevice);
        bPinGenericMemory = false;
    }

+   size_t bytes = nx * ny * sizeof(T);
    if (bPinGenericMemory)
    {
-       size_t bytes = nx * ny * sizeof(T);
-       T* a_UA = (float*)malloc(bytes + MEMORY_ALIGNMENT);
+       T* a_UA = (T*)malloc(bytes + MEMORY_ALIGNMENT);

        // We need to ensure memory is aligned to 4K (so we will need to pad memory accordingly)
-       z = (float*)ALIGN_UP(a_UA, MEMORY_ALIGNMENT);
+       z = (T*)ALIGN_UP(a_UA, MEMORY_ALIGNMENT);

-       checkCudaErrors(cudaHostRegister(z, bytes, cudaHostRegisterMapped));
+       CUDA_CHECK(cudaHostRegister(z, bytes, cudaHostRegisterMapped));
    }
    else
    {
        //flags = cudaHostAllocMapped;
-       checkCudaErrors(cudaHostAlloc((void**)&z, bytes, cudaHostAllocMapped));
+       CUDA_CHECK(cudaHostAlloc((void**)&z, bytes, cudaHostAllocMapped));
    }
}
template void AllocateMappedMemCPU<int>(int nx, int ny, int gpudevice, int*& z);
template void AllocateMappedMemCPU<float>(int nx, int ny, int gpudevice, float*& z);
template void AllocateMappedMemCPU<double>(int nx, int ny, int gpudevice, double*& z);
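MEMORY_ALIGNMENT and ALIGN_UP are not shown in this commit. A plausible definition, matching the 4 KB alignment the comment asks for and the pattern used in NVIDIA's simpleZeroCopy sample (an assumption, not confirmed by the diff). Note also that the unaligned pointer a_UA is discarded, so the pinned-generic allocation cannot later be free()d as written.

    // Assumed definitions (not part of this commit), after CUDA's simpleZeroCopy sample:
    // round a pointer up to the next 4 KB boundary so cudaHostRegister can page-lock it.
    #define MEMORY_ALIGNMENT 4096
    #define ALIGN_UP(x, size) (((size_t)(x) + ((size) - 1)) & (~((size) - 1)))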

template <class T> void AllocateMappedMemGPU(int nx, int ny, int gpudevice, T*& z_g, T* z)
{
-   checkCudaErrors(cudaHostGetDevicePointer((void**)&z_g, (void*)z, 0));
+   CUDA_CHECK(cudaHostGetDevicePointer((void**)&z_g, (void*)z, 0));
}
template void AllocateMappedMemGPU<int>(int nx, int ny, int gpudevice, int*& z_g, int* z);
template void AllocateMappedMemGPU<float>(int nx, int ny, int gpudevice, float*& z_g, float* z);
template void AllocateMappedMemGPU<double>(int nx, int ny, int gpudevice, double*& z_g, double* z);
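A minimal sketch of the two helpers used together, assuming the CUDA context has host mapping enabled (cudaSetDeviceFlags must run before the context is created; whether the host code already does this is not shown here):

    // Sketch: a zero-copy array visible to both host and device.
    float *h_arr = nullptr, *d_arr = nullptr;
    CUDA_CHECK(cudaSetDeviceFlags(cudaDeviceMapHost));     // enable mapping before context creation
    AllocateMappedMemCPU(nx, ny, gpudevice, h_arr);        // pinned, mapped host allocation
    AllocateMappedMemGPU(nx, ny, gpudevice, d_arr, h_arr); // device alias of the same memory
    // Host writes through h_arr are visible to kernels reading d_arr without cudaMemcpy.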


template <class T> void AllocateGPU(int nx, int ny, T*& z_g)
4 changes: 4 additions & 0 deletions src/MemManagement.h
@@ -25,6 +25,9 @@ template <class T> void ReallocArray(int nblk, int blksize, EvolvingP<T>& Ev);
template <class T> void ReallocArray(int nblk, int blksize, EvolvingP_M<T>& Ev);
template <class T> void ReallocArray(int nblk, int blksize, Param XParam, Model<T>& XModel);

+template <class T> void AllocateMappedMemCPU(int nx, int ny, int gpudevice, T*& z);

template <class T> __host__ void FillCPU(int nx, int ny, T fillval, T*& zb);

int memloc(Param XParam, int i, int j, int ib);
@@ -33,5 +33,6 @@ __host__ __device__ int memloc(int halowidth, int blkmemwidth, int i, int j, int

template <class T> void AllocateGPU(int nblk, int blksize, Param XParam, Model<T>& XModel);
template <class T> void AllocateGPU(int nx, int ny, T*& z_g);
+template <class T> void AllocateMappedMemGPU(int nx, int ny, int gpudevice, T*& z_g, T* z);
// End of global definition
#endif
82 changes: 82 additions & 0 deletions src/Testing.cu
@@ -315,6 +315,12 @@ template <class T> bool Testing(Param XParam, Forcing<float> XForcing, Model<T>
        result = (wallbndleft & wallbndright & wallbndbot & wallbndtop) ? "successful" : "failed";
        log("\t\tAOI bnd wall test : " + result);
    }

+   if (mytest == 993)
+   {
+       // pinned (page-locked) mapped memory test
+       TestPinMem(XParam, XModel, XModel_g);
+   }
    if (mytest == 994)
    {
        Testzbinit(XParam, XForcing, XModel, XModel_g);
@@ -4847,6 +4853,82 @@ template <class T> int TestAIObnd(Param XParam, Model<T> XModel, Model<T> XModel
}


// Add a constant offset to every element of z, one thread per element
template <class T> __global__ void vectoroffsetGPU(int nx, T offset, T* z)
{
    int idx = blockIdx.x * blockDim.x + threadIdx.x;

    if (idx < nx)
    {
        z[idx] = z[idx] + offset;
    }
}

template <class T> int TestPinMem(Param XParam, Model<T> XModel, Model<T> XModel_g)
{
    T *zf, *zf_g, *zf_recov;

    int nx = 32;
    int ny = 1;

    int nelem = nx * ny;

    // Mapped (zero-copy) host allocation, plus a plain host buffer to copy results into
    AllocateMappedMemCPU(nx, ny, XParam.GPUDEVICE, zf);
    AllocateCPU(nx, ny, zf_recov);

    // Fill the mapped array with a recognisable pattern
    for (int i = 0; i < nx; i++)
    {
        for (int j = 0; j < ny; j++)
        {
            zf[i + j * nx] = i + j * nx + T(0.25);
        }
    }

    // Device-side alias of the same memory
    AllocateMappedMemGPU(nx, ny, XParam.GPUDEVICE, zf_g, zf);

    dim3 block(16);
    dim3 grid((unsigned int)ceil(nelem / (float)block.x));

    vectoroffsetGPU<<<grid, block>>>(nelem, T(1.0), zf_g);
    CUDA_CHECK(cudaDeviceSynchronize());

    CUDA_CHECK(cudaMemcpy(zf_recov, zf_g, nx * ny * sizeof(T), cudaMemcpyDeviceToHost));

    // zf_g aliases zf, so the kernel's writes should already be visible in zf;
    // the explicit copy into zf_recov should therefore match zf exactly.
    T checkrem = T(0.0);

    for (int i = 0; i < nx; i++)
    {
        for (int j = 0; j < ny; j++)
        {
            // std::abs to avoid the int overload of ::abs
            checkrem = checkrem + std::abs(zf[i + j * nx] - zf_recov[i + j * nx]);
        }
    }

    int modelgood = checkrem < 1.e-6f;

    if (checkrem > 1.e-6f)
    {
        printf("\n Test Failed error = %e \n", checkrem);
    }
    else
    {
        printf("\n Test Success error = %e \n", checkrem);
    }
    return modelgood;
}
template int TestPinMem<float>(Param XParam, Model<float> XModel, Model<float> XModel_g);
template int TestPinMem<double>(Param XParam, Model<double> XModel, Model<double> XModel_g);
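TestPinMem never frees its buffers. A hedged cleanup sketch, assuming the cudaHostAlloc branch was taken and that AllocateCPU is malloc-based; the cudaHostRegister branch would instead need cudaHostUnregister(zf) plus free() on the original unaligned pointer, which AllocateMappedMemCPU currently discards:

    // Hypothetical cleanup -- not in the commit:
    CUDA_CHECK(cudaFreeHost(zf)); // releases the cudaHostAlloc'd mapped buffer
    free(zf_recov);               // plain host buffer (assuming AllocateCPU uses malloc)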




template <class T> Forcing<float> MakValleyBathy(Param XParam, T slope, bool bottop, bool flip)
{
//
