Make test for paged mem

CyprienBosserelle committed Oct 11, 2024
1 parent 9cb83cb commit 1b6c499
Showing 4 changed files with 100 additions and 7 deletions.
1 change: 1 addition & 0 deletions src/Arrays.h
@@ -89,6 +89,7 @@ struct RiverInfo
    int nribmax; // size of (max number of) rivers in one block
    int* Xbidir; // array of block id for each river, size (nburmax, nribmax)
    int* Xridib; // array of river id in each block, size (nburmax, nribmax)
+   float* qnow;

};

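The diff does not document qnow; judging by the name and the neighbouring per-river arrays, it plausibly holds the current discharge (q) for each river, which the mapped-memory helpers added below would let the CPU update in place for the GPU. A hypothetical allocation sketch under that assumption (the sizing and the pairing with AllocateMappedMemCPU are guesses, not shown in the commit):

    // Hypothetical usage -- not in the commit. XRiver and XParam stand in for
    // the real model objects; sizing mirrors Xbidir/Xridib above.
    RiverInfo XRiver;
    AllocateMappedMemCPU(XRiver.nburmax, XRiver.nribmax, XParam.GPUDEVICE, XRiver.qnow);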
20 changes: 13 additions & 7 deletions src/MemManagement.cu
@@ -396,39 +396,45 @@ template <class T> void AllocateMappedMemCPU(int nx, int ny, int gpudevice, T*& z
        fprintf(stderr, "Device %d does not support mapping CPU host memory!\n", gpudevice);
        bPinGenericMemory = false;
    }

+   size_t bytes = nx * ny * sizeof(T);
    if (bPinGenericMemory)
    {
-       size_t bytes = nx * ny * sizeof(T);
-       T* a_UA = (float*)malloc(bytes + MEMORY_ALIGNMENT);
+       T* a_UA = (T*)malloc(bytes + MEMORY_ALIGNMENT);

        // We need to ensure memory is aligned to 4K (so we will need to pad memory accordingly)
-       z = (float*)ALIGN_UP(a_UA, MEMORY_ALIGNMENT);
+       z = (T*)ALIGN_UP(a_UA, MEMORY_ALIGNMENT);

-       checkCudaErrors(cudaHostRegister(z, bytes, cudaHostRegisterMapped));
+       CUDA_CHECK(cudaHostRegister(z, bytes, cudaHostRegisterMapped));
    }
    else
    {
        //flags = cudaHostAllocMapped;
-       checkCudaErrors(cudaHostAlloc((void**)&z, bytes, cudaHostAllocMapped));
+       CUDA_CHECK(cudaHostAlloc((void**)&z, bytes, cudaHostAllocMapped));
    }
}
template void AllocateMappedMemCPU<int>(int nx, int ny, int gpudevice, int*& z);
template void AllocateMappedMemCPU<float>(int nx, int ny, int gpudevice, float*& z);
template void AllocateMappedMemCPU<double>(int nx, int ny, int gpudevice, double*& z);
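MEMORY_ALIGNMENT and ALIGN_UP are not shown in this commit. A plausible definition, matching the 4 KB alignment the comment asks for and the pattern used in NVIDIA's simpleZeroCopy sample (an assumption, not confirmed by the diff). Note also that the unaligned pointer a_UA is discarded, so the pinned-generic allocation cannot later be free()d as written.

    // Assumed definitions (not part of this commit), after CUDA's simpleZeroCopy sample:
    // round a pointer up to the next 4 KB boundary so cudaHostRegister can page-lock it.
    #define MEMORY_ALIGNMENT 4096
    #define ALIGN_UP(x, size) (((size_t)(x) + ((size) - 1)) & (~((size) - 1)))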

template <class T> void AllocateMappedMemGPU(int nx, int ny, int gpudevice, T*& z_g, T* z)
{
-   checkCudaErrors(cudaHostGetDevicePointer((void**)&z_g, (void*)z, 0));
+   CUDA_CHECK(cudaHostGetDevicePointer((void**)&z_g, (void*)z, 0));
}
template void AllocateMappedMemGPU<int>(int nx, int ny, int gpudevice, int*& z_g, int* z);
template void AllocateMappedMemGPU<float>(int nx, int ny, int gpudevice, float*& z_g, float* z);
template void AllocateMappedMemGPU<double>(int nx, int ny, int gpudevice, double*& z_g, double* z);
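A minimal sketch of the two helpers used together, assuming the CUDA context has host mapping enabled (cudaSetDeviceFlags must run before the context is created; whether the host code already does this is not shown here):

    // Sketch: a zero-copy array visible to both host and device.
    float *h_arr = nullptr, *d_arr = nullptr;
    CUDA_CHECK(cudaSetDeviceFlags(cudaDeviceMapHost));     // enable mapping before context creation
    AllocateMappedMemCPU(nx, ny, gpudevice, h_arr);        // pinned, mapped host allocation
    AllocateMappedMemGPU(nx, ny, gpudevice, d_arr, h_arr); // device alias of the same memory
    // Host writes through h_arr are visible to kernels reading d_arr without cudaMemcpy.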


template <class T> void AllocateGPU(int nx, int ny, T*& z_g)
4 changes: 4 additions & 0 deletions src/MemManagement.h
@@ -25,6 +25,9 @@ template <class T> void ReallocArray(int nblk, int blksize, EvolvingP<T>& Ev);
template <class T> void ReallocArray(int nblk, int blksize, EvolvingP_M<T>& Ev);
template <class T> void ReallocArray(int nblk, int blksize, Param XParam, Model<T>& XModel);

+template <class T> void AllocateMappedMemCPU(int nx, int ny, int gpudevice, T*& z);

template <class T> __host__ void FillCPU(int nx, int ny, T fillval, T*& zb);

int memloc(Param XParam, int i, int j, int ib);
@@ -33,5 +33,6 @@ __host__ __device__ int memloc(int halowidth, int blkmemwidth, int i, int j, int

template <class T> void AllocateGPU(int nblk, int blksize, Param XParam, Model<T>& XModel);
template <class T> void AllocateGPU(int nx, int ny, T*& z_g);
+template <class T> void AllocateMappedMemGPU(int nx, int ny, int gpudevice, T*& z_g, T* z);
// End of global definition
#endif
82 changes: 82 additions & 0 deletions src/Testing.cu
@@ -315,6 +315,12 @@ template <class T> bool Testing(Param XParam, Forcing<float> XForcing, Model<T>
        result = (wallbndleft & wallbndright & wallbndbot & wallbndtop) ? "successful" : "failed";
        log("\t\tAOI bnd wall test : " + result);
    }

+   if (mytest == 993)
+   {
+       // pinned (page-locked) mapped memory test
+       TestPinMem(XParam, XModel, XModel_g);
+   }
    if (mytest == 994)
    {
        Testzbinit(XParam, XForcing, XModel, XModel_g);
@@ -4847,6 +4853,82 @@ template <class T> int TestAIObnd(Param XParam, Model<T> XModel, Model<T> XModel
}


// Add a constant offset to every element of z, one thread per element
template <class T> __global__ void vectoroffsetGPU(int nx, T offset, T* z)
{
    int idx = blockIdx.x * blockDim.x + threadIdx.x;

    if (idx < nx)
    {
        z[idx] = z[idx] + offset;
    }
}

template <class T> int TestPinMem(Param XParam, Model<T> XModel, Model<T> XModel_g)
{
    T *zf, *zf_g, *zf_recov;

    int nx = 32;
    int ny = 1;

    int nelem = nx * ny;

    // Mapped (zero-copy) host allocation, plus a plain host buffer to copy results into
    AllocateMappedMemCPU(nx, ny, XParam.GPUDEVICE, zf);
    AllocateCPU(nx, ny, zf_recov);

    // Fill the mapped array with a recognisable pattern
    for (int i = 0; i < nx; i++)
    {
        for (int j = 0; j < ny; j++)
        {
            zf[i + j * nx] = i + j * nx + T(0.25);
        }
    }

    // Device-side alias of the same memory
    AllocateMappedMemGPU(nx, ny, XParam.GPUDEVICE, zf_g, zf);

    dim3 block(16);
    dim3 grid((unsigned int)ceil(nelem / (float)block.x));

    vectoroffsetGPU<<<grid, block>>>(nelem, T(1.0), zf_g);
    CUDA_CHECK(cudaDeviceSynchronize());

    CUDA_CHECK(cudaMemcpy(zf_recov, zf_g, nx * ny * sizeof(T), cudaMemcpyDeviceToHost));

    // zf_g aliases zf, so the kernel's writes should already be visible in zf;
    // the explicit copy into zf_recov should therefore match zf exactly.
    T checkrem = T(0.0);

    for (int i = 0; i < nx; i++)
    {
        for (int j = 0; j < ny; j++)
        {
            // std::abs to avoid the int overload of ::abs
            checkrem = checkrem + std::abs(zf[i + j * nx] - zf_recov[i + j * nx]);
        }
    }

    int modelgood = checkrem < 1.e-6f;

    if (checkrem > 1.e-6f)
    {
        printf("\n Test Failed error = %e \n", checkrem);
    }
    else
    {
        printf("\n Test Success error = %e \n", checkrem);
    }
    return modelgood;
}
template int TestPinMem<float>(Param XParam, Model<float> XModel, Model<float> XModel_g);
template int TestPinMem<double>(Param XParam, Model<double> XModel, Model<double> XModel_g);
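TestPinMem never frees its buffers. A hedged cleanup sketch, assuming the cudaHostAlloc branch was taken and that AllocateCPU is malloc-based; the cudaHostRegister branch would instead need cudaHostUnregister(zf) plus free() on the original unaligned pointer, which AllocateMappedMemCPU currently discards:

    // Hypothetical cleanup -- not in the commit:
    CUDA_CHECK(cudaFreeHost(zf)); // releases the cudaHostAlloc'd mapped buffer
    free(zf_recov);               // plain host buffer (assuming AllocateCPU uses malloc)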




template <class T> Forcing<float> MakValleyBathy(Param XParam, T slope, bool bottop, bool flip)
{
//
