Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add heat2d to workshop-examples on refactored examples #2

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
File renamed without changes.
File renamed without changes.
File renamed without changes.
67 changes: 67 additions & 0 deletions WorkshopPlasmaPepscOct2024/heatEquation2D/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
#
# Copyright 2023 Benjamin Worpitz, Jan Stephan
# SPDX-License-Identifier: ISC
#

################################################################################
# Required CMake version.

cmake_minimum_required(VERSION 3.22)

set_property(GLOBAL PROPERTY USE_FOLDERS ON)

################################################################################
# Project.

set(_TARGET_NAME heatEquation2D)

project(${_TARGET_NAME} LANGUAGES CXX)


################################################################################
# PNGwriter
################################################################################

# find PNGwriter installation
find_package(PNGwriter 0.7.0 CONFIG)

if(PNGwriter_FOUND)
set(PNGWRITER_ENABLED True)
else()
set(PNGWRITER_ENABLED False)
endif()

#-------------------------------------------------------------------------------
# Find alpaka.

if(NOT TARGET alpaka::alpaka)
option(alpaka_USE_SOURCE_TREE "Use alpaka's source tree instead of an alpaka installation" OFF)

if(alpaka_USE_SOURCE_TREE)
# Don't build the examples recursively
set(alpaka_BUILD_EXAMPLES OFF)
add_subdirectory("${CMAKE_CURRENT_LIST_DIR}/../.." "${CMAKE_BINARY_DIR}/alpaka")
else()
find_package(alpaka REQUIRED)
endif()
endif()

#-------------------------------------------------------------------------------
# Add executable.

alpaka_add_executable(
${_TARGET_NAME}
src/heatEquation2D.cpp)
target_link_libraries(
${_TARGET_NAME}
PUBLIC alpaka::alpaka)
if(PNGwriter_FOUND)
target_link_libraries(
${_TARGET_NAME}
PRIVATE PNGwriter::PNGwriter)
target_compile_definitions(${_TARGET_NAME} PRIVATE PNGWRITER_ENABLED)
endif()

set_target_properties(${_TARGET_NAME} PROPERTIES FOLDER example)

add_test(NAME ${_TARGET_NAME} COMMAND ${_TARGET_NAME})
86 changes: 86 additions & 0 deletions WorkshopPlasmaPepscOct2024/heatEquation2D/src/BoundaryKernel.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
/* Copyright 2024 Tapish Narwal
* SPDX-License-Identifier: ISC
*/

#pragma once

#include "analyticalSolution.hpp"
#include "helpers.hpp"

#include <alpaka/alpaka.hpp>

//! alpaka version of explicit finite-difference 1d heat equation solver
//!
//! Applies boundary conditions
//! forward difference in t and second-order central difference in x
//!
//! \param uBuf grid values of u for each x, y and the current value of t:
//! u(x, y, t) | t = t_current
//! \param chunkSize
//! \param pitch
//! \param dx step in x
//! \param dy step in y
//! \param dt step in t
struct BoundaryKernel
{
template<typename TAcc, typename TChunk>
ALPAKA_FN_ACC auto operator()(
TAcc const& acc,
double* const uBuf,
TChunk const chunkSize,
TChunk const pitch,
uint32_t step,
double const dx,
double const dy,
double const dt) const -> void
{
using Dim = alpaka::DimInt<2u>;
using Idx = uint32_t;

// Get extents(dimensions)
auto const gridBlockExtent = alpaka::getWorkDiv<alpaka::Grid, alpaka::Blocks>(acc);
auto const blockThreadExtent = alpaka::getWorkDiv<alpaka::Block, alpaka::Threads>(acc);
auto const numThreadsPerBlock = blockThreadExtent.prod();

// Get indexes
auto const gridBlockIdx = alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc);
auto const blockThreadIdx = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc);
auto const threadIdx1D = alpaka::mapIdx<1>(blockThreadIdx, blockThreadExtent)[0u];
auto const blockStartIdx = gridBlockIdx * chunkSize;

// Lambda function to apply boundary conditions
auto applyBoundary = [&](auto const& globalIdxStart, auto const length, bool isRow)
{
for(auto i = threadIdx1D; i < length; i += numThreadsPerBlock)
{
auto idx2D = globalIdxStart + (isRow ? alpaka::Vec<Dim, Idx>{0, i} : alpaka::Vec<Dim, Idx>{i, 0});
auto elem = getElementPtr(uBuf, idx2D, pitch);
*elem = exactSolution(idx2D[1] * dx, idx2D[0] * dy, step * dt);
}
};

// Apply boundary conditions for the top row
if(gridBlockIdx[0] == 0)
{
applyBoundary(blockStartIdx + alpaka::Vec<Dim, Idx>{0, 1}, chunkSize[1], true);
}

// Apply boundary conditions for the bottom row
if(gridBlockIdx[0] == gridBlockExtent[0] - 1)
{
applyBoundary(blockStartIdx + alpaka::Vec<Dim, Idx>{chunkSize[0] + 1, 1}, chunkSize[1], true);
}

// Apply boundary conditions for the left column
if(gridBlockIdx[1] == 0)
{
applyBoundary(blockStartIdx + alpaka::Vec<Dim, Idx>{1, 0}, chunkSize[0], false);
}

// Apply boundary conditions for the right column
if(gridBlockIdx[1] == gridBlockExtent[1] - 1)
{
applyBoundary(blockStartIdx + alpaka::Vec<Dim, Idx>{1, chunkSize[1] + 1}, chunkSize[0], false);
}
}
};
89 changes: 89 additions & 0 deletions WorkshopPlasmaPepscOct2024/heatEquation2D/src/StencilKernel.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
/* Copyright 2024 Tapish Narwal
* SPDX-License-Identifier: ISC
*/

#pragma once

#include "helpers.hpp"

#include <alpaka/alpaka.hpp>

//! alpaka version of explicit finite-difference 2D heat equation solver
//!
//! \tparam T_SharedMemSize1D size of the shared memory box
//!
//! Solving equation u_t(x, t) = u_xx(x, t) + u_yy(y, t) using a simple explicit scheme with
//! forward difference in t and second-order central difference in x and y
//!
//! \param uCurrBuf Current buffer with grid values of u for each x, y pair and the current value of t:
//! u(x, y, t) | t = t_current
//! \param uNextBuf resulting grid values of u for each x, y pair and the next value of t:
//! u(x, y, t) | t = t_current + dt
//! \param chunkSize The size of the chunk or tile that the user divides the problem into. This defines the size of the
//! workload handled by each thread block.
//! \param pitchCurr The pitch (or stride) in memory corresponding to the TDim grid in the accelerator's memory.
//! This is used to calculate memory offsets when accessing elements in the current buffer.
//! \param pitchNext The pitch used to calculate memory offsets when accessing elements in the next buffer.
//! \param dx step in x
//! \param dy step in y
//! \param dt step in t
template<size_t T_SharedMemSize1D>
struct StencilKernel
{
template<typename TAcc, typename TDim, typename TIdx>
ALPAKA_FN_ACC auto operator()(
TAcc const& acc,
double const* const uCurrBuf,
double* const uNextBuf,
alpaka::Vec<TDim, TIdx> const chunkSize,
alpaka::Vec<TDim, TIdx> const pitchCurr,
alpaka::Vec<TDim, TIdx> const pitchNext,
double const dx,
double const dy,
double const dt) const -> void
{
auto& sdata = alpaka::declareSharedVar<double[T_SharedMemSize1D], __COUNTER__>(acc);

// Get extents(dimensions)
auto const blockThreadExtent = alpaka::getWorkDiv<alpaka::Block, alpaka::Threads>(acc);
auto const numThreadsPerBlock = blockThreadExtent.prod();

// Get indexes
auto const gridBlockIdx = alpaka::getIdx<alpaka::Grid, alpaka::Blocks>(acc);
auto const blockThreadIdx = alpaka::getIdx<alpaka::Block, alpaka::Threads>(acc);
auto const threadIdx1D = alpaka::mapIdx<1>(blockThreadIdx, blockThreadExtent)[0u];
auto const blockStartIdx = gridBlockIdx * chunkSize;

constexpr alpaka::Vec<TDim, TIdx> halo{2, 2};

for(auto i = threadIdx1D; i < T_SharedMemSize1D; i += numThreadsPerBlock)
{
auto idx2d = alpaka::mapIdx<2>(alpaka::Vec(i), chunkSize + halo);
idx2d = idx2d + blockStartIdx;
auto elem = getElementPtr(uCurrBuf, idx2d, pitchCurr);
sdata[i] = *elem;
}

alpaka::syncBlockThreads(acc);

// Each kernel executes one element
double const rX = dt / (dx * dx);
double const rY = dt / (dy * dy);

// go over only core cells
for(auto i = threadIdx1D; i < chunkSize.prod(); i += numThreadsPerBlock)
{
auto idx2D = alpaka::mapIdx<2>(alpaka::Vec(i), chunkSize);
idx2D = idx2D + alpaka::Vec<TDim, TIdx>{1, 1}; // offset for halo above and to the left
auto localIdx1D = alpaka::mapIdx<1>(idx2D, chunkSize + halo)[0u];


auto bufIdx = idx2D + blockStartIdx;
auto elem = getElementPtr(uNextBuf, bufIdx, pitchNext);

*elem = sdata[localIdx1D] * (1.0 - 2.0 * rX - 2.0 * rY) + sdata[localIdx1D - 1] * rX
+ sdata[localIdx1D + 1] * rX + sdata[localIdx1D - chunkSize[1] - halo[1]] * rY
+ sdata[localIdx1D + chunkSize[1] + halo[1]] * rY;
}
}
};
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
/* Copyright 2020 Tapish Narwal
* SPDX-License-Identifier: ISC
*/

#pragma once

#include <alpaka/alpaka.hpp>

#include <cmath>

//! Exact solution to the test problem
//! u_t(x, y, t) = u_xx(x, t) + u_yy(y, t), x in [0, 1], y in [0, 1], t in [0, T]
//!
//! \param x value of x
//! \param x value of y
//! \param t value of t
ALPAKA_FN_HOST_ACC auto exactSolution(double const x, double const y, double const t) -> double
{
constexpr double pi = alpaka::math::constants::pi;
return std::exp(-pi * pi * t) * (std::sin(pi * x) + std::sin(pi * y));
}

//! Valdidate calculated solution in the buffer to the analytical solution at t=tMax
//!
//! \param buffer buffer holding the solution at t=tMax
//! \param extent extents of the buffer
//! \param dx
//! \param dy
//! \param tMax time at simulation end
template<typename T_Buffer, typename T_Extent>
auto validateSolution(
T_Buffer const& buffer,
T_Extent const& extent,
double const dx,
double const dy,
double const tMax) -> std::pair<bool, double>
{
// Calculate error
double maxError = 0.0;
for(uint32_t j = 1; j < extent[0] - 1; ++j)
{
for(uint32_t i = 0; i < extent[1]; ++i)
{
auto const error = std::abs(buffer.data()[j * extent[1] + i] - exactSolution(i * dx, j * dy, tMax));
maxError = std::max(maxError, error);
}
}

constexpr double errorThreshold = 1e-5;
return std::make_pair(maxError < errorThreshold, maxError);
}

//! Initialize the buffer to the analytical solution at t=0
//!
//! \param buffer buffer holding the solution at tMax
//! \param extent extents of the buffer
//! \param dx
//! \param dy
template<typename TBuffer>
auto initalizeBuffer(TBuffer& buffer, double const dx, double const dy) -> void
{
auto extents = alpaka::getExtents(buffer);
// Apply initial conditions for the test problem
for(uint32_t j = 0; j < extents[0]; ++j)
{
for(uint32_t i = 0; i < extents[1]; ++i)
{
buffer.data()[j * extents[1] + i] = exactSolution(i * dx, j * dy, 0.0);
}
}
}
Loading