update

PharmCat · Feb 15, 2024 · f01515e · f01515e
1 parent 7e08f4b
commit f01515e
Show file tree

Hide file tree

Showing 10 changed files with 202 additions and 165 deletions.
diff --git a/examples/cellist.jl b/examples/cellist.jl
@@ -13,8 +13,8 @@ H   = 2h
 h⁻¹ = 1/h
 H⁻¹ = 1/H
 dist = H
-cellsize = (H, H)
+cellsize = (dist, dist)
 
-system  = GPUCellList(cpupoints, cellsize, H)
+system  = GPUCellList(cpupoints, cellsize, dist)
 
 update!(system)
diff --git a/examples/profile.jl b/examples/profile.jl
@@ -24,10 +24,10 @@ c₀  = sqrt(g * 2) * 20
 Δt  = dt  = 1e-5
 δᵩ  = 0.1
 CFL = 0.2
-cellsize = (1.2*H, 1.2*H)
+cellsize = (dist, dist)
 sphkernel    = WendlandC2(Float64, 2)
 
-system  = GPUCellListSPH.GPUCellList(cpupoints, cellsize, H)
+system  = GPUCellListSPH.GPUCellList(cpupoints, cellsize, dist)
 
 ρ           = cu(Array([DF_FLUID.Rhop;DF_BOUND.Rhop]))
 ml          = cu(append!(ones(Float64, size(DF_FLUID, 1)), zeros(Float64, size(DF_BOUND, 1))))

diff --git a/examples/sphproblem.jl b/examples/sphproblem.jl
@@ -22,10 +22,10 @@ c₀  = sqrt(g * 2) * 20
 Δt  = dt  = 1e-5
 δᵩ  = 0.1
 CFL = 0.2
-cellsize = (1.2*H, 1.2*H)
+cellsize = (dist, dist)
 sphkernel    = WendlandC2(Float64, 2)
 
-system  = GPUCellListSPH.GPUCellList(cpupoints, cellsize, H)
+system  = GPUCellListSPH.GPUCellList(cpupoints, cellsize, dist)
 
 ρ           = cu(Array([DF_FLUID.Rhop;DF_BOUND.Rhop]))
 ml          = cu(append!(ones(Float64, size(DF_FLUID, 1)), zeros(Float64, size(DF_BOUND, 1))))

diff --git a/examples/timesolve.jl b/examples/timesolve.jl
@@ -16,7 +16,7 @@ h   = 1.2 * sqrt(2) * dx    # smoothinl length
 H   = 2h                    # kernel support length
 h⁻¹ = 1/h
 H⁻¹ = 1/H
-dist = H                    # distance for neighborlist
+dist = 1.2H                    # distance for neighborlist
 ρ₀  = 1000.0                 
 m₀  = ρ₀ * dx * dx
 α   = 0.01                  # Artificial viscosity constant
@@ -26,10 +26,10 @@ c₀  = sqrt(g * 2) * 20      # Speed of sound
 Δt  = dt  = 1e-5
 δᵩ  = 0.1                   # Coefficient for density diffusion
 CFL = 0.2                   # Courant–Friedrichs–Lewy condition for Δt stepping
-cellsize = (1.2*H, 1.2*H)           # cell size
+cellsize = (dist, dist)           # cell size
 sphkernel    = WendlandC2(Float64, 2) # SPH kernel from SPHKernels.jl
 
-system  = GPUCellList(cpupoints, cellsize, H)
+system  = GPUCellList(cpupoints, cellsize, dist)
 N       = length(cpupoints)
 ρ       = CUDA.zeros(Float64, N)
 copyto!(ρ, Array([DF_FLUID.Rhop;DF_BOUND.Rhop]))
@@ -51,9 +51,9 @@ sphprob =  SPHProblem(system, h, H, sphkernel, ρ, v, ml, gf, isboundary, ρ₀,
 # writetime - write vtp file each interval
 # path - path to vtp files
 # pvc - make paraview collection
-timesolve!(sphprob; batch = 10, timeframe = 1.0, writetime = 0.02, path = "D:/vtk/", pvc = true)
+timesolve!(sphprob; batch = 10, timeframe = 2.0, writetime = 0.025, path = "D:/vtk/", pvc = true)
 
 # timestepping adjust dt
 # time lims for dt
 # now Δt adjust often buggy
-timesolve!(sphprob; batch = 50, timeframe = 10.0, writetime = 0.02, path = "D:/vtk/", pvc = true, timestepping = true) 
+#timesolve!(sphprob; batch = 50, timeframe = 3.5, writetime = 0.01, path = "D:/vtk/", pvc = true, timestepping = true, timelims = (eps(), 1e-5)) 
diff --git a/src/GPUCellListSPH.jl b/src/GPUCellListSPH.jl
@@ -8,7 +8,7 @@ import Plots: Animation
 
 export GPUCellList, update!, partialupdate!, neighborlist
 
-export SPHProblem, stepsolve!, timesolve!, get_points, get_velocity, get_density, get_acceleration, ∑∇W_2d!, ∑W_2d!, ∂ρ∂tDDT!, ∂Π∂t!, ∂v∂t!
+export SPHProblem, stepsolve!, timesolve!, get_points, get_velocity, get_density, get_pressure, get_acceleration, ∑∇W_2d!, ∑W_2d!, ∂ρ∂tDDT!, ∂Π∂t!, ∂v∂t!
 
 #include("sphkernels.jl")
 include("gpukernels.jl")

diff --git a/src/gpukernels.jl b/src/gpukernels.jl
@@ -219,16 +219,20 @@ end
 #####################################################################
 # SPH
 #####################################################################
-function kernel_∑W_2d!(sumW, pairs, sphkernel, H⁻¹) 
+function kernel_∑W_2d!(∑W, pairs, points, sphkernel, H⁻¹) 
     index = (blockIdx().x - Int32(1)) * blockDim().x + threadIdx().x
     if index <= length(pairs)
         pair  = pairs[index]
         pᵢ    = pair[1]; pⱼ = pair[2]; d = pair[3]
         if !isnan(d)
+            xᵢ    = points[pᵢ]
+            xⱼ    = points[pⱼ]
+            Δx    = (xᵢ[1] - xⱼ[1], xᵢ[2] - xⱼ[2])
+            d     = sqrt(Δx[1]^2 + Δx[2]^2) 
             u     = d * H⁻¹
             w     = 𝒲(sphkernel, u, H⁻¹)
-            CUDA.@atomic sumW[pᵢ] += w
-            CUDA.@atomic sumW[pⱼ] += w
+            CUDA.@atomic ∑W[pᵢ] += w
+            CUDA.@atomic ∑W[pⱼ] += w
         end
     end
     return nothing
@@ -239,31 +243,40 @@ end
 
 Compute ∑W for each particles pair in list.
 """
-function ∑W_2d!(sumW, pairs, sphkernel, H⁻¹) 
-    gpukernel = @cuda launch=false kernel_∑W_2d!(sumW, pairs, sphkernel, H⁻¹) 
+function ∑W_2d!(∑W, pairs, points, sphkernel, H⁻¹) 
+    gpukernel = @cuda launch=false kernel_∑W_2d!(∑W, pairs, points, sphkernel, H⁻¹) 
     config = launch_configuration(gpukernel.fun)
     Nx = length(pairs)
     maxThreads = config.threads
     Tx  = min(maxThreads, Nx)
     Bx = cld(Nx, Tx)
-    CUDA.@sync gpukernel(sumW, pairs, sphkernel, H⁻¹; threads = Tx, blocks = Bx)
+    CUDA.@sync gpukernel(∑W, pairs, points, sphkernel, H⁻¹; threads = Tx, blocks = Bx)
 end
 #####################################################################
-function kernel_∑∇W_2d!(sum∇W, ∇Wₙ, pairs, points, kernel, H⁻¹) 
+function kernel_∑∇W_2d!(∑∇W, ∇Wₙ, pairs, points, kernel, H⁻¹) 
     index = (blockIdx().x - Int32(1)) * blockDim().x + threadIdx().x
     if index <= length(pairs)
         pair  = pairs[index]
         pᵢ    = pair[1]; pⱼ = pair[2]; d = pair[3]
         if !isnan(d)
+
             xᵢ    = points[pᵢ]
             xⱼ    = points[pⱼ]
+            Δx    = (xᵢ[1] - xⱼ[1], xᵢ[2] - xⱼ[2])
+            d     = sqrt(Δx[1]^2 + Δx[2]^2) 
             u     = d * H⁻¹
             dwk_r = d𝒲(kernel, u, H⁻¹) / d
-            ∇w    = ((xᵢ[1] - xⱼ[1]) * dwk_r, (xᵢ[2] - xⱼ[2]) * dwk_r)
-            CUDA.@atomic sum∇W[pᵢ, 1] += ∇w[1]
-            CUDA.@atomic sum∇W[pᵢ, 2] += ∇w[2]
-            CUDA.@atomic sum∇W[pⱼ, 1] -= ∇w[1]
-            CUDA.@atomic sum∇W[pⱼ, 2] -= ∇w[2]
+            ∇w    = (Δx[1] * dwk_r, Δx[2] * dwk_r)
+
+            if isnan(dwk_r) 
+                @cuprintln "kernel W_2d  dwk_r = $dwk_r, pair = $pair"
+                error() 
+            end
+
+            CUDA.@atomic ∑∇W[pᵢ, 1] += ∇w[1]
+            CUDA.@atomic ∑∇W[pᵢ, 2] += ∇w[2]
+            CUDA.@atomic ∑∇W[pⱼ, 1] -= ∇w[1]
+            CUDA.@atomic ∑∇W[pⱼ, 2] -= ∇w[2]
             ∇Wₙ[index] = ∇w
         end
     end
@@ -276,14 +289,14 @@ end
 Compute gradients.
 
 """
-function ∑∇W_2d!(sum∇W, ∇Wₙ, pairs, points, kernel, H⁻¹) 
-    gpukernel = @cuda launch=false kernel_∑∇W_2d!(sum∇W, ∇Wₙ, pairs, points, kernel, H⁻¹) 
+function ∑∇W_2d!(∑∇W, ∇Wₙ, pairs, points, kernel, H⁻¹) 
+    gpukernel = @cuda launch=false kernel_∑∇W_2d!(∑∇W, ∇Wₙ, pairs, points, kernel, H⁻¹) 
     config = launch_configuration(gpukernel.fun)
     Nx = length(pairs)
     maxThreads = config.threads
     Tx  = min(maxThreads, Nx)
     Bx = cld(Nx, Tx)
-    CUDA.@sync gpukernel(sum∇W, ∇Wₙ, pairs, points, kernel, H⁻¹; threads = Tx, blocks = Bx)
+    CUDA.@sync gpukernel(∑∇W, ∇Wₙ, pairs, points, kernel, H⁻¹; threads = Tx, blocks = Bx)
 end
 
 
@@ -341,6 +354,7 @@ function kernel_∂ρ∂tDDT!(∑∂ρ∂t,  ∇Wₙ, pairs, points, h, m₀, δ
             delta_j    = visc_densi * dot3 * m₀ / ρᵢ
 
             m₀dot     = m₀ * (Δv[1] * ∇W[1] + Δv[2] * ∇W[2])  #  Δv ⋅ ∇W
+
             #=
             if isnan(delta_j) || isnan(m₀dot)  || isnan(ρᵢ) || isnan(ρⱼ) 
                 @cuprintln "kernel_DDT 1 isnan dx1 = $(Δx[1]) , dx2 = $(Δx[2]) rhoi = $ρᵢ , dot3 = $dot3 , visc_densi = $visc_densi drhopvn = $drhopvn $(∇W[1]) $(Δv[1])"
@@ -356,17 +370,13 @@ function kernel_∂ρ∂tDDT!(∑∂ρ∂t,  ∇Wₙ, pairs, points, h, m₀, δ
             ∑∂ρ∂tj = m₀dot + delta_j *  MotionLimiter[pⱼ]
             ∑∂ρ∂tval1 = CUDA.@atomic ∑∂ρ∂t[pᵢ] += ∑∂ρ∂ti
             ∑∂ρ∂tval2 = CUDA.@atomic ∑∂ρ∂t[pⱼ] += ∑∂ρ∂tj
-            #=
-            if isnan(ρᵢ) || iszero(ρᵢ) || ρᵢ < 0.001 || isnan(ρⱼ) || iszero(ρⱼ) || ρⱼ < 0.001
-                @cuprintln "kernel DDT rho index =  $index , rhoi = $ρᵢ , rhoi = $ρⱼ, dx = $Δx , r =  $r², val1 = $∑∂ρ∂tval1 ,   val2 = $∑∂ρ∂tval2 , pair = $pair"
-                error() 
-            end
+
 
-            if isnan(∑∂ρ∂tval1) || isnan(∑∂ρ∂tval2)
-                @cuprintln "kernel DDT 3 val1 = $(∑∂ρ∂tval1), val2 = $(∑∂ρ∂tval2), dx1 = $(Δx[1]) , dx2 = $(Δx[2]) rhoi = $ρᵢ , dot3 = $dot3 , visc_densi = $visc_densi drhopvn = $drhopvn $(∇W[1]) $(Δv[1])"
+            if isnan(∑∂ρ∂tval1) || isnan(∑∂ρ∂tval2) || abs(∑∂ρ∂tval1) >  10000000 || abs(∑∂ρ∂tval2) >  10000000
+                @cuprintln "kernel DDT: drhodti = $∑∂ρ∂ti drhodtj = $∑∂ρ∂tj, val1 = $(∑∂ρ∂tval1), val2 = $(∑∂ρ∂tval2), dx1 = $(Δx[1]), dx2 = $(Δx[2]) rhoi = $ρᵢ, rhoj = $ρⱼ, dot3 = $dot3, visc_densi = $visc_densi, drhopvn = $drhopvn, dw = $(∇W[1]),  dv = $(Δv[1])"
                 error() 
             end
-            =#
+
         end
     end
     return nothing
@@ -407,33 +417,27 @@ function kernel_∂Π∂t!(∑∂Π∂t, ∇Wₙ, pairs, points, h, ρ, α, v, c
             η²    = (0.1 * h) * (0.1 * h)
             ρᵢ    = ρ[pᵢ]
             ρⱼ    = ρ[pⱼ]
-            #=
+
             if isnan(ρᵢ) || iszero(ρᵢ) || ρᵢ < 0.001 || isnan(ρⱼ) || iszero(ρⱼ) || ρⱼ < 0.001
-                @cuprintln "kernel Π index =  $index , rhoi = $ρᵢ , rhoi = $ρⱼ, dx = $Δx , r =  $r², pair = $pair"
+                @cuprintln "kernel Π: index =  $index, rhoi = $ρᵢ, rhoi = $ρⱼ, dx = $Δx, r =  $r², pair = $pair"
                 error() 
             end
-            =#
+
             Δv    = (v[pᵢ][1] - v[pⱼ][1], v[pᵢ][2] - v[pⱼ][2])
-
             ρₘ    = (ρᵢ + ρⱼ) * 0.5
-
             ∇W    = ∇Wₙ[index]
-
             cond   = Δv[1] * Δx[1] +  Δv[2] * Δx[2] 
 
             if cond < 0
-
                 Δμ   = h * cond / (r² + η²)
-
                 ΔΠ   =  (-α * c₀ * Δμ) / ρₘ
-
                 ΔΠm₀∇W = (-ΔΠ * m₀ * ∇W[1], -ΔΠ * m₀ * ∇W[2])
-                #=
+
                 if isnan(ΔΠm₀∇W[1])
                     @cuprintln "kernel Π: Π = $ΔΠ ,  W = $(∇W[1])"
                     error() 
                 end
-                =#
+
                 CUDA.@atomic ∑∂Π∂t[pᵢ, 1] += ΔΠm₀∇W[1]
                 CUDA.@atomic ∑∂Π∂t[pᵢ, 2] += ΔΠm₀∇W[2]
                 CUDA.@atomic ∑∂Π∂t[pⱼ, 1] -= ΔΠm₀∇W[1]
@@ -470,8 +474,40 @@ Equation of State in Weakly-Compressible SPH
 function pressure(ρ, c₀, γ, ρ₀)
     return ((c₀ ^ 2 * ρ₀) / γ) * ((ρ / ρ₀) ^ γ - 1)
 end
+function pressure(ρ, c₀, γ, ρ₀, γ⁻¹)
+    return (c₀ ^ 2 * ρ₀ * γ⁻¹) * ((ρ / ρ₀) ^ γ - 1)
+end
+
+#####################################################################
+function kernel_pressure!(P, ρ, ρ₀, c₀, γ, γ⁻¹) 
+    index = (blockIdx().x - Int32(1)) * blockDim().x + threadIdx().x
+    if index <= length(ρ)
+        P[index] = pressure(ρ[index], c₀, γ, ρ₀, γ⁻¹)
+    end
+    return nothing
+end
+"""
+    
+    pressure!(P, ρ, ρ₀, c₀, γ) 
+
+Equation of State in Weakly-Compressible SPH
+"""
+function pressure!(P, ρ, ρ₀, c₀, γ) 
+    if length(P) != length(ρ) error("Wrong length") end
+    γ⁻¹ = 1/γ
+    gpukernel = @cuda launch=false kernel_pressure!(P, ρ, ρ₀, c₀, γ, γ⁻¹) 
+    config = launch_configuration(gpukernel.fun)
+    Nx = length(ρ)
+    maxThreads = config.threads
+    Tx  = min(maxThreads, Nx)
+    Bx = cld(Nx, Tx)
+    CUDA.@sync gpukernel(P, ρ, ρ₀, c₀, γ, γ⁻¹; threads = Tx, blocks = Bx)
+end
+
+
+
 #####################################################################
-function kernel_∂v∂t!(∑∂v∂t,  ∇Wₙ,  pairs, m, ρ, c₀, γ, ρ₀) 
+function kernel_∂v∂t!(∑∂v∂t, ∇Wₙ, P, pairs, m, ρ, c₀, γ, ρ₀) 
     index = (blockIdx().x - Int32(1)) * blockDim().x + threadIdx().x
     if index <= length(pairs)
         pair  = pairs[index]
@@ -480,25 +516,25 @@ function kernel_∂v∂t!(∑∂v∂t,  ∇Wₙ,  pairs, m, ρ, c₀, γ, ρ₀)
 
             ρᵢ    = ρ[pᵢ]
             ρⱼ    = ρ[pⱼ]
-            #=
-            if isnan(ρᵢ) || iszero(ρᵢ) || ρᵢ < 0.001 || isnan(ρⱼ) || iszero(ρⱼ) || ρⱼ < 0.001
-                @cuprintln "kernel update rho: index =  $index , rhoi = $ρᵢ , rhoi = $ρⱼ, dpdt =  $(∑∂v∂t[index]), pair = $pair"
-                error() 
-            end
-            =#
-            Pᵢ    = pressure(ρᵢ, c₀, γ, ρ₀)
-            Pⱼ    = pressure(ρⱼ, c₀, γ, ρ₀)
+
+            Pᵢ    = P[pᵢ]
+            Pⱼ    = P[pⱼ]
             ∇W    = ∇Wₙ[index]
 
             Pfac  = (Pᵢ + Pⱼ) / (ρᵢ * ρⱼ)
 
             ∂v∂t  = (- m * Pfac * ∇W[1], - m * Pfac * ∇W[2])
 
+            #=
             if isnan(∂v∂t[1])
                 @cuprintln "kernel dvdt: rhoi = $ρᵢ , Pi =  $Pᵢ , m = $m , Pfac = $Pfac , W1 = $(∇W[1])"
                 error() 
             end
-
+            if isnan(ρᵢ) || iszero(ρᵢ) || ρᵢ < 0.001 || isnan(ρⱼ) || iszero(ρⱼ) || ρⱼ < 0.001
+                @cuprintln "kernel update rho: index =  $index , rhoi = $ρᵢ , rhoi = $ρⱼ, dpdt =  $(∑∂v∂t[index]), pair = $pair"
+                error() 
+            end
+            =#
             CUDA.@atomic ∑∂v∂t[pᵢ, 1] +=  ∂v∂t[1]
             CUDA.@atomic ∑∂v∂t[pᵢ, 2] +=  ∂v∂t[2]
             CUDA.@atomic ∑∂v∂t[pⱼ, 1] -=  ∂v∂t[1]
@@ -513,14 +549,14 @@ end
 
 The momentum equation (without dissipation).
 """
-function ∂v∂t!(∑∂v∂t,  ∇Wₙ, pairs, m, ρ, c₀, γ, ρ₀) 
-    gpukernel = @cuda launch=false kernel_∂v∂t!(∑∂v∂t,  ∇Wₙ, pairs, m, ρ, c₀, γ, ρ₀) 
+function ∂v∂t!(∑∂v∂t,  ∇Wₙ, P, pairs, m, ρ, c₀, γ, ρ₀) 
+    gpukernel = @cuda launch=false kernel_∂v∂t!(∑∂v∂t,  ∇Wₙ, P, pairs, m, ρ, c₀, γ, ρ₀) 
     config = launch_configuration(gpukernel.fun)
     Nx = length(pairs)
     maxThreads = config.threads
     Tx  = min(maxThreads, Nx)
     Bx = cld(Nx, Tx)
-    CUDA.@sync gpukernel(∑∂v∂t,  ∇Wₙ, pairs, m, ρ, c₀, γ, ρ₀; threads = Tx, blocks = Bx)
+    CUDA.@sync gpukernel(∑∂v∂t,  ∇Wₙ, P, pairs, m, ρ, c₀, γ, ρ₀; threads = Tx, blocks = Bx)
 end
 
 #####################################################################
@@ -556,12 +592,12 @@ function kernel_update_ρ!(ρ, ∑∂ρ∂t, Δt, ρ₀, isboundary)
     if index <= length(ρ)
         ρval = ρ[index] + ∑∂ρ∂t[index] * Δt
         if ρval < ρ₀ && isboundary[index] ρval = ρ₀ end
-        #=
+
         if isnan(ρval) || iszero(ρval) || ρval < 0.001
-            @cuprintln "kernel update rho: index =  $index , rhoval = $ρval  ,rhoi = $(ρ[index]) , dpdt =  $(∑∂ρ∂t[index]), dt = $Δt , isboundary = $(isboundary[index])"
+            @cuprintln "kernel update rho: index =  $index, rhoval = $ρval, rhoi = $(ρ[index]), dpdt = $(∑∂ρ∂t[index]), dt = $Δt, isboundary = $(isboundary[index])"
             error() 
         end
-        =#
+
         ρ[index] = ρval
     end
     return nothing
@@ -653,12 +689,12 @@ function kernel_update_all!(ρ, ρΔt½, v, vΔt½, x, xΔt½, ∑∂ρ∂t, ∑
         ρval       = ρ[index]  * (2 - epsi)/(2 + epsi)
         if ρval < ρ₀ && isboundary[index] ρval = ρ₀ end
 
-        #=
+
         if isnan(ρval) || iszero(ρval) || ρval < 0.01
-            @cuprintln "kernel update all rho: rhova = $ρval , epsi = $epsi , drhodt = $(∑∂ρ∂t[index]) , rhot12 = $(ρΔt½[index]) $Δt"
+            @cuprintln "kernel update all rho: rhova = $ρval, epsi = $epsi, drhodt = $(∑∂ρ∂t[index]), rhot12 = $(ρΔt½[index]), dt = $Δt"
             error() 
         end
-        =#
+
         ρΔt½[index] = ρval
         ρ[index]    = ρval
         #=