From 8b52602c3039281a12b01afb2fbada21e0c5ee74 Mon Sep 17 00:00:00 2001 From: nefrathenrici Date: Fri, 20 Dec 2024 16:47:19 -0800 Subject: [PATCH] Add MPILogger and MPIFileLogger --- .buildkite/pipeline.yml | 2 +- .github/workflows/JuliaFormatter.yml | 2 +- .github/workflows/OS-Tests.yml | 2 +- .github/workflows/docs.yml | 2 +- Project.toml | 6 +++ docs/Manifest.toml | 59 ++++++++++++++++++++++------ src/ClimaComms.jl | 1 + src/logging.jl | 43 ++++++++++++++++++++ test/Project.toml | 1 + test/logging.jl | 40 +++++++++++++++++++ test/runtests.jl | 4 ++ 11 files changed, 146 insertions(+), 16 deletions(-) create mode 100644 src/logging.jl create mode 100644 test/logging.jl diff --git a/.buildkite/pipeline.yml b/.buildkite/pipeline.yml index d94fcf9..9d74a33 100644 --- a/.buildkite/pipeline.yml +++ b/.buildkite/pipeline.yml @@ -11,7 +11,7 @@ steps: key: "initialize" command: - echo "--- Instantiate project" - - "julia --project=test -e 'using Pkg; Pkg.develop(;path=\".\"); Pkg.add(\"CUDA\"); Pkg.add(\"MPI\"); Pkg.instantiate(;verbose=true); Pkg.precompile(;strict=true)'" + - julia --project=test -e 'using Pkg; Pkg.develop(;path="."); Pkg.add("CUDA"); Pkg.add("MPI"); Pkg.instantiate(;verbose=true); Pkg.precompile(;strict=true)' # force the initialization of the CUDA runtime as it is lazily loaded by default - "julia --project=test -e 'using CUDA; CUDA.precompile_runtime()'" - "julia --project=test -e 'using Pkg; Pkg.status()'" diff --git a/.github/workflows/JuliaFormatter.yml b/.github/workflows/JuliaFormatter.yml index 6544786..dd6c3e0 100644 --- a/.github/workflows/JuliaFormatter.yml +++ b/.github/workflows/JuliaFormatter.yml @@ -29,7 +29,7 @@ jobs: - uses: julia-actions/setup-julia@latest if: steps.filter.outputs.julia_file_change == 'true' with: - version: 1.9 + version: '1.11' - name: Apply JuliaFormatter if: steps.filter.outputs.julia_file_change == 'true' diff --git a/.github/workflows/OS-Tests.yml b/.github/workflows/OS-Tests.yml index 2e08c9a..f280402 100644 --- 
a/.github/workflows/OS-Tests.yml +++ b/.github/workflows/OS-Tests.yml @@ -40,7 +40,7 @@ jobs: uses: julia-actions/setup-julia@latest if: steps.filter.outputs.run_test == 'true' with: - version: 1.9 + version: '1.11' - name: Cache artifacts uses: actions/cache@v1 diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml index db75151..df23dbe 100644 --- a/.github/workflows/docs.yml +++ b/.github/workflows/docs.yml @@ -16,7 +16,7 @@ jobs: - uses: actions/checkout@v2 - uses: julia-actions/setup-julia@latest with: - version: 1.9 + version: '1.11' - name: Install dependencies run: | julia --project=docs/ -e 'using Pkg; Pkg.instantiate()' diff --git a/Project.toml b/Project.toml index 4644639..6acebbe 100644 --- a/Project.toml +++ b/Project.toml @@ -10,6 +10,10 @@ authors = [ ] version = "0.6.4" +[deps] +Logging = "56ddb016-857b-54e1-b83d-db4d58db5568" +LoggingExtras = "e6f89c97-d47a-5376-807f-9c37f3926c36" + [weakdeps] CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195" @@ -20,5 +24,7 @@ ClimaCommsMPIExt = "MPI" [compat] CUDA = "3, 4, 5" +Logging = "1.9.4" +LoggingExtras = "1.1.0" MPI = "0.20.18" julia = "1.9" diff --git a/docs/Manifest.toml b/docs/Manifest.toml index 4f64e84..ecab8f7 100644 --- a/docs/Manifest.toml +++ b/docs/Manifest.toml @@ -1,6 +1,6 @@ # This file is machine-generated - editing it directly is not advised -julia_version = "1.10.2" +julia_version = "1.11.0" manifest_format = "2.0" project_hash = "c5b9e727593a1bc35ccae9b71e346465d8a7803c" @@ -16,18 +16,21 @@ version = "0.4.5" [[deps.ArgTools]] uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f" -version = "1.1.1" +version = "1.1.2" [[deps.Artifacts]] uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" +version = "1.11.0" [[deps.Base64]] uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" +version = "1.11.0" [[deps.ClimaComms]] +deps = ["Logging", "LoggingExtras"] path = ".." 
uuid = "3a4d1b5c-c61d-41fd-a00a-5873ba7a1b0d" -version = "0.6.0" +version = "0.6.4" [deps.ClimaComms.extensions] ClimaCommsCUDAExt = "CUDA" @@ -46,15 +49,17 @@ version = "0.7.4" [[deps.CompilerSupportLibraries_jll]] deps = ["Artifacts", "Libdl"] uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae" -version = "1.1.0+0" +version = "1.1.1+0" [[deps.Dates]] deps = ["Printf"] uuid = "ade2ca70-3891-5945-98fb-dc099432e06a" +version = "1.11.0" [[deps.Distributed]] deps = ["Random", "Serialization", "Sockets"] uuid = "8ba89e20-285c-5b6f-9357-94700520ee1b" +version = "1.11.0" [[deps.DocStringExtensions]] deps = ["LibGit2"] @@ -81,6 +86,7 @@ version = "2.5.0+0" [[deps.FileWatching]] uuid = "7b1f6079-737a-58dc-b8bc-7a2ca5c1b5ee" +version = "1.11.0" [[deps.Git]] deps = ["Git_jll"] @@ -109,6 +115,7 @@ version = "0.2.4" [[deps.InteractiveUtils]] deps = ["Markdown"] uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" +version = "1.11.0" [[deps.JLLWrappers]] deps = ["Artifacts", "Preferences"] @@ -130,6 +137,7 @@ version = "1.2.2" [[deps.LazyArtifacts]] deps = ["Artifacts", "Pkg"] uuid = "4af54fe1-eca0-43a8-85a7-787d91b784e3" +version = "1.11.0" [[deps.LibCURL]] deps = ["LibCURL_jll", "MozillaCACerts_jll"] @@ -139,16 +147,17 @@ version = "0.6.4" [[deps.LibCURL_jll]] deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll", "Zlib_jll", "nghttp2_jll"] uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0" -version = "8.4.0+0" +version = "8.6.0+0" [[deps.LibGit2]] deps = ["Base64", "LibGit2_jll", "NetworkOptions", "Printf", "SHA"] uuid = "76f85450-5226-5b5a-8eaa-529ad045b433" +version = "1.11.0" [[deps.LibGit2_jll]] deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll"] uuid = "e37daf67-58a4-590a-8e99-b0245dd2ffc5" -version = "1.6.4+0" +version = "1.7.2+0" [[deps.LibSSH2_jll]] deps = ["Artifacts", "Libdl", "MbedTLS_jll"] @@ -157,6 +166,7 @@ version = "1.11.0+1" [[deps.Libdl]] uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" +version = "1.11.0" [[deps.Libiconv_jll]] deps = ["Artifacts", "JLLWrappers", 
"Libdl"] @@ -166,6 +176,13 @@ version = "1.17.0+0" [[deps.Logging]] uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" +version = "1.11.0" + +[[deps.LoggingExtras]] +deps = ["Dates", "Logging"] +git-tree-sha1 = "f02b56007b064fbfddb4c9cd60161b6dd0f40df3" +uuid = "e6f89c97-d47a-5376-807f-9c37f3926c36" +version = "1.1.0" [[deps.MPI]] deps = ["Distributed", "DocStringExtensions", "Libdl", "MPICH_jll", "MPIPreferences", "MPItrampoline_jll", "MicrosoftMPI_jll", "OpenMPI_jll", "PkgVersion", "PrecompileTools", "Requires", "Serialization", "Sockets"] @@ -202,6 +219,7 @@ version = "5.3.3+0" [[deps.Markdown]] deps = ["Base64"] uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" +version = "1.11.0" [[deps.MarkdownAST]] deps = ["AbstractTrees", "Markdown"] @@ -212,7 +230,7 @@ version = "0.1.2" [[deps.MbedTLS_jll]] deps = ["Artifacts", "Libdl"] uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1" -version = "2.28.2+1" +version = "2.28.6+0" [[deps.MicrosoftMPI_jll]] deps = ["Artifacts", "JLLWrappers", "Libdl", "Pkg"] @@ -222,10 +240,11 @@ version = "10.1.4+2" [[deps.Mmap]] uuid = "a63ad114-7e13-5084-954f-fe012c677804" +version = "1.11.0" [[deps.MozillaCACerts_jll]] uuid = "14a3606d-f60d-562e-9121-12d972cd8159" -version = "2023.1.10" +version = "2023.12.12" [[deps.NetworkOptions]] uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908" @@ -255,9 +274,13 @@ uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0" version = "2.8.1" [[deps.Pkg]] -deps = ["Artifacts", "Dates", "Downloads", "FileWatching", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs", "p7zip_jll"] +deps = ["Artifacts", "Dates", "Downloads", "FileWatching", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "Random", "SHA", "TOML", "Tar", "UUIDs", "p7zip_jll"] uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" -version = "1.10.0" +version = "1.11.0" +weakdeps = ["REPL"] + + [deps.Pkg.extensions] + REPLExt = "REPL" [[deps.PkgVersion]] deps = ["Pkg"] @@ -280,14 +303,17 @@ version = "1.4.3" 
[[deps.Printf]] deps = ["Unicode"] uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7" +version = "1.11.0" [[deps.REPL]] -deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"] +deps = ["InteractiveUtils", "Markdown", "Sockets", "StyledStrings", "Unicode"] uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" +version = "1.11.0" [[deps.Random]] deps = ["SHA"] uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" +version = "1.11.0" [[deps.RegistryInstances]] deps = ["LazilyInitializedFields", "Pkg", "TOML", "Tar"] @@ -307,9 +333,15 @@ version = "0.7.0" [[deps.Serialization]] uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b" +version = "1.11.0" [[deps.Sockets]] uuid = "6462fe0b-24de-5631-8697-dd941f90decc" +version = "1.11.0" + +[[deps.StyledStrings]] +uuid = "f489334b-da3d-4c2e-b8f0-e476e12c162b" +version = "1.11.0" [[deps.TOML]] deps = ["Dates"] @@ -324,6 +356,7 @@ version = "1.10.0" [[deps.Test]] deps = ["InteractiveUtils", "Logging", "Random", "Serialization"] uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40" +version = "1.11.0" [[deps.TranscodingStreams]] git-tree-sha1 = "71509f04d045ec714c4748c785a59045c3736349" @@ -337,9 +370,11 @@ weakdeps = ["Random", "Test"] [[deps.UUIDs]] deps = ["Random", "SHA"] uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" +version = "1.11.0" [[deps.Unicode]] uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" +version = "1.11.0" [[deps.Zlib_jll]] deps = ["Libdl"] @@ -349,7 +384,7 @@ version = "1.2.13+1" [[deps.nghttp2_jll]] deps = ["Artifacts", "Libdl"] uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d" -version = "1.52.0+1" +version = "1.59.0+0" [[deps.p7zip_jll]] deps = ["Artifacts", "Libdl"] diff --git a/src/ClimaComms.jl b/src/ClimaComms.jl index a860523..86916a4 100644 --- a/src/ClimaComms.jl +++ b/src/ClimaComms.jl @@ -16,5 +16,6 @@ include("context.jl") include("singleton.jl") include("mpi.jl") include("loading.jl") +include("logging.jl") end # module diff --git a/src/logging.jl b/src/logging.jl new file mode 100644 index 0000000..e0c8c79 --- /dev/null +++ 
b/src/logging.jl
@@ -0,0 +1,53 @@
+import Logging, LoggingExtras
+
+export MPILogger, MPIFileLogger
+
+"""
+    MPILogger(context::AbstractCommsContext)
+    MPILogger(iostream, context)
+
+Return a logger that prefixes every log message with `[P<pid>]`, where `<pid>`
+is `mypid(context)`. Only the level and the message of a record are written.
+
+Outputs to `stdout` if no IOStream is given.
+"""
+MPILogger(ctx::AbstractCommsContext) = MPILogger(stdout, ctx)
+
+function MPILogger(iostream, ctx::AbstractCommsContext)
+    # Looked up once here; `format_log` reuses it for every record.
+    pid = mypid(ctx)
+
+    function format_log(io, log)
+        print(io, "[P$pid] ")
+        println(io, " $(log.level): $(log.message)")
+    end
+
+    return LoggingExtras.FormatLogger(format_log, iostream)
+end
+
+"""
+    MPIFileLogger(context, log_dir; min_level = Logging.Info)
+
+Return a logger that writes to the file `rank_<pid>.log` inside `log_dir`, where
+`<pid>` is `mypid(context)`, so that every process logs to its own file.
+
+`log_dir` is created if it does not exist. The file is opened in append mode and
+flushed after every message. Messages below `min_level` are discarded.
+"""
+function MPIFileLogger(
+    ctx::AbstractCommsContext,
+    log_dir::AbstractString;
+    min_level::Logging.LogLevel = Logging.Info,
+)
+    rank = mypid(ctx)
+    # Unlike `mkdir`, `mkpath` does not throw when the directory already exists,
+    # e.g. because another process created it after an `isdir` check.
+    mkpath(log_dir)
+    file_logger = LoggingExtras.FileLogger(
+        joinpath(log_dir, "rank_$rank.log");
+        append = true,
+        always_flush = true,
+    )
+    # The `FileLogger` call takes no level, so `min_level` is applied here.
+    return LoggingExtras.MinLevelLogger(file_logger, min_level)
+end
diff --git a/test/Project.toml b/test/Project.toml
index 6dec72b..93c1bde 100644
--- a/test/Project.toml
+++ b/test/Project.toml
@@ -1,5 +1,6 @@
 [deps]
 ClimaComms = "3a4d1b5c-c61d-41fd-a00a-5873ba7a1b0d"
+Logging = "56ddb016-857b-54e1-b83d-db4d58db5568"
 MPI = "da04e1cc-30fd-572f-bb4f-1f8673147195"
 SafeTestsets = "1bc83da4-3b8d-516f-aca4-4fe02f6d838f"
 Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
diff --git a/test/logging.jl b/test/logging.jl
new file mode 100644
index 0000000..f8c77a2
--- /dev/null
+++ b/test/logging.jl
@@ -0,0 +1,52 @@
+import ClimaComms, MPI
+using Logging, Test
+
+ctx = ClimaComms.context()
+(mypid, nprocs) = ClimaComms.init(ctx)
+
+@testset "MPIFileLogger" begin
+    log_dir = mktempdir()
+    logger = ClimaComms.MPIFileLogger(ctx, log_dir)
+    with_logger(logger) do
+        test_str = "Test message from rank $mypid"
+        @info test_str
+        log_content = read(joinpath(log_dir, "rank_$mypid.log"), String)
+        @test occursin(test_str, log_content)
+    end
+
+    # Messages below `min_level` must not reach the log file
+    warn_dir = mktempdir()
+    warn_logger =
+        ClimaComms.MPIFileLogger(ctx, warn_dir; min_level = Logging.Warn)
+    with_logger(warn_logger) do
+        @info "dropped message"
+        @warn "kept message"
+    end
+    warn_content = read(joinpath(warn_dir, "rank_$mypid.log"), String)
+    @test !occursin("dropped message", warn_content)
+    @test occursin("kept message", warn_content)
+end
+
+@testset "MPILogger"
begin
+    io = IOBuffer()
+    logger = ClimaComms.MPILogger(io, ctx)
+    with_logger(logger) do
+        @info "smoke test"
+    end
+    str = String(take!(io))
+    # The logger prints the prefix "[P<pid>] " and then " <level>: <message>"
+    @test contains(str, "[P$mypid] " * " Info: smoke test\n")
+
+    # Test with file IOStream
+    test_filename, io = mktemp()
+    logger = ClimaComms.MPILogger(io, ctx)
+    test_str = "Test message from rank $mypid"
+    with_logger(logger) do
+        @info test_str
+    end
+    # Close the stream so that everything is on disk before reading it back
+    close(io)
+    log_content = read(test_filename, String)
+    @test occursin(test_str, log_content)
+    @test occursin("[P$mypid]", log_content)
+end
diff --git a/test/runtests.jl b/test/runtests.jl
index 2f4b6c6..c5cf829 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -227,3 +227,7 @@ end
     device isa ClimaComms.CUDADevice && @test_throws ErrorException a[1]
     @test x == Array(a)[1]
 end
+
+@testset "logging" begin
+    include("logging.jl")
+end