From 35585a843b640fbffbc6676880a7fab5cc34433b Mon Sep 17 00:00:00 2001 From: Gabriel Gerlero Date: Wed, 20 Dec 2023 09:28:52 -0300 Subject: [PATCH 01/12] Add second-derivative functions to interface --- docs/src/implementer_guide.md | 3 +++ docs/src/user_guide.md | 7 ++++-- src/AbstractDifferentiation.jl | 40 +++++++++++++++++++++++++++++++++- 3 files changed, 47 insertions(+), 3 deletions(-) diff --git a/docs/src/implementer_guide.md b/docs/src/implementer_guide.md index 8dfafa3..7d8b7f0 100644 --- a/docs/src/implementer_guide.md +++ b/docs/src/implementer_guide.md @@ -29,11 +29,14 @@ They are just listed here to help readers figure out the code structure: - `derivative` calls `jacobian` - `gradient` calls `jacobian` - `hessian` calls `jacobian` and `gradient` + - `second_derivative` calls `derivative` - `value_and_jacobian` calls `jacobian` - `value_and_derivative` calls `value_and_jacobian` - `value_and_gradient` calls `value_and_jacobian` - `value_and_hessian` calls `jacobian` and `gradient` + - `value_and_second_derivative` calls `second_derivative` - `value_gradient_and_hessian` calls `value_and_jacobian` and `gradient` + - `value_and_derivatives` calls `value_and_derivative` and `second_derivative` - `pushforward_function` calls `jacobian` - `value_and_pushforward_function` calls `pushforward_function` - `pullback_function` calls `value_and_pullback_function` diff --git a/docs/src/user_guide.md b/docs/src/user_guide.md index 77ba2c2..333d69f 100644 --- a/docs/src/user_guide.md +++ b/docs/src/user_guide.md @@ -53,24 +53,27 @@ AbstractDifferentiation.HigherOrderBackend ## Derivatives -The following list of functions can be used to request the derivative, gradient, Jacobian or Hessian without the function value. +The following list of functions can be used to request the derivative, gradient, Jacobian, second derivative or Hessian without the function value. ```@docs AbstractDifferentiation.derivative AbstractDifferentiation.gradient AbstractDifferentiation.jacobian +AbstractDifferentiation.second_derivative AbstractDifferentiation.hessian ``` ## Value and derivatives -The following list of functions can be used to request the function value along with its derivative, gradient, Jacobian or Hessian. You can also request the function value, its gradient and Hessian for single-input functions. +The following list of functions can be used to request the function value along with its derivative, gradient, Jacobian, second derivative, or Hessian. You can also request the function value, its derivative (or its gradient) and its second derivative (or Hessian) for single-input functions. ```@docs AbstractDifferentiation.value_and_derivative AbstractDifferentiation.value_and_gradient AbstractDifferentiation.value_and_jacobian +AbstractDifferentiation.value_and_second_derivative AbstractDifferentiation.value_and_hessian +AbstractDifferentiation.value_and_derivatives AbstractDifferentiation.value_gradient_and_hessian ``` diff --git a/src/AbstractDifferentiation.jl b/src/AbstractDifferentiation.jl index a868b37..c730b2b 100644 --- a/src/AbstractDifferentiation.jl +++ b/src/AbstractDifferentiation.jl @@ -85,6 +85,24 @@ function jacobian(ab::HigherOrderBackend, f, xs...) return jacobian(lowest(ab), f, xs...) end +""" + AD.second_derivative(ab::AD.AbstractBackend, f, xs...) + +Compute the second derivative of `f` with respect to the input `x` using the backend `ab`. + +The function returns a single value because `second_derivative` currently only supports a single input. +""" +function second_derivative(ab::AbstractBackend, f, x) + if x isa Tuple + # only support computation of second derivative for functions with single input argument + x = only(x) + end + return derivative(second_lowest(ab), x -> begin + d = derivative(lowest(ab), f, x) + return d[1] # derivative returns a tuple + end, x) +end + """ AD.hessian(ab::AD.AbstractBackend, f, x) @@ -139,12 +157,23 @@ function value_and_jacobian(ab::AbstractBackend, f, xs...) return value, jacs end +""" + AD.value_and_second_derivative(ab::AD.AbstractBackend, f, x) + +Return the tuple `(v, d2)` of the function value `v = f(x)` and the second derivatives `d = AD.derivative(ab, f, x)` and `d2 = AD.hessian(ab, f, x)`. + +See also [`AbstractDifferentiation.second_derivative`](@ref) +""" +function value_and_second_derivative(ab::AbstractBackend, f, x) + return f(x), second_derivative(ab, f, x) +end + """ AD.value_and_hessian(ab::AD.AbstractBackend, f, x) Return the tuple `(v, H)` of the function value `v = f(x)` and the Hessian `H = AD.hessian(ab, f, x)`. -See also [`AbstractDifferentiation.hessian`](@ref). +See also [`AbstractDifferentiation.hessian`](@ref). """ function value_and_hessian(ab::AbstractBackend, f, x) if x isa Tuple @@ -176,6 +205,15 @@ function value_and_hessian(ab::HigherOrderBackend, f, x) return value, hess end +""" + AD.value_and_derivatives(ab::AD.AbstractBackend, f, x) + +Return the tuple `(v, d, d2)` of the function value `v = f(x)` and the first and second derivatives `d = AD.derivative(ab, f, x)` and `d2 = AD.second_derivative(ab, f, x)`. +""" +function value_and_derivatives(ab::AbstractBackend, f, x) + return value_and_derivative(ab, f, x)..., second_derivative(ab, f, x)[1] +end + """ AD.value_gradient_and_hessian(ab::AD.AbstractBackend, f, x) From 5ffb97cc7811150d689fd5717e6009eeb080eba8 Mon Sep 17 00:00:00 2001 From: Gabriel Gerlero Date: Wed, 20 Dec 2023 09:29:10 -0300 Subject: [PATCH 02/12] Add ForwardDiff-specific methods of second-derivative functions --- ext/AbstractDifferentiationForwardDiffExt.jl | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/ext/AbstractDifferentiationForwardDiffExt.jl b/ext/AbstractDifferentiationForwardDiffExt.jl index ff0c52c..ae28c4b 100644 --- a/ext/AbstractDifferentiationForwardDiffExt.jl +++ b/ext/AbstractDifferentiationForwardDiffExt.jl @@ -61,6 +61,12 @@ function AD.hessian(ba::AD.ForwardDiffBackend, f, x::AbstractArray) return (ForwardDiff.hessian(f, x, cfg),) end +function AD.value_and_derivative(::AD.ForwardDiffBackend, f, x::Real) + T = typeof(ForwardDiff.Tag(f, typeof(x))) + ydual = f(ForwardDiff.Dual{T}(x, one(x))) + return ForwardDiff.value(T, ydual), (ForwardDiff.extract_derivative(T, ydual),) +end + function AD.value_and_gradient(ba::AD.ForwardDiffBackend, f, x::AbstractArray) result = DiffResults.GradientResult(x) cfg = ForwardDiff.GradientConfig(f, x, chunk(ba, x)) @@ -68,6 +74,12 @@ function AD.value_and_gradient(ba::AD.ForwardDiffBackend, f, x::AbstractArray) return DiffResults.value(result), (DiffResults.derivative(result),) end +function AD.value_and_second_derivative(ba::AD.ForwardDiffBackend, f, x::Real) + T = typeof(ForwardDiff.Tag(f, typeof(x))) + ydual, ddual = AD.value_and_derivative(ba, f, ForwardDiff.Dual{T}(x, one(x))) + return ForwardDiff.value(T, ydual), (ForwardDiff.extract_derivative(T, ddual[1]),) +end + function AD.value_and_hessian(ba::AD.ForwardDiffBackend, f, x) result = DiffResults.HessianResult(x) cfg = ForwardDiff.HessianConfig(f, result, x, chunk(ba, x)) @@ -75,6 +87,14 @@ function AD.value_and_hessian(ba::AD.ForwardDiffBackend, f, x) return DiffResults.value(result), (DiffResults.hessian(result),) end +function AD.value_and_derivatives(ba::AD.ForwardDiffBackend, f, x::Real) + T = typeof(ForwardDiff.Tag(f, typeof(x))) + ydual, ddual = AD.value_and_derivative(ba, f, ForwardDiff.Dual{T}(x, one(x))) + return ForwardDiff.value(T, ydual), + (ForwardDiff.value(T, ddual[1]),), + (ForwardDiff.extract_derivative(T, ddual[1]),) +end + @inline step_toward(x::Number, v::Number, h) = x + h * v # support arrays and tuples @noinline step_toward(x, v, h) = x .+ h .* v From ea14c4e84276271a13049299a7df9216d2d84008 Mon Sep 17 00:00:00 2001 From: Gabriel Gerlero Date: Thu, 21 Dec 2023 11:37:02 -0300 Subject: [PATCH 03/12] Add tests for second derivatives --- test/finitedifferences.jl | 3 +++ test/forwarddiff.jl | 3 +++ test/reversediff.jl | 3 +++ test/test_utils.jl | 44 +++++++++++++++++++++++++++++++++++++++ 4 files changed, 53 insertions(+) diff --git a/test/finitedifferences.jl b/test/finitedifferences.jl index 568f0e9..df97b5e 100644 --- a/test/finitedifferences.jl +++ b/test/finitedifferences.jl @@ -21,6 +21,9 @@ using FiniteDifferences @testset "Jacobian" begin test_jacobians(backend) end + @testset "Second derivative" begin + test_second_derivatives(backend) + end @testset "Hessian" begin test_hessians(backend) end diff --git a/test/forwarddiff.jl b/test/forwarddiff.jl index 0b6bf26..47a95c9 100644 --- a/test/forwarddiff.jl +++ b/test/forwarddiff.jl @@ -19,6 +19,9 @@ using ForwardDiff @testset "Jacobian" begin test_jacobians(backend) end + @testset "Second derivative" begin + test_second_derivatives(backend) + end @testset "Hessian" begin test_hessians(backend) end diff --git a/test/reversediff.jl b/test/reversediff.jl index 06da46f..ed6ad21 100644 --- a/test/reversediff.jl +++ b/test/reversediff.jl @@ -14,6 +14,9 @@ using ReverseDiff @testset "Jacobian" begin test_jacobians(backend) end + @testset "Second derivative" begin + test_second_derivatives(backend) + end @testset "Hessian" begin test_hessians(backend) end diff --git a/test/test_utils.jl b/test/test_utils.jl index 6eb4677..692b723 100644 --- a/test/test_utils.jl +++ b/test/test_utils.jl @@ -6,6 +6,7 @@ Random.seed!(1234) fder(x, y) = exp(y) * x + y * log(x) dfderdx(x, y) = exp(y) + y * 1 / x dfderdy(x, y) = exp(y) * x + log(x) +dfderdxdx(x, y) = -y * 1 / x^2 fgrad(x, y) = prod(x) + sum(y ./ (1:length(y))) dfgraddx(x, y) = prod(x) ./ x @@ -143,6 +144,49 @@ function test_jacobians(backend; multiple_inputs=true, test_types=true) @test yvec == yvec2 end +function test_second_derivatives(backend; multiple_inputs=false, test_types=true) + if multiple_inputs + # ... but + error("multiple_inputs=true is not supported.") + else + # explicit test that AbstractDifferentiation throws an error + # don't support tuple of second derivatives + @test_throws ArgumentError AD.second_derivative( + backend, x -> fder(x, yscalar), (xscalar, yscalar) + ) + @test_throws MethodError AD.second_derivative( + backend, x -> fder(x, yscalar), xscalar, yscalar + ) + end + + # test if single input (no tuple works) + dder1 = AD.second_derivative(backend, x -> fder(x, yscalar), xscalar) + if test_types + @test dder1[1] isa Float64 + end + @test dfderdxdx(xscalar, yscalar) ≈ dder1[1] atol = 1e-8 + valscalar, dder2 = AD.value_and_second_derivative( + backend, x -> fder(x, yscalar), xscalar + ) + if test_types + @test valscalar isa Float64 + @test dder2[1] isa Float64 + end + @test valscalar == fder(xscalar, yscalar) + @test norm.(dder2 .- dder1) == (0,) + valscalar, der, dder3 = AD.value_and_derivatives( + backend, x -> fder(x, yscalar), xscalar + ) + if test_types + @test valscalar isa Float64 + @test der[1] isa Float64 + @test dder3[1] isa Float64 + end + @test valscalar == fder(xscalar, yscalar) + @test norm.(der .- AD.derivative(backend, x -> fder(x, yscalar), xscalar)) == (0,) + @test norm.(dder3 .- dder1) == (0,) +end + function test_hessians(backend; multiple_inputs=false, test_types=true) if multiple_inputs # ... but From 3c4652af115662964c7be2d5d4d407f8bcc326d1 Mon Sep 17 00:00:00 2001 From: Gabriel Gerlero Date: Wed, 3 Jan 2024 14:01:04 -0300 Subject: [PATCH 04/12] Apply suggestions from code review Co-authored-by: David Widmann --- docs/src/implementer_guide.md | 4 ++-- ext/AbstractDifferentiationForwardDiffExt.jl | 18 +++++++++++------- src/AbstractDifferentiation.jl | 15 ++++++++++++++- 3 files changed, 27 insertions(+), 10 deletions(-) diff --git a/docs/src/implementer_guide.md b/docs/src/implementer_guide.md index 7d8b7f0..fd1efe6 100644 --- a/docs/src/implementer_guide.md +++ b/docs/src/implementer_guide.md @@ -29,14 +29,14 @@ They are just listed here to help readers figure out the code structure: - `derivative` calls `jacobian` - `gradient` calls `jacobian` - `hessian` calls `jacobian` and `gradient` - - `second_derivative` calls `derivative` + - `secondderivative` calls `derivative` - `value_and_jacobian` calls `jacobian` - `value_and_derivative` calls `value_and_jacobian` - `value_and_gradient` calls `value_and_jacobian` - `value_and_hessian` calls `jacobian` and `gradient` - `value_and_second_derivative` calls `second_derivative` - `value_gradient_and_hessian` calls `value_and_jacobian` and `gradient` - - `value_and_derivatives` calls `value_and_derivative` and `second_derivative` + - `value_derivative_and_second_derivative` calls `value_and_derivative` and `second_derivative` - `pushforward_function` calls `jacobian` - `value_and_pushforward_function` calls `pushforward_function` - `pullback_function` calls `value_and_pullback_function` diff --git a/ext/AbstractDifferentiationForwardDiffExt.jl b/ext/AbstractDifferentiationForwardDiffExt.jl index ae28c4b..2931bfe 100644 --- a/ext/AbstractDifferentiationForwardDiffExt.jl +++ b/ext/AbstractDifferentiationForwardDiffExt.jl @@ -64,7 +64,7 @@ end function AD.value_and_derivative(::AD.ForwardDiffBackend, f, x::Real) T = typeof(ForwardDiff.Tag(f, typeof(x))) ydual = f(ForwardDiff.Dual{T}(x, one(x))) - return ForwardDiff.value(T, ydual), (ForwardDiff.extract_derivative(T, ydual),) + return ForwardDiff.value(T, ydual), (ForwardDiff.partials(T, ydual, 1),) end function AD.value_and_gradient(ba::AD.ForwardDiffBackend, f, x::AbstractArray) @@ -76,8 +76,10 @@ end function AD.value_and_second_derivative(ba::AD.ForwardDiffBackend, f, x::Real) T = typeof(ForwardDiff.Tag(f, typeof(x))) - ydual, ddual = AD.value_and_derivative(ba, f, ForwardDiff.Dual{T}(x, one(x))) - return ForwardDiff.value(T, ydual), (ForwardDiff.extract_derivative(T, ddual[1]),) + xdual = ForwardDiff.Dual{T}(x, one(x)) + T2 = typeof(ForwardDiff.Tag(f, typeof(xdual))) + ydual = f(ForwardDiff.Dual{T2}(xdual, one(xdual))) + return ForwardDiff.value(T, ForwardDiff.value(T2, ydual)), (ForwardDiff.partials(T, ForwardDiff.partials(T2, ydual, 1), 1),) end function AD.value_and_hessian(ba::AD.ForwardDiffBackend, f, x) @@ -89,10 +91,12 @@ end function AD.value_and_derivatives(ba::AD.ForwardDiffBackend, f, x::Real) T = typeof(ForwardDiff.Tag(f, typeof(x))) - ydual, ddual = AD.value_and_derivative(ba, f, ForwardDiff.Dual{T}(x, one(x))) - return ForwardDiff.value(T, ydual), - (ForwardDiff.value(T, ddual[1]),), - (ForwardDiff.extract_derivative(T, ddual[1]),) + xdual = ForwardDiff.Dual{T}(x, one(x)) + T2 = typeof(ForwardDiff.Tag(f, typeof(xdual))) + ydual = f(ForwardDiff.Dual{T2}(xdual, one(xdual))) + return ForwardDiff.value(T, ForwardDiff.value(T2, ydual)), + (ForwardDiff.partials(T, ForwardDiff.value(T2, ydual), 1),), + (ForwardDiff.partials(T, ForwardDiff.partials(T2, ydual, 1), 1),) end @inline step_toward(x::Number, v::Number, h) = x + h * v diff --git a/src/AbstractDifferentiation.jl b/src/AbstractDifferentiation.jl index c730b2b..781b84e 100644 --- a/src/AbstractDifferentiation.jl +++ b/src/AbstractDifferentiation.jl @@ -211,7 +211,20 @@ end Return the tuple `(v, d, d2)` of the function value `v = f(x)` and the first and second derivatives `d = AD.derivative(ab, f, x)` and `d2 = AD.second_derivative(ab, f, x)`. """ function value_and_derivatives(ab::AbstractBackend, f, x) - return value_and_derivative(ab, f, x)..., second_derivative(ab, f, x)[1] + if x isa Tuple + # only support computation of Hessian for functions with single input argument + x = only(x) + end + + value = f(x) + deriv, second_deriv = value_and_derivative( + second_lowest(ab), _x -> begin + d = derivative(lowest(ab), f, _x) + return d[1] # derivative returns a tuple + end, x + ) + + return value, (deriv,), second_deriv end """ From 4e0f3f532402922aaeb5600e5fdd9801f0aa62a6 Mon Sep 17 00:00:00 2001 From: Gabriel Gerlero Date: Wed, 3 Jan 2024 14:13:09 -0300 Subject: [PATCH 05/12] Reformat code --- ext/AbstractDifferentiationForwardDiffExt.jl | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ext/AbstractDifferentiationForwardDiffExt.jl b/ext/AbstractDifferentiationForwardDiffExt.jl index 2931bfe..cb52c4d 100644 --- a/ext/AbstractDifferentiationForwardDiffExt.jl +++ b/ext/AbstractDifferentiationForwardDiffExt.jl @@ -79,7 +79,8 @@ function AD.value_and_second_derivative(ba::AD.ForwardDiffBackend, f, x::Real) xdual = ForwardDiff.Dual{T}(x, one(x)) T2 = typeof(ForwardDiff.Tag(f, typeof(xdual))) ydual = f(ForwardDiff.Dual{T2}(xdual, one(xdual))) - return ForwardDiff.value(T, ForwardDiff.value(T2, ydual)), (ForwardDiff.partials(T, ForwardDiff.partials(T2, ydual, 1), 1),) + return ForwardDiff.value(T, ForwardDiff.value(T2, ydual)), + (ForwardDiff.partials(T, ForwardDiff.partials(T2, ydual, 1), 1),) end function AD.value_and_hessian(ba::AD.ForwardDiffBackend, f, x) From 94342dec6852089238dd3a93f205d323519e9aa9 Mon Sep 17 00:00:00 2001 From: Gabriel Gerlero Date: Wed, 3 Jan 2024 14:21:33 -0300 Subject: [PATCH 06/12] Test RuleConfig backend with second derivatives --- test/ruleconfig.jl | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/ruleconfig.jl b/test/ruleconfig.jl index 0a97b66..a4b2dd9 100644 --- a/test/ruleconfig.jl +++ b/test/ruleconfig.jl @@ -21,6 +21,9 @@ using Zygote @testset "j′vp" begin test_j′vp(backend) end + @testset "Second derivative" begin + test_second_derivatives(backend) + end @testset "Lazy Derivative" begin test_lazy_derivatives(backend) end From db8607cb21134d94c5c9f36b303a837343e767ba Mon Sep 17 00:00:00 2001 From: Gabriel Gerlero Date: Wed, 3 Jan 2024 18:08:08 -0300 Subject: [PATCH 07/12] Rename second_derivative -> secondderivative --- docs/src/implementer_guide.md | 4 ++-- docs/src/user_guide.md | 2 +- src/AbstractDifferentiation.jl | 16 ++++++++-------- test/test_utils.jl | 6 +++--- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/docs/src/implementer_guide.md b/docs/src/implementer_guide.md index fd1efe6..94929ad 100644 --- a/docs/src/implementer_guide.md +++ b/docs/src/implementer_guide.md @@ -34,9 +34,9 @@ They are just listed here to help readers figure out the code structure: - `value_and_derivative` calls `value_and_jacobian` - `value_and_gradient` calls `value_and_jacobian` - `value_and_hessian` calls `jacobian` and `gradient` - - `value_and_second_derivative` calls `second_derivative` + - `value_and_second_derivative` calls `secondderivative` - `value_gradient_and_hessian` calls `value_and_jacobian` and `gradient` - - `value_derivative_and_second_derivative` calls `value_and_derivative` and `second_derivative` + - `value_derivative_and_second_derivative` calls `value_and_derivative` and `secondderivative` - `pushforward_function` calls `jacobian` - `value_and_pushforward_function` calls `pushforward_function` - `pullback_function` calls `value_and_pullback_function` diff --git a/docs/src/user_guide.md b/docs/src/user_guide.md index 333d69f..04becca 100644 --- a/docs/src/user_guide.md +++ b/docs/src/user_guide.md @@ -59,7 +59,7 @@ The following list of functions can be used to request the derivative, gradient, AbstractDifferentiation.derivative AbstractDifferentiation.gradient AbstractDifferentiation.jacobian -AbstractDifferentiation.second_derivative +AbstractDifferentiation.secondderivative AbstractDifferentiation.hessian ``` diff --git a/src/AbstractDifferentiation.jl b/src/AbstractDifferentiation.jl index 781b84e..f4cd7d9 100644 --- a/src/AbstractDifferentiation.jl +++ b/src/AbstractDifferentiation.jl @@ -86,13 +86,13 @@ function jacobian(ab::HigherOrderBackend, f, xs...) end """ - AD.second_derivative(ab::AD.AbstractBackend, f, xs...) + AD.secondderivative(ab::AD.AbstractBackend, f, xs...) Compute the second derivative of `f` with respect to the input `x` using the backend `ab`. -The function returns a single value because `second_derivative` currently only supports a single input. +The function returns a single value because `secondderivative` currently only supports a single input. """ -function second_derivative(ab::AbstractBackend, f, x) +function secondderivative(ab::AbstractBackend, f, x) if x isa Tuple # only support computation of second derivative for functions with single input argument x = only(x) @@ -162,10 +162,10 @@ end Return the tuple `(v, d2)` of the function value `v = f(x)` and the second derivatives `d = AD.derivative(ab, f, x)` and `d2 = AD.hessian(ab, f, x)`. -See also [`AbstractDifferentiation.second_derivative`](@ref) +See also [`AbstractDifferentiation.secondderivative`](@ref) """ function value_and_second_derivative(ab::AbstractBackend, f, x) - return f(x), second_derivative(ab, f, x) + return f(x), secondderivative(ab, f, x) end """ @@ -208,7 +208,7 @@ end """ AD.value_and_derivatives(ab::AD.AbstractBackend, f, x) -Return the tuple `(v, d, d2)` of the function value `v = f(x)` and the first and second derivatives `d = AD.derivative(ab, f, x)` and `d2 = AD.second_derivative(ab, f, x)`. +Return the tuple `(v, d, d2)` of the function value `v = f(x)` and the first and second derivatives `d = AD.derivative(ab, f, x)` and `d2 = AD.secondderivative(ab, f, x)`. """ function value_and_derivatives(ab::AbstractBackend, f, x) if x isa Tuple @@ -217,14 +217,14 @@ function value_and_derivatives(ab::AbstractBackend, f, x) end value = f(x) - deriv, second_deriv = value_and_derivative( + deriv, secondderiv = value_and_derivative( second_lowest(ab), _x -> begin d = derivative(lowest(ab), f, _x) return d[1] # derivative returns a tuple end, x ) - return value, (deriv,), second_deriv + return value, (deriv,), secondderiv end """ diff --git a/test/test_utils.jl b/test/test_utils.jl index 692b723..d826e84 100644 --- a/test/test_utils.jl +++ b/test/test_utils.jl @@ -151,16 +151,16 @@ function test_second_derivatives(backend; multiple_inputs=false, test_types=true else # explicit test that AbstractDifferentiation throws an error # don't support tuple of second derivatives - @test_throws ArgumentError AD.second_derivative( + @test_throws ArgumentError AD.secondderivative( backend, x -> fder(x, yscalar), (xscalar, yscalar) ) - @test_throws MethodError AD.second_derivative( + @test_throws MethodError AD.secondderivative( backend, x -> fder(x, yscalar), xscalar, yscalar ) end # test if single input (no tuple works) - dder1 = AD.second_derivative(backend, x -> fder(x, yscalar), xscalar) + dder1 = AD.secondderivative(backend, x -> fder(x, yscalar), xscalar) if test_types @test dder1[1] isa Float64 end From 3b7a5b53f60944629613d0fd22bae7ec455c5518 Mon Sep 17 00:00:00 2001 From: Gabriel Gerlero Date: Wed, 3 Jan 2024 18:09:03 -0300 Subject: [PATCH 08/12] Rename value_and_derivatives -> value_and_derivative_and_second_derivative --- docs/src/user_guide.md | 2 +- ext/AbstractDifferentiationForwardDiffExt.jl | 2 +- src/AbstractDifferentiation.jl | 4 ++-- test/test_utils.jl | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/docs/src/user_guide.md b/docs/src/user_guide.md index 04becca..c0efdce 100644 --- a/docs/src/user_guide.md +++ b/docs/src/user_guide.md @@ -73,7 +73,7 @@ AbstractDifferentiation.value_and_gradient AbstractDifferentiation.value_and_jacobian AbstractDifferentiation.value_and_second_derivative AbstractDifferentiation.value_and_hessian -AbstractDifferentiation.value_and_derivatives +AbstractDifferentiation.value_derivative_and_second_derivative AbstractDifferentiation.value_gradient_and_hessian ``` diff --git a/ext/AbstractDifferentiationForwardDiffExt.jl b/ext/AbstractDifferentiationForwardDiffExt.jl index cb52c4d..e647b4d 100644 --- a/ext/AbstractDifferentiationForwardDiffExt.jl +++ b/ext/AbstractDifferentiationForwardDiffExt.jl @@ -90,7 +90,7 @@ function AD.value_and_hessian(ba::AD.ForwardDiffBackend, f, x) return DiffResults.value(result), (DiffResults.hessian(result),) end -function AD.value_and_derivatives(ba::AD.ForwardDiffBackend, f, x::Real) +function AD.value_derivative_and_second_derivative(ba::AD.ForwardDiffBackend, f, x::Real) T = typeof(ForwardDiff.Tag(f, typeof(x))) xdual = ForwardDiff.Dual{T}(x, one(x)) T2 = typeof(ForwardDiff.Tag(f, typeof(xdual))) diff --git a/src/AbstractDifferentiation.jl b/src/AbstractDifferentiation.jl index f4cd7d9..570a15a 100644 --- a/src/AbstractDifferentiation.jl +++ b/src/AbstractDifferentiation.jl @@ -206,11 +206,11 @@ function value_and_hessian(ab::HigherOrderBackend, f, x) end """ - AD.value_and_derivatives(ab::AD.AbstractBackend, f, x) + AD.value_derivative_and_second_derivative(ab::AD.AbstractBackend, f, x) Return the tuple `(v, d, d2)` of the function value `v = f(x)` and the first and second derivatives `d = AD.derivative(ab, f, x)` and `d2 = AD.secondderivative(ab, f, x)`. """ -function value_and_derivatives(ab::AbstractBackend, f, x) +function value_derivative_and_second_derivative(ab::AbstractBackend, f, x) if x isa Tuple # only support computation of Hessian for functions with single input argument x = only(x) diff --git a/test/test_utils.jl b/test/test_utils.jl index d826e84..ab0d85e 100644 --- a/test/test_utils.jl +++ b/test/test_utils.jl @@ -174,7 +174,7 @@ function test_second_derivatives(backend; multiple_inputs=false, test_types=true end @test valscalar == fder(xscalar, yscalar) @test norm.(dder2 .- dder1) == (0,) - valscalar, der, dder3 = AD.value_and_derivatives( + valscalar, der, dder3 = AD.value_derivative_and_second_derivative( backend, x -> fder(x, yscalar), xscalar ) if test_types From 008e284238b84fe95b9784e7a2f954eeb902b024 Mon Sep 17 00:00:00 2001 From: Guillaume Dalle <22795598+gdalle@users.noreply.github.com> Date: Fri, 2 Feb 2024 10:12:48 +0100 Subject: [PATCH 09/12] Rename secondderivative to second_derivative --- docs/src/implementer_guide.md | 6 +++--- docs/src/user_guide.md | 2 +- src/AbstractDifferentiation.jl | 12 ++++++------ test/test_utils.jl | 6 +++--- 4 files changed, 13 insertions(+), 13 deletions(-) diff --git a/docs/src/implementer_guide.md b/docs/src/implementer_guide.md index 94929ad..4a14e43 100644 --- a/docs/src/implementer_guide.md +++ b/docs/src/implementer_guide.md @@ -29,14 +29,14 @@ They are just listed here to help readers figure out the code structure: - `derivative` calls `jacobian` - `gradient` calls `jacobian` - `hessian` calls `jacobian` and `gradient` - - `secondderivative` calls `derivative` + - `second_derivative` calls `derivative` - `value_and_jacobian` calls `jacobian` - `value_and_derivative` calls `value_and_jacobian` - `value_and_gradient` calls `value_and_jacobian` - `value_and_hessian` calls `jacobian` and `gradient` - - `value_and_second_derivative` calls `secondderivative` + - `value_and_second_derivative` calls `second_derivative` - `value_gradient_and_hessian` calls `value_and_jacobian` and `gradient` - - `value_derivative_and_second_derivative` calls `value_and_derivative` and `secondderivative` + - `value_derivative_and_second_derivative` calls `value_and_derivative` and `second_derivative` - `pushforward_function` calls `jacobian` - `value_and_pushforward_function` calls `pushforward_function` - `pullback_function` calls `value_and_pullback_function` diff --git a/docs/src/user_guide.md b/docs/src/user_guide.md index c0efdce..e09768c 100644 --- a/docs/src/user_guide.md +++ b/docs/src/user_guide.md @@ -59,7 +59,7 @@ The following list of functions can be used to request the derivative, gradient, AbstractDifferentiation.derivative AbstractDifferentiation.gradient AbstractDifferentiation.jacobian -AbstractDifferentiation.secondderivative +AbstractDifferentiation.second_derivative AbstractDifferentiation.hessian ``` diff --git a/src/AbstractDifferentiation.jl b/src/AbstractDifferentiation.jl index 570a15a..c4e9e13 100644 --- a/src/AbstractDifferentiation.jl +++ b/src/AbstractDifferentiation.jl @@ -86,13 +86,13 @@ function jacobian(ab::HigherOrderBackend, f, xs...) end """ - AD.secondderivative(ab::AD.AbstractBackend, f, xs...) + AD.second_derivative(ab::AD.AbstractBackend, f, xs...) Compute the second derivative of `f` with respect to the input `x` using the backend `ab`. -The function returns a single value because `secondderivative` currently only supports a single input. +The function returns a single value because `second_derivative` currently only supports a single input. """ -function secondderivative(ab::AbstractBackend, f, x) +function second_derivative(ab::AbstractBackend, f, x) if x isa Tuple # only support computation of second derivative for functions with single input argument x = only(x) @@ -162,10 +162,10 @@ end Return the tuple `(v, d2)` of the function value `v = f(x)` and the second derivatives `d = AD.derivative(ab, f, x)` and `d2 = AD.hessian(ab, f, x)`. -See also [`AbstractDifferentiation.secondderivative`](@ref) +See also [`AbstractDifferentiation.second_derivative`](@ref) """ function value_and_second_derivative(ab::AbstractBackend, f, x) - return f(x), secondderivative(ab, f, x) + return f(x), second_derivative(ab, f, x) end """ @@ -208,7 +208,7 @@ end """ AD.value_derivative_and_second_derivative(ab::AD.AbstractBackend, f, x) -Return the tuple `(v, d, d2)` of the function value `v = f(x)` and the first and second derivatives `d = AD.derivative(ab, f, x)` and `d2 = AD.secondderivative(ab, f, x)`. +Return the tuple `(v, d, d2)` of the function value `v = f(x)` and the first and second derivatives `d = AD.derivative(ab, f, x)` and `d2 = AD.second_derivative(ab, f, x)`. """ function value_derivative_and_second_derivative(ab::AbstractBackend, f, x) if x isa Tuple diff --git a/test/test_utils.jl b/test/test_utils.jl index ab0d85e..b8ce54b 100644 --- a/test/test_utils.jl +++ b/test/test_utils.jl @@ -151,16 +151,16 @@ function test_second_derivatives(backend; multiple_inputs=false, test_types=true else # explicit test that AbstractDifferentiation throws an error # don't support tuple of second derivatives - @test_throws ArgumentError AD.secondderivative( + @test_throws ArgumentError AD.second_derivative( backend, x -> fder(x, yscalar), (xscalar, yscalar) ) - @test_throws MethodError AD.secondderivative( + @test_throws MethodError AD.second_derivative( backend, x -> fder(x, yscalar), xscalar, yscalar ) end # test if single input (no tuple works) - dder1 = AD.secondderivative(backend, x -> fder(x, yscalar), xscalar) + dder1 = AD.second_derivative(backend, x -> fder(x, yscalar), xscalar) if test_types @test dder1[1] isa Float64 end From 3dfdd52cde609376e264ec9b730bde357fc9cb77 Mon Sep 17 00:00:00 2001 From: Gabriel Gerlero Date: Fri, 2 Feb 2024 12:26:37 -0300 Subject: [PATCH 10/12] Update AbstractDifferentiationForwardDiffExt.jl --- ext/AbstractDifferentiationForwardDiffExt.jl | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/ext/AbstractDifferentiationForwardDiffExt.jl b/ext/AbstractDifferentiationForwardDiffExt.jl index e647b4d..b8e82cf 100644 --- a/ext/AbstractDifferentiationForwardDiffExt.jl +++ b/ext/AbstractDifferentiationForwardDiffExt.jl @@ -79,8 +79,9 @@ function AD.value_and_second_derivative(ba::AD.ForwardDiffBackend, f, x::Real) xdual = ForwardDiff.Dual{T}(x, one(x)) T2 = typeof(ForwardDiff.Tag(f, typeof(xdual))) ydual = f(ForwardDiff.Dual{T2}(xdual, one(xdual))) - return ForwardDiff.value(T, ForwardDiff.value(T2, ydual)), - (ForwardDiff.partials(T, ForwardDiff.partials(T2, ydual, 1), 1),) + v = ForwardDiff.value(T, ForwardDiff.value(T2, ydual)) + d2 = ForwardDiff.partials(T, ForwardDiff.partials(T2, ydual, 1), 1) + return v, (d2,) end function AD.value_and_hessian(ba::AD.ForwardDiffBackend, f, x) @@ -95,9 +96,10 @@ function AD.value_derivative_and_second_derivative(ba::AD.ForwardDiffBackend, f, xdual = ForwardDiff.Dual{T}(x, one(x)) T2 = typeof(ForwardDiff.Tag(f, typeof(xdual))) ydual = f(ForwardDiff.Dual{T2}(xdual, one(xdual))) - return ForwardDiff.value(T, ForwardDiff.value(T2, ydual)), - (ForwardDiff.partials(T, ForwardDiff.value(T2, ydual), 1),), - (ForwardDiff.partials(T, ForwardDiff.partials(T2, ydual, 1), 1),) + v = ForwardDiff.value(T, ForwardDiff.value(T2, ydual)) + d = ForwardDiff.partials(T, ForwardDiff.value(T2, ydual), 1) + d2 = ForwardDiff.partials(T, ForwardDiff.partials(T2, ydual, 1), 1) + return v, (d,), (d2,) end @inline step_toward(x::Number, v::Number, h) = x + h * v From 897caddbfca7fba77912a9b177bae144823690c2 Mon Sep 17 00:00:00 2001 From: Gabriel Gerlero Date: Fri, 2 Feb 2024 13:38:33 -0300 Subject: [PATCH 11/12] Apply suggestions from code review Co-authored-by: David Widmann --- src/AbstractDifferentiation.jl | 6 ++--- test/test_utils.jl | 40 +++++++++++++++------------------- 2 files changed, 21 insertions(+), 25 deletions(-) diff --git a/src/AbstractDifferentiation.jl b/src/AbstractDifferentiation.jl index c4e9e13..e7a6fd5 100644 --- a/src/AbstractDifferentiation.jl +++ b/src/AbstractDifferentiation.jl @@ -86,7 +86,7 @@ function jacobian(ab::HigherOrderBackend, f, xs...) end """ - AD.second_derivative(ab::AD.AbstractBackend, f, xs...) + AD.second_derivative(ab::AD.AbstractBackend, f, x) Compute the second derivative of `f` with respect to the input `x` using the backend `ab`. @@ -160,7 +160,7 @@ end """ AD.value_and_second_derivative(ab::AD.AbstractBackend, f, x) -Return the tuple `(v, d2)` of the function value `v = f(x)` and the second derivatives `d = AD.derivative(ab, f, x)` and `d2 = AD.hessian(ab, f, x)`. +Return the tuple `(v, d2)` of the function value `v = f(x)` and the second derivative `d2 = AD.second_derivative(ab, f, x)`. See also [`AbstractDifferentiation.second_derivative`](@ref) """ @@ -208,7 +208,7 @@ end """ AD.value_derivative_and_second_derivative(ab::AD.AbstractBackend, f, x) -Return the tuple `(v, d, d2)` of the function value `v = f(x)` and the first and second derivatives `d = AD.derivative(ab, f, x)` and `d2 = AD.second_derivative(ab, f, x)`. +Return the tuple `(v, d, d2)` of the function value `v = f(x)`, the first derivative `d = AD.derivative(ab, f, x)`, and the second derivative `d2 = AD.second_derivative(ab, f, x)`. """ function value_derivative_and_second_derivative(ab::AbstractBackend, f, x) if x isa Tuple diff --git a/test/test_utils.jl b/test/test_utils.jl index b8ce54b..22722a0 100644 --- a/test/test_utils.jl +++ b/test/test_utils.jl @@ -6,7 +6,7 @@ Random.seed!(1234) fder(x, y) = exp(y) * x + y * log(x) dfderdx(x, y) = exp(y) + y * 1 / x dfderdy(x, y) = exp(y) * x + log(x) -dfderdxdx(x, y) = -y * 1 / x^2 +dfderdxdx(x, y) = -y / x^2 fgrad(x, y) = prod(x) + sum(y ./ (1:length(y))) dfgraddx(x, y) = prod(x) ./ x @@ -144,47 +144,43 @@ function test_jacobians(backend; multiple_inputs=true, test_types=true) @test yvec == yvec2 end -function test_second_derivatives(backend; multiple_inputs=false, test_types=true) - if multiple_inputs - # ... but - error("multiple_inputs=true is not supported.") - else - # explicit test that AbstractDifferentiation throws an error - # don't support tuple of second derivatives - @test_throws ArgumentError AD.second_derivative( - backend, x -> fder(x, yscalar), (xscalar, yscalar) - ) - @test_throws MethodError AD.second_derivative( - backend, x -> fder(x, yscalar), xscalar, yscalar - ) +function test_second_derivatives(backend; test_types=true) + # explicit test that AbstractDifferentiation throws an error + # don't support tuple of second derivatives + @test_throws ArgumentError AD.second_derivative( + backend, x -> fder(x, yscalar), (xscalar, yscalar) + ) + @test_throws MethodError AD.second_derivative( + backend, x -> fder(x, yscalar), xscalar, yscalar + ) end # test if single input (no tuple works) dder1 = AD.second_derivative(backend, x -> fder(x, yscalar), xscalar) if test_types - @test dder1[1] isa Float64 + @test only(dder1) isa Float64 end - @test dfderdxdx(xscalar, yscalar) ≈ dder1[1] atol = 1e-8 + @test dfderdxdx(xscalar, yscalar) ≈ only(dder1) atol = 1e-8 valscalar, dder2 = AD.value_and_second_derivative( backend, x -> fder(x, yscalar), xscalar ) if test_types @test valscalar isa Float64 - @test dder2[1] isa Float64 + @test only(dder2) isa Float64 end @test valscalar == fder(xscalar, yscalar) - @test norm.(dder2 .- dder1) == (0,) + @test dder2 == dder1 valscalar, der, dder3 = AD.value_derivative_and_second_derivative( backend, x -> fder(x, yscalar), xscalar ) if test_types @test valscalar isa Float64 - @test der[1] isa Float64 - @test dder3[1] isa Float64 + @test only(der) isa Float64 + @test only(dder3) isa Float64 end @test valscalar == fder(xscalar, yscalar) - @test norm.(der .- AD.derivative(backend, x -> fder(x, yscalar), xscalar)) == (0,) - @test norm.(dder3 .- dder1) == (0,) + @test der == AD.derivative(backend, x -> fder(x, yscalar), xscalar)) + @test dder3 == dder1 end function test_hessians(backend; multiple_inputs=false, test_types=true) From 72e42012891ae39f3aa50891f8a7e0cab91003f1 Mon Sep 17 00:00:00 2001 From: Gabriel Gerlero Date: Fri, 2 Feb 2024 13:53:02 -0300 Subject: [PATCH 12/12] Fix errors --- test/test_utils.jl | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/test/test_utils.jl b/test/test_utils.jl index 22722a0..22e00c3 100644 --- a/test/test_utils.jl +++ b/test/test_utils.jl @@ -153,7 +153,6 @@ function test_second_derivatives(backend; test_types=true) @test_throws MethodError AD.second_derivative( backend, x -> fder(x, yscalar), xscalar, yscalar ) - end # test if single input (no tuple works) dder1 = AD.second_derivative(backend, x -> fder(x, yscalar), xscalar) @@ -179,7 +178,7 @@ function test_second_derivatives(backend; test_types=true) @test only(dder3) isa Float64 end @test valscalar == fder(xscalar, yscalar) - @test der == AD.derivative(backend, x -> fder(x, yscalar), xscalar)) + @test der == AD.derivative(backend, x -> fder(x, yscalar), xscalar) @test dder3 == dder1 end