From ad47ce2dbb4f6d4a5728624f3bf3f529887c4d71 Mon Sep 17 00:00:00 2001 From: Hamlet Tanyavong <34531738+HamletTanyavong@users.noreply.github.com> Date: Tue, 21 Nov 2023 04:55:25 -0600 Subject: [PATCH 01/29] Update README.md --- README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/README.md b/README.md index 6c737b4b..50f9be0e 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,6 @@ ## About -Mathematics.NET provides custom types for complex, real, and rational numbers as well as other mathematical objects such as vectors, matrices, and tensors.[^1] Mathematics.NET also supports automatic differentiation.[^2] +Mathematics.NET provides custom types for complex, real, and rational numbers as well as other mathematical objects such as vectors, matrices, and tensors.[^1] Mathematics.NET also supports first-order, forward and reverse-mode automatic differentiation. [^1]: Please visit the [documentation site](https://mathematics.hamlettanyavong.com) for detailed information. -[^2]: So far, only first-order, reverse-mode automatic differentiation is supported; first-order, forward-mode, as well as second-order forward and reverse-modes are planned features. 
From c7e345307deac8db82e44e78395e05478816b9a4 Mon Sep 17 00:00:00 2001 From: Hamlet Tanyavong <34531738+HamletTanyavong@users.noreply.github.com> Date: Tue, 21 Nov 2023 06:00:31 -0600 Subject: [PATCH 02/29] Create HessianNode.cs --- src/Mathematics.NET/AutoDiff/HessianNode.cs | 89 +++++++++++++++++++++ 1 file changed, 89 insertions(+) create mode 100644 src/Mathematics.NET/AutoDiff/HessianNode.cs diff --git a/src/Mathematics.NET/AutoDiff/HessianNode.cs b/src/Mathematics.NET/AutoDiff/HessianNode.cs new file mode 100644 index 00000000..6c18d064 --- /dev/null +++ b/src/Mathematics.NET/AutoDiff/HessianNode.cs @@ -0,0 +1,89 @@ +// +// Mathematics.NET +// https://github.com/HamletTanyavong/Mathematics.NET +// +// MIT License +// +// Copyright (c) 2023 Hamlet Tanyavong +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+// + +using System.Runtime.InteropServices; + +namespace Mathematics.NET.AutoDiff; + +/// Represents a node on a Hessian tape +/// A type that implements +[StructLayout(LayoutKind.Sequential)] +internal readonly record struct HessianNode + where T : IComplex +{ + /// The first derivative of the left component of the binary operation + public readonly T DX; + /// The second derivative of the left component of the binary operation + public readonly T DXX; + /// The derivative of the left or right component of the binary operation with respect to the left and right variables + public readonly T DXY; + /// The first derivative of the right component of the binary operation + public readonly T DY; + /// The second derivative of the right component of the binary operation + public readonly T DYY; + + /// The parent index of the left node + public readonly int PX; + /// The parent index of the right node + public readonly int PY; + + public HessianNode(int index) + { + DX = T.Zero; + DXX = T.Zero; + DXY = T.Zero; + DY = T.Zero; + DYY = T.Zero; + + PX = index; + PY = index; + } + + public HessianNode(T dfx, T dfxx, int px, int py) + { + DX = dfx; + DXX = dfxx; + DXY = T.Zero; + DY = T.Zero; + DYY = T.Zero; + + PX = px; + PY = py; + } + + public HessianNode(T dfx, T dfxx, T dfxy, T dfy, T dfyy, int px, int py) + { + DX = dfx; + DXX = dfxx; + DXY = dfxy; + DY = dfy; + DYY = dfyy; + + PX = px; + PY = py; + } +} From fefc20de8a80eb3af3eddc83b67bbf8d424db1e7 Mon Sep 17 00:00:00 2001 From: Hamlet Tanyavong <34531738+HamletTanyavong@users.noreply.github.com> Date: Tue, 21 Nov 2023 09:44:43 -0600 Subject: [PATCH 03/29] Create HessianTape.cs --- src/Mathematics.NET/AutoDiff/HessianTape.cs | 629 ++++++++++++++++++++ 1 file changed, 629 insertions(+) create mode 100644 src/Mathematics.NET/AutoDiff/HessianTape.cs diff --git a/src/Mathematics.NET/AutoDiff/HessianTape.cs b/src/Mathematics.NET/AutoDiff/HessianTape.cs new file mode 100644 index 00000000..232ccf49 --- /dev/null +++ 
b/src/Mathematics.NET/AutoDiff/HessianTape.cs @@ -0,0 +1,629 @@ +// +// Mathematics.NET +// https://github.com/HamletTanyavong/Mathematics.NET +// +// MIT License +// +// Copyright (c) 2023 Hamlet Tanyavong +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +// + +using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; + +namespace Mathematics.NET.AutoDiff; + +/// Represents a Hessian tape +/// A type that implements and +public record class HessianTape + where T : IComplex, IDifferentiableFunctions +{ + private List> _nodes; + private int _variableCount; + + public HessianTape() + { + _nodes = []; + } + + /// Get the number of nodes on the Hessian tape. + public int NodeCount => _nodes.Count; + + /// Get the number of variables that are being tracked. + public int VariableCount => _variableCount; + + // + // Methods + // + + /// Create a variable for the Hessian tape to track. 
+ /// A seed value + /// A variable + public Variable CreateVariable(T seed) + { + _nodes.Add(new(_variableCount)); + Variable variable = new(_variableCount++, seed); + return variable; + } + + /// Print the nodes of the Hessian tape to the console. + /// A cancellation token + /// The total number of nodes to print + public void PrintNodes(CancellationToken cancellationToken, int limit = 100) + { + const string tab = " "; + + ReadOnlySpan> nodeSpan = CollectionsMarshal.AsSpan(_nodes); + HessianNode node; + + int i = 0; + while (i < Math.Min(_variableCount, limit)) + { + CheckForCancellation(cancellationToken); + node = nodeSpan[i]; + Console.WriteLine($"Root Node {i}:"); + Console.WriteLine($"{tab}Weights: [[{node.DX}, {node.DXX}, {node.DXY}],"); + Console.WriteLine($"{tab} [{node.DY}, {node.DYY}, {node.DXY}]]"); + Console.WriteLine($"{tab}Parents: [{node.PX}, {node.PY}]"); + i++; + } + + CheckForCancellation(cancellationToken); + Console.WriteLine(); + + while (i < Math.Min(nodeSpan.Length, limit)) + { + CheckForCancellation(cancellationToken); + node = nodeSpan[i]; + Console.WriteLine($"Node {i}:"); + Console.WriteLine($"{tab}Weights: [[{node.DX}, {node.DXX}, {node.DXY}],"); + Console.WriteLine($"{tab} [{node.DY}, {node.DYY}, {node.DXY}]]"); + Console.WriteLine($"{tab}Parents: [{node.PX}, {node.PY}]"); + i++; + } + + static void CheckForCancellation(CancellationToken cancellationToken) + { + if (cancellationToken.IsCancellationRequested) + { + Console.WriteLine("Print node operation cancelled"); + cancellationToken.ThrowIfCancellationRequested(); + } + } + } + + /// Perform reverse accumulation on the Hessian tape and output the resulting Hessian. 
+ /// The gradient + /// The Hessian + [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] + public void ReverseAccumulation(out ReadOnlySpan gradient, out ReadOnlySpan2D hessian) + => ReverseAccumulation(out gradient, out hessian, T.One); + + // The following method uses the edge-pushing algorithm outlined by Gower and Mello: https://arxiv.org/pdf/2007.15040.pdf. + // TODO: use newer variations/versions of this algorithm since they are more performant + + /// Perform reverse accumulation on the Hessian tape and output the resulting Hessian. + /// The gradient + /// The Hessian + /// A seed value + /// The Hessian tape does not have any tracked variables. + [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] + public void ReverseAccumulation(out ReadOnlySpan gradient, out ReadOnlySpan2D hessian, T seed) + { + if (_variableCount == 0) + { + throw new Exception("Hessian tape contains no root nodes"); + } + + ReadOnlySpan> nodes = CollectionsMarshal.AsSpan(_nodes); + ref var start = ref MemoryMarshal.GetReference(nodes); + var length = nodes.Length; + + Span gradientSpan = new T[length]; + gradientSpan[length - 1] = seed; + + Span2D hessianSpan = new T[length, length]; + + for (int i = length - 1; i >= _variableCount; i--) + { + var node = Unsafe.Add(ref start, i); + var gradientElement = gradientSpan[i]; + + EdgePush(hessianSpan, ref node, i); + Accumulate(hessianSpan, ref node, gradientElement); + + gradientSpan[node.PX] += gradientElement * node.DX; + gradientSpan[node.PY] += gradientElement * node.DY; + } + + gradient = gradientSpan[.._variableCount]; + hessian = hessianSpan.Slice(0, 0, _variableCount, _variableCount); + } + + [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] + private static void EdgePush(Span2D weight, ref HessianNode node, int i) + { + for (int p = 0; p <= i; p++) + { + if (weight[i, p] == 0 || weight[p, i] == 0) + { + 
continue; + } + if (p != i) + { + if (node.PX != p) + { + var x = node.DX * weight[i, p]; + weight[node.PX, p] += x; + weight[p, node.PX] += x; + } + else + { + weight[p, p] += 2 * node.DX * weight[i, p]; + } + + if (node.PY != p) + { + var x = node.DY * weight[i, p]; + weight[node.PY, p] += x; + weight[p, node.PY] += x; + } + else + { + weight[p, p] += 2 * node.DY * weight[i, p]; + } + } + else + { + var x = weight[i, i]; + weight[node.PX, node.PX] += node.DX * node.DX * x; + weight[node.PX, node.PY] += node.DX * node.DY * x; + weight[node.PY, node.PX] += node.DY * node.DX * x; + weight[node.PY, node.PY] += node.DY * node.DY * x; + } + } + } + + [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] + private static void Accumulate(Span2D weight, ref HessianNode node, T v) + { + weight[node.PX, node.PX] += v * node.DXX; + weight[node.PX, node.PY] += v * node.DXY; + weight[node.PY, node.PX] += v * node.DXY; + weight[node.PY, node.PY] += v * node.DYY; + } + + // + // Basic operations + // + + /// + public Variable Add(Variable x, Variable y) + { + _nodes.Add(new(T.One, T.Zero, T.Zero, T.One, T.Zero, x._index, y._index)); + return new(_nodes.Count - 1, x.Value + y.Value); + } + + /// + public Variable Add(T c, Variable x) + { + _nodes.Add(new(T.One, T.Zero, x._index, _nodes.Count)); + return new(_nodes.Count - 1, c + x.Value); + } + + /// + public Variable Add(Variable x, T c) + { + _nodes.Add(new(T.One, T.Zero, x._index, _nodes.Count)); + return new(_nodes.Count - 1, x.Value + c); + } + + /// + public Variable Divide(Variable x, Variable y) + { + var n = T.One / y.Value; + var dfxy = -n * n; + _nodes.Add(new(n, T.Zero, dfxy, x.Value * dfxy, -2.0 * n * x.Value * dfxy, x._index, y._index)); + return new(_nodes.Count - 1, x.Value * n); + } + + /// + public Variable Divide(T c, Variable x) + { + var n = T.One / x.Value; + var dfxy = -n * n; + _nodes.Add(new(c * dfxy, -2.0 * n * c * dfxy, x._index, _nodes.Count)); + return 
new(_nodes.Count - 1, c * n); + } + + /// + public Variable Divide(Variable x, T c) + { + var n = T.One / c; + _nodes.Add(new(n, T.Zero, x._index, _nodes.Count)); + return new(_nodes.Count - 1, x.Value * n); + } + + /// + public Variable Modulo(Variable x, Variable y) + { + _nodes.Add(new(T.One, T.Zero, T.Zero, x.Value * Real.Floor(x.Value / y.Value), T.Zero, x._index, y._index)); + return new(_nodes.Count - 1, x.Value % y.Value); + } + + /// + public Variable Modulo(Real c, Variable x) + { + _nodes.Add(new(c.Value * Real.Floor(c.Value / x.Value), T.Zero, x._index, _nodes.Count)); + return new(_nodes.Count - 1, c % x.Value); + } + + /// + public Variable Modulo(Variable x, Real c) + { + _nodes.Add(new(T.One, T.Zero, x._index, _nodes.Count)); + return new(_nodes.Count - 1, x.Value % c); + } + + /// + public Variable Multiply(Variable x, Variable y) + { + _nodes.Add(new(y.Value, T.Zero, T.One, x.Value, T.Zero, x._index, y._index)); + return new(_nodes.Count - 1, x.Value * y.Value); + } + + /// + public Variable Multiply(T c, Variable x) + { + _nodes.Add(new(c, T.Zero, x._index, _nodes.Count)); + return new(_nodes.Count - 1, c * x.Value); + } + + /// + public Variable Multiply(Variable x, T c) + { + _nodes.Add(new(c, T.Zero, x._index, _nodes.Count)); + return new(_nodes.Count - 1, x.Value * c); + } + + /// + public Variable Subtract(Variable x, Variable y) + { + _nodes.Add(new(T.One, T.Zero, T.Zero, -T.One, T.Zero, x._index, y._index)); + return new(_nodes.Count - 1, x.Value - y.Value); + } + + /// + public Variable Subtract(T c, Variable x) + { + _nodes.Add(new(-T.One, T.Zero, x._index, _nodes.Count)); + return new(_nodes.Count - 1, c - x.Value); + } + + /// + public Variable Subtract(Variable x, T c) + { + _nodes.Add(new(T.One, T.Zero, x._index, _nodes.Count)); + return new(_nodes.Count - 1, x.Value - c); + } + + // + // Other operations + // + + /// + public Variable Negate(Variable x) + { + _nodes.Add(new(-T.One, T.Zero, x._index, _nodes.Count)); + return 
new(_nodes.Count - 1, -x.Value); + } + + // Exponential functions + + /// + public Variable Exp(Variable x) + { + var exp = T.Exp(x.Value); + _nodes.Add(new(exp, exp, x._index, _nodes.Count)); + return new(_nodes.Count - 1, exp); + } + + /// + public Variable Exp2(Variable x) + { + var exp2 = T.Exp2(x.Value); + var df = Real.Ln2 * exp2; + _nodes.Add(new(df, Real.Ln2 * df, x._index, _nodes.Count)); + return new(_nodes.Count - 1, exp2); + } + + /// + public Variable Exp10(Variable x) + { + var exp10 = T.Exp10(x.Value); + var df = Real.Ln10 * exp10; + _nodes.Add(new(df, Real.Ln10 * df, x._index, _nodes.Count)); + return new(_nodes.Count - 1, exp10); + } + + // Hyperbolic functions + + /// + public Variable Acosh(Variable x) + { + var u = x.Value - T.One; + var v = x.Value + T.One; + _nodes.Add(new(T.One / (T.Sqrt(u) * T.Sqrt(v)), -x.Value * T.Pow(u, -1.5) * T.Pow(v, -1.5), x._index, _nodes.Count)); + return new(_nodes.Count - 1, T.Acosh(x.Value)); + } + + /// + public Variable Asinh(Variable x) + { + var u = T.One + x.Value * x.Value; + _nodes.Add(new(T.One / T.Sqrt(u), -x.Value * T.Pow(u, -1.5), x._index, _nodes.Count)); + return new(_nodes.Count - 1, T.Asinh(x.Value)); + } + + /// + public Variable Atanh(Variable x) + { + var df = T.One / (T.One - x.Value * x.Value); + _nodes.Add(new(df, 2.0 * df * x.Value * df, x._index, _nodes.Count)); + return new(_nodes.Count - 1, T.Atanh(x.Value)); + } + + /// + public Variable Cosh(Variable x) + { + var cosh = T.Cosh(x.Value); + _nodes.Add(new(T.Sinh(x.Value), cosh, x._index, _nodes.Count)); + return new(_nodes.Count - 1, cosh); + } + + /// + public Variable Sinh(Variable x) + { + var sinh = T.Sinh(x.Value); + _nodes.Add(new(T.Cosh(x.Value), sinh, x._index, _nodes.Count)); + return new(_nodes.Count - 1, sinh); + } + + /// + public Variable Tanh(Variable x) + { + var tanh = T.Tanh(x.Value); + var u = T.One / T.Cosh(x.Value); + var df = u * u; + _nodes.Add(new(df, -2.0 * df * tanh, x._index, _nodes.Count)); + return 
new(_nodes.Count - 1, tanh); + } + + // Logarithmic functions + + /// + public Variable Ln(Variable x) + { + var df = T.One / x.Value; + _nodes.Add(new(df, -df * df, x._index, _nodes.Count)); + return new(_nodes.Count - 1, T.Ln(x.Value)); + } + + /// + public Variable Log(Variable x, Variable b) + { + var lnx = T.Ln(x.Value); + var lnb = T.Ln(b.Value); + var dfx = T.One / (lnb * x.Value); + var dfb = -lnx / (lnb * lnb * b.Value); + _nodes.Add(new(dfx, -dfx / x.Value, -dfx / (lnb * b.Value), dfb, -dfb * (2.0 / lnb + T.One) / b.Value, x._index, b._index)); + return new(_nodes.Count - 1, T.Log(x.Value, b.Value)); + } + + /// + public Variable Log2(Variable x) + { + var u = T.One / x.Value; + var df = u / Real.Ln2; + _nodes.Add(new(df, -u * u / Real.Ln2, x._index, _nodes.Count)); + return new(_nodes.Count - 1, T.Log2(x.Value)); + } + + /// + public Variable Log10(Variable x) + { + var u = T.One / x.Value; + var df = u / Real.Ln10; + _nodes.Add(new(df, -u * u / Real.Ln10, x._index, _nodes.Count)); + return new(_nodes.Count - 1, T.Log10(x.Value)); + } + + // Power functions + + /// + public Variable Pow(Variable x, Variable n) + { + var pow = T.Pow(x.Value, n.Value); + var lnx = T.Ln(x.Value); + var pownmo = T.Pow(x.Value, n.Value - T.One); + var dfn = lnx * pow; + _nodes.Add(new( + n.Value * pownmo, + (n.Value - T.One) * n.Value * T.Pow(x.Value, n.Value - 2.0), + (T.One + lnx * n.Value) * pownmo, + dfn, + lnx * dfn, + x._index, + n._index)); + return new(_nodes.Count - 1, pow); + } + + // Root functions + + /// + public Variable Cbrt(Variable x) + { + var cbrt = T.Cbrt(x.Value); + var df = T.One / (3.0 * cbrt * cbrt); + _nodes.Add(new(df, -2.0 * df / (3.0 * x.Value), x._index, _nodes.Count)); + return new(_nodes.Count - 1, cbrt); + } + + /// + public Variable Root(Variable x, Variable n) + { + var root = T.Root(x.Value, n.Value); + var lnx = T.Ln(x.Value); + var u = T.One / n.Value; + var v = T.One / x.Value; + var w = u * u; + var dfx = u * v * root; + var dfn = -lnx * 
root * w; + _nodes.Add(new( + dfx, + (u - T.One) * v * dfx, + -(dfx * u + lnx * w * dfx), + dfn, + -(2.0 * u + lnx * w) * dfn, + x._index, + n._index)); + return new(_nodes.Count - 1, root); + } + + /// + public Variable Sqrt(Variable x) + { + var sqrt = T.Sqrt(x.Value); + var df = 0.5 / sqrt; + _nodes.Add(new(df, -0.5 / x.Value * df, x._index, _nodes.Count)); + return new(_nodes.Count - 1, sqrt); + } + + // Trigonometric functions + + /// + public Variable Cos(Variable x) + { + var cos = T.Cos(x.Value); + _nodes.Add(new(-T.Sin(x.Value), -cos, x._index, _nodes.Count)); + return new(_nodes.Count - 1, cos); + } + + /// + public Variable Acos(Variable x) + { + var u = T.One - x.Value * x.Value; + _nodes.Add(new(-T.One / T.Sqrt(u), -x.Value * T.Pow(u, -1.5), x._index, _nodes.Count)); + return new(_nodes.Count - 1, T.Acos(x.Value)); + } + + /// + public Variable Asin(Variable x) + { + var u = T.One - x.Value * x.Value; + _nodes.Add(new(T.One / T.Sqrt(u), x.Value * T.Pow(u, -1.5), x._index, _nodes.Count)); + return new(_nodes.Count - 1, T.Asin(x.Value)); + } + + /// + public Variable Atan(Variable x) + { + var df = T.One / (T.One + x.Value * x.Value); + _nodes.Add(new(df, -2.0 * df * x.Value * df, x._index, _nodes.Count)); + return new(_nodes.Count - 1, T.Atan(x.Value)); + } + + /// + public Variable Atan2(Variable y, Variable x) + { + var u = y.Value * y.Value; + var v = x.Value * x.Value; + var a = T.One / (u + v); + var b = a * a; + var dfyy = -2.0 * x.Value * b * y.Value; + _nodes.Add(new( + x.Value * a, + dfyy, + (u - v) * b, + -y.Value * a, + -dfyy, + y._index, + x._index)); + return new(_nodes.Count - 1, Real.Atan2(y.Value, x.Value)); + } + + /// + public Variable Sin(Variable x) + { + var sin = T.Sin(x.Value); + _nodes.Add(new(T.Cos(x.Value), -sin, x._index, _nodes.Count)); + return new(_nodes.Count - 1, sin); + } + + /// + public Variable Tan(Variable x) + { + var tan = T.Tan(x.Value); + var sec = T.One / T.Cos(x.Value); + var df = sec * sec; + 
_nodes.Add(new(df, 2.0 * df * tan, x._index, _nodes.Count)); + return new(_nodes.Count - 1, tan); + } + + // + // Custom operations + // + + /// Add a node to the Hessian tape using a custom unary operation. + /// A variable + /// A function + /// The derivative of the function + /// The second derivative of the function + /// A variable + public Variable CustomOperation(Variable x, Func f, Func dfx, Func dfxx) + { + _nodes.Add(new(dfx(x.Value), dfxx(x.Value), x._index, _nodes.Count)); + return new(_nodes.Count - 1, f(x.Value)); + } + + /// Add a node to the Hessian tape using a custom binary operation. + /// The first variable + /// The second variable + /// A function + /// The first derivative of the function with respect to the first variable + /// The second derivative of the function with respect to the first variable + /// The second derivative of the function with respect to both variables + /// The first derivative of the function with respect to the second variable + /// The second derivative of the function with respect to the second variable + /// A variable + public Variable CustomOperation( + Variable x, + Variable y, + Func f, + Func dfx, + Func dfxx, + Func dfxy, + Func dfy, + Func dfyy) + { + _nodes.Add(new(dfx(x.Value, y.Value), dfxx(x.Value, y.Value), dfxy(x.Value, y.Value), dfy(x.Value, y.Value), dfyy(x.Value, y.Value), x._index, y._index)); + return new(_nodes.Count - 1, f(x.Value, y.Value)); + } +} From fbfe94f6581ab1dd8f8dc79c1a8b42854ad1b500 Mon Sep 17 00:00:00 2001 From: Hamlet Tanyavong <34531738+HamletTanyavong@users.noreply.github.com> Date: Wed, 22 Nov 2023 04:14:56 -0600 Subject: [PATCH 04/29] Update GradientTape.cs - Update documentation comments --- src/Mathematics.NET/AutoDiff/GradientTape.cs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/Mathematics.NET/AutoDiff/GradientTape.cs b/src/Mathematics.NET/AutoDiff/GradientTape.cs index 27dd5b60..eabff864 100644 --- 
a/src/Mathematics.NET/AutoDiff/GradientTape.cs +++ b/src/Mathematics.NET/AutoDiff/GradientTape.cs @@ -142,15 +142,16 @@ static void CheckForCancellation(CancellationToken cancellationToken) } } - /// Perform reverse accumulation on the gradient tape and output the resulting gradients. - /// The gradients + /// Perform reverse accumulation on the gradient tape and output the resulting gradient. + /// The gradient [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] public void ReverseAccumulation(out ReadOnlySpan gradients) => ReverseAccumulation(out gradients, T.One); - /// Perform reverse accumulation on the gradient tape and output the resulting gradients. - /// The gradients + /// Perform reverse accumulation on the gradient tape and output the resulting gradient. + /// The gradient /// A seed value + /// The gradient tape does not have any tracked variables. [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] public void ReverseAccumulation(out ReadOnlySpan gradients, T seed) { From f2a3fd0da6ec30631fc17455f621d1200a7f5ed8 Mon Sep 17 00:00:00 2001 From: Hamlet Tanyavong <34531738+HamletTanyavong@users.noreply.github.com> Date: Wed, 22 Nov 2023 04:15:25 -0600 Subject: [PATCH 05/29] Add method for formatting 2D read-only spans --- .../LinearAlgebra/LinAlgExtensions.cs | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/src/Mathematics.NET/LinearAlgebra/LinAlgExtensions.cs b/src/Mathematics.NET/LinearAlgebra/LinAlgExtensions.cs index acc8e214..c0686c9e 100644 --- a/src/Mathematics.NET/LinearAlgebra/LinAlgExtensions.cs +++ b/src/Mathematics.NET/LinearAlgebra/LinAlgExtensions.cs @@ -93,6 +93,20 @@ public static string ToDisplayString(this Span span, string? 
format = null return string.Format(provider, builder.ToString()); } + /// Get the string representation of this + /// A type that implements + /// A 2D read-only span to format + /// The format to use + /// The provider to use to format the value + /// A string representation of this object + public static string ToDisplayString(this ReadOnlySpan2D readOnlySpan2D, string? format = null, IFormatProvider? provider = null) + where T : IComplex + { + Span2D span = new T[readOnlySpan2D.Height, readOnlySpan2D.Width]; + readOnlySpan2D.CopyTo(span); + return span.ToDisplayString(format, provider); + } + /// Get the string representation of this object /// A type that implements /// The span to format From d89d21c560605e20bcc96a4ae7c7ecfec50a0d32 Mon Sep 17 00:00:00 2001 From: Hamlet Tanyavong <34531738+HamletTanyavong@users.noreply.github.com> Date: Wed, 22 Nov 2023 04:15:55 -0600 Subject: [PATCH 06/29] Make helpers private --- .../AutoDiff/DualVector3OfRealTests.cs | 8 ++++---- .../AutoDiff/GradientTapeExtensionsOfRealTests.cs | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/tests/Mathematics.NET.Tests/AutoDiff/DualVector3OfRealTests.cs b/tests/Mathematics.NET.Tests/AutoDiff/DualVector3OfRealTests.cs index 04ea9378..7ab90b59 100644 --- a/tests/Mathematics.NET.Tests/AutoDiff/DualVector3OfRealTests.cs +++ b/tests/Mathematics.NET.Tests/AutoDiff/DualVector3OfRealTests.cs @@ -133,15 +133,15 @@ public void VJP_VectorAndR3VectorFunction_ReturnsVJP(double vx, double vy, doubl // Helpers // - public static Dual F(DualVector3 x) + private static Dual F(DualVector3 x) => Cos(x.X1) / ((x.X1 + x.X2) * Sin(x.X3)); - public static Dual FX(DualVector3 x) + private static Dual FX(DualVector3 x) => Sin(x.X1) * (Cos(x.X2) + Sqrt(x.X3)); - public static Dual FY(DualVector3 x) + private static Dual FY(DualVector3 x) => Sqrt(x.X1 + x.X2 + x.X3); - public static Dual FZ(DualVector3 x) + private static Dual FZ(DualVector3 x) => Sinh(Exp(x.X1) * x.X2 / x.X3); } diff 
--git a/tests/Mathematics.NET.Tests/AutoDiff/GradientTapeExtensionsOfRealTests.cs b/tests/Mathematics.NET.Tests/AutoDiff/GradientTapeExtensionsOfRealTests.cs index 1e2c05de..4dfdfd4e 100644 --- a/tests/Mathematics.NET.Tests/AutoDiff/GradientTapeExtensionsOfRealTests.cs +++ b/tests/Mathematics.NET.Tests/AutoDiff/GradientTapeExtensionsOfRealTests.cs @@ -140,7 +140,7 @@ public void VJP_VectorAndR3VectorFunction_ReturnsVJP(double vx, double vy, doubl // // f(x, y, z) = Cos(x) / ((x + y) * Sin(z)) - public static Variable F(GradientTape tape, VariableVector3 x) + private static Variable F(GradientTape tape, VariableVector3 x) { return tape.Divide( tape.Cos(x.X1), @@ -150,7 +150,7 @@ public static Variable F(GradientTape tape, VariableVector3 x) } // f(x, y, z) = Sin(x) * (Cos(y) + Sqrt(z)) - public static Variable FX(GradientTape tape, VariableVector3 x) + private static Variable FX(GradientTape tape, VariableVector3 x) { return tape.Multiply( tape.Sin(x.X1), @@ -160,7 +160,7 @@ public static Variable FX(GradientTape tape, VariableVector3 x } // f(x, y, z) = Sqrt(x + y + z) - public static Variable FY(GradientTape tape, VariableVector3 x) + private static Variable FY(GradientTape tape, VariableVector3 x) { return tape.Sqrt( tape.Add( @@ -171,7 +171,7 @@ public static Variable FY(GradientTape tape, VariableVector3 x } // f(x, y, z) = Sinh(Exp(x) * y / z) - public static Variable FZ(GradientTape tape, VariableVector3 x) + private static Variable FZ(GradientTape tape, VariableVector3 x) { return tape.Sinh( tape.Multiply( From 6e8b4d177bfd2a9fafbde2052a8ccb1909c5ff82 Mon Sep 17 00:00:00 2001 From: Hamlet Tanyavong <34531738+HamletTanyavong@users.noreply.github.com> Date: Wed, 22 Nov 2023 04:15:59 -0600 Subject: [PATCH 07/29] Update Mathematics.NET.SourceGenerators.csproj --- .../Mathematics.NET.SourceGenerators.csproj | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Mathematics.NET.SourceGenerators/Mathematics.NET.SourceGenerators.csproj 
b/src/Mathematics.NET.SourceGenerators/Mathematics.NET.SourceGenerators.csproj index ea490301..31e273d9 100644 --- a/src/Mathematics.NET.SourceGenerators/Mathematics.NET.SourceGenerators.csproj +++ b/src/Mathematics.NET.SourceGenerators/Mathematics.NET.SourceGenerators.csproj @@ -15,7 +15,7 @@ all runtime; build; native; contentfiles; analyzers; buildtransitive - + From 5d0d3669c9665aa06ae674ba63671cedbe21df87 Mon Sep 17 00:00:00 2001 From: Hamlet Tanyavong <34531738+HamletTanyavong@users.noreply.github.com> Date: Wed, 22 Nov 2023 04:16:05 -0600 Subject: [PATCH 08/29] Update first-order-reverse-mode.md --- docs/guide/autodiff/first-order-reverse-mode.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/guide/autodiff/first-order-reverse-mode.md b/docs/guide/autodiff/first-order-reverse-mode.md index bb5c0c2b..5b886561 100644 --- a/docs/guide/autodiff/first-order-reverse-mode.md +++ b/docs/guide/autodiff/first-order-reverse-mode.md @@ -200,7 +200,7 @@ graph BT mul -- adj(w₆) = adj(w₇) ∂w₇/∂w₆ --> div div -- adj(f) = adj(w₇) = 1 (seed) --> function["f(x, y, z)"] ``` -As before, we can use the `ReverseAccumulation` to get our gradients +As before, we can use `ReverseAccumulation` to get our gradients ```csharp tape.ReverseAccumulation(out var gradients); ``` From c5927deb6956ee8b2efb6e7840a18e05e57993fa Mon Sep 17 00:00:00 2001 From: Hamlet Tanyavong <34531738+HamletTanyavong@users.noreply.github.com> Date: Wed, 22 Nov 2023 04:44:19 -0600 Subject: [PATCH 09/29] Update GradientTapeOfRealTests.cs - Rename methods and some parameters --- .../AutoDiff/GradientTapeOfRealTests.cs | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/tests/Mathematics.NET.Tests/AutoDiff/GradientTapeOfRealTests.cs b/tests/Mathematics.NET.Tests/AutoDiff/GradientTapeOfRealTests.cs index 5f91f194..06d484de 100644 --- a/tests/Mathematics.NET.Tests/AutoDiff/GradientTapeOfRealTests.cs +++ 
b/tests/Mathematics.NET.Tests/AutoDiff/GradientTapeOfRealTests.cs @@ -69,7 +69,7 @@ public void Add_TwoVariables_ReturnsGradients(double left, double right, double { Real[] expected = [expectedLeft, expectedRight]; - var actual = ComputeGradients(_tape.Add, left, right); + var actual = ComputeGradient(_tape.Add, left, right); Assert.AreApproximatelyEqual(expected, actual, 1e-16); } @@ -133,7 +133,7 @@ public void Atan2_TwoVariables_ReturnsGradients(double left, double right, doubl { Real[] expected = [expectedLeft, expectedRight]; - var actual = ComputeGradients(_tape.Atan2, left, right); + var actual = ComputeGradient(_tape.Atan2, left, right); Assert.AreApproximatelyEqual(expected, actual, 1e-15); } @@ -233,7 +233,7 @@ public void Divide_TwoVariables_ReturnsGradients(double left, double right, doub { Real[] expected = [expectedLeft, expectedRight]; - var actual = ComputeGradients(_tape.Divide, left, right); + var actual = ComputeGradient(_tape.Divide, left, right); Assert.AreApproximatelyEqual(expected, actual, 1e-15); } @@ -306,7 +306,7 @@ public void Log_TwoVariables_ReturnsGradients(double left, double right, double { Real[] expected = [expectedLeft, expectedRight]; - var actual = ComputeGradients(_tape.Log, left, right); + var actual = ComputeGradient(_tape.Log, left, right); Assert.AreApproximatelyEqual(expected, actual, 1e-15); } @@ -335,7 +335,7 @@ public void Modulo_TwoVariables_ReturnsGradients(double left, double right, doub { Real[] expected = [expectedLeft, expectedRight]; - var actual = ComputeGradients(_tape.Modulo, left, right); + var actual = ComputeGradient(_tape.Modulo, left, right); Assert.AreApproximatelyEqual(expected, actual, 1e-15); } @@ -372,7 +372,7 @@ public void Multiply_TwoVariables_ReturnsGradients(double left, double right, do { Real[] expected = [expectedLeft, expectedRight]; - var actual = ComputeGradients(_tape.Multiply, left, right); + var actual = ComputeGradient(_tape.Multiply, left, right); 
Assert.AreApproximatelyEqual(expected, actual, 1e-16); } @@ -422,7 +422,7 @@ public void Pow_TwoVariables_ReturnsGradients(double left, double right, double { Real[] expected = [expectedLeft, expectedRight]; - var actual = ComputeGradients(_tape.Pow, left, right); + var actual = ComputeGradient(_tape.Pow, left, right); Assert.AreApproximatelyEqual(expected, actual, 1e-15); } @@ -433,7 +433,7 @@ public void Root_TwoVariables_ReturnsGradients(double left, double right, double { Real[] expected = [expectedLeft, expectedRight]; - var actual = ComputeGradients(_tape.Root, left, right); + var actual = ComputeGradient(_tape.Root, left, right); Assert.AreApproximatelyEqual(expected, actual, 1e-15); } @@ -471,7 +471,7 @@ public void Subtract_TwoVariables_ReturnsGradients(double left, double right, do { Real[] expected = [expectedLeft, expectedRight]; - var actual = ComputeGradients(_tape.Subtract, left, right); + var actual = ComputeGradient(_tape.Subtract, left, right); Assert.AreApproximatelyEqual(expected, actual, 1e-16); } @@ -528,16 +528,16 @@ private Real ComputeGradient(Func, Variable> function, Real { var x = _tape.CreateVariable(input); _ = function(x); - _tape.ReverseAccumulation(out var gradients); - return gradients[0]; + _tape.ReverseAccumulation(out var gradient); + return gradient[0]; } - private Real[] ComputeGradients(Func, Variable, Variable> function, Real left, Real right) + private Real[] ComputeGradient(Func, Variable, Variable> function, Real left, Real right) { var x = _tape.CreateVariable(left); var y = _tape.CreateVariable(right); _ = function(x, y); - _tape.ReverseAccumulation(out var gradients); - return gradients.ToArray(); + _tape.ReverseAccumulation(out var gradient); + return gradient.ToArray(); } } From 8182ef428f9da562dd54d42ac2a1eddc423be26a Mon Sep 17 00:00:00 2001 From: Hamlet Tanyavong <34531738+HamletTanyavong@users.noreply.github.com> Date: Wed, 22 Nov 2023 04:52:19 -0600 Subject: [PATCH 10/29] Create HessianTapeOfRealTests.cs --- 
.../AutoDiff/HessianTapeOfRealTests.cs | 557 ++++++++++++++++++ 1 file changed, 557 insertions(+) create mode 100644 tests/Mathematics.NET.Tests/AutoDiff/HessianTapeOfRealTests.cs diff --git a/tests/Mathematics.NET.Tests/AutoDiff/HessianTapeOfRealTests.cs b/tests/Mathematics.NET.Tests/AutoDiff/HessianTapeOfRealTests.cs new file mode 100644 index 00000000..5f29faaa --- /dev/null +++ b/tests/Mathematics.NET.Tests/AutoDiff/HessianTapeOfRealTests.cs @@ -0,0 +1,557 @@ +// +// Mathematics.NET +// https://github.com/HamletTanyavong/Mathematics.NET +// +// MIT License +// +// Copyright (c) 2023 Hamlet Tanyavong +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. 
+// + +using Mathematics.NET.AutoDiff; + +namespace Mathematics.NET.Tests.AutoDiff; + +[TestClass] +[TestCategory("AutoDiff"), TestCategory("Hessian Tape")] +public sealed class HessianTapeOfRealTests +{ + private HessianTape _tape; + + public HessianTapeOfRealTests() + { + _tape = new(); + } + + // + // Tests + // + + [TestMethod] + [DataRow(0.123, -0.125845035324435)] + public void Acos_Variable_ReturnsHessian(double input, double expected) + { + var actual = ComputeHessian(_tape.Acos, input); + + Assert.AreApproximatelyEqual(expected, actual, 1e-15); + } + + [TestMethod] + [DataRow(1.23, -3.348544407755665)] + public void Acosh_Variable_ReturnsHessian(double input, double expected) + { + var actual = ComputeHessian(_tape.Acosh, input); + + Assert.AreApproximatelyEqual(expected, actual, 1e-15); + } + + [TestMethod] + [DataRow(1.23, 2.34, 0, 0, 0)] + public void Add_TwoVariables_ReturnsHessian(double left, double right, double expectedXX, double expectedXY, double expectedYY) + { + Real[,] expected = new Real[2, 2] { { expectedXX, expectedXY }, { expectedXY, expectedYY } }; + + var actual = ComputeHessian(_tape.Add, left, right); + + Assert.AreApproximatelyEqual(expected, actual, Real.Zero); + } + + [TestMethod] + [DataRow(1.23, 2.34, 0)] + public void Add_ConstantAndVariable_ReturnsHessian(double left, double right, double expected) + { + var x = _tape.CreateVariable(right); + _ = _tape.Add(left, x); + _tape.ReverseAccumulation(out var _, out var hessian); + + var actual = hessian[0, 0]; + + Assert.AreApproximatelyEqual(expected, actual, Real.Zero); + } + + [TestMethod] + [DataRow(1.23, 2.34, 0)] + public void Add_VariableAndConstant_ReturnsHessian(double left, double right, double expected) + { + var x = _tape.CreateVariable(left); + _ = _tape.Add(x, right); + _tape.ReverseAccumulation(out var _, out var hessian); + + var actual = hessian[0, 0]; + + Assert.AreApproximatelyEqual(expected, actual, Real.Zero); + } + + [TestMethod] + [DataRow(0.123, 
0.125845035324435)] + public void Asin_Variable_ReturnsHessian(double input, double expected) + { + var actual = ComputeHessian(_tape.Asin, input); + + Assert.AreApproximatelyEqual(expected, actual, 1e-15); + } + + [TestMethod] + [DataRow(1.23, -0.3087751219667227)] + public void Asinh_Variable_ReturnsHessian(double input, double expected) + { + var actual = ComputeHessian(_tape.Asinh, input); + + Assert.AreApproximatelyEqual(expected, actual, 1e-15); + } + + [TestMethod] + [DataRow(1.23, -0.3895692725912341)] + public void Atan_Variable_ReturnsHessian(double input, double expected) + { + var actual = ComputeHessian(_tape.Atan, input); + + Assert.AreApproximatelyEqual(expected, actual, 1e-15); + } + + [TestMethod] + [DataRow(1.23, 2.34, -0.1178645019844717, -0.081137805227897, 0.1178645019844717)] + public void Atan2_TwoVariables_ReturnsHessian(double left, double right, double expectedXX, double expectedXY, double expectedYY) + { + Real[,] expected = new Real[2, 2] { { expectedXX, expectedXY }, { expectedXY, expectedYY } }; + + var actual = ComputeHessian(_tape.Atan2, left, right); + + Assert.AreApproximatelyEqual(expected, actual, 1e-14); + } + + [TestMethod] + [DataRow(1.23, 9.35125088756106)] + public void Atanh_Variable_ReturnsHessian(double input, double expected) + { + var actual = ComputeHessian(_tape.Atanh, input); + + Assert.AreApproximatelyEqual(expected, actual, 1e-15); + } + + [TestMethod] + [DataRow(1.23, -0.1573785841306649)] + public void Cbrt_Variable_ReturnsHessian(double input, double expected) + { + var actual = ComputeHessian(_tape.Cbrt, input); + + Assert.AreApproximatelyEqual(expected, actual, 1e-15); + } + + [TestMethod] + [DataRow(1.23, -0.3342377271245026)] + public void Cos_Variable_ReturnsHessian(double input, double expected) + { + var actual = ComputeHessian(_tape.Cos, input); + + Assert.AreApproximatelyEqual(expected, actual, 1e-15); + } + + [TestMethod] + [DataRow(1.23, 1.856761056985266)] + public void 
Cosh_Variable_ReturnsHessian(double input, double expected) + { + var actual = ComputeHessian(_tape.Cosh, input); + + Assert.AreApproximatelyEqual(expected, actual, 1e-15); + } + + [TestMethod] + [DataRow(2)] + [DataRow(3)] + [DataRow(4)] + public void CreateVariable_Multiple_TracksCorrectNumberOfVariables(int amount) + { + for (int i = 0; i < amount; i++) + { + _ = _tape.CreateVariable(0); + } + + var actual = _tape.VariableCount; + + Assert.AreEqual(amount, actual); + } + + [TestMethod] + [DataRow(1.23)] + public void CreateVariable_WithSeedValue_ReturnsVariable(double value) + { + var actual = _tape.CreateVariable(value).Value; + + Assert.AreEqual(value, actual); + } + + [TestMethod] + [DataRow(1.23, 2.34, -0.1178645019844717, -0.081137805227897, 0.1178645019844717)] + public void CustomOperation_Binary_ReturnsHessian(double left, double right, double expectedXX, double expectedXY, double expectedYY) + { + var y = _tape.CreateVariable(left); + var x = _tape.CreateVariable(right); + + var u = y.Value * y.Value; + var v = x.Value * x.Value; + var a = Real.One / (u + v); + var b = a * a; + var dfyy = -2.0 * x.Value * b * y.Value; + + _ = _tape.CustomOperation( + y, + x, + Real.Atan2, + (y, x) => x.Value * a, + (y, x) => dfyy, + (y, x) => (u - v) * b, + (y, x) => -y.Value * a, + (y, x) => -dfyy); + + Real[,] expected = new Real[2, 2] { { expectedXX, expectedXY }, { expectedXY, expectedYY } }; + + _tape.ReverseAccumulation(out var _, out var hessian); + + var actual = hessian.ToArray(); + + Assert.AreApproximatelyEqual(expected, actual, 1e-14); + } + + [TestMethod] + [DataRow(1.23, -0.942488801931697)] + public void CustomOperation_Unary_ReturnsHessian(double input, double expected) + { + var x = _tape.CreateVariable(input); + _ = _tape.CustomOperation(x, Real.Sin, Real.Cos, x => -Real.Sin(x)); + _tape.ReverseAccumulation(out var _, out var hessian); + + var actual = hessian[0, 0]; + + Assert.AreApproximatelyEqual(expected, actual, 1e-15); + } + + [TestMethod] + 
[DataRow(1.23, 2.34, 0, -0.1826283877565929, 0.1919939461030848)] + public void Divide_TwoVariables_ReturnsHessian(double left, double right, double expectedXX, double expectedXY, double expectedYY) + { + Real[,] expected = new Real[2, 2] { { expectedXX, expectedXY }, { expectedXY, expectedYY } }; + + var actual = ComputeHessian(_tape.Divide, left, right); + + Assert.AreApproximatelyEqual(expected, actual, 1e-15); + } + + [TestMethod] + [DataRow(1.23, 2.34, 0.1919939461030848)] + public void Divide_ConstantAndVariable_ReturnsHessian(double left, double right, double expected) + { + var x = _tape.CreateVariable(right); + _ = _tape.Divide(left, x); + _tape.ReverseAccumulation(out var _, out var hessian); + + var actual = hessian[0, 0]; + + Assert.AreApproximatelyEqual(expected, actual, 1e-15); + } + + [TestMethod] + [DataRow(1.23, 2.34, 0)] + public void Divide_VariableAndConstant_ReturnsHessian(double left, double right, double expected) + { + var x = _tape.CreateVariable(left); + _ = _tape.Divide(x, right); + _tape.ReverseAccumulation(out var _, out var hessian); + + var actual = hessian[0, 0]; + + Assert.AreApproximatelyEqual(expected, actual, Real.Zero); + } + + [TestMethod] + [DataRow(1.23, 3.421229536289673)] + public void Exp_Variable_ReturnsHessian(double input, double expected) + { + var actual = ComputeHessian(_tape.Exp, input); + + Assert.AreApproximatelyEqual(expected, actual, 1e-15); + } + + [TestMethod] + [DataRow(1.23, 1.126984172374114)] + public void Exp2_Variable_ReturnsHessian(double input, double expected) + { + var actual = ComputeHessian(_tape.Exp2, input); + + Assert.AreApproximatelyEqual(expected, actual, 1e-15); + } + + [TestMethod] + [DataRow(1.23, 90.0391481211886)] + public void Exp10_Variable_ReturnsHessian(double input, double expected) + { + var actual = ComputeHessian(_tape.Exp10, input); + + Assert.AreApproximatelyEqual(expected, actual, 1e-15); + } + + [TestMethod] + [DataRow(1.23, -0.6609822195782934)] + public void 
Ln_Variable_ReturnsHessian(double input, double expected) + { + var actual = ComputeHessian(_tape.Ln, input); + + Assert.AreApproximatelyEqual(expected, actual, 1e-15); + } + + [TestMethod] + [DataRow(1.23, 2.34, -0.7774880868134957, -0.4807142135095495, 0.1753670976636016)] + public void Log_TwoVariables_ReturnsHessian(double left, double right, double expectedXX, double expectedXY, double expectedYY) + { + Real[,] expected = new Real[2, 2] { { expectedXX, expectedXY }, { expectedXY, expectedYY } }; + + var actual = ComputeHessian(_tape.Log, left, right); + + Assert.AreApproximatelyEqual(expected, actual, 1e-15); + } + + [TestMethod] + [DataRow(1.23, -0.953595770301384)] + public void Log2_Variable_ReturnsHessian(double input, double expected) + { + var actual = ComputeHessian(_tape.Log2, input); + + Assert.AreApproximatelyEqual(expected, actual, 1e-15); + } + + [TestMethod] + [DataRow(1.23, -0.2870609305990163)] + public void Log10_Variable_ReturnsHessian(double input, double expected) + { + var actual = ComputeHessian(_tape.Log10, input); + + Assert.AreApproximatelyEqual(expected, actual, 1e-15); + } + + [TestMethod] + [DataRow(1.23, 2.34, 0, 0, 0)] + public void Modulo_TwoVariables_ReturnsHessian(double left, double right, double expectedXX, double expectedXY, double expectedYY) + { + Real[,] expected = new Real[2, 2] { { expectedXX, expectedXY }, { expectedXY, expectedYY } }; + + var actual = ComputeHessian(_tape.Modulo, left, right); + + Assert.AreApproximatelyEqual(expected, actual, Real.Zero); + } + + [TestMethod] + [DataRow(1.23, 2.34, 0)] + public void Modulo_ConstantAndVariable_ReturnsHessian(double left, double right, double expected) + { + var x = _tape.CreateVariable(right); + _ = _tape.Modulo(left, x); + _tape.ReverseAccumulation(out var _, out var hessian); + + var actual = hessian[0, 0]; + + Assert.AreApproximatelyEqual(expected, actual, Real.Zero); + } + + [TestMethod] + [DataRow(1.23, 2.34, 0)] + public void 
Modulo_VariableAndConstant_ReturnsHessian(double left, double right, double expected) + { + var x = _tape.CreateVariable(left); + _ = _tape.Modulo(x, right); + _tape.ReverseAccumulation(out var _, out var hessian); + + var actual = hessian[0, 0]; + + Assert.AreApproximatelyEqual(expected, actual, Real.Zero); + } + + [TestMethod] + [DataRow(1.23, 2.34, 0, 1, 0)] + public void Multiply_TwoVariables_ReturnsHessian(double left, double right, double expectedXX, double expectedXY, double expectedYY) + { + Real[,] expected = new Real[2, 2] { { expectedXX, expectedXY }, { expectedXY, expectedYY } }; + + var actual = ComputeHessian(_tape.Multiply, left, right); + + Assert.AreApproximatelyEqual(expected, actual, Real.Zero); + } + + [TestMethod] + [DataRow(1.23, 2.34, 0)] + public void Multiply_ConstantAndVariable_ReturnsHessian(double left, double right, double expected) + { + var x = _tape.CreateVariable(right); + _ = _tape.Multiply(left, x); + _tape.ReverseAccumulation(out var _, out var hessian); + + var actual = hessian[0, 0]; + + Assert.AreApproximatelyEqual(expected, actual, Real.Zero); + } + + [TestMethod] + [DataRow(1.23, 2.34, 0)] + public void Multiply_VariableAndConstant_ReturnsHessian(double left, double right, double expected) + { + var x = _tape.CreateVariable(left); + _ = _tape.Multiply(x, right); + _tape.ReverseAccumulation(out var _, out var hessian); + + var actual = hessian[0, 0]; + + Assert.AreApproximatelyEqual(expected, actual, Real.Zero); + } + + [TestMethod] + [DataRow(1.23, 0)] + public void Negate_Variable_ReturnsNegation(double input, double expected) + { + var actual = ComputeHessian(_tape.Negate, input); + + Assert.AreApproximatelyEqual(expected, actual, Real.Zero); + } + + [TestMethod] + [DataRow(1.23, 2.34, 3.364251027050465, 1.958969363947686, 0.06956296832768787)] + public void Pow_TwoVariables_ReturnsHessian(double left, double right, double expectedXX, double expectedXY, double expectedYY) + { +
Real[,] expected = new Real[2, 2] { { expectedXX, expectedXY }, { expectedXY, expectedYY } }; + + var actual = ComputeHessian(_tape.Pow, left, right); + + Assert.AreApproximatelyEqual(expected, actual, 1e-15); + } + + [TestMethod] + [DataRow(1.23, 2.34, -0.1767192454212087, -0.1765629860861052, 0.03686389570982799)] + public void Root_TwoVariables_ReturnsHessian(double left, double right, double expectedXX, double expectedXY, double expectedYY) + { + Real[,] expected = new Real[2, 2] { { expectedXX, expectedXY }, { expectedXY, expectedYY } }; + + var actual = ComputeHessian(_tape.Root, left, right); + + Assert.AreApproximatelyEqual(expected, actual, 1e-15); + } + + [TestMethod] + [DataRow(1.23, -0.942488801931697)] + public void Sin_Variable_ReturnsHessian(double input, double expected) + { + var actual = ComputeHessian(_tape.Sin, input); + + Assert.AreApproximatelyEqual(expected, actual, 1e-15); + } + + [TestMethod] + [DataRow(1.23, 1.564468479304407)] + public void Sinh_Variable_ReturnsHessian(double input, double expected) + { + var actual = ComputeHessian(_tape.Sinh, input); + + Assert.AreApproximatelyEqual(expected, actual, 1e-15); + } + + [TestMethod] + [DataRow(1.23, -0.1832661859080147)] + public void Sqrt_Variable_ReturnsHessian(double input, double expected) + { + var actual = ComputeHessian(_tape.Sqrt, input); + + Assert.AreApproximatelyEqual(expected, actual, 1e-15); + } + + [TestMethod] + [DataRow(1.23, 2.34, 0, 0, 0)] + public void Subtract_TwoVariables_ReturnsHessian(double left, double right, double expectedXX, double expectedXY, double expectedYY) + { + Real[,] expected = new Real[2, 2] { { expectedXX, expectedXY }, { expectedXY, expectedYY } }; + + var actual = ComputeHessian(_tape.Subtract, left, right); + + Assert.AreApproximatelyEqual(expected, actual, Real.Zero); + } + + [TestMethod] + [DataRow(1.23, 2.34, 0)] + public void Subtract_ConstantAndVariable_ReturnsHessian(double left, double right, double expected) + { + var x = 
_tape.CreateVariable(right); + _ = _tape.Subtract(left, x); + _tape.ReverseAccumulation(out var _, out var hessian); + + var actual = hessian[0, 0]; + + Assert.AreApproximatelyEqual(expected, actual, Real.Zero); + } + + [TestMethod] + [DataRow(1.23, 2.34, 0)] + public void Subtract_VariableAndConstant_ReturnsHessian(double left, double right, double expected) + { + var x = _tape.CreateVariable(left); + _ = _tape.Subtract(x, right); + _tape.ReverseAccumulation(out var _, out var hessian); + + var actual = hessian[0, 0]; + + Assert.AreApproximatelyEqual(expected, actual, Real.Zero); + } + + [TestMethod] + [DataRow(1.23, 50.4823759141874)] + public void Tan_Variable_ReturnsHessian(double input, double expected) + { + var actual = ComputeHessian(_tape.Tan, input); + + Assert.AreApproximatelyEqual(expected, actual, 1e-15); + } + + [TestMethod] + [DataRow(1.23, -0.4887972531670078)] + public void Tanh_Variable_ReturnsHessian(double input, double expected) + { + var actual = ComputeHessian(_tape.Tanh, input); + + Assert.AreApproximatelyEqual(expected, actual, 1e-15); + } + + // + // Helpers + // + + private Real ComputeHessian(Func, Variable> function, Real input) + { + var x = _tape.CreateVariable(input); + _ = function(x); + _tape.ReverseAccumulation(out var _, out var hessian); + return hessian[0, 0]; + } + + private Real[,] ComputeHessian(Func, Variable, Variable> function, Real left, Real right) + { + var x = _tape.CreateVariable(left); + var y = _tape.CreateVariable(right); + _ = function(x, y); + _tape.ReverseAccumulation(out var _, out var hessian); + return hessian.ToArray(); + } +} From 6330f3bf46190d11b9ab552270edd595fbe889f8 Mon Sep 17 00:00:00 2001 From: Hamlet Tanyavong <34531738+HamletTanyavong@users.noreply.github.com> Date: Wed, 22 Nov 2023 23:53:41 -0600 Subject: [PATCH 11/29] Update HessianTape.cs - Fix Exp2, Exp10, and Root methods - Use Real.One instead of T.One in Atan2 --- src/Mathematics.NET/AutoDiff/HessianTape.cs | 16 ++++++++-------- 1 file 
changed, 8 insertions(+), 8 deletions(-) diff --git a/src/Mathematics.NET/AutoDiff/HessianTape.cs b/src/Mathematics.NET/AutoDiff/HessianTape.cs index 232ccf49..2e77dd1e 100644 --- a/src/Mathematics.NET/AutoDiff/HessianTape.cs +++ b/src/Mathematics.NET/AutoDiff/HessianTape.cs @@ -348,7 +348,7 @@ public Variable Exp(Variable x) /// public Variable Exp2(Variable x) { - var exp2 = T.Exp(x.Value); + var exp2 = T.Exp2(x.Value); var df = Real.Ln2 * exp2; _nodes.Add(new(df, Real.Ln2 * df, x._index, _nodes.Count)); return new(_nodes.Count - 1, exp2); @@ -357,7 +357,7 @@ public Variable Exp2(Variable x) /// public Variable Exp10(Variable x) { - var exp10 = T.Exp(x.Value); + var exp10 = T.Exp10(x.Value); var df = Real.Ln10 * exp10; _nodes.Add(new(df, Real.Ln10 * df, x._index, _nodes.Count)); return new(_nodes.Count - 1, exp10); @@ -493,15 +493,15 @@ public Variable Root(Variable x, Variable n) var lnx = T.Ln(x.Value); var u = T.One / n.Value; var v = T.One / x.Value; - var w = u * u; + var uu = u * u; var dfx = u * v * root; - var dfn = -lnx * root * w; + var dfn = -lnx * root * uu; _nodes.Add(new( dfx, - (n.Value - T.One) * v * root, - -(dfx * u + lnx * w), + (uu - u) * root * v * v, + -root * (lnx * u + T.One) * v * uu, dfn, - -(2.0 * u + lnx * w) * dfn, + -(2.0 * u + lnx * uu) * dfn, x._index, n._index)); return new(_nodes.Count - 1, root); @@ -555,7 +555,7 @@ public Variable Atan2(Variable y, Variable x) { var u = y.Value * y.Value; var v = x.Value * x.Value; - var a = T.One / (u + v); + var a = Real.One / (u + v); var b = a * a; var dfyy = -2.0 * x.Value * b * y.Value; _nodes.Add(new( From bacebd17e38a08a5ad058b22c8126ffb1dea1b9b Mon Sep 17 00:00:00 2001 From: Hamlet Tanyavong <34531738+HamletTanyavong@users.noreply.github.com> Date: Wed, 22 Nov 2023 23:56:20 -0600 Subject: [PATCH 12/29] Update GradientTapeOfRealTests.cs - Update test names - Update precision thresholds for some tests - Fix negate test --- .../AutoDiff/GradientTapeOfRealTests.cs | 92 +++++++++---------- 
1 file changed, 44 insertions(+), 48 deletions(-) diff --git a/tests/Mathematics.NET.Tests/AutoDiff/GradientTapeOfRealTests.cs b/tests/Mathematics.NET.Tests/AutoDiff/GradientTapeOfRealTests.cs index 06d484de..b467f91b 100644 --- a/tests/Mathematics.NET.Tests/AutoDiff/GradientTapeOfRealTests.cs +++ b/tests/Mathematics.NET.Tests/AutoDiff/GradientTapeOfRealTests.cs @@ -65,13 +65,13 @@ public void Acosh_Variable_ReturnsGradient(double input, double expected) [TestMethod] [DataRow(1.23, 2.34, 1, 1)] - public void Add_TwoVariables_ReturnsGradients(double left, double right, double expectedLeft, double expectedRight) + public void Add_TwoVariables_ReturnsGradient(double left, double right, double expectedLeft, double expectedRight) { Real[] expected = [expectedLeft, expectedRight]; var actual = ComputeGradient(_tape.Add, left, right); - Assert.AreApproximatelyEqual(expected, actual, 1e-16); + Assert.AreApproximatelyEqual(expected, actual, Real.Zero); } [TestMethod] @@ -80,11 +80,11 @@ public void Add_ConstantAndVariable_ReturnsGradient(double left, double right, d { var x = _tape.CreateVariable(right); _ = _tape.Add(left, x); - _tape.ReverseAccumulation(out var gradients); + _tape.ReverseAccumulation(out var gradient); - var actual = gradients[0]; + var actual = gradient[0]; - Assert.AreApproximatelyEqual(expected, actual, 1e-16); + Assert.AreApproximatelyEqual(expected, actual, Real.Zero); } [TestMethod] @@ -93,11 +93,11 @@ public void Add_VariableAndConstant_ReturnsGradient(double left, double right, d { var x = _tape.CreateVariable(left); _ = _tape.Add(x, right); - _tape.ReverseAccumulation(out var gradients); + _tape.ReverseAccumulation(out var gradient); - var actual = gradients[0]; + var actual = gradient[0]; - Assert.AreApproximatelyEqual(expected, actual, 1e-16); + Assert.AreApproximatelyEqual(expected, actual, Real.Zero); } [TestMethod] @@ -129,7 +129,7 @@ public void Atan_Variable_ReturnsGradient(double input, double expected) [TestMethod] [DataRow(1.23, 2.34, 
0.334835801674179, -0.1760034342133505)] - public void Atan2_TwoVariables_ReturnsGradients(double left, double right, double expectedLeft, double expectedRight) + public void Atan2_TwoVariables_ReturnsGradient(double left, double right, double expectedLeft, double expectedRight) { Real[] expected = [expectedLeft, expectedRight]; @@ -200,13 +200,13 @@ public void CreateVariable_WithSeedValue_ReturnsVariable(double value) } [TestMethod] - [DataRow(1.23, 2.34, -0.1760034342133505, 0.334835801674179)] + [DataRow(1.23, 2.34, 0.334835801674179, -0.1760034342133505)] public void CustomOperation_Binary_ReturnsGradient(double left, double right, double expectedLeft, double expectedRight) { var y = _tape.CreateVariable(left); var x = _tape.CreateVariable(right); var u = Real.One / (x.Value * x.Value + y.Value * y.Value); - _ = _tape.CustomOperation(x, y, Real.Atan2, (x, y) => x.Value * u, (x, y) => -y.Value * u); + _ = _tape.CustomOperation(y, x, Real.Atan2, (y, x) => x.Value * u, (y, x) => -y.Value * u); _tape.ReverseAccumulation(out var actual); Real[] expected = [expectedLeft, expectedRight]; @@ -220,16 +220,16 @@ public void CustomOperation_Unary_ReturnsGradient(double input, double expected) { var x = _tape.CreateVariable(input); _ = _tape.CustomOperation(x, Real.Sin, Real.Cos); - _tape.ReverseAccumulation(out var gradients); + _tape.ReverseAccumulation(out var gradient); - var actual = gradients[0]; + var actual = gradient[0]; Assert.AreApproximatelyEqual(expected, actual, 1e-15); } [TestMethod] [DataRow(1.23, 2.34, 0.4273504273504274, -0.2246329169406093)] - public void Divide_TwoVariables_ReturnsGradients(double left, double right, double expectedLeft, double expectedRight) + public void Divide_TwoVariables_ReturnsGradient(double left, double right, double expectedLeft, double expectedRight) { Real[] expected = [expectedLeft, expectedRight]; @@ -244,9 +244,9 @@ public void Divide_ConstantAndVariable_ReturnsGradient(double left, double right { var x = 
_tape.CreateVariable(right); _ = _tape.Divide(left, x); - _tape.ReverseAccumulation(out var gradients); + _tape.ReverseAccumulation(out var gradient); - var actual = gradients[0]; + var actual = gradient[0]; Assert.AreApproximatelyEqual(expected, actual, 1e-15); } @@ -257,9 +257,9 @@ public void Divide_VariableAndConstant_ReturnsGradient(double left, double right { var x = _tape.CreateVariable(left); _ = _tape.Divide(x, right); - _tape.ReverseAccumulation(out var gradients); + _tape.ReverseAccumulation(out var gradient); - var actual = gradients[0]; + var actual = gradient[0]; Assert.AreApproximatelyEqual(expected, actual, 1e-15); } @@ -302,7 +302,7 @@ public void Ln_Variable_ReturnsGradient(double input, double expected) [TestMethod] [DataRow(1.23, 2.34, 0.9563103467806, -0.1224030239537303)] - public void Log_TwoVariables_ReturnsGradients(double left, double right, double expectedLeft, double expectedRight) + public void Log_TwoVariables_ReturnsGradient(double left, double right, double expectedLeft, double expectedRight) { Real[] expected = [expectedLeft, expectedRight]; @@ -331,13 +331,13 @@ public void Log10_Variable_ReturnsGradient(double input, double expected) [TestMethod] [DataRow(1.23, 2.34, 1, 0)] - public void Modulo_TwoVariables_ReturnsGradients(double left, double right, double expectedLeft, double expectedRight) + public void Modulo_TwoVariables_ReturnsGradient(double left, double right, double expectedLeft, double expectedRight) { Real[] expected = [expectedLeft, expectedRight]; var actual = ComputeGradient(_tape.Modulo, left, right); - Assert.AreApproximatelyEqual(expected, actual, 1e-15); + Assert.AreApproximatelyEqual(expected, actual, Real.Zero); } [TestMethod] @@ -346,11 +346,11 @@ public void Modulo_ConstantAndVariable_ReturnsGradient(double left, double right { var x = _tape.CreateVariable(right); _ = _tape.Modulo(left, x); - _tape.ReverseAccumulation(out var gradients); + _tape.ReverseAccumulation(out var gradient); - var actual = 
gradients[0]; + var actual = gradient[0]; - Assert.AreApproximatelyEqual(expected, actual, 1e-16); + Assert.AreApproximatelyEqual(expected, actual, Real.Zero); } [TestMethod] @@ -363,12 +363,12 @@ public void Modulo_VariableAndConstant_ReturnsGradient(double left, double right var actual = gradient[0]; - Assert.AreApproximatelyEqual(expected, actual, 1e-16); + Assert.AreApproximatelyEqual(expected, actual, Real.Zero); } [TestMethod] [DataRow(1.23, 2.34, 2.34, 1.23)] - public void Multiply_TwoVariables_ReturnsGradients(double left, double right, double expectedLeft, double expectedRight) + public void Multiply_TwoVariables_ReturnsGradient(double left, double right, double expectedLeft, double expectedRight) { Real[] expected = [expectedLeft, expectedRight]; @@ -383,9 +383,9 @@ public void Multiply_ConstantAndVariable_ReturnsGradient(double left, double rig { var x = _tape.CreateVariable(right); _ = _tape.Multiply(left, x); - _tape.ReverseAccumulation(out var gradients); + _tape.ReverseAccumulation(out var gradient); - var actual = gradients[0]; + var actual = gradient[0]; Assert.AreApproximatelyEqual(expected, actual, 1e-16); } @@ -396,9 +396,9 @@ public void Multiply_VariableAndConstant_ReturnsGradient(double left, double rig { var x = _tape.CreateVariable(left); _ = _tape.Multiply(x, right); - _tape.ReverseAccumulation(out var gradients); + _tape.ReverseAccumulation(out var gradient); - var actual = gradients[0]; + var actual = gradient[0]; Assert.AreApproximatelyEqual(expected, actual, 1e-16); } @@ -407,18 +407,14 @@ public void Multiply_VariableAndConstant_ReturnsGradient(double left, double rig [DataRow(1.23, -1)] public void Negate_Variable_ReturnsNegation(double input, double expected) { - var x = _tape.CreateVariable(input); - _ = _tape.Negate(x); - _tape.ReverseAccumulation(out var gradients); - - var actual = gradients[0]; + var actual = ComputeGradient(_tape.Negate, input); - Assert.AreApproximatelyEqual(expected, actual, 1e-16); + 
Assert.AreApproximatelyEqual(expected, actual, Real.Zero); } [TestMethod] [DataRow(1.23, 2.34, 3.088081166620949, 0.3360299854573856)] - public void Pow_TwoVariables_ReturnsGradients(double left, double right, double expectedLeft, double expectedRight) + public void Pow_TwoVariables_ReturnsGradient(double left, double right, double expectedLeft, double expectedRight) { Real[] expected = [expectedLeft, expectedRight]; @@ -429,7 +425,7 @@ public void Pow_TwoVariables_ReturnsGradients(double left, double right, double [TestMethod] [DataRow(1.23, 2.34, 0.3795771135606888, -0.04130373687338086)] - public void Root_TwoVariables_ReturnsGradients(double left, double right, double expectedLeft, double expectedRight) + public void Root_TwoVariables_ReturnsGradient(double left, double right, double expectedLeft, double expectedRight) { Real[] expected = [expectedLeft, expectedRight]; @@ -467,39 +463,39 @@ public void Sqrt_Variable_ReturnsGradient(double input, double expected) [TestMethod] [DataRow(1.23, 2.34, 1, -1)] - public void Subtract_TwoVariables_ReturnsGradients(double left, double right, double expectedLeft, double expectedRight) + public void Subtract_TwoVariables_ReturnsGradient(double left, double right, double expectedLeft, double expectedRight) { Real[] expected = [expectedLeft, expectedRight]; var actual = ComputeGradient(_tape.Subtract, left, right); - Assert.AreApproximatelyEqual(expected, actual, 1e-16); + Assert.AreApproximatelyEqual(expected, actual, Real.Zero); } [TestMethod] [DataRow(1.23, 2.34, -1)] - public void Subtract_ConstantAndVariable_ReturnsGradients(double left, double right, double expected) + public void Subtract_ConstantAndVariable_ReturnsGradient(double left, double right, double expected) { var x = _tape.CreateVariable(right); _ = _tape.Subtract(left, x); - _tape.ReverseAccumulation(out var gradients); + _tape.ReverseAccumulation(out var gradient); - var actual = gradients[0]; + var actual = gradient[0]; - 
Assert.AreApproximatelyEqual(expected, actual, 1e-16); + Assert.AreApproximatelyEqual(expected, actual, Real.Zero); } [TestMethod] [DataRow(1.23, 2.34, 1)] - public void Subtract_VariableAndConstant_ReturnsGradients(double left, double right, double expected) + public void Subtract_VariableAndConstant_ReturnsGradient(double left, double right, double expected) { var x = _tape.CreateVariable(left); _ = _tape.Subtract(x, right); - _tape.ReverseAccumulation(out var gradients); + _tape.ReverseAccumulation(out var gradient); - var actual = gradients[0]; + var actual = gradient[0]; - Assert.AreApproximatelyEqual(expected, actual, 1e-16); + Assert.AreApproximatelyEqual(expected, actual, Real.Zero); } [TestMethod] From afb6ee8262ae35e5d3938487cab422f1a2dfc998 Mon Sep 17 00:00:00 2001 From: Hamlet Tanyavong <34531738+HamletTanyavong@users.noreply.github.com> Date: Thu, 23 Nov 2023 00:21:55 -0600 Subject: [PATCH 13/29] Update GradientTape.cs - Update parameter name - Update documentation comments --- src/Mathematics.NET/AutoDiff/GradientTape.cs | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/src/Mathematics.NET/AutoDiff/GradientTape.cs b/src/Mathematics.NET/AutoDiff/GradientTape.cs index eabff864..89c53a0b 100644 --- a/src/Mathematics.NET/AutoDiff/GradientTape.cs +++ b/src/Mathematics.NET/AutoDiff/GradientTape.cs @@ -143,17 +143,18 @@ static void CheckForCancellation(CancellationToken cancellationToken) } /// Perform reverse accumulation on the gradient tape and output the resulting gradient. - /// The gradient + /// The gradient + /// The gradient tape does not have any tracked variables. 
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] - public void ReverseAccumulation(out ReadOnlySpan gradients) - => ReverseAccumulation(out gradients, T.One); + public void ReverseAccumulation(out ReadOnlySpan gradient) + => ReverseAccumulation(out gradient, T.One); /// Perform reverse accumulation on the gradient tape and output the resulting gradient. - /// The gradient + /// The gradient /// A seed value /// The gradient tape does not have any tracked variables. [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] - public void ReverseAccumulation(out ReadOnlySpan gradients, T seed) + public void ReverseAccumulation(out ReadOnlySpan gradient, T seed) { if (_variableCount == 0) { @@ -170,13 +171,13 @@ public void ReverseAccumulation(out ReadOnlySpan gradients, T seed) for (int i = length - 1; i >= _variableCount; i--) { var node = Unsafe.Add(ref start, i); - var gradient = gradientSpan[i]; + var gradientElement = gradientSpan[i]; - gradientSpan[node.PX] += gradient * node.DX; - gradientSpan[node.PY] += gradient * node.DY; + gradientSpan[node.PX] += gradientElement * node.DX; + gradientSpan[node.PY] += gradientElement * node.DY; } - gradients = gradientSpan[.._variableCount]; + gradient = gradientSpan[.._variableCount]; } // From 7819323460ee8e2388d39f5a3dbbe82e6171dd29 Mon Sep 17 00:00:00 2001 From: Hamlet Tanyavong <34531738+HamletTanyavong@users.noreply.github.com> Date: Thu, 23 Nov 2023 00:25:47 -0600 Subject: [PATCH 14/29] Add method for getting only gradients or Hessians - Update documentation comments --- src/Mathematics.NET/AutoDiff/HessianTape.cs | 84 ++++++++++++++++++++- 1 file changed, 83 insertions(+), 1 deletion(-) diff --git a/src/Mathematics.NET/AutoDiff/HessianTape.cs b/src/Mathematics.NET/AutoDiff/HessianTape.cs index 2e77dd1e..4b4f340f 100644 --- a/src/Mathematics.NET/AutoDiff/HessianTape.cs +++ b/src/Mathematics.NET/AutoDiff/HessianTape.cs @@ -109,17 +109,99 
@@ static void CheckForCancellation(CancellationToken cancellationToken) } } + /// Perform reverse accumulation on the Hessian tape and output the resulting gradient. + /// The gradient + /// The Hessian tape does not have any tracked variables. + [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] + public void ReverseAccumulation(out ReadOnlySpan gradient) + => ReverseAccumulation(out gradient, T.One); + /// Perform reverse accumulation on the Hessian tape and output the resulting Hessian. + /// The Hessian + /// The Hessian tape does not have any tracked variables. + [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] + public void ReverseAccumulation(out ReadOnlySpan2D hessian) + => ReverseAccumulation(out hessian, T.One); + + /// Perform reverse accumulation on the Hessian tape and output the resulting gradient and Hessian. /// The gradient /// The Hessian + /// The Hessian tape does not have any tracked variables. [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] public void ReverseAccumulation(out ReadOnlySpan gradient, out ReadOnlySpan2D hessian) => ReverseAccumulation(out gradient, out hessian, T.One); + /// Perform reverse accumulation on the Hessian tape and output the resulting gradient. + /// The gradient + /// A seed value + /// The Hessian tape does not have any tracked variables. 
+ [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] + public void ReverseAccumulation(out ReadOnlySpan gradient, T seed) + { + if (_variableCount == 0) + { + throw new Exception("Hessian tape contains no root nodes"); + } + + ReadOnlySpan> nodes = CollectionsMarshal.AsSpan(_nodes); + ref var start = ref MemoryMarshal.GetReference(nodes); + var length = nodes.Length; + + Span gradientSpan = new T[length]; + gradientSpan[length - 1] = seed; + + for (int i = length - 1; i >= _variableCount; i--) + { + var node = Unsafe.Add(ref start, i); + var gradientElement = gradientSpan[i]; + + gradientSpan[node.PX] += gradientElement * node.DX; + gradientSpan[node.PY] += gradientElement * node.DY; + } + + gradient = gradientSpan[.._variableCount]; + } + + /// Perform reverse accumulation on the Hessian tape and output the resulting Hessian. + /// The Hessian + /// A seed value + /// The Hessian tape does not have any tracked variables. + [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] + public void ReverseAccumulation(out ReadOnlySpan2D hessian, T seed) + { + if (_variableCount == 0) + { + throw new Exception("Hessian tape contains no root nodes"); + } + + ReadOnlySpan> nodes = CollectionsMarshal.AsSpan(_nodes); + ref var start = ref MemoryMarshal.GetReference(nodes); + var length = nodes.Length; + + Span gradientSpan = new T[length]; + gradientSpan[length - 1] = seed; + + Span2D hessianSpan = new T[length, length]; + + for (int i = length - 1; i >= _variableCount; i--) + { + var node = Unsafe.Add(ref start, i); + var gradientElement = gradientSpan[i]; + + EdgePush(hessianSpan, ref node, i); + Accumulate(hessianSpan, ref node, gradientElement); + + gradientSpan[node.PX] += gradientElement * node.DX; + gradientSpan[node.PY] += gradientElement * node.DY; + } + + hessian = hessianSpan.Slice(0, 0, _variableCount, _variableCount); + } + // The following method uses the edge-pushing algorithm 
outlined by Gower and Mello: https://arxiv.org/pdf/2007.15040.pdf. // TODO: use newer variations/versions of this algorithm since they are more performant - /// Perform reverse accumulation on the Hessian tape and output the resulting Hessian. + /// Perform reverse accumulation on the Hessian tape and output the resulting gradient and Hessian. /// The gradient /// The Hessian /// A seed value From 1a1f262dbbf11282b892c961475074ad98603e05 Mon Sep 17 00:00:00 2001 From: Hamlet Tanyavong <34531738+HamletTanyavong@users.noreply.github.com> Date: Thu, 23 Nov 2023 00:27:37 -0600 Subject: [PATCH 15/29] Update HessianTapeOfRealTests.cs - Use more performant overload of ReverseAccumulation --- .../Mathematics.NET.Tests/AutoDiff/HessianTapeOfRealTests.cs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/Mathematics.NET.Tests/AutoDiff/HessianTapeOfRealTests.cs b/tests/Mathematics.NET.Tests/AutoDiff/HessianTapeOfRealTests.cs index 5f29faaa..106ff7f1 100644 --- a/tests/Mathematics.NET.Tests/AutoDiff/HessianTapeOfRealTests.cs +++ b/tests/Mathematics.NET.Tests/AutoDiff/HessianTapeOfRealTests.cs @@ -25,6 +25,7 @@ // SOFTWARE. 
// +using CommunityToolkit.HighPerformance; using Mathematics.NET.AutoDiff; namespace Mathematics.NET.Tests.AutoDiff; @@ -542,7 +543,7 @@ private Real ComputeHessian(Func, Variable> function, Real { var x = _tape.CreateVariable(input); _ = function(x); - _tape.ReverseAccumulation(out var _, out var hessian); + _tape.ReverseAccumulation(out ReadOnlySpan2D hessian); return hessian[0, 0]; } @@ -551,7 +552,7 @@ private Real ComputeHessian(Func, Variable> function, Real var x = _tape.CreateVariable(left); var y = _tape.CreateVariable(right); _ = function(x, y); - _tape.ReverseAccumulation(out var _, out var hessian); + _tape.ReverseAccumulation(out ReadOnlySpan2D hessian); return hessian.ToArray(); } } From fa897d94dade3cafb17629ad8b6e82245386f3de Mon Sep 17 00:00:00 2001 From: Hamlet Tanyavong <34531738+HamletTanyavong@users.noreply.github.com> Date: Thu, 23 Nov 2023 00:40:19 -0600 Subject: [PATCH 16/29] Update GradientTapeOfRealTests.cs - Update test name --- tests/Mathematics.NET.Tests/AutoDiff/GradientTapeOfRealTests.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/Mathematics.NET.Tests/AutoDiff/GradientTapeOfRealTests.cs b/tests/Mathematics.NET.Tests/AutoDiff/GradientTapeOfRealTests.cs index b467f91b..965bed31 100644 --- a/tests/Mathematics.NET.Tests/AutoDiff/GradientTapeOfRealTests.cs +++ b/tests/Mathematics.NET.Tests/AutoDiff/GradientTapeOfRealTests.cs @@ -405,7 +405,7 @@ public void Multiply_VariableAndConstant_ReturnsGradient(double left, double rig [TestMethod] [DataRow(1.23, -1)] - public void Negate_Variable_ReturnsNegation(double input, double expected) + public void Negate_Variable_ReturnsGradient(double input, double expected) { var actual = ComputeGradient(_tape.Negate, input); From 378b354bdc91f19a04bc1df8e2c4cb47fe0d742d Mon Sep 17 00:00:00 2001 From: Hamlet Tanyavong <34531738+HamletTanyavong@users.noreply.github.com> Date: Thu, 23 Nov 2023 00:43:36 -0600 Subject: [PATCH 17/29] Add tests for gradients - Add tests for 
gradients computed with Hessian tapes - Rename negation test --- .../AutoDiff/HessianTapeOfRealTests.cs | 479 +++++++++++++++++- 1 file changed, 478 insertions(+), 1 deletion(-) diff --git a/tests/Mathematics.NET.Tests/AutoDiff/HessianTapeOfRealTests.cs b/tests/Mathematics.NET.Tests/AutoDiff/HessianTapeOfRealTests.cs index 106ff7f1..66acfa22 100644 --- a/tests/Mathematics.NET.Tests/AutoDiff/HessianTapeOfRealTests.cs +++ b/tests/Mathematics.NET.Tests/AutoDiff/HessianTapeOfRealTests.cs @@ -45,6 +45,15 @@ public HessianTapeOfRealTests() // Tests // + [TestMethod] + [DataRow(0.123, -1.007651429146436)] + public void Acos_Variable_ReturnsGradient(double input, double expected) + { + var actual = ComputeGradient(_tape.Acos, input); + + Assert.AreApproximatelyEqual(expected, actual, 1e-15); + } + [TestMethod] [DataRow(0.123, -0.125845035324435)] public void Acos_Variable_ReturnsHessian(double input, double expected) @@ -54,6 +63,15 @@ public void Acos_Variable_ReturnsHessian(double input, double expected) Assert.AreApproximatelyEqual(expected, actual, 1e-15); } + [TestMethod] + [DataRow(1.23, 1.396315794095838)] + public void Acosh_Variable_ReturnsGradient(double input, double expected) + { + var actual = ComputeGradient(_tape.Acosh, input); + + Assert.AreApproximatelyEqual(expected, actual, 1e-15); + } + [TestMethod] [DataRow(1.23, -3.348544407755665)] public void Acosh_Variable_ReturnsHessian(double input, double expected) @@ -63,6 +81,17 @@ public void Acosh_Variable_ReturnsHessian(double input, double expected) Assert.AreApproximatelyEqual(expected, actual, 1e-15); } + [TestMethod] + [DataRow(1.23, 2.34, 1, 1)] + public void Add_TwoVariables_ReturnsGradient(double left, double right, double expectedLeft, double expectedRight) + { + Real[] expected = [expectedLeft, expectedRight]; + + var actual = ComputeGradient(_tape.Add, left, right); + + Assert.AreApproximatelyEqual(expected, actual, Real.Zero); + } + [TestMethod] [DataRow(1.23, 2.34, 0, 0, 0)] public void 
Add_TwoVariables_ReturnsHessian(double left, double right, double expectedXX, double expectedXY, double expectedYY) @@ -74,6 +103,19 @@ public void Add_TwoVariables_ReturnsHessian(double left, double right, double ex Assert.AreApproximatelyEqual(expected, actual, Real.Zero); } + [TestMethod] + [DataRow(1.23, 2.34, 1)] + public void Add_ConstantAndVariable_ReturnsGradient(double left, double right, double expected) + { + var x = _tape.CreateVariable(right); + _ = _tape.Add(left, x); + _tape.ReverseAccumulation(out ReadOnlySpan gradient); + + var actual = gradient[0]; + + Assert.AreApproximatelyEqual(expected, actual, Real.Zero); + } + [TestMethod] [DataRow(1.23, 2.34, 0)] public void Add_ConstantAndVariable_ReturnsHessian(double left, double right, double expected) @@ -87,6 +129,19 @@ public void Add_ConstantAndVariable_ReturnsHessian(double left, double right, do Assert.AreApproximatelyEqual(expected, actual, Real.Zero); } + [TestMethod] + [DataRow(1.23, 2.34, 1)] + public void Add_VariableAndConstant_ReturnsGradient(double left, double right, double expected) + { + var x = _tape.CreateVariable(left); + _ = _tape.Add(x, right); + _tape.ReverseAccumulation(out ReadOnlySpan gradient); + + var actual = gradient[0]; + + Assert.AreApproximatelyEqual(expected, actual, Real.Zero); + } + [TestMethod] [DataRow(1.23, 2.34, 0)] public void Add_VariableAndConstant_ReturnsHessian(double left, double right, double expected) @@ -100,6 +155,15 @@ public void Add_VariableAndConstant_ReturnsHessian(double left, double right, do Assert.AreApproximatelyEqual(expected, actual, Real.Zero); } + [TestMethod] + [DataRow(0.123, 1.007651429146436)] + public void Asin_Variable_ReturnsGradient(double input, double expected) + { + var actual = ComputeGradient(_tape.Asin, input); + + Assert.AreApproximatelyEqual(expected, actual, 1e-15); + } + [TestMethod] [DataRow(0.123, 0.125845035324435)] public void Asin_Variable_ReturnsHessian(double input, double expected) @@ -109,6 +173,15 @@ public void 
Asin_Variable_ReturnsHessian(double input, double expected) Assert.AreApproximatelyEqual(expected, actual, 1e-15); } + [TestMethod] + [DataRow(1.23, 0.6308300845448597)] + public void Asinh_Variable_ReturnsGradient(double input, double expected) + { + var actual = ComputeGradient(_tape.Asinh, input); + + Assert.AreApproximatelyEqual(expected, actual, 1e-15); + } + [TestMethod] [DataRow(1.23, -0.3087751219667227)] public void Asinh_Variable_ReturnsHessian(double input, double expected) @@ -118,6 +191,15 @@ public void Asinh_Variable_ReturnsHessian(double input, double expected) Assert.AreApproximatelyEqual(expected, actual, 1e-15); } + [TestMethod] + [DataRow(1.23, 0.3979465955668749)] + public void Atan_Variable_ReturnsGradient(double input, double expected) + { + var actual = ComputeGradient(_tape.Atan, input); + + Assert.AreApproximatelyEqual(expected, actual, 1e-15); + } + [TestMethod] [DataRow(1.23, -0.3895692725912341)] public void Atan_Variable_ReturnsHessian(double input, double expected) @@ -127,6 +209,17 @@ public void Atan_Variable_ReturnsHessian(double input, double expected) Assert.AreApproximatelyEqual(expected, actual, 1e-15); } + [TestMethod] + [DataRow(1.23, 2.34, 0.334835801674179, -0.1760034342133505)] + public void Atan2_TwoVariables_ReturnsGradient(double left, double right, double expectedLeft, double expectedRight) + { + Real[] expected = [expectedLeft, expectedRight]; + + var actual = ComputeGradient(_tape.Atan2, left, right); + + Assert.AreApproximatelyEqual(expected, actual, 1e-15); + } + [TestMethod] [DataRow(1.23, 2.34, -0.1178645019844717, -0.081137805227897, 0.1178645019844717)] public void Atan2_TwoVariables_ReturnsHessian(double left, double right, double expectedXX, double expectedXY, double expectedYY) @@ -138,6 +231,15 @@ public void Atan2_TwoVariables_ReturnsHessian(double left, double right, double Assert.AreApproximatelyEqual(expected, actual, 1e-14); } + [TestMethod] + [DataRow(1.23, -1.94969779684149)] + public void 
Atanh_Variable_ReturnsGradient(double input, double expected) + { + var actual = ComputeGradient(_tape.Atanh, input); + + Assert.AreApproximatelyEqual(expected, actual, 1e-15); + } + [TestMethod] [DataRow(1.23, 9.35125088756106)] public void Atanh_Variable_ReturnsHessian(double input, double expected) @@ -147,6 +249,15 @@ public void Atanh_Variable_ReturnsHessian(double input, double expected) Assert.AreApproximatelyEqual(expected, actual, 1e-15); } + [TestMethod] + [DataRow(1.23, 0.2903634877210767)] + public void Cbrt_Variable_ReturnsGradient(double input, double expected) + { + var actual = ComputeGradient(_tape.Cbrt, input); + + Assert.AreApproximatelyEqual(expected, actual, 1e-15); + } + [TestMethod] [DataRow(1.23, -0.1573785841306649)] public void Cbrt_Variable_ReturnsHessian(double input, double expected) @@ -156,6 +267,15 @@ public void Cbrt_Variable_ReturnsHessian(double input, double expected) Assert.AreApproximatelyEqual(expected, actual, 1e-15); } + [TestMethod] + [DataRow(1.23, -0.942488801931697)] + public void Cos_Variable_ReturnsGradient(double input, double expected) + { + var actual = ComputeGradient(_tape.Cos, input); + + Assert.AreApproximatelyEqual(expected, actual, 1e-15); + } + [TestMethod] [DataRow(1.23, -0.3342377271245026)] public void Cos_Variable_ReturnsHessian(double input, double expected) @@ -165,6 +285,15 @@ public void Cos_Variable_ReturnsHessian(double input, double expected) Assert.AreApproximatelyEqual(expected, actual, 1e-15); } + [TestMethod] + [DataRow(1.23, 1.564468479304407)] + public void Cosh_Variable_ReturnsGradient(double input, double expected) + { + var actual = ComputeGradient(_tape.Cosh, input); + + Assert.AreApproximatelyEqual(expected, actual, 1e-15); + } + [TestMethod] [DataRow(1.23, 1.856761056985266)] public void Cosh_Variable_ReturnsHessian(double input, double expected) @@ -199,6 +328,31 @@ public void CreateVariable_WithSeedValue_ReturnsVariable(double value) Assert.AreEqual(value, actual); } + [TestMethod] + 
[DataRow(1.23, 2.34, 0.334835801674179, -0.1760034342133505)] + public void CustomOperation_Binary_ReturnsGradient(double left, double right, double expectedLeft, double expectedRight) + { + var y = _tape.CreateVariable(left); + var x = _tape.CreateVariable(right); + var u = Real.One / (x.Value * x.Value + y.Value * y.Value); + + _ = _tape.CustomOperation( + y, + x, + Real.Atan2, + (y, x) => x.Value * u, + (y, x) => Real.Zero, // Not of interest + (y, x) => Real.Zero, // Not of interest + (y, x) => -y.Value * u, + (y, x) => Real.Zero); // Not of interest + + _tape.ReverseAccumulation(out ReadOnlySpan actual); + + Real[] expected = [expectedLeft, expectedRight]; + + Assert.AreApproximatelyEqual(expected, actual, 1e-15); + } + [TestMethod] [DataRow(1.23, 2.34, -0.1178645019844717, -0.081137805227897, 0.1178645019844717)] public void CustomOperation_Binary_ReturnsHessian(double left, double right, double expectedXX, double expectedXY, double expectedYY) @@ -231,6 +385,23 @@ public void CustomOperation_Binary_ReturnsHessian(double left, double right, dou Assert.AreApproximatelyEqual(expected, actual, 1e-14); } + [TestMethod] + [DataRow(1.23, 0.3342377271245026)] + public void CustomOperation_Unary_ReturnsGradient(double input, double expected) + { + var x = _tape.CreateVariable(input); + _ = _tape.CustomOperation( + x, + Real.Sin, + Real.Cos, + x => Real.Zero); // Not of interest + _tape.ReverseAccumulation(out ReadOnlySpan gradient); + + var actual = gradient[0]; + + Assert.AreApproximatelyEqual(expected, actual, 1e-15); + } + [TestMethod] [DataRow(1.23, -0.942488801931697)] public void CustomOperation_Unary_ReturnsHessian(double input, double expected) @@ -244,6 +415,17 @@ public void CustomOperation_Unary_ReturnsHessian(double input, double expected) Assert.AreApproximatelyEqual(expected, actual, 1e-15); } + [TestMethod] + [DataRow(1.23, 2.34, 0.4273504273504274, -0.2246329169406093)] + public void Divide_TwoVariables_ReturnsGradient(double left, double right, 
double expectedLeft, double expectedRight) + { + Real[] expected = [expectedLeft, expectedRight]; + + var actual = ComputeGradient(_tape.Divide, left, right); + + Assert.AreApproximatelyEqual(expected, actual, 1e-15); + } + [TestMethod] [DataRow(1.23, 2.34, 0, -0.1826283877565929, 0.1919939461030848)] public void Divide_TwoVariables_ReturnsHessian(double left, double right, double expectedXX, double expectedXY, double expectedYY) @@ -255,6 +437,19 @@ public void Divide_TwoVariables_ReturnsHessian(double left, double right, double Assert.AreApproximatelyEqual(expected, actual, 1e-15); } + [TestMethod] + [DataRow(1.23, 2.34, -0.2246329169406093)] + public void Divide_ConstantAndVariable_ReturnsGradient(double left, double right, double expected) + { + var x = _tape.CreateVariable(right); + _ = _tape.Divide(left, x); + _tape.ReverseAccumulation(out ReadOnlySpan gradient); + + var actual = gradient[0]; + + Assert.AreApproximatelyEqual(expected, actual, 1e-15); + } + [TestMethod] [DataRow(1.23, 2.34, 0.1919939461030848)] public void Divide_ConstantAndVariable_ReturnsHessian(double left, double right, double expected) @@ -268,6 +463,19 @@ public void Divide_ConstantAndVariable_ReturnsHessian(double left, double right, Assert.AreApproximatelyEqual(expected, actual, 1e-15); } + [TestMethod] + [DataRow(1.23, 2.34, 0.4273504273504274)] + public void Divide_VariableAndConstant_ReturnsGradient(double left, double right, double expected) + { + var x = _tape.CreateVariable(left); + _ = _tape.Divide(x, right); + _tape.ReverseAccumulation(out ReadOnlySpan gradient); + + var actual = gradient[0]; + + Assert.AreApproximatelyEqual(expected, actual, 1e-15); + } + [TestMethod] [DataRow(1.23, 2.34, 0)] public void Divide_VariableAndConstant_ReturnsHessian(double left, double right, double expected) @@ -281,6 +489,15 @@ public void Divide_VariableAndConstant_ReturnsHessian(double left, double right, Assert.AreApproximatelyEqual(expected, actual, Real.Zero); } + [TestMethod] + 
[DataRow(1.23, 3.421229536289673)] + public void Exp_Variable_ReturnsGradient(double input, double expected) + { + var actual = ComputeGradient(_tape.Exp, input); + + Assert.AreApproximatelyEqual(expected, actual, 1e-15); + } + [TestMethod] [DataRow(1.23, 3.421229536289673)] public void Exp_Variable_ReturnsHessian(double input, double expected) @@ -290,6 +507,15 @@ public void Exp_Variable_ReturnsHessian(double input, double expected) Assert.AreApproximatelyEqual(expected, actual, 1e-15); } + [TestMethod] + [DataRow(1.23, 1.625894476644487)] + public void Exp2_Variable_ReturnsGradient(double input, double expected) + { + var actual = ComputeGradient(_tape.Exp2, input); + + Assert.AreApproximatelyEqual(expected, actual, 1e-15); + } + [TestMethod] [DataRow(1.23, 1.126984172374114)] public void Exp2_Variable_ReturnsHessian(double input, double expected) @@ -299,6 +525,15 @@ public void Exp2_Variable_ReturnsHessian(double input, double expected) Assert.AreApproximatelyEqual(expected, actual, 1e-15); } + [TestMethod] + [DataRow(1.23, 39.10350518430174)] + public void Exp10_Variable_ReturnsGradient(double input, double expected) + { + var actual = ComputeGradient(_tape.Exp10, input); + + Assert.AreApproximatelyEqual(expected, actual, 1e-15); + } + [TestMethod] [DataRow(1.23, 90.0391481211886)] public void Exp10_Variable_ReturnsHessian(double input, double expected) @@ -308,6 +543,15 @@ public void Exp10_Variable_ReturnsHessian(double input, double expected) Assert.AreApproximatelyEqual(expected, actual, 1e-15); } + [TestMethod] + [DataRow(1.23, 0.813008130081301)] + public void Ln_Variable_ReturnsGradient(double input, double expected) + { + var actual = ComputeGradient(_tape.Ln, input); + + Assert.AreApproximatelyEqual(expected, actual, 1e-15); + } + [TestMethod] [DataRow(1.23, -0.6609822195782934)] public void Ln_Variable_ReturnsHessian(double input, double expected) @@ -317,6 +561,17 @@ public void Ln_Variable_ReturnsHessian(double input, double expected) 
Assert.AreApproximatelyEqual(expected, actual, 1e-15); } + [TestMethod] + [DataRow(1.23, 2.34, 0.9563103467806, -0.1224030239537303)] + public void Log_TwoVariables_ReturnsGradient(double left, double right, double expectedLeft, double expectedRight) + { + Real[] expected = [expectedLeft, expectedRight]; + + var actual = ComputeGradient(_tape.Log, left, right); + + Assert.AreApproximatelyEqual(expected, actual, 1e-15); + } + [TestMethod] [DataRow(1.23, 2.34, -0.7774880868134957, -0.4807142135095495, 0.1753670976636016)] public void Log_TwoVariables_ReturnsHessian(double left, double right, double expectedXX, double expectedXY, double expectedYY) @@ -328,6 +583,15 @@ public void Log_TwoVariables_ReturnsHessian(double left, double right, double ex Assert.AreApproximatelyEqual(expected, actual, 1e-15); } + [TestMethod] + [DataRow(1.23, 1.172922797470702)] + public void Log2_Variable_ReturnsGradient(double input, double expected) + { + var actual = ComputeGradient(_tape.Log2, input); + + Assert.AreApproximatelyEqual(expected, actual, 1e-15); + } + [TestMethod] [DataRow(1.23, -0.953595770301384)] public void Log2_Variable_ReturnsHessian(double input, double expected) @@ -337,6 +601,15 @@ public void Log2_Variable_ReturnsHessian(double input, double expected) Assert.AreApproximatelyEqual(expected, actual, 1e-15); } + [TestMethod] + [DataRow(1.23, 0.35308494463679)] + public void Log10_Variable_ReturnsGradient(double input, double expected) + { + var actual = ComputeGradient(_tape.Log10, input); + + Assert.AreApproximatelyEqual(expected, actual, 1e-15); + } + [TestMethod] [DataRow(1.23, -0.2870609305990163)] public void Log10_Variable_ReturnsHessian(double input, double expected) @@ -346,6 +619,17 @@ public void Log10_Variable_ReturnsHessian(double input, double expected) Assert.AreApproximatelyEqual(expected, actual, 1e-15); } + [TestMethod] + [DataRow(1.23, 2.34, 1, 0)] + public void Modulo_TwoVariables_ReturnsGradient(double left, double right, double expectedLeft, 
double expectedRight) + { + Real[] expected = [expectedLeft, expectedRight]; + + var actual = ComputeGradient(_tape.Modulo, left, right); + + Assert.AreApproximatelyEqual(expected, actual, Real.Zero); + } + [TestMethod] [DataRow(1.23, 2.34, 0, 0, 0)] public void Modulo_TwoVariables_ReturnsHessian(double left, double right, double expectedXX, double expectedXY, double expectedYY) @@ -357,6 +641,19 @@ public void Modulo_TwoVariables_ReturnsHessian(double left, double right, double Assert.AreApproximatelyEqual(expected, actual, Real.Zero); } + [TestMethod] + [DataRow(1.23, 2.34, 0)] + public void Modulo_ConstantAndVariable_ReturnsGradient(double left, double right, double expected) + { + var x = _tape.CreateVariable(right); + _ = _tape.Modulo(left, x); + _tape.ReverseAccumulation(out ReadOnlySpan gradient); + + var actual = gradient[0]; + + Assert.AreApproximatelyEqual(expected, actual, Real.Zero); + } + [TestMethod] [DataRow(1.23, 2.34, 0)] public void Modulo_ConstantAndVariable_ReturnsHessian(double left, double right, double expected) @@ -370,6 +667,19 @@ public void Modulo_ConstantAndVariable_ReturnsHessian(double left, double right, Assert.AreApproximatelyEqual(expected, actual, Real.Zero); } + [TestMethod] + [DataRow(1.23, 2.34, 1)] + public void Modulo_VariableAndConstant_ReturnsGradient(double left, double right, double expected) + { + var x = _tape.CreateVariable(left); + _ = _tape.Modulo(x, right); + _tape.ReverseAccumulation(out ReadOnlySpan gradient); + + var actual = gradient[0]; + + Assert.AreApproximatelyEqual(expected, actual, Real.Zero); + } + [TestMethod] [DataRow(1.23, 2.34, 0)] public void Modulo_VariableAndConstant_ReturnsHessian(double left, double right, double expected) @@ -383,6 +693,17 @@ public void Modulo_VariableAndConstant_ReturnsHessian(double left, double right, Assert.AreApproximatelyEqual(expected, actual, Real.Zero); } + [TestMethod] + [DataRow(1.23, 2.34, 2.34, 1.23)] + public void Multiply_TwoVariables_ReturnsGradient(double left, 
double right, double expectedLeft, double expectedRight) + { + Real[] expected = [expectedLeft, expectedRight]; + + var actual = ComputeGradient(_tape.Multiply, left, right); + + Assert.AreApproximatelyEqual(expected, actual, 1e-16); + } + [TestMethod] [DataRow(1.23, 2.34, 0, 1, 0)] public void Multiply_TwoVariables_ReturnsHessian(double left, double right, double expectedXX, double expectedXY, double expectedYY) @@ -394,6 +715,19 @@ public void Multiply_TwoVariables_ReturnsHessian(double left, double right, doub Assert.AreApproximatelyEqual(expected, actual, Real.Zero); } + [TestMethod] + [DataRow(1.23, 2.34, 1.23)] + public void Multiply_ConstantAndVariable_ReturnsGradient(double left, double right, double expected) + { + var x = _tape.CreateVariable(right); + _ = _tape.Multiply(left, x); + _tape.ReverseAccumulation(out ReadOnlySpan gradient); + + var actual = gradient[0]; + + Assert.AreApproximatelyEqual(expected, actual, 1e-16); + } + [TestMethod] [DataRow(1.23, 2.34, 0)] public void Multiply_ConstantAndVariable_ReturnsHessian(double left, double right, double expected) @@ -407,6 +741,19 @@ public void Multiply_ConstantAndVariable_ReturnsHessian(double left, double righ Assert.AreApproximatelyEqual(expected, actual, Real.Zero); } + [TestMethod] + [DataRow(1.23, 2.34, 2.34)] + public void Multiply_VariableAndConstant_ReturnsGradient(double left, double right, double expected) + { + var x = _tape.CreateVariable(left); + _ = _tape.Multiply(x, right); + _tape.ReverseAccumulation(out ReadOnlySpan gradient); + + var actual = gradient[0]; + + Assert.AreApproximatelyEqual(expected, actual, 1e-16); + } + [TestMethod] [DataRow(1.23, 2.34, 0)] public void Multiply_VariableAndConstant_ReturnsHessian(double left, double right, double expected) @@ -420,9 +767,18 @@ public void Multiply_VariableAndConstant_ReturnsHessian(double left, double righ Assert.AreApproximatelyEqual(expected, actual, Real.Zero); } + [TestMethod] + [DataRow(1.23, -1)] + public void 
Negate_Variable_ReturnsGradient(double input, double expected) + { + var actual = ComputeGradient(_tape.Negate, input); + + Assert.AreApproximatelyEqual(expected, actual, Real.Zero); + } + [TestMethod] [DataRow(1.23, 0)] - public void Negate_Variable_ReturnsNegation(double input, double expected) + public void Negate_Variable_ReturnsHessian(double input, double expected) { var x = _tape.CreateVariable(input); @@ -431,6 +787,17 @@ public void Negate_Variable_ReturnsNegation(double input, double expected) Assert.AreApproximatelyEqual(expected, actual, Real.Zero); } + [TestMethod] + [DataRow(1.23, 2.34, 3.088081166620949, 0.3360299854573856)] + public void Pow_TwoVariables_ReturnsGradient(double left, double right, double expectedLeft, double expectedRight) + { + Real[] expected = [expectedLeft, expectedRight]; + + var actual = ComputeGradient(_tape.Pow, left, right); + + Assert.AreApproximatelyEqual(expected, actual, 1e-15); + } + [TestMethod] [DataRow(1.23, 2.34, 3.364251027050465, 1.958969363947686, 0.06956296832768787)] public void Pow_TwoVariables_ReturnsHessian(double left, double right, double expectedXX, double expectedXY, double expectedYY) @@ -442,6 +809,17 @@ public void Pow_TwoVariables_ReturnsHessian(double left, double right, double ex Assert.AreApproximatelyEqual(expected, actual, 1e-15); } + [TestMethod] + [DataRow(1.23, 2.34, 0.3795771135606888, -0.04130373687338086)] + public void Root_TwoVariables_ReturnsGradient(double left, double right, double expectedLeft, double expectedRight) + { + Real[] expected = [expectedLeft, expectedRight]; + + var actual = ComputeGradient(_tape.Root, left, right); + + Assert.AreApproximatelyEqual(expected, actual, 1e-15); + } + [TestMethod] [DataRow(1.23, 2.34, -0.1767192454212087, -0.1765629860861052, 0.03686389570982799)] public void Root_TwoVariables_ReturnsHessian(double left, double right, double expectedXX, double expectedXY, double expectedYY) @@ -453,6 +831,15 @@ public void 
Root_TwoVariables_ReturnsHessian(double left, double right, double e Assert.AreApproximatelyEqual(expected, actual, 1e-15); } + [TestMethod] + [DataRow(1.23, 0.3342377271245026)] + public void Sin_Variable_ReturnsGradient(double input, double expected) + { + var actual = ComputeGradient(_tape.Sin, input); + + Assert.AreApproximatelyEqual(expected, actual, 1e-15); + } + [TestMethod] [DataRow(1.23, -0.942488801931697)] public void Sin_Variable_ReturnsHessian(double input, double expected) @@ -462,6 +849,15 @@ public void Sin_Variable_ReturnsHessian(double input, double expected) Assert.AreApproximatelyEqual(expected, actual, 1e-15); } + [TestMethod] + [DataRow(1.23, 1.856761056985266)] + public void Sinh_Variable_ReturnsGradient(double input, double expected) + { + var actual = ComputeGradient(_tape.Sinh, input); + + Assert.AreApproximatelyEqual(expected, actual, 1e-15); + } + [TestMethod] [DataRow(1.23, 1.564468479304407)] public void Sinh_Variable_ReturnsHessian(double input, double expected) @@ -471,6 +867,15 @@ public void Sinh_Variable_ReturnsHessian(double input, double expected) Assert.AreApproximatelyEqual(expected, actual, 1e-15); } + [TestMethod] + [DataRow(1.23, 0.4508348173337161)] + public void Sqrt_Variable_ReturnsGradient(double input, double expected) + { + var actual = ComputeGradient(_tape.Sqrt, input); + + Assert.AreApproximatelyEqual(expected, actual, 1e-15); + } + [TestMethod] [DataRow(1.23, -0.1832661859080147)] public void Sqrt_Variable_ReturnsHessian(double input, double expected) @@ -480,6 +885,17 @@ public void Sqrt_Variable_ReturnsHessian(double input, double expected) Assert.AreApproximatelyEqual(expected, actual, 1e-15); } + [TestMethod] + [DataRow(1.23, 2.34, 1, -1)] + public void Subtract_TwoVariables_ReturnsGradient(double left, double right, double expectedLeft, double expectedRight) + { + Real[] expected = [expectedLeft, expectedRight]; + + var actual = ComputeGradient(_tape.Subtract, left, right); + + 
Assert.AreApproximatelyEqual(expected, actual, Real.Zero); + } + [TestMethod] [DataRow(1.23, 2.34, 0, 0, 0)] public void Subtract_TwoVariables_ReturnsHessian(double left, double right, double expectedXX, double expectedXY, double expectedYY) @@ -491,6 +907,19 @@ public void Subtract_TwoVariables_ReturnsHessian(double left, double right, doub Assert.AreApproximatelyEqual(expected, actual, Real.Zero); } + [TestMethod] + [DataRow(1.23, 2.34, -1)] + public void Subtract_ConstantAndVariable_ReturnsGradient(double left, double right, double expected) + { + var x = _tape.CreateVariable(right); + _ = _tape.Subtract(left, x); + _tape.ReverseAccumulation(out ReadOnlySpan gradient); + + var actual = gradient[0]; + + Assert.AreApproximatelyEqual(expected, actual, Real.Zero); + } + [TestMethod] [DataRow(1.23, 2.34, 0)] public void Subtract_ConstantAndVariable_ReturnsHessian(double left, double right, double expected) @@ -504,6 +933,19 @@ public void Subtract_ConstantAndVariable_ReturnsHessian(double left, double righ Assert.AreApproximatelyEqual(expected, actual, Real.Zero); } + [TestMethod] + [DataRow(1.23, 2.34, 1)] + public void Subtract_VariableAndConstant_ReturnsGradient(double left, double right, double expected) + { + var x = _tape.CreateVariable(left); + _ = _tape.Subtract(x, right); + _tape.ReverseAccumulation(out ReadOnlySpan gradient); + + var actual = gradient[0]; + + Assert.AreApproximatelyEqual(expected, actual, Real.Zero); + } + [TestMethod] [DataRow(1.23, 2.34, 0)] public void Subtract_VariableAndConstant_ReturnsHessian(double left, double right, double expected) @@ -517,6 +959,15 @@ public void Subtract_VariableAndConstant_ReturnsHessian(double left, double righ Assert.AreApproximatelyEqual(expected, actual, Real.Zero); } + [TestMethod] + [DataRow(1.23, 8.95136077522624)] + public void Tan_Variable_ReturnsGradient(double input, double expected) + { + var actual = ComputeGradient(_tape.Tan, input); + + Assert.AreApproximatelyEqual(expected, actual, 1e-15); + } + 
[TestMethod] [DataRow(1.23, 50.4823759141874)] public void Tan_Variable_ReturnsHessian(double input, double expected) @@ -526,6 +977,15 @@ public void Tan_Variable_ReturnsHessian(double input, double expected) Assert.AreApproximatelyEqual(expected, actual, 1e-15); } + [TestMethod] + [DataRow(1.23, 0.2900600799721436)] + public void Tanh_Variable_ReturnsGradient(double input, double expected) + { + var actual = ComputeGradient(_tape.Tanh, input); + + Assert.AreApproximatelyEqual(expected, actual, 1e-15); + } + [TestMethod] [DataRow(1.23, -0.4887972531670078)] public void Tanh_Variable_ReturnsHessian(double input, double expected) @@ -539,6 +999,23 @@ public void Tanh_Variable_ReturnsHessian(double input, double expected) // Helpers // + private Real ComputeGradient(Func, Variable> function, Real input) + { + var x = _tape.CreateVariable(input); + _ = function(x); + _tape.ReverseAccumulation(out ReadOnlySpan gradient); + return gradient[0]; + } + + private Real[] ComputeGradient(Func, Variable, Variable> function, Real left, Real right) + { + var x = _tape.CreateVariable(left); + var y = _tape.CreateVariable(right); + _ = function(x, y); + _tape.ReverseAccumulation(out ReadOnlySpan gradient); + return gradient.ToArray(); + } + private Real ComputeHessian(Func, Variable> function, Real input) { var x = _tape.CreateVariable(input); From e7c556d5689bb11e12b0974fca7e9984ec2ed86f Mon Sep 17 00:00:00 2001 From: Hamlet Tanyavong <34531738+HamletTanyavong@users.noreply.github.com> Date: Thu, 23 Nov 2023 01:56:29 -0600 Subject: [PATCH 18/29] Create and use ITape interface --- src/Mathematics.NET/AutoDiff/GradientTape.cs | 104 +------- src/Mathematics.NET/AutoDiff/HessianTape.cs | 57 +---- src/Mathematics.NET/AutoDiff/ITape.cs | 248 +++++++++++++++++++ 3 files changed, 250 insertions(+), 159 deletions(-) create mode 100644 src/Mathematics.NET/AutoDiff/ITape.cs diff --git a/src/Mathematics.NET/AutoDiff/GradientTape.cs b/src/Mathematics.NET/AutoDiff/GradientTape.cs index 
89c53a0b..4fb25865 100644 --- a/src/Mathematics.NET/AutoDiff/GradientTape.cs +++ b/src/Mathematics.NET/AutoDiff/GradientTape.cs @@ -65,7 +65,7 @@ namespace Mathematics.NET.AutoDiff; /// Represents a gradient tape /// A type that implements and -public record class GradientTape +public record class GradientTape : ITape where T : IComplex, IDifferentiableFunctions { // TODO: Measure performance with Stack> instead of List> @@ -78,19 +78,14 @@ public GradientTape() _nodes = []; } - /// Get the number of nodes on the gradient tape. public int NodeCount => _nodes.Count; - /// Get the number of variables that are being tracked. public int VariableCount => _variableCount; // // Methods // - /// Create a variable for the gradient tape to track. - /// A seed value - /// A variable public Variable CreateVariable(T seed) { _nodes.Add(new(_variableCount)); @@ -98,9 +93,6 @@ public Variable CreateVariable(T seed) return variable; } - /// Print the nodes of the gradient tape to the console. - /// A cancellation token - /// The total number of nodes to print public void PrintNodes(CancellationToken cancellationToken, int limit = 100) { const string tab = " "; @@ -142,17 +134,10 @@ static void CheckForCancellation(CancellationToken cancellationToken) } } - /// Perform reverse accumulation on the gradient tape and output the resulting gradient. - /// The gradient - /// The gradient tape does not have any tracked variables. [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] public void ReverseAccumulation(out ReadOnlySpan gradient) => ReverseAccumulation(out gradient, T.One); - /// Perform reverse accumulation on the gradient tape and output the resulting gradient. - /// The gradient - /// A seed value - /// The gradient tape does not have any tracked variables. 
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] public void ReverseAccumulation(out ReadOnlySpan gradient, T seed) { @@ -184,40 +169,24 @@ public void ReverseAccumulation(out ReadOnlySpan gradient, T seed) // Basic operations // - /// Add two variables - /// The first variable - /// The second variable - /// A variable public Variable Add(Variable x, Variable y) { _nodes.Add(new(T.One, T.One, x._index, y._index)); return new(_nodes.Count - 1, x.Value + y.Value); } - /// Add a constant value and a variable - /// A constant value - /// A variable - /// A variable public Variable Add(T c, Variable x) { _nodes.Add(new(T.One, x._index, _nodes.Count)); return new(_nodes.Count - 1, c + x.Value); } - /// Add a variable and a constant value - /// A variable - /// A constant value - /// A variable public Variable Add(Variable x, T c) { _nodes.Add(new(T.One, x._index, _nodes.Count)); return new(_nodes.Count - 1, x.Value + c); } - /// Divide two variables - /// A dividend - /// A divisor - /// A variable public Variable Divide(Variable x, Variable y) { var u = T.One / y.Value; @@ -225,10 +194,6 @@ public Variable Divide(Variable x, Variable y) return new(_nodes.Count - 1, x.Value * u); } - /// Divide a constant value by a variable - /// A constant dividend - /// A variable divisor - /// A variable public Variable Divide(T c, Variable x) { var u = T.One / x.Value; @@ -236,10 +201,6 @@ public Variable Divide(T c, Variable x) return new(_nodes.Count - 1, x.Value * u); } - /// Divide a variable by a constant value - /// A variable dividend - /// A constant divisor - /// A variable public Variable Divide(Variable x, T c) { var u = T.One / c; @@ -247,90 +208,54 @@ public Variable Divide(Variable x, T c) return new(_nodes.Count - 1, x.Value * u); } - /// Compute the modulo of a variable given a divisor - /// A dividend - /// A divisor - /// mod public Variable Modulo(Variable x, Variable y) { _nodes.Add(new(T.One, x.Value * 
Real.Floor(x.Value / y.Value), x._index, y._index)); return new(_nodes.Count - 1, x.Value % y.Value); } - /// Compute the modulo of a real value given a divisor - /// A real dividend - /// A variable divisor - /// mod public Variable Modulo(Real c, Variable x) { _nodes.Add(new(c * Real.Floor(c / x.Value), x._index, _nodes.Count)); return new(_nodes.Count - 1, c % x.Value); } - /// Compute the modulo of a variable given a divisor - /// A variable dividend - /// A real divisor - /// mod public Variable Modulo(Variable x, Real c) { _nodes.Add(new(T.One, x._index, _nodes.Count)); return new(_nodes.Count - 1, x.Value % c); } - /// Multiply two variables - /// The first variable - /// The second variable - /// A variable public Variable Multiply(Variable x, Variable y) { _nodes.Add(new(y.Value, x.Value, x._index, y._index)); return new(_nodes.Count - 1, x.Value * y.Value); } - /// Multiply a constant value by a variable - /// A constant value - /// A variable - /// A variable public Variable Multiply(T c, Variable x) { _nodes.Add(new(c, x._index, _nodes.Count)); return new(_nodes.Count - 1, c * x.Value); } - /// Multiply a variable by a constant value - /// A variable - /// A constant value - /// A variable public Variable Multiply(Variable x, T c) { _nodes.Add(new(c, x._index, _nodes.Count)); return new(_nodes.Count - 1, x.Value * c); } - /// Subract two variables - /// The first variable - /// The second variable - /// A variable public Variable Subtract(Variable x, Variable y) { _nodes.Add(new(T.One, -T.One, x._index, y._index)); return new(_nodes.Count - 1, x.Value - y.Value); } - /// Subtract a variable from a constant value - /// A constant value - /// A variable - /// A variable public Variable Subtract(T c, Variable x) { _nodes.Add(new(-T.One, x._index, _nodes.Count)); return new(_nodes.Count - 1, c - x.Value); } - /// Subtract a constant value from a variable - /// A variable - /// A constant value - /// A variable public Variable Subtract(Variable x, T c) { 
_nodes.Add(new(T.One, x._index, _nodes.Count)); @@ -341,9 +266,6 @@ public Variable Subtract(Variable x, T c) // Other operations // - /// Negate a variable - /// A variable - /// Minus one times the variable public Variable Negate(Variable x) { _nodes.Add(new(-T.One, x._index, _nodes.Count)); @@ -352,7 +274,6 @@ public Variable Negate(Variable x) // Exponential functions - /// public Variable Exp(Variable x) { var exp = T.Exp(x.Value); @@ -360,7 +281,6 @@ public Variable Exp(Variable x) return new(_nodes.Count - 1, exp); } - /// public Variable Exp2(Variable x) { var exp2 = T.Exp2(x.Value); @@ -368,7 +288,6 @@ public Variable Exp2(Variable x) return new(_nodes.Count - 1, exp2); } - /// public Variable Exp10(Variable x) { var exp10 = T.Exp10(x.Value); @@ -378,42 +297,36 @@ public Variable Exp10(Variable x) // Hyperbolic functions - /// public Variable Acosh(Variable x) { _nodes.Add(new(T.One / (T.Sqrt(x.Value - T.One) * T.Sqrt(x.Value + T.One)), x._index, _nodes.Count)); return new(_nodes.Count - 1, T.Acosh(x.Value)); } - /// public Variable Asinh(Variable x) { _nodes.Add(new(T.One / T.Sqrt(x.Value * x.Value + T.One), x._index, _nodes.Count)); return new(_nodes.Count - 1, T.Asinh(x.Value)); } - /// public Variable Atanh(Variable x) { _nodes.Add(new(T.One / (T.One - x.Value * x.Value), x._index, _nodes.Count)); return new(_nodes.Count - 1, T.Atanh(x.Value)); } - /// public Variable Cosh(Variable x) { _nodes.Add(new(T.Sinh(x.Value), x._index, _nodes.Count)); return new(_nodes.Count - 1, T.Cosh(x.Value)); } - /// public Variable Sinh(Variable x) { _nodes.Add(new(T.Cosh(x.Value), x._index, _nodes.Count)); return new(_nodes.Count - 1, T.Sinh(x.Value)); } - /// public Variable Tanh(Variable x) { var u = T.One / T.Cosh(x.Value); @@ -423,14 +336,12 @@ public Variable Tanh(Variable x) // Logarithmic functions - /// public Variable Ln(Variable x) { _nodes.Add(new(T.One / x.Value, x._index, _nodes.Count)); return new(_nodes.Count - 1, T.Ln(x.Value)); } - /// public Variable 
Log(Variable x, Variable b) { var lnB = T.Ln(b.Value); @@ -438,14 +349,12 @@ public Variable Log(Variable x, Variable b) return new(_nodes.Count - 1, T.Log(x.Value, b.Value)); } - /// public Variable Log2(Variable x) { _nodes.Add(new(T.One / (Real.Ln2 * x.Value), x._index, _nodes.Count)); return new(_nodes.Count - 1, T.Log2(x.Value)); } - /// public Variable Log10(Variable x) { _nodes.Add(new(T.One / (Real.Ln10 * x.Value), x._index, _nodes.Count)); @@ -454,7 +363,6 @@ public Variable Log10(Variable x) // Power functions - /// public Variable Pow(Variable x, Variable y) { var pow = T.Pow(x.Value, y.Value); @@ -464,7 +372,6 @@ public Variable Pow(Variable x, Variable y) // Root functions - /// public Variable Cbrt(Variable x) { var cbrt = T.Cbrt(x.Value); @@ -472,7 +379,6 @@ public Variable Cbrt(Variable x) return new(_nodes.Count - 1, cbrt); } - /// public Variable Root(Variable x, Variable n) { var root = T.Root(x.Value, n.Value); @@ -480,7 +386,6 @@ public Variable Root(Variable x, Variable n) return new(_nodes.Count - 1, root); } - /// public Variable Sqrt(Variable x) { var sqrt = T.Sqrt(x.Value); @@ -490,28 +395,24 @@ public Variable Sqrt(Variable x) // Trigonometric functions - /// public Variable Acos(Variable x) { _nodes.Add(new(-T.One / T.Sqrt(T.One - x.Value * x.Value), x._index, _nodes.Count)); return new(_nodes.Count - 1, T.Acos(x.Value)); } - /// public Variable Asin(Variable x) { _nodes.Add(new(T.One / T.Sqrt(T.One - x.Value * x.Value), x._index, _nodes.Count)); return new(_nodes.Count - 1, T.Asin(x.Value)); } - /// public Variable Atan(Variable x) { _nodes.Add(new(T.One / (T.One + x.Value * x.Value), x._index, _nodes.Count)); return new(_nodes.Count - 1, T.Atan(x.Value)); } - /// public Variable Atan2(Variable y, Variable x) { var u = Real.One / (x.Value * x.Value + y.Value * y.Value); @@ -519,21 +420,18 @@ public Variable Atan2(Variable y, Variable x) return new(_nodes.Count - 1, Real.Atan2(y.Value, x.Value)); } - /// public Variable Cos(Variable x) { 
_nodes.Add(new(-T.Sin(x.Value), x._index, _nodes.Count)); return new(_nodes.Count - 1, T.Cos(x.Value)); } - /// public Variable Sin(Variable x) { _nodes.Add(new(T.Cos(x.Value), x._index, _nodes.Count)); return new(_nodes.Count - 1, T.Sin(x.Value)); } - /// public Variable Tan(Variable x) { var sec = T.One / T.Cos(x.Value); diff --git a/src/Mathematics.NET/AutoDiff/HessianTape.cs b/src/Mathematics.NET/AutoDiff/HessianTape.cs index 4b4f340f..bb5ca52c 100644 --- a/src/Mathematics.NET/AutoDiff/HessianTape.cs +++ b/src/Mathematics.NET/AutoDiff/HessianTape.cs @@ -32,7 +32,7 @@ namespace Mathematics.NET.AutoDiff; /// Represents a Hessian tape /// A type that implements and -public record class HessianTape +public record class HessianTape : ITape where T : IComplex, IDifferentiableFunctions { private List> _nodes; @@ -43,19 +43,14 @@ public HessianTape() _nodes = []; } - /// Get the number of nodes on the gradient tape. public int NodeCount => _nodes.Count; - /// Get the number of variables that are being tracked. public int VariableCount => _variableCount; // // Methods // - /// Create a variable for the gradient tape to track. - /// A seed value - /// A variable public Variable CreateVariable(T seed) { _nodes.Add(new(_variableCount)); @@ -63,9 +58,6 @@ public Variable CreateVariable(T seed) return variable; } - /// Print the nodes of the gradient tape to the console. - /// A cancellation token - /// The total number of nodes to print public void PrintNodes(CancellationToken cancellationToken, int limit = 100) { const string tab = " "; @@ -109,9 +101,6 @@ static void CheckForCancellation(CancellationToken cancellationToken) } } - /// Perform reverse accumulation on the Hessian tape and output the resulting gradient. - /// The gradient - /// The Hessian tape does not have any tracked variables. 
[MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] public void ReverseAccumulation(out ReadOnlySpan gradient) => ReverseAccumulation(out gradient, T.One); @@ -131,10 +120,6 @@ public void ReverseAccumulation(out ReadOnlySpan2D hessian) public void ReverseAccumulation(out ReadOnlySpan gradient, out ReadOnlySpan2D hessian) => ReverseAccumulation(out gradient, out hessian, T.One); - /// Perform reverse accumulation on the Hessian tape and output the resulting gradient. - /// The gradient - /// A seed value - /// The Hessian tape does not have any tracked variables. [MethodImpl(MethodImplOptions.AggressiveInlining | MethodImplOptions.AggressiveOptimization)] public void ReverseAccumulation(out ReadOnlySpan gradient, T seed) { @@ -296,28 +281,24 @@ private static void Accumulate(Span2D weight, ref HessianNode node, T v) // Basic operations // - /// public Variable Add(Variable x, Variable y) { _nodes.Add(new(T.One, T.Zero, T.Zero, T.One, T.Zero, x._index, y._index)); return new(_nodes.Count - 1, x.Value + y.Value); } - /// public Variable Add(T c, Variable x) { _nodes.Add(new(T.One, T.Zero, x._index, _nodes.Count)); return new(_nodes.Count - 1, c + x.Value); } - /// public Variable Add(Variable x, T c) { _nodes.Add(new(T.One, T.Zero, x._index, _nodes.Count)); return new(_nodes.Count - 1, x.Value + c); } - /// public Variable Divide(Variable x, Variable y) { var n = T.One / y.Value; @@ -326,7 +307,6 @@ public Variable Divide(Variable x, Variable y) return new(_nodes.Count - 1, x.Value * n); } - /// public Variable Divide(T c, Variable x) { var n = T.One / x.Value; @@ -335,7 +315,6 @@ public Variable Divide(T c, Variable x) return new(_nodes.Count - 1, c * n); } - /// public Variable Divide(Variable x, T c) { var n = T.One / c; @@ -343,63 +322,54 @@ public Variable Divide(Variable x, T c) return new(_nodes.Count - 1, x.Value * n); } - /// public Variable Modulo(Variable x, Variable y) { _nodes.Add(new(T.One, T.Zero, T.Zero, 
x.Value * Real.Floor(x.Value / y.Value), T.Zero, x._index, y._index)); return new(_nodes.Count - 1, x.Value % y.Value); } - /// public Variable Modulo(Real c, Variable x) { _nodes.Add(new(c.Value * Real.Floor(c.Value / x.Value), T.Zero, x._index, _nodes.Count)); return new(_nodes.Count - 1, c % x.Value); } - /// public Variable Modulo(Variable x, Real c) { _nodes.Add(new(T.One, T.Zero, x._index, _nodes.Count)); return new(_nodes.Count - 1, x.Value % c); } - /// public Variable Multiply(Variable x, Variable y) { _nodes.Add(new(y.Value, T.Zero, T.One, x.Value, T.Zero, x._index, y._index)); return new(_nodes.Count - 1, x.Value * y.Value); } - /// public Variable Multiply(T c, Variable x) { _nodes.Add(new(c, T.Zero, x._index, _nodes.Count)); return new(_nodes.Count - 1, c * x.Value); } - /// public Variable Multiply(Variable x, T c) { _nodes.Add(new(c, T.Zero, x._index, _nodes.Count)); return new(_nodes.Count - 1, x.Value * c); } - /// public Variable Subtract(Variable x, Variable y) { _nodes.Add(new(T.One, T.Zero, T.Zero, -T.One, T.Zero, x._index, y._index)); return new(_nodes.Count - 1, x.Value - y.Value); } - /// public Variable Subtract(T c, Variable x) { _nodes.Add(new(-T.One, T.Zero, x._index, _nodes.Count)); return new(_nodes.Count - 1, c - x.Value); } - /// public Variable Subtract(Variable x, T c) { _nodes.Add(new(T.One, T.Zero, x._index, _nodes.Count)); @@ -410,7 +380,6 @@ public Variable Subtract(Variable x, T c) // Other operations // - /// public Variable Negate(Variable x) { _nodes.Add(new(-T.One, T.Zero, x._index, _nodes.Count)); @@ -419,7 +388,6 @@ public Variable Negate(Variable x) // Exponential functions - /// public Variable Exp(Variable x) { var exp = T.Exp(x.Value); @@ -427,7 +395,6 @@ public Variable Exp(Variable x) return new(_nodes.Count - 1, exp); } - /// public Variable Exp2(Variable x) { var exp2 = T.Exp2(x.Value); @@ -436,7 +403,6 @@ public Variable Exp2(Variable x) return new(_nodes.Count - 1, exp2); } - /// public Variable Exp10(Variable 
x) { var exp10 = T.Exp10(x.Value); @@ -447,7 +413,6 @@ public Variable Exp10(Variable x) // Hyperbolic functions - /// public Variable Acosh(Variable x) { var u = x.Value - T.One; @@ -456,7 +421,6 @@ public Variable Acosh(Variable x) return new(_nodes.Count - 1, T.Acosh(x.Value)); } - /// public Variable Asinh(Variable x) { var u = T.One + x.Value * x.Value; @@ -464,7 +428,6 @@ public Variable Asinh(Variable x) return new(_nodes.Count - 1, T.Asinh(x.Value)); } - /// public Variable Atanh(Variable x) { var df = T.One / (T.One - x.Value * x.Value); @@ -472,7 +435,6 @@ public Variable Atanh(Variable x) return new(_nodes.Count - 1, T.Atanh(x.Value)); } - /// public Variable Cosh(Variable x) { var cosh = T.Cosh(x.Value); @@ -480,7 +442,6 @@ public Variable Cosh(Variable x) return new(_nodes.Count - 1, cosh); } - /// public Variable Sinh(Variable x) { var sinh = T.Sinh(x.Value); @@ -488,7 +449,6 @@ public Variable Sinh(Variable x) return new(_nodes.Count - 1, sinh); } - /// public Variable Tanh(Variable x) { var tanh = T.Tanh(x.Value); @@ -500,7 +460,6 @@ public Variable Tanh(Variable x) // Logarithmic functions - /// public Variable Ln(Variable x) { var df = T.One / x.Value; @@ -508,7 +467,6 @@ public Variable Ln(Variable x) return new(_nodes.Count - 1, T.Ln(x.Value)); } - /// public Variable Log(Variable x, Variable b) { var lnx = T.Ln(x.Value); @@ -519,7 +477,6 @@ public Variable Log(Variable x, Variable b) return new(_nodes.Count - 1, T.Log(x.Value, b.Value)); } - /// public Variable Log2(Variable x) { var u = T.One / x.Value; @@ -528,7 +485,6 @@ public Variable Log2(Variable x) return new(_nodes.Count - 1, T.Log2(x.Value)); } - /// public Variable Log10(Variable x) { var u = T.One / x.Value; @@ -539,7 +495,6 @@ public Variable Log10(Variable x) // Power functions - /// public Variable Pow(Variable x, Variable n) { var pow = T.Pow(x.Value, n.Value); @@ -559,7 +514,6 @@ public Variable Pow(Variable x, Variable n) // Root functions - /// public Variable Cbrt(Variable 
x) { var cbrt = T.Cbrt(x.Value); @@ -568,7 +522,6 @@ public Variable Cbrt(Variable x) return new(_nodes.Count - 1, cbrt); } - /// public Variable Root(Variable x, Variable n) { var root = T.Root(x.Value, n.Value); @@ -589,7 +542,6 @@ public Variable Root(Variable x, Variable n) return new(_nodes.Count - 1, root); } - /// public Variable Sqrt(Variable x) { var sqrt = T.Sqrt(x.Value); @@ -600,7 +552,6 @@ public Variable Sqrt(Variable x) // Trigonometric functions - /// public Variable Cos(Variable x) { var cos = T.Cos(x.Value); @@ -608,7 +559,6 @@ public Variable Cos(Variable x) return new(_nodes.Count - 1, cos); } - /// public Variable Acos(Variable x) { var u = T.One - x.Value * x.Value; @@ -616,7 +566,6 @@ public Variable Acos(Variable x) return new(_nodes.Count - 1, T.Acos(x.Value)); } - /// public Variable Asin(Variable x) { var u = T.One - x.Value * x.Value; @@ -624,7 +573,6 @@ public Variable Asin(Variable x) return new(_nodes.Count - 1, T.Asin(x.Value)); } - /// public Variable Atan(Variable x) { var df = T.One / (T.One + x.Value * x.Value); @@ -632,7 +580,6 @@ public Variable Atan(Variable x) return new(_nodes.Count - 1, T.Asin(x.Value)); } - /// public Variable Atan2(Variable y, Variable x) { var u = y.Value * y.Value; @@ -651,7 +598,6 @@ public Variable Atan2(Variable y, Variable x) return new(_nodes.Count - 1, Real.Atan2(y.Value, x.Value)); } - /// public Variable Sin(Variable x) { var sin = T.Sin(x.Value); @@ -659,7 +605,6 @@ public Variable Sin(Variable x) return new(_nodes.Count - 1, sin); } - /// public Variable Tan(Variable x) { var tan = T.Tan(x.Value); diff --git a/src/Mathematics.NET/AutoDiff/ITape.cs b/src/Mathematics.NET/AutoDiff/ITape.cs new file mode 100644 index 00000000..db0a640d --- /dev/null +++ b/src/Mathematics.NET/AutoDiff/ITape.cs @@ -0,0 +1,248 @@ +// +// Mathematics.NET +// https://github.com/HamletTanyavong/Mathematics.NET +// +// MIT License +// +// Copyright (c) 2023 Hamlet Tanyavong +// +// Permission is hereby granted, free of 
charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the Software is +// furnished to do so, subject to the following conditions: +// +// The above copyright notice and this permission notice shall be included in all +// copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +// SOFTWARE. +// + +namespace Mathematics.NET.AutoDiff; + +/// Defines support for tapes used in reverse-mode automatic differentiation +/// A type that implements and +public interface ITape + where T : IComplex, IDifferentiableFunctions +{ + /// Get the number of nodes on the gradient tape. + public int NodeCount { get; } + + /// Get the number of variables that are being tracked. + public int VariableCount { get; } + + /// Create a variable for the gradient tape to track. + /// A seed value + /// A variable + public Variable CreateVariable(T seed); + + /// Print the nodes of the gradient tape to the console. + /// A cancellation token + /// The total number of nodes to print + public void PrintNodes(CancellationToken cancellationToken, int limit = 100); + + /// Perform reverse accumulation on the gradient or Hessian tape and output the resulting gradient. + /// The gradient + /// The gradient tape does not have any tracked variables. 
+ public void ReverseAccumulation(out ReadOnlySpan gradient); + + /// Perform reverse accumulation on the gradient or Hessian tape and output the resulting gradient. + /// The gradient + /// A seed value + /// The gradient tape does not have any tracked variables. + public void ReverseAccumulation(out ReadOnlySpan gradient, T seed); + + // + // Basic operations + // + + /// Add two variables + /// The first variable + /// The second variable + /// A variable + public Variable Add(Variable x, Variable y); + + /// Add a constant value and a variable + /// A constant value + /// A variable + /// A variable + public Variable Add(T c, Variable x); + + /// Add a variable and a constant value + /// A variable + /// A constant value + /// A variable + public Variable Add(Variable x, T c); + + /// Divide two variables + /// A dividend + /// A divisor + /// A variable + public Variable Divide(Variable x, Variable y); + + /// Divide a constant value by a variable + /// A constant dividend + /// A variable divisor + /// A variable + public Variable Divide(T c, Variable x); + + /// Divide a variable by a constant value + /// A variable dividend + /// A constant divisor + /// A variable + public Variable Divide(Variable x, T c); + + /// Compute the modulo of a variable given a divisor + /// A dividend + /// A divisor + /// mod + public Variable Modulo(Variable x, Variable y); + + /// Compute the modulo of a real value given a divisor + /// A real dividend + /// A variable divisor + /// mod + public Variable Modulo(Real c, Variable x); + + /// Compute the modulo of a variable given a divisor + /// A variable dividend + /// A real divisor + /// mod + public Variable Modulo(Variable x, Real c); + + /// Multiply two variables + /// The first variable + /// The second variable + /// A variable + public Variable Multiply(Variable x, Variable y); + + /// Multiply a constant value by a variable + /// A constant value + /// A variable + /// A variable + public Variable Multiply(T c, 
Variable x); + + /// Multiply a variable by a constant value + /// A variable + /// A constant value + /// A variable + public Variable Multiply(Variable x, T c); + + /// Subtract two variables + /// The first variable + /// The second variable + /// A variable + public Variable Subtract(Variable x, Variable y); + + /// Subtract a variable from a constant value + /// A constant value + /// A variable + /// A variable + public Variable Subtract(T c, Variable x); + + /// Subtract a constant value from a variable + /// A variable + /// A constant value + /// A variable + public Variable Subtract(Variable x, T c); + + // + // Other operations + // + + /// Negate a variable + /// A variable + /// Minus one times the variable + public Variable Negate(Variable x); + + // Exponential functions + + /// + public Variable Exp(Variable x); + + /// + public Variable Exp2(Variable x); + + /// + public Variable Exp10(Variable x); + + // Hyperbolic functions + + /// + public Variable Acosh(Variable x); + + /// + public Variable Asinh(Variable x); + + /// + public Variable Atanh(Variable x); + + /// + public Variable Cosh(Variable x); + + /// + public Variable Sinh(Variable x); + + /// + public Variable Tanh(Variable x); + + // Logarithmic functions + + /// + public Variable Ln(Variable x); + + /// + public Variable Log(Variable x, Variable b); + + /// + public Variable Log2(Variable x); + + /// + public Variable Log10(Variable x); + + // Power functions + + /// + public Variable Pow(Variable x, Variable y); + + // Root functions + + /// + public Variable Cbrt(Variable x); + + /// + public Variable Root(Variable x, Variable n); + + /// + public Variable Sqrt(Variable x); + + // Trigonometric functions + + /// + public Variable Acos(Variable x); + + /// + public Variable Asin(Variable x); + + /// + public Variable Atan(Variable x); + + /// + public Variable Atan2(Variable y, Variable x); + + /// + public Variable Cos(Variable x); + + /// + public Variable Sin(Variable x); + + /// +
public Variable Tan(Variable x); +} From 17627a7984fcd082cc9f01511cf9e7b6038a4fb1 Mon Sep 17 00:00:00 2001 From: Hamlet Tanyavong <34531738+HamletTanyavong@users.noreply.github.com> Date: Thu, 23 Nov 2023 02:04:42 -0600 Subject: [PATCH 19/29] Use ITape --- .../AutoDiff/AutoDiffExtensions.cs | 60 +++++++++---------- .../GradientTapeExtensionsOfRealTests.cs | 8 +-- 2 files changed, 34 insertions(+), 34 deletions(-) diff --git a/src/Mathematics.NET/AutoDiff/AutoDiffExtensions.cs b/src/Mathematics.NET/AutoDiff/AutoDiffExtensions.cs index 0e3d51dd..b111f478 100644 --- a/src/Mathematics.NET/AutoDiff/AutoDiffExtensions.cs +++ b/src/Mathematics.NET/AutoDiff/AutoDiffExtensions.cs @@ -36,23 +36,23 @@ public static class AutoDiffExtensions // Variable creation // - /// Create a three-element vector from a seed vector of length three - /// A gradient tape + /// Create a three-element vector from a seed vector of length three. + /// A type that implements /// A three-element vector of seed values /// A variable vector of length three /// A type that implements and - public static VariableVector3 CreateVariableVector(this GradientTape tape, Vector3 x) + public static VariableVector3 CreateVariableVector(this ITape tape, Vector3 x) where T : IComplex, IDifferentiableFunctions => new(tape.CreateVariable(x.X1), tape.CreateVariable(x.X2), tape.CreateVariable(x.X3)); - /// Create a three-element vector from seed values - /// A gradient tape + /// Create a three-element vector from seed values. 
+ /// A type that implements /// The first seed value /// The second seed value /// The third seed value /// A variable vector of length three /// A type that implements and - public static VariableVector3 CreateVariableVector(this GradientTape tape, T x1Seed, T x2Seed, T x3Seed) + public static VariableVector3 CreateVariableVector(this ITape tape, T x1Seed, T x2Seed, T x3Seed) where T : IComplex, IDifferentiableFunctions => new(tape.CreateVariable(x1Seed), tape.CreateVariable(x2Seed), tape.CreateVariable(x3Seed)); @@ -71,10 +71,10 @@ public static VariableVector3 CreateVariableVector(this GradientTape ta /// The curl of the vector field /// A type that implements and public static Vector3 Curl( - this GradientTape tape, - Func, VariableVector3, Variable> fx, - Func, VariableVector3, Variable> fy, - Func, VariableVector3, Variable> fz, + this ITape tape, + Func, VariableVector3, Variable> fx, + Func, VariableVector3, Variable> fy, + Func, VariableVector3, Variable> fz, VariableVector3 x) where T : IComplex, IDifferentiableFunctions { @@ -101,9 +101,9 @@ public static Vector3 Curl( /// The directional derivative /// A type that implements and public static T DirectionalDerivative( - this GradientTape tape, + this ITape tape, Vector3 v, - Func, VariableVector3, Variable> f, + Func, VariableVector3, Variable> f, VariableVector3 x) where T : IComplex, IDifferentiableFunctions { @@ -123,10 +123,10 @@ public static T DirectionalDerivative( /// The divergence of the vector field /// A type that implements and public static T Divergence( - this GradientTape tape, - Func, VariableVector3, Variable> fx, - Func, VariableVector3, Variable> fy, - Func, VariableVector3, Variable> fz, + this ITape tape, + Func, VariableVector3, Variable> fx, + Func, VariableVector3, Variable> fy, + Func, VariableVector3, Variable> fz, VariableVector3 x) where T : IComplex, IDifferentiableFunctions { @@ -154,8 +154,8 @@ public static T Divergence( /// The gradient of the scalar function /// A type 
that implements and public static Vector3 Gradient( - this GradientTape tape, - Func, VariableVector3, Variable> f, + this ITape tape, + Func, VariableVector3, Variable> f, VariableVector3 x) where T : IComplex, IDifferentiableFunctions { @@ -175,10 +175,10 @@ public static Vector3 Gradient( /// The Jacobian of the vector function /// A type that implements and public static Matrix3x3 Jacobian( - this GradientTape tape, - Func, VariableVector3, Variable> fx, - Func, VariableVector3, Variable> fy, - Func, VariableVector3, Variable> fz, + this ITape tape, + Func, VariableVector3, Variable> fx, + Func, VariableVector3, Variable> fy, + Func, VariableVector3, Variable> fz, VariableVector3 x) where T : IComplex, IDifferentiableFunctions { @@ -209,10 +209,10 @@ public static Matrix3x3 Jacobian( /// The Jacobian-vector product of the vector function and vector /// A type that implements and public static Vector3 JVP( - this GradientTape tape, - Func, VariableVector3, Variable> fx, - Func, VariableVector3, Variable> fy, - Func, VariableVector3, Variable> fz, + this ITape tape, + Func, VariableVector3, Variable> fx, + Func, VariableVector3, Variable> fy, + Func, VariableVector3, Variable> fz, VariableVector3 x, Vector3 v) where T : IComplex, IDifferentiableFunctions @@ -244,11 +244,11 @@ public static Vector3 JVP( /// The vector-Jacobian product of the vector and vector-function /// A type that implements and public static unsafe Vector3 VJP( - this GradientTape tape, + this ITape tape, Vector3 v, - Func, VariableVector3, Variable> fx, - Func, VariableVector3, Variable> fy, - Func, VariableVector3, Variable> fz, + Func, VariableVector3, Variable> fx, + Func, VariableVector3, Variable> fy, + Func, VariableVector3, Variable> fz, VariableVector3 x) where T : IComplex, IDifferentiableFunctions { diff --git a/tests/Mathematics.NET.Tests/AutoDiff/GradientTapeExtensionsOfRealTests.cs b/tests/Mathematics.NET.Tests/AutoDiff/GradientTapeExtensionsOfRealTests.cs index 
4dfdfd4e..93211686 100644 --- a/tests/Mathematics.NET.Tests/AutoDiff/GradientTapeExtensionsOfRealTests.cs +++ b/tests/Mathematics.NET.Tests/AutoDiff/GradientTapeExtensionsOfRealTests.cs @@ -140,7 +140,7 @@ public void VJP_VectorAndR3VectorFunction_ReturnsVJP(double vx, double vy, doubl // // f(x, y, z) = Cos(x) / ((x + y) * Sin(z)) - private static Variable F(GradientTape tape, VariableVector3 x) + private static Variable F(ITape tape, VariableVector3 x) { return tape.Divide( tape.Cos(x.X1), @@ -150,7 +150,7 @@ private static Variable F(GradientTape tape, VariableVector3 x } // f(x, y, z) = Sin(x) * (Cos(y) + Sqrt(z)) - private static Variable FX(GradientTape tape, VariableVector3 x) + private static Variable FX(ITape tape, VariableVector3 x) { return tape.Multiply( tape.Sin(x.X1), @@ -160,7 +160,7 @@ private static Variable FX(GradientTape tape, VariableVector3 } // f(x, y, z) = Sqrt(x + y + z) - private static Variable FY(GradientTape tape, VariableVector3 x) + private static Variable FY(ITape tape, VariableVector3 x) { return tape.Sqrt( tape.Add( @@ -171,7 +171,7 @@ private static Variable FY(GradientTape tape, VariableVector3 } // f(x, y, z) = Sinh(Exp(x) * y / z) - private static Variable FZ(GradientTape tape, VariableVector3 x) + private static Variable FZ(ITape tape, VariableVector3 x) { return tape.Sinh( tape.Multiply( From 09349dfc0032993c48b9f053489229e33b8b6ffd Mon Sep 17 00:00:00 2001 From: Hamlet Tanyavong <34531738+HamletTanyavong@users.noreply.github.com> Date: Thu, 23 Nov 2023 02:05:00 -0600 Subject: [PATCH 20/29] Update HessianTapeOfRealTests.cs - Use more appropriate overload of ReverseAccumulation --- .../AutoDiff/HessianTapeOfRealTests.cs | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/tests/Mathematics.NET.Tests/AutoDiff/HessianTapeOfRealTests.cs b/tests/Mathematics.NET.Tests/AutoDiff/HessianTapeOfRealTests.cs index 66acfa22..258038d9 100644 --- 
a/tests/Mathematics.NET.Tests/AutoDiff/HessianTapeOfRealTests.cs +++ b/tests/Mathematics.NET.Tests/AutoDiff/HessianTapeOfRealTests.cs @@ -122,7 +122,7 @@ public void Add_ConstantAndVariable_ReturnsHessian(double left, double right, do { var x = _tape.CreateVariable(right); _ = _tape.Add(left, x); - _tape.ReverseAccumulation(out var _, out var hessian); + _tape.ReverseAccumulation(out ReadOnlySpan2D hessian); var actual = hessian[0, 0]; @@ -148,7 +148,7 @@ public void Add_VariableAndConstant_ReturnsHessian(double left, double right, do { var x = _tape.CreateVariable(left); _ = _tape.Add(x, right); - _tape.ReverseAccumulation(out var _, out var hessian); + _tape.ReverseAccumulation(out ReadOnlySpan2D hessian); var actual = hessian[0, 0]; @@ -378,7 +378,7 @@ public void CustomOperation_Binary_ReturnsHessian(double left, double right, dou Real[,] expected = new Real[2, 2] { { expectedXX, expectedXY }, { expectedXY, expectedYY } }; - _tape.ReverseAccumulation(out var _, out var hessian); + _tape.ReverseAccumulation(out ReadOnlySpan2D hessian); var actual = hessian.ToArray(); @@ -408,7 +408,7 @@ public void CustomOperation_Unary_ReturnsHessian(double input, double expected) { var x = _tape.CreateVariable(input); _ = _tape.CustomOperation(x, Real.Sin, Real.Cos, x => -Real.Sin(x)); - _tape.ReverseAccumulation(out var _, out var hessian); + _tape.ReverseAccumulation(out ReadOnlySpan2D hessian); var actual = hessian[0, 0]; @@ -456,7 +456,7 @@ public void Divide_ConstantAndVariable_ReturnsHessian(double left, double right, { var x = _tape.CreateVariable(right); _ = _tape.Divide(left, x); - _tape.ReverseAccumulation(out var _, out var hessian); + _tape.ReverseAccumulation(out ReadOnlySpan2D hessian); var actual = hessian[0, 0]; @@ -482,7 +482,7 @@ public void Divide_VariableAndConstant_ReturnsHessian(double left, double right, { var x = _tape.CreateVariable(left); _ = _tape.Divide(x, right); - _tape.ReverseAccumulation(out var _, out var hessian); + 
_tape.ReverseAccumulation(out ReadOnlySpan2D hessian); var actual = hessian[0, 0]; @@ -660,7 +660,7 @@ public void Modulo_ConstantAndVariable_ReturnsHessian(double left, double right, { var x = _tape.CreateVariable(right); _ = _tape.Modulo(left, x); - _tape.ReverseAccumulation(out var _, out var hessian); + _tape.ReverseAccumulation(out ReadOnlySpan2D hessian); var actual = hessian[0, 0]; @@ -686,7 +686,7 @@ public void Modulo_VariableAndConstant_ReturnsHessian(double left, double right, { var x = _tape.CreateVariable(left); _ = _tape.Modulo(x, right); - _tape.ReverseAccumulation(out var _, out var hessian); + _tape.ReverseAccumulation(out ReadOnlySpan2D hessian); var actual = hessian[0, 0]; @@ -734,7 +734,7 @@ public void Multiply_ConstantAndVariable_ReturnsHessian(double left, double righ { var x = _tape.CreateVariable(right); _ = _tape.Multiply(left, x); - _tape.ReverseAccumulation(out var _, out var hessian); + _tape.ReverseAccumulation(out ReadOnlySpan2D hessian); var actual = hessian[0, 0]; @@ -760,7 +760,7 @@ public void Multiply_VariableAndConstant_ReturnsHessian(double left, double righ { var x = _tape.CreateVariable(left); _ = _tape.Multiply(x, right); - _tape.ReverseAccumulation(out var _, out var hessian); + _tape.ReverseAccumulation(out ReadOnlySpan2D hessian); var actual = hessian[0, 0]; @@ -926,7 +926,7 @@ public void Subtract_ConstantAndVariable_ReturnsHessian(double left, double righ { var x = _tape.CreateVariable(right); _ = _tape.Subtract(left, x); - _tape.ReverseAccumulation(out var _, out var hessian); + _tape.ReverseAccumulation(out ReadOnlySpan2D hessian); var actual = hessian[0, 0]; @@ -952,7 +952,7 @@ public void Subtract_VariableAndConstant_ReturnsHessian(double left, double righ { var x = _tape.CreateVariable(left); _ = _tape.Subtract(x, right); - _tape.ReverseAccumulation(out var _, out var hessian); + _tape.ReverseAccumulation(out ReadOnlySpan2D hessian); var actual = hessian[0, 0]; From c8e66030524bf9d922f21693644be8437b950de8 Mon Sep 
17 00:00:00 2001 From: Hamlet Tanyavong <34531738+HamletTanyavong@users.noreply.github.com> Date: Thu, 23 Nov 2023 02:05:18 -0600 Subject: [PATCH 21/29] Update first-order-reverse-mode.md --- .../autodiff/first-order-reverse-mode.md | 24 +++++++++---------- 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/docs/guide/autodiff/first-order-reverse-mode.md b/docs/guide/autodiff/first-order-reverse-mode.md index 5b886561..75c6c638 100644 --- a/docs/guide/autodiff/first-order-reverse-mode.md +++ b/docs/guide/autodiff/first-order-reverse-mode.md @@ -98,11 +98,11 @@ graph BT ``` We can then calculate the gradient of our function by using the `ReverseAccumulation` method. ```csharp -tape.ReverseAccumulation(out var gradients); +tape.ReverseAccumulation(out var gradient); ``` Since this is a single variable equation, we can access the first element of `gradients` to get our result. ```csharp -Console.WriteLine(gradients[0]); +Console.WriteLine(gradient[0]); ``` The correct value for the derivative should be `3.525753368769319`. 
The complete code looks as follows: ```csharp @@ -119,12 +119,12 @@ var result = tape.Divide( // Optional: examine the nodes on the gradient tape tape.PrintNodes(); -tape.ReverseAccumulation(out var gradients); +tape.ReverseAccumulation(out var gradient); // The value of the function at the point x = 1.23: 0.6675110878078776 Console.WriteLine("Value: {0}", result); // The derivative of the function with respect to x at the point x = 1.23: 3.525753368769319 -Console.WriteLine("Derivative: {0}", gradients[0]); +Console.WriteLine("Derivative: {0}", gradient[0]); ``` ## Multivariable Equations @@ -202,7 +202,7 @@ graph BT ``` As before, we can use `ReverseAccumulation` to get our gradients ```csharp -tape.ReverseAccumulation(out var gradients); +tape.ReverseAccumulation(out var gradient); ``` and print them to the console with ```csharp @@ -210,7 +210,7 @@ using Mathematics.NET.LinearAlgebra; // code -Console.WriteLine(gradients.ToDisplayString()); +Console.WriteLine(gradient.ToDisplayString()); ``` This will print the following to the console: ``` @@ -303,12 +303,12 @@ var result = tape.Cos( tape.PrintNodes(CancellationToken.None); Console.WriteLine(); -tape.ReverseAccumulation(out var gradients); +tape.ReverseAccumulation(out var gradient); // The value of the function at the point z = 1.23 + i2.34 and w = -0.66 + i0.23 Console.WriteLine("Value: {0}", result); -// The gradients of the function: ∂f/∂z and ∂f/∂w, respectively -Console.WriteLine("Gradients: {0}", gradients.ToDisplayString()); +// The gradient of the function: ∂f/∂z and ∂f/∂w, respectively +Console.WriteLine("Gradient: {0}", gradient.ToDisplayString()); ``` which is almost the exact same code we would have written in the real case. (Note that some methods such as `Atan2` are not available for complex gradient tapes.) 
This should output the following to the console: ``` @@ -333,7 +333,7 @@ Node 5: Parents: [4, 5] Value: (27.784322505370138, 24.753716703326287) -Gradients: [(126.28638563049401, -98.74954259806483), (-38.801295827094066, -109.6878698782088) ] +Gradient: [(126.28638563049401, -98.74954259806483), (-38.801295827094066, -109.6878698782088) ] ``` ## Custom Operations @@ -348,9 +348,9 @@ var result = tape.CustomOperation( x => Real.Sin(x), // The function x => Real.Cos(x)); // The derivative of the function -tape.ReverseAccumulation(out var gradients); +tape.ReverseAccumulation(out var gradient); Console.WriteLine("Value: {0}", result); -Console.WriteLine("Gradient: {0}", gradients.ToDisplayString()); +Console.WriteLine("Gradient: {0}", gradient.ToDisplayString()); ``` For custom binary operations, we can write ```csharp From 51b5628b1a8ab60794fc5a8245a12213c61361eb Mon Sep 17 00:00:00 2001 From: Hamlet Tanyavong <34531738+HamletTanyavong@users.noreply.github.com> Date: Thu, 23 Nov 2023 03:09:23 -0600 Subject: [PATCH 22/29] Update AutoDiffExtensions.cs - Update documentation comments --- .../AutoDiff/AutoDiffExtensions.cs | 45 ++++++++----------- 1 file changed, 19 insertions(+), 26 deletions(-) diff --git a/src/Mathematics.NET/AutoDiff/AutoDiffExtensions.cs b/src/Mathematics.NET/AutoDiff/AutoDiffExtensions.cs index b111f478..ce6ceceb 100644 --- a/src/Mathematics.NET/AutoDiff/AutoDiffExtensions.cs +++ b/src/Mathematics.NET/AutoDiff/AutoDiffExtensions.cs @@ -37,21 +37,21 @@ public static class AutoDiffExtensions // /// Create a three-element vector from a seed vector of length three. 
+ /// A type that implements and /// A type that implements /// A three-element vector of seed values /// A variable vector of length three - /// A type that implements and public static VariableVector3 CreateVariableVector(this ITape tape, Vector3 x) where T : IComplex, IDifferentiableFunctions => new(tape.CreateVariable(x.X1), tape.CreateVariable(x.X2), tape.CreateVariable(x.X3)); /// Create a three-element vector from seed values. + /// A type that implements and /// A type that implements /// The first seed value /// The second seed value /// The third seed value /// A variable vector of length three - /// A type that implements and public static VariableVector3 CreateVariableVector(this ITape tape, T x1Seed, T x2Seed, T x3Seed) where T : IComplex, IDifferentiableFunctions => new(tape.CreateVariable(x1Seed), tape.CreateVariable(x2Seed), tape.CreateVariable(x3Seed)); @@ -63,13 +63,13 @@ public static VariableVector3 CreateVariableVector(this ITape tape, T x // TODO: Improve performance; perhaps see if caching is possible for some of these methods /// Compute the curl of a vector field using reverse-mode automatic differentiation: $ (\nabla\times\textbf{F})(\textbf{x}) $. - /// A gradient tape + /// A type that implements and + /// A gradient or Hessian tape /// The x-component of the vector field /// The y-component of the vector field /// The z-component of the vector field /// The point at which to compute the curl /// The curl of the vector field - /// A type that implements and public static Vector3 Curl( this ITape tape, Func, VariableVector3, Variable> fx, @@ -94,17 +94,13 @@ public static Vector3 Curl( } /// Compute the derivative of a scalar function along a particular direction using reverse-mode automatic differentiation: $ \nabla_{\textbf{v}}f(\textbf{x}) $. 
- /// A gradient tape + /// A type that implements and + /// A gradient or Hessian tape /// A direction /// A scalar function /// The point at which to compute the directional derivative /// The directional derivative - /// A type that implements and - public static T DirectionalDerivative( - this ITape tape, - Vector3 v, - Func, VariableVector3, Variable> f, - VariableVector3 x) + public static T DirectionalDerivative(this ITape tape, Vector3 v, Func, VariableVector3, Variable> f, VariableVector3 x) where T : IComplex, IDifferentiableFunctions { _ = f(tape, x); @@ -115,13 +111,13 @@ public static T DirectionalDerivative( } /// Compute the divergence of a vector field using reverse-mode automatic differentiation: $ (\nabla\cdot\textbf{F})(\textbf{x}) $. - /// A gradient tape + /// A type that implements and + /// A gradient or Hessian tape /// The x-component of the vector field /// The y-component of the vector field /// The z-component of the vector field /// The point at which to compute the divergence /// The divergence of the vector field - /// A type that implements and public static T Divergence( this ITape tape, Func, VariableVector3, Variable> fx, @@ -148,15 +144,12 @@ public static T Divergence( } /// Compute the gradient of a scalar function using reverse-mode automatic differentiation: $ \nabla f(\textbf{x}) $. 
- /// A gradient tape + /// A type that implements and + /// A gradient or Hessian tape /// A scalar function /// The point at which to compute the gradient /// The gradient of the scalar function - /// A type that implements and - public static Vector3 Gradient( - this ITape tape, - Func, VariableVector3, Variable> f, - VariableVector3 x) + public static Vector3 Gradient(this ITape tape, Func, VariableVector3, Variable> f, VariableVector3 x) where T : IComplex, IDifferentiableFunctions { _ = f(tape, x); @@ -167,13 +160,13 @@ public static Vector3 Gradient( } /// Compute the Jacobian of a vector function using reverse-mode automatic differentiation: $ \nabla^\text{T}f_i(\textbf{x}) $ for $ i=\left\{1,2,3\right\} $. - /// A gradient tape + /// A type that implements and + /// A gradient or Hessian tape /// The first function /// The second function /// The third function /// The point at which to compute the Jacobian /// The Jacobian of the vector function - /// A type that implements and public static Matrix3x3 Jacobian( this ITape tape, Func, VariableVector3, Variable> fx, @@ -200,14 +193,14 @@ public static Matrix3x3 Jacobian( } /// Compute the Jacobian-vector product of a vector function and a vector using reverse-mode automatic differentiation. - /// A gradient tape + /// A type that implements and + /// A gradient or Hessian tape /// The first function /// The second function /// The third function /// The point at which to compute the Jacobian-vector product /// A vector /// The Jacobian-vector product of the vector function and vector - /// A type that implements and public static Vector3 JVP( this ITape tape, Func, VariableVector3, Variable> fx, @@ -235,15 +228,15 @@ public static Vector3 JVP( } /// Compute the vector-Jacobian product of a vector and a vector function using reverse-mode automatic differentiation. 
- /// A gradient tape + /// A type that implements and + /// A gradient or Hessian tape /// A vector /// The first function /// The second function /// The third function /// The point at which to compute the vector-Jacobian product /// The vector-Jacobian product of the vector and vector-function - /// A type that implements and - public static unsafe Vector3 VJP( + public static Vector3 VJP( this ITape tape, Vector3 v, Func, VariableVector3, Variable> fx, From 19c8dffdbb4da5eeb36bfb7d7c99271484225b9a Mon Sep 17 00:00:00 2001 From: Hamlet Tanyavong <34531738+HamletTanyavong@users.noreply.github.com> Date: Thu, 23 Nov 2023 03:09:44 -0600 Subject: [PATCH 23/29] Add method for computing Laplacians --- .../AutoDiff/AutoDiffExtensions.cs | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/Mathematics.NET/AutoDiff/AutoDiffExtensions.cs b/src/Mathematics.NET/AutoDiff/AutoDiffExtensions.cs index ce6ceceb..570a0624 100644 --- a/src/Mathematics.NET/AutoDiff/AutoDiffExtensions.cs +++ b/src/Mathematics.NET/AutoDiff/AutoDiffExtensions.cs @@ -227,6 +227,22 @@ public static Vector3 JVP( return result; } + /// Compute the Laplacian of a scalar function using reverse-mode automatic differentiation: $ \nabla^2f $. + /// A type that implements and + /// A Hessian tape + /// A scalar function + /// The point at which to compute the Laplacian + /// The Laplacian of the scalar function + public static T Laplacian(this HessianTape tape, Func, VariableVector3, Variable> f, VariableVector3 x) + where T : IComplex, IDifferentiableFunctions + { + _ = f(tape, x); + + tape.ReverseAccumulation(out ReadOnlySpan2D hessian); + + return hessian[0, 0] + hessian[1, 1] + hessian[2, 2]; + } + /// Compute the vector-Jacobian product of a vector and a vector function using reverse-mode automatic differentiation.
/// A type that implements and /// A gradient or Hessian tape From 7df4da6e753c9ce7d403992528e843a3e4199923 Mon Sep 17 00:00:00 2001 From: Hamlet Tanyavong <34531738+HamletTanyavong@users.noreply.github.com> Date: Thu, 23 Nov 2023 03:19:06 -0600 Subject: [PATCH 24/29] Add test for Laplacian --- .../AutoDiff/GradientTapeExtensionsOfRealTests.cs | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tests/Mathematics.NET.Tests/AutoDiff/GradientTapeExtensionsOfRealTests.cs b/tests/Mathematics.NET.Tests/AutoDiff/GradientTapeExtensionsOfRealTests.cs index 93211686..cf9a4ea1 100644 --- a/tests/Mathematics.NET.Tests/AutoDiff/GradientTapeExtensionsOfRealTests.cs +++ b/tests/Mathematics.NET.Tests/AutoDiff/GradientTapeExtensionsOfRealTests.cs @@ -121,6 +121,19 @@ public void JVP_R3VectorFunctionAndVector_ReturnsJVP(double x, double y, double Assert.AreApproximatelyEqual(expected, actual, 1e-15); } + [TestMethod] + [TestCategory("Vector Calculus")] + [DataRow(1.23, 0.66, 2.34, 1.471507039061705)] + public void Laplacian_ScalarFunction_ReturnsLaplacian(double x, double y, double z, double expected) + { + HessianTape tape = new(); + var u = tape.CreateVariableVector(x, y, z); + + var actual = tape.Laplacian(F, u); + + Assert.AreApproximatelyEqual(expected, actual, 1e-15); + } + [TestMethod] [TestCategory("Vector Calculus")] [DataRow(0.23, 1.57, -1.71, 1.23, 0.66, 2.34, -1.919813065970865, -3.508528536106042, 1.512286126049506)] From 45efb77dc913583372655334693b212f844ccd5b Mon Sep 17 00:00:00 2001 From: Hamlet Tanyavong <34531738+HamletTanyavong@users.noreply.github.com> Date: Thu, 23 Nov 2023 03:23:54 -0600 Subject: [PATCH 25/29] Update first-order-reverse-mode.md --- docs/guide/autodiff/first-order-reverse-mode.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/guide/autodiff/first-order-reverse-mode.md b/docs/guide/autodiff/first-order-reverse-mode.md index 75c6c638..6415f538 100644 --- a/docs/guide/autodiff/first-order-reverse-mode.md +++ 
b/docs/guide/autodiff/first-order-reverse-mode.md @@ -2,7 +2,7 @@ Support for first-order, reverse-mode automatic differentiation (autodiff) is provided by the `GradientTape` class. -## Gradient tapes +## Gradient Tapes Gradient tapes keep track of operations for autodiff; unlike forward-mode autodiff, tracking is required since gradients have to be calculated in reverse order. To begin using reverse-mode autodiff, we must create a gradient tape and assign it variables to track. These variables will be passed into and returned from methods that will compute the local gradients for us and record them on the tape. ```csharp From 44da776ce1131566c4e802e410f2be4254fde315 Mon Sep 17 00:00:00 2001 From: Hamlet Tanyavong <34531738+HamletTanyavong@users.noreply.github.com> Date: Thu, 23 Nov 2023 04:52:08 -0600 Subject: [PATCH 26/29] Update HessianTape.cs --- src/Mathematics.NET/AutoDiff/HessianTape.cs | 1 + 1 file changed, 1 insertion(+) diff --git a/src/Mathematics.NET/AutoDiff/HessianTape.cs b/src/Mathematics.NET/AutoDiff/HessianTape.cs index bb5ca52c..47846a51 100644 --- a/src/Mathematics.NET/AutoDiff/HessianTape.cs +++ b/src/Mathematics.NET/AutoDiff/HessianTape.cs @@ -185,6 +185,7 @@ public void ReverseAccumulation(out ReadOnlySpan2D hessian, T seed) // The following method uses the edge-pushing algorithm outlined by Gower and Mello: https://arxiv.org/pdf/2007.15040.pdf. // TODO: use newer variations/versions of this algorithm since they are more performant + // TODO: consider creating an overload that computes only the diagonal components of Hessians /// Perform reverse accumulation on the Hessian tape and output the resulting gradient and Hessian. 
/// The gradient From 28d931e152adb03ee7478424267f67eb57c748fe Mon Sep 17 00:00:00 2001 From: Hamlet Tanyavong <34531738+HamletTanyavong@users.noreply.github.com> Date: Thu, 23 Nov 2023 04:53:20 -0600 Subject: [PATCH 27/29] Create new page - Create page for second-order, reverse-mode autodiff --- .../autodiff/second-order-reverse-mode.md | 58 +++++++++++++++++++ docs/guide/toc.yml | 2 + 2 files changed, 60 insertions(+) create mode 100644 docs/guide/autodiff/second-order-reverse-mode.md diff --git a/docs/guide/autodiff/second-order-reverse-mode.md b/docs/guide/autodiff/second-order-reverse-mode.md new file mode 100644 index 00000000..90107123 --- /dev/null +++ b/docs/guide/autodiff/second-order-reverse-mode.md @@ -0,0 +1,58 @@ +# Second-Order, Reverse-Mode Automatic Differentiation + +Support for second-order, reverse-mode automatic differentiation (autodiff) is provided by the `HessianTape` class. + +## Hessian Tapes + +The steps needed to perform second-order, reverse-mode autodiff are similar to the steps needed to perform the first-order case. This time, however, we have access to the following overloads and/or versions of `ReverseAccumulation`: +```csharp +HessianTape tape = new(); + +// Do some math...
+ +// Use when we are only interested in the gradient +tape.ReverseAccumulation(out ReadOnlySpan gradient); +// Use when we are only interested in the Hessian +tape.ReverseAccumulation(out ReadOnlySpan2D hessian); +// Use when we are interested in both the gradient and Hessian +tape.ReverseAccumulation(out var gradient, out var hessian); +``` +The last version may be useful for calculations such as finding the Laplacian of a scalar function in spherical coordinates which involves derivatives of first and second orders: +$$ +\begin{align} + \nabla^2f(r,\theta,\phi) & =\frac{1}{r^2}\frac{\partial}{\partial r}\left(r^2\frac{\partial f}{\partial r}\right)+\frac{1}{r^2\sin{\theta}}\frac{\partial}{\partial\theta}\left(\sin{\theta}\frac{\partial f}{\partial\theta}\right)+\frac{1}{r^2\sin^2{\theta}}\frac{\partial^2f}{\partial\phi^2} \\ + & =\frac{2}{r}\frac{\partial f}{\partial r}+\frac{\partial^2f}{\partial r^2}+\frac{1}{r^2\sin{\theta}}\left(\cos{\theta}\frac{\partial f}{\partial\theta}+\sin{\theta}\frac{\partial^2f}{\partial\theta^2}\right)+\frac{1}{r^2\sin^2{\theta}}\frac{\partial^2f}{\partial\phi^2} +\end{align} +$$ +Note that, in the future, we will not have to do this manually since there will be a method made specifically to compute Laplacians in spherical coordinates. 
For now, if we wanted to compute the Laplacian of the function +$$ + f(x,y,z) = \frac{\cos(x)}{(x+y)\sin(z)} +$$ +we can write +```csharp +using Mathematics.NET.AutoDiff; +using Mathematics.NET.Core; + +HessianTape tape = new(); +var x = tape.CreateVariableVector(1.23, 0.66, 0.23); + +// f(x, y, z) = cos(x) / ((x + y) * sin(z)) +_ = tape.Divide( + tape.Cos(x.X1), + tape.Multiply( + tape.Add(x.X1, x.X2), + tape.Sin(x.X3))); + +tape.ReverseAccumulation(out var gradient, out var hessian); + +// Manual Laplacian computation +var u = Real.One / (x.X1.Value * Real.Sin(x.X2.Value)); // 1 / (r * sin(θ)) +var laplacian = 2.0 * gradient[0] / x.X1.Value + + hessian[0, 0] + + u * Real.Cos(x.X2.Value) * gradient[1] / x.X1.Value + + hessian[1, 1] / (x.X1.Value * x.X1.Value) + + u * u * hessian[2, 2]; + +Console.WriteLine(laplacian); +``` +which should give us `48.80966092022821`. diff --git a/docs/guide/toc.yml b/docs/guide/toc.yml index 65dd0a71..1feffb99 100644 --- a/docs/guide/toc.yml +++ b/docs/guide/toc.yml @@ -14,3 +14,5 @@ href: autodiff/first-order-reverse-mode.md - name: First-Order, Forward-Mode Automatic Differentiation href: autodiff/first-order-forward-mode.md + - name: Second-Order, Reverse-Mode Automatic Differentiation + href: autodiff/second-order-reverse-mode.md From 5451491d4370ce2377ca7e5ec90344c4619e8b88 Mon Sep 17 00:00:00 2001 From: Hamlet Tanyavong <34531738+HamletTanyavong@users.noreply.github.com> Date: Thu, 23 Nov 2023 05:01:56 -0600 Subject: [PATCH 28/29] Update index.md --- docs/index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/index.md b/docs/index.md index a4654d64..71aa48fa 100644 --- a/docs/index.md +++ b/docs/index.md @@ -54,4 +54,4 @@ ## About -Mathematics.NET provides custom types for complex, real, and rational numbers as well as other mathematical objects such as vectors, matrices, and tensors. 
+Mathematics.NET provides custom types for complex, real, and rational numbers as well as other mathematical objects such as vectors, matrices, and tensors. Mathematics.NET also supports forward and reverse-mode automatic differentiation. From bb1fc908f302cd7436236078f988c85e17ccf2a8 Mon Sep 17 00:00:00 2001 From: Hamlet Tanyavong <34531738+HamletTanyavong@users.noreply.github.com> Date: Thu, 23 Nov 2023 05:02:22 -0600 Subject: [PATCH 29/29] Update Mathematics.NET.csproj - Update version and description --- src/Mathematics.NET/Mathematics.NET.csproj | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/Mathematics.NET/Mathematics.NET.csproj b/src/Mathematics.NET/Mathematics.NET.csproj index 7c74ca9e..db00fb91 100644 --- a/src/Mathematics.NET/Mathematics.NET.csproj +++ b/src/Mathematics.NET/Mathematics.NET.csproj @@ -7,10 +7,10 @@ enable x64 Mathematics.NET - 0.1.0-alpha.7 + 0.1.0-alpha.8 mathematics.net.png Hamlet Tanyavong - Mathematics.NET is a C# class library that provides tools for solving mathematical problems. + Mathematics.NET is a C# class library that provides tools for solving mathematical problems. Included are custom types for real, complex, and rational numbers as well as other mathematical objects such as vectors, matrices, and tensors. The library also contains methods for performing forward and reverse-mode automatic differentiation. autodiff; complex; math; mathematics; physics; rational; tensors; LICENSE True