From 97e6185c0b727a238e28527e2002d4c82439ac83 Mon Sep 17 00:00:00 2001 From: MichaelHatherly Date: Thu, 4 Jul 2024 16:11:13 +0100 Subject: [PATCH] Interpolated HTML attribute escaping improvements --- src/HypertextTemplates.jl | 1 + src/SafeString.jl | 19 +++++++++++ src/nodes/element.jl | 13 ++++++-- src/nodes/julia.jl | 39 ++++++++++++++++++---- test/runtests.jl | 24 +++++++++++++ test/templates/basic/escaped-content.1.txt | 2 +- test/templates/basic/escaped-content.2.txt | 1 + test/templates/basic/escaped-content.3.txt | 1 + test/templates/basic/escaped-content.4.txt | 2 ++ test/templates/basic/special-symbols.html | 3 +- 10 files changed, 95 insertions(+), 10 deletions(-) create mode 100644 src/SafeString.jl create mode 100644 test/templates/basic/escaped-content.2.txt create mode 100644 test/templates/basic/escaped-content.3.txt create mode 100644 test/templates/basic/escaped-content.4.txt diff --git a/src/HypertextTemplates.jl b/src/HypertextTemplates.jl index 1c2b831..02e6d89 100644 --- a/src/HypertextTemplates.jl +++ b/src/HypertextTemplates.jl @@ -24,6 +24,7 @@ const SRC_DIR = @__DIR__ # Includes. include("Lexbor.jl") +include("SafeString.jl") include("utilities.jl") include("macro.jl") include("nodes/abstract.jl") diff --git a/src/SafeString.jl b/src/SafeString.jl new file mode 100644 index 0000000..016645a --- /dev/null +++ b/src/SafeString.jl @@ -0,0 +1,19 @@ +""" + SafeString <: AbstractString + +Represents an internal string type that was created directly within a template, +rather than being passed into a template from a potentially untrusted source as +a parameter. Never create these objects from application-level code. +""" +struct SafeString <: AbstractString + str::String +end + +Base.show(io::IO, s::SafeString) = show(io, s.str) +Base.ncodeunits(s::SafeString) = ncodeunits(s.str) +Base.codeunit(s::SafeString) = codeunit(s.str) +Base.codeunit(s::SafeString, i::Integer) = codeunit(s.str, i) +Base.isvalid(s::SafeString, index::Integer) = isvalid(s.str, index) +Base.iterate(s::SafeString) = iterate(s.str) +Base.iterate(s::SafeString, state::Integer) = iterate(s.str, state) +Base.String(s::SafeString) = String(s.str) diff --git a/src/nodes/element.jl b/src/nodes/element.jl index f969882..a52353a 100644 --- a/src/nodes/element.jl +++ b/src/nodes/element.jl @@ -24,9 +24,18 @@ function expression(c::BuilderContext, a::Attribute) elseif a.dynamic return Expr(:kw, name, Meta.parse(a.value)) elseif a.interpolate - return Expr(:kw, name, Meta.parse("\"\"\"$(a.value)\"\"\"")) + expr = Meta.parse("\"\"\"$(a.value)\"\"\"") + # When we interpolate prop values into another prop via Julia string + # interpolation with the `$prop=value` syntax, we want to perform HTML + # escaping on each value interpolated into the string, but not perform + # escaping on the hardcoded values that appear directly in the HTML + # template. This is what `_escape_html_str_expr` does. The resulting + # value is a `SafeString`, which will not be further HTML escaped it + # itself is interpolated into another string. + escaped_expr = _escape_html_attr_str_expr(expr) + return Expr(:kw, name, escaped_expr) else - return Expr(:kw, name, a.value) + return Expr(:kw, name, SafeString(a.value)) end end diff --git a/src/nodes/julia.jl b/src/nodes/julia.jl index a999c44..f1f3f30 100644 --- a/src/nodes/julia.jl +++ b/src/nodes/julia.jl @@ -30,13 +30,39 @@ function expression(c::BuilderContext, j::Julia) end |> lln_replacer(c.file, j.line) end -function escape_html(io::IO, value) - if showable("text/html", value) - show(io, "text/html", value) - else - escape_html(io, string(value)) - end +function _escape_html_attr_str_expr(expr::Expr) + Meta.isexpr(expr, :string) || return expr + # When we encounter an interpolated string we wrap each interpolated + # value with a call to `_escape_html_attribute` which will escape the + # contents of the string. Strings that have already been escaped that + # are then interpolated into nested element attributes will be wrapped + # in a `SafeString` and will not be escaped again. + escaped_expr = Expr(:string, _escape_html_str_expr_part.(expr.args)...) + return Expr(:call, SafeString, escaped_expr) end +_escape_html_attr_str_expr(other) = other + +_escape_html_str_expr_part(string::String) = string +_escape_html_str_expr_part(other) = Expr(:call, _escape_html_attribute, other) + +_escape_html_attribute(value) = sprint(escape_html, string(value)) +# Strings marked as "safe" will not be escaped. +_escape_html_attribute(ss::SafeString) = ss + +""" + escape_html(io::IO, value::T) + +Provide a custom implementation of HTML escaping for a given type `T`. This is +useful if you have specific types within your application that you would like to +have handle their own HTML escaping rather than the default behavior where all +values are stringified and escaped. + +You should only ever use this on types that you know are safe to be rendered +directly into HTML without escaping. Never pass user-generated content into +types that implement this method without first verifying and escaping content +manually. +""" +function escape_html end function escape_html(io::IO, value::AbstractString) for c in value @@ -61,3 +87,4 @@ function escape_html(io::IO, value::AbstractString) end end end +escape_html(io::IO, value) = escape_html(io, string(value)) diff --git a/test/runtests.jl b/test/runtests.jl index 9cb922a..611149d 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -172,6 +172,30 @@ end @test_reference joinpath(basic, "escaped-content.1.txt") render( Templates.var"escaped-content"; content = "&='`<>/", + interpolated = "user-name", + ) + + @test_reference joinpath(basic, "escaped-content.2.txt") render( + Templates.var"escaped-content"; + content = "&='`<>/", + interpolated = "\">&='`<>/

\ No newline at end of file +

&='`<>/

Link \ No newline at end of file diff --git a/test/templates/basic/escaped-content.2.txt b/test/templates/basic/escaped-content.2.txt new file mode 100644 index 0000000..b3e81ad --- /dev/null +++ b/test/templates/basic/escaped-content.2.txt @@ -0,0 +1 @@ +

&='`<>/

Link \ No newline at end of file diff --git a/test/templates/basic/escaped-content.3.txt b/test/templates/basic/escaped-content.3.txt new file mode 100644 index 0000000..ba28cc1 --- /dev/null +++ b/test/templates/basic/escaped-content.3.txt @@ -0,0 +1 @@ +

Node(CommonMark.Document)

Link \ No newline at end of file diff --git a/test/templates/basic/escaped-content.4.txt b/test/templates/basic/escaped-content.4.txt new file mode 100644 index 0000000..cb79ab7 --- /dev/null +++ b/test/templates/basic/escaped-content.4.txt @@ -0,0 +1,2 @@ +

bold

+

Link \ No newline at end of file diff --git a/test/templates/basic/special-symbols.html b/test/templates/basic/special-symbols.html index 54a1e84..d76fee4 100644 --- a/test/templates/basic/special-symbols.html +++ b/test/templates/basic/special-symbols.html @@ -8,6 +8,7 @@ - +

+ Link
\ No newline at end of file