From 97e6185c0b727a238e28527e2002d4c82439ac83 Mon Sep 17 00:00:00 2001
From: MichaelHatherly
Date: Thu, 4 Jul 2024 16:11:13 +0100
Subject: [PATCH] Interpolated HTML attribute escaping improvements
---
src/HypertextTemplates.jl | 1 +
src/SafeString.jl | 19 +++++++++++
src/nodes/element.jl | 13 ++++++--
src/nodes/julia.jl | 39 ++++++++++++++++++----
test/runtests.jl | 24 +++++++++++++
test/templates/basic/escaped-content.1.txt | 2 +-
test/templates/basic/escaped-content.2.txt | 1 +
test/templates/basic/escaped-content.3.txt | 1 +
test/templates/basic/escaped-content.4.txt | 2 ++
test/templates/basic/special-symbols.html | 3 +-
10 files changed, 95 insertions(+), 10 deletions(-)
create mode 100644 src/SafeString.jl
create mode 100644 test/templates/basic/escaped-content.2.txt
create mode 100644 test/templates/basic/escaped-content.3.txt
create mode 100644 test/templates/basic/escaped-content.4.txt
diff --git a/src/HypertextTemplates.jl b/src/HypertextTemplates.jl
index 1c2b831..02e6d89 100644
--- a/src/HypertextTemplates.jl
+++ b/src/HypertextTemplates.jl
@@ -24,6 +24,7 @@ const SRC_DIR = @__DIR__
# Includes.
include("Lexbor.jl")
+include("SafeString.jl")
include("utilities.jl")
include("macro.jl")
include("nodes/abstract.jl")
diff --git a/src/SafeString.jl b/src/SafeString.jl
new file mode 100644
index 0000000..016645a
--- /dev/null
+++ b/src/SafeString.jl
@@ -0,0 +1,19 @@
+"""
+ SafeString <: AbstractString
+
+Represents an internal string type that was created directly within a template,
+rather than being passed into a template from a potentially untrusted source as
+a parameter. Never create these objects from application-level code.
+"""
+struct SafeString <: AbstractString
+ str::String
+end
+
+Base.show(io::IO, s::SafeString) = show(io, s.str)
+Base.ncodeunits(s::SafeString) = ncodeunits(s.str)
+Base.codeunit(s::SafeString) = codeunit(s.str)
+Base.codeunit(s::SafeString, i::Integer) = codeunit(s.str, i)
+Base.isvalid(s::SafeString, index::Integer) = isvalid(s.str, index)
+Base.iterate(s::SafeString) = iterate(s.str)
+Base.iterate(s::SafeString, state::Integer) = iterate(s.str, state)
+Base.String(s::SafeString) = String(s.str)
diff --git a/src/nodes/element.jl b/src/nodes/element.jl
index f969882..a52353a 100644
--- a/src/nodes/element.jl
+++ b/src/nodes/element.jl
@@ -24,9 +24,18 @@ function expression(c::BuilderContext, a::Attribute)
elseif a.dynamic
return Expr(:kw, name, Meta.parse(a.value))
elseif a.interpolate
- return Expr(:kw, name, Meta.parse("\"\"\"$(a.value)\"\"\""))
+ expr = Meta.parse("\"\"\"$(a.value)\"\"\"")
+ # When we interpolate prop values into another prop via Julia string
+ # interpolation with the `$prop=value` syntax, we want to perform HTML
+ # escaping on each value interpolated into the string, but not perform
+ # escaping on the hardcoded values that appear directly in the HTML
+ # template. This is what `_escape_html_str_expr` does. The resulting
+ # value is a `SafeString`, which will not be further HTML escaped it
+ # itself is interpolated into another string.
+ escaped_expr = _escape_html_attr_str_expr(expr)
+ return Expr(:kw, name, escaped_expr)
else
- return Expr(:kw, name, a.value)
+ return Expr(:kw, name, SafeString(a.value))
end
end
diff --git a/src/nodes/julia.jl b/src/nodes/julia.jl
index a999c44..f1f3f30 100644
--- a/src/nodes/julia.jl
+++ b/src/nodes/julia.jl
@@ -30,13 +30,39 @@ function expression(c::BuilderContext, j::Julia)
end |> lln_replacer(c.file, j.line)
end
-function escape_html(io::IO, value)
- if showable("text/html", value)
- show(io, "text/html", value)
- else
- escape_html(io, string(value))
- end
+function _escape_html_attr_str_expr(expr::Expr)
+ Meta.isexpr(expr, :string) || return expr
+ # When we encounter an interpolated string we wrap each interpolated
+ # value with a call to `_escape_html_attribute` which will escape the
+ # contents of the string. Strings that have already been escaped that
+ # are then interpolated into nested element attributes will be wrapped
+ # in a `SafeString` and will not be escaped again.
+ escaped_expr = Expr(:string, _escape_html_str_expr_part.(expr.args)...)
+ return Expr(:call, SafeString, escaped_expr)
end
+_escape_html_attr_str_expr(other) = other
+
+_escape_html_str_expr_part(string::String) = string
+_escape_html_str_expr_part(other) = Expr(:call, _escape_html_attribute, other)
+
+_escape_html_attribute(value) = sprint(escape_html, string(value))
+# Strings marked as "safe" will not be escaped.
+_escape_html_attribute(ss::SafeString) = ss
+
+"""
+ escape_html(io::IO, value::T)
+
+Provide a custom implementation of HTML escaping for a given type `T`. This is
+useful if you have specific types within your application that you would like to
+have handle their own HTML escaping rather than the default behavior where all
+values are stringified and escaped.
+
+You should only ever use this on types that you know are safe to be rendered
+directly into HTML without escaping. Never pass user-generated content into
+types that implement this method without first verifying and escaping content
+manually.
+"""
+function escape_html end
function escape_html(io::IO, value::AbstractString)
for c in value
@@ -61,3 +87,4 @@ function escape_html(io::IO, value::AbstractString)
end
end
end
+escape_html(io::IO, value) = escape_html(io, string(value))
diff --git a/test/runtests.jl b/test/runtests.jl
index 9cb922a..611149d 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -172,6 +172,30 @@ end
@test_reference joinpath(basic, "escaped-content.1.txt") render(
Templates.var"escaped-content";
content = "&='`<>/",
+ interpolated = "user-name",
+ )
+
+ @test_reference joinpath(basic, "escaped-content.2.txt") render(
+ Templates.var"escaped-content";
+ content = "&='`<>/",
+ interpolated = "\">&='`<>/
\ No newline at end of file
+&='`<>/
Link
\ No newline at end of file
diff --git a/test/templates/basic/escaped-content.2.txt b/test/templates/basic/escaped-content.2.txt
new file mode 100644
index 0000000..b3e81ad
--- /dev/null
+++ b/test/templates/basic/escaped-content.2.txt
@@ -0,0 +1 @@
+&='`<>/
Link
\ No newline at end of file
diff --git a/test/templates/basic/escaped-content.3.txt b/test/templates/basic/escaped-content.3.txt
new file mode 100644
index 0000000..ba28cc1
--- /dev/null
+++ b/test/templates/basic/escaped-content.3.txt
@@ -0,0 +1 @@
+Node(CommonMark.Document)
Link
\ No newline at end of file
diff --git a/test/templates/basic/escaped-content.4.txt b/test/templates/basic/escaped-content.4.txt
new file mode 100644
index 0000000..cb79ab7
--- /dev/null
+++ b/test/templates/basic/escaped-content.4.txt
@@ -0,0 +1,2 @@
+bold
+Link
\ No newline at end of file
diff --git a/test/templates/basic/special-symbols.html b/test/templates/basic/special-symbols.html
index 54a1e84..d76fee4 100644
--- a/test/templates/basic/special-symbols.html
+++ b/test/templates/basic/special-symbols.html
@@ -8,6 +8,7 @@
-
+
+ Link
\ No newline at end of file