Skip to content

Commit

Permalink
Optimize to generate less bytecode
Browse files Browse the repository at this point in the history
It was possible for the macros to generate so much bytecode that they
would exceed Java's 64KB limit for method code size. In such situations
the Clojure compiler would fail with the message "Method code too
large!" See #205

This commit does the following optimizations:

1. Concatenate string literals at compile time, so that we can replace
   multiple StringBuilder.append() calls with just one. This reduces the
   generated code from O(n) to O(1). This also improves performance by
   10-20%, since copying one long string is faster than many short
   strings.

2. When a runtime check is needed to determine whether a value is the
   element's attribute map or its first child element, avoid duplicating
   the code for the element's content. This reduces the generated code
   from O(n^2) to O(n).

While improving the test coverage, some edge cases of generating bad
HTML were detected. This commit doesn't change the existing behavior,
but only documents it in the new tests. Fixing that behavior will be
done in future commits.
  • Loading branch information
luontola committed Dec 6, 2023
1 parent 707f7fd commit 00ddf99
Show file tree
Hide file tree
Showing 3 changed files with 231 additions and 18 deletions.
58 changes: 40 additions & 18 deletions src/hiccup/compiler.clj
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,30 @@
(while (.hasNext iterator)
(callback (.next iterator))))))

(defn- concatenate-strings [coll]
(->> coll
(partition-by string?)
(mapcat (fn [group]
(if (string? (first group))
[(apply str group)]
group)))))

(defmacro build-string [& strs]
(let [w (gensym)]
`(let [~w (StringBuilder.)]
~@(map (fn [arg] `(.append ~w (or ~arg ""))) strs)
(.toString ~w))))
(let [strs (concatenate-strings strs)
w (gensym)]
(case (count strs)
0 ""
1 (let [arg (first strs)]
(if (string? arg)
arg
`(String/valueOf (or ~arg ""))))

This comment has been minimized.

Copy link
@jeroenvandijk

jeroenvandijk Oct 11, 2024

I'm looking into making hiccup more platform independent, e.g. using it in Sci. Most of the Java interop can be replaced with Clojure functions (e.g. .indexOf with clojure.string/index-of, .substring with subs). However for (String/valueOf (or ~arg "")) I didn't find an equivalent.

After looking further into this the above case might actually be redundant. String/valueOf is only different of String/toString in the case for nil (source). With the or here String/valueOf will never be called with nil, making it effectively equivalent to String/toString, which can be replaced with str. The above would then become (str ~arg)

@luontola Do you know if I might be missing something here?

This comment has been minimized.

Copy link
@luontola

luontola Oct 11, 2024

Author Contributor

I think you’re correct.

This comment has been minimized.

Copy link
@jeroenvandijk

jeroenvandijk Oct 14, 2024

FYI I've made a PR with this and some other changes #215

`(let [~w (StringBuilder.)]
~@(map (fn [arg]
(if (string? arg)
`(.append ~w ~arg)
`(.append ~w (or ~arg ""))))
strs)
(.toString ~w)))))

(defn- render-style-map [value]
(let [sb (StringBuilder.)]
Expand Down Expand Up @@ -316,20 +335,23 @@
(let [[tag tag-attrs _] (normalize-element-form [tag])
attrs-sym (gensym "attrs")]
`(let [~attrs-sym ~attrs]
(if (map? ~attrs-sym)
~(if (container-tag? tag content)
`(build-string ~(str "<" tag)
(render-attr-map (merge ~tag-attrs ~attrs-sym)) ">"
~@(compile-seq content)
~(str "</" tag ">"))
`(build-string ~(str "<" tag)
(render-attr-map (merge ~tag-attrs ~attrs-sym))
~(end-tag)))
~(if (container-tag? tag attrs)
`(build-string ~(str "<" tag (render-attr-map tag-attrs) ">")
~@(compile-seq (cons attrs-sym content))
~(str "</" tag ">"))
(build-string "<" tag (render-attr-map tag-attrs) (end-tag)))))))
(build-string
(if (map? ~attrs-sym)
~(if (container-tag? tag content)
`(build-string ~(str "<" tag)
(render-attr-map (merge ~tag-attrs ~attrs-sym))
">")
`(build-string ~(str "<" tag)
(render-attr-map (merge ~tag-attrs ~attrs-sym))
~(end-tag)))
(build-string ~(str "<" tag (render-attr-map tag-attrs) ">")
~@(compile-seq [attrs-sym])))
~@(compile-seq content)
;; ending tag, when the above code did not emit an ending tag
~(if (container-tag? tag content)
(str "</" tag ">")
`(when-not (map? ~attrs-sym)
~(str "</" tag ">")))))))

(defmethod compile-element :default
[element]
Expand Down
113 changes: 113 additions & 0 deletions test/hiccup/compiler_test.clj
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
(ns hiccup.compiler-test
(:require [clojure.test :refer :all]
[hiccup2.core :refer [html]]))

(deftest test-compile-element-literal-tag
;; `compile-element ::literal-tag` behavior varies based on the following
;; things, so we need to test all their combinations:
;; - mode: xhtml, html, xml, sgml
;; - runtime type of the first child: attributes, content, nil
;; - tag: normal element, void element

(testing "runtime attributes,"
(testing "normal tag"
(is (= (str (html {:mode :xhtml} [:p (identity {:id 1})]))
"<p id=\"1\"></p>"))
(is (= (str (html {:mode :html} [:p (identity {:id 1})]))
"<p id=\"1\"></p>"))
(is (= (str (html {:mode :xml} [:p (identity {:id 1})]))
"<p id=\"1\" />"))
(is (= (str (html {:mode :sgml} [:p (identity {:id 1})]))
"<p id=\"1\">")))
(testing "void tag"
(is (= (str (html {:mode :xhtml} [:br (identity {:id 1})]))
"<br id=\"1\" />"))
(is (= (str (html {:mode :html} [:br (identity {:id 1})]))
"<br id=\"1\">"))
(is (= (str (html {:mode :xml} [:br (identity {:id 1})]))
"<br id=\"1\" />"))
(is (= (str (html {:mode :sgml} [:br (identity {:id 1})]))
"<br id=\"1\">"))))

(testing "runtime content,"
(testing "normal tag"
(is (= (str (html {:mode :xhtml} [:p (identity "x")])) "<p>x</p>"))
(is (= (str (html {:mode :html} [:p (identity "x")])) "<p>x</p>"))
(is (= (str (html {:mode :xml} [:p (identity "x")])) "<p>x</p>"))
(is (= (str (html {:mode :sgml} [:p (identity "x")])) "<p>x</p>")))
(testing "void tag"
;; it's not valid HTML to have content inside void elements,
;; but Hiccup should still obey what the user told it to do
(is (= (str (html {:mode :xhtml} [:br (identity "x")])) "<br>x</br>"))
(is (= (str (html {:mode :html} [:br (identity "x")])) "<br>x</br>"))
(is (= (str (html {:mode :xml} [:br (identity "x")])) "<br>x</br>"))
(is (= (str (html {:mode :sgml} [:br (identity "x")])) "<br>x</br>"))))

(testing "runtime nil,"
(testing "normal tag"
(is (= (str (html {:mode :xhtml} [:p (identity nil)])) "<p></p>"))
(is (= (str (html {:mode :html} [:p (identity nil)])) "<p></p>"))
(is (= (str (html {:mode :xml} [:p (identity nil)])) "<p></p>"))
(is (= (str (html {:mode :sgml} [:p (identity nil)])) "<p></p>")))
(testing "void tag"
;; TODO: this might not be desired behavior (use case: the user has
;; a function which returns a map of attributes or nil)
(is (= (str (html {:mode :xhtml} [:br (identity nil)])) "<br></br>"))
(is (= (str (html {:mode :html} [:br (identity nil)])) "<br></br>"))
(is (= (str (html {:mode :xml} [:br (identity nil)])) "<br></br>"))
(is (= (str (html {:mode :sgml} [:br (identity nil)])) "<br></br>")))))

(deftest test-compile-element-default
(testing "runtime tag"
(is (= (str (html {:mode :xhtml} [(identity :p)])) "<p></p>"))
(is (= (str (html {:mode :html} [(identity :p)])) "<p></p>"))
(is (= (str (html {:mode :xml} [(identity :p)])) "<p />"))
(is (= (str (html {:mode :sgml} [(identity :p)])) "<p>")))

(testing "runtime tag with attributes"
(is (= (str (html {:mode :xhtml} [(identity :p) {:id 1}]))
(str (html {:mode :xhtml} [(identity :p) (identity {:id 1})]))
"<p id=\"1\"></p>"))
(is (= (str (html {:mode :html} [(identity :p) {:id 1}]))
(str (html {:mode :html} [(identity :p) (identity {:id 1})]))
"<p id=\"1\"></p>"))
(is (= (str (html {:mode :xml} [(identity :p) {:id 1}]))
(str (html {:mode :xml} [(identity :p) (identity {:id 1})]))
"<p id=\"1\" />"))
(is (= (str (html {:mode :sgml} [(identity :p) {:id 1}]))
(str (html {:mode :sgml} [(identity :p) (identity {:id 1})]))
"<p id=\"1\">")))

(testing "runtime tag with text content"
(is (= (str (html {:mode :xhtml} [(identity :p) "x"]))
(str (html {:mode :xhtml} [(identity :p) (identity "x")]))
"<p>x</p>"))
(is (= (str (html {:mode :html} [(identity :p) "x"]))
(str (html {:mode :html} [(identity :p) (identity "x")]))
"<p>x</p>"))
(is (= (str (html {:mode :xml} [(identity :p) "x"]))
(str (html {:mode :xml} [(identity :p) (identity "x")]))
"<p>x</p>"))
(is (= (str (html {:mode :sgml} [(identity :p) "x"]))
(str (html {:mode :sgml} [(identity :p) (identity "x")]))
"<p>x</p>")))

(testing "runtime tag with child elements"
;; FIXME: this should return "<p><span>x</span></p>"
(is (= (str (html {:mode :xhtml} [(identity :p) [:span "x"]]))
"<p>&lt;span&gt;x&lt;/span&gt;</p>"))
(is (= (str (html {:mode :html} [(identity :p) [:span "x"]]))
"<p>&lt;span&gt;x&lt;/span&gt;</p>"))
(is (= (str (html {:mode :xml} [(identity :p) [:span "x"]]))
"<p>&lt;span&gt;x&lt;/span&gt;</p>"))
(is (= (str (html {:mode :sgml} [(identity :p) [:span "x"]]))
"<p>&lt;span&gt;x&lt;/span&gt;</p>"))

(is (= (str (html {:mode :xhtml} [(identity :p) (identity [:span "x"])]))
"<p><span>x</span></p>"))
(is (= (str (html {:mode :html} [(identity :p) (identity [:span "x"])]))
"<p><span>x</span></p>"))
(is (= (str (html {:mode :xml} [(identity :p) (identity [:span "x"])]))
"<p><span>x</span></p>"))
(is (= (str (html {:mode :sgml} [(identity :p) (identity [:span "x"])]))
"<p><span>x</span></p>"))))
78 changes: 78 additions & 0 deletions test/hiccup2/optimizations_test.clj
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
(ns hiccup2.optimizations-test
(:require [clojure.test :refer :all]
[clojure.walk :as walk]
[hiccup2.core :as h]))

(defn- count-forms [data]
(count (filter seq? (tree-seq coll? seq data))))

(deftest method-code-size
;; With Hiccup 2.0.0-RC2, it was easy to cause the hiccup2.core/html macro to
;; generate so much bytecode that it would go over the 64KB limit of how much
;; bytecode one Java method may contain. It would crash the Clojure compiler
;; with a "Method code too large!" exception. These are a regression tests for
;; that. See https://github.com/weavejester/hiccup/issues/205

(testing "static elements should be concatenated to one string, also when they have dynamic sibling elements"
(let [baseline (walk/macroexpand-all
`(h/html [:div
[:p]
(identity nil)
[:p]]))
pathological (walk/macroexpand-all
`(h/html [:div
[:p] [:p] [:p] [:p] [:p]
(identity nil)
[:p] [:p] [:p] [:p] [:p]]))]
(is (= (count-forms baseline)
(count-forms pathological)))))

(testing "code size should grow O(n), instead of O(n^2), as more dynamic first-child elements are added"
(let [example-0 (walk/macroexpand-all
`(h/html [:div
[:div
[:div
[:div
[:div]]]]]))
example-1 (walk/macroexpand-all
`(h/html [:div (identity nil)
[:div
[:div
[:div
[:div]]]]]))
example-2 (walk/macroexpand-all
`(h/html [:div (identity nil)
[:div (identity nil)
[:div
[:div
[:div]]]]]))
example-3 (walk/macroexpand-all
`(h/html [:div (identity nil)
[:div (identity nil)
[:div (identity nil)
[:div
[:div]]]]]))
example-4 (walk/macroexpand-all
`(h/html [:div (identity nil)
[:div (identity nil)
[:div (identity nil)
[:div (identity nil)
[:div]]]]]))
example-5 (walk/macroexpand-all
`(h/html [:div (identity nil)
[:div (identity nil)
[:div (identity nil)
[:div (identity nil)
[:div (identity nil)]]]]]))
examples [example-0
example-1
example-2
example-3
example-4
example-5]
diffs (->> examples
(map count-forms)
(partition 2 1)
(map (fn [[a b]] (- b a))))]
(is (< (apply max diffs)
(* 1.1 (apply min diffs)))))))

0 comments on commit 00ddf99

Please sign in to comment.