From 021bac45eed4dd24ad57298b075ba7b3afc49b43 Mon Sep 17 00:00:00 2001 From: digitalMoksha Date: Fri, 10 May 2024 15:28:03 -0500 Subject: [PATCH 1/9] Add support for wikilinks syntax supporting either [[url|link text]] or [[link text|url]] --- examples/s-expr.rs | 2 + fuzz/fuzz_targets/all_options.rs | 4 +- fuzz/fuzz_targets/quadratic.rs | 4 + script/cibuild | 4 + src/main.rs | 4 + src/parser/inlines.rs | 140 +++++++++++++++++- src/parser/mod.rs | 30 ++++ src/tests.rs | 3 + src/tests/api.rs | 2 + .../fixtures/wikilinks_title_after_pipe.md | 47 ++++++ .../fixtures/wikilinks_title_before_pipe.md | 55 +++++++ src/tests/wikilinks.rs | 77 ++++++++++ 12 files changed, 366 insertions(+), 6 deletions(-) create mode 100644 src/tests/fixtures/wikilinks_title_after_pipe.md create mode 100644 src/tests/fixtures/wikilinks_title_before_pipe.md create mode 100644 src/tests/wikilinks.rs diff --git a/examples/s-expr.rs b/examples/s-expr.rs index ecee3eb5..dc1d2e52 100644 --- a/examples/s-expr.rs +++ b/examples/s-expr.rs @@ -86,6 +86,8 @@ fn dump(source: &str) -> io::Result<()> { .multiline_block_quotes(true) .math_dollars(true) .math_code(true) + .wikilinks_title_after_pipe(true) + .wikilinks_title_before_pipe(true) .build() .unwrap(); diff --git a/fuzz/fuzz_targets/all_options.rs b/fuzz/fuzz_targets/all_options.rs index 6b2b6df8..af9ed045 100644 --- a/fuzz/fuzz_targets/all_options.rs +++ b/fuzz/fuzz_targets/all_options.rs @@ -23,7 +23,9 @@ fuzz_target!(|s: &str| { extension.math_code = true; extension.front_matter_delimiter = Some("---".to_string()); extension.shortcodes = true; - + extension.wikilinks_title_after_pipe = true; + extension.wikilinks_title_before_pipe = true; + let mut parse = ParseOptions::default(); parse.smart = true; parse.default_info_string = Some("rust".to_string()); diff --git a/fuzz/fuzz_targets/quadratic.rs b/fuzz/fuzz_targets/quadratic.rs index f7afb1b0..4fcbe47a 100644 --- a/fuzz/fuzz_targets/quadratic.rs +++ b/fuzz/fuzz_targets/quadratic.rs @@ -196,6 
+196,8 @@ struct FuzzExtensionOptions { math_dollars: bool, math_code: bool, shortcodes: bool, + wikilinks_title_after_pipe: bool, + wikilinks_title_before_pipe: bool, } impl FuzzExtensionOptions { @@ -213,6 +215,8 @@ impl FuzzExtensionOptions { extension.math_dollars = self.math_dollars; extension.math_code = self.math_code; extension.shortcodes = self.shortcodes; + extension.wikilinks_title_after_pipe = self.wikilinks_title_after_pipe; + extension.wikilinks_title_before_pipe = self.wikilinks_title_before_pipe; extension.front_matter_delimiter = None; extension.header_ids = None; extension diff --git a/script/cibuild b/script/cibuild index 503c7124..2df91085 100755 --- a/script/cibuild +++ b/script/cibuild @@ -40,6 +40,10 @@ python3 spec_tests.py --no-normalize --spec ../../../src/tests/fixtures/math_dol || failed=1 python3 spec_tests.py --no-normalize --spec ../../../src/tests/fixtures/math_code.md "$PROGRAM_ARG -e math-code" \ || failed=1 +python3 spec_tests.py --no-normalize --spec ../../../src/tests/fixtures/wikilinks_title_after_pipe.md "$PROGRAM_ARG -e wikilinks-title-after-pipe" \ + || failed=1 +python3 spec_tests.py --no-normalize --spec ../../../src/tests/fixtures/wikilinks_title_before_pipe.md "$PROGRAM_ARG -e wikilinks-title-before-pipe" \ + || failed=1 python3 spec_tests.py --no-normalize --spec regression.txt "$PROGRAM_ARG" \ || failed=1 diff --git a/src/main.rs b/src/main.rs index a259232b..3caebc9b 100644 --- a/src/main.rs +++ b/src/main.rs @@ -159,6 +159,8 @@ enum Extension { MultilineBlockQuotes, MathDollars, MathCode, + WikilinksTitleAfterPipe, + WikilinksTitleBeforePipe, } #[derive(Clone, Copy, Debug, ValueEnum)] @@ -238,6 +240,8 @@ fn main() -> Result<(), Box> { .multiline_block_quotes(exts.contains(&Extension::MultilineBlockQuotes)) .math_dollars(exts.contains(&Extension::MathDollars)) .math_code(exts.contains(&Extension::MathCode)) + .wikilinks_title_after_pipe(exts.contains(&Extension::WikilinksTitleAfterPipe)) + 
.wikilinks_title_before_pipe(exts.contains(&Extension::WikilinksTitleBeforePipe)) .front_matter_delimiter(cli.front_matter_delimiter); #[cfg(feature = "shortcodes")] diff --git a/src/parser/inlines.rs b/src/parser/inlines.rs index c582b309..925e5711 100644 --- a/src/parser/inlines.rs +++ b/src/parser/inlines.rs @@ -183,11 +183,30 @@ impl<'a, 'r, 'o, 'd, 'i, 'c, 'subj> Subject<'a, 'r, 'o, 'd, 'i, 'c, 'subj> { '.' => Some(self.handle_period()), '[' => { self.pos += 1; - let inl = - self.make_inline(NodeValue::Text("[".to_string()), self.pos - 1, self.pos - 1); - self.push_bracket(false, inl); - self.within_brackets = true; - Some(inl) + + let mut wikilink_inl = None; + + if (self.options.extension.wikilinks_title_after_pipe + || self.options.extension.wikilinks_title_before_pipe) + && !self.within_brackets + && self.peek_char() == Some(&(b'[')) + { + wikilink_inl = self.handle_wikilink(); + } + + if wikilink_inl.is_none() { + let inl = self.make_inline( + NodeValue::Text("[".to_string()), + self.pos - 1, + self.pos - 1, + ); + self.push_bracket(false, inl); + self.within_brackets = true; + + Some(inl) + } else { + wikilink_inl + } } ']' => { self.within_brackets = false; @@ -1548,6 +1567,117 @@ impl<'a, 'r, 'o, 'd, 'i, 'c, 'subj> Subject<'a, 'r, 'o, 'd, 'i, 'c, 'subj> { } } + // Handles wikilink syntax + // [[link text|url]] + // [[url|link text]] + pub fn handle_wikilink(&mut self) -> Option<&'a AstNode<'a>> { + let startpos = self.pos; + let (url, title) = self.wikilink_url_title(); + + url?; + + let url_clean = strings::clean_url(url.unwrap()); + let title_clean = match title { + Some(title) => entity::unescape_html(title), + None => entity::unescape_html(url.unwrap()), + }; + + let nl = NodeLink { + url: String::from_utf8(url_clean).unwrap(), + title: String::new(), + }; + let inl = self.make_inline(NodeValue::Link(nl), startpos - 1, self.pos - 1); + inl.append(self.make_inline( + NodeValue::Text(String::from_utf8(title_clean).unwrap()), + startpos - 1, + 
self.pos - 1, + )); + + Some(inl) + } + + pub fn wikilink_url_title(&mut self) -> (Option<&[u8]>, Option<&[u8]>) { + let left_startpos = self.pos; + + if self.peek_char() != Some(&(b'[')) { + return (None, None); + } + + let found_left = self.wikilink_component(); + + if !found_left { + self.pos = left_startpos; + return (None, None); + } + + let left = strings::trim_slice(&self.input[left_startpos + 1..self.pos]); + + if self.peek_char() == Some(&(b']')) && self.peek_char_n(1) == Some(&(b']')) { + self.pos += 2; + return (Some(left), None); + } else if self.peek_char() != Some(&(b'|')) { + self.pos = left_startpos; + return (None, None); + } + + let right_startpos = self.pos; + let found_right = self.wikilink_component(); + + if !found_right { + self.pos = left_startpos; + return (None, None); + } + + let right = strings::trim_slice(&self.input[right_startpos + 1..self.pos]); + + if self.peek_char() == Some(&(b']')) && self.peek_char_n(1) == Some(&(b']')) { + self.pos += 2; + + if self.options.extension.wikilinks_title_after_pipe { + (Some(left), Some(right)) + } else { + (Some(right), Some(left)) + } + } else { + self.pos = left_startpos; + (None, None) + } + } + + // Locates the edge of a wikilink component (link text or url), and sets the + // self.pos to it's end if it's found. 
+ pub fn wikilink_component(&mut self) -> bool { + let startpos = self.pos; + + if self.peek_char() != Some(&(b'[')) && self.peek_char() != Some(&(b'|')) { + return false; + } + + self.pos += 1; + + let mut length = 0; + let mut c = 0; + while unwrap_into_copy(self.peek_char(), &mut c) && c != b'[' && c != b']' && c != b'|' { + if c == b'\\' { + self.pos += 1; + length += 1; + if self.peek_char().map_or(false, |&c| ispunct(c)) { + self.pos += 1; + length += 1; + } + } else { + self.pos += 1; + length += 1; + } + if length > MAX_LINK_LABEL_LENGTH { + self.pos = startpos; + return false; + } + } + + true + } + pub fn spnl(&mut self) { self.skip_spaces(); if self.skip_line_end() { diff --git a/src/parser/mod.rs b/src/parser/mod.rs index c5c10f4c..fbdb11e0 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -423,6 +423,36 @@ pub struct ExtensionOptions { /// "

Happy Friday! 😄

\n"); /// ``` pub shortcodes: bool, + + /// Enables wikilinks using title after pipe syntax + /// + /// ```` md + /// [[url|link text]] + /// ```` + /// + /// ``` + /// # use comrak::{markdown_to_html, Options}; + /// let mut options = Options::default(); + /// options.extension.wikilinks_title_after_pipe = true; + /// assert_eq!(markdown_to_html("[[url|link text]]", &options), + /// "

link text

\n"); + /// ``` + pub wikilinks_title_after_pipe: bool, + + /// Enables wikilinks using title before pipe syntax + /// + /// ```` md + /// [[link text|url]] + /// ```` + /// + /// ``` + /// # use comrak::{markdown_to_html, Options}; + /// let mut options = Options::default(); + /// options.extension.wikilinks_title_before_pipe = true; + /// assert_eq!(markdown_to_html("[[link text|url]]", &options), + /// "

link text

\n"); + /// ``` + pub wikilinks_title_before_pipe: bool, } #[non_exhaustive] diff --git a/src/tests.rs b/src/tests.rs index a3467bf9..986cc83c 100644 --- a/src/tests.rs +++ b/src/tests.rs @@ -25,6 +25,7 @@ mod superscript; mod table; mod tagfilter; mod tasklist; +mod wikilinks; mod xml; #[track_caller] @@ -141,6 +142,8 @@ macro_rules! html_opts { math_code: true, front_matter_delimiter: Some("---".to_string()), shortcodes: true, + wikilinks_title_after_pipe: true, + wikilinks_title_before_pipe: true, }, parse: $crate::ParseOptions { smart: true, diff --git a/src/tests/api.rs b/src/tests/api.rs index f89125b7..e7ec6022 100644 --- a/src/tests/api.rs +++ b/src/tests/api.rs @@ -50,6 +50,8 @@ fn exercise_full_api() { extension.front_matter_delimiter(None); #[cfg(feature = "shortcodes")] extension.shortcodes(true); + extension.wikilinks_title_after_pipe(true); + extension.wikilinks_title_before_pipe(true); let mut parse = ParseOptionsBuilder::default(); parse.smart(false); diff --git a/src/tests/fixtures/wikilinks_title_after_pipe.md b/src/tests/fixtures/wikilinks_title_after_pipe.md new file mode 100644 index 00000000..4b8e068e --- /dev/null +++ b/src/tests/fixtures/wikilinks_title_after_pipe.md @@ -0,0 +1,47 @@ +--- +title: Wikilinks +based_on: https://github.com/jgm/commonmark-hs/blob/master/commonmark-extensions/test/wikilinks_title_after_pipe.md +--- + +# Wikilinks, title after pipe + +Wikilinks can have one of the following forms: + + [[https://example.org]] + [[https://example.org|title]] + [[name of page]] + [[name of page|title]] + +With this version of wikilinks, the title comes after the pipe. + +```````````````````````````````` example +[[https://example.org]] +. +

https://example.org

+```````````````````````````````` + +```````````````````````````````` example +[[https://example.org|title]] +. +

title

+```````````````````````````````` + +```````````````````````````````` example +[[Name of page]] +. +

Name of page

+```````````````````````````````` + +```````````````````````````````` example +[[Name of page|Title]] +. +

Title

+```````````````````````````````` + +HTML entities are recognized both in the name of page and in the link title. + +```````````````````````````````` example +[[Geschütztes Leerzeichen|Über &nbsp;]] +. +

Über &nbsp;

+```````````````````````````````` \ No newline at end of file diff --git a/src/tests/fixtures/wikilinks_title_before_pipe.md b/src/tests/fixtures/wikilinks_title_before_pipe.md new file mode 100644 index 00000000..81e5ccbc --- /dev/null +++ b/src/tests/fixtures/wikilinks_title_before_pipe.md @@ -0,0 +1,55 @@ +--- +title: Wikilinks +based_on: https://github.com/jgm/commonmark-hs/blob/master/commonmark-extensions/test/wikilinks_title_before_pipe.md +--- + +# Wikilinks, title before pipe + +Wikilinks can have one of the following forms: + + [[https://example.org]] + [[title|https://example.org]] + [[name of page]] + [[title|name of page]] + +With this version of wikilinks, the title comes before the pipe. + +```````````````````````````````` example +[[https://example.org]] +. +

https://example.org

+```````````````````````````````` + +```````````````````````````````` example +[[title|https://example.org]] +. +

title

+```````````````````````````````` + +```````````````````````````````` example +[[Name of page]] +. +

Name of page

+```````````````````````````````` + +```````````````````````````````` example +[[Title|Name of page]] +. +

Title

+```````````````````````````````` + +Regular links should still work! + +```````````````````````````````` example +[Title](Name%20of%20page) +. +

Title

+```````````````````````````````` + +HTML entities are recognized both in the name of page and in the link title. + +```````````````````````````````` example +[[Über &nbsp;|Geschütztes Leerzeichen]] +. +

Über &nbsp;

+```````````````````````````````` \ No newline at end of file diff --git a/src/tests/wikilinks.rs b/src/tests/wikilinks.rs new file mode 100644 index 00000000..89faf48e --- /dev/null +++ b/src/tests/wikilinks.rs @@ -0,0 +1,77 @@ +use super::*; + +#[test] +fn wikilinks_does_not_unescape_html_entities_in_link_text() { + html_opts!( + [extension.wikilinks_title_after_pipe], + concat!("This is [[<script>alert(0)</script>|a <link]]",), + concat!("

This is a <link

\n"), + ); + + html_opts!( + [extension.wikilinks_title_before_pipe], + concat!("This is [[a <link|<script>alert(0)</script>]]",), + concat!("

This is a <link

\n"), + ); +} + +#[test] +fn wikilinks_sanitizes_the_href_attribute_case_1() { + html_opts!( + [extension.wikilinks_title_after_pipe], + concat!("[[http:\'\"injected=attribute><img/src=\"0\"onerror=\"alert(0)\">https://example.com|a]]",), + concat!("

a

\n"), + ); + + html_opts!( + [extension.wikilinks_title_before_pipe], + concat!("[[a|http:\'\"injected=attribute><img/src=\"0\"onerror=\"alert(0)\">https://example.com]]",), + concat!("

a

\n"), + ); +} + +#[test] +fn wikilinks_sanitizes_the_href_attribute_case_2() { + html_opts!( + [extension.wikilinks_title_after_pipe], + concat!("[[\'\"><svg><i/class=gl-show-field-errors><input/title=\"<script>alert(0)</script>\"/></svg>https://example.com|a]]",), + concat!("

a

\n"), + ); + + html_opts!( + [extension.wikilinks_title_before_pipe], + concat!("[[a|\'\"><svg><i/class=gl-show-field-errors><input/title=\"<script>alert(0)</script>\"/></svg>https://example.com]]",), + concat!("

a

\n"), + ); +} + +#[test] +fn sourcepos() { + assert_ast_match!( + [extension.wikilinks_title_after_pipe], + "This [[http://example.com|link text]] that\n", + (document (1:1-1:42) [ + (paragraph (1:1-1:42) [ + (text (1:1-1:5) "This ") + (link (1:6-1:37) [ + (text (1:6-1:37) "link text") + ]) + (text (1:38-1:42) " that") + ]) + ]) + ); + + assert_ast_match!( + [extension.wikilinks_title_before_pipe], + "This [[link text|http://example.com]] that\n", + (document (1:1-1:42) [ + (paragraph (1:1-1:42) [ + (text (1:1-1:5) "This ") + (link (1:6-1:37) [ + (text (1:6-1:37) "link text") + ]) + (text (1:38-1:42) " that") + ]) + ]) + ); +} From d22ac4867b6cc5ca48895d70fb4cf0ccd3ff1bc7 Mon Sep 17 00:00:00 2001 From: digitalMoksha Date: Sun, 12 May 2024 10:18:01 -0500 Subject: [PATCH 2/9] =?UTF-8?q?Output=20=E2=80=98data-wikilink=3D=E2=80=9C?= =?UTF-8?q?true=E2=80=9D=E2=80=99=20for=20wikilinks?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is so it’s possible to tell the difference between a normal link and a wikilink during post-processing. 
--- src/cm.rs | 26 ++++++++++++++++++- src/html.rs | 3 +++ src/nodes.rs | 3 +++ src/parser/autolink.rs | 3 +++ src/parser/inlines.rs | 8 +++++- src/parser/mod.rs | 4 +-- .../fixtures/wikilinks_title_after_pipe.md | 10 +++---- .../fixtures/wikilinks_title_before_pipe.md | 10 +++---- src/tests/wikilinks.rs | 19 ++++++++------ 9 files changed, 64 insertions(+), 22 deletions(-) diff --git a/src/cm.rs b/src/cm.rs index 192d2aa2..07feb21c 100644 --- a/src/cm.rs +++ b/src/cm.rs @@ -367,7 +367,13 @@ impl<'a, 'o> CommonMarkFormatter<'a, 'o> { NodeValue::TaskItem(symbol) => self.format_task_item(symbol, node, entering), NodeValue::Strikethrough => self.format_strikethrough(), NodeValue::Superscript => self.format_superscript(), - NodeValue::Link(ref nl) => return self.format_link(node, nl, entering), + NodeValue::Link(ref nl) => { + if nl.wikilink { + return self.format_wikilink(nl, entering); + } else { + return self.format_link(node, nl, entering); + } + } NodeValue::Image(ref nl) => self.format_image(nl, allow_wrap, entering), #[cfg(feature = "shortcodes")] NodeValue::ShortCode(ref ne) => self.format_shortcode(ne, entering), @@ -689,6 +695,24 @@ impl<'a, 'o> CommonMarkFormatter<'a, 'o> { true } + fn format_wikilink(&mut self, nl: &NodeLink, entering: bool) -> bool { + if entering { + write!(self, "[[").unwrap(); + if self.options.extension.wikilinks_title_after_pipe { + self.output(nl.url.as_bytes(), false, Escaping::Url); + write!(self, "|").unwrap(); + } + } else { + if self.options.extension.wikilinks_title_before_pipe { + write!(self, "|").unwrap(); + self.output(nl.url.as_bytes(), false, Escaping::Url); + } + write!(self, "]]").unwrap(); + } + + true + } + fn format_image(&mut self, nl: &NodeLink, allow_wrap: bool, entering: bool) { if entering { write!(self, "![").unwrap(); diff --git a/src/html.rs b/src/html.rs index c78a229b..3a6b6971 100644 --- a/src/html.rs +++ b/src/html.rs @@ -826,6 +826,9 @@ impl<'o> HtmlFormatter<'o> { self.output.write_all(b"\" title=\"")?; 
self.escape(nl.title.as_bytes())?; } + if nl.wikilink { + self.output.write_all(b"\" data-wikilink=\"true")?; + } self.output.write_all(b"\">")?; } else { self.output.write_all(b"")?; diff --git a/src/nodes.rs b/src/nodes.rs index 0e073263..24230318 100644 --- a/src/nodes.rs +++ b/src/nodes.rs @@ -251,6 +251,9 @@ pub struct NodeLink { /// Note this field is used for the `title` attribute by the HTML formatter even for images; /// `alt` text is supplied in the image inline text. pub title: String, + + /// Whether this is a wikilink or not + pub wikilink: bool, } /// The metadata of a list; the kind of list, the delimiter used and so on. diff --git a/src/parser/autolink.rs b/src/parser/autolink.rs index 287dc64d..a9a57040 100644 --- a/src/parser/autolink.rs +++ b/src/parser/autolink.rs @@ -122,6 +122,7 @@ fn www_match<'a>( NodeValue::Link(NodeLink { url, title: String::new(), + wikilink: false, }), (0, 1, 0, 1).into(), ); @@ -290,6 +291,7 @@ fn url_match<'a>( NodeValue::Link(NodeLink { url: url.clone(), title: String::new(), + wikilink: false, }), (0, 1, 0, 1).into(), ); @@ -398,6 +400,7 @@ fn email_match<'a>( NodeValue::Link(NodeLink { url, title: String::new(), + wikilink: false, }), (0, 1, 0, 1).into(), ); diff --git a/src/parser/inlines.rs b/src/parser/inlines.rs index 925e5711..60322714 100644 --- a/src/parser/inlines.rs +++ b/src/parser/inlines.rs @@ -1491,7 +1491,11 @@ impl<'a, 'r, 'o, 'd, 'i, 'c, 'subj> Subject<'a, 'r, 'o, 'd, 'i, 'c, 'subj> { pub fn close_bracket_match(&mut self, is_image: bool, url: String, title: String) { let brackets_len = self.brackets.len(); - let nl = NodeLink { url, title }; + let nl = NodeLink { + url, + title, + wikilink: false, + }; let inl = self.make_inline( if is_image { NodeValue::Image(nl) @@ -1585,6 +1589,7 @@ impl<'a, 'r, 'o, 'd, 'i, 'c, 'subj> Subject<'a, 'r, 'o, 'd, 'i, 'c, 'subj> { let nl = NodeLink { url: String::from_utf8(url_clean).unwrap(), title: String::new(), + wikilink: true, }; let inl = 
self.make_inline(NodeValue::Link(nl), startpos - 1, self.pos - 1); inl.append(self.make_inline( @@ -1724,6 +1729,7 @@ impl<'a, 'r, 'o, 'd, 'i, 'c, 'subj> Subject<'a, 'r, 'o, 'd, 'i, 'c, 'subj> { NodeValue::Link(NodeLink { url: String::from_utf8(strings::clean_autolink(url, kind)).unwrap(), title: String::new(), + wikilink: false, }), start_column + 1, end_column + 1, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index fbdb11e0..006d0442 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -435,7 +435,7 @@ pub struct ExtensionOptions { /// let mut options = Options::default(); /// options.extension.wikilinks_title_after_pipe = true; /// assert_eq!(markdown_to_html("[[url|link text]]", &options), - /// "

link text

\n"); + /// "

link text

\n"); /// ``` pub wikilinks_title_after_pipe: bool, @@ -450,7 +450,7 @@ pub struct ExtensionOptions { /// let mut options = Options::default(); /// options.extension.wikilinks_title_before_pipe = true; /// assert_eq!(markdown_to_html("[[link text|url]]", &options), - /// "

link text

\n"); + /// "

link text

\n"); /// ``` pub wikilinks_title_before_pipe: bool, } diff --git a/src/tests/fixtures/wikilinks_title_after_pipe.md b/src/tests/fixtures/wikilinks_title_after_pipe.md index 4b8e068e..7c645ccf 100644 --- a/src/tests/fixtures/wikilinks_title_after_pipe.md +++ b/src/tests/fixtures/wikilinks_title_after_pipe.md @@ -17,25 +17,25 @@ With this version of wikilinks, the title comes after the pipe. ```````````````````````````````` example [[https://example.org]] . -

https://example.org

+

https://example.org

```````````````````````````````` ```````````````````````````````` example [[https://example.org|title]] . -

title

+

title

```````````````````````````````` ```````````````````````````````` example [[Name of page]] . -

Name of page

+

Name of page

```````````````````````````````` ```````````````````````````````` example [[Name of page|Title]] . -

Title

+

Title

```````````````````````````````` HTML entities are recognized both in the name of page and in the link title. @@ -43,5 +43,5 @@ HTML entities are recognized both in the name of page and in the link title. ```````````````````````````````` example [[Geschütztes Leerzeichen|Über &nbsp;]] . -

Über &nbsp;

+

Über &nbsp;

```````````````````````````````` \ No newline at end of file diff --git a/src/tests/fixtures/wikilinks_title_before_pipe.md b/src/tests/fixtures/wikilinks_title_before_pipe.md index 81e5ccbc..e430380c 100644 --- a/src/tests/fixtures/wikilinks_title_before_pipe.md +++ b/src/tests/fixtures/wikilinks_title_before_pipe.md @@ -17,25 +17,25 @@ With this version of wikilinks, the title comes before the pipe. ```````````````````````````````` example [[https://example.org]] . -

https://example.org

+

https://example.org

```````````````````````````````` ```````````````````````````````` example [[title|https://example.org]] . -

title

+

title

```````````````````````````````` ```````````````````````````````` example [[Name of page]] . -

Name of page

+

Name of page

```````````````````````````````` ```````````````````````````````` example [[Title|Name of page]] . -

Title

+

Title

```````````````````````````````` Regular links should still work! @@ -51,5 +51,5 @@ HTML entities are recognized both in the name of page and in the link title. ```````````````````````````````` example [[Über &nbsp;|Geschütztes Leerzeichen]] . -

Über &nbsp;

+

Über &nbsp;

```````````````````````````````` \ No newline at end of file diff --git a/src/tests/wikilinks.rs b/src/tests/wikilinks.rs index 89faf48e..9f4167f7 100644 --- a/src/tests/wikilinks.rs +++ b/src/tests/wikilinks.rs @@ -1,17 +1,20 @@ use super::*; +// html_opts! does a roundtrip check unless sourcepos is set. +// These cases don't work roundtrip, because converting to commonmark +// automatically escapes certain characters. #[test] fn wikilinks_does_not_unescape_html_entities_in_link_text() { html_opts!( - [extension.wikilinks_title_after_pipe], + [extension.wikilinks_title_after_pipe, render.sourcepos], concat!("This is [[<script>alert(0)</script>|a <link]]",), - concat!("

This is a <link

\n"), + concat!("

This is a <link

\n"), ); html_opts!( - [extension.wikilinks_title_before_pipe], + [extension.wikilinks_title_before_pipe, render.sourcepos], concat!("This is [[a <link|<script>alert(0)</script>]]",), - concat!("

This is a <link

\n"), + concat!("

This is a <link

\n"), ); } @@ -20,13 +23,13 @@ fn wikilinks_sanitizes_the_href_attribute_case_1() { html_opts!( [extension.wikilinks_title_after_pipe], concat!("[[http:\'\"injected=attribute><img/src=\"0\"onerror=\"alert(0)\">https://example.com|a]]",), - concat!("

a

\n"), + concat!("

a

\n"), ); html_opts!( [extension.wikilinks_title_before_pipe], concat!("[[a|http:\'\"injected=attribute><img/src=\"0\"onerror=\"alert(0)\">https://example.com]]",), - concat!("

a

\n"), + concat!("

a

\n"), ); } @@ -35,13 +38,13 @@ fn wikilinks_sanitizes_the_href_attribute_case_2() { html_opts!( [extension.wikilinks_title_after_pipe], concat!("[[\'\"><svg><i/class=gl-show-field-errors><input/title=\"<script>alert(0)</script>\"/></svg>https://example.com|a]]",), - concat!("

a

\n"), + concat!("

a

\n"), ); html_opts!( [extension.wikilinks_title_before_pipe], concat!("[[a|\'\"><svg><i/class=gl-show-field-errors><input/title=\"<script>alert(0)</script>\"/></svg>https://example.com]]",), - concat!("

a

\n"), + concat!("

a

\n"), ); } From 158f06a839e832e1e8388f233adbe8828cd4fd53 Mon Sep 17 00:00:00 2001 From: digitalMoksha Date: Sun, 12 May 2024 14:46:56 -0500 Subject: [PATCH 3/9] Check that wikilinks supercedes relaxed_autolinks --- src/tests/wikilinks.rs | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/src/tests/wikilinks.rs b/src/tests/wikilinks.rs index 9f4167f7..0017d467 100644 --- a/src/tests/wikilinks.rs +++ b/src/tests/wikilinks.rs @@ -48,6 +48,31 @@ fn wikilinks_sanitizes_the_href_attribute_case_2() { ); } +#[test] +fn wikilinks_supercedes_relaxed_autolinks() { + html_opts!( + [ + extension.wikilinks_title_after_pipe, + parse.relaxed_autolinks + ], + concat!("[[http://example.com]]",), + concat!( + "

http://example.com

\n" + ), + ); + + html_opts!( + [ + extension.wikilinks_title_before_pipe, + parse.relaxed_autolinks + ], + concat!("[[http://example.com]]",), + concat!( + "

http://example.com

\n" + ), + ); +} + #[test] fn sourcepos() { assert_ast_match!( From 5e6fb995599cbf90644e1445cde70b46b54d3e7b Mon Sep 17 00:00:00 2001 From: digitalMoksha Date: Mon, 13 May 2024 16:20:09 -0500 Subject: [PATCH 4/9] Apply suggestions from code review Co-authored-by: Asherah Connor --- src/nodes.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/nodes.rs b/src/nodes.rs index 24230318..1de8cf50 100644 --- a/src/nodes.rs +++ b/src/nodes.rs @@ -252,8 +252,8 @@ pub struct NodeLink { /// `alt` text is supplied in the image inline text. pub title: String, - /// Whether this is a wikilink or not - pub wikilink: bool, + /// Whether this is a wikilink or not. + pub wikilink: bool = false, } /// The metadata of a list; the kind of list, the delimiter used and so on. From 50eaa413438ec44d0da08bee24c1af459a6ba53e Mon Sep 17 00:00:00 2001 From: digitalMoksha Date: Mon, 13 May 2024 20:30:45 -0500 Subject: [PATCH 5/9] Use a WikilinkComponents struct --- src/nodes.rs | 2 +- src/parser/inlines.rs | 46 +++++++++++++++++++++++++++++-------------- 2 files changed, 32 insertions(+), 16 deletions(-) diff --git a/src/nodes.rs b/src/nodes.rs index 1de8cf50..a8821921 100644 --- a/src/nodes.rs +++ b/src/nodes.rs @@ -253,7 +253,7 @@ pub struct NodeLink { pub title: String, /// Whether this is a wikilink or not. - pub wikilink: bool = false, + pub wikilink: bool, } /// The metadata of a list; the kind of list, the delimiter used and so on. 
diff --git a/src/parser/inlines.rs b/src/parser/inlines.rs index 60322714..88dab3f2 100644 --- a/src/parser/inlines.rs +++ b/src/parser/inlines.rs @@ -105,6 +105,12 @@ struct Bracket<'a> { bracket_after: bool, } +#[derive(Clone, Copy)] +struct WikilinkComponents<'i> { + url: &'i [u8], + title: Option<&'i [u8]>, +} + impl<'a, 'r, 'o, 'd, 'i, 'c, 'subj> Subject<'a, 'r, 'o, 'd, 'i, 'c, 'subj> { pub fn new( arena: &'a Arena>, @@ -1576,14 +1582,15 @@ impl<'a, 'r, 'o, 'd, 'i, 'c, 'subj> Subject<'a, 'r, 'o, 'd, 'i, 'c, 'subj> { // [[url|link text]] pub fn handle_wikilink(&mut self) -> Option<&'a AstNode<'a>> { let startpos = self.pos; - let (url, title) = self.wikilink_url_title(); + let component = self.wikilink_url_title(); - url?; + component?; - let url_clean = strings::clean_url(url.unwrap()); - let title_clean = match title { + let component = component.unwrap(); + let url_clean = strings::clean_url(component.url); + let title_clean = match component.title { Some(title) => entity::unescape_html(title), - None => entity::unescape_html(url.unwrap()), + None => entity::unescape_html(component.url), }; let nl = NodeLink { @@ -1601,28 +1608,31 @@ impl<'a, 'r, 'o, 'd, 'i, 'c, 'subj> Subject<'a, 'r, 'o, 'd, 'i, 'c, 'subj> { Some(inl) } - pub fn wikilink_url_title(&mut self) -> (Option<&[u8]>, Option<&[u8]>) { + fn wikilink_url_title(&mut self) -> Option> { let left_startpos = self.pos; if self.peek_char() != Some(&(b'[')) { - return (None, None); + return None; } let found_left = self.wikilink_component(); if !found_left { self.pos = left_startpos; - return (None, None); + return None; } let left = strings::trim_slice(&self.input[left_startpos + 1..self.pos]); if self.peek_char() == Some(&(b']')) && self.peek_char_n(1) == Some(&(b']')) { self.pos += 2; - return (Some(left), None); + return Some(WikilinkComponents { + url: left, + title: None, + }); } else if self.peek_char() != Some(&(b'|')) { self.pos = left_startpos; - return (None, None); + return None; } let 
right_startpos = self.pos; @@ -1630,7 +1640,7 @@ impl<'a, 'r, 'o, 'd, 'i, 'c, 'subj> Subject<'a, 'r, 'o, 'd, 'i, 'c, 'subj> { if !found_right { self.pos = left_startpos; - return (None, None); + return None; } let right = strings::trim_slice(&self.input[right_startpos + 1..self.pos]); @@ -1639,19 +1649,25 @@ impl<'a, 'r, 'o, 'd, 'i, 'c, 'subj> Subject<'a, 'r, 'o, 'd, 'i, 'c, 'subj> { self.pos += 2; if self.options.extension.wikilinks_title_after_pipe { - (Some(left), Some(right)) + Some(WikilinkComponents { + url: left, + title: Some(right), + }) } else { - (Some(right), Some(left)) + Some(WikilinkComponents { + url: right, + title: Some(left), + }) } } else { self.pos = left_startpos; - (None, None) + None } } // Locates the edge of a wikilink component (link text or url), and sets the // self.pos to it's end if it's found. - pub fn wikilink_component(&mut self) -> bool { + fn wikilink_component(&mut self) -> bool { let startpos = self.pos; if self.peek_char() != Some(&(b'[')) && self.peek_char() != Some(&(b'|')) { From e65c4e832e2e36618db80d9366635916b92d208e Mon Sep 17 00:00:00 2001 From: digitalMoksha Date: Mon, 13 May 2024 20:31:00 -0500 Subject: [PATCH 6/9] Add spec for exceeding label limit --- src/tests/wikilinks.rs | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/tests/wikilinks.rs b/src/tests/wikilinks.rs index 0017d467..21c6bdf2 100644 --- a/src/tests/wikilinks.rs +++ b/src/tests/wikilinks.rs @@ -73,6 +73,18 @@ fn wikilinks_supercedes_relaxed_autolinks() { ); } +#[test] +fn wikilinks_exceeds_label_limit() { + let long_label = format!("[[{:b<1100}]]", "a"); + let expected = format!("

{}

\n", long_label); + + html_opts!( + [extension.wikilinks_title_after_pipe], + &long_label, + &expected, + ); +} + #[test] fn sourcepos() { assert_ast_match!( From 653471f02999d70fdf3214e01a0b524d3db4c94c Mon Sep 17 00:00:00 2001 From: digitalMoksha Date: Wed, 15 May 2024 11:44:06 -0500 Subject: [PATCH 7/9] Use new NodeValue::WikiLink instead of NodeValue::Link --- src/cm.rs | 13 ++++--------- src/html.rs | 18 +++++++++++++++--- src/nodes.rs | 17 ++++++++++++++--- src/parser/autolink.rs | 3 --- src/parser/inlines.rs | 41 ++++++++++++++++------------------------- src/parser/mod.rs | 12 ++++++------ src/tests/api.rs | 3 +++ src/tests/wikilinks.rs | 26 +++++++++++++------------- src/xml.rs | 5 +++++ 9 files changed, 76 insertions(+), 62 deletions(-) diff --git a/src/cm.rs b/src/cm.rs index 07feb21c..9b8ba51b 100644 --- a/src/cm.rs +++ b/src/cm.rs @@ -2,7 +2,7 @@ use crate::ctype::{isalpha, isdigit, ispunct, isspace}; use crate::nodes::TableAlignment; use crate::nodes::{ AstNode, ListDelimType, ListType, NodeCodeBlock, NodeHeading, NodeHtmlBlock, NodeLink, - NodeMath, NodeTable, NodeValue, + NodeMath, NodeTable, NodeValue, NodeWikiLink, }; #[cfg(feature = "shortcodes")] use crate::parser::shortcodes::NodeShortCode; @@ -367,13 +367,7 @@ impl<'a, 'o> CommonMarkFormatter<'a, 'o> { NodeValue::TaskItem(symbol) => self.format_task_item(symbol, node, entering), NodeValue::Strikethrough => self.format_strikethrough(), NodeValue::Superscript => self.format_superscript(), - NodeValue::Link(ref nl) => { - if nl.wikilink { - return self.format_wikilink(nl, entering); - } else { - return self.format_link(node, nl, entering); - } - } + NodeValue::Link(ref nl) => return self.format_link(node, nl, entering), NodeValue::Image(ref nl) => self.format_image(nl, allow_wrap, entering), #[cfg(feature = "shortcodes")] NodeValue::ShortCode(ref ne) => self.format_shortcode(ne, entering), @@ -391,6 +385,7 @@ impl<'a, 'o> CommonMarkFormatter<'a, 'o> { // noop - automatic escaping is already being 
done } NodeValue::Math(ref math) => self.format_math(math, allow_wrap, entering), + NodeValue::WikiLink(ref nl) => return self.format_wikilink(nl, entering), }; true } @@ -695,7 +690,7 @@ impl<'a, 'o> CommonMarkFormatter<'a, 'o> { true } - fn format_wikilink(&mut self, nl: &NodeLink, entering: bool) -> bool { + fn format_wikilink(&mut self, nl: &NodeWikiLink, entering: bool) -> bool { if entering { write!(self, "[[").unwrap(); if self.options.extension.wikilinks_title_after_pipe { diff --git a/src/html.rs b/src/html.rs index 3a6b6971..025565d2 100644 --- a/src/html.rs +++ b/src/html.rs @@ -826,9 +826,6 @@ impl<'o> HtmlFormatter<'o> { self.output.write_all(b"\" title=\"")?; self.escape(nl.title.as_bytes())?; } - if nl.wikilink { - self.output.write_all(b"\" data-wikilink=\"true")?; - } self.output.write_all(b"\">")?; } else { self.output.write_all(b"")?; @@ -1041,6 +1038,21 @@ impl<'o> HtmlFormatter<'o> { self.render_math_inline(node, literal, display_math, dollar_math)?; } } + NodeValue::WikiLink(ref nl) => { + if entering { + self.output.write_all(b"")?; + } else { + self.output.write_all(b"")?; + } + } } Ok(false) } diff --git a/src/nodes.rs b/src/nodes.rs index a8821921..79bbfd7e 100644 --- a/src/nodes.rs +++ b/src/nodes.rs @@ -182,6 +182,9 @@ pub enum NodeValue { /// **Inline**. A character that has been [escaped](https://github.github.com/gfm/#backslash-escapes) Escaped, + + /// **Inline**. A wikilink to some URL. + WikiLink(NodeWikiLink), } /// Alignment of a single table cell. @@ -251,9 +254,13 @@ pub struct NodeLink { /// Note this field is used for the `title` attribute by the HTML formatter even for images; /// `alt` text is supplied in the image inline text. pub title: String, +} - /// Whether this is a wikilink or not. - pub wikilink: bool, +/// The details of a wikilink's destination. +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct NodeWikiLink { + /// The URL for the link destination. 
+ pub url: String, } /// The metadata of a list; the kind of list, the delimiter used and so on. @@ -492,6 +499,7 @@ impl NodeValue { NodeValue::MultilineBlockQuote(_) => "multiline_block_quote", NodeValue::Escaped => "escaped", NodeValue::Math(..) => "math", + NodeValue::WikiLink(..) => "wikilink", } } } @@ -642,7 +650,8 @@ pub fn can_contain_type<'a>(node: &'a AstNode<'a>, child: &NodeValue) -> bool { | NodeValue::Emph | NodeValue::Strong | NodeValue::Link(..) - | NodeValue::Image(..) => !child.block(), + | NodeValue::Image(..) + | NodeValue::WikiLink(..) => !child.block(), NodeValue::Table(..) => matches!(*child, NodeValue::TableRow(..)), @@ -660,6 +669,7 @@ pub fn can_contain_type<'a>(node: &'a AstNode<'a>, child: &NodeValue) -> bool { | NodeValue::Strikethrough | NodeValue::HtmlInline(..) | NodeValue::Math(..) + | NodeValue::WikiLink(..) ), #[cfg(feature = "shortcodes")] @@ -675,6 +685,7 @@ pub fn can_contain_type<'a>(node: &'a AstNode<'a>, child: &NodeValue) -> bool { | NodeValue::Strikethrough | NodeValue::HtmlInline(..) | NodeValue::Math(..) + | NodeValue::WikiLink(..) 
), NodeValue::MultilineBlockQuote(_) => { diff --git a/src/parser/autolink.rs b/src/parser/autolink.rs index a9a57040..287dc64d 100644 --- a/src/parser/autolink.rs +++ b/src/parser/autolink.rs @@ -122,7 +122,6 @@ fn www_match<'a>( NodeValue::Link(NodeLink { url, title: String::new(), - wikilink: false, }), (0, 1, 0, 1).into(), ); @@ -291,7 +290,6 @@ fn url_match<'a>( NodeValue::Link(NodeLink { url: url.clone(), title: String::new(), - wikilink: false, }), (0, 1, 0, 1).into(), ); @@ -400,7 +398,6 @@ fn email_match<'a>( NodeValue::Link(NodeLink { url, title: String::new(), - wikilink: false, }), (0, 1, 0, 1).into(), ); diff --git a/src/parser/inlines.rs b/src/parser/inlines.rs index 88dab3f2..b3811f3c 100644 --- a/src/parser/inlines.rs +++ b/src/parser/inlines.rs @@ -2,7 +2,8 @@ use crate::arena_tree::Node; use crate::ctype::{isdigit, ispunct, isspace}; use crate::entity; use crate::nodes::{ - Ast, AstNode, NodeCode, NodeFootnoteReference, NodeLink, NodeMath, NodeValue, Sourcepos, + Ast, AstNode, NodeCode, NodeFootnoteReference, NodeLink, NodeMath, NodeValue, NodeWikiLink, + Sourcepos, }; #[cfg(feature = "shortcodes")] use crate::parser::shortcodes::NodeShortCode; @@ -108,7 +109,7 @@ struct Bracket<'a> { #[derive(Clone, Copy)] struct WikilinkComponents<'i> { url: &'i [u8], - title: Option<&'i [u8]>, + link_label: Option<&'i [u8]>, } impl<'a, 'r, 'o, 'd, 'i, 'c, 'subj> Subject<'a, 'r, 'o, 'd, 'i, 'c, 'subj> { @@ -1497,11 +1498,7 @@ impl<'a, 'r, 'o, 'd, 'i, 'c, 'subj> Subject<'a, 'r, 'o, 'd, 'i, 'c, 'subj> { pub fn close_bracket_match(&mut self, is_image: bool, url: String, title: String) { let brackets_len = self.brackets.len(); - let nl = NodeLink { - url, - title, - wikilink: false, - }; + let nl = NodeLink { url, title }; let inl = self.make_inline( if is_image { NodeValue::Image(nl) @@ -1582,25 +1579,20 @@ impl<'a, 'r, 'o, 'd, 'i, 'c, 'subj> Subject<'a, 'r, 'o, 'd, 'i, 'c, 'subj> { // [[url|link text]] pub fn handle_wikilink(&mut self) -> Option<&'a AstNode<'a>> { 
let startpos = self.pos; - let component = self.wikilink_url_title(); - - component?; - - let component = component.unwrap(); + let component = self.wikilink_url_link_label(); + let component = component?; let url_clean = strings::clean_url(component.url); - let title_clean = match component.title { - Some(title) => entity::unescape_html(title), + let link_label_clean = match component.link_label { + Some(link_label) => entity::unescape_html(link_label), None => entity::unescape_html(component.url), }; - let nl = NodeLink { + let nl = NodeWikiLink { url: String::from_utf8(url_clean).unwrap(), - title: String::new(), - wikilink: true, }; - let inl = self.make_inline(NodeValue::Link(nl), startpos - 1, self.pos - 1); + let inl = self.make_inline(NodeValue::WikiLink(nl), startpos - 1, self.pos - 1); inl.append(self.make_inline( - NodeValue::Text(String::from_utf8(title_clean).unwrap()), + NodeValue::Text(String::from_utf8(link_label_clean).unwrap()), startpos - 1, self.pos - 1, )); @@ -1608,7 +1600,7 @@ impl<'a, 'r, 'o, 'd, 'i, 'c, 'subj> Subject<'a, 'r, 'o, 'd, 'i, 'c, 'subj> { Some(inl) } - fn wikilink_url_title(&mut self) -> Option> { + fn wikilink_url_link_label(&mut self) -> Option> { let left_startpos = self.pos; if self.peek_char() != Some(&(b'[')) { @@ -1628,7 +1620,7 @@ impl<'a, 'r, 'o, 'd, 'i, 'c, 'subj> Subject<'a, 'r, 'o, 'd, 'i, 'c, 'subj> { self.pos += 2; return Some(WikilinkComponents { url: left, - title: None, + link_label: None, }); } else if self.peek_char() != Some(&(b'|')) { self.pos = left_startpos; @@ -1651,12 +1643,12 @@ impl<'a, 'r, 'o, 'd, 'i, 'c, 'subj> Subject<'a, 'r, 'o, 'd, 'i, 'c, 'subj> { if self.options.extension.wikilinks_title_after_pipe { Some(WikilinkComponents { url: left, - title: Some(right), + link_label: Some(right), }) } else { Some(WikilinkComponents { url: right, - title: Some(left), + link_label: Some(left), }) } } else { @@ -1665,7 +1657,7 @@ impl<'a, 'r, 'o, 'd, 'i, 'c, 'subj> Subject<'a, 'r, 'o, 'd, 'i, 'c, 'subj> { } } 
- // Locates the edge of a wikilink component (link text or url), and sets the + // Locates the edge of a wikilink component (link label or url), and sets the // self.pos to it's end if it's found. fn wikilink_component(&mut self) -> bool { let startpos = self.pos; @@ -1745,7 +1737,6 @@ impl<'a, 'r, 'o, 'd, 'i, 'c, 'subj> Subject<'a, 'r, 'o, 'd, 'i, 'c, 'subj> { NodeValue::Link(NodeLink { url: String::from_utf8(strings::clean_autolink(url, kind)).unwrap(), title: String::new(), - wikilink: false, }), start_column + 1, end_column + 1, diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 006d0442..bddd69f5 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -427,30 +427,30 @@ pub struct ExtensionOptions { /// Enables wikilinks using title after pipe syntax /// /// ```` md - /// [[url|link text]] + /// [[url|link label]] /// ```` /// /// ``` /// # use comrak::{markdown_to_html, Options}; /// let mut options = Options::default(); /// options.extension.wikilinks_title_after_pipe = true; - /// assert_eq!(markdown_to_html("[[url|link text]]", &options), - /// "

link text

\n"); + /// assert_eq!(markdown_to_html("[[url|link label]]", &options), + /// "

link label

\n"); /// ``` pub wikilinks_title_after_pipe: bool, /// Enables wikilinks using title before pipe syntax /// /// ```` md - /// [[link text|url]] + /// [[link label|url]] /// ```` /// /// ``` /// # use comrak::{markdown_to_html, Options}; /// let mut options = Options::default(); /// options.extension.wikilinks_title_before_pipe = true; - /// assert_eq!(markdown_to_html("[[link text|url]]", &options), - /// "

link text

\n"); + /// assert_eq!(markdown_to_html("[[link label|url]]", &options), + /// "

link label

\n"); /// ``` pub wikilinks_title_before_pipe: bool, } diff --git a/src/tests/api.rs b/src/tests/api.rs index e7ec6022..16adef8d 100644 --- a/src/tests/api.rs +++ b/src/tests/api.rs @@ -228,5 +228,8 @@ fn exercise_full_api() { let _: bool = math.dollar_math; let _: String = math.literal; } + nodes::NodeValue::WikiLink(nl) => { + let _: String = nl.url; + } } } diff --git a/src/tests/wikilinks.rs b/src/tests/wikilinks.rs index 21c6bdf2..e5078f05 100644 --- a/src/tests/wikilinks.rs +++ b/src/tests/wikilinks.rs @@ -4,7 +4,7 @@ use super::*; // These cases don't work roundtrip, because converting to commonmark // automatically escapes certain characters. #[test] -fn wikilinks_does_not_unescape_html_entities_in_link_text() { +fn wikilinks_does_not_unescape_html_entities_in_link_label() { html_opts!( [extension.wikilinks_title_after_pipe, render.sourcepos], concat!("This is [[<script>alert(0)</script>|a <link]]",), @@ -89,28 +89,28 @@ fn wikilinks_exceeds_label_limit() { fn sourcepos() { assert_ast_match!( [extension.wikilinks_title_after_pipe], - "This [[http://example.com|link text]] that\n", - (document (1:1-1:42) [ - (paragraph (1:1-1:42) [ + "This [[http://example.com|link label]] that\n", + (document (1:1-1:43) [ + (paragraph (1:1-1:43) [ (text (1:1-1:5) "This ") - (link (1:6-1:37) [ - (text (1:6-1:37) "link text") + (wikilink (1:6-1:38) [ + (text (1:6-1:38) "link label") ]) - (text (1:38-1:42) " that") + (text (1:39-1:43) " that") ]) ]) ); assert_ast_match!( [extension.wikilinks_title_before_pipe], - "This [[link text|http://example.com]] that\n", - (document (1:1-1:42) [ - (paragraph (1:1-1:42) [ + "This [[link label|http://example.com]] that\n", + (document (1:1-1:43) [ + (paragraph (1:1-1:43) [ (text (1:1-1:5) "This ") - (link (1:6-1:37) [ - (text (1:6-1:37) "link text") + (wikilink (1:6-1:38) [ + (text (1:6-1:38) "link label") ]) - (text (1:38-1:42) " that") + (text (1:39-1:43) " that") ]) ]) ); diff --git a/src/xml.rs b/src/xml.rs index dd90ecf0..3d2e3ebd 
100644 --- a/src/xml.rs +++ b/src/xml.rs @@ -273,6 +273,11 @@ impl<'o> XmlFormatter<'o> { write!(self.output, " { + self.output.write_all(b" destination=\"")?; + self.escape(nl.url.as_bytes())?; + self.output.write_all(b"\"")?; + } } if node.first_child().is_some() { From 24afea653d7915bb7384426c77a817518e2fe783 Mon Sep 17 00:00:00 2001 From: digitalMoksha Date: Wed, 15 May 2024 14:01:32 -0500 Subject: [PATCH 8/9] Add test for wikilinks in tables --- src/tests/commonmark.rs | 9 +++++ src/tests/wikilinks.rs | 82 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 91 insertions(+) diff --git a/src/tests/commonmark.rs b/src/tests/commonmark.rs index 3f297e02..6f2f5a48 100644 --- a/src/tests/commonmark.rs +++ b/src/tests/commonmark.rs @@ -55,3 +55,12 @@ fn math(markdown: &str, cm: &str) { commonmark(markdown, cm, Some(&options)); } + +#[test_case("This [[url]] that", "This [[url|url]] that\n")] +#[test_case("This [[url|link label]] that", "This [[url|link%20label]] that\n")] +fn wikilinks(markdown: &str, cm: &str) { + let mut options = Options::default(); + options.extension.wikilinks_title_before_pipe = true; + + commonmark(markdown, cm, Some(&options)); +} diff --git a/src/tests/wikilinks.rs b/src/tests/wikilinks.rs index e5078f05..e5ca9384 100644 --- a/src/tests/wikilinks.rs +++ b/src/tests/wikilinks.rs @@ -73,6 +73,88 @@ fn wikilinks_supercedes_relaxed_autolinks() { ); } +#[test] +fn wikilinks_only_url_in_tables() { + html_opts!( + [extension.wikilinks_title_after_pipe, extension.table], + concat!("| header |\n", "| ------- |\n", "| [[url]] |\n",), + concat!( + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
header
url
\n", + ), + ); + + html_opts!( + [extension.wikilinks_title_before_pipe, extension.table], + concat!("| header |\n", "| ------- |\n", "| [[url]] |\n",), + concat!( + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
header
url
\n", + ), + ); +} + +#[test] +fn wikilinks_full_in_tables_not_supported() { + html_opts!( + [extension.wikilinks_title_after_pipe, extension.table], + concat!("| header |\n", "| ------- |\n", "| [[url|link label]] |\n",), + concat!( + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
header
[[url
\n", + ), + ); + + html_opts!( + [extension.wikilinks_title_before_pipe, extension.table], + concat!("| header |\n", "| ------- |\n", "| [[link label|url]] |\n",), + concat!( + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "\n", + "
header
[[link label
\n", + ), + ); +} + #[test] fn wikilinks_exceeds_label_limit() { let long_label = format!("[[{:b<1100}]]", "a"); From b769ee4c035167f236c18a924a3bfd3617832f12 Mon Sep 17 00:00:00 2001 From: Asherah Connor Date: Thu, 16 May 2024 13:00:14 +0300 Subject: [PATCH 9/9] wikilinks: Text node sourcepos reflects the span that produced the text. --- src/parser/inlines.rs | 28 ++++++++++++++++------------ src/tests/wikilinks.rs | 18 ++++++++++++++++-- 2 files changed, 32 insertions(+), 14 deletions(-) diff --git a/src/parser/inlines.rs b/src/parser/inlines.rs index b3811f3c..92c8a6ab 100644 --- a/src/parser/inlines.rs +++ b/src/parser/inlines.rs @@ -109,7 +109,7 @@ struct Bracket<'a> { #[derive(Clone, Copy)] struct WikilinkComponents<'i> { url: &'i [u8], - link_label: Option<&'i [u8]>, + link_label: Option<(&'i [u8], usize, usize)>, } impl<'a, 'r, 'o, 'd, 'i, 'c, 'subj> Subject<'a, 'r, 'o, 'd, 'i, 'c, 'subj> { @@ -1579,22 +1579,26 @@ impl<'a, 'r, 'o, 'd, 'i, 'c, 'subj> Subject<'a, 'r, 'o, 'd, 'i, 'c, 'subj> { // [[url|link text]] pub fn handle_wikilink(&mut self) -> Option<&'a AstNode<'a>> { let startpos = self.pos; - let component = self.wikilink_url_link_label(); - let component = component?; + let component = self.wikilink_url_link_label()?; let url_clean = strings::clean_url(component.url); - let link_label_clean = match component.link_label { - Some(link_label) => entity::unescape_html(link_label), - None => entity::unescape_html(component.url), - }; + let (link_label, link_label_start_column, link_label_end_column) = + match component.link_label { + Some((label, sc, ec)) => (entity::unescape_html(label), sc, ec), + None => ( + entity::unescape_html(component.url), + startpos + 1, + self.pos - 3, + ), + }; let nl = NodeWikiLink { url: String::from_utf8(url_clean).unwrap(), }; let inl = self.make_inline(NodeValue::WikiLink(nl), startpos - 1, self.pos - 1); inl.append(self.make_inline( - NodeValue::Text(String::from_utf8(link_label_clean).unwrap()), - startpos - 1, - 
self.pos - 1, + NodeValue::Text(String::from_utf8(link_label).unwrap()), + link_label_start_column, + link_label_end_column, )); Some(inl) @@ -1643,12 +1647,12 @@ impl<'a, 'r, 'o, 'd, 'i, 'c, 'subj> Subject<'a, 'r, 'o, 'd, 'i, 'c, 'subj> { if self.options.extension.wikilinks_title_after_pipe { Some(WikilinkComponents { url: left, - link_label: Some(right), + link_label: Some((right, right_startpos + 1, self.pos - 3)), }) } else { Some(WikilinkComponents { url: right, - link_label: Some(left), + link_label: Some((left, left_startpos + 1, right_startpos - 1)), }) } } else { diff --git a/src/tests/wikilinks.rs b/src/tests/wikilinks.rs index e5ca9384..6b0c94c3 100644 --- a/src/tests/wikilinks.rs +++ b/src/tests/wikilinks.rs @@ -176,7 +176,7 @@ fn sourcepos() { (paragraph (1:1-1:43) [ (text (1:1-1:5) "This ") (wikilink (1:6-1:38) [ - (text (1:6-1:38) "link label") + (text (1:27-1:36) "link label") ]) (text (1:39-1:43) " that") ]) @@ -190,10 +190,24 @@ fn sourcepos() { (paragraph (1:1-1:43) [ (text (1:1-1:5) "This ") (wikilink (1:6-1:38) [ - (text (1:6-1:38) "link label") + (text (1:8-1:17) "link label") ]) (text (1:39-1:43) " that") ]) ]) ); + + assert_ast_match!( + [extension.wikilinks_title_before_pipe], + "This [[http://example.com]] that\n", + (document (1:1-1:32) [ + (paragraph (1:1-1:32) [ + (text (1:1-1:5) "This ") + (wikilink (1:6-1:27) [ + (text (1:8-1:25) "http://example.com") + ]) + (text (1:28-1:32) " that") + ]) + ]) + ); }