Skip to main content

panache_parser/parser/inlines/
wikilinks.rs

1//! Pandoc wikilink extensions: `wikilinks_title_after_pipe` and
2//! `wikilinks_title_before_pipe`.
3//!
4//! Shape: `[[url]]` (or `[[url|title]]` with after-pipe semantics, or
5//! `[[title|url]]` with before-pipe). Image variant: `![[url]]` /
6//! `![[url|title]]`. Single line, non-greedy on the first `]]`, rejects
7//! empty body. When both extensions are enabled, after-pipe wins
8//! (matches pandoc behavior).
9//!
10//! Title content is NOT recursively parsed for inlines — `[[url|**bold**]]`
11//! emits the title as a flat TEXT span containing the literal bytes
12//! `**bold**`. Verified against `pandoc 3.9.0.2 -f
13//! markdown+wikilinks_title_after_pipe -t native`.
14//!
15//! Lives in the inline IR's `ConstructKind` dispatch path so that
16//! everything inside `[[...]]` is opaque to emphasis / bracket / autolink
17//! resolution. The emitter walks the byte range and re-locates the pipe.
18
19use super::sink::InlineSink;
20use crate::ParserOptions;
21use crate::syntax::SyntaxKind;
22
/// Location of one recognised wikilink inside a text buffer.
///
/// All offsets are absolute byte indices into the buffer the span was
/// matched against.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) struct WikiLinkSpan {
    /// Offset of the first opener byte: `[` for a plain wikilink, `!` for
    /// the image variant.
    pub start: usize,
    /// Offset one past the final `]` of the closing `]]`.
    pub end: usize,
    /// Offset of the separating `|` when the body contains one. This is an
    /// absolute offset, not one relative to `start`.
    pub pipe: Option<usize>,
    /// Whether the span is the image form `![[...]]`.
    pub is_image: bool,
}

impl WikiLinkSpan {
    /// Offset of the first body byte, i.e. the byte right after the `[[`
    /// (or `![[`) opener.
    pub(crate) fn body_start(&self) -> usize {
        // `[[` is two bytes; the image variant adds one more for the `!`.
        self.start + 2 + usize::from(self.is_image)
    }

    /// Offset of the first `]` of the closing `]]`, which is also the
    /// exclusive end of the body.
    pub(crate) fn body_end(&self) -> usize {
        const CLOSER_LEN: usize = 2; // ]]
        self.end - CLOSER_LEN
    }
}
52
53/// True if either wikilink extension is enabled in `opts`.
54pub(crate) fn any_enabled(opts: &ParserOptions) -> bool {
55    opts.extensions.wikilinks_title_after_pipe || opts.extensions.wikilinks_title_before_pipe
56}
57
58/// Try to recognise a wikilink starting at byte index `pos` in `text`.
59///
60/// Returns `None` unless `text[pos..]` begins with `[[` (or `![[` for the
61/// image variant) and the matching `]]` occurs before the next newline.
62/// Empty body (`[[]]`) is rejected per pandoc behavior. Matching is
63/// non-greedy: the first `]]` after the opener closes the wikilink.
64pub(crate) fn try_parse_wikilink(
65    text: &str,
66    pos: usize,
67    opts: &ParserOptions,
68) -> Option<WikiLinkSpan> {
69    if !any_enabled(opts) {
70        return None;
71    }
72
73    let bytes = text.as_bytes();
74    let n = bytes.len();
75    if pos >= n {
76        return None;
77    }
78
79    let (is_image, open_end) = if bytes[pos] == b'!' {
80        if pos + 2 >= n || bytes[pos + 1] != b'[' || bytes[pos + 2] != b'[' {
81            return None;
82        }
83        (true, pos + 3)
84    } else if bytes[pos] == b'[' {
85        if pos + 1 >= n || bytes[pos + 1] != b'[' {
86            return None;
87        }
88        (false, pos + 2)
89    } else {
90        return None;
91    };
92
93    let body_start = open_end;
94    let mut i = body_start;
95    let mut pipe: Option<usize> = None;
96    while i + 1 < n {
97        let b = bytes[i];
98        if b == b'\n' || b == b'\r' {
99            return None;
100        }
101        if b == b']' && bytes[i + 1] == b']' {
102            if i == body_start {
103                // Empty body — `[[]]` is literal text per pandoc.
104                return None;
105            }
106            return Some(WikiLinkSpan {
107                start: pos,
108                end: i + 2,
109                pipe,
110                is_image,
111            });
112        }
113        if b == b'|' && pipe.is_none() {
114            pipe = Some(i);
115        }
116        i += 1;
117    }
118    None
119}
120
121/// Emit the CST nodes for a previously matched wikilink span.
122///
123/// `text` is the full document buffer; `span.start..span.end` is the
124/// wikilink range. Pipe direction (URL/title order) is decided by
125/// `opts.extensions.wikilinks_title_after_pipe` vs
126/// `wikilinks_title_before_pipe`. After-pipe wins when both are on.
127pub(crate) fn emit_wikilink<S: InlineSink>(
128    builder: &mut S,
129    text: &str,
130    span: WikiLinkSpan,
131    opts: &ParserOptions,
132) {
133    let outer_kind = if span.is_image {
134        SyntaxKind::IMAGE_WIKI_LINK
135    } else {
136        SyntaxKind::WIKI_LINK
137    };
138    let open_str = if span.is_image { "![[" } else { "[[" };
139
140    builder.start_node(outer_kind.into());
141    builder.token(SyntaxKind::WIKI_LINK_OPEN.into(), open_str);
142
143    let body_start = span.body_start();
144    let body_end = span.body_end();
145
146    let (url_range, title_range) = match span.pipe {
147        Some(p) => {
148            let url;
149            let title;
150            if opts.extensions.wikilinks_title_after_pipe {
151                // [[url|title]]
152                url = body_start..p;
153                title = (p + 1)..body_end;
154            } else {
155                // [[title|url]] (only before-pipe is on)
156                title = body_start..p;
157                url = (p + 1)..body_end;
158            }
159            (url, Some((p, title)))
160        }
161        None => (body_start..body_end, None),
162    };
163
164    // URL slot first by CST order when there is no pipe, or in the
165    // source order when there is one. We preserve the source byte
166    // sequence so the formatter can round-trip verbatim.
167    let url_first = match span.pipe {
168        Some(_) => opts.extensions.wikilinks_title_after_pipe,
169        None => true,
170    };
171
172    let emit_url = |b: &mut S| {
173        b.start_node(SyntaxKind::WIKI_LINK_URL.into());
174        b.token(SyntaxKind::TEXT.into(), &text[url_range.clone()]);
175        b.finish_node();
176    };
177    let emit_pipe_and_title = |b: &mut S| {
178        if let Some((_p, ref tr)) = title_range {
179            b.token(SyntaxKind::WIKI_LINK_PIPE.into(), "|");
180            b.start_node(SyntaxKind::WIKI_LINK_TITLE.into());
181            b.token(SyntaxKind::TEXT.into(), &text[tr.clone()]);
182            b.finish_node();
183        }
184    };
185
186    if url_first {
187        emit_url(builder);
188        emit_pipe_and_title(builder);
189    } else {
190        // Before-pipe: source order is title, |, url.
191        if let Some((_p, ref tr)) = title_range {
192            builder.start_node(SyntaxKind::WIKI_LINK_TITLE.into());
193            builder.token(SyntaxKind::TEXT.into(), &text[tr.clone()]);
194            builder.finish_node();
195            builder.token(SyntaxKind::WIKI_LINK_PIPE.into(), "|");
196        }
197        emit_url(builder);
198    }
199
200    builder.token(SyntaxKind::WIKI_LINK_CLOSE.into(), "]]");
201    builder.finish_node();
202}
203
#[cfg(test)]
mod tests {
    use super::*;
    use crate::options::{Extensions, ParserOptions};

    /// Parser options with the two wikilink extensions set explicitly and
    /// everything else left at its default.
    fn opts_with(after: bool, before: bool) -> ParserOptions {
        ParserOptions {
            extensions: Extensions {
                wikilinks_title_after_pipe: after,
                wikilinks_title_before_pipe: before,
                ..Extensions::default()
            },
            ..ParserOptions::default()
        }
    }

    /// Only `wikilinks_title_after_pipe` enabled.
    fn opts_after() -> ParserOptions {
        opts_with(true, false)
    }

    /// Only `wikilinks_title_before_pipe` enabled.
    fn opts_before() -> ParserOptions {
        opts_with(false, true)
    }

    /// Both wikilink extensions enabled.
    fn opts_both() -> ParserOptions {
        opts_with(true, true)
    }

    /// Neither wikilink extension enabled.
    fn opts_off() -> ParserOptions {
        opts_with(false, false)
    }

    #[test]
    fn parses_simple_wikilink() {
        let src = "[[https://example.org]]";
        let found = try_parse_wikilink(src, 0, &opts_after()).unwrap();
        assert_eq!(
            found,
            WikiLinkSpan {
                start: 0,
                end: src.len(),
                pipe: None,
                is_image: false,
            }
        );
    }

    #[test]
    fn parses_with_title() {
        let src = "[[url|hello]]";
        let found = try_parse_wikilink(src, 0, &opts_after()).unwrap();
        assert_eq!((found.pipe, found.end), (Some(5), src.len()));
    }

    #[test]
    fn parses_image_wikilink() {
        let src = "![[url]]";
        let found = try_parse_wikilink(src, 0, &opts_after()).unwrap();
        assert!(found.is_image);
        assert_eq!(found.end, src.len());
    }

    #[test]
    fn rejects_empty_body() {
        // Pandoc renders `[[]]` as literal text.
        let opts = opts_after();
        for src in ["[[]]", "![[]]"] {
            assert_eq!(try_parse_wikilink(src, 0, &opts), None, "input: {src:?}");
        }
    }

    #[test]
    fn rejects_unclosed() {
        let opts = opts_after();
        for src in ["[[unclosed", "[[no closing here"] {
            assert_eq!(try_parse_wikilink(src, 0, &opts), None, "input: {src:?}");
        }
    }

    #[test]
    fn rejects_newline_inside() {
        // Pandoc treats `[[a\nb]]` as literal text (single-line shape).
        let opts = opts_after();
        for src in ["[[a\nb]]", "[[a\r\nb]]"] {
            assert_eq!(try_parse_wikilink(src, 0, &opts), None, "input: {src:?}");
        }
    }

    #[test]
    fn rejects_when_disabled() {
        // With neither extension enabled nothing is a wikilink.
        let opts = opts_off();
        for src in ["[[a|b]]", "[[just url]]"] {
            assert_eq!(try_parse_wikilink(src, 0, &opts), None, "input: {src:?}");
        }
    }

    #[test]
    fn non_greedy_close() {
        // In `[[a]]b]]` the wikilink is `[[a]]`; the remainder is literal.
        let found = try_parse_wikilink("[[a]]b]]", 0, &opts_after()).unwrap();
        assert_eq!(found.end, 5);
    }

    #[test]
    fn first_pipe_is_separator() {
        // In `[[a|b|c]]` the separator is the first `|`.
        let found = try_parse_wikilink("[[a|b|c]]", 0, &opts_after()).unwrap();
        assert_eq!(found.pipe, Some(3));
    }

    #[test]
    fn parses_with_before_pipe_extension() {
        let found = try_parse_wikilink("[[title|url]]", 0, &opts_before()).unwrap();
        assert_eq!(found.pipe, Some(7));
    }

    #[test]
    fn both_extensions_enabled_still_matches() {
        // Recognition does not depend on which extension is on; only
        // emission does.
        let found = try_parse_wikilink("[[a|b]]", 0, &opts_both()).unwrap();
        assert_eq!(found.pipe, Some(3));
    }

    #[test]
    fn parse_at_offset() {
        // The wikilink does not have to start at offset 0.
        let src = "prefix [[url|title]] suffix";
        let found = try_parse_wikilink(src, 7, &opts_after()).unwrap();
        assert_eq!((found.start, found.end), (7, 20));
    }

    #[test]
    fn body_indexing_is_correct() {
        let src = "[[abc|def]]";
        let found = try_parse_wikilink(src, 0, &opts_after()).unwrap();
        let (from, to) = (found.body_start(), found.body_end());
        assert_eq!(from, 2);
        assert_eq!(to, 9);
        assert_eq!(&src[from..to], "abc|def");
    }

    #[test]
    fn image_body_indexing_is_correct() {
        let src = "![[abc]]";
        let found = try_parse_wikilink(src, 0, &opts_after()).unwrap();
        let (from, to) = (found.body_start(), found.body_end());
        assert_eq!(from, 3);
        assert_eq!(to, 6);
        assert_eq!(&src[from..to], "abc");
    }
}
347}