Skip to main content

docgen_core/
wikilink.rs

1use std::collections::BTreeSet;
2
3use comrak::nodes::{AstNode, NodeValue};
4use comrak::Arena;
5
/// The set of all known slugs, used to resolve wikilink targets.
///
/// A `BTreeSet` iterates in sorted order, so any lookup that scans the set
/// (see `resolve_target`'s basename fallback) visits slugs in a stable order.
pub type SlugSet = BTreeSet<String>;
8
/// Break the text between `[[` and `]]` into a target and an optional label.
///
/// The split happens at the first `|` only, so any later pipes stay in the
/// label. Both parts are returned with surrounding whitespace trimmed. When
/// there is no `|`, the whole (trimmed) input is the target and the label is
/// `None`.
pub fn parse_wikilink(inner: &str) -> (String, Option<String>) {
    let (raw_target, raw_label) = match inner.find('|') {
        // `|` is one byte, so `bar + 1` is the first byte of the label.
        Some(bar) => (&inner[..bar], Some(&inner[bar + 1..])),
        None => (inner, None),
    };
    (
        raw_target.trim().to_owned(),
        raw_label.map(|text| text.trim().to_owned()),
    )
}
17
18/// Resolve a wikilink target to a slug.
19/// Order: trimmed-exact slug match, then case-insensitive basename match
20/// (basename = last `/`-segment of a slug). First basename match wins by
21/// `SlugSet` (BTreeSet) order, making resolution deterministic.
22pub fn resolve_target(target: &str, slugs: &SlugSet) -> Option<String> {
23    let t = target.trim();
24    if t.is_empty() {
25        return None;
26    }
27    if slugs.contains(t) {
28        return Some(t.to_string());
29    }
30    let needle = t.to_ascii_lowercase();
31    slugs
32        .iter()
33        .find(|slug| {
34            slug.rsplit('/')
35                .next()
36                .unwrap_or(slug)
37                .eq_ignore_ascii_case(&needle)
38        })
39        .cloned()
40}
41
/// Outcome of transforming one document's AST.
///
/// Derives the usual value-type traits so callers and tests can print,
/// compare, copy and default-construct a pass result.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct WikilinkPass {
    /// Target slugs this doc links to, deduped, in first-seen document order.
    pub resolved: Vec<String>,
}
47
/// Escape the four characters that matter for the small strings injected into
/// double-quoted HTML attributes and element text: `&`, `<`, `>` and `"`.
///
/// Every other character is copied through unchanged. The output is built in
/// one pass into a single buffer pre-sized to the input length.
fn esc(s: &str) -> String {
    s.chars()
        .fold(String::with_capacity(s.len()), |mut escaped, ch| {
            match ch {
                '&' => escaped.push_str("&amp;"),
                '<' => escaped.push_str("&lt;"),
                '>' => escaped.push_str("&gt;"),
                '"' => escaped.push_str("&quot;"),
                other => escaped.push(other),
            }
            escaped
        })
}
63
/// Pick the visible text for a wikilink.
///
/// A label is used only when it contains something other than whitespace;
/// a missing, empty or blank label falls back to `target`, so the rendered
/// element never has invisible text. Intended to mirror
/// `search::push_unwrapping_wikilinks`.
fn display_text(target: &str, label: Option<String>) -> String {
    match label {
        Some(text) if !text.trim().is_empty() => text,
        _ => target.to_string(),
    }
}
72
73/// Build the inline HTML for one wikilink occurrence and, if resolved, push its
74/// target slug into `resolved` (deduped, first-seen order).
75fn render_link(
76    inner: &str,
77    slugs: &SlugSet,
78    base: &str,
79    resolved: &mut Vec<String>,
80    seen: &mut BTreeSet<String>,
81) -> String {
82    let (target, label) = parse_wikilink(inner);
83    match resolve_target(&target, slugs) {
84        Some(slug) => {
85            if seen.insert(slug.clone()) {
86                resolved.push(slug.clone());
87            }
88            let text = display_text(&target, label);
89            format!(
90                r#"<a class="docgen-wikilink" href="{0}/{1}" data-wikilink-title="{2}" data-wikilink-path="/{1}">{2}</a>"#,
91                base,
92                esc(&slug),
93                esc(&text)
94            )
95        }
96        None => {
97            let text = display_text(&target, label);
98            format!(
99                r#"<span class="docgen-wikilink docgen-wikilink--broken" data-target="{}">{}</span>"#,
100                esc(&target),
101                esc(&text)
102            )
103        }
104    }
105}
106
107/// Walk the AST; for each Text node containing `[[...]]`, split it into
108/// surrounding Text nodes + raw-HTML inline nodes for each wikilink.
109/// The flat source text a child node contributes when reconstructing an inline
110/// run, or `None` if the node breaks the run (it is not foldable into text).
111fn flat_source(node: &AstNode<'_>) -> Option<String> {
112    match &node.data.borrow().value {
113        NodeValue::Text(t) => Some(t.to_string()),
114        // Raw inline HTML inside `[[ ... ]]` is folded back into the target string
115        // (e.g. `[[a<b>]]`), so the resolver/escaper sees the literal `a<b>`.
116        NodeValue::HtmlInline(h) => Some(h.clone()),
117        _ => None,
118    }
119}
120
121/// `base` is the deployed sub-path prefix (e.g. `/docs`); `""` for root
122/// deployment. Resolved-wikilink hrefs are emitted as `{base}/{slug}`.
123pub fn transform_wikilinks<'a>(
124    root: &'a AstNode<'a>,
125    arena: &'a Arena<'a>,
126    slugs: &SlugSet,
127    base: &str,
128) -> WikilinkPass {
129    let mut resolved: Vec<String> = Vec::new();
130    let mut seen: BTreeSet<String> = BTreeSet::new();
131
132    // Collect every node that has children, so we can scan their direct child
133    // runs. We snapshot the list first to avoid iterating while mutating.
134    let parents: Vec<&'a AstNode<'a>> = root
135        .descendants()
136        .filter(|n| n.first_child().is_some())
137        .collect();
138
139    for parent in parents {
140        // Snapshot direct children.
141        let children: Vec<&'a AstNode<'a>> = parent.children().collect();
142
143        // Walk maximal runs of foldable (Text/HtmlInline) children, rebuild any
144        // run that contains a complete `[[...]]`.
145        let mut i = 0;
146        while i < children.len() {
147            if flat_source(children[i]).is_none() {
148                i += 1;
149                continue;
150            }
151            // Extend the foldable run.
152            let start = i;
153            let mut combined = String::new();
154            while i < children.len() {
155                match flat_source(children[i]) {
156                    Some(s) => {
157                        combined.push_str(&s);
158                        i += 1;
159                    }
160                    None => break,
161                }
162            }
163
164            if !combined.contains("[[") {
165                continue;
166            }
167
168            // Build replacement nodes from the combined run, inserted before the
169            // first node of the run; then detach the whole run.
170            let anchor = children[start];
171            let mut rest = combined.as_str();
172            let mut produced_any = false;
173            while let Some(open) = rest.find("[[") {
174                if let Some(close_rel) = rest[open + 2..].find("]]") {
175                    let close = open + 2 + close_rel;
176                    let before = &rest[..open];
177                    let inner = &rest[open + 2..close];
178
179                    if !before.is_empty() {
180                        let n =
181                            arena.alloc(AstNode::from(NodeValue::Text(before.to_string().into())));
182                        anchor.insert_before(n);
183                    }
184                    let html = render_link(inner, slugs, base, &mut resolved, &mut seen);
185                    let n = arena.alloc(AstNode::from(NodeValue::HtmlInline(html)));
186                    anchor.insert_before(n);
187
188                    rest = &rest[close + 2..];
189                    produced_any = true;
190                } else {
191                    break; // unterminated `[[` — leave the remainder literal
192                }
193            }
194
195            if produced_any {
196                if !rest.is_empty() {
197                    let n = arena.alloc(AstNode::from(NodeValue::Text(rest.to_string().into())));
198                    anchor.insert_before(n);
199                }
200                for node in &children[start..i] {
201                    node.detach();
202                }
203            }
204        }
205    }
206
207    WikilinkPass { resolved }
208}
209
/// Unit tests for target parsing/resolution and for the end-to-end
/// markdown -> HTML wikilink rewrite.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::markdown::comrak_options;
    use comrak::{parse_document, Arena};

    /// Fixture slug set shared by most tests.
    fn slugs() -> SlugSet {
        ["index", "guide/intro", "guide/Advanced", "reference/api"]
            .iter()
            .map(|s| s.to_string())
            .collect()
    }

    #[test]
    fn resolves_exact_slug() {
        assert_eq!(
            resolve_target("guide/intro", &slugs()),
            Some("guide/intro".to_string())
        );
    }

    #[test]
    fn resolves_basename_case_insensitive() {
        // "advanced" matches the basename of "guide/Advanced".
        assert_eq!(
            resolve_target("advanced", &slugs()),
            Some("guide/Advanced".to_string())
        );
        assert_eq!(
            resolve_target("INTRO", &slugs()),
            Some("guide/intro".to_string())
        );
    }

    #[test]
    fn trims_surrounding_whitespace() {
        assert_eq!(
            resolve_target("  index  ", &slugs()),
            Some("index".to_string())
        );
    }

    #[test]
    fn unresolved_returns_none() {
        assert_eq!(resolve_target("does/not/exist", &slugs()), None);
        assert_eq!(resolve_target("", &slugs()), None);
    }

    #[test]
    fn parse_splits_label() {
        assert_eq!(
            parse_wikilink("target|Label"),
            ("target".to_string(), Some("Label".to_string()))
        );
        assert_eq!(parse_wikilink("target"), ("target".to_string(), None));
        // Only the first pipe splits; extra pipes belong to the label.
        assert_eq!(
            parse_wikilink("a|b|c"),
            ("a".to_string(), Some("b|c".to_string()))
        );
    }

    /// Render `md` with an empty base prefix (root deployment).
    fn render(md: &str, slugs: &SlugSet) -> (String, Vec<String>) {
        render_with_base(md, slugs, "")
    }

    /// Parse `md`, run the wikilink pass, and return the formatted HTML
    /// together with the resolved slugs reported by the pass.
    fn render_with_base(md: &str, slugs: &SlugSet, base: &str) -> (String, Vec<String>) {
        let arena = Arena::new();
        let options = comrak_options();
        let root = parse_document(&arena, md, &options);
        let pass = transform_wikilinks(root, &arena, slugs, base);
        let html = crate::markdown::format_ast(root, &options);
        (html, pass.resolved)
    }

    #[test]
    fn resolved_wikilink_href_is_prefixed_with_base() {
        let (html, _) = render_with_base("[[guide/intro]]\n", &slugs(), "/docs");
        assert!(html.contains(r#"href="/docs/guide/intro""#));
        assert!(!html.contains(r#"href="/guide/intro""#));
    }

    #[test]
    fn resolved_wikilink_becomes_anchor() {
        let (html, resolved) = render("see [[guide/intro]] now\n", &slugs());
        assert!(html.contains(
            r#"<a class="docgen-wikilink" href="/guide/intro" data-wikilink-title="guide/intro" data-wikilink-path="/guide/intro">guide/intro</a>"#
        ));
        assert_eq!(resolved, vec!["guide/intro".to_string()]);
    }

    #[test]
    fn labeled_wikilink_uses_label_text() {
        let (html, _) = render("[[guide/intro|The Intro]]\n", &slugs());
        assert!(html.contains(r#"href="/guide/intro""#));
        assert!(html.contains(r#"data-wikilink-title="The Intro""#));
        assert!(html.contains(r#">The Intro</a>"#));
    }

    #[test]
    fn broken_wikilink_becomes_marked_span() {
        let (html, resolved) = render("[[nope]] here\n", &slugs());
        assert!(html.contains(
            r#"<span class="docgen-wikilink docgen-wikilink--broken" data-target="nope">nope</span>"#
        ));
        assert!(resolved.is_empty());
    }

    #[test]
    fn resolved_targets_are_deduped_in_order() {
        let (_html, resolved) = render("[[guide/intro]] and [[index]] and [[intro]]\n", &slugs());
        // "intro" resolves to guide/intro (already present) -> deduped.
        assert_eq!(
            resolved,
            vec!["guide/intro".to_string(), "index".to_string()]
        );
    }

    #[test]
    fn empty_or_whitespace_label_falls_back_to_target() {
        // `[[index|]]` and `[[index|   ]]` must not render an empty clickable text;
        // they fall back to the target, matching the search-index unwrap path.
        let (html, _) = render("[[index|]]\n", &slugs());
        assert!(html.contains(r#"href="/index""#));
        assert!(html.contains(r#">index</a>"#));
        // Labels are trimmed while parsing, so an empty anchor would show up
        // as `></a>`, never as an anchor holding spaces.
        assert!(!html.contains(r#"></a>"#));

        let (html, _) = render("[[index|   ]]\n", &slugs());
        assert!(html.contains(r#">index</a>"#));
        assert!(!html.contains(r#"></a>"#));

        // Broken target with empty label also falls back to the target text.
        let (html, _) = render("[[nope|]] x\n", &slugs());
        assert!(html.contains(r#"data-target="nope">nope</span>"#));
    }

    #[test]
    fn ambiguous_basename_resolves_deterministically() {
        // Two slugs share the basename `dup`; resolution is first by BTreeSet order.
        let amb: SlugSet = ["a/dup", "b/dup"].iter().map(|s| s.to_string()).collect();
        assert_eq!(resolve_target("dup", &amb), Some("a/dup".to_string()));
    }

    #[test]
    fn html_special_chars_in_broken_target_are_escaped() {
        let (html, _) = render("[[a<b>]] x\n", &slugs());
        assert!(html.contains("data-target=\"a&lt;b&gt;\""));
        assert!(!html.contains("<b>"));
    }
}
358}