mdbook_wikilinks/
lib.rs

1extern crate pest;
2#[macro_use]
3extern crate pest_derive;
4
5use mdbook::{
6    book::{Book, Chapter},
7    errors::Error,
8    preprocess::{Preprocessor, PreprocessorContext},
9    BookItem,
10    utils::id_from_content,
11};
12use pest::Parser;
13use std::collections::HashMap;
14use pulldown_cmark::{CowStr, Event, escape::escape_href};
15
16#[derive(Parser)]
17#[grammar = "wikilink.pest"]
18pub struct WikiLinkParser;
19
/// The wikilink preprocessor.
///
/// A stateless marker type: all behaviour lives in its `Preprocessor`
/// implementation. The common traits are derived so the type can be printed,
/// copied and default-constructed like any other unit value.
#[derive(Debug, Default, Clone, Copy)]
pub struct WikiLinks;
21
22impl Preprocessor for WikiLinks {
23    fn name(&self) -> &str {
24        "wikilink-preprocessor"
25    }
26
27    fn run(&self, _ctx: &PreprocessorContext, mut book: Book) -> Result<Book, Error> {
28        let mut path_map = HashMap::new();
29        for chapter in book.iter().filter_map(get_chapter) {
30            let key = chapter.name.clone();
31            if chapter.path.is_none() { continue; }
32            if path_map.contains_key(&key) {
33                eprintln!("Duplicated page title found: {} at {:?}", key, chapter.path);
34            }
35            path_map.insert(key, chapter.path.as_ref().unwrap().clone());
36        }
37
38        book.for_each_mut(|it| {
39            if let BookItem::Chapter(chapter) = it {
40                for_each_link(&chapter.content.clone(), |link_text| {
41                    let mut link = match WikiLinkParser::parse(Rule::link, link_text) {
42                        Ok(parsed) => parsed,
43                        Err(e) => {
44                            eprintln!("Failed parsing wikilink internals: {}", e);
45                            return
46                        },
47                    }.next()
48                     .unwrap()
49                     .into_inner();
50
51                    // Handle destination
52                    let mut dest = link.next().unwrap().into_inner();
53                    let note = dest.next().unwrap().as_str();
54
55                    // Handle link text
56                    let title = match link.next() {
57                        Some(alias) => alias.as_str(),
58                        None => note.as_ref(),
59                    };
60
61                    let cmark_link = if !path_map.contains_key(note) {
62                        format!(
63                            "<span class=\"missing-link\" style=\"color:darkred;\">{}</span>", 
64                            title
65                        )
66                    } else {
67                        let mut href = pathdiff::diff_paths(
68                            path_map.get(note).unwrap(),
69                            chapter.path.as_ref().unwrap().parent().unwrap(),
70                        ).unwrap().to_string_lossy().to_string(); // Gotta love Rust <3
71
72                        // Handle anchor
73                        // TODO: Blockrefs are currently not handled here
74                        if let Some(anchor) = dest.next() {
75                            let header_kebab = id_from_content(&anchor.as_str()[1..]);
76                            href.push_str(&format!("#{}", header_kebab));
77                        }
78
79                        format!("[{}](<{}>)", title, escape_special_chars(&href))
80                    };
81
82                    chapter.content = chapter.content
83                        .replacen(&format!("[[{}]]", link_text), &cmark_link, 1);
84                });
85            }
86        });
87
88        Ok(book)
89    }
90}
91
92fn for_each_link(content: &str, mut handle_link: impl FnMut(&str)) {
93    enum Currently {
94        OutsideLink,
95        MaybeOpen,
96        MaybeInsideLink,
97        MaybeClose,
98        Ignore,
99    }
100
101    let parser = pulldown_cmark::Parser::new(content);
102
103    let mut buffer = String::new();
104    let mut current = Currently::OutsideLink;
105    for event in parser {
106        match event {
107            // Ignore KaTeX spans
108            Event::Html(CowStr::Borrowed("<span class=\"katex-inline\">")) => current = Currently::Ignore,
109            Event::Html(CowStr::Borrowed("</span>")) => current = Currently::OutsideLink,
110
111            Event::Text(CowStr::Borrowed("[")) => {
112                match current {
113                    Currently::OutsideLink => current = Currently::MaybeOpen,
114                    Currently::MaybeOpen => current = Currently::MaybeInsideLink,
115                    Currently::MaybeInsideLink => current = Currently::OutsideLink,
116                    Currently::MaybeClose => {
117                        buffer.clear();
118                        current = Currently::OutsideLink;
119                    }
120                    Currently::Ignore => {}
121                }
122            }
123
124            Event::Text(CowStr::Borrowed("]")) => {
125                match current {
126                    Currently::MaybeOpen => current = Currently::OutsideLink,
127                    Currently::MaybeInsideLink => current = Currently::MaybeClose,
128                    Currently::MaybeClose => {
129                        handle_link(&buffer.trim());
130                        buffer.clear();
131                        current = Currently::OutsideLink;
132                    }
133                    Currently::OutsideLink => {},
134                    Currently::Ignore => {}
135                }
136            }
137
138            Event::Text(ref text) => {
139                if let Currently::MaybeInsideLink = current {
140                    if buffer.is_empty() {
141                        buffer.push_str(text);
142                    } else {
143                        // Buffer contains something, which means a newline or something else
144                        // split it up. Clear buffer and don't consider this a link.
145                        buffer.clear();
146                        current = Currently::OutsideLink;
147                    }
148                }
149            }
150            _ => {}
151        }
152    }
153}
154
155/// Escape characters for usage in URLs
156fn escape_special_chars(text: &str) -> String {
157    let mut buf = String::new();
158    escape_href(&mut buf, text).ok();
159    buf
160}
161
162fn get_chapter(it: &BookItem) -> Option<&Chapter> {
163    if let BookItem::Chapter(ch) = it {
164        Some(ch)
165    } else {
166        None
167    }
168}
169
#[cfg(test)]
mod tests {
    use super::*;

    /// Wikilinks in running text, blockquotes, list items and table-like
    /// rows are reported, in document order, with padding trimmed.
    #[test]
    fn detect_these() {
        let content = r#"This is a note with four links:

This one [[link]], this one [[ link#header ]], this one [[   link | a bit more complex]], and this one [[     link#header | more 😭 complex]].

> This is a [[link in a blockquote]]

- List item
- Second list item with [[list link]]

| Header 1 | Header 2 |
| -------- | -------- |
| Tables can also have [[table links]] | more stuff |"#;

        let mut links = vec![];
        for_each_link(content, |link_text| { links.push(link_text.to_owned()); });

        assert_eq!(links, vec![
                   "link",
                   "link#header",
                   "link | a bit more complex",
                   "link#header | more 😭 complex",
                   "link in a blockquote",
                   "list link",
                   "table links"
        ]);
    }

    /// Brackets split by a newline, inside inline code, code fences, KaTeX
    /// spans, raw HTML blocks, or written with backslash escapes must not be
    /// reported as wikilinks.
    #[test]
    fn dont_detect_these() {
        let content = r#"Here are some non-correct links:

First a link [[with
newline]]

Then a link `inside [[inline code]]`, or inside <span class="katex-inline">inline [[math]]</span>. What about \[\[escaped brackets\]\]?

<div class="katex-display">
    f(x) = \text{[[display link]]}
</div>

```rust
let link = "[[link_in_code]]".to_owned();
```

<p>
  This is some raw HTML. We don't want [[html links]] detected here.
</p>"#;

        let mut links = Vec::<String>::new();
        for_each_link(content, |link_text| { links.push(link_text.to_owned()); });

        assert!(links.is_empty(), "Got links: {:?}", links);
    }

    /// Spaces and non-ASCII characters are percent-encoded; plain ASCII
    /// letters and digits pass through unchanged.
    #[test]
    fn escapes_special_chars() {
        assert_eq!(
            escape_special_chars("w3ir∂ førmättÎñg"),
            "w3ir%E2%88%82%20f%C3%B8rm%C3%A4tt%C3%8E%C3%B1g"
        );
    }
}