1extern crate pest;
2#[macro_use]
3extern crate pest_derive;
4
5use mdbook::{
6 book::{Book, Chapter},
7 errors::Error,
8 preprocess::{Preprocessor, PreprocessorContext},
9 BookItem,
10 utils::id_from_content,
11};
12use pest::Parser;
13use std::collections::HashMap;
14use pulldown_cmark::{CowStr, Event, escape::escape_href};
15
/// Parser for the text found between the `[[` and `]]` of a wikilink.
///
/// The implementation is generated by `pest_derive` from the `wikilink.pest`
/// grammar file; this file invokes it with `Rule::link` as the entry rule.
#[derive(Parser)]
#[grammar = "wikilink.pest"]
pub struct WikiLinkParser;
19
/// Preprocessor that rewrites `[[wikilinks]]` in chapter content.
///
/// The type carries no state; all work happens in its `Preprocessor`
/// implementation. `Debug` and `Default` are derived so the public type can be
/// logged and constructed generically.
#[derive(Debug, Default)]
pub struct WikiLinks;
21
22impl Preprocessor for WikiLinks {
23 fn name(&self) -> &str {
24 "wikilink-preprocessor"
25 }
26
27 fn run(&self, _ctx: &PreprocessorContext, mut book: Book) -> Result<Book, Error> {
28 let mut path_map = HashMap::new();
29 for chapter in book.iter().filter_map(get_chapter) {
30 let key = chapter.name.clone();
31 if chapter.path.is_none() { continue; }
32 if path_map.contains_key(&key) {
33 eprintln!("Duplicated page title found: {} at {:?}", key, chapter.path);
34 }
35 path_map.insert(key, chapter.path.as_ref().unwrap().clone());
36 }
37
38 book.for_each_mut(|it| {
39 if let BookItem::Chapter(chapter) = it {
40 for_each_link(&chapter.content.clone(), |link_text| {
41 let mut link = match WikiLinkParser::parse(Rule::link, link_text) {
42 Ok(parsed) => parsed,
43 Err(e) => {
44 eprintln!("Failed parsing wikilink internals: {}", e);
45 return
46 },
47 }.next()
48 .unwrap()
49 .into_inner();
50
51 let mut dest = link.next().unwrap().into_inner();
53 let note = dest.next().unwrap().as_str();
54
55 let title = match link.next() {
57 Some(alias) => alias.as_str(),
58 None => note.as_ref(),
59 };
60
61 let cmark_link = if !path_map.contains_key(note) {
62 format!(
63 "<span class=\"missing-link\" style=\"color:darkred;\">{}</span>",
64 title
65 )
66 } else {
67 let mut href = pathdiff::diff_paths(
68 path_map.get(note).unwrap(),
69 chapter.path.as_ref().unwrap().parent().unwrap(),
70 ).unwrap().to_string_lossy().to_string(); if let Some(anchor) = dest.next() {
75 let header_kebab = id_from_content(&anchor.as_str()[1..]);
76 href.push_str(&format!("#{}", header_kebab));
77 }
78
79 format!("[{}](<{}>)", title, escape_special_chars(&href))
80 };
81
82 chapter.content = chapter.content
83 .replacen(&format!("[[{}]]", link_text), &cmark_link, 1);
84 });
85 }
86 });
87
88 Ok(book)
89 }
90}
91
92fn for_each_link(content: &str, mut handle_link: impl FnMut(&str)) {
93 enum Currently {
94 OutsideLink,
95 MaybeOpen,
96 MaybeInsideLink,
97 MaybeClose,
98 Ignore,
99 }
100
101 let parser = pulldown_cmark::Parser::new(content);
102
103 let mut buffer = String::new();
104 let mut current = Currently::OutsideLink;
105 for event in parser {
106 match event {
107 Event::Html(CowStr::Borrowed("<span class=\"katex-inline\">")) => current = Currently::Ignore,
109 Event::Html(CowStr::Borrowed("</span>")) => current = Currently::OutsideLink,
110
111 Event::Text(CowStr::Borrowed("[")) => {
112 match current {
113 Currently::OutsideLink => current = Currently::MaybeOpen,
114 Currently::MaybeOpen => current = Currently::MaybeInsideLink,
115 Currently::MaybeInsideLink => current = Currently::OutsideLink,
116 Currently::MaybeClose => {
117 buffer.clear();
118 current = Currently::OutsideLink;
119 }
120 Currently::Ignore => {}
121 }
122 }
123
124 Event::Text(CowStr::Borrowed("]")) => {
125 match current {
126 Currently::MaybeOpen => current = Currently::OutsideLink,
127 Currently::MaybeInsideLink => current = Currently::MaybeClose,
128 Currently::MaybeClose => {
129 handle_link(&buffer.trim());
130 buffer.clear();
131 current = Currently::OutsideLink;
132 }
133 Currently::OutsideLink => {},
134 Currently::Ignore => {}
135 }
136 }
137
138 Event::Text(ref text) => {
139 if let Currently::MaybeInsideLink = current {
140 if buffer.is_empty() {
141 buffer.push_str(text);
142 } else {
143 buffer.clear();
146 current = Currently::OutsideLink;
147 }
148 }
149 }
150 _ => {}
151 }
152 }
153}
154
155fn escape_special_chars(text: &str) -> String {
157 let mut buf = String::new();
158 escape_href(&mut buf, text).ok();
159 buf
160}
161
162fn get_chapter(it: &BookItem) -> Option<&Chapter> {
163 if let BookItem::Chapter(ch) = it {
164 Some(ch)
165 } else {
166 None
167 }
168}
169
#[cfg(test)]
mod tests {
    use super::*;

    /// Wikilinks in ordinary text, blockquotes, lists and table-like rows are
    /// all reported, trimmed, in document order.
    #[test]
    fn detect_these() {
        let content = r#"This is a note with four links:

This one [[link]], this one [[ link#header ]], this one [[ link | a bit more complex]], and this one [[ link#header | more 😠 complex]].

> This is a [[link in a blockquote]]

- List item
- Second list item with [[list link]]

| Header 1 | Header 2 |
| -------- | -------- |
| Tables can also have [[table links]] | more stuff |"#;

        let mut links = vec![];
        for_each_link(content, |link_text| {
            links.push(link_text.to_owned());
        });

        assert_eq!(
            links,
            vec![
                "link",
                "link#header",
                "link | a bit more complex",
                "link#header | more 😠 complex",
                "link in a blockquote",
                "list link",
                "table links"
            ]
        );
    }

    /// Bracket pairs split across lines, inside code, inside KaTeX or raw
    /// HTML, or written with escaped brackets must not be reported.
    #[test]
    fn dont_detect_these() {
        let content = r#"Here are some non-correct links:

First a link [[with
newline]]

Then a link `inside [[inline code]]`, or inside <span class="katex-inline">inline [[math]]</span>. What about \[\[escaped brackets\]\]?

<div class="katex-display">
    f(x) = \text{[[display link]]}
</div>

```rust
let link = "[[link_in_code]]".to_owned();
```

<p>
    This is some raw HTML. We don't want [[html links]] detected here.
</p>"#;

        let mut links = Vec::<String>::new();
        for_each_link(content, |link_text| {
            links.push(link_text.to_owned());
        });

        assert!(links.is_empty(), "Got links: {:?}", links);
    }

    /// Spaces and non-ASCII characters are percent-encoded byte by byte.
    #[test]
    fn escapes_special_chars() {
        assert_eq!(
            escape_special_chars("w3ir∂ førmättÎñg"),
            "w3ir%E2%88%82%20f%C3%B8rm%C3%A4tt%C3%8E%C3%B1g"
        );
    }
}