normalize_wikilink/
normalize-wikilink.rs

1use pulldown_cmark::{html, CowStr, Event, LinkType, Options, Parser, Tag};
2use regex::RegexBuilder;
3use std::io::Write;
4
5/// This example demonstrates how to normalize the href of a wikilink. The
6/// details of this implementation can be tweaked for different use cases.
7fn main() {
8    let markdown_input: &str = r#"
9Example provided by [[https://example.org/]].
10Some people might prefer the wikilink syntax for autolinks.
11
12Wanna go for a [[Wiki Walk]]?"#;
13
14    let parser = Parser::new_ext(markdown_input, Options::ENABLE_WIKILINKS).map(|event| {
15        if let Event::Start(Tag::Link {
16            link_type: LinkType::WikiLink { has_pothole },
17            dest_url,
18            title,
19            id,
20        }) = event
21        {
22            let new_link = normalize_wikilink(dest_url);
23            Event::Start(Tag::Link {
24                link_type: LinkType::WikiLink { has_pothole },
25                dest_url: new_link,
26                title,
27                id,
28            })
29        } else {
30            event
31        }
32    });
33
34    // Write to anything implementing the `Write` trait. This could also be a file
35    // or network socket.
36    let stdout = std::io::stdout();
37    let mut handle = stdout.lock();
38    handle.write_all(b"\nHTML output:\n").unwrap();
39    html::write_html_io(&mut handle, parser).unwrap();
40}
41
42/// Performs wikilink normalization.
43fn normalize_wikilink(link: CowStr) -> CowStr {
44    // your wiki is stored at "/wiki"
45    let prefix: &str = "/wiki";
46    if link.is_empty() {
47        return link;
48    }
49
50    // check if the link is absolute, if it is, return as is
51    // according to RFC 3986; https://www.rfc-editor.org/rfc/rfc3986
52    let is_absolute = RegexBuilder::new("^(?:[a-z][a-z0-9+\\-.]*:)?//")
53        .case_insensitive(true)
54        .build()
55        .expect("valid regex");
56
57    if is_absolute.is_match(&link) {
58        return link;
59    }
60
61    let mut result = String::with_capacity(link.len() + 2);
62    let mut i = 0;
63    let mut mark = 0;
64    let mut in_whitespace = false;
65
66    result.push_str(prefix);
67
68    if !link.starts_with('/') {
69        result.push('/');
70    }
71
72    while i < link.len() {
73        if !in_whitespace && link.as_bytes()[i].is_ascii_whitespace() {
74            in_whitespace = true;
75            result.push_str(&link[mark..i]);
76        } else if in_whitespace && !link.as_bytes()[i].is_ascii_whitespace() {
77            result.push('_');
78            mark = i;
79            in_whitespace = false;
80        }
81
82        i += 1;
83    }
84
85    if !in_whitespace {
86        result.push_str(&link[mark..]);
87    }
88    if !result.ends_with('/') {
89        result.push('/');
90    }
91    result.into()
92}