gfm_autolinks/
lib.rs

1mod ctype;
2pub mod email;
3pub mod url;
4pub(crate) mod utils;
5pub mod www;
6
7/// Match an autolink from the start of the string.
8/// Return the link and the number of chars to skip.
9pub fn match_start(contents: &str) -> Option<(String, usize)> {
10    let bytes_contents = contents.as_bytes();
11
12    if let Some((url, link_end)) = url::match_http(bytes_contents) {
13        return Some((url, link_end));
14    }
15    if let Some((url, link_end)) = www::match_www(bytes_contents) {
16        return Some((url, link_end));
17    }
18    if let Some((email, link_end)) = email::match_email(bytes_contents) {
19        return Some((email, link_end));
20    }
21    None
22}
23
24/// Match an autolink from an index in the string (invalid index returns None).
25/// Return the link and the number of chars to skip (from index).
26/// Note, this enforces the rule that autolinks can only come at the beginning of a line, after whitespace, or any of the delimiting characters `*`, `_`, `~`, and `(`.
27pub fn match_index(contents: &str, index: usize) -> Option<(String, usize)> {
28    if index > 0 {
29        let prev_char = contents.chars().nth(index - 1)?;
30
31        // All such recognized autolinks can only come at the beginning of a line, after whitespace, or any of the delimiting characters *, _, ~, and (.
32        if !check_prev(prev_char) {
33            return None;
34        }
35    }
36
37    let start_contents = contents.get(index..)?;
38    let (link, skip_len) = match_start(start_contents)?;
39
40    Some((link, skip_len))
41}
42
/// Report whether `prev` may directly precede an autolink.
///
/// Accepted characters are space, tab, carriage return, line feed, and the
/// delimiters `*`, `_`, `~` and `(`. Everything else yields `false`.
pub fn check_prev(prev: char) -> bool {
    const ALLOWED_BEFORE: [char; 8] = [' ', '\t', '\r', '\n', '*', '_', '~', '('];

    ALLOWED_BEFORE.contains(&prev)
}
47
#[cfg(test)]
mod tests {
    use super::*;

    use rstest::rstest;

    /// Convert the borrowed `(link, skip)` pair used in the case tables into
    /// the owned form returned by the functions under test.
    fn owned(expected: Option<(&str, usize)>) -> Option<(String, usize)> {
        expected.map(|(link, skip)| (link.to_string(), skip))
    }

    /// `match_start` on inputs that begin with (or lack) each kind of autolink.
    #[rstest]
    // non-matches
    #[case("", None)]
    #[case(" ", None)]
    #[case("foo", None)]
    #[case("example.com", None)]
    #[case("www.", None)]
    #[case("@example.com", None)]
    // http matches
    #[case("http://localhost:3000", Some(("http://localhost:3000", 21)))]
    #[case("https://localhost:3000", Some(("https://localhost:3000", 22)))]
    #[case("http://Á.com", Some(("http://Á.com", 12)))]
    #[case("https://www.wolframalpha.com/input/?i=x^2+(y-(x^2)^(1/3))^2=1", Some(("https://www.wolframalpha.com/input/?i=x^2+(y-(x^2)^(1/3))^2=1", 61)))]
    // www matches
    #[case("www.example.com", Some(("http://www.example.com", 15)))]
    #[case("www.Á.com", Some(("http://www.Á.com", 9)))]
    // email matches
    #[case("john@example.com", Some(("mailto:john@example.com", 16)))]
    #[case("mailto:@example.com", Some(("mailto:@example.com", 19)))]
    #[case("xmpp:john@example.com", Some(("xmpp:john@example.com", 21)))]
    fn test_match_start(#[case] input: &str, #[case] expected: Option<(&str, usize)>) {
        assert_eq!(match_start(input), owned(expected));
    }

    /// `match_start` on spec-derived inputs. The numeric comments label the
    /// example each group of cases is taken from (presumably GFM spec example
    /// numbers; confirm against the spec version in use).
    #[rstest]
    // 622
    #[case("www.commonmark.org", Some(("http://www.commonmark.org", 18)))]
    // 623
    #[case("www.commonmark.org/help for more information.", Some(("http://www.commonmark.org/help", 23)))]
    // 624
    #[case("www.commonmark.org.", Some(("http://www.commonmark.org", 18)))]
    #[case("www.commonmark.org/a.b.", Some(("http://www.commonmark.org/a.b", 22)))]
    // 625
    #[case("www.google.com/search?q=Markup+(business)", Some(("http://www.google.com/search?q=Markup+(business)", 41)))]
    #[case("www.google.com/search?q=Markup+(business)))", Some(("http://www.google.com/search?q=Markup+(business)", 41)))]
    // 626
    #[case("www.google.com/search?q=(business))+ok", Some(("http://www.google.com/search?q=(business))+ok", 38)))]
    // 627
    #[case("www.google.com/search?q=commonmark&hl=en", Some(("http://www.google.com/search?q=commonmark&hl=en", 40)))]
    #[case("www.google.com/search?q=commonmark&hl;", Some(("http://www.google.com/search?q=commonmark", 34)))]
    // 628
    #[case("www.commonmark.org/he<lp", Some(("http://www.commonmark.org/he", 21)))]
    // 629
    #[case("http://commonmark.org", Some(("http://commonmark.org", 21)))]
    #[case("https://encrypted.google.com/search?q=Markup+(business))", Some(("https://encrypted.google.com/search?q=Markup+(business)", 55)))]
    // 630
    #[case("foo@bar.baz", Some(("mailto:foo@bar.baz", 11)))]
    // 631
    #[case("hello@mail+xyz.example", None)]
    #[case("hello+xyz@mail.example", Some(("mailto:hello+xyz@mail.example", 22)))]
    // 632
    #[case("a.b-c_d@a.b", Some(("mailto:a.b-c_d@a.b", 11)))]
    #[case("a.b-c_d@a.b.", Some(("mailto:a.b-c_d@a.b", 11)))]
    #[case("a.b-c_d@a.b-", None)]
    #[case("a.b-c_d@a.b_", None)]
    // 633
    #[case("mailto:foo@bar.baz", Some(("mailto:foo@bar.baz", 18)))]
    #[case("mailto:a.b-c_d@a.b", Some(("mailto:a.b-c_d@a.b", 18)))]
    #[case("mailto:a.b-c_d@a.b.", Some(("mailto:a.b-c_d@a.b", 18)))]
    #[case("mailto:a.b-c_d@a.b/", Some(("mailto:a.b-c_d@a.b", 18)))]
    #[case("mailto:a.b-c_d@a.b-", None)]
    #[case("mailto:a.b-c_d@a.b_", None)]
    #[case("xmpp:foo@bar.baz", Some(("xmpp:foo@bar.baz", 16)))]
    #[case("xmpp:foo@bar.baz.", Some(("xmpp:foo@bar.baz", 16)))]
    // 634
    #[case("xmpp:foo@bar.baz/txt", Some(("xmpp:foo@bar.baz/txt", 20)))]
    #[case("xmpp:foo@bar.baz/txt@bin", Some(("xmpp:foo@bar.baz/txt@bin", 24)))]
    #[case("xmpp:foo@bar.baz/txt@bin.com", Some(("xmpp:foo@bar.baz/txt@bin.com", 28)))]
    // 635
    #[case("xmpp:foo@bar.baz/txt/bin", Some(("xmpp:foo@bar.baz/txt", 20)))]
    fn test_spec(#[case] input: &str, #[case] expected: Option<(&str, usize)>) {
        assert_eq!(match_start(input), owned(expected));
    }

    /// `match_index` with valid, out-of-range, and badly-preceded indices.
    #[rstest]
    #[case("www.commonmark.org", 0, Some(("http://www.commonmark.org", 18)))]
    #[case(" www.commonmark.org", 0, None)]
    #[case("www.commonmark.org", 100, None)]
    #[case(" www.commonmark.org", 1, Some(("http://www.commonmark.org", 18)))]
    #[case("[www.commonmark.org", 1, None)]
    fn test_match_index(
        #[case] input: &str,
        #[case] index: usize,
        #[case] expected: Option<(&str, usize)>,
    ) {
        assert_eq!(match_index(input, index), owned(expected));
    }
}