markdown_utils/
link.rs

1use crate::parsers::{MarkdownLineByLineSkippingCodeblocksParser};
2
3pub struct MarkdownLinkReferencesParser<'a> {
4    lines_iterator: MarkdownLineByLineSkippingCodeblocksParser<'a>,
5}
6
7impl MarkdownLinkReferencesParser<'_> {
8    pub fn new(text: &str) -> MarkdownLinkReferencesParser {
9        MarkdownLinkReferencesParser {
10            lines_iterator: MarkdownLineByLineSkippingCodeblocksParser::new(text),
11        }
12    }
13}
14
15impl Iterator for MarkdownLinkReferencesParser<'_> {
16    type Item = Vec<String>;
17
18    fn next(&mut self) -> Option<Vec<String>> {
19        let line = self.lines_iterator.next();
20        match line {
21            Some(line) => {
22                if line.chars().next().unwrap_or('\0') == '[' {
23                    return Some(parse_line_link_references(&line));
24                } else {
25                    return self.next();
26                }
27            },
28            None => return None,
29        }
30    }
31}
32
/// Parses a single link reference definition line such as
/// `[id]: https://example.com "Title"`.
///
/// Returns a three-element vector `[id, link, title]`:
///
/// * `id` is the text between the leading `[` and the first unescaped `]`
///   (backslash escapes are kept verbatim in the returned identifier).
/// * `link` is the destination, optionally wrapped in `<...>`; it ends at the
///   first `>` or space, or at the end of the line (trailing whitespace
///   trimmed) when nothing follows it.
/// * `title` is the text between a pair of matching `"` or `'` quotes that
///   follows the link.
///
/// Any part that is not found is returned as an empty string. A line that does
/// not start with `[` yields three empty strings.
fn parse_line_link_references(line: &str) -> Vec<String> {
    /// Parsing context; each variant carries the data needed to leave it.
    #[derive(Clone, Copy)]
    enum State {
        /// Between `[` and the closing `]`; `escaped` is set right after a `\`.
        InsideId { escaped: bool },
        /// After `]`, skipping `:` and spaces until the link starts.
        AfterId,
        /// Inside the link, which starts at byte offset `start`.
        InsideLink { start: usize },
        /// After the link, waiting for an opening title quote.
        AfterLink,
        /// Inside the title opened by `quote` at byte offset `start - 1`.
        InsideTitle { start: usize, quote: char },
    }

    let mut id = String::new();
    let mut link = String::new();
    let mut title = String::new();

    // Not a link reference line: nothing to extract.
    if !line.starts_with('[') {
        return vec![id, link, title];
    }

    let mut state = State::InsideId { escaped: false };

    // `char_indices` yields byte offsets, which are what string slicing needs;
    // counting characters instead breaks on any non-ASCII input.
    // The opening `[` (one byte) is skipped.
    for (idx, c) in line.char_indices().skip(1) {
        state = match state {
            // The character following a backslash is always taken literally,
            // and the escape applies to that single character only.
            State::InsideId { escaped: true } => State::InsideId { escaped: false },
            State::InsideId { escaped: false } => match c {
                '\\' => State::InsideId { escaped: true },
                ']' => {
                    id = line[1..idx].to_string();
                    State::AfterId
                }
                _ => state,
            },
            State::AfterId => match c {
                // `<` is one byte long, so the link starts right after it.
                '<' => State::InsideLink { start: idx + 1 },
                ' ' | ':' => state,
                _ => State::InsideLink { start: idx },
            },
            State::InsideLink { start } => {
                if c == '>' || c == ' ' {
                    link = line[start..idx].to_string();
                    State::AfterLink
                } else {
                    state
                }
            }
            State::AfterLink => {
                if c == '"' || c == '\'' {
                    // Remember which quote opened the title so that the other
                    // kind of quote can appear inside it (e.g. "Don't").
                    State::InsideTitle { start: idx + 1, quote: c }
                } else {
                    state
                }
            }
            State::InsideTitle { start, quote } => {
                if c == quote {
                    title = line[start..idx].to_string();
                    break;
                }
                state
            }
        };
    }

    // The line ended while still reading the link (no title, no trailing
    // space): the link is everything that remains.
    if let State::InsideLink { start } = state {
        link = line[start..].trim_end().to_string();
    }

    vec![id, link, title]
}
104
105
106/**
107 * Parse link references found in Markdown content.
108 *
109 * Args:
110 *     text (str): Markdown content to be parsed.
111 *
112 * Returns:
113 *     list: Tuples or lists with 3 values ``(target, href, title)``
114 *     for each link reference. If a title is not found or an
115 *     identifier is empty they will be returned as empty strings.
116 **/
117pub fn parse_link_references(
118    text: &str,
119) -> Vec<Vec<String>> {
120    let mut result: Vec<Vec<String>> = vec![];
121
122    let links_iterator = MarkdownLinkReferencesParser::new(text);
123    for link in links_iterator {
124        result.push(link);
125    }
126    result
127}
128
#[cfg(test)]
mod tests {
    use super::*;
    use rstest::rstest;

    /// Builds one expected `[id, link, title]` row from string slices.
    fn reference(id: &str, link: &str, title: &str) -> Vec<String> {
        [id, link, title].iter().map(|part| part.to_string()).collect()
    }

    #[rstest]
    // Several references separated by blank lines.
    #[case(
        concat!(
            "[id1]: https://link1 \"Title 1\"\n\n\n",
            "[id2]: https://link2 \"Title 2\"\n\n\n"
        ),
        vec![
            reference("id1", "https://link1", "Title 1"),
            reference("id2", "https://link2", "Title 2"),
        ]
    )]
    // Single-quoted title.
    #[case(
        concat!(
            "[id-1]: https://link1 'Title 1'\n",
        ),
        vec![reference("id-1", "https://link1", "Title 1")]
    )]
    // Empty identifier.
    #[case(
        concat!(
            "[]: https://link1 \"Title 1\"\n",
        ),
        vec![reference("", "https://link1", "Title 1")]
    )]
    // Empty identifier and no title.
    #[case(
        concat!(
            "[]: https://link1\n",
        ),
        vec![reference("", "https://link1", "")]
    )]
    // References inside code blocks are not reported.
    #[case(
        concat!(
            "[id1]: https://link1 \"Title 1\"\n\n\n",
            "```\n[id2]: https://link2 \"Title 2\"\n```\n\n",
            "```\n[id3]: https://link3 \"Title 3\"\n```\n\n",
            "    [id4]: https://link4 \"Title 4\"\n```\n\n",
        ),
        vec![reference("id1", "https://link1", "Title 1")]
    )]
    fn parse_link_references_test(
        #[case] text: &str,
        #[case] expected: Vec<Vec<String>>,
    ) {
        let parsed = parse_link_references(text);
        assert_eq!(parsed, expected);
    }
}