markdown_linkify/
link_aggregator.rs

1use crate::aggregation::Aggregation;
2use crate::link::Link;
3
4use pulldown_cmark::{CowStr, Event, LinkType, Tag};
5
/// Progress of the link that is currently being collected by [`LinkAggregator`].
#[derive(Debug, Default)]
pub enum Aggregator<'a> {
    /// No link is in progress.
    #[default]
    Empty,
    /// A link start was seen; holds its link type, destination and title
    /// (in that order), but no link text has been collected yet.
    Start(LinkType, CowStr<'a>, CowStr<'a>),
    /// A link start and at least one text/code event were seen; the events
    /// collected so far live in the link's `text`.
    Text(Link<'a>),
}
13
/// Iterator adapter that wraps an inner iterator and keeps track of the link
/// currently being collected from it.
#[derive(Debug, Default)]
pub struct LinkAggregator<'a, I> {
    /// Link collected so far, carried over between calls to `next`.
    state: Aggregator<'a>,
    /// The wrapped source iterator.
    iter: I,
}
19
20impl<'a, I> LinkAggregator<'a, I> {
21    pub fn new(iter: I) -> Self {
22        Self {
23            iter,
24            state: Aggregator::default(),
25        }
26    }
27}
28
29/// Walk over an iterator of [`Event`]s.
30/// On encountering a start of a link, then some text, then an end of a link:
31/// pass on an aggregation of the encountered link.
32/// Otherwise, pass on all items.
33impl<'a, I> Iterator for LinkAggregator<'a, I>
34where
35    I: Iterator<Item = Event<'a>>,
36{
37    type Item = Aggregation<'a>;
38
39    fn next(&mut self) -> Option<Self::Item> {
40        loop {
41            let next = self.iter.next();
42            let Some(next) = next else {
43                return match std::mem::replace(&mut self.state, Aggregator::Empty) {
44                    Aggregator::Empty => None,
45                    Aggregator::Start(link_type, destination, title) => {
46                        Some(Aggregation::Link(Link {
47                            link_type,
48                            destination,
49                            title,
50                            text: vec![],
51                        }))
52                    }
53                    Aggregator::Text(link) => Some(Aggregation::Link(link)),
54                };
55            };
56            let state = std::mem::replace(&mut self.state, Aggregator::Empty);
57            match (state, next) {
58                (Aggregator::Empty, Event::Start(Tag::Link(link_type, destination, title))) => {
59                    self.state = Aggregator::Start(link_type, destination, title);
60                    continue;
61                }
62                (Aggregator::Empty, e) => break Some(Aggregation::Event(e)),
63                (Aggregator::Start(link_type, destination, title), e @ Event::Start(..)) => {
64                    let start = Event::Start(Tag::Link(link_type, destination, title));
65                    let agg = Aggregation::Bag(vec![start, e]);
66                    self.state = Aggregator::Empty;
67                    break Some(agg);
68                }
69                (Aggregator::Start(link_type, destination, title), Event::End(Tag::Link(..))) => {
70                    let result = Link {
71                        link_type,
72                        destination,
73                        title,
74                        text: vec![],
75                    };
76                    self.state = Aggregator::Empty;
77                    break Some(Aggregation::Link(result));
78                }
79                (
80                    Aggregator::Start(link_type, destination, title),
81                    e @ (Event::Text(..) | Event::Code(..)),
82                ) => {
83                    let link = Link {
84                        link_type,
85                        destination,
86                        title,
87                        text: vec![e],
88                    };
89                    self.state = Aggregator::Text(link);
90                    continue;
91                }
92                (Aggregator::Text(mut link), e @ (Event::Text(..) | Event::Code(..))) => {
93                    link.text.push(e);
94                    self.state = Aggregator::Text(link);
95                    continue;
96                }
97                (Aggregator::Text(link), Event::End(Tag::Link(..))) => {
98                    self.state = Aggregator::Empty;
99                    break Some(Aggregation::Link(link));
100                }
101                (_state, event) => break Some(Aggregation::Event(event)),
102            }
103        }
104    }
105}
106
/// Extension trait that adds link aggregation to iterators.
pub trait LinkTools: Iterator {
    /// Consume this iterator and wrap it in a [`LinkAggregator`].
    fn aggregate_links<'a>(self) -> LinkAggregator<'a, Self>
    where
        Self: Sized;
}
112
113impl<T> LinkTools for T
114where
115    T: Iterator + ?Sized,
116{
117    fn aggregate_links<'a>(self) -> LinkAggregator<'a, Self>
118    where
119        Self: Sized,
120    {
121        LinkAggregator::new(self.into_iter())
122    }
123}
124
// Tests for the link aggregator. Several of them only print what the
// aggregator yields (via `dbg!`) and assert nothing.
#[cfg(test)]
mod test {
    use super::*;

    use crate::aggregation::Aggregation;
    use pulldown_cmark::{BrokenLink, CowStr, Event, LinkType, Options, Parser};
    use pulldown_cmark_to_cmark::cmark;

    // Smoke test: aggregate a heading, an inline link and another heading and
    // dump every yielded item. No assertions.
    #[test]
    fn aggregates_md() {
        let md = "# HEADING\n[simple](link \"right?\")\n## more heading";
        let parser = pulldown_cmark::Parser::new(md);
        parser.aggregate_links().for_each(|elem| {
            dbg!(elem);
        });
    }

    // The first aggregated link of a plain inline link must carry its text,
    // destination, title and link type. Fails if no link is yielded at all.
    #[test]
    fn aggregates_simple_link() {
        let md = "[simple](link \"right?\")";
        let parser = Parser::new(md);

        for agg in parser.aggregate_links() {
            let Aggregation::Link(link) = agg else {
                continue;
            };
            assert_eq!(link.text, vec![Event::Text("simple".into())]);
            assert_eq!(link.destination, "link".into());
            assert_eq!(link.title, "right?".into());
            assert_eq!(link.link_type, LinkType::Inline);
            return;
        }
        panic!("Should return above");
    }

    // A link whose text is just two backticks: the text is expected to arrive
    // as a single `Event::Text` holding the backticks.
    // NOTE: unlike `aggregates_simple_link`, this loop neither returns nor
    // panics afterwards, so the test also passes if no link is yielded.
    #[test]
    fn aggregates_empty_code() {
        let md = "[``](thing \"titleee?\")";
        let parser = Parser::new(md);

        for agg in parser.aggregate_links() {
            let Aggregation::Link(link) = agg else {
                continue;
            };
            assert_eq!(link.text, vec![Event::Text("``".into())]);
            assert_eq!(link.destination, "thing".into());
            assert_eq!(link.title, "titleee?".into());
            assert_eq!(link.link_type, LinkType::Inline);
        }
    }

    // Feeds a parser with a broken-link callback into `cmark` and prints the
    // rendered result. Does not go through the aggregator; only checks that
    // rendering succeeds.
    #[test]
    fn broken_link_callback() {
        // Resolve every broken link to a fixed destination, using the link's
        // reference as the title.
        fn callback(link: BrokenLink) -> Option<(CowStr, CowStr)> {
            Some(("destination".into(), link.reference))
        }
        let md = "[foo `this` works `nicely`]";
        let cb = &mut callback;
        let parser = Parser::new_with_broken_link_callback(md, Options::empty(), Some(cb));

        let mut buf = String::new();
        let _state = cmark(parser, &mut buf).expect("CMark failed");
        println!("{buf}");
    }

    // Flattening the aggregations again must reproduce the events of a second,
    // plain parser over the same input, in the same order.
    // NOTE: events left over in `parser2` after the loop are not checked.
    #[test]
    fn iterate_over_everything() {
        let md = "# HEADING\n[simple](link \"right?\")";
        let parser = Parser::new(md);
        let mut parser2 = Parser::new(md);

        for agg in parser.aggregate_links() {
            for elem in agg.into_iter() {
                assert_eq!(Some(elem), parser2.next());
            }
        }
    }

    // Smoke test: reference-style links around a heading and a link
    // definition; dumps every yielded item. No assertions.
    #[test]
    fn empty_links() {
        let md = "[foo]\n# HEADING\n[foo]: /url \"title\"\n\n[foo]";
        let links = Parser::new(md).aggregate_links();
        links.for_each(|elem| {
            dbg!(elem);
        });
    }
}