Skip to main content

satteri_pulldown_cmark/
utils.rs

//! Miscellaneous utilities to increase comfort.
//! Special thanks to:
//!
//! - <https://github.com/BenjaminRi/Redwood-Wiki/blob/master/src/markdown_utils.rs>.
//!   Its author authorized the use of this GPL code in this project in
//!   <https://github.com/raphlinus/pulldown-cmark/issues/507>.
//!
//! - <https://gist.github.com/rambip/a507c312ed61c99c24b2a54f98325721>.
//!   Its author proposed the solution in
//!   <https://github.com/raphlinus/pulldown-cmark/issues/708>.
11
12use alloc::string::String;
13use core::ops::Range;
14
15use crate::{CowStr, Event};
16
17/// Merge consecutive `Event::Text` events into only one.
18#[derive(Debug)]
19pub struct TextMergeStream<'a, I> {
20    inner: TextMergeWithOffset<'a, DummyOffsets<I>>,
21}
22
23impl<'a, I> TextMergeStream<'a, I>
24where
25    I: Iterator<Item = Event<'a>>,
26{
27    pub fn new(iter: I) -> Self {
28        Self {
29            inner: TextMergeWithOffset::new(DummyOffsets(iter)),
30        }
31    }
32}
33
34impl<'a, I> Iterator for TextMergeStream<'a, I>
35where
36    I: Iterator<Item = Event<'a>>,
37{
38    type Item = Event<'a>;
39
40    fn next(&mut self) -> Option<Self::Item> {
41        self.inner.next().map(|(event, _)| event)
42    }
43}
44
/// Private adapter that pairs every event of the wrapped iterator with an
/// empty `0..0` range, so a plain event stream can be fed into
/// [`TextMergeWithOffset`].
#[derive(Debug)]
struct DummyOffsets<I>(I);
47
48impl<'a, I> Iterator for DummyOffsets<I>
49where
50    I: Iterator<Item = Event<'a>>,
51{
52    type Item = (Event<'a>, Range<usize>);
53
54    fn next(&mut self) -> Option<Self::Item> {
55        self.0.next().map(|event| (event, 0..0))
56    }
57}
58
59/// Merge consecutive `Event::Text` events into only one, with offsets.
60///
61/// Compatible with with [`OffsetIter`](crate::OffsetIter).
62#[derive(Debug)]
63pub struct TextMergeWithOffset<'a, I> {
64    iter: I,
65    last_event: Option<(Event<'a>, Range<usize>)>,
66}
67
68impl<'a, I> TextMergeWithOffset<'a, I>
69where
70    I: Iterator<Item = (Event<'a>, Range<usize>)>,
71{
72    pub fn new(iter: I) -> Self {
73        Self {
74            iter,
75            last_event: None,
76        }
77    }
78
79    /// Access the inner iterator (e.g. to retrieve parser state after iteration).
80    pub fn inner(&self) -> &I {
81        &self.iter
82    }
83}
84
85impl<'a, I> Iterator for TextMergeWithOffset<'a, I>
86where
87    I: Iterator<Item = (Event<'a>, Range<usize>)>,
88{
89    type Item = (Event<'a>, Range<usize>);
90
91    fn next(&mut self) -> Option<Self::Item> {
92        match (self.last_event.take(), self.iter.next()) {
93            (
94                Some((Event::Text(last_text), last_offset)),
95                Some((Event::Text(next_text), next_offset)),
96            ) => {
97                // We need to start merging consecutive text events together into one
98                let mut string_buf: String = last_text.into_string();
99                string_buf.push_str(&next_text);
100                let mut offset = last_offset;
101                offset.end = next_offset.end;
102                loop {
103                    // Avoid recursion to avoid stack overflow and to optimize concatenation
104                    match self.iter.next() {
105                        Some((Event::Text(next_text), next_offset)) => {
106                            string_buf.push_str(&next_text);
107                            offset.end = next_offset.end;
108                        }
109                        next_event => {
110                            self.last_event = next_event;
111                            if string_buf.is_empty() {
112                                // Discard text event(s) altogether if there is no text
113                                break self.next();
114                            } else {
115                                break Some((
116                                    Event::Text(CowStr::Boxed(string_buf.into_boxed_str())),
117                                    offset,
118                                ));
119                            }
120                        }
121                    }
122                }
123            }
124            (None, Some(next_event)) => {
125                // This only happens once during the first iteration and if there are items
126                self.last_event = Some(next_event);
127                self.next()
128            }
129            (None, None) => {
130                // This happens when the iterator is depleted
131                None
132            }
133            (last_event, next_event) => {
134                // The ordinary case, emit one event after the other without modification
135                self.last_event = next_event;
136                last_event
137            }
138        }
139    }
140}
141
#[cfg(test)]
mod test {
    use alloc::vec::Vec;

    use super::*;
    use crate::Parser;

    /// The text of a two-line indented block must come out as one merged
    /// text event.
    #[test]
    fn text_merge_stream_indent() {
        let source = r#"
    first line
    second line
"#;
        let merged = TextMergeStream::new(Parser::new(source));

        // Keep only the text events; everything else is irrelevant here.
        let mut text_events = Vec::new();
        for event in merged {
            if let Event::Text(_) = event {
                text_events.push(event);
            }
        }

        let expected = [Event::Text("first line\nsecond line\n".into())];
        assert_eq!(text_events, expected);
    }

    /// Same input as above, but with offsets: the merged event must span
    /// from the start of the first line's text to the end of the input.
    #[test]
    fn text_merge_with_offset_indent() {
        let source = r#"
    first line
    second line
"#;
        let offset_events = Parser::new(source).into_offset_iter();
        let text_events: Vec<_> = TextMergeWithOffset::new(offset_events)
            .filter(|(event, _)| matches!(event, Event::Text(_)))
            .collect();

        let expected = [(Event::Text("first line\nsecond line\n".into()), 5..32)];
        assert_eq!(text_events, expected);
    }

    /// A run of text events whose merged content is empty must vanish.
    #[test]
    fn text_merge_empty_is_discarded() {
        let input = [
            Event::Rule,
            Event::Text("".into()),
            Event::Text("".into()),
            Event::Rule,
        ];

        let mut output = Vec::new();
        for event in TextMergeStream::new(input.into_iter()) {
            output.push(event);
        }

        assert_eq!(output, [Event::Rule, Event::Rule]);
    }
}
190}