satteri_pulldown_cmark/
utils.rs1use alloc::borrow::Cow;
13use alloc::string::String;
14use core::ops::Range;
15
16use crate::{CowStr, Event};
17
18pub fn decode_html_entities(s: &str) -> Cow<'_, str> {
26 if !s.contains('&') {
27 return Cow::Borrowed(s);
28 }
29 let bytes = s.as_bytes();
30 let mut out = String::with_capacity(s.len());
31 let mut i = 0;
32 while i < bytes.len() {
33 if bytes[i] == b'&' {
34 let (consumed, replacement) = crate::scanners::scan_entity(&bytes[i..]);
35 if consumed > 0 {
36 if let Some(rep) = replacement {
37 out.push_str(&rep);
38 }
39 i += consumed;
40 continue;
41 }
42 }
43 let b = bytes[i];
44 let ch_len = if b < 0xC0 {
45 1
46 } else if b < 0xE0 {
47 2
48 } else if b < 0xF0 {
49 3
50 } else {
51 4
52 };
53 out.push_str(&s[i..i + ch_len]);
54 i += ch_len;
55 }
56 Cow::Owned(out)
57}
58
/// Iterator adapter that merges consecutive [`Event::Text`] events into one.
///
/// This is a thin wrapper around [`TextMergeWithOffset`] for event streams
/// that carry no source ranges: each incoming event is paired with a
/// placeholder `0..0` range, and the range is dropped again on the way out.
#[derive(Debug)]
pub struct TextMergeStream<'a, I> {
    // Offset-aware merger, fed through `DummyOffsets` so it can be reused as-is.
    inner: TextMergeWithOffset<'a, DummyOffsets<I>>,
}
64
65impl<'a, I> TextMergeStream<'a, I>
66where
67 I: Iterator<Item = Event<'a>>,
68{
69 pub fn new(iter: I) -> Self {
70 Self {
71 inner: TextMergeWithOffset::new(DummyOffsets(iter)),
72 }
73 }
74}
75
76impl<'a, I> Iterator for TextMergeStream<'a, I>
77where
78 I: Iterator<Item = Event<'a>>,
79{
80 type Item = Event<'a>;
81
82 fn next(&mut self) -> Option<Self::Item> {
83 self.inner.next().map(|(event, _)| event)
84 }
85}
86
/// Adapter that pairs every event of the wrapped iterator with an empty
/// `0..0` range, giving it the item shape [`TextMergeWithOffset`] expects.
#[derive(Debug)]
struct DummyOffsets<I>(I);
89
90impl<'a, I> Iterator for DummyOffsets<I>
91where
92 I: Iterator<Item = Event<'a>>,
93{
94 type Item = (Event<'a>, Range<usize>);
95
96 fn next(&mut self) -> Option<Self::Item> {
97 self.0.next().map(|event| (event, 0..0))
98 }
99}
100
/// Iterator adapter over `(Event, Range<usize>)` pairs that merges
/// consecutive [`Event::Text`] events into a single text event.
///
/// The merged event's range starts where the first merged event starts and
/// ends where the last one ends.
#[derive(Debug)]
pub struct TextMergeWithOffset<'a, I> {
    // Source of `(event, range)` pairs.
    iter: I,
    // Item already pulled from `iter` but not yet handed to the caller; the
    // merger has to read one item ahead to know whether a text run continues.
    last_event: Option<(Event<'a>, Range<usize>)>,
}
109
110impl<'a, I> TextMergeWithOffset<'a, I>
111where
112 I: Iterator<Item = (Event<'a>, Range<usize>)>,
113{
114 pub fn new(iter: I) -> Self {
115 Self {
116 iter,
117 last_event: None,
118 }
119 }
120
121 pub fn inner(&self) -> &I {
123 &self.iter
124 }
125}
126
127impl<'a, I> Iterator for TextMergeWithOffset<'a, I>
128where
129 I: Iterator<Item = (Event<'a>, Range<usize>)>,
130{
131 type Item = (Event<'a>, Range<usize>);
132
133 fn next(&mut self) -> Option<Self::Item> {
134 match (self.last_event.take(), self.iter.next()) {
135 (
136 Some((Event::Text(last_text), last_offset)),
137 Some((Event::Text(next_text), next_offset)),
138 ) => {
139 let mut string_buf: String = last_text.into_string();
141 string_buf.push_str(&next_text);
142 let mut offset = last_offset;
143 offset.end = next_offset.end;
144 loop {
145 match self.iter.next() {
147 Some((Event::Text(next_text), next_offset)) => {
148 string_buf.push_str(&next_text);
149 offset.end = next_offset.end;
150 }
151 next_event => {
152 self.last_event = next_event;
153 if string_buf.is_empty() {
154 break self.next();
156 } else {
157 break Some((
158 Event::Text(CowStr::Boxed(string_buf.into_boxed_str())),
159 offset,
160 ));
161 }
162 }
163 }
164 }
165 }
166 (None, Some(next_event)) => {
167 self.last_event = Some(next_event);
169 self.next()
170 }
171 (None, None) => {
172 None
174 }
175 (last_event, next_event) => {
176 self.last_event = next_event;
178 last_event
179 }
180 }
181 }
182}
183
#[cfg(test)]
mod test {
    use alloc::vec::Vec;

    use super::*;
    use crate::Parser;

    /// The text of a two-line indented block must come out of
    /// `TextMergeStream` as one merged text event.
    #[test]
    fn text_merge_stream_indent() {
        let source = r#"
    first line
    second line
"#;
        let parser = TextMergeStream::new(Parser::new(source));
        // Only the text events matter here; everything else is filtered out.
        let text_events: Vec<_> = parser.filter(|e| matches!(e, Event::Text(_))).collect();
        assert_eq!(
            text_events,
            [Event::Text("first line\nsecond line\n".into())]
        );
    }

    /// Same input as above, but through `TextMergeWithOffset`: the merged
    /// event's range must run from the start of the first merged text event
    /// to the end of the last one.
    #[test]
    fn text_merge_with_offset_indent() {
        let source = r#"
    first line
    second line
"#;
        let parser = TextMergeWithOffset::new(Parser::new(source).into_offset_iter());
        let text_events: Vec<_> = parser
            .filter(|e| matches!(e, (Event::Text(_), _)))
            .collect();
        // 5 = leading newline plus four spaces of indentation; 32 = length of
        // the whole source, i.e. just past the final newline.
        assert_eq!(
            text_events,
            [(Event::Text("first line\nsecond line\n".into()), 5..32)]
        );
    }

    /// Two adjacent empty text events merge into an empty string, which the
    /// merger drops instead of emitting.
    #[test]
    fn text_merge_empty_is_discarded() {
        let events = [
            Event::Rule,
            Event::Text("".into()),
            Event::Text("".into()),
            Event::Rule,
        ];
        let result: Vec<_> = TextMergeStream::new(events.into_iter()).collect();
        assert_eq!(result, [Event::Rule, Event::Rule]);
    }
}