1use std::ops::Range;
2
3use anyhow::{Result, bail};
4use log::debug;
5use pulldown_cmark::{Event, Parser};
6
/// A group of markdown events delimited by a start and an end marker, as collected by
/// `parse_blocks`.
#[derive(Clone, Debug, PartialEq)]
pub struct Block<'a> {
    /// `true` once `parse_blocks` has matched the end marker of this block.
    pub closed: bool,
    /// The events that belong to the block, each paired with the source range the parser
    /// reported for it. The first entry is always the event that opened the block.
    pub events: Vec<(Event<'a>, Range<usize>)>,
    /// Source range of the block. Starts out as the range of the opening event and is
    /// extended to the end of the closing event when the block is closed.
    pub span: Range<usize>,
    /// Range between the markers: from the start of the second event to the end of the
    /// second-to-last event. Stays `0..0` until the block is closed.
    pub inner_span: Range<usize>,
    /// `true` if a nested start marker was encountered while this block was open.
    pub has_nested: bool,
}
15
16impl<'a> Block<'a> {
17 pub fn new(first_event: Event<'a>, first_span: Range<usize>) -> Self {
18 let span = first_span.clone();
19 let inner_span = 0..0;
20
21 Block {
22 closed: false,
23 events: vec![(first_event, first_span)],
24 span,
25 inner_span,
26 has_nested: false,
27 }
28 }
29}
30
31pub fn parse_blocks<IsStartFn, IsEndFn>(
32 content: &str,
33 is_start: IsStartFn,
34 is_end: IsEndFn,
35 skip_nested: bool,
36) -> Result<Vec<Block<'_>>>
37where
38 IsStartFn: Fn(&Event) -> bool,
39 IsEndFn: Fn(&Event) -> bool,
40{
41 let mut blocks: Vec<Block> = vec![];
42 let mut nested_level = 0;
43
44 for (event, span) in Parser::new(content).into_offset_iter() {
45 debug!("{event:?} {span:?}");
46
47 if is_start(&event) {
48 if let Some(block) = blocks.last_mut()
49 && !block.closed
50 {
51 if skip_nested {
52 nested_level += 1;
53 block.has_nested = true;
54 block.events.push((event, span));
55 continue;
56 } else {
57 bail!("Block is not closed. Nested blocks are not allowed.");
58 }
59 }
60
61 blocks.push(Block::new(event, span));
62 } else if is_end(&event) {
63 if let Some(block) = blocks.last_mut()
64 && !block.closed
65 {
66 if nested_level > 0 {
67 nested_level -= 1;
68 block.events.push((event, span));
69 continue;
70 }
71
72 block.closed = true;
73 block.span = block.span.start..span.end;
74 block.events.push((event, span));
75
76 let mut seen_first = false;
77 block.events.retain(|(_, span)| {
78 if !seen_first {
79 seen_first = true;
80 true
81 } else if span.start == block.span.start && span.end != block.span.end {
82 false
83 } else {
84 span.start >= block.span.start && span.end <= block.span.end
85 }
86 });
87
88 if let (Some((_, first)), Some((_, last))) = (
89 block.events.get(1),
90 block.events.get(block.events.len() - 2),
91 ) {
92 block.inner_span = first.start..last.end;
93 }
94 }
95 } else if let Some(block) = blocks.last_mut()
96 && !block.closed
97 && span.start >= block.span.start
98 {
99 block.events.push((event, span));
100 }
101 }
102
103 Ok(blocks)
104}
105
#[cfg(test)]
mod test {
    use pulldown_cmark::{CodeBlockKind, CowStr, Tag, TagEnd};
    use test_log::test;

    use super::*;

    /// Start predicate: the opening of a fenced code block whose info string is `toml`.
    fn is_toml_fence_start(event: &Event) -> bool {
        matches!(
            event,
            Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(tag))) if tag == &CowStr::from("toml")
        )
    }

    /// End predicate: the end of any code block.
    fn is_code_block_end(event: &Event) -> bool {
        matches!(event, Event::End(TagEnd::CodeBlock))
    }

    /// Builds the closed, non-nested block expected for a `toml` fence occupying `span`
    /// whose text `body` occupies `inner_span`.
    fn toml_block(body: &str, span: Range<usize>, inner_span: Range<usize>) -> Block<'_> {
        Block {
            closed: true,
            events: vec![
                (
                    Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(CowStr::from("toml")))),
                    span.clone(),
                ),
                (Event::Text(CowStr::from(body)), inner_span.clone()),
                (Event::End(TagEnd::CodeBlock), span.clone()),
            ],
            span,
            inner_span,
            has_nested: false,
        }
    }

    /// Shorthand for an expected `Event::Text` entry.
    fn text_event(content: &str, span: Range<usize>) -> (Event<'_>, Range<usize>) {
        (Event::Text(CowStr::from(content)), span)
    }

    /// Shorthand for an expected `Event::SoftBreak` entry.
    fn soft_break_event(span: Range<usize>) -> (Event<'static>, Range<usize>) {
        (Event::SoftBreak, span)
    }

    /// A document consisting of a single `toml` fence yields exactly one block.
    #[test]
    fn test_parse_blocks() -> Result<()> {
        let content = "\
            ```toml\n\
            key1 = \"value1\"\n\
            key2 = \"value2\"\n\
            ```";
        let expected: Vec<Block> = vec![toml_block(
            "key1 = \"value1\"\nkey2 = \"value2\"\n",
            0..43,
            8..40,
        )];

        let actual = parse_blocks(content, is_toml_fence_start, is_code_block_end, false)?;

        assert_eq!(expected, actual);

        Ok(())
    }

    /// Paragraphs before and after the fence are not part of the block.
    #[test]
    fn test_parse_blocks_surrounded() -> Result<()> {
        let content = "\
            Some text before the code block.\n\
            \n\
            ```toml\n\
            key1 = \"value1\"\n\
            key2 = \"value2\"\n\
            ```\n\
            \n\
            Some text after the code block.";
        let expected: Vec<Block> = vec![toml_block(
            "key1 = \"value1\"\nkey2 = \"value2\"\n",
            34..77,
            42..74,
        )];

        let actual = parse_blocks(content, is_toml_fence_start, is_code_block_end, false)?;

        assert_eq!(expected, actual);

        Ok(())
    }

    /// Only the two `toml` fences are reported; the `shell` fence in between is skipped.
    #[test]
    fn test_parse_blocks_multiple() -> Result<()> {
        let content = "\
            First TOML block:\n\
            ```toml\n\
            key1 = \"value1\"\n\
            key2 = \"value2\"\n\
            ```\n\
            First non-TOML block:\n\
            ```shell\n\
            echo test\n\
            ```\n\
            Second TOML block:\n\
            ```toml\n\
            key3 = \"value3\"\n\
            key4 = \"value4\"\n\
            ```";
        let expected: Vec<Block> = vec![
            toml_block("key1 = \"value1\"\nkey2 = \"value2\"\n", 18..61, 26..58),
            toml_block(
                "key3 = \"value3\"\nkey4 = \"value4\"\n",
                126..169,
                134..166,
            ),
        ];

        let actual = parse_blocks(content, is_toml_fence_start, is_code_block_end, false)?;

        assert_eq!(expected, actual);

        Ok(())
    }

    /// Blocks can also be delimited by plain text markers instead of tags.
    #[test]
    fn test_parse_blocks_text() -> Result<()> {
        let content = "\
            {{#tab }}\n\
            Some content.\n\
            {{#endtab }}\n\
            {{#tab }}\n\
            \n\
            ```rust\n\
            let a = 1 + 2;\n\
            ```\n\
            \n\
            {{#endtab }}\n\
            ";
        let first_tab = Block {
            closed: true,
            events: vec![
                text_event("{{#tab }}", 0..9),
                soft_break_event(9..10),
                text_event("Some content.", 10..23),
                soft_break_event(23..24),
                text_event("{{#endtab }}", 24..36),
            ],
            span: 0..36,
            inner_span: 9..24,
            has_nested: false,
        };
        let second_tab = Block {
            closed: true,
            events: vec![
                text_event("{{#tab }}", 37..46),
                (
                    Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(CowStr::from("rust")))),
                    48..74,
                ),
                text_event("let a = 1 + 2;\n", 56..71),
                (Event::End(TagEnd::CodeBlock), 48..74),
                text_event("{{#endtab }}", 76..88),
            ],
            span: 37..88,
            inner_span: 48..74,
            has_nested: false,
        };
        let expected: Vec<Block> = vec![first_tab, second_tab];

        let actual = parse_blocks(
            content,
            |event| matches!(event, Event::Text(text) if text.starts_with("{{#tab ")),
            |event| matches!(event, Event::Text(text) if text.starts_with("{{#endtab ")),
            false,
        )?;

        assert_eq!(expected, actual);

        Ok(())
    }

    /// With `skip_nested == false`, a start marker inside an open block is an error.
    #[test]
    fn test_parse_blocks_nested_error() -> Result<()> {
        let content = "*a **sentence** with **some** words*";

        let outcome = parse_blocks(
            content,
            |event| matches!(event, Event::Start(Tag::Emphasis | Tag::Strong)),
            |event| matches!(event, Event::End(TagEnd::Emphasis | TagEnd::Strong)),
            false,
        );

        assert_eq!(
            "Block is not closed. Nested blocks are not allowed.",
            outcome.unwrap_err().root_cause().to_string()
        );

        Ok(())
    }

    /// With `skip_nested == true`, nested markers are folded into the outermost block.
    #[test]
    fn test_parse_blocks_nested() -> Result<()> {
        let content = "\
            {{#tabs }}\n\
            Level 1\n\
            {{#tabs }}\n\
            Level 2\n\
            {{#tabs }}\n\
            Level 3\n\
            {{#endtabs }}\n\
            {{#endtabs }}\n\
            {{#endtabs }}\n\
            ";

        let expected: Vec<Block> = vec![Block {
            closed: true,
            events: vec![
                text_event("{{#tabs }}", 0..10),
                soft_break_event(10..11),
                text_event("Level 1", 11..18),
                soft_break_event(18..19),
                text_event("{{#tabs }}", 19..29),
                soft_break_event(29..30),
                text_event("Level 2", 30..37),
                soft_break_event(37..38),
                text_event("{{#tabs }}", 38..48),
                soft_break_event(48..49),
                text_event("Level 3", 49..56),
                soft_break_event(56..57),
                text_event("{{#endtabs }}", 57..70),
                soft_break_event(70..71),
                text_event("{{#endtabs }}", 71..84),
                soft_break_event(84..85),
                text_event("{{#endtabs }}", 85..98),
            ],
            span: 0..98,
            inner_span: 10..85,
            has_nested: true,
        }];

        let actual = parse_blocks(
            content,
            |event| matches!(event, Event::Text(text) if text.starts_with("{{#tabs ")),
            |event| matches!(event, Event::Text(text) if text.starts_with("{{#endtabs ")),
            true,
        )?;

        assert_eq!(expected, actual);

        Ok(())
    }
}