1use std::ops::Range;
2
3use anyhow::{Result, bail};
4use log::debug;
5use pulldown_cmark::{Event, Parser};
6
7#[derive(Clone, Debug, PartialEq)]
8pub struct Block<'a> {
9 pub closed: bool,
10 pub events: Vec<(Event<'a>, Range<usize>)>,
11 pub span: Range<usize>,
12 pub inner_span: Range<usize>,
13 pub has_nested: bool,
14}
15
16impl<'a> Block<'a> {
17 pub fn new(first_event: Event<'a>, first_span: Range<usize>) -> Self {
18 let span = first_span.clone();
19 let inner_span = 0..0;
20
21 Block {
22 closed: false,
23 events: vec![(first_event, first_span)],
24 span,
25 inner_span,
26 has_nested: false,
27 }
28 }
29}
30
31pub fn parse_blocks<IsStartFn, IsEndFn>(
32 content: &str,
33 is_start: IsStartFn,
34 is_end: IsEndFn,
35 skip_nested: bool,
36) -> Result<Vec<Block>>
37where
38 IsStartFn: Fn(&Event) -> bool,
39 IsEndFn: Fn(&Event) -> bool,
40{
41 let mut blocks: Vec<Block> = vec![];
42 let mut nested_level = 0;
43
44 for (event, span) in Parser::new(content).into_offset_iter() {
45 debug!("{:?} {:?}", event, span);
46
47 if is_start(&event) {
48 if let Some(block) = blocks.last_mut() {
49 if !block.closed {
50 if skip_nested {
51 nested_level += 1;
52 block.has_nested = true;
53 block.events.push((event, span));
54 continue;
55 } else {
56 bail!("Block is not closed. Nested blocks are not allowed.");
57 }
58 }
59 }
60
61 blocks.push(Block::new(event, span));
62 } else if is_end(&event) {
63 if let Some(block) = blocks.last_mut() {
64 if !block.closed {
65 if nested_level > 0 {
66 nested_level -= 1;
67 block.events.push((event, span));
68 continue;
69 }
70
71 block.closed = true;
72 block.span = block.span.start..span.end;
73 block.events.push((event, span));
74
75 let mut seen_first = false;
76 block.events.retain(|(_, span)| {
77 if !seen_first {
78 seen_first = true;
79 true
80 } else if span.start == block.span.start && span.end != block.span.end {
81 false
82 } else {
83 span.start >= block.span.start && span.end <= block.span.end
84 }
85 });
86
87 if let (Some((_, first)), Some((_, last))) = (
88 block.events.get(1),
89 block.events.get(block.events.len() - 2),
90 ) {
91 block.inner_span = first.start..last.end;
92 }
93 }
94 }
95 } else if let Some(block) = blocks.last_mut() {
96 if !block.closed && span.start >= block.span.start {
97 block.events.push((event, span));
98 }
99 }
100 }
101
102 Ok(blocks)
103}
104
#[cfg(test)]
mod test {
    use pulldown_cmark::{CodeBlockKind, CowStr, Tag, TagEnd};
    use test_log::test;

    use super::*;

    /// Builds a `Text` event holding `literal`.
    fn text(literal: &str) -> Event<'_> {
        Event::Text(CowStr::from(literal))
    }

    /// Builds the start event of a fenced code block with the given info string.
    fn fence_start(lang: &str) -> Event<'_> {
        Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(CowStr::from(lang))))
    }

    /// Builds the end event of a code block.
    fn fence_end() -> Event<'static> {
        Event::End(TagEnd::CodeBlock)
    }

    /// Start predicate: the opening of a fenced code block tagged `toml`.
    fn is_toml_fence(event: &Event) -> bool {
        matches!(
            event,
            Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(tag))) if *tag == CowStr::from("toml")
        )
    }

    /// End predicate: the end of any code block.
    fn is_fence_end(event: &Event) -> bool {
        matches!(event, Event::End(TagEnd::CodeBlock))
    }

    /// Returns a predicate matching `Text` events that start with `prefix`.
    fn text_starting_with(prefix: &'static str) -> impl Fn(&Event) -> bool {
        move |event| matches!(event, Event::Text(text) if text.starts_with(prefix))
    }

    /// A single fenced block that makes up the whole document.
    #[test]
    fn test_parse_blocks() -> Result<()> {
        let content = concat!(
            "```toml\n",
            "key1 = \"value1\"\n",
            "key2 = \"value2\"\n",
            "```",
        );

        let expected: Vec<Block> = vec![Block {
            closed: true,
            events: vec![
                (fence_start("toml"), 0..43),
                (text("key1 = \"value1\"\nkey2 = \"value2\"\n"), 8..40),
                (fence_end(), 0..43),
            ],
            span: 0..43,
            inner_span: 8..40,
            has_nested: false,
        }];

        let actual = parse_blocks(content, is_toml_fence, is_fence_end, false)?;

        assert_eq!(expected, actual);

        Ok(())
    }

    /// A fenced block with unrelated paragraphs before and after it.
    #[test]
    fn test_parse_blocks_surrounded() -> Result<()> {
        let content = concat!(
            "Some text before the code block.\n",
            "\n",
            "```toml\n",
            "key1 = \"value1\"\n",
            "key2 = \"value2\"\n",
            "```\n",
            "\n",
            "Some text after the code block.",
        );

        let expected: Vec<Block> = vec![Block {
            closed: true,
            events: vec![
                (fence_start("toml"), 34..77),
                (text("key1 = \"value1\"\nkey2 = \"value2\"\n"), 42..74),
                (fence_end(), 34..77),
            ],
            span: 34..77,
            inner_span: 42..74,
            has_nested: false,
        }];

        let actual = parse_blocks(content, is_toml_fence, is_fence_end, false)?;

        assert_eq!(expected, actual);

        Ok(())
    }

    /// Two matching blocks separated by a fenced block that does not match.
    #[test]
    fn test_parse_blocks_multiple() -> Result<()> {
        let content = concat!(
            "First TOML block:\n",
            "```toml\n",
            "key1 = \"value1\"\n",
            "key2 = \"value2\"\n",
            "```\n",
            "First non-TOML block:\n",
            "```shell\n",
            "echo test\n",
            "```\n",
            "Second TOML block:\n",
            "```toml\n",
            "key3 = \"value3\"\n",
            "key4 = \"value4\"\n",
            "```",
        );

        let first = Block {
            closed: true,
            events: vec![
                (fence_start("toml"), 18..61),
                (text("key1 = \"value1\"\nkey2 = \"value2\"\n"), 26..58),
                (fence_end(), 18..61),
            ],
            span: 18..61,
            inner_span: 26..58,
            has_nested: false,
        };
        let second = Block {
            closed: true,
            events: vec![
                (fence_start("toml"), 126..169),
                (text("key3 = \"value3\"\nkey4 = \"value4\"\n"), 134..166),
                (fence_end(), 126..169),
            ],
            span: 126..169,
            inner_span: 134..166,
            has_nested: false,
        };
        let expected: Vec<Block> = vec![first, second];

        let actual = parse_blocks(content, is_toml_fence, is_fence_end, false)?;

        assert_eq!(expected, actual);

        Ok(())
    }

    /// Blocks delimited by plain text markers rather than by Markdown tags.
    #[test]
    fn test_parse_blocks_text() -> Result<()> {
        let content = concat!(
            "{{#tab }}\n",
            "Some content.\n",
            "{{#endtab }}\n",
            "{{#tab }}\n",
            "\n",
            "```rust\n",
            "let a = 1 + 2;\n",
            "```\n",
            "\n",
            "{{#endtab }}\n",
        );

        let plain_tab = Block {
            closed: true,
            events: vec![
                (text("{{#tab }}"), 0..9),
                (Event::SoftBreak, 9..10),
                (text("Some content."), 10..23),
                (Event::SoftBreak, 23..24),
                (text("{{#endtab }}"), 24..36),
            ],
            span: 0..36,
            inner_span: 9..24,
            has_nested: false,
        };
        let code_tab = Block {
            closed: true,
            events: vec![
                (text("{{#tab }}"), 37..46),
                (fence_start("rust"), 48..74),
                (text("let a = 1 + 2;\n"), 56..71),
                (fence_end(), 48..74),
                (text("{{#endtab }}"), 76..88),
            ],
            span: 37..88,
            inner_span: 48..74,
            has_nested: false,
        };
        let expected: Vec<Block> = vec![plain_tab, code_tab];

        let actual = parse_blocks(
            content,
            text_starting_with("{{#tab "),
            text_starting_with("{{#endtab "),
            false,
        )?;

        assert_eq!(expected, actual);

        Ok(())
    }

    /// Nested delimiters are rejected when `skip_nested` is disabled.
    #[test]
    fn test_parse_blocks_nested_error() -> Result<()> {
        let content = "*a **sentence** with **some** words*";

        let opens = |event: &Event| {
            matches!(
                event,
                Event::Start(Tag::Emphasis) | Event::Start(Tag::Strong)
            )
        };
        let closes = |event: &Event| {
            matches!(
                event,
                Event::End(TagEnd::Emphasis) | Event::End(TagEnd::Strong)
            )
        };

        let actual = parse_blocks(content, opens, closes, false);

        assert_eq!(
            "Block is not closed. Nested blocks are not allowed.",
            actual.unwrap_err().root_cause().to_string()
        );

        Ok(())
    }

    /// Nested delimiters are folded into the outermost block when `skip_nested` is enabled.
    #[test]
    fn test_parse_blocks_nested() -> Result<()> {
        let content = concat!(
            "{{#tabs }}\n",
            "Level 1\n",
            "{{#tabs }}\n",
            "Level 2\n",
            "{{#tabs }}\n",
            "Level 3\n",
            "{{#endtabs }}\n",
            "{{#endtabs }}\n",
            "{{#endtabs }}\n",
        );

        let expected: Vec<Block> = vec![Block {
            closed: true,
            events: vec![
                (text("{{#tabs }}"), 0..10),
                (Event::SoftBreak, 10..11),
                (text("Level 1"), 11..18),
                (Event::SoftBreak, 18..19),
                (text("{{#tabs }}"), 19..29),
                (Event::SoftBreak, 29..30),
                (text("Level 2"), 30..37),
                (Event::SoftBreak, 37..38),
                (text("{{#tabs }}"), 38..48),
                (Event::SoftBreak, 48..49),
                (text("Level 3"), 49..56),
                (Event::SoftBreak, 56..57),
                (text("{{#endtabs }}"), 57..70),
                (Event::SoftBreak, 70..71),
                (text("{{#endtabs }}"), 71..84),
                (Event::SoftBreak, 84..85),
                (text("{{#endtabs }}"), 85..98),
            ],
            span: 0..98,
            inner_span: 10..85,
            has_nested: true,
        }];

        let actual = parse_blocks(
            content,
            text_starting_with("{{#tabs "),
            text_starting_with("{{#endtabs "),
            true,
        )?;

        assert_eq!(expected, actual);

        Ok(())
    }
}