mdbook_plugin_utils/markdown/
block.rs

1use std::ops::Range;
2
3use anyhow::{Result, bail};
4use log::debug;
5use pulldown_cmark::{Event, Parser};
6
7#[derive(Clone, Debug, PartialEq)]
8pub struct Block<'a> {
9    pub closed: bool,
10    pub events: Vec<(Event<'a>, Range<usize>)>,
11    pub span: Range<usize>,
12    pub inner_span: Range<usize>,
13    pub has_nested: bool,
14}
15
16impl<'a> Block<'a> {
17    pub fn new(first_event: Event<'a>, first_span: Range<usize>) -> Self {
18        let span = first_span.clone();
19        let inner_span = 0..0;
20
21        Block {
22            closed: false,
23            events: vec![(first_event, first_span)],
24            span,
25            inner_span,
26            has_nested: false,
27        }
28    }
29}
30
31pub fn parse_blocks<IsStartFn, IsEndFn>(
32    content: &str,
33    is_start: IsStartFn,
34    is_end: IsEndFn,
35    skip_nested: bool,
36) -> Result<Vec<Block>>
37where
38    IsStartFn: Fn(&Event) -> bool,
39    IsEndFn: Fn(&Event) -> bool,
40{
41    let mut blocks: Vec<Block> = vec![];
42    let mut nested_level = 0;
43
44    for (event, span) in Parser::new(content).into_offset_iter() {
45        debug!("{:?} {:?}", event, span);
46
47        if is_start(&event) {
48            if let Some(block) = blocks.last_mut() {
49                if !block.closed {
50                    if skip_nested {
51                        nested_level += 1;
52                        block.has_nested = true;
53                        block.events.push((event, span));
54                        continue;
55                    } else {
56                        bail!("Block is not closed. Nested blocks are not allowed.");
57                    }
58                }
59            }
60
61            blocks.push(Block::new(event, span));
62        } else if is_end(&event) {
63            if let Some(block) = blocks.last_mut() {
64                if !block.closed {
65                    if nested_level > 0 {
66                        nested_level -= 1;
67                        block.events.push((event, span));
68                        continue;
69                    }
70
71                    block.closed = true;
72                    block.span = block.span.start..span.end;
73                    block.events.push((event, span));
74
75                    let mut seen_first = false;
76                    block.events.retain(|(_, span)| {
77                        if !seen_first {
78                            seen_first = true;
79                            true
80                        } else if span.start == block.span.start && span.end != block.span.end {
81                            false
82                        } else {
83                            span.start >= block.span.start && span.end <= block.span.end
84                        }
85                    });
86
87                    if let (Some((_, first)), Some((_, last))) = (
88                        block.events.get(1),
89                        block.events.get(block.events.len() - 2),
90                    ) {
91                        block.inner_span = first.start..last.end;
92                    }
93                }
94            }
95        } else if let Some(block) = blocks.last_mut() {
96            if !block.closed && span.start >= block.span.start {
97                block.events.push((event, span));
98            }
99        }
100    }
101
102    Ok(blocks)
103}
104
105#[cfg(test)]
106mod test {
107    use pulldown_cmark::{CodeBlockKind, CowStr, Tag, TagEnd};
108    use test_log::test;
109
110    use super::*;
111
112    #[test]
113    fn test_parse_blocks() -> Result<()> {
114        let content = "\
115        ```toml\n\
116        key1 = \"value1\"\n\
117        key2 = \"value2\"\n\
118        ```";
119        let expected: Vec<Block> = vec![Block {
120            closed: true,
121            events: vec![
122                (
123                    Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(CowStr::from("toml")))),
124                    0..43,
125                ),
126                (
127                    Event::Text(CowStr::from("key1 = \"value1\"\nkey2 = \"value2\"\n")),
128                    8..40,
129                ),
130                (Event::End(TagEnd::CodeBlock), 0..43),
131            ],
132            span: 0..43,
133            inner_span: 8..40,
134            has_nested: false,
135        }];
136
137        let actual = parse_blocks(
138            content,
139            |event| matches!(event, Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(tag))) if tag == &CowStr::from("toml")),
140            |event| matches!(event, Event::End(TagEnd::CodeBlock)),
141            false,
142        )?;
143
144        assert_eq!(expected, actual);
145
146        Ok(())
147    }
148
149    #[test]
150    fn test_parse_blocks_surrounded() -> Result<()> {
151        let content = "\
152        Some text before the code block.\n\
153        \n\
154        ```toml\n\
155        key1 = \"value1\"\n\
156        key2 = \"value2\"\n\
157        ```\n\
158        \n\
159        Some text after the code block.";
160        let expected: Vec<Block> = vec![Block {
161            closed: true,
162            events: vec![
163                (
164                    Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(CowStr::from("toml")))),
165                    34..77,
166                ),
167                (
168                    Event::Text(CowStr::from("key1 = \"value1\"\nkey2 = \"value2\"\n")),
169                    42..74,
170                ),
171                (Event::End(TagEnd::CodeBlock), 34..77),
172            ],
173            span: 34..77,
174            inner_span: 42..74,
175            has_nested: false,
176        }];
177
178        let actual = parse_blocks(
179            content,
180            |event| matches!(event, Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(tag))) if tag == &CowStr::from("toml")),
181            |event| matches!(event, Event::End(TagEnd::CodeBlock)),
182            false,
183        )?;
184
185        assert_eq!(expected, actual);
186
187        Ok(())
188    }
189
190    #[test]
191    fn test_parse_blocks_multiple() -> Result<()> {
192        let content = "\
193        First TOML block:\n\
194        ```toml\n\
195        key1 = \"value1\"\n\
196        key2 = \"value2\"\n\
197        ```\n\
198        First non-TOML block:\n\
199        ```shell\n\
200        echo test\n\
201        ```\n\
202        Second TOML block:\n\
203        ```toml\n\
204        key3 = \"value3\"\n\
205        key4 = \"value4\"\n\
206        ```";
207        let expected: Vec<Block> = vec![
208            Block {
209                closed: true,
210                events: vec![
211                    (
212                        Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(CowStr::from("toml")))),
213                        18..61,
214                    ),
215                    (
216                        Event::Text(CowStr::from("key1 = \"value1\"\nkey2 = \"value2\"\n")),
217                        26..58,
218                    ),
219                    (Event::End(TagEnd::CodeBlock), 18..61),
220                ],
221                span: 18..61,
222                inner_span: 26..58,
223                has_nested: false,
224            },
225            Block {
226                closed: true,
227                events: vec![
228                    (
229                        Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(CowStr::from("toml")))),
230                        126..169,
231                    ),
232                    (
233                        Event::Text(CowStr::from("key3 = \"value3\"\nkey4 = \"value4\"\n")),
234                        134..166,
235                    ),
236                    (Event::End(TagEnd::CodeBlock), 126..169),
237                ],
238                span: 126..169,
239                inner_span: 134..166,
240                has_nested: false,
241            },
242        ];
243
244        let actual = parse_blocks(
245            content,
246            |event| matches!(event, Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(tag))) if tag == &CowStr::from("toml")),
247            |event| matches!(event, Event::End(TagEnd::CodeBlock)),
248            false,
249        )?;
250
251        assert_eq!(expected, actual);
252
253        Ok(())
254    }
255
256    #[test]
257    fn test_parse_blocks_text() -> Result<()> {
258        let content = "\
259        {{#tab }}\n\
260        Some content.\n\
261        {{#endtab }}\n\
262        {{#tab }}\n\
263        \n\
264        ```rust\n\
265        let a = 1 + 2;\n\
266        ```\n\
267        \n\
268        {{#endtab }}\n\
269        ";
270        let expected: Vec<Block> = vec![
271            Block {
272                closed: true,
273                events: vec![
274                    (Event::Text(CowStr::from("{{#tab }}")), 0..9),
275                    (Event::SoftBreak, 9..10),
276                    (Event::Text(CowStr::from("Some content.")), 10..23),
277                    (Event::SoftBreak, 23..24),
278                    (Event::Text(CowStr::from("{{#endtab }}")), 24..36),
279                ],
280                span: 0..36,
281                inner_span: 9..24,
282                has_nested: false,
283            },
284            Block {
285                closed: true,
286                events: vec![
287                    (Event::Text(CowStr::from("{{#tab }}")), 37..46),
288                    (
289                        Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(CowStr::from("rust")))),
290                        48..74,
291                    ),
292                    (Event::Text(CowStr::from("let a = 1 + 2;\n")), 56..71),
293                    (Event::End(TagEnd::CodeBlock), 48..74),
294                    (Event::Text(CowStr::from("{{#endtab }}")), 76..88),
295                ],
296                span: 37..88,
297                inner_span: 48..74,
298                has_nested: false,
299            },
300        ];
301
302        let actual = parse_blocks(
303            content,
304            |event| matches!(event, Event::Text(text) if text.starts_with("{{#tab ")),
305            |event| matches!(event, Event::Text(text) if text.starts_with("{{#endtab ")),
306            false,
307        )?;
308
309        assert_eq!(expected, actual);
310
311        Ok(())
312    }
313
314    #[test]
315    fn test_parse_blocks_nested_error() -> Result<()> {
316        let content = "*a **sentence** with **some** words*";
317
318        let actual = parse_blocks(
319            content,
320            |event| {
321                matches!(
322                    event,
323                    Event::Start(Tag::Emphasis) | Event::Start(Tag::Strong)
324                )
325            },
326            |event| {
327                matches!(
328                    event,
329                    Event::End(TagEnd::Emphasis) | Event::End(TagEnd::Strong)
330                )
331            },
332            false,
333        );
334
335        assert_eq!(
336            "Block is not closed. Nested blocks are not allowed.",
337            format!("{}", actual.unwrap_err().root_cause())
338        );
339
340        Ok(())
341    }
342
343    #[test]
344    fn test_parse_blocks_nested() -> Result<()> {
345        let content = "\
346        {{#tabs }}\n\
347        Level 1\n\
348        {{#tabs }}\n\
349        Level 2\n\
350        {{#tabs }}\n\
351        Level 3\n\
352        {{#endtabs }}\n\
353        {{#endtabs }}\n\
354        {{#endtabs }}\n\
355        ";
356
357        let expected: Vec<Block> = vec![Block {
358            closed: true,
359            events: vec![
360                (Event::Text(CowStr::from("{{#tabs }}")), 0..10),
361                (Event::SoftBreak, 10..11),
362                (Event::Text(CowStr::from("Level 1")), 11..18),
363                (Event::SoftBreak, 18..19),
364                (Event::Text(CowStr::from("{{#tabs }}")), 19..29),
365                (Event::SoftBreak, 29..30),
366                (Event::Text(CowStr::from("Level 2")), 30..37),
367                (Event::SoftBreak, 37..38),
368                (Event::Text(CowStr::from("{{#tabs }}")), 38..48),
369                (Event::SoftBreak, 48..49),
370                (Event::Text(CowStr::from("Level 3")), 49..56),
371                (Event::SoftBreak, 56..57),
372                (Event::Text(CowStr::from("{{#endtabs }}")), 57..70),
373                (Event::SoftBreak, 70..71),
374                (Event::Text(CowStr::from("{{#endtabs }}")), 71..84),
375                (Event::SoftBreak, 84..85),
376                (Event::Text(CowStr::from("{{#endtabs }}")), 85..98),
377            ],
378            span: 0..98,
379            inner_span: 10..85,
380            has_nested: true,
381        }];
382
383        let actual = parse_blocks(
384            content,
385            |event| matches!(event, Event::Text(text) if text.starts_with("{{#tabs ")),
386            |event| matches!(event, Event::Text(text) if text.starts_with("{{#endtabs ")),
387            true,
388        )?;
389
390        assert_eq!(expected, actual);
391
392        Ok(())
393    }
394}