mdbook_plugin_utils/markdown/
block.rs

1use std::ops::Range;
2
3use anyhow::{Result, bail};
4use log::debug;
5use pulldown_cmark::{Event, Parser};
6
/// A run of markdown events delimited by a caller-defined start and end event.
///
/// Instances are created by [`Block::new`] when a start event is seen and are
/// filled in by [`parse_blocks`] as further events arrive.
#[derive(Clone, Debug, PartialEq)]
pub struct Block<'a> {
    /// `true` once `parse_blocks` has matched an end event for this block.
    pub closed: bool,
    /// Events collected for the block, each paired with the source range the
    /// parser reported for it. The first entry is always the start event.
    pub events: Vec<(Event<'a>, Range<usize>)>,
    /// Source range of the block: initially the range of the start event,
    /// extended to the end of the closing event when the block is closed.
    pub span: Range<usize>,
    /// Source range between the delimiters. Starts out as `0..0` and is set on
    /// close from the second retained event's start to the second-to-last
    /// retained event's end.
    pub inner_span: Range<usize>,
    /// `true` if another start event was encountered while this block was open
    /// and nested blocks were being skipped.
    pub has_nested: bool,
}
15
16impl<'a> Block<'a> {
17    pub fn new(first_event: Event<'a>, first_span: Range<usize>) -> Self {
18        let span = first_span.clone();
19        let inner_span = 0..0;
20
21        Block {
22            closed: false,
23            events: vec![(first_event, first_span)],
24            span,
25            inner_span,
26            has_nested: false,
27        }
28    }
29}
30
31pub fn parse_blocks<IsStartFn, IsEndFn>(
32    content: &str,
33    is_start: IsStartFn,
34    is_end: IsEndFn,
35    skip_nested: bool,
36) -> Result<Vec<Block<'_>>>
37where
38    IsStartFn: Fn(&Event) -> bool,
39    IsEndFn: Fn(&Event) -> bool,
40{
41    let mut blocks: Vec<Block> = vec![];
42    let mut nested_level = 0;
43
44    for (event, span) in Parser::new(content).into_offset_iter() {
45        debug!("{event:?} {span:?}");
46
47        if is_start(&event) {
48            if let Some(block) = blocks.last_mut()
49                && !block.closed
50            {
51                if skip_nested {
52                    nested_level += 1;
53                    block.has_nested = true;
54                    block.events.push((event, span));
55                    continue;
56                } else {
57                    bail!("Block is not closed. Nested blocks are not allowed.");
58                }
59            }
60
61            blocks.push(Block::new(event, span));
62        } else if is_end(&event) {
63            if let Some(block) = blocks.last_mut()
64                && !block.closed
65            {
66                if nested_level > 0 {
67                    nested_level -= 1;
68                    block.events.push((event, span));
69                    continue;
70                }
71
72                block.closed = true;
73                block.span = block.span.start..span.end;
74                block.events.push((event, span));
75
76                let mut seen_first = false;
77                block.events.retain(|(_, span)| {
78                    if !seen_first {
79                        seen_first = true;
80                        true
81                    } else if span.start == block.span.start && span.end != block.span.end {
82                        false
83                    } else {
84                        span.start >= block.span.start && span.end <= block.span.end
85                    }
86                });
87
88                if let (Some((_, first)), Some((_, last))) = (
89                    block.events.get(1),
90                    block.events.get(block.events.len() - 2),
91                ) {
92                    block.inner_span = first.start..last.end;
93                }
94            }
95        } else if let Some(block) = blocks.last_mut()
96            && !block.closed
97            && span.start >= block.span.start
98        {
99            block.events.push((event, span));
100        }
101    }
102
103    Ok(blocks)
104}
105
#[cfg(test)]
mod test {
    use pulldown_cmark::{CodeBlockKind, CowStr, Tag, TagEnd};
    use test_log::test;

    use super::*;

    /// Builds the start event of a fenced code block with the given info string.
    fn fence_start(lang: &str) -> Event<'_> {
        Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(CowStr::from(lang))))
    }

    /// Builds a text event borrowing `value`.
    fn text(value: &str) -> Event<'_> {
        Event::Text(CowStr::from(value))
    }

    /// Start predicate: the opening of a fenced code block tagged `toml`.
    fn is_toml_fence(event: &Event) -> bool {
        matches!(
            event,
            Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced(tag))) if tag == &CowStr::from("toml")
        )
    }

    /// End predicate: the end of any code block.
    fn is_code_block_end(event: &Event) -> bool {
        matches!(event, Event::End(TagEnd::CodeBlock))
    }

    /// Expected result for a closed `toml` code block whose only content is a
    /// single text event covering `inner_span`.
    fn toml_block(body: &str, span: Range<usize>, inner_span: Range<usize>) -> Block<'_> {
        Block {
            closed: true,
            events: vec![
                (fence_start("toml"), span.clone()),
                (text(body), inner_span.clone()),
                (Event::End(TagEnd::CodeBlock), span.clone()),
            ],
            span,
            inner_span,
            has_nested: false,
        }
    }

    /// A document consisting of exactly one matching code block.
    #[test]
    fn test_parse_blocks() -> Result<()> {
        let content = "\
        ```toml\n\
        key1 = \"value1\"\n\
        key2 = \"value2\"\n\
        ```";
        let expected: Vec<Block> = vec![toml_block(
            "key1 = \"value1\"\nkey2 = \"value2\"\n",
            0..43,
            8..40,
        )];

        let actual = parse_blocks(content, is_toml_fence, is_code_block_end, false)?;

        assert_eq!(expected, actual);

        Ok(())
    }

    /// A matching code block with unrelated paragraphs before and after it.
    #[test]
    fn test_parse_blocks_surrounded() -> Result<()> {
        let content = "\
        Some text before the code block.\n\
        \n\
        ```toml\n\
        key1 = \"value1\"\n\
        key2 = \"value2\"\n\
        ```\n\
        \n\
        Some text after the code block.";
        let expected: Vec<Block> = vec![toml_block(
            "key1 = \"value1\"\nkey2 = \"value2\"\n",
            34..77,
            42..74,
        )];

        let actual = parse_blocks(content, is_toml_fence, is_code_block_end, false)?;

        assert_eq!(expected, actual);

        Ok(())
    }

    /// Two matching code blocks separated by a non-matching one.
    #[test]
    fn test_parse_blocks_multiple() -> Result<()> {
        let content = "\
        First TOML block:\n\
        ```toml\n\
        key1 = \"value1\"\n\
        key2 = \"value2\"\n\
        ```\n\
        First non-TOML block:\n\
        ```shell\n\
        echo test\n\
        ```\n\
        Second TOML block:\n\
        ```toml\n\
        key3 = \"value3\"\n\
        key4 = \"value4\"\n\
        ```";
        let expected: Vec<Block> = vec![
            toml_block("key1 = \"value1\"\nkey2 = \"value2\"\n", 18..61, 26..58),
            toml_block("key3 = \"value3\"\nkey4 = \"value4\"\n", 126..169, 134..166),
        ];

        let actual = parse_blocks(content, is_toml_fence, is_code_block_end, false)?;

        assert_eq!(expected, actual);

        Ok(())
    }

    /// Blocks delimited by text markers rather than by markdown tags.
    #[test]
    fn test_parse_blocks_text() -> Result<()> {
        fn opens_tab(event: &Event) -> bool {
            matches!(event, Event::Text(value) if value.starts_with("{{#tab "))
        }

        fn closes_tab(event: &Event) -> bool {
            matches!(event, Event::Text(value) if value.starts_with("{{#endtab "))
        }

        let content = "\
        {{#tab }}\n\
        Some content.\n\
        {{#endtab }}\n\
        {{#tab }}\n\
        \n\
        ```rust\n\
        let a = 1 + 2;\n\
        ```\n\
        \n\
        {{#endtab }}\n\
        ";

        let first = Block {
            closed: true,
            events: vec![
                (text("{{#tab }}"), 0..9),
                (Event::SoftBreak, 9..10),
                (text("Some content."), 10..23),
                (Event::SoftBreak, 23..24),
                (text("{{#endtab }}"), 24..36),
            ],
            span: 0..36,
            inner_span: 9..24,
            has_nested: false,
        };
        let second = Block {
            closed: true,
            events: vec![
                (text("{{#tab }}"), 37..46),
                (fence_start("rust"), 48..74),
                (text("let a = 1 + 2;\n"), 56..71),
                (Event::End(TagEnd::CodeBlock), 48..74),
                (text("{{#endtab }}"), 76..88),
            ],
            span: 37..88,
            inner_span: 48..74,
            has_nested: false,
        };
        let expected: Vec<Block> = vec![first, second];

        let actual = parse_blocks(content, opens_tab, closes_tab, false)?;

        assert_eq!(expected, actual);

        Ok(())
    }

    /// A nested start event is rejected when nesting is not being skipped.
    #[test]
    fn test_parse_blocks_nested_error() -> Result<()> {
        fn is_emphasis_start(event: &Event) -> bool {
            matches!(
                event,
                Event::Start(Tag::Emphasis) | Event::Start(Tag::Strong)
            )
        }

        fn is_emphasis_end(event: &Event) -> bool {
            matches!(
                event,
                Event::End(TagEnd::Emphasis) | Event::End(TagEnd::Strong)
            )
        }

        let content = "*a **sentence** with **some** words*";

        let outcome = parse_blocks(content, is_emphasis_start, is_emphasis_end, false);
        let error = outcome.unwrap_err();

        assert_eq!(
            "Block is not closed. Nested blocks are not allowed.",
            error.root_cause().to_string()
        );

        Ok(())
    }

    /// With nesting skipped, inner delimiters stay inside the outermost block.
    #[test]
    fn test_parse_blocks_nested() -> Result<()> {
        fn opens_tabs(event: &Event) -> bool {
            matches!(event, Event::Text(value) if value.starts_with("{{#tabs "))
        }

        fn closes_tabs(event: &Event) -> bool {
            matches!(event, Event::Text(value) if value.starts_with("{{#endtabs "))
        }

        let content = "\
        {{#tabs }}\n\
        Level 1\n\
        {{#tabs }}\n\
        Level 2\n\
        {{#tabs }}\n\
        Level 3\n\
        {{#endtabs }}\n\
        {{#endtabs }}\n\
        {{#endtabs }}\n\
        ";

        let outermost = Block {
            closed: true,
            events: vec![
                (text("{{#tabs }}"), 0..10),
                (Event::SoftBreak, 10..11),
                (text("Level 1"), 11..18),
                (Event::SoftBreak, 18..19),
                (text("{{#tabs }}"), 19..29),
                (Event::SoftBreak, 29..30),
                (text("Level 2"), 30..37),
                (Event::SoftBreak, 37..38),
                (text("{{#tabs }}"), 38..48),
                (Event::SoftBreak, 48..49),
                (text("Level 3"), 49..56),
                (Event::SoftBreak, 56..57),
                (text("{{#endtabs }}"), 57..70),
                (Event::SoftBreak, 70..71),
                (text("{{#endtabs }}"), 71..84),
                (Event::SoftBreak, 84..85),
                (text("{{#endtabs }}"), 85..98),
            ],
            span: 0..98,
            inner_span: 10..85,
            has_nested: true,
        };
        let expected: Vec<Block> = vec![outermost];

        let actual = parse_blocks(content, opens_tabs, closes_tabs, true)?;

        assert_eq!(expected, actual);

        Ok(())
    }
}