Skip to main content

asciidoc_parser/blocks/
simple.rs

1use crate::{
2    HasSpan, Parser, Span,
3    attributes::Attrlist,
4    blocks::{
5        CompoundDelimitedBlock, ContentModel, IsBlock, ListItemMarker, RawDelimitedBlock,
6        metadata::BlockMetadata,
7    },
8    content::{Content, SubstitutionGroup},
9    span::MatchedItem,
10    strings::CowStr,
11};
12
/// The style of a simple block.
#[derive(Clone, Copy, Eq, PartialEq)]
pub enum SimpleBlockStyle {
    /// An ordinary paragraph, rendered with normal substitutions.
    Paragraph,

    /// A literal block, rendered without substitutions.
    Literal,

    /// Blocks and paragraphs assigned the listing style display their rendered
    /// content exactly as it appears in the source. Listing content is
    /// converted to preformatted text (i.e., `<pre>`), shown in a fixed-width
    /// font with endlines preserved. Only [special characters] and callouts
    /// are replaced when the document is converted.
    ///
    /// [special characters]: https://docs.asciidoctor.org/asciidoc/latest/subs/special-characters/
    Listing,

    /// A specialization of a listing block intended for source code, which
    /// readers typically expect to see colorized so that the structure of the
    /// code (i.e., keywords, types, delimiters, etc.) stands out.
    Source,
}

impl std::fmt::Debug for SimpleBlockStyle {
    /// Writes the fully-qualified variant name (e.g.
    /// `SimpleBlockStyle::Paragraph`) rather than the bare variant name that
    /// `#[derive(Debug)]` would produce.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let qualified_name = match self {
            Self::Paragraph => "SimpleBlockStyle::Paragraph",
            Self::Literal => "SimpleBlockStyle::Literal",
            Self::Listing => "SimpleBlockStyle::Listing",
            Self::Source => "SimpleBlockStyle::Source",
        };

        f.write_str(qualified_name)
    }
}
47
48/// A block that's treated as contiguous lines of paragraph text (and subject to
49/// normal substitutions) (e.g., a paragraph block).
50#[derive(Clone, Debug, Eq, PartialEq)]
51pub struct SimpleBlock<'src> {
52    content: Content<'src>,
53    source: Span<'src>,
54    style: SimpleBlockStyle,
55    title_source: Option<Span<'src>>,
56    title: Option<String>,
57    anchor: Option<Span<'src>>,
58    anchor_reftext: Option<Span<'src>>,
59    attrlist: Option<Attrlist<'src>>,
60}
61
62impl<'src> SimpleBlock<'src> {
63    pub(crate) fn parse(
64        metadata: &BlockMetadata<'src>,
65        parser: &mut Parser,
66    ) -> Option<MatchedItem<'src, Self>> {
67        let MatchedItem {
68            item: (content, style),
69            after,
70        } = parse_lines(
71            metadata.block_start,
72            &metadata.attrlist,
73            false,
74            false,
75            false,
76            parser,
77            &[],
78        )?;
79
80        Some(MatchedItem {
81            item: Self {
82                content,
83                source: metadata
84                    .source
85                    .trim_remainder(after)
86                    .trim_trailing_whitespace(),
87                style,
88                title_source: metadata.title_source,
89                title: metadata.title.clone(),
90                anchor: metadata.anchor,
91                anchor_reftext: metadata.anchor_reftext,
92                attrlist: metadata.attrlist.clone(),
93            },
94            after: after.discard_empty_lines(),
95        })
96    }
97
98    pub(crate) fn parse_for_list_item(
99        metadata: &BlockMetadata<'src>,
100        parser: &mut Parser,
101        is_continuation: bool,
102        parent_list_markers: &[ListItemMarker<'src>],
103    ) -> Option<MatchedItem<'src, Self>> {
104        let MatchedItem {
105            item: (content, style),
106            after,
107        } = parse_lines(
108            metadata.block_start,
109            &metadata.attrlist,
110            true,
111            false,
112            is_continuation,
113            parser,
114            parent_list_markers,
115        )?;
116
117        Some(MatchedItem {
118            item: Self {
119                content,
120                source: metadata
121                    .source
122                    .trim_remainder(after)
123                    .trim_trailing_whitespace(),
124                style,
125                title_source: metadata.title_source,
126                title: metadata.title.clone(),
127                anchor: metadata.anchor,
128                anchor_reftext: metadata.anchor_reftext,
129                attrlist: metadata.attrlist.clone(),
130            },
131            after,
132        })
133    }
134
135    /// Parse a simple block for use in a definition list item.
136    ///
137    /// In definition lists, indented content is treated as a paragraph
138    /// with the indentation stripped, not as a literal block.
139    pub(crate) fn parse_for_definition_list(
140        metadata: &BlockMetadata<'src>,
141        parser: &mut Parser,
142    ) -> Option<MatchedItem<'src, Self>> {
143        let MatchedItem {
144            item: (content, style),
145            after,
146        } = parse_lines(
147            metadata.block_start,
148            &metadata.attrlist,
149            true,
150            true,
151            false,
152            parser,
153            &[],
154        )?;
155
156        Some(MatchedItem {
157            item: Self {
158                content,
159                source: metadata
160                    .source
161                    .trim_remainder(after)
162                    .trim_trailing_whitespace(),
163                style,
164                title_source: metadata.title_source,
165                title: metadata.title.clone(),
166                anchor: metadata.anchor,
167                anchor_reftext: metadata.anchor_reftext,
168                attrlist: metadata.attrlist.clone(),
169            },
170            after,
171        })
172    }
173
174    pub(crate) fn parse_fast(
175        source: Span<'src>,
176        parser: &Parser,
177    ) -> Option<MatchedItem<'src, Self>> {
178        let MatchedItem {
179            item: (content, style),
180            after,
181        } = parse_lines(source, &None, false, false, false, parser, &[])?;
182
183        let source = content.original();
184
185        Some(MatchedItem {
186            item: Self {
187                content,
188                source,
189                style,
190                title_source: None,
191                title: None,
192                anchor: None,
193                anchor_reftext: None,
194                attrlist: None,
195            },
196            after: after.discard_empty_lines(),
197        })
198    }
199
200    /// Return the interpreted content of this block.
201    pub fn content(&self) -> &Content<'src> {
202        &self.content
203    }
204
205    /// Return the style of this block.
206    pub fn style(&self) -> SimpleBlockStyle {
207        self.style
208    }
209}
210
211/// Parse the content-bearing lines for this block.
212///
213/// If `force_paragraph_style` is true, indented content is treated as a
214/// paragraph (with indentation stripped) rather than as a literal block. This
215/// is used for definition list items where indentation is purely visual
216/// formatting.
217///
218/// If `preserve_literal_indent` is true and the content is a literal block,
219/// indentation is preserved as-is (used for `+` continuation content).
220fn parse_lines<'src>(
221    source: Span<'src>,
222    attrlist: &Option<Attrlist<'src>>,
223    mut stop_for_list_items: bool,
224    force_paragraph_style: bool,
225    preserve_literal_indent: bool,
226    parser: &Parser,
227    parent_list_markers: &[ListItemMarker<'src>],
228) -> Option<MatchedItem<'src, (Content<'src>, SimpleBlockStyle)>> {
229    let source_after_whitespace = source.discard_whitespace();
230    let first_line_indent = source_after_whitespace.col() - 1;
231
232    // Track if we're in "indented literal" mode (literal style from indentation).
233    // In this mode, we should still stop for list markers that are NOT indented.
234    let mut indented_literal_mode = false;
235
236    let mut style = if source_after_whitespace.col() == source.col() || force_paragraph_style {
237        // When force_paragraph_style is true, we still need to track that the content
238        // is indented so we can properly stop at unindented list markers.
239        if source_after_whitespace.col() != source.col() {
240            indented_literal_mode = true;
241        }
242        SimpleBlockStyle::Paragraph
243    } else {
244        // Indented content treated as literal: don't stop for list markers
245        // (they become part of the literal content).
246        stop_for_list_items = false;
247        SimpleBlockStyle::Literal
248    };
249
250    // Block style can override the interpretation of literal from reading
251    // indentation.
252    if let Some(attrlist) = attrlist {
253        match attrlist.block_style() {
254            Some("normal") => {
255                style = SimpleBlockStyle::Paragraph;
256            }
257
258            Some("literal") => {
259                stop_for_list_items = false;
260                indented_literal_mode = false;
261                style = SimpleBlockStyle::Literal;
262            }
263
264            Some("listing") => {
265                stop_for_list_items = false;
266                indented_literal_mode = false;
267                style = SimpleBlockStyle::Listing;
268            }
269
270            Some("source") => {
271                stop_for_list_items = false;
272                indented_literal_mode = false;
273                style = SimpleBlockStyle::Source;
274            }
275
276            _ => {}
277        }
278    }
279
280    let mut next = source;
281    let mut filtered_lines: Vec<&'src str> = vec![];
282    let mut skipped_comment_line = false;
283
284    // Determine how much indentation to strip from literal paragraphs.
285    // In definition list continuations, use minimum indentation across all
286    // lines to preserve relative indent. In outline list continuations and
287    // non-continuation contexts, strip based on the first line's indent.
288    let in_definition_list = parent_list_markers
289        .iter()
290        .any(|m| matches!(m, ListItemMarker::DefinedTerm { .. }));
291
292    let strip_indent =
293        if preserve_literal_indent && style == SimpleBlockStyle::Literal && in_definition_list {
294            // Two-pass approach: find minimum indentation across all lines.
295            let mut scan = source;
296            let mut min_indent = first_line_indent;
297            let mut line_count = 0;
298
299            while let Some(line_mi) = scan.take_non_empty_line() {
300                let line = line_mi.item;
301
302                // Apply same stop conditions as the main loop.
303                if line_count > 0 && line.data() == "+" {
304                    break;
305                }
306
307                if let Some(n) = line.position(|c| c != ' ' && c != '\t') {
308                    min_indent = min_indent.min(n);
309                }
310
311                line_count += 1;
312                scan = line_mi.after;
313            }
314            min_indent
315        } else {
316            first_line_indent
317        };
318
319    while let Some(line_mi) = next.take_non_empty_line() {
320        let mut line = line_mi.item;
321
322        // If we've skipped a comment line and this is a section header, stop here
323        // so the section can be parsed as a separate block. Only do this at the
324        // top level (not inside lists), indicated by stop_for_list_items being false.
325        if !stop_for_list_items
326            && skipped_comment_line
327            && style == SimpleBlockStyle::Paragraph
328            && is_section_header(line.data())
329        {
330            break;
331        }
332
333        // There are several stop conditions for simple paragraph blocks. These
334        // "shouldn't" be encountered on the first line (we shouldn't be calling
335        // `SimpleBlock::parse` in these conditions), but in case it is, we simply
336        // ignore them on the first line.
337        if !filtered_lines.is_empty() {
338            // In indented literal mode, only stop for list markers that are NOT indented
339            // (at column 1). This allows definition list items to be properly separated.
340            let should_check_for_list_marker =
341                stop_for_list_items && (!indented_literal_mode || line.col() == 1);
342
343            // If we've already started accumulating content for this list item paragraph,
344            // we don't stop for list markers at any level other than our own or a parent
345            // level.
346            if should_check_for_list_marker
347                && let Some(marker_mi) = ListItemMarker::parse(line, parser)
348            {
349                // In description list continuation context, don't stop for
350                // deeper-nested description list markers (e.g., ::: when the
351                // current context is ::). They are treated as paragraph text.
352                let is_ancestor_list = parent_list_markers
353                    .iter()
354                    .any(|p| p.is_match_for(&marker_mi.item));
355
356                if is_ancestor_list || !preserve_literal_indent {
357                    break;
358                }
359            }
360
361            if line.data() == "+" {
362                break;
363            }
364
365            if line.starts_with('[') && line.ends_with(']') {
366                break;
367            }
368
369            if (line.starts_with('/')
370                || line.starts_with('-')
371                || line.starts_with('.')
372                || line.starts_with('+')
373                || line.starts_with('=')
374                || line.starts_with('*')
375                || line.starts_with('_'))
376                && (RawDelimitedBlock::is_valid_delimiter(&line)
377                    || CompoundDelimitedBlock::is_valid_delimiter(&line))
378            {
379                break;
380            }
381        }
382
383        next = line_mi.after;
384
385        // Only strip comment lines in paragraph style. In literal/listing/source
386        // blocks, "//" lines are preserved as content.
387        if style == SimpleBlockStyle::Paragraph
388            && line.starts_with("//")
389            && !line.starts_with("///")
390        {
391            skipped_comment_line = true;
392            continue;
393        }
394
395        // Strip at most the calculated indentation amount.
396        let should_strip_indent = strip_indent > 0;
397
398        if should_strip_indent && let Some(n) = line.position(|c| c != ' ' && c != '\t') {
399            line = line.into_parse_result(n.min(strip_indent)).after;
400        };
401
402        filtered_lines.push(line.trim_trailing_whitespace().data());
403    }
404
405    let source = source.trim_remainder(next).trim_trailing_whitespace();
406    if source.is_empty() {
407        return None;
408    }
409
410    let filtered_lines = filtered_lines.join("\n");
411    let mut content: Content<'src> = Content::from_filtered(source, filtered_lines);
412
413    let sub_group = match style {
414        // Only apply Verbatim substitutions to literal blocks detected by indentation.
415        // Listing and Source styles declared via attribute list still use Normal subs.
416        SimpleBlockStyle::Literal => SubstitutionGroup::Verbatim,
417        SimpleBlockStyle::Listing | SimpleBlockStyle::Source | SimpleBlockStyle::Paragraph => {
418            SubstitutionGroup::Normal
419        }
420    };
421
422    sub_group.override_via_attrlist(attrlist.as_ref()).apply(
423        &mut content,
424        parser,
425        attrlist.as_ref(),
426    );
427
428    Some(MatchedItem {
429        item: (content, style),
430        after: next,
431    })
432}
433
434impl<'src> IsBlock<'src> for SimpleBlock<'src> {
435    fn content_model(&self) -> ContentModel {
436        ContentModel::Simple
437    }
438
439    fn rendered_content(&self) -> Option<&str> {
440        Some(self.content.rendered())
441    }
442
443    fn raw_context(&self) -> CowStr<'src> {
444        "paragraph".into()
445    }
446
447    fn title_source(&'src self) -> Option<Span<'src>> {
448        self.title_source
449    }
450
451    fn title(&self) -> Option<&str> {
452        self.title.as_deref()
453    }
454
455    fn anchor(&'src self) -> Option<Span<'src>> {
456        self.anchor
457    }
458
459    fn anchor_reftext(&'src self) -> Option<Span<'src>> {
460        self.anchor_reftext
461    }
462
463    fn attrlist(&'src self) -> Option<&'src Attrlist<'src>> {
464        self.attrlist.as_ref()
465    }
466}
467
468impl<'src> HasSpan<'src> for SimpleBlock<'src> {
469    fn span(&self) -> Span<'src> {
470        self.source
471    }
472}
473
/// Returns true if the line looks like a section header.
///
/// A line qualifies when it begins with a run of two or more `=` characters
/// (AsciiDoc style: `== `, `=== `, ...) or two or more `#` characters
/// (Markdown style: `## `, `### `, ...) and the run is immediately followed
/// by a space. A single marker (`= ` or `# `) does not qualify, and the two
/// marker characters may not be mixed within the run.
fn is_section_header(line: &str) -> bool {
    ['=', '#'].iter().any(|&marker| {
        let after_markers = line.trim_start_matches(marker);

        // Both marker characters are one byte long, so the difference in
        // byte length equals the number of leading markers removed.
        let marker_count = line.len() - after_markers.len();

        marker_count >= 2 && after_markers.starts_with(' ')
    })
}
495
#[cfg(test)]
mod tests {
    #![allow(clippy::unwrap_used)]

    use std::ops::Deref;

    use pretty_assertions_sorted::assert_eq;

    use crate::{
        Parser,
        blocks::{ContentModel, IsBlock, SimpleBlockStyle, metadata::BlockMetadata},
        content::SubstitutionGroup,
        tests::prelude::*,
    };

    // NOTE: Within this module the bare name `SimpleBlock` is the one brought
    // in by `tests::prelude::*` and is used to spell out expected values. The
    // parser type under test is always named by its full path,
    // `crate::blocks::SimpleBlock`.

    #[test]
    fn impl_clone() {
        // Silly test to mark the #[derive(...)] line as covered.
        let mut parser = Parser::default();
        let metadata = BlockMetadata::new("abc");

        let original = crate::blocks::SimpleBlock::parse(&metadata, &mut parser).unwrap();
        let duplicate = original.item.clone();

        assert_eq!(original.item, duplicate);
    }

    #[test]
    fn style_enum_impl_debug() {
        // Each variant must print with its fully-qualified name.
        let expectations = [
            (SimpleBlockStyle::Paragraph, "SimpleBlockStyle::Paragraph"),
            (SimpleBlockStyle::Literal, "SimpleBlockStyle::Literal"),
            (SimpleBlockStyle::Listing, "SimpleBlockStyle::Listing"),
            (SimpleBlockStyle::Source, "SimpleBlockStyle::Source"),
        ];

        for (style, expected) in expectations {
            assert_eq!(format!("{style:?}"), expected);
        }
    }

    #[test]
    fn empty_source() {
        let mut parser = Parser::default();
        let metadata = BlockMetadata::new("");

        let result = crate::blocks::SimpleBlock::parse(&metadata, &mut parser);
        assert!(result.is_none());
    }

    #[test]
    fn only_spaces() {
        let mut parser = Parser::default();
        let metadata = BlockMetadata::new("    ");

        let result = crate::blocks::SimpleBlock::parse(&metadata, &mut parser);
        assert!(result.is_none());
    }

    #[test]
    fn single_line() {
        let mut parser = Parser::default();
        let metadata = BlockMetadata::new("abc");
        let parsed = crate::blocks::SimpleBlock::parse(&metadata, &mut parser).unwrap();

        assert_eq!(
            parsed.item,
            SimpleBlock {
                content: Content {
                    original: Span {
                        data: "abc",
                        line: 1,
                        col: 1,
                        offset: 0,
                    },
                    rendered: "abc",
                },
                source: Span {
                    data: "abc",
                    line: 1,
                    col: 1,
                    offset: 0,
                },
                style: SimpleBlockStyle::Paragraph,
                title_source: None,
                title: None,
                anchor: None,
                anchor_reftext: None,
                attrlist: None,
            },
        );

        // Exercise the `IsBlock` accessors on a block with no metadata.
        let block = &parsed.item;
        assert_eq!(block.content_model(), ContentModel::Simple);
        assert_eq!(block.rendered_content().unwrap(), "abc");
        assert_eq!(block.raw_context().deref(), "paragraph");
        assert_eq!(block.resolved_context().deref(), "paragraph");
        assert!(block.declared_style().is_none());
        assert!(block.id().is_none());
        assert!(block.roles().is_empty());
        assert!(block.options().is_empty());
        assert!(block.title_source().is_none());
        assert!(block.title().is_none());
        assert!(block.anchor().is_none());
        assert!(block.anchor_reftext().is_none());
        assert!(block.attrlist().is_none());
        assert_eq!(block.substitution_group(), SubstitutionGroup::Normal);

        assert_eq!(
            parsed.after,
            Span {
                data: "",
                line: 1,
                col: 4,
                offset: 3
            }
        );
    }

    #[test]
    fn multiple_lines() {
        let mut parser = Parser::default();
        let metadata = BlockMetadata::new("abc\ndef");
        let parsed = crate::blocks::SimpleBlock::parse(&metadata, &mut parser).unwrap();

        assert_eq!(
            parsed.item,
            SimpleBlock {
                content: Content {
                    original: Span {
                        data: "abc\ndef",
                        line: 1,
                        col: 1,
                        offset: 0,
                    },
                    rendered: "abc\ndef",
                },
                source: Span {
                    data: "abc\ndef",
                    line: 1,
                    col: 1,
                    offset: 0,
                },
                style: SimpleBlockStyle::Paragraph,
                title_source: None,
                title: None,
                anchor: None,
                anchor_reftext: None,
                attrlist: None,
            }
        );

        assert_eq!(
            parsed.after,
            Span {
                data: "",
                line: 2,
                col: 4,
                offset: 7
            }
        );

        assert_eq!(parsed.item.rendered_content().unwrap(), "abc\ndef");
    }

    #[test]
    fn consumes_blank_lines_after() {
        let mut parser = Parser::default();
        let metadata = BlockMetadata::new("abc\n\ndef");
        let parsed = crate::blocks::SimpleBlock::parse(&metadata, &mut parser).unwrap();

        assert_eq!(
            parsed.item,
            SimpleBlock {
                content: Content {
                    original: Span {
                        data: "abc",
                        line: 1,
                        col: 1,
                        offset: 0,
                    },
                    rendered: "abc",
                },
                source: Span {
                    data: "abc",
                    line: 1,
                    col: 1,
                    offset: 0,
                },
                style: SimpleBlockStyle::Paragraph,
                title_source: None,
                title: None,
                anchor: None,
                anchor_reftext: None,
                attrlist: None,
            }
        );

        // The blank line between the two paragraphs is skipped, so the
        // remainder starts at the next paragraph.
        assert_eq!(
            parsed.after,
            Span {
                data: "def",
                line: 3,
                col: 1,
                offset: 5
            }
        );
    }

    #[test]
    fn overrides_sub_group_via_subs_attribute() {
        let mut parser = Parser::default();
        let metadata = BlockMetadata::new("[subs=quotes]\na<b>c *bold*\n\ndef");
        let parsed = crate::blocks::SimpleBlock::parse(&metadata, &mut parser).unwrap();

        assert_eq!(
            parsed.item,
            SimpleBlock {
                content: Content {
                    original: Span {
                        data: "a<b>c *bold*",
                        line: 2,
                        col: 1,
                        offset: 14,
                    },
                    rendered: "a<b>c <strong>bold</strong>",
                },
                source: Span {
                    data: "[subs=quotes]\na<b>c *bold*",
                    line: 1,
                    col: 1,
                    offset: 0,
                },
                style: SimpleBlockStyle::Paragraph,
                title_source: None,
                title: None,
                anchor: None,
                anchor_reftext: None,
                attrlist: Some(Attrlist {
                    attributes: &[ElementAttribute {
                        name: Some("subs"),
                        value: "quotes",
                        shorthand_items: &[],
                    },],
                    anchor: None,
                    source: Span {
                        data: "subs=quotes",
                        line: 1,
                        col: 2,
                        offset: 1,
                    },
                },),
            }
        );

        assert_eq!(
            parsed.after,
            Span {
                data: "def",
                line: 4,
                col: 1,
                offset: 28
            }
        );

        // With `subs=quotes` only quote substitutions run: `<b>` is left
        // unescaped while `*bold*` becomes strong text.
        assert_eq!(
            parsed.item.rendered_content().unwrap(),
            "a<b>c <strong>bold</strong>"
        );
    }
}
773}