asciidoc_parser/blocks/
block.rs

1use std::slice::Iter;
2
3use crate::{
4    HasSpan, Parser, Span,
5    attributes::Attrlist,
6    blocks::{
7        CompoundDelimitedBlock, ContentModel, IsBlock, MediaBlock, RawDelimitedBlock, SectionBlock,
8        SimpleBlock, metadata::BlockMetadata,
9    },
10    content::SubstitutionGroup,
11    document::Attribute,
12    span::MatchedItem,
13    strings::CowStr,
14    warnings::{MatchAndWarnings, Warning, WarningType},
15};
16
17/// **Block elements** form the main structure of an AsciiDoc document, starting
18/// with the document itself.
19///
20/// A block element (aka **block**) is a discrete, line-oriented chunk of
21/// content in an AsciiDoc document. Once parsed, that chunk of content becomes
22/// a block element in the parsed document model. Certain blocks may contain
23/// other blocks, so we say that blocks can be nested. The converter visits each
24/// block in turn, in document order, converting it to a corresponding chunk of
25/// output.
26///
27/// This enum represents all of the block types that are understood directly by
28/// this parser and also implements the [`IsBlock`] trait.
29#[derive(Clone, Debug, Eq, PartialEq)]
30#[allow(clippy::large_enum_variant)] // TEMPORARY: review later
31#[non_exhaustive]
32pub enum Block<'src> {
33    /// A block that’s treated as contiguous lines of paragraph text (and
34    /// subject to normal substitutions) (e.g., a paragraph block).
35    Simple(SimpleBlock<'src>),
36
37    /// A media block is used to represent an image, video, or audio block
38    /// macro.
39    Media(MediaBlock<'src>),
40
41    /// A section helps to partition the document into a content hierarchy.
42    /// May also be a part, chapter, or special section.
43    Section(SectionBlock<'src>),
44
45    /// A delimited block that contains verbatim, raw, or comment text. The
46    /// content between the matching delimiters is not parsed for block
47    /// syntax.
48    RawDelimited(RawDelimitedBlock<'src>),
49
50    /// A delimited block that can contain other blocks.
51    CompoundDelimited(CompoundDelimitedBlock<'src>),
52
53    /// When an attribute is defined in the document body using an attribute
54    /// entry, that’s simply referred to as a document attribute.
55    DocumentAttribute(Attribute<'src>),
56}
57
58impl<'src> Block<'src> {
59    /// Parse a block of any type and return a `Block` that describes it.
60    ///
61    /// Consumes any blank lines before and after the block.
62    pub(crate) fn parse(
63        source: Span<'src>,
64        parser: &mut Parser,
65    ) -> MatchAndWarnings<'src, Option<MatchedItem<'src, Self>>> {
66        // Optimization: If the first line doesn't match any of the early indications
67        // for delimited blocks, titles, or attrlists, we can skip directly to treating
68        // this as a simple block. That saves quite a bit of parsing time.
69        let first_line = source.take_line();
70
71        // If it does contain any of those markers, we fall through to the more costly
72        // tests below which can more accurately classify the upcoming block.
73        if let Some(first_char) = source.chars().next()
74            && !matches!(
75                first_char,
76                '.' | '#' | '=' | '/' | '-' | '+' | '*' | '_' | '[' | ':'
77            )
78            && !first_line.item.contains("::")
79            && let Some(MatchedItem {
80                item: simple_block,
81                after,
82            }) = SimpleBlock::parse_fast(source, parser)
83        {
84            return MatchAndWarnings {
85                item: Some(MatchedItem {
86                    item: Self::Simple(simple_block),
87                    after,
88                }),
89                warnings: vec![],
90            };
91        }
92
93        // Look for document attributes first since these don't support block metadata.
94        if first_line.item.starts_with(':')
95            && (first_line.item.ends_with(':') || first_line.item.contains(": "))
96            && let Some(attr) = Attribute::parse(source, parser)
97        {
98            let mut warnings: Vec<Warning<'src>> = vec![];
99            parser.set_attribute_from_body(&attr.item, &mut warnings);
100
101            return MatchAndWarnings {
102                item: Some(MatchedItem {
103                    item: Self::DocumentAttribute(attr.item),
104                    after: attr.after,
105                }),
106                warnings,
107            };
108        }
109
110        // Optimization not possible; start by looking for block metadata (title,
111        // attrlist, etc.).
112        let MatchAndWarnings {
113            item: mut metadata,
114            mut warnings,
115        } = BlockMetadata::parse(source, parser);
116
117        if let Some(mut rdb_maw) = RawDelimitedBlock::parse(&metadata, parser)
118            && let Some(rdb) = rdb_maw.item
119        {
120            if !rdb_maw.warnings.is_empty() {
121                warnings.append(&mut rdb_maw.warnings);
122            }
123
124            return MatchAndWarnings {
125                item: Some(MatchedItem {
126                    item: Self::RawDelimited(rdb.item),
127                    after: rdb.after,
128                }),
129                warnings,
130            };
131        }
132
133        if let Some(mut cdb_maw) = CompoundDelimitedBlock::parse(&metadata, parser)
134            && let Some(cdb) = cdb_maw.item
135        {
136            if !cdb_maw.warnings.is_empty() {
137                warnings.append(&mut cdb_maw.warnings);
138            }
139
140            return MatchAndWarnings {
141                item: Some(MatchedItem {
142                    item: Self::CompoundDelimited(cdb.item),
143                    after: cdb.after,
144                }),
145                warnings,
146            };
147        }
148
149        // Try to discern the block type by scanning the first line.
150        let line = metadata.block_start.take_normalized_line();
151
152        if line.item.starts_with("image::")
153            || line.item.starts_with("video::")
154            || line.item.starts_with("video::")
155        {
156            let mut media_block_maw = MediaBlock::parse(&metadata, parser);
157
158            if let Some(media_block) = media_block_maw.item {
159                // Only propagate warnings from media block parsing if we think this
160                // *is* a media block. Otherwise, there would likely be too many false
161                // positives.
162                if !media_block_maw.warnings.is_empty() {
163                    warnings.append(&mut media_block_maw.warnings);
164                }
165
166                return MatchAndWarnings {
167                    item: Some(MatchedItem {
168                        item: Self::Media(media_block.item),
169                        after: media_block.after,
170                    }),
171                    warnings,
172                };
173            }
174
175            // This might be some other kind of block, so we don't automatically
176            // error out on a parse failure.
177        }
178
179        if line.item.starts_with('=')
180            && let Some(mut maw_section_block) = SectionBlock::parse(&metadata, parser)
181        {
182            // A line starting with `=` might be some other kind of block, so we continue
183            // quietly if `SectionBlock` parser rejects this block.
184
185            if !maw_section_block.warnings.is_empty() {
186                warnings.append(&mut maw_section_block.warnings);
187            }
188
189            return MatchAndWarnings {
190                item: Some(MatchedItem {
191                    item: Self::Section(maw_section_block.item.item),
192                    after: maw_section_block.item.after,
193                }),
194                warnings,
195            };
196        }
197
198        // First, let's look for a fun edge case. Perhaps the text contains block
199        // metadata but no block immediately following. If we're not careful, we could
200        // spin in a loop (for example, `parse_blocks_until`) thinking there will be
201        // another block, but there isn't.
202
203        // The following check disables that spin loop.
204        let simple_block_mi = SimpleBlock::parse(&metadata, parser);
205
206        if simple_block_mi.is_none() && !metadata.is_empty() {
207            // We have a metadata with no block. Treat it as a simple block but issue a
208            // warning.
209
210            warnings.push(Warning {
211                source: metadata.source,
212                warning: WarningType::MissingBlockAfterTitleOrAttributeList,
213            });
214
215            // Remove the metadata content so that SimpleBlock will read the title/attrlist
216            // line(s) as regular content.
217            metadata.title_source = None;
218            metadata.title = None;
219            metadata.anchor = None;
220            metadata.attrlist = None;
221            metadata.block_start = metadata.source;
222        }
223
224        // If no other block kind matches, we can always use SimpleBlock.
225        MatchAndWarnings {
226            item: SimpleBlock::parse(&metadata, parser).map(|mi| MatchedItem {
227                item: Self::Simple(mi.item),
228                after: mi.after,
229            }),
230            warnings,
231        }
232    }
233}
234
235impl<'src> IsBlock<'src> for Block<'src> {
236    fn content_model(&self) -> ContentModel {
237        match self {
238            Self::Simple(_) => ContentModel::Simple,
239            Self::Media(b) => b.content_model(),
240            Self::Section(_) => ContentModel::Compound,
241            Self::RawDelimited(b) => b.content_model(),
242            Self::CompoundDelimited(b) => b.content_model(),
243            Self::DocumentAttribute(b) => b.content_model(),
244        }
245    }
246
247    fn raw_context(&self) -> CowStr<'src> {
248        match self {
249            Self::Simple(b) => b.raw_context(),
250            Self::Media(b) => b.raw_context(),
251            Self::Section(b) => b.raw_context(),
252            Self::RawDelimited(b) => b.raw_context(),
253            Self::CompoundDelimited(b) => b.raw_context(),
254            Self::DocumentAttribute(b) => b.raw_context(),
255        }
256    }
257
258    fn nested_blocks(&'src self) -> Iter<'src, Block<'src>> {
259        match self {
260            Self::Simple(b) => b.nested_blocks(),
261            Self::Media(b) => b.nested_blocks(),
262            Self::Section(b) => b.nested_blocks(),
263            Self::RawDelimited(b) => b.nested_blocks(),
264            Self::CompoundDelimited(b) => b.nested_blocks(),
265            Self::DocumentAttribute(b) => b.nested_blocks(),
266        }
267    }
268
269    fn title_source(&'src self) -> Option<Span<'src>> {
270        match self {
271            Self::Simple(b) => b.title_source(),
272            Self::Media(b) => b.title_source(),
273            Self::Section(b) => b.title_source(),
274            Self::RawDelimited(b) => b.title_source(),
275            Self::CompoundDelimited(b) => b.title_source(),
276            Self::DocumentAttribute(b) => b.title_source(),
277        }
278    }
279
280    fn title(&self) -> Option<&str> {
281        match self {
282            Self::Simple(b) => b.title(),
283            Self::Media(b) => b.title(),
284            Self::Section(b) => b.title(),
285            Self::RawDelimited(b) => b.title(),
286            Self::CompoundDelimited(b) => b.title(),
287            Self::DocumentAttribute(b) => b.title(),
288        }
289    }
290
291    fn anchor(&'src self) -> Option<Span<'src>> {
292        match self {
293            Self::Simple(b) => b.anchor(),
294            Self::Media(b) => b.anchor(),
295            Self::Section(b) => b.anchor(),
296            Self::RawDelimited(b) => b.anchor(),
297            Self::CompoundDelimited(b) => b.anchor(),
298            Self::DocumentAttribute(b) => b.anchor(),
299        }
300    }
301
302    fn attrlist(&'src self) -> Option<&'src Attrlist<'src>> {
303        match self {
304            Self::Simple(b) => b.attrlist(),
305            Self::Media(b) => b.attrlist(),
306            Self::Section(b) => b.attrlist(),
307            Self::RawDelimited(b) => b.attrlist(),
308            Self::CompoundDelimited(b) => b.attrlist(),
309            Self::DocumentAttribute(b) => b.attrlist(),
310        }
311    }
312
313    fn substitution_group(&self) -> SubstitutionGroup {
314        match self {
315            Self::Simple(b) => b.substitution_group(),
316            Self::Media(b) => b.substitution_group(),
317            Self::Section(b) => b.substitution_group(),
318            Self::RawDelimited(b) => b.substitution_group(),
319            Self::CompoundDelimited(b) => b.substitution_group(),
320            Self::DocumentAttribute(b) => b.substitution_group(),
321        }
322    }
323}
324
325impl<'src> HasSpan<'src> for Block<'src> {
326    fn span(&self) -> Span<'src> {
327        match self {
328            Self::Simple(b) => b.span(),
329            Self::Media(b) => b.span(),
330            Self::Section(b) => b.span(),
331            Self::RawDelimited(b) => b.span(),
332            Self::CompoundDelimited(b) => b.span(),
333            Self::DocumentAttribute(b) => b.span(),
334        }
335    }
336}