asciidoc_parser/blocks/
block.rs

1use std::slice::Iter;
2
3use crate::{
4    HasSpan, Parser, Span,
5    attributes::Attrlist,
6    blocks::{
7        CompoundDelimitedBlock, ContentModel, IsBlock, MediaBlock, RawDelimitedBlock, SectionBlock,
8        SimpleBlock, metadata::BlockMetadata,
9    },
10    content::SubstitutionGroup,
11    document::Attribute,
12    span::MatchedItem,
13    strings::CowStr,
14    warnings::{MatchAndWarnings, Warning, WarningType},
15};
16
/// **Block elements** form the main structure of an AsciiDoc document, starting
/// with the document itself.
///
/// A block element (aka **block**) is a discrete, line-oriented chunk of
/// content in an AsciiDoc document. Once parsed, that chunk of content becomes
/// a block element in the parsed document model. Certain blocks may contain
/// other blocks, so we say that blocks can be nested. The converter visits each
/// block in turn, in document order, converting it to a corresponding chunk of
/// output.
///
/// This enum represents all of the block types that are understood directly by
/// this parser and also implements the [`IsBlock`] trait.
///
/// Each variant wraps the dedicated type that holds the parsed data for that
/// kind of block. The enum is marked `#[non_exhaustive]`, so code outside this
/// crate that matches on it must include a wildcard arm.
#[derive(Clone, Eq, PartialEq)]
// Silences Clippy's warning about size disparity between the variants.
#[allow(clippy::large_enum_variant)] // TEMPORARY: review later
#[non_exhaustive]
pub enum Block<'src> {
    /// A block that’s treated as contiguous lines of paragraph text (and
    /// subject to normal substitutions) (e.g., a paragraph block).
    Simple(SimpleBlock<'src>),

    /// A media block is used to represent an image, video, or audio block
    /// macro.
    Media(MediaBlock<'src>),

    /// A section helps to partition the document into a content hierarchy.
    /// May also be a part, chapter, or special section.
    Section(SectionBlock<'src>),

    /// A delimited block that contains verbatim, raw, or comment text. The
    /// content between the matching delimiters is not parsed for block
    /// syntax.
    RawDelimited(RawDelimitedBlock<'src>),

    /// A delimited block that can contain other blocks.
    CompoundDelimited(CompoundDelimitedBlock<'src>),

    /// When an attribute is defined in the document body using an attribute
    /// entry, that’s simply referred to as a document attribute.
    DocumentAttribute(Attribute<'src>),
}
57
58impl<'src> std::fmt::Debug for Block<'src> {
59    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
60        match self {
61            Block::Simple(block) => f.debug_tuple("Block::Simple").field(block).finish(),
62            Block::Media(block) => f.debug_tuple("Block::Media").field(block).finish(),
63            Block::Section(block) => f.debug_tuple("Block::Section").field(block).finish(),
64
65            Block::RawDelimited(block) => {
66                f.debug_tuple("Block::RawDelimited").field(block).finish()
67            }
68
69            Block::CompoundDelimited(block) => f
70                .debug_tuple("Block::CompoundDelimited")
71                .field(block)
72                .finish(),
73
74            Block::DocumentAttribute(block) => f
75                .debug_tuple("Block::DocumentAttribute")
76                .field(block)
77                .finish(),
78        }
79    }
80}
81
82impl<'src> Block<'src> {
83    /// Parse a block of any type and return a `Block` that describes it.
84    ///
85    /// Consumes any blank lines before and after the block.
86    pub(crate) fn parse(
87        source: Span<'src>,
88        parser: &mut Parser,
89    ) -> MatchAndWarnings<'src, Option<MatchedItem<'src, Self>>> {
90        // Optimization: If the first line doesn't match any of the early indications
91        // for delimited blocks, titles, or attrlists, we can skip directly to treating
92        // this as a simple block. That saves quite a bit of parsing time.
93        let first_line = source.take_line();
94
95        // If it does contain any of those markers, we fall through to the more costly
96        // tests below which can more accurately classify the upcoming block.
97        if let Some(first_char) = source.chars().next()
98            && !matches!(
99                first_char,
100                '.' | '#' | '=' | '/' | '-' | '+' | '*' | '_' | '[' | ':'
101            )
102            && !first_line.item.contains("::")
103            && let Some(MatchedItem {
104                item: simple_block,
105                after,
106            }) = SimpleBlock::parse_fast(source, parser)
107        {
108            return MatchAndWarnings {
109                item: Some(MatchedItem {
110                    item: Self::Simple(simple_block),
111                    after,
112                }),
113                warnings: vec![],
114            };
115        }
116
117        // Look for document attributes first since these don't support block metadata.
118        if first_line.item.starts_with(':')
119            && (first_line.item.ends_with(':') || first_line.item.contains(": "))
120            && let Some(attr) = Attribute::parse(source, parser)
121        {
122            let mut warnings: Vec<Warning<'src>> = vec![];
123            parser.set_attribute_from_body(&attr.item, &mut warnings);
124
125            return MatchAndWarnings {
126                item: Some(MatchedItem {
127                    item: Self::DocumentAttribute(attr.item),
128                    after: attr.after,
129                }),
130                warnings,
131            };
132        }
133
134        // Optimization not possible; start by looking for block metadata (title,
135        // attrlist, etc.).
136        let MatchAndWarnings {
137            item: mut metadata,
138            mut warnings,
139        } = BlockMetadata::parse(source, parser);
140
141        if let Some(mut rdb_maw) = RawDelimitedBlock::parse(&metadata, parser)
142            && let Some(rdb) = rdb_maw.item
143        {
144            if !rdb_maw.warnings.is_empty() {
145                warnings.append(&mut rdb_maw.warnings);
146            }
147
148            return MatchAndWarnings {
149                item: Some(MatchedItem {
150                    item: Self::RawDelimited(rdb.item),
151                    after: rdb.after,
152                }),
153                warnings,
154            };
155        }
156
157        if let Some(mut cdb_maw) = CompoundDelimitedBlock::parse(&metadata, parser)
158            && let Some(cdb) = cdb_maw.item
159        {
160            if !cdb_maw.warnings.is_empty() {
161                warnings.append(&mut cdb_maw.warnings);
162            }
163
164            return MatchAndWarnings {
165                item: Some(MatchedItem {
166                    item: Self::CompoundDelimited(cdb.item),
167                    after: cdb.after,
168                }),
169                warnings,
170            };
171        }
172
173        // Try to discern the block type by scanning the first line.
174        let line = metadata.block_start.take_normalized_line();
175
176        if line.item.starts_with("image::")
177            || line.item.starts_with("video::")
178            || line.item.starts_with("video::")
179        {
180            let mut media_block_maw = MediaBlock::parse(&metadata, parser);
181
182            if let Some(media_block) = media_block_maw.item {
183                // Only propagate warnings from media block parsing if we think this
184                // *is* a media block. Otherwise, there would likely be too many false
185                // positives.
186                if !media_block_maw.warnings.is_empty() {
187                    warnings.append(&mut media_block_maw.warnings);
188                }
189
190                return MatchAndWarnings {
191                    item: Some(MatchedItem {
192                        item: Self::Media(media_block.item),
193                        after: media_block.after,
194                    }),
195                    warnings,
196                };
197            }
198
199            // This might be some other kind of block, so we don't automatically
200            // error out on a parse failure.
201        }
202
203        if line.item.starts_with('=')
204            && let Some(mut maw_section_block) = SectionBlock::parse(&metadata, parser)
205        {
206            // A line starting with `=` might be some other kind of block, so we continue
207            // quietly if `SectionBlock` parser rejects this block.
208
209            if !maw_section_block.warnings.is_empty() {
210                warnings.append(&mut maw_section_block.warnings);
211            }
212
213            return MatchAndWarnings {
214                item: Some(MatchedItem {
215                    item: Self::Section(maw_section_block.item.item),
216                    after: maw_section_block.item.after,
217                }),
218                warnings,
219            };
220        }
221
222        // First, let's look for a fun edge case. Perhaps the text contains block
223        // metadata but no block immediately following. If we're not careful, we could
224        // spin in a loop (for example, `parse_blocks_until`) thinking there will be
225        // another block, but there isn't.
226
227        // The following check disables that spin loop.
228        let simple_block_mi = SimpleBlock::parse(&metadata, parser);
229
230        if simple_block_mi.is_none() && !metadata.is_empty() {
231            // We have a metadata with no block. Treat it as a simple block but issue a
232            // warning.
233
234            warnings.push(Warning {
235                source: metadata.source,
236                warning: WarningType::MissingBlockAfterTitleOrAttributeList,
237            });
238
239            // Remove the metadata content so that SimpleBlock will read the title/attrlist
240            // line(s) as regular content.
241            metadata.title_source = None;
242            metadata.title = None;
243            metadata.anchor = None;
244            metadata.attrlist = None;
245            metadata.block_start = metadata.source;
246        }
247
248        // If no other block kind matches, we can always use SimpleBlock.
249        MatchAndWarnings {
250            item: SimpleBlock::parse(&metadata, parser).map(|mi| MatchedItem {
251                item: Self::Simple(mi.item),
252                after: mi.after,
253            }),
254            warnings,
255        }
256    }
257}
258
259impl<'src> IsBlock<'src> for Block<'src> {
260    fn content_model(&self) -> ContentModel {
261        match self {
262            Self::Simple(_) => ContentModel::Simple,
263            Self::Media(b) => b.content_model(),
264            Self::Section(_) => ContentModel::Compound,
265            Self::RawDelimited(b) => b.content_model(),
266            Self::CompoundDelimited(b) => b.content_model(),
267            Self::DocumentAttribute(b) => b.content_model(),
268        }
269    }
270
271    fn raw_context(&self) -> CowStr<'src> {
272        match self {
273            Self::Simple(b) => b.raw_context(),
274            Self::Media(b) => b.raw_context(),
275            Self::Section(b) => b.raw_context(),
276            Self::RawDelimited(b) => b.raw_context(),
277            Self::CompoundDelimited(b) => b.raw_context(),
278            Self::DocumentAttribute(b) => b.raw_context(),
279        }
280    }
281
282    fn nested_blocks(&'src self) -> Iter<'src, Block<'src>> {
283        match self {
284            Self::Simple(b) => b.nested_blocks(),
285            Self::Media(b) => b.nested_blocks(),
286            Self::Section(b) => b.nested_blocks(),
287            Self::RawDelimited(b) => b.nested_blocks(),
288            Self::CompoundDelimited(b) => b.nested_blocks(),
289            Self::DocumentAttribute(b) => b.nested_blocks(),
290        }
291    }
292
293    fn title_source(&'src self) -> Option<Span<'src>> {
294        match self {
295            Self::Simple(b) => b.title_source(),
296            Self::Media(b) => b.title_source(),
297            Self::Section(b) => b.title_source(),
298            Self::RawDelimited(b) => b.title_source(),
299            Self::CompoundDelimited(b) => b.title_source(),
300            Self::DocumentAttribute(b) => b.title_source(),
301        }
302    }
303
304    fn title(&self) -> Option<&str> {
305        match self {
306            Self::Simple(b) => b.title(),
307            Self::Media(b) => b.title(),
308            Self::Section(b) => b.title(),
309            Self::RawDelimited(b) => b.title(),
310            Self::CompoundDelimited(b) => b.title(),
311            Self::DocumentAttribute(b) => b.title(),
312        }
313    }
314
315    fn anchor(&'src self) -> Option<Span<'src>> {
316        match self {
317            Self::Simple(b) => b.anchor(),
318            Self::Media(b) => b.anchor(),
319            Self::Section(b) => b.anchor(),
320            Self::RawDelimited(b) => b.anchor(),
321            Self::CompoundDelimited(b) => b.anchor(),
322            Self::DocumentAttribute(b) => b.anchor(),
323        }
324    }
325
326    fn attrlist(&'src self) -> Option<&'src Attrlist<'src>> {
327        match self {
328            Self::Simple(b) => b.attrlist(),
329            Self::Media(b) => b.attrlist(),
330            Self::Section(b) => b.attrlist(),
331            Self::RawDelimited(b) => b.attrlist(),
332            Self::CompoundDelimited(b) => b.attrlist(),
333            Self::DocumentAttribute(b) => b.attrlist(),
334        }
335    }
336
337    fn substitution_group(&self) -> SubstitutionGroup {
338        match self {
339            Self::Simple(b) => b.substitution_group(),
340            Self::Media(b) => b.substitution_group(),
341            Self::Section(b) => b.substitution_group(),
342            Self::RawDelimited(b) => b.substitution_group(),
343            Self::CompoundDelimited(b) => b.substitution_group(),
344            Self::DocumentAttribute(b) => b.substitution_group(),
345        }
346    }
347}
348
349impl<'src> HasSpan<'src> for Block<'src> {
350    fn span(&self) -> Span<'src> {
351        match self {
352            Self::Simple(b) => b.span(),
353            Self::Media(b) => b.span(),
354            Self::Section(b) => b.span(),
355            Self::RawDelimited(b) => b.span(),
356            Self::CompoundDelimited(b) => b.span(),
357            Self::DocumentAttribute(b) => b.span(),
358        }
359    }
360}