asciidoc_parser/blocks/
block.rs

1use std::slice::Iter;
2
3use crate::{
4    HasSpan, Parser, Span,
5    attributes::Attrlist,
6    blocks::{
7        CompoundDelimitedBlock, ContentModel, IsBlock, MediaBlock, Preamble, RawDelimitedBlock,
8        SectionBlock, SimpleBlock, metadata::BlockMetadata,
9    },
10    content::SubstitutionGroup,
11    document::{Attribute, RefType},
12    span::MatchedItem,
13    strings::CowStr,
14    warnings::{MatchAndWarnings, Warning, WarningType},
15};
16
/// **Block elements** form the main structure of an AsciiDoc document, starting
/// with the document itself.
///
/// A block element (aka **block**) is a discrete, line-oriented chunk of
/// content in an AsciiDoc document. Once parsed, that chunk of content becomes
/// a block element in the parsed document model. Certain blocks may contain
/// other blocks, so we say that blocks can be nested. The converter visits each
/// block in turn, in document order, converting it to a corresponding chunk of
/// output.
///
/// This enum represents all of the block types that are understood directly by
/// this parser and also implements the [`IsBlock`] trait.
///
/// `Debug` is implemented by hand in this file (rather than derived) so that
/// each variant is printed with a `Block::` prefix.
#[derive(Clone, Eq, PartialEq)]
// The variants differ noticeably in size, which trips this lint. Boxing the
// larger payloads has been deferred for now.
#[allow(clippy::large_enum_variant)] // TEMPORARY: review later
#[non_exhaustive]
pub enum Block<'src> {
    /// A block that’s treated as contiguous lines of paragraph text (and
    /// subject to normal substitutions) (e.g., a paragraph block).
    Simple(SimpleBlock<'src>),

    /// A media block is used to represent an image, video, or audio block
    /// macro.
    Media(MediaBlock<'src>),

    /// A section helps to partition the document into a content hierarchy.
    /// May also be a part, chapter, or special section.
    Section(SectionBlock<'src>),

    /// A delimited block that contains verbatim, raw, or comment text. The
    /// content between the matching delimiters is not parsed for block
    /// syntax.
    RawDelimited(RawDelimitedBlock<'src>),

    /// A delimited block that can contain other blocks.
    CompoundDelimited(CompoundDelimitedBlock<'src>),

    /// Content between the end of the document header and the first section
    /// title in the document body is called the preamble.
    Preamble(Preamble<'src>),

    /// When an attribute is defined in the document body using an attribute
    /// entry, that’s simply referred to as a document attribute.
    DocumentAttribute(Attribute<'src>),
}
61
62impl<'src> std::fmt::Debug for Block<'src> {
63    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
64        match self {
65            Block::Simple(block) => f.debug_tuple("Block::Simple").field(block).finish(),
66            Block::Media(block) => f.debug_tuple("Block::Media").field(block).finish(),
67            Block::Section(block) => f.debug_tuple("Block::Section").field(block).finish(),
68
69            Block::RawDelimited(block) => {
70                f.debug_tuple("Block::RawDelimited").field(block).finish()
71            }
72
73            Block::CompoundDelimited(block) => f
74                .debug_tuple("Block::CompoundDelimited")
75                .field(block)
76                .finish(),
77
78            Block::Preamble(block) => f.debug_tuple("Block::Preamble").field(block).finish(),
79
80            Block::DocumentAttribute(block) => f
81                .debug_tuple("Block::DocumentAttribute")
82                .field(block)
83                .finish(),
84        }
85    }
86}
87
88impl<'src> Block<'src> {
89    /// Parse a block of any type and return a `Block` that describes it.
90    ///
91    /// Consumes any blank lines before and after the block.
92    pub(crate) fn parse(
93        source: Span<'src>,
94        parser: &mut Parser,
95    ) -> MatchAndWarnings<'src, Option<MatchedItem<'src, Self>>> {
96        // Optimization: If the first line doesn't match any of the early indications
97        // for delimited blocks, titles, or attrlists, we can skip directly to treating
98        // this as a simple block. That saves quite a bit of parsing time.
99        let first_line = source.take_line();
100
101        // If it does contain any of those markers, we fall through to the more costly
102        // tests below which can more accurately classify the upcoming block.
103        if let Some(first_char) = source.chars().next()
104            && !matches!(
105                first_char,
106                '.' | '#' | '=' | '/' | '-' | '+' | '*' | '_' | '[' | ':'
107            )
108            && !first_line.item.contains("::")
109            && let Some(MatchedItem {
110                item: simple_block,
111                after,
112            }) = SimpleBlock::parse_fast(source, parser)
113        {
114            let mut warnings = vec![];
115            let block = Self::Simple(simple_block);
116
117            Self::register_block_id(
118                block.id(),
119                block.title(),
120                block.span(),
121                parser,
122                &mut warnings,
123            );
124
125            return MatchAndWarnings {
126                item: Some(MatchedItem { item: block, after }),
127                warnings,
128            };
129        }
130
131        // Look for document attributes first since these don't support block metadata.
132        if first_line.item.starts_with(':')
133            && (first_line.item.ends_with(':') || first_line.item.contains(": "))
134            && let Some(attr) = Attribute::parse(source, parser)
135        {
136            let mut warnings: Vec<Warning<'src>> = vec![];
137            parser.set_attribute_from_body(&attr.item, &mut warnings);
138
139            return MatchAndWarnings {
140                item: Some(MatchedItem {
141                    item: Self::DocumentAttribute(attr.item),
142                    after: attr.after,
143                }),
144                warnings,
145            };
146        }
147
148        // Optimization not possible; start by looking for block metadata (title,
149        // attrlist, etc.).
150        let MatchAndWarnings {
151            item: mut metadata,
152            mut warnings,
153        } = BlockMetadata::parse(source, parser);
154
155        if let Some(mut rdb_maw) = RawDelimitedBlock::parse(&metadata, parser)
156            && let Some(rdb) = rdb_maw.item
157        {
158            if !rdb_maw.warnings.is_empty() {
159                warnings.append(&mut rdb_maw.warnings);
160            }
161
162            let block = Self::RawDelimited(rdb.item);
163
164            Self::register_block_id(
165                block.id(),
166                block.title(),
167                block.span(),
168                parser,
169                &mut warnings,
170            );
171
172            return MatchAndWarnings {
173                item: Some(MatchedItem {
174                    item: block,
175                    after: rdb.after,
176                }),
177                warnings,
178            };
179        }
180
181        if let Some(mut cdb_maw) = CompoundDelimitedBlock::parse(&metadata, parser)
182            && let Some(cdb) = cdb_maw.item
183        {
184            if !cdb_maw.warnings.is_empty() {
185                warnings.append(&mut cdb_maw.warnings);
186            }
187
188            let block = Self::CompoundDelimited(cdb.item);
189
190            Self::register_block_id(
191                block.id(),
192                block.title(),
193                block.span(),
194                parser,
195                &mut warnings,
196            );
197
198            return MatchAndWarnings {
199                item: Some(MatchedItem {
200                    item: block,
201                    after: cdb.after,
202                }),
203                warnings,
204            };
205        }
206
207        // Try to discern the block type by scanning the first line.
208        let line = metadata.block_start.take_normalized_line();
209
210        if line.item.starts_with("image::")
211            || line.item.starts_with("video::")
212            || line.item.starts_with("video::")
213        {
214            let mut media_block_maw = MediaBlock::parse(&metadata, parser);
215
216            if let Some(media_block) = media_block_maw.item {
217                // Only propagate warnings from media block parsing if we think this
218                // *is* a media block. Otherwise, there would likely be too many false
219                // positives.
220                if !media_block_maw.warnings.is_empty() {
221                    warnings.append(&mut media_block_maw.warnings);
222                }
223
224                let block = Self::Media(media_block.item);
225
226                Self::register_block_id(
227                    block.id(),
228                    block.title(),
229                    block.span(),
230                    parser,
231                    &mut warnings,
232                );
233
234                return MatchAndWarnings {
235                    item: Some(MatchedItem {
236                        item: block,
237                        after: media_block.after,
238                    }),
239                    warnings,
240                };
241            }
242
243            // This might be some other kind of block, so we don't automatically
244            // error out on a parse failure.
245        }
246
247        if (line.item.starts_with('=') || line.item.starts_with('#'))
248            && let Some(mi_section_block) = SectionBlock::parse(&metadata, parser, &mut warnings)
249        {
250            // A line starting with `=` or `#` might be some other kind of block, so we
251            // continue quietly if `SectionBlock` parser rejects this block.
252
253            return MatchAndWarnings {
254                item: Some(MatchedItem {
255                    item: Self::Section(mi_section_block.item),
256                    after: mi_section_block.after,
257                }),
258                warnings,
259            };
260        }
261
262        // First, let's look for a fun edge case. Perhaps the text contains block
263        // metadata but no block immediately following. If we're not careful, we could
264        // spin in a loop (for example, `parse_blocks_until`) thinking there will be
265        // another block, but there isn't.
266
267        // The following check disables that spin loop.
268        let simple_block_mi = SimpleBlock::parse(&metadata, parser);
269
270        if simple_block_mi.is_none() && !metadata.is_empty() {
271            // We have a metadata with no block. Treat it as a simple block but issue a
272            // warning.
273
274            warnings.push(Warning {
275                source: metadata.source,
276                warning: WarningType::MissingBlockAfterTitleOrAttributeList,
277            });
278
279            // Remove the metadata content so that SimpleBlock will read the title/attrlist
280            // line(s) as regular content.
281            metadata.title_source = None;
282            metadata.title = None;
283            metadata.anchor = None;
284            metadata.attrlist = None;
285            metadata.block_start = metadata.source;
286        }
287
288        // If no other block kind matches, we can always use SimpleBlock.
289        let mut result = MatchAndWarnings {
290            item: SimpleBlock::parse(&metadata, parser).map(|mi| MatchedItem {
291                item: Self::Simple(mi.item),
292                after: mi.after,
293            }),
294            warnings,
295        };
296
297        if let Some(ref matched_item) = result.item {
298            Self::register_block_id(
299                matched_item.item.id(),
300                matched_item.item.title(),
301                matched_item.item.span(),
302                parser,
303                &mut result.warnings,
304            );
305        }
306
307        result
308    }
309
310    /// Register a block's ID with the catalog if the block has an ID.
311    ///
312    /// This should be called for all block types except `SectionBlock`,
313    /// which handles its own catalog registration.
314    fn register_block_id(
315        id: Option<&str>,
316        title: Option<&str>,
317        span: Span<'src>,
318        parser: &mut Parser,
319        warnings: &mut Vec<Warning<'src>>,
320    ) {
321        if let Some(id) = id
322            && let Some(catalog) = parser.catalog_mut()
323            && let Err(_duplicate_error) = catalog.register_ref(
324                id,
325                title, // Use block title as reftext if available
326                RefType::Anchor,
327            )
328        {
329            // If registration fails due to duplicate ID, issue a warning.
330            warnings.push(Warning {
331                source: span,
332                warning: WarningType::DuplicateId(id.to_string()),
333            });
334        }
335    }
336}
337
338impl<'src> IsBlock<'src> for Block<'src> {
339    fn content_model(&self) -> ContentModel {
340        match self {
341            Self::Simple(_) => ContentModel::Simple,
342            Self::Media(b) => b.content_model(),
343            Self::Section(_) => ContentModel::Compound,
344            Self::RawDelimited(b) => b.content_model(),
345            Self::CompoundDelimited(b) => b.content_model(),
346            Self::Preamble(b) => b.content_model(),
347            Self::DocumentAttribute(b) => b.content_model(),
348        }
349    }
350
351    fn raw_context(&self) -> CowStr<'src> {
352        match self {
353            Self::Simple(b) => b.raw_context(),
354            Self::Media(b) => b.raw_context(),
355            Self::Section(b) => b.raw_context(),
356            Self::RawDelimited(b) => b.raw_context(),
357            Self::CompoundDelimited(b) => b.raw_context(),
358            Self::Preamble(b) => b.raw_context(),
359            Self::DocumentAttribute(b) => b.raw_context(),
360        }
361    }
362
363    fn nested_blocks(&'src self) -> Iter<'src, Block<'src>> {
364        match self {
365            Self::Simple(b) => b.nested_blocks(),
366            Self::Media(b) => b.nested_blocks(),
367            Self::Section(b) => b.nested_blocks(),
368            Self::RawDelimited(b) => b.nested_blocks(),
369            Self::CompoundDelimited(b) => b.nested_blocks(),
370            Self::Preamble(b) => b.nested_blocks(),
371            Self::DocumentAttribute(b) => b.nested_blocks(),
372        }
373    }
374
375    fn title_source(&'src self) -> Option<Span<'src>> {
376        match self {
377            Self::Simple(b) => b.title_source(),
378            Self::Media(b) => b.title_source(),
379            Self::Section(b) => b.title_source(),
380            Self::RawDelimited(b) => b.title_source(),
381            Self::CompoundDelimited(b) => b.title_source(),
382            Self::Preamble(b) => b.title_source(),
383            Self::DocumentAttribute(b) => b.title_source(),
384        }
385    }
386
387    fn title(&self) -> Option<&str> {
388        match self {
389            Self::Simple(b) => b.title(),
390            Self::Media(b) => b.title(),
391            Self::Section(b) => b.title(),
392            Self::RawDelimited(b) => b.title(),
393            Self::CompoundDelimited(b) => b.title(),
394            Self::Preamble(b) => b.title(),
395            Self::DocumentAttribute(b) => b.title(),
396        }
397    }
398
399    fn anchor(&'src self) -> Option<Span<'src>> {
400        match self {
401            Self::Simple(b) => b.anchor(),
402            Self::Media(b) => b.anchor(),
403            Self::Section(b) => b.anchor(),
404            Self::RawDelimited(b) => b.anchor(),
405            Self::CompoundDelimited(b) => b.anchor(),
406            Self::Preamble(b) => b.anchor(),
407            Self::DocumentAttribute(b) => b.anchor(),
408        }
409    }
410
411    fn anchor_reftext(&'src self) -> Option<Span<'src>> {
412        match self {
413            Self::Simple(b) => b.anchor_reftext(),
414            Self::Media(b) => b.anchor_reftext(),
415            Self::Section(b) => b.anchor_reftext(),
416            Self::RawDelimited(b) => b.anchor_reftext(),
417            Self::CompoundDelimited(b) => b.anchor_reftext(),
418            Self::Preamble(b) => b.anchor_reftext(),
419            Self::DocumentAttribute(b) => b.anchor_reftext(),
420        }
421    }
422
423    fn attrlist(&'src self) -> Option<&'src Attrlist<'src>> {
424        match self {
425            Self::Simple(b) => b.attrlist(),
426            Self::Media(b) => b.attrlist(),
427            Self::Section(b) => b.attrlist(),
428            Self::RawDelimited(b) => b.attrlist(),
429            Self::CompoundDelimited(b) => b.attrlist(),
430            Self::Preamble(b) => b.attrlist(),
431            Self::DocumentAttribute(b) => b.attrlist(),
432        }
433    }
434
435    fn substitution_group(&self) -> SubstitutionGroup {
436        match self {
437            Self::Simple(b) => b.substitution_group(),
438            Self::Media(b) => b.substitution_group(),
439            Self::Section(b) => b.substitution_group(),
440            Self::RawDelimited(b) => b.substitution_group(),
441            Self::CompoundDelimited(b) => b.substitution_group(),
442            Self::Preamble(b) => b.substitution_group(),
443            Self::DocumentAttribute(b) => b.substitution_group(),
444        }
445    }
446}
447
448impl<'src> HasSpan<'src> for Block<'src> {
449    fn span(&self) -> Span<'src> {
450        match self {
451            Self::Simple(b) => b.span(),
452            Self::Media(b) => b.span(),
453            Self::Section(b) => b.span(),
454            Self::RawDelimited(b) => b.span(),
455            Self::CompoundDelimited(b) => b.span(),
456            Self::Preamble(b) => b.span(),
457            Self::DocumentAttribute(b) => b.span(),
458        }
459    }
460}