asciidoc_parser/blocks/
block.rs

1use std::slice::Iter;
2
3use crate::{
4    HasSpan, Parser, Span,
5    attributes::Attrlist,
6    blocks::{
7        Break, CompoundDelimitedBlock, ContentModel, IsBlock, MediaBlock, Preamble,
8        RawDelimitedBlock, SectionBlock, SimpleBlock, metadata::BlockMetadata,
9    },
10    content::SubstitutionGroup,
11    document::{Attribute, RefType},
12    span::MatchedItem,
13    strings::CowStr,
14    warnings::{MatchAndWarnings, Warning, WarningType},
15};
16
/// **Block elements** form the main structure of an AsciiDoc document, starting
/// with the document itself.
///
/// A block element (aka **block**) is a discrete, line-oriented chunk of
/// content in an AsciiDoc document. Once parsed, that chunk of content becomes
/// a block element in the parsed document model. Certain blocks may contain
/// other blocks, so we say that blocks can be nested. The converter visits each
/// block in turn, in document order, converting it to a corresponding chunk of
/// output.
///
/// This enum represents all of the block types that are understood directly by
/// this parser and also implements the [`IsBlock`] trait.
///
/// Every variant wraps the concrete type that models that kind of block.
/// Because the enum is `#[non_exhaustive]`, code outside this crate that
/// matches on it must include a wildcard arm.
// `Debug` is not derived here; it is implemented by hand for this type so
// that the variant names are printed with a `Block::` prefix.
#[derive(Clone, Eq, PartialEq)]
#[allow(clippy::large_enum_variant)] // TEMPORARY: review later
#[non_exhaustive]
pub enum Block<'src> {
    /// A block that’s treated as contiguous lines of paragraph text (and
    /// subject to normal substitutions) (e.g., a paragraph block).
    Simple(SimpleBlock<'src>),

    /// A media block is used to represent an image, video, or audio block
    /// macro.
    Media(MediaBlock<'src>),

    /// A section helps to partition the document into a content hierarchy.
    /// May also be a part, chapter, or special section.
    Section(SectionBlock<'src>),

    /// A delimited block that contains verbatim, raw, or comment text. The
    /// content between the matching delimiters is not parsed for block
    /// syntax.
    RawDelimited(RawDelimitedBlock<'src>),

    /// A delimited block that can contain other blocks.
    CompoundDelimited(CompoundDelimitedBlock<'src>),

    /// Content between the end of the document header and the first section
    /// title in the document body is called the preamble.
    Preamble(Preamble<'src>),

    /// A thematic or page break.
    Break(Break<'src>),

    /// When an attribute is defined in the document body using an attribute
    /// entry, that’s simply referred to as a document attribute.
    DocumentAttribute(Attribute<'src>),
}
64
65impl<'src> std::fmt::Debug for Block<'src> {
66    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
67        match self {
68            Block::Simple(block) => f.debug_tuple("Block::Simple").field(block).finish(),
69            Block::Media(block) => f.debug_tuple("Block::Media").field(block).finish(),
70            Block::Section(block) => f.debug_tuple("Block::Section").field(block).finish(),
71
72            Block::RawDelimited(block) => {
73                f.debug_tuple("Block::RawDelimited").field(block).finish()
74            }
75
76            Block::CompoundDelimited(block) => f
77                .debug_tuple("Block::CompoundDelimited")
78                .field(block)
79                .finish(),
80
81            Block::Preamble(block) => f.debug_tuple("Block::Preamble").field(block).finish(),
82            Block::Break(break_) => f.debug_tuple("Block::Break").field(break_).finish(),
83
84            Block::DocumentAttribute(block) => f
85                .debug_tuple("Block::DocumentAttribute")
86                .field(block)
87                .finish(),
88        }
89    }
90}
91
92impl<'src> Block<'src> {
93    /// Parse a block of any type and return a `Block` that describes it.
94    ///
95    /// Consumes any blank lines before and after the block.
96    pub(crate) fn parse(
97        source: Span<'src>,
98        parser: &mut Parser,
99    ) -> MatchAndWarnings<'src, Option<MatchedItem<'src, Self>>> {
100        // Optimization: If the first line doesn't match any of the early indications
101        // for delimited blocks, titles, or attrlists, we can skip directly to treating
102        // this as a simple block. That saves quite a bit of parsing time.
103        let first_line = source.take_line();
104
105        // If it does contain any of those markers, we fall through to the more costly
106        // tests below which can more accurately classify the upcoming block.
107        if let Some(first_char) = source.chars().next()
108            && !matches!(
109                first_char,
110                '.' | '#' | '=' | '/' | '-' | '+' | '*' | '_' | '[' | ':' | '\'' | '<'
111            )
112            && !first_line.item.contains("::")
113            && let Some(MatchedItem {
114                item: simple_block,
115                after,
116            }) = SimpleBlock::parse_fast(source, parser)
117        {
118            let mut warnings = vec![];
119            let block = Self::Simple(simple_block);
120
121            Self::register_block_id(
122                block.id(),
123                block.title(),
124                block.span(),
125                parser,
126                &mut warnings,
127            );
128
129            return MatchAndWarnings {
130                item: Some(MatchedItem { item: block, after }),
131                warnings,
132            };
133        }
134
135        // Look for document attributes first since these don't support block metadata.
136        if first_line.item.starts_with(':')
137            && (first_line.item.ends_with(':') || first_line.item.contains(": "))
138            && let Some(attr) = Attribute::parse(source, parser)
139        {
140            let mut warnings: Vec<Warning<'src>> = vec![];
141            parser.set_attribute_from_body(&attr.item, &mut warnings);
142
143            return MatchAndWarnings {
144                item: Some(MatchedItem {
145                    item: Self::DocumentAttribute(attr.item),
146                    after: attr.after,
147                }),
148                warnings,
149            };
150        }
151
152        // Optimization not possible; start by looking for block metadata (title,
153        // attrlist, etc.).
154        let MatchAndWarnings {
155            item: mut metadata,
156            mut warnings,
157        } = BlockMetadata::parse(source, parser);
158
159        let is_literal =
160            metadata.attrlist.as_ref().and_then(|a| a.block_style()) == Some("literal");
161
162        if !is_literal {
163            if let Some(mut rdb_maw) = RawDelimitedBlock::parse(&metadata, parser)
164                && let Some(rdb) = rdb_maw.item
165            {
166                if !rdb_maw.warnings.is_empty() {
167                    warnings.append(&mut rdb_maw.warnings);
168                }
169
170                let block = Self::RawDelimited(rdb.item);
171
172                Self::register_block_id(
173                    block.id(),
174                    block.title(),
175                    block.span(),
176                    parser,
177                    &mut warnings,
178                );
179
180                return MatchAndWarnings {
181                    item: Some(MatchedItem {
182                        item: block,
183                        after: rdb.after,
184                    }),
185                    warnings,
186                };
187            }
188
189            if let Some(mut cdb_maw) = CompoundDelimitedBlock::parse(&metadata, parser)
190                && let Some(cdb) = cdb_maw.item
191            {
192                if !cdb_maw.warnings.is_empty() {
193                    warnings.append(&mut cdb_maw.warnings);
194                }
195
196                let block = Self::CompoundDelimited(cdb.item);
197
198                Self::register_block_id(
199                    block.id(),
200                    block.title(),
201                    block.span(),
202                    parser,
203                    &mut warnings,
204                );
205
206                return MatchAndWarnings {
207                    item: Some(MatchedItem {
208                        item: block,
209                        after: cdb.after,
210                    }),
211                    warnings,
212                };
213            }
214
215            // Try to discern the block type by scanning the first line.
216            let line = metadata.block_start.take_normalized_line();
217
218            if line.item.starts_with("image::")
219                || line.item.starts_with("video::")
220                || line.item.starts_with("video::")
221            {
222                let mut media_block_maw = MediaBlock::parse(&metadata, parser);
223
224                if let Some(media_block) = media_block_maw.item {
225                    // Only propagate warnings from media block parsing if we think this
226                    // *is* a media block. Otherwise, there would likely be too many false
227                    // positives.
228                    if !media_block_maw.warnings.is_empty() {
229                        warnings.append(&mut media_block_maw.warnings);
230                    }
231
232                    let block = Self::Media(media_block.item);
233
234                    Self::register_block_id(
235                        block.id(),
236                        block.title(),
237                        block.span(),
238                        parser,
239                        &mut warnings,
240                    );
241
242                    return MatchAndWarnings {
243                        item: Some(MatchedItem {
244                            item: block,
245                            after: media_block.after,
246                        }),
247                        warnings,
248                    };
249                }
250
251                // This might be some other kind of block, so we don't
252                // automatically error out on a parse failure.
253            }
254
255            if (line.item.starts_with('=') || line.item.starts_with('#'))
256                && let Some(mi_section_block) =
257                    SectionBlock::parse(&metadata, parser, &mut warnings)
258            {
259                // A line starting with `=` or `#` might be some other kind of block, so we
260                // continue quietly if `SectionBlock` parser rejects this block.
261
262                return MatchAndWarnings {
263                    item: Some(MatchedItem {
264                        item: Self::Section(mi_section_block.item),
265                        after: mi_section_block.after,
266                    }),
267                    warnings,
268                };
269            }
270
271            if (line.item.starts_with('\'')
272                || line.item.starts_with('-')
273                || line.item.starts_with('*')
274                || line.item.starts_with('<'))
275                && let Some(mi_break) = Break::parse(&metadata, parser)
276            {
277                // Continue quietly if `Break` parser rejects this block.
278
279                return MatchAndWarnings {
280                    item: Some(MatchedItem {
281                        item: Self::Break(mi_break.item),
282                        after: mi_break.after,
283                    }),
284                    warnings,
285                };
286            }
287
288            // First, let's look for a fun edge case. Perhaps the text contains block
289            // metadata but no block immediately following. If we're not careful, we could
290            // spin in a loop (for example, `parse_blocks_until`) thinking there will be
291            // another block, but there isn't.
292
293            // The following check disables that spin loop.
294            let simple_block_mi = SimpleBlock::parse(&metadata, parser);
295
296            if simple_block_mi.is_none() && !metadata.is_empty() {
297                // We have a metadata with no block. Treat it as a simple block but issue a
298                // warning.
299
300                warnings.push(Warning {
301                    source: metadata.source,
302                    warning: WarningType::MissingBlockAfterTitleOrAttributeList,
303                });
304
305                // Remove the metadata content so that SimpleBlock will read the title/attrlist
306                // line(s) as regular content.
307                metadata.title_source = None;
308                metadata.title = None;
309                metadata.anchor = None;
310                metadata.attrlist = None;
311                metadata.block_start = metadata.source;
312            }
313        }
314
315        // If no other block kind matches, we can always use SimpleBlock.
316        let mut result = MatchAndWarnings {
317            item: SimpleBlock::parse(&metadata, parser).map(|mi| MatchedItem {
318                item: Self::Simple(mi.item),
319                after: mi.after,
320            }),
321            warnings,
322        };
323
324        if let Some(ref matched_item) = result.item {
325            Self::register_block_id(
326                matched_item.item.id(),
327                matched_item.item.title(),
328                matched_item.item.span(),
329                parser,
330                &mut result.warnings,
331            );
332        }
333
334        result
335    }
336
337    /// Register a block's ID with the catalog if the block has an ID.
338    ///
339    /// This should be called for all block types except `SectionBlock`,
340    /// which handles its own catalog registration.
341    fn register_block_id(
342        id: Option<&str>,
343        title: Option<&str>,
344        span: Span<'src>,
345        parser: &mut Parser,
346        warnings: &mut Vec<Warning<'src>>,
347    ) {
348        if let Some(id) = id
349            && let Some(catalog) = parser.catalog_mut()
350            && let Err(_duplicate_error) = catalog.register_ref(
351                id,
352                title, // Use block title as reftext if available
353                RefType::Anchor,
354            )
355        {
356            // If registration fails due to duplicate ID, issue a warning.
357            warnings.push(Warning {
358                source: span,
359                warning: WarningType::DuplicateId(id.to_string()),
360            });
361        }
362    }
363}
364
365impl<'src> IsBlock<'src> for Block<'src> {
366    fn content_model(&self) -> ContentModel {
367        match self {
368            Self::Simple(_) => ContentModel::Simple,
369            Self::Media(b) => b.content_model(),
370            Self::Section(_) => ContentModel::Compound,
371            Self::RawDelimited(b) => b.content_model(),
372            Self::CompoundDelimited(b) => b.content_model(),
373            Self::Preamble(b) => b.content_model(),
374            Self::Break(b) => b.content_model(),
375            Self::DocumentAttribute(b) => b.content_model(),
376        }
377    }
378
379    fn raw_context(&self) -> CowStr<'src> {
380        match self {
381            Self::Simple(b) => b.raw_context(),
382            Self::Media(b) => b.raw_context(),
383            Self::Section(b) => b.raw_context(),
384            Self::RawDelimited(b) => b.raw_context(),
385            Self::CompoundDelimited(b) => b.raw_context(),
386            Self::Preamble(b) => b.raw_context(),
387            Self::Break(b) => b.raw_context(),
388            Self::DocumentAttribute(b) => b.raw_context(),
389        }
390    }
391
392    fn nested_blocks(&'src self) -> Iter<'src, Block<'src>> {
393        match self {
394            Self::Simple(b) => b.nested_blocks(),
395            Self::Media(b) => b.nested_blocks(),
396            Self::Section(b) => b.nested_blocks(),
397            Self::RawDelimited(b) => b.nested_blocks(),
398            Self::CompoundDelimited(b) => b.nested_blocks(),
399            Self::Preamble(b) => b.nested_blocks(),
400            Self::Break(b) => b.nested_blocks(),
401            Self::DocumentAttribute(b) => b.nested_blocks(),
402        }
403    }
404
405    fn title_source(&'src self) -> Option<Span<'src>> {
406        match self {
407            Self::Simple(b) => b.title_source(),
408            Self::Media(b) => b.title_source(),
409            Self::Section(b) => b.title_source(),
410            Self::RawDelimited(b) => b.title_source(),
411            Self::CompoundDelimited(b) => b.title_source(),
412            Self::Preamble(b) => b.title_source(),
413            Self::Break(b) => b.title_source(),
414            Self::DocumentAttribute(b) => b.title_source(),
415        }
416    }
417
418    fn title(&self) -> Option<&str> {
419        match self {
420            Self::Simple(b) => b.title(),
421            Self::Media(b) => b.title(),
422            Self::Section(b) => b.title(),
423            Self::RawDelimited(b) => b.title(),
424            Self::CompoundDelimited(b) => b.title(),
425            Self::Preamble(b) => b.title(),
426            Self::Break(b) => b.title(),
427            Self::DocumentAttribute(b) => b.title(),
428        }
429    }
430
431    fn anchor(&'src self) -> Option<Span<'src>> {
432        match self {
433            Self::Simple(b) => b.anchor(),
434            Self::Media(b) => b.anchor(),
435            Self::Section(b) => b.anchor(),
436            Self::RawDelimited(b) => b.anchor(),
437            Self::CompoundDelimited(b) => b.anchor(),
438            Self::Preamble(b) => b.anchor(),
439            Self::Break(b) => b.anchor(),
440            Self::DocumentAttribute(b) => b.anchor(),
441        }
442    }
443
444    fn anchor_reftext(&'src self) -> Option<Span<'src>> {
445        match self {
446            Self::Simple(b) => b.anchor_reftext(),
447            Self::Media(b) => b.anchor_reftext(),
448            Self::Section(b) => b.anchor_reftext(),
449            Self::RawDelimited(b) => b.anchor_reftext(),
450            Self::CompoundDelimited(b) => b.anchor_reftext(),
451            Self::Preamble(b) => b.anchor_reftext(),
452            Self::Break(b) => b.anchor_reftext(),
453            Self::DocumentAttribute(b) => b.anchor_reftext(),
454        }
455    }
456
457    fn attrlist(&'src self) -> Option<&'src Attrlist<'src>> {
458        match self {
459            Self::Simple(b) => b.attrlist(),
460            Self::Media(b) => b.attrlist(),
461            Self::Section(b) => b.attrlist(),
462            Self::RawDelimited(b) => b.attrlist(),
463            Self::CompoundDelimited(b) => b.attrlist(),
464            Self::Preamble(b) => b.attrlist(),
465            Self::Break(b) => b.attrlist(),
466            Self::DocumentAttribute(b) => b.attrlist(),
467        }
468    }
469
470    fn substitution_group(&self) -> SubstitutionGroup {
471        match self {
472            Self::Simple(b) => b.substitution_group(),
473            Self::Media(b) => b.substitution_group(),
474            Self::Section(b) => b.substitution_group(),
475            Self::RawDelimited(b) => b.substitution_group(),
476            Self::CompoundDelimited(b) => b.substitution_group(),
477            Self::Preamble(b) => b.substitution_group(),
478            Self::Break(b) => b.substitution_group(),
479            Self::DocumentAttribute(b) => b.substitution_group(),
480        }
481    }
482}
483
484impl<'src> HasSpan<'src> for Block<'src> {
485    fn span(&self) -> Span<'src> {
486        match self {
487            Self::Simple(b) => b.span(),
488            Self::Media(b) => b.span(),
489            Self::Section(b) => b.span(),
490            Self::RawDelimited(b) => b.span(),
491            Self::CompoundDelimited(b) => b.span(),
492            Self::Preamble(b) => b.span(),
493            Self::Break(b) => b.span(),
494            Self::DocumentAttribute(b) => b.span(),
495        }
496    }
497}