asciidoc_parser/blocks/
block.rs

1use std::slice::Iter;
2
3use crate::{
4    HasSpan, Parser, Span,
5    attributes::Attrlist,
6    blocks::{
7        CompoundDelimitedBlock, ContentModel, IsBlock, MediaBlock, RawDelimitedBlock, SectionBlock,
8        SimpleBlock, metadata::BlockMetadata,
9    },
10    content::SubstitutionGroup,
11    document::{Attribute, RefType},
12    span::MatchedItem,
13    strings::CowStr,
14    warnings::{MatchAndWarnings, Warning, WarningType},
15};
16
17/// **Block elements** form the main structure of an AsciiDoc document, starting
18/// with the document itself.
19///
20/// A block element (aka **block**) is a discrete, line-oriented chunk of
21/// content in an AsciiDoc document. Once parsed, that chunk of content becomes
22/// a block element in the parsed document model. Certain blocks may contain
23/// other blocks, so we say that blocks can be nested. The converter visits each
24/// block in turn, in document order, converting it to a corresponding chunk of
25/// output.
26///
27/// This enum represents all of the block types that are understood directly by
28/// this parser and also implements the [`IsBlock`] trait.
29#[derive(Clone, Eq, PartialEq)]
30#[allow(clippy::large_enum_variant)] // TEMPORARY: review later
31#[non_exhaustive]
32pub enum Block<'src> {
33    /// A block that’s treated as contiguous lines of paragraph text (and
34    /// subject to normal substitutions) (e.g., a paragraph block).
35    Simple(SimpleBlock<'src>),
36
37    /// A media block is used to represent an image, video, or audio block
38    /// macro.
39    Media(MediaBlock<'src>),
40
41    /// A section helps to partition the document into a content hierarchy.
42    /// May also be a part, chapter, or special section.
43    Section(SectionBlock<'src>),
44
45    /// A delimited block that contains verbatim, raw, or comment text. The
46    /// content between the matching delimiters is not parsed for block
47    /// syntax.
48    RawDelimited(RawDelimitedBlock<'src>),
49
50    /// A delimited block that can contain other blocks.
51    CompoundDelimited(CompoundDelimitedBlock<'src>),
52
53    /// When an attribute is defined in the document body using an attribute
54    /// entry, that’s simply referred to as a document attribute.
55    DocumentAttribute(Attribute<'src>),
56}
57
58impl<'src> std::fmt::Debug for Block<'src> {
59    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
60        match self {
61            Block::Simple(block) => f.debug_tuple("Block::Simple").field(block).finish(),
62            Block::Media(block) => f.debug_tuple("Block::Media").field(block).finish(),
63            Block::Section(block) => f.debug_tuple("Block::Section").field(block).finish(),
64
65            Block::RawDelimited(block) => {
66                f.debug_tuple("Block::RawDelimited").field(block).finish()
67            }
68
69            Block::CompoundDelimited(block) => f
70                .debug_tuple("Block::CompoundDelimited")
71                .field(block)
72                .finish(),
73
74            Block::DocumentAttribute(block) => f
75                .debug_tuple("Block::DocumentAttribute")
76                .field(block)
77                .finish(),
78        }
79    }
80}
81
82impl<'src> Block<'src> {
83    /// Parse a block of any type and return a `Block` that describes it.
84    ///
85    /// Consumes any blank lines before and after the block.
86    pub(crate) fn parse(
87        source: Span<'src>,
88        parser: &mut Parser,
89    ) -> MatchAndWarnings<'src, Option<MatchedItem<'src, Self>>> {
90        // Optimization: If the first line doesn't match any of the early indications
91        // for delimited blocks, titles, or attrlists, we can skip directly to treating
92        // this as a simple block. That saves quite a bit of parsing time.
93        let first_line = source.take_line();
94
95        // If it does contain any of those markers, we fall through to the more costly
96        // tests below which can more accurately classify the upcoming block.
97        if let Some(first_char) = source.chars().next()
98            && !matches!(
99                first_char,
100                '.' | '#' | '=' | '/' | '-' | '+' | '*' | '_' | '[' | ':'
101            )
102            && !first_line.item.contains("::")
103            && let Some(MatchedItem {
104                item: simple_block,
105                after,
106            }) = SimpleBlock::parse_fast(source, parser)
107        {
108            let mut warnings = vec![];
109            let block = Self::Simple(simple_block);
110
111            Self::register_block_id(
112                block.id(),
113                block.title(),
114                block.span(),
115                parser,
116                &mut warnings,
117            );
118
119            return MatchAndWarnings {
120                item: Some(MatchedItem { item: block, after }),
121                warnings,
122            };
123        }
124
125        // Look for document attributes first since these don't support block metadata.
126        if first_line.item.starts_with(':')
127            && (first_line.item.ends_with(':') || first_line.item.contains(": "))
128            && let Some(attr) = Attribute::parse(source, parser)
129        {
130            let mut warnings: Vec<Warning<'src>> = vec![];
131            parser.set_attribute_from_body(&attr.item, &mut warnings);
132
133            return MatchAndWarnings {
134                item: Some(MatchedItem {
135                    item: Self::DocumentAttribute(attr.item),
136                    after: attr.after,
137                }),
138                warnings,
139            };
140        }
141
142        // Optimization not possible; start by looking for block metadata (title,
143        // attrlist, etc.).
144        let MatchAndWarnings {
145            item: mut metadata,
146            mut warnings,
147        } = BlockMetadata::parse(source, parser);
148
149        if let Some(mut rdb_maw) = RawDelimitedBlock::parse(&metadata, parser)
150            && let Some(rdb) = rdb_maw.item
151        {
152            if !rdb_maw.warnings.is_empty() {
153                warnings.append(&mut rdb_maw.warnings);
154            }
155
156            let block = Self::RawDelimited(rdb.item);
157
158            Self::register_block_id(
159                block.id(),
160                block.title(),
161                block.span(),
162                parser,
163                &mut warnings,
164            );
165
166            return MatchAndWarnings {
167                item: Some(MatchedItem {
168                    item: block,
169                    after: rdb.after,
170                }),
171                warnings,
172            };
173        }
174
175        if let Some(mut cdb_maw) = CompoundDelimitedBlock::parse(&metadata, parser)
176            && let Some(cdb) = cdb_maw.item
177        {
178            if !cdb_maw.warnings.is_empty() {
179                warnings.append(&mut cdb_maw.warnings);
180            }
181
182            let block = Self::CompoundDelimited(cdb.item);
183
184            Self::register_block_id(
185                block.id(),
186                block.title(),
187                block.span(),
188                parser,
189                &mut warnings,
190            );
191
192            return MatchAndWarnings {
193                item: Some(MatchedItem {
194                    item: block,
195                    after: cdb.after,
196                }),
197                warnings,
198            };
199        }
200
201        // Try to discern the block type by scanning the first line.
202        let line = metadata.block_start.take_normalized_line();
203
204        if line.item.starts_with("image::")
205            || line.item.starts_with("video::")
206            || line.item.starts_with("video::")
207        {
208            let mut media_block_maw = MediaBlock::parse(&metadata, parser);
209
210            if let Some(media_block) = media_block_maw.item {
211                // Only propagate warnings from media block parsing if we think this
212                // *is* a media block. Otherwise, there would likely be too many false
213                // positives.
214                if !media_block_maw.warnings.is_empty() {
215                    warnings.append(&mut media_block_maw.warnings);
216                }
217
218                let block = Self::Media(media_block.item);
219
220                Self::register_block_id(
221                    block.id(),
222                    block.title(),
223                    block.span(),
224                    parser,
225                    &mut warnings,
226                );
227
228                return MatchAndWarnings {
229                    item: Some(MatchedItem {
230                        item: block,
231                        after: media_block.after,
232                    }),
233                    warnings,
234                };
235            }
236
237            // This might be some other kind of block, so we don't automatically
238            // error out on a parse failure.
239        }
240
241        if (line.item.starts_with('=') || line.item.starts_with('#'))
242            && let Some(mi_section_block) = SectionBlock::parse(&metadata, parser, &mut warnings)
243        {
244            // A line starting with `=` or `#` might be some other kind of block, so we
245            // continue quietly if `SectionBlock` parser rejects this block.
246
247            return MatchAndWarnings {
248                item: Some(MatchedItem {
249                    item: Self::Section(mi_section_block.item),
250                    after: mi_section_block.after,
251                }),
252                warnings,
253            };
254        }
255
256        // First, let's look for a fun edge case. Perhaps the text contains block
257        // metadata but no block immediately following. If we're not careful, we could
258        // spin in a loop (for example, `parse_blocks_until`) thinking there will be
259        // another block, but there isn't.
260
261        // The following check disables that spin loop.
262        let simple_block_mi = SimpleBlock::parse(&metadata, parser);
263
264        if simple_block_mi.is_none() && !metadata.is_empty() {
265            // We have a metadata with no block. Treat it as a simple block but issue a
266            // warning.
267
268            warnings.push(Warning {
269                source: metadata.source,
270                warning: WarningType::MissingBlockAfterTitleOrAttributeList,
271            });
272
273            // Remove the metadata content so that SimpleBlock will read the title/attrlist
274            // line(s) as regular content.
275            metadata.title_source = None;
276            metadata.title = None;
277            metadata.anchor = None;
278            metadata.attrlist = None;
279            metadata.block_start = metadata.source;
280        }
281
282        // If no other block kind matches, we can always use SimpleBlock.
283        let mut result = MatchAndWarnings {
284            item: SimpleBlock::parse(&metadata, parser).map(|mi| MatchedItem {
285                item: Self::Simple(mi.item),
286                after: mi.after,
287            }),
288            warnings,
289        };
290
291        if let Some(ref matched_item) = result.item {
292            Self::register_block_id(
293                matched_item.item.id(),
294                matched_item.item.title(),
295                matched_item.item.span(),
296                parser,
297                &mut result.warnings,
298            );
299        }
300
301        result
302    }
303
304    /// Register a block's ID with the catalog if the block has an ID.
305    ///
306    /// This should be called for all block types except `SectionBlock`,
307    /// which handles its own catalog registration.
308    fn register_block_id(
309        id: Option<&str>,
310        title: Option<&str>,
311        span: Span<'src>,
312        parser: &mut Parser,
313        warnings: &mut Vec<Warning<'src>>,
314    ) {
315        if let Some(id) = id
316            && let Some(catalog) = parser.catalog_mut()
317            && let Err(_duplicate_error) = catalog.register_ref(
318                id,
319                title, // Use block title as reftext if available
320                RefType::Anchor,
321            )
322        {
323            // If registration fails due to duplicate ID, issue a warning.
324            warnings.push(Warning {
325                source: span,
326                warning: WarningType::DuplicateId(id.to_string()),
327            });
328        }
329    }
330}
331
332impl<'src> IsBlock<'src> for Block<'src> {
333    fn content_model(&self) -> ContentModel {
334        match self {
335            Self::Simple(_) => ContentModel::Simple,
336            Self::Media(b) => b.content_model(),
337            Self::Section(_) => ContentModel::Compound,
338            Self::RawDelimited(b) => b.content_model(),
339            Self::CompoundDelimited(b) => b.content_model(),
340            Self::DocumentAttribute(b) => b.content_model(),
341        }
342    }
343
344    fn raw_context(&self) -> CowStr<'src> {
345        match self {
346            Self::Simple(b) => b.raw_context(),
347            Self::Media(b) => b.raw_context(),
348            Self::Section(b) => b.raw_context(),
349            Self::RawDelimited(b) => b.raw_context(),
350            Self::CompoundDelimited(b) => b.raw_context(),
351            Self::DocumentAttribute(b) => b.raw_context(),
352        }
353    }
354
355    fn nested_blocks(&'src self) -> Iter<'src, Block<'src>> {
356        match self {
357            Self::Simple(b) => b.nested_blocks(),
358            Self::Media(b) => b.nested_blocks(),
359            Self::Section(b) => b.nested_blocks(),
360            Self::RawDelimited(b) => b.nested_blocks(),
361            Self::CompoundDelimited(b) => b.nested_blocks(),
362            Self::DocumentAttribute(b) => b.nested_blocks(),
363        }
364    }
365
366    fn title_source(&'src self) -> Option<Span<'src>> {
367        match self {
368            Self::Simple(b) => b.title_source(),
369            Self::Media(b) => b.title_source(),
370            Self::Section(b) => b.title_source(),
371            Self::RawDelimited(b) => b.title_source(),
372            Self::CompoundDelimited(b) => b.title_source(),
373            Self::DocumentAttribute(b) => b.title_source(),
374        }
375    }
376
377    fn title(&self) -> Option<&str> {
378        match self {
379            Self::Simple(b) => b.title(),
380            Self::Media(b) => b.title(),
381            Self::Section(b) => b.title(),
382            Self::RawDelimited(b) => b.title(),
383            Self::CompoundDelimited(b) => b.title(),
384            Self::DocumentAttribute(b) => b.title(),
385        }
386    }
387
388    fn anchor(&'src self) -> Option<Span<'src>> {
389        match self {
390            Self::Simple(b) => b.anchor(),
391            Self::Media(b) => b.anchor(),
392            Self::Section(b) => b.anchor(),
393            Self::RawDelimited(b) => b.anchor(),
394            Self::CompoundDelimited(b) => b.anchor(),
395            Self::DocumentAttribute(b) => b.anchor(),
396        }
397    }
398
399    fn anchor_reftext(&'src self) -> Option<Span<'src>> {
400        match self {
401            Self::Simple(b) => b.anchor_reftext(),
402            Self::Media(b) => b.anchor_reftext(),
403            Self::Section(b) => b.anchor_reftext(),
404            Self::RawDelimited(b) => b.anchor_reftext(),
405            Self::CompoundDelimited(b) => b.anchor_reftext(),
406            Self::DocumentAttribute(b) => b.anchor_reftext(),
407        }
408    }
409
410    fn attrlist(&'src self) -> Option<&'src Attrlist<'src>> {
411        match self {
412            Self::Simple(b) => b.attrlist(),
413            Self::Media(b) => b.attrlist(),
414            Self::Section(b) => b.attrlist(),
415            Self::RawDelimited(b) => b.attrlist(),
416            Self::CompoundDelimited(b) => b.attrlist(),
417            Self::DocumentAttribute(b) => b.attrlist(),
418        }
419    }
420
421    fn substitution_group(&self) -> SubstitutionGroup {
422        match self {
423            Self::Simple(b) => b.substitution_group(),
424            Self::Media(b) => b.substitution_group(),
425            Self::Section(b) => b.substitution_group(),
426            Self::RawDelimited(b) => b.substitution_group(),
427            Self::CompoundDelimited(b) => b.substitution_group(),
428            Self::DocumentAttribute(b) => b.substitution_group(),
429        }
430    }
431}
432
433impl<'src> HasSpan<'src> for Block<'src> {
434    fn span(&self) -> Span<'src> {
435        match self {
436            Self::Simple(b) => b.span(),
437            Self::Media(b) => b.span(),
438            Self::Section(b) => b.span(),
439            Self::RawDelimited(b) => b.span(),
440            Self::CompoundDelimited(b) => b.span(),
441            Self::DocumentAttribute(b) => b.span(),
442        }
443    }
444}