// asciidoc_parser/blocks/block.rs

1use std::slice::Iter;
2
3use crate::{
4    HasSpan, Parser, Span,
5    attributes::Attrlist,
6    blocks::{
7        Break, CompoundDelimitedBlock, ContentModel, IsBlock, ListBlock, ListItem, ListItemMarker,
8        MediaBlock, Preamble, RawDelimitedBlock, SectionBlock, SimpleBlock,
9        metadata::BlockMetadata,
10    },
11    content::SubstitutionGroup,
12    document::{Attribute, RefType},
13    span::MatchedItem,
14    strings::CowStr,
15    warnings::{MatchAndWarnings, Warning, WarningType},
16};
17
18/// **Block elements** form the main structure of an AsciiDoc document, starting
19/// with the document itself.
20///
21/// A block element (aka **block**) is a discrete, line-oriented chunk of
22/// content in an AsciiDoc document. Once parsed, that chunk of content becomes
23/// a block element in the parsed document model. Certain blocks may contain
24/// other blocks, so we say that blocks can be nested. The converter visits each
25/// block in turn, in document order, converting it to a corresponding chunk of
26/// output.
27///
28/// This enum represents all of the block types that are understood directly by
29/// this parser and also implements the [`IsBlock`] trait.
30#[derive(Clone, Eq, PartialEq)]
31#[allow(clippy::large_enum_variant)] // TEMPORARY: review later
32#[non_exhaustive]
33pub enum Block<'src> {
34    /// A block that’s treated as contiguous lines of paragraph text (and
35    /// subject to normal substitutions) (e.g., a paragraph block).
36    Simple(SimpleBlock<'src>),
37
38    /// A media block is used to represent an image, video, or audio block
39    /// macro.
40    Media(MediaBlock<'src>),
41
42    /// A section helps to partition the document into a content hierarchy.
43    /// May also be a part, chapter, or special section.
44    Section(SectionBlock<'src>),
45
46    /// A list contains a sequence of items prefixed with symbol, such as a disc
47    /// (aka bullet). Each individual item in the list is represented by a
48    /// [`ListItem`].
49    List(ListBlock<'src>),
50
51    /// A list item is a special kind of block that is a member of a
52    /// [`ListBlock`] and contains one or more blocks attached to it.
53    ListItem(ListItem<'src>),
54
55    /// A delimited block that contains verbatim, raw, or comment text. The
56    /// content between the matching delimiters is not parsed for block
57    /// syntax.
58    RawDelimited(RawDelimitedBlock<'src>),
59
60    /// A delimited block that can contain other blocks.
61    CompoundDelimited(CompoundDelimitedBlock<'src>),
62
63    /// Content between the end of the document header and the first section
64    /// title in the document body is called the preamble.
65    Preamble(Preamble<'src>),
66
67    /// A thematic or page break.
68    Break(Break<'src>),
69
70    /// When an attribute is defined in the document body using an attribute
71    /// entry, that’s simply referred to as a document attribute.
72    DocumentAttribute(Attribute<'src>),
73}
74
75impl<'src> std::fmt::Debug for Block<'src> {
76    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
77        match self {
78            Block::Simple(block) => f.debug_tuple("Block::Simple").field(block).finish(),
79            Block::Media(block) => f.debug_tuple("Block::Media").field(block).finish(),
80            Block::Section(block) => f.debug_tuple("Block::Section").field(block).finish(),
81            Block::List(block) => f.debug_tuple("Block::List").field(block).finish(),
82            Block::ListItem(block) => f.debug_tuple("Block::ListItem").field(block).finish(),
83
84            Block::RawDelimited(block) => {
85                f.debug_tuple("Block::RawDelimited").field(block).finish()
86            }
87
88            Block::CompoundDelimited(block) => f
89                .debug_tuple("Block::CompoundDelimited")
90                .field(block)
91                .finish(),
92
93            Block::Preamble(block) => f.debug_tuple("Block::Preamble").field(block).finish(),
94            Block::Break(break_) => f.debug_tuple("Block::Break").field(break_).finish(),
95
96            Block::DocumentAttribute(block) => f
97                .debug_tuple("Block::DocumentAttribute")
98                .field(block)
99                .finish(),
100        }
101    }
102}
103
104impl<'src> Block<'src> {
105    /// Parse a block of any type and return a `Block` that describes it.
106    ///
107    /// Consumes any blank lines before and after the block.
108    pub(crate) fn parse(
109        source: Span<'src>,
110        parser: &mut Parser,
111    ) -> MatchAndWarnings<'src, Option<MatchedItem<'src, Self>>> {
112        Self::parse_internal(source, parser, None, false)
113    }
114
115    /// Parse a block of any type and return a `Block` that describes it.
116    ///
117    /// Will terminate early when parsing certain block types within a list
118    /// context.
119    ///
120    /// Consumes any blank lines before and after the block.
121    ///
122    /// If `is_continuation` is true, this content was attached via a `+`
123    /// continuation marker and literal blocks should preserve their
124    /// indentation.
125    pub(crate) fn parse_for_list_item(
126        source: Span<'src>,
127        parser: &mut Parser,
128        parent_list_markers: &[ListItemMarker<'src>],
129        is_continuation: bool,
130    ) -> MatchAndWarnings<'src, Option<MatchedItem<'src, Self>>> {
131        Self::parse_internal(source, parser, Some(parent_list_markers), is_continuation)
132    }
133
134    /// Shared parser for [`Block::parse`] and [`Block::parse_for_list_item`].
135    fn parse_internal(
136        source: Span<'src>,
137        parser: &mut Parser,
138        parent_list_markers: Option<&[ListItemMarker<'src>]>,
139        is_continuation: bool,
140    ) -> MatchAndWarnings<'src, Option<MatchedItem<'src, Self>>> {
141        // Optimization: If the first line doesn't match any of the early indications
142        // for delimited blocks, titles, or attrlists, we can skip directly to treating
143        // this as a simple block. That saves quite a bit of parsing time.
144        let first_line = source.take_line().item.discard_whitespace();
145
146        // If it does contain any of those markers, we fall through to the more costly
147        // tests below which can more accurately classify the upcoming block.
148        if let Some(first_char) = first_line.chars().next()
149            && !matches!(
150                first_char,
151                '.' | '#' | '=' | '/' | '-' | '+' | '*' | '_' | '[' | ':' | '\'' | '<' | '•'
152            )
153            && !first_line.contains("::")
154            && !first_line.contains(";;")
155            && !ListItemMarker::starts_with_marker(first_line)
156            && parent_list_markers.is_none()
157            && let Some(MatchedItem {
158                item: simple_block,
159                after,
160            }) = SimpleBlock::parse_fast(source, parser)
161        {
162            let mut warnings = vec![];
163            let block = Self::Simple(simple_block);
164
165            Self::register_block_id(
166                block.id(),
167                block.title(),
168                block.span(),
169                parser,
170                &mut warnings,
171            );
172
173            return MatchAndWarnings {
174                item: Some(MatchedItem { item: block, after }),
175                warnings,
176            };
177        }
178
179        // Look for document attributes first since these don't support block metadata.
180        if first_line.starts_with(':')
181            && (first_line.ends_with(':') || first_line.contains(": "))
182            && let Some(attr) = Attribute::parse(source, parser)
183        {
184            let mut warnings: Vec<Warning<'src>> = vec![];
185            parser.set_attribute_from_body(&attr.item, &mut warnings);
186
187            return MatchAndWarnings {
188                item: Some(MatchedItem {
189                    item: Self::DocumentAttribute(attr.item),
190                    after: attr.after,
191                }),
192                warnings,
193            };
194        }
195
196        // Optimization not possible; start by looking for block metadata (title,
197        // attrlist, etc.).
198        let MatchAndWarnings {
199            item: mut metadata,
200            mut warnings,
201        } = BlockMetadata::parse(source, parser);
202
203        let is_literal =
204            metadata.attrlist.as_ref().and_then(|a| a.block_style()) == Some("literal");
205
206        if !is_literal {
207            if let Some(mut rdb_maw) = RawDelimitedBlock::parse(&metadata, parser)
208                && let Some(rdb) = rdb_maw.item
209            {
210                if !rdb_maw.warnings.is_empty() {
211                    warnings.append(&mut rdb_maw.warnings);
212                }
213
214                let block = Self::RawDelimited(rdb.item);
215
216                Self::register_block_id(
217                    block.id(),
218                    block.title(),
219                    block.span(),
220                    parser,
221                    &mut warnings,
222                );
223
224                return MatchAndWarnings {
225                    item: Some(MatchedItem {
226                        item: block,
227                        after: rdb.after,
228                    }),
229                    warnings,
230                };
231            }
232
233            if let Some(mut cdb_maw) = CompoundDelimitedBlock::parse(&metadata, parser)
234                && let Some(cdb) = cdb_maw.item
235            {
236                if !cdb_maw.warnings.is_empty() {
237                    warnings.append(&mut cdb_maw.warnings);
238                }
239
240                let block = Self::CompoundDelimited(cdb.item);
241
242                Self::register_block_id(
243                    block.id(),
244                    block.title(),
245                    block.span(),
246                    parser,
247                    &mut warnings,
248                );
249
250                return MatchAndWarnings {
251                    item: Some(MatchedItem {
252                        item: block,
253                        after: cdb.after,
254                    }),
255                    warnings,
256                };
257            }
258
259            // Try to discern the block type by scanning the first line.
260            let line = metadata.block_start.take_normalized_line();
261
262            if line.item.starts_with("image::")
263                || line.item.starts_with("video::")
264                || line.item.starts_with("video::")
265            {
266                let mut media_block_maw = MediaBlock::parse(&metadata, parser);
267
268                if let Some(media_block) = media_block_maw.item {
269                    // Only propagate warnings from media block parsing if we think this
270                    // *is* a media block. Otherwise, there would likely be too many false
271                    // positives.
272                    if !media_block_maw.warnings.is_empty() {
273                        warnings.append(&mut media_block_maw.warnings);
274                    }
275
276                    let block = Self::Media(media_block.item);
277
278                    Self::register_block_id(
279                        block.id(),
280                        block.title(),
281                        block.span(),
282                        parser,
283                        &mut warnings,
284                    );
285
286                    return MatchAndWarnings {
287                        item: Some(MatchedItem {
288                            item: block,
289                            after: media_block.after,
290                        }),
291                        warnings,
292                    };
293                }
294
295                // This might be some other kind of block, so we don't
296                // automatically error out on a parse failure.
297            }
298
299            if (line.item.starts_with('=') || line.item.starts_with('#'))
300                && let Some(mi_section_block) =
301                    SectionBlock::parse(&metadata, parser, &mut warnings)
302            {
303                // A line starting with `=` or `#` might be some other kind of block, so we
304                // continue quietly if `SectionBlock` parser rejects this block.
305
306                return MatchAndWarnings {
307                    item: Some(MatchedItem {
308                        item: Self::Section(mi_section_block.item),
309                        after: mi_section_block.after,
310                    }),
311                    warnings,
312                };
313            }
314
315            if (line.item.starts_with('\'')
316                || line.item.starts_with('-')
317                || line.item.starts_with('*')
318                || line.item.starts_with('<'))
319                && let Some(mi_break) = Break::parse(&metadata, parser)
320            {
321                // Continue quietly if `Break` parser rejects this block.
322
323                return MatchAndWarnings {
324                    item: Some(MatchedItem {
325                        item: Self::Break(mi_break.item),
326                        after: mi_break.after,
327                    }),
328                    warnings,
329                };
330            }
331
332            // Only try to parse as a new list if we're NOT inside a list item context.
333            // If we are inside a list context, lists can only be created when the first
334            // line is a list item marker (handled above).
335            if parent_list_markers.is_none()
336                && let Some(mi_list) = ListBlock::parse(&metadata, parser, &mut warnings)
337            {
338                return MatchAndWarnings {
339                    item: Some(MatchedItem {
340                        item: Self::List(mi_list.item),
341                        after: mi_list.after,
342                    }),
343                    warnings,
344                };
345            }
346
347            // First, let's look for a fun edge case. Perhaps the text contains block
348            // metadata but no block immediately following. If we're not careful, we could
349            // spin in a loop (for example, `parse_blocks_until`) thinking there will be
350            // another block, but there isn't.
351
352            // The following check disables that spin loop.
353            let simple_block_mi = if let Some(plm) = parent_list_markers {
354                SimpleBlock::parse_for_list_item(&metadata, parser, is_continuation, plm)
355            } else {
356                SimpleBlock::parse(&metadata, parser)
357            };
358
359            if simple_block_mi.is_none() && !metadata.is_empty() {
360                // We have a metadata with no block. Treat it as a simple block but issue a
361                // warning.
362
363                warnings.push(Warning {
364                    source: metadata.source,
365                    warning: WarningType::MissingBlockAfterTitleOrAttributeList,
366                });
367
368                // Remove the metadata content so that SimpleBlock will read the title/attrlist
369                // line(s) as regular content.
370                metadata.title_source = None;
371                metadata.title = None;
372                metadata.anchor = None;
373                metadata.attrlist = None;
374                metadata.block_start = metadata.source;
375            }
376        }
377
378        // If no other block kind matches, we can always use SimpleBlock.
379        let simple_block_mi = if let Some(plm) = parent_list_markers {
380            SimpleBlock::parse_for_list_item(&metadata, parser, is_continuation, plm)
381        } else {
382            SimpleBlock::parse(&metadata, parser)
383        };
384
385        let mut result = MatchAndWarnings {
386            item: simple_block_mi.map(|mi| MatchedItem {
387                item: Self::Simple(mi.item),
388                after: mi.after,
389            }),
390            warnings,
391        };
392
393        if let Some(ref matched_item) = result.item {
394            Self::register_block_id(
395                matched_item.item.id(),
396                matched_item.item.title(),
397                matched_item.item.span(),
398                parser,
399                &mut result.warnings,
400            );
401        }
402
403        result
404    }
405
406    /// Register a block's ID with the catalog if the block has an ID.
407    ///
408    /// This should be called for all block types except `SectionBlock`,
409    /// which handles its own catalog registration.
410    fn register_block_id(
411        id: Option<&str>,
412        title: Option<&str>,
413        span: Span<'src>,
414        parser: &mut Parser,
415        warnings: &mut Vec<Warning<'src>>,
416    ) {
417        if let Some(id) = id
418            && let Some(catalog) = parser.catalog_mut()
419            && let Err(_duplicate_error) = catalog.register_ref(
420                id,
421                title, // Use block title as reftext if available
422                RefType::Anchor,
423            )
424        {
425            // If registration fails due to duplicate ID, issue a warning.
426            warnings.push(Warning {
427                source: span,
428                warning: WarningType::DuplicateId(id.to_string()),
429            });
430        }
431    }
432}
433
434impl<'src> IsBlock<'src> for Block<'src> {
435    fn content_model(&self) -> ContentModel {
436        match self {
437            Self::Simple(_) => ContentModel::Simple,
438            Self::Media(b) => b.content_model(),
439            Self::Section(_) => ContentModel::Compound,
440            Self::List(b) => b.content_model(),
441            Self::ListItem(b) => b.content_model(),
442            Self::RawDelimited(b) => b.content_model(),
443            Self::CompoundDelimited(b) => b.content_model(),
444            Self::Preamble(b) => b.content_model(),
445            Self::Break(b) => b.content_model(),
446            Self::DocumentAttribute(b) => b.content_model(),
447        }
448    }
449
450    fn rendered_content(&'src self) -> Option<&'src str> {
451        match self {
452            Self::Simple(b) => b.rendered_content(),
453            Self::Media(b) => b.rendered_content(),
454            Self::Section(b) => b.rendered_content(),
455            Self::List(b) => b.rendered_content(),
456            Self::ListItem(b) => b.rendered_content(),
457            Self::RawDelimited(b) => b.rendered_content(),
458            Self::CompoundDelimited(b) => b.rendered_content(),
459            Self::Preamble(b) => b.rendered_content(),
460            Self::Break(b) => b.rendered_content(),
461            Self::DocumentAttribute(b) => b.rendered_content(),
462        }
463    }
464
465    fn raw_context(&self) -> CowStr<'src> {
466        match self {
467            Self::Simple(b) => b.raw_context(),
468            Self::Media(b) => b.raw_context(),
469            Self::Section(b) => b.raw_context(),
470            Self::List(b) => b.raw_context(),
471            Self::ListItem(b) => b.raw_context(),
472            Self::RawDelimited(b) => b.raw_context(),
473            Self::CompoundDelimited(b) => b.raw_context(),
474            Self::Preamble(b) => b.raw_context(),
475            Self::Break(b) => b.raw_context(),
476            Self::DocumentAttribute(b) => b.raw_context(),
477        }
478    }
479
480    fn nested_blocks(&'src self) -> Iter<'src, Block<'src>> {
481        match self {
482            Self::Simple(b) => b.nested_blocks(),
483            Self::Media(b) => b.nested_blocks(),
484            Self::Section(b) => b.nested_blocks(),
485            Self::List(b) => b.nested_blocks(),
486            Self::ListItem(b) => b.nested_blocks(),
487            Self::RawDelimited(b) => b.nested_blocks(),
488            Self::CompoundDelimited(b) => b.nested_blocks(),
489            Self::Preamble(b) => b.nested_blocks(),
490            Self::Break(b) => b.nested_blocks(),
491            Self::DocumentAttribute(b) => b.nested_blocks(),
492        }
493    }
494
495    fn title_source(&'src self) -> Option<Span<'src>> {
496        match self {
497            Self::Simple(b) => b.title_source(),
498            Self::Media(b) => b.title_source(),
499            Self::Section(b) => b.title_source(),
500            Self::List(b) => b.title_source(),
501            Self::ListItem(b) => b.title_source(),
502            Self::RawDelimited(b) => b.title_source(),
503            Self::CompoundDelimited(b) => b.title_source(),
504            Self::Preamble(b) => b.title_source(),
505            Self::Break(b) => b.title_source(),
506            Self::DocumentAttribute(b) => b.title_source(),
507        }
508    }
509
510    fn title(&self) -> Option<&str> {
511        match self {
512            Self::Simple(b) => b.title(),
513            Self::Media(b) => b.title(),
514            Self::Section(b) => b.title(),
515            Self::List(b) => b.title(),
516            Self::ListItem(b) => b.title(),
517            Self::RawDelimited(b) => b.title(),
518            Self::CompoundDelimited(b) => b.title(),
519            Self::Preamble(b) => b.title(),
520            Self::Break(b) => b.title(),
521            Self::DocumentAttribute(b) => b.title(),
522        }
523    }
524
525    fn anchor(&'src self) -> Option<Span<'src>> {
526        match self {
527            Self::Simple(b) => b.anchor(),
528            Self::Media(b) => b.anchor(),
529            Self::Section(b) => b.anchor(),
530            Self::List(b) => b.anchor(),
531            Self::ListItem(b) => b.anchor(),
532            Self::RawDelimited(b) => b.anchor(),
533            Self::CompoundDelimited(b) => b.anchor(),
534            Self::Preamble(b) => b.anchor(),
535            Self::Break(b) => b.anchor(),
536            Self::DocumentAttribute(b) => b.anchor(),
537        }
538    }
539
540    fn anchor_reftext(&'src self) -> Option<Span<'src>> {
541        match self {
542            Self::Simple(b) => b.anchor_reftext(),
543            Self::Media(b) => b.anchor_reftext(),
544            Self::Section(b) => b.anchor_reftext(),
545            Self::List(b) => b.anchor_reftext(),
546            Self::ListItem(b) => b.anchor_reftext(),
547            Self::RawDelimited(b) => b.anchor_reftext(),
548            Self::CompoundDelimited(b) => b.anchor_reftext(),
549            Self::Preamble(b) => b.anchor_reftext(),
550            Self::Break(b) => b.anchor_reftext(),
551            Self::DocumentAttribute(b) => b.anchor_reftext(),
552        }
553    }
554
555    fn attrlist(&'src self) -> Option<&'src Attrlist<'src>> {
556        match self {
557            Self::Simple(b) => b.attrlist(),
558            Self::Media(b) => b.attrlist(),
559            Self::Section(b) => b.attrlist(),
560            Self::List(b) => b.attrlist(),
561            Self::ListItem(b) => b.attrlist(),
562            Self::RawDelimited(b) => b.attrlist(),
563            Self::CompoundDelimited(b) => b.attrlist(),
564            Self::Preamble(b) => b.attrlist(),
565            Self::Break(b) => b.attrlist(),
566            Self::DocumentAttribute(b) => b.attrlist(),
567        }
568    }
569
570    fn substitution_group(&self) -> SubstitutionGroup {
571        match self {
572            Self::Simple(b) => b.substitution_group(),
573            Self::Media(b) => b.substitution_group(),
574            Self::Section(b) => b.substitution_group(),
575            Self::List(b) => b.substitution_group(),
576            Self::ListItem(b) => b.substitution_group(),
577            Self::RawDelimited(b) => b.substitution_group(),
578            Self::CompoundDelimited(b) => b.substitution_group(),
579            Self::Preamble(b) => b.substitution_group(),
580            Self::Break(b) => b.substitution_group(),
581            Self::DocumentAttribute(b) => b.substitution_group(),
582        }
583    }
584}
585
586impl<'src> HasSpan<'src> for Block<'src> {
587    fn span(&self) -> Span<'src> {
588        match self {
589            Self::Simple(b) => b.span(),
590            Self::Media(b) => b.span(),
591            Self::Section(b) => b.span(),
592            Self::List(b) => b.span(),
593            Self::ListItem(b) => b.span(),
594            Self::RawDelimited(b) => b.span(),
595            Self::CompoundDelimited(b) => b.span(),
596            Self::Preamble(b) => b.span(),
597            Self::Break(b) => b.span(),
598            Self::DocumentAttribute(b) => b.span(),
599        }
600    }
601}