asciidoc_parser/document/
document.rs

1//! Describes the top-level document structure.
2
3use std::slice::Iter;
4
5use crate::{
6    HasSpan, Parser, Span,
7    attributes::Attrlist,
8    blocks::{Block, ContentModel, IsBlock, parse_utils::parse_blocks_until},
9    document::Header,
10    strings::CowStr,
11    warnings::Warning,
12};
13
14/// A document represents the top-level block element in AsciiDoc. It consists
15/// of an optional document header and either a) one or more sections preceded
16/// by an optional preamble or b) a sequence of top-level blocks only.
17///
18/// The document can be configured using a document header. The header is not a
19/// block itself, but contributes metadata to the document, such as the document
20/// title and document attributes.
21#[derive(Clone, Debug, Eq, PartialEq)]
22pub struct Document<'src> {
23    header: Header<'src>,
24    blocks: Vec<Block<'src>>,
25    source: Span<'src>,
26    warnings: Vec<Warning<'src>>,
27}
28
29impl<'src> Document<'src> {
30    /// Parse a UTF-8 string as an AsciiDoc document.
31    ///
32    /// Note that the document references the underlying source string and
33    /// necessarily has the same lifetime as the source.
34    ///
35    /// The `Document` data structure returned by this call and nearly all data
36    /// structures contained within it are gated by the lifetime of the `source`
37    /// text passed in to this function. For that reason all of those data
38    /// structures are given the lifetime `'src`.
39    ///
40    /// **IMPORTANT:** The AsciiDoc language documentation states that UTF-16
41    /// encoding is allowed if a byte-order-mark (BOM) is present at the
42    /// start of a file. This format is not directly supported by the
43    /// `asciidoc-parser` crate. Any UTF-16 content must be re-encoded as
44    /// UTF-8 prior to parsing.
45    ///
46    /// # Warnings, not errors
47    ///
48    /// Any UTF-8 string is a valid AsciiDoc document, so this function does not
49    /// return an [`Option`] or [`Result`] data type. There may be any number of
50    /// character sequences that have ambiguous or potentially unintended
51    /// meanings. For that reason, a caller is advised to review the warnings
52    /// provided via the [`warnings()`] iterator.
53    ///
54    /// [`warnings()`]: Self::warnings
55    pub(crate) fn parse(source: &'src str, parser: &mut Parser) -> Self {
56        let source = Span::new(source);
57
58        let mi = Header::parse(source, parser);
59        let next = mi.item.after;
60
61        let header = mi.item.item;
62        let mut warnings = mi.warnings;
63
64        let mut maw_blocks = parse_blocks_until(next, |_| false, parser);
65
66        if !maw_blocks.warnings.is_empty() {
67            warnings.append(&mut maw_blocks.warnings);
68        }
69
70        Self {
71            header,
72            blocks: maw_blocks.item.item,
73            source: source.trim_trailing_whitespace(),
74            warnings,
75        }
76    }
77
78    /// Return the document header.
79    pub fn header(&'src self) -> &'src Header<'src> {
80        &self.header
81    }
82
83    /// Return an iterator over any warnings found during parsing.
84    pub fn warnings(&'src self) -> Iter<'src, Warning<'src>> {
85        self.warnings.iter()
86    }
87}
88
89impl<'src> IsBlock<'src> for Document<'src> {
90    fn content_model(&self) -> ContentModel {
91        ContentModel::Compound
92    }
93
94    fn raw_context(&self) -> CowStr<'src> {
95        "document".into()
96    }
97
98    fn nested_blocks(&'src self) -> Iter<'src, Block<'src>> {
99        self.blocks.iter()
100    }
101
102    fn title_source(&'src self) -> Option<Span<'src>> {
103        // Document title is reflected in the Header.
104        None
105    }
106
107    fn title(&self) -> Option<&str> {
108        // Document title is reflected in the Header.
109        None
110    }
111
112    fn anchor(&'src self) -> Option<Span<'src>> {
113        None
114    }
115
116    fn attrlist(&'src self) -> Option<&'src Attrlist<'src>> {
117        // Document attributes are reflected in the Header.
118        None
119    }
120}
121
122impl<'src> HasSpan<'src> for Document<'src> {
123    fn span(&self) -> Span<'src> {
124        self.source
125    }
126}