asciidoc_parser/document/
document.rs

1//! Describes the top-level document structure.
2
3use std::slice::Iter;
4
5use crate::{
6    attributes::Attrlist,
7    blocks::{parse_utils::parse_blocks_until, Block, ContentModel, IsBlock},
8    document::Header,
9    strings::CowStr,
10    warnings::Warning,
11    HasSpan, Parser, Span,
12};
13
14/// A document represents the top-level block element in AsciiDoc. It consists
15/// of an optional document header and either a) one or more sections preceded
16/// by an optional preamble or b) a sequence of top-level blocks only.
17///
18/// The document can be configured using a document header. The header is not a
19/// block itself, but contributes metadata to the document, such as the document
20/// title and document attributes.
21#[derive(Clone, Debug, Eq, PartialEq)]
22pub struct Document<'src> {
23    header: Header<'src>,
24    blocks: Vec<Block<'src>>,
25    source: Span<'src>,
26    warnings: Vec<Warning<'src>>,
27}
28
29impl<'src> Document<'src> {
30    /// Parse a UTF-8 string as an AsciiDoc document.
31    ///
32    /// Note that the document references the underlying source string and
33    /// necessarily has the same lifetime as the source.
34    ///
35    /// The `Document` data structure returned by this call and nearly all data
36    /// structures contained within it are gated by the lifetime of the `source`
37    /// text passed in to this function. For that reason all of those data
38    /// structures are given the lifetime `'src`.
39    ///
40    /// **IMPORTANT:** The AsciiDoc language documentation states that UTF-16
41    /// encoding is allowed if a byte-order-mark (BOM) is present at the
42    /// start of a file. This format is not directly supported by the
43    /// `asciidoc-parser` crate. Any UTF-16 content must be re-encoded as
44    /// UTF-8 prior to parsing.
45    ///
46    /// # Warnings, not errors
47    ///
48    /// Any UTF-8 string is a valid AsciiDoc document, so this function does not
49    /// return an [`Option`] or [`Result`] data type. There may be any number of
50    /// character sequences that have ambiguous or potentially unintended
51    /// meanings. For that reason, a caller is advised to review the warnings
52    /// provided via the [`warnings()`] iterator.
53    ///
54    /// [`warnings()`]: Self::warnings
55    pub(crate) fn parse(source: &'src str, parser: &mut Parser) -> Self {
56        let source = Span::new(source);
57        let i = source.discard_empty_lines();
58        let i = if i.is_empty() { source } else { i };
59
60        let mi = Header::parse(i, parser);
61        let i = mi.item.after;
62
63        let header = mi.item.item;
64        let mut warnings = mi.warnings;
65
66        let mut maw_blocks = parse_blocks_until(i, |_| false, parser);
67
68        if !maw_blocks.warnings.is_empty() {
69            warnings.append(&mut maw_blocks.warnings);
70        }
71
72        Self {
73            header,
74            blocks: maw_blocks.item.item,
75            source: source.trim_trailing_whitespace(),
76            warnings,
77        }
78    }
79
80    /// Return the document header.
81    pub fn header(&'src self) -> &'src Header<'src> {
82        &self.header
83    }
84
85    /// Return an iterator over any warnings found during parsing.
86    pub fn warnings(&'src self) -> Iter<'src, Warning<'src>> {
87        self.warnings.iter()
88    }
89}
90
91impl<'src> IsBlock<'src> for Document<'src> {
92    fn content_model(&self) -> ContentModel {
93        ContentModel::Compound
94    }
95
96    fn raw_context(&self) -> CowStr<'src> {
97        "document".into()
98    }
99
100    fn nested_blocks(&'src self) -> Iter<'src, Block<'src>> {
101        self.blocks.iter()
102    }
103
104    fn title(&'src self) -> Option<Span<'src>> {
105        // Document title is reflected in the Header.
106        None
107    }
108
109    fn anchor(&'src self) -> Option<Span<'src>> {
110        None
111    }
112
113    fn attrlist(&'src self) -> Option<&'src Attrlist<'src>> {
114        // Document attributes are reflected in the Header.
115        None
116    }
117}
118
119impl<'src> HasSpan<'src> for Document<'src> {
120    fn span(&'src self) -> &'src Span<'src> {
121        &self.source
122    }
123}