// asciidoc_parser/document/document.rs

//! Describes the top-level document structure.

use std::slice::Iter;

use crate::{
    blocks::{parse_utils::parse_blocks_until, Block, ContentModel, IsBlock},
    document::Header,
    strings::CowStr,
    warnings::Warning,
    HasSpan, Span,
};

/// A document represents the top-level block element in AsciiDoc. It consists
/// of an optional document header and either a) one or more sections preceded
/// by an optional preamble or b) a sequence of top-level blocks only.
///
/// The document can be configured using a document header. The header is not a
/// block itself, but contributes metadata to the document, such as the document
/// title and document attributes.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Document<'src> {
    /// Header produced by `Header::parse` at the start of parsing.
    header: Header<'src>,

    /// Blocks produced by `parse_blocks_until` from the input that follows
    /// the header.
    blocks: Vec<Block<'src>>,

    /// Span created from the complete source string handed to the parser.
    source: Span<'src>,

    /// Warnings collected while parsing: those from the header first,
    /// followed by those from the blocks.
    warnings: Vec<Warning<'src>>,
}

impl<'src> Document<'src> {
    /// Parse a UTF-8 string as an AsciiDoc document.
    ///
    /// Note that the document references the underlying source string and
    /// necessarily has the same lifetime as the source.
    ///
    /// The `Document` data structure returned by this call and nearly all data
    /// structures contained within it are gated by the lifetime of the `source`
    /// text passed in to this function. For that reason all of those data
    /// structures are given the lifetime `'src`.
    ///
    /// **IMPORTANT:** The AsciiDoc language documentation states that UTF-16
    /// encoding is allowed if a byte-order-mark (BOM) is present at the
    /// start of a file. This format is not directly supported by the
    /// `asciidoc-parser` crate. Any UTF-16 content must be re-encoded as
    /// UTF-8 prior to parsing.
    ///
    /// Any UTF-8 string is a valid AsciiDoc document, so there is no `Option`
    /// or `Result` on this API. There may be any number of character sequences
    /// that have ambiguous or potentially unintended meanings. For that reason,
    /// a caller is advised to review the warnings provided via the
    /// [`Self::warnings`] iterator.
    pub fn parse(source: &'src str) -> Self {
        let source = Span::new(source);

        // Skip leading empty lines. If doing so leaves nothing at all, parse
        // from the original span instead of the emptied remainder.
        let i = source.discard_empty_lines();
        let i = if i.is_empty() { source } else { i };

        // The header comes first; block parsing resumes where it stopped.
        let mi = Header::parse(i);
        let i = mi.item.after;

        let header = mi.item.item;
        let mut warnings = mi.warnings;

        // The predicate always answers `false`, so it never asks block
        // parsing to stop early.
        let mut maw_blocks = parse_blocks_until(i, |_| false);

        // `Vec::append` is a no-op for an empty vector, so no emptiness
        // check is needed. Header warnings stay ahead of block warnings.
        warnings.append(&mut maw_blocks.warnings);

        Self {
            header,
            blocks: maw_blocks.item.item,
            source,
            warnings,
        }
    }

    /// Return the document header.
    ///
    /// The returned reference is tied to the borrow of `self`, not to the
    /// full `'src` lifetime, so the document need not stay borrowed for as
    /// long as the source text lives.
    pub fn header(&self) -> &Header<'src> {
        &self.header
    }

    /// Return an iterator over any warnings found during parsing.
    ///
    /// Warnings raised while parsing the header are yielded before warnings
    /// raised while parsing the blocks that follow it.
    pub fn warnings(&self) -> Iter<'_, Warning<'src>> {
        self.warnings.iter()
    }
}

impl<'src> IsBlock<'src> for Document<'src> {
    /// Report the content model of a document, which is always
    /// [`ContentModel::Compound`].
    fn content_model(&self) -> ContentModel {
        ContentModel::Compound
    }

    /// Report the block context name, which is always `"document"`.
    fn context(&self) -> CowStr<'src> {
        Into::into("document")
    }

    /// Iterate over the blocks stored directly in this document.
    fn nested_blocks(&'src self) -> Iter<'src, Block<'src>> {
        self.blocks.as_slice().iter()
    }
}

impl<'src> HasSpan<'src> for Document<'src> {
    /// Return the span stored in the document's `source` field.
    fn span(&'src self) -> &'src Span<'src> {
        let Self { source, .. } = self;
        source
    }
}