asciidoc_parser/document/document.rs
1//! Describes the top-level document structure.
2
3use std::slice::Iter;
4
5use crate::{
6 attributes::Attrlist,
7 blocks::{parse_utils::parse_blocks_until, Block, ContentModel, IsBlock},
8 document::Header,
9 strings::CowStr,
10 warnings::Warning,
11 HasSpan, Parser, Span,
12};
13
14/// A document represents the top-level block element in AsciiDoc. It consists
15/// of an optional document header and either a) one or more sections preceded
16/// by an optional preamble or b) a sequence of top-level blocks only.
17///
18/// The document can be configured using a document header. The header is not a
19/// block itself, but contributes metadata to the document, such as the document
20/// title and document attributes.
21#[derive(Clone, Debug, Eq, PartialEq)]
22pub struct Document<'src> {
23 header: Header<'src>,
24 blocks: Vec<Block<'src>>,
25 source: Span<'src>,
26 warnings: Vec<Warning<'src>>,
27}
28
29impl<'src> Document<'src> {
30 /// Parse a UTF-8 string as an AsciiDoc document.
31 ///
32 /// Note that the document references the underlying source string and
33 /// necessarily has the same lifetime as the source.
34 ///
35 /// The `Document` data structure returned by this call and nearly all data
36 /// structures contained within it are gated by the lifetime of the `source`
37 /// text passed in to this function. For that reason all of those data
38 /// structures are given the lifetime `'src`.
39 ///
40 /// **IMPORTANT:** The AsciiDoc language documentation states that UTF-16
41 /// encoding is allowed if a byte-order-mark (BOM) is present at the
42 /// start of a file. This format is not directly supported by the
43 /// `asciidoc-parser` crate. Any UTF-16 content must be re-encoded as
44 /// UTF-8 prior to parsing.
45 ///
46 /// # Warnings, not errors
47 ///
48 /// Any UTF-8 string is a valid AsciiDoc document, so this function does not
49 /// return an [`Option`] or [`Result`] data type. There may be any number of
50 /// character sequences that have ambiguous or potentially unintended
51 /// meanings. For that reason, a caller is advised to review the warnings
52 /// provided via the [`warnings()`] iterator.
53 ///
54 /// [`warnings()`]: Self::warnings
55 pub(crate) fn parse(source: &'src str, parser: &mut Parser) -> Self {
56 let source = Span::new(source);
57 let i = source.discard_empty_lines();
58 let i = if i.is_empty() { source } else { i };
59
60 let mi = Header::parse(i, parser);
61 let i = mi.item.after;
62
63 let header = mi.item.item;
64 let mut warnings = mi.warnings;
65
66 let mut maw_blocks = parse_blocks_until(i, |_| false, parser);
67
68 if !maw_blocks.warnings.is_empty() {
69 warnings.append(&mut maw_blocks.warnings);
70 }
71
72 Self {
73 header,
74 blocks: maw_blocks.item.item,
75 source: source.trim_trailing_whitespace(),
76 warnings,
77 }
78 }
79
80 /// Return the document header.
81 pub fn header(&'src self) -> &'src Header<'src> {
82 &self.header
83 }
84
85 /// Return an iterator over any warnings found during parsing.
86 pub fn warnings(&'src self) -> Iter<'src, Warning<'src>> {
87 self.warnings.iter()
88 }
89}
90
91impl<'src> IsBlock<'src> for Document<'src> {
92 fn content_model(&self) -> ContentModel {
93 ContentModel::Compound
94 }
95
96 fn raw_context(&self) -> CowStr<'src> {
97 "document".into()
98 }
99
100 fn nested_blocks(&'src self) -> Iter<'src, Block<'src>> {
101 self.blocks.iter()
102 }
103
104 fn title(&'src self) -> Option<Span<'src>> {
105 // Document title is reflected in the Header.
106 None
107 }
108
109 fn anchor(&'src self) -> Option<Span<'src>> {
110 None
111 }
112
113 fn attrlist(&'src self) -> Option<&'src Attrlist<'src>> {
114 // Document attributes are reflected in the Header.
115 None
116 }
117}
118
119impl<'src> HasSpan<'src> for Document<'src> {
120 fn span(&'src self) -> &'src Span<'src> {
121 &self.source
122 }
123}