asciidoc_parser/document/document.rs
1//! Describes the top-level document structure.
2
3use std::slice::Iter;
4
5use crate::{
6 HasSpan, Parser, Span,
7 attributes::Attrlist,
8 blocks::{Block, ContentModel, IsBlock, parse_utils::parse_blocks_until},
9 document::Header,
10 strings::CowStr,
11 warnings::Warning,
12};
13
14/// A document represents the top-level block element in AsciiDoc. It consists
15/// of an optional document header and either a) one or more sections preceded
16/// by an optional preamble or b) a sequence of top-level blocks only.
17///
18/// The document can be configured using a document header. The header is not a
19/// block itself, but contributes metadata to the document, such as the document
20/// title and document attributes.
21#[derive(Clone, Debug, Eq, PartialEq)]
22pub struct Document<'src> {
23 header: Header<'src>,
24 blocks: Vec<Block<'src>>,
25 source: Span<'src>,
26 warnings: Vec<Warning<'src>>,
27}
28
29impl<'src> Document<'src> {
30 /// Parse a UTF-8 string as an AsciiDoc document.
31 ///
32 /// Note that the document references the underlying source string and
33 /// necessarily has the same lifetime as the source.
34 ///
35 /// The `Document` data structure returned by this call and nearly all data
36 /// structures contained within it are gated by the lifetime of the `source`
37 /// text passed in to this function. For that reason all of those data
38 /// structures are given the lifetime `'src`.
39 ///
40 /// **IMPORTANT:** The AsciiDoc language documentation states that UTF-16
41 /// encoding is allowed if a byte-order-mark (BOM) is present at the
42 /// start of a file. This format is not directly supported by the
43 /// `asciidoc-parser` crate. Any UTF-16 content must be re-encoded as
44 /// UTF-8 prior to parsing.
45 ///
46 /// # Warnings, not errors
47 ///
48 /// Any UTF-8 string is a valid AsciiDoc document, so this function does not
49 /// return an [`Option`] or [`Result`] data type. There may be any number of
50 /// character sequences that have ambiguous or potentially unintended
51 /// meanings. For that reason, a caller is advised to review the warnings
52 /// provided via the [`warnings()`] iterator.
53 ///
54 /// [`warnings()`]: Self::warnings
55 pub(crate) fn parse(source: &'src str, parser: &mut Parser) -> Self {
56 let source = Span::new(source);
57
58 let mi = Header::parse(source, parser);
59 let next = mi.item.after;
60
61 let header = mi.item.item;
62 let mut warnings = mi.warnings;
63
64 let mut maw_blocks = parse_blocks_until(next, |_| false, parser);
65
66 if !maw_blocks.warnings.is_empty() {
67 warnings.append(&mut maw_blocks.warnings);
68 }
69
70 Self {
71 header,
72 blocks: maw_blocks.item.item,
73 source: source.trim_trailing_whitespace(),
74 warnings,
75 }
76 }
77
78 /// Return the document header.
79 pub fn header(&'src self) -> &'src Header<'src> {
80 &self.header
81 }
82
83 /// Return an iterator over any warnings found during parsing.
84 pub fn warnings(&'src self) -> Iter<'src, Warning<'src>> {
85 self.warnings.iter()
86 }
87}
88
89impl<'src> IsBlock<'src> for Document<'src> {
90 fn content_model(&self) -> ContentModel {
91 ContentModel::Compound
92 }
93
94 fn raw_context(&self) -> CowStr<'src> {
95 "document".into()
96 }
97
98 fn nested_blocks(&'src self) -> Iter<'src, Block<'src>> {
99 self.blocks.iter()
100 }
101
102 fn title_source(&'src self) -> Option<Span<'src>> {
103 // Document title is reflected in the Header.
104 None
105 }
106
107 fn title(&self) -> Option<&str> {
108 // Document title is reflected in the Header.
109 None
110 }
111
112 fn anchor(&'src self) -> Option<Span<'src>> {
113 None
114 }
115
116 fn attrlist(&'src self) -> Option<&'src Attrlist<'src>> {
117 // Document attributes are reflected in the Header.
118 None
119 }
120}
121
122impl<'src> HasSpan<'src> for Document<'src> {
123 fn span(&self) -> Span<'src> {
124 self.source
125 }
126}