rxml/parser/
mod.rs

/*!
# Restricted XML 1.0 parsing facilities

This module contains parsing facilities for XML documents. To satisfy
different styles of tree building, the parsing step is separated into two
stages: the logical stage and the namespace/attribute resolution stage.

## Logical stage

In the logical stage, the logical elements of the XML document are emitted as
[`RawEvent`] structs. These *may* be used by end-users to build XML document
models, but they are not completely validated even against the XML 1.0
specification. The caveats are documented at the [`RawParser`] struct.

## Namespace/attribute resolution stage

This stage resolves namespace declarations found in a stream of [`RawEvent`]
structs and emits [`Event`] structs.

Together with the validation of the logical stage, this provides full
conformity checks according to XML 1.0 and Namespaces for XML 1.0. Both stages
are combined in the [`Parser`] struct.

The downside of using this stage is added processing cost, because
considerable dynamic allocations need to be performed per-element (for
attribute hash maps). In addition, information about the prefixes used to
declare namespaces is lost (but nothing should rely on those anyway).
*/
29
30mod common;
31mod namespaces;
32mod raw;
33
34#[doc(inline)]
35pub use self::common::*;
36#[doc(inline)]
37pub use self::namespaces::{Event, QName};
38#[doc(inline)]
39pub use self::raw::{RawEvent, RawParser, RawQName};
40
41use self::namespaces::NamespaceResolver;
42
43/**
44# Non-blocking restricted XML 1.0 parser
45
46The [`Parser`] allows parsing XML documents as they arrive in the application,
47giving back control to the caller immediately when not enough data is available
48for processing. This is especially useful when streaming data from sockets.
49
50To read events from the `Parser` after feeding data, use its [`Parse`] trait.
51
52## Example
53
54```
55use rxml::{Parser, Parse, Error, Event, XmlVersion, error::EndOrError};
56let doc = b"<?xml version='1.0'?><hello>World!</hello>";
57let mut fp = Parser::new();
58// We expect a NeedMoreData, because the XML declaration is not complete yet
59assert!(matches!(
60	fp.parse(&mut &doc[..10], false).err().unwrap(),
61	EndOrError::NeedMoreData,
62));
63
64// Now we pass the XML declaration (and some), so we expect a corresponding
65// event
66let ev = fp.parse(&mut &doc[10..25], false);
67assert!(matches!(ev.unwrap().unwrap(), Event::XmlDeclaration(_, XmlVersion::V1_0)));
68```
69
70In contrast to a [`RawParser`], the [`Parser`] enforces well-formedness and
71namespace-well-formedness.
72
73   [`rxml`]: crate
74*/
75#[derive(Debug)]
76pub struct Parser {
77	inner: RawParser,
78	resolver: NamespaceResolver,
79}
80
81impl Parser {
82	/// Create a new parser with default settings.
83	pub fn new() -> Self {
84		Self::default()
85	}
86
87	/// Configure text buffering (enabled by default).
88	///
89	/// If enabled, text content is buffered up to the configured token size
90	/// limit, unless it is more efficient to flush it out anyway.
91	///
92	/// If disabled, text content is emitted as event as soon as at least one
93	/// valid char has been read.
94	///
95	/// Enabling text buffering reduces the number of calls which need to be
96	/// made into the parser and thus may improve performance. However, it
97	/// also makes the application see the text content later, which may be
98	/// problematic if control flow which affects parsing depends on text
99	/// content.
100	#[inline(always)]
101	pub fn set_text_buffering(&mut self, enabled: bool) {
102		self.inner.set_text_buffering(enabled)
103	}
104
105	/// Return whether text buffering is enabled.
106	///
107	/// See [`set_text_buffering`][`Self::set_text_buffering`].
108	#[inline(always)]
109	pub fn text_buffering(&self) -> bool {
110		self.inner.text_buffering()
111	}
112}
113
114impl Default for Parser {
115	fn default() -> Self {
116		Self::with_options(Options::default())
117	}
118}
119
120impl WithOptions for Parser {
121	fn with_options(options: Options) -> Self {
122		Self {
123			inner: RawParser::with_lexer_options((&options).into()),
124			resolver: if let Some(ctx) = options.context {
125				NamespaceResolver::with_context(ctx)
126			} else {
127				NamespaceResolver::new()
128			},
129		}
130	}
131}
132
133impl Parse for Parser {
134	type Output = Event;
135
136	fn parse(&mut self, r: &mut &[u8], at_eof: bool) -> Result<Option<Self::Output>> {
137		let inner = &mut self.inner;
138		self.resolver.next(|| inner.parse(r, at_eof))
139	}
140
141	fn release_temporaries(&mut self) {
142		self.inner.release_temporaries();
143		self.resolver.context().release_temporaries();
144	}
145}