1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
//! `MaybeXml` is a library to scan and evaluate [XML][xml]-like data into
//! tokens. In effect, the library provides a non-validating parser. The
//! interface is similar to many XML pull parsers.
//!
//! # Examples
//!
//! ## Using [`tokenize()`][Reader::tokenize()]
//!
//! ```
//! use maybe_xml::{Reader, token::{Characters, EndTag, StartTag, Ty}};
//!
//! let input = "<id>123</id>";
//!
//! let reader = Reader::from_str(input);
//! let mut pos = 0;
//!
//! let token = reader.tokenize(&mut pos);
//! if let Some(Ty::StartTag(tag)) = token.map(|t| t.ty()) {
//!     assert_eq!("id", tag.name().local().as_str());
//!     assert_eq!(None, tag.name().namespace_prefix());
//! } else {
//!     panic!();
//! }
//! assert_eq!(4, pos);
//!
//! let token = reader.tokenize(&mut pos);
//! if let Some(Ty::Characters(chars)) = token.map(|t| t.ty()) {
//!     assert_eq!("123", chars.content().as_str());
//! } else {
//!     panic!();
//! }
//! assert_eq!(7, pos);
//!
//! let token = reader.tokenize(&mut pos);
//! if let Some(Ty::EndTag(tag)) = token.map(|t| t.ty()) {
//!     assert_eq!("</id>", tag.as_str());
//!     assert_eq!("id", tag.name().local().as_str());
//! } else {
//!     panic!();
//! }
//! assert_eq!(12, pos);
//!
//! let token = reader.tokenize(&mut pos);
//! assert_eq!(None, token);
//!
//! // Verify that `pos` is equal to `input.len()` to ensure all data was
//! // processed.
//! ```
//!
//! ## Using [`Iterator`] functionality
//!
//! ```
//! use maybe_xml::{Reader, token::{Characters, EndTag, StartTag, Ty}};
//!
//! let input = "<id>Example</id>";
//!
//! let reader = Reader::from_str(input);
//!
//! let mut iter = reader.into_iter().map(|token| token.ty());
//!
//! if let Some(Ty::StartTag(start_tag)) = iter.next() {
//!     assert_eq!("id", start_tag.name().as_str());
//! } else {
//!     panic!();
//! }
//!
//! if let Some(Ty::Characters(chars)) = iter.next() {
//!     assert_eq!("Example", chars.content().as_str());
//! } else {
//!     panic!();
//! }
//!
//! if let Some(Ty::EndTag(tag)) = iter.next() {
//!     assert_eq!("</id>", tag.as_str());
//!     assert_eq!("id", tag.name().local().as_str());
//! } else {
//!     panic!();
//! }
//! assert_eq!(None, iter.next());
//! ```
//!
//! # Well-formed vs. Malformed document processing
//!
//! The library should scan and evaluate well-formed XML documents correctly.
//! For XML documents which are not well-formed, the behavior is currently
//! undefined. The library does not error when scanning a malformed document.
//!
//! # Security Considerations
//!
//! The input is managed by the library user. If the input is malformed, the
//! tokenizing functions may never return a complete token.
//!
//! For instance, the input could start with a `<` but there is no closing `>`
//! character.
//!
//! In particular, if data is arriving over the network and is being stored in
//! a buffer, the buffer may grow without bound if its data is freed only when
//! a complete token is found.
//!
//! [xml]: https://www.w3.org/TR/2006/REC-xml11-20060816/

#![cfg_attr(not(feature = "std"), no_std)]
#![cfg_attr(docsrs, feature(doc_cfg))]
#![warn(
    missing_copy_implementations,
    missing_debug_implementations,
    missing_docs,
    rust_2018_idioms,
    unused_lifetimes,
    unused_qualifications
)]

mod read;
pub mod token;

pub use read::{IntoIter, Iter, Reader};

/// Returns `true` unless `byte` is a UTF-8 continuation byte.
///
/// Continuation bytes are exactly those whose two most significant bits are
/// `10` (`0x80..=0xBF`). Every other byte value is reported as a boundary.
#[inline]
#[must_use]
const fn is_utf8_boundary(byte: u8) -> bool {
    // Isolate the two high bits; the pattern `10` marks a continuation byte.
    const HIGH_BITS: u8 = 0b1100_0000;
    const CONTINUATION: u8 = 0b1000_0000;

    (byte & HIGH_BITS) != CONTINUATION
}

#[cfg(any(test, feature = "internal_unstable"))]
pub use read::parser::scan_document;

#[cfg(any(test, feature = "internal_unstable"))]
pub use read::parser::ScanDocumentOpts;