//! `MaybeXml` is a library to scan and evaluate [XML][xml]-like data into
//! tokens. In effect, the library provides a non-validating parser. The
//! interface is similar to many XML pull parsers.
//!
//! # Examples
//!
//! ## Using [`tokenize()`][Reader::tokenize()]
//!
//! ```
//! use maybe_xml::{Reader, token::{Characters, EndTag, StartTag, Ty}};
//!
//! let input = "<id>123</id>";
//!
//! let reader = Reader::from_str(input);
//! let mut pos = 0;
//!
//! let token = reader.tokenize(&mut pos);
//! if let Some(Ty::StartTag(tag)) = token.map(|t| t.ty()) {
//! assert_eq!("id", tag.name().local().as_str());
//! assert_eq!(None, tag.name().namespace_prefix());
//! } else {
//! panic!();
//! }
//! assert_eq!(4, pos);
//!
//! let token = reader.tokenize(&mut pos);
//! if let Some(Ty::Characters(chars)) = token.map(|t| t.ty()) {
//! assert_eq!("123", chars.content().as_str());
//! } else {
//! panic!();
//! }
//! assert_eq!(7, pos);
//!
//! let token = reader.tokenize(&mut pos);
//! if let Some(Ty::EndTag(tag)) = token.map(|t| t.ty()) {
//! assert_eq!("</id>", tag.as_str());
//! assert_eq!("id", tag.name().local().as_str());
//! } else {
//! panic!();
//! }
//! assert_eq!(12, pos);
//!
//! let token = reader.tokenize(&mut pos);
//! assert_eq!(None, token);
//!
//! // Verify that `pos` is equal to `input.len()` to ensure all data was
//! // processed.
//! assert_eq!(input.len(), pos);
//! ```
//!
//! ## Using [`Iterator`] functionality
//!
//! ```
//! use maybe_xml::{Reader, token::{Characters, EndTag, StartTag, Ty}};
//!
//! let input = "<id>Example</id>";
//!
//! let reader = Reader::from_str(input);
//!
//! let mut iter = reader.into_iter().map(|token| token.ty());
//!
//! if let Some(Ty::StartTag(start_tag)) = iter.next() {
//! assert_eq!("id", start_tag.name().as_str());
//! } else {
//! panic!();
//! }
//!
//! if let Some(Ty::Characters(chars)) = iter.next() {
//! assert_eq!("Example", chars.content().as_str());
//! } else {
//! panic!();
//! }
//!
//! if let Some(Ty::EndTag(tag)) = iter.next() {
//! assert_eq!("</id>", tag.as_str());
//! assert_eq!("id", tag.name().local().as_str());
//! } else {
//! panic!();
//! }
//! assert_eq!(None, iter.next());
//! ```
//!
//! # Well-formed vs. Malformed document processing
//!
//! The library should scan and evaluate well-formed XML documents correctly.
//! For XML documents which are not well-formed, the behavior is currently
//! undefined. The library does not error when scanning a malformed document.
//!
//! # Security Considerations
//!
//! The input is managed by the library user. If the input is malformed, the
//! tokenizing functions may never return a complete token.
//!
//! For instance, the input could start with a `<` but there is no closing `>`
//! character.
//!
//! In particular, if data is coming over the network and the data is being
//! stored in a buffer, the buffer may have unbounded growth if the buffer's
//! data is freed only if a complete token is found.
//!
//! [xml]: https://www.w3.org/TR/2006/REC-xml11-20060816/
#![cfg_attr(not(feature = "std"), no_std)]
#![cfg_attr(docsrs, feature(doc_cfg))]
#![warn(
missing_copy_implementations,
missing_debug_implementations,
missing_docs,
rust_2018_idioms,
unused_lifetimes,
unused_qualifications
)]
mod read;
pub mod token;
pub use read::{IntoIter, Iter, Reader};
/// Returns `true` if `byte` can start a UTF-8 encoded character.
///
/// Equivalently: `byte` is anything other than a UTF-8 continuation byte
/// (`0b10xx_xxxx`, i.e. `0x80..=0xBF`).
#[allow(clippy::cast_possible_wrap)]
#[inline]
#[must_use]
const fn is_utf8_boundary(byte: u8) -> bool {
    // Continuation bytes have the bit pattern 0b10xx_xxxx; every other
    // value (ASCII or a multi-byte sequence leader) is a char boundary.
    (byte & 0b1100_0000) != 0b1000_0000
}
#[cfg(any(test, feature = "internal_unstable"))]
pub use read::parser::scan_document;
#[cfg(any(test, feature = "internal_unstable"))]
pub use read::parser::ScanDocumentOpts;