pub struct Reader<'a> { /* private fields */ }
Tokenizes XML input into a `Token`. It does not allocate.
Examples
Using tokenize()
use maybe_xml::{Reader, token::{Characters, EndTag, StartTag, Ty}};
let input = "<id>123</id>";
let reader = Reader::from_str(input);
let mut pos = 0;
let token = reader.tokenize(&mut pos);
if let Some(Ty::StartTag(tag)) = token.map(|t| t.ty()) {
assert_eq!("id", tag.name().local().as_str());
assert_eq!(None, tag.name().namespace_prefix());
} else {
panic!();
}
assert_eq!(4, pos);
let token = reader.tokenize(&mut pos);
if let Some(Ty::Characters(chars)) = token.map(|t| t.ty()) {
assert_eq!("123", chars.content().as_str());
} else {
panic!();
}
assert_eq!(7, pos);
let token = reader.tokenize(&mut pos);
if let Some(Ty::EndTag(tag)) = token.map(|t| t.ty()) {
assert_eq!("</id>", tag.as_str());
assert_eq!("id", tag.name().local().as_str());
} else {
panic!();
}
assert_eq!(12, pos);
let token = reader.tokenize(&mut pos);
assert_eq!(None, token);
// Verify that `pos` is equal to `input.len()` to ensure all data was
// processed.
Using `Iterator` functionality
use maybe_xml::{Reader, token::Ty};
let input = "<id>123</id><name>Jane Doe</name>";
let reader = Reader::from_str(input);
let mut iter = reader.into_iter().filter_map(|token| {
match token.ty() {
Ty::StartTag(tag) => Some(tag.name().as_str()),
_ => None,
}
});
let name = iter.next();
assert_eq!(Some("id"), name);
let name = iter.next();
assert_eq!(Some("name"), name);
assert_eq!(None, iter.next());
Note that if the input is malformed or incomplete, such as `<tag`, the `Iterator` will return `None` and will not return the invalid input. If you want to verify that all of the input was processed, then you should use the `Reader::tokenize()` method.
Implementations§
impl<'a> Reader<'a>
pub const fn from_str(input: &'a str) -> Self
Creates a new instance with the given UTF-8 string input.
pub const fn new(input: &'a str) -> Self
Creates a new instance with the given UTF-8 string input.
pub fn tokenize(&self, pos: &mut usize) -> Option<Token<'a>>
Tokenizes the input starting at the given position.
If a token is found, the position argument is also updated to the byte index after the token.
Panics
Panics if `pos` is greater than the input length or if `pos` is not at a character boundary.
Examples
use maybe_xml::{Reader, token::{StartTag, Ty}};
let input = "<id>123</id>";
let reader = Reader::from_str(input);
let mut pos = 0;
let token = reader.tokenize(&mut pos);
if let Some(Ty::StartTag(tag)) = token.map(|t| t.ty()) {
assert_eq!("id", tag.name().local().as_str());
assert_eq!(None, tag.name().namespace_prefix());
} else {
panic!();
}
// Position was assigned to the index after the end of the token
assert_eq!(4, pos);
If `tokenize()` returns `None`, but the position is not equal to the input’s byte length, then there is unprocessed input such as malformed XML. For instance, if the input was `<tag` without the enclosing `>`, then `tokenize()` will return `None`.
use maybe_xml::{Reader, token::{StartTag, Ty}};
let input = "<tag";
let reader = Reader::from_str(input);
let mut pos = 0;
let token = reader.tokenize(&mut pos);
assert_eq!(None, token);
assert_eq!(0, pos);
assert_ne!(input.len(), pos);
pub const fn parse(&self, pos: usize) -> Option<Token<'a>>
Constant function which tokenizes the input starting at the given position.
Important: `pos` is not updated by this method and should be advanced by the caller using `Token::len()`.
Panics
Panics if `pos` is greater than the input length or if `pos` is not at a character boundary.
Examples
use maybe_xml::{Reader, token::{StartTag, Ty}};
let input = "<id>123</id>";
let reader = Reader::from_str(input);
let mut pos = 0;
let token = reader.parse(pos);
if let Some(Ty::StartTag(tag)) = token.map(|t| t.ty()) {
assert_eq!("id", tag.name().local().as_str());
assert_eq!(None, tag.name().namespace_prefix());
} else {
panic!();
}
pos += token.map(|t| t.len()).unwrap_or_default();
assert_eq!(4, pos);
If `parse()` returns `None`, but the position is not equal to the input’s byte length, then there is unprocessed input such as malformed XML. For instance, if the input was `<tag` without the enclosing `>`, then `parse()` will return `None`.
use maybe_xml::{Reader, token::{StartTag, Ty}};
let input = "<tag";
let reader = Reader::from_str(input);
let mut pos = 0;
let token = reader.parse(pos);
assert_eq!(None, token);
assert_eq!(0, pos);
assert_ne!(input.len(), pos);
pub const fn iter(&self, pos: usize) -> Iter<'a>
Returns an iterator for tokens starting at the given position.
Panics
The iterator will panic if the initial `pos` is greater than the input length or if `pos` is not at a character boundary.
Examples
Using other `Iterator` functionality
use maybe_xml::{Reader, token::Ty};
let input = "<id>123</id><name>Jane Doe</name>";
let reader = Reader::from_str(input);
let mut iter = reader.iter(0).filter_map(|token| {
match token.ty() {
Ty::StartTag(tag) => Some(tag.name().as_str()),
_ => None,
}
});
let name = iter.next();
assert_eq!(Some("id"), name);
let name = iter.next();
assert_eq!(Some("name"), name);
assert_eq!(None, iter.next());
pub const fn into_inner(self) -> &'a str
Returns the underlying string being tokenized.