pub struct Reader<'a> { /* private fields */ }
Tokenizes XML input into a `Token`. It does not allocate.
Examples
Using tokenize()
use maybe_xml::{Reader, token::{Characters, EndTag, StartTag, Ty}};
let input = "<id>123</id>";
let reader = Reader::from_str(input);
let mut pos = 0;
let token = reader.tokenize(&mut pos);
if let Some(Ty::StartTag(tag)) = token.map(|t| t.ty()) {
assert_eq!("id", tag.name().local().as_str());
assert_eq!(None, tag.name().namespace_prefix());
} else {
panic!();
}
assert_eq!(4, pos);
let token = reader.tokenize(&mut pos);
if let Some(Ty::Characters(chars)) = token.map(|t| t.ty()) {
assert_eq!("123", chars.content().as_str());
} else {
panic!();
}
assert_eq!(7, pos);
let token = reader.tokenize(&mut pos);
if let Some(Ty::EndTag(tag)) = token.map(|t| t.ty()) {
assert_eq!("</id>", tag.as_str());
assert_eq!("id", tag.name().local().as_str());
} else {
panic!();
}
assert_eq!(12, pos);
let token = reader.tokenize(&mut pos);
assert_eq!(None, token);
// Verify that `pos` is equal to `input.len()` to ensure all data was
// processed.
Using `Iterator` functionality
use maybe_xml::{Reader, token::Ty};
let input = "<id>123</id><name>Jane Doe</name>";
let reader = Reader::from_str(input);
let mut iter = reader.into_iter().filter_map(|token| {
match token.ty() {
Ty::StartTag(tag) => Some(tag.name().as_str()),
_ => None,
}
});
let name = iter.next();
assert_eq!(Some("id"), name);
let name = iter.next();
assert_eq!(Some("name"), name);
assert_eq!(None, iter.next());
Note that if the input is malformed or incomplete, such as `<tag`, the `Iterator` will return `None` and will not return the invalid input. If you want to verify that all of the input was processed, then you should use the `Reader::tokenize()` method.
Implementations§
impl<'a> Reader<'a>
pub const fn from_str(input: &'a str) -> Self
Creates a new instance with the given UTF-8 string input.
pub const fn new(input: &'a str) -> Self
Creates a new instance with the given UTF-8 string input.
pub fn tokenize(&self, pos: &mut usize) -> Option<Token<'a>>
Tokenizes the input starting at the given position.
If a token is found, the position argument is also updated to the byte index after the token.
Panics
Panics if `pos` is greater than the input length or if `pos` is not at a character boundary.
Examples
use maybe_xml::{Reader, token::{StartTag, Ty}};
let input = "<id>123</id>";
let reader = Reader::from_str(input);
let mut pos = 0;
let token = reader.tokenize(&mut pos);
if let Some(Ty::StartTag(tag)) = token.map(|t| t.ty()) {
assert_eq!("id", tag.name().local().as_str());
assert_eq!(None, tag.name().namespace_prefix());
} else {
panic!();
}
// Position was assigned to the index after the end of the token
assert_eq!(4, pos);
If `tokenize()` returns `None`, but the position is not equal to the input’s byte length, then there is unprocessed input such as malformed XML. For instance, if the input was `<tag` without the enclosing `>`, then `tokenize()` will return `None`.
use maybe_xml::{Reader, token::{StartTag, Ty}};
let input = "<tag";
let reader = Reader::from_str(input);
let mut pos = 0;
let token = reader.tokenize(&mut pos);
assert_eq!(None, token);
assert_eq!(0, pos);
assert_ne!(input.len(), pos);
pub const fn parse(&self, pos: usize) -> Option<Token<'a>>
Constant function which tokenizes the input starting at the given position.
Important: `pos` is not updated by this method and should be advanced by the caller using `Token::len()`.
Panics
Panics if `pos` is greater than the input length or if `pos` is not at a character boundary.
Examples
use maybe_xml::{Reader, token::{StartTag, Ty}};
let input = "<id>123</id>";
let reader = Reader::from_str(input);
let mut pos = 0;
let token = reader.parse(pos);
if let Some(Ty::StartTag(tag)) = token.map(|t| t.ty()) {
assert_eq!("id", tag.name().local().as_str());
assert_eq!(None, tag.name().namespace_prefix());
} else {
panic!();
}
pos += token.map(|t| t.len()).unwrap_or_default();
assert_eq!(4, pos);
If `parse()` returns `None`, but the position is not equal to the input’s byte length, then there is unprocessed input such as malformed XML. For instance, if the input was `<tag` without the enclosing `>`, then `parse()` will return `None`.
use maybe_xml::{Reader, token::{StartTag, Ty}};
let input = "<tag";
let reader = Reader::from_str(input);
let mut pos = 0;
let token = reader.parse(pos);
assert_eq!(None, token);
assert_eq!(0, pos);
assert_ne!(input.len(), pos);
pub const fn iter(&self, pos: usize) -> Iter<'a>
Returns an iterator for tokens starting at the given position.
Panics
The iterator will panic if the initial `pos` is greater than the input length or if `pos` is not at a character boundary.
Examples
Using other `Iterator` functionality
use maybe_xml::{Reader, token::Ty};
let input = "<id>123</id><name>Jane Doe</name>";
let reader = Reader::from_str(input);
let mut iter = reader.iter(0).filter_map(|token| {
match token.ty() {
Ty::StartTag(tag) => Some(tag.name().as_str()),
_ => None,
}
});
let name = iter.next();
assert_eq!(Some("id"), name);
let name = iter.next();
assert_eq!(Some("name"), name);
assert_eq!(None, iter.next());
pub const fn into_inner(self) -> &'a str
Returns the underlying string being tokenized.