#![no_std]
#![doc(html_root_url = "https://docs.rs/xmltok/0.14.0")]
#![forbid(unsafe_code)]
#![warn(missing_docs)]
#![allow(ellipsis_inclusive_range_patterns)]
#[cfg(feature = "std")]
#[macro_use]
extern crate std;
/// Evaluates to `true` when the expression matches the given pattern
/// (alternatives and an `if` guard are accepted) and to `false` otherwise.
///
/// Shares its name with the standard library's `matches!`; being defined
/// textually before the modules below, this local definition is the one they see.
macro_rules! matches {
    ($value:expr, $($pat:tt)+) => {
        match $value {
            $($pat)+ => true,
            _ => false,
        }
    };
}
mod error;
mod stream;
mod strspan;
mod xmlchar;
pub use crate::error::*;
pub use crate::stream::*;
pub use crate::strspan::*;
pub use crate::xmlchar::*;
/// A single token produced by [`Tokenizer`].
///
/// Tokens do not borrow the source text. `start` is the stream position at
/// which the token began. `end`, where present, is the token's *length* in
/// bytes (see [`Token::range`]), not an absolute offset. The detached spans
/// are created relative to the token's `start`.
#[allow(missing_docs)]
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
pub enum Token {
/// XML declaration: `<?xml version=... ?>`.
///
/// `encoding` is an empty span when no encoding was declared.
Declaration {
start: u32,
version: SmallDetachedStrSpan,
encoding: SmallDetachedStrSpan,
standalone: Option<bool>,
end: u16,
},
/// Processing instruction: `<?target content?>`.
///
/// `content` is an empty span when the instruction has no content.
ProcessingInstruction {
start: u32,
end: u16,
target: SmallDetachedStrSpan,
content: SmallDetachedStrSpan,
},
/// Comment: `<!--text-->`.
Comment { start: u32, text: DetachedStrSpan },
/// `<!DOCTYPE name [` — DOCTYPE with an internal subset and no external ID.
DtdStartNoExternalId {
start: u32,
end: u16,
name: SmallDetachedStrSpan,
},
/// `<!DOCTYPE name SYSTEM "..." [` — DOCTYPE with an internal subset.
DtdStartSystemExternalId {
start: u32,
end: u16,
name: SmallDetachedStrSpan,
external_id: SmallDetachedStrSpan,
},
/// `<!DOCTYPE name PUBLIC "..." "..." [` — DOCTYPE with an internal subset.
DtdStartPublicExternalId {
start: u32,
end: u16,
name: SmallDetachedStrSpan,
public1: SmallDetachedStrSpan,
public2: SmallDetachedStrSpan,
},
/// `<!DOCTYPE name>` — DOCTYPE without an internal subset or external ID.
EmptyDtdNoExternalId {
start: u32,
end: u16,
name: SmallDetachedStrSpan,
},
/// `<!DOCTYPE name SYSTEM "...">` — DOCTYPE without an internal subset.
EmptyDtdSystemExternalId {
start: u32,
end: u16,
name: SmallDetachedStrSpan,
external_id: SmallDetachedStrSpan,
},
/// `<!DOCTYPE name PUBLIC "..." "...">` — DOCTYPE without an internal subset.
EmptyDtdPublicExternalId {
start: u32,
end: u16,
name: SmallDetachedStrSpan,
public1: SmallDetachedStrSpan,
public2: SmallDetachedStrSpan,
},
/// `<!ENTITY name "value">` — entity declared with a quoted value.
EntityDeclarationEntityValue {
start: u32,
end: u16,
name: SmallDetachedStrSpan,
entity_value: SmallDetachedStrSpan,
},
/// `<!ENTITY name SYSTEM "...">` — entity declared with a system external ID.
EntityDeclarationSystemExternalId {
start: u32,
end: u16,
name: SmallDetachedStrSpan,
external_id: SmallDetachedStrSpan,
},
/// `<!ENTITY name PUBLIC "..." "...">` — entity declared with a public external ID.
EntityDeclarationPublicExternalId {
start: u32,
end: u16,
name: SmallDetachedStrSpan,
public1: SmallDetachedStrSpan,
public2: SmallDetachedStrSpan,
},
/// `]>` (whitespace allowed between `]` and `>`) — end of the DTD internal subset.
DtdEnd { start: u32, end: u16 },
/// `<prefix:local` — the opening of a start tag, up to the end of its name.
ElementStart {
start: u32,
prefix: SmallDetachedStrSpan,
local: SmallDetachedStrSpan,
},
/// `prefix:local="value"` — one attribute inside a start tag.
Attribute {
start: u32,
prefix: SmallDetachedStrSpan,
local: SmallDetachedStrSpan,
value: SmallDetachedStrSpan,
},
/// `>`, `/>` or `</prefix:local>`; `el_end` tells which one.
ElementEnd {
start: u32,
end: u16,
el_end: ElementEnd,
},
/// Character data between markup.
Text { start: u32, text: DetachedStrSpan },
/// CDATA section: `<![CDATA[text]]>`.
Cdata { start: u32, text: DetachedStrSpan },
}
impl Token {
    /// Returns the range of the source text covered by this token.
    ///
    /// The range begins at the token's `start`. Its length is either the
    /// stored `end` (which is a length, not an offset) or, for variants
    /// without that field, the end of the last stored span plus whatever
    /// closing delimiter follows it.
    pub fn range(&self) -> core::ops::Range<usize> {
        // The attribute value span stops before the closing quote.
        const CLOSING_QUOTE: usize = 1;
        // Comment and CDATA text spans stop before `-->` / `]]>`.
        const CLOSING_MARKER: usize = 3;

        let (offset, len) = match *self {
            Token::Text { start, text } => (start, text.end() as usize),
            Token::Comment { start, text } | Token::Cdata { start, text } => {
                (start, text.end() as usize + CLOSING_MARKER)
            }
            Token::ElementStart { start, local, .. } => (start, local.end() as usize),
            Token::Attribute { start, value, .. } => {
                (start, value.end() as usize + CLOSING_QUOTE)
            }
            Token::Declaration { start, end, .. }
            | Token::ProcessingInstruction { start, end, .. }
            | Token::DtdStartNoExternalId { start, end, .. }
            | Token::DtdStartSystemExternalId { start, end, .. }
            | Token::DtdStartPublicExternalId { start, end, .. }
            | Token::EmptyDtdNoExternalId { start, end, .. }
            | Token::EmptyDtdSystemExternalId { start, end, .. }
            | Token::EmptyDtdPublicExternalId { start, end, .. }
            | Token::EntityDeclarationEntityValue { start, end, .. }
            | Token::EntityDeclarationSystemExternalId { start, end, .. }
            | Token::EntityDeclarationPublicExternalId { start, end, .. }
            | Token::DtdEnd { start, end }
            | Token::ElementEnd { start, end, .. } => (start, usize::from(end)),
        };

        let begin = offset as usize;
        core::ops::Range {
            start: begin,
            end: begin + len,
        }
    }
}
/// How a tag was terminated; carried by [`Token::ElementEnd`].
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
pub enum ElementEnd {
/// `>` — the start tag ended and the element is now open.
Open,
/// `</prefix:local>` — a closing tag; holds its prefix and local-name spans.
Close(SmallDetachedStrSpan, SmallDetachedStrSpan),
/// `/>` — the start tag ended and the element is self-closing.
Empty,
}
/// An external identifier as it appears in a DOCTYPE or entity declaration.
#[allow(missing_docs)]
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
pub enum ExternalId {
/// `SYSTEM "literal"` — holds the span of the quoted literal.
System(SmallDetachedStrSpan),
/// `PUBLIC "literal1" "literal2"` — holds the spans of both quoted literals, in order.
Public(SmallDetachedStrSpan, SmallDetachedStrSpan),
}
/// The definition part of an `<!ENTITY ...>` declaration.
#[allow(missing_docs)]
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
pub enum EntityDefinition {
/// A quoted entity value; holds the span between the quotes.
EntityValue(SmallDetachedStrSpan),
/// A `SYSTEM` or `PUBLIC` external identifier.
ExternalId(ExternalId),
}
// Result of a tokenizer-level operation; failures are reported as `Error`.
type Result<T> = core::result::Result<T, Error>;
// Result of a stream-level operation; failures are reported as `StreamError`.
type StreamResult<T> = core::result::Result<T, StreamError>;
/// Computes the compact `(start, length)` pair for a token that began at
/// `start` and ends at the stream's current position.
///
/// Fails when the current position does not fit in `u32` or the token is
/// longer than `u16::MAX` bytes; `small_token_error` picks the error.
#[inline]
fn small_token_parts(s: &Stream, start: usize) -> StreamResult<(u32, u16)> {
    let pos = s.pos();
    let len = pos - start;
    let fits = pos <= u32::MAX as usize && len <= u16::MAX as usize;
    if !fits {
        return Err(small_token_error(len));
    }
    // Both casts are lossless here: `start <= pos <= u32::MAX` and `len <= u16::MAX`.
    Ok((start as u32, len as u16))
}
/// Picks the error for a token that could not be stored compactly.
///
/// A length above `u16::MAX` is reported as `TokenTooLong`; any other
/// failure is reported as `DocumentTooLarge`.
#[cold]
fn small_token_error(len: usize) -> StreamError {
    if len <= u16::MAX as usize {
        return StreamError::DocumentTooLarge;
    }
    StreamError::TokenTooLong
}
/// Returns `start` as `u32` after applying the same position and length
/// checks as `small_token_parts`; the computed length is discarded.
#[inline]
fn small_token_start(s: &Stream, start: usize) -> StreamResult<u32> {
    let (offset, _len) = small_token_parts(s, start)?;
    Ok(offset)
}
/// Returns `start` as `u32` for a token with no `u16` length limit.
///
/// Fails with `DocumentTooLarge` when the stream's current position does
/// not fit in `u32`.
#[inline]
fn large_token_start(s: &Stream, start: usize) -> StreamResult<u32> {
    if s.pos() <= u32::MAX as usize {
        Ok(start as u32)
    } else {
        Err(StreamError::DocumentTooLarge)
    }
}
/// Detaches `span` relative to `start` after a length check.
///
/// Fails with `TokenTooLong` when the span's end lies more than `u16::MAX`
/// bytes past `start`.
#[inline]
fn detach_small_checked(span: StrSpan<'_>, start: usize) -> StreamResult<SmallDetachedStrSpan> {
    let distance = span.end() - start;
    if distance <= u16::MAX as usize {
        Ok(span.detach_small(start))
    } else {
        Err(StreamError::TokenTooLong)
    }
}
/// Position of the tokenizer within the document structure.
#[derive(Clone, Copy, PartialEq)]
enum State {
/// Initial state of a whole-document tokenizer; an `<?xml ` declaration may follow.
Declaration,
/// After the declaration step: DOCTYPE, comments, processing instructions and whitespace.
AfterDeclaration,
/// Inside a DTD internal subset, until the closing `]>`.
Dtd,
/// After the DOCTYPE: comments, processing instructions, whitespace, then an element start.
AfterDtd,
/// Element content; also the initial state of a fragment tokenizer.
Elements,
/// Inside a start tag, after the element name.
Attributes,
/// After the last element closed at depth 0 (never entered when parsing a fragment).
AfterElements,
/// Entered after an error; iteration yields nothing further.
End,
}
/// Pull tokenizer over XML text.
///
/// Implements `Iterator`, yielding `Result<Token, Error>` items. After the
/// first error the stream is moved to its end and no further items are produced.
pub struct Tokenizer<'a> {
// Input text and the current read position.
stream: Stream<'a>,
// Current position in the document grammar.
state: State,
// Number of currently open elements: incremented on `>`, decremented on a closing tag.
depth: usize,
// When true, reaching depth 0 does not switch to `State::AfterElements`.
fragment_parsing: bool,
}
impl<'a> From<&'a str> for Tokenizer<'a> {
    /// Creates a tokenizer for a whole document.
    ///
    /// A leading UTF-8 byte order mark is skipped, and tokenizing begins in
    /// the state that accepts an XML declaration.
    #[inline]
    fn from(text: &'a str) -> Self {
        // UTF-8 encoding of U+FEFF.
        const UTF8_BOM: [u8; 3] = [0xEF, 0xBB, 0xBF];

        let mut stream = Stream::from(text);
        if stream.starts_with(&UTF8_BOM) {
            stream.advance(UTF8_BOM.len());
        }

        Self {
            stream,
            state: State::Declaration,
            depth: 0,
            fragment_parsing: false,
        }
    }
}
// Evaluates a fallible stream operation and wraps its error in the given
// `Error` variant, together with a text position generated from the stream
// position recorded *before* the operation ran.
macro_rules! map_err_at {
    ($call:expr, $s:expr, $variant:ident) => {{
        let origin = $s.pos();
        $call.map_err(|cause| Error::$variant(cause, $s.gen_text_pos_from(origin)))
    }};
}
impl<'a> Tokenizer<'a> {
pub fn from_fragment(full_text: &'a str, fragment: core::ops::Range<usize>) -> Self {
Tokenizer {
stream: Stream::from_substr(full_text, fragment),
state: State::Elements,
depth: 0,
fragment_parsing: true,
}
}
/// Performs one step of the tokenizer state machine.
///
/// Returns `Some(..)` when the step produced a token or an error. Returns
/// `None` when the stream is at its end, or when the step only consumed
/// input or changed state without producing a token; `Iterator::next`
/// keeps calling this method until it yields something or input runs out.
fn parse_next_impl(&mut self) -> Option<Result<Token>> {
let s = &mut self.stream;
if s.at_end() {
return None;
}
// Position at which this step began; used for error positions and `DtdEnd`.
let start = s.pos();
match self.state {
State::Declaration => {
// The declaration is optional: move on whether or not one is present.
self.state = State::AfterDeclaration;
if s.starts_with(b"<?xml ") {
Some(Self::parse_declaration(s))
} else {
None
}
}
State::AfterDeclaration => {
if s.starts_with(b"<!DOCTYPE") {
let t = Self::parse_doctype(s);
// A DOCTYPE that opened `[` is followed by the internal subset;
// one that ended with `>` is complete.
match t {
Ok(Token::DtdStartNoExternalId { .. })
| Ok(Token::DtdStartSystemExternalId { .. })
| Ok(Token::DtdStartPublicExternalId { .. }) => self.state = State::Dtd,
Ok(Token::EmptyDtdNoExternalId { .. })
| Ok(Token::EmptyDtdSystemExternalId { .. })
| Ok(Token::EmptyDtdPublicExternalId { .. }) => {
self.state = State::AfterDtd
}
_ => {}
}
Some(t)
} else if s.starts_with(b"<!--") {
Some(Self::parse_comment(s))
} else if s.starts_with(b"<?") {
// A second `<?xml ` declaration is rejected.
if s.starts_with(b"<?xml ") {
Some(Err(Error::UnknownToken(s.gen_text_pos())))
} else {
Some(Self::parse_pi(s))
}
} else if s.starts_with_space() {
s.skip_spaces();
None
} else {
// No DOCTYPE present: let the `AfterDtd` state handle this input.
self.state = State::AfterDtd;
None
}
}
State::Dtd => {
if s.starts_with(b"<!ENTITY") {
Some(Self::parse_entity_decl(s))
} else if s.starts_with(b"<!--") {
Some(Self::parse_comment(s))
} else if s.starts_with(b"<?") {
if s.starts_with(b"<?xml ") {
Some(Err(Error::UnknownToken(s.gen_text_pos())))
} else {
Some(Self::parse_pi(s))
}
} else if s.starts_with(b"]") {
// The DTD ends with `]`, optional whitespace, then `>`.
s.advance(1);
s.skip_spaces();
match s.curr_byte() {
Ok(b'>') => {
self.state = State::AfterDtd;
s.advance(1);
let t = small_token_parts(s, start)
.map(|(start, end)| Token::DtdEnd { start, end });
Some(
t.map_err(|e| Error::InvalidDoctype(e, s.gen_text_pos_from(start))),
)
}
Ok(c) => {
let e = StreamError::InvalidChar(c, b'>', s.gen_text_pos());
Some(Err(Error::InvalidDoctype(e, s.gen_text_pos_from(start))))
}
Err(_) => {
let e = StreamError::UnexpectedEndOfStream;
Some(Err(Error::InvalidDoctype(e, s.gen_text_pos_from(start))))
}
}
} else if s.starts_with_space() {
s.skip_spaces();
None
} else if s.starts_with(b"<!ELEMENT")
|| s.starts_with(b"<!ATTLIST")
|| s.starts_with(b"<!NOTATION")
{
// These declarations are skipped up to the next `>`; no token is produced.
if Self::consume_decl(s).is_err() {
let pos = s.gen_text_pos_from(start);
Some(Err(Error::UnknownToken(pos)))
} else {
None
}
} else {
Some(Err(Error::UnknownToken(s.gen_text_pos())))
}
}
State::AfterDtd => {
if s.starts_with(b"<!--") {
Some(Self::parse_comment(s))
} else if s.starts_with(b"<?") {
if s.starts_with(b"<?xml ") {
Some(Err(Error::UnknownToken(s.gen_text_pos())))
} else {
Some(Self::parse_pi(s))
}
} else if s.starts_with(b"<!") {
Some(Err(Error::UnknownToken(s.gen_text_pos())))
} else if s.starts_with(b"<") {
// Start of an element: its attributes come next.
self.state = State::Attributes;
Some(Self::parse_element_start(s))
} else if s.starts_with_space() {
s.skip_spaces();
None
} else {
Some(Err(Error::UnknownToken(s.gen_text_pos())))
}
}
State::Elements => {
match s.curr_byte() {
// Markup: dispatch on the byte after `<`.
Ok(b'<') => match s.next_byte() {
Ok(b'!') => {
if s.starts_with(b"<!--") {
Some(Self::parse_comment(s))
} else if s.starts_with(b"<![CDATA[") {
Some(Self::parse_cdata(s))
} else {
Some(Err(Error::UnknownToken(s.gen_text_pos())))
}
}
Ok(b'?') => {
if !s.starts_with(b"<?xml ") {
Some(Self::parse_pi(s))
} else {
Some(Err(Error::UnknownToken(s.gen_text_pos())))
}
}
Ok(b'/') => {
// Depth and state are updated before the closing tag itself is parsed.
if self.depth > 0 {
self.depth -= 1;
}
if self.depth == 0 && !self.fragment_parsing {
self.state = State::AfterElements;
} else {
self.state = State::Elements;
}
Some(Self::parse_close_element(s))
}
Ok(_) => {
self.state = State::Attributes;
Some(Self::parse_element_start(s))
}
Err(_) => Some(Err(Error::UnknownToken(s.gen_text_pos()))),
},
// Anything that does not start with `<` is character data.
Ok(_) => Some(Self::parse_text(s)),
Err(_) => Some(Err(Error::UnknownToken(s.gen_text_pos()))),
}
}
State::Attributes => {
let t = Self::parse_attribute(s);
// The start tag is finished once `>` or `/>` has been parsed.
if let Ok(Token::ElementEnd { el_end, .. }) = t {
// Only `>` opens a new nesting level; `/>` leaves the depth unchanged.
if el_end == ElementEnd::Open {
self.depth += 1;
}
if self.depth == 0 && !self.fragment_parsing {
self.state = State::AfterElements;
} else {
self.state = State::Elements;
}
}
Some(t.map_err(|e| Error::InvalidAttribute(e, s.gen_text_pos_from(start))))
}
State::AfterElements => {
// Only comments, processing instructions and whitespace are accepted here.
if s.starts_with(b"<!--") {
Some(Self::parse_comment(s))
} else if s.starts_with(b"<?") {
if s.starts_with(b"<?xml ") {
Some(Err(Error::UnknownToken(s.gen_text_pos())))
} else {
Some(Self::parse_pi(s))
}
} else if s.starts_with_space() {
s.skip_spaces();
None
} else {
Some(Err(Error::UnknownToken(s.gen_text_pos())))
}
}
State::End => None,
}
}
/// Parses an XML declaration at the current position.
///
/// A stream error is wrapped in `Error::InvalidDeclaration`, together with
/// a text position generated from the stream position at entry.
fn parse_declaration(s: &mut Stream<'a>) -> Result<Token> {
map_err_at!(Self::parse_declaration_impl(s), s, InvalidDeclaration)
}
/// Parses `<?xml version=... [encoding=...] [standalone=...] ?>`.
///
/// The caller has already verified that the input starts with `<?xml `.
/// When no encoding is declared, the token's `encoding` is an empty span.
fn parse_declaration_impl(s: &mut Stream<'a>) -> StreamResult<Token> {
    // Whitespace is required between pseudo-attributes, unless the
    // declaration terminator `?>` or the end of input comes next.
    fn require_separator(s: &mut Stream) -> StreamResult<()> {
        if s.starts_with_space() {
            s.skip_spaces();
            return Ok(());
        }
        if s.starts_with(b"?>") || s.at_end() {
            return Ok(());
        }
        Err(StreamError::InvalidSpace(
            s.curr_byte_unchecked(),
            s.gen_text_pos(),
        ))
    }

    let start = s.pos();
    // Skip `<?xml `.
    s.advance(6);

    let version = Self::parse_version_info(s)?;
    require_separator(s)?;

    let encoding = Self::parse_encoding_decl(s)?;
    if encoding.is_some() {
        require_separator(s)?;
    }

    let standalone = Self::parse_standalone(s)?;
    s.skip_spaces();
    s.skip_string(b"?>")?;

    // Spans are detached only after the whole token passed the size check.
    let (start32, end) = small_token_parts(s, start)?;
    let version = version.detach_small(start);
    let encoding = match encoding {
        Some(name) => name.detach_small(start),
        None => SmallDetachedStrSpan::empty(),
    };

    Ok(Token::Declaration {
        start: start32,
        version,
        encoding,
        standalone,
        end,
    })
}
/// Parses `version="1.N"` (either quote style) and returns the span of the
/// version number, i.e. `1.` followed by any digits, without the quotes.
fn parse_version_info(s: &mut Stream<'a>) -> StreamResult<StrSpan<'a>> {
    s.skip_spaces();
    s.skip_string(b"version")?;
    s.consume_eq()?;
    let quote = s.consume_quote()?;

    let number_start = s.pos();
    s.skip_string(b"1.")?;
    s.skip_bytes(|_, byte| byte.is_xml_digit());
    let version = s.slice_back(number_start);

    // The closing quote must be the same character as the opening one.
    s.consume_byte(quote)?;
    Ok(version)
}
fn parse_encoding_decl(s: &mut Stream<'a>) -> StreamResult<Option<StrSpan<'a>>> {
if !s.starts_with(b"encoding") {
return Ok(None);
}
s.advance(8);
s.consume_eq()?;
let quote = s.consume_quote()?;
let name = s.consume_bytes(|_, c| {
c.is_xml_letter() || c.is_xml_digit() || c == b'.' || c == b'-' || c == b'_'
});
s.consume_byte(quote)?;
Ok(Some(name))
}
/// Parses an optional `standalone="yes|no"` pseudo-attribute.
///
/// Returns `Ok(None)` when the input does not start with `standalone`.
/// Any value other than `yes` or `no` is an `InvalidString` error positioned
/// at the start of the value.
fn parse_standalone(s: &mut Stream<'a>) -> StreamResult<Option<bool>> {
    if s.starts_with(b"standalone") {
        s.advance(10);
    } else {
        return Ok(None);
    }

    s.consume_eq()?;
    let quote = s.consume_quote()?;

    let value_start = s.pos();
    let value = s.consume_name()?.as_str();
    let flag = if value == "yes" {
        true
    } else if value == "no" {
        false
    } else {
        return Err(StreamError::InvalidString(
            "yes', 'no",
            s.gen_text_pos_from(value_start),
        ));
    };

    s.consume_byte(quote)?;
    Ok(Some(flag))
}
fn parse_comment(s: &mut Stream<'a>) -> Result<Token> {
let start = s.pos();
Self::parse_comment_impl(s)
.map_err(|e| Error::InvalidComment(e, s.gen_text_pos_from(start)))
}
/// Parses `<!--text-->`; the caller has verified the `<!--` prefix.
///
/// The text is whatever `Stream::skip_comment_text` skips over, and it must
/// be followed by `-->`.
fn parse_comment_impl(s: &mut Stream<'a>) -> StreamResult<Token> {
    let start = s.pos();
    // Skip `<!--`.
    s.advance(4);

    let body_start = s.pos();
    s.skip_comment_text()?;
    let body = s.slice_back(body_start);
    s.skip_string(b"-->")?;

    let start32 = large_token_start(s, start)?;
    Ok(Token::Comment {
        start: start32,
        text: body.detach(start),
    })
}
/// Parses a processing instruction at the current position.
///
/// A stream error is wrapped in `Error::InvalidPI`, together with a text
/// position generated from the stream position at entry.
fn parse_pi(s: &mut Stream<'a>) -> Result<Token> {
map_err_at!(Self::parse_pi_impl(s), s, InvalidPI)
}
/// Parses `<?target content?>`; the caller has verified the `<?` prefix.
///
/// The content runs up to the first `?>`. When it is empty, the token
/// stores an empty span for it.
fn parse_pi_impl(s: &mut Stream<'a>) -> StreamResult<Token> {
    let start = s.pos();
    // Skip `<?`.
    s.advance(2);

    let target = s.consume_name()?;
    s.skip_spaces();
    // Keep consuming until the `?>` terminator is the next thing in the stream.
    let content = s.consume_chars(|stream, ch| ch != '?' || !stream.starts_with(b"?>"))?;
    s.skip_string(b"?>")?;

    let (start32, end) = small_token_parts(s, start)?;
    Ok(Token::ProcessingInstruction {
        start: start32,
        end,
        target: target.detach_small(start),
        content: if content.is_empty() {
            SmallDetachedStrSpan::empty()
        } else {
            content.detach_small(start)
        },
    })
}
/// Parses the opening of a DOCTYPE declaration at the current position.
///
/// A stream error is wrapped in `Error::InvalidDoctype`, together with a
/// text position generated from the stream position at entry.
fn parse_doctype(s: &mut Stream<'a>) -> Result<Token> {
map_err_at!(Self::parse_doctype_impl(s), s, InvalidDoctype)
}
/// Parses `<!DOCTYPE name [ExternalID]` followed by `[` or `>`.
///
/// The caller has verified the `<!DOCTYPE` prefix. A terminating `[` yields
/// one of the `DtdStart*` tokens (an internal subset follows); a terminating
/// `>` yields one of the `EmptyDtd*` tokens. The variant also encodes which
/// kind of external ID, if any, was present.
fn parse_doctype_impl(s: &mut Stream<'a>) -> StreamResult<Token> {
    static EXPECTED: &[u8] = b"[>";

    let start = s.pos();
    // Skip `<!DOCTYPE`.
    s.advance(9);
    s.consume_spaces()?;
    let name = s.consume_name()?;
    s.skip_spaces();
    let external_id = Self::parse_external_id(s, start)?;
    s.skip_spaces();

    let terminator = s.curr_byte()?;
    match terminator {
        b'[' | b'>' => {
            s.advance(1);
        }
        other => {
            return Err(StreamError::InvalidCharMultiple(
                other,
                EXPECTED,
                s.gen_text_pos(),
            ));
        }
    }

    let (start32, end) = small_token_parts(s, start)?;
    let name = name.detach_small(start);
    let has_internal_subset = terminator == b'[';

    let token = match (has_internal_subset, external_id) {
        (true, None) => Token::DtdStartNoExternalId {
            start: start32,
            end,
            name,
        },
        (true, Some(ExternalId::System(external_id))) => Token::DtdStartSystemExternalId {
            start: start32,
            end,
            name,
            external_id,
        },
        (true, Some(ExternalId::Public(public1, public2))) => Token::DtdStartPublicExternalId {
            start: start32,
            end,
            name,
            public1,
            public2,
        },
        (false, None) => Token::EmptyDtdNoExternalId {
            start: start32,
            end,
            name,
        },
        (false, Some(ExternalId::System(external_id))) => Token::EmptyDtdSystemExternalId {
            start: start32,
            end,
            name,
            external_id,
        },
        (false, Some(ExternalId::Public(public1, public2))) => Token::EmptyDtdPublicExternalId {
            start: start32,
            end,
            name,
            public1,
            public2,
        },
    };
    Ok(token)
}
/// Parses an optional external identifier: `SYSTEM "literal"` or
/// `PUBLIC "literal" "literal"`.
///
/// Returns `Ok(None)` when the input starts with neither keyword. The
/// literal spans are detached relative to `start` (the start of the
/// enclosing token) and fail with `TokenTooLong` if they do not fit.
fn parse_external_id(s: &mut Stream<'a>, start: usize) -> StreamResult<Option<ExternalId>> {
    let is_system = s.starts_with(b"SYSTEM");
    if !is_system && !s.starts_with(b"PUBLIC") {
        return Ok(None);
    }
    // Both keywords are six bytes long.
    s.advance(6);

    s.consume_spaces()?;
    let quote = s.consume_quote()?;
    let first = s.consume_bytes(|_, byte| byte != quote);
    s.consume_byte(quote)?;

    if is_system {
        let literal = detach_small_checked(first, start)?;
        return Ok(Some(ExternalId::System(literal)));
    }

    s.consume_spaces()?;
    let quote = s.consume_quote()?;
    let second = s.consume_bytes(|_, byte| byte != quote);
    s.consume_byte(quote)?;

    // Length checks happen only after both literals parsed successfully.
    let first = detach_small_checked(first, start)?;
    let second = detach_small_checked(second, start)?;
    Ok(Some(ExternalId::Public(first, second)))
}
/// Parses an `<!ENTITY ...>` declaration at the current position.
///
/// A stream error is wrapped in `Error::InvalidEntity`, together with a
/// text position generated from the stream position at entry.
fn parse_entity_decl(s: &mut Stream<'a>) -> Result<Token> {
map_err_at!(Self::parse_entity_decl_impl(s), s, InvalidEntity)
}
/// Parses `<!ENTITY [%] name definition>`; the caller has verified the
/// `<!ENTITY` prefix.
///
/// A `%` before the name marks the entity as not being a general entity,
/// which `parse_entity_def` uses to decide whether `NDATA` may follow an
/// external ID. The token variant reflects the kind of definition.
fn parse_entity_decl_impl(s: &mut Stream<'a>) -> StreamResult<Token> {
    let start = s.pos();
    // Skip `<!ENTITY`.
    s.advance(8);
    s.consume_spaces()?;

    let is_ge = !s.try_consume_byte(b'%');
    if !is_ge {
        s.consume_spaces()?;
    }

    let name = s.consume_name()?;
    s.consume_spaces()?;
    let definition = Self::parse_entity_def(s, is_ge, start)?;
    s.skip_spaces();
    s.consume_byte(b'>')?;

    let (start32, end) = small_token_parts(s, start)?;
    let name = name.detach_small(start);

    let token = match definition {
        EntityDefinition::EntityValue(entity_value) => Token::EntityDeclarationEntityValue {
            start: start32,
            end,
            name,
            entity_value,
        },
        EntityDefinition::ExternalId(ExternalId::System(external_id)) => {
            Token::EntityDeclarationSystemExternalId {
                start: start32,
                end,
                name,
                external_id,
            }
        }
        EntityDefinition::ExternalId(ExternalId::Public(public1, public2)) => {
            Token::EntityDeclarationPublicExternalId {
                start: start32,
                end,
                name,
                public1,
                public2,
            }
        }
    };
    Ok(token)
}
/// Parses the definition part of an entity declaration.
///
/// A quote starts an entity value; `S` or `P` must start a `SYSTEM` /
/// `PUBLIC` external ID (otherwise `InvalidExternalID`). For a general
/// entity (`is_ge`), an optional `NDATA name` after the external ID is
/// consumed and discarded. Any other first byte is an error.
fn parse_entity_def(
    s: &mut Stream<'a>,
    is_ge: bool,
    start: usize,
) -> StreamResult<EntityDefinition> {
    static EXPECTED: &[u8] = b"\"'SP";

    let first = s.curr_byte()?;
    match first {
        b'"' | b'\'' => {
            let quote = s.consume_quote()?;
            let value = s.consume_bytes(|_, byte| byte != quote);
            s.consume_byte(quote)?;
            let value = detach_small_checked(value, start)?;
            Ok(EntityDefinition::EntityValue(value))
        }
        b'S' | b'P' => {
            let id = match Self::parse_external_id(s, start)? {
                Some(id) => id,
                None => return Err(StreamError::InvalidExternalID),
            };
            if is_ge {
                s.skip_spaces();
                if s.starts_with(b"NDATA") {
                    s.advance(5);
                    s.consume_spaces()?;
                    s.skip_name()?;
                }
            }
            Ok(EntityDefinition::ExternalId(id))
        }
        other => Err(StreamError::InvalidCharMultiple(
            other,
            EXPECTED,
            s.gen_text_pos(),
        )),
    }
}
/// Skips a declaration without interpreting it: advances to the next `>`
/// and consumes it. Fails when no `>` can be consumed.
fn consume_decl(s: &mut Stream) -> StreamResult<()> {
    const TERMINATOR: u8 = b'>';
    s.skip_bytes(|_, byte| byte != TERMINATOR);
    s.consume_byte(TERMINATOR)?;
    Ok(())
}
/// Parses a CDATA section at the current position.
///
/// A stream error is wrapped in `Error::InvalidCdata`, together with a text
/// position generated from the stream position at entry.
fn parse_cdata(s: &mut Stream<'a>) -> Result<Token> {
map_err_at!(Self::parse_cdata_impl(s), s, InvalidCdata)
}
/// Parses `<![CDATA[text]]>`; the caller has verified the `<![CDATA[`
/// prefix. The text runs up to the first `]]>`.
fn parse_cdata_impl(s: &mut Stream<'a>) -> StreamResult<Token> {
    let start = s.pos();
    // Skip `<![CDATA[`.
    s.advance(9);

    // Keep consuming until the `]]>` terminator is the next thing in the stream.
    let body = s.consume_chars(|stream, ch| ch != ']' || !stream.starts_with(b"]]>"))?;
    s.skip_string(b"]]>")?;

    let start32 = large_token_start(s, start)?;
    Ok(Token::Cdata {
        start: start32,
        text: body.detach(start),
    })
}
/// Parses the opening of a start tag (`<` and the qualified name).
///
/// A stream error is wrapped in `Error::InvalidElement`, together with a
/// text position generated from the stream position at entry.
fn parse_element_start(s: &mut Stream<'a>) -> Result<Token> {
map_err_at!(Self::parse_element_start_impl(s), s, InvalidElement)
}
/// Parses `<prefix:local`; the token ends right after the qualified name.
/// Attributes and the tag end are parsed separately.
fn parse_element_start_impl(s: &mut Stream<'a>) -> StreamResult<Token> {
    let start = s.pos();
    // Skip `<`.
    s.advance(1);
    let (prefix, local) = s.consume_qname()?;

    let start32 = small_token_start(s, start)?;
    Ok(Token::ElementStart {
        start: start32,
        prefix: prefix.detach_small(start),
        local: local.detach_small(start),
    })
}
/// Parses a closing tag at the current position.
///
/// A stream error is wrapped in `Error::InvalidElement`, together with a
/// text position generated from the stream position at entry.
fn parse_close_element(s: &mut Stream<'a>) -> Result<Token> {
map_err_at!(Self::parse_close_element_impl(s), s, InvalidElement)
}
/// Parses `</prefix:local>`; whitespace is allowed before the final `>`.
/// The caller has verified the `</` prefix.
fn parse_close_element_impl(s: &mut Stream<'a>) -> StreamResult<Token> {
    let start = s.pos();
    // Skip `</`.
    s.advance(2);
    let (prefix, local) = s.consume_qname()?;
    s.skip_spaces();
    s.consume_byte(b'>')?;

    let (start32, end) = small_token_parts(s, start)?;
    let closed = ElementEnd::Close(prefix.detach_small(start), local.detach_small(start));
    Ok(Token::ElementEnd {
        start: start32,
        end,
        el_end: closed,
    })
}
/// Parses the next item inside a start tag: the tag end (`>` or `/>`) or
/// one attribute `prefix:local="value"`.
///
/// Leading whitespace is skipped. A tag end needs no preceding whitespace,
/// but an attribute does: without it the result is `InvalidSpace`, or
/// `UnexpectedEndOfStream` when the input is exhausted. A tag-end token
/// starts at the `>` / `/`; an attribute token starts at its name.
fn parse_attribute(s: &mut Stream<'a>) -> StreamResult<Token> {
    let attr_start = s.pos();
    let has_space = s.starts_with_space();
    s.skip_spaces();

    // First see whether the tag ends here.
    let tag_end = match s.curr_byte() {
        Ok(b'/') => Some(ElementEnd::Empty),
        Ok(b'>') => Some(ElementEnd::Open),
        _ => None,
    };
    if let Some(el_end) = tag_end {
        let start = s.pos();
        s.advance(1);
        if el_end == ElementEnd::Empty {
            // `/` must be followed immediately by `>`.
            s.consume_byte(b'>')?;
        }
        let (start, end) = small_token_parts(s, start)?;
        return Ok(Token::ElementEnd { start, end, el_end });
    }

    if !has_space {
        if s.at_end() {
            return Err(StreamError::UnexpectedEndOfStream);
        }
        return Err(StreamError::InvalidSpace(
            s.curr_byte_unchecked(),
            s.gen_text_pos_from(attr_start),
        ));
    }

    let start = s.pos();
    let (prefix, local) = s.consume_qname()?;
    s.consume_eq()?;
    let quote = s.consume_quote()?;

    let value_start = s.pos();
    s.skip_attr_value(quote)?;
    let value = s.slice_back(value_start);
    s.consume_byte(quote)?;

    let start32 = small_token_start(s, start)?;
    Ok(Token::Attribute {
        start: start32,
        prefix: prefix.detach_small(start),
        local: local.detach_small(start),
        value: value.detach_small(start),
    })
}
/// Parses character data at the current position.
///
/// A stream error is wrapped in `Error::InvalidCharData`, together with a
/// text position generated from the stream position at entry.
fn parse_text(s: &mut Stream<'a>) -> Result<Token> {
map_err_at!(Self::parse_text_impl(s), s, InvalidCharData)
}
/// Produces a `Text` token covering whatever `Stream::skip_text_content`
/// skips over, starting at the current position.
fn parse_text_impl(s: &mut Stream<'a>) -> StreamResult<Token> {
    let start = s.pos();
    s.skip_text_content()?;
    let body = s.slice_back(start);

    let start32 = large_token_start(s, start)?;
    Ok(Token::Text {
        start: start32,
        text: body.detach(start),
    })
}
}
impl<'a> Iterator for Tokenizer<'a> {
type Item = Result<Token>;
#[inline]
fn next(&mut self) -> Option<Self::Item> {
let mut t = None;
while !self.stream.at_end() && self.state != State::End && t.is_none() {
t = self.parse_next_impl();
}
if let Some(Err(_)) = t {
self.stream.jump_to_end();
self.state = State::End;
}
t
}
}