mod utils;
use crate::entities::try_read_character_reference;
use crate::offset::{Offset, Position};
use crate::trace::AttrValueSyntax;
use crate::{reader::Reader, Emitter, Error};
use utils::{
ascii_digit_pat, control_pat, ctostr, noncharacter_pat, surrogate_pat, whitespace_pat,
};
pub use utils::State;
/// Tokenizer state machine: bundles the input reader, the emitter and the
/// bookkeeping that `consume` reads and updates on every step.
///
/// `R` is the reader, `O` the offset type used for positions, and `E` the
/// emitter; the trait bounds live on the `impl` blocks, not on the struct.
pub(super) struct Machine<R, O, E> {
/// State the machine is currently in; `new` starts it at `State::Data`.
pub(super) state: State,
/// Sink whose methods `consume` calls to build and emit tags, attributes,
/// comments and doctypes.
pub(super) emitter: E,
/// Scratch buffer: cleared when a `/` follows `<` in the RCDATA, RAWTEXT and
/// script-data states, then filled with the ASCII letters that follow; the
/// script double-escape states compare it against `"script"`.
temporary_buffer: String,
/// Source of input characters and of the current position.
reader: R,
/// Characters queued to be handed out again — presumably filled by
/// `unread_char`, which is defined outside this view (TODO confirm).
to_reconsume: Stack2<Option<char>>,
/// Accumulator for a numeric character reference; starts at 0 and is pinned to
/// `0x110000` by `consume` when the checked arithmetic overflows.
character_reference_code: u32,
/// Set to the originating state right before switching to
/// `State::CharacterReference`; presumably restored once the reference has been
/// processed (that code is not visible here).
return_state: Option<State>,
/// NOTE(review): presumably the name of the tag being built, maintained by
/// `push_tag_name` — helper not visible here, confirm.
current_tag_name: String,
/// NOTE(review): presumably the name of the most recently emitted start tag,
/// consulted by `current_end_tag_is_appropriate` — confirm against that helper.
pub(super) last_start_tag_name: String,
/// NOTE(review): presumably toggled by `init_start_tag` / `init_end_tag` —
/// helpers not visible here, confirm.
is_start_tag: bool,
/// Reader position captured at the very start of each `consume` call, i.e.
/// before the character handled by that call is read.
position_before_match: O,
/// Offset remembered across steps: set to `position_before_match` when `<` is
/// read in the data state and when `-` is read inside a comment, and later
/// passed to `emit_current_comment` by the comment-end states.
some_offset: O,
/// Initialised to `false` by `new`; not used in the visible part of the file.
/// NOTE(review): purpose to be confirmed against its users.
pub(crate) naively_switch_state: bool,
}
impl<R, O, E> Machine<R, O, E>
where
    R: Reader + Position<O>,
    O: Offset,
    E: Emitter<O>,
{
    /// Creates a machine that pulls characters from `reader` and reports to
    /// `emitter`.
    ///
    /// The machine begins in [`State::Data`]; every buffer is empty, every
    /// offset is `O::default()`, there is no pending return state and all
    /// flags are `false`.
    pub fn new(reader: R, emitter: E) -> Self {
        // Fields are listed in declaration order so the initial configuration
        // can be checked against the struct definition at a glance.
        Self {
            state: State::Data,
            emitter,
            temporary_buffer: String::default(),
            reader,
            to_reconsume: Stack2::default(),
            character_reference_code: 0,
            return_state: None,
            current_tag_name: String::default(),
            last_start_tag_name: String::default(),
            is_start_tag: false,
            position_before_match: O::default(),
            some_offset: O::default(),
            naively_switch_state: false,
        }
    }
}
/// Outcome of a single `consume` step, telling the driver what to do next.
///
/// The common traits are derived so callers can log, compare and freely copy
/// this field-less value.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ControlToken {
    /// The reader ran out of input during this step; any end-of-file error or
    /// pending token for the current state has already been handed to the
    /// emitter.
    Eof,
    /// The step finished normally; `consume` should be called again.
    Continue,
    /// The markup-declaration-open state matched `[CDATA[`. The state machine
    /// itself does not switch state in that case, so the caller decides how
    /// the CDATA section is handled.
    CdataOpen,
}
#[inline]
pub(super) fn consume<O, R, E>(slf: &mut Machine<R, O, E>) -> Result<ControlToken, R::Error>
where
O: Offset,
R: Reader + Position<O>,
E: Emitter<O>,
{
macro_rules! mutate_character_reference {
(* $mul:literal + $x:ident - $sub:literal) => {
match slf
.character_reference_code
.checked_mul($mul)
.and_then(|cr| cr.checked_add($x as u32 - $sub))
{
Some(cr) => slf.character_reference_code = cr,
None => {
slf.character_reference_code = 0x110000;
}
};
};
}
slf.position_before_match = slf.reader.position();
match slf.state {
State::Data => match slf.read_char()? {
Some('&') => {
slf.return_state = Some(slf.state);
slf.state = State::CharacterReference;
Ok(ControlToken::Continue)
}
Some('<') => {
slf.some_offset = slf.position_before_match;
slf.state = State::TagOpen;
Ok(ControlToken::Continue)
}
Some('\0') => {
slf.emit_error(Error::UnexpectedNullCharacter);
slf.emit_char('\0');
Ok(ControlToken::Continue)
}
Some(x) => {
slf.emit_char(x);
Ok(ControlToken::Continue)
}
None => Ok(ControlToken::Eof),
},
State::Rcdata => match slf.read_char()? {
Some('&') => {
slf.return_state = Some(State::Rcdata);
slf.state = State::CharacterReference;
Ok(ControlToken::Continue)
}
Some('<') => {
slf.state = State::RcdataLessThanSign;
Ok(ControlToken::Continue)
}
Some('\0') => {
slf.emit_error(Error::UnexpectedNullCharacter);
slf.emit_char_for_source_char('\u{fffd}', '\0');
Ok(ControlToken::Continue)
}
Some(x) => {
slf.emit_char(x);
Ok(ControlToken::Continue)
}
None => Ok(ControlToken::Eof),
},
State::Rawtext => match slf.read_char()? {
Some('<') => {
slf.state = State::RawTextLessThanSign;
Ok(ControlToken::Continue)
}
Some('\0') => {
slf.emit_error(Error::UnexpectedNullCharacter);
slf.emit_char_for_source_char('\u{fffd}', '\0');
Ok(ControlToken::Continue)
}
Some(x) => {
slf.emit_char(x);
Ok(ControlToken::Continue)
}
None => Ok(ControlToken::Eof),
},
State::ScriptData => match slf.read_char()? {
Some('<') => {
slf.state = State::ScriptDataLessThanSign;
Ok(ControlToken::Continue)
}
Some('\0') => {
slf.emit_error(Error::UnexpectedNullCharacter);
slf.emit_char_for_source_char('\u{fffd}', '\0');
Ok(ControlToken::Continue)
}
Some(x) => {
slf.emit_char(x);
Ok(ControlToken::Continue)
}
None => Ok(ControlToken::Eof),
},
State::Plaintext => match slf.read_char()? {
Some('\0') => {
slf.emit_error(Error::UnexpectedNullCharacter);
slf.emit_char_for_source_char('\u{fffd}', '\0');
Ok(ControlToken::Continue)
}
Some(x) => {
slf.emit_char(x);
Ok(ControlToken::Continue)
}
None => Ok(ControlToken::Eof),
},
State::TagOpen => match slf.read_char()? {
Some('!') => {
slf.state = State::MarkupDeclarationOpen;
Ok(ControlToken::Continue)
}
Some('/') => {
slf.state = State::EndTagOpen;
Ok(ControlToken::Continue)
}
Some(x) if x.is_ascii_alphabetic() => {
slf.init_start_tag();
slf.state = State::TagName;
slf.unread_char(Some(x));
Ok(ControlToken::Continue)
}
c @ Some('?') => {
slf.emit_error(Error::UnexpectedQuestionMarkInsteadOfTagName);
slf.emitter.init_comment(slf.reader.position());
slf.state = State::BogusComment;
slf.unread_char(c);
Ok(ControlToken::Continue)
}
None => {
slf.emit_error(Error::EofBeforeTagName);
slf.emit_char('<');
Ok(ControlToken::Eof)
}
c @ Some(_) => {
slf.emit_error(Error::InvalidFirstCharacterOfTagName);
slf.state = State::Data;
slf.emit_char('<');
slf.unread_char(c);
Ok(ControlToken::Continue)
}
},
State::EndTagOpen => match slf.read_char()? {
Some(x) if x.is_ascii_alphabetic() => {
slf.init_end_tag();
slf.state = State::TagName;
slf.unread_char(Some(x));
Ok(ControlToken::Continue)
}
Some('>') => {
slf.emit_error(Error::MissingEndTagName);
slf.state = State::Data;
Ok(ControlToken::Continue)
}
None => {
slf.emit_error(Error::EofBeforeTagName);
slf.emit_chars(b"</");
Ok(ControlToken::Eof)
}
Some(x) => {
slf.emit_error(Error::InvalidFirstCharacterOfTagName);
slf.emitter.init_comment(slf.reader.position());
slf.state = State::BogusComment;
slf.unread_char(Some(x));
Ok(ControlToken::Continue)
}
},
State::TagName => match slf.read_char()? {
Some(whitespace_pat!()) => {
slf.emitter.terminate_tag_name(slf.position_before_match);
slf.state = State::BeforeAttributeName;
Ok(ControlToken::Continue)
}
Some('/') => {
slf.emitter.terminate_tag_name(slf.position_before_match);
slf.state = State::SelfClosingStartTag;
Ok(ControlToken::Continue)
}
Some('>') => {
slf.emitter.terminate_tag_name(slf.position_before_match);
slf.state = State::Data;
slf.emit_current_tag();
Ok(ControlToken::Continue)
}
Some('\0') => {
slf.emit_error(Error::UnexpectedNullCharacter);
slf.push_tag_name("\u{fffd}");
Ok(ControlToken::Continue)
}
Some(x) => {
slf.push_tag_name(ctostr!(x.to_ascii_lowercase()));
Ok(ControlToken::Continue)
}
None => {
slf.emit_error(Error::EofInTag);
Ok(ControlToken::Eof)
}
},
State::RcdataLessThanSign => match slf.read_char()? {
Some('/') => {
slf.temporary_buffer.clear();
slf.state = State::RcdataEndTagOpen;
Ok(ControlToken::Continue)
}
c => {
slf.emit_char('<');
slf.state = State::Rcdata;
slf.unread_char(c);
Ok(ControlToken::Continue)
}
},
State::RcdataEndTagOpen => match slf.read_char()? {
Some(x) if x.is_ascii_alphabetic() => {
slf.init_end_tag();
slf.state = State::RcdataEndTagName;
slf.unread_char(Some(x));
Ok(ControlToken::Continue)
}
c => {
slf.emit_chars(b"</");
slf.state = State::Rcdata;
slf.unread_char(c);
Ok(ControlToken::Continue)
}
},
State::RcdataEndTagName => match slf.read_char()? {
Some(whitespace_pat!()) if slf.current_end_tag_is_appropriate() => {
slf.state = State::BeforeAttributeName;
Ok(ControlToken::Continue)
}
Some('/') if slf.current_end_tag_is_appropriate() => {
slf.state = State::SelfClosingStartTag;
Ok(ControlToken::Continue)
}
Some('>') if slf.current_end_tag_is_appropriate() => {
slf.state = State::Data;
slf.emit_current_tag();
Ok(ControlToken::Continue)
}
Some(x) if x.is_ascii_alphabetic() => {
slf.push_tag_name(ctostr!(x.to_ascii_lowercase()));
slf.temporary_buffer.push(x);
Ok(ControlToken::Continue)
}
c => {
slf.emit_chars(b"</");
slf.flush_buffer_characters();
slf.state = State::Rcdata;
slf.unread_char(c);
Ok(ControlToken::Continue)
}
},
State::RawTextLessThanSign => match slf.read_char()? {
Some('/') => {
slf.temporary_buffer.clear();
slf.state = State::RawTextEndTagOpen;
Ok(ControlToken::Continue)
}
c => {
slf.emit_char('<');
slf.state = State::Rawtext;
slf.unread_char(c);
Ok(ControlToken::Continue)
}
},
State::RawTextEndTagOpen => match slf.read_char()? {
Some(x) if x.is_ascii_alphabetic() => {
slf.init_end_tag();
slf.state = State::RawTextEndTagName;
slf.unread_char(Some(x));
Ok(ControlToken::Continue)
}
c => {
slf.emit_chars(b"</");
slf.state = State::Rawtext;
slf.unread_char(c);
Ok(ControlToken::Continue)
}
},
State::RawTextEndTagName => match slf.read_char()? {
Some(whitespace_pat!()) if slf.current_end_tag_is_appropriate() => {
slf.state = State::BeforeAttributeName;
Ok(ControlToken::Continue)
}
Some('/') if slf.current_end_tag_is_appropriate() => {
slf.state = State::SelfClosingStartTag;
Ok(ControlToken::Continue)
}
Some('>') if slf.current_end_tag_is_appropriate() => {
slf.state = State::Data;
slf.emit_current_tag();
Ok(ControlToken::Continue)
}
Some(x) if x.is_ascii_alphabetic() => {
slf.push_tag_name(ctostr!(x.to_ascii_lowercase()));
slf.temporary_buffer.push(x);
Ok(ControlToken::Continue)
}
c => {
slf.emit_chars(b"</");
slf.flush_buffer_characters();
slf.state = State::Rawtext;
slf.unread_char(c);
Ok(ControlToken::Continue)
}
},
State::ScriptDataLessThanSign => match slf.read_char()? {
Some('/') => {
slf.temporary_buffer.clear();
slf.state = State::ScriptDataEndTagOpen;
Ok(ControlToken::Continue)
}
Some('!') => {
slf.state = State::ScriptDataEscapeStart;
slf.emit_chars(b"<!");
Ok(ControlToken::Continue)
}
c => {
slf.emit_char('<');
slf.state = State::ScriptData;
slf.unread_char(c);
Ok(ControlToken::Continue)
}
},
State::ScriptDataEndTagOpen => match slf.read_char()? {
Some(x) if x.is_ascii_alphabetic() => {
slf.init_end_tag();
slf.state = State::ScriptDataEndTagName;
slf.unread_char(Some(x));
Ok(ControlToken::Continue)
}
c => {
slf.emit_chars(b"</");
slf.state = State::ScriptData;
slf.unread_char(c);
Ok(ControlToken::Continue)
}
},
State::ScriptDataEndTagName => match slf.read_char()? {
Some(whitespace_pat!()) if slf.current_end_tag_is_appropriate() => {
slf.state = State::BeforeAttributeName;
Ok(ControlToken::Continue)
}
Some('/') if slf.current_end_tag_is_appropriate() => {
slf.state = State::SelfClosingStartTag;
Ok(ControlToken::Continue)
}
Some('>') if slf.current_end_tag_is_appropriate() => {
slf.state = State::Data;
slf.emit_current_tag();
Ok(ControlToken::Continue)
}
Some(x) if x.is_ascii_alphabetic() => {
slf.push_tag_name(ctostr!(x.to_ascii_lowercase()));
slf.temporary_buffer.push(x);
Ok(ControlToken::Continue)
}
c => {
slf.emit_chars(b"</");
slf.flush_buffer_characters();
slf.state = State::Data;
slf.unread_char(c);
Ok(ControlToken::Continue)
}
},
State::ScriptDataEscapeStart => match slf.read_char()? {
Some('-') => {
slf.state = State::ScriptDataEscapeStartDash;
slf.emit_char('-');
Ok(ControlToken::Continue)
}
c => {
slf.state = State::ScriptData;
slf.unread_char(c);
Ok(ControlToken::Continue)
}
},
State::ScriptDataEscapeStartDash => match slf.read_char()? {
Some('-') => {
slf.state = State::ScriptDataEscapedDashDash;
slf.emit_char('-');
Ok(ControlToken::Continue)
}
c => {
slf.state = State::ScriptData;
slf.unread_char(c);
Ok(ControlToken::Continue)
}
},
State::ScriptDataEscaped => match slf.read_char()? {
Some('-') => {
slf.state = State::ScriptDataEscapedDash;
slf.emit_char('-');
Ok(ControlToken::Continue)
}
Some('<') => {
slf.state = State::ScriptDataEscapedLessThanSign;
Ok(ControlToken::Continue)
}
Some('\0') => {
slf.emit_error(Error::UnexpectedNullCharacter);
slf.emit_char_for_source_char('\u{fffd}', '\0');
Ok(ControlToken::Continue)
}
None => {
slf.emit_error(Error::EofInScriptHtmlCommentLikeText);
Ok(ControlToken::Eof)
}
Some(x) => {
slf.emit_char(x);
Ok(ControlToken::Continue)
}
},
State::ScriptDataEscapedDash => match slf.read_char()? {
Some('-') => {
slf.state = State::ScriptDataEscapedDashDash;
slf.emit_char('-');
Ok(ControlToken::Continue)
}
Some('<') => {
slf.state = State::ScriptDataEscapedLessThanSign;
Ok(ControlToken::Continue)
}
Some('\0') => {
slf.emit_error(Error::UnexpectedNullCharacter);
slf.state = State::ScriptDataEscaped;
slf.emit_char_for_source_char('\u{fffd}', '\0');
Ok(ControlToken::Continue)
}
None => {
slf.emit_error(Error::EofInScriptHtmlCommentLikeText);
Ok(ControlToken::Eof)
}
Some(x) => {
slf.state = State::ScriptDataEscaped;
slf.emit_char(x);
Ok(ControlToken::Continue)
}
},
State::ScriptDataEscapedDashDash => match slf.read_char()? {
Some('-') => {
slf.emit_char('-');
Ok(ControlToken::Continue)
}
Some('<') => {
slf.state = State::ScriptDataEscapedLessThanSign;
Ok(ControlToken::Continue)
}
Some('>') => {
slf.state = State::ScriptData;
slf.emit_char('>');
Ok(ControlToken::Continue)
}
Some('\0') => {
slf.emit_error(Error::UnexpectedNullCharacter);
slf.state = State::ScriptDataEscaped;
slf.emit_char_for_source_char('\u{fffd}', '\0');
Ok(ControlToken::Continue)
}
None => {
slf.emit_error(Error::EofInScriptHtmlCommentLikeText);
Ok(ControlToken::Eof)
}
Some(x) => {
slf.state = State::ScriptDataEscaped;
slf.emit_char(x);
Ok(ControlToken::Continue)
}
},
State::ScriptDataEscapedLessThanSign => match slf.read_char()? {
Some('/') => {
slf.temporary_buffer.clear();
slf.state = State::ScriptDataEscapedEndTagOpen;
Ok(ControlToken::Continue)
}
Some(x) if x.is_ascii_alphabetic() => {
slf.temporary_buffer.clear();
slf.emit_char('<');
slf.state = State::ScriptDataDoubleEscapeStart;
slf.unread_char(Some(x));
Ok(ControlToken::Continue)
}
c => {
slf.emit_char('<');
slf.state = State::ScriptDataEscaped;
slf.unread_char(c);
Ok(ControlToken::Continue)
}
},
State::ScriptDataEscapedEndTagOpen => match slf.read_char()? {
Some(x) if x.is_ascii_alphabetic() => {
slf.init_end_tag();
slf.state = State::ScriptDataEscapedEndTagName;
slf.unread_char(Some(x));
Ok(ControlToken::Continue)
}
c => {
slf.emit_chars(b"</");
slf.unread_char(c);
slf.state = State::ScriptDataEscaped;
Ok(ControlToken::Continue)
}
},
State::ScriptDataEscapedEndTagName => match slf.read_char()? {
Some(whitespace_pat!()) if slf.current_end_tag_is_appropriate() => {
slf.state = State::BeforeAttributeName;
Ok(ControlToken::Continue)
}
Some('/') if slf.current_end_tag_is_appropriate() => {
slf.state = State::SelfClosingStartTag;
Ok(ControlToken::Continue)
}
Some('>') if slf.current_end_tag_is_appropriate() => {
slf.state = State::Data;
slf.emit_current_tag();
Ok(ControlToken::Continue)
}
Some(x) if x.is_ascii_alphabetic() => {
slf.push_tag_name(ctostr!(x.to_ascii_lowercase()));
slf.temporary_buffer.push(x);
Ok(ControlToken::Continue)
}
c => {
slf.emit_chars(b"</");
slf.flush_buffer_characters();
slf.state = State::ScriptDataEscaped;
slf.unread_char(c);
Ok(ControlToken::Continue)
}
},
State::ScriptDataDoubleEscapeStart => match slf.read_char()? {
Some(x @ whitespace_pat!() | x @ '/' | x @ '>') => {
if slf.temporary_buffer == "script" {
slf.state = State::ScriptDataDoubleEscaped;
} else {
slf.state = State::ScriptDataEscaped;
}
slf.emit_char(x);
Ok(ControlToken::Continue)
}
Some(x) if x.is_ascii_alphabetic() => {
slf.temporary_buffer.push(x.to_ascii_lowercase());
slf.emit_char(x);
Ok(ControlToken::Continue)
}
c => {
slf.state = State::ScriptDataEscaped;
slf.unread_char(c);
Ok(ControlToken::Continue)
}
},
State::ScriptDataDoubleEscaped => match slf.read_char()? {
Some('-') => {
slf.state = State::ScriptDataDoubleEscapedDash;
slf.emit_char('-');
Ok(ControlToken::Continue)
}
Some('<') => {
slf.state = State::ScriptDataDoubleEscapedLessThanSign;
slf.emit_char('<');
Ok(ControlToken::Continue)
}
Some('\0') => {
slf.emit_error(Error::UnexpectedNullCharacter);
slf.emit_char_for_source_char('\u{fffd}', '\0');
Ok(ControlToken::Continue)
}
None => {
slf.emit_error(Error::EofInScriptHtmlCommentLikeText);
Ok(ControlToken::Eof)
}
Some(x) => {
slf.emit_char(x);
Ok(ControlToken::Continue)
}
},
State::ScriptDataDoubleEscapedDash => match slf.read_char()? {
Some('-') => {
slf.state = State::ScriptDataDoubleEscapedDashDash;
slf.emit_char('-');
Ok(ControlToken::Continue)
}
Some('<') => {
slf.state = State::ScriptDataDoubleEscapedLessThanSign;
slf.emit_char('<');
Ok(ControlToken::Continue)
}
Some('\0') => {
slf.emit_error(Error::UnexpectedNullCharacter);
slf.state = State::ScriptDataDoubleEscaped;
slf.emit_char_for_source_char('\u{fffd}', '\0');
Ok(ControlToken::Continue)
}
None => {
slf.emit_error(Error::EofInScriptHtmlCommentLikeText);
Ok(ControlToken::Eof)
}
Some(x) => {
slf.state = State::ScriptDataDoubleEscaped;
slf.emit_char(x);
Ok(ControlToken::Continue)
}
},
State::ScriptDataDoubleEscapedDashDash => match slf.read_char()? {
Some('-') => {
slf.emit_char('-');
Ok(ControlToken::Continue)
}
Some('<') => {
slf.emit_char('<');
slf.state = State::ScriptDataDoubleEscapedLessThanSign;
Ok(ControlToken::Continue)
}
Some('>') => {
slf.emit_char('>');
slf.state = State::ScriptData;
Ok(ControlToken::Continue)
}
Some('\0') => {
slf.emit_error(Error::UnexpectedNullCharacter);
slf.state = State::ScriptDataDoubleEscaped;
slf.emit_char_for_source_char('\u{fffd}', '\0');
Ok(ControlToken::Continue)
}
None => {
slf.emit_error(Error::EofInScriptHtmlCommentLikeText);
Ok(ControlToken::Eof)
}
Some(x) => {
slf.state = State::ScriptDataDoubleEscaped;
slf.emit_char(x);
Ok(ControlToken::Continue)
}
},
State::ScriptDataDoubleEscapedLessThanSign => match slf.read_char()? {
Some('/') => {
slf.temporary_buffer.clear();
slf.state = State::ScriptDataDoubleEscapeEnd;
slf.emit_char('/');
Ok(ControlToken::Continue)
}
c => {
slf.state = State::ScriptDataDoubleEscaped;
slf.unread_char(c);
Ok(ControlToken::Continue)
}
},
State::ScriptDataDoubleEscapeEnd => match slf.read_char()? {
Some(x @ whitespace_pat!() | x @ '/' | x @ '>') => {
if slf.temporary_buffer == "script" {
slf.state = State::ScriptDataEscaped;
} else {
slf.state = State::ScriptDataDoubleEscaped;
}
slf.emit_char(x);
Ok(ControlToken::Continue)
}
Some(x) if x.is_ascii_alphabetic() => {
slf.temporary_buffer.push(x.to_ascii_lowercase());
slf.emit_char(x);
Ok(ControlToken::Continue)
}
c => {
slf.state = State::ScriptDataDoubleEscaped;
slf.unread_char(c);
Ok(ControlToken::Continue)
}
},
State::BeforeAttributeName => match slf.read_char()? {
Some(whitespace_pat!()) => Ok(ControlToken::Continue),
c @ Some('/' | '>') | c @ None => {
slf.state = State::AfterAttributeName;
slf.unread_char(c);
Ok(ControlToken::Continue)
}
Some('=') => {
slf.emit_error(Error::UnexpectedEqualsSignBeforeAttributeName);
slf.emitter.init_attribute_name(slf.reader.position());
slf.emitter.push_attribute_name("=");
slf.state = State::AttributeName;
Ok(ControlToken::Continue)
}
Some(x) => {
slf.emitter.init_attribute_name(slf.position_before_match);
slf.state = State::AttributeName;
slf.unread_char(Some(x));
Ok(ControlToken::Continue)
}
},
State::AttributeName => match slf.read_char()? {
c @ Some(whitespace_pat!() | '/' | '>') | c @ None => {
slf.emitter
.terminate_attribute_name(slf.position_before_match);
slf.state = State::AfterAttributeName;
slf.unread_char(c);
Ok(ControlToken::Continue)
}
Some('=') => {
slf.emitter
.terminate_attribute_name(slf.position_before_match);
slf.state = State::BeforeAttributeValue;
Ok(ControlToken::Continue)
}
Some('\0') => {
slf.emit_error(Error::UnexpectedNullCharacter);
slf.emitter.push_attribute_name("\u{fffd}");
Ok(ControlToken::Continue)
}
Some(x @ '"' | x @ '\'' | x @ '<') => {
slf.emit_error(Error::UnexpectedCharacterInAttributeName);
slf.emitter
.push_attribute_name(ctostr!(x.to_ascii_lowercase()));
Ok(ControlToken::Continue)
}
Some(x) => {
slf.emitter
.push_attribute_name(ctostr!(x.to_ascii_lowercase()));
Ok(ControlToken::Continue)
}
},
State::AfterAttributeName => match slf.read_char()? {
Some(whitespace_pat!()) => Ok(ControlToken::Continue),
Some('/') => {
slf.state = State::SelfClosingStartTag;
Ok(ControlToken::Continue)
}
Some('=') => {
slf.state = State::BeforeAttributeValue;
Ok(ControlToken::Continue)
}
Some('>') => {
slf.state = State::Data;
slf.emit_current_tag();
Ok(ControlToken::Continue)
}
None => {
slf.emit_error(Error::EofInTag);
Ok(ControlToken::Eof)
}
Some(x) => {
slf.emitter.init_attribute_name(slf.position_before_match);
slf.state = State::AttributeName;
slf.unread_char(Some(x));
Ok(ControlToken::Continue)
}
},
State::BeforeAttributeValue => match slf.read_char()? {
Some(whitespace_pat!()) => Ok(ControlToken::Continue),
Some('"') => {
slf.emitter
.init_attribute_value(AttrValueSyntax::DoubleQuoted, slf.reader.position());
slf.state = State::AttributeValueDoubleQuoted;
Ok(ControlToken::Continue)
}
Some('\'') => {
slf.emitter
.init_attribute_value(AttrValueSyntax::SingleQuoted, slf.reader.position());
slf.state = State::AttributeValueSingleQuoted;
Ok(ControlToken::Continue)
}
Some('>') => {
slf.emit_error(Error::MissingAttributeValue);
slf.state = State::Data;
slf.emit_current_tag();
Ok(ControlToken::Continue)
}
c => {
slf.emitter
.init_attribute_value(AttrValueSyntax::Unquoted, slf.position_before_match);
slf.state = State::AttributeValueUnquoted;
slf.unread_char(c);
Ok(ControlToken::Continue)
}
},
State::AttributeValueDoubleQuoted => match slf.read_char()? {
Some('"') => {
slf.emitter.terminate_attribute_value(
slf.reader.position() - slf.reader.len_of_char_in_current_encoding('"'),
);
slf.state = State::AfterAttributeValueQuoted;
Ok(ControlToken::Continue)
}
Some('&') => {
slf.return_state = Some(State::AttributeValueDoubleQuoted);
slf.state = State::CharacterReference;
Ok(ControlToken::Continue)
}
Some('\0') => {
slf.emit_error(Error::UnexpectedNullCharacter);
slf.emitter.push_attribute_value("\u{fffd}");
Ok(ControlToken::Continue)
}
None => {
slf.emit_error(Error::EofInTag);
Ok(ControlToken::Eof)
}
Some(x) => {
slf.emitter.push_attribute_value(ctostr!(x));
Ok(ControlToken::Continue)
}
},
State::AttributeValueSingleQuoted => match slf.read_char()? {
Some('\'') => {
slf.emitter.terminate_attribute_value(
slf.reader.position() - slf.reader.len_of_char_in_current_encoding('\''),
);
slf.state = State::AfterAttributeValueQuoted;
Ok(ControlToken::Continue)
}
Some('&') => {
slf.return_state = Some(State::AttributeValueSingleQuoted);
slf.state = State::CharacterReference;
Ok(ControlToken::Continue)
}
Some('\0') => {
slf.emit_error(Error::UnexpectedNullCharacter);
slf.emitter.push_attribute_value("\u{fffd}");
Ok(ControlToken::Continue)
}
None => {
slf.emit_error(Error::EofInTag);
Ok(ControlToken::Eof)
}
Some(x) => {
slf.emitter.push_attribute_value(ctostr!(x));
Ok(ControlToken::Continue)
}
},
State::AttributeValueUnquoted => match slf.read_char()? {
Some(whitespace_pat!()) => {
slf.emitter.terminate_attribute_value(
slf.reader.position() - slf.reader.len_of_char_in_current_encoding(' '),
);
slf.state = State::BeforeAttributeName;
Ok(ControlToken::Continue)
}
Some('&') => {
slf.return_state = Some(State::AttributeValueUnquoted);
slf.state = State::CharacterReference;
Ok(ControlToken::Continue)
}
Some('>') => {
slf.state = State::Data;
slf.emit_current_tag();
Ok(ControlToken::Continue)
}
Some('\0') => {
slf.emit_error(Error::UnexpectedNullCharacter);
slf.emitter.push_attribute_value("\u{fffd}");
Ok(ControlToken::Continue)
}
Some(x @ '"' | x @ '\'' | x @ '<' | x @ '=' | x @ '\u{60}') => {
slf.emit_error(Error::UnexpectedCharacterInUnquotedAttributeValue);
slf.emitter.push_attribute_value(ctostr!(x));
Ok(ControlToken::Continue)
}
None => {
slf.emit_error(Error::EofInTag);
Ok(ControlToken::Eof)
}
Some(x) => {
slf.emitter.push_attribute_value(ctostr!(x));
Ok(ControlToken::Continue)
}
},
State::AfterAttributeValueQuoted => match slf.read_char()? {
Some(whitespace_pat!()) => {
slf.state = State::BeforeAttributeName;
Ok(ControlToken::Continue)
}
Some('/') => {
slf.state = State::SelfClosingStartTag;
Ok(ControlToken::Continue)
}
Some('>') => {
slf.state = State::Data;
slf.emit_current_tag();
Ok(ControlToken::Continue)
}
None => {
slf.emit_error(Error::EofInTag);
Ok(ControlToken::Eof)
}
Some(x) => {
slf.emit_error(Error::MissingWhitespaceBetweenAttributes);
slf.state = State::BeforeAttributeName;
slf.unread_char(Some(x));
Ok(ControlToken::Continue)
}
},
State::SelfClosingStartTag => match slf.read_char()? {
Some('>') => {
slf.emitter.set_self_closing(
slf.position_before_match - slf.reader.len_of_char_in_current_encoding('/')
..slf.position_before_match,
);
slf.state = State::Data;
slf.emit_current_tag();
Ok(ControlToken::Continue)
}
None => {
slf.emit_error(Error::EofInTag);
Ok(ControlToken::Eof)
}
Some(x) => {
slf.emit_error(Error::UnexpectedSolidusInTag);
slf.state = State::BeforeAttributeName;
slf.unread_char(Some(x));
Ok(ControlToken::Continue)
}
},
State::BogusComment => match slf.read_char()? {
Some('>') => {
slf.state = State::Data;
slf.emitter.emit_current_comment(slf.position_before_match);
Ok(ControlToken::Continue)
}
None => {
slf.emitter.emit_current_comment(slf.position_before_match);
Ok(ControlToken::Eof)
}
Some('\0') => {
slf.emit_error(Error::UnexpectedNullCharacter);
slf.emitter.push_comment("\u{fffd}");
Ok(ControlToken::Continue)
}
Some(x) => {
slf.emitter.push_comment(ctostr!(x));
Ok(ControlToken::Continue)
}
},
State::MarkupDeclarationOpen => match slf.read_char()? {
Some('-') if slf.try_read_string("-", true)? => {
slf.emitter.init_comment(slf.reader.position());
slf.state = State::CommentStart;
Ok(ControlToken::Continue)
}
Some('d' | 'D') if slf.try_read_string("octype", false)? => {
slf.state = State::Doctype;
Ok(ControlToken::Continue)
}
Some('[') if slf.try_read_string("CDATA[", true)? => Ok(ControlToken::CdataOpen),
c => {
slf.emit_error(Error::IncorrectlyOpenedComment);
slf.emitter.init_comment(slf.position_before_match);
slf.state = State::BogusComment;
slf.unread_char(c);
Ok(ControlToken::Continue)
}
},
State::CommentStart => match slf.read_char()? {
Some('-') => {
slf.state = State::CommentStartDash;
Ok(ControlToken::Continue)
}
Some('>') => {
slf.emit_error(Error::AbruptClosingOfEmptyComment);
slf.state = State::Data;
slf.emitter.emit_current_comment(slf.position_before_match);
Ok(ControlToken::Continue)
}
c => {
slf.unread_char(c);
slf.state = State::Comment;
Ok(ControlToken::Continue)
}
},
State::CommentStartDash => match slf.read_char()? {
Some('-') => {
slf.state = State::CommentEnd;
Ok(ControlToken::Continue)
}
Some(c @ '>') => {
slf.emit_error(Error::AbruptClosingOfEmptyComment);
slf.state = State::Data;
slf.emitter.emit_current_comment(
slf.position_before_match - slf.reader.len_of_char_in_current_encoding(c),
);
Ok(ControlToken::Continue)
}
None => {
slf.emit_error(Error::EofInComment);
slf.emitter.emit_current_comment(
slf.position_before_match - slf.reader.len_of_char_in_current_encoding('-'),
);
Ok(ControlToken::Eof)
}
c @ Some(_) => {
slf.emitter.push_comment("-");
slf.unread_char(c);
slf.state = State::Comment;
Ok(ControlToken::Continue)
}
},
State::Comment => match slf.read_char()? {
Some('<') => {
slf.emitter.push_comment("<");
slf.state = State::CommentLessThanSign;
Ok(ControlToken::Continue)
}
Some('-') => {
slf.some_offset = slf.position_before_match;
slf.state = State::CommentEndDash;
Ok(ControlToken::Continue)
}
Some('\0') => {
slf.emit_error(Error::UnexpectedNullCharacter);
slf.emitter.push_comment("\u{fffd}");
Ok(ControlToken::Continue)
}
None => {
slf.emit_error(Error::EofInComment);
slf.emitter.emit_current_comment(slf.reader.position());
Ok(ControlToken::Eof)
}
Some(x) => {
slf.emitter.push_comment(ctostr!(x));
Ok(ControlToken::Continue)
}
},
State::CommentLessThanSign => match slf.read_char()? {
Some('!') => {
slf.emitter.push_comment("!");
slf.state = State::CommentLessThanSignBang;
Ok(ControlToken::Continue)
}
Some('<') => {
slf.emitter.push_comment("<");
Ok(ControlToken::Continue)
}
c => {
slf.unread_char(c);
slf.state = State::Comment;
Ok(ControlToken::Continue)
}
},
State::CommentLessThanSignBang => match slf.read_char()? {
Some('-') => {
slf.state = State::CommentLessThanSignBangDash;
Ok(ControlToken::Continue)
}
c => {
slf.unread_char(c);
slf.state = State::Comment;
Ok(ControlToken::Continue)
}
},
State::CommentLessThanSignBangDash => match slf.read_char()? {
Some('-') => {
slf.state = State::CommentLessThanSignBangDashDash;
Ok(ControlToken::Continue)
}
c => {
slf.unread_char(c);
slf.state = State::CommentEndDash;
Ok(ControlToken::Continue)
}
},
State::CommentLessThanSignBangDashDash => match slf.read_char()? {
c @ Some('>') | c @ None => {
slf.unread_char(c);
slf.state = State::CommentEnd;
Ok(ControlToken::Continue)
}
c => {
slf.emit_error(Error::NestedComment);
slf.unread_char(c);
slf.state = State::CommentEnd;
Ok(ControlToken::Continue)
}
},
State::CommentEndDash => match slf.read_char()? {
Some('-') => {
slf.state = State::CommentEnd;
Ok(ControlToken::Continue)
}
None => {
slf.emit_error(Error::EofInComment);
slf.emitter.emit_current_comment(slf.some_offset);
Ok(ControlToken::Eof)
}
c => {
slf.emitter.push_comment("-");
slf.unread_char(c);
slf.state = State::Comment;
Ok(ControlToken::Continue)
}
},
State::CommentEnd => match slf.read_char()? {
Some('>') => {
slf.state = State::Data;
slf.emitter.emit_current_comment(slf.some_offset);
Ok(ControlToken::Continue)
}
Some('!') => {
slf.state = State::CommentEndBang;
Ok(ControlToken::Continue)
}
Some('-') => {
slf.emitter.push_comment("-");
Ok(ControlToken::Continue)
}
None => {
slf.emit_error(Error::EofInComment);
slf.emitter.emit_current_comment(slf.some_offset);
Ok(ControlToken::Eof)
}
c @ Some(_) => {
slf.emitter.push_comment("-");
slf.emitter.push_comment("-");
slf.unread_char(c);
slf.state = State::Comment;
Ok(ControlToken::Continue)
}
},
State::CommentEndBang => match slf.read_char()? {
Some('-') => {
slf.emitter.push_comment("-");
slf.emitter.push_comment("-");
slf.emitter.push_comment("!");
slf.state = State::CommentEndDash;
Ok(ControlToken::Continue)
}
Some('>') => {
slf.emit_error(Error::IncorrectlyClosedComment);
slf.state = State::Data;
slf.emitter.emit_current_comment(slf.some_offset);
Ok(ControlToken::Continue)
}
None => {
slf.emit_error(Error::EofInComment);
slf.emitter.emit_current_comment(slf.some_offset);
Ok(ControlToken::Eof)
}
c @ Some(_) => {
slf.emitter.push_comment("-");
slf.emitter.push_comment("-");
slf.emitter.push_comment("!");
slf.state = State::Comment;
slf.unread_char(c);
Ok(ControlToken::Continue)
}
},
State::Doctype => match slf.read_char()? {
Some(whitespace_pat!()) => {
slf.state = State::BeforeDoctypeName;
Ok(ControlToken::Continue)
}
c @ Some('>') => {
slf.unread_char(c);
slf.state = State::BeforeDoctypeName;
Ok(ControlToken::Continue)
}
None => {
slf.emit_error(Error::EofInDoctype);
slf.init_doctype();
slf.emitter.set_force_quirks();
slf.emitter.emit_current_doctype(slf.reader.position());
Ok(ControlToken::Eof)
}
c @ Some(_) => {
slf.emit_error(Error::MissingWhitespaceBeforeDoctypeName);
slf.unread_char(c);
slf.state = State::BeforeDoctypeName;
Ok(ControlToken::Continue)
}
},
State::BeforeDoctypeName => match slf.read_char()? {
Some(whitespace_pat!()) => Ok(ControlToken::Continue),
Some('\0') => {
slf.emit_error(Error::UnexpectedNullCharacter);
slf.init_doctype();
slf.emitter.init_doctype_name(slf.position_before_match);
slf.emitter.push_doctype_name("\u{fffd}");
slf.state = State::DoctypeName;
Ok(ControlToken::Continue)
}
Some('>') => {
slf.emit_error(Error::MissingDoctypeName);
slf.init_doctype();
slf.emitter.set_force_quirks();
slf.state = State::Data;
slf.emitter.emit_current_doctype(slf.reader.position());
Ok(ControlToken::Continue)
}
None => {
slf.emit_error(Error::EofInDoctype);
slf.init_doctype();
slf.emitter.set_force_quirks();
slf.emitter.emit_current_doctype(slf.reader.position());
Ok(ControlToken::Eof)
}
Some(x) => {
slf.init_doctype();
slf.emitter.init_doctype_name(slf.position_before_match);
slf.emitter
.push_doctype_name(ctostr!(x.to_ascii_lowercase()));
slf.state = State::DoctypeName;
Ok(ControlToken::Continue)
}
},
State::DoctypeName => match slf.read_char()? {
Some(whitespace_pat!()) => {
slf.emitter
.terminate_doctype_name(slf.position_before_match);
slf.state = State::AfterDoctypeName;
Ok(ControlToken::Continue)
}
Some('>') => {
slf.emitter
.terminate_doctype_name(slf.position_before_match);
slf.state = State::Data;
slf.emitter.emit_current_doctype(slf.reader.position());
Ok(ControlToken::Continue)
}
Some('\0') => {
slf.emit_error(Error::UnexpectedNullCharacter);
slf.emitter.push_doctype_name("\u{fffd}");
Ok(ControlToken::Continue)
}
None => {
slf.emit_error(Error::EofInDoctype);
slf.emitter
.terminate_doctype_name(slf.position_before_match);
slf.emitter.set_force_quirks();
slf.emitter.emit_current_doctype(slf.reader.position());
Ok(ControlToken::Eof)
}
Some(x) => {
slf.emitter
.push_doctype_name(ctostr!(x.to_ascii_lowercase()));
Ok(ControlToken::Continue)
}
},
State::AfterDoctypeName => match slf.read_char()? {
Some(whitespace_pat!()) => Ok(ControlToken::Continue),
Some('>') => {
slf.state = State::Data;
slf.emitter.emit_current_doctype(slf.reader.position());
Ok(ControlToken::Continue)
}
None => {
slf.emit_error(Error::EofInDoctype);
slf.emitter.set_force_quirks();
slf.emitter.emit_current_doctype(slf.reader.position());
Ok(ControlToken::Eof)
}
Some('p' | 'P') if slf.try_read_string("ublic", false)? => {
slf.state = State::AfterDoctypePublicKeyword;
Ok(ControlToken::Continue)
}
Some('s' | 'S') if slf.try_read_string("ystem", false)? => {
slf.state = State::AfterDoctypeSystemKeyword;
Ok(ControlToken::Continue)
}
c @ Some(_) => {
slf.emit_error(Error::InvalidCharacterSequenceAfterDoctypeName);
slf.emitter.set_force_quirks();
slf.unread_char(c);
slf.state = State::BogusDoctype;
Ok(ControlToken::Continue)
}
},
State::AfterDoctypePublicKeyword => match slf.read_char()? {
Some(whitespace_pat!()) => {
slf.state = State::BeforeDoctypePublicIdentifier;
Ok(ControlToken::Continue)
}
Some('"') => {
slf.emit_error(Error::MissingWhitespaceAfterDoctypePublicKeyword);
slf.emitter.init_doctype_public_id(slf.reader.position());
slf.state = State::DoctypePublicIdentifierDoubleQuoted;
Ok(ControlToken::Continue)
}
Some('\'') => {
slf.emit_error(Error::MissingWhitespaceAfterDoctypePublicKeyword);
slf.emitter.init_doctype_public_id(slf.reader.position());
slf.state = State::DoctypePublicIdentifierSingleQuoted;
Ok(ControlToken::Continue)
}
Some('>') => {
slf.emit_error(Error::MissingDoctypePublicIdentifier);
slf.emitter.set_force_quirks();
slf.state = State::Data;
slf.emitter.emit_current_doctype(slf.reader.position());
Ok(ControlToken::Continue)
}
None => {
slf.emit_error(Error::EofInDoctype);
slf.emitter.set_force_quirks();
slf.emitter.emit_current_doctype(slf.reader.position());
Ok(ControlToken::Eof)
}
c @ Some(_) => {
slf.emit_error(Error::MissingQuoteBeforeDoctypePublicIdentifier);
slf.emitter.set_force_quirks();
slf.unread_char(c);
slf.state = State::BogusDoctype;
Ok(ControlToken::Continue)
}
},
State::BeforeDoctypePublicIdentifier => match slf.read_char()? {
Some(whitespace_pat!()) => Ok(ControlToken::Continue),
Some('"') => {
slf.emitter.init_doctype_public_id(slf.reader.position());
slf.state = State::DoctypePublicIdentifierDoubleQuoted;
Ok(ControlToken::Continue)
}
Some('\'') => {
slf.emitter.init_doctype_public_id(slf.reader.position());
slf.state = State::DoctypePublicIdentifierSingleQuoted;
Ok(ControlToken::Continue)
}
Some('>') => {
slf.emit_error(Error::MissingDoctypePublicIdentifier);
slf.emitter.set_force_quirks();
slf.state = State::Data;
slf.emitter.emit_current_doctype(slf.reader.position());
Ok(ControlToken::Continue)
}
None => {
slf.emit_error(Error::EofInDoctype);
slf.emitter.set_force_quirks();
slf.emitter.emit_current_doctype(slf.reader.position());
Ok(ControlToken::Eof)
}
c @ Some(_) => {
slf.emit_error(Error::MissingQuoteBeforeDoctypePublicIdentifier);
slf.emitter.set_force_quirks();
slf.unread_char(c);
slf.state = State::BogusDoctype;
Ok(ControlToken::Continue)
}
},
State::DoctypePublicIdentifierDoubleQuoted => match slf.read_char()? {
Some('"') => {
slf.emitter
.terminate_doctype_public_id(slf.position_before_match);
slf.state = State::AfterDoctypePublicIdentifier;
Ok(ControlToken::Continue)
}
Some('\0') => {
slf.emit_error(Error::UnexpectedNullCharacter);
slf.emitter.push_doctype_public_id("\u{fffd}");
Ok(ControlToken::Continue)
}
Some('>') => {
slf.emitter
.terminate_doctype_public_id(slf.position_before_match);
slf.emit_error(Error::AbruptDoctypePublicIdentifier);
slf.emitter.set_force_quirks();
slf.state = State::Data;
slf.emitter.emit_current_doctype(slf.reader.position());
Ok(ControlToken::Continue)
}
None => {
slf.emitter
.terminate_doctype_public_id(slf.reader.position());
slf.emit_error(Error::EofInDoctype);
slf.emitter.set_force_quirks();
slf.emitter.emit_current_doctype(slf.reader.position());
Ok(ControlToken::Eof)
}
Some(x) => {
slf.emitter.push_doctype_public_id(ctostr!(x));
Ok(ControlToken::Continue)
}
},
State::DoctypePublicIdentifierSingleQuoted => match slf.read_char()? {
Some('\'') => {
slf.emitter
.terminate_doctype_public_id(slf.position_before_match);
slf.state = State::AfterDoctypePublicIdentifier;
Ok(ControlToken::Continue)
}
Some('\0') => {
slf.emit_error(Error::UnexpectedNullCharacter);
slf.emitter.push_doctype_public_id("\u{fffd}");
Ok(ControlToken::Continue)
}
Some('>') => {
slf.emitter
.terminate_doctype_public_id(slf.position_before_match);
slf.emit_error(Error::AbruptDoctypePublicIdentifier);
slf.emitter.set_force_quirks();
slf.state = State::Data;
slf.emitter.emit_current_doctype(slf.reader.position());
Ok(ControlToken::Continue)
}
None => {
slf.emitter
.terminate_doctype_public_id(slf.reader.position());
slf.emit_error(Error::EofInDoctype);
slf.emitter.set_force_quirks();
slf.emitter.emit_current_doctype(slf.reader.position());
Ok(ControlToken::Eof)
}
Some(x) => {
slf.emitter.push_doctype_public_id(ctostr!(x));
Ok(ControlToken::Continue)
}
},
State::AfterDoctypePublicIdentifier => match slf.read_char()? {
Some(whitespace_pat!()) => {
slf.state = State::BetweenDoctypePublicAndSystemIdentifiers;
Ok(ControlToken::Continue)
}
Some('>') => {
slf.state = State::Data;
slf.emitter.emit_current_doctype(slf.reader.position());
Ok(ControlToken::Continue)
}
Some('"') => {
slf.emit_error(Error::MissingWhitespaceBetweenDoctypePublicAndSystemIdentifiers);
slf.emitter.init_doctype_system_id(slf.reader.position());
slf.state = State::DoctypeSystemIdentifierDoubleQuoted;
Ok(ControlToken::Continue)
}
Some('\'') => {
slf.emit_error(Error::MissingWhitespaceBetweenDoctypePublicAndSystemIdentifiers);
slf.emitter.init_doctype_system_id(slf.reader.position());
slf.state = State::DoctypeSystemIdentifierSingleQuoted;
Ok(ControlToken::Continue)
}
None => {
slf.emit_error(Error::EofInDoctype);
slf.emitter.set_force_quirks();
slf.emitter.emit_current_doctype(slf.reader.position());
Ok(ControlToken::Eof)
}
c @ Some(_) => {
slf.emit_error(Error::MissingQuoteBeforeDoctypeSystemIdentifier);
slf.emitter.set_force_quirks();
slf.unread_char(c);
slf.state = State::BogusDoctype;
Ok(ControlToken::Continue)
}
},
State::BetweenDoctypePublicAndSystemIdentifiers => match slf.read_char()? {
Some(whitespace_pat!()) => Ok(ControlToken::Continue),
Some('>') => {
slf.state = State::Data;
slf.emitter.emit_current_doctype(slf.reader.position());
Ok(ControlToken::Continue)
}
Some('"') => {
slf.emitter.init_doctype_system_id(slf.reader.position());
slf.state = State::DoctypeSystemIdentifierDoubleQuoted;
Ok(ControlToken::Continue)
}
Some('\'') => {
slf.emitter.init_doctype_system_id(slf.reader.position());
slf.state = State::DoctypeSystemIdentifierSingleQuoted;
Ok(ControlToken::Continue)
}
None => {
slf.emit_error(Error::EofInDoctype);
slf.emitter.set_force_quirks();
slf.emitter.emit_current_doctype(slf.reader.position());
Ok(ControlToken::Eof)
}
c @ Some(_) => {
slf.emit_error(Error::MissingQuoteBeforeDoctypeSystemIdentifier);
slf.emitter.set_force_quirks();
slf.state = State::BogusDoctype;
slf.unread_char(c);
Ok(ControlToken::Continue)
}
},
State::AfterDoctypeSystemKeyword => match slf.read_char()? {
Some(whitespace_pat!()) => {
slf.state = State::BeforeDoctypeSystemIdentifier;
Ok(ControlToken::Continue)
}
Some('"') => {
slf.emit_error(Error::MissingWhitespaceAfterDoctypeSystemKeyword);
slf.emitter.init_doctype_system_id(slf.reader.position());
slf.state = State::DoctypeSystemIdentifierDoubleQuoted;
Ok(ControlToken::Continue)
}
Some('\'') => {
slf.emit_error(Error::MissingWhitespaceAfterDoctypeSystemKeyword);
slf.emitter.init_doctype_system_id(slf.reader.position());
slf.state = State::DoctypeSystemIdentifierSingleQuoted;
Ok(ControlToken::Continue)
}
Some('>') => {
slf.emit_error(Error::MissingDoctypeSystemIdentifier);
slf.emitter.set_force_quirks();
slf.state = State::Data;
slf.emitter.emit_current_doctype(slf.reader.position());
Ok(ControlToken::Continue)
}
None => {
slf.emit_error(Error::EofInDoctype);
slf.emitter.set_force_quirks();
slf.emitter.emit_current_doctype(slf.reader.position());
Ok(ControlToken::Eof)
}
c @ Some(_) => {
slf.emit_error(Error::MissingQuoteBeforeDoctypeSystemIdentifier);
slf.emitter.set_force_quirks();
slf.state = State::BogusDoctype;
slf.unread_char(c);
Ok(ControlToken::Continue)
}
},
State::BeforeDoctypeSystemIdentifier => match slf.read_char()? {
Some(whitespace_pat!()) => Ok(ControlToken::Continue),
Some('"') => {
slf.emitter.init_doctype_system_id(slf.reader.position());
slf.state = State::DoctypeSystemIdentifierDoubleQuoted;
Ok(ControlToken::Continue)
}
Some('\'') => {
slf.emitter.init_doctype_system_id(slf.reader.position());
slf.state = State::DoctypeSystemIdentifierSingleQuoted;
Ok(ControlToken::Continue)
}
Some('>') => {
slf.emit_error(Error::MissingDoctypeSystemIdentifier);
slf.emitter.set_force_quirks();
slf.state = State::Data;
slf.emitter.emit_current_doctype(slf.reader.position());
Ok(ControlToken::Continue)
}
None => {
slf.emit_error(Error::EofInDoctype);
slf.emitter.set_force_quirks();
slf.emitter.emit_current_doctype(slf.reader.position());
Ok(ControlToken::Eof)
}
c @ Some(_) => {
slf.emit_error(Error::MissingQuoteBeforeDoctypeSystemIdentifier);
slf.emitter.set_force_quirks();
slf.state = State::BogusDoctype;
slf.unread_char(c);
Ok(ControlToken::Continue)
}
},
State::DoctypeSystemIdentifierDoubleQuoted => match slf.read_char()? {
Some('"') => {
slf.emitter
.terminate_doctype_system_id(slf.position_before_match);
slf.state = State::AfterDoctypeSystemIdentifier;
Ok(ControlToken::Continue)
}
Some('\0') => {
slf.emit_error(Error::UnexpectedNullCharacter);
slf.emitter.push_doctype_system_id("\u{fffd}");
Ok(ControlToken::Continue)
}
Some('>') => {
slf.emitter
.terminate_doctype_system_id(slf.position_before_match);
slf.emit_error(Error::AbruptDoctypeSystemIdentifier);
slf.emitter.set_force_quirks();
slf.state = State::Data;
slf.emitter.emit_current_doctype(slf.reader.position());
Ok(ControlToken::Continue)
}
None => {
slf.emitter
.terminate_doctype_system_id(slf.reader.position());
slf.emit_error(Error::EofInDoctype);
slf.emitter.set_force_quirks();
slf.emitter.emit_current_doctype(slf.reader.position());
Ok(ControlToken::Eof)
}
Some(x) => {
slf.emitter.push_doctype_system_id(ctostr!(x));
Ok(ControlToken::Continue)
}
},
State::DoctypeSystemIdentifierSingleQuoted => match slf.read_char()? {
Some('\'') => {
slf.emitter
.terminate_doctype_system_id(slf.position_before_match);
slf.state = State::AfterDoctypeSystemIdentifier;
Ok(ControlToken::Continue)
}
Some('\0') => {
slf.emit_error(Error::UnexpectedNullCharacter);
slf.emitter.push_doctype_system_id("\u{fffd}");
Ok(ControlToken::Continue)
}
Some('>') => {
slf.emitter
.terminate_doctype_system_id(slf.position_before_match);
slf.emit_error(Error::AbruptDoctypeSystemIdentifier);
slf.emitter.set_force_quirks();
slf.state = State::Data;
slf.emitter.emit_current_doctype(slf.reader.position());
Ok(ControlToken::Continue)
}
None => {
slf.emitter
.terminate_doctype_system_id(slf.reader.position());
slf.emit_error(Error::EofInDoctype);
slf.emitter.set_force_quirks();
slf.emitter.emit_current_doctype(slf.reader.position());
Ok(ControlToken::Eof)
}
Some(x) => {
slf.emitter.push_doctype_system_id(ctostr!(x));
Ok(ControlToken::Continue)
}
},
State::AfterDoctypeSystemIdentifier => match slf.read_char()? {
Some(whitespace_pat!()) => Ok(ControlToken::Continue),
Some('>') => {
slf.state = State::Data;
slf.emitter.emit_current_doctype(slf.reader.position());
Ok(ControlToken::Continue)
}
None => {
slf.emit_error(Error::EofInDoctype);
slf.emitter.set_force_quirks();
slf.emitter.emit_current_doctype(slf.reader.position());
Ok(ControlToken::Eof)
}
c @ Some(_) => {
slf.emit_error(Error::UnexpectedCharacterAfterDoctypeSystemIdentifier);
slf.unread_char(c);
slf.state = State::BogusDoctype;
Ok(ControlToken::Continue)
}
},
State::BogusDoctype => match slf.read_char()? {
Some('>') => {
slf.state = State::Data;
slf.emitter.emit_current_doctype(slf.reader.position());
Ok(ControlToken::Continue)
}
Some('\0') => {
slf.emit_error(Error::UnexpectedNullCharacter);
Ok(ControlToken::Continue)
}
None => {
slf.emitter.emit_current_doctype(slf.reader.position());
Ok(ControlToken::Eof)
}
Some(_) => Ok(ControlToken::Continue),
},
State::CdataSection => match slf.read_char()? {
Some(']') => {
slf.state = State::CdataSectionBracket;
Ok(ControlToken::Continue)
}
None => {
slf.emit_error(Error::EofInCdata);
Ok(ControlToken::Eof)
}
Some(x) => {
slf.emit_char(x);
Ok(ControlToken::Continue)
}
},
State::CdataSectionBracket => match slf.read_char()? {
Some(']') => {
slf.state = State::CdataSectionEnd;
slf.some_offset = slf.position_before_match;
Ok(ControlToken::Continue)
}
c => {
slf.emit_char(']');
slf.state = State::CdataSection;
slf.unread_char(c);
Ok(ControlToken::Continue)
}
},
State::CdataSectionEnd => match slf.read_char()? {
Some(']') => {
slf.emit_char(']');
Ok(ControlToken::Continue)
}
Some('>') => {
slf.state = State::Data;
Ok(ControlToken::Continue)
}
c => {
slf.emit_chars(b"]]");
slf.unread_char(c);
slf.state = State::CdataSection;
Ok(ControlToken::Continue)
}
},
State::CharacterReference => {
slf.some_offset =
slf.reader.position() - slf.reader.len_of_char_in_current_encoding('&');
slf.temporary_buffer.clear();
slf.temporary_buffer.push('&');
match slf.read_char()? {
Some(x) if x.is_ascii_alphanumeric() => {
slf.unread_char(Some(x));
slf.state = State::NamedCharacterReference;
Ok(ControlToken::Continue)
}
Some('#') => {
slf.temporary_buffer.push('#');
slf.state = State::NumericCharacterReference;
Ok(ControlToken::Continue)
}
c => {
slf.flush_code_points_consumed_as_character_reference();
slf.state = slf.return_state.take().unwrap();
slf.unread_char(c);
Ok(ControlToken::Continue)
}
}
}
State::NamedCharacterReference => {
let first_char = slf.read_char()?.unwrap();
let Some(char_ref) =
try_read_character_reference(first_char, |x| slf.try_read_string(x, true))?
else {
slf.unread_char(Some(first_char));
debug_assert_eq!(slf.temporary_buffer, "&");
slf.temporary_buffer.clear();
if slf.is_consumed_as_part_of_an_attribute() {
slf.emitter.push_attribute_value("&");
} else {
slf.emitter.emit_char(
'&',
slf.some_offset
..slf.some_offset + slf.reader.len_of_char_in_current_encoding('&'),
);
}
slf.state = State::AmbiguousAmpersand;
return Ok(ControlToken::Continue);
};
slf.temporary_buffer.push(first_char);
slf.temporary_buffer.push_str(char_ref.name);
let char_ref_name_last_character = char_ref.name.chars().last();
let next_character = slf.read_char()?;
slf.unread_char(next_character);
if slf.is_consumed_as_part_of_an_attribute()
&& char_ref_name_last_character != Some(';')
&& matches!(next_character, Some(x) if x == '=' || x.is_ascii_alphanumeric())
{
slf.flush_code_points_consumed_as_character_reference();
slf.state = slf.return_state.take().unwrap();
Ok(ControlToken::Continue)
} else {
if char_ref_name_last_character != Some(';') {
slf.emit_error(Error::MissingSemicolonAfterCharacterReference);
}
if slf.is_consumed_as_part_of_an_attribute() {
slf.temporary_buffer.clear();
slf.temporary_buffer.push_str(char_ref.characters);
slf.emitter.push_attribute_value(&slf.temporary_buffer);
} else {
for c in char_ref.characters.chars() {
slf.emitter.emit_char(
c,
slf.some_offset
..slf.reader.position()
- slf.reader.len_of_char_in_current_encoding(c),
);
}
}
slf.state = slf.return_state.take().unwrap();
Ok(ControlToken::Continue)
}
}
State::AmbiguousAmpersand => match slf.read_char()? {
Some(x) if x.is_ascii_alphanumeric() => {
if slf.is_consumed_as_part_of_an_attribute() {
slf.emitter.push_attribute_value(ctostr!(x));
} else {
slf.emit_char(x);
}
Ok(ControlToken::Continue)
}
c @ Some(';') => {
slf.emit_error(Error::UnknownNamedCharacterReference);
slf.unread_char(c);
slf.state = slf.return_state.take().unwrap();
Ok(ControlToken::Continue)
}
c => {
slf.unread_char(c);
slf.state = slf.return_state.take().unwrap();
Ok(ControlToken::Continue)
}
},
State::NumericCharacterReference => {
slf.character_reference_code = 0;
match slf.read_char()? {
Some(x @ 'x' | x @ 'X') => {
slf.temporary_buffer.push(x);
slf.state = State::HexadecimalCharacterReferenceStart;
Ok(ControlToken::Continue)
}
c => {
slf.unread_char(c);
slf.state = State::DecimalCharacterReferenceStart;
Ok(ControlToken::Continue)
}
}
}
State::HexadecimalCharacterReferenceStart => match slf.read_char()? {
c @ Some('0'..='9' | 'A'..='F' | 'a'..='f') => {
slf.unread_char(c);
slf.state = State::HexadecimalCharacterReference;
Ok(ControlToken::Continue)
}
c => {
slf.emit_error(Error::AbsenceOfDigitsInNumericCharacterReference);
slf.flush_code_points_consumed_as_character_reference();
slf.unread_char(c);
slf.state = slf.return_state.take().unwrap();
Ok(ControlToken::Continue)
}
},
State::DecimalCharacterReferenceStart => match slf.read_char()? {
Some(x @ ascii_digit_pat!()) => {
slf.unread_char(Some(x));
slf.state = State::DecimalCharacterReference;
Ok(ControlToken::Continue)
}
c => {
slf.emit_error(Error::AbsenceOfDigitsInNumericCharacterReference);
slf.flush_code_points_consumed_as_character_reference();
slf.unread_char(c);
slf.state = slf.return_state.take().unwrap();
Ok(ControlToken::Continue)
}
},
State::HexadecimalCharacterReference => match slf.read_char()? {
Some(x @ ascii_digit_pat!()) => {
mutate_character_reference!(*16 + x - 0x0030);
Ok(ControlToken::Continue)
}
Some(x @ 'A'..='F') => {
mutate_character_reference!(*16 + x - 0x0037);
Ok(ControlToken::Continue)
}
Some(x @ 'a'..='f') => {
mutate_character_reference!(*16 + x - 0x0057);
Ok(ControlToken::Continue)
}
Some(';') => {
slf.state = State::NumericCharacterReferenceEnd;
Ok(ControlToken::Continue)
}
c => {
slf.emit_error(Error::MissingSemicolonAfterCharacterReference);
slf.unread_char(c);
slf.state = State::NumericCharacterReferenceEnd;
Ok(ControlToken::Continue)
}
},
State::DecimalCharacterReference => match slf.read_char()? {
Some(x @ ascii_digit_pat!()) => {
mutate_character_reference!(*10 + x - 0x0030);
Ok(ControlToken::Continue)
}
Some(';') => {
slf.state = State::NumericCharacterReferenceEnd;
Ok(ControlToken::Continue)
}
c => {
slf.emit_error(Error::MissingSemicolonAfterCharacterReference);
slf.unread_char(c);
slf.state = State::NumericCharacterReferenceEnd;
Ok(ControlToken::Continue)
}
},
State::NumericCharacterReferenceEnd => {
match slf.character_reference_code {
0x00 => {
slf.emit_error(Error::NullCharacterReference);
slf.character_reference_code = 0xfffd;
}
0x110000.. => {
slf.emit_error(Error::CharacterReferenceOutsideUnicodeRange);
slf.character_reference_code = 0xfffd;
}
surrogate_pat!() => {
slf.emit_error(Error::SurrogateCharacterReference);
slf.character_reference_code = 0xfffd;
}
noncharacter_pat!() => {
slf.emit_error(Error::NoncharacterCharacterReference);
}
x @ 0x000d | x @ control_pat!()
if !matches!(x, 0x0009 | 0x000a | 0x000c | 0x0020) =>
{
slf.emit_error(Error::ControlCharacterReference);
slf.character_reference_code = match x {
0x80 => 0x20AC, 0x82 => 0x201A, 0x83 => 0x0192, 0x84 => 0x201E, 0x85 => 0x2026, 0x86 => 0x2020, 0x87 => 0x2021, 0x88 => 0x02C6, 0x89 => 0x2030, 0x8A => 0x0160, 0x8B => 0x2039, 0x8C => 0x0152, 0x8E => 0x017D, 0x91 => 0x2018, 0x92 => 0x2019, 0x93 => 0x201C, 0x94 => 0x201D, 0x95 => 0x2022, 0x96 => 0x2013, 0x97 => 0x2014, 0x98 => 0x02DC, 0x99 => 0x2122, 0x9A => 0x0161, 0x9B => 0x203A, 0x9C => 0x0153, 0x9E => 0x017E, 0x9F => 0x0178, _ => slf.character_reference_code,
};
}
_ => (),
}
let char = std::char::from_u32(slf.character_reference_code).unwrap();
if slf.is_consumed_as_part_of_an_attribute() {
slf.temporary_buffer.clear();
slf.temporary_buffer.push(char);
slf.emitter.push_attribute_value(&slf.temporary_buffer);
} else {
slf.emitter
.emit_char(char, slf.some_offset..slf.reader.position());
}
slf.state = slf.return_state.take().unwrap();
Ok(ControlToken::Continue)
}
}
}
/// Decides how the tokenizer proceeds after a `<![CDATA[` opener.
///
/// When `adjusted_current_node_present_and_not_in_html_namespace` is `true`,
/// the machine simply switches to [`State::CdataSection`].
///
/// Otherwise an [`Error::CdataInHtmlContent`] error is emitted, a comment is
/// started at the reader's current position with `[CDATA[` as its initial
/// data, and the machine switches to [`State::BogusComment`].
///
/// NOTE(review): presumably invoked by the caller after `consume` returns
/// `ControlToken::CdataOpen` — confirm against the call site.
#[inline]
pub(super) fn handle_cdata_open<O, R, E>(
    slf: &mut Machine<R, O, E>,
    adjusted_current_node_present_and_not_in_html_namespace: bool,
) where
    O: Offset,
    R: Reader + Position<O>,
    E: Emitter<O>,
{
    // CDATA in HTML content: report it and treat the section as a bogus comment.
    if !adjusted_current_node_present_and_not_in_html_namespace {
        slf.emit_error(Error::CdataInHtmlContent);
        let comment_start = slf.reader.position();
        slf.emitter.init_comment(comment_start);
        slf.emitter.push_comment("[CDATA[");
        slf.state = State::BogusComment;
        return;
    }

    // Foreign content: a genuine CDATA section follows.
    slf.state = State::CdataSection;
}
/// A tiny LIFO stack with room for at most two `Copy` values.
///
/// The representation is `None` when empty, `Some((bottom, None))` when one
/// value is stored, and `Some((bottom, Some(top)))` when both slots are used.
#[derive(Debug, Default, Clone, Copy)]
struct Stack2<T: Copy>(Option<(T, Option<T>)>);

impl<T: Copy> Stack2<T> {
    /// Places `c` on top of the stack.
    ///
    /// # Panics
    ///
    /// Panics with `"stack full!"` if both slots are already occupied; the
    /// stored values are left untouched in that case.
    #[inline]
    fn push(&mut self, c: T) {
        let slots = match self.0 {
            Some((_, Some(_))) => panic!("stack full!"),
            Some((bottom, None)) => (bottom, Some(c)),
            None => (c, None),
        };
        self.0 = Some(slots);
    }

    /// Removes and returns the most recently pushed value, or `None` when
    /// the stack is empty.
    #[inline]
    fn pop(&mut self) -> Option<T> {
        // An empty stack stays empty and yields nothing.
        let (bottom, top) = self.0?;
        match top {
            Some(value) => {
                // Two values stored: drop the top one, keep the bottom.
                self.0 = Some((bottom, None));
                Some(value)
            }
            None => {
                // Only one value stored: the stack becomes empty.
                self.0 = None;
                Some(bottom)
            }
        }
    }
}