use htmlize::Context;
use rs_html_parser_tokenizer_tokens::{QuoteType, TokenizerToken, TokenizerTokenLocation};
use std::iter::Iterator;
use std::ops::Range;
use std::str;
/// Namespace for the ASCII byte values the tokenizer compares input bytes against.
struct CharCodes {}

impl CharCodes {
    const TAB: u8 = 9; // "\t"
    const NEW_LINE: u8 = 10; // "\n"
    // Form feed and carriage return are ASCII 12 (0x0C) and 13 (0x0D). They were previously
    // declared as 2 and 3, so "\x0c" and "\r" were never recognized as whitespace.
    const FORM_FEED: u8 = 12; // "\x0c"
    const CARRIAGE_RETURN: u8 = 13; // "\r"
    const SPACE: u8 = 32; // " "
    const EXCLAMATION_MARK: u8 = 33; // "!"
    const HASH: u8 = 35; // "#"
    const AMP: u8 = 38; // "&"
    const SINGLE_QUOTE: u8 = 39; // "'"
    const DOUBLE_QUOTE: u8 = 34; // '"'
    const DASH: u8 = 45; // "-"
    const SLASH: u8 = 47; // "/"
    const SEMI: u8 = 59; // ";"
    const LT: u8 = 60; // "<"
    const EQ: u8 = 61; // "="
    const GT: u8 = 62; // ">"
    const QUESTION_MARK: u8 = 63; // "?"
    const UNDERSCORE: u8 = 95; // "_"
    const OPENING_SQUARE_BRACKET: u8 = 91; // "["
}
/// Namespace for the fixed byte sequences the tokenizer matches against the input.
struct Sequences {}

impl Sequences {
    /// The empty sequence.
    const EMPTY: [u8; 0] = *b"";
    /// Remainder of a CDATA opener once `<![` has been consumed.
    const CDATA: [u8; 6] = *b"CDATA[";
    /// Terminator of a CDATA section.
    const CDATA_END: [u8; 3] = *b"]]>";
    /// Terminator of a comment.
    const COMMENT_END: [u8; 3] = *b"-->";
    /// Start of the closing tag of a `script` element.
    const SCRIPT_END: [u8; 8] = *b"</script";
    /// Start of the closing tag of a `style` element.
    const STYLE_END: [u8; 7] = *b"</style";
    /// Start of the closing tag of a `title` element.
    const TITLE_END: [u8; 7] = *b"</title";
}
/// States of the tokenizer's state machine. `Tokenizer::tokenizer_next` dispatches each
/// input byte to the handler that corresponds to the current state.
#[derive(Clone, Copy, PartialEq)]
enum State {
// Plain text between markup; the state a new tokenizer starts in.
Text = 1,
// Directly after `<`, and while reading an opening tag's name.
BeforeTagName, InTagName,
// After `/` inside an opening tag.
InSelfClosingTag,
// After `</`, while reading the closing tag's name, and while skipping to its `>`.
BeforeClosingTagName,
InClosingTagName,
AfterClosingTagName,
// Attribute names.
BeforeAttributeName,
InAttributeName,
// The closing quote of a quoted attribute value is being re-processed to emit `AttrEnd`.
InAttributeAfterDataSingleQuote,
InAttributeAfterDataDoubleQuote,
AfterAttributeName,
// An unquoted attribute value has ended; `AttrEnd` is emitted next.
AfterAttributeData,
// Attribute values: before the value, then double-quoted, single-quoted, or unquoted.
BeforeAttributeValue,
InAttributeValueDq, InAttributeValueSq, InAttributeValueNq,
// After `<!`, and inside a declaration.
BeforeDeclaration, InDeclaration,
// After `<?`; and after `<!-`, waiting for the second dash of a comment opener.
InProcessingInstruction, BeforeComment,
// Matching `Sequences::CDATA` after `<![`.
CDATASequence,
// After `</` followed by a byte that cannot start a tag name; read up to `>` as a comment.
InSpecialComment,
// Inside a comment or CDATA section, scanning for the terminator in `current_sequence`.
InCommentLike,
// Tag name began with `s`/`S` (HTML mode), and matching the rest of a special tag's name.
BeforeSpecialS, SpecialStartSequence,
// Inside the content of a special tag, scanning for its closing sequence.
InSpecialTag,
// An `&` was seen while entity decoding is enabled.
InEntity,
// An entity was decoded in text / in an attribute value; its token is emitted next.
AfterReadEntityText,
AfterReadEntityAttr,
// The input has been fully consumed.
End,
}
/// Tokenizer over a borrowed byte buffer. It implements `Iterator`, yielding
/// `TokenizerToken`s whose `start`/`end` are byte offsets into the buffer.
pub struct Tokenizer<'a> {
// Current state of the state machine.
state: State,
// Input being tokenized.
buffer: &'a [u8],
// Offset where the section currently being accumulated starts.
section_start: usize,
// Read position. Signed because handlers decrement it to have a byte re-processed by the
// next state, which can happen at position 0 (e.g. `start_entity`).
index: i32,
// Offset of the `&` that started the entity currently being read.
entity_start: usize,
// State to return to once the current entity has been handled.
base_state: State,
// Set while an opening tag matches a special sequence (`script`, `style`, `title`).
is_special: bool,
// Code point of the most recently decoded entity; attached to the entity token.
code: u32,
// Quote style of the attribute value currently or most recently read.
prev_quote_type: QuoteType,
// From `TokenizerOptions::xml_mode` (default `false`).
xml_mode: bool,
// From `TokenizerOptions::decode_entities` (default `true`).
decode_entities: bool,
// Byte sequence currently being matched, and how many of its bytes have matched so far.
current_sequence: &'a [u8],
sequence_index: usize,
// True once the `End` token has been emitted.
has_ended: bool,
}
/// Options accepted by `Tokenizer::new`. A `None` field selects the default.
pub struct TokenizerOptions {
// `Tokenizer::new` treats `None` as `false`.
pub xml_mode: Option<bool>,
// `Tokenizer::new` treats `None` as `true`.
pub decode_entities: Option<bool>,
}
/// Returns `true` when `c` is one of the whitespace bytes declared in `CharCodes`
/// (space, new line, tab, form feed, carriage return).
fn is_whitespace(c: u8) -> bool {
    const WHITESPACE: [u8; 5] = [
        CharCodes::SPACE,
        CharCodes::NEW_LINE,
        CharCodes::TAB,
        CharCodes::FORM_FEED,
        CharCodes::CARRIAGE_RETURN,
    ];
    WHITESPACE.contains(&c)
}
/// Returns `true` when `c` ends a tag-name or attribute-name section: `/`, `>`, or one of
/// the whitespace bytes declared in `CharCodes`.
fn is_end_of_tag_section(c: u8) -> bool {
    const TERMINATORS: [u8; 7] = [
        CharCodes::SLASH,
        CharCodes::GT,
        CharCodes::SPACE,
        CharCodes::NEW_LINE,
        CharCodes::TAB,
        CharCodes::FORM_FEED,
        CharCodes::CARRIAGE_RETURN,
    ];
    TERMINATORS.contains(&c)
}
impl Tokenizer<'_> {
/// Creates a tokenizer over `buffer`, positioned at its first byte in the `Text` state.
///
/// `options.xml_mode` defaults to `false` and `options.decode_entities` defaults to `true`
/// when left as `None`. The option values are copied out, so `options` is borrowed only for
/// the duration of this call and does not need to live as long as the returned tokenizer.
pub fn new<'a>(buffer: &'a [u8], options: &TokenizerOptions) -> Tokenizer<'a> {
    Tokenizer {
        state: State::Text,
        buffer,
        section_start: 0,
        index: 0,
        code: 0,
        entity_start: 0,
        base_state: State::Text,
        is_special: false,
        xml_mode: options.xml_mode.unwrap_or(false),
        decode_entities: options.decode_entities.unwrap_or(true),
        current_sequence: Default::default(),
        sequence_index: 0,
        has_ended: false,
        prev_quote_type: QuoteType::NoValue,
    }
}
pub fn reset(&mut self) {
self.state = State::Text;
self.buffer = &Sequences::EMPTY;
self.section_start = 0;
self.index = 0;
self.base_state = State::Text;
self.current_sequence = Default::default();
}
/// Advances `index` to the next occurrence of `c` at or after the current position.
///
/// Returns `true` with `index` on the match, or `false` with `index` on the last byte of
/// the buffer when `c` does not occur.
fn fast_forward_to(&mut self, c: u8) -> bool {
    let len = self.buffer.len();
    let hit = (self.index..len as i32).find(|&pos| self.buffer[pos as usize] == c);
    match hit {
        Some(pos) => {
            self.index = pos;
            true
        }
        None => {
            self.index = (len - 1) as i32;
            false
        }
    }
}
/// Handles a byte in the `Text` state.
///
/// On `<` (or, when entities are not decoded, after fast-forwarding to the next `<`) the
/// pending text is emitted if it is non-empty and the state becomes `BeforeTagName`.
/// Otherwise an `&` starts an entity when entity decoding is enabled.
fn state_text(&mut self, c: u8) -> Option<TokenizerToken> {
    let at_tag_open =
        c == CharCodes::LT || (!self.decode_entities && self.fast_forward_to(CharCodes::LT));
    if !at_tag_open {
        if self.decode_entities && c == CharCodes::AMP {
            self.start_entity();
        }
        return None;
    }

    let mut pending_text = None;
    if self.index > self.section_start as i32 {
        pending_text = Some(TokenizerToken {
            start: self.section_start,
            end: self.index as usize,
            location: TokenizerTokenLocation::Text,
            code: 0,
            quote: QuoteType::NoValue,
        });
    }
    self.state = State::BeforeTagName;
    self.section_start = self.index as usize;
    pending_text
}
/// Handles a byte inside a comment or CDATA section: scans for the terminator held in
/// `current_sequence` and emits the section's token once it has been matched completely.
fn state_in_comment_like(&mut self, c: u8) -> Option<TokenizerToken> {
if c == self.current_sequence[self.sequence_index] {
self.sequence_index += 1;
if self.sequence_index == self.current_sequence.len() {
// The content ends two bytes before the terminator's final byte.
// NOTE(review): the first branch looks unreachable, since `index` is below
// `buffer.len()` whenever a handler runs — confirm before simplifying.
let end_index = if self.index - 2 > self.buffer.len() as i32 {
self.section_start
} else {
self.index as usize - 2
};
let token = TokenizerToken {
start: self.section_start,
// Clamp so that short forms such as `<!-->` produce an empty section rather than
// an end offset that lies before the start.
end: if self.section_start > end_index {
self.section_start
} else {
end_index
},
location: if self.current_sequence == Sequences::CDATA_END {
TokenizerTokenLocation::CData
} else {
TokenizerTokenLocation::Comment
},
code: 0,
quote: QuoteType::NoValue,
};
self.sequence_index = 0;
self.section_start = (self.index + 1) as usize;
self.state = State::Text;
return Some(token);
}
} else if self.sequence_index == 0 {
// Nothing matched yet: jump straight to the terminator's first byte.
if self.fast_forward_to(self.current_sequence[0]) {
self.sequence_index = 1;
}
} else if c != self.current_sequence[self.sequence_index - 1] {
// A repeat of the previously matched byte keeps the partial match alive (e.g. `--->`);
// any other byte restarts matching.
self.sequence_index = 0;
}
None
}
/// Returns `true` when `c` may begin a tag name.
///
/// In XML mode any byte that does not end a tag section (`/`, `>`, whitespace) qualifies;
/// in HTML mode only ASCII letters do.
fn is_tag_start_char(&self, c: u8) -> bool {
    if self.xml_mode {
        // The negation matters: a byte that terminates a tag section cannot start a name.
        !is_end_of_tag_section(c)
    } else {
        c.is_ascii_alphabetic()
    }
}
/// Begins matching the name of a special tag: marks the tag as special and switches to
/// `SpecialStartSequence`, with `offset` bytes of `sequence` counted as already matched.
fn start_special(&mut self, sequence: &'static [u8], offset: i32) {
    self.state = State::SpecialStartSequence;
    self.is_special = true;
    self.sequence_index = offset as usize;
    self.current_sequence = sequence;
}
/// Handles the byte following `<` and decides which construct is starting: a declaration
/// (`!`), a processing instruction (`?`), an opening tag, a closing tag (`/`), or plain
/// text when none of those apply.
fn state_before_tag_name(&mut self, c: u8) -> Option<TokenizerToken> {
    if c == CharCodes::EXCLAMATION_MARK {
        self.state = State::BeforeDeclaration;
        self.section_start = (self.index + 1) as usize;
    } else if c == CharCodes::QUESTION_MARK {
        self.state = State::InProcessingInstruction;
        self.section_start = (self.index + 1) as usize;
    } else if self.is_tag_start_char(c) {
        self.section_start = self.index as usize;
        let html_mode = !self.xml_mode;
        if html_mode && c.eq_ignore_ascii_case(&Sequences::TITLE_END[2]) {
            // Tag name starts with `t`: it may be `title`.
            self.start_special(&Sequences::TITLE_END, 3);
        } else if html_mode && c.eq_ignore_ascii_case(&Sequences::SCRIPT_END[2]) {
            // Tag name starts with `s`: it may be `script` or `style`.
            self.state = State::BeforeSpecialS;
        } else {
            self.state = State::InTagName;
        }
    } else if c == CharCodes::SLASH {
        self.state = State::BeforeClosingTagName;
    } else {
        // Not markup after all: treat the byte as text.
        self.state = State::Text;
        return self.state_text(c);
    }
    None
}
/// Handles a byte inside an opening tag's name. When the name ends, emits `OpenTagName`
/// and arranges for the terminating byte to be re-processed in `BeforeAttributeName`.
fn state_in_tag_name(&mut self, c: u8) -> Option<TokenizerToken> {
    if !is_end_of_tag_section(c) {
        return None;
    }
    let name = TokenizerToken {
        start: self.section_start,
        end: self.index as usize,
        location: TokenizerTokenLocation::OpenTagName,
        code: 0,
        quote: QuoteType::NoValue,
    };
    self.state = State::BeforeAttributeName;
    self.section_start = 0;
    // Step back so the caller's increment lands on this same byte again.
    self.index -= 1;
    Some(name)
}
/// Handles a byte after `</`. Whitespace is skipped, `>` returns to `Text`, and any other
/// byte starts either a closing tag name or a special comment. Never emits a token.
fn state_before_closing_tag_name(&mut self, c: u8) -> Option<TokenizerToken> {
    if is_whitespace(c) {
        return None;
    }
    if c == CharCodes::GT {
        self.state = State::Text;
        return None;
    }
    self.state = match self.is_tag_start_char(c) {
        true => State::InClosingTagName,
        false => State::InSpecialComment,
    };
    self.section_start = self.index as usize;
    None
}
/// Handles a byte inside a closing tag's name. On `>` or whitespace, emits `CloseTag` for
/// the name and immediately runs the `AfterClosingTagName` handling on the same byte.
fn state_in_closing_tag_name(&mut self, c: u8) -> Option<TokenizerToken> {
    let name_finished = c == CharCodes::GT || is_whitespace(c);
    if !name_finished {
        return None;
    }
    // Capture the token first: the follow-up handler may move `index` forward.
    let close_tag = TokenizerToken {
        start: self.section_start,
        end: self.index as usize,
        location: TokenizerTokenLocation::CloseTag,
        code: 0,
        quote: QuoteType::NoValue,
    };
    self.section_start = 0;
    self.state = State::AfterClosingTagName;
    self.state_after_closing_tag_name(c);
    Some(close_tag)
}
/// Skips everything after a closing tag's name up to and including `>`, then returns to
/// `Text`. Never emits a token.
fn state_after_closing_tag_name(&mut self, c: u8) -> Option<TokenizerToken> {
    let reached_gt = c == CharCodes::GT || self.fast_forward_to(CharCodes::GT);
    if reached_gt {
        self.section_start = (self.index + 1) as usize;
        self.state = State::Text;
    }
    None
}
/// Handles a byte between attributes inside an opening tag.
///
/// `>` emits `OpenTagEnd` and moves on to the tag's content, `/` begins a self-closing
/// tag, whitespace is skipped, and any other byte starts an attribute name.
fn state_before_attribute_name(&mut self, c: u8) -> Option<TokenizerToken> {
    if c == CharCodes::SLASH {
        self.state = State::InSelfClosingTag;
        return None;
    }
    if c != CharCodes::GT {
        if !is_whitespace(c) {
            self.state = State::InAttributeName;
            self.section_start = self.index as usize;
        }
        return None;
    }

    let position = self.index as usize;
    if self.is_special {
        // Content of a special tag is scanned for its closing sequence.
        self.state = State::InSpecialTag;
        self.sequence_index = 0;
    } else {
        self.state = State::Text;
    }
    self.section_start = position + 1;
    Some(TokenizerToken {
        start: position,
        end: position,
        location: TokenizerTokenLocation::OpenTagEnd,
        code: 0,
        quote: QuoteType::NoValue,
    })
}
/// Handles a byte after `/` inside an opening tag. `>` emits `SelfClosingTag`; whitespace
/// is skipped; any other byte is re-processed in `BeforeAttributeName`.
fn state_in_self_closing_tag(&mut self, c: u8) -> Option<TokenizerToken> {
    if c != CharCodes::GT {
        if !is_whitespace(c) {
            self.state = State::BeforeAttributeName;
            // Step back so the caller's increment lands on this same byte again.
            self.index -= 1;
        }
        return None;
    }

    let position = self.index as usize;
    self.state = State::Text;
    self.section_start = position + 1;
    // A self-closed special tag has no content to scan.
    self.is_special = false;
    Some(TokenizerToken {
        start: position,
        end: position,
        location: TokenizerTokenLocation::SelfClosingTag,
        code: 0,
        quote: QuoteType::NoValue,
    })
}
/// Handles a byte inside an attribute name. On `=` or an end-of-tag-section byte, emits
/// `AttrName` and arranges for that byte to be re-processed in `AfterAttributeName`.
fn state_in_attribute_name(&mut self, c: u8) -> Option<TokenizerToken> {
    let name_finished = c == CharCodes::EQ || is_end_of_tag_section(c);
    if !name_finished {
        return None;
    }
    let here = self.index as usize;
    let name = TokenizerToken {
        start: self.section_start,
        end: here,
        location: TokenizerTokenLocation::AttrName,
        code: 0,
        quote: QuoteType::NoValue,
    };
    self.section_start = here;
    self.state = State::AfterAttributeName;
    // Step back so the caller's increment lands on this same byte again.
    self.index -= 1;
    Some(name)
}
/// Handles a byte after an attribute name.
///
/// `=` leads to the attribute's value and whitespace is skipped. Any other byte means the
/// attribute has no value: a zero-width `AttrEnd` is emitted and either the tag handling
/// resumes (`/` or `>`, which is re-processed) or the next attribute name begins.
fn state_after_attribute_name(&mut self, c: u8) -> Option<TokenizerToken> {
    if c == CharCodes::EQ {
        self.state = State::BeforeAttributeValue;
        return None;
    }
    if is_whitespace(c) {
        return None;
    }

    let attr_end = TokenizerToken {
        start: self.section_start,
        end: self.section_start,
        location: TokenizerTokenLocation::AttrEnd,
        code: 0,
        quote: QuoteType::NoValue,
    };
    if c == CharCodes::SLASH || c == CharCodes::GT {
        self.section_start = 0;
        self.state = State::BeforeAttributeName;
        // Step back so the caller's increment lands on this same byte again.
        self.index -= 1;
    } else {
        self.state = State::InAttributeName;
        self.section_start = self.index as usize;
    }
    Some(attr_end)
}
/// Handles a byte after an attribute's `=`: records the quote style and enters the
/// matching value state. Whitespace is skipped; an unquoted value starts at this byte,
/// which is handed straight to the unquoted-value handler.
fn state_before_attribute_value(&mut self, c: u8) -> Option<TokenizerToken> {
    match c {
        CharCodes::DOUBLE_QUOTE => {
            self.prev_quote_type = QuoteType::Double;
            self.state = State::InAttributeValueDq;
            self.section_start = (self.index + 1) as usize;
        }
        CharCodes::SINGLE_QUOTE => {
            self.prev_quote_type = QuoteType::Single;
            self.state = State::InAttributeValueSq;
            self.section_start = (self.index + 1) as usize;
        }
        _ if is_whitespace(c) => {}
        _ => {
            self.prev_quote_type = QuoteType::Unquoted;
            self.section_start = self.index as usize;
            self.state = State::InAttributeValueNq;
            return self.state_in_attribute_value_no_quotes(c);
        }
    }
    None
}
/// Shared handler for quoted attribute values delimited by `quote`.
///
/// At the closing quote (reached directly, or by fast-forwarding when entities are not
/// decoded) emits `AttrData` for the value and arranges for the quote byte to be
/// re-processed so that `AttrEnd` follows. Otherwise an `&` starts an entity when entity
/// decoding is enabled.
fn handle_in_attribute_value(&mut self, c: u8, quote: u8) -> Option<TokenizerToken> {
    let value_closed = c == quote || (!self.decode_entities && self.fast_forward_to(quote));
    if !value_closed {
        if self.decode_entities && c == CharCodes::AMP {
            self.start_entity();
        }
        return None;
    }

    self.state = match quote {
        CharCodes::DOUBLE_QUOTE => State::InAttributeAfterDataDoubleQuote,
        _ => State::InAttributeAfterDataSingleQuote,
    };
    let data = TokenizerToken {
        start: self.section_start,
        end: self.index as usize,
        location: TokenizerTokenLocation::AttrData,
        code: 0,
        quote: self.prev_quote_type,
    };
    // Step back so the caller's increment lands on the closing quote again.
    self.index -= 1;
    Some(data)
}
/// Handles a byte inside a double-quoted attribute value by delegating to
/// `handle_in_attribute_value` with `"` as the closing quote.
fn state_in_attribute_value_double_quotes(&mut self, c: u8) -> Option<TokenizerToken> {
self.handle_in_attribute_value(c, CharCodes::DOUBLE_QUOTE)
}
/// Handles a byte inside a single-quoted attribute value by delegating to
/// `handle_in_attribute_value` with `'` as the closing quote.
fn state_in_attribute_value_single_quotes(&mut self, c: u8) -> Option<TokenizerToken> {
self.handle_in_attribute_value(c, CharCodes::SINGLE_QUOTE)
}
/// Handles a byte inside an unquoted attribute value.
///
/// Whitespace or `>` ends the value: a non-empty value is emitted as `AttrData`, and the
/// terminating byte is re-processed in `AfterAttributeData`. Otherwise an `&` starts an
/// entity when entity decoding is enabled.
fn state_in_attribute_value_no_quotes(&mut self, c: u8) -> Option<TokenizerToken> {
    let value_finished = is_whitespace(c) || c == CharCodes::GT;
    if !value_finished {
        if self.decode_entities && c == CharCodes::AMP {
            self.start_entity();
        }
        return None;
    }

    let here = self.index as usize;
    let mut data = None;
    if self.section_start < here {
        data = Some(TokenizerToken {
            start: self.section_start,
            end: here,
            location: TokenizerTokenLocation::AttrData,
            code: 0,
            quote: QuoteType::Unquoted,
        });
    }
    // Step back so the caller's increment lands on this same byte again.
    self.index -= 1;
    self.state = State::AfterAttributeData;
    self.section_start = 0;
    data
}
/// Handles the byte after `<!`: `[` may open a CDATA section, `-` may open a comment, and
/// anything else is a declaration. Never emits a token.
fn state_before_declaration(&mut self, c: u8) -> Option<TokenizerToken> {
    match c {
        CharCodes::OPENING_SQUARE_BRACKET => {
            self.state = State::CDATASequence;
            self.sequence_index = 0;
        }
        CharCodes::DASH => self.state = State::BeforeComment,
        _ => self.state = State::InDeclaration,
    }
    None
}
/// Consumes input up to the next `>` and emits the section before it with the given
/// `location`, then returns to `Text`. Returns `None` when no `>` remains in the buffer.
fn fast_get_until_gt(
    &mut self,
    c: u8,
    location: TokenizerTokenLocation,
) -> Option<TokenizerToken> {
    let reached_gt = c == CharCodes::GT || self.fast_forward_to(CharCodes::GT);
    if !reached_gt {
        return None;
    }
    let gt_position = self.index as usize;
    let section = TokenizerToken {
        start: self.section_start,
        end: gt_position,
        location,
        code: 0,
        quote: QuoteType::NoValue,
    };
    self.section_start = gt_position + 1;
    self.state = State::Text;
    Some(section)
}
/// Handles the byte after `<!-`. A second `-` opens a comment; anything else makes the
/// construct a declaration. Never emits a token.
fn state_before_comment(&mut self, c: u8) -> Option<TokenizerToken> {
    if c != CharCodes::DASH {
        self.state = State::InDeclaration;
        return None;
    }
    self.current_sequence = &Sequences::COMMENT_END;
    // Count the two opening dashes as matched so that a following `>` closes the comment.
    self.sequence_index = 2;
    self.section_start = (self.index + 1) as usize;
    self.state = State::InCommentLike;
    None
}
/// Handles the second byte of a tag name that began with `s`: `c` may continue `script`,
/// `t` may continue `style`; any other byte is handled as part of an ordinary tag name.
fn state_before_special_s(&mut self, c: u8) -> Option<TokenizerToken> {
    if c.eq_ignore_ascii_case(&Sequences::SCRIPT_END[3]) {
        self.start_special(&Sequences::SCRIPT_END, 4);
        None
    } else if c.eq_ignore_ascii_case(&Sequences::STYLE_END[3]) {
        self.start_special(&Sequences::STYLE_END, 4);
        None
    } else {
        self.state = State::InTagName;
        self.state_in_tag_name(c)
    }
}
/// Switches to `InEntity` at the current `&`, remembering the state to return to and the
/// position of the `&`, and arranges for the `&` to be re-processed in the new state.
fn start_entity(&mut self) {
    self.base_state = std::mem::replace(&mut self.state, State::InEntity);
    self.entity_start = self.index as usize;
    // Step back so the caller's increment lands on the `&` again.
    self.index -= 1;
}
/// Returns the exclusive end offset of the entity candidate starting at `index`.
///
/// Scanning starts after the byte at `index` and continues over ASCII letters, digits,
/// `_` and `#`; a terminating `;` is included in the result. `-1` is kept as the
/// "not found" value for the caller's check.
fn find_end_of_html_entity(&mut self) -> i32 {
    let len = self.buffer.len();
    let mut end = self.index as usize + 1;
    while end < len {
        let byte = self.buffer[end];
        let is_name_byte = byte.is_ascii_alphanumeric()
            || byte == CharCodes::UNDERSCORE
            || byte == CharCodes::HASH;
        if !is_name_byte {
            if byte == CharCodes::SEMI {
                end += 1;
            }
            break;
        }
        end += 1;
    }
    if end > len {
        return -1;
    }
    end as i32
}
/// Handles the `InEntity` state, entered with `index` on the `&` that starts the entity.
///
/// When the bytes up to the end reported by `find_end_of_html_entity` decode to an entity,
/// the decoded code point is stored in `self.code`, the section preceding the `&` is
/// emitted if it is non-empty, and the state becomes `AfterReadEntityText` or
/// `AfterReadEntityAttr` so that the entity token itself is emitted next. When the bytes
/// do not decode, the previous state is restored and scanning continues after the `&`.
fn state_in_entity(&mut self) -> Option<TokenizerToken> {
let index = self.find_end_of_html_entity();
if index >= 0 {
let range: Range<usize> = Range {
start: self.index as usize,
end: index as usize,
};
// Anything other than text or special-tag content is treated as an attribute value.
let is_attr = self.base_state != State::Text && self.base_state != State::InSpecialTag;
let code = htmlize::unescape_bytes_in(
&self.buffer[range],
if is_attr {
Context::Attribute
} else {
Context::General
},
);
if code.len() > 0 {
// Output that is longer than one byte and still starts with `&` is taken to mean
// nothing was decoded: resume the previous state.
if code.len() > 1 && code[0] == CharCodes::AMP {
self.state = self.base_state;
return None;
}
// SAFETY (assumed, to be confirmed): the slice passed to `unescape_bytes_in` holds only
// the ASCII bytes accepted by `find_end_of_html_entity`, and the unescaped output is
// presumed to be valid UTF-8 — verify against the htmlize documentation.
let text = unsafe { str::from_utf8_unchecked(&code) };
if let Some(character) = text.chars().next() {
self.code = character as u32
} else {
self.state = self.base_state;
return None;
}
let token: Option<TokenizerToken>;
// Flush the text or attribute data that precedes the `&`.
if self.section_start < self.entity_start {
token = Some(TokenizerToken {
start: self.section_start,
end: self.entity_start,
location: if is_attr {
TokenizerTokenLocation::AttrData
} else {
TokenizerTokenLocation::Text
},
code: 0,
quote: if is_attr {
self.prev_quote_type
} else {
QuoteType::NoValue
},
});
self.section_start = self.index as usize;
} else {
token = None;
}
// After the caller's increment, `index` points just past the entity.
self.index = index - 1;
self.state = if is_attr {
State::AfterReadEntityAttr
} else {
State::AfterReadEntityText
};
return token;
} else {
// Nothing decoded: move to the last byte so the caller's loop terminates.
self.index = (self.buffer.len() - 1) as i32;
}
} else {
self.index = (self.buffer.len() - 1) as i32;
}
None
}
/// Runs the state machine until a token is produced and returns it.
///
/// Each iteration dispatches the byte at `index` to the handler for the current state and
/// then advances `index` by one; handlers that need a byte re-processed decrement `index`
/// beforehand. Once the buffer is exhausted, trailing data is flushed, then a single `End`
/// token is returned, and after that `None`.
fn tokenizer_next(&mut self) -> Option<TokenizerToken> {
while self.index < self.buffer.len() as i32 {
let c = self.buffer[self.index as usize];
let token_or_empty: Option<TokenizerToken> = match self.state {
State::Text => self.state_text(c),
State::SpecialStartSequence => self.state_special_start_sequence(c),
State::InSpecialTag => self.state_in_special_tag(c),
State::CDATASequence => self.state_cdata_sequence(c),
State::InAttributeValueDq => self.state_in_attribute_value_double_quotes(c),
State::InAttributeName => self.state_in_attribute_name(c),
State::InAttributeAfterDataSingleQuote => {
self.state_in_attribute_after_data(QuoteType::Single)
}
State::InAttributeAfterDataDoubleQuote => {
self.state_in_attribute_after_data(QuoteType::Double)
}
State::InCommentLike => self.state_in_comment_like(c),
State::InSpecialComment => {
self.fast_get_until_gt(c, TokenizerTokenLocation::Comment)
}
State::BeforeAttributeName => self.state_before_attribute_name(c),
State::InTagName => self.state_in_tag_name(c),
State::InClosingTagName => self.state_in_closing_tag_name(c),
State::BeforeTagName => self.state_before_tag_name(c),
State::AfterAttributeName => self.state_after_attribute_name(c),
State::InAttributeValueSq => self.state_in_attribute_value_single_quotes(c),
State::BeforeAttributeValue => self.state_before_attribute_value(c),
State::BeforeClosingTagName => self.state_before_closing_tag_name(c),
State::AfterClosingTagName => self.state_after_closing_tag_name(c),
State::BeforeSpecialS => self.state_before_special_s(c),
State::InAttributeValueNq => self.state_in_attribute_value_no_quotes(c),
State::AfterAttributeData => self.state_after_attribute_data(),
State::InSelfClosingTag => self.state_in_self_closing_tag(c),
State::InDeclaration => {
self.fast_get_until_gt(c, TokenizerTokenLocation::Declaration)
}
State::BeforeDeclaration => self.state_before_declaration(c),
State::BeforeComment => self.state_before_comment(c),
State::InProcessingInstruction => {
self.fast_get_until_gt(c, TokenizerTokenLocation::ProcessingInstruction)
}
State::InEntity => self.state_in_entity(),
State::AfterReadEntityText => {
self.state_after_entity(TokenizerTokenLocation::TextEntity)
}
State::AfterReadEntityAttr => {
self.state_after_entity(TokenizerTokenLocation::AttrEntity)
}
State::End => self.state_end(),
};
// Advance unconditionally; handlers that want the byte re-processed have stepped back.
self.index += 1;
if token_or_empty.is_some() {
return token_or_empty;
}
}
// Input exhausted in the middle of an entity: fall back to the state it interrupted so
// the trailing-data handling classifies the remaining bytes by that state.
if self.state == State::InEntity {
self.state = self.base_state;
}
let option_trailing_token: Option<TokenizerToken> = self.handle_trailing_data();
self.state = State::End;
if option_trailing_token.is_some() {
return option_trailing_token;
}
self.state_end()
}
/// Emits the zero-width `End` token positioned at the end of the buffer, exactly once;
/// every later call returns `None`.
fn state_end(&mut self) -> Option<TokenizerToken> {
    if self.has_ended {
        return None;
    }
    self.has_ended = true;
    let end_offset = self.buffer.len();
    Some(TokenizerToken {
        start: end_offset,
        end: end_offset,
        location: TokenizerTokenLocation::End,
        code: 0,
        quote: QuoteType::NoValue,
    })
}
/// Produces the token for whatever remains unconsumed once the buffer is exhausted.
///
/// Returns `None` when the tokenizer has already ended, when nothing is pending, or when
/// the input stops inside an opening or closing tag (an unfinished tag is dropped).
/// An unterminated comment or CDATA section is emitted up to the end of the buffer, a
/// pending decoded entity is emitted as its entity token, and anything else is text.
fn handle_trailing_data(&mut self) -> Option<TokenizerToken> {
    if self.state == State::End {
        return None;
    }
    let end_index = self.buffer.len();
    if self.section_start >= end_index {
        return None;
    }

    match self.state {
        State::InCommentLike | State::InSpecialComment => Some(TokenizerToken {
            start: self.section_start,
            end: end_index,
            location: if self.current_sequence == Sequences::CDATA_END {
                TokenizerTokenLocation::CData
            } else {
                TokenizerTokenLocation::Comment
            },
            code: 0,
            quote: QuoteType::NoValue,
        }),
        State::InTagName
        | State::BeforeAttributeName
        | State::BeforeAttributeValue
        | State::AfterAttributeName
        | State::InAttributeName
        | State::InAttributeValueSq
        | State::InAttributeValueDq
        | State::InAttributeValueNq
        | State::InClosingTagName => None,
        State::AfterReadEntityText => {
            self.state_after_entity(TokenizerTokenLocation::TextEntity)
        }
        // Use `AttrEntity`, matching the main loop: the token carries the decoded code
        // point and the attribute's quote type, which an `AttrData` token would not convey.
        State::AfterReadEntityAttr => {
            self.state_after_entity(TokenizerTokenLocation::AttrEntity)
        }
        _ => Some(TokenizerToken {
            start: self.section_start,
            end: end_index,
            location: TokenizerTokenLocation::Text,
            code: 0,
            quote: QuoteType::NoValue,
        }),
    }
}
/// Handles a byte while an opening tag's name is being compared with a special tag name.
///
/// A matching byte extends the match. Once the whole sequence has matched, the next byte
/// must end the tag section for the tag to stay special. On a mismatch the tag is no
/// longer special. In both of the latter cases the byte is handled as part of a regular
/// tag name.
fn state_special_start_sequence(&mut self, c: u8) -> Option<TokenizerToken> {
    let sequence_complete = self.sequence_index == self.current_sequence.len();
    let matched = if sequence_complete {
        is_end_of_tag_section(c)
    } else {
        c.eq_ignore_ascii_case(&self.current_sequence[self.sequence_index])
    };

    if matched && !sequence_complete {
        self.sequence_index += 1;
        return None;
    }
    if !matched {
        self.is_special = false;
    }
    self.sequence_index = 0;
    self.state = State::InTagName;
    self.state_in_tag_name(c)
}
/// Handles a byte while matching `CDATA[` after `<![`. A complete match enters the CDATA
/// section; a mismatch turns the construct into a declaration and hands the byte to the
/// declaration handling.
fn state_cdata_sequence(&mut self, c: u8) -> Option<TokenizerToken> {
    if c != Sequences::CDATA[self.sequence_index] {
        self.sequence_index = 0;
        self.state = State::InDeclaration;
        return self.fast_get_until_gt(c, TokenizerTokenLocation::Declaration);
    }

    self.sequence_index += 1;
    if self.sequence_index == Sequences::CDATA.len() {
        self.current_sequence = &Sequences::CDATA_END;
        self.sequence_index = 0;
        self.section_start = (self.index + 1) as usize;
        self.state = State::InCommentLike;
    }
    None
}
/// Handles a byte inside the content of a special tag, scanning for the closing sequence
/// held in `current_sequence`. When that sequence is followed by `>` or whitespace, the
/// content before it is emitted as `Text` and the closing tag's name is tokenized next.
fn state_in_special_tag(&mut self, c: u8) -> Option<TokenizerToken> {
if self.sequence_index == self.current_sequence.len() {
if c == CharCodes::GT || is_whitespace(c) {
// The text ends where the closing sequence began.
let end_of_text = self.index - self.current_sequence.len() as i32;
let token: Option<TokenizerToken> = if self.section_start < end_of_text as usize {
Some(TokenizerToken {
start: self.section_start,
end: end_of_text as usize,
location: TokenizerTokenLocation::Text,
code: 0,
quote: QuoteType::NoValue,
})
} else {
None
};
self.is_special = false;
// `+ 2` skips the `</`, so the section starts at the closing tag's name.
self.section_start = (end_of_text + 2) as usize; self.state = State::InClosingTagName;
// Step back so this byte is re-processed by the closing-tag-name handler.
self.index -= 1; return token;
}
// The sequence matched but is not followed by a terminator: start over.
self.sequence_index = 0;
}
if c.eq_ignore_ascii_case(&self.current_sequence[self.sequence_index]) {
self.sequence_index += 1;
} else if self.sequence_index == 0 {
if self.current_sequence == Sequences::TITLE_END {
// Entities are decoded inside `title` content, so it cannot be skipped over.
if self.decode_entities && c == CharCodes::AMP {
self.start_entity();
}
} else if self.fast_forward_to(CharCodes::LT) {
// For the other special tags, jump straight to the next `<`.
self.sequence_index = 1;
}
} else {
// A partial match broke off; a `<` here can still begin a new match.
if c == CharCodes::LT {
self.sequence_index = 1;
} else {
self.sequence_index = 0;
}
}
None
}
/// Emits the zero-width `AttrEnd` that closes an unquoted attribute value and arranges for
/// the current byte to be re-processed in `BeforeAttributeName`.
#[inline]
fn state_after_attribute_data(&mut self) -> Option<TokenizerToken> {
    let position = self.index as usize;
    self.state = State::BeforeAttributeName;
    // Step back so the caller's increment lands on this same byte again.
    self.index -= 1;
    Some(TokenizerToken {
        start: position,
        end: position,
        location: TokenizerTokenLocation::AttrEnd,
        code: 0,
        quote: QuoteType::Unquoted,
    })
}
/// Emits the zero-width `AttrEnd` that closes a quoted attribute value, positioned just
/// after the closing quote, and continues in `BeforeAttributeName`.
#[inline]
fn state_in_attribute_after_data(&mut self, quote_type: QuoteType) -> Option<TokenizerToken> {
    let after_quote = (self.index + 1) as usize;
    self.state = State::BeforeAttributeName;
    self.section_start = 0;
    Some(TokenizerToken {
        start: after_quote,
        end: after_quote,
        location: TokenizerTokenLocation::AttrEnd,
        code: 0,
        quote: quote_type,
    })
}
/// Emits the token for the entity that was just decoded, carrying its code point, then
/// restores the state that was active before the entity and arranges for the current byte
/// to be re-processed in that state.
#[inline]
fn state_after_entity(&mut self, location: TokenizerTokenLocation) -> Option<TokenizerToken> {
    self.state = self.base_state;
    // Only attribute entities report the surrounding quote style.
    let quote = match location == TokenizerTokenLocation::AttrEntity {
        true => self.prev_quote_type,
        false => QuoteType::NoValue,
    };
    let here = self.index as usize;
    let entity = TokenizerToken {
        start: self.section_start,
        end: here,
        location,
        code: self.code,
        quote,
    };
    self.section_start = here;
    self.code = 0;
    // Step back so the caller's increment lands on this same byte again.
    self.index -= 1;
    Some(entity)
}
}
/// Iterating a `Tokenizer` yields its tokens in input order. The last item is the `End`
/// token, after which the iterator returns `None`.
impl Iterator for Tokenizer<'_> {
type Item = TokenizerToken;
// Delegates to `tokenizer_next`.
fn next(&mut self) -> Option<TokenizerToken> {
self.tokenizer_next()
}
}