use crate::bytes::{
self, AlreadyFoundByteSeqCount, BracketCount, QuoteAndBracketContextAwareFoundState,
QuoteContextAwareFoundState, QuoteState,
};
/// Index into the byte slice handed to a scanning routine at which that routine starts
/// examining bytes (everything before it has already been classified by the caller).
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
struct Offset(usize);
/// Scanner state carried from one `Scanner::scan` call to the next, including whatever context
/// is needed to resume a token that was split across buffers.
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
enum InternalState {
/// No token is in progress; the next buffer's first byte decides between markup and text.
Reset,
/// Only a `<` has been seen so far; the kind of markup is still unknown.
ScanningMarkup,
/// Inside a start or empty-element tag. Carries the quote state at the end of the previous
/// buffer and whether that buffer ended with an unquoted `/`.
ScanningStartOrEmptyElementTag(QuoteState, bool),
/// Inside an end tag. Carries the quote state at the end of the previous buffer.
ScanningEndTag(QuoteState),
/// Inside character data (no `<` seen yet).
ScanningCharacters,
/// Inside a processing instruction. Carries how many bytes of the `?>` terminator were
/// matched at the end of the previous buffer.
ScanningProcessingInstruction(AlreadyFoundByteSeqCount),
/// After `<!` but before enough bytes were seen to tell a comment, a CDATA section and a
/// declaration apart. Carries the bytes collected so far and how many of them are valid.
ScanningDeclarationCommentOrCdata([u8; 7], usize),
/// Inside a declaration. Carries the quote state, the bracket count and the terminator match
/// count reported by the previous search.
ScanningDeclaration(QuoteState, BracketCount, AlreadyFoundByteSeqCount),
/// Inside a comment. Carries how many bytes of the `-->` terminator were matched at the end
/// of the previous buffer.
ScanningComment(AlreadyFoundByteSeqCount),
/// Inside a CDATA section. Carries how many bytes of the `]]>` terminator were matched at the
/// end of the previous buffer.
ScanningCdata(AlreadyFoundByteSeqCount),
/// An empty buffer was received; every further scan returns `None`.
Eof,
}
/// Outcome of a single [`Scanner::scan`] call.
///
/// `Scanning*` variants mean the buffer ended before the current token was complete and more
/// bytes are needed. `Scanned*` variants mean a token was completed; their `usize` payload is an
/// offset into the buffer passed to that call: one past the token's last byte for markup, and
/// the index of the terminating `<` for character data.
#[derive(Copy, Clone, Debug, Eq, Hash, PartialEq)]
pub enum State {
/// Character data continues past the end of the buffer.
ScanningCharacters,
/// Character data ended (a `<` was found, or the input ended with an empty buffer).
ScannedCharacters(usize),
/// Only a `<` has been seen; the kind of markup is not yet known.
ScanningMarkup,
/// A start or empty-element tag is still open at the end of the buffer.
ScanningStartOrEmptyElementTag,
/// A start tag was completed.
ScannedStartTag(usize),
/// An empty-element tag (ending in `/>`) was completed.
ScannedEmptyElementTag(usize),
/// An end tag is still open at the end of the buffer.
ScanningEndTag,
/// An end tag was completed.
ScannedEndTag(usize),
/// `<!` was seen but the buffer ended before the construct could be identified.
ScanningDeclarationCommentOrCdata,
/// A declaration is still open at the end of the buffer.
ScanningDeclaration,
/// A declaration was completed.
ScannedDeclaration(usize),
/// A comment is still open at the end of the buffer.
ScanningComment,
/// A comment was completed.
ScannedComment(usize),
/// A CDATA section is still open at the end of the buffer.
ScanningCdata,
/// A CDATA section was completed.
ScannedCdata(usize),
/// A processing instruction is still open at the end of the buffer.
ScanningProcessingInstruction,
/// A processing instruction was completed.
ScannedProcessingInstruction(usize),
}
/// Incremental scanner that finds the boundaries of XML tokens (tags, comments, CDATA sections,
/// declarations, processing instructions and character data) in a stream of byte buffers.
///
/// The scanner does not own or retain any input; it only remembers enough state to resume a
/// token that was split across two buffers.
#[derive(Debug, Clone, PartialEq, Hash)]
pub struct Scanner {
// Resume information for the token currently in progress.
state: InternalState,
}
impl Scanner {
/// Creates a scanner positioned at a token boundary, ready for the first buffer.
pub fn new() -> Self {
    let state = InternalState::Reset;
    Self { state }
}
/// Classifies markup whose leading `<` is the first byte of `bytes`.
///
/// The byte obtained from `bytes::peek2` selects the specialised scanner; the offsets passed on
/// skip the bytes already classified here. When no such byte is available the scanner parks in
/// `ScanningMarkup` and waits for more input.
#[inline(always)]
fn scan_markup(&mut self, bytes: &[u8]) -> Option<State> {
    match bytes::peek2(bytes) {
        Some(b'/') => self.scan_end_tag(bytes, QuoteState::None, Offset(2)),
        Some(b'?') => {
            self.scan_processing_instruction(bytes, AlreadyFoundByteSeqCount(0), Offset(2))
        }
        Some(b'!') => self.scan_declaration_comment_or_cdata(bytes, [0; 7], 0, Offset(2)),
        Some(_) => {
            self.scan_start_or_empty_element_tag(bytes, QuoteState::None, false, Offset(1))
        }
        None => {
            self.state = InternalState::ScanningMarkup;
            // The caller only gets here with a buffer that starts with `<`.
            debug_assert_eq!(bytes.len(), 1);
            Some(State::ScanningMarkup)
        }
    }
}
/// Classifies markup whose leading `<` was the last byte of the previous buffer.
///
/// Mirrors `scan_markup`, except that the deciding byte is the first byte of `bytes` and all
/// offsets are therefore one lower. An empty buffer ends the scan.
#[inline(always)]
fn scan_markup2(&mut self, bytes: &[u8]) -> Option<State> {
    match bytes::peek(bytes) {
        Some(b'/') => self.scan_end_tag(bytes, QuoteState::None, Offset(1)),
        Some(b'?') => {
            self.scan_processing_instruction(bytes, AlreadyFoundByteSeqCount(0), Offset(1))
        }
        Some(b'!') => self.scan_declaration_comment_or_cdata(bytes, [0; 7], 0, Offset(1)),
        Some(_) => {
            self.scan_start_or_empty_element_tag(bytes, QuoteState::None, false, Offset(0))
        }
        None => {
            self.state = InternalState::Eof;
            None
        }
    }
}
/// Looks for the `>` that closes a start tag or an empty-element tag, ignoring any `>` inside
/// quoted attribute values.
///
/// * `quote_state` - quote context at the end of the previous buffer.
/// * `is_last_char_slash` - whether the previous buffer ended with an unquoted `/`, so that a
///   `>` at the very start of this buffer completes an empty-element tag.
/// * `offset` - where in `bytes` the search begins.
///
/// Returns `None` (and moves to end-of-input) on an empty buffer, a `Scanned*` state with the
/// offset one past the `>` when the tag is complete, and
/// `State::ScanningStartOrEmptyElementTag` otherwise.
fn scan_start_or_empty_element_tag(
    &mut self,
    bytes: &[u8],
    quote_state: QuoteState,
    is_last_char_slash: bool,
    offset: Offset,
) -> Option<State> {
    if bytes.is_empty() {
        self.state = InternalState::Eof;
        return None;
    }

    let start = offset.0;
    let (consumed, outcome) = bytes::quote_context_aware_find(
        &bytes[start..],
        b">",
        AlreadyFoundByteSeqCount(0),
        quote_state,
    );

    match outcome {
        QuoteContextAwareFoundState::NotFound(open_quote, _) => {
            // A trailing `/` only counts when it is outside of a quoted value.
            let ends_with_slash = match open_quote {
                QuoteState::Single | QuoteState::Double => false,
                QuoteState::None => bytes.last() == Some(&b'/'),
            };
            self.state = InternalState::ScanningStartOrEmptyElementTag(open_quote, ends_with_slash);
            Some(State::ScanningStartOrEmptyElementTag)
        }
        QuoteContextAwareFoundState::Found => {
            self.state = InternalState::Reset;
            let end = start + consumed;
            // Either the `/` directly precedes `>` in this buffer, or it was the final byte of
            // the previous buffer and `>` is the first byte examined now.
            let slash_in_buffer = consumed > 1 && bytes[end - 2] == b'/';
            let slash_carried_over = is_last_char_slash && consumed == 1;
            if slash_in_buffer {
                Some(State::ScannedEmptyElementTag(end))
            } else if slash_carried_over {
                debug_assert_eq!(start, 0);
                Some(State::ScannedEmptyElementTag(consumed))
            } else {
                Some(State::ScannedStartTag(end))
            }
        }
    }
}
/// Looks for the `>` that closes an end tag, ignoring any `>` inside quotes.
///
/// `quote_state` is the quote context at the end of the previous buffer and `offset` is where
/// the search starts in `bytes`. An empty buffer ends the scan and yields `None`.
fn scan_end_tag(
    &mut self,
    bytes: &[u8],
    quote_state: QuoteState,
    offset: Offset,
) -> Option<State> {
    if bytes.is_empty() {
        self.state = InternalState::Eof;
        return None;
    }

    let start = offset.0;
    let (consumed, outcome) = bytes::quote_context_aware_find(
        &bytes[start..],
        b">",
        AlreadyFoundByteSeqCount(0),
        quote_state,
    );

    let (next_state, report) = match outcome {
        QuoteContextAwareFoundState::Found => (
            InternalState::Reset,
            State::ScannedEndTag(start + consumed),
        ),
        QuoteContextAwareFoundState::NotFound(open_quote, _) => (
            InternalState::ScanningEndTag(open_quote),
            State::ScanningEndTag,
        ),
    };
    self.state = next_state;
    Some(report)
}
/// Looks for the `?>` that terminates a processing instruction.
///
/// * `already_found_byte_seq_count` - `1` when the previous buffer ended with `?`, so that a
///   leading `>` completes the terminator; `0` otherwise.
/// * `offset` - where in `bytes` the search begins.
///
/// Returns `None` (and moves to end-of-input) on an empty buffer,
/// `State::ScannedProcessingInstruction` with the offset one past `?>` when found, and
/// `State::ScanningProcessingInstruction` otherwise.
fn scan_processing_instruction(
    &mut self,
    bytes: &[u8],
    already_found_byte_seq_count: AlreadyFoundByteSeqCount,
    offset: Offset,
) -> Option<State> {
    if bytes.is_empty() {
        self.state = InternalState::Eof;
        return None;
    }

    let start = offset.0;

    // Terminator split across buffers: `?` ended the previous one, `>` opens this one.
    if already_found_byte_seq_count.0 > 0 {
        debug_assert_eq!(already_found_byte_seq_count.0, 1);
        if bytes.get(start) == Some(&b'>') {
            self.state = InternalState::Reset;
            return Some(State::ScannedProcessingInstruction(start + 1));
        }
    }

    let window = &bytes[start..];
    let mut cursor = 0;
    // Visit every `>`; the one preceded by `?` within the stretch examined in that round ends
    // the instruction. A `?` before `cursor` is never reused.
    while let Some(relative) = window[cursor..].iter().position(|&byte| byte == b'>') {
        let gt = cursor + relative;
        if window[cursor..=gt].ends_with(b"?>") {
            self.state = InternalState::Reset;
            return Some(State::ScannedProcessingInstruction(start + gt + 1));
        }
        cursor = gt + 1;
    }

    // Remember a trailing partial terminator for the next buffer.
    let partial = bytes::find_matching_suffix(b"?>", window);
    self.state = InternalState::ScanningProcessingInstruction(partial);
    Some(State::ScanningProcessingInstruction)
}
/// Decides whether markup starting with `<!` is a comment (`<!--`), a CDATA section
/// (`<![CDATA[`) or some other declaration, then hands over to the matching scanner.
///
/// Up to seven bytes following `<!` are collected in `filled_array` (`filled_count` of them are
/// valid) so that the decision can be resumed when the opener is split across buffers.
///
/// * `bytes` - the current buffer; an empty buffer ends the scan and yields `None`.
/// * `filled_array` / `filled_count` - bytes collected from previous buffers.
/// * `offset` - index in `bytes` of the first byte that has not been classified yet.
///
/// Returns `State::ScanningDeclarationCommentOrCdata` while the opener is still ambiguous,
/// otherwise whatever the selected scanner returns.
fn scan_declaration_comment_or_cdata(
    &mut self,
    bytes: &[u8],
    mut filled_array: [u8; 7],
    mut filled_count: usize,
    offset: Offset,
) -> Option<State> {
    if bytes.is_empty() {
        self.state = InternalState::Eof;
        return None;
    }

    let bytes_to_check = &bytes[offset.0..];
    let cdata = b"[CDATA[";

    // Top up the look-ahead array with as many bytes as are available and still fit.
    let previously_filled = filled_count;
    let to_fill = usize::min(filled_array.len() - filled_count, bytes_to_check.len());
    filled_array[filled_count..filled_count + to_fill].copy_from_slice(&bytes_to_check[..to_fill]);
    filled_count += to_fill;

    if filled_count == 0 {
        self.state = InternalState::ScanningDeclarationCommentOrCdata([0; 7], 0);
        return Some(State::ScanningDeclarationCommentOrCdata);
    }

    match filled_array[0] {
        // A single `-` may still become `--`; wait for the next byte.
        b'-' if filled_count == 1 => {
            self.state =
                InternalState::ScanningDeclarationCommentOrCdata(filled_array, filled_count);
            Some(State::ScanningDeclarationCommentOrCdata)
        }
        b'-' if filled_array[1] == b'-' => {
            // Skip only the part of the `--` opener that lives in THIS buffer. When the first
            // `-` arrived with an earlier buffer, exactly one dash is left to skip; skipping
            // two would swallow the first byte of the comment body and could hide a `-->`
            // terminator that starts there.
            let opener_bytes_in_buffer = 2usize.saturating_sub(previously_filled);
            self.scan_comment(
                bytes,
                AlreadyFoundByteSeqCount(0),
                Offset(offset.0 + opener_bytes_in_buffer),
            )
        }
        b'[' if filled_array[..filled_count] == cdata[..filled_count] => {
            if filled_count == cdata.len() {
                // Full `[CDATA[` seen; `to_fill` bytes of it came from this buffer.
                self.scan_cdata(
                    bytes,
                    AlreadyFoundByteSeqCount(0),
                    Offset(offset.0 + to_fill),
                )
            } else {
                // Still a proper prefix of `[CDATA[`; keep collecting.
                self.state =
                    InternalState::ScanningDeclarationCommentOrCdata(filled_array, filled_count);
                Some(State::ScanningDeclarationCommentOrCdata)
            }
        }
        b'[' => {
            // Not a CDATA section after all. The current buffer is re-scanned from `offset` as
            // a declaration, so only brackets from earlier buffers need to be counted here.
            let bracket_count = filled_array[..previously_filled].iter().fold(
                0u64,
                |depth, byte| match byte {
                    b'[' => depth + 1,
                    b']' => depth.saturating_sub(1),
                    _ => depth,
                },
            );
            self.scan_declaration(
                bytes,
                QuoteState::None,
                BracketCount(bracket_count),
                AlreadyFoundByteSeqCount(0),
                Offset(offset.0),
            )
        }
        // Anything else, including `-` followed by a non-dash, is an ordinary declaration.
        _ => self.scan_declaration(
            bytes,
            QuoteState::None,
            BracketCount(0),
            AlreadyFoundByteSeqCount(0),
            Offset(offset.0),
        ),
    }
}
/// Looks for the `>` that closes a declaration, delegating quote and bracket tracking to
/// `bytes::quote_and_bracket_context_aware_find`.
///
/// `quote_state`, `bracket_count` and `already_found_byte_seq_count` are the search context
/// carried over from the previous buffer; `offset` is where the search starts in `bytes`.
/// An empty buffer ends the scan and yields `None`.
fn scan_declaration(
    &mut self,
    bytes: &[u8],
    quote_state: QuoteState,
    bracket_count: BracketCount,
    already_found_byte_seq_count: AlreadyFoundByteSeqCount,
    offset: Offset,
) -> Option<State> {
    if bytes.is_empty() {
        self.state = InternalState::Eof;
        return None;
    }

    let start = offset.0;
    let (consumed, outcome) = bytes::quote_and_bracket_context_aware_find(
        &bytes[start..],
        b">",
        already_found_byte_seq_count,
        quote_state,
        bracket_count,
    );

    let (next_state, report) = match outcome {
        QuoteAndBracketContextAwareFoundState::Found => (
            InternalState::Reset,
            State::ScannedDeclaration(start + consumed),
        ),
        QuoteAndBracketContextAwareFoundState::NotFound(open_quote, depth, partial) => (
            InternalState::ScanningDeclaration(open_quote, depth, partial),
            State::ScanningDeclaration,
        ),
    };
    self.state = next_state;
    Some(report)
}
fn scan_comment(
&mut self,
bytes: &[u8],
mut already_found_byte_seq_count: AlreadyFoundByteSeqCount,
offset: Offset,
) -> Option<State> {
if bytes.is_empty() {
self.state = InternalState::Eof;
return None;
}
if already_found_byte_seq_count.0 > 0 {
match already_found_byte_seq_count.0 {
1 => {
if bytes.get(offset.0) == Some(&b'-') {
match bytes.get(offset.0 + 1) {
Some(&b'>') => {
self.state = InternalState::Reset;
return Some(State::ScannedComment(offset.0 + 2));
}
None => {
already_found_byte_seq_count.0 = 2;
self.state =
InternalState::ScanningComment(already_found_byte_seq_count);
return Some(State::ScanningComment);
}
_ => {}
}
}
}
2 => {
if bytes.get(offset.0) == Some(&b'>') {
self.state = InternalState::Reset;
return Some(State::ScannedComment(offset.0 + 1));
}
}
_ => unreachable!("should only match up to 2"),
}
}
let mut bytes_to_search = &bytes[offset.0..];
let mut read = 0;
let found;
loop {
if let Some(index) = bytes_to_search.iter().position(|b| *b == b'>') {
let end = index + 1;
read += end;
if index > 1 && &bytes_to_search[index - 2..end] == b"-->" {
found = true;
break;
}
bytes_to_search = &bytes_to_search[end..];
} else {
found = false;
break;
}
}
if found {
self.state = InternalState::Reset;
Some(State::ScannedComment(offset.0 + read))
} else {
let already_found_byte_seq_count =
bytes::find_matching_suffix(b"-->", &bytes[offset.0..]);
self.state = InternalState::ScanningComment(already_found_byte_seq_count);
Some(State::ScanningComment)
}
}
fn scan_cdata(
&mut self,
bytes: &[u8],
mut already_found_byte_seq_count: AlreadyFoundByteSeqCount,
offset: Offset,
) -> Option<State> {
if bytes.is_empty() {
self.state = InternalState::Eof;
return None;
}
if already_found_byte_seq_count.0 > 0 {
match already_found_byte_seq_count.0 {
1 => {
if bytes.get(offset.0) == Some(&b']') {
match bytes.get(offset.0 + 1) {
Some(&b'>') => {
self.state = InternalState::Reset;
return Some(State::ScannedCdata(offset.0 + 2));
}
None => {
already_found_byte_seq_count.0 = 2;
self.state =
InternalState::ScanningCdata(already_found_byte_seq_count);
return Some(State::ScanningCdata);
}
_ => {}
}
}
}
2 => {
if bytes.get(offset.0) == Some(&b'>') {
self.state = InternalState::Reset;
return Some(State::ScannedCdata(offset.0 + 1));
}
}
_ => unreachable!("should only match up to 2"),
}
}
let mut bytes_to_search = &bytes[offset.0..];
let mut read = 0;
let found;
loop {
if let Some(index) = bytes_to_search.iter().position(|b| *b == b'>') {
let end = index + 1;
read += end;
if index > 1 && &bytes_to_search[index - 2..end] == b"]]>" {
found = true;
break;
}
bytes_to_search = &bytes_to_search[end..];
} else {
found = false;
break;
}
}
if found {
self.state = InternalState::Reset;
Some(State::ScannedCdata(offset.0 + read))
} else {
let already_found_byte_seq_count =
bytes::find_matching_suffix(b"]]>", &bytes[offset.0..]);
self.state = InternalState::ScanningCdata(already_found_byte_seq_count);
Some(State::ScanningCdata)
}
}
/// Scans character data up to, but not including, the next `<`.
///
/// An empty buffer marks the end of input and closes the text with
/// `State::ScannedCharacters(0)`. Otherwise the index of the first `<` is reported, or
/// `State::ScanningCharacters` when the buffer contains none.
fn scan_text_content(&mut self, bytes: &[u8]) -> Option<State> {
    if bytes.is_empty() {
        self.state = InternalState::Eof;
        return Some(State::ScannedCharacters(0));
    }

    match bytes.iter().position(|&byte| byte == b'<') {
        Some(markup_start) => {
            self.state = InternalState::Reset;
            Some(State::ScannedCharacters(markup_start))
        }
        None => {
            self.state = InternalState::ScanningCharacters;
            Some(State::ScanningCharacters)
        }
    }
}
/// Scans the next buffer of input and reports how far the current token got.
///
/// `bytes` must begin where the previously reported token ended (or continue the token still
/// in progress). Passing an empty slice signals the end of input: the scanner then returns
/// `None` from this and every later call, except that pending character data is first closed
/// with `State::ScannedCharacters(0)`.
pub fn scan<'a>(&mut self, bytes: &'a [u8]) -> Option<State> {
    // Every resumed token continues at the very start of the new buffer.
    let resume_at = Offset(0);

    match self.state {
        InternalState::Eof => None,
        InternalState::Reset => match bytes::peek(bytes) {
            Some(b'<') => self.scan_markup(bytes),
            Some(_) => self.scan_text_content(bytes),
            None => {
                self.state = InternalState::Eof;
                None
            }
        },
        InternalState::ScanningCharacters => self.scan_text_content(bytes),
        InternalState::ScanningMarkup => self.scan_markup2(bytes),
        InternalState::ScanningStartOrEmptyElementTag(quote_state, is_last_char_slash) => {
            self.scan_start_or_empty_element_tag(
                bytes,
                quote_state,
                is_last_char_slash,
                resume_at,
            )
        }
        InternalState::ScanningEndTag(quote_state) => {
            self.scan_end_tag(bytes, quote_state, resume_at)
        }
        InternalState::ScanningProcessingInstruction(partial) => {
            self.scan_processing_instruction(bytes, partial, resume_at)
        }
        InternalState::ScanningDeclarationCommentOrCdata(collected, collected_len) => {
            self.scan_declaration_comment_or_cdata(bytes, collected, collected_len, resume_at)
        }
        InternalState::ScanningDeclaration(quote_state, bracket_count, partial) => {
            self.scan_declaration(bytes, quote_state, bracket_count, partial, resume_at)
        }
        InternalState::ScanningComment(partial) => self.scan_comment(bytes, partial, resume_at),
        InternalState::ScanningCdata(partial) => self.scan_cdata(bytes, partial, resume_at),
    }
}
}
impl Default for Scanner {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn none_on_empty() {
let mut scanner = Scanner::new();
assert_eq!(scanner.state, InternalState::Reset);
assert_eq!(scanner.scan(b""), None);
assert_eq!(scanner.state, InternalState::Eof);
assert_eq!(scanner.scan(b"<hello>"), None);
assert_eq!(scanner.state, InternalState::Eof);
}
#[test]
fn text_content() {
let mut scanner = Scanner::new();
let bytes = r"Hello".as_bytes();
assert_eq!(scanner.scan(&bytes), Some(State::ScanningCharacters));
assert_eq!(scanner.state, InternalState::ScanningCharacters);
let bytes = r"wo".as_bytes();
assert_eq!(scanner.scan(&bytes), Some(State::ScanningCharacters));
let bytes = r"rld!<".as_bytes();
assert_eq!(scanner.state, InternalState::ScanningCharacters);
assert_eq!(scanner.scan(&bytes), Some(State::ScannedCharacters(4)));
assert_eq!(scanner.state, InternalState::Reset);
}
#[test]
fn text_content_finish_on_empty_bytes() {
let mut scanner = Scanner::new();
let bytes = r"Hello".as_bytes();
assert_eq!(scanner.scan(&bytes), Some(State::ScanningCharacters));
assert_eq!(scanner.state, InternalState::ScanningCharacters);
let bytes = r"".as_bytes();
assert_eq!(scanner.scan(&bytes), Some(State::ScannedCharacters(0)));
assert_eq!(scanner.state, InternalState::Eof);
assert_eq!(scanner.scan(b"<hello>"), None);
assert_eq!(scanner.state, InternalState::Eof);
}
#[test]
fn start_of_markup() {
let mut scanner = Scanner::new();
let bytes = r"<".as_bytes();
assert_eq!(scanner.scan(&bytes), Some(State::ScanningMarkup));
assert_eq!(scanner.state, InternalState::ScanningMarkup);
}
#[test]
fn start_of_markup_eof() {
let mut scanner = Scanner::new();
let bytes = r"<".as_bytes();
assert_eq!(scanner.scan(&bytes), Some(State::ScanningMarkup));
assert_eq!(scanner.state, InternalState::ScanningMarkup);
let bytes = r"".as_bytes();
assert_eq!(scanner.scan(&bytes), None);
assert_eq!(scanner.state, InternalState::Eof);
assert_eq!(scanner.scan(b"<hello>"), None);
assert_eq!(scanner.state, InternalState::Eof);
}
#[test]
fn start_tag_in_one_pass() {
let mut scanner = Scanner::new();
let bytes = r"<hello>".as_bytes();
assert_eq!(scanner.scan(&bytes), Some(State::ScannedStartTag(7)));
assert_eq!(scanner.state, InternalState::Reset);
}
#[test]
fn start_tag_eof() {
let mut scanner = Scanner::new();
let bytes = r"<hello".as_bytes();
assert_eq!(
scanner.scan(&bytes),
Some(State::ScanningStartOrEmptyElementTag)
);
assert_eq!(
scanner.state,
InternalState::ScanningStartOrEmptyElementTag(QuoteState::None, false)
);
let bytes = r"".as_bytes();
assert_eq!(scanner.scan(&bytes), None);
assert_eq!(scanner.state, InternalState::Eof);
assert_eq!(scanner.scan(b"<hello>"), None);
assert_eq!(scanner.state, InternalState::Eof);
}
#[test]
fn start_tag_with_only_markup_in_first_part() {
let mut scanner = Scanner::new();
let bytes = r"<".as_bytes();
assert_eq!(scanner.scan(&bytes), Some(State::ScanningMarkup));
assert_eq!(scanner.state, InternalState::ScanningMarkup);
let bytes = r"hello".as_bytes();
assert_eq!(
scanner.scan(&bytes),
Some(State::ScanningStartOrEmptyElementTag)
);
assert_eq!(
scanner.state,
InternalState::ScanningStartOrEmptyElementTag(QuoteState::None, false)
);
let bytes = r">Content".as_bytes();
assert_eq!(scanner.scan(&bytes), Some(State::ScannedStartTag(1)));
assert_eq!(scanner.state, InternalState::Reset);
}
#[test]
fn start_tag_in_parts() {
let mut scanner = Scanner::new();
let bytes = r"<hello".as_bytes();
assert_eq!(
scanner.scan(&bytes),
Some(State::ScanningStartOrEmptyElementTag)
);
assert_eq!(
scanner.state,
InternalState::ScanningStartOrEmptyElementTag(QuoteState::None, false)
);
let bytes = r">Some content".as_bytes();
assert_eq!(scanner.scan(&bytes), Some(State::ScannedStartTag(1)));
assert_eq!(scanner.state, InternalState::Reset);
}
#[test]
fn start_tag_with_single_quotes_one_pass() {
let mut scanner = Scanner::new();
let bytes = r#"<hello a='val>'>Content"#.as_bytes();
assert_eq!(scanner.scan(&bytes), Some(State::ScannedStartTag(16)));
assert_eq!(scanner.state, InternalState::Reset);
}
#[test]
fn start_tag_with_single_quotes_in_parts() {
let mut scanner = Scanner::new();
let bytes = r#"<hello a='"#.as_bytes();
assert_eq!(
scanner.scan(&bytes),
Some(State::ScanningStartOrEmptyElementTag)
);
assert_eq!(
scanner.state,
InternalState::ScanningStartOrEmptyElementTag(QuoteState::Single, false)
);
let bytes = r#"val>'>Content"#.as_bytes();
assert_eq!(scanner.scan(&bytes), Some(State::ScannedStartTag(6)));
assert_eq!(scanner.state, InternalState::Reset);
}
#[test]
fn start_tag_with_double_quotes_one_pass() {
let mut scanner = Scanner::new();
let bytes = r#"<hello a="val>">Content"#.as_bytes();
assert_eq!(scanner.scan(&bytes), Some(State::ScannedStartTag(16)));
assert_eq!(scanner.state, InternalState::Reset);
}
#[test]
fn start_tag_with_double_quotes_in_parts() {
let mut scanner = Scanner::new();
let bytes = r#"<hello a=""#.as_bytes();
assert_eq!(
scanner.scan(&bytes),
Some(State::ScanningStartOrEmptyElementTag)
);
assert_eq!(
scanner.state,
InternalState::ScanningStartOrEmptyElementTag(QuoteState::Double, false)
);
let bytes = r#"val>">Content"#.as_bytes();
assert_eq!(scanner.scan(&bytes), Some(State::ScannedStartTag(6)));
assert_eq!(scanner.state, InternalState::Reset);
}
#[test]
fn empty_element_tag_in_one_pass() {
let mut scanner = Scanner::new();
let bytes = r"<hello/>".as_bytes();
assert_eq!(scanner.scan(&bytes), Some(State::ScannedEmptyElementTag(8)));
assert_eq!(scanner.state, InternalState::Reset);
}
#[test]
fn empty_element_tag_eof() {
let mut scanner = Scanner::new();
let bytes = r"<hello/".as_bytes();
assert_eq!(
scanner.scan(&bytes),
Some(State::ScanningStartOrEmptyElementTag)
);
assert_eq!(
scanner.state,
InternalState::ScanningStartOrEmptyElementTag(QuoteState::None, true)
);
let bytes = r"".as_bytes();
assert_eq!(scanner.scan(&bytes), None);
assert_eq!(scanner.state, InternalState::Eof);
assert_eq!(scanner.scan(b">"), None);
assert_eq!(scanner.state, InternalState::Eof);
}
#[test]
fn empty_element_tag_with_slash_in_standalone_part() {
let mut scanner = Scanner::new();
let bytes = r"<".as_bytes();
assert_eq!(scanner.scan(&bytes), Some(State::ScanningMarkup));
assert_eq!(scanner.state, InternalState::ScanningMarkup);
let bytes = r"hello".as_bytes();
assert_eq!(
scanner.scan(&bytes),
Some(State::ScanningStartOrEmptyElementTag)
);
assert_eq!(
scanner.state,
InternalState::ScanningStartOrEmptyElementTag(QuoteState::None, false)
);
let bytes = r"/".as_bytes();
assert_eq!(
scanner.scan(&bytes),
Some(State::ScanningStartOrEmptyElementTag)
);
assert_eq!(
scanner.state,
InternalState::ScanningStartOrEmptyElementTag(QuoteState::None, true)
);
let bytes = r">Content".as_bytes();
assert_eq!(scanner.scan(&bytes), Some(State::ScannedEmptyElementTag(1)));
assert_eq!(scanner.state, InternalState::Reset);
}
#[test]
fn empty_element_tag_with_only_markup_in_first_part() {
let mut scanner = Scanner::new();
let bytes = r"<".as_bytes();
assert_eq!(scanner.scan(&bytes), Some(State::ScanningMarkup));
assert_eq!(scanner.state, InternalState::ScanningMarkup);
let bytes = r"hello".as_bytes();
assert_eq!(
scanner.scan(&bytes),
Some(State::ScanningStartOrEmptyElementTag)
);
assert_eq!(
scanner.state,
InternalState::ScanningStartOrEmptyElementTag(QuoteState::None, false)
);
let bytes = r"/>Content".as_bytes();
assert_eq!(scanner.scan(&bytes), Some(State::ScannedEmptyElementTag(2)));
assert_eq!(scanner.state, InternalState::Reset);
}
#[test]
fn empty_element_tag_with_double_quotes_one_pass() {
let mut scanner = Scanner::new();
let bytes = r#"<hello a="val/>"/>Content"#.as_bytes();
assert_eq!(
scanner.scan(&bytes),
Some(State::ScannedEmptyElementTag(18))
);
assert_eq!(scanner.state, InternalState::Reset);
}
#[test]
fn empty_element_tag_with_double_quotes_in_parts() {
let mut scanner = Scanner::new();
let bytes = r#"<hello a=""#.as_bytes();
assert_eq!(
scanner.scan(&bytes),
Some(State::ScanningStartOrEmptyElementTag)
);
assert_eq!(
scanner.state,
InternalState::ScanningStartOrEmptyElementTag(QuoteState::Double, false)
);
let bytes = r#"val/>"/>Content"#.as_bytes();
assert_eq!(scanner.scan(&bytes), Some(State::ScannedEmptyElementTag(8)));
assert_eq!(scanner.state, InternalState::Reset);
}
#[test]
fn empty_element_tag_last_slash_split_across_parts() {
let mut scanner = Scanner::new();
let bytes = r#"<hello/"#.as_bytes();
assert_eq!(
scanner.scan(&bytes),
Some(State::ScanningStartOrEmptyElementTag)
);
assert_eq!(
scanner.state,
InternalState::ScanningStartOrEmptyElementTag(QuoteState::None, true)
);
let bytes = r#">Content"#.as_bytes();
assert_eq!(scanner.scan(&bytes), Some(State::ScannedEmptyElementTag(1)));
assert_eq!(scanner.state, InternalState::Reset);
}
#[test]
fn empty_element_tag_last_slash_split_across_parts_with_single_quotes() {
let mut scanner = Scanner::new();
let bytes = r#"<hello attr='/"#.as_bytes();
assert_eq!(
scanner.scan(&bytes),
Some(State::ScanningStartOrEmptyElementTag)
);
assert_eq!(
scanner.state,
InternalState::ScanningStartOrEmptyElementTag(QuoteState::Single, false)
);
let bytes = r#">'/>Content"#.as_bytes();
assert_eq!(scanner.scan(&bytes), Some(State::ScannedEmptyElementTag(4)));
assert_eq!(scanner.state, InternalState::Reset);
}
#[test]
fn empty_element_tag_last_slash_split_across_parts_with_double_quotes() {
let mut scanner = Scanner::new();
let bytes = r#"<hello attr="/"#.as_bytes();
assert_eq!(
scanner.scan(&bytes),
Some(State::ScanningStartOrEmptyElementTag)
);
assert_eq!(
scanner.state,
InternalState::ScanningStartOrEmptyElementTag(QuoteState::Double, false)
);
let bytes = r#">"/>Content"#.as_bytes();
assert_eq!(scanner.scan(&bytes), Some(State::ScannedEmptyElementTag(4)));
assert_eq!(scanner.state, InternalState::Reset);
}
#[test]
fn start_tag_last_slash_split_across_parts() {
let mut scanner = Scanner::new();
let bytes = r#"<hello/"#.as_bytes();
assert_eq!(
scanner.scan(&bytes),
Some(State::ScanningStartOrEmptyElementTag)
);
assert_eq!(
scanner.state,
InternalState::ScanningStartOrEmptyElementTag(QuoteState::None, true)
);
let bytes = r#" invalid>Content"#.as_bytes();
assert_eq!(scanner.scan(&bytes), Some(State::ScannedStartTag(9)));
assert_eq!(scanner.state, InternalState::Reset);
}
#[test]
fn end_tag_in_one_pass() {
let mut scanner = Scanner::new();
let bytes = r"</goodbye>".as_bytes();
assert_eq!(scanner.scan(&bytes), Some(State::ScannedEndTag(10)));
assert_eq!(scanner.state, InternalState::Reset);
}
#[test]
fn end_tag_eof() {
let mut scanner = Scanner::new();
let bytes = r"</hello".as_bytes();
assert_eq!(scanner.scan(&bytes), Some(State::ScanningEndTag));
assert_eq!(
scanner.state,
InternalState::ScanningEndTag(QuoteState::None)
);
let bytes = r"".as_bytes();
assert_eq!(scanner.scan(&bytes), None);
assert_eq!(scanner.state, InternalState::Eof);
assert_eq!(scanner.scan(b">"), None);
assert_eq!(scanner.state, InternalState::Eof);
}
#[test]
fn end_tag_with_only_markup_in_first_part() {
let mut scanner = Scanner::new();
let bytes = r"<".as_bytes();
assert_eq!(scanner.scan(&bytes), Some(State::ScanningMarkup));
assert_eq!(scanner.state, InternalState::ScanningMarkup);
let bytes = r"/goodbye".as_bytes();
assert_eq!(scanner.scan(&bytes), Some(State::ScanningEndTag));
assert_eq!(
scanner.state,
InternalState::ScanningEndTag(QuoteState::None)
);
let bytes = r">Content".as_bytes();
assert_eq!(scanner.scan(&bytes), Some(State::ScannedEndTag(1)));
assert_eq!(scanner.state, InternalState::Reset);
}
#[test]
fn end_tag_with_slash_as_only_part() {
let mut scanner = Scanner::new();
let bytes = r"<".as_bytes();
assert_eq!(scanner.scan(&bytes), Some(State::ScanningMarkup));
assert_eq!(scanner.state, InternalState::ScanningMarkup);
let bytes = r"/".as_bytes();
assert_eq!(scanner.scan(&bytes), Some(State::ScanningEndTag));
assert_eq!(
scanner.state,
InternalState::ScanningEndTag(QuoteState::None)
);
let bytes = r"goodbye>Content".as_bytes();
assert_eq!(scanner.scan(&bytes), Some(State::ScannedEndTag(8)));
assert_eq!(scanner.state, InternalState::Reset);
}
#[test]
fn end_tag_in_parts() {
let mut scanner = Scanner::new();
let bytes = r"</goodbye".as_bytes();
assert_eq!(scanner.scan(&bytes), Some(State::ScanningEndTag));
assert_eq!(
scanner.state,
InternalState::ScanningEndTag(QuoteState::None)
);
let bytes = r">Some content".as_bytes();
assert_eq!(scanner.scan(&bytes), Some(State::ScannedEndTag(1)));
assert_eq!(scanner.state, InternalState::Reset);
}
#[test]
fn end_tag_with_single_quotes_one_pass() {
let mut scanner = Scanner::new();
let bytes = r#"</goodbye a='val>'>Content"#.as_bytes();
assert_eq!(scanner.scan(&bytes), Some(State::ScannedEndTag(19)));
assert_eq!(scanner.state, InternalState::Reset);
}
#[test]
fn end_tag_with_single_quotes_in_parts() {
let mut scanner = Scanner::new();
let bytes = r#"</goodbye a='"#.as_bytes();
assert_eq!(scanner.scan(&bytes), Some(State::ScanningEndTag));
assert_eq!(
scanner.state,
InternalState::ScanningEndTag(QuoteState::Single)
);
let bytes = r#"val>'>Content"#.as_bytes();
assert_eq!(scanner.scan(&bytes), Some(State::ScannedEndTag(6)));
assert_eq!(scanner.state, InternalState::Reset);
}
#[test]
fn end_tag_with_double_quotes_one_pass() {
let mut scanner = Scanner::new();
let bytes = r#"</goodbye a="val>">Content"#.as_bytes();
assert_eq!(scanner.scan(&bytes), Some(State::ScannedEndTag(19)));
assert_eq!(scanner.state, InternalState::Reset);
}
#[test]
fn end_tag_with_double_quotes_in_parts() {
let mut scanner = Scanner::new();
let bytes = r#"</goodbye a=""#.as_bytes();
assert_eq!(scanner.scan(&bytes), Some(State::ScanningEndTag));
assert_eq!(
scanner.state,
InternalState::ScanningEndTag(QuoteState::Double)
);
let bytes = r#"val>">Content"#.as_bytes();
assert_eq!(scanner.scan(&bytes), Some(State::ScannedEndTag(6)));
assert_eq!(scanner.state, InternalState::Reset);
}
#[test]
fn pi_in_one_pass() {
let mut scanner = Scanner::new();
let bytes = r#"<?test a="b" ?>"#.as_bytes();
assert_eq!(
scanner.scan(&bytes),
Some(State::ScannedProcessingInstruction(15))
);
assert_eq!(scanner.state, InternalState::Reset);
}
#[test]
fn pi_eof() {
let mut scanner = Scanner::new();
let bytes = r#"<?test a="b" ?"#.as_bytes();
assert_eq!(
scanner.scan(&bytes),
Some(State::ScanningProcessingInstruction)
);
assert_eq!(
scanner.state,
InternalState::ScanningProcessingInstruction(AlreadyFoundByteSeqCount(1))
);
let bytes = r"".as_bytes();
assert_eq!(scanner.scan(&bytes), None);
assert_eq!(scanner.state, InternalState::Eof);
assert_eq!(scanner.scan(b">"), None);
assert_eq!(scanner.state, InternalState::Eof);
}
#[test]
fn pi_with_only_markup_in_first_part() {
let mut scanner = Scanner::new();
let bytes = r"<".as_bytes();
assert_eq!(scanner.scan(&bytes), Some(State::ScanningMarkup));
assert_eq!(scanner.state, InternalState::ScanningMarkup);
let bytes = r"?test".as_bytes();
assert_eq!(
scanner.scan(&bytes),
Some(State::ScanningProcessingInstruction)
);
assert_eq!(
scanner.state,
InternalState::ScanningProcessingInstruction(AlreadyFoundByteSeqCount(0))
);
let bytes = r"?>Content".as_bytes();
assert_eq!(
scanner.scan(&bytes),
Some(State::ScannedProcessingInstruction(2))
);
assert_eq!(scanner.state, InternalState::Reset);
}
#[test]
fn pi_with_question_mark_as_only_part() {
let mut scanner = Scanner::new();
let bytes = r"<".as_bytes();
assert_eq!(scanner.scan(&bytes), Some(State::ScanningMarkup));
assert_eq!(scanner.state, InternalState::ScanningMarkup);
let bytes = r"?".as_bytes();
assert_eq!(
scanner.scan(&bytes),
Some(State::ScanningProcessingInstruction)
);
assert_eq!(
scanner.state,
InternalState::ScanningProcessingInstruction(AlreadyFoundByteSeqCount(0))
);
let bytes = r"test ?>Content".as_bytes();
assert_eq!(
scanner.scan(&bytes),
Some(State::ScannedProcessingInstruction(7))
);
assert_eq!(scanner.state, InternalState::Reset);
}
#[test]
fn pi_in_parts() {
let mut scanner = Scanner::new();
let bytes = r"<?test".as_bytes();
assert_eq!(
scanner.scan(&bytes),
Some(State::ScanningProcessingInstruction)
);
assert_eq!(
scanner.state,
InternalState::ScanningProcessingInstruction(AlreadyFoundByteSeqCount(0))
);
let bytes = r">invalid?>Some content".as_bytes();
assert_eq!(
scanner.scan(&bytes),
Some(State::ScannedProcessingInstruction(10))
);
assert_eq!(scanner.state, InternalState::Reset);
}
#[test]
fn pi_ensure_does_not_reuse() {
let mut scanner = Scanner::new();
let bytes = r"<?".as_bytes();
assert_eq!(
scanner.scan(&bytes),
Some(State::ScanningProcessingInstruction)
);
assert_eq!(
scanner.state,
InternalState::ScanningProcessingInstruction(AlreadyFoundByteSeqCount(0))
);
let bytes = r">invalid?>Some content".as_bytes();
assert_eq!(
scanner.scan(&bytes),
Some(State::ScannedProcessingInstruction(10))
);
assert_eq!(scanner.state, InternalState::Reset);
}
#[test]
fn pi_with_broken_delimiter() {
let mut scanner = Scanner::new();
let bytes = r"<?test".as_bytes();
assert_eq!(
scanner.scan(&bytes),
Some(State::ScanningProcessingInstruction)
);
assert_eq!(
scanner.state,
InternalState::ScanningProcessingInstruction(AlreadyFoundByteSeqCount(0))
);
let bytes = r"?".as_bytes();
assert_eq!(
scanner.scan(&bytes),
Some(State::ScanningProcessingInstruction)
);
assert_eq!(
scanner.state,
InternalState::ScanningProcessingInstruction(AlreadyFoundByteSeqCount(1))
);
let bytes = r#" > a="v""#.as_bytes();
assert_eq!(
scanner.scan(&bytes),
Some(State::ScanningProcessingInstruction)
);
assert_eq!(
scanner.state,
InternalState::ScanningProcessingInstruction(AlreadyFoundByteSeqCount(0))
);
let bytes = r#"?"#.as_bytes();
assert_eq!(
scanner.scan(&bytes),
Some(State::ScanningProcessingInstruction)
);
assert_eq!(
scanner.state,
InternalState::ScanningProcessingInstruction(AlreadyFoundByteSeqCount(1))
);
let bytes = r#">"#.as_bytes();
assert_eq!(
scanner.scan(&bytes),
Some(State::ScannedProcessingInstruction(1))
);
assert_eq!(scanner.state, InternalState::Reset);
}
#[test]
fn pi_with_single_quotes_one_pass() {
let mut scanner = Scanner::new();
let bytes = r#"<?goodbye a='val>'?>Content"#.as_bytes();
assert_eq!(
scanner.scan(&bytes),
Some(State::ScannedProcessingInstruction(20))
);
assert_eq!(scanner.state, InternalState::Reset);
}
#[test]
fn pi_with_single_quotes_in_parts() {
let mut scanner = Scanner::new();
let bytes = r#"<?goodbye a='?"#.as_bytes();
assert_eq!(
scanner.scan(&bytes),
Some(State::ScanningProcessingInstruction)
);
assert_eq!(
scanner.state,
InternalState::ScanningProcessingInstruction(AlreadyFoundByteSeqCount(1))
);
let bytes = r#"val?>'?>Content"#.as_bytes();
assert_eq!(
scanner.scan(&bytes),
Some(State::ScannedProcessingInstruction(5))
);
assert_eq!(scanner.state, InternalState::Reset);
}
#[test]
fn pi_with_double_quotes_one_pass() {
let mut scanner = Scanner::new();
let bytes = r#"<?goodbye a="val?>"?>Content"#.as_bytes();
assert_eq!(
scanner.scan(&bytes),
Some(State::ScannedProcessingInstruction(18))
);
assert_eq!(scanner.state, InternalState::Reset);
}
#[test]
fn pi_with_double_quotes_in_parts() {
    let mut sc = Scanner::new();

    // The first chunk ends on a `?`; the expected internal state records one
    // already-found delimiter byte.
    let head: &[u8] = br#"<?goodbye a="?"#;
    assert_eq!(sc.scan(head), Some(State::ScanningProcessingInstruction));
    assert_eq!(
        sc.state,
        InternalState::ScanningProcessingInstruction(AlreadyFoundByteSeqCount(1))
    );

    // The expected end is the first `?>` of the second chunk (5 bytes in),
    // which appears before the closing double quote.
    let tail: &[u8] = br#"val?>"?>Content"#;
    assert_eq!(sc.scan(tail), Some(State::ScannedProcessingInstruction(5)));
    assert_eq!(sc.state, InternalState::Reset);
}
#[test]
fn pi_not_reuse_question_mark() {
    // `<?>` is expected to leave the scanner inside the processing
    // instruction with no delimiter bytes found: the `?` of the opening `<?`
    // is not counted as the start of a closing `?>`.
    let mut sc = Scanner::new();
    let input: &[u8] = br#"<?>"#;
    let outcome = sc.scan(input);
    assert_eq!(outcome, Some(State::ScanningProcessingInstruction));
    assert_eq!(
        sc.state,
        InternalState::ScanningProcessingInstruction(AlreadyFoundByteSeqCount(0))
    );
}
#[test]
fn pi_not_reuse_question_mark_across_parts() {
    let mut sc = Scanner::new();

    // Only the opening `<?` is supplied; no delimiter bytes are expected to
    // be recorded.
    let opener: &[u8] = br#"<?"#;
    assert_eq!(sc.scan(opener), Some(State::ScanningProcessingInstruction));
    assert_eq!(
        sc.state,
        InternalState::ScanningProcessingInstruction(AlreadyFoundByteSeqCount(0))
    );

    // A `>` arriving in the next chunk is not expected to complete the
    // processing instruction.
    let closer: &[u8] = br#">"#;
    assert_eq!(sc.scan(closer), Some(State::ScanningProcessingInstruction));
    assert_eq!(
        sc.state,
        InternalState::ScanningProcessingInstruction(AlreadyFoundByteSeqCount(0))
    );
}
#[test]
fn declaration_in_one_pass() {
    // A DOCTYPE with a bracketed internal subset is expected to be consumed
    // in full by a single scan.
    let mut sc = Scanner::new();
    let input: &[u8] = br#"<!DOCTYPE test [<!ELEMENT test (#PCDATA)>]>"#;
    let outcome = sc.scan(input);
    assert_eq!(outcome, Some(State::ScannedDeclaration(input.len())));
    assert_eq!(sc.state, InternalState::Reset);
}
#[test]
fn declaration_eof() {
    let mut sc = Scanner::new();

    // An unterminated declaration with one open `[`.
    let partial: &[u8] = br#"<!DOCTYPE test ["#;
    assert_eq!(sc.scan(partial), Some(State::ScanningDeclaration));
    assert_eq!(
        sc.state,
        InternalState::ScanningDeclaration(
            QuoteState::None,
            BracketCount(1),
            AlreadyFoundByteSeqCount(0)
        )
    );

    // An empty slice is expected to move the scanner to `Eof` and yield `None`.
    let empty: &[u8] = b"";
    assert_eq!(sc.scan(empty), None);
    assert_eq!(sc.state, InternalState::Eof);

    // Further input after that is expected to be ignored.
    assert_eq!(sc.scan(b">"), None);
    assert_eq!(sc.state, InternalState::Eof);
}
#[test]
fn declaration_with_only_markup_in_first_part() {
    let mut sc = Scanner::new();

    // Only the `<` is available at first.
    let lt: &[u8] = b"<";
    assert_eq!(sc.scan(lt), Some(State::ScanningMarkup));
    assert_eq!(sc.state, InternalState::ScanningMarkup);

    // `!ELEMENT` is expected to be classified as a declaration.
    let keyword: &[u8] = b"!ELEMENT";
    assert_eq!(sc.scan(keyword), Some(State::ScanningDeclaration));
    assert_eq!(
        sc.state,
        InternalState::ScanningDeclaration(
            QuoteState::None,
            BracketCount(0),
            AlreadyFoundByteSeqCount(0)
        )
    );

    // The leading `>` of the last chunk closes the declaration (1 byte).
    let rest: &[u8] = b">Content";
    assert_eq!(sc.scan(rest), Some(State::ScannedDeclaration(1)));
    assert_eq!(sc.state, InternalState::Reset);
}
#[test]
fn declaration_with_exclamation_as_only_part() {
    let mut sc = Scanner::new();

    let lt: &[u8] = b"<";
    assert_eq!(sc.scan(lt), Some(State::ScanningMarkup));
    assert_eq!(sc.state, InternalState::ScanningMarkup);

    // With only `!` seen, the kind of markup is still undecided and nothing
    // is expected to be buffered yet.
    let bang: &[u8] = b"!";
    assert_eq!(
        sc.scan(bang),
        Some(State::ScanningDeclarationCommentOrCdata)
    );
    assert_eq!(
        sc.state,
        InternalState::ScanningDeclarationCommentOrCdata([0; 7], 0)
    );

    // `test >` is expected to resolve to a declaration ending 6 bytes in.
    let rest: &[u8] = b"test >Content";
    assert_eq!(sc.scan(rest), Some(State::ScannedDeclaration(6)));
    assert_eq!(sc.state, InternalState::Reset);
}
#[test]
fn declaration_in_parts() {
    let mut sc = Scanner::new();

    // The declaration is opened but not closed in the first chunk.
    let head: &[u8] = b"<!test";
    assert_eq!(sc.scan(head), Some(State::ScanningDeclaration));
    assert_eq!(
        sc.state,
        InternalState::ScanningDeclaration(
            QuoteState::None,
            BracketCount(0),
            AlreadyFoundByteSeqCount(0)
        )
    );

    // The `>` at the start of the second chunk closes it (1 byte).
    let tail: &[u8] = b">Some content";
    assert_eq!(sc.scan(tail), Some(State::ScannedDeclaration(1)));
    assert_eq!(sc.state, InternalState::Reset);
}
#[test]
fn declaration_with_single_quotes_one_pass() {
    // The `>` inside the single-quoted value is expected to be skipped; the
    // declaration ends at the `>` after the closing quote (19 bytes).
    let mut sc = Scanner::new();
    let input: &[u8] = br#"<!goodbye a='val>'>Content"#;
    let outcome = sc.scan(input);
    assert_eq!(outcome, Some(State::ScannedDeclaration(19)));
    assert_eq!(sc.state, InternalState::Reset);
}
#[test]
fn declaration_with_single_quotes_in_parts() {
    let mut sc = Scanner::new();

    // The chunk ends inside an open single quote; the `>` after the quote is
    // not expected to terminate the declaration.
    let head: &[u8] = br#"<!goodbye a='>"#;
    assert_eq!(sc.scan(head), Some(State::ScanningDeclaration));
    assert_eq!(
        sc.state,
        InternalState::ScanningDeclaration(
            QuoteState::Single,
            BracketCount(0),
            AlreadyFoundByteSeqCount(0)
        )
    );

    // The quote closes in the second chunk; the `>` right after it is the
    // expected end (6 bytes in).
    let tail: &[u8] = br#"val>'>Content"#;
    assert_eq!(sc.scan(tail), Some(State::ScannedDeclaration(6)));
    assert_eq!(sc.state, InternalState::Reset);
}
#[test]
fn declaration_with_double_quotes_one_pass() {
    // The `>` inside the double-quoted value is expected to be skipped; the
    // declaration ends at the `>` after the closing quote (19 bytes).
    let mut sc = Scanner::new();
    let input: &[u8] = br#"<!goodbye a="val>">Content"#;
    let outcome = sc.scan(input);
    assert_eq!(outcome, Some(State::ScannedDeclaration(19)));
    assert_eq!(sc.state, InternalState::Reset);
}
#[test]
fn declaration_with_double_quotes_in_parts() {
    let mut sc = Scanner::new();

    // The chunk ends inside an open double quote; the `>` after the quote is
    // not expected to terminate the declaration.
    let head: &[u8] = br#"<!goodbye a=">"#;
    assert_eq!(sc.scan(head), Some(State::ScanningDeclaration));
    assert_eq!(
        sc.state,
        InternalState::ScanningDeclaration(
            QuoteState::Double,
            BracketCount(0),
            AlreadyFoundByteSeqCount(0)
        )
    );

    // The quote closes in the second chunk; the `>` right after it is the
    // expected end (6 bytes in).
    let tail: &[u8] = br#"val>">Content"#;
    assert_eq!(sc.scan(tail), Some(State::ScannedDeclaration(6)));
    assert_eq!(sc.state, InternalState::Reset);
}
#[test]
fn declaration_with_closed_brackets() {
    // Two `[` matched by two `]`: the inner `>` is not expected to end the
    // declaration, and the whole input is reported as scanned.
    let mut sc = Scanner::new();
    let input: &[u8] = br#"<![%test;[<!ELEMENT test (something*)>]]>"#;
    let outcome = sc.scan(input);
    assert_eq!(outcome, Some(State::ScannedDeclaration(input.len())));
    assert_eq!(sc.state, InternalState::Reset);
}
#[test]
fn declaration_with_unclosed_single_bracket() {
    let mut sc = Scanner::new();

    // One `[` is open, so the trailing `>` is not expected to end the
    // declaration.
    let head: &[u8] = br#"<![test>"#;
    assert_eq!(sc.scan(head), Some(State::ScanningDeclaration));
    assert_eq!(
        sc.state,
        InternalState::ScanningDeclaration(
            QuoteState::None,
            BracketCount(1),
            AlreadyFoundByteSeqCount(0)
        )
    );

    // The first `>` is still inside the bracket; after the `]` the next `>`
    // is the expected end (4 bytes in).
    let tail: &[u8] = br#">] >Content"#;
    assert_eq!(sc.scan(tail), Some(State::ScannedDeclaration(4)));
    assert_eq!(sc.state, InternalState::Reset);
}
#[test]
fn declaration_with_unclosed_double_bracket() {
    let mut sc = Scanner::new();

    // One `[` open after the first chunk.
    let first: &[u8] = br#"<![test>"#;
    assert_eq!(sc.scan(first), Some(State::ScanningDeclaration));
    assert_eq!(
        sc.state,
        InternalState::ScanningDeclaration(
            QuoteState::None,
            BracketCount(1),
            AlreadyFoundByteSeqCount(0)
        )
    );

    // A second `[` raises the expected bracket count to two.
    let second: &[u8] = br#"[more"#;
    assert_eq!(sc.scan(second), Some(State::ScanningDeclaration));
    assert_eq!(
        sc.state,
        InternalState::ScanningDeclaration(
            QuoteState::None,
            BracketCount(2),
            AlreadyFoundByteSeqCount(0)
        )
    );

    // A single `]` only brings the count back to one, so none of the `>`
    // bytes in this chunk are expected to end the declaration.
    let third: &[u8] = br#">] >Content>"#;
    assert_eq!(sc.scan(third), Some(State::ScanningDeclaration));
    assert_eq!(
        sc.state,
        InternalState::ScanningDeclaration(
            QuoteState::None,
            BracketCount(1),
            AlreadyFoundByteSeqCount(0)
        )
    );

    // The remaining bracket closes here; the `>` after it is the expected
    // end (4 bytes in).
    let fourth: &[u8] = br#">] >Content"#;
    assert_eq!(sc.scan(fourth), Some(State::ScannedDeclaration(4)));
    assert_eq!(sc.state, InternalState::Reset);
}
#[test]
fn comment_in_one_pass() {
    // A complete comment is expected to be consumed in full by one scan.
    let mut sc = Scanner::new();
    let input: &[u8] = br#"<!-- Comment -->"#;
    let outcome = sc.scan(input);
    assert_eq!(outcome, Some(State::ScannedComment(input.len())));
    assert_eq!(sc.state, InternalState::Reset);
}
#[test]
fn comment_eof() {
    let mut sc = Scanner::new();

    // An unterminated comment.
    let partial: &[u8] = br#"<!-- Comment"#;
    assert_eq!(sc.scan(partial), Some(State::ScanningComment));
    assert_eq!(
        sc.state,
        InternalState::ScanningComment(AlreadyFoundByteSeqCount(0))
    );

    // An empty slice is expected to move the scanner to `Eof` and yield `None`.
    let empty: &[u8] = b"";
    assert_eq!(sc.scan(empty), None);
    assert_eq!(sc.state, InternalState::Eof);

    // Further input after that is expected to be ignored.
    assert_eq!(sc.scan(b">"), None);
    assert_eq!(sc.state, InternalState::Eof);
}
#[test]
fn comment_with_only_markup_in_first_part() {
    let mut sc = Scanner::new();

    // Only the `<` is available at first.
    let lt: &[u8] = b"<";
    assert_eq!(sc.scan(lt), Some(State::ScanningMarkup));
    assert_eq!(sc.state, InternalState::ScanningMarkup);

    // `!--` is expected to be recognised as a comment opener, with none of
    // its dashes counted toward the closing delimiter.
    let opener: &[u8] = b"!--";
    assert_eq!(sc.scan(opener), Some(State::ScanningComment));
    assert_eq!(
        sc.state,
        InternalState::ScanningComment(AlreadyFoundByteSeqCount(0))
    );

    // The comment is expected to end just past `-->` (12 bytes in).
    let rest: &[u8] = b" Comment --> Content";
    assert_eq!(sc.scan(rest), Some(State::ScannedComment(12)));
    assert_eq!(sc.state, InternalState::Reset);
}
#[test]
fn comment_with_exclamation_as_only_part() {
    let mut sc = Scanner::new();

    let lt: &[u8] = b"<";
    assert_eq!(sc.scan(lt), Some(State::ScanningMarkup));
    assert_eq!(sc.state, InternalState::ScanningMarkup);

    // With only `!` seen, the kind of markup is still undecided and nothing
    // is expected to be buffered yet.
    let bang: &[u8] = b"!";
    assert_eq!(
        sc.scan(bang),
        Some(State::ScanningDeclarationCommentOrCdata)
    );
    assert_eq!(
        sc.state,
        InternalState::ScanningDeclarationCommentOrCdata([0; 7], 0)
    );

    // The leading `--` is expected to resolve to a comment ending just past
    // `-->` (14 bytes in).
    let rest: &[u8] = b"-- Comment -->Content";
    assert_eq!(sc.scan(rest), Some(State::ScannedComment(14)));
    assert_eq!(sc.state, InternalState::Reset);
}
#[test]
fn comment_in_parts() {
    let mut sc = Scanner::new();

    // The comment is opened but not closed in the first chunk.
    let head: &[u8] = b"<!-- test";
    assert_eq!(sc.scan(head), Some(State::ScanningComment));
    assert_eq!(
        sc.state,
        InternalState::ScanningComment(AlreadyFoundByteSeqCount(0))
    );

    // ` -->` at the start of the second chunk is the expected end (4 bytes).
    let tail: &[u8] = b" -->Some content";
    assert_eq!(sc.scan(tail), Some(State::ScannedComment(4)));
    assert_eq!(sc.state, InternalState::Reset);
}
#[test]
fn comment_with_single_quotes_one_pass() {
    // The expected length of 22 points just past the `-->` that appears
    // before the closing single quote, i.e. the first `-->` in the input.
    let mut sc = Scanner::new();
    let input: &[u8] = br#"<!-- goodbye a='val-->'-->Content"#;
    let outcome = sc.scan(input);
    assert_eq!(outcome, Some(State::ScannedComment(22)));
    assert_eq!(sc.state, InternalState::Reset);
}
#[test]
fn comment_with_single_quotes_in_parts() {
    let mut sc = Scanner::new();

    // The chunk ends in `--`; two delimiter bytes are expected to be recorded.
    let head: &[u8] = br#"<!--goodbye a='--"#;
    assert_eq!(sc.scan(head), Some(State::ScanningComment));
    assert_eq!(
        sc.state,
        InternalState::ScanningComment(AlreadyFoundByteSeqCount(2))
    );

    // The lone `>` after `val` is not expected to end the comment; the end
    // is just past the `-->` (8 bytes in).
    let tail: &[u8] = br#"val>'-->Content"#;
    assert_eq!(sc.scan(tail), Some(State::ScannedComment(8)));
    assert_eq!(sc.state, InternalState::Reset);
}
#[test]
fn comment_with_double_quotes_one_pass() {
    // The expected length of 21 points just past the `-->` that appears
    // before the closing double quote, i.e. the first `-->` in the input.
    let mut sc = Scanner::new();
    let input: &[u8] = br#"<!--goodbye a="val-->"-->Content"#;
    let outcome = sc.scan(input);
    assert_eq!(outcome, Some(State::ScannedComment(21)));
    assert_eq!(sc.state, InternalState::Reset);
}
#[test]
fn comment_with_double_quotes_in_parts() {
    let mut sc = Scanner::new();

    // The chunk ends in `--`; two delimiter bytes are expected to be recorded.
    let head: &[u8] = br#"<!--goodbye a="--"#;
    assert_eq!(sc.scan(head), Some(State::ScanningComment));
    assert_eq!(
        sc.state,
        InternalState::ScanningComment(AlreadyFoundByteSeqCount(2))
    );

    // The expected end is the first `-->` of the second chunk (6 bytes in),
    // which appears before the closing double quote.
    let tail: &[u8] = br#"val-->"-->Content"#;
    assert_eq!(sc.scan(tail), Some(State::ScannedComment(6)));
    assert_eq!(sc.state, InternalState::Reset);
}
#[test]
fn comment_with_invalid_start() {
    let mut sc = Scanner::new();

    // `<!-` followed by a non-dash is expected to be scanned as a
    // declaration, here left inside an open double quote.
    let head: &[u8] = br#"<!-goodbye a="-->"#;
    assert_eq!(sc.scan(head), Some(State::ScanningDeclaration));
    assert_eq!(
        sc.state,
        InternalState::ScanningDeclaration(
            QuoteState::Double,
            BracketCount(0),
            AlreadyFoundByteSeqCount(0)
        )
    );

    // The `-->` before the closing quote is not the expected end; the `>`
    // after the closing quote is (8 bytes in).
    let tail: &[u8] = br#"val-->">Content"#;
    assert_eq!(sc.scan(tail), Some(State::ScannedDeclaration(8)));
    assert_eq!(sc.state, InternalState::Reset);
}
#[test]
fn comment_with_double_dash_inside() {
    let mut sc = Scanner::new();

    // The chunk ends in `--`; two delimiter bytes are expected to be recorded.
    let head: &[u8] = br#"<!--goodbye a="--"#;
    assert_eq!(sc.scan(head), Some(State::ScanningComment));
    assert_eq!(
        sc.state,
        InternalState::ScanningComment(AlreadyFoundByteSeqCount(2))
    );

    // The expected end is the first `-->` (6 bytes in); the later `--` and
    // `-->` in this chunk lie beyond the reported length.
    let tail: &[u8] = br#"val-->"-- test -->Content"#;
    assert_eq!(sc.scan(tail), Some(State::ScannedComment(6)));
    assert_eq!(sc.state, InternalState::Reset);
}
#[test]
fn comment_with_single_dash() {
    let mut sc = Scanner::new();

    // The chunk ends in `--`; two delimiter bytes are expected to be recorded.
    let first: &[u8] = br#"<!--goodbye a="--"#;
    assert_eq!(sc.scan(first), Some(State::ScanningComment));
    assert_eq!(
        sc.state,
        InternalState::ScanningComment(AlreadyFoundByteSeqCount(2))
    );

    // Neither `--"` nor `->` is a full `-->`, so the comment is expected to
    // stay open with no delimiter bytes pending.
    let second: &[u8] = br#"val--" test ->Content"#;
    assert_eq!(sc.scan(second), Some(State::ScanningComment));
    assert_eq!(
        sc.state,
        InternalState::ScanningComment(AlreadyFoundByteSeqCount(0))
    );

    // The real `-->` arrives here; the expected end is 8 bytes in.
    let third: &[u8] = br#"More -->Real Content"#;
    assert_eq!(sc.scan(third), Some(State::ScannedComment(8)));
    assert_eq!(sc.state, InternalState::Reset);
}
#[test]
fn comment_with_split_terminating_delimiter() {
    let mut sc = Scanner::new();

    // The chunk ends in `--`; two delimiter bytes are expected to be recorded.
    let first: &[u8] = br#"<!--goodbye a="--"#;
    assert_eq!(sc.scan(first), Some(State::ScanningComment));
    assert_eq!(
        sc.state,
        InternalState::ScanningComment(AlreadyFoundByteSeqCount(2))
    );

    // This chunk ends on a single `-`: one delimiter byte expected.
    let second: &[u8] = br#"val->" -"#;
    assert_eq!(sc.scan(second), Some(State::ScanningComment));
    assert_eq!(
        sc.state,
        InternalState::ScanningComment(AlreadyFoundByteSeqCount(1))
    );

    // A second `-` on its own: two delimiter bytes expected.
    let third: &[u8] = br#"-"#;
    assert_eq!(sc.scan(third), Some(State::ScanningComment));
    assert_eq!(
        sc.state,
        InternalState::ScanningComment(AlreadyFoundByteSeqCount(2))
    );

    // The final `>` completes the `-->` that was split over three chunks.
    let fourth: &[u8] = br#">"#;
    assert_eq!(sc.scan(fourth), Some(State::ScannedComment(1)));
    assert_eq!(sc.state, InternalState::Reset);
}
#[test]
fn comment_not_reused_dashes() {
    let mut sc = Scanner::new();

    // `<!-->` is expected to leave the comment open: the dashes of the
    // opening `<!--` are not counted toward a closing `-->`.
    let head: &[u8] = br#"<!-->"#;
    assert_eq!(sc.scan(head), Some(State::ScanningComment));
    assert_eq!(
        sc.state,
        InternalState::ScanningComment(AlreadyFoundByteSeqCount(0))
    );

    // A full `-->` is needed to close it (3 bytes).
    let tail: &[u8] = br#"-->"#;
    assert_eq!(sc.scan(tail), Some(State::ScannedComment(3)));
    assert_eq!(sc.state, InternalState::Reset);
}
#[test]
fn comment_not_reused_dashes_across_scans() {
    let mut sc = Scanner::new();

    // Only the opening `<!--` is supplied; no delimiter bytes are expected
    // to be recorded from it.
    let opener: &[u8] = br#"<!--"#;
    assert_eq!(sc.scan(opener), Some(State::ScanningComment));
    assert_eq!(
        sc.state,
        InternalState::ScanningComment(AlreadyFoundByteSeqCount(0))
    );

    // A `>` arriving in the next chunk is not expected to close the comment.
    let closer: &[u8] = br#">"#;
    assert_eq!(sc.scan(closer), Some(State::ScanningComment));
    assert_eq!(
        sc.state,
        InternalState::ScanningComment(AlreadyFoundByteSeqCount(0))
    );
}
#[test]
fn cdata_in_one_pass() {
    // A complete CDATA section is expected to be consumed in full by one scan.
    let mut sc = Scanner::new();
    let input: &[u8] = br#"<![CDATA[ Content ]]>"#;
    let outcome = sc.scan(input);
    assert_eq!(outcome, Some(State::ScannedCdata(input.len())));
    assert_eq!(sc.state, InternalState::Reset);
}
#[test]
fn declaration_with_uneven_brackets_in_one_pass() {
    // `<![` not followed by `CDATA[` is expected to be a declaration. The
    // input has two `[` but three `]`, and the whole input is still expected
    // to be reported as scanned.
    let mut sc = Scanner::new();
    let input: &[u8] = br#"<![&random[ Declaration ]]]>"#;
    let outcome = sc.scan(input);
    assert_eq!(outcome, Some(State::ScannedDeclaration(input.len())));
    assert_eq!(sc.state, InternalState::Reset);
}
#[test]
fn cdata_eof() {
    let mut sc = Scanner::new();

    // An unterminated CDATA section.
    let partial: &[u8] = br#"<![CDATA[ Content"#;
    assert_eq!(sc.scan(partial), Some(State::ScanningCdata));
    assert_eq!(
        sc.state,
        InternalState::ScanningCdata(AlreadyFoundByteSeqCount(0))
    );

    // An empty slice is expected to move the scanner to `Eof` and yield `None`.
    let empty: &[u8] = b"";
    assert_eq!(sc.scan(empty), None);
    assert_eq!(sc.state, InternalState::Eof);

    // Even a valid terminator supplied afterwards is expected to be ignored.
    assert_eq!(sc.scan(b"]]>"), None);
    assert_eq!(sc.state, InternalState::Eof);
}
#[test]
fn cdata_with_only_markup_in_first_part() {
    let mut sc = Scanner::new();

    // Only the `<` is available at first.
    let lt: &[u8] = b"<";
    assert_eq!(sc.scan(lt), Some(State::ScanningMarkup));
    assert_eq!(sc.state, InternalState::ScanningMarkup);

    // `![CDAT` is still ambiguous; the five bytes after `!` are expected to
    // be buffered in the internal state.
    let prefix: &[u8] = b"![CDAT";
    let buffered = [b'[', b'C', b'D', b'A', b'T', 0, 0];
    assert_eq!(
        sc.scan(prefix),
        Some(State::ScanningDeclarationCommentOrCdata)
    );
    assert_eq!(
        sc.state,
        InternalState::ScanningDeclarationCommentOrCdata(buffered, 5)
    );

    // `A[` completes the CDATA opener; the expected end is just past `]]>`
    // (14 bytes in).
    let rest: &[u8] = b"A[ Content ]]> Unused Content";
    assert_eq!(sc.scan(rest), Some(State::ScannedCdata(14)));
    assert_eq!(sc.state, InternalState::Reset);
}
#[test]
fn cdata_with_exclamation_as_only_part() {
    let mut sc = Scanner::new();

    let lt: &[u8] = b"<";
    assert_eq!(sc.scan(lt), Some(State::ScanningMarkup));
    assert_eq!(sc.state, InternalState::ScanningMarkup);

    // With only `!` seen, the kind of markup is still undecided and nothing
    // is expected to be buffered yet.
    let bang: &[u8] = b"!";
    assert_eq!(
        sc.scan(bang),
        Some(State::ScanningDeclarationCommentOrCdata)
    );
    assert_eq!(
        sc.state,
        InternalState::ScanningDeclarationCommentOrCdata([0; 7], 0)
    );

    // The full `[CDATA[` opener arrives here; the expected end is just past
    // `]]>` (19 bytes in).
    let rest: &[u8] = b"[CDATA[ Content ]]>Content";
    assert_eq!(sc.scan(rest), Some(State::ScannedCdata(19)));
    assert_eq!(sc.state, InternalState::Reset);
}
#[test]
fn cdata_in_parts() {
    let mut sc = Scanner::new();

    // `<![CDA` is still ambiguous; the four bytes after `<!` are expected to
    // be buffered in the internal state.
    let head: &[u8] = b"<![CDA";
    let buffered = [b'[', b'C', b'D', b'A', 0, 0, 0];
    assert_eq!(
        sc.scan(head),
        Some(State::ScanningDeclarationCommentOrCdata)
    );
    assert_eq!(
        sc.state,
        InternalState::ScanningDeclarationCommentOrCdata(buffered, 4)
    );

    // `TA[` completes the CDATA opener; the expected end is just past `]]>`
    // (15 bytes in).
    let tail: &[u8] = b"TA[ Content ]]>Some content";
    assert_eq!(sc.scan(tail), Some(State::ScannedCdata(15)));
    assert_eq!(sc.state, InternalState::Reset);
}
#[test]
fn cdata_with_single_quotes_one_pass() {
    // The expected length of 22 points just past the `]]>` that appears
    // before the closing single quote, i.e. the first `]]>` in the input.
    let mut sc = Scanner::new();
    let input: &[u8] = br#"<![CDATA[ Content ']]>']]>Unused Content"#;
    let outcome = sc.scan(input);
    assert_eq!(outcome, Some(State::ScannedCdata(22)));
    assert_eq!(sc.state, InternalState::Reset);
}
#[test]
fn cdata_with_single_quotes_in_parts() {
    let mut sc = Scanner::new();

    // `]>` has only one bracket, so it is not a terminator and no delimiter
    // bytes are expected to remain pending.
    let head: &[u8] = br#"<![CDATA[ ']>"#;
    assert_eq!(sc.scan(head), Some(State::ScanningCdata));
    assert_eq!(
        sc.state,
        InternalState::ScanningCdata(AlreadyFoundByteSeqCount(0))
    );

    // Another `]>` is skipped; the expected end is just past the `]]>`
    // (6 bytes in).
    let tail: &[u8] = br#"]>']]>Content"#;
    assert_eq!(sc.scan(tail), Some(State::ScannedCdata(6)));
    assert_eq!(sc.state, InternalState::Reset);
}
#[test]
fn cdata_with_double_quotes_one_pass() {
    // The expected length of 24 points just past the `]]>` that appears
    // before the closing double quote, i.e. the first `]]>` in the input.
    let mut sc = Scanner::new();
    let input: &[u8] = br#"<![CDATA[ goodbye a="]]>"]]>Content"#;
    let outcome = sc.scan(input);
    assert_eq!(outcome, Some(State::ScannedCdata(24)));
    assert_eq!(sc.state, InternalState::Reset);
}
#[test]
fn cdata_with_double_quotes_in_parts() {
    let mut sc = Scanner::new();

    // `]>` has only one bracket, so it is not a terminator and no delimiter
    // bytes are expected to remain pending.
    let head: &[u8] = br#"<![CDATA[ a="]>"#;
    assert_eq!(sc.scan(head), Some(State::ScanningCdata));
    assert_eq!(
        sc.state,
        InternalState::ScanningCdata(AlreadyFoundByteSeqCount(0))
    );

    // Another `]>` is skipped; the expected end is just past the `]]>`
    // (6 bytes in).
    let tail: &[u8] = br#"]>"]]>Content"#;
    assert_eq!(sc.scan(tail), Some(State::ScannedCdata(6)));
    assert_eq!(sc.state, InternalState::Reset);
}
#[test]
fn cdata_with_invalid_start() {
    let mut sc = Scanner::new();

    // `<![CDATA` followed by a space instead of `[` is expected to be scanned
    // as a declaration: one bracket open, and the chunk ends inside an open
    // double quote.
    let head: &[u8] = br#"<![CDATA Content a="]]>"#;
    assert_eq!(sc.scan(head), Some(State::ScanningDeclaration));
    assert_eq!(
        sc.state,
        InternalState::ScanningDeclaration(
            QuoteState::Double,
            BracketCount(1),
            AlreadyFoundByteSeqCount(0)
        )
    );

    // The `]]>` before the closing quote is not the expected end; the
    // declaration is expected to end 7 bytes in, after the second `]]>`.
    let tail: &[u8] = br#"]]>"]]>Content"#;
    assert_eq!(sc.scan(tail), Some(State::ScannedDeclaration(7)));
    assert_eq!(sc.state, InternalState::Reset);
}
#[test]
fn cdata_with_double_right_bracket_inside() {
    let mut sc = Scanner::new();

    // `]>` has only one bracket, so the section is expected to stay open.
    let head: &[u8] = br#"<![CDATA[ Content a="]>"#;
    assert_eq!(sc.scan(head), Some(State::ScanningCdata));
    assert_eq!(
        sc.state,
        InternalState::ScanningCdata(AlreadyFoundByteSeqCount(0))
    );

    // `]]` not followed by `>` appears twice and is not expected to end the
    // section; the end is just past the final `]]>` (20 bytes in).
    let tail: &[u8] = br#"other ]]"]] test ]]>Content"#;
    assert_eq!(sc.scan(tail), Some(State::ScannedCdata(20)));
    assert_eq!(sc.state, InternalState::Reset);
}
#[test]
fn cdata_with_single_closing_bracket() {
    let mut sc = Scanner::new();

    // `]>` has only one bracket, so the section is expected to stay open.
    let first: &[u8] = br#"<![CDATA[ Content a="]>"#;
    assert_eq!(sc.scan(first), Some(State::ScanningCdata));
    assert_eq!(
        sc.state,
        InternalState::ScanningCdata(AlreadyFoundByteSeqCount(0))
    );

    // Two more single-bracket `]>` sequences: still open, with no delimiter
    // bytes expected to be pending.
    let second: &[u8] = br#"]>" test ]>Content"#;
    assert_eq!(sc.scan(second), Some(State::ScanningCdata));
    assert_eq!(
        sc.state,
        InternalState::ScanningCdata(AlreadyFoundByteSeqCount(0))
    );

    // The real `]]>` arrives here; the expected end is 8 bytes in.
    let third: &[u8] = br#"More ]]>Real Content"#;
    assert_eq!(sc.scan(third), Some(State::ScannedCdata(8)));
    assert_eq!(sc.state, InternalState::Reset);
}
#[test]
fn cdata_with_split_terminating_delimiter() {
    let mut sc = Scanner::new();

    // `]>` has only one bracket, so the section is expected to stay open.
    let first: &[u8] = br#"<![CDATA[ goodbye a="]>"#;
    assert_eq!(sc.scan(first), Some(State::ScanningCdata));
    assert_eq!(
        sc.state,
        InternalState::ScanningCdata(AlreadyFoundByteSeqCount(0))
    );

    // This chunk ends on a single `]`: one delimiter byte expected.
    let second: &[u8] = br#"val]>" ]"#;
    assert_eq!(sc.scan(second), Some(State::ScanningCdata));
    assert_eq!(
        sc.state,
        InternalState::ScanningCdata(AlreadyFoundByteSeqCount(1))
    );

    // A second `]` on its own: two delimiter bytes expected.
    let third: &[u8] = br#"]"#;
    assert_eq!(sc.scan(third), Some(State::ScanningCdata));
    assert_eq!(
        sc.state,
        InternalState::ScanningCdata(AlreadyFoundByteSeqCount(2))
    );

    // The final `>` completes the `]]>` that was split over three chunks.
    let fourth: &[u8] = br#">"#;
    assert_eq!(sc.scan(fourth), Some(State::ScannedCdata(1)));
    assert_eq!(sc.state, InternalState::Reset);
}
}