use crate::tendril::StrTendril;
/// Extracts the raw character-encoding label that follows a `charset` key in `input`
/// (for example `text/html; charset=utf8` yields `utf8`).
///
/// The steps mirror the HTML Standard's "algorithm for extracting a character encoding
/// from a meta element", except that the label is returned as-is and is not resolved
/// to an encoding:
///
/// 1. Find the next ASCII case-insensitive occurrence of `charset`.
/// 2. Skip ASCII whitespace; if the next byte is not `=`, resume the search from there.
/// 3. Skip ASCII whitespace after the `=`.
/// 4. If the value starts with `"` or `'`, return everything up to the matching quote;
///    otherwise return everything up to the first ASCII whitespace or `;` (or the end
///    of the input).
///
/// Returns `None` when no `charset` followed by `=` exists, when the input ends before a
/// value starts, or when a quoted value is never closed. The function never panics on
/// truncated input such as `"charset"` or `"charset  "`.
pub(crate) fn extract_a_character_encoding_from_a_meta_element(
    input: StrTendril,
) -> Option<StrTendril> {
    const CHARSET: &[u8] = b"charset";

    /// Counts the ASCII whitespace bytes at the start of `bytes`.
    fn leading_ascii_whitespace(bytes: &[u8]) -> usize {
        bytes
            .iter()
            .take_while(|byte| byte.is_ascii_whitespace())
            .count()
    }

    let bytes = input.as_bytes();
    let mut position = 0;

    loop {
        // Locate the next "charset" at or after `position`; give up if there is none.
        let offset = bytes
            .get(position..)?
            .windows(CHARSET.len())
            .position(|candidate| candidate.eq_ignore_ascii_case(CHARSET))?;
        position += offset + CHARSET.len();
        position += leading_ascii_whitespace(&bytes[position..]);

        // Checked access: the input may end right after "charset" (plus whitespace),
        // in which case there is nothing to extract and indexing would panic.
        if *bytes.get(position)? == b'=' {
            break;
        }
        // Not followed by '=': keep searching from the current byte onwards.
    }

    // Step over the '=' and any whitespace separating it from the value.
    position += 1;
    position += leading_ascii_whitespace(&bytes[position..]);

    // Tendril offsets and lengths are `u32`, hence the casts below.
    match bytes.get(position)? {
        quote @ (b'"' | b'\'') => {
            // A quoted value must be terminated by the same quote character.
            let length = bytes[position + 1..]
                .iter()
                .position(|byte| byte == quote)?;
            Some(input.subtendril(position as u32 + 1, length as u32))
        },
        _ => {
            // An unquoted value runs until whitespace, ';' or the end of the input.
            let length = bytes[position..]
                .iter()
                .position(|byte| byte.is_ascii_whitespace() || *byte == b';')
                .unwrap_or(bytes.len() - position);
            Some(input.subtendril(position as u32, length as u32))
        },
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Runs the extraction on a plain string slice.
    fn extract(input: &str) -> Option<StrTendril> {
        extract_a_character_encoding_from_a_meta_element(StrTendril::from_slice(input))
    }

    /// Builds the expected result for a successful extraction.
    fn found(label: &str) -> Option<StrTendril> {
        Some(StrTendril::from_slice(label))
    }

    #[test]
    fn meta_element_without_charset() {
        assert_eq!(extract("foobar"), None);
    }

    #[test]
    fn meta_element_with_capitalized_charset() {
        // The "charset" key is matched without regard to ASCII case.
        assert_eq!(extract("cHarSet=utf8"), found("utf8"));
    }

    #[test]
    fn meta_element_with_no_equals_after_charset() {
        assert_eq!(extract("charset utf8"), None);
    }

    #[test]
    fn meta_element_with_whitespace_around_equals() {
        assert_eq!(extract("charset \t=\tutf8"), found("utf8"));
    }

    #[test]
    fn meta_element_with_quoted_value() {
        // Properly closed quotes of either kind delimit the value.
        assert_eq!(extract("charset='utf8'"), found("utf8"));
        assert_eq!(extract("charset=\"utf8\""), found("utf8"));

        // An opening quote without its closing counterpart yields nothing.
        assert_eq!(extract("charset='utf8"), None);
        assert_eq!(extract("charset=\"utf8"), None);
    }

    #[test]
    fn meta_element_with_implicit_terminator() {
        // Unquoted values stop at whitespace or a semicolon.
        assert_eq!(extract("charset=utf8 foo"), found("utf8"));
        assert_eq!(extract("charset=utf8;foo"), found("utf8"));
    }

    #[test]
    fn meta_element_with_content_type() {
        assert_eq!(extract("text/html; charset=utf8"), found("utf8"));
    }
}