use log::info;
use std::result::Result;
/// Scanner states used by `skip_descriptor` while it walks over the text that
/// follows a candidate URL.
enum State {
/// Reading descriptor characters outside parentheses; scanning starts here.
InsideDescriptor,
/// ASCII whitespace was seen while in `InsideDescriptor`.
AfterDescriptor,
/// Between a `(` and the next `)`; every other character is ignored here,
/// so a comma in this state does not end the candidate.
InsideParens,
}
/// Splits `input` into its longest prefix whose characters all satisfy
/// `predicate`, and everything that follows that prefix.
///
/// The second element begins at the first character rejected by `predicate`.
/// It is empty when every character is accepted, which includes empty input.
fn split_at<F>(input: &str, predicate: F) -> (&str, &str)
where
    F: Fn(&char) -> bool,
{
    // Byte offset of the first rejected character, or the end of the string
    // when nothing is rejected.
    let boundary = input
        .find(|ch: char| !predicate(&ch))
        .unwrap_or(input.len());
    input.split_at(boundary)
}
/// Collects the URL of every image candidate in a `srcset` attribute value.
///
/// The URLs are returned in the order they appear and borrow from `input`.
/// If any candidate fails to parse, the error message is logged at info
/// level and an empty vector is returned, discarding URLs found so far.
pub(crate) fn parse(input: &str) -> Vec<&str> {
    let mut urls: Vec<&str> = Vec::new();
    let mut rest = input;
    loop {
        if rest.is_empty() {
            return urls;
        }
        match parse_one_url(rest) {
            Ok((tail, found)) => {
                // `found` is `None` when only whitespace was left to consume.
                urls.extend(found);
                rest = tail;
            }
            Err(message) => {
                info!("{message}");
                return Vec::new();
            }
        }
    }
}
/// Extracts the next image candidate URL from the front of a `srcset` value.
///
/// Returns the unparsed rest of the input together with the URL. The URL is
/// `None` when nothing but ASCII whitespace was left.
///
/// A URL token that ends in a single comma is a complete candidate without
/// descriptors. In that case the text after the token is handed back
/// untouched, so the following candidate is not mistaken for a descriptor
/// and skipped.
///
/// # Errors
///
/// Returns a message when a comma precedes the URL, or when the URL token
/// ends in more than one comma.
fn parse_one_url(remaining: &str) -> Result<(&str, Option<&str>), String> {
    let (start, remaining) = split_at(remaining, |c| *c == ',' || c.is_ascii_whitespace());
    if start.contains(',') {
        return Err("srcset parse error (too many commas)".to_string());
    }
    if remaining.is_empty() {
        return Ok(("", None));
    }

    // The URL token is everything up to the next ASCII whitespace.
    let (url, remaining) = split_at(remaining, |c| !c.is_ascii_whitespace());
    let comma_count = url.chars().rev().take_while(|c| *c == ',').count();
    if comma_count > 1 {
        return Err("srcset parse error (trailing commas)".to_string());
    }

    // Exactly one trailing comma: the comma already terminated this
    // candidate, so there is no descriptor to skip.
    if let Some(url) = url.strip_suffix(',') {
        return Ok((remaining, Some(url)));
    }

    let (_spaces, remaining) = split_at(remaining, char::is_ascii_whitespace);
    Ok((skip_descriptor(remaining), Some(url)))
}
/// Skips the descriptor part of an image candidate and returns the text that
/// follows the comma terminating it.
///
/// A comma ends the candidate unless it sits between `(` and `)`. When the
/// input runs out before such a comma is found, the empty string is returned.
///
/// The first non-whitespace character after a run of whitespace is treated
/// exactly like any other descriptor character. A `,` there therefore ends
/// the candidate and a `(` opens a parenthesised group, rather than being
/// silently consumed.
fn skip_descriptor(remaining: &str) -> &str {
    let mut state = State::InsideDescriptor;
    for (i, c) in remaining.char_indices() {
        state = match state {
            State::InsideParens => {
                if c == ')' {
                    State::InsideDescriptor
                } else {
                    State::InsideParens
                }
            }
            // Whitespace between descriptors is simply skipped.
            State::AfterDescriptor if c.is_ascii_whitespace() => State::AfterDescriptor,
            // Anything else after whitespace is handled as a descriptor
            // character, so both states share the same rules below.
            State::InsideDescriptor | State::AfterDescriptor => match c {
                // `i` is the start of `,`, so the slice begins right after it.
                ',' => return &remaining[i + c.len_utf8()..],
                '(' => State::InsideParens,
                c if c.is_ascii_whitespace() => State::AfterDescriptor,
                _ => State::InsideDescriptor,
            },
        };
    }
    ""
}
/// Unit tests for the `srcset` parser: the `split_at` helper first, then
/// `parse` on well-formed and malformed attribute values.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_split_at_with_empty_string() {
        let (sequence, remainder) = split_at("", |c| c.is_alphabetic());
        assert_eq!(sequence, "");
        assert_eq!(remainder, "");
    }

    #[test]
    fn test_split_at_with_alphabetic_predicate() {
        let (sequence, remainder) = split_at("abc123", |c| c.is_alphabetic());
        assert_eq!(sequence, "abc");
        assert_eq!(remainder, "123");
    }

    #[test]
    fn test_split_at_with_digit_predicate() {
        let (sequence, remainder) = split_at("123abc", char::is_ascii_digit);
        assert_eq!(sequence, "123");
        assert_eq!(remainder, "abc");
    }

    #[test]
    fn test_split_at_with_no_match() {
        let (sequence, remainder) = split_at("123abc", |c| c.is_whitespace());
        assert_eq!(sequence, "");
        assert_eq!(remainder, "123abc");
    }

    #[test]
    fn test_split_at_with_all_match() {
        let (sequence, remainder) = split_at("123abc", |c| !c.is_whitespace());
        assert_eq!(sequence, "123abc");
        assert_eq!(remainder, "");
    }

    #[test]
    fn test_parse_no_value() {
        assert!(parse("").is_empty());
    }

    #[test]
    fn test_parse_only_whitespace() {
        assert!(parse("  \t ").is_empty());
    }

    #[test]
    fn test_parse_url_without_descriptor() {
        assert_eq!(parse("image.png"), vec!["image.png"]);
    }

    #[test]
    fn test_parse_url_one_value() {
        assert_eq!(parse("test-img-320w.jpg 320w"), vec!["test-img-320w.jpg"]);
    }

    #[test]
    fn test_parse_srcset_two_values() {
        assert_eq!(
            parse("test-img-320w.jpg 320w, test-img-480w.jpg 480w"),
            vec!["test-img-320w.jpg", "test-img-480w.jpg"]
        );
    }

    // Commas inside a URL token must not be treated as candidate separators.
    #[test]
    fn test_parse_srcset_with_unencoded_comma() {
        assert_eq!(
            parse(
                "/cdn-cgi/image/format=webp,width=640/https://img.youtube.com/vi/hVBl8_pgQf0/maxresdefault.jpg 640w, /cdn-cgi/image/format=webp,width=750/https://img.youtube.com/vi/hVBl8_pgQf0/maxresdefault.jpg 750w"
            ),
            vec![
                "/cdn-cgi/image/format=webp,width=640/https://img.youtube.com/vi/hVBl8_pgQf0/maxresdefault.jpg",
                "/cdn-cgi/image/format=webp,width=750/https://img.youtube.com/vi/hVBl8_pgQf0/maxresdefault.jpg"
            ]
        );
    }

    #[test]
    fn test_parse_srcset_url() {
        assert_eq!(
            parse("https://example.com/image1.jpg 1x, https://example.com/image2.jpg 2x"),
            vec![
                "https://example.com/image1.jpg",
                "https://example.com/image2.jpg"
            ]
        );
    }

    #[test]
    fn test_parse_srcset_without_spaces() {
        assert_eq!(
            parse(
                "/300.png 300w,/600.png 600w,/900.png 900w,https://x.invalid/a.png 1000w,relative.png 10w"
            ),
            vec![
                "/300.png",
                "/600.png",
                "/900.png",
                "https://x.invalid/a.png",
                "relative.png"
            ]
        );
    }

    // A comma between parentheses belongs to the descriptor, not the list.
    #[test]
    fn test_parse_srcset_with_parenthesized_descriptor() {
        assert_eq!(
            parse("a.png foo(b,c), d.png 2x"),
            vec!["a.png", "d.png"]
        );
    }

    #[test]
    fn test_parse_leading_comma_is_an_error() {
        assert!(parse(",a.png").is_empty());
    }

    #[test]
    fn test_parse_repeated_trailing_commas_is_an_error() {
        assert!(parse("a.png,, b.png").is_empty());
    }

    // An error discards candidates that were already collected.
    #[test]
    fn test_parse_error_discards_earlier_candidates() {
        assert!(parse("a.png 1x, ,b.png").is_empty());
    }
}