use std::collections::HashSet;
/// Expands a URL into the list of `host + path` lookup expressions derived
/// from its host suffixes and path prefixes.
///
/// The input is split into host, path and optional query:
/// * everything up to and including the first `://` is discarded,
/// * the host runs up to the first `/` (a missing path is treated as `/`),
/// * the query is whatever follows the first `?` of the path part.
///
/// For the full host and then for each suffix obtained by dropping the
/// leading label, the following expressions are produced, in this order:
/// 1. `host + path + "?" + query` (only when a query is present),
/// 2. `host + path`, then each shorter parent directory of the path,
/// 3. `host + "/"`.
///
/// Host expansion stops once the remaining host has no `.` or consists of
/// exactly three all-digit labels. The latter is what remains after dropping
/// the first octet of a dotted-quad address, so such hosts contribute only
/// their exact form.
///
/// At most six expressions are kept per host (the first plus the last five)
/// and at most five hosts (the first plus the last four). Duplicates are
/// removed, keeping the first occurrence.
///
/// The function does no canonicalization itself; it presumably expects an
/// already canonicalized URL (no fragment, no port or userinfo) — verify
/// against callers.
///
/// Returns an empty vector for an empty input or a host without any `.`.
pub fn suffix_postfix_expressions(canonical_url: &str) -> Vec<String> {
    // Upper bounds on the number of expressions kept per host, and on the
    // number of host variants kept overall.
    const MAX_PATHS_PER_HOST: usize = 6;
    const MAX_HOSTS: usize = 5;

    if canonical_url.is_empty() {
        return Vec::new();
    }

    // Drop the scheme, if any, leaving `host[/path][?query]`.
    let after_scheme = canonical_url
        .find("://")
        .map_or(canonical_url, |i| &canonical_url[i + 3..]);

    let (host, path_and_query) = match after_scheme.find('/') {
        Some(i) => after_scheme.split_at(i),
        None => (after_scheme, "/"),
    };
    let (path, query) = match path_and_query.find('?') {
        Some(i) => (&path_and_query[..i], Some(&path_and_query[i + 1..])),
        None => (path_and_query, None),
    };

    let mut groups: Vec<Vec<String>> = Vec::new();
    let mut domain = host;
    while domain.contains('.') && !is_three_part_numeric(domain) {
        let mut group: Vec<String> = Vec::new();
        if let Some(q) = query {
            group.push(format!("{}{}?{}", domain, path, q));
        }

        // Walk from the full path up towards the root; the bare root is
        // appended separately below so it is always present exactly once.
        let mut current_path = path.to_string();
        while has_content_after_slash(&current_path) {
            group.push(format!("{}{}", domain, current_path));
            current_path = strip_last_path_component(&current_path);
        }
        group.push(format!("{}/", domain));

        // Keep the most specific expression plus the ones closest to the root.
        // NOTE(review): with a query and a deep path this drops the exact
        // path without query — confirm that is intended.
        if group.len() > MAX_PATHS_PER_HOST {
            let cut = group.len() - (MAX_PATHS_PER_HOST - 1);
            group.drain(1..cut);
        }
        groups.push(group);

        match domain.find('.') {
            Some(i) => domain = &domain[i + 1..],
            None => break,
        }
    }

    // Keep the exact host plus the shortest suffixes.
    if groups.len() > MAX_HOSTS {
        let cut = groups.len() - (MAX_HOSTS - 1);
        groups.drain(1..cut);
    }

    // Flatten while removing duplicates, preserving first-seen order.
    let mut seen = HashSet::new();
    let mut result = Vec::new();
    for entry in groups.into_iter().flatten() {
        if seen.insert(entry.clone()) {
            result.push(entry);
        }
    }
    result
}
/// Reports whether `s` is exactly three `.`-separated labels, each of which
/// is a non-empty run of ASCII digits (for example `"168.0.1"`).
fn is_three_part_numeric(s: &str) -> bool {
    let mut label_count = 0usize;
    for label in s.split('.') {
        label_count += 1;
        // Bail out as soon as the shape can no longer match.
        if label_count > 3 || label.is_empty() {
            return false;
        }
        if !label.bytes().all(|byte| byte.is_ascii_digit()) {
            return false;
        }
    }
    label_count == 3
}
/// Reports whether at least one byte follows the first `/` in `path`.
///
/// Returns `false` when `path` contains no `/` at all or when its first `/`
/// is the final byte.
fn has_content_after_slash(path: &str) -> bool {
    matches!(path.find('/'), Some(first_slash) if first_slash + 1 < path.len())
}
/// Removes the last component of `path`, keeping the `/` that precedes it.
///
/// A single trailing `/` is ignored when locating the last component, so
/// `"/1/2.html"` becomes `"/1/"` and `"/1/"` becomes `"/"`. When no `/`
/// precedes the last component (including for `"/"` and `""`), the result is
/// the empty string.
fn strip_last_path_component(path: &str) -> String {
    // Ignore at most one trailing slash before looking for the separator.
    let without_trailing = path.strip_suffix('/').unwrap_or(path);
    match without_trailing.rfind('/') {
        // '/' is a single byte, so the inclusive slice ends on a char boundary.
        Some(separator) => path[..=separator].to_string(),
        None => String::new(),
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Only exactly three all-digit labels count as "three-part numeric".
    #[test]
    fn test_is_three_part_numeric() {
        let cases = [
            ("168.0.1", true),
            ("192.168.0.1", false),
            ("a.b.c", false),
            ("a.b", false),
        ];
        for &(input, expected) in &cases {
            assert_eq!(is_three_part_numeric(input), expected, "input: {:?}", input);
        }
    }

    /// Each step removes the final component and keeps the preceding slash.
    #[test]
    fn test_strip_last_path_component() {
        let cases = [
            ("/1/2.html", "/1/"),
            ("/1/", "/"),
            ("/foo", "/"),
            ("/", ""),
        ];
        for &(input, expected) in &cases {
            assert_eq!(strip_last_path_component(input), expected, "input: {:?}", input);
        }
    }

    /// The bare root has nothing after its slash; longer paths do.
    #[test]
    fn test_has_content_after_slash() {
        let cases = [("/", false), ("/foo", true), ("/foo/", true)];
        for &(input, expected) in &cases {
            assert_eq!(has_content_after_slash(input), expected, "input: {:?}", input);
        }
    }
}