use std::collections::HashSet;
use url::Url;
/// Collects candidate endpoint URLs from the string literals of a JavaScript source.
///
/// Every literal returned by `string_literals` is trimmed and then classified:
///
/// * literals starting with `http://` or `https://` are parsed as absolute URLs;
/// * literals starting with `/` that satisfy `looks_like_api_path` are resolved
///   against `base`.
///
/// Literals that fail to parse or join are silently skipped. Each distinct URL is
/// returned once, in the order it was first encountered in `js`.
pub fn extract(base: &Url, js: &str) -> Vec<Url> {
    // `seen` handles de-duplication; `found` preserves first-seen order so the
    // result is deterministic rather than following hash-set iteration order.
    let mut seen: HashSet<Url> = HashSet::new();
    let mut found = Vec::new();

    for lit in string_literals(js) {
        let lit = lit.trim();

        let candidate = if lit.starts_with("http://") || lit.starts_with("https://") {
            Url::parse(lit).ok()
        } else if lit.starts_with('/') && looks_like_api_path(lit) {
            base.join(lit).ok()
        } else {
            None
        };

        if let Some(url) = candidate {
            if seen.insert(url.clone()) {
                found.push(url);
            }
        }
    }

    found
}
/// Reports whether `p` begins with one of the path prefixes this module treats
/// as an API route.
///
/// The comparison is a plain, case-sensitive prefix test. Note that all entries
/// except `/graphql` end in a slash, so `/api` alone does not match while
/// `/graphql` (and anything that merely starts with it) does.
fn looks_like_api_path(p: &str) -> bool {
    static API_ROOTS: [&str; 10] = [
        "/api/",
        "/v1/",
        "/v2/",
        "/v3/",
        "/graphql",
        "/rest/",
        "/rpc/",
        "/admin/",
        "/internal/",
        "/.well-known/",
    ];

    for root in API_ROOTS.iter() {
        if p.starts_with(*root) {
            return true;
        }
    }
    false
}
/// Returns the raw contents of the string literals found in JavaScript-like source.
///
/// This is a lightweight byte scanner, not a full lexer:
///
/// * `"…"`, `'…'` and `` `…` `` are recognised as literals; a backslash escapes the
///   byte that follows it (an escaped CRLF pair is skipped as one unit).
/// * `// …` and `/* … */` comments are skipped, so quotes inside them are ignored.
/// * Only literals whose content is longer than two bytes are returned.
/// * The returned slices borrow from `src` and are *not* unescaped.
///
/// Only properly closed literals are reported. A `'` or `"` literal may not contain
/// a raw line break, so an opening quote that is not closed on its own line (or a
/// backtick that is never closed at all) is treated as ordinary text and scanning
/// resumes right after it. This keeps a stray apostrophe — for example inside a
/// regex literal — from swallowing the rest of the input.
fn string_literals(src: &str) -> Vec<&str> {
    let b = src.as_bytes();
    let mut out = Vec::new();
    let mut i = 0;

    while i < b.len() {
        let c = b[i];
        if matches!(c, b'"' | b'\'' | b'`') {
            let start = i + 1;
            // Only template literals are allowed to contain raw line breaks.
            let spans_lines = c == b'`';
            let mut end = start;
            let mut closed = false;

            while end < b.len() {
                match b[end] {
                    b'\\' => {
                        // Skip the escaped byte; may step past the end of input,
                        // in which case the literal is simply left unclosed.
                        end += if b[end + 1..].starts_with(b"\r\n") { 3 } else { 2 };
                    }
                    q if q == c => {
                        closed = true;
                        break;
                    }
                    b'\n' | b'\r' if !spans_lines => break,
                    _ => end += 1,
                }
            }

            if closed {
                if end - start > 2 {
                    // `start` follows an ASCII quote and `end` sits on one, so both
                    // offsets are char boundaries and the slice is always valid.
                    if let Some(lit) = src.get(start..end) {
                        out.push(lit);
                    }
                }
                i = end + 1;
            } else {
                // Unmatched quote: ignore it and rescan what follows as code.
                i = start;
            }
        } else if b[i..].starts_with(b"//") {
            // Line comment: jump to the terminating newline (or end of input).
            i = b[i..]
                .iter()
                .position(|&x| x == b'\n')
                .map_or(b.len(), |off| i + off);
        } else if b[i..].starts_with(b"/*") {
            // Block comment: jump past the closing `*/` (or to end of input).
            let body_start = i + 2;
            i = b[body_start..]
                .windows(2)
                .position(|w| w == b"*/")
                .map_or(b.len(), |off| body_start + off + 2);
        } else {
            i += 1;
        }
    }

    out
}