use crate::error::Result;
use crate::framework::{detect_framework, get_compiled_framework_patterns, Framework};
use once_cell::sync::Lazy;
use regex::Regex;
use std::collections::HashSet;
use url::Url;
/// Regular expressions used to spot URL-like strings in JavaScript source.
///
/// Contract with the consumer loop in `JavaScriptParser`:
/// * every pattern exposes exactly ONE capture group, and that group is the URL-like text;
/// * keyword alternations (`get|post`, `api|endpoint`, ...) use non-capturing `(?:...)` groups so
///   they are never mistaken for endpoints;
/// * entry `i` is scored by entry `i` of `JS_URL_PATTERN_CONFIDENCE`, so the two tables must keep
///   the same length and order.
static JS_URL_PATTERNS: &[&str] = &[
    // --- Generic quoted literals ---
    r#"["'](https?://[^"'\s]+)["']"#,
    r#"["'](/[a-zA-Z0-9/_\-\.]+)["']"#,
    // --- HTTP client calls ---
    r#"fetch\s*\(\s*["']([^"']+)["']"#,
    r#"fetch\s*\(\s*`([^`]+)`"#,
    r#"\.open\s*\(\s*["'][^"']*["']\s*,\s*["']([^"']+)["']"#,
    r#"axios\.(?:get|post|put|delete|patch)\s*\(\s*["']([^"']+)["']"#,
    r#"axios\(\s*\{[^}]*url\s*:\s*["']([^"']+)["']"#,
    r#"\$\.ajax\s*\(\s*\{[^}]*url\s*:\s*["']([^"']+)["']"#,
    r#"\$\.(?:get|post)\s*\(\s*["']([^"']+)["']"#,
    // --- Assignments / object properties with URL-ish names ---
    r#"(?:api|endpoint|url|path|route)\s*[:=]\s*["']([^"']+)["']"#,
    // --- Template literals ---
    r#"`(/api/[^`]+)`"#,
    r#"`(https?://[^`]+)`"#,
    // --- Route / RPC style properties ---
    r#"path\s*:\s*["']([^"']+)["']"#,
    r#"route\s*:\s*["']([^"']+)["']"#,
    r#"(?:graphql|gql)\s*["']([^"']+)["']"#,
    r#"["'](wss?://[^"'\s]+)["']"#,
    r#"rpc\s*:\s*["']([^"']+)["']"#,
    // --- Method-style HTTP verbs on any receiver ---
    r#"\.get\s*\(\s*["']([^"']+)["']"#,
    r#"\.post\s*\(\s*["']([^"']+)["']"#,
    r#"\.put\s*\(\s*["']([^"']+)["']"#,
    r#"\.delete\s*\(\s*["']([^"']+)["']"#,
    r#"\.patch\s*\(\s*["']([^"']+)["']"#,
    // --- Client-side routers ---
    r#"<Route\s+path=["']([^"']+)["']"#,
    r#"useNavigate\s*\(\s*\)\s*\(\s*["']([^"']+)["']"#,
    r#"RouterModule\.forRoot\([^)]*path:\s*["']([^"']+)["']"#,
    r#"\.navigate\(\s*\[["']([^"']+)["']"#,
    r#"router\.push\(\s*["']([^"']+)["']"#,
    // --- Bare /api/ text, server-side route registration, module imports ---
    r#"(/api/[^"'\s]+)"#,
    r#"(?:app|router)\.(?:get|post|put|delete|patch)\s*\(\s*["']([^"']+)["']"#,
    r#"import\s+.*\s+from\s+["']([^"']+)["']"#,
];
/// Confidence score for each entry of `JS_URL_PATTERNS`, matched by position.
///
/// `JavaScriptParser` zips the two tables together, so entry `i` here scores pattern `i` there.
/// Both tables must keep the same length; `JavaScriptParser::new` checks this with a
/// `debug_assert_eq!`.
static JS_URL_PATTERN_CONFIDENCE: &[f32] = &[
    0.6,  // quoted absolute http(s) URL
    0.5,  // quoted root-relative path
    0.9,  // fetch("...")
    0.75, // fetch(`...`)
    0.9,  // .open(method, "...")
    0.9,  // axios.<verb>("...")
    0.85, // axios({ url: "..." })
    0.85, // $.ajax({ url: "..." })
    0.85, // $.get / $.post
    0.7,  // api / endpoint / url / path / route assignment
    0.7,  // template literal starting with /api/
    0.65, // template literal holding an absolute http(s) URL
    0.75, // path: "..."
    0.8,  // route: "..."
    0.8,  // graphql / gql "..."
    0.8,  // quoted ws(s) URL
    0.75, // rpc: "..."
    0.8,  // .get("...")
    0.8,  // .post("...")
    0.8,  // .put("...")
    0.8,  // .delete("...")
    0.8,  // .patch("...")
    0.85, // <Route path="...">
    0.85, // useNavigate()("...")
    0.85, // RouterModule.forRoot(... path: "...")
    0.85, // .navigate(["..."])
    0.85, // router.push("...")
    0.75, // bare /api/... text
    0.9,  // app / router .<verb>("...")
    0.3,  // import ... from "..."
];
/// Matches a JavaScript template-literal placeholder such as `${userId}`.
///
/// Compiled lazily on first use. The pattern is a constant, so a compile failure is treated as
/// a programming error and panics.
static TEMPLATE_VAR_RE: Lazy<Regex> = Lazy::new(|| {
    let compiled = Regex::new(r"\$\{[^}]+\}");
    compiled.expect("Failed to compile template variable regex")
});
/// Regex-based extractor that pulls endpoint URLs out of JavaScript source text.
#[derive(Clone)]
pub struct JavaScriptParser {
// Compiled form of `JS_URL_PATTERNS`, built in `new` in the same order as the source table.
patterns: Vec<Regex>,
}
impl JavaScriptParser {
pub fn new() -> Result<Self> {
debug_assert_eq!(
JS_URL_PATTERNS.len(),
JS_URL_PATTERN_CONFIDENCE.len(),
"JS_URL_PATTERNS and JS_URL_PATTERN_CONFIDENCE must have the same length"
);
let patterns = JS_URL_PATTERNS
.iter()
.map(|p| Regex::new(p))
.collect::<std::result::Result<Vec<_>, _>>()?;
Ok(Self { patterns })
}
pub fn extract_endpoints(&self, js_content: &str, base_url: &Url) -> Vec<Url> {
self.extract_endpoints_with_confidence(js_content, base_url)
.into_iter()
.map(|(url, _)| url)
.collect()
}
pub fn extract_endpoints_with_confidence(
&self,
js_content: &str,
base_url: &Url,
) -> Vec<(Url, f32)> {
let mut endpoint_confidence: std::collections::HashMap<String, (Url, f32)> =
std::collections::HashMap::new();
let insert =
|map: &mut std::collections::HashMap<String, (Url, f32)>, url: Url, confidence: f32| {
let key = url.as_str().to_string();
let entry = map.entry(key).or_insert((url.clone(), confidence));
if confidence > entry.1 {
*entry = (url, confidence);
}
};
for (pattern, &confidence) in self.patterns.iter().zip(JS_URL_PATTERN_CONFIDENCE.iter()) {
for cap in pattern.captures_iter(js_content) {
for i in 1..cap.len() {
if let Some(url_match) = cap.get(i) {
let url_str = url_match.as_str();
if let Ok(url) = self.normalize_and_resolve(url_str, base_url) {
insert(&mut endpoint_confidence, url, confidence);
}
}
}
}
}
let frameworks = detect_framework(js_content);
for framework in &frameworks {
if let Some(framework_endpoints) =
self.extract_framework_endpoints(js_content, base_url, framework)
{
for url in framework_endpoints {
insert(&mut endpoint_confidence, url, 0.85);
}
}
}
endpoint_confidence.into_values().collect()
}
fn extract_framework_endpoints(
&self,
js_content: &str,
base_url: &Url,
framework: &Framework,
) -> Option<Vec<Url>> {
let patterns = get_compiled_framework_patterns(framework);
if patterns.is_empty() {
return None;
}
let mut endpoints = Vec::new();
for pattern in patterns {
for cap in pattern.captures_iter(js_content) {
for i in 1..cap.len() {
if let Some(url_match) = cap.get(i) {
let url_str = url_match.as_str();
if let Ok(url) = self.normalize_and_resolve(url_str, base_url) {
endpoints.push(url);
}
}
}
}
}
Some(endpoints)
}
fn normalize_and_resolve(&self, url_str: &str, base_url: &Url) -> Result<Url> {
let cleaned = url_str.trim_matches(|c| c == '"' || c == '\'' || c == '`');
let cleaned = self.replace_template_vars(cleaned);
if let Ok(url) = Url::parse(&cleaned) {
return Ok(url);
}
Ok(base_url.join(&cleaned)?)
}
fn replace_template_vars(&self, url: &str) -> String {
let mut result = url.to_string();
result = TEMPLATE_VAR_RE.replace_all(&result, "0").to_string();
result = result
.replace("{id}", "1")
.replace("{userId}", "1")
.replace("{user_id}", "1")
.replace("{uuid}", "00000000-0000-0000-0000-000000000000")
.replace("{slug}", "example")
.replace("{name}", "example")
.replace(":id", "1")
.replace(":userId", "1")
.replace(":user_id", "1")
.replace(":uuid", "00000000-0000-0000-0000-000000000000")
.replace(":slug", "example")
.replace(":name", "example");
result
}
}
impl Default for JavaScriptParser {
    /// Builds a parser through [`JavaScriptParser::new`].
    ///
    /// # Panics
    /// Panics if `new` returns an error, reporting that error in the message.
    fn default() -> Self {
        match Self::new() {
            Ok(parser) => parser,
            Err(e) => panic!("Failed to create default JavaScriptParser: {}", e),
        }
    }
}
/// Endpoint extractor for "frame file" content: the content is tried as JSON and is also
/// scanned as raw text by an embedded `JavaScriptParser`.
#[derive(Clone)]
pub struct FrameFileParser {
// Used to scan the raw frame content with the JavaScript URL patterns.
js_parser: JavaScriptParser,
}
impl FrameFileParser {
    /// Lower-case substrings that mark a JSON key as likely to hold a URL.
    const URL_KEY_HINTS: [&'static str; 6] = ["url", "endpoint", "path", "route", "href", "link"];

    /// Creates a parser backed by a freshly compiled [`JavaScriptParser`].
    ///
    /// # Errors
    /// Returns an error if the JavaScript parser cannot be constructed.
    pub fn new() -> Result<Self> {
        Ok(Self {
            js_parser: JavaScriptParser::new()?,
        })
    }

    /// Extracts the distinct endpoint URLs found in `frame_content`.
    ///
    /// If the content parses as JSON, string values stored under URL-like keys are collected.
    /// Independently of that, the raw text is always scanned with the JavaScript patterns.
    /// Results from both passes are de-duplicated; the order of the result is unspecified.
    pub fn extract_endpoints(&self, frame_content: &str, base_url: &Url) -> Vec<Url> {
        let mut unique: HashSet<Url> = HashSet::new();
        if let Ok(json) = serde_json::from_str::<serde_json::Value>(frame_content) {
            unique.extend(self.extract_from_json(&json, base_url));
        }
        unique.extend(self.js_parser.extract_endpoints(frame_content, base_url));
        unique.into_iter().collect()
    }

    /// Collects URLs from every string value in `json` whose key looks URL-related.
    ///
    /// Key matching is case-insensitive, so camelCase keys such as `baseUrl` or `apiURL` are
    /// recognised. Values go through the same normalisation as JavaScript matches, so
    /// placeholders like `{id}` are substituted instead of being percent-encoded.
    fn extract_from_json(&self, json: &serde_json::Value, base_url: &Url) -> Vec<Url> {
        let mut endpoints = Vec::new();
        self.collect_from_json(json, base_url, &mut endpoints);
        endpoints
    }

    /// Recursive worker for `extract_from_json`; appends into `out` so nested levels do not
    /// each allocate their own vector.
    fn collect_from_json(&self, json: &serde_json::Value, base_url: &Url, out: &mut Vec<Url>) {
        match json {
            serde_json::Value::Object(map) => {
                for (key, value) in map {
                    if let Some(url_str) = value.as_str() {
                        let key = key.to_ascii_lowercase();
                        let looks_like_url_key =
                            Self::URL_KEY_HINTS.iter().any(|hint| key.contains(*hint));
                        if looks_like_url_key {
                            if let Ok(url) = self.js_parser.normalize_and_resolve(url_str, base_url)
                            {
                                out.push(url);
                            }
                        }
                    }
                    // Always descend: nested objects/arrays may hold URL-like keys of their own.
                    self.collect_from_json(value, base_url, out);
                }
            }
            serde_json::Value::Array(items) => {
                for item in items {
                    self.collect_from_json(item, base_url, out);
                }
            }
            _ => {}
        }
    }
}
impl Default for FrameFileParser {
    /// Builds a parser through [`FrameFileParser::new`].
    ///
    /// # Panics
    /// Panics if `new` returns an error, reporting that error in the message.
    fn default() -> Self {
        match Self::new() {
            Ok(parser) => parser,
            Err(e) => panic!("Failed to create default FrameFileParser: {}", e),
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Base URL that every test resolves relative endpoints against.
    fn base() -> Url {
        Url::parse("https://example.com").unwrap()
    }

    /// Returns true when at least one extracted URL has exactly the given path.
    fn has_path(endpoints: &[Url], path: &str) -> bool {
        endpoints.iter().any(|u| u.path() == path)
    }

    // NOTE: the raw-string fixtures below are intentionally flush-left; their content is
    // kept exactly as it was.

    /// fetch / axios / plain assignment are all picked up.
    #[test]
    fn test_js_endpoint_extraction() {
        let js = r#"
fetch('/api/users');
axios.get('/api/posts');
const endpoint = '/api/comments';
"#;
        let parser = JavaScriptParser::new().unwrap();
        let endpoints = parser.extract_endpoints(js, &base());
        assert!(has_path(&endpoints, "/api/users"));
        assert!(has_path(&endpoints, "/api/posts"));
        assert!(has_path(&endpoints, "/api/comments"));
    }

    /// `${...}`, `{id}` and `:slug` placeholders are replaced with sample values.
    #[test]
    fn test_template_variable_replacement() {
        let js = r#"
fetch('/api/users/${userId}');
fetch('/api/items/{id}');
fetch('/api/posts/:slug');
"#;
        let parser = JavaScriptParser::new().unwrap();
        let endpoints = parser.extract_endpoints(js, &base());
        assert!(has_path(&endpoints, "/api/users/0"));
        assert!(has_path(&endpoints, "/api/items/1"));
        assert!(has_path(&endpoints, "/api/posts/example"));
    }

    /// URL-like keys nested inside a JSON document are extracted.
    #[test]
    fn test_frame_file_json_extraction() {
        let frame_content = r#"
{
"api": {
"endpoint": "/api/v1/data",
"path": "/api/v1/users"
}
}
"#;
        let parser = FrameFileParser::new().unwrap();
        let endpoints = parser.extract_endpoints(frame_content, &base());
        assert!(has_path(&endpoints, "/api/v1/data"));
        assert!(has_path(&endpoints, "/api/v1/users"));
    }

    /// Absolute `wss://` URLs are kept as they are.
    #[test]
    fn test_websocket_extraction() {
        let js = r#"
const ws = new WebSocket('wss://example.com/socket');
"#;
        let parser = JavaScriptParser::new().unwrap();
        let endpoints = parser.extract_endpoints(js, &base());
        let found_socket = endpoints
            .iter()
            .any(|u| u.as_str() == "wss://example.com/socket");
        assert!(found_socket);
    }
}