1use crate::error::Result;
2use crate::framework::{detect_framework, get_compiled_framework_patterns, Framework};
3use once_cell::sync::Lazy;
4use regex::Regex;
5use std::collections::HashSet;
6use url::Url;
7
/// Regex sources used to find URL-like strings in JavaScript text.
///
/// The table is positional: `JavaScriptParser` compiles the entries in order
/// and pairs each compiled pattern with the value at the same index in
/// `JS_URL_PATTERN_CONFIDENCE`, so the two tables must be edited together.
static JS_URL_PATTERNS: &[&str] = &[
    // Quoted absolute http(s) URL (no capture group; the match includes the quotes).
    r#"["']https?://[^"'\s]+["']"#,
    // Quoted root-relative path made of letters, digits, `/`, `_`, `-` and `.`.
    r#"["'](/[a-zA-Z0-9/_\-\.]+)["']"#,
    // fetch('...') / fetch("...")
    r#"fetch\s*\(\s*["']([^"']+)["']"#,
    // fetch(`...`) with a template literal.
    r#"fetch\s*\(\s*`([^`]+)`"#,
    // .open('<first arg>', '<url>') — captures the second quoted argument.
    r#"\.open\s*\(\s*["'][^"']*["']\s*,\s*["']([^"']+)["']"#,
    // axios.get/post/put/delete/patch('...') — group 1 is the verb, group 2 the URL.
    r#"axios\.(get|post|put|delete|patch)\s*\(\s*["']([^"']+)["']"#,
    // axios({ ... url: '...' })
    r#"axios\(\s*\{[^}]*url\s*:\s*["']([^"']+)["']"#,
    // $.ajax({ ... url: '...' })
    r#"\$\.ajax\s*\(\s*\{[^}]*url\s*:\s*["']([^"']+)["']"#,
    // $.get('...') / $.post('...') — group 1 is the verb, group 2 the URL.
    r#"\$\.(get|post)\s*\(\s*["']([^"']+)["']"#,
    // api/endpoint/url/path/route followed by `:` or `=` and a quoted value —
    // group 1 is the keyword, group 2 the value.
    r#"(api|endpoint|url|path|route)\s*[:=]\s*["']([^"']+)["']"#,
    // Template literal starting with /api/ (no capture group; includes the backticks).
    r#"`/api/[^`]+`"#,
    // Template literal holding an absolute http(s) URL (no capture group).
    r#"`https?://[^`]+`"#,
    // path: '...'
    r#"path\s*:\s*["']([^"']+)["']"#,
    // route: '...'
    r#"route\s*:\s*["']([^"']+)["']"#,
    // graphql '...' / gql '...' — group 1 is the keyword, group 2 the value.
    r#"(graphql|gql)\s*["']([^"']+)["']"#,
    // Quoted ws:// or wss:// URL.
    r#"["'](wss?://[^"'\s]+)["']"#,
    // rpc: '...'
    r#"rpc\s*:\s*["']([^"']+)["']"#,
    // Any `.get('...')` method call.
    r#"\.get\s*\(\s*["']([^"']+)["']"#,
    // Any `.post('...')` method call.
    r#"\.post\s*\(\s*["']([^"']+)["']"#,
    // Any `.put('...')` method call.
    r#"\.put\s*\(\s*["']([^"']+)["']"#,
    // Any `.delete('...')` method call.
    r#"\.delete\s*\(\s*["']([^"']+)["']"#,
    // Any `.patch('...')` method call.
    r#"\.patch\s*\(\s*["']([^"']+)["']"#,
    // JSX: <Route path="...">
    r#"<Route\s+path=["']([^"']+)["']"#,
    // useNavigate()('...')
    r#"useNavigate\s*\(\s*\)\s*\(\s*["']([^"']+)["']"#,
    // RouterModule.forRoot(... path: '...')
    r#"RouterModule\.forRoot\([^)]*path:\s*["']([^"']+)["']"#,
    // .navigate(['...' — first element of the array argument.
    r#"\.navigate\(\s*\[["']([^"']+)["']"#,
    // router.push('...')
    r#"router\.push\(\s*["']([^"']+)["']"#,
    // Bare /api/... run up to the next quote or whitespace (no capture group).
    r#"/api/[^"'\s]+"#,
    // app.<verb>('...') / router.<verb>('...') — groups 1 and 2 are the object
    // and the verb, group 3 the path.
    r#"(app|router)\.(get|post|put|delete|patch)\s*\(\s*["']([^"']+)["']"#,
    // import ... from '...' — captures the module specifier.
    r#"import\s+.*\s+from\s+["']([^"']+)["']"#,
];
59
/// Confidence score attached to URLs found by the pattern at the same index
/// in `JS_URL_PATTERNS`.
///
/// `JavaScriptParser::new` debug-asserts that both tables have the same
/// length; keep the entries below in the same order as the patterns.
static JS_URL_PATTERN_CONFIDENCE: &[f32] = &[
    0.6,  // quoted absolute http(s) URL
    0.5,  // quoted root-relative path
    0.9,  // fetch('...')
    0.75, // fetch(`...`)
    0.9,  // .open(<first arg>, '...')
    0.9,  // axios.<verb>('...')
    0.85, // axios({ url: '...' })
    0.85, // $.ajax({ url: '...' })
    0.85, // $.get / $.post
    0.7,  // api|endpoint|url|path|route assignment
    0.7,  // `/api/...` template literal
    0.65, // `http(s)://...` template literal
    0.75, // path: '...'
    0.8,  // route: '...'
    0.8,  // graphql / gql
    0.8,  // ws:// or wss:// URL
    0.75, // rpc: '...'
    0.8,  // .get('...')
    0.8,  // .post('...')
    0.8,  // .put('...')
    0.8,  // .delete('...')
    0.8,  // .patch('...')
    0.85, // <Route path="...">
    0.85, // useNavigate()('...')
    0.85, // RouterModule.forRoot(... path: '...')
    0.85, // .navigate(['...'
    0.85, // router.push('...')
    0.75, // bare /api/...
    0.9,  // app.<verb>('...') / router.<verb>('...')
    0.3,  // import ... from '...'
];
97
/// Matches a JavaScript template-literal interpolation such as `${userId}`:
/// the characters `${`, one or more characters other than `}`, then `}`.
///
/// Compiled lazily on first use; the pattern is a fixed literal, so the
/// `expect` only fires if that literal is edited into an invalid regex.
static TEMPLATE_VAR_RE: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"\$\{[^}]+\}").expect("Failed to compile template variable regex"));
101
102#[derive(Clone)]
104pub struct JavaScriptParser {
105 patterns: Vec<Regex>,
106}
107
108impl JavaScriptParser {
109 pub fn new() -> Result<Self> {
111 debug_assert_eq!(
113 JS_URL_PATTERNS.len(),
114 JS_URL_PATTERN_CONFIDENCE.len(),
115 "JS_URL_PATTERNS and JS_URL_PATTERN_CONFIDENCE must have the same length"
116 );
117
118 let patterns = JS_URL_PATTERNS
119 .iter()
120 .map(|p| Regex::new(p))
121 .collect::<std::result::Result<Vec<_>, _>>()?;
122
123 Ok(Self { patterns })
124 }
125
126 pub fn extract_endpoints(&self, js_content: &str, base_url: &Url) -> Vec<Url> {
128 self.extract_endpoints_with_confidence(js_content, base_url)
129 .into_iter()
130 .map(|(url, _)| url)
131 .collect()
132 }
133
134 pub fn extract_endpoints_with_confidence(
143 &self,
144 js_content: &str,
145 base_url: &Url,
146 ) -> Vec<(Url, f32)> {
147 let mut endpoint_confidence: std::collections::HashMap<String, (Url, f32)> =
150 std::collections::HashMap::new();
151
152 let insert =
153 |map: &mut std::collections::HashMap<String, (Url, f32)>, url: Url, confidence: f32| {
154 let key = url.as_str().to_string();
155 let entry = map.entry(key).or_insert((url.clone(), confidence));
156 if confidence > entry.1 {
157 *entry = (url, confidence);
158 }
159 };
160
161 for (pattern, &confidence) in self.patterns.iter().zip(JS_URL_PATTERN_CONFIDENCE.iter()) {
163 for cap in pattern.captures_iter(js_content) {
164 for i in 1..cap.len() {
167 if let Some(url_match) = cap.get(i) {
168 let url_str = url_match.as_str();
169
170 if let Ok(url) = self.normalize_and_resolve(url_str, base_url) {
172 insert(&mut endpoint_confidence, url, confidence);
173 }
174 }
175 }
176 }
177 }
178
179 let frameworks = detect_framework(js_content);
181 for framework in &frameworks {
182 if let Some(framework_endpoints) =
183 self.extract_framework_endpoints(js_content, base_url, framework)
184 {
185 for url in framework_endpoints {
186 insert(&mut endpoint_confidence, url, 0.85);
187 }
188 }
189 }
190
191 endpoint_confidence.into_values().collect()
192 }
193
194 fn extract_framework_endpoints(
196 &self,
197 js_content: &str,
198 base_url: &Url,
199 framework: &Framework,
200 ) -> Option<Vec<Url>> {
201 let patterns = get_compiled_framework_patterns(framework);
203 if patterns.is_empty() {
204 return None;
205 }
206
207 let mut endpoints = Vec::new();
208
209 for pattern in patterns {
210 for cap in pattern.captures_iter(js_content) {
211 for i in 1..cap.len() {
212 if let Some(url_match) = cap.get(i) {
213 let url_str = url_match.as_str();
214 if let Ok(url) = self.normalize_and_resolve(url_str, base_url) {
215 endpoints.push(url);
216 }
217 }
218 }
219 }
220 }
221
222 Some(endpoints)
223 }
224
225 fn normalize_and_resolve(&self, url_str: &str, base_url: &Url) -> Result<Url> {
227 let cleaned = url_str.trim_matches(|c| c == '"' || c == '\'' || c == '`');
229
230 let cleaned = self.replace_template_vars(cleaned);
232
233 if let Ok(url) = Url::parse(&cleaned) {
235 return Ok(url);
236 }
237
238 Ok(base_url.join(&cleaned)?)
240 }
241
242 fn replace_template_vars(&self, url: &str) -> String {
244 let mut result = url.to_string();
245
246 result = TEMPLATE_VAR_RE.replace_all(&result, "0").to_string();
248
249 result = result
251 .replace("{id}", "1")
252 .replace("{userId}", "1")
253 .replace("{user_id}", "1")
254 .replace("{uuid}", "00000000-0000-0000-0000-000000000000")
255 .replace("{slug}", "example")
256 .replace("{name}", "example")
257 .replace(":id", "1")
258 .replace(":userId", "1")
259 .replace(":user_id", "1")
260 .replace(":uuid", "00000000-0000-0000-0000-000000000000")
261 .replace(":slug", "example")
262 .replace(":name", "example");
263
264 result
265 }
266}
267
268impl Default for JavaScriptParser {
269 fn default() -> Self {
270 Self::new().unwrap_or_else(|e| panic!("Failed to create default JavaScriptParser: {}", e))
271 }
272}
273
/// Extracts endpoint URLs from "frame" file content, which is tried both as
/// JSON and as JavaScript text.
#[derive(Clone)]
pub struct FrameFileParser {
    /// Parser used for the JavaScript pass over the raw content.
    js_parser: JavaScriptParser,
}
279
280impl FrameFileParser {
281 pub fn new() -> Result<Self> {
283 Ok(Self {
284 js_parser: JavaScriptParser::new()?,
285 })
286 }
287
288 pub fn extract_endpoints(&self, frame_content: &str, base_url: &Url) -> Vec<Url> {
290 let mut endpoints = Vec::new();
291
292 if let Ok(json) = serde_json::from_str::<serde_json::Value>(frame_content) {
295 endpoints.extend(self.extract_from_json(&json, base_url));
296 }
297
298 endpoints.extend(self.js_parser.extract_endpoints(frame_content, base_url));
300
301 let unique: HashSet<_> = endpoints.into_iter().collect();
303 unique.into_iter().collect()
304 }
305
306 fn extract_from_json(&self, json: &serde_json::Value, base_url: &Url) -> Vec<Url> {
308 let mut endpoints = Vec::new();
309
310 match json {
311 serde_json::Value::Object(map) => {
312 for (key, value) in map {
313 if key.contains("url")
315 || key.contains("endpoint")
316 || key.contains("path")
317 || key.contains("route")
318 || key.contains("href")
319 || key.contains("link")
320 {
321 if let Some(url_str) = value.as_str() {
322 if let Ok(url) = base_url.join(url_str) {
323 endpoints.push(url);
324 }
325 }
326 }
327 endpoints.extend(self.extract_from_json(value, base_url));
329 }
330 }
331 serde_json::Value::Array(arr) => {
332 for item in arr {
333 endpoints.extend(self.extract_from_json(item, base_url));
334 }
335 }
336 _ => {}
337 }
338
339 endpoints
340 }
341}
342
343impl Default for FrameFileParser {
344 fn default() -> Self {
345 Self::new().unwrap_or_else(|e| panic!("Failed to create default FrameFileParser: {}", e))
346 }
347}
348
#[cfg(test)]
mod tests {
    use super::*;

    /// Base URL that every test resolves relative references against.
    fn example_base() -> Url {
        Url::parse("https://example.com").unwrap()
    }

    /// Asserts that at least one extracted URL has exactly `path` as its path.
    #[track_caller]
    fn assert_has_path(endpoints: &[Url], path: &str) {
        assert!(endpoints.iter().any(|u| u.path() == path));
    }

    /// Plain `fetch`, `axios.get` and assignment forms are all picked up.
    #[test]
    fn test_js_endpoint_extraction() {
        let js = r#"
            fetch('/api/users');
            axios.get('/api/posts');
            const endpoint = '/api/comments';
        "#;
        let parser = JavaScriptParser::new().unwrap();
        let endpoints = parser.extract_endpoints(js, &example_base());

        for &path in &["/api/users", "/api/posts", "/api/comments"] {
            assert_has_path(&endpoints, path);
        }
    }

    /// `${...}`, `{id}` and `:slug` placeholders are replaced by sample values.
    #[test]
    fn test_template_variable_replacement() {
        let js = r#"
            fetch('/api/users/${userId}');
            fetch('/api/items/{id}');
            fetch('/api/posts/:slug');
        "#;
        let parser = JavaScriptParser::new().unwrap();
        let endpoints = parser.extract_endpoints(js, &example_base());

        for &path in &["/api/users/0", "/api/items/1", "/api/posts/example"] {
            assert_has_path(&endpoints, path);
        }
    }

    /// String values under URL-like keys of a JSON document are extracted.
    #[test]
    fn test_frame_file_json_extraction() {
        let frame_content = r#"
        {
            "api": {
                "endpoint": "/api/v1/data",
                "path": "/api/v1/users"
            }
        }
        "#;
        let parser = FrameFileParser::new().unwrap();
        let endpoints = parser.extract_endpoints(frame_content, &example_base());

        for &path in &["/api/v1/data", "/api/v1/users"] {
            assert_has_path(&endpoints, path);
        }
    }

    /// A quoted `wss://` URL is returned unchanged.
    #[test]
    fn test_websocket_extraction() {
        let js = r#"
            const ws = new WebSocket('wss://example.com/socket');
        "#;
        let parser = JavaScriptParser::new().unwrap();
        let endpoints = parser.extract_endpoints(js, &example_base());

        let found = endpoints
            .iter()
            .any(|u| u.as_str() == "wss://example.com/socket");
        assert!(found);
    }
}