// hazler_js_parser/parser.rs

1use crate::error::Result;
2use crate::framework::{detect_framework, get_compiled_framework_patterns, Framework};
3use once_cell::sync::Lazy;
4use regex::Regex;
5use std::collections::HashSet;
6use url::Url;
7
/// JavaScript URL patterns for endpoint discovery.
///
/// Contract relied on by the extraction loop in `JavaScriptParser`, which
/// resolves *every* capture group with index >= 1 as a URL candidate:
///
/// * each pattern exposes the URL/path as its **single** capturing group;
/// * keyword alternations (HTTP verbs, `api|endpoint|…`, `app|router`, …) are
///   non-capturing `(?:…)`, otherwise the keyword itself would be joined onto
///   the base URL and reported as an endpoint (e.g. `https://host/get`);
/// * patterns that match a bare URL wrap it in a group, otherwise they would
///   never yield anything.
///
/// The order of entries is significant: entry `i` is paired with entry `i` of
/// `JS_URL_PATTERN_CONFIDENCE`.
static JS_URL_PATTERNS: &[&str] = &[
    // URL in quotes
    r#"["'](https?://[^"'\s]+)["']"#,
    r#"["'](/[a-zA-Z0-9/_\-\.]+)["']"#,
    // Fetch API calls
    r#"fetch\s*\(\s*["']([^"']+)["']"#,
    r#"fetch\s*\(\s*`([^`]+)`"#,
    // XMLHttpRequest
    r#"\.open\s*\(\s*["'][^"']*["']\s*,\s*["']([^"']+)["']"#,
    // Axios calls
    r#"axios\.(?:get|post|put|delete|patch)\s*\(\s*["']([^"']+)["']"#,
    r#"axios\(\s*\{[^}]*url\s*:\s*["']([^"']+)["']"#,
    // jQuery AJAX
    r#"\$\.ajax\s*\(\s*\{[^}]*url\s*:\s*["']([^"']+)["']"#,
    r#"\$\.(?:get|post)\s*\(\s*["']([^"']+)["']"#,
    // API endpoint definitions
    r#"(?:api|endpoint|url|path|route)\s*[:=]\s*["']([^"']+)["']"#,
    // Template literals
    r#"`(/api/[^`]+)`"#,
    r#"`(https?://[^`]+)`"#,
    // Relative paths in router configs
    r#"path\s*:\s*["']([^"']+)["']"#,
    r#"route\s*:\s*["']([^"']+)["']"#,
    // GraphQL endpoints
    r#"(?:graphql|gql)\s*["']([^"']+)["']"#,
    // WebSocket endpoints
    r#"["'](wss?://[^"'\s]+)["']"#,
    // JSON-RPC endpoints
    r#"rpc\s*:\s*["']([^"']+)["']"#,
    // More API patterns
    r#"\.get\s*\(\s*["']([^"']+)["']"#,
    r#"\.post\s*\(\s*["']([^"']+)["']"#,
    r#"\.put\s*\(\s*["']([^"']+)["']"#,
    r#"\.delete\s*\(\s*["']([^"']+)["']"#,
    r#"\.patch\s*\(\s*["']([^"']+)["']"#,
    // React Router patterns
    r#"<Route\s+path=["']([^"']+)["']"#,
    r#"useNavigate\s*\(\s*\)\s*\(\s*["']([^"']+)["']"#,
    // Angular routing
    r#"RouterModule\.forRoot\([^)]*path:\s*["']([^"']+)["']"#,
    r#"\.navigate\(\s*\[["']([^"']+)["']"#,
    // Vue Router
    r#"router\.push\(\s*["']([^"']+)["']"#,
    // Next.js API routes (bare path; stops at any string delimiter or whitespace,
    // including a closing backtick so template literals do not leak trailing code)
    r#"(/api/[^"'`\s]+)"#,
    // Express-like route definitions
    r#"(?:app|router)\.(?:get|post|put|delete|patch)\s*\(\s*["']([^"']+)["']"#,
    // Import statements with URLs
    r#"import\s+.*\s+from\s+["']([^"']+)["']"#,
];
59
/// Per-pattern confidence weights, index-aligned with `JS_URL_PATTERNS`.
///
/// Each value lies in `0.0..=1.0`. Values near 1.0 mark patterns whose match
/// is almost certainly a real, reachable endpoint; values near 0.0 mark
/// speculative patterns (broad string literals, import paths).
static JS_URL_PATTERN_CONFIDENCE: &[f32] = &[
    // --- plain string literals ---
    0.60, // quoted absolute http(s) URL
    0.50, // quoted relative path (very broad)
    // --- fetch ---
    0.90, // fetch("…") / fetch('…')
    0.75, // fetch(`…`) — template variables are substituted
    // --- XMLHttpRequest ---
    0.90, // xhr.open(method, "…")
    // --- axios ---
    0.90, // axios.<verb>("…")
    0.85, // axios({ url: "…" })
    // --- jQuery ---
    0.85, // $.ajax({ url: "…" })
    0.85, // $.get("…") / $.post("…")
    // --- assignments and template literals ---
    0.70, // api / endpoint / url / path / route = "…"
    0.70, // `/api/…` template literal
    0.65, // `https://…` template literal
    // --- router-style config keys ---
    0.75, // path: "…"
    0.80, // route: "…"
    // --- other protocols ---
    0.80, // graphql / gql reference
    0.80, // ws:// or wss:// URL
    0.75, // rpc: "…"
    // --- generic HTTP-verb method calls ---
    0.80, // .get("…")
    0.80, // .post("…")
    0.80, // .put("…")
    0.80, // .delete("…")
    0.80, // .patch("…")
    // --- framework routing ---
    0.85, // React Router <Route path=…>
    0.85, // React Router useNavigate()(…)
    0.85, // Angular RouterModule.forRoot(…)
    0.85, // Angular .navigate([…])
    0.85, // Vue Router router.push(…)
    // --- server-side hints ---
    0.75, // bare /api/… literal (Next.js style)
    0.90, // Express app/router.<verb>("…")
    // --- imports ---
    0.30, // import … from "…" (usually a module path, rarely an endpoint)
];
97
98/// Template variable pattern for replacement
99static TEMPLATE_VAR_RE: Lazy<Regex> =
100    Lazy::new(|| Regex::new(r"\$\{[^}]+\}").expect("Failed to compile template variable regex"));
101
102/// JavaScript parser for extracting endpoints from JavaScript code
103#[derive(Clone)]
104pub struct JavaScriptParser {
105    patterns: Vec<Regex>,
106}
107
108impl JavaScriptParser {
109    /// Create a new JavaScript parser
110    pub fn new() -> Result<Self> {
111        // Verify that the confidence array stays in sync with the pattern array.
112        debug_assert_eq!(
113            JS_URL_PATTERNS.len(),
114            JS_URL_PATTERN_CONFIDENCE.len(),
115            "JS_URL_PATTERNS and JS_URL_PATTERN_CONFIDENCE must have the same length"
116        );
117
118        let patterns = JS_URL_PATTERNS
119            .iter()
120            .map(|p| Regex::new(p))
121            .collect::<std::result::Result<Vec<_>, _>>()?;
122
123        Ok(Self { patterns })
124    }
125
126    /// Extract endpoints from JavaScript content
127    pub fn extract_endpoints(&self, js_content: &str, base_url: &Url) -> Vec<Url> {
128        self.extract_endpoints_with_confidence(js_content, base_url)
129            .into_iter()
130            .map(|(url, _)| url)
131            .collect()
132    }
133
134    /// Extract endpoints from JavaScript content together with a confidence score.
135    ///
136    /// Each returned `(Url, f32)` pair contains the discovered endpoint and a
137    /// confidence value in `[0.0, 1.0]` indicating how reliable the extraction
138    /// pattern is.  Callers can filter on the confidence value to reduce false
139    /// positives from speculative patterns (e.g. import paths).
140    ///
141    /// Framework-specific patterns are assigned a confidence of 0.85.
142    pub fn extract_endpoints_with_confidence(
143        &self,
144        js_content: &str,
145        base_url: &Url,
146    ) -> Vec<(Url, f32)> {
147        // Use a map so that if the same URL is matched by multiple patterns we
148        // keep the *highest* confidence score for it.
149        let mut endpoint_confidence: std::collections::HashMap<String, (Url, f32)> =
150            std::collections::HashMap::new();
151
152        let insert =
153            |map: &mut std::collections::HashMap<String, (Url, f32)>, url: Url, confidence: f32| {
154                let key = url.as_str().to_string();
155                let entry = map.entry(key).or_insert((url.clone(), confidence));
156                if confidence > entry.1 {
157                    *entry = (url, confidence);
158                }
159            };
160
161        // Standard pattern matching
162        for (pattern, &confidence) in self.patterns.iter().zip(JS_URL_PATTERN_CONFIDENCE.iter()) {
163            for cap in pattern.captures_iter(js_content) {
164                // Extract URL from all capture groups (most patterns have 1-2 groups)
165                // Group 0 is the full match, groups 1+ are captured values
166                for i in 1..cap.len() {
167                    if let Some(url_match) = cap.get(i) {
168                        let url_str = url_match.as_str();
169
170                        // Try to resolve as absolute or relative URL
171                        if let Ok(url) = self.normalize_and_resolve(url_str, base_url) {
172                            insert(&mut endpoint_confidence, url, confidence);
173                        }
174                    }
175                }
176            }
177        }
178
179        // Detect frameworks and apply framework-specific patterns (confidence 0.85)
180        let frameworks = detect_framework(js_content);
181        for framework in &frameworks {
182            if let Some(framework_endpoints) =
183                self.extract_framework_endpoints(js_content, base_url, framework)
184            {
185                for url in framework_endpoints {
186                    insert(&mut endpoint_confidence, url, 0.85);
187                }
188            }
189        }
190
191        endpoint_confidence.into_values().collect()
192    }
193
194    /// Extract endpoints using framework-specific patterns
195    fn extract_framework_endpoints(
196        &self,
197        js_content: &str,
198        base_url: &Url,
199        framework: &Framework,
200    ) -> Option<Vec<Url>> {
201        // Use pre-compiled regexes to avoid re-compiling on every call.
202        let patterns = get_compiled_framework_patterns(framework);
203        if patterns.is_empty() {
204            return None;
205        }
206
207        let mut endpoints = Vec::new();
208
209        for pattern in patterns {
210            for cap in pattern.captures_iter(js_content) {
211                for i in 1..cap.len() {
212                    if let Some(url_match) = cap.get(i) {
213                        let url_str = url_match.as_str();
214                        if let Ok(url) = self.normalize_and_resolve(url_str, base_url) {
215                            endpoints.push(url);
216                        }
217                    }
218                }
219            }
220        }
221
222        Some(endpoints)
223    }
224
225    /// Normalize and resolve a URL string against a base URL
226    fn normalize_and_resolve(&self, url_str: &str, base_url: &Url) -> Result<Url> {
227        // Remove quotes and backticks
228        let cleaned = url_str.trim_matches(|c| c == '"' || c == '\'' || c == '`');
229
230        // Handle template literals with variables
231        let cleaned = self.replace_template_vars(cleaned);
232
233        // Try absolute URL first
234        if let Ok(url) = Url::parse(&cleaned) {
235            return Ok(url);
236        }
237
238        // Try relative URL
239        Ok(base_url.join(&cleaned)?)
240    }
241
242    /// Replace template variables with placeholder values
243    fn replace_template_vars(&self, url: &str) -> String {
244        let mut result = url.to_string();
245
246        // Replace ${variable} patterns with placeholders
247        result = TEMPLATE_VAR_RE.replace_all(&result, "0").to_string();
248
249        // Replace common placeholder patterns with example values
250        result = result
251            .replace("{id}", "1")
252            .replace("{userId}", "1")
253            .replace("{user_id}", "1")
254            .replace("{uuid}", "00000000-0000-0000-0000-000000000000")
255            .replace("{slug}", "example")
256            .replace("{name}", "example")
257            .replace(":id", "1")
258            .replace(":userId", "1")
259            .replace(":user_id", "1")
260            .replace(":uuid", "00000000-0000-0000-0000-000000000000")
261            .replace(":slug", "example")
262            .replace(":name", "example");
263
264        result
265    }
266}
267
268impl Default for JavaScriptParser {
269    fn default() -> Self {
270        Self::new().unwrap_or_else(|e| panic!("Failed to create default JavaScriptParser: {}", e))
271    }
272}
273
274/// Parser for .frame files
275#[derive(Clone)]
276pub struct FrameFileParser {
277    js_parser: JavaScriptParser,
278}
279
280impl FrameFileParser {
281    /// Create a new Frame file parser
282    pub fn new() -> Result<Self> {
283        Ok(Self {
284            js_parser: JavaScriptParser::new()?,
285        })
286    }
287
288    /// Extract endpoints from .frame file content
289    pub fn extract_endpoints(&self, frame_content: &str, base_url: &Url) -> Vec<Url> {
290        let mut endpoints = Vec::new();
291
292        // .frame files might contain JSON-like structures
293        // Try to parse as JSON first
294        if let Ok(json) = serde_json::from_str::<serde_json::Value>(frame_content) {
295            endpoints.extend(self.extract_from_json(&json, base_url));
296        }
297
298        // Also apply JavaScript patterns
299        endpoints.extend(self.js_parser.extract_endpoints(frame_content, base_url));
300
301        // Deduplicate
302        let unique: HashSet<_> = endpoints.into_iter().collect();
303        unique.into_iter().collect()
304    }
305
306    /// Recursively extract URLs from JSON structure
307    fn extract_from_json(&self, json: &serde_json::Value, base_url: &Url) -> Vec<Url> {
308        let mut endpoints = Vec::new();
309
310        match json {
311            serde_json::Value::Object(map) => {
312                for (key, value) in map {
313                    // Look for keys that suggest URLs
314                    if key.contains("url")
315                        || key.contains("endpoint")
316                        || key.contains("path")
317                        || key.contains("route")
318                        || key.contains("href")
319                        || key.contains("link")
320                    {
321                        if let Some(url_str) = value.as_str() {
322                            if let Ok(url) = base_url.join(url_str) {
323                                endpoints.push(url);
324                            }
325                        }
326                    }
327                    // Recurse into nested objects
328                    endpoints.extend(self.extract_from_json(value, base_url));
329                }
330            }
331            serde_json::Value::Array(arr) => {
332                for item in arr {
333                    endpoints.extend(self.extract_from_json(item, base_url));
334                }
335            }
336            _ => {}
337        }
338
339        endpoints
340    }
341}
342
343impl Default for FrameFileParser {
344    fn default() -> Self {
345        Self::new().unwrap_or_else(|e| panic!("Failed to create default FrameFileParser: {}", e))
346    }
347}
348
#[cfg(test)]
mod tests {
    use super::*;

    /// Base URL that every test resolves relative paths against.
    fn base_url() -> Url {
        Url::parse("https://example.com").unwrap()
    }

    /// True when at least one endpoint has exactly the given path.
    fn contains_path(endpoints: &[Url], path: &str) -> bool {
        endpoints.iter().any(|u| u.path() == path)
    }

    // Plain fetch / axios calls and an `endpoint = "…"` assignment are all found.
    #[test]
    fn test_js_endpoint_extraction() {
        let js = r#"
            fetch('/api/users');
            axios.get('/api/posts');
            const endpoint = '/api/comments';
        "#;
        let parser = JavaScriptParser::new().unwrap();
        let found = parser.extract_endpoints(js, &base_url());

        for expected in ["/api/users", "/api/posts", "/api/comments"].iter() {
            assert!(contains_path(&found, expected));
        }
    }

    // `${…}`, `{id}` and `:slug` placeholders are swapped for example values.
    #[test]
    fn test_template_variable_replacement() {
        let js = r#"
            fetch('/api/users/${userId}');
            fetch('/api/items/{id}');
            fetch('/api/posts/:slug');
        "#;
        let parser = JavaScriptParser::new().unwrap();
        let found = parser.extract_endpoints(js, &base_url());

        // Should replace template variables with placeholder values
        for expected in ["/api/users/0", "/api/items/1", "/api/posts/example"].iter() {
            assert!(contains_path(&found, expected));
        }
    }

    // URL-ish keys nested inside a JSON document are picked up.
    #[test]
    fn test_frame_file_json_extraction() {
        let frame_content = r#"
        {
            "api": {
                "endpoint": "/api/v1/data",
                "path": "/api/v1/users"
            }
        }
        "#;
        let parser = FrameFileParser::new().unwrap();
        let found = parser.extract_endpoints(frame_content, &base_url());

        for expected in ["/api/v1/data", "/api/v1/users"].iter() {
            assert!(contains_path(&found, expected));
        }
    }

    // Absolute wss:// URLs are returned unchanged.
    #[test]
    fn test_websocket_extraction() {
        let js = r#"
            const ws = new WebSocket('wss://example.com/socket');
        "#;
        let parser = JavaScriptParser::new().unwrap();
        let found = parser.extract_endpoints(js, &base_url());

        let has_socket = found
            .iter()
            .any(|u| u.as_str() == "wss://example.com/socket");
        assert!(has_socket);
    }
}