Skip to main content

hazler_js_parser/
sourcemap.rs

1use crate::error::Result;
2use serde::{Deserialize, Serialize};
3use std::collections::HashSet;
4use url::Url;
5
/// Source map parser for extracting original source paths.
///
/// The only state is the size limit enforced by `parse_source_map`, so the
/// parser is cheap to clone and to print for diagnostics.
#[derive(Debug, Clone)]
pub struct SourceMapParser {
    /// Maximum source map size to process (in bytes)
    max_size: usize,
}
12
13/// Parsed source map information
14#[derive(Debug, Clone, Serialize, Deserialize)]
15#[serde(rename_all = "camelCase")]
16pub struct SourceMap {
17    pub version: i32,
18    pub file: Option<String>,
19    pub sources: Vec<String>,
20    pub sources_content: Option<Vec<Option<String>>>,
21    pub names: Vec<String>,
22    pub mappings: String,
23    pub source_root: Option<String>,
24}
25
26/// Source map detection result
27#[derive(Debug, Clone)]
28pub struct SourceMapReference {
29    pub js_url: Url,
30    pub map_url: Url,
31    pub inline: bool,
32}
33
34/// Analyzed source map with extracted paths
35#[derive(Debug, Clone, Serialize, Deserialize)]
36pub struct SourceMapAnalysis {
37    pub map_url: String,
38    pub total_sources: usize,
39    pub interesting_paths: Vec<InterestingPath>,
40    pub frameworks_detected: Vec<String>,
41    pub project_structure: Vec<String>,
42}
43
44/// Interesting path found in source map
45#[derive(Debug, Clone, Serialize, Deserialize)]
46pub struct InterestingPath {
47    pub path: String,
48    pub category: PathCategory,
49    pub priority: Priority,
50    pub reason: String,
51}
52
53/// Category of an interesting path
54#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
55pub enum PathCategory {
56    Admin,
57    Api,
58    Auth,
59    Config,
60    Database,
61    Internal,
62    Secret,
63    Test,
64    Other,
65}
66
67/// Priority level for interesting paths
68#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq, PartialOrd, Ord)]
69pub enum Priority {
70    Critical,
71    High,
72    Medium,
73    Low,
74}
75
76impl SourceMapParser {
77    /// Create a new source map parser
78    pub fn new() -> Self {
79        Self {
80            max_size: 50 * 1024 * 1024, // 50 MB default
81        }
82    }
83
84    /// Create a new source map parser with custom max size
85    pub fn with_max_size(max_size: usize) -> Self {
86        Self { max_size }
87    }
88
89    /// Detect source map references in JavaScript content
90    pub fn detect_source_map_references(
91        &self,
92        js_content: &str,
93        js_url: &Url,
94    ) -> Vec<SourceMapReference> {
95        let mut references = Vec::new();
96
97        // Check for sourceMappingURL comment at the end of the file
98        let lines: Vec<&str> = js_content.lines().collect();
99        if let Some(last_line) = lines.last() {
100            if let Some(map_url_str) = self.extract_mapping_url(last_line) {
101                if let Ok(map_url) = js_url.join(map_url_str) {
102                    references.push(SourceMapReference {
103                        js_url: js_url.clone(),
104                        map_url,
105                        inline: map_url_str.starts_with("data:"),
106                    });
107                }
108            }
109        }
110
111        // Also check second to last line (some bundlers put it there)
112        if lines.len() > 1 {
113            if let Some(second_last) = lines.get(lines.len() - 2) {
114                if let Some(map_url_str) = self.extract_mapping_url(second_last) {
115                    if let Ok(map_url) = js_url.join(map_url_str) {
116                        if !references.iter().any(|r| r.map_url == map_url) {
117                            references.push(SourceMapReference {
118                                js_url: js_url.clone(),
119                                map_url,
120                                inline: map_url_str.starts_with("data:"),
121                            });
122                        }
123                    }
124                }
125            }
126        }
127
128        // Try adding .map extension as fallback
129        if let Ok(map_url) = Url::parse(&format!("{}.map", js_url.as_str())) {
130            if !references.iter().any(|r| r.map_url == map_url) {
131                references.push(SourceMapReference {
132                    js_url: js_url.clone(),
133                    map_url,
134                    inline: false,
135                });
136            }
137        }
138
139        references
140    }
141
142    /// Extract sourceMappingURL from a line
143    fn extract_mapping_url<'a>(&self, line: &'a str) -> Option<&'a str> {
144        let trimmed = line.trim();
145
146        // Handle //#sourceMappingURL=
147        if let Some(idx) = trimmed.find("sourceMappingURL=") {
148            let url_start = idx + "sourceMappingURL=".len();
149            return Some(trimmed[url_start..].trim());
150        }
151
152        // Handle //@ sourceMappingURL= (deprecated but still used)
153        if let Some(idx) = trimmed.find("@ sourceMappingURL=") {
154            let url_start = idx + "@ sourceMappingURL=".len();
155            return Some(trimmed[url_start..].trim());
156        }
157
158        None
159    }
160
161    /// Parse source map from JSON content
162    pub fn parse_source_map(&self, content: &str) -> Result<SourceMap> {
163        // Check size limit
164        if content.len() > self.max_size {
165            return Err(crate::error::Error::SourceMapTooLarge(content.len()));
166        }
167
168        let source_map: SourceMap = serde_json::from_str(content)?;
169        Ok(source_map)
170    }
171
172    /// Analyze source map and extract interesting information
173    pub fn analyze_source_map(&self, source_map: &SourceMap, map_url: &str) -> SourceMapAnalysis {
174        let mut interesting_paths = Vec::new();
175        let mut frameworks = HashSet::new();
176        let mut project_dirs = HashSet::new();
177
178        for source in &source_map.sources {
179            // Detect frameworks
180            if source.contains("node_modules") {
181                self.detect_framework_from_path(source, &mut frameworks);
182            }
183
184            // Extract project structure (non node_modules paths)
185            if !source.contains("node_modules") && !source.contains("webpack") {
186                if let Some(dir) = self.extract_directory(source) {
187                    project_dirs.insert(dir);
188                }
189            }
190
191            // Check for interesting paths
192            if let Some(interesting) = self.classify_path(source) {
193                interesting_paths.push(interesting);
194            }
195        }
196
197        // Sort by priority
198        interesting_paths.sort_by(|a, b| a.priority.cmp(&b.priority));
199
200        SourceMapAnalysis {
201            map_url: map_url.to_string(),
202            total_sources: source_map.sources.len(),
203            interesting_paths,
204            frameworks_detected: frameworks.into_iter().collect(),
205            project_structure: project_dirs.into_iter().collect(),
206        }
207    }
208
209    /// Detect framework from node_modules path
210    fn detect_framework_from_path(&self, path: &str, frameworks: &mut HashSet<String>) {
211        let common_frameworks = [
212            "react", "vue", "angular", "@angular", "svelte", "next", "nuxt", "gatsby", "express",
213            "fastify", "nest", "redux", "mobx", "axios", "apollo", "graphql",
214        ];
215
216        for framework in &common_frameworks {
217            if path.contains(&format!("node_modules/{}", framework)) {
218                frameworks.insert(framework.to_string());
219            }
220        }
221    }
222
223    /// Extract directory from file path
224    fn extract_directory(&self, path: &str) -> Option<String> {
225        let normalized = path.replace('\\', "/");
226        let parts: Vec<&str> = normalized.split('/').filter(|p| !p.is_empty()).collect();
227
228        if parts.is_empty() {
229            return None;
230        }
231
232        // Skip webpack:// and other protocols
233        let start_idx = if parts[0].ends_with(':') { 1 } else { 0 };
234
235        if start_idx < parts.len() {
236            // Return the first non-empty, meaningful directory
237            Some(parts[start_idx].to_string())
238        } else {
239            None
240        }
241    }
242
243    /// Classify path by category and priority
244    fn classify_path(&self, path: &str) -> Option<InterestingPath> {
245        let lower_path = path.to_lowercase();
246
247        // Skip node_modules and webpack internals
248        if lower_path.contains("node_modules") || lower_path.contains("webpack/runtime") {
249            return None;
250        }
251
252        // Critical paths
253        if lower_path.contains("admin") {
254            return Some(InterestingPath {
255                path: path.to_string(),
256                category: PathCategory::Admin,
257                priority: Priority::Critical,
258                reason: "Admin panel component detected".to_string(),
259            });
260        }
261
262        if lower_path.contains("secret")
263            || lower_path.contains("credential")
264            || lower_path.contains("password")
265            || lower_path.contains(".env")
266        {
267            return Some(InterestingPath {
268                path: path.to_string(),
269                category: PathCategory::Secret,
270                priority: Priority::Critical,
271                reason: "Potential secret or credential reference".to_string(),
272            });
273        }
274
275        // High priority paths
276        if lower_path.contains("/api/")
277            || lower_path.contains("_api")
278            || lower_path.contains("api.")
279        {
280            return Some(InterestingPath {
281                path: path.to_string(),
282                category: PathCategory::Api,
283                priority: Priority::High,
284                reason: "API implementation or routes".to_string(),
285            });
286        }
287
288        if lower_path.contains("auth")
289            || lower_path.contains("login")
290            || lower_path.contains("session")
291        {
292            return Some(InterestingPath {
293                path: path.to_string(),
294                category: PathCategory::Auth,
295                priority: Priority::High,
296                reason: "Authentication logic detected".to_string(),
297            });
298        }
299
300        if lower_path.contains("config") || lower_path.contains("settings") {
301            return Some(InterestingPath {
302                path: path.to_string(),
303                category: PathCategory::Config,
304                priority: Priority::High,
305                reason: "Configuration file detected".to_string(),
306            });
307        }
308
309        // Medium priority paths
310        if lower_path.contains("internal") || lower_path.contains("private") {
311            return Some(InterestingPath {
312                path: path.to_string(),
313                category: PathCategory::Internal,
314                priority: Priority::Medium,
315                reason: "Internal/private component".to_string(),
316            });
317        }
318
319        if lower_path.contains("database")
320            || lower_path.contains("db")
321            || lower_path.contains("model")
322        {
323            return Some(InterestingPath {
324                path: path.to_string(),
325                category: PathCategory::Database,
326                priority: Priority::Medium,
327                reason: "Database or model definition".to_string(),
328            });
329        }
330
331        // Low priority but still interesting
332        if lower_path.contains("test")
333            || lower_path.contains("spec")
334            || lower_path.contains("__test__")
335        {
336            return Some(InterestingPath {
337                path: path.to_string(),
338                category: PathCategory::Test,
339                priority: Priority::Low,
340                reason: "Test file (may reveal endpoints)".to_string(),
341            });
342        }
343
344        None
345    }
346
347    /// Generate a summary report from analysis
348    pub fn generate_report(&self, analysis: &SourceMapAnalysis) -> String {
349        let mut report = String::new();
350
351        report.push_str(&format!(
352            "\n[INFO] Source Map Analysis: {}\n",
353            analysis.map_url
354        ));
355        report.push_str(&format!(
356            "[INFO] Total sources: {}\n",
357            analysis.total_sources
358        ));
359
360        if !analysis.frameworks_detected.is_empty() {
361            report.push_str(&format!(
362                "[INFO] Frameworks detected: {}\n",
363                analysis.frameworks_detected.join(", ")
364            ));
365        }
366
367        if !analysis.project_structure.is_empty() {
368            report.push_str(&format!(
369                "[INFO] Project directories: {}\n",
370                analysis.project_structure.join(", ")
371            ));
372        }
373
374        if !analysis.interesting_paths.is_empty() {
375            report.push_str(&format!(
376                "\n[HIGH] Found {} interesting paths:\n",
377                analysis.interesting_paths.len()
378            ));
379
380            for path_info in &analysis.interesting_paths {
381                let priority_label = match path_info.priority {
382                    Priority::Critical => "CRITICAL",
383                    Priority::High => "HIGH",
384                    Priority::Medium => "MEDIUM",
385                    Priority::Low => "LOW",
386                };
387
388                report.push_str(&format!(
389                    "  [{}] {} - {}\n",
390                    priority_label, path_info.path, path_info.reason
391                ));
392            }
393        }
394
395        report
396    }
397}
398
399impl Default for SourceMapParser {
400    fn default() -> Self {
401        Self::new()
402    }
403}
404
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a minimal version-3 source map that only carries `sources`
    fn source_map_with(sources: &[&str]) -> SourceMap {
        SourceMap {
            version: 3,
            file: None,
            sources: sources.iter().map(|s| s.to_string()).collect(),
            sources_content: None,
            names: vec![],
            mappings: "AAAA".to_string(),
            source_root: None,
        }
    }

    /// Classify `path` with a default parser
    fn classify(path: &str) -> Option<InterestingPath> {
        SourceMapParser::new().classify_path(path)
    }

    #[test]
    fn test_detect_source_map_references() {
        let parser = SourceMapParser::new();
        let js_content = r#"
        console.log("Hello");
        //# sourceMappingURL=app.js.map
        "#;
        let js_url = Url::parse("https://example.com/static/app.js").unwrap();

        let refs = parser.detect_source_map_references(js_content, &js_url);
        assert!(!refs.is_empty());
        assert_eq!(
            refs[0].map_url.as_str(),
            "https://example.com/static/app.js.map"
        );
        assert!(!refs[0].inline);
    }

    #[test]
    fn test_detect_source_map_with_deprecated_syntax() {
        let parser = SourceMapParser::new();
        // The map name differs from `<script>.map` on purpose: the fallback
        // candidate alone must not be able to satisfy this test.
        let js_content = r#"
        console.log("Hello");
        //@ sourceMappingURL=bundle.min.map
        "#;
        let js_url = Url::parse("https://example.com/static/app.js").unwrap();

        let refs = parser.detect_source_map_references(js_content, &js_url);
        assert!(!refs.is_empty());
        assert_eq!(
            refs[0].map_url.as_str(),
            "https://example.com/static/bundle.min.map"
        );
        assert!(!refs[0].inline);

        // The conventional fallback is still offered as an extra candidate
        assert!(refs
            .iter()
            .any(|r| r.map_url.as_str() == "https://example.com/static/app.js.map"));
    }

    #[test]
    fn test_parse_source_map() {
        let parser = SourceMapParser::new();
        let content = r#"{
            "version": 3,
            "file": "bundle.js",
            "sources": ["src/index.js", "src/admin/Dashboard.tsx"],
            "names": ["console", "log"],
            "mappings": "AAAA"
        }"#;

        let result = parser.parse_source_map(content);
        assert!(result.is_ok());

        let map = result.unwrap();
        assert_eq!(map.version, 3);
        assert_eq!(map.file.as_deref(), Some("bundle.js"));
        assert_eq!(map.sources.len(), 2);
        assert_eq!(map.names.len(), 2);
    }

    #[test]
    fn test_classify_admin_path() {
        let classified = classify("src/admin/Dashboard.tsx").expect("admin path should match");

        assert_eq!(classified.category, PathCategory::Admin);
        assert_eq!(classified.priority, Priority::Critical);
    }

    #[test]
    fn test_classify_api_path() {
        let classified = classify("src/api/users.ts").expect("api path should match");

        assert_eq!(classified.category, PathCategory::Api);
        assert_eq!(classified.priority, Priority::High);
    }

    #[test]
    fn test_classify_secret_path() {
        // "secret" outranks "config" because its rule is checked first
        let classified = classify("src/config/secrets.ts").expect("secret path should match");

        assert_eq!(classified.category, PathCategory::Secret);
        assert_eq!(classified.priority, Priority::Critical);
    }

    #[test]
    fn test_skip_node_modules() {
        assert!(classify("node_modules/react/index.js").is_none());
    }

    #[test]
    fn test_analyze_source_map() {
        let parser = SourceMapParser::new();
        let source_map = source_map_with(&[
            "src/admin/Dashboard.tsx",
            "src/api/users.ts",
            "src/components/Button.tsx",
            "node_modules/react/index.js",
        ]);

        let analysis = parser.analyze_source_map(&source_map, "https://example.com/bundle.js.map");

        assert_eq!(analysis.map_url, "https://example.com/bundle.js.map");
        assert_eq!(analysis.total_sources, 4);
        assert!(!analysis.interesting_paths.is_empty());

        // Should find admin and api paths
        assert!(analysis
            .interesting_paths
            .iter()
            .any(|p| p.category == PathCategory::Admin));
        assert!(analysis
            .interesting_paths
            .iter()
            .any(|p| p.category == PathCategory::Api));

        // Most severe finding comes first
        assert_eq!(analysis.interesting_paths[0].priority, Priority::Critical);

        // Should detect React
        assert!(analysis.frameworks_detected.contains(&"react".to_string()));

        // Project structure only reflects the project's own sources
        assert_eq!(analysis.project_structure, vec!["src".to_string()]);
    }

    #[test]
    fn test_generate_report() {
        let parser = SourceMapParser::new();
        let analysis = SourceMapAnalysis {
            map_url: "https://example.com/app.js.map".to_string(),
            total_sources: 10,
            interesting_paths: vec![InterestingPath {
                path: "src/admin/panel.tsx".to_string(),
                category: PathCategory::Admin,
                priority: Priority::Critical,
                reason: "Admin panel component detected".to_string(),
            }],
            frameworks_detected: vec!["react".to_string()],
            project_structure: vec!["src".to_string()],
        };

        let report = parser.generate_report(&analysis);

        assert!(report.contains("Source Map Analysis"));
        assert!(report.contains("Total sources: 10"));
        assert!(report.contains("react"));
        assert!(report.contains("CRITICAL"));
        assert!(report.contains("admin/panel.tsx"));
    }

    #[test]
    fn test_extract_directory() {
        let parser = SourceMapParser::new();

        assert_eq!(
            parser.extract_directory("webpack://src/components/Button.tsx"),
            Some("src".to_string())
        );
        assert_eq!(
            parser.extract_directory("src/admin/Dashboard.tsx"),
            Some("src".to_string())
        );
        assert_eq!(parser.extract_directory(""), None);
    }

    #[test]
    fn test_detect_multiple_source_map_refs() {
        let parser = SourceMapParser::new();
        // The comment names the same map as the `<script>.map` fallback, so the
        // two candidates must collapse into a single reference.
        let js_content = r#"
            (function() { return 42; })();
            //# sourceMappingURL=chunk1.js.map
        "#;
        let js_url = Url::parse("https://example.com/js/chunk1.js").unwrap();
        let refs = parser.detect_source_map_references(js_content, &js_url);

        assert_eq!(refs.len(), 1);
        assert!(refs[0].map_url.as_str().contains("chunk1.js.map"));
        assert!(!refs[0].inline);
    }

    #[test]
    fn test_analyze_source_map_no_interesting_paths() {
        let parser = SourceMapParser::new();
        let source_map =
            source_map_with(&["src/components/Button.tsx", "src/components/Modal.tsx"]);

        let analysis = parser.analyze_source_map(&source_map, "https://example.com/bundle.js.map");
        assert_eq!(analysis.total_sources, 2);
        // None of these paths are admin/api/secret etc., so no interesting paths expected
        assert!(
            analysis.interesting_paths.is_empty(),
            "Plain component paths should not be classified as interesting"
        );
    }

    #[test]
    fn test_parse_source_map_with_source_root() {
        let parser = SourceMapParser::new();
        let content = r#"{
            "version": 3,
            "sourceRoot": "/project/src",
            "sources": ["index.js"],
            "names": [],
            "mappings": "AAAA"
        }"#;

        let map = parser.parse_source_map(content).unwrap();
        assert_eq!(map.source_root, Some("/project/src".to_string()));
    }

    #[test]
    fn test_classify_auth_path() {
        let classified = classify("src/auth/login.ts").expect("auth path should match");

        assert_eq!(classified.category, PathCategory::Auth);
        assert_eq!(classified.priority, Priority::High);
    }

    #[test]
    fn test_classify_test_path() {
        // A silent `None` must fail here instead of skipping the assertions
        let classified =
            classify("src/__tests__/App.test.tsx").expect("test path should be classified");

        assert_eq!(classified.category, PathCategory::Test);
        assert_eq!(classified.priority, Priority::Low);
    }

    #[test]
    fn test_framework_detection_vue() {
        let parser = SourceMapParser::new();
        let source_map = source_map_with(&["node_modules/vue/dist/vue.esm.js", "src/App.vue"]);

        let analysis = parser.analyze_source_map(&source_map, "https://example.com/bundle.js.map");
        assert!(
            analysis.frameworks_detected.contains(&"vue".to_string()),
            "Should detect Vue from node_modules/vue"
        );
    }
}