Skip to main content

provenant/utils/
sourcemap.rs

1//! Source map file processing for license detection.
2//!
3//! Source map files (.js.map, .css.map) are JSON files containing embedded
4//! source code in a `sourcesContent` array. This module extracts that content
5//! for license detection.
6
7use std::path::Path;
8
/// Report whether `path` names a source map file.
///
/// Only the final path component is inspected. It is lowercased first, so the
/// match is case-insensitive, and it must end with `.js.map` or `.css.map`.
/// Paths without a final component, or whose final component is not valid
/// UTF-8, are reported as not being source maps.
pub fn is_sourcemap(path: &Path) -> bool {
    const SOURCEMAP_SUFFIXES: [&str; 2] = [".js.map", ".css.map"];

    match path.file_name().and_then(|os_name| os_name.to_str()) {
        Some(file_name) => {
            let lowered = file_name.to_lowercase();
            SOURCEMAP_SUFFIXES
                .iter()
                .any(|suffix| lowered.ends_with(*suffix))
        }
        // No file name component, or the name is not valid UTF-8.
        None => false,
    }
}
19
20/// Extract source content from a source map JSON file.
21///
22/// Parses the JSON and extracts all entries from `sourcesContent`,
23/// combining them with newlines for license detection.
24///
25/// Returns `Some(combined_text)` if successfully parsed with content.
26/// Returns `None` if JSON parsing fails or no sourcesContent exists.
27pub fn extract_sourcemap_content(json_text: &str) -> Option<String> {
28    let json: serde_json::Value = serde_json::from_str(json_text).ok()?;
29    let sources = json.get("sourcesContent")?.as_array()?;
30
31    let combined: String = sources
32        .iter()
33        .filter_map(|v| v.as_str())
34        .map(replace_verbatim_cr_lf_chars)
35        .collect::<Vec<_>>()
36        .join("\n");
37
38    if combined.is_empty() {
39        None
40    } else {
41        Some(combined)
42    }
43}
44
/// Turn verbatim (escaped) CR/LF sequences into real newline characters.
///
/// This mirrors Python's `replace_verbatim_cr_lf_chars()`: the replacements
/// are applied one after another over the whole string, in this fixed order,
/// each producing a single `\n` character:
///
/// 1. backslash-backslash-r-backslash-backslash-n (double-escaped CRLF)
/// 2. backslash-r-backslash-n (single-escaped CRLF)
/// 3. backslash-backslash-r (double-escaped CR)
/// 4. backslash-backslash-n (double-escaped LF)
/// 5. backslash-r (single-escaped CR)
/// 6. backslash-n (single-escaped LF)
///
/// The order matters: each pass sees the output of the previous one, so the
/// result can differ from a single-pass longest-match substitution.
fn replace_verbatim_cr_lf_chars(s: &str) -> String {
    // Raw strings keep the backslashes literal; the order is significant.
    const ESCAPED_LINE_ENDINGS: [&str; 6] = [
        r"\\r\\n",
        r"\r\n",
        r"\\r",
        r"\\n",
        r"\r",
        r"\n",
    ];

    ESCAPED_LINE_ENDINGS
        .iter()
        .fold(s.to_owned(), |text, pattern| text.replace(*pattern, "\n"))
}
64
/// Unit tests for source map detection and content extraction.
#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    #[test]
    fn test_is_sourcemap_js_map() {
        assert!(is_sourcemap(&PathBuf::from("app.js.map")));
        assert!(is_sourcemap(&PathBuf::from("APP.JS.MAP")));
    }

    #[test]
    fn test_is_sourcemap_css_map() {
        assert!(is_sourcemap(&PathBuf::from("style.css.map")));
        assert!(is_sourcemap(&PathBuf::from("STYLE.CSS.MAP")));
    }

    #[test]
    fn test_is_sourcemap_not_map() {
        assert!(!is_sourcemap(&PathBuf::from("app.js")));
        assert!(!is_sourcemap(&PathBuf::from("data.json")));
        assert!(!is_sourcemap(&PathBuf::from("other.map")));
    }

    #[test]
    fn test_extract_sourcemap_content_basic() {
        // The JSON escape `\n` decodes to a real newline, which is kept as-is.
        let json = r#"{"version":3,"sourcesContent":["hello\nworld"]}"#;
        assert_eq!(
            extract_sourcemap_content(json).as_deref(),
            Some("hello\nworld")
        );
    }

    #[test]
    fn test_extract_sourcemap_content_mit_license() {
        let json = r#"{"version":3,"sourcesContent":["Use of this source code is governed by an MIT-style license\nthat can be found in the LICENSE file"]}"#;
        let content = extract_sourcemap_content(json).expect("content should be extracted");
        assert!(content.contains("MIT-style license"));
        assert!(content.contains("LICENSE file"));
        assert!(content.contains('\n'));
    }

    #[test]
    fn test_extract_sourcemap_content_multiple_entries() {
        // Entries are joined with exactly one newline between them.
        let json = r#"{"version":3,"sourcesContent":["first\nfile","second\nfile"]}"#;
        assert_eq!(
            extract_sourcemap_content(json).as_deref(),
            Some("first\nfile\nsecond\nfile")
        );
    }

    #[test]
    fn test_extract_sourcemap_content_no_sources() {
        let json = r#"{"version":3,"sources":[]}"#;
        assert!(extract_sourcemap_content(json).is_none());
    }

    #[test]
    fn test_extract_sourcemap_content_empty_or_non_array() {
        // An empty array yields empty text, which is reported as `None`.
        assert!(extract_sourcemap_content(r#"{"sourcesContent":[]}"#).is_none());
        // A non-array `sourcesContent` is rejected.
        assert!(extract_sourcemap_content(r#"{"sourcesContent":"text"}"#).is_none());
    }

    #[test]
    fn test_extract_sourcemap_content_invalid_json() {
        let json = r#"not valid json"#;
        assert!(extract_sourcemap_content(json).is_none());
    }

    #[test]
    fn test_extract_sourcemap_content_null_entries() {
        // `null` entries are skipped and contribute no separator.
        let json = r#"{"version":3,"sourcesContent":[null,"actual\ncontent"]}"#;
        assert_eq!(
            extract_sourcemap_content(json).as_deref(),
            Some("actual\ncontent")
        );
    }

    #[test]
    fn test_extract_sourcemap_content_only_null_entries() {
        let json = r#"{"version":3,"sourcesContent":[null,null]}"#;
        assert!(extract_sourcemap_content(json).is_none());
    }

    #[test]
    fn test_replace_verbatim_cr_lf_chars() {
        // Single-escaped (backslash-n, backslash-r in the string)
        assert_eq!(replace_verbatim_cr_lf_chars("a\\nb"), "a\nb");
        assert_eq!(replace_verbatim_cr_lf_chars("a\\rb"), "a\nb");
        assert_eq!(replace_verbatim_cr_lf_chars("a\\r\\nb"), "a\nb");
        // Double-escaped (literal backslash-backslash-n in the string)
        assert_eq!(replace_verbatim_cr_lf_chars("a\\\\nb"), "a\nb");
        assert_eq!(replace_verbatim_cr_lf_chars("a\\\\rb"), "a\nb");
        assert_eq!(replace_verbatim_cr_lf_chars("a\\\\r\\\\nb"), "a\nb");
        // Text without escaped line endings is returned unchanged.
        assert_eq!(replace_verbatim_cr_lf_chars("plain"), "plain");
    }

    #[test]
    fn test_ar_er_js_map_detection() {
        let path = PathBuf::from("testdata/license-golden/datadriven/lic2/ar-ER.js.map");
        // The fixture is optional; skip quietly when it is not checked out.
        if !path.exists() {
            eprintln!("Skipping test: test file not found");
            return;
        }

        let text = std::fs::read_to_string(&path).expect("Failed to read file");
        eprintln!("Raw text length: {}", text.len());

        let json: serde_json::Value = serde_json::from_str(&text).expect("JSON parse failed");
        let sources = json
            .get("sourcesContent")
            .expect("No sourcesContent")
            .as_array()
            .expect("Not array");
        eprintln!("Sources array length: {}", sources.len());

        if let Some(first) = sources.first().and_then(|v| v.as_str()) {
            eprintln!("First source length: {}", first.len());
            // Take characters rather than slicing bytes: a byte slice such as
            // `&first[..100]` panics when byte 100 falls inside a multi-byte
            // UTF-8 character.
            let preview: String = first.chars().take(100).collect();
            eprintln!("First 100 chars: {:?}", preview);
        }

        let content =
            extract_sourcemap_content(&text).expect("Should extract content from ar-ER.js.map");
        eprintln!("Extracted content length: {}", content.len());
        assert!(
            content.contains("MIT-style license"),
            "Should contain MIT license text"
        );
    }
}