scancode_rust/parsers/
npm.rs

1use crate::models::{Dependency, LicenseDetection, Match, PackageData, Party};
2use log::warn;
3use packageurl::PackageUrl;
4use serde_json::Value;
5use std::collections::HashMap;
6use std::fs::File;
7use std::io::{BufRead, BufReader};
8use std::path::Path;
9
10use super::PackageParser;
11
// Top-level `package.json` keys read by this parser.

// Package identity.
const FIELD_NAME: &str = "name";
const FIELD_VERSION: &str = "version";

// Licensing: `license` is looked up first, `licenses` is the deprecated array form.
const FIELD_LICENSE: &str = "license";
const FIELD_LICENSES: &str = "licenses";

// Project links.
const FIELD_HOMEPAGE: &str = "homepage";
const FIELD_REPOSITORY: &str = "repository";

// People associated with the package.
const FIELD_AUTHOR: &str = "author";
const FIELD_CONTRIBUTORS: &str = "contributors";
const FIELD_MAINTAINERS: &str = "maintainers";

// Dependency sections.
const FIELD_DEPENDENCIES: &str = "dependencies";
const FIELD_DEV_DEPENDENCIES: &str = "devDependencies";
23
24pub struct NpmParser;
25
26impl PackageParser for NpmParser {
27    const PACKAGE_TYPE: &'static str = "npm";
28
29    fn extract_package_data(path: &Path) -> PackageData {
30        let (json, field_lines) = match read_and_parse_json_with_lines(path) {
31            Ok((json, lines)) => (json, lines),
32            Err(e) => {
33                warn!("Failed to read or parse package.json at {:?}: {}", path, e);
34                return default_package_data();
35            }
36        };
37
38        let name = json
39            .get(FIELD_NAME)
40            .and_then(|v| v.as_str())
41            .map(String::from);
42        let version = json
43            .get(FIELD_VERSION)
44            .and_then(|v| v.as_str())
45            .map(String::from);
46        let namespace = extract_namespace(&name);
47        let license_detections = extract_license_info(&json, &field_lines);
48        let dependencies = extract_dependencies(&json, false);
49        let dev_dependencies = extract_dependencies(&json, true);
50        let purl = create_package_url(&name, &version, &namespace);
51
52        PackageData {
53            package_type: Some(Self::PACKAGE_TYPE.to_string()),
54            namespace,
55            name,
56            version,
57            homepage_url: json
58                .get(FIELD_HOMEPAGE)
59                .and_then(|v| v.as_str())
60                .map(String::from),
61            download_url: extract_repository_url(&json),
62            copyright: None, // Not typically present in package.json
63            license_detections,
64            dependencies: [dependencies, dev_dependencies].concat(),
65            parties: extract_parties(&json),
66            purl,
67        }
68    }
69
70    fn is_match(path: &Path) -> bool {
71        path.file_name()
72            .map_or(false, |name| name == "package.json")
73    }
74}
75
76/// Reads and parses a JSON file while tracking line numbers of fields
77fn read_and_parse_json_with_lines(path: &Path) -> Result<(Value, HashMap<String, usize>), String> {
78    // Read the file line by line to track line numbers
79    let file = File::open(path).map_err(|e| format!("Failed to open file: {}", e))?;
80    let reader = BufReader::new(file);
81    let lines: Vec<String> = reader
82        .lines()
83        .collect::<Result<_, _>>()
84        .map_err(|e| format!("Error reading file: {}", e))?;
85
86    // Parse the content as JSON
87    let content = lines.join("\n");
88    let json: Value =
89        serde_json::from_str(&content).map_err(|e| format!("Failed to parse JSON: {}", e))?;
90
91    // Track line numbers for each field in the JSON
92    let mut field_lines = HashMap::new();
93    for (line_num, line) in lines.iter().enumerate() {
94        let line = line.trim();
95        // Look for field names in the format: "field": value
96        if let Some(field_name) = extract_field_name(line) {
97            field_lines.insert(field_name, line_num + 1); // 1-based line numbers
98        }
99    }
100
101    Ok((json, field_lines))
102}
103
/// Extracts the field name from a line of JSON shaped like `"field": value`.
///
/// Surrounding whitespace is ignored. Returns `None` when the line does not
/// start with a quoted, non-empty name that is closed and followed (after
/// optional whitespace) by a colon. Bare string values such as array elements
/// (`"MIT",`) and unterminated strings are therefore not reported as fields.
///
/// Escape sequences inside the name are not interpreted: the name ends at the
/// first `"` after the opening quote.
fn extract_field_name(line: &str) -> Option<String> {
    let after_open_quote = line.trim().strip_prefix('"')?;
    let (field_name, remainder) = after_open_quote.split_once('"')?;

    // Only a quoted string followed by `:` is an object key.
    let is_key = remainder.trim_start().starts_with(':');
    if field_name.is_empty() || !is_key {
        None
    } else {
        Some(field_name.to_string())
    }
}
130
/// Derives the namespace from a package name of the form `@scope/name` or `namespace/name`.
///
/// The namespace is the text before the first `/`, with any leading `@`
/// characters removed. Returns `None` when `name` is `None` or contains no `/`.
fn extract_namespace(name: &Option<String>) -> Option<String> {
    let full_name = name.as_deref()?;
    let (prefix, _) = full_name.split_once('/')?;

    // For unscoped names the prefix has no leading '@', so the trim is a no-op.
    Some(prefix.trim_start_matches('@').to_string())
}
150
151fn create_package_url(
152    name: &Option<String>,
153    version: &Option<String>,
154    namespace: &Option<String>,
155) -> Option<String> {
156    name.as_ref().map(|name| {
157        let mut package_url =
158            PackageUrl::new(NpmParser::PACKAGE_TYPE, name).expect("Failed to create PackageUrl");
159
160        if let Some(v) = version {
161            package_url.with_version(v);
162        }
163
164        if let Some(n) = namespace {
165            package_url.with_namespace(n);
166        }
167
168        package_url.to_string()
169    })
170}
171
172fn extract_license_info(
173    json: &Value,
174    field_lines: &HashMap<String, usize>,
175) -> Vec<LicenseDetection> {
176    let mut detections = Vec::new();
177
178    // Check for string license field
179    if let Some(license_str) = json.get(FIELD_LICENSE).and_then(|v| v.as_str()) {
180        let line = field_lines.get(FIELD_LICENSE).copied().unwrap_or(0);
181        detections.push(LicenseDetection {
182            license_expression: license_str.to_string(),
183            matches: vec![Match {
184                score: 100.0,
185                start_line: line,
186                end_line: line,
187                license_expression: license_str.to_string(),
188                rule_identifier: None,
189                matched_text: None,
190            }],
191        });
192        return detections;
193    }
194
195    // Check for license object
196    if let Some(license_obj) = json.get(FIELD_LICENSE).and_then(|v| v.as_object()) {
197        if let Some(license_type) = license_obj.get("type").and_then(|v| v.as_str()) {
198            let line = field_lines.get(FIELD_LICENSE).copied().unwrap_or(0);
199            detections.push(LicenseDetection {
200                license_expression: license_type.to_string(),
201                matches: vec![Match {
202                    score: 100.0,
203                    start_line: line,
204                    end_line: line,
205                    license_expression: license_type.to_string(),
206                    rule_identifier: None,
207                    matched_text: None,
208                }],
209            });
210            return detections;
211        }
212    }
213
214    // Check for deprecated licenses array
215    if let Some(licenses) = json.get(FIELD_LICENSES).and_then(|v| v.as_array()) {
216        let base_line = field_lines.get(FIELD_LICENSES).copied().unwrap_or(0);
217        for (index, license) in licenses.iter().enumerate() {
218            if let Some(license_type) = license.get("type").and_then(|v| v.as_str()) {
219                detections.push(LicenseDetection {
220                    license_expression: license_type.to_string(),
221                    matches: vec![Match {
222                        score: 100.0,
223                        start_line: base_line + index,
224                        end_line: base_line + index,
225                        license_expression: license_type.to_string(),
226                        rule_identifier: None,
227                        matched_text: None,
228                    }],
229                });
230            }
231        }
232    }
233
234    detections
235}
236
237fn extract_repository_url(json: &Value) -> Option<String> {
238    match json.get(FIELD_REPOSITORY) {
239        Some(Value::String(url)) => Some(normalize_repo_url(url)),
240        Some(Value::Object(obj)) => obj
241            .get("url")
242            .and_then(|u| u.as_str())
243            .map(normalize_repo_url),
244        _ => None,
245    }
246}
247
/// Normalizes repository URLs by converting known git-style prefixes to HTTPS.
///
/// Surrounding whitespace is trimmed first. Then, if the URL starts with one of
/// the recognised prefixes, only that leading prefix is rewritten:
///
/// * `git://` becomes `https://`
/// * `git+https://` becomes `https://`
/// * `git@github.com:` becomes `https://github.com/`
///
/// Any later occurrence of the same text inside the URL is left untouched.
/// URLs with no recognised prefix are returned trimmed but otherwise unchanged.
fn normalize_repo_url(url: &str) -> String {
    const PREFIX_REWRITES: &[(&str, &str)] = &[
        ("git://", "https://"),
        ("git+https://", "https://"),
        ("git@github.com:", "https://github.com/"),
    ];

    let url = url.trim();

    for &(prefix, replacement) in PREFIX_REWRITES {
        if let Some(rest) = url.strip_prefix(prefix) {
            return format!("{}{}", replacement, rest);
        }
    }

    url.to_string()
}
262
263/// Extracts party information (emails) from the `author`, `contributors`, and `maintainers` fields.
264fn extract_parties(json: &Value) -> Vec<Party> {
265    let mut parties = Vec::new();
266
267    // Extract author field
268    if let Some(author) = json.get(FIELD_AUTHOR) {
269        if let Some(email) = extract_email_from_field(author) {
270            parties.push(Party { email });
271        }
272    }
273
274    // Extract contributors field
275    if let Some(contributors) = json.get(FIELD_CONTRIBUTORS) {
276        if let Some(emails) = extract_emails_from_array(contributors) {
277            parties.extend(emails.into_iter().map(|email| Party { email }));
278        }
279    }
280
281    // Extract maintainers field
282    if let Some(maintainers) = json.get(FIELD_MAINTAINERS) {
283        if let Some(emails) = extract_emails_from_array(maintainers) {
284            parties.extend(emails.into_iter().map(|email| Party { email }));
285        }
286    }
287
288    parties
289}
290
291/// Extracts a single email from a JSON field, which can be a string or an object with an "email" field.
292fn extract_email_from_field(field: &Value) -> Option<String> {
293    match field {
294        Value::String(s) => Some(s.clone()),
295        Value::Object(obj) => obj.get("email").and_then(|v| v.as_str()).map(String::from),
296        _ => None,
297    }
298}
299
300/// Extracts multiple emails from a JSON array, where each element can be a string or an object with an "email" field.
301fn extract_emails_from_array(array: &Value) -> Option<Vec<String>> {
302    if let Value::Array(items) = array {
303        let emails = items
304            .iter()
305            .filter_map(|item| extract_email_from_field(item))
306            .collect::<Vec<_>>();
307        if !emails.is_empty() {
308            return Some(emails);
309        }
310    }
311    None
312}
313
314fn default_package_data() -> PackageData {
315    PackageData {
316        package_type: None,
317        namespace: None,
318        name: None,
319        version: None,
320        homepage_url: None,
321        download_url: None,
322        copyright: None,
323        license_detections: Vec::new(),
324        dependencies: Vec::new(),
325        parties: Vec::new(),
326        purl: None,
327    }
328}
329
330/// Extracts dependencies from the `dependencies` or `devDependencies` field in the JSON.
331fn extract_dependencies(json: &Value, is_optional: bool) -> Vec<Dependency> {
332    let field = if is_optional {
333        FIELD_DEV_DEPENDENCIES
334    } else {
335        FIELD_DEPENDENCIES
336    };
337
338    json.get(field)
339        .and_then(|deps| deps.as_object())
340        .map_or_else(Vec::new, |deps| {
341            deps.iter()
342                .filter_map(|(name, version)| {
343                    let version_str = version.as_str()?;
344                    let stripped_version = strip_version_modifier(version_str);
345                    let encoded_version = urlencoding::encode(&stripped_version).to_string();
346
347                    let mut package_url =
348                        PackageUrl::new(NpmParser::PACKAGE_TYPE, name).ok()?;
349                    package_url.with_version(&encoded_version);
350
351                    Some(Dependency {
352                        purl: Some(package_url.to_string()),
353                        scope: None,
354                        is_optional,
355                    })
356                })
357                .collect()
358        })
359}
360
/// Strips leading version modifiers (e.g., ~, ^, >=, <=) from a version string.
///
/// Removes any leading run of `~`, `^`, `>`, `<`, `=` and whitespace, so both
/// `>=1.0.0` and `>= 1.0.0` yield `1.0.0`. Trailing whitespace is removed as
/// well. Everything else, including ranges such as `1.0.0 - 2.0.0` and tags
/// such as `latest`, is returned unchanged.
fn strip_version_modifier(version: &str) -> String {
    version
        .trim_start_matches(|c: char| {
            matches!(c, '~' | '^' | '>' | '<' | '=') || c.is_whitespace()
        })
        .trim_end()
        .to_string()
}