scancode_rust/parsers/
npm.rs

1use crate::models::{Dependency, LicenseDetection, Match, PackageData, Party};
2use log::warn;
3use packageurl::PackageUrl;
4use serde_json::Value;
5use std::collections::HashMap;
6use std::fs::File;
7use std::io::{BufRead, BufReader};
8use std::path::Path;
9
10use super::PackageParser;
11
// Top-level `package.json` keys read by this parser.

// Package identity.
const FIELD_NAME: &str = "name";
const FIELD_VERSION: &str = "version";

// Licensing: `license` is the current key, `licenses` the deprecated array form.
const FIELD_LICENSE: &str = "license";
const FIELD_LICENSES: &str = "licenses";

// Project locations.
const FIELD_HOMEPAGE: &str = "homepage";
const FIELD_REPOSITORY: &str = "repository";

// People associated with the package.
const FIELD_AUTHOR: &str = "author";
const FIELD_CONTRIBUTORS: &str = "contributors";
const FIELD_MAINTAINERS: &str = "maintainers";

// Dependency tables.
const FIELD_DEPENDENCIES: &str = "dependencies";
const FIELD_DEV_DEPENDENCIES: &str = "devDependencies";
23
24pub struct NpmParser;
25
26impl PackageParser for NpmParser {
27    const PACKAGE_TYPE: &'static str = "npm";
28
29    fn extract_package_data(path: &Path) -> PackageData {
30        let (json, field_lines) = match read_and_parse_json_with_lines(path) {
31            Ok((json, lines)) => (json, lines),
32            Err(e) => {
33                warn!("Failed to read or parse package.json at {:?}: {}", path, e);
34                return default_package_data();
35            }
36        };
37
38        let name = json
39            .get(FIELD_NAME)
40            .and_then(|v| v.as_str())
41            .map(String::from);
42        let version = json
43            .get(FIELD_VERSION)
44            .and_then(|v| v.as_str())
45            .map(String::from);
46        let namespace = extract_namespace(&name);
47        let license_detections = extract_license_info(&json, &field_lines);
48        let dependencies = extract_dependencies(&json, false);
49        let dev_dependencies = extract_dependencies(&json, true);
50        let purl = create_package_url(&name, &version, &namespace);
51
52        PackageData {
53            package_type: Some(Self::PACKAGE_TYPE.to_string()),
54            namespace,
55            name,
56            version,
57            homepage_url: json
58                .get(FIELD_HOMEPAGE)
59                .and_then(|v| v.as_str())
60                .map(String::from),
61            download_url: extract_repository_url(&json),
62            copyright: None, // Not typically present in package.json
63            license_detections,
64            dependencies: [dependencies, dev_dependencies].concat(),
65            parties: extract_parties(&json),
66            purl,
67        }
68    }
69
70    fn is_match(path: &Path) -> bool {
71        path.file_name()
72            .map_or(false, |name| name == "package.json")
73    }
74}
75
76/// Reads and parses a JSON file while tracking line numbers of fields
77fn read_and_parse_json_with_lines(path: &Path) -> Result<(Value, HashMap<String, usize>), String> {
78    // Read the file line by line to track line numbers
79    let file = File::open(path).map_err(|e| format!("Failed to open file: {}", e))?;
80    let reader = BufReader::new(file);
81    let lines: Vec<String> = reader
82        .lines()
83        .collect::<Result<_, _>>()
84        .map_err(|e| format!("Error reading file: {}", e))?;
85
86    // Parse the content as JSON
87    let content = lines.join("\n");
88    let json: Value =
89        serde_json::from_str(&content).map_err(|e| format!("Failed to parse JSON: {}", e))?;
90
91    // Track line numbers for each field in the JSON
92    let mut field_lines = HashMap::new();
93    for (line_num, line) in lines.iter().enumerate() {
94        let line = line.trim();
95        // Look for field names in the format: "field": value
96        if let Some(field_name) = extract_field_name(line) {
97            field_lines.insert(field_name, line_num + 1); // 1-based line numbers
98        }
99    }
100
101    Ok((json, field_lines))
102}
103
/// Extracts the key from a line that looks like `"key": value`.
///
/// The line is trimmed first. A key is reported only when the line starts with
/// a double-quoted string that is followed (ignoring whitespace) by a colon,
/// so quoted array elements such as `"MIT",` and unterminated strings are not
/// mistaken for fields. Backslash escapes inside the key are skipped while
/// looking for the closing quote and are returned verbatim (not decoded).
///
/// Returns `None` for every other line, including one with an empty key.
fn extract_field_name(line: &str) -> Option<String> {
    let rest = line.trim().strip_prefix('"')?;

    // Locate the first unescaped closing quote.
    let mut escaped = false;
    let mut closing = None;
    for (idx, c) in rest.char_indices() {
        if escaped {
            escaped = false;
        } else if c == '\\' {
            escaped = true;
        } else if c == '"' {
            closing = Some(idx);
            break;
        }
    }
    let closing = closing?;

    let name = &rest[..closing];
    // `closing` indexes the one-byte `"`, so `closing + 1` is a char boundary.
    let after = rest[closing + 1..].trim_start();
    if name.is_empty() || !after.starts_with(':') {
        return None;
    }

    Some(name.to_string())
}
130
/// Derives the namespace from a package name of the form `namespace/name`.
///
/// The namespace is the text before the first `/`, with any leading `@`
/// characters removed (so `@scope/pkg` yields `scope`). Returns `None` when
/// there is no name or the name contains no `/`.
fn extract_namespace(name: &Option<String>) -> Option<String> {
    let full_name = name.as_deref()?;
    let (prefix, _) = full_name.split_once('/')?;
    // For unscoped names there is no leading `@`, so trimming is a no-op there.
    Some(prefix.trim_start_matches('@').to_string())
}
150
151fn create_package_url(
152    name: &Option<String>,
153    version: &Option<String>,
154    namespace: &Option<String>,
155) -> Option<String> {
156    name.as_ref().map(|name| {
157        // For npm, the package URL format is different based on if it's a scoped package
158        let mut package_url = if name.starts_with('@') && namespace.is_some() {
159            // For scoped packages (@namespace/name), we need to include the namespace in the name
160            // but also put it in the URL parameters
161            let name_without_at = name.trim_start_matches('@');
162            PackageUrl::new(NpmParser::PACKAGE_TYPE, name_without_at)
163                .expect("Failed to create PackageUrl")
164        } else {
165            PackageUrl::new(NpmParser::PACKAGE_TYPE, name)
166                .expect("Failed to create PackageUrl")
167        };
168
169        if let Some(v) = version {
170            package_url.with_version(v);
171        }
172
173        if let Some(n) = namespace {
174            package_url.with_namespace(n);
175        }
176
177        package_url.to_string()
178    })
179}
180
181fn extract_license_info(
182    json: &Value,
183    field_lines: &HashMap<String, usize>,
184) -> Vec<LicenseDetection> {
185    let mut detections = Vec::new();
186
187    // Check for string license field
188    if let Some(license_str) = json.get(FIELD_LICENSE).and_then(|v| v.as_str()) {
189        let line = field_lines.get(FIELD_LICENSE).copied().unwrap_or(0);
190        detections.push(LicenseDetection {
191            license_expression: license_str.to_string(),
192            matches: vec![Match {
193                score: 100.0,
194                start_line: line,
195                end_line: line,
196                license_expression: license_str.to_string(),
197                rule_identifier: None,
198                matched_text: None,
199            }],
200        });
201        return detections;
202    }
203
204    // Check for license object
205    if let Some(license_obj) = json.get(FIELD_LICENSE).and_then(|v| v.as_object()) {
206        if let Some(license_type) = license_obj.get("type").and_then(|v| v.as_str()) {
207            let line = field_lines.get(FIELD_LICENSE).copied().unwrap_or(0);
208            detections.push(LicenseDetection {
209                license_expression: license_type.to_string(),
210                matches: vec![Match {
211                    score: 100.0,
212                    start_line: line,
213                    end_line: line,
214                    license_expression: license_type.to_string(),
215                    rule_identifier: None,
216                    matched_text: None,
217                }],
218            });
219            return detections;
220        }
221    }
222
223    // Check for deprecated licenses array
224    if let Some(licenses) = json.get(FIELD_LICENSES).and_then(|v| v.as_array()) {
225        let base_line = field_lines.get(FIELD_LICENSES).copied().unwrap_or(0);
226        for (index, license) in licenses.iter().enumerate() {
227            if let Some(license_type) = license.get("type").and_then(|v| v.as_str()) {
228                detections.push(LicenseDetection {
229                    license_expression: license_type.to_string(),
230                    matches: vec![Match {
231                        score: 100.0,
232                        start_line: base_line + index,
233                        end_line: base_line + index,
234                        license_expression: license_type.to_string(),
235                        rule_identifier: None,
236                        matched_text: None,
237                    }],
238                });
239            }
240        }
241    }
242
243    detections
244}
245
246fn extract_repository_url(json: &Value) -> Option<String> {
247    match json.get(FIELD_REPOSITORY) {
248        Some(Value::String(url)) => Some(normalize_repo_url(url)),
249        Some(Value::Object(obj)) => obj
250            .get("url")
251            .and_then(|u| u.as_str())
252            .map(normalize_repo_url),
253        _ => None,
254    }
255}
256
/// Normalizes a repository URL by rewriting known prefixes to HTTPS.
///
/// After trimming surrounding whitespace, the first matching rule is applied:
/// - `git://…` becomes `https://…`
/// - `git+https://…` becomes `https://…`
/// - `git@github.com:…` becomes `https://github.com/…`
///
/// Only the leading prefix is rewritten; the same text occurring later in the
/// URL (for example inside a query string) is left untouched. URLs matching no
/// rule are returned trimmed but otherwise unchanged.
fn normalize_repo_url(url: &str) -> String {
    const PREFIX_REWRITES: [(&str, &str); 3] = [
        ("git://", "https://"),
        ("git+https://", "https://"),
        ("git@github.com:", "https://github.com/"),
    ];

    let url = url.trim();

    PREFIX_REWRITES
        .iter()
        .find_map(|&(from, to)| {
            url.strip_prefix(from)
                .map(|rest| format!("{}{}", to, rest))
        })
        .unwrap_or_else(|| url.to_string())
}
271
272/// Extracts party information (emails) from the `author`, `contributors`, and `maintainers` fields.
273fn extract_parties(json: &Value) -> Vec<Party> {
274    let mut parties = Vec::new();
275
276    // Extract author field
277    if let Some(author) = json.get(FIELD_AUTHOR) {
278        if let Some(email) = extract_email_from_field(author) {
279            parties.push(Party { email });
280        }
281    }
282
283    // Extract contributors field
284    if let Some(contributors) = json.get(FIELD_CONTRIBUTORS) {
285        if let Some(emails) = extract_emails_from_array(contributors) {
286            parties.extend(emails.into_iter().map(|email| Party { email }));
287        }
288    }
289
290    // Extract maintainers field
291    if let Some(maintainers) = json.get(FIELD_MAINTAINERS) {
292        if let Some(emails) = extract_emails_from_array(maintainers) {
293            parties.extend(emails.into_iter().map(|email| Party { email }));
294        }
295    }
296
297    parties
298}
299
/// Extracts the email from a string in the format `Name <email@example.com>`.
///
/// Returns the text between the first `<` and the first `>` that follows it.
/// A `>` that appears before the `<` is ignored. Returns `None` when there is
/// no `<` or no `>` after it.
fn extract_email_from_string(author_str: &str) -> Option<String> {
    let (_, after_open) = author_str.split_once('<')?;
    // Search for the closing bracket only in the text after the opening one.
    let (email, _) = after_open.split_once('>')?;
    Some(email.to_string())
}
311
312/// Extracts a single email from a JSON field, which can be a string or an object with an "email" field.
313fn extract_email_from_field(field: &Value) -> Option<String> {
314    match field {
315        Value::String(s) => {
316            extract_email_from_string(s).or_else(|| Some(s.clone()))
317        }
318        Value::Object(obj) => obj.get("email").and_then(|v| v.as_str()).map(String::from),
319        _ => None,
320    }
321}
322
323/// Extracts multiple emails from a JSON array, where each element can be a string or an object with an "email" field.
324fn extract_emails_from_array(array: &Value) -> Option<Vec<String>> {
325    if let Value::Array(items) = array {
326        let emails = items
327            .iter()
328            .filter_map(|item| extract_email_from_field(item))
329            .collect::<Vec<_>>();
330        if !emails.is_empty() {
331            return Some(emails);
332        }
333    }
334    None
335}
336
337fn default_package_data() -> PackageData {
338    PackageData {
339        package_type: None,
340        namespace: None,
341        name: None,
342        version: None,
343        homepage_url: None,
344        download_url: None,
345        copyright: None,
346        license_detections: Vec::new(),
347        dependencies: Vec::new(),
348        parties: Vec::new(),
349        purl: None,
350    }
351}
352
353/// Extracts dependencies from the `dependencies` or `devDependencies` field in the JSON.
354fn extract_dependencies(json: &Value, is_optional: bool) -> Vec<Dependency> {
355    let field = if is_optional {
356        FIELD_DEV_DEPENDENCIES
357    } else {
358        FIELD_DEPENDENCIES
359    };
360
361    json.get(field)
362        .and_then(|deps| deps.as_object())
363        .map_or_else(Vec::new, |deps| {
364            deps.iter()
365                .filter_map(|(name, version)| {
366                    let version_str = version.as_str()?;
367                    let stripped_version = strip_version_modifier(version_str);
368                    let encoded_version = urlencoding::encode(&stripped_version).to_string();
369
370                    let mut package_url =
371                        PackageUrl::new(NpmParser::PACKAGE_TYPE, name).ok()?;
372                    package_url.with_version(&encoded_version);
373
374                    Some(Dependency {
375                        purl: Some(package_url.to_string()),
376                        scope: None,
377                        is_optional,
378                    })
379                })
380                .collect()
381        })
382}
383
/// Strips leading version modifiers (e.g., `~`, `^`, `>=`) from a version string.
///
/// Surrounding whitespace is removed, and whitespace mixed into the leading
/// run of modifiers is removed with them, so `">= 1.2.3"` yields `"1.2.3"`
/// rather than `" 1.2.3"`. Only the characters `~`, `^`, `>` and `=` count as
/// modifiers; everything after the first other character is kept as is.
fn strip_version_modifier(version: &str) -> String {
    version
        .trim()
        .trim_start_matches(|c: char| matches!(c, '~' | '^' | '>' | '=') || c.is_whitespace())
        .to_string()
}
387}