Skip to main content

provenant/parsers/
npm.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4//! Parser for npm package.json manifests.
5//!
6//! Extracts package metadata, dependencies, and license information from
7//! package.json files used by Node.js/npm projects.
8//!
9//! # Supported Formats
10//! - package.json (manifest)
11//!
12//! # Key Features
13//! - Full dependency extraction (dependencies, devDependencies, peerDependencies, optionalDependencies, bundledDependencies)
14//! - Package URL (purl) generation for scoped and unscoped packages
15//! - VCS repository URL extraction
16//! - Distribution integrity hash extraction (sha1, sha512)
17//! - Support for legacy formats (licenses array, license objects)
18//!
19//! # Implementation Notes
20//! - Uses serde_json for JSON parsing
21//! - Namespace format: `@org` for scoped packages (e.g., `@babel/core`)
22//! - Graceful error handling: logs warnings and returns default on parse failure
23
24use crate::models::{
25    DatasourceId, Dependency, PackageData, PackageType, Party, Sha1Digest, Sha256Digest,
26    Sha512Digest,
27};
28use crate::parser_warn as warn;
29use crate::parsers::utils::{MAX_ITERATION_COUNT, npm_purl, parse_sri, truncate_field};
30use serde_json::Value;
31use std::collections::HashMap;
32use std::path::Path;
33
34use super::PackageParser;
35use super::license_normalization::normalize_spdx_declared_license;
36
// Key names used to look up values in the parsed package.json document.
// Each constant holds the exact (case-sensitive) key string.

// Package identity.
const FIELD_NAME: &str = "name";
const FIELD_VERSION: &str = "version";
// Licensing: `license` is the current key, `licenses` the legacy array form.
const FIELD_LICENSE: &str = "license";
const FIELD_LICENSES: &str = "licenses";
// Links and people.
const FIELD_HOMEPAGE: &str = "homepage";
const FIELD_REPOSITORY: &str = "repository";
const FIELD_AUTHOR: &str = "author";
const FIELD_CONTRIBUTORS: &str = "contributors";
const FIELD_MAINTAINERS: &str = "maintainers";
// Dependency groups.
const FIELD_DEPENDENCIES: &str = "dependencies";
const FIELD_DEV_DEPENDENCIES: &str = "devDependencies";
const FIELD_PEER_DEPENDENCIES: &str = "peerDependencies";
const FIELD_OPTIONAL_DEPENDENCIES: &str = "optionalDependencies";
const FIELD_BUNDLED_DEPENDENCIES: &str = "bundledDependencies";
const FIELD_RESOLUTIONS: &str = "resolutions";
// Descriptive metadata.
const FIELD_DESCRIPTION: &str = "description";
const FIELD_KEYWORDS: &str = "keywords";
// Keys whose values are stored in `extra_data`.
const FIELD_ENGINES: &str = "engines";
const FIELD_OS: &str = "os";
const FIELD_CPU: &str = "cpu";
const FIELD_LIBC: &str = "libc";
const FIELD_DEPRECATED: &str = "deprecated";
const FIELD_HAS_BIN: &str = "hasBin";
const FIELD_PACKAGE_MANAGER: &str = "packageManager";
const FIELD_WORKSPACES: &str = "workspaces";
const FIELD_PRIVATE: &str = "private";
const FIELD_BUGS: &str = "bugs";
const FIELD_DIST: &str = "dist";
const FIELD_OVERRIDES: &str = "overrides";
const FIELD_PEER_DEPENDENCIES_META: &str = "peerDependenciesMeta";
const FIELD_DEPENDENCIES_META: &str = "dependenciesMeta";
68
/// npm package parser for package.json manifests.
///
/// Stateless marker type: all behavior lives in its [`PackageParser`] implementation.
/// Supports all npm dependency types (dependencies, devDependencies, peerDependencies,
/// optionalDependencies, bundledDependencies) and workspace configurations.
pub struct NpmParser;
74
75impl PackageParser for NpmParser {
76    const PACKAGE_TYPE: PackageType = PackageType::Npm;
77
78    fn extract_packages(path: &Path) -> Vec<PackageData> {
79        let (json, _field_lines) = match read_and_parse_json_with_lines(path) {
80            Ok((json, lines)) => (json, lines),
81            Err(e) => {
82                warn!("Failed to read or parse package.json at {:?}: {}", path, e);
83                return vec![default_package_data()];
84            }
85        };
86
87        let name = extract_non_empty_string(&json, FIELD_NAME);
88        let version = extract_non_empty_string(&json, FIELD_VERSION);
89        let namespace = extract_namespace(&name);
90        let package_name = extract_package_name(&name);
91        let description = extract_description(&json);
92
93        let extracted_license_statement = extract_license_statement(&json);
94        let (declared_license_expression, declared_license_expression_spdx, license_detections) =
95            normalize_spdx_declared_license(extract_declared_license_candidate(&json).as_deref());
96        let peer_dependencies_meta = extract_peer_dependencies_meta(&json);
97        let dependencies = extract_dependencies(&json, false);
98        let dev_dependencies = extract_dependencies(&json, true);
99        let peer_dependencies = extract_peer_dependencies(&json, &peer_dependencies_meta);
100        let optional_dependencies = extract_optional_dependencies(&json);
101        let bundled_dependencies = extract_bundled_dependencies(&json);
102        let purl = create_package_url(&name, &version, &namespace);
103        let keywords_vec = extract_keywords_as_vec(&json);
104
105        let mut extra_data_map = HashMap::new();
106
107        if let Some(resolutions) = extract_resolutions(&json) {
108            extra_data_map = combine_extra_data(Some(extra_data_map), resolutions);
109        }
110
111        if let Some(engines) = extract_engines(&json) {
112            extra_data_map.insert("engines".to_string(), engines);
113        }
114
115        for field in [
116            FIELD_OS,
117            FIELD_CPU,
118            FIELD_LIBC,
119            FIELD_DEPRECATED,
120            FIELD_HAS_BIN,
121        ] {
122            if let Some(value) = extract_raw_extra_data_field(&json, field) {
123                extra_data_map.insert(field.to_string(), value);
124            }
125        }
126
127        if let Some(package_manager) = extract_package_manager(&json) {
128            extra_data_map.insert(
129                "packageManager".to_string(),
130                serde_json::Value::String(package_manager),
131            );
132        }
133
134        if let Some(workspaces) = extract_workspaces(&json) {
135            extra_data_map.insert("workspaces".to_string(), workspaces);
136        }
137
138        if let Some(overrides) = extract_overrides(&json) {
139            extra_data_map.insert("overrides".to_string(), overrides);
140        }
141
142        if let Some(private) = extract_private(&json) {
143            extra_data_map.insert("private".to_string(), serde_json::Value::Bool(private));
144        }
145
146        if let Some(dependencies_meta) = extract_dependencies_meta(&json) {
147            extra_data_map.insert("dependenciesMeta".to_string(), dependencies_meta);
148        }
149
150        let extra_data = if extra_data_map.is_empty() {
151            None
152        } else {
153            Some(extra_data_map)
154        };
155
156        let (dist_sha1, dist_sha256, dist_sha512) = match json.get(FIELD_DIST) {
157            Some(dist) => extract_dist_hashes(dist),
158            None => (None, None, None),
159        };
160
161        let download_url = json
162            .get(FIELD_DIST)
163            .and_then(extract_dist_tarball)
164            .or_else(|| generate_registry_download_url(&namespace, &package_name, &version));
165
166        let api_data_url = generate_npm_api_url(&namespace, &package_name, &version);
167        let repository_homepage_url = generate_repository_homepage_url(&namespace, &package_name);
168        let repository_download_url =
169            generate_repository_download_url(&namespace, &package_name, &version);
170        let vcs_url = extract_vcs_url(&json);
171
172        vec![PackageData {
173            package_type: Some(Self::PACKAGE_TYPE),
174            namespace,
175            name: package_name,
176            version,
177            qualifiers: None,
178            subpath: None,
179            primary_language: Some("JavaScript".to_string()),
180            description,
181            release_date: None,
182            parties: extract_parties(&json),
183            keywords: keywords_vec,
184            homepage_url: extract_homepage_url(&json),
185            download_url,
186            size: None,
187            sha1: dist_sha1.and_then(|h| Sha1Digest::from_hex(&h).ok()),
188            md5: None,
189            sha256: dist_sha256.and_then(|h| Sha256Digest::from_hex(&h).ok()),
190            sha512: dist_sha512.and_then(|h| Sha512Digest::from_hex(&h).ok()),
191            bug_tracking_url: extract_bugs(&json),
192            code_view_url: None,
193            vcs_url,
194            copyright: None,
195            holder: None,
196            declared_license_expression,
197            declared_license_expression_spdx,
198            license_detections,
199            other_license_expression: None,
200            other_license_expression_spdx: None,
201            other_license_detections: Vec::new(),
202            extracted_license_statement,
203            notice_text: None,
204            source_packages: Vec::new(),
205            file_references: Vec::new(),
206            is_private: json
207                .get("private")
208                .and_then(|v| v.as_bool())
209                .unwrap_or(false),
210            is_virtual: false,
211            extra_data,
212            dependencies: [
213                dependencies,
214                dev_dependencies,
215                peer_dependencies,
216                optional_dependencies,
217                bundled_dependencies,
218            ]
219            .concat(),
220            repository_homepage_url,
221            repository_download_url,
222            api_data_url,
223            datasource_id: Some(DatasourceId::NpmPackageJson),
224            purl,
225        }]
226    }
227
228    fn is_match(path: &Path) -> bool {
229        path.file_name().is_some_and(|name| name == "package.json")
230    }
231}
232
233/// Reads and parses a JSON file while tracking line numbers of fields
234fn read_and_parse_json_with_lines(path: &Path) -> Result<(Value, HashMap<String, usize>), String> {
235    // Read file once into string
236    let content = crate::parsers::utils::read_file_to_string(path, None)
237        .map_err(|e| format!("Failed to read file: {}", e))?;
238
239    // Parse JSON
240    let json: Value =
241        serde_json::from_str(&content).map_err(|e| format!("Failed to parse JSON: {}", e))?;
242
243    // Track line numbers for each field by iterating over lines
244    let mut field_lines = HashMap::new();
245    for (line_num, line) in content.lines().enumerate().take(MAX_ITERATION_COUNT) {
246        let trimmed = line.trim();
247        if let Some(field_name) = extract_field_name(trimmed) {
248            field_lines.insert(field_name, line_num + 1);
249        }
250    }
251
252    Ok((json, field_lines))
253}
254
/// Returns the leading double-quoted token of a JSON source line, if there is one.
///
/// The line is trimmed first. It must begin with `"`; the token is everything up to the
/// next `"` (or to the end of the line when no closing quote exists). Empty tokens and
/// lines that do not start with a quote produce `None`.
fn extract_field_name(line: &str) -> Option<String> {
    let after_quote = line.trim().strip_prefix('"')?;
    let field_name = after_quote.split('"').next().unwrap_or(after_quote);

    if field_name.is_empty() {
        return None;
    }
    Some(field_name.to_owned())
}
281
/// Returns the part of `name` before its first `/` (e.g. `@babel` for `@babel/core`).
///
/// Yields `None` when `name` is absent or contains no `/`.
fn extract_namespace(name: &Option<String>) -> Option<String> {
    let full = name.as_deref()?;
    full.split_once('/').map(|(scope, _)| scope.to_owned())
}
291
/// Returns the bare package name: the second `/`-separated segment of `name`,
/// or the whole name when it contains no `/`.
///
/// Yields `None` only when `name` itself is absent.
fn extract_package_name(name: &Option<String>) -> Option<String> {
    name.as_deref()
        .map(|full| full.split('/').nth(1).unwrap_or(full).to_owned())
}
301
302fn create_package_url(
303    name: &Option<String>,
304    version: &Option<String>,
305    _namespace: &Option<String>,
306) -> Option<String> {
307    // Note: We extract and store namespace in PackageData for metadata purposes,
308    // but the full package name (e.g., "@babel/core") is used for PURL generation.
309    let name = name.as_ref()?;
310    npm_purl(name, version.as_deref())
311}
312
313fn extract_license_statement(json: &Value) -> Option<String> {
314    let mut statements = Vec::new();
315
316    if let Some(license_value) = json.get(FIELD_LICENSE) {
317        if let Some(license_str) = license_value.as_str() {
318            statements.push(format!("- {}", license_str));
319        } else if let Some(license_obj) = license_value.as_object()
320            && let Some(type_val) = license_obj.get("type").and_then(|v| v.as_str())
321        {
322            statements.push(format!("- type: {}", type_val));
323            if let Some(url_val) = license_obj.get("url").and_then(|v| v.as_str()) {
324                statements.push(format!("  url: {}", url_val));
325            }
326        }
327    }
328
329    if let Some(licenses) = json.get(FIELD_LICENSES).and_then(|v| v.as_array()) {
330        for license in licenses.iter().take(MAX_ITERATION_COUNT) {
331            if let Some(license_obj) = license.as_object()
332                && let Some(type_val) = license_obj.get("type").and_then(|v| v.as_str())
333            {
334                statements.push(format!("- type: {}", type_val));
335                if let Some(url_val) = license_obj.get("url").and_then(|v| v.as_str()) {
336                    statements.push(format!("  url: {}", url_val));
337                }
338            }
339        }
340    }
341
342    if statements.is_empty() {
343        None
344    } else {
345        Some(truncate_field(format!("{}\n", statements.join("\n"))))
346    }
347}
348
349fn extract_declared_license_candidate(json: &Value) -> Option<String> {
350    json.get(FIELD_LICENSE)
351        .and_then(|value| value.as_str())
352        .map(str::trim)
353        .filter(|value| !value.is_empty())
354        .map(|s| truncate_field(s.to_string()))
355}
356
357/// Extracts the repository URL from the repository field.
358/// Extracts and normalizes VCS URL from the repository field.
359/// Supports both string and object formats with optional 'type' and 'directory' fields.
360fn extract_vcs_url(json: &Value) -> Option<String> {
361    let (vcs_tool, vcs_repository) = match json.get(FIELD_REPOSITORY) {
362        Some(Value::String(url)) => {
363            let normalized = normalize_repo_url(url);
364            if normalized.is_empty() {
365                return None;
366            }
367            (None, normalized)
368        }
369        Some(Value::Object(obj)) => {
370            let repo_url = obj.get("url").and_then(|u| u.as_str()).unwrap_or("");
371            let normalized = normalize_repo_url(repo_url);
372            if normalized.is_empty() {
373                return None;
374            }
375            let tool = obj
376                .get("type")
377                .and_then(|t| t.as_str())
378                .unwrap_or("git")
379                .to_string();
380            let tool_for_prefix = if normalized.starts_with("git://")
381                || normalized.starts_with("git+")
382                || normalized.starts_with("hg://")
383                || normalized.starts_with("hg+")
384                || normalized.starts_with("svn://")
385                || normalized.starts_with("svn+")
386            {
387                None
388            } else {
389                Some(tool)
390            };
391            (tool_for_prefix, normalized)
392        }
393        _ => return None,
394    };
395
396    if vcs_repository.is_empty() {
397        return None;
398    }
399
400    let mut vcs_url = vcs_tool.map_or_else(
401        || vcs_repository.clone(),
402        |tool| format!("{}+{}", tool, vcs_repository),
403    );
404
405    if let Some(vcs_revision) = json
406        .get("gitHead")
407        .and_then(|v| v.as_str())
408        .and_then(normalize_non_empty_string)
409    {
410        vcs_url.push('@');
411        vcs_url.push_str(&vcs_revision);
412    }
413
414    if let Some(Value::Object(obj)) = json.get(FIELD_REPOSITORY)
415        && let Some(directory) = obj.get("directory").and_then(|d| d.as_str())
416    {
417        vcs_url.push('#');
418        vcs_url.push_str(directory);
419    }
420
421    Some(truncate_field(vcs_url))
422}
423
/// Normalizes the repository shorthand forms accepted in package.json.
///
/// After trimming:
/// - an empty string stays empty;
/// - URLs with an already-recognised scheme (`https://`, `git+https://`, `svn://`, …)
///   are returned unchanged;
/// - `git@host:path` becomes `https://host/path`;
/// - `github:`, `gitlab:`, `bitbucket:` and `gist:` prefixes are expanded to the
///   matching HTTPS host; any other `prefix:rest` is returned unchanged;
/// - a colon-free `owner/repo` (exactly one `/`) is treated as a GitHub repository;
/// - everything else is returned unchanged.
fn normalize_repo_url(url: &str) -> String {
    const PASSTHROUGH_SCHEMES: [&str; 12] = [
        "https://",
        "http://",
        "git://",
        "git+git://",
        "git+https://",
        "git+http://",
        "hg://",
        "hg+http://",
        "hg+https://",
        "svn://",
        "svn+http://",
        "svn+https://",
    ];

    let url = url.trim();
    if url.is_empty() {
        return String::new();
    }

    if PASSTHROUGH_SCHEMES.iter().any(|scheme| url.starts_with(scheme)) {
        return url.to_owned();
    }

    // scp-like syntax: git@host:owner/repo
    if let Some(rest) = url.strip_prefix("git@") {
        if let Some((host, repo)) = rest.split_once(':') {
            return format!("https://{}/{}", host, repo);
        }
    }

    match url.split_once(':') {
        Some((platform, repo)) => {
            let host_url = match platform {
                "github" => "https://github.com/",
                "gitlab" => "https://gitlab.com/",
                "bitbucket" => "https://bitbucket.org/",
                "gist" => "https://gist.github.com/",
                _ => return url.to_owned(),
            };
            format!("{}{}", host_url, repo)
        }
        // No colon at all: a bare `owner/repo` is GitHub shorthand.
        None if url.matches('/').count() == 1 => format!("https://github.com/{}", url),
        None => url.to_owned(),
    }
}
478
479/// Extracts party information (emails) from the `author`, `contributors`, and `maintainers` fields.
480fn extract_parties(json: &Value) -> Vec<Party> {
481    let mut parties = Vec::new();
482
483    // Extract author field (can be single value or array)
484    if let Some(author) = json.get(FIELD_AUTHOR) {
485        if let Some(author_list) = extract_parties_from_array(author) {
486            // Author is an array
487            for mut party in author_list {
488                if party.role.is_none() {
489                    party.role = Some("author".to_string());
490                }
491                parties.push(party);
492            }
493        } else if let Some(mut party) = extract_party_from_field(author) {
494            // Author is a single value
495            party.role = Some("author".to_string());
496            parties.push(party);
497        }
498    }
499
500    // Extract contributors field
501    if let Some(contributors) = json.get(FIELD_CONTRIBUTORS)
502        && let Some(mut party_list) = extract_parties_from_array(contributors)
503    {
504        for party in &mut party_list {
505            if party.role.is_none() {
506                party.role = Some("contributor".to_string());
507            }
508        }
509        parties.extend(party_list);
510    }
511
512    // Extract maintainers field
513    if let Some(maintainers) = json.get(FIELD_MAINTAINERS)
514        && let Some(mut party_list) = extract_parties_from_array(maintainers)
515    {
516        for party in &mut party_list {
517            if party.role.is_none() {
518                party.role = Some("maintainer".to_string());
519            }
520        }
521        parties.extend(party_list);
522    }
523
524    parties
525}
526
527/// Extracts a party from a JSON field, which can be a string or an object with name/email fields.
528fn extract_party_from_field(field: &Value) -> Option<Party> {
529    match field {
530        Value::String(s) => {
531            if let Some(email) = extract_email_from_string(s) {
532                Some(Party {
533                    r#type: Some("person".to_string()),
534                    role: None,
535                    name: extract_name_from_author_string(s).map(truncate_field),
536                    email: Some(truncate_field(email)),
537                    url: None,
538                    organization: None,
539                    organization_url: None,
540                    timezone: None,
541                })
542            } else {
543                Some(Party {
544                    r#type: Some("person".to_string()),
545                    role: None,
546                    name: Some(truncate_field(s.clone())),
547                    email: None,
548                    url: None,
549                    organization: None,
550                    organization_url: None,
551                    timezone: None,
552                })
553            }
554        }
555        Value::Object(obj) => Some(Party {
556            r#type: Some("person".to_string()),
557            role: obj
558                .get("role")
559                .and_then(|v| v.as_str())
560                .map(|s| truncate_field(s.to_string())),
561            name: obj
562                .get("name")
563                .and_then(|v| v.as_str())
564                .map(|s| truncate_field(s.to_string())),
565            email: obj
566                .get("email")
567                .and_then(|v| v.as_str())
568                .map(|s| truncate_field(s.to_string())),
569            url: obj
570                .get("url")
571                .and_then(|v| v.as_str())
572                .and_then(normalize_optional_party_url)
573                .map(truncate_field),
574            organization: None,
575            organization_url: None,
576            timezone: None,
577        }),
578        _ => None,
579    }
580}
581
582/// Extracts multiple parties from a JSON array.
583fn extract_parties_from_array(array: &Value) -> Option<Vec<Party>> {
584    if let Value::Array(items) = array {
585        let parties = items
586            .iter()
587            .take(MAX_ITERATION_COUNT)
588            .filter_map(extract_party_from_field)
589            .collect::<Vec<_>>();
590        if !parties.is_empty() {
591            return Some(parties);
592        }
593    }
594    None
595}
596
/// Extracts the email from a string in the format `Name <email@example.com>`.
///
/// Returns the text between the first `<` and the first `>` that follows it.
/// A `>` occurring before the `<` is ignored, so `a > b <x@y.z>` still yields
/// `x@y.z`. Returns `None` when there is no `<` or no closing `>` after it.
fn extract_email_from_string(author_str: &str) -> Option<String> {
    let (_, after_open) = author_str.split_once('<')?;
    // Search for the closing bracket only in what follows the opening one.
    let (email, _) = after_open.split_once('>')?;
    Some(email.to_string())
}
607
/// Extracts the name from a string in the format `Name <email@example.com>`.
///
/// The name is the trimmed text before the first `<`, or the whole trimmed string
/// when there is no `<`. Returns `None` whenever that text is empty, so blank
/// input never produces an empty name.
fn extract_name_from_author_string(author_str: &str) -> Option<String> {
    let name = author_str
        .split('<')
        .next()
        .unwrap_or(author_str)
        .trim();

    if name.is_empty() {
        None
    } else {
        Some(name.to_string())
    }
}
620
/// Builds the fallback record returned when package.json cannot be read or parsed.
///
/// Only the package type, primary language and datasource id are set; every other
/// field keeps its `Default` value.
fn default_package_data() -> PackageData {
    PackageData {
        package_type: Some(NpmParser::PACKAGE_TYPE),
        primary_language: Some("JavaScript".to_string()),
        datasource_id: Some(DatasourceId::NpmPackageJson),
        ..Default::default()
    }
}
629
/// Parses an npm alias specifier of the form `npm:<package>@<constraint>`.
///
/// Returns `(actual_package_name, constraint)`:
/// - `npm:lodash@^4.17.0` -> `("lodash", "^4.17.0")`
/// - `npm:@babel/core@7.0.0` -> `("@babel/core", "7.0.0")`
/// - `npm:@scope/pkg` (no constraint) -> `("@scope/pkg", "")`
///
/// Returns `None` for anything that is not an alias: specifiers without the `npm:`
/// prefix (including git/ssh/http URLs that merely contain `:` and `@`), and
/// `npm:<range-or-tag>` forms such as `npm:^1.2.3`, which name no other package.
fn parse_alias_adapter(version_str: &str) -> Option<(&str, &str)> {
    let spec = version_str.strip_prefix("npm:")?;

    // A leading '@' marks a scope, not the name/constraint separator, so skip it.
    let is_scoped = spec.starts_with('@');
    let search_from = usize::from(is_scoped);

    match spec[search_from..].find('@') {
        Some(offset) => {
            let separator = search_from + offset;
            Some((&spec[..separator], &spec[separator + 1..]))
        }
        // `@scope/pkg` without a constraint is still unambiguously a package name.
        None if is_scoped && spec.contains('/') => Some((spec, "")),
        None => None,
    }
}
638
639fn extract_non_empty_string(json: &Value, field: &str) -> Option<String> {
640    json.get(field)
641        .and_then(|value| value.as_str())
642        .map(str::trim)
643        .filter(|value| !value.is_empty())
644        .map(|s| truncate_field(s.to_string()))
645}
646
/// Builds the npm registry API URL for a package, optionally pinned to a version.
///
/// With a namespace, `<namespace>/<name>` is used with every `/` encoded as `%2f`
/// (e.g. `@babel%2fcore`). Returns `None` when `name` is absent.
fn generate_npm_api_url(
    namespace: &Option<String>,
    name: &Option<String>,
    version: &Option<String>,
) -> Option<String> {
    const REGISTRY: &str = "https://registry.npmjs.org";

    let name = name.as_deref()?;
    let package = match namespace {
        Some(ns) => format!("{}/{}", ns, name).replace('/', "%2f"),
        None => name.to_owned(),
    };

    Some(match version {
        Some(ver) => format!("{}/{}/{}", REGISTRY, package, ver),
        None => format!("{}/{}", REGISTRY, package),
    })
}
669
/// Joins namespace and name into the registry path `<namespace>/<name>`,
/// or just `<name>` when there is no namespace. Returns `None` without a name.
fn build_registry_package_path(
    namespace: &Option<String>,
    name: &Option<String>,
) -> Option<String> {
    let name = name.as_ref()?;
    Some(match namespace {
        Some(scope) => format!("{scope}/{name}"),
        None => name.clone(),
    })
}
680
681fn generate_repository_homepage_url(
682    namespace: &Option<String>,
683    name: &Option<String>,
684) -> Option<String> {
685    build_registry_package_path(namespace, name)
686        .map(|package_path| format!("https://www.npmjs.com/package/{package_path}"))
687}
688
689fn generate_registry_download_url(
690    namespace: &Option<String>,
691    name: &Option<String>,
692    version: &Option<String>,
693) -> Option<String> {
694    match (
695        build_registry_package_path(namespace, name),
696        name.as_ref(),
697        version.as_ref(),
698    ) {
699        (Some(package_path), Some(name), Some(version)) => Some(format!(
700            "https://registry.npmjs.org/{}/-/{}-{}.tgz",
701            package_path, name, version
702        )),
703        _ => None,
704    }
705}
706
/// Returns the repository download URL for the package.
///
/// Currently identical to the registry tarball URL: this simply forwards to
/// `generate_registry_download_url` with the same arguments.
fn generate_repository_download_url(
    namespace: &Option<String>,
    name: &Option<String>,
    version: &Option<String>,
) -> Option<String> {
    generate_registry_download_url(namespace, name, version)
}
714
715fn extract_dependency_group(
716    json: &Value,
717    field: &str,
718    scope: &str,
719    is_runtime: bool,
720    is_optional: bool,
721    optional_meta: Option<&HashMap<String, bool>>,
722) -> Vec<Dependency> {
723    json.get(field)
724        .and_then(|deps| deps.as_object())
725        .map_or_else(Vec::new, |deps| {
726            deps.iter()
727                .take(MAX_ITERATION_COUNT)
728                .filter_map(|(name, version)| {
729                    let version_str = version.as_str()?;
730
731                    if version_str.starts_with("workspace:") {
732                        let package_url = npm_purl(name, None)?;
733                        let is_opt = if let Some(meta) = optional_meta {
734                            meta.get(name).copied()
735                        } else {
736                            Some(is_optional)
737                        };
738                        return Some(Dependency {
739                            purl: Some(package_url),
740                            extracted_requirement: Some(truncate_field(version_str.to_string())),
741                            scope: Some(scope.to_string()),
742                            is_runtime: Some(is_runtime),
743                            is_optional: is_opt,
744                            is_pinned: Some(false),
745                            is_direct: Some(true),
746                            resolved_package: None,
747                            extra_data: None,
748                        });
749                    }
750
751                    let actual_package_name = if let Some((actual_package_name, _constraint)) =
752                        parse_alias_adapter(version_str)
753                    {
754                        actual_package_name
755                    } else {
756                        name.as_str()
757                    };
758
759                    let package_url = npm_purl(actual_package_name, None)?;
760
761                    let is_opt = if let Some(meta) = optional_meta {
762                        meta.get(name).copied()
763                    } else {
764                        Some(is_optional)
765                    };
766
767                    Some(Dependency {
768                        purl: Some(package_url),
769                        extracted_requirement: Some(truncate_field(version_str.to_string())),
770                        scope: Some(scope.to_string()),
771                        is_runtime: Some(is_runtime),
772                        is_optional: is_opt,
773                        is_pinned: Some(false),
774                        is_direct: Some(true),
775                        resolved_package: None,
776                        extra_data: None,
777                    })
778                })
779                .collect()
780        })
781}
782
783/// Extracts dependencies from the `dependencies` or `devDependencies` field in the JSON.
784fn extract_dependencies(json: &Value, is_optional: bool) -> Vec<Dependency> {
785    let field = if is_optional {
786        FIELD_DEV_DEPENDENCIES
787    } else {
788        FIELD_DEPENDENCIES
789    };
790
791    let scope = if is_optional {
792        "devDependencies"
793    } else {
794        "dependencies"
795    };
796
797    extract_dependency_group(json, field, scope, !is_optional, is_optional, None)
798}
799
/// Extracts the `peerDependencies` group as runtime dependencies.
///
/// Optionality is taken per package from `meta` (the flags gathered from
/// `peerDependenciesMeta`); packages not listed there get no optional flag.
fn extract_peer_dependencies(json: &Value, meta: &HashMap<String, bool>) -> Vec<Dependency> {
    extract_dependency_group(
        json,
        FIELD_PEER_DEPENDENCIES,
        "peerDependencies",
        true,
        false, // ignored by the callee because `meta` is supplied
        Some(meta),
    )
}
810
/// Extracts optional dependencies from the `optionalDependencies` field in the JSON.
/// Optional dependencies are marked with is_optional: true, is_runtime: true, and scope "optionalDependencies".
fn extract_optional_dependencies(json: &Value) -> Vec<Dependency> {
    extract_dependency_group(
        json,
        FIELD_OPTIONAL_DEPENDENCIES,
        "optionalDependencies",
        true, // is_runtime
        true, // is_optional
        None,
    )
}
823
824fn extract_bundled_dependencies(json: &Value) -> Vec<Dependency> {
825    if let Some(bundled) = json
826        .get(FIELD_BUNDLED_DEPENDENCIES)
827        .and_then(|v| v.as_array())
828    {
829        extract_bundled_list(bundled)
830    } else {
831        Vec::new()
832    }
833}
834
835/// Helper function to extract bundled dependencies from an array of package names.
836fn extract_bundled_list(bundled_array: &[Value]) -> Vec<Dependency> {
837    bundled_array
838        .iter()
839        .take(MAX_ITERATION_COUNT)
840        .filter_map(|value| {
841            let name = value.as_str()?;
842            // Create PURL without version for bundled dependencies
843            let package_url = npm_purl(name, None)?;
844
845            Some(Dependency {
846                purl: Some(package_url),
847                extracted_requirement: None,
848                scope: Some("bundledDependencies".to_string()),
849                is_runtime: Some(true),
850                is_optional: Some(false),
851                is_pinned: Some(false),
852                is_direct: Some(true),
853                resolved_package: None,
854                extra_data: None,
855            })
856        })
857        .collect()
858}
859
860/// Extracts Yarn resolutions from the `resolutions` field.
861/// Returns resolutions as a HashMap to be stored in extra_data.
862fn extract_resolutions(json: &Value) -> Option<HashMap<String, serde_json::Value>> {
863    json.get(FIELD_RESOLUTIONS)
864        .and_then(|resolutions| resolutions.as_object())
865        .map(|resolutions_obj| {
866            let mut extra_data = HashMap::new();
867            extra_data.insert(
868                "resolutions".to_string(),
869                serde_json::Value::Object(resolutions_obj.clone()),
870            );
871            extra_data
872        })
873}
874
875fn extract_peer_dependencies_meta(json: &Value) -> HashMap<String, bool> {
876    json.get(FIELD_PEER_DEPENDENCIES_META)
877        .and_then(|meta| meta.as_object())
878        .map_or_else(HashMap::new, |meta_obj| {
879            meta_obj
880                .iter()
881                .take(MAX_ITERATION_COUNT)
882                .filter_map(|(package_name, meta_value)| {
883                    meta_value.as_object().and_then(|obj| {
884                        obj.get("optional")
885                            .and_then(|opt| opt.as_bool())
886                            .map(|optional| (package_name.clone(), optional))
887                    })
888                })
889                .collect()
890        })
891}
892
893fn extract_dependencies_meta(json: &Value) -> Option<serde_json::Value> {
894    json.get(FIELD_DEPENDENCIES_META).cloned()
895}
896
897fn extract_overrides(json: &Value) -> Option<serde_json::Value> {
898    json.get(FIELD_OVERRIDES).cloned()
899}
900
901fn extract_description(json: &Value) -> Option<String> {
902    json.get(FIELD_DESCRIPTION)
903        .and_then(|v| v.as_str())
904        .map(|s| truncate_field(s.to_string()))
905}
906
907fn extract_homepage_url(json: &Value) -> Option<String> {
908    match json.get(FIELD_HOMEPAGE) {
909        Some(Value::String(homepage)) => normalize_non_empty_string(homepage).map(truncate_field),
910        _ => None,
911    }
912}
913
/// Trims surrounding whitespace from `value` and returns the result as an owned string.
///
/// Returns `None` when nothing remains after trimming.
fn normalize_non_empty_string(value: &str) -> Option<String> {
    let trimmed = value.trim();
    (!trimmed.is_empty()).then(|| trimmed.to_string())
}
922
923fn normalize_optional_party_url(value: &str) -> Option<String> {
924    let normalized = normalize_non_empty_string(value)?;
925
926    if normalized.eq_ignore_ascii_case("none") {
927        None
928    } else {
929        Some(normalized)
930    }
931}
932
933fn extract_keywords_as_vec(json: &Value) -> Vec<String> {
934    json.get(FIELD_KEYWORDS)
935        .and_then(|v| {
936            if let Some(str) = v.as_str() {
937                Some(vec![str.to_string()])
938            } else if let Some(arr) = v.as_array() {
939                let keywords: Vec<String> = arr
940                    .iter()
941                    .take(MAX_ITERATION_COUNT)
942                    .filter_map(|kw| kw.as_str())
943                    .map(|s| truncate_field(s.to_string()))
944                    .collect();
945                if keywords.is_empty() {
946                    None
947                } else {
948                    Some(keywords)
949                }
950            } else {
951                None
952            }
953        })
954        .unwrap_or_default()
955}
956
957fn extract_engines(json: &Value) -> Option<serde_json::Value> {
958    json.get(FIELD_ENGINES).cloned()
959}
960
961fn extract_raw_extra_data_field(json: &Value, field: &str) -> Option<serde_json::Value> {
962    json.get(field).cloned()
963}
964
965fn extract_package_manager(json: &Value) -> Option<String> {
966    json.get(FIELD_PACKAGE_MANAGER)
967        .and_then(|v| v.as_str())
968        .map(|s| truncate_field(s.to_string()))
969}
970
971fn extract_workspaces(json: &Value) -> Option<serde_json::Value> {
972    json.get(FIELD_WORKSPACES).cloned()
973}
974
975fn extract_private(json: &Value) -> Option<bool> {
976    json.get(FIELD_PRIVATE).and_then(|v| v.as_bool())
977}
978
979fn extract_bugs(json: &Value) -> Option<String> {
980    match json.get(FIELD_BUGS) {
981        Some(bugs) => {
982            if let Some(url) = bugs.as_str() {
983                normalize_non_empty_string(url).map(truncate_field)
984            } else if let Some(obj) = bugs.as_object() {
985                obj.get("url")
986                    .and_then(|v| v.as_str())
987                    .and_then(normalize_non_empty_string)
988                    .map(truncate_field)
989            } else {
990                None
991            }
992        }
993        None => None,
994    }
995}
996
997fn extract_dist_hashes(dist: &Value) -> (Option<String>, Option<String>, Option<String>) {
998    let mut sha1 = dist
999        .get("shasum")
1000        .and_then(|v| v.as_str())
1001        .and_then(normalize_non_empty_string);
1002    let mut sha256 = None;
1003    let mut sha512 = None;
1004
1005    if let Some(integrity) = dist.get("integrity").and_then(|v| v.as_str())
1006        && let Some((algo, hex_digest)) = parse_sri(integrity)
1007    {
1008        match algo.as_str() {
1009            "sha1" => {
1010                if sha1.is_none() {
1011                    sha1 = Some(hex_digest);
1012                }
1013            }
1014            "sha256" => sha256 = Some(hex_digest),
1015            "sha512" => sha512 = Some(hex_digest),
1016            _ => {}
1017        }
1018    }
1019
1020    (sha1, sha256, sha512)
1021}
1022
1023fn extract_dist_tarball(dist: &Value) -> Option<String> {
1024    dist.get("tarball")
1025        .or_else(|| dist.get("dnl_url"))
1026        .and_then(|v| v.as_str())
1027        .map(normalize_npm_registry_tarball_url)
1028        .map(truncate_field)
1029}
1030
/// Upgrades plain-HTTP `registry.npmjs.org` URLs to HTTPS.
///
/// URLs that do not start with `http://registry.npmjs.org/` are returned unchanged.
fn normalize_npm_registry_tarball_url(url: &str) -> String {
    match url.strip_prefix("http://registry.npmjs.org/") {
        Some(path) => format!("https://registry.npmjs.org/{path}"),
        None => url.to_string(),
    }
}
1038
1039fn combine_extra_data(
1040    extra_data: Option<HashMap<String, serde_json::Value>>,
1041    additional_data: HashMap<String, serde_json::Value>,
1042) -> HashMap<String, serde_json::Value> {
1043    let mut combined = extra_data.unwrap_or_default();
1044    for (key, value) in additional_data {
1045        combined.insert(key, value);
1046    }
1047    combined
1048}
1049
// Registers this parser's metadata through the crate-level `register_parser!` macro.
// NOTE(review): the arguments appear to be, in order, a human-readable description,
// the path glob(s) this parser handles, the package type, the primary language, and
// an optional documentation URL — confirm against the macro definition.
1050crate::register_parser!(
1051    "npm package.json manifest",
1052    &["**/package.json"],
1053    "npm",
1054    "JavaScript",
1055    Some("https://docs.npmjs.com/cli/v10/configuring-npm/package-json"),
1056);