Skip to main content

provenant/parsers/
npm.rs

1//! Parser for npm package.json manifests.
2//!
3//! Extracts package metadata, dependencies, and license information from
4//! package.json files used by Node.js/npm projects.
5//!
6//! # Supported Formats
7//! - package.json (manifest)
8//!
9//! # Key Features
10//! - Full dependency extraction (dependencies, devDependencies, peerDependencies, optionalDependencies, bundledDependencies)
11//! - Package URL (purl) generation for scoped and unscoped packages
12//! - VCS repository URL extraction
//! - Distribution integrity hash extraction (sha1, sha256, sha512)
14//! - Support for legacy formats (licenses array, license objects)
15//!
16//! # Implementation Notes
17//! - Uses serde_json for JSON parsing
18//! - Namespace format: `@org` for scoped packages (e.g., `@babel/core`)
19//! - Graceful error handling: logs warnings and returns default on parse failure
20
21use crate::models::{
22    DatasourceId, Dependency, PackageData, PackageType, Party, Sha1Digest, Sha256Digest,
23    Sha512Digest,
24};
25use crate::parser_warn as warn;
26use crate::parsers::utils::{npm_purl, parse_sri};
27use serde_json::Value;
28use std::collections::HashMap;
29use std::fs;
30use std::path::Path;
31
32use super::PackageParser;
33use super::license_normalization::normalize_spdx_declared_license;
34
// Key names looked up in the parsed package.json object. They are collected here so
// the extractors below do not have to repeat raw string literals.

// Package identity and licensing.
const FIELD_NAME: &str = "name";
const FIELD_VERSION: &str = "version";
const FIELD_LICENSE: &str = "license";
const FIELD_LICENSES: &str = "licenses";
// Project links.
const FIELD_HOMEPAGE: &str = "homepage";
const FIELD_REPOSITORY: &str = "repository";
// People associated with the package.
const FIELD_AUTHOR: &str = "author";
const FIELD_CONTRIBUTORS: &str = "contributors";
const FIELD_MAINTAINERS: &str = "maintainers";
// Dependency groups.
const FIELD_DEPENDENCIES: &str = "dependencies";
const FIELD_DEV_DEPENDENCIES: &str = "devDependencies";
const FIELD_PEER_DEPENDENCIES: &str = "peerDependencies";
const FIELD_OPTIONAL_DEPENDENCIES: &str = "optionalDependencies";
const FIELD_BUNDLED_DEPENDENCIES: &str = "bundledDependencies";
const FIELD_RESOLUTIONS: &str = "resolutions";
// Descriptive metadata.
const FIELD_DESCRIPTION: &str = "description";
const FIELD_KEYWORDS: &str = "keywords";
// Fields that are carried over into `extra_data` or used for URLs and hashes.
const FIELD_ENGINES: &str = "engines";
const FIELD_OS: &str = "os";
const FIELD_CPU: &str = "cpu";
const FIELD_LIBC: &str = "libc";
const FIELD_DEPRECATED: &str = "deprecated";
const FIELD_HAS_BIN: &str = "hasBin";
const FIELD_PACKAGE_MANAGER: &str = "packageManager";
const FIELD_WORKSPACES: &str = "workspaces";
const FIELD_PRIVATE: &str = "private";
const FIELD_BUGS: &str = "bugs";
const FIELD_DIST: &str = "dist";
const FIELD_OVERRIDES: &str = "overrides";
const FIELD_PEER_DEPENDENCIES_META: &str = "peerDependenciesMeta";
const FIELD_DEPENDENCIES_META: &str = "dependenciesMeta";
66
/// npm package parser for package.json manifests.
///
/// Supports all npm dependency types (dependencies, devDependencies, peerDependencies,
/// optionalDependencies, bundledDependencies) and workspace configurations.
///
/// This is a field-less marker type: the parsing logic lives in its
/// `PackageParser` implementation, whose functions take no `self`.
pub struct NpmParser;
72
73impl PackageParser for NpmParser {
74    const PACKAGE_TYPE: PackageType = PackageType::Npm;
75
76    fn extract_packages(path: &Path) -> Vec<PackageData> {
77        let (json, _field_lines) = match read_and_parse_json_with_lines(path) {
78            Ok((json, lines)) => (json, lines),
79            Err(e) => {
80                warn!("Failed to read or parse package.json at {:?}: {}", path, e);
81                return vec![default_package_data()];
82            }
83        };
84
85        let name = extract_non_empty_string(&json, FIELD_NAME);
86        let version = extract_non_empty_string(&json, FIELD_VERSION);
87        let namespace = extract_namespace(&name);
88        let package_name = extract_package_name(&name);
89        let description = extract_description(&json);
90
91        let extracted_license_statement = extract_license_statement(&json);
92        let (declared_license_expression, declared_license_expression_spdx, license_detections) =
93            normalize_spdx_declared_license(extract_declared_license_candidate(&json).as_deref());
94        let peer_dependencies_meta = extract_peer_dependencies_meta(&json);
95        let dependencies = extract_dependencies(&json, false);
96        let dev_dependencies = extract_dependencies(&json, true);
97        let peer_dependencies = extract_peer_dependencies(&json, &peer_dependencies_meta);
98        let optional_dependencies = extract_optional_dependencies(&json);
99        let bundled_dependencies = extract_bundled_dependencies(&json);
100        let purl = create_package_url(&name, &version, &namespace);
101        let keywords_vec = extract_keywords_as_vec(&json);
102
103        let mut extra_data_map = HashMap::new();
104
105        if let Some(resolutions) = extract_resolutions(&json) {
106            extra_data_map = combine_extra_data(Some(extra_data_map), resolutions);
107        }
108
109        if let Some(engines) = extract_engines(&json) {
110            extra_data_map.insert("engines".to_string(), engines);
111        }
112
113        for field in [
114            FIELD_OS,
115            FIELD_CPU,
116            FIELD_LIBC,
117            FIELD_DEPRECATED,
118            FIELD_HAS_BIN,
119        ] {
120            if let Some(value) = extract_raw_extra_data_field(&json, field) {
121                extra_data_map.insert(field.to_string(), value);
122            }
123        }
124
125        if let Some(package_manager) = extract_package_manager(&json) {
126            extra_data_map.insert(
127                "packageManager".to_string(),
128                serde_json::Value::String(package_manager),
129            );
130        }
131
132        if let Some(workspaces) = extract_workspaces(&json) {
133            extra_data_map.insert("workspaces".to_string(), workspaces);
134        }
135
136        if let Some(overrides) = extract_overrides(&json) {
137            extra_data_map.insert("overrides".to_string(), overrides);
138        }
139
140        if let Some(private) = extract_private(&json) {
141            extra_data_map.insert("private".to_string(), serde_json::Value::Bool(private));
142        }
143
144        if let Some(dependencies_meta) = extract_dependencies_meta(&json) {
145            extra_data_map.insert("dependenciesMeta".to_string(), dependencies_meta);
146        }
147
148        let extra_data = if extra_data_map.is_empty() {
149            None
150        } else {
151            Some(extra_data_map)
152        };
153
154        let (dist_sha1, dist_sha256, dist_sha512) = match json.get(FIELD_DIST) {
155            Some(dist) => extract_dist_hashes(dist),
156            None => (None, None, None),
157        };
158
159        let download_url = json
160            .get(FIELD_DIST)
161            .and_then(extract_dist_tarball)
162            .or_else(|| generate_registry_download_url(&namespace, &package_name, &version));
163
164        let api_data_url = generate_npm_api_url(&namespace, &package_name, &version);
165        let repository_homepage_url = generate_repository_homepage_url(&namespace, &package_name);
166        let repository_download_url =
167            generate_repository_download_url(&namespace, &package_name, &version);
168        let vcs_url = extract_vcs_url(&json);
169
170        vec![PackageData {
171            package_type: Some(Self::PACKAGE_TYPE),
172            namespace,
173            name: package_name,
174            version,
175            qualifiers: None,
176            subpath: None,
177            primary_language: Some("JavaScript".to_string()),
178            description,
179            release_date: None,
180            parties: extract_parties(&json),
181            keywords: keywords_vec,
182            homepage_url: extract_homepage_url(&json),
183            download_url,
184            size: None,
185            sha1: dist_sha1.and_then(|h| Sha1Digest::from_hex(&h).ok()),
186            md5: None,
187            sha256: dist_sha256.and_then(|h| Sha256Digest::from_hex(&h).ok()),
188            sha512: dist_sha512.and_then(|h| Sha512Digest::from_hex(&h).ok()),
189            bug_tracking_url: extract_bugs(&json),
190            code_view_url: None,
191            vcs_url,
192            copyright: None,
193            holder: None,
194            declared_license_expression,
195            declared_license_expression_spdx,
196            license_detections,
197            other_license_expression: None,
198            other_license_expression_spdx: None,
199            other_license_detections: Vec::new(),
200            extracted_license_statement,
201            notice_text: None,
202            source_packages: Vec::new(),
203            file_references: Vec::new(),
204            is_private: json
205                .get("private")
206                .and_then(|v| v.as_bool())
207                .unwrap_or(false),
208            is_virtual: false,
209            extra_data,
210            dependencies: [
211                dependencies,
212                dev_dependencies,
213                peer_dependencies,
214                optional_dependencies,
215                bundled_dependencies,
216            ]
217            .concat(),
218            repository_homepage_url,
219            repository_download_url,
220            api_data_url,
221            datasource_id: Some(DatasourceId::NpmPackageJson),
222            purl,
223        }]
224    }
225
226    fn is_match(path: &Path) -> bool {
227        path.file_name().is_some_and(|name| name == "package.json")
228    }
229}
230
231/// Reads and parses a JSON file while tracking line numbers of fields
232fn read_and_parse_json_with_lines(path: &Path) -> Result<(Value, HashMap<String, usize>), String> {
233    // Read file once into string
234    let content = fs::read_to_string(path).map_err(|e| format!("Failed to read file: {}", e))?;
235
236    // Parse JSON
237    let json: Value =
238        serde_json::from_str(&content).map_err(|e| format!("Failed to parse JSON: {}", e))?;
239
240    // Track line numbers for each field by iterating over lines
241    let mut field_lines = HashMap::new();
242    for (line_num, line) in content.lines().enumerate() {
243        let trimmed = line.trim();
244        // Look for field names in the format: "field": value
245        if let Some(field_name) = extract_field_name(trimmed) {
246            field_lines.insert(field_name, line_num + 1); // 1-based line numbers
247        }
248    }
249
250    Ok((json, field_lines))
251}
252
/// Extracts the field name from a single line of pretty-printed JSON.
///
/// A line is recognized only when, after trimming, it has the shape
/// `"name": ...`: an opening quote, a non-empty name, a closing quote and then a
/// colon (optionally preceded by whitespace). Requiring the colon keeps quoted array
/// elements and wrapped string values (for example `"lodash",`) from being reported
/// as fields.
///
/// Returns `None` for any other line, including lines whose quote is never closed.
/// Names containing escaped quotes are not supported by this line-based scan and
/// yield `None`.
fn extract_field_name(line: &str) -> Option<String> {
    let after_open_quote = line.trim().strip_prefix('"')?;
    let (field_name, remainder) = after_open_quote.split_once('"')?;

    // A key must be followed by ':'; otherwise the quoted text is a value.
    if field_name.is_empty() || !remainder.trim_start().starts_with(':') {
        return None;
    }

    Some(field_name.to_string())
}
279
/// Returns the part of `name` before its first `/` (e.g. `@babel` for `@babel/core`).
///
/// Yields `None` when `name` is absent or contains no `/`.
fn extract_namespace(name: &Option<String>) -> Option<String> {
    let full_name = name.as_deref()?;
    let (scope, _rest) = full_name.split_once('/')?;
    Some(scope.to_owned())
}
289
/// Returns the package name without its namespace.
///
/// For a name containing `/` this is the segment between the first and the second
/// `/` (e.g. `core` for `@babel/core`); otherwise the name is returned unchanged.
/// Yields `None` when `name` is absent.
fn extract_package_name(name: &Option<String>) -> Option<String> {
    let full_name = name.as_deref()?;
    let bare_name = match full_name.split_once('/') {
        // Only the segment directly after the first '/' is kept.
        Some((_scope, after_scope)) => after_scope.split('/').next().unwrap_or(after_scope),
        None => full_name,
    };
    Some(bare_name.to_owned())
}
299
300fn create_package_url(
301    name: &Option<String>,
302    version: &Option<String>,
303    _namespace: &Option<String>,
304) -> Option<String> {
305    // Note: We extract and store namespace in PackageData for metadata purposes,
306    // but the full package name (e.g., "@babel/core") is used for PURL generation.
307    let name = name.as_ref()?;
308    npm_purl(name, version.as_deref())
309}
310
311fn extract_license_statement(json: &Value) -> Option<String> {
312    let mut statements = Vec::new();
313
314    if let Some(license_value) = json.get(FIELD_LICENSE) {
315        if let Some(license_str) = license_value.as_str() {
316            statements.push(format!("- {}", license_str));
317        } else if let Some(license_obj) = license_value.as_object()
318            && let Some(type_val) = license_obj.get("type").and_then(|v| v.as_str())
319        {
320            statements.push(format!("- type: {}", type_val));
321            if let Some(url_val) = license_obj.get("url").and_then(|v| v.as_str()) {
322                statements.push(format!("  url: {}", url_val));
323            }
324        }
325    }
326
327    if let Some(licenses) = json.get(FIELD_LICENSES).and_then(|v| v.as_array()) {
328        for license in licenses {
329            if let Some(license_obj) = license.as_object()
330                && let Some(type_val) = license_obj.get("type").and_then(|v| v.as_str())
331            {
332                statements.push(format!("- type: {}", type_val));
333                if let Some(url_val) = license_obj.get("url").and_then(|v| v.as_str()) {
334                    statements.push(format!("  url: {}", url_val));
335                }
336            }
337        }
338    }
339
340    if statements.is_empty() {
341        None
342    } else {
343        Some(format!("{}\n", statements.join("\n")))
344    }
345}
346
347fn extract_declared_license_candidate(json: &Value) -> Option<String> {
348    json.get(FIELD_LICENSE)
349        .and_then(|value| value.as_str())
350        .map(str::trim)
351        .filter(|value| !value.is_empty())
352        .map(str::to_string)
353}
354
355/// Extracts the repository URL from the repository field.
356/// Extracts and normalizes VCS URL from the repository field.
357/// Supports both string and object formats with optional 'type' and 'directory' fields.
358fn extract_vcs_url(json: &Value) -> Option<String> {
359    let (vcs_tool, vcs_repository) = match json.get(FIELD_REPOSITORY) {
360        Some(Value::String(url)) => {
361            let normalized = normalize_repo_url(url);
362            if normalized.is_empty() {
363                return None;
364            }
365            (None, normalized)
366        }
367        Some(Value::Object(obj)) => {
368            let repo_url = obj.get("url").and_then(|u| u.as_str()).unwrap_or("");
369            let normalized = normalize_repo_url(repo_url);
370            if normalized.is_empty() {
371                return None;
372            }
373            let tool = obj
374                .get("type")
375                .and_then(|t| t.as_str())
376                .unwrap_or("git")
377                .to_string();
378            let tool_for_prefix = if normalized.starts_with("git://")
379                || normalized.starts_with("git+")
380                || normalized.starts_with("hg://")
381                || normalized.starts_with("hg+")
382                || normalized.starts_with("svn://")
383                || normalized.starts_with("svn+")
384            {
385                None
386            } else {
387                Some(tool)
388            };
389            (tool_for_prefix, normalized)
390        }
391        _ => return None,
392    };
393
394    if vcs_repository.is_empty() {
395        return None;
396    }
397
398    let mut vcs_url = vcs_tool.map_or_else(
399        || vcs_repository.clone(),
400        |tool| format!("{}+{}", tool, vcs_repository),
401    );
402
403    if let Some(vcs_revision) = json
404        .get("gitHead")
405        .and_then(|v| v.as_str())
406        .and_then(normalize_non_empty_string)
407    {
408        vcs_url.push('@');
409        vcs_url.push_str(&vcs_revision);
410    }
411
412    if let Some(Value::Object(obj)) = json.get(FIELD_REPOSITORY)
413        && let Some(directory) = obj.get("directory").and_then(|d| d.as_str())
414    {
415        vcs_url.push('#');
416        vcs_url.push_str(directory);
417    }
418
419    Some(vcs_url)
420}
421
/// Normalizes the repository shorthands accepted in package.json into full URLs.
///
/// Rules, applied in order to the trimmed input:
/// 1. empty input yields an empty string;
/// 2. URLs starting with a known http/git/hg/svn scheme are returned unchanged;
/// 3. `git@host:path` becomes `https://host/path`;
/// 4. `github:`, `gitlab:`, `bitbucket:` and `gist:` prefixes are expanded to the
///    matching HTTPS host; any other `prefix:rest` input is returned unchanged;
/// 5. a colon-free value with exactly one `/` is treated as a GitHub `owner/repo`;
/// 6. everything else is returned unchanged.
fn normalize_repo_url(url: &str) -> String {
    const PASSTHROUGH_SCHEMES: [&str; 12] = [
        "https://",
        "http://",
        "git://",
        "git+git://",
        "git+https://",
        "git+http://",
        "hg://",
        "hg+http://",
        "hg+https://",
        "svn://",
        "svn+http://",
        "svn+https://",
    ];

    let candidate = url.trim();
    if candidate.is_empty() {
        return String::new();
    }

    let has_known_scheme = PASSTHROUGH_SCHEMES
        .iter()
        .any(|scheme| candidate.starts_with(*scheme));
    if has_known_scheme {
        return candidate.to_owned();
    }

    // scp-like SSH syntax: git@host:owner/repo
    let scp_like = candidate
        .strip_prefix("git@")
        .and_then(|rest| rest.split_once(':'));
    if let Some((host, repo)) = scp_like {
        return format!("https://{}/{}", host, repo);
    }

    match candidate.split_once(':') {
        Some((platform, repo)) => {
            let host_url = match platform {
                "github" => "https://github.com/",
                "gitlab" => "https://gitlab.com/",
                "bitbucket" => "https://bitbucket.org/",
                "gist" => "https://gist.github.com/",
                _ => return candidate.to_owned(),
            };
            format!("{}{}", host_url, repo)
        }
        // Bare "owner/repo" shorthand.
        None if candidate.matches('/').count() == 1 => {
            format!("https://github.com/{}", candidate)
        }
        None => candidate.to_owned(),
    }
}
476
477/// Extracts party information (emails) from the `author`, `contributors`, and `maintainers` fields.
478fn extract_parties(json: &Value) -> Vec<Party> {
479    let mut parties = Vec::new();
480
481    // Extract author field (can be single value or array)
482    if let Some(author) = json.get(FIELD_AUTHOR) {
483        if let Some(author_list) = extract_parties_from_array(author) {
484            // Author is an array
485            for mut party in author_list {
486                if party.role.is_none() {
487                    party.role = Some("author".to_string());
488                }
489                parties.push(party);
490            }
491        } else if let Some(mut party) = extract_party_from_field(author) {
492            // Author is a single value
493            party.role = Some("author".to_string());
494            parties.push(party);
495        }
496    }
497
498    // Extract contributors field
499    if let Some(contributors) = json.get(FIELD_CONTRIBUTORS)
500        && let Some(mut party_list) = extract_parties_from_array(contributors)
501    {
502        for party in &mut party_list {
503            if party.role.is_none() {
504                party.role = Some("contributor".to_string());
505            }
506        }
507        parties.extend(party_list);
508    }
509
510    // Extract maintainers field
511    if let Some(maintainers) = json.get(FIELD_MAINTAINERS)
512        && let Some(mut party_list) = extract_parties_from_array(maintainers)
513    {
514        for party in &mut party_list {
515            if party.role.is_none() {
516                party.role = Some("maintainer".to_string());
517            }
518        }
519        parties.extend(party_list);
520    }
521
522    parties
523}
524
525/// Extracts a party from a JSON field, which can be a string or an object with name/email fields.
526fn extract_party_from_field(field: &Value) -> Option<Party> {
527    match field {
528        Value::String(s) => {
529            // Try to extract email from "Name <email>" format
530            if let Some(email) = extract_email_from_string(s) {
531                Some(Party {
532                    r#type: Some("person".to_string()),
533                    role: None,
534                    name: extract_name_from_author_string(s),
535                    email: Some(email),
536                    url: None,
537                    organization: None,
538                    organization_url: None,
539                    timezone: None,
540                })
541            } else {
542                // Treat the string as name if no email found
543                Some(Party {
544                    r#type: Some("person".to_string()),
545                    role: None,
546                    name: Some(s.clone()),
547                    email: None,
548                    url: None,
549                    organization: None,
550                    organization_url: None,
551                    timezone: None,
552                })
553            }
554        }
555        Value::Object(obj) => Some(Party {
556            r#type: Some("person".to_string()),
557            role: obj.get("role").and_then(|v| v.as_str()).map(String::from),
558            name: obj.get("name").and_then(|v| v.as_str()).map(String::from),
559            email: obj.get("email").and_then(|v| v.as_str()).map(String::from),
560            url: obj
561                .get("url")
562                .and_then(|v| v.as_str())
563                .and_then(normalize_optional_party_url),
564            organization: None,
565            organization_url: None,
566            timezone: None,
567        }),
568        _ => None,
569    }
570}
571
572/// Extracts multiple parties from a JSON array.
573fn extract_parties_from_array(array: &Value) -> Option<Vec<Party>> {
574    if let Value::Array(items) = array {
575        let parties = items
576            .iter()
577            .filter_map(extract_party_from_field)
578            .collect::<Vec<_>>();
579        if !parties.is_empty() {
580            return Some(parties);
581        }
582    }
583    None
584}
585
/// Extracts the e-mail from a string in the format `Name <email@example.com>`.
///
/// Returns the text between the first `<` and the first `>` that follows it. The
/// closing bracket is searched only after the opening one, so a stray `>` earlier in
/// the name part does not hide a valid address. The text is returned as-is (not
/// trimmed or validated).
///
/// Returns `None` when there is no `<` or no `>` after it.
fn extract_email_from_string(author_str: &str) -> Option<String> {
    let (_name_part, after_open) = author_str.split_once('<')?;
    let (email, _trailing) = after_open.split_once('>')?;
    Some(email.to_string())
}
596
/// Extracts the name part of a string in the format `Name <email@example.com>`.
///
/// When the string contains `<`, the trimmed text before it is returned, or `None`
/// if that text is blank. Without a `<` the whole trimmed string is returned, even
/// when it is empty.
fn extract_name_from_author_string(author_str: &str) -> Option<String> {
    match author_str.split_once('<') {
        Some((before_bracket, _)) => {
            let name = before_bracket.trim();
            if name.is_empty() {
                None
            } else {
                Some(name.to_owned())
            }
        }
        None => Some(author_str.trim().to_owned()),
    }
}
609
610fn default_package_data() -> PackageData {
611    PackageData {
612        package_type: Some(NpmParser::PACKAGE_TYPE),
613        primary_language: Some("JavaScript".to_string()),
614        datasource_id: Some(DatasourceId::NpmPackageJson),
615        ..Default::default()
616    }
617}
618
/// Parses an npm alias requirement of the form `npm:<name>@<constraint>`.
///
/// Returns `(actual_package_name, constraint)`. Scoped targets are supported: a
/// leading `@` belongs to the scope, so the version separator is the last `@` that is
/// not the first character. An alias without a constraint (`npm:lodash`,
/// `npm:@scope/pkg`) yields an empty constraint.
///
/// Only the `npm:` protocol is treated as an alias. Other requirement strings that
/// merely contain `:` and `@` — such as `git+ssh://git@github.com/user/repo.git` or
/// `https://user@host/pkg.tgz` — return `None`, so the caller falls back to the
/// dependency's own name instead of a fragment of the URL.
fn parse_alias_adapter(version_str: &str) -> Option<(&str, &str)> {
    let spec = version_str.strip_prefix("npm:")?;
    if spec.is_empty() {
        return None;
    }

    // Index 0 can only be the scope marker, never the version separator.
    match spec.rfind('@').filter(|&separator| separator > 0) {
        Some(separator) => {
            let (actual_package_name, at_and_constraint) = spec.split_at(separator);
            // Skip the one-byte '@' separator itself.
            Some((actual_package_name, &at_and_constraint[1..]))
        }
        None => Some((spec, "")),
    }
}
627
628fn extract_non_empty_string(json: &Value, field: &str) -> Option<String> {
629    json.get(field)
630        .and_then(|value| value.as_str())
631        .map(str::trim)
632        .filter(|value| !value.is_empty())
633        .map(String::from)
634}
635
/// Builds the npm registry API URL for a package, optionally for one version.
///
/// With a namespace, `<namespace>/<name>` is used and every `/` in it is encoded as
/// `%2f`; without one, the name is used verbatim. The version, when present, is
/// appended as a further path segment. Returns `None` when `name` is absent.
fn generate_npm_api_url(
    namespace: &Option<String>,
    name: &Option<String>,
    version: &Option<String>,
) -> Option<String> {
    const REGISTRY: &str = "https://registry.npmjs.org";

    let package = name.as_deref()?;

    let ns_name = match namespace.as_deref() {
        Some(scope) => format!("{}/{}", scope, package).replace('/', "%2f"),
        None => package.to_owned(),
    };

    Some(match version.as_deref() {
        Some(ver) => format!("{}/{}/{}", REGISTRY, ns_name, ver),
        None => format!("{}/{}", REGISTRY, ns_name),
    })
}
658
/// Joins namespace and name into the path segment used in registry URLs.
///
/// Produces `<namespace>/<name>` when a namespace is present and just `<name>`
/// otherwise. Returns `None` when `name` is absent.
fn build_registry_package_path(
    namespace: &Option<String>,
    name: &Option<String>,
) -> Option<String> {
    let name = name.as_ref()?;
    Some(match namespace {
        Some(namespace) => format!("{namespace}/{name}"),
        None => name.clone(),
    })
}
669
670fn generate_repository_homepage_url(
671    namespace: &Option<String>,
672    name: &Option<String>,
673) -> Option<String> {
674    build_registry_package_path(namespace, name)
675        .map(|package_path| format!("https://www.npmjs.com/package/{package_path}"))
676}
677
678fn generate_registry_download_url(
679    namespace: &Option<String>,
680    name: &Option<String>,
681    version: &Option<String>,
682) -> Option<String> {
683    match (
684        build_registry_package_path(namespace, name),
685        name.as_ref(),
686        version.as_ref(),
687    ) {
688        (Some(package_path), Some(name), Some(version)) => Some(format!(
689            "https://registry.npmjs.org/{}/-/{}-{}.tgz",
690            package_path, name, version
691        )),
692        _ => None,
693    }
694}
695
/// Builds the repository download URL for the package.
///
/// Delegates to `generate_registry_download_url` with the same arguments, so the
/// result is `None` whenever that function returns `None`.
fn generate_repository_download_url(
    namespace: &Option<String>,
    name: &Option<String>,
    version: &Option<String>,
) -> Option<String> {
    generate_registry_download_url(namespace, name, version)
}
703
704fn extract_dependency_group(
705    json: &Value,
706    field: &str,
707    scope: &str,
708    is_runtime: bool,
709    is_optional: bool,
710    optional_meta: Option<&HashMap<String, bool>>,
711) -> Vec<Dependency> {
712    json.get(field)
713        .and_then(|deps| deps.as_object())
714        .map_or_else(Vec::new, |deps| {
715            deps.iter()
716                .filter_map(|(name, version)| {
717                    let version_str = version.as_str()?;
718
719                    if version_str.starts_with("workspace:") {
720                        let package_url = npm_purl(name, None)?;
721                        let is_opt = if let Some(meta) = optional_meta {
722                            meta.get(name).copied()
723                        } else {
724                            Some(is_optional)
725                        };
726                        return Some(Dependency {
727                            purl: Some(package_url),
728                            extracted_requirement: Some(version_str.to_string()),
729                            scope: Some(scope.to_string()),
730                            is_runtime: Some(is_runtime),
731                            is_optional: is_opt,
732                            is_pinned: Some(false),
733                            is_direct: Some(true),
734                            resolved_package: None,
735                            extra_data: None,
736                        });
737                    }
738
739                    let actual_package_name = if let Some((actual_package_name, _constraint)) =
740                        parse_alias_adapter(version_str)
741                    {
742                        actual_package_name
743                    } else {
744                        name.as_str()
745                    };
746
747                    let package_url = npm_purl(actual_package_name, None)?;
748
749                    let is_opt = if let Some(meta) = optional_meta {
750                        meta.get(name).copied()
751                    } else {
752                        Some(is_optional)
753                    };
754
755                    Some(Dependency {
756                        purl: Some(package_url),
757                        extracted_requirement: Some(version_str.to_string()),
758                        scope: Some(scope.to_string()),
759                        is_runtime: Some(is_runtime),
760                        is_optional: is_opt,
761                        is_pinned: Some(false),
762                        is_direct: Some(true),
763                        resolved_package: None,
764                        extra_data: None,
765                    })
766                })
767                .collect()
768        })
769}
770
771/// Extracts dependencies from the `dependencies` or `devDependencies` field in the JSON.
772fn extract_dependencies(json: &Value, is_optional: bool) -> Vec<Dependency> {
773    let field = if is_optional {
774        FIELD_DEV_DEPENDENCIES
775    } else {
776        FIELD_DEPENDENCIES
777    };
778
779    let scope = if is_optional {
780        "devDependencies"
781    } else {
782        "dependencies"
783    };
784
785    extract_dependency_group(json, field, scope, !is_optional, is_optional, None)
786}
787
788fn extract_peer_dependencies(json: &Value, meta: &HashMap<String, bool>) -> Vec<Dependency> {
789    extract_dependency_group(
790        json,
791        FIELD_PEER_DEPENDENCIES,
792        "peerDependencies",
793        true,
794        false,
795        Some(meta),
796    )
797}
798
799/// Extracts optional dependencies from the `optionalDependencies` field in the JSON.
800/// Optional dependencies are marked with is_optional: true, is_runtime: true, and scope "optionalDependencies".
801fn extract_optional_dependencies(json: &Value) -> Vec<Dependency> {
802    extract_dependency_group(
803        json,
804        FIELD_OPTIONAL_DEPENDENCIES,
805        "optionalDependencies",
806        true,
807        true,
808        None,
809    )
810}
811
812fn extract_bundled_dependencies(json: &Value) -> Vec<Dependency> {
813    if let Some(bundled) = json
814        .get(FIELD_BUNDLED_DEPENDENCIES)
815        .and_then(|v| v.as_array())
816    {
817        extract_bundled_list(bundled)
818    } else {
819        Vec::new()
820    }
821}
822
823/// Helper function to extract bundled dependencies from an array of package names.
824fn extract_bundled_list(bundled_array: &[Value]) -> Vec<Dependency> {
825    bundled_array
826        .iter()
827        .filter_map(|value| {
828            let name = value.as_str()?;
829            // Create PURL without version for bundled dependencies
830            let package_url = npm_purl(name, None)?;
831
832            Some(Dependency {
833                purl: Some(package_url),
834                extracted_requirement: None,
835                scope: Some("bundledDependencies".to_string()),
836                is_runtime: Some(true),
837                is_optional: Some(false),
838                is_pinned: Some(false),
839                is_direct: Some(true),
840                resolved_package: None,
841                extra_data: None,
842            })
843        })
844        .collect()
845}
846
847/// Extracts Yarn resolutions from the `resolutions` field.
848/// Returns resolutions as a HashMap to be stored in extra_data.
849fn extract_resolutions(json: &Value) -> Option<HashMap<String, serde_json::Value>> {
850    json.get(FIELD_RESOLUTIONS)
851        .and_then(|resolutions| resolutions.as_object())
852        .map(|resolutions_obj| {
853            let mut extra_data = HashMap::new();
854            extra_data.insert(
855                "resolutions".to_string(),
856                serde_json::Value::Object(resolutions_obj.clone()),
857            );
858            extra_data
859        })
860}
861
862fn extract_peer_dependencies_meta(json: &Value) -> HashMap<String, bool> {
863    json.get(FIELD_PEER_DEPENDENCIES_META)
864        .and_then(|meta| meta.as_object())
865        .map_or_else(HashMap::new, |meta_obj| {
866            meta_obj
867                .iter()
868                .filter_map(|(package_name, meta_value)| {
869                    meta_value.as_object().and_then(|obj| {
870                        obj.get("optional")
871                            .and_then(|opt| opt.as_bool())
872                            .map(|optional| (package_name.clone(), optional))
873                    })
874                })
875                .collect()
876        })
877}
878
879fn extract_dependencies_meta(json: &Value) -> Option<serde_json::Value> {
880    json.get(FIELD_DEPENDENCIES_META).cloned()
881}
882
883fn extract_overrides(json: &Value) -> Option<serde_json::Value> {
884    json.get(FIELD_OVERRIDES).cloned()
885}
886
887fn extract_description(json: &Value) -> Option<String> {
888    json.get(FIELD_DESCRIPTION)
889        .and_then(|v| v.as_str())
890        .map(String::from)
891}
892
893fn extract_homepage_url(json: &Value) -> Option<String> {
894    match json.get(FIELD_HOMEPAGE) {
895        Some(Value::String(homepage)) => normalize_non_empty_string(homepage),
896        _ => None,
897    }
898}
899
/// Trims surrounding whitespace from `value` and returns the result as an
/// owned string, or `None` when nothing but whitespace was supplied.
fn normalize_non_empty_string(value: &str) -> Option<String> {
    match value.trim() {
        "" => None,
        trimmed => Some(trimmed.to_owned()),
    }
}
908
909fn normalize_optional_party_url(value: &str) -> Option<String> {
910    let normalized = normalize_non_empty_string(value)?;
911
912    if normalized.eq_ignore_ascii_case("none") {
913        None
914    } else {
915        Some(normalized)
916    }
917}
918
919fn extract_keywords_as_vec(json: &Value) -> Vec<String> {
920    json.get(FIELD_KEYWORDS)
921        .and_then(|v| {
922            if let Some(str) = v.as_str() {
923                Some(vec![str.to_string()])
924            } else if let Some(arr) = v.as_array() {
925                let keywords: Vec<String> = arr
926                    .iter()
927                    .filter_map(|kw| kw.as_str())
928                    .map(String::from)
929                    .collect();
930                if keywords.is_empty() {
931                    None
932                } else {
933                    Some(keywords)
934                }
935            } else {
936                None
937            }
938        })
939        .unwrap_or_default()
940}
941
942fn extract_engines(json: &Value) -> Option<serde_json::Value> {
943    json.get(FIELD_ENGINES).cloned()
944}
945
946fn extract_raw_extra_data_field(json: &Value, field: &str) -> Option<serde_json::Value> {
947    json.get(field).cloned()
948}
949
950fn extract_package_manager(json: &Value) -> Option<String> {
951    json.get(FIELD_PACKAGE_MANAGER)
952        .and_then(|v| v.as_str())
953        .map(String::from)
954}
955
956fn extract_workspaces(json: &Value) -> Option<serde_json::Value> {
957    json.get(FIELD_WORKSPACES).cloned()
958}
959
960fn extract_private(json: &Value) -> Option<bool> {
961    json.get(FIELD_PRIVATE).and_then(|v| v.as_bool())
962}
963
964fn extract_bugs(json: &Value) -> Option<String> {
965    match json.get(FIELD_BUGS) {
966        Some(bugs) => {
967            if let Some(url) = bugs.as_str() {
968                normalize_non_empty_string(url)
969            } else if let Some(obj) = bugs.as_object() {
970                obj.get("url")
971                    .and_then(|v| v.as_str())
972                    .and_then(normalize_non_empty_string)
973            } else {
974                None
975            }
976        }
977        None => None,
978    }
979}
980
981fn extract_dist_hashes(dist: &Value) -> (Option<String>, Option<String>, Option<String>) {
982    let mut sha1 = dist
983        .get("shasum")
984        .and_then(|v| v.as_str())
985        .and_then(normalize_non_empty_string);
986    let mut sha256 = None;
987    let mut sha512 = None;
988
989    if let Some(integrity) = dist.get("integrity").and_then(|v| v.as_str())
990        && let Some((algo, hex_digest)) = parse_sri(integrity)
991    {
992        match algo.as_str() {
993            "sha1" => {
994                if sha1.is_none() {
995                    sha1 = Some(hex_digest);
996                }
997            }
998            "sha256" => sha256 = Some(hex_digest),
999            "sha512" => sha512 = Some(hex_digest),
1000            _ => {}
1001        }
1002    }
1003
1004    (sha1, sha256, sha512)
1005}
1006
1007fn extract_dist_tarball(dist: &Value) -> Option<String> {
1008    dist.get("tarball")
1009        .or_else(|| dist.get("dnl_url"))
1010        .and_then(|v| v.as_str())
1011        .map(normalize_npm_registry_tarball_url)
1012}
1013
/// Rewrites a tarball URL on the plain-HTTP npm registry to its HTTPS form.
///
/// Only URLs that start with exactly `http://registry.npmjs.org/` are
/// rewritten; every other URL is returned unchanged.
fn normalize_npm_registry_tarball_url(url: &str) -> String {
    const INSECURE_REGISTRY_PREFIX: &str = "http://registry.npmjs.org/";

    match url.strip_prefix(INSECURE_REGISTRY_PREFIX) {
        Some(path) => format!("https://registry.npmjs.org/{path}"),
        None => url.to_owned(),
    }
}
1021
1022fn combine_extra_data(
1023    extra_data: Option<HashMap<String, serde_json::Value>>,
1024    additional_data: HashMap<String, serde_json::Value>,
1025) -> HashMap<String, serde_json::Value> {
1026    let mut combined = extra_data.unwrap_or_default();
1027    for (key, value) in additional_data {
1028        combined.insert(key, value);
1029    }
1030    combined
1031}
1032
// Registers this parser through the crate-level `register_parser!` macro.
// NOTE(review): the positional arguments appear to be, in order, a
// human-readable description, the path glob patterns, the package type, the
// primary language, and an optional documentation URL — confirm against the
// macro definition.
crate::register_parser!(
    "npm package.json manifest",
    &["**/package.json"],
    "npm",
    "JavaScript",
    Some("https://docs.npmjs.com/cli/v10/configuring-npm/package-json"),
);