Skip to main content

provenant/parsers/
npm.rs

1//! Parser for npm package.json manifests.
2//!
3//! Extracts package metadata, dependencies, and license information from
4//! package.json files used by Node.js/npm projects.
5//!
6//! # Supported Formats
7//! - package.json (manifest)
8//!
9//! # Key Features
10//! - Full dependency extraction (dependencies, devDependencies, peerDependencies, optionalDependencies, bundledDependencies)
11//! - Package URL (purl) generation for scoped and unscoped packages
12//! - VCS repository URL extraction
13//! - Distribution integrity hash extraction (sha1, sha512)
14//! - Support for legacy formats (licenses array, license objects)
15//!
16//! # Implementation Notes
17//! - Uses serde_json for JSON parsing
18//! - Namespace format: `@org` for scoped packages (e.g., `@babel/core`)
19//! - Graceful error handling: logs warnings and returns default on parse failure
20
21use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Party};
22use crate::parsers::utils::{npm_purl, parse_sri};
23use log::warn;
24use serde_json::Value;
25use std::collections::HashMap;
26use std::fs;
27use std::path::Path;
28
29use super::PackageParser;
30use super::license_normalization::normalize_spdx_declared_license;
31
// Keys looked up in the parsed package.json document.

// Package identity and licensing.
const FIELD_NAME: &str = "name";
const FIELD_VERSION: &str = "version";
const FIELD_LICENSE: &str = "license";
const FIELD_LICENSES: &str = "licenses";
// Links and people.
const FIELD_HOMEPAGE: &str = "homepage";
const FIELD_REPOSITORY: &str = "repository";
const FIELD_AUTHOR: &str = "author";
const FIELD_CONTRIBUTORS: &str = "contributors";
const FIELD_MAINTAINERS: &str = "maintainers";
// Dependency sections.
const FIELD_DEPENDENCIES: &str = "dependencies";
const FIELD_DEV_DEPENDENCIES: &str = "devDependencies";
const FIELD_PEER_DEPENDENCIES: &str = "peerDependencies";
const FIELD_OPTIONAL_DEPENDENCIES: &str = "optionalDependencies";
const FIELD_BUNDLED_DEPENDENCIES: &str = "bundledDependencies";
const FIELD_RESOLUTIONS: &str = "resolutions";
// Descriptive metadata.
const FIELD_DESCRIPTION: &str = "description";
const FIELD_KEYWORDS: &str = "keywords";
// Remaining top-level keys.
const FIELD_ENGINES: &str = "engines";
const FIELD_OS: &str = "os";
const FIELD_CPU: &str = "cpu";
const FIELD_LIBC: &str = "libc";
const FIELD_DEPRECATED: &str = "deprecated";
const FIELD_HAS_BIN: &str = "hasBin";
const FIELD_PACKAGE_MANAGER: &str = "packageManager";
const FIELD_WORKSPACES: &str = "workspaces";
const FIELD_PRIVATE: &str = "private";
const FIELD_BUGS: &str = "bugs";
const FIELD_DIST: &str = "dist";
const FIELD_OVERRIDES: &str = "overrides";
const FIELD_PEER_DEPENDENCIES_META: &str = "peerDependenciesMeta";
const FIELD_DEPENDENCIES_META: &str = "dependenciesMeta";
63
64/// npm package parser for package.json manifests.
65///
66/// Supports all npm dependency types (dependencies, devDependencies, peerDependencies,
67/// optionalDependencies, bundledDependencies) and workspace configurations.
68pub struct NpmParser;
69
70impl PackageParser for NpmParser {
71    const PACKAGE_TYPE: PackageType = PackageType::Npm;
72
73    fn extract_packages(path: &Path) -> Vec<PackageData> {
74        let (json, _field_lines) = match read_and_parse_json_with_lines(path) {
75            Ok((json, lines)) => (json, lines),
76            Err(e) => {
77                warn!("Failed to read or parse package.json at {:?}: {}", path, e);
78                return vec![default_package_data()];
79            }
80        };
81
82        let name = extract_non_empty_string(&json, FIELD_NAME);
83        let version = extract_non_empty_string(&json, FIELD_VERSION);
84        let namespace = extract_namespace(&name);
85        let package_name = extract_package_name(&name);
86        let description = extract_description(&json);
87
88        let extracted_license_statement = extract_license_statement(&json);
89        let (declared_license_expression, declared_license_expression_spdx, license_detections) =
90            normalize_spdx_declared_license(extract_declared_license_candidate(&json).as_deref());
91        let peer_dependencies_meta = extract_peer_dependencies_meta(&json);
92        let dependencies = extract_dependencies(&json, false);
93        let dev_dependencies = extract_dependencies(&json, true);
94        let peer_dependencies = extract_peer_dependencies(&json, &peer_dependencies_meta);
95        let optional_dependencies = extract_optional_dependencies(&json);
96        let bundled_dependencies = extract_bundled_dependencies(&json);
97        let purl = create_package_url(&name, &version, &namespace);
98        let keywords_vec = extract_keywords_as_vec(&json);
99
100        let mut extra_data_map = HashMap::new();
101
102        if let Some(resolutions) = extract_resolutions(&json) {
103            extra_data_map = combine_extra_data(Some(extra_data_map), resolutions);
104        }
105
106        if let Some(engines) = extract_engines(&json) {
107            extra_data_map.insert("engines".to_string(), engines);
108        }
109
110        for field in [
111            FIELD_OS,
112            FIELD_CPU,
113            FIELD_LIBC,
114            FIELD_DEPRECATED,
115            FIELD_HAS_BIN,
116        ] {
117            if let Some(value) = extract_raw_extra_data_field(&json, field) {
118                extra_data_map.insert(field.to_string(), value);
119            }
120        }
121
122        if let Some(package_manager) = extract_package_manager(&json) {
123            extra_data_map.insert(
124                "packageManager".to_string(),
125                serde_json::Value::String(package_manager),
126            );
127        }
128
129        if let Some(workspaces) = extract_workspaces(&json) {
130            extra_data_map.insert("workspaces".to_string(), workspaces);
131        }
132
133        if let Some(overrides) = extract_overrides(&json) {
134            extra_data_map.insert("overrides".to_string(), overrides);
135        }
136
137        if let Some(private) = extract_private(&json) {
138            extra_data_map.insert("private".to_string(), serde_json::Value::Bool(private));
139        }
140
141        if let Some(dependencies_meta) = extract_dependencies_meta(&json) {
142            extra_data_map.insert("dependenciesMeta".to_string(), dependencies_meta);
143        }
144
145        let extra_data = if extra_data_map.is_empty() {
146            None
147        } else {
148            Some(extra_data_map)
149        };
150
151        let (dist_sha1, dist_sha256, dist_sha512) = match json.get(FIELD_DIST) {
152            Some(dist) => extract_dist_hashes(dist),
153            None => (None, None, None),
154        };
155
156        let download_url = json
157            .get(FIELD_DIST)
158            .and_then(extract_dist_tarball)
159            .or_else(|| generate_registry_download_url(&namespace, &package_name, &version));
160
161        let api_data_url = generate_npm_api_url(&namespace, &package_name, &version);
162        let repository_homepage_url = generate_repository_homepage_url(&namespace, &package_name);
163        let repository_download_url =
164            generate_repository_download_url(&namespace, &package_name, &version);
165        let vcs_url = extract_vcs_url(&json);
166
167        vec![PackageData {
168            package_type: Some(Self::PACKAGE_TYPE),
169            namespace,
170            name,
171            version,
172            qualifiers: None,
173            subpath: None,
174            primary_language: Some("JavaScript".to_string()),
175            description,
176            release_date: None,
177            parties: extract_parties(&json),
178            keywords: keywords_vec,
179            homepage_url: extract_homepage_url(&json),
180            download_url,
181            size: None,
182            sha1: dist_sha1,
183            md5: None,
184            sha256: dist_sha256,
185            sha512: dist_sha512,
186            bug_tracking_url: extract_bugs(&json),
187            code_view_url: None,
188            vcs_url,
189            copyright: None,
190            holder: None,
191            declared_license_expression,
192            declared_license_expression_spdx,
193            license_detections,
194            other_license_expression: None,
195            other_license_expression_spdx: None,
196            other_license_detections: Vec::new(),
197            extracted_license_statement,
198            notice_text: None,
199            source_packages: Vec::new(),
200            file_references: Vec::new(),
201            is_private: json
202                .get("private")
203                .and_then(|v| v.as_bool())
204                .unwrap_or(false),
205            is_virtual: false,
206            extra_data,
207            dependencies: [
208                dependencies,
209                dev_dependencies,
210                peer_dependencies,
211                optional_dependencies,
212                bundled_dependencies,
213            ]
214            .concat(),
215            repository_homepage_url,
216            repository_download_url,
217            api_data_url,
218            datasource_id: Some(DatasourceId::NpmPackageJson),
219            purl,
220        }]
221    }
222
223    fn is_match(path: &Path) -> bool {
224        path.file_name().is_some_and(|name| name == "package.json")
225    }
226}
227
228/// Reads and parses a JSON file while tracking line numbers of fields
229fn read_and_parse_json_with_lines(path: &Path) -> Result<(Value, HashMap<String, usize>), String> {
230    // Read file once into string
231    let content = fs::read_to_string(path).map_err(|e| format!("Failed to read file: {}", e))?;
232
233    // Parse JSON
234    let json: Value =
235        serde_json::from_str(&content).map_err(|e| format!("Failed to parse JSON: {}", e))?;
236
237    // Track line numbers for each field by iterating over lines
238    let mut field_lines = HashMap::new();
239    for (line_num, line) in content.lines().enumerate() {
240        let trimmed = line.trim();
241        // Look for field names in the format: "field": value
242        if let Some(field_name) = extract_field_name(trimmed) {
243            field_lines.insert(field_name, line_num + 1); // 1-based line numbers
244        }
245    }
246
247    Ok((json, field_lines))
248}
249
/// Extracts the key from a line of the form `"key": value`.
///
/// Returns `None` when the line does not start with a quoted string, when that
/// string is empty or unterminated, or when its closing quote is not followed
/// by a colon. The colon requirement keeps quoted array elements such as
/// `"lodash",` from being reported as field names.
///
/// Keys that contain an escaped quote (`\"`) are not recognised.
fn extract_field_name(line: &str) -> Option<String> {
    let rest = line.trim().strip_prefix('"')?;
    let (field_name, after_key) = rest.split_once('"')?;

    // A JSON key is always followed (after optional whitespace) by ':'.
    if field_name.is_empty() || !after_key.trim_start().starts_with(':') {
        return None;
    }

    Some(field_name.to_string())
}
276
/// Returns the part of the package name before the first `/` (for example
/// `@babel` for `@babel/core`), or `None` when the name has no `/` or is absent.
fn extract_namespace(name: &Option<String>) -> Option<String> {
    let full_name = name.as_deref()?;
    full_name
        .split_once('/')
        .map(|(scope, _)| scope.to_string())
}
286
/// Returns the package name without its namespace: the segment after the first
/// `/` when there is one, otherwise the whole name.
fn extract_package_name(name: &Option<String>) -> Option<String> {
    let full_name = name.as_ref()?;
    // `nth(1)` is only `Some` when the name contains at least one '/'.
    let bare_name = match full_name.split('/').nth(1) {
        Some(segment) => segment.to_string(),
        None => full_name.clone(),
    };
    Some(bare_name)
}
296
297fn create_package_url(
298    name: &Option<String>,
299    version: &Option<String>,
300    _namespace: &Option<String>,
301) -> Option<String> {
302    // Note: We extract and store namespace in PackageData for metadata purposes,
303    // but the full package name (e.g., "@babel/core") is used for PURL generation.
304    let name = name.as_ref()?;
305    npm_purl(name, version.as_deref())
306}
307
308fn extract_license_statement(json: &Value) -> Option<String> {
309    let mut statements = Vec::new();
310
311    if let Some(license_value) = json.get(FIELD_LICENSE) {
312        if let Some(license_str) = license_value.as_str() {
313            statements.push(format!("- {}", license_str));
314        } else if let Some(license_obj) = license_value.as_object()
315            && let Some(type_val) = license_obj.get("type").and_then(|v| v.as_str())
316        {
317            statements.push(format!("- type: {}", type_val));
318            if let Some(url_val) = license_obj.get("url").and_then(|v| v.as_str()) {
319                statements.push(format!("  url: {}", url_val));
320            }
321        }
322    }
323
324    if let Some(licenses) = json.get(FIELD_LICENSES).and_then(|v| v.as_array()) {
325        for license in licenses {
326            if let Some(license_obj) = license.as_object()
327                && let Some(type_val) = license_obj.get("type").and_then(|v| v.as_str())
328            {
329                statements.push(format!("- type: {}", type_val));
330                if let Some(url_val) = license_obj.get("url").and_then(|v| v.as_str()) {
331                    statements.push(format!("  url: {}", url_val));
332                }
333            }
334        }
335    }
336
337    if statements.is_empty() {
338        None
339    } else {
340        Some(format!("{}\n", statements.join("\n")))
341    }
342}
343
344fn extract_declared_license_candidate(json: &Value) -> Option<String> {
345    json.get(FIELD_LICENSE)
346        .and_then(|value| value.as_str())
347        .map(str::trim)
348        .filter(|value| !value.is_empty())
349        .map(str::to_string)
350}
351
352/// Extracts the repository URL from the repository field.
353/// Extracts and normalizes VCS URL from the repository field.
354/// Supports both string and object formats with optional 'type' and 'directory' fields.
355fn extract_vcs_url(json: &Value) -> Option<String> {
356    let (vcs_tool, vcs_repository) = match json.get(FIELD_REPOSITORY) {
357        Some(Value::String(url)) => {
358            let normalized = normalize_repo_url(url);
359            if normalized.is_empty() {
360                return None;
361            }
362            (None, normalized)
363        }
364        Some(Value::Object(obj)) => {
365            let repo_url = obj.get("url").and_then(|u| u.as_str()).unwrap_or("");
366            let normalized = normalize_repo_url(repo_url);
367            if normalized.is_empty() {
368                return None;
369            }
370            let tool = obj
371                .get("type")
372                .and_then(|t| t.as_str())
373                .unwrap_or("git")
374                .to_string();
375            let tool_for_prefix = if normalized.starts_with("git://")
376                || normalized.starts_with("git+")
377                || normalized.starts_with("hg://")
378                || normalized.starts_with("hg+")
379                || normalized.starts_with("svn://")
380                || normalized.starts_with("svn+")
381            {
382                None
383            } else {
384                Some(tool)
385            };
386            (tool_for_prefix, normalized)
387        }
388        _ => return None,
389    };
390
391    if vcs_repository.is_empty() {
392        return None;
393    }
394
395    let mut vcs_url = vcs_tool.map_or_else(
396        || vcs_repository.clone(),
397        |tool| format!("{}+{}", tool, vcs_repository),
398    );
399
400    if let Some(vcs_revision) = json
401        .get("gitHead")
402        .and_then(|v| v.as_str())
403        .and_then(normalize_non_empty_string)
404    {
405        vcs_url.push('@');
406        vcs_url.push_str(&vcs_revision);
407    }
408
409    if let Some(Value::Object(obj)) = json.get(FIELD_REPOSITORY)
410        && let Some(directory) = obj.get("directory").and_then(|d| d.as_str())
411    {
412        vcs_url.push('#');
413        vcs_url.push_str(directory);
414    }
415
416    Some(vcs_url)
417}
418
/// Normalizes the repository shorthand forms used in package.json.
///
/// Rules, applied in order to the trimmed input:
/// 1. Empty input yields an empty string.
/// 2. URLs with a known http/git/hg/svn scheme are returned unchanged.
/// 3. `git@host:path` becomes `https://host/path`.
/// 4. `github:`, `gitlab:`, `bitbucket:` and `gist:` shortcuts expand to the
///    matching HTTPS host; any other `prefix:rest` is returned unchanged.
/// 5. A colon-free `owner/repo` (exactly one `/`) is treated as a GitHub repo.
/// 6. Anything else is returned unchanged.
///
/// Based on normalize_vcs_url() from the Python reference.
fn normalize_repo_url(url: &str) -> String {
    const PASSTHROUGH_SCHEMES: [&str; 12] = [
        "https://",
        "http://",
        "git://",
        "git+git://",
        "git+https://",
        "git+http://",
        "hg://",
        "hg+http://",
        "hg+https://",
        "svn://",
        "svn+http://",
        "svn+https://",
    ];

    let url = url.trim();
    if url.is_empty() {
        return String::new();
    }

    let has_known_scheme = PASSTHROUGH_SCHEMES
        .iter()
        .any(|scheme| url.starts_with(*scheme));
    if has_known_scheme {
        return url.to_string();
    }

    // scp-style SSH address: git@host:owner/repo
    if let Some(rest) = url.strip_prefix("git@") {
        if let Some((host, repo)) = rest.split_once(':') {
            return format!("https://{}/{}", host, repo);
        }
    }

    match url.split_once(':') {
        Some((platform, repo)) => {
            let host_url = match platform {
                "github" => "https://github.com/",
                "gitlab" => "https://gitlab.com/",
                "bitbucket" => "https://bitbucket.org/",
                "gist" => "https://gist.github.com/",
                _ => return url.to_string(),
            };
            format!("{}{}", host_url, repo)
        }
        // No colon at all: a bare `owner/repo` means GitHub.
        None if url.matches('/').count() == 1 => format!("https://github.com/{}", url),
        None => url.to_string(),
    }
}
473
474/// Extracts party information (emails) from the `author`, `contributors`, and `maintainers` fields.
475fn extract_parties(json: &Value) -> Vec<Party> {
476    let mut parties = Vec::new();
477
478    // Extract author field (can be single value or array)
479    if let Some(author) = json.get(FIELD_AUTHOR) {
480        if let Some(author_list) = extract_parties_from_array(author) {
481            // Author is an array
482            for mut party in author_list {
483                if party.role.is_none() {
484                    party.role = Some("author".to_string());
485                }
486                parties.push(party);
487            }
488        } else if let Some(mut party) = extract_party_from_field(author) {
489            // Author is a single value
490            party.role = Some("author".to_string());
491            parties.push(party);
492        }
493    }
494
495    // Extract contributors field
496    if let Some(contributors) = json.get(FIELD_CONTRIBUTORS)
497        && let Some(mut party_list) = extract_parties_from_array(contributors)
498    {
499        for party in &mut party_list {
500            if party.role.is_none() {
501                party.role = Some("contributor".to_string());
502            }
503        }
504        parties.extend(party_list);
505    }
506
507    // Extract maintainers field
508    if let Some(maintainers) = json.get(FIELD_MAINTAINERS)
509        && let Some(mut party_list) = extract_parties_from_array(maintainers)
510    {
511        for party in &mut party_list {
512            if party.role.is_none() {
513                party.role = Some("maintainer".to_string());
514            }
515        }
516        parties.extend(party_list);
517    }
518
519    parties
520}
521
522/// Extracts a party from a JSON field, which can be a string or an object with name/email fields.
523fn extract_party_from_field(field: &Value) -> Option<Party> {
524    match field {
525        Value::String(s) => {
526            // Try to extract email from "Name <email>" format
527            if let Some(email) = extract_email_from_string(s) {
528                Some(Party {
529                    r#type: Some("person".to_string()),
530                    role: None,
531                    name: extract_name_from_author_string(s),
532                    email: Some(email),
533                    url: None,
534                    organization: None,
535                    organization_url: None,
536                    timezone: None,
537                })
538            } else {
539                // Treat the string as name if no email found
540                Some(Party {
541                    r#type: Some("person".to_string()),
542                    role: None,
543                    name: Some(s.clone()),
544                    email: None,
545                    url: None,
546                    organization: None,
547                    organization_url: None,
548                    timezone: None,
549                })
550            }
551        }
552        Value::Object(obj) => Some(Party {
553            r#type: Some("person".to_string()),
554            role: obj.get("role").and_then(|v| v.as_str()).map(String::from),
555            name: obj.get("name").and_then(|v| v.as_str()).map(String::from),
556            email: obj.get("email").and_then(|v| v.as_str()).map(String::from),
557            url: obj
558                .get("url")
559                .and_then(|v| v.as_str())
560                .and_then(normalize_optional_party_url),
561            organization: None,
562            organization_url: None,
563            timezone: None,
564        }),
565        _ => None,
566    }
567}
568
569/// Extracts multiple parties from a JSON array.
570fn extract_parties_from_array(array: &Value) -> Option<Vec<Party>> {
571    if let Value::Array(items) = array {
572        let parties = items
573            .iter()
574            .filter_map(extract_party_from_field)
575            .collect::<Vec<_>>();
576        if !parties.is_empty() {
577            return Some(parties);
578        }
579    }
580    None
581}
582
/// Extracts the email from a string in the format `Name <email@example.com>`.
///
/// The email is the text between the first `<` and the first `>` that follows
/// it. A `>` appearing before the `<` (as in `A > B <a@b.c>`) is ignored, so it
/// no longer hides the address. Returns `None` when there is no such pair.
fn extract_email_from_string(author_str: &str) -> Option<String> {
    let (_, after_open) = author_str.split_once('<')?;
    // Look for the closing bracket only after the opening one.
    let (email, _) = after_open.split_once('>')?;
    Some(email.to_string())
}
593
/// Extracts the name part of an author string.
///
/// With a `<`, the trimmed text before it is the name (`None` when that text is
/// blank). Without a `<`, the whole trimmed string is returned as the name.
fn extract_name_from_author_string(author_str: &str) -> Option<String> {
    match author_str.split_once('<') {
        Some((before_email, _)) => {
            let name = before_email.trim();
            if name.is_empty() {
                None
            } else {
                Some(name.to_string())
            }
        }
        None => Some(author_str.trim().to_string()),
    }
}
606
607fn default_package_data() -> PackageData {
608    PackageData {
609        primary_language: Some("JavaScript".to_string()),
610        ..Default::default()
611    }
612}
613
/// Parses an npm alias requirement of the form `npm:<package>@<constraint>`.
///
/// Returns `(package, constraint)`, e.g. `("@babel/core", "^7.0.0")` for
/// `npm:@babel/core@^7.0.0`.
///
/// Only requirements that start with the `npm:` protocol are treated as
/// aliases. Other requirements that merely contain `:` and `@` — such as
/// `git+ssh://git@github.com:npm/cli.git#v1.0.27` — return `None` instead of
/// producing a bogus package name. `None` is also returned when there is no
/// `@<constraint>` part after the package name (the leading `@` of a scoped
/// name is not a constraint separator).
fn parse_alias_adapter(version_str: &str) -> Option<(&str, &str)> {
    let target = version_str.strip_prefix("npm:")?;
    let (actual_package_name, constraint) = target.rsplit_once('@')?;

    // An empty name means the only '@' was the scope marker of `@scope/pkg`.
    if actual_package_name.is_empty() {
        return None;
    }

    Some((actual_package_name, constraint))
}
622
623fn extract_non_empty_string(json: &Value, field: &str) -> Option<String> {
624    json.get(field)
625        .and_then(|value| value.as_str())
626        .map(str::trim)
627        .filter(|value| !value.is_empty())
628        .map(String::from)
629}
630
/// Builds the npm registry API URL for a package.
///
/// With a namespace, `namespace/name` is used with every `/` encoded as `%2f`.
/// The version, when present, is appended as a further path segment. Returns
/// `None` when `name` is `None`.
fn generate_npm_api_url(
    namespace: &Option<String>,
    name: &Option<String>,
    version: &Option<String>,
) -> Option<String> {
    const REGISTRY: &str = "https://registry.npmjs.org";

    let name = name.as_deref()?;
    let package_segment = match namespace {
        Some(scope) => format!("{}/{}", scope, name).replace('/', "%2f"),
        None => name.to_string(),
    };

    let mut url = format!("{}/{}", REGISTRY, package_segment);
    if let Some(ver) = version {
        url.push('/');
        url.push_str(ver);
    }

    Some(url)
}
653
/// Joins namespace and name into `namespace/name`; without a namespace the
/// name is returned alone. `None` when there is no name.
fn build_registry_package_path(
    namespace: &Option<String>,
    name: &Option<String>,
) -> Option<String> {
    let name = name.as_ref()?;
    let package_path = match namespace {
        Some(namespace) => format!("{namespace}/{name}"),
        None => name.clone(),
    };
    Some(package_path)
}
664
665fn generate_repository_homepage_url(
666    namespace: &Option<String>,
667    name: &Option<String>,
668) -> Option<String> {
669    build_registry_package_path(namespace, name)
670        .map(|package_path| format!("https://www.npmjs.com/package/{package_path}"))
671}
672
673fn generate_registry_download_url(
674    namespace: &Option<String>,
675    name: &Option<String>,
676    version: &Option<String>,
677) -> Option<String> {
678    match (
679        build_registry_package_path(namespace, name),
680        name.as_ref(),
681        version.as_ref(),
682    ) {
683        (Some(package_path), Some(name), Some(version)) => Some(format!(
684            "https://registry.npmjs.org/{}/-/{}-{}.tgz",
685            package_path, name, version
686        )),
687        _ => None,
688    }
689}
690
/// Returns the repository download URL for the package.
///
/// Delegates to `generate_registry_download_url`, so the result is identical
/// to the registry tarball URL built from the same arguments.
fn generate_repository_download_url(
    namespace: &Option<String>,
    name: &Option<String>,
    version: &Option<String>,
) -> Option<String> {
    generate_registry_download_url(namespace, name, version)
}
698
699fn extract_dependency_group(
700    json: &Value,
701    field: &str,
702    scope: &str,
703    is_runtime: bool,
704    is_optional: bool,
705    optional_meta: Option<&HashMap<String, bool>>,
706) -> Vec<Dependency> {
707    json.get(field)
708        .and_then(|deps| deps.as_object())
709        .map_or_else(Vec::new, |deps| {
710            deps.iter()
711                .filter_map(|(name, version)| {
712                    let version_str = version.as_str()?;
713
714                    if version_str.starts_with("workspace:") {
715                        let package_url = npm_purl(name, None)?;
716                        let is_opt = if let Some(meta) = optional_meta {
717                            meta.get(name).copied()
718                        } else {
719                            Some(is_optional)
720                        };
721                        return Some(Dependency {
722                            purl: Some(package_url),
723                            extracted_requirement: Some(version_str.to_string()),
724                            scope: Some(scope.to_string()),
725                            is_runtime: Some(is_runtime),
726                            is_optional: is_opt,
727                            is_pinned: Some(false),
728                            is_direct: Some(true),
729                            resolved_package: None,
730                            extra_data: None,
731                        });
732                    }
733
734                    let actual_package_name = if let Some((actual_package_name, _constraint)) =
735                        parse_alias_adapter(version_str)
736                    {
737                        actual_package_name
738                    } else {
739                        name.as_str()
740                    };
741
742                    let package_url = npm_purl(actual_package_name, None)?;
743
744                    let is_opt = if let Some(meta) = optional_meta {
745                        meta.get(name).copied()
746                    } else {
747                        Some(is_optional)
748                    };
749
750                    Some(Dependency {
751                        purl: Some(package_url),
752                        extracted_requirement: Some(version_str.to_string()),
753                        scope: Some(scope.to_string()),
754                        is_runtime: Some(is_runtime),
755                        is_optional: is_opt,
756                        is_pinned: Some(false),
757                        is_direct: Some(true),
758                        resolved_package: None,
759                        extra_data: None,
760                    })
761                })
762                .collect()
763        })
764}
765
766/// Extracts dependencies from the `dependencies` or `devDependencies` field in the JSON.
767fn extract_dependencies(json: &Value, is_optional: bool) -> Vec<Dependency> {
768    let field = if is_optional {
769        FIELD_DEV_DEPENDENCIES
770    } else {
771        FIELD_DEPENDENCIES
772    };
773
774    let scope = if is_optional {
775        "devDependencies"
776    } else {
777        "dependencies"
778    };
779
780    extract_dependency_group(json, field, scope, !is_optional, is_optional, None)
781}
782
783fn extract_peer_dependencies(json: &Value, meta: &HashMap<String, bool>) -> Vec<Dependency> {
784    extract_dependency_group(
785        json,
786        FIELD_PEER_DEPENDENCIES,
787        "peerDependencies",
788        true,
789        false,
790        Some(meta),
791    )
792}
793
794/// Extracts optional dependencies from the `optionalDependencies` field in the JSON.
795/// Optional dependencies are marked with is_optional: true, is_runtime: true, and scope "optionalDependencies".
796fn extract_optional_dependencies(json: &Value) -> Vec<Dependency> {
797    extract_dependency_group(
798        json,
799        FIELD_OPTIONAL_DEPENDENCIES,
800        "optionalDependencies",
801        true,
802        true,
803        None,
804    )
805}
806
807fn extract_bundled_dependencies(json: &Value) -> Vec<Dependency> {
808    if let Some(bundled) = json
809        .get(FIELD_BUNDLED_DEPENDENCIES)
810        .and_then(|v| v.as_array())
811    {
812        extract_bundled_list(bundled)
813    } else {
814        Vec::new()
815    }
816}
817
818/// Helper function to extract bundled dependencies from an array of package names.
819fn extract_bundled_list(bundled_array: &[Value]) -> Vec<Dependency> {
820    bundled_array
821        .iter()
822        .filter_map(|value| {
823            let name = value.as_str()?;
824            // Create PURL without version for bundled dependencies
825            let package_url = npm_purl(name, None)?;
826
827            Some(Dependency {
828                purl: Some(package_url),
829                extracted_requirement: None,
830                scope: Some("bundledDependencies".to_string()),
831                is_runtime: Some(true),
832                is_optional: Some(false),
833                is_pinned: Some(false),
834                is_direct: Some(true),
835                resolved_package: None,
836                extra_data: None,
837            })
838        })
839        .collect()
840}
841
842/// Extracts Yarn resolutions from the `resolutions` field.
843/// Returns resolutions as a HashMap to be stored in extra_data.
844fn extract_resolutions(json: &Value) -> Option<HashMap<String, serde_json::Value>> {
845    json.get(FIELD_RESOLUTIONS)
846        .and_then(|resolutions| resolutions.as_object())
847        .map(|resolutions_obj| {
848            let mut extra_data = HashMap::new();
849            extra_data.insert(
850                "resolutions".to_string(),
851                serde_json::Value::Object(resolutions_obj.clone()),
852            );
853            extra_data
854        })
855}
856
857fn extract_peer_dependencies_meta(json: &Value) -> HashMap<String, bool> {
858    json.get(FIELD_PEER_DEPENDENCIES_META)
859        .and_then(|meta| meta.as_object())
860        .map_or_else(HashMap::new, |meta_obj| {
861            meta_obj
862                .iter()
863                .filter_map(|(package_name, meta_value)| {
864                    meta_value.as_object().and_then(|obj| {
865                        obj.get("optional")
866                            .and_then(|opt| opt.as_bool())
867                            .map(|optional| (package_name.clone(), optional))
868                    })
869                })
870                .collect()
871        })
872}
873
874fn extract_dependencies_meta(json: &Value) -> Option<serde_json::Value> {
875    json.get(FIELD_DEPENDENCIES_META).cloned()
876}
877
878fn extract_overrides(json: &Value) -> Option<serde_json::Value> {
879    json.get(FIELD_OVERRIDES).cloned()
880}
881
882fn extract_description(json: &Value) -> Option<String> {
883    json.get(FIELD_DESCRIPTION)
884        .and_then(|v| v.as_str())
885        .map(String::from)
886}
887
888fn extract_homepage_url(json: &Value) -> Option<String> {
889    match json.get(FIELD_HOMEPAGE) {
890        Some(Value::String(homepage)) => normalize_non_empty_string(homepage),
891        _ => None,
892    }
893}
894
/// Trims surrounding whitespace from `value` and returns the result as an owned string.
///
/// Returns `None` when nothing remains after trimming.
fn normalize_non_empty_string(value: &str) -> Option<String> {
    let stripped = value.trim();
    (!stripped.is_empty()).then(|| stripped.to_string())
}
903
904fn normalize_optional_party_url(value: &str) -> Option<String> {
905    let normalized = normalize_non_empty_string(value)?;
906
907    if normalized.eq_ignore_ascii_case("none") {
908        None
909    } else {
910        Some(normalized)
911    }
912}
913
914fn extract_keywords_as_vec(json: &Value) -> Vec<String> {
915    json.get(FIELD_KEYWORDS)
916        .and_then(|v| {
917            if let Some(str) = v.as_str() {
918                Some(vec![str.to_string()])
919            } else if let Some(arr) = v.as_array() {
920                let keywords: Vec<String> = arr
921                    .iter()
922                    .filter_map(|kw| kw.as_str())
923                    .map(String::from)
924                    .collect();
925                if keywords.is_empty() {
926                    None
927                } else {
928                    Some(keywords)
929                }
930            } else {
931                None
932            }
933        })
934        .unwrap_or_default()
935}
936
937fn extract_engines(json: &Value) -> Option<serde_json::Value> {
938    json.get(FIELD_ENGINES).cloned()
939}
940
941fn extract_raw_extra_data_field(json: &Value, field: &str) -> Option<serde_json::Value> {
942    json.get(field).cloned()
943}
944
945fn extract_package_manager(json: &Value) -> Option<String> {
946    json.get(FIELD_PACKAGE_MANAGER)
947        .and_then(|v| v.as_str())
948        .map(String::from)
949}
950
951fn extract_workspaces(json: &Value) -> Option<serde_json::Value> {
952    json.get(FIELD_WORKSPACES).cloned()
953}
954
955fn extract_private(json: &Value) -> Option<bool> {
956    json.get(FIELD_PRIVATE).and_then(|v| v.as_bool())
957}
958
959fn extract_bugs(json: &Value) -> Option<String> {
960    match json.get(FIELD_BUGS) {
961        Some(bugs) => {
962            if let Some(url) = bugs.as_str() {
963                normalize_non_empty_string(url)
964            } else if let Some(obj) = bugs.as_object() {
965                obj.get("url")
966                    .and_then(|v| v.as_str())
967                    .and_then(normalize_non_empty_string)
968            } else {
969                None
970            }
971        }
972        None => None,
973    }
974}
975
976fn extract_dist_hashes(dist: &Value) -> (Option<String>, Option<String>, Option<String>) {
977    let mut sha1 = dist
978        .get("shasum")
979        .and_then(|v| v.as_str())
980        .and_then(normalize_non_empty_string);
981    let mut sha256 = None;
982    let mut sha512 = None;
983
984    if let Some(integrity) = dist.get("integrity").and_then(|v| v.as_str())
985        && let Some((algo, hex_digest)) = parse_sri(integrity)
986    {
987        match algo.as_str() {
988            "sha1" => {
989                if sha1.is_none() {
990                    sha1 = Some(hex_digest);
991                }
992            }
993            "sha256" => sha256 = Some(hex_digest),
994            "sha512" => sha512 = Some(hex_digest),
995            _ => {}
996        }
997    }
998
999    (sha1, sha256, sha512)
1000}
1001
1002fn extract_dist_tarball(dist: &Value) -> Option<String> {
1003    dist.get("tarball")
1004        .or_else(|| dist.get("dnl_url"))
1005        .and_then(|v| v.as_str())
1006        .map(normalize_npm_registry_tarball_url)
1007}
1008
/// Upgrades `http://registry.npmjs.org/` URLs to `https://`, keeping the rest of the
/// URL unchanged. Any other URL is returned as an owned copy without modification.
fn normalize_npm_registry_tarball_url(url: &str) -> String {
    match url.strip_prefix("http://registry.npmjs.org/") {
        Some(path) => format!("https://registry.npmjs.org/{path}"),
        None => url.to_string(),
    }
}
1016
1017fn combine_extra_data(
1018    extra_data: Option<HashMap<String, serde_json::Value>>,
1019    additional_data: HashMap<String, serde_json::Value>,
1020) -> HashMap<String, serde_json::Value> {
1021    let mut combined = extra_data.unwrap_or_default();
1022    for (key, value) in additional_data {
1023        combined.insert(key, value);
1024    }
1025    combined
1026}
1027
// Invokes the crate-level `register_parser!` macro with this parser's metadata.
// NOTE(review): the macro definition is not visible in this file section; the
// arguments appear to be a human-readable description, path glob patterns, a
// package type, a primary language, and an optional documentation URL — confirm
// against the macro's definition.
1028crate::register_parser!(
1029    "npm package.json manifest",
1030    &["**/package.json"],
1031    "npm",
1032    "JavaScript",
1033    Some("https://docs.npmjs.com/cli/v10/configuring-npm/package-json"),
1034);