Skip to main content

provenant/parsers/
npm.rs

1//! Parser for npm package.json manifests.
2//!
3//! Extracts package metadata, dependencies, and license information from
4//! package.json files used by Node.js/npm projects.
5//!
6//! # Supported Formats
7//! - package.json (manifest)
8//!
9//! # Key Features
10//! - Full dependency extraction (dependencies, devDependencies, peerDependencies, optionalDependencies, bundledDependencies)
11//! - Package URL (purl) generation for scoped and unscoped packages
12//! - VCS repository URL extraction
13//! - Distribution integrity hash extraction (sha1, sha512)
14//! - Support for legacy formats (licenses array, license objects)
15//!
16//! # Implementation Notes
17//! - Uses serde_json for JSON parsing
18//! - Namespace format: `@org` for scoped packages (e.g., `@babel/core`)
19//! - Graceful error handling: logs warnings and returns default on parse failure
20
21use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Party};
22use crate::parser_warn as warn;
23use crate::parsers::utils::{npm_purl, parse_sri};
24use serde_json::Value;
25use std::collections::HashMap;
26use std::fs;
27use std::path::Path;
28
29use super::PackageParser;
30use super::license_normalization::normalize_spdx_declared_license;
31
// Top-level `package.json` keys read by this parser, grouped by purpose.

// Package identity and descriptive metadata.
const FIELD_NAME: &str = "name";
const FIELD_VERSION: &str = "version";
const FIELD_DESCRIPTION: &str = "description";
const FIELD_KEYWORDS: &str = "keywords";
const FIELD_HOMEPAGE: &str = "homepage";
const FIELD_BUGS: &str = "bugs";
const FIELD_REPOSITORY: &str = "repository";

// Licensing (current `license` key and the legacy `licenses` array).
const FIELD_LICENSE: &str = "license";
const FIELD_LICENSES: &str = "licenses";

// People associated with the package.
const FIELD_AUTHOR: &str = "author";
const FIELD_CONTRIBUTORS: &str = "contributors";
const FIELD_MAINTAINERS: &str = "maintainers";

// Dependency groups and their companion metadata.
const FIELD_DEPENDENCIES: &str = "dependencies";
const FIELD_DEV_DEPENDENCIES: &str = "devDependencies";
const FIELD_PEER_DEPENDENCIES: &str = "peerDependencies";
const FIELD_OPTIONAL_DEPENDENCIES: &str = "optionalDependencies";
const FIELD_BUNDLED_DEPENDENCIES: &str = "bundledDependencies";
const FIELD_PEER_DEPENDENCIES_META: &str = "peerDependenciesMeta";
const FIELD_DEPENDENCIES_META: &str = "dependenciesMeta";
const FIELD_RESOLUTIONS: &str = "resolutions";
const FIELD_OVERRIDES: &str = "overrides";

// Platform and runtime constraints.
const FIELD_ENGINES: &str = "engines";
const FIELD_OS: &str = "os";
const FIELD_CPU: &str = "cpu";
const FIELD_LIBC: &str = "libc";

// Registry, tooling and publication metadata.
const FIELD_DEPRECATED: &str = "deprecated";
const FIELD_HAS_BIN: &str = "hasBin";
const FIELD_DIST: &str = "dist";
const FIELD_PACKAGE_MANAGER: &str = "packageManager";
const FIELD_WORKSPACES: &str = "workspaces";
const FIELD_PRIVATE: &str = "private";
63
64/// npm package parser for package.json manifests.
65///
66/// Supports all npm dependency types (dependencies, devDependencies, peerDependencies,
67/// optionalDependencies, bundledDependencies) and workspace configurations.
68pub struct NpmParser;
69
70impl PackageParser for NpmParser {
71    const PACKAGE_TYPE: PackageType = PackageType::Npm;
72
73    fn extract_packages(path: &Path) -> Vec<PackageData> {
74        let (json, _field_lines) = match read_and_parse_json_with_lines(path) {
75            Ok((json, lines)) => (json, lines),
76            Err(e) => {
77                warn!("Failed to read or parse package.json at {:?}: {}", path, e);
78                return vec![default_package_data()];
79            }
80        };
81
82        let name = extract_non_empty_string(&json, FIELD_NAME);
83        let version = extract_non_empty_string(&json, FIELD_VERSION);
84        let namespace = extract_namespace(&name);
85        let package_name = extract_package_name(&name);
86        let description = extract_description(&json);
87
88        let extracted_license_statement = extract_license_statement(&json);
89        let (declared_license_expression, declared_license_expression_spdx, license_detections) =
90            normalize_spdx_declared_license(extract_declared_license_candidate(&json).as_deref());
91        let peer_dependencies_meta = extract_peer_dependencies_meta(&json);
92        let dependencies = extract_dependencies(&json, false);
93        let dev_dependencies = extract_dependencies(&json, true);
94        let peer_dependencies = extract_peer_dependencies(&json, &peer_dependencies_meta);
95        let optional_dependencies = extract_optional_dependencies(&json);
96        let bundled_dependencies = extract_bundled_dependencies(&json);
97        let purl = create_package_url(&name, &version, &namespace);
98        let keywords_vec = extract_keywords_as_vec(&json);
99
100        let mut extra_data_map = HashMap::new();
101
102        if let Some(resolutions) = extract_resolutions(&json) {
103            extra_data_map = combine_extra_data(Some(extra_data_map), resolutions);
104        }
105
106        if let Some(engines) = extract_engines(&json) {
107            extra_data_map.insert("engines".to_string(), engines);
108        }
109
110        for field in [
111            FIELD_OS,
112            FIELD_CPU,
113            FIELD_LIBC,
114            FIELD_DEPRECATED,
115            FIELD_HAS_BIN,
116        ] {
117            if let Some(value) = extract_raw_extra_data_field(&json, field) {
118                extra_data_map.insert(field.to_string(), value);
119            }
120        }
121
122        if let Some(package_manager) = extract_package_manager(&json) {
123            extra_data_map.insert(
124                "packageManager".to_string(),
125                serde_json::Value::String(package_manager),
126            );
127        }
128
129        if let Some(workspaces) = extract_workspaces(&json) {
130            extra_data_map.insert("workspaces".to_string(), workspaces);
131        }
132
133        if let Some(overrides) = extract_overrides(&json) {
134            extra_data_map.insert("overrides".to_string(), overrides);
135        }
136
137        if let Some(private) = extract_private(&json) {
138            extra_data_map.insert("private".to_string(), serde_json::Value::Bool(private));
139        }
140
141        if let Some(dependencies_meta) = extract_dependencies_meta(&json) {
142            extra_data_map.insert("dependenciesMeta".to_string(), dependencies_meta);
143        }
144
145        let extra_data = if extra_data_map.is_empty() {
146            None
147        } else {
148            Some(extra_data_map)
149        };
150
151        let (dist_sha1, dist_sha256, dist_sha512) = match json.get(FIELD_DIST) {
152            Some(dist) => extract_dist_hashes(dist),
153            None => (None, None, None),
154        };
155
156        let download_url = json
157            .get(FIELD_DIST)
158            .and_then(extract_dist_tarball)
159            .or_else(|| generate_registry_download_url(&namespace, &package_name, &version));
160
161        let api_data_url = generate_npm_api_url(&namespace, &package_name, &version);
162        let repository_homepage_url = generate_repository_homepage_url(&namespace, &package_name);
163        let repository_download_url =
164            generate_repository_download_url(&namespace, &package_name, &version);
165        let vcs_url = extract_vcs_url(&json);
166
167        vec![PackageData {
168            package_type: Some(Self::PACKAGE_TYPE),
169            namespace,
170            name: package_name,
171            version,
172            qualifiers: None,
173            subpath: None,
174            primary_language: Some("JavaScript".to_string()),
175            description,
176            release_date: None,
177            parties: extract_parties(&json),
178            keywords: keywords_vec,
179            homepage_url: extract_homepage_url(&json),
180            download_url,
181            size: None,
182            sha1: dist_sha1,
183            md5: None,
184            sha256: dist_sha256,
185            sha512: dist_sha512,
186            bug_tracking_url: extract_bugs(&json),
187            code_view_url: None,
188            vcs_url,
189            copyright: None,
190            holder: None,
191            declared_license_expression,
192            declared_license_expression_spdx,
193            license_detections,
194            other_license_expression: None,
195            other_license_expression_spdx: None,
196            other_license_detections: Vec::new(),
197            extracted_license_statement,
198            notice_text: None,
199            source_packages: Vec::new(),
200            file_references: Vec::new(),
201            is_private: json
202                .get("private")
203                .and_then(|v| v.as_bool())
204                .unwrap_or(false),
205            is_virtual: false,
206            extra_data,
207            dependencies: [
208                dependencies,
209                dev_dependencies,
210                peer_dependencies,
211                optional_dependencies,
212                bundled_dependencies,
213            ]
214            .concat(),
215            repository_homepage_url,
216            repository_download_url,
217            api_data_url,
218            datasource_id: Some(DatasourceId::NpmPackageJson),
219            purl,
220        }]
221    }
222
223    fn is_match(path: &Path) -> bool {
224        path.file_name().is_some_and(|name| name == "package.json")
225    }
226}
227
228/// Reads and parses a JSON file while tracking line numbers of fields
229fn read_and_parse_json_with_lines(path: &Path) -> Result<(Value, HashMap<String, usize>), String> {
230    // Read file once into string
231    let content = fs::read_to_string(path).map_err(|e| format!("Failed to read file: {}", e))?;
232
233    // Parse JSON
234    let json: Value =
235        serde_json::from_str(&content).map_err(|e| format!("Failed to parse JSON: {}", e))?;
236
237    // Track line numbers for each field by iterating over lines
238    let mut field_lines = HashMap::new();
239    for (line_num, line) in content.lines().enumerate() {
240        let trimmed = line.trim();
241        // Look for field names in the format: "field": value
242        if let Some(field_name) = extract_field_name(trimmed) {
243            field_lines.insert(field_name, line_num + 1); // 1-based line numbers
244        }
245    }
246
247    Ok((json, field_lines))
248}
249
/// Extracts the key from a single line of JSON text shaped like `"key": value`.
///
/// Surrounding whitespace is ignored. Returns `None` when the line does not
/// start with a double quote, when the opening quote is never closed, when the
/// key is empty, or when the closing quote is not followed by a `:`. The colon
/// check keeps string values that sit on their own line (for example array
/// elements such as `"lodash",`) from being reported as keys.
///
/// This is a line-oriented heuristic, not a JSON tokenizer: escaped quotes
/// inside a key are not interpreted.
fn extract_field_name(line: &str) -> Option<String> {
    let after_open = line.trim().strip_prefix('"')?;
    let (key, remainder) = after_open.split_once('"')?;

    if key.is_empty() || !remainder.trim_start().starts_with(':') {
        return None;
    }

    Some(key.to_string())
}
276
/// Returns the part of `name` before its first `/`, or `None` when there is
/// no name or the name contains no `/` (e.g. `@babel/core` yields `@babel`).
fn extract_namespace(name: &Option<String>) -> Option<String> {
    let (namespace, _) = name.as_deref()?.split_once('/')?;
    Some(namespace.to_string())
}
286
/// Returns the package name without its namespace.
///
/// For a name containing `/`, this is the segment between the first and
/// second `/` (e.g. `@babel/core` yields `core`); otherwise the name is
/// returned unchanged. `None` stays `None`.
fn extract_package_name(name: &Option<String>) -> Option<String> {
    let full_name = name.as_deref()?;
    // The second `/`-separated segment only exists when a `/` is present.
    let short_name = full_name.split('/').nth(1).unwrap_or(full_name);
    Some(short_name.to_string())
}
296
297fn create_package_url(
298    name: &Option<String>,
299    version: &Option<String>,
300    _namespace: &Option<String>,
301) -> Option<String> {
302    // Note: We extract and store namespace in PackageData for metadata purposes,
303    // but the full package name (e.g., "@babel/core") is used for PURL generation.
304    let name = name.as_ref()?;
305    npm_purl(name, version.as_deref())
306}
307
308fn extract_license_statement(json: &Value) -> Option<String> {
309    let mut statements = Vec::new();
310
311    if let Some(license_value) = json.get(FIELD_LICENSE) {
312        if let Some(license_str) = license_value.as_str() {
313            statements.push(format!("- {}", license_str));
314        } else if let Some(license_obj) = license_value.as_object()
315            && let Some(type_val) = license_obj.get("type").and_then(|v| v.as_str())
316        {
317            statements.push(format!("- type: {}", type_val));
318            if let Some(url_val) = license_obj.get("url").and_then(|v| v.as_str()) {
319                statements.push(format!("  url: {}", url_val));
320            }
321        }
322    }
323
324    if let Some(licenses) = json.get(FIELD_LICENSES).and_then(|v| v.as_array()) {
325        for license in licenses {
326            if let Some(license_obj) = license.as_object()
327                && let Some(type_val) = license_obj.get("type").and_then(|v| v.as_str())
328            {
329                statements.push(format!("- type: {}", type_val));
330                if let Some(url_val) = license_obj.get("url").and_then(|v| v.as_str()) {
331                    statements.push(format!("  url: {}", url_val));
332                }
333            }
334        }
335    }
336
337    if statements.is_empty() {
338        None
339    } else {
340        Some(format!("{}\n", statements.join("\n")))
341    }
342}
343
344fn extract_declared_license_candidate(json: &Value) -> Option<String> {
345    json.get(FIELD_LICENSE)
346        .and_then(|value| value.as_str())
347        .map(str::trim)
348        .filter(|value| !value.is_empty())
349        .map(str::to_string)
350}
351
352/// Extracts the repository URL from the repository field.
353/// Extracts and normalizes VCS URL from the repository field.
354/// Supports both string and object formats with optional 'type' and 'directory' fields.
355fn extract_vcs_url(json: &Value) -> Option<String> {
356    let (vcs_tool, vcs_repository) = match json.get(FIELD_REPOSITORY) {
357        Some(Value::String(url)) => {
358            let normalized = normalize_repo_url(url);
359            if normalized.is_empty() {
360                return None;
361            }
362            (None, normalized)
363        }
364        Some(Value::Object(obj)) => {
365            let repo_url = obj.get("url").and_then(|u| u.as_str()).unwrap_or("");
366            let normalized = normalize_repo_url(repo_url);
367            if normalized.is_empty() {
368                return None;
369            }
370            let tool = obj
371                .get("type")
372                .and_then(|t| t.as_str())
373                .unwrap_or("git")
374                .to_string();
375            let tool_for_prefix = if normalized.starts_with("git://")
376                || normalized.starts_with("git+")
377                || normalized.starts_with("hg://")
378                || normalized.starts_with("hg+")
379                || normalized.starts_with("svn://")
380                || normalized.starts_with("svn+")
381            {
382                None
383            } else {
384                Some(tool)
385            };
386            (tool_for_prefix, normalized)
387        }
388        _ => return None,
389    };
390
391    if vcs_repository.is_empty() {
392        return None;
393    }
394
395    let mut vcs_url = vcs_tool.map_or_else(
396        || vcs_repository.clone(),
397        |tool| format!("{}+{}", tool, vcs_repository),
398    );
399
400    if let Some(vcs_revision) = json
401        .get("gitHead")
402        .and_then(|v| v.as_str())
403        .and_then(normalize_non_empty_string)
404    {
405        vcs_url.push('@');
406        vcs_url.push_str(&vcs_revision);
407    }
408
409    if let Some(Value::Object(obj)) = json.get(FIELD_REPOSITORY)
410        && let Some(directory) = obj.get("directory").and_then(|d| d.as_str())
411    {
412        vcs_url.push('#');
413        vcs_url.push_str(directory);
414    }
415
416    Some(vcs_url)
417}
418
/// Normalizes a repository reference into a URL string.
///
/// The input is trimmed first, then the rules below are tried in order:
///
/// 1. Blank input yields an empty string.
/// 2. URLs that already start with a recognised `http(s)`, `git`, `hg` or
///    `svn` scheme are returned unchanged.
/// 3. `git@host:path` becomes `https://host/path`.
/// 4. `github:`, `gitlab:`, `bitbucket:` and `gist:` shortcuts are expanded to
///    the matching `https://` host; any other `prefix:rest` value is returned
///    unchanged.
/// 5. A colon-free value with exactly one `/` is treated as a GitHub
///    `owner/repo` shorthand.
/// 6. Anything else is returned unchanged.
fn normalize_repo_url(url: &str) -> String {
    const PASSTHROUGH_SCHEMES: [&str; 12] = [
        "https://",
        "http://",
        "git://",
        "git+git://",
        "git+https://",
        "git+http://",
        "hg://",
        "hg+http://",
        "hg+https://",
        "svn://",
        "svn+http://",
        "svn+https://",
    ];

    let trimmed = url.trim();
    if trimmed.is_empty() {
        return String::new();
    }

    let already_normalized = PASSTHROUGH_SCHEMES
        .iter()
        .any(|scheme| trimmed.starts_with(scheme));
    if already_normalized {
        return trimmed.to_string();
    }

    // scp-like syntax: git@host:owner/repo
    if let Some(rest) = trimmed.strip_prefix("git@") {
        if let Some((host, path)) = rest.split_once(':') {
            return format!("https://{}/{}", host, path);
        }
    }

    match trimmed.split_once(':') {
        Some(("github", repo)) => format!("https://github.com/{}", repo),
        Some(("gitlab", repo)) => format!("https://gitlab.com/{}", repo),
        Some(("bitbucket", repo)) => format!("https://bitbucket.org/{}", repo),
        Some(("gist", repo)) => format!("https://gist.github.com/{}", repo),
        // Unknown prefix (e.g. another URL scheme): leave untouched.
        Some(_) => trimmed.to_string(),
        // No colon at all: `owner/repo` shorthand refers to GitHub.
        None if trimmed.matches('/').count() == 1 => format!("https://github.com/{}", trimmed),
        None => trimmed.to_string(),
    }
}
473
474/// Extracts party information (emails) from the `author`, `contributors`, and `maintainers` fields.
475fn extract_parties(json: &Value) -> Vec<Party> {
476    let mut parties = Vec::new();
477
478    // Extract author field (can be single value or array)
479    if let Some(author) = json.get(FIELD_AUTHOR) {
480        if let Some(author_list) = extract_parties_from_array(author) {
481            // Author is an array
482            for mut party in author_list {
483                if party.role.is_none() {
484                    party.role = Some("author".to_string());
485                }
486                parties.push(party);
487            }
488        } else if let Some(mut party) = extract_party_from_field(author) {
489            // Author is a single value
490            party.role = Some("author".to_string());
491            parties.push(party);
492        }
493    }
494
495    // Extract contributors field
496    if let Some(contributors) = json.get(FIELD_CONTRIBUTORS)
497        && let Some(mut party_list) = extract_parties_from_array(contributors)
498    {
499        for party in &mut party_list {
500            if party.role.is_none() {
501                party.role = Some("contributor".to_string());
502            }
503        }
504        parties.extend(party_list);
505    }
506
507    // Extract maintainers field
508    if let Some(maintainers) = json.get(FIELD_MAINTAINERS)
509        && let Some(mut party_list) = extract_parties_from_array(maintainers)
510    {
511        for party in &mut party_list {
512            if party.role.is_none() {
513                party.role = Some("maintainer".to_string());
514            }
515        }
516        parties.extend(party_list);
517    }
518
519    parties
520}
521
522/// Extracts a party from a JSON field, which can be a string or an object with name/email fields.
523fn extract_party_from_field(field: &Value) -> Option<Party> {
524    match field {
525        Value::String(s) => {
526            // Try to extract email from "Name <email>" format
527            if let Some(email) = extract_email_from_string(s) {
528                Some(Party {
529                    r#type: Some("person".to_string()),
530                    role: None,
531                    name: extract_name_from_author_string(s),
532                    email: Some(email),
533                    url: None,
534                    organization: None,
535                    organization_url: None,
536                    timezone: None,
537                })
538            } else {
539                // Treat the string as name if no email found
540                Some(Party {
541                    r#type: Some("person".to_string()),
542                    role: None,
543                    name: Some(s.clone()),
544                    email: None,
545                    url: None,
546                    organization: None,
547                    organization_url: None,
548                    timezone: None,
549                })
550            }
551        }
552        Value::Object(obj) => Some(Party {
553            r#type: Some("person".to_string()),
554            role: obj.get("role").and_then(|v| v.as_str()).map(String::from),
555            name: obj.get("name").and_then(|v| v.as_str()).map(String::from),
556            email: obj.get("email").and_then(|v| v.as_str()).map(String::from),
557            url: obj
558                .get("url")
559                .and_then(|v| v.as_str())
560                .and_then(normalize_optional_party_url),
561            organization: None,
562            organization_url: None,
563            timezone: None,
564        }),
565        _ => None,
566    }
567}
568
569/// Extracts multiple parties from a JSON array.
570fn extract_parties_from_array(array: &Value) -> Option<Vec<Party>> {
571    if let Value::Array(items) = array {
572        let parties = items
573            .iter()
574            .filter_map(extract_party_from_field)
575            .collect::<Vec<_>>();
576        if !parties.is_empty() {
577            return Some(parties);
578        }
579    }
580    None
581}
582
/// Extracts the address from a string shaped like `Name <email@example.com>`.
///
/// Returns the text between the first `<` and the first `>` that follows it,
/// exactly as written (no trimming). Returns `None` when there is no `<`, or
/// no `>` after it.
///
/// The closing bracket is searched for only after the opening one, so a stray
/// `>` earlier in the string (e.g. `"A > B <a@b.c>"`) does not hide the address.
fn extract_email_from_string(author_str: &str) -> Option<String> {
    let (_, after_open) = author_str.split_once('<')?;
    let (email, _) = after_open.split_once('>')?;
    Some(email.to_string())
}
593
/// Extracts the name portion of a string shaped like `Name <email@example.com>`.
///
/// When a `<` is present, the trimmed text before it is returned, or `None`
/// if that text is blank. Without a `<`, the whole string is returned trimmed
/// (which may be an empty string).
fn extract_name_from_author_string(author_str: &str) -> Option<String> {
    match author_str.split_once('<') {
        Some((before_bracket, _)) => {
            let name = before_bracket.trim();
            if name.is_empty() {
                None
            } else {
                Some(name.to_string())
            }
        }
        None => Some(author_str.trim().to_string()),
    }
}
606
607fn default_package_data() -> PackageData {
608    PackageData {
609        package_type: Some(NpmParser::PACKAGE_TYPE),
610        primary_language: Some("JavaScript".to_string()),
611        datasource_id: Some(DatasourceId::NpmPackageJson),
612        ..Default::default()
613    }
614}
615
/// Parses an npm alias requirement of the form `npm:<package>@<constraint>`.
///
/// Returns `(actual_package_name, constraint)`. Scoped targets keep their
/// leading `@` (`npm:@babel/core@7.0.0` yields `("@babel/core", "7.0.0")`).
/// An alias without a version (`npm:lodash`, `npm:@scope/pkg`) yields the
/// package name with an empty constraint.
///
/// Returns `None` for anything that does not start with `npm:` or that names
/// no package. Requiring the prefix matters because other requirement forms
/// also contain `:` and `@` (for example `git+ssh://git@host:owner/repo.git`)
/// and must not be mistaken for aliases.
fn parse_alias_adapter(version_str: &str) -> Option<(&str, &str)> {
    let spec = version_str.strip_prefix("npm:")?;
    if spec.is_empty() {
        return None;
    }

    match spec.rsplit_once('@') {
        // The last '@' separates name and constraint, unless it is the scope
        // marker at the very start of the name.
        Some((package, constraint)) if !package.is_empty() => Some((package, constraint)),
        _ => Some((spec, "")),
    }
}
624
625fn extract_non_empty_string(json: &Value, field: &str) -> Option<String> {
626    json.get(field)
627        .and_then(|value| value.as_str())
628        .map(str::trim)
629        .filter(|value| !value.is_empty())
630        .map(String::from)
631}
632
/// Builds the `registry.npmjs.org` API URL for a package.
///
/// With a namespace, `<namespace>/<name>` is used with every `/` encoded as
/// `%2f`. The version is appended as a final path segment when present.
/// Returns `None` when `name` is `None`.
fn generate_npm_api_url(
    namespace: &Option<String>,
    name: &Option<String>,
    version: &Option<String>,
) -> Option<String> {
    const REGISTRY: &str = "https://registry.npmjs.org";

    let name = name.as_deref()?;
    let package_segment = match namespace {
        Some(ns) => format!("{}/{}", ns, name).replace('/', "%2f"),
        None => name.to_string(),
    };

    Some(match version {
        Some(ver) => format!("{}/{}/{}", REGISTRY, package_segment, ver),
        None => format!("{}/{}", REGISTRY, package_segment),
    })
}
655
/// Builds the registry path segment for a package.
///
/// Returns `<namespace>/<name>` when both are present, `<name>` when only the
/// name is present, and `None` when there is no name.
fn build_registry_package_path(
    namespace: &Option<String>,
    name: &Option<String>,
) -> Option<String> {
    let name = name.as_deref()?;
    let path = match namespace.as_deref() {
        Some(namespace) => format!("{namespace}/{name}"),
        None => name.to_string(),
    };
    Some(path)
}
666
667fn generate_repository_homepage_url(
668    namespace: &Option<String>,
669    name: &Option<String>,
670) -> Option<String> {
671    build_registry_package_path(namespace, name)
672        .map(|package_path| format!("https://www.npmjs.com/package/{package_path}"))
673}
674
675fn generate_registry_download_url(
676    namespace: &Option<String>,
677    name: &Option<String>,
678    version: &Option<String>,
679) -> Option<String> {
680    match (
681        build_registry_package_path(namespace, name),
682        name.as_ref(),
683        version.as_ref(),
684    ) {
685        (Some(package_path), Some(name), Some(version)) => Some(format!(
686            "https://registry.npmjs.org/{}/-/{}-{}.tgz",
687            package_path, name, version
688        )),
689        _ => None,
690    }
691}
692
693fn generate_repository_download_url(
694    namespace: &Option<String>,
695    name: &Option<String>,
696    version: &Option<String>,
697) -> Option<String> {
698    generate_registry_download_url(namespace, name, version)
699}
700
701fn extract_dependency_group(
702    json: &Value,
703    field: &str,
704    scope: &str,
705    is_runtime: bool,
706    is_optional: bool,
707    optional_meta: Option<&HashMap<String, bool>>,
708) -> Vec<Dependency> {
709    json.get(field)
710        .and_then(|deps| deps.as_object())
711        .map_or_else(Vec::new, |deps| {
712            deps.iter()
713                .filter_map(|(name, version)| {
714                    let version_str = version.as_str()?;
715
716                    if version_str.starts_with("workspace:") {
717                        let package_url = npm_purl(name, None)?;
718                        let is_opt = if let Some(meta) = optional_meta {
719                            meta.get(name).copied()
720                        } else {
721                            Some(is_optional)
722                        };
723                        return Some(Dependency {
724                            purl: Some(package_url),
725                            extracted_requirement: Some(version_str.to_string()),
726                            scope: Some(scope.to_string()),
727                            is_runtime: Some(is_runtime),
728                            is_optional: is_opt,
729                            is_pinned: Some(false),
730                            is_direct: Some(true),
731                            resolved_package: None,
732                            extra_data: None,
733                        });
734                    }
735
736                    let actual_package_name = if let Some((actual_package_name, _constraint)) =
737                        parse_alias_adapter(version_str)
738                    {
739                        actual_package_name
740                    } else {
741                        name.as_str()
742                    };
743
744                    let package_url = npm_purl(actual_package_name, None)?;
745
746                    let is_opt = if let Some(meta) = optional_meta {
747                        meta.get(name).copied()
748                    } else {
749                        Some(is_optional)
750                    };
751
752                    Some(Dependency {
753                        purl: Some(package_url),
754                        extracted_requirement: Some(version_str.to_string()),
755                        scope: Some(scope.to_string()),
756                        is_runtime: Some(is_runtime),
757                        is_optional: is_opt,
758                        is_pinned: Some(false),
759                        is_direct: Some(true),
760                        resolved_package: None,
761                        extra_data: None,
762                    })
763                })
764                .collect()
765        })
766}
767
768/// Extracts dependencies from the `dependencies` or `devDependencies` field in the JSON.
769fn extract_dependencies(json: &Value, is_optional: bool) -> Vec<Dependency> {
770    let field = if is_optional {
771        FIELD_DEV_DEPENDENCIES
772    } else {
773        FIELD_DEPENDENCIES
774    };
775
776    let scope = if is_optional {
777        "devDependencies"
778    } else {
779        "dependencies"
780    };
781
782    extract_dependency_group(json, field, scope, !is_optional, is_optional, None)
783}
784
785fn extract_peer_dependencies(json: &Value, meta: &HashMap<String, bool>) -> Vec<Dependency> {
786    extract_dependency_group(
787        json,
788        FIELD_PEER_DEPENDENCIES,
789        "peerDependencies",
790        true,
791        false,
792        Some(meta),
793    )
794}
795
796/// Extracts optional dependencies from the `optionalDependencies` field in the JSON.
797/// Optional dependencies are marked with is_optional: true, is_runtime: true, and scope "optionalDependencies".
798fn extract_optional_dependencies(json: &Value) -> Vec<Dependency> {
799    extract_dependency_group(
800        json,
801        FIELD_OPTIONAL_DEPENDENCIES,
802        "optionalDependencies",
803        true,
804        true,
805        None,
806    )
807}
808
809fn extract_bundled_dependencies(json: &Value) -> Vec<Dependency> {
810    if let Some(bundled) = json
811        .get(FIELD_BUNDLED_DEPENDENCIES)
812        .and_then(|v| v.as_array())
813    {
814        extract_bundled_list(bundled)
815    } else {
816        Vec::new()
817    }
818}
819
820/// Helper function to extract bundled dependencies from an array of package names.
821fn extract_bundled_list(bundled_array: &[Value]) -> Vec<Dependency> {
822    bundled_array
823        .iter()
824        .filter_map(|value| {
825            let name = value.as_str()?;
826            // Create PURL without version for bundled dependencies
827            let package_url = npm_purl(name, None)?;
828
829            Some(Dependency {
830                purl: Some(package_url),
831                extracted_requirement: None,
832                scope: Some("bundledDependencies".to_string()),
833                is_runtime: Some(true),
834                is_optional: Some(false),
835                is_pinned: Some(false),
836                is_direct: Some(true),
837                resolved_package: None,
838                extra_data: None,
839            })
840        })
841        .collect()
842}
843
844/// Extracts Yarn resolutions from the `resolutions` field.
845/// Returns resolutions as a HashMap to be stored in extra_data.
846fn extract_resolutions(json: &Value) -> Option<HashMap<String, serde_json::Value>> {
847    json.get(FIELD_RESOLUTIONS)
848        .and_then(|resolutions| resolutions.as_object())
849        .map(|resolutions_obj| {
850            let mut extra_data = HashMap::new();
851            extra_data.insert(
852                "resolutions".to_string(),
853                serde_json::Value::Object(resolutions_obj.clone()),
854            );
855            extra_data
856        })
857}
858
859fn extract_peer_dependencies_meta(json: &Value) -> HashMap<String, bool> {
860    json.get(FIELD_PEER_DEPENDENCIES_META)
861        .and_then(|meta| meta.as_object())
862        .map_or_else(HashMap::new, |meta_obj| {
863            meta_obj
864                .iter()
865                .filter_map(|(package_name, meta_value)| {
866                    meta_value.as_object().and_then(|obj| {
867                        obj.get("optional")
868                            .and_then(|opt| opt.as_bool())
869                            .map(|optional| (package_name.clone(), optional))
870                    })
871                })
872                .collect()
873        })
874}
875
876fn extract_dependencies_meta(json: &Value) -> Option<serde_json::Value> {
877    json.get(FIELD_DEPENDENCIES_META).cloned()
878}
879
880fn extract_overrides(json: &Value) -> Option<serde_json::Value> {
881    json.get(FIELD_OVERRIDES).cloned()
882}
883
884fn extract_description(json: &Value) -> Option<String> {
885    json.get(FIELD_DESCRIPTION)
886        .and_then(|v| v.as_str())
887        .map(String::from)
888}
889
890fn extract_homepage_url(json: &Value) -> Option<String> {
891    match json.get(FIELD_HOMEPAGE) {
892        Some(Value::String(homepage)) => normalize_non_empty_string(homepage),
893        _ => None,
894    }
895}
896
/// Trims surrounding whitespace from `value` and returns the result as an
/// owned string, or `None` when nothing is left after trimming.
fn normalize_non_empty_string(value: &str) -> Option<String> {
    let trimmed = value.trim();
    (!trimmed.is_empty()).then(|| trimmed.to_owned())
}
905
906fn normalize_optional_party_url(value: &str) -> Option<String> {
907    let normalized = normalize_non_empty_string(value)?;
908
909    if normalized.eq_ignore_ascii_case("none") {
910        None
911    } else {
912        Some(normalized)
913    }
914}
915
916fn extract_keywords_as_vec(json: &Value) -> Vec<String> {
917    json.get(FIELD_KEYWORDS)
918        .and_then(|v| {
919            if let Some(str) = v.as_str() {
920                Some(vec![str.to_string()])
921            } else if let Some(arr) = v.as_array() {
922                let keywords: Vec<String> = arr
923                    .iter()
924                    .filter_map(|kw| kw.as_str())
925                    .map(String::from)
926                    .collect();
927                if keywords.is_empty() {
928                    None
929                } else {
930                    Some(keywords)
931                }
932            } else {
933                None
934            }
935        })
936        .unwrap_or_default()
937}
938
939fn extract_engines(json: &Value) -> Option<serde_json::Value> {
940    json.get(FIELD_ENGINES).cloned()
941}
942
943fn extract_raw_extra_data_field(json: &Value, field: &str) -> Option<serde_json::Value> {
944    json.get(field).cloned()
945}
946
947fn extract_package_manager(json: &Value) -> Option<String> {
948    json.get(FIELD_PACKAGE_MANAGER)
949        .and_then(|v| v.as_str())
950        .map(String::from)
951}
952
953fn extract_workspaces(json: &Value) -> Option<serde_json::Value> {
954    json.get(FIELD_WORKSPACES).cloned()
955}
956
957fn extract_private(json: &Value) -> Option<bool> {
958    json.get(FIELD_PRIVATE).and_then(|v| v.as_bool())
959}
960
961fn extract_bugs(json: &Value) -> Option<String> {
962    match json.get(FIELD_BUGS) {
963        Some(bugs) => {
964            if let Some(url) = bugs.as_str() {
965                normalize_non_empty_string(url)
966            } else if let Some(obj) = bugs.as_object() {
967                obj.get("url")
968                    .and_then(|v| v.as_str())
969                    .and_then(normalize_non_empty_string)
970            } else {
971                None
972            }
973        }
974        None => None,
975    }
976}
977
978fn extract_dist_hashes(dist: &Value) -> (Option<String>, Option<String>, Option<String>) {
979    let mut sha1 = dist
980        .get("shasum")
981        .and_then(|v| v.as_str())
982        .and_then(normalize_non_empty_string);
983    let mut sha256 = None;
984    let mut sha512 = None;
985
986    if let Some(integrity) = dist.get("integrity").and_then(|v| v.as_str())
987        && let Some((algo, hex_digest)) = parse_sri(integrity)
988    {
989        match algo.as_str() {
990            "sha1" => {
991                if sha1.is_none() {
992                    sha1 = Some(hex_digest);
993                }
994            }
995            "sha256" => sha256 = Some(hex_digest),
996            "sha512" => sha512 = Some(hex_digest),
997            _ => {}
998        }
999    }
1000
1001    (sha1, sha256, sha512)
1002}
1003
1004fn extract_dist_tarball(dist: &Value) -> Option<String> {
1005    dist.get("tarball")
1006        .or_else(|| dist.get("dnl_url"))
1007        .and_then(|v| v.as_str())
1008        .map(normalize_npm_registry_tarball_url)
1009}
1010
/// Rewrites a URL starting with `http://registry.npmjs.org/` to its `https://`
/// form, keeping the rest of the URL unchanged. Any other URL is returned
/// as-is.
fn normalize_npm_registry_tarball_url(url: &str) -> String {
    match url.strip_prefix("http://registry.npmjs.org/") {
        Some(path) => format!("https://registry.npmjs.org/{path}"),
        None => url.to_owned(),
    }
}
1018
1019fn combine_extra_data(
1020    extra_data: Option<HashMap<String, serde_json::Value>>,
1021    additional_data: HashMap<String, serde_json::Value>,
1022) -> HashMap<String, serde_json::Value> {
1023    let mut combined = extra_data.unwrap_or_default();
1024    for (key, value) in additional_data {
1025        combined.insert(key, value);
1026    }
1027    combined
1028}
1029
// Invokes the crate-level `register_parser!` macro for this module.
// NOTE(review): the macro definition is not visible from here. The arguments
// look like a human-readable description, the path glob patterns to match, the
// package type, the primary language, and an optional documentation URL —
// confirm against the macro definition.
crate::register_parser!(
    "npm package.json manifest",
    &["**/package.json"],
    "npm",
    "JavaScript",
    Some("https://docs.npmjs.com/cli/v10/configuring-npm/package-json"),
);