Skip to main content

provenant/parsers/
npm.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4//! Parser for npm package.json manifests.
5//!
6//! Extracts package metadata, dependencies, and license information from
7//! package.json files used by Node.js/npm projects.
8//!
9//! # Supported Formats
10//! - package.json (manifest)
11//!
12//! # Key Features
13//! - Full dependency extraction (dependencies, devDependencies, peerDependencies, optionalDependencies, bundledDependencies)
14//! - Package URL (purl) generation for scoped and unscoped packages
15//! - VCS repository URL extraction
//! - Distribution integrity hash extraction (sha1, sha256, sha512)
17//! - Support for legacy formats (licenses array, license objects)
18//!
19//! # Implementation Notes
20//! - Uses serde_json for JSON parsing
21//! - Namespace format: `@org` for scoped packages (e.g., `@babel/core`)
22//! - Graceful error handling: logs warnings and returns default on parse failure
23
24use crate::models::{
25    DatasourceId, Dependency, PackageData, PackageType, Party, Sha1Digest, Sha256Digest,
26    Sha512Digest,
27};
28use crate::parser_warn as warn;
29use crate::parsers::utils::{MAX_ITERATION_COUNT, npm_purl, parse_sri, truncate_field};
30use serde_json::Value;
31use std::collections::HashMap;
32use std::path::Path;
33
34use super::PackageParser;
35use super::license_normalization::normalize_spdx_declared_license;
36use super::metadata::ParserMetadata;
37
// JSON object keys used when reading a parsed `package.json` document.
// Keeping them as named constants avoids repeating string literals across the
// extraction helpers in this file.
const FIELD_NAME: &str = "name";
const FIELD_VERSION: &str = "version";
const FIELD_LICENSE: &str = "license";
const FIELD_LICENSES: &str = "licenses";
const FIELD_HOMEPAGE: &str = "homepage";
const FIELD_REPOSITORY: &str = "repository";
const FIELD_AUTHOR: &str = "author";
const FIELD_CONTRIBUTORS: &str = "contributors";
const FIELD_MAINTAINERS: &str = "maintainers";
const FIELD_DEPENDENCIES: &str = "dependencies";
const FIELD_DEV_DEPENDENCIES: &str = "devDependencies";
const FIELD_PEER_DEPENDENCIES: &str = "peerDependencies";
const FIELD_OPTIONAL_DEPENDENCIES: &str = "optionalDependencies";
const FIELD_BUNDLED_DEPENDENCIES: &str = "bundledDependencies";
const FIELD_RESOLUTIONS: &str = "resolutions";
const FIELD_DESCRIPTION: &str = "description";
const FIELD_KEYWORDS: &str = "keywords";
const FIELD_ENGINES: &str = "engines";
const FIELD_OS: &str = "os";
const FIELD_CPU: &str = "cpu";
const FIELD_LIBC: &str = "libc";
const FIELD_DEPRECATED: &str = "deprecated";
const FIELD_HAS_BIN: &str = "hasBin";
const FIELD_PACKAGE_MANAGER: &str = "packageManager";
const FIELD_WORKSPACES: &str = "workspaces";
const FIELD_PRIVATE: &str = "private";
const FIELD_BUGS: &str = "bugs";
const FIELD_DIST: &str = "dist";
const FIELD_OVERRIDES: &str = "overrides";
const FIELD_PEER_DEPENDENCIES_META: &str = "peerDependenciesMeta";
const FIELD_DEPENDENCIES_META: &str = "dependenciesMeta";
69
/// npm package parser for package.json manifests.
///
/// This is a stateless unit struct; all parsing behaviour lives in its
/// `PackageParser` implementation.
///
/// Supports all npm dependency types (dependencies, devDependencies, peerDependencies,
/// optionalDependencies, bundledDependencies) and workspace configurations.
pub struct NpmParser;
75
76impl PackageParser for NpmParser {
77    const PACKAGE_TYPE: PackageType = PackageType::Npm;
78
79    fn metadata() -> Vec<ParserMetadata> {
80        vec![ParserMetadata {
81            description: "npm package.json manifest",
82            file_patterns: &["**/package.json"],
83            package_type: "npm",
84            primary_language: "JavaScript",
85            documentation_url: Some("https://docs.npmjs.com/cli/v10/configuring-npm/package-json"),
86        }]
87    }
88
89    fn extract_packages(path: &Path) -> Vec<PackageData> {
90        let (json, _field_lines) = match read_and_parse_json_with_lines(path) {
91            Ok((json, lines)) => (json, lines),
92            Err(e) => {
93                warn!("Failed to read or parse package.json at {:?}: {}", path, e);
94                return vec![default_package_data()];
95            }
96        };
97
98        let name = extract_non_empty_string(&json, FIELD_NAME);
99        let version = extract_non_empty_string(&json, FIELD_VERSION);
100        let namespace = extract_namespace(&name);
101        let package_name = extract_package_name(&name);
102        let description = extract_description(&json);
103
104        let extracted_license_statement = extract_license_statement(&json);
105        let (declared_license_expression, declared_license_expression_spdx, license_detections) =
106            normalize_spdx_declared_license(extract_declared_license_candidate(&json).as_deref());
107        let peer_dependencies_meta = extract_peer_dependencies_meta(&json);
108        let dependencies = extract_dependencies(&json, false);
109        let dev_dependencies = extract_dependencies(&json, true);
110        let peer_dependencies = extract_peer_dependencies(&json, &peer_dependencies_meta);
111        let optional_dependencies = extract_optional_dependencies(&json);
112        let bundled_dependencies = extract_bundled_dependencies(&json);
113        let purl = create_package_url(&name, &version, &namespace);
114        let keywords_vec = extract_keywords_as_vec(&json);
115
116        let mut extra_data_map = HashMap::new();
117
118        if let Some(resolutions) = extract_resolutions(&json) {
119            extra_data_map = combine_extra_data(Some(extra_data_map), resolutions);
120        }
121
122        if let Some(engines) = extract_engines(&json) {
123            extra_data_map.insert("engines".to_string(), engines);
124        }
125
126        for field in [
127            FIELD_OS,
128            FIELD_CPU,
129            FIELD_LIBC,
130            FIELD_DEPRECATED,
131            FIELD_HAS_BIN,
132        ] {
133            if let Some(value) = extract_raw_extra_data_field(&json, field) {
134                extra_data_map.insert(field.to_string(), value);
135            }
136        }
137
138        if let Some(package_manager) = extract_package_manager(&json) {
139            extra_data_map.insert(
140                "packageManager".to_string(),
141                serde_json::Value::String(package_manager),
142            );
143        }
144
145        if let Some(workspaces) = extract_workspaces(&json) {
146            extra_data_map.insert("workspaces".to_string(), workspaces);
147        }
148
149        if let Some(overrides) = extract_overrides(&json) {
150            extra_data_map.insert("overrides".to_string(), overrides);
151        }
152
153        if let Some(private) = extract_private(&json) {
154            extra_data_map.insert("private".to_string(), serde_json::Value::Bool(private));
155        }
156
157        if let Some(dependencies_meta) = extract_dependencies_meta(&json) {
158            extra_data_map.insert("dependenciesMeta".to_string(), dependencies_meta);
159        }
160
161        let extra_data = if extra_data_map.is_empty() {
162            None
163        } else {
164            Some(extra_data_map)
165        };
166
167        let (dist_sha1, dist_sha256, dist_sha512) = match json.get(FIELD_DIST) {
168            Some(dist) => extract_dist_hashes(dist),
169            None => (None, None, None),
170        };
171
172        let download_url = json
173            .get(FIELD_DIST)
174            .and_then(extract_dist_tarball)
175            .or_else(|| generate_registry_download_url(&namespace, &package_name, &version));
176
177        let api_data_url = generate_npm_api_url(&namespace, &package_name, &version);
178        let repository_homepage_url = generate_repository_homepage_url(&namespace, &package_name);
179        let repository_download_url =
180            generate_repository_download_url(&namespace, &package_name, &version);
181        let vcs_url = extract_vcs_url(&json);
182
183        vec![PackageData {
184            package_type: Some(Self::PACKAGE_TYPE),
185            namespace,
186            name: package_name,
187            version,
188            qualifiers: None,
189            subpath: None,
190            primary_language: Some("JavaScript".to_string()),
191            description,
192            release_date: None,
193            parties: extract_parties(&json),
194            keywords: keywords_vec,
195            homepage_url: extract_homepage_url(&json),
196            download_url,
197            size: None,
198            sha1: dist_sha1.and_then(|h| Sha1Digest::from_hex(&h).ok()),
199            md5: None,
200            sha256: dist_sha256.and_then(|h| Sha256Digest::from_hex(&h).ok()),
201            sha512: dist_sha512.and_then(|h| Sha512Digest::from_hex(&h).ok()),
202            bug_tracking_url: extract_bugs(&json),
203            code_view_url: None,
204            vcs_url,
205            copyright: None,
206            holder: None,
207            declared_license_expression,
208            declared_license_expression_spdx,
209            license_detections,
210            other_license_expression: None,
211            other_license_expression_spdx: None,
212            other_license_detections: Vec::new(),
213            extracted_license_statement,
214            notice_text: None,
215            source_packages: Vec::new(),
216            file_references: Vec::new(),
217            is_private: json
218                .get("private")
219                .and_then(|v| v.as_bool())
220                .unwrap_or(false),
221            is_virtual: false,
222            extra_data,
223            dependencies: [
224                dependencies,
225                dev_dependencies,
226                peer_dependencies,
227                optional_dependencies,
228                bundled_dependencies,
229            ]
230            .concat(),
231            repository_homepage_url,
232            repository_download_url,
233            api_data_url,
234            datasource_id: Some(DatasourceId::NpmPackageJson),
235            purl,
236        }]
237    }
238
239    fn is_match(path: &Path) -> bool {
240        path.file_name().is_some_and(|name| name == "package.json")
241    }
242}
243
244/// Reads and parses a JSON file while tracking line numbers of fields
245fn read_and_parse_json_with_lines(path: &Path) -> Result<(Value, HashMap<String, usize>), String> {
246    // Read file once into string
247    let content = crate::parsers::utils::read_file_to_string(path, None)
248        .map_err(|e| format!("Failed to read file: {}", e))?;
249
250    // Parse JSON
251    let json: Value =
252        serde_json::from_str(&content).map_err(|e| format!("Failed to parse JSON: {}", e))?;
253
254    // Track line numbers for each field by iterating over lines
255    let mut field_lines = HashMap::new();
256    for (line_num, line) in content.lines().enumerate().take(MAX_ITERATION_COUNT) {
257        let trimmed = line.trim();
258        if let Some(field_name) = extract_field_name(trimmed) {
259            field_lines.insert(field_name, line_num + 1);
260        }
261    }
262
263    Ok((json, field_lines))
264}
265
/// Extracts the field name from a single line of JSON text.
///
/// A line is considered to start a field only when, after trimming, it
/// begins with a double-quoted key that is followed (optionally after
/// whitespace) by a colon. This rejects bare string values such as array
/// elements (`"lodash",`) and lines whose opening quote is never closed.
///
/// Backslash escapes inside the key are honoured when locating the closing
/// quote; the key is returned verbatim (escape sequences are not decoded).
///
/// Returns `None` for empty keys and for lines that do not start a field.
fn extract_field_name(line: &str) -> Option<String> {
    let rest = line.trim().strip_prefix('"')?;

    // Locate the first unescaped quote: that is the end of the key.
    let mut escaped = false;
    let closing = rest.char_indices().find_map(|(idx, c)| {
        if escaped {
            escaped = false;
            None
        } else if c == '\\' {
            escaped = true;
            None
        } else if c == '"' {
            Some(idx)
        } else {
            None
        }
    })?;

    // `closing` is the byte offset of an ASCII quote, so both slices fall on
    // character boundaries.
    let field_name = &rest[..closing];
    let after_key = rest[closing + 1..].trim_start();

    if field_name.is_empty() || !after_key.starts_with(':') {
        return None;
    }

    Some(field_name.to_string())
}
292
/// Returns the part of `name` before its first `/`, if there is one.
///
/// For a scoped name such as `@babel/core` this is `@babel`; names without a
/// slash (and a missing name) yield `None`.
fn extract_namespace(name: &Option<String>) -> Option<String> {
    let (scope, _rest) = name.as_deref()?.split_once('/')?;
    Some(scope.to_string())
}
302
/// Returns the bare package name, i.e. `name` without a leading scope.
///
/// When `name` contains a `/`, the second `/`-separated segment is returned
/// (`@babel/core` -> `core`); otherwise the whole name is returned unchanged.
fn extract_package_name(name: &Option<String>) -> Option<String> {
    let full = name.as_deref()?;
    // `nth(1)` is `None` exactly when there is no slash, which falls back to
    // the full name.
    let bare = full.split('/').nth(1).unwrap_or(full);
    Some(bare.to_string())
}
312
313fn create_package_url(
314    name: &Option<String>,
315    version: &Option<String>,
316    _namespace: &Option<String>,
317) -> Option<String> {
318    // Note: We extract and store namespace in PackageData for metadata purposes,
319    // but the full package name (e.g., "@babel/core") is used for PURL generation.
320    let name = name.as_ref()?;
321    npm_purl(name, version.as_deref())
322}
323
324fn extract_license_statement(json: &Value) -> Option<String> {
325    let mut statements = Vec::new();
326
327    if let Some(license_value) = json.get(FIELD_LICENSE) {
328        if let Some(license_str) = license_value.as_str() {
329            statements.push(format!("- {}", license_str));
330        } else if let Some(license_obj) = license_value.as_object()
331            && let Some(type_val) = license_obj.get("type").and_then(|v| v.as_str())
332        {
333            statements.push(format!("- type: {}", type_val));
334            if let Some(url_val) = license_obj.get("url").and_then(|v| v.as_str()) {
335                statements.push(format!("  url: {}", url_val));
336            }
337        }
338    }
339
340    if let Some(licenses) = json.get(FIELD_LICENSES).and_then(|v| v.as_array()) {
341        for license in licenses.iter().take(MAX_ITERATION_COUNT) {
342            if let Some(license_obj) = license.as_object()
343                && let Some(type_val) = license_obj.get("type").and_then(|v| v.as_str())
344            {
345                statements.push(format!("- type: {}", type_val));
346                if let Some(url_val) = license_obj.get("url").and_then(|v| v.as_str()) {
347                    statements.push(format!("  url: {}", url_val));
348                }
349            }
350        }
351    }
352
353    if statements.is_empty() {
354        None
355    } else {
356        Some(truncate_field(format!("{}\n", statements.join("\n"))))
357    }
358}
359
360fn extract_declared_license_candidate(json: &Value) -> Option<String> {
361    json.get(FIELD_LICENSE)
362        .and_then(|value| value.as_str())
363        .map(str::trim)
364        .filter(|value| !value.is_empty())
365        .map(|s| truncate_field(s.to_string()))
366}
367
368/// Extracts the repository URL from the repository field.
369/// Extracts and normalizes VCS URL from the repository field.
370/// Supports both string and object formats with optional 'type' and 'directory' fields.
371fn extract_vcs_url(json: &Value) -> Option<String> {
372    let (vcs_tool, vcs_repository) = match json.get(FIELD_REPOSITORY) {
373        Some(Value::String(url)) => {
374            let normalized = normalize_repo_url(url);
375            if normalized.is_empty() {
376                return None;
377            }
378            (None, normalized)
379        }
380        Some(Value::Object(obj)) => {
381            let repo_url = obj.get("url").and_then(|u| u.as_str()).unwrap_or("");
382            let normalized = normalize_repo_url(repo_url);
383            if normalized.is_empty() {
384                return None;
385            }
386            let tool = obj
387                .get("type")
388                .and_then(|t| t.as_str())
389                .unwrap_or("git")
390                .to_string();
391            let tool_for_prefix = if normalized.starts_with("git://")
392                || normalized.starts_with("git+")
393                || normalized.starts_with("hg://")
394                || normalized.starts_with("hg+")
395                || normalized.starts_with("svn://")
396                || normalized.starts_with("svn+")
397            {
398                None
399            } else {
400                Some(tool)
401            };
402            (tool_for_prefix, normalized)
403        }
404        _ => return None,
405    };
406
407    if vcs_repository.is_empty() {
408        return None;
409    }
410
411    let mut vcs_url = vcs_tool.map_or_else(
412        || vcs_repository.clone(),
413        |tool| format!("{}+{}", tool, vcs_repository),
414    );
415
416    if let Some(vcs_revision) = json
417        .get("gitHead")
418        .and_then(|v| v.as_str())
419        .and_then(normalize_non_empty_string)
420    {
421        vcs_url.push('@');
422        vcs_url.push_str(&vcs_revision);
423    }
424
425    if let Some(Value::Object(obj)) = json.get(FIELD_REPOSITORY)
426        && let Some(directory) = obj.get("directory").and_then(|d| d.as_str())
427    {
428        vcs_url.push('#');
429        vcs_url.push_str(directory);
430    }
431
432    Some(truncate_field(vcs_url))
433}
434
/// Normalizes a repository reference into a URL string.
///
/// Rules, applied in order to the trimmed input:
/// 1. Empty input yields an empty string.
/// 2. URLs that already start with a recognized http/git/hg/svn scheme are
///    returned unchanged.
/// 3. `git@host:path` becomes `https://host/path`.
/// 4. Hosting shortcuts (`github:`, `gitlab:`, `bitbucket:`, `gist:`) expand
///    to the matching `https://` base; any other `prefix:rest` is returned
///    unchanged.
/// 5. A colon-free value with exactly one `/` is treated as a GitHub
///    `owner/repo` shorthand.
/// 6. Anything else is returned unchanged.
fn normalize_repo_url(url: &str) -> String {
    const PASSTHROUGH_SCHEMES: [&str; 12] = [
        "https://",
        "http://",
        "git://",
        "git+git://",
        "git+https://",
        "git+http://",
        "hg://",
        "hg+http://",
        "hg+https://",
        "svn://",
        "svn+http://",
        "svn+https://",
    ];

    let trimmed = url.trim();
    if trimmed.is_empty() {
        return String::new();
    }

    let already_normalized = PASSTHROUGH_SCHEMES
        .iter()
        .any(|scheme| trimmed.starts_with(scheme));
    if already_normalized {
        return trimmed.to_owned();
    }

    // scp-like syntax: git@host:owner/repo
    if let Some(rest) = trimmed.strip_prefix("git@") {
        if let Some((host, path)) = rest.split_once(':') {
            return format!("https://{}/{}", host, path);
        }
    }

    match trimmed.split_once(':') {
        Some((shortcut, path)) => {
            let base = match shortcut {
                "github" => Some("https://github.com/"),
                "gitlab" => Some("https://gitlab.com/"),
                "bitbucket" => Some("https://bitbucket.org/"),
                "gist" => Some("https://gist.github.com/"),
                _ => None,
            };
            match base {
                Some(base) => format!("{}{}", base, path),
                None => trimmed.to_owned(),
            }
        }
        // No colon at all: a lone `owner/repo` is GitHub shorthand.
        None if trimmed.matches('/').count() == 1 => format!("https://github.com/{}", trimmed),
        None => trimmed.to_owned(),
    }
}
489
490/// Extracts party information (emails) from the `author`, `contributors`, and `maintainers` fields.
491fn extract_parties(json: &Value) -> Vec<Party> {
492    let mut parties = Vec::new();
493
494    // Extract author field (can be single value or array)
495    if let Some(author) = json.get(FIELD_AUTHOR) {
496        if let Some(author_list) = extract_parties_from_array(author) {
497            // Author is an array
498            for mut party in author_list {
499                if party.role.is_none() {
500                    party.role = Some("author".to_string());
501                }
502                parties.push(party);
503            }
504        } else if let Some(mut party) = extract_party_from_field(author) {
505            // Author is a single value
506            party.role = Some("author".to_string());
507            parties.push(party);
508        }
509    }
510
511    // Extract contributors field
512    if let Some(contributors) = json.get(FIELD_CONTRIBUTORS)
513        && let Some(mut party_list) = extract_parties_from_array(contributors)
514    {
515        for party in &mut party_list {
516            if party.role.is_none() {
517                party.role = Some("contributor".to_string());
518            }
519        }
520        parties.extend(party_list);
521    }
522
523    // Extract maintainers field
524    if let Some(maintainers) = json.get(FIELD_MAINTAINERS)
525        && let Some(mut party_list) = extract_parties_from_array(maintainers)
526    {
527        for party in &mut party_list {
528            if party.role.is_none() {
529                party.role = Some("maintainer".to_string());
530            }
531        }
532        parties.extend(party_list);
533    }
534
535    parties
536}
537
538/// Extracts a party from a JSON field, which can be a string or an object with name/email fields.
539fn extract_party_from_field(field: &Value) -> Option<Party> {
540    match field {
541        Value::String(s) => {
542            if let Some(email) = extract_email_from_string(s) {
543                Some(Party {
544                    r#type: Some("person".to_string()),
545                    role: None,
546                    name: extract_name_from_author_string(s).map(truncate_field),
547                    email: Some(truncate_field(email)),
548                    url: None,
549                    organization: None,
550                    organization_url: None,
551                    timezone: None,
552                })
553            } else {
554                Some(Party {
555                    r#type: Some("person".to_string()),
556                    role: None,
557                    name: Some(truncate_field(s.clone())),
558                    email: None,
559                    url: None,
560                    organization: None,
561                    organization_url: None,
562                    timezone: None,
563                })
564            }
565        }
566        Value::Object(obj) => Some(Party {
567            r#type: Some("person".to_string()),
568            role: obj
569                .get("role")
570                .and_then(|v| v.as_str())
571                .map(|s| truncate_field(s.to_string())),
572            name: obj
573                .get("name")
574                .and_then(|v| v.as_str())
575                .map(|s| truncate_field(s.to_string())),
576            email: obj
577                .get("email")
578                .and_then(|v| v.as_str())
579                .map(|s| truncate_field(s.to_string())),
580            url: obj
581                .get("url")
582                .and_then(|v| v.as_str())
583                .and_then(normalize_optional_party_url)
584                .map(truncate_field),
585            organization: None,
586            organization_url: None,
587            timezone: None,
588        }),
589        _ => None,
590    }
591}
592
593/// Extracts multiple parties from a JSON array.
594fn extract_parties_from_array(array: &Value) -> Option<Vec<Party>> {
595    if let Value::Array(items) = array {
596        let parties = items
597            .iter()
598            .take(MAX_ITERATION_COUNT)
599            .filter_map(extract_party_from_field)
600            .collect::<Vec<_>>();
601        if !parties.is_empty() {
602            return Some(parties);
603        }
604    }
605    None
606}
607
/// Extracts the email from a string in the format `Name <email@example.com>`.
///
/// The email is the text between the first `<` and the first `>` that
/// follows it. Looking for the closing bracket only after the opening one
/// means a stray `>` earlier in the string (e.g. `A > B <ab@example.com>`)
/// no longer hides the email.
///
/// Returns `None` when there is no `<`, or no `>` after it. The extracted
/// text is returned as-is (it may be empty for `<>`).
fn extract_email_from_string(author_str: &str) -> Option<String> {
    let (_, after_open) = author_str.split_once('<')?;
    let (email, _) = after_open.split_once('>')?;
    Some(email.to_string())
}
618
/// Extracts the name from a string in the format `Name <email@example.com>`.
///
/// With a `<` present, the trimmed text before it is the name, and `None` is
/// returned if that text is empty. Without a `<`, the whole trimmed string is
/// returned as the name (even when it is empty).
fn extract_name_from_author_string(author_str: &str) -> Option<String> {
    match author_str.split_once('<') {
        Some((before_email, _)) => {
            let name = before_email.trim();
            (!name.is_empty()).then(|| name.to_string())
        }
        None => Some(author_str.trim().to_string()),
    }
}
631
632fn default_package_data() -> PackageData {
633    PackageData {
634        package_type: Some(NpmParser::PACKAGE_TYPE),
635        primary_language: Some("JavaScript".to_string()),
636        datasource_id: Some(DatasourceId::NpmPackageJson),
637        ..Default::default()
638    }
639}
640
/// Parses an npm alias requirement of the form `npm:<name>[@<constraint>]`.
///
/// Returns `(actual_package_name, constraint)` for alias specifiers, where
/// `constraint` is empty when the alias carries no version range. Returns
/// `None` for anything that is not an `npm:` alias.
///
/// Only the `npm:` protocol is recognized. Treating every string that merely
/// contains `:` and `@` as an alias misreads URL requirements (for example
/// `git+ssh://git@github.com:user/repo.git` would produce the package name
/// `//git`). A leading `@` is understood as the scope marker, so
/// `npm:@scope/pkg` is not split at the scope.
fn parse_alias_adapter(version_str: &str) -> Option<(&str, &str)> {
    let spec = version_str.trim().strip_prefix("npm:")?;

    // Skip the scope marker so it is never mistaken for the name/constraint
    // separator. '@' is a single byte, so the offsets below are valid
    // character boundaries.
    let search_from = usize::from(spec.starts_with('@'));
    let (name, constraint) = match spec[search_from..].find('@') {
        Some(offset) => {
            let at = search_from + offset;
            (&spec[..at], &spec[at + 1..])
        }
        None => (spec, ""),
    };

    // Reject degenerate specifiers such as `npm:` or `npm:@`.
    if name.is_empty() || name == "@" {
        return None;
    }
    Some((name, constraint))
}
649
650fn extract_non_empty_string(json: &Value, field: &str) -> Option<String> {
651    json.get(field)
652        .and_then(|value| value.as_str())
653        .map(str::trim)
654        .filter(|value| !value.is_empty())
655        .map(|s| truncate_field(s.to_string()))
656}
657
/// Builds the npm registry API URL for a package, optionally for one version.
///
/// With a namespace, `<namespace>/<name>` is used with every `/` replaced by
/// `%2f`; without one, the name is used as-is. The version, when present, is
/// appended as a final path segment. Returns `None` when `name` is absent.
fn generate_npm_api_url(
    namespace: &Option<String>,
    name: &Option<String>,
    version: &Option<String>,
) -> Option<String> {
    const REGISTRY: &str = "https://registry.npmjs.org";

    let package = name.as_deref()?;

    let encoded = match namespace.as_deref() {
        Some(scope) => format!("{}/{}", scope, package).replace('/', "%2f"),
        None => package.to_owned(),
    };

    Some(match version.as_deref() {
        Some(ver) => format!("{}/{}/{}", REGISTRY, encoded, ver),
        None => format!("{}/{}", REGISTRY, encoded),
    })
}
680
/// Joins namespace and name into the registry path segment for a package.
///
/// Yields `<namespace>/<name>` when both are present, just `<name>` when
/// there is no namespace, and `None` when the name is missing.
fn build_registry_package_path(
    namespace: &Option<String>,
    name: &Option<String>,
) -> Option<String> {
    let package = name.as_deref()?;
    let path = match namespace.as_deref() {
        Some(scope) => format!("{scope}/{package}"),
        None => package.to_owned(),
    };
    Some(path)
}
691
692fn generate_repository_homepage_url(
693    namespace: &Option<String>,
694    name: &Option<String>,
695) -> Option<String> {
696    build_registry_package_path(namespace, name)
697        .map(|package_path| format!("https://www.npmjs.com/package/{package_path}"))
698}
699
700fn generate_registry_download_url(
701    namespace: &Option<String>,
702    name: &Option<String>,
703    version: &Option<String>,
704) -> Option<String> {
705    match (
706        build_registry_package_path(namespace, name),
707        name.as_ref(),
708        version.as_ref(),
709    ) {
710        (Some(package_path), Some(name), Some(version)) => Some(format!(
711            "https://registry.npmjs.org/{}/-/{}-{}.tgz",
712            package_path, name, version
713        )),
714        _ => None,
715    }
716}
717
/// Builds the repository download URL for a package.
///
/// This is currently the same value as the registry tarball URL: the call is
/// forwarded unchanged to `generate_registry_download_url`.
fn generate_repository_download_url(
    namespace: &Option<String>,
    name: &Option<String>,
    version: &Option<String>,
) -> Option<String> {
    generate_registry_download_url(namespace, name, version)
}
725
726fn extract_dependency_group(
727    json: &Value,
728    field: &str,
729    scope: &str,
730    is_runtime: bool,
731    is_optional: bool,
732    optional_meta: Option<&HashMap<String, bool>>,
733) -> Vec<Dependency> {
734    json.get(field)
735        .and_then(|deps| deps.as_object())
736        .map_or_else(Vec::new, |deps| {
737            deps.iter()
738                .take(MAX_ITERATION_COUNT)
739                .filter_map(|(name, version)| {
740                    let version_str = version.as_str()?;
741
742                    if version_str.starts_with("workspace:") {
743                        let package_url = npm_purl(name, None)?;
744                        let is_opt = if let Some(meta) = optional_meta {
745                            meta.get(name).copied()
746                        } else {
747                            Some(is_optional)
748                        };
749                        return Some(Dependency {
750                            purl: Some(package_url),
751                            extracted_requirement: Some(truncate_field(version_str.to_string())),
752                            scope: Some(scope.to_string()),
753                            is_runtime: Some(is_runtime),
754                            is_optional: is_opt,
755                            is_pinned: Some(false),
756                            is_direct: Some(true),
757                            resolved_package: None,
758                            extra_data: None,
759                        });
760                    }
761
762                    let actual_package_name = if let Some((actual_package_name, _constraint)) =
763                        parse_alias_adapter(version_str)
764                    {
765                        actual_package_name
766                    } else {
767                        name.as_str()
768                    };
769
770                    let package_url = npm_purl(actual_package_name, None)?;
771
772                    let is_opt = if let Some(meta) = optional_meta {
773                        meta.get(name).copied()
774                    } else {
775                        Some(is_optional)
776                    };
777
778                    Some(Dependency {
779                        purl: Some(package_url),
780                        extracted_requirement: Some(truncate_field(version_str.to_string())),
781                        scope: Some(scope.to_string()),
782                        is_runtime: Some(is_runtime),
783                        is_optional: is_opt,
784                        is_pinned: Some(false),
785                        is_direct: Some(true),
786                        resolved_package: None,
787                        extra_data: None,
788                    })
789                })
790                .collect()
791        })
792}
793
794/// Extracts dependencies from the `dependencies` or `devDependencies` field in the JSON.
795fn extract_dependencies(json: &Value, is_optional: bool) -> Vec<Dependency> {
796    let field = if is_optional {
797        FIELD_DEV_DEPENDENCIES
798    } else {
799        FIELD_DEPENDENCIES
800    };
801
802    let scope = if is_optional {
803        "devDependencies"
804    } else {
805        "dependencies"
806    };
807
808    extract_dependency_group(json, field, scope, !is_optional, is_optional, None)
809}
810
811fn extract_peer_dependencies(json: &Value, meta: &HashMap<String, bool>) -> Vec<Dependency> {
812    extract_dependency_group(
813        json,
814        FIELD_PEER_DEPENDENCIES,
815        "peerDependencies",
816        true,
817        false,
818        Some(meta),
819    )
820}
821
822/// Extracts optional dependencies from the `optionalDependencies` field in the JSON.
823/// Optional dependencies are marked with is_optional: true, is_runtime: true, and scope "optionalDependencies".
824fn extract_optional_dependencies(json: &Value) -> Vec<Dependency> {
825    extract_dependency_group(
826        json,
827        FIELD_OPTIONAL_DEPENDENCIES,
828        "optionalDependencies",
829        true,
830        true,
831        None,
832    )
833}
834
835fn extract_bundled_dependencies(json: &Value) -> Vec<Dependency> {
836    if let Some(bundled) = json
837        .get(FIELD_BUNDLED_DEPENDENCIES)
838        .and_then(|v| v.as_array())
839    {
840        extract_bundled_list(bundled)
841    } else {
842        Vec::new()
843    }
844}
845
846/// Helper function to extract bundled dependencies from an array of package names.
847fn extract_bundled_list(bundled_array: &[Value]) -> Vec<Dependency> {
848    bundled_array
849        .iter()
850        .take(MAX_ITERATION_COUNT)
851        .filter_map(|value| {
852            let name = value.as_str()?;
853            // Create PURL without version for bundled dependencies
854            let package_url = npm_purl(name, None)?;
855
856            Some(Dependency {
857                purl: Some(package_url),
858                extracted_requirement: None,
859                scope: Some("bundledDependencies".to_string()),
860                is_runtime: Some(true),
861                is_optional: Some(false),
862                is_pinned: Some(false),
863                is_direct: Some(true),
864                resolved_package: None,
865                extra_data: None,
866            })
867        })
868        .collect()
869}
870
871/// Extracts Yarn resolutions from the `resolutions` field.
872/// Returns resolutions as a HashMap to be stored in extra_data.
873fn extract_resolutions(json: &Value) -> Option<HashMap<String, serde_json::Value>> {
874    json.get(FIELD_RESOLUTIONS)
875        .and_then(|resolutions| resolutions.as_object())
876        .map(|resolutions_obj| {
877            let mut extra_data = HashMap::new();
878            extra_data.insert(
879                "resolutions".to_string(),
880                serde_json::Value::Object(resolutions_obj.clone()),
881            );
882            extra_data
883        })
884}
885
886fn extract_peer_dependencies_meta(json: &Value) -> HashMap<String, bool> {
887    json.get(FIELD_PEER_DEPENDENCIES_META)
888        .and_then(|meta| meta.as_object())
889        .map_or_else(HashMap::new, |meta_obj| {
890            meta_obj
891                .iter()
892                .take(MAX_ITERATION_COUNT)
893                .filter_map(|(package_name, meta_value)| {
894                    meta_value.as_object().and_then(|obj| {
895                        obj.get("optional")
896                            .and_then(|opt| opt.as_bool())
897                            .map(|optional| (package_name.clone(), optional))
898                    })
899                })
900                .collect()
901        })
902}
903
904fn extract_dependencies_meta(json: &Value) -> Option<serde_json::Value> {
905    json.get(FIELD_DEPENDENCIES_META).cloned()
906}
907
908fn extract_overrides(json: &Value) -> Option<serde_json::Value> {
909    json.get(FIELD_OVERRIDES).cloned()
910}
911
912fn extract_description(json: &Value) -> Option<String> {
913    json.get(FIELD_DESCRIPTION)
914        .and_then(|v| v.as_str())
915        .map(|s| truncate_field(s.to_string()))
916}
917
918fn extract_homepage_url(json: &Value) -> Option<String> {
919    match json.get(FIELD_HOMEPAGE) {
920        Some(Value::String(homepage)) => normalize_non_empty_string(homepage).map(truncate_field),
921        _ => None,
922    }
923}
924
/// Trims surrounding whitespace from `value` and returns the result as an owned
/// string, or `None` when nothing remains after trimming.
fn normalize_non_empty_string(value: &str) -> Option<String> {
    match value.trim() {
        "" => None,
        trimmed => Some(trimmed.to_owned()),
    }
}
933
934fn normalize_optional_party_url(value: &str) -> Option<String> {
935    let normalized = normalize_non_empty_string(value)?;
936
937    if normalized.eq_ignore_ascii_case("none") {
938        None
939    } else {
940        Some(normalized)
941    }
942}
943
944fn extract_keywords_as_vec(json: &Value) -> Vec<String> {
945    json.get(FIELD_KEYWORDS)
946        .and_then(|v| {
947            if let Some(str) = v.as_str() {
948                Some(vec![str.to_string()])
949            } else if let Some(arr) = v.as_array() {
950                let keywords: Vec<String> = arr
951                    .iter()
952                    .take(MAX_ITERATION_COUNT)
953                    .filter_map(|kw| kw.as_str())
954                    .map(|s| truncate_field(s.to_string()))
955                    .collect();
956                if keywords.is_empty() {
957                    None
958                } else {
959                    Some(keywords)
960                }
961            } else {
962                None
963            }
964        })
965        .unwrap_or_default()
966}
967
968fn extract_engines(json: &Value) -> Option<serde_json::Value> {
969    json.get(FIELD_ENGINES).cloned()
970}
971
972fn extract_raw_extra_data_field(json: &Value, field: &str) -> Option<serde_json::Value> {
973    json.get(field).cloned()
974}
975
976fn extract_package_manager(json: &Value) -> Option<String> {
977    json.get(FIELD_PACKAGE_MANAGER)
978        .and_then(|v| v.as_str())
979        .map(|s| truncate_field(s.to_string()))
980}
981
982fn extract_workspaces(json: &Value) -> Option<serde_json::Value> {
983    json.get(FIELD_WORKSPACES).cloned()
984}
985
986fn extract_private(json: &Value) -> Option<bool> {
987    json.get(FIELD_PRIVATE).and_then(|v| v.as_bool())
988}
989
990fn extract_bugs(json: &Value) -> Option<String> {
991    match json.get(FIELD_BUGS) {
992        Some(bugs) => {
993            if let Some(url) = bugs.as_str() {
994                normalize_non_empty_string(url).map(truncate_field)
995            } else if let Some(obj) = bugs.as_object() {
996                obj.get("url")
997                    .and_then(|v| v.as_str())
998                    .and_then(normalize_non_empty_string)
999                    .map(truncate_field)
1000            } else {
1001                None
1002            }
1003        }
1004        None => None,
1005    }
1006}
1007
1008fn extract_dist_hashes(dist: &Value) -> (Option<String>, Option<String>, Option<String>) {
1009    let mut sha1 = dist
1010        .get("shasum")
1011        .and_then(|v| v.as_str())
1012        .and_then(normalize_non_empty_string);
1013    let mut sha256 = None;
1014    let mut sha512 = None;
1015
1016    if let Some(integrity) = dist.get("integrity").and_then(|v| v.as_str())
1017        && let Some((algo, hex_digest)) = parse_sri(integrity)
1018    {
1019        match algo.as_str() {
1020            "sha1" if sha1.is_none() => sha1 = Some(hex_digest),
1021            "sha1" => {}
1022            "sha256" => sha256 = Some(hex_digest),
1023            "sha512" => sha512 = Some(hex_digest),
1024            _ => {}
1025        }
1026    }
1027
1028    (sha1, sha256, sha512)
1029}
1030
1031fn extract_dist_tarball(dist: &Value) -> Option<String> {
1032    dist.get("tarball")
1033        .or_else(|| dist.get("dnl_url"))
1034        .and_then(|v| v.as_str())
1035        .map(normalize_npm_registry_tarball_url)
1036        .map(truncate_field)
1037}
1038
/// Upgrades plain-HTTP npm registry URLs to HTTPS.
///
/// A URL starting with `http://registry.npmjs.org/` is returned with that prefix
/// replaced by `https://registry.npmjs.org/`; every other URL is returned
/// unchanged.
fn normalize_npm_registry_tarball_url(url: &str) -> String {
    match url.strip_prefix("http://registry.npmjs.org/") {
        Some(path) => format!("https://registry.npmjs.org/{path}"),
        None => url.to_owned(),
    }
}
1046
1047fn combine_extra_data(
1048    extra_data: Option<HashMap<String, serde_json::Value>>,
1049    additional_data: HashMap<String, serde_json::Value>,
1050) -> HashMap<String, serde_json::Value> {
1051    let mut combined = extra_data.unwrap_or_default();
1052    for (key, value) in additional_data {
1053        combined.insert(key, value);
1054    }
1055    combined
1056}