Skip to main content

provenant/parsers/
npm.rs

1//! Parser for npm package.json manifests.
2//!
3//! Extracts package metadata, dependencies, and license information from
4//! package.json files used by Node.js/npm projects.
5//!
6//! # Supported Formats
7//! - package.json (manifest)
8//!
9//! # Key Features
10//! - Full dependency extraction (dependencies, devDependencies, peerDependencies, optionalDependencies, bundledDependencies)
11//! - Package URL (purl) generation for scoped and unscoped packages
12//! - VCS repository URL extraction
13//! - Distribution integrity hash extraction (sha1, sha512)
14//! - Support for legacy formats (licenses array, license objects)
15//!
16//! # Implementation Notes
17//! - Uses serde_json for JSON parsing
18//! - Namespace format: `@org` for scoped packages (e.g., `@babel/core`)
19//! - Graceful error handling: logs warnings and returns default on parse failure
20
21use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Party};
22use crate::parsers::utils::{npm_purl, parse_sri};
23use log::warn;
24use serde_json::Value;
25use std::collections::HashMap;
26use std::fs;
27use std::path::Path;
28
29use super::PackageParser;
30
// Names of the package.json keys this parser looks up.
// The list is not exhaustive: a few keys (for example "gitHead") are read through string
// literals at their point of use.
31const FIELD_NAME: &str = "name";
32const FIELD_VERSION: &str = "version";
33const FIELD_LICENSE: &str = "license";
34const FIELD_LICENSES: &str = "licenses";
35const FIELD_HOMEPAGE: &str = "homepage";
36const FIELD_REPOSITORY: &str = "repository";
37const FIELD_AUTHOR: &str = "author";
38const FIELD_CONTRIBUTORS: &str = "contributors";
39const FIELD_MAINTAINERS: &str = "maintainers";
40const FIELD_DEPENDENCIES: &str = "dependencies";
41const FIELD_DEV_DEPENDENCIES: &str = "devDependencies";
42const FIELD_PEER_DEPENDENCIES: &str = "peerDependencies";
43const FIELD_OPTIONAL_DEPENDENCIES: &str = "optionalDependencies";
44const FIELD_BUNDLED_DEPENDENCIES: &str = "bundledDependencies";
45const FIELD_RESOLUTIONS: &str = "resolutions";
46const FIELD_DESCRIPTION: &str = "description";
47const FIELD_KEYWORDS: &str = "keywords";
48const FIELD_ENGINES: &str = "engines";
49const FIELD_PACKAGE_MANAGER: &str = "packageManager";
50const FIELD_WORKSPACES: &str = "workspaces";
51const FIELD_PRIVATE: &str = "private";
52const FIELD_BUGS: &str = "bugs";
53const FIELD_DIST: &str = "dist";
54const FIELD_OVERRIDES: &str = "overrides";
55const FIELD_PEER_DEPENDENCIES_META: &str = "peerDependenciesMeta";
56const FIELD_DEPENDENCIES_META: &str = "dependenciesMeta";
57
58/// npm package parser for package.json manifests.
59///
60/// Supports all npm dependency types (dependencies, devDependencies, peerDependencies,
61/// optionalDependencies, bundledDependencies) and workspace configurations.
62pub struct NpmParser;
63
64impl PackageParser for NpmParser {
65    const PACKAGE_TYPE: PackageType = PackageType::Npm;
66
67    fn extract_packages(path: &Path) -> Vec<PackageData> {
68        let (json, _field_lines) = match read_and_parse_json_with_lines(path) {
69            Ok((json, lines)) => (json, lines),
70            Err(e) => {
71                warn!("Failed to read or parse package.json at {:?}: {}", path, e);
72                return vec![default_package_data()];
73            }
74        };
75
76        let name = extract_non_empty_string(&json, FIELD_NAME);
77        let version = extract_non_empty_string(&json, FIELD_VERSION);
78        let namespace = extract_namespace(&name);
79        let package_name = extract_package_name(&name);
80        let description = extract_description(&json);
81
82        let extracted_license_statement = extract_license_statement(&json);
83        // Extract license statement only - detection happens in separate engine
84        let declared_license_expression = None;
85        let declared_license_expression_spdx = None;
86        let license_detections = Vec::new();
87        let peer_dependencies_meta = extract_peer_dependencies_meta(&json);
88        let dependencies = extract_dependencies(&json, false);
89        let dev_dependencies = extract_dependencies(&json, true);
90        let peer_dependencies = extract_peer_dependencies(&json, &peer_dependencies_meta);
91        let optional_dependencies = extract_optional_dependencies(&json);
92        let bundled_dependencies = extract_bundled_dependencies(&json);
93        let purl = create_package_url(&name, &version, &namespace);
94        let keywords_vec = extract_keywords_as_vec(&json);
95
96        let mut extra_data_map = HashMap::new();
97
98        if let Some(resolutions) = extract_resolutions(&json) {
99            extra_data_map = combine_extra_data(Some(extra_data_map), resolutions);
100        }
101
102        if let Some(engines) = extract_engines(&json) {
103            extra_data_map.insert("engines".to_string(), engines);
104        }
105
106        if let Some(package_manager) = extract_package_manager(&json) {
107            extra_data_map.insert(
108                "packageManager".to_string(),
109                serde_json::Value::String(package_manager),
110            );
111        }
112
113        if let Some(workspaces) = extract_workspaces(&json) {
114            extra_data_map.insert("workspaces".to_string(), workspaces);
115        }
116
117        if let Some(overrides) = extract_overrides(&json) {
118            extra_data_map.insert("overrides".to_string(), overrides);
119        }
120
121        if let Some(private) = extract_private(&json) {
122            extra_data_map.insert("private".to_string(), serde_json::Value::Bool(private));
123        }
124
125        if let Some(dependencies_meta) = extract_dependencies_meta(&json) {
126            extra_data_map.insert("dependenciesMeta".to_string(), dependencies_meta);
127        }
128
129        let extra_data = if extra_data_map.is_empty() {
130            None
131        } else {
132            Some(extra_data_map)
133        };
134
135        let (dist_sha1, dist_sha256, dist_sha512) = match json.get(FIELD_DIST) {
136            Some(dist) => extract_dist_hashes(dist),
137            None => (None, None, None),
138        };
139
140        let download_url = json
141            .get(FIELD_DIST)
142            .and_then(extract_dist_tarball)
143            .or_else(|| generate_registry_download_url(&namespace, &package_name, &version));
144
145        let api_data_url = generate_npm_api_url(&namespace, &package_name, &version);
146        let repository_homepage_url = generate_repository_homepage_url(&namespace, &package_name);
147        let repository_download_url =
148            generate_repository_download_url(&namespace, &package_name, &version);
149        let vcs_url = extract_vcs_url(&json);
150
151        vec![PackageData {
152            package_type: Some(Self::PACKAGE_TYPE),
153            namespace,
154            name,
155            version,
156            qualifiers: None,
157            subpath: None,
158            primary_language: Some("JavaScript".to_string()),
159            description,
160            release_date: None,
161            parties: extract_parties(&json),
162            keywords: keywords_vec,
163            homepage_url: extract_homepage_url(&json),
164            download_url,
165            size: None,
166            sha1: dist_sha1,
167            md5: None,
168            sha256: dist_sha256,
169            sha512: dist_sha512,
170            bug_tracking_url: extract_bugs(&json),
171            code_view_url: None,
172            vcs_url,
173            copyright: None,
174            holder: None,
175            declared_license_expression,
176            declared_license_expression_spdx,
177            license_detections,
178            other_license_expression: None,
179            other_license_expression_spdx: None,
180            other_license_detections: Vec::new(),
181            extracted_license_statement,
182            notice_text: None,
183            source_packages: Vec::new(),
184            file_references: Vec::new(),
185            is_private: json
186                .get("private")
187                .and_then(|v| v.as_bool())
188                .unwrap_or(false),
189            is_virtual: false,
190            extra_data,
191            dependencies: [
192                dependencies,
193                dev_dependencies,
194                peer_dependencies,
195                optional_dependencies,
196                bundled_dependencies,
197            ]
198            .concat(),
199            repository_homepage_url,
200            repository_download_url,
201            api_data_url,
202            datasource_id: Some(DatasourceId::NpmPackageJson),
203            purl,
204        }]
205    }
206
207    fn is_match(path: &Path) -> bool {
208        path.file_name().is_some_and(|name| name == "package.json")
209    }
210}
211
212/// Reads and parses a JSON file while tracking line numbers of fields
213fn read_and_parse_json_with_lines(path: &Path) -> Result<(Value, HashMap<String, usize>), String> {
214    // Read file once into string
215    let content = fs::read_to_string(path).map_err(|e| format!("Failed to read file: {}", e))?;
216
217    // Parse JSON
218    let json: Value =
219        serde_json::from_str(&content).map_err(|e| format!("Failed to parse JSON: {}", e))?;
220
221    // Track line numbers for each field by iterating over lines
222    let mut field_lines = HashMap::new();
223    for (line_num, line) in content.lines().enumerate() {
224        let trimmed = line.trim();
225        // Look for field names in the format: "field": value
226        if let Some(field_name) = extract_field_name(trimmed) {
227            field_lines.insert(field_name, line_num + 1); // 1-based line numbers
228        }
229    }
230
231    Ok((json, field_lines))
232}
233
/// Extracts the key from a line of the form `"key": value`.
///
/// Returns `None` when the trimmed line does not start with a double quote, when the
/// quoted text is empty or has no closing quote, or when the closing quote is not followed
/// (after optional whitespace) by a colon. The colon check keeps string array items such
/// as `"lodash",` from being reported as field names.
///
/// This is a line-based heuristic rather than a JSON tokenizer: escape sequences inside
/// the key are not interpreted.
fn extract_field_name(line: &str) -> Option<String> {
    let after_open_quote = line.trim().strip_prefix('"')?;
    let (field_name, after_key) = after_open_quote.split_once('"')?;

    if field_name.is_empty() || !after_key.trim_start().starts_with(':') {
        return None;
    }

    Some(field_name.to_string())
}
260
/// Returns the part of `name` before its first `/` (for example `@babel` for
/// `@babel/core`).
///
/// Yields `None` when `name` is absent or contains no `/`.
fn extract_namespace(name: &Option<String>) -> Option<String> {
    let full_name = name.as_deref()?;
    full_name
        .split_once('/')
        .map(|(scope, _rest)| scope.to_string())
}
270
/// Returns the package name without its namespace.
///
/// When `name` contains a `/`, the segment between the first and second `/` is returned
/// (`core` for `@babel/core`); otherwise the whole name is returned. `None` stays `None`.
fn extract_package_name(name: &Option<String>) -> Option<String> {
    let full_name = name.as_ref()?;
    let short_name = match full_name.split('/').nth(1) {
        Some(segment) => segment,
        // No `/` present: the name has no namespace part.
        None => full_name.as_str(),
    };
    Some(short_name.to_string())
}
280
281fn create_package_url(
282    name: &Option<String>,
283    version: &Option<String>,
284    _namespace: &Option<String>,
285) -> Option<String> {
286    // Note: We extract and store namespace in PackageData for metadata purposes,
287    // but the full package name (e.g., "@babel/core") is used for PURL generation.
288    let name = name.as_ref()?;
289    npm_purl(name, version.as_deref())
290}
291
292fn extract_license_statement(json: &Value) -> Option<String> {
293    let mut statements = Vec::new();
294
295    if let Some(license_value) = json.get(FIELD_LICENSE) {
296        if let Some(license_str) = license_value.as_str() {
297            statements.push(format!("- {}", license_str));
298        } else if let Some(license_obj) = license_value.as_object()
299            && let Some(type_val) = license_obj.get("type").and_then(|v| v.as_str())
300        {
301            statements.push(format!("- type: {}", type_val));
302            if let Some(url_val) = license_obj.get("url").and_then(|v| v.as_str()) {
303                statements.push(format!("  url: {}", url_val));
304            }
305        }
306    }
307
308    if let Some(licenses) = json.get(FIELD_LICENSES).and_then(|v| v.as_array()) {
309        for license in licenses {
310            if let Some(license_obj) = license.as_object()
311                && let Some(type_val) = license_obj.get("type").and_then(|v| v.as_str())
312            {
313                statements.push(format!("- type: {}", type_val));
314                if let Some(url_val) = license_obj.get("url").and_then(|v| v.as_str()) {
315                    statements.push(format!("  url: {}", url_val));
316                }
317            }
318        }
319    }
320
321    if statements.is_empty() {
322        None
323    } else {
324        Some(format!("{}\n", statements.join("\n")))
325    }
326}
327
328/// Extracts the repository URL from the repository field.
329/// Extracts and normalizes VCS URL from the repository field.
330/// Supports both string and object formats with optional 'type' and 'directory' fields.
331fn extract_vcs_url(json: &Value) -> Option<String> {
332    let (vcs_tool, vcs_repository) = match json.get(FIELD_REPOSITORY) {
333        Some(Value::String(url)) => {
334            let normalized = normalize_repo_url(url);
335            if normalized.is_empty() {
336                return None;
337            }
338            (None, normalized)
339        }
340        Some(Value::Object(obj)) => {
341            let repo_url = obj.get("url").and_then(|u| u.as_str()).unwrap_or("");
342            let normalized = normalize_repo_url(repo_url);
343            if normalized.is_empty() {
344                return None;
345            }
346            let tool = obj
347                .get("type")
348                .and_then(|t| t.as_str())
349                .unwrap_or("git")
350                .to_string();
351            let tool_for_prefix = if normalized.starts_with("git://")
352                || normalized.starts_with("git+")
353                || normalized.starts_with("hg://")
354                || normalized.starts_with("hg+")
355                || normalized.starts_with("svn://")
356                || normalized.starts_with("svn+")
357            {
358                None
359            } else {
360                Some(tool)
361            };
362            (tool_for_prefix, normalized)
363        }
364        _ => return None,
365    };
366
367    if vcs_repository.is_empty() {
368        return None;
369    }
370
371    let mut vcs_url = vcs_tool.map_or_else(
372        || vcs_repository.clone(),
373        |tool| format!("{}+{}", tool, vcs_repository),
374    );
375
376    if let Some(vcs_revision) = json
377        .get("gitHead")
378        .and_then(|v| v.as_str())
379        .and_then(normalize_non_empty_string)
380    {
381        vcs_url.push('@');
382        vcs_url.push_str(&vcs_revision);
383    }
384
385    if let Some(Value::Object(obj)) = json.get(FIELD_REPOSITORY)
386        && let Some(directory) = obj.get("directory").and_then(|d| d.as_str())
387    {
388        vcs_url.push('#');
389        vcs_url.push_str(directory);
390    }
391
392    Some(vcs_url)
393}
394
/// Normalizes the repository URL shorthands used in package.json.
///
/// Rules, applied in order to the trimmed input:
/// 1. Empty input gives an empty string.
/// 2. URLs with a recognised `http(s)`, `git`, `hg` or `svn` scheme are returned as-is.
/// 3. `git@host:path` becomes `https://host/path`.
/// 4. `github:`, `gitlab:`, `bitbucket:` and `gist:` shorthands expand to the matching
///    HTTPS host; any other `prefix:rest` form is returned unchanged.
/// 5. A colon-free value with exactly one `/` (`owner/repo`) is treated as a GitHub path.
/// 6. Anything else is returned unchanged.
///
/// Based on normalize_vcs_url() from the Python reference.
fn normalize_repo_url(url: &str) -> String {
    const PASSTHROUGH_SCHEMES: [&str; 12] = [
        "https://",
        "http://",
        "git://",
        "git+git://",
        "git+https://",
        "git+http://",
        "hg://",
        "hg+http://",
        "hg+https://",
        "svn://",
        "svn+http://",
        "svn+https://",
    ];

    let trimmed = url.trim();
    if trimmed.is_empty() {
        return String::new();
    }

    let has_known_scheme = PASSTHROUGH_SCHEMES
        .iter()
        .any(|scheme| trimmed.starts_with(scheme));
    if has_known_scheme {
        return trimmed.to_string();
    }

    // scp-style address: git@host:owner/repo
    if let Some(rest) = trimmed.strip_prefix("git@") {
        if let Some((host, repo)) = rest.split_once(':') {
            return format!("https://{}/{}", host, repo);
        }
    }

    match trimmed.split_once(':') {
        Some((platform, repo)) => {
            let base = match platform {
                "github" => "https://github.com/",
                "gitlab" => "https://gitlab.com/",
                "bitbucket" => "https://bitbucket.org/",
                "gist" => "https://gist.github.com/",
                _ => return trimmed.to_string(),
            };
            format!("{}{}", base, repo)
        }
        None if trimmed.matches('/').count() == 1 => format!("https://github.com/{}", trimmed),
        None => trimmed.to_string(),
    }
}
449
450/// Extracts party information (emails) from the `author`, `contributors`, and `maintainers` fields.
451fn extract_parties(json: &Value) -> Vec<Party> {
452    let mut parties = Vec::new();
453
454    // Extract author field (can be single value or array)
455    if let Some(author) = json.get(FIELD_AUTHOR) {
456        if let Some(author_list) = extract_parties_from_array(author) {
457            // Author is an array
458            for mut party in author_list {
459                if party.role.is_none() {
460                    party.role = Some("author".to_string());
461                }
462                parties.push(party);
463            }
464        } else if let Some(mut party) = extract_party_from_field(author) {
465            // Author is a single value
466            party.role = Some("author".to_string());
467            parties.push(party);
468        }
469    }
470
471    // Extract contributors field
472    if let Some(contributors) = json.get(FIELD_CONTRIBUTORS)
473        && let Some(mut party_list) = extract_parties_from_array(contributors)
474    {
475        for party in &mut party_list {
476            if party.role.is_none() {
477                party.role = Some("contributor".to_string());
478            }
479        }
480        parties.extend(party_list);
481    }
482
483    // Extract maintainers field
484    if let Some(maintainers) = json.get(FIELD_MAINTAINERS)
485        && let Some(mut party_list) = extract_parties_from_array(maintainers)
486    {
487        for party in &mut party_list {
488            if party.role.is_none() {
489                party.role = Some("maintainer".to_string());
490            }
491        }
492        parties.extend(party_list);
493    }
494
495    parties
496}
497
498/// Extracts a party from a JSON field, which can be a string or an object with name/email fields.
499fn extract_party_from_field(field: &Value) -> Option<Party> {
500    match field {
501        Value::String(s) => {
502            // Try to extract email from "Name <email>" format
503            if let Some(email) = extract_email_from_string(s) {
504                Some(Party {
505                    r#type: Some("person".to_string()),
506                    role: None,
507                    name: extract_name_from_author_string(s),
508                    email: Some(email),
509                    url: None,
510                    organization: None,
511                    organization_url: None,
512                    timezone: None,
513                })
514            } else {
515                // Treat the string as name if no email found
516                Some(Party {
517                    r#type: Some("person".to_string()),
518                    role: None,
519                    name: Some(s.clone()),
520                    email: None,
521                    url: None,
522                    organization: None,
523                    organization_url: None,
524                    timezone: None,
525                })
526            }
527        }
528        Value::Object(obj) => Some(Party {
529            r#type: Some("person".to_string()),
530            role: obj.get("role").and_then(|v| v.as_str()).map(String::from),
531            name: obj.get("name").and_then(|v| v.as_str()).map(String::from),
532            email: obj.get("email").and_then(|v| v.as_str()).map(String::from),
533            url: obj
534                .get("url")
535                .and_then(|v| v.as_str())
536                .and_then(normalize_optional_party_url),
537            organization: None,
538            organization_url: None,
539            timezone: None,
540        }),
541        _ => None,
542    }
543}
544
545/// Extracts multiple parties from a JSON array.
546fn extract_parties_from_array(array: &Value) -> Option<Vec<Party>> {
547    if let Value::Array(items) = array {
548        let parties = items
549            .iter()
550            .filter_map(extract_party_from_field)
551            .collect::<Vec<_>>();
552        if !parties.is_empty() {
553            return Some(parties);
554        }
555    }
556    None
557}
558
/// Extracts the email from a string in the format `Name <email@example.com>`.
///
/// Returns the text between the first `<` and the first `>` that follows it, without
/// trimming or validation. Returns `None` when there is no `<`, or no `>` after it.
///
/// The closing bracket is searched for only after the opening one, so a stray `>` earlier
/// in the string (for example `a > b <x@y.z>`) does not hide the email.
fn extract_email_from_string(author_str: &str) -> Option<String> {
    let (_, after_open) = author_str.split_once('<')?;
    let (email, _) = after_open.split_once('>')?;
    Some(email.to_string())
}
569
/// Extracts the name part of a string in the format `Name <email@example.com>`.
///
/// With a `<` present, the trimmed text before it is returned, or `None` if that text is
/// empty. Without a `<`, the whole trimmed string is returned, even when it is empty.
fn extract_name_from_author_string(author_str: &str) -> Option<String> {
    match author_str.split_once('<') {
        Some((before_email, _)) => {
            let name = before_email.trim();
            if name.is_empty() {
                None
            } else {
                Some(name.to_owned())
            }
        }
        None => Some(author_str.trim().to_owned()),
    }
}
582
583fn default_package_data() -> PackageData {
584    PackageData {
585        primary_language: Some("JavaScript".to_string()),
586        ..Default::default()
587    }
588}
589
/// Parses an npm alias requirement of the form `npm:<package>@<constraint>`.
///
/// Returns `(package, constraint)`, for example `("@babel/core", "7.0.0")` for
/// `npm:@babel/core@7.0.0`. The constraint is everything after the last `@`.
///
/// Returns `None` when:
/// * the string does not start with `npm:`. This keeps other requirements that merely
///   contain `:` and `@`, such as `git+ssh://git@github.com:user/repo.git`, from being
///   mistaken for aliases;
/// * there is no `@` separating a package from a constraint;
/// * the only `@` is the scope marker (`npm:@scope/pkg`), which would leave the package
///   name empty.
fn parse_alias_adapter(version_str: &str) -> Option<(&str, &str)> {
    let spec = version_str.strip_prefix("npm:")?;
    let (package_name, constraint) = spec.rsplit_once('@')?;
    if package_name.is_empty() {
        return None;
    }
    Some((package_name, constraint))
}
598
599fn extract_non_empty_string(json: &Value, field: &str) -> Option<String> {
600    json.get(field)
601        .and_then(|value| value.as_str())
602        .map(str::trim)
603        .filter(|value| !value.is_empty())
604        .map(String::from)
605}
606
/// Builds the npm registry API URL for a package, optionally for one version.
///
/// With a namespace, `<namespace>/<name>` is used with every `/` replaced by `%2f`;
/// without one, the name is used as given. The version, when present, is appended as a
/// further path segment. Returns `None` when `name` is absent.
fn generate_npm_api_url(
    namespace: &Option<String>,
    name: &Option<String>,
    version: &Option<String>,
) -> Option<String> {
    const REGISTRY: &str = "https://registry.npmjs.org";

    let package = name.as_deref()?;
    let encoded_name = match namespace.as_deref() {
        Some(scope) => [scope, package].join("/").replace('/', "%2f"),
        None => package.to_string(),
    };

    let mut url = format!("{}/{}", REGISTRY, encoded_name);
    if let Some(release) = version {
        url.push('/');
        url.push_str(release);
    }
    Some(url)
}
629
/// Joins namespace and name into the path segment used in registry URLs.
///
/// Gives `<namespace>/<name>` when both are present and `<name>` when only the name is.
/// Returns `None` when the name is absent.
fn build_registry_package_path(
    namespace: &Option<String>,
    name: &Option<String>,
) -> Option<String> {
    let package = name.as_ref()?;
    let path = if let Some(scope) = namespace {
        format!("{scope}/{package}")
    } else {
        package.clone()
    };
    Some(path)
}
640
641fn generate_repository_homepage_url(
642    namespace: &Option<String>,
643    name: &Option<String>,
644) -> Option<String> {
645    build_registry_package_path(namespace, name)
646        .map(|package_path| format!("https://www.npmjs.com/package/{package_path}"))
647}
648
649fn generate_registry_download_url(
650    namespace: &Option<String>,
651    name: &Option<String>,
652    version: &Option<String>,
653) -> Option<String> {
654    match (
655        build_registry_package_path(namespace, name),
656        name.as_ref(),
657        version.as_ref(),
658    ) {
659        (Some(package_path), Some(name), Some(version)) => Some(format!(
660            "https://registry.npmjs.org/{}/-/{}-{}.tgz",
661            package_path, name, version
662        )),
663        _ => None,
664    }
665}
666
667fn generate_repository_download_url(
668    namespace: &Option<String>,
669    name: &Option<String>,
670    version: &Option<String>,
671) -> Option<String> {
672    generate_registry_download_url(namespace, name, version)
673}
674
675fn extract_dependency_group(
676    json: &Value,
677    field: &str,
678    scope: &str,
679    is_runtime: bool,
680    is_optional: bool,
681    optional_meta: Option<&HashMap<String, bool>>,
682) -> Vec<Dependency> {
683    json.get(field)
684        .and_then(|deps| deps.as_object())
685        .map_or_else(Vec::new, |deps| {
686            deps.iter()
687                .filter_map(|(name, version)| {
688                    let version_str = version.as_str()?;
689
690                    if version_str.starts_with("workspace:") {
691                        let package_url = npm_purl(name, None)?;
692                        let is_opt = if let Some(meta) = optional_meta {
693                            meta.get(name).copied()
694                        } else {
695                            Some(is_optional)
696                        };
697                        return Some(Dependency {
698                            purl: Some(package_url),
699                            extracted_requirement: Some(version_str.to_string()),
700                            scope: Some(scope.to_string()),
701                            is_runtime: Some(is_runtime),
702                            is_optional: is_opt,
703                            is_pinned: Some(false),
704                            is_direct: Some(true),
705                            resolved_package: None,
706                            extra_data: None,
707                        });
708                    }
709
710                    let actual_package_name = if let Some((actual_package_name, _constraint)) =
711                        parse_alias_adapter(version_str)
712                    {
713                        actual_package_name
714                    } else {
715                        name.as_str()
716                    };
717
718                    let package_url = npm_purl(actual_package_name, None)?;
719
720                    let is_opt = if let Some(meta) = optional_meta {
721                        meta.get(name).copied()
722                    } else {
723                        Some(is_optional)
724                    };
725
726                    Some(Dependency {
727                        purl: Some(package_url),
728                        extracted_requirement: Some(version_str.to_string()),
729                        scope: Some(scope.to_string()),
730                        is_runtime: Some(is_runtime),
731                        is_optional: is_opt,
732                        is_pinned: Some(false),
733                        is_direct: Some(true),
734                        resolved_package: None,
735                        extra_data: None,
736                    })
737                })
738                .collect()
739        })
740}
741
742/// Extracts dependencies from the `dependencies` or `devDependencies` field in the JSON.
743fn extract_dependencies(json: &Value, is_optional: bool) -> Vec<Dependency> {
744    let field = if is_optional {
745        FIELD_DEV_DEPENDENCIES
746    } else {
747        FIELD_DEPENDENCIES
748    };
749
750    let scope = if is_optional {
751        "devDependencies"
752    } else {
753        "dependencies"
754    };
755
756    extract_dependency_group(json, field, scope, !is_optional, is_optional, None)
757}
758
759fn extract_peer_dependencies(json: &Value, meta: &HashMap<String, bool>) -> Vec<Dependency> {
760    extract_dependency_group(
761        json,
762        FIELD_PEER_DEPENDENCIES,
763        "peerDependencies",
764        true,
765        false,
766        Some(meta),
767    )
768}
769
770/// Extracts optional dependencies from the `optionalDependencies` field in the JSON.
771/// Optional dependencies are marked with is_optional: true, is_runtime: true, and scope "optionalDependencies".
772fn extract_optional_dependencies(json: &Value) -> Vec<Dependency> {
773    extract_dependency_group(
774        json,
775        FIELD_OPTIONAL_DEPENDENCIES,
776        "optionalDependencies",
777        true,
778        true,
779        None,
780    )
781}
782
783fn extract_bundled_dependencies(json: &Value) -> Vec<Dependency> {
784    if let Some(bundled) = json
785        .get(FIELD_BUNDLED_DEPENDENCIES)
786        .and_then(|v| v.as_array())
787    {
788        extract_bundled_list(bundled)
789    } else {
790        Vec::new()
791    }
792}
793
794/// Helper function to extract bundled dependencies from an array of package names.
795fn extract_bundled_list(bundled_array: &[Value]) -> Vec<Dependency> {
796    bundled_array
797        .iter()
798        .filter_map(|value| {
799            let name = value.as_str()?;
800            // Create PURL without version for bundled dependencies
801            let package_url = npm_purl(name, None)?;
802
803            Some(Dependency {
804                purl: Some(package_url),
805                extracted_requirement: None,
806                scope: Some("bundledDependencies".to_string()),
807                is_runtime: Some(true),
808                is_optional: Some(false),
809                is_pinned: Some(false),
810                is_direct: Some(true),
811                resolved_package: None,
812                extra_data: None,
813            })
814        })
815        .collect()
816}
817
818/// Extracts Yarn resolutions from the `resolutions` field.
819/// Returns resolutions as a HashMap to be stored in extra_data.
820fn extract_resolutions(json: &Value) -> Option<HashMap<String, serde_json::Value>> {
821    json.get(FIELD_RESOLUTIONS)
822        .and_then(|resolutions| resolutions.as_object())
823        .map(|resolutions_obj| {
824            let mut extra_data = HashMap::new();
825            extra_data.insert(
826                "resolutions".to_string(),
827                serde_json::Value::Object(resolutions_obj.clone()),
828            );
829            extra_data
830        })
831}
832
833fn extract_peer_dependencies_meta(json: &Value) -> HashMap<String, bool> {
834    json.get(FIELD_PEER_DEPENDENCIES_META)
835        .and_then(|meta| meta.as_object())
836        .map_or_else(HashMap::new, |meta_obj| {
837            meta_obj
838                .iter()
839                .filter_map(|(package_name, meta_value)| {
840                    meta_value.as_object().and_then(|obj| {
841                        obj.get("optional")
842                            .and_then(|opt| opt.as_bool())
843                            .map(|optional| (package_name.clone(), optional))
844                    })
845                })
846                .collect()
847        })
848}
849
850fn extract_dependencies_meta(json: &Value) -> Option<serde_json::Value> {
851    json.get(FIELD_DEPENDENCIES_META).cloned()
852}
853
854fn extract_overrides(json: &Value) -> Option<serde_json::Value> {
855    json.get(FIELD_OVERRIDES).cloned()
856}
857
858fn extract_description(json: &Value) -> Option<String> {
859    json.get(FIELD_DESCRIPTION)
860        .and_then(|v| v.as_str())
861        .map(String::from)
862}
863
864fn extract_homepage_url(json: &Value) -> Option<String> {
865    match json.get(FIELD_HOMEPAGE) {
866        Some(Value::String(homepage)) => normalize_non_empty_string(homepage),
867        _ => None,
868    }
869}
870
/// Trims `value` and returns it as an owned string, or `None` when nothing but whitespace
/// remains.
fn normalize_non_empty_string(value: &str) -> Option<String> {
    Some(value.trim())
        .filter(|trimmed| !trimmed.is_empty())
        .map(str::to_owned)
}
879
880fn normalize_optional_party_url(value: &str) -> Option<String> {
881    let normalized = normalize_non_empty_string(value)?;
882
883    if normalized.eq_ignore_ascii_case("none") {
884        None
885    } else {
886        Some(normalized)
887    }
888}
889
890fn extract_keywords_as_vec(json: &Value) -> Vec<String> {
891    json.get(FIELD_KEYWORDS)
892        .and_then(|v| {
893            if let Some(str) = v.as_str() {
894                Some(vec![str.to_string()])
895            } else if let Some(arr) = v.as_array() {
896                let keywords: Vec<String> = arr
897                    .iter()
898                    .filter_map(|kw| kw.as_str())
899                    .map(String::from)
900                    .collect();
901                if keywords.is_empty() {
902                    None
903                } else {
904                    Some(keywords)
905                }
906            } else {
907                None
908            }
909        })
910        .unwrap_or_default()
911}
912
913fn extract_engines(json: &Value) -> Option<serde_json::Value> {
914    json.get(FIELD_ENGINES).cloned()
915}
916
917fn extract_package_manager(json: &Value) -> Option<String> {
918    json.get(FIELD_PACKAGE_MANAGER)
919        .and_then(|v| v.as_str())
920        .map(String::from)
921}
922
923fn extract_workspaces(json: &Value) -> Option<serde_json::Value> {
924    json.get(FIELD_WORKSPACES).cloned()
925}
926
927fn extract_private(json: &Value) -> Option<bool> {
928    json.get(FIELD_PRIVATE).and_then(|v| v.as_bool())
929}
930
931fn extract_bugs(json: &Value) -> Option<String> {
932    match json.get(FIELD_BUGS) {
933        Some(bugs) => {
934            if let Some(url) = bugs.as_str() {
935                normalize_non_empty_string(url)
936            } else if let Some(obj) = bugs.as_object() {
937                obj.get("url")
938                    .and_then(|v| v.as_str())
939                    .and_then(normalize_non_empty_string)
940            } else {
941                None
942            }
943        }
944        None => None,
945    }
946}
947
948fn extract_dist_hashes(dist: &Value) -> (Option<String>, Option<String>, Option<String>) {
949    let mut sha1 = dist
950        .get("shasum")
951        .and_then(|v| v.as_str())
952        .and_then(normalize_non_empty_string);
953    let mut sha256 = None;
954    let mut sha512 = None;
955
956    if let Some(integrity) = dist.get("integrity").and_then(|v| v.as_str())
957        && let Some((algo, hex_digest)) = parse_sri(integrity)
958    {
959        match algo.as_str() {
960            "sha1" => {
961                if sha1.is_none() {
962                    sha1 = Some(hex_digest);
963                }
964            }
965            "sha256" => sha256 = Some(hex_digest),
966            "sha512" => sha512 = Some(hex_digest),
967            _ => {}
968        }
969    }
970
971    (sha1, sha256, sha512)
972}
973
974fn extract_dist_tarball(dist: &Value) -> Option<String> {
975    dist.get("tarball")
976        .or_else(|| dist.get("dnl_url"))
977        .and_then(|v| v.as_str())
978        .map(normalize_npm_registry_tarball_url)
979}
980
/// Rewrites `http://registry.npmjs.org/...` URLs to use `https`.
///
/// Only URLs starting with exactly `http://registry.npmjs.org/` are changed;
/// every other input is returned as an unchanged copy.
fn normalize_npm_registry_tarball_url(url: &str) -> String {
    match url.strip_prefix("http://registry.npmjs.org/") {
        Some(path) => format!("https://registry.npmjs.org/{path}"),
        None => url.to_owned(),
    }
}
988
989fn combine_extra_data(
990    extra_data: Option<HashMap<String, serde_json::Value>>,
991    additional_data: HashMap<String, serde_json::Value>,
992) -> HashMap<String, serde_json::Value> {
993    let mut combined = extra_data.unwrap_or_default();
994    for (key, value) in additional_data {
995        combined.insert(key, value);
996    }
997    combined
998}
999
// Invokes the crate-level `register_parser!` macro with this parser's details.
// NOTE(review): the arguments look like (description, path glob patterns,
// package type, primary language, documentation URL) — confirm against the
// macro definition, which is not visible from this part of the file.
1000crate::register_parser!(
1001    "npm package.json manifest",
1002    &["**/package.json"],
1003    "npm",
1004    "JavaScript",
1005    Some("https://docs.npmjs.com/cli/v10/configuring-npm/package-json"),
1006);