Skip to main content

provenant/parsers/
npm.rs

1//! Parser for npm package.json manifests.
2//!
3//! Extracts package metadata, dependencies, and license information from
4//! package.json files used by Node.js/npm projects.
5//!
6//! # Supported Formats
7//! - package.json (manifest)
8//!
9//! # Key Features
10//! - Full dependency extraction (dependencies, devDependencies, peerDependencies, optionalDependencies, bundledDependencies)
11//! - Package URL (purl) generation for scoped and unscoped packages
12//! - VCS repository URL extraction
//! - Distribution integrity hash extraction (sha1, sha256, sha512)
14//! - Support for legacy formats (licenses array, license objects)
15//!
16//! # Implementation Notes
17//! - Uses serde_json for JSON parsing
18//! - Namespace format: `@org` for scoped packages (e.g., `@babel/core`)
19//! - Graceful error handling: logs warnings and returns default on parse failure
20
21use crate::models::{
22    DatasourceId, Dependency, PackageData, PackageType, Party, Sha1Digest, Sha256Digest,
23    Sha512Digest,
24};
25use crate::parser_warn as warn;
26use crate::parsers::utils::{MAX_ITERATION_COUNT, npm_purl, parse_sri, truncate_field};
27use serde_json::Value;
28use std::collections::HashMap;
29use std::path::Path;
30
31use super::PackageParser;
32use super::license_normalization::normalize_spdx_declared_license;
33
// Keys of the top-level package.json object that this parser looks up.
// Keeping them as constants avoids scattering string literals across the extractors.
const FIELD_NAME: &str = "name";
const FIELD_VERSION: &str = "version";
const FIELD_LICENSE: &str = "license";
const FIELD_LICENSES: &str = "licenses";
const FIELD_HOMEPAGE: &str = "homepage";
const FIELD_REPOSITORY: &str = "repository";
const FIELD_AUTHOR: &str = "author";
const FIELD_CONTRIBUTORS: &str = "contributors";
const FIELD_MAINTAINERS: &str = "maintainers";
const FIELD_DEPENDENCIES: &str = "dependencies";
const FIELD_DEV_DEPENDENCIES: &str = "devDependencies";
const FIELD_PEER_DEPENDENCIES: &str = "peerDependencies";
const FIELD_OPTIONAL_DEPENDENCIES: &str = "optionalDependencies";
const FIELD_BUNDLED_DEPENDENCIES: &str = "bundledDependencies";
const FIELD_RESOLUTIONS: &str = "resolutions";
const FIELD_DESCRIPTION: &str = "description";
const FIELD_KEYWORDS: &str = "keywords";
const FIELD_ENGINES: &str = "engines";
const FIELD_OS: &str = "os";
const FIELD_CPU: &str = "cpu";
const FIELD_LIBC: &str = "libc";
const FIELD_DEPRECATED: &str = "deprecated";
const FIELD_HAS_BIN: &str = "hasBin";
const FIELD_PACKAGE_MANAGER: &str = "packageManager";
const FIELD_WORKSPACES: &str = "workspaces";
const FIELD_PRIVATE: &str = "private";
const FIELD_BUGS: &str = "bugs";
const FIELD_DIST: &str = "dist";
const FIELD_OVERRIDES: &str = "overrides";
const FIELD_PEER_DEPENDENCIES_META: &str = "peerDependenciesMeta";
const FIELD_DEPENDENCIES_META: &str = "dependenciesMeta";
65
/// npm package parser for package.json manifests.
///
/// Supports all npm dependency types (dependencies, devDependencies, peerDependencies,
/// optionalDependencies, bundledDependencies) and workspace configurations.
///
/// This is a field-less marker type: all behavior lives in its `PackageParser`
/// implementation, whose functions take no `self`.
pub struct NpmParser;
71
72impl PackageParser for NpmParser {
73    const PACKAGE_TYPE: PackageType = PackageType::Npm;
74
75    fn extract_packages(path: &Path) -> Vec<PackageData> {
76        let (json, _field_lines) = match read_and_parse_json_with_lines(path) {
77            Ok((json, lines)) => (json, lines),
78            Err(e) => {
79                warn!("Failed to read or parse package.json at {:?}: {}", path, e);
80                return vec![default_package_data()];
81            }
82        };
83
84        let name = extract_non_empty_string(&json, FIELD_NAME);
85        let version = extract_non_empty_string(&json, FIELD_VERSION);
86        let namespace = extract_namespace(&name);
87        let package_name = extract_package_name(&name);
88        let description = extract_description(&json);
89
90        let extracted_license_statement = extract_license_statement(&json);
91        let (declared_license_expression, declared_license_expression_spdx, license_detections) =
92            normalize_spdx_declared_license(extract_declared_license_candidate(&json).as_deref());
93        let peer_dependencies_meta = extract_peer_dependencies_meta(&json);
94        let dependencies = extract_dependencies(&json, false);
95        let dev_dependencies = extract_dependencies(&json, true);
96        let peer_dependencies = extract_peer_dependencies(&json, &peer_dependencies_meta);
97        let optional_dependencies = extract_optional_dependencies(&json);
98        let bundled_dependencies = extract_bundled_dependencies(&json);
99        let purl = create_package_url(&name, &version, &namespace);
100        let keywords_vec = extract_keywords_as_vec(&json);
101
102        let mut extra_data_map = HashMap::new();
103
104        if let Some(resolutions) = extract_resolutions(&json) {
105            extra_data_map = combine_extra_data(Some(extra_data_map), resolutions);
106        }
107
108        if let Some(engines) = extract_engines(&json) {
109            extra_data_map.insert("engines".to_string(), engines);
110        }
111
112        for field in [
113            FIELD_OS,
114            FIELD_CPU,
115            FIELD_LIBC,
116            FIELD_DEPRECATED,
117            FIELD_HAS_BIN,
118        ] {
119            if let Some(value) = extract_raw_extra_data_field(&json, field) {
120                extra_data_map.insert(field.to_string(), value);
121            }
122        }
123
124        if let Some(package_manager) = extract_package_manager(&json) {
125            extra_data_map.insert(
126                "packageManager".to_string(),
127                serde_json::Value::String(package_manager),
128            );
129        }
130
131        if let Some(workspaces) = extract_workspaces(&json) {
132            extra_data_map.insert("workspaces".to_string(), workspaces);
133        }
134
135        if let Some(overrides) = extract_overrides(&json) {
136            extra_data_map.insert("overrides".to_string(), overrides);
137        }
138
139        if let Some(private) = extract_private(&json) {
140            extra_data_map.insert("private".to_string(), serde_json::Value::Bool(private));
141        }
142
143        if let Some(dependencies_meta) = extract_dependencies_meta(&json) {
144            extra_data_map.insert("dependenciesMeta".to_string(), dependencies_meta);
145        }
146
147        let extra_data = if extra_data_map.is_empty() {
148            None
149        } else {
150            Some(extra_data_map)
151        };
152
153        let (dist_sha1, dist_sha256, dist_sha512) = match json.get(FIELD_DIST) {
154            Some(dist) => extract_dist_hashes(dist),
155            None => (None, None, None),
156        };
157
158        let download_url = json
159            .get(FIELD_DIST)
160            .and_then(extract_dist_tarball)
161            .or_else(|| generate_registry_download_url(&namespace, &package_name, &version));
162
163        let api_data_url = generate_npm_api_url(&namespace, &package_name, &version);
164        let repository_homepage_url = generate_repository_homepage_url(&namespace, &package_name);
165        let repository_download_url =
166            generate_repository_download_url(&namespace, &package_name, &version);
167        let vcs_url = extract_vcs_url(&json);
168
169        vec![PackageData {
170            package_type: Some(Self::PACKAGE_TYPE),
171            namespace,
172            name: package_name,
173            version,
174            qualifiers: None,
175            subpath: None,
176            primary_language: Some("JavaScript".to_string()),
177            description,
178            release_date: None,
179            parties: extract_parties(&json),
180            keywords: keywords_vec,
181            homepage_url: extract_homepage_url(&json),
182            download_url,
183            size: None,
184            sha1: dist_sha1.and_then(|h| Sha1Digest::from_hex(&h).ok()),
185            md5: None,
186            sha256: dist_sha256.and_then(|h| Sha256Digest::from_hex(&h).ok()),
187            sha512: dist_sha512.and_then(|h| Sha512Digest::from_hex(&h).ok()),
188            bug_tracking_url: extract_bugs(&json),
189            code_view_url: None,
190            vcs_url,
191            copyright: None,
192            holder: None,
193            declared_license_expression,
194            declared_license_expression_spdx,
195            license_detections,
196            other_license_expression: None,
197            other_license_expression_spdx: None,
198            other_license_detections: Vec::new(),
199            extracted_license_statement,
200            notice_text: None,
201            source_packages: Vec::new(),
202            file_references: Vec::new(),
203            is_private: json
204                .get("private")
205                .and_then(|v| v.as_bool())
206                .unwrap_or(false),
207            is_virtual: false,
208            extra_data,
209            dependencies: [
210                dependencies,
211                dev_dependencies,
212                peer_dependencies,
213                optional_dependencies,
214                bundled_dependencies,
215            ]
216            .concat(),
217            repository_homepage_url,
218            repository_download_url,
219            api_data_url,
220            datasource_id: Some(DatasourceId::NpmPackageJson),
221            purl,
222        }]
223    }
224
225    fn is_match(path: &Path) -> bool {
226        path.file_name().is_some_and(|name| name == "package.json")
227    }
228}
229
230/// Reads and parses a JSON file while tracking line numbers of fields
231fn read_and_parse_json_with_lines(path: &Path) -> Result<(Value, HashMap<String, usize>), String> {
232    // Read file once into string
233    let content = crate::parsers::utils::read_file_to_string(path, None)
234        .map_err(|e| format!("Failed to read file: {}", e))?;
235
236    // Parse JSON
237    let json: Value =
238        serde_json::from_str(&content).map_err(|e| format!("Failed to parse JSON: {}", e))?;
239
240    // Track line numbers for each field by iterating over lines
241    let mut field_lines = HashMap::new();
242    for (line_num, line) in content.lines().enumerate().take(MAX_ITERATION_COUNT) {
243        let trimmed = line.trim();
244        if let Some(field_name) = extract_field_name(trimmed) {
245            field_lines.insert(field_name, line_num + 1);
246        }
247    }
248
249    Ok((json, field_lines))
250}
251
/// Extracts the leading quoted token from a JSON line.
///
/// After trimming, the line must start with `"`. The result is the text between
/// that quote and the next `"` (or the end of the line when there is no closing
/// quote). Returns `None` for lines that do not start with a quote or whose
/// quoted token is empty.
fn extract_field_name(line: &str) -> Option<String> {
    let after_quote = line.trim().strip_prefix('"')?;
    // `split` always yields at least one piece, so the fallback is never taken.
    let field_name = after_quote.split('"').next().unwrap_or(after_quote);
    if field_name.is_empty() {
        return None;
    }
    Some(field_name.to_string())
}
278
/// Returns the part of `name` before its first `/` (e.g. `@babel` for `@babel/core`).
///
/// Yields `None` when `name` is absent or contains no `/`.
fn extract_namespace(name: &Option<String>) -> Option<String> {
    let (scope, _rest) = name.as_deref()?.split_once('/')?;
    Some(scope.to_string())
}
288
/// Returns the bare package name: the second `/`-separated segment when `name`
/// contains a `/` (e.g. `core` for `@babel/core`), otherwise `name` unchanged.
///
/// Yields `None` only when `name` is absent.
fn extract_package_name(name: &Option<String>) -> Option<String> {
    let full_name = name.as_deref()?;
    let bare_name = match full_name.split_once('/') {
        // Only the segment directly after the first slash is kept.
        Some((_scope, rest)) => rest.split('/').next().unwrap_or(rest),
        None => full_name,
    };
    Some(bare_name.to_string())
}
298
299fn create_package_url(
300    name: &Option<String>,
301    version: &Option<String>,
302    _namespace: &Option<String>,
303) -> Option<String> {
304    // Note: We extract and store namespace in PackageData for metadata purposes,
305    // but the full package name (e.g., "@babel/core") is used for PURL generation.
306    let name = name.as_ref()?;
307    npm_purl(name, version.as_deref())
308}
309
310fn extract_license_statement(json: &Value) -> Option<String> {
311    let mut statements = Vec::new();
312
313    if let Some(license_value) = json.get(FIELD_LICENSE) {
314        if let Some(license_str) = license_value.as_str() {
315            statements.push(format!("- {}", license_str));
316        } else if let Some(license_obj) = license_value.as_object()
317            && let Some(type_val) = license_obj.get("type").and_then(|v| v.as_str())
318        {
319            statements.push(format!("- type: {}", type_val));
320            if let Some(url_val) = license_obj.get("url").and_then(|v| v.as_str()) {
321                statements.push(format!("  url: {}", url_val));
322            }
323        }
324    }
325
326    if let Some(licenses) = json.get(FIELD_LICENSES).and_then(|v| v.as_array()) {
327        for license in licenses.iter().take(MAX_ITERATION_COUNT) {
328            if let Some(license_obj) = license.as_object()
329                && let Some(type_val) = license_obj.get("type").and_then(|v| v.as_str())
330            {
331                statements.push(format!("- type: {}", type_val));
332                if let Some(url_val) = license_obj.get("url").and_then(|v| v.as_str()) {
333                    statements.push(format!("  url: {}", url_val));
334                }
335            }
336        }
337    }
338
339    if statements.is_empty() {
340        None
341    } else {
342        Some(truncate_field(format!("{}\n", statements.join("\n"))))
343    }
344}
345
346fn extract_declared_license_candidate(json: &Value) -> Option<String> {
347    json.get(FIELD_LICENSE)
348        .and_then(|value| value.as_str())
349        .map(str::trim)
350        .filter(|value| !value.is_empty())
351        .map(|s| truncate_field(s.to_string()))
352}
353
354/// Extracts the repository URL from the repository field.
355/// Extracts and normalizes VCS URL from the repository field.
356/// Supports both string and object formats with optional 'type' and 'directory' fields.
357fn extract_vcs_url(json: &Value) -> Option<String> {
358    let (vcs_tool, vcs_repository) = match json.get(FIELD_REPOSITORY) {
359        Some(Value::String(url)) => {
360            let normalized = normalize_repo_url(url);
361            if normalized.is_empty() {
362                return None;
363            }
364            (None, normalized)
365        }
366        Some(Value::Object(obj)) => {
367            let repo_url = obj.get("url").and_then(|u| u.as_str()).unwrap_or("");
368            let normalized = normalize_repo_url(repo_url);
369            if normalized.is_empty() {
370                return None;
371            }
372            let tool = obj
373                .get("type")
374                .and_then(|t| t.as_str())
375                .unwrap_or("git")
376                .to_string();
377            let tool_for_prefix = if normalized.starts_with("git://")
378                || normalized.starts_with("git+")
379                || normalized.starts_with("hg://")
380                || normalized.starts_with("hg+")
381                || normalized.starts_with("svn://")
382                || normalized.starts_with("svn+")
383            {
384                None
385            } else {
386                Some(tool)
387            };
388            (tool_for_prefix, normalized)
389        }
390        _ => return None,
391    };
392
393    if vcs_repository.is_empty() {
394        return None;
395    }
396
397    let mut vcs_url = vcs_tool.map_or_else(
398        || vcs_repository.clone(),
399        |tool| format!("{}+{}", tool, vcs_repository),
400    );
401
402    if let Some(vcs_revision) = json
403        .get("gitHead")
404        .and_then(|v| v.as_str())
405        .and_then(normalize_non_empty_string)
406    {
407        vcs_url.push('@');
408        vcs_url.push_str(&vcs_revision);
409    }
410
411    if let Some(Value::Object(obj)) = json.get(FIELD_REPOSITORY)
412        && let Some(directory) = obj.get("directory").and_then(|d| d.as_str())
413    {
414        vcs_url.push('#');
415        vcs_url.push_str(directory);
416    }
417
418    Some(truncate_field(vcs_url))
419}
420
/// Normalizes the repository URL notations accepted in package.json.
/// Based on normalize_vcs_url() from Python reference.
///
/// Rules, applied in order to the trimmed input:
/// - blank input yields an empty string;
/// - URLs that already start with a known http/git/hg/svn scheme are returned as is;
/// - `git@host:path` becomes `https://host/path`;
/// - `github:`, `gitlab:`, `bitbucket:` and `gist:` shorthands expand to the
///   matching HTTPS host, while any other `prefix:rest` is returned as is;
/// - a colon-free value with exactly one `/` is treated as a GitHub `owner/repo`;
/// - anything else is returned as is.
fn normalize_repo_url(url: &str) -> String {
    const PASSTHROUGH_SCHEMES: [&str; 12] = [
        "https://",
        "http://",
        "git://",
        "git+git://",
        "git+https://",
        "git+http://",
        "hg://",
        "hg+http://",
        "hg+https://",
        "svn://",
        "svn+http://",
        "svn+https://",
    ];

    let url = url.trim();
    let is_passthrough = PASSTHROUGH_SCHEMES
        .iter()
        .any(|scheme| url.starts_with(scheme));
    if url.is_empty() || is_passthrough {
        return url.to_string();
    }

    let scp_style = url
        .strip_prefix("git@")
        .and_then(|rest| rest.split_once(':'));
    if let Some((host, repo)) = scp_style {
        return format!("https://{}/{}", host, repo);
    }

    match url.split_once(':') {
        Some((platform, repo)) => {
            let host_url = match platform {
                "github" => "https://github.com/",
                "gitlab" => "https://gitlab.com/",
                "bitbucket" => "https://bitbucket.org/",
                "gist" => "https://gist.github.com/",
                _ => return url.to_string(),
            };
            format!("{}{}", host_url, repo)
        }
        // No colon at all: a single slash means the `owner/repo` shorthand.
        None if url.matches('/').count() == 1 => format!("https://github.com/{}", url),
        None => url.to_string(),
    }
}
475
476/// Extracts party information (emails) from the `author`, `contributors`, and `maintainers` fields.
477fn extract_parties(json: &Value) -> Vec<Party> {
478    let mut parties = Vec::new();
479
480    // Extract author field (can be single value or array)
481    if let Some(author) = json.get(FIELD_AUTHOR) {
482        if let Some(author_list) = extract_parties_from_array(author) {
483            // Author is an array
484            for mut party in author_list {
485                if party.role.is_none() {
486                    party.role = Some("author".to_string());
487                }
488                parties.push(party);
489            }
490        } else if let Some(mut party) = extract_party_from_field(author) {
491            // Author is a single value
492            party.role = Some("author".to_string());
493            parties.push(party);
494        }
495    }
496
497    // Extract contributors field
498    if let Some(contributors) = json.get(FIELD_CONTRIBUTORS)
499        && let Some(mut party_list) = extract_parties_from_array(contributors)
500    {
501        for party in &mut party_list {
502            if party.role.is_none() {
503                party.role = Some("contributor".to_string());
504            }
505        }
506        parties.extend(party_list);
507    }
508
509    // Extract maintainers field
510    if let Some(maintainers) = json.get(FIELD_MAINTAINERS)
511        && let Some(mut party_list) = extract_parties_from_array(maintainers)
512    {
513        for party in &mut party_list {
514            if party.role.is_none() {
515                party.role = Some("maintainer".to_string());
516            }
517        }
518        parties.extend(party_list);
519    }
520
521    parties
522}
523
524/// Extracts a party from a JSON field, which can be a string or an object with name/email fields.
525fn extract_party_from_field(field: &Value) -> Option<Party> {
526    match field {
527        Value::String(s) => {
528            if let Some(email) = extract_email_from_string(s) {
529                Some(Party {
530                    r#type: Some("person".to_string()),
531                    role: None,
532                    name: extract_name_from_author_string(s).map(truncate_field),
533                    email: Some(truncate_field(email)),
534                    url: None,
535                    organization: None,
536                    organization_url: None,
537                    timezone: None,
538                })
539            } else {
540                Some(Party {
541                    r#type: Some("person".to_string()),
542                    role: None,
543                    name: Some(truncate_field(s.clone())),
544                    email: None,
545                    url: None,
546                    organization: None,
547                    organization_url: None,
548                    timezone: None,
549                })
550            }
551        }
552        Value::Object(obj) => Some(Party {
553            r#type: Some("person".to_string()),
554            role: obj
555                .get("role")
556                .and_then(|v| v.as_str())
557                .map(|s| truncate_field(s.to_string())),
558            name: obj
559                .get("name")
560                .and_then(|v| v.as_str())
561                .map(|s| truncate_field(s.to_string())),
562            email: obj
563                .get("email")
564                .and_then(|v| v.as_str())
565                .map(|s| truncate_field(s.to_string())),
566            url: obj
567                .get("url")
568                .and_then(|v| v.as_str())
569                .and_then(normalize_optional_party_url)
570                .map(truncate_field),
571            organization: None,
572            organization_url: None,
573            timezone: None,
574        }),
575        _ => None,
576    }
577}
578
579/// Extracts multiple parties from a JSON array.
580fn extract_parties_from_array(array: &Value) -> Option<Vec<Party>> {
581    if let Value::Array(items) = array {
582        let parties = items
583            .iter()
584            .take(MAX_ITERATION_COUNT)
585            .filter_map(extract_party_from_field)
586            .collect::<Vec<_>>();
587        if !parties.is_empty() {
588            return Some(parties);
589        }
590    }
591    None
592}
593
/// Extracts the email from a string in the format "Name <email@example.com>".
///
/// Returns the text between the first `<` and the first `>` that follows it, or
/// `None` when no such bracket pair exists. The closing bracket is searched for
/// only after the opening one, so a stray `>` earlier in the string (as in
/// `"A > B <ab@example.com>"`) does not hide the email.
fn extract_email_from_string(author_str: &str) -> Option<String> {
    let (_before, after_open) = author_str.split_once('<')?;
    let (email, _after) = after_open.split_once('>')?;
    Some(email.to_string())
}
604
/// Extracts the name from a string in the format "Name <email@example.com>".
///
/// When the string contains `<`, the trimmed text before it is returned, or
/// `None` if that text is blank. Without a `<`, the whole trimmed string is
/// returned (even when empty).
fn extract_name_from_author_string(author_str: &str) -> Option<String> {
    match author_str.split_once('<') {
        Some((before_email, _)) => {
            let name = before_email.trim();
            if name.is_empty() {
                None
            } else {
                Some(name.to_string())
            }
        }
        None => Some(author_str.trim().to_string()),
    }
}
617
618fn default_package_data() -> PackageData {
619    PackageData {
620        package_type: Some(NpmParser::PACKAGE_TYPE),
621        primary_language: Some("JavaScript".to_string()),
622        datasource_id: Some(DatasourceId::NpmPackageJson),
623        ..Default::default()
624    }
625}
626
/// Parses an npm alias requirement of the form `npm:<name>@<constraint>`.
///
/// Returns `(actual_package_name, constraint)`, e.g. `("@babel/core", "^7.0.0")`
/// for `npm:@babel/core@^7.0.0`. Returns `None` when the requirement does not use
/// the `npm:` protocol or carries no `@<constraint>` part.
///
/// Only the `npm:` protocol is accepted. Other requirements that merely contain
/// both `:` and `@` (such as `git+ssh://git@host:owner/repo.git`) are not
/// aliases and must not be split into a bogus package name.
fn parse_alias_adapter(version_str: &str) -> Option<(&str, &str)> {
    let spec = version_str.trim_start().strip_prefix("npm:")?;

    // A leading '@' marks a scope, not the version separator, so the separator
    // must sit after the first character. This also rules out an empty name.
    let separator = spec.rfind('@').filter(|&index| index > 0)?;

    let actual_package_name = &spec[..separator];
    let constraint = &spec[separator + 1..];
    Some((actual_package_name, constraint))
}
635
636fn extract_non_empty_string(json: &Value, field: &str) -> Option<String> {
637    json.get(field)
638        .and_then(|value| value.as_str())
639        .map(str::trim)
640        .filter(|value| !value.is_empty())
641        .map(|s| truncate_field(s.to_string()))
642}
643
/// Builds the npm registry API URL for a package, optionally for one version.
///
/// With a namespace, `<namespace>/<name>` is used with every `/` replaced by
/// `%2f`. The version, when present, is appended as a further path segment.
/// Returns `None` when `name` is absent.
fn generate_npm_api_url(
    namespace: &Option<String>,
    name: &Option<String>,
    version: &Option<String>,
) -> Option<String> {
    const REGISTRY: &str = "https://registry.npmjs.org";

    let name = name.as_deref()?;
    let ns_name = match namespace {
        Some(ns) => format!("{}/{}", ns, name).replace('/', "%2f"),
        None => name.to_string(),
    };

    Some(match version {
        Some(ver) => format!("{}/{}/{}", REGISTRY, ns_name, ver),
        None => format!("{}/{}", REGISTRY, ns_name),
    })
}
666
/// Joins namespace and name into the path used in registry and website URLs:
/// `<namespace>/<name>` when a namespace exists, otherwise just `<name>`.
///
/// Returns `None` when `name` is absent.
fn build_registry_package_path(
    namespace: &Option<String>,
    name: &Option<String>,
) -> Option<String> {
    let name = name.as_ref()?;
    Some(match namespace {
        Some(namespace) => format!("{namespace}/{name}"),
        None => name.clone(),
    })
}
677
678fn generate_repository_homepage_url(
679    namespace: &Option<String>,
680    name: &Option<String>,
681) -> Option<String> {
682    build_registry_package_path(namespace, name)
683        .map(|package_path| format!("https://www.npmjs.com/package/{package_path}"))
684}
685
686fn generate_registry_download_url(
687    namespace: &Option<String>,
688    name: &Option<String>,
689    version: &Option<String>,
690) -> Option<String> {
691    match (
692        build_registry_package_path(namespace, name),
693        name.as_ref(),
694        version.as_ref(),
695    ) {
696        (Some(package_path), Some(name), Some(version)) => Some(format!(
697            "https://registry.npmjs.org/{}/-/{}-{}.tgz",
698            package_path, name, version
699        )),
700        _ => None,
701    }
702}
703
/// Returns the repository download URL for the package.
///
/// This is exactly the URL produced by `generate_registry_download_url` for the
/// same arguments, so it is `None` under the same conditions.
fn generate_repository_download_url(
    namespace: &Option<String>,
    name: &Option<String>,
    version: &Option<String>,
) -> Option<String> {
    generate_registry_download_url(namespace, name, version)
}
711
712fn extract_dependency_group(
713    json: &Value,
714    field: &str,
715    scope: &str,
716    is_runtime: bool,
717    is_optional: bool,
718    optional_meta: Option<&HashMap<String, bool>>,
719) -> Vec<Dependency> {
720    json.get(field)
721        .and_then(|deps| deps.as_object())
722        .map_or_else(Vec::new, |deps| {
723            deps.iter()
724                .take(MAX_ITERATION_COUNT)
725                .filter_map(|(name, version)| {
726                    let version_str = version.as_str()?;
727
728                    if version_str.starts_with("workspace:") {
729                        let package_url = npm_purl(name, None)?;
730                        let is_opt = if let Some(meta) = optional_meta {
731                            meta.get(name).copied()
732                        } else {
733                            Some(is_optional)
734                        };
735                        return Some(Dependency {
736                            purl: Some(package_url),
737                            extracted_requirement: Some(truncate_field(version_str.to_string())),
738                            scope: Some(scope.to_string()),
739                            is_runtime: Some(is_runtime),
740                            is_optional: is_opt,
741                            is_pinned: Some(false),
742                            is_direct: Some(true),
743                            resolved_package: None,
744                            extra_data: None,
745                        });
746                    }
747
748                    let actual_package_name = if let Some((actual_package_name, _constraint)) =
749                        parse_alias_adapter(version_str)
750                    {
751                        actual_package_name
752                    } else {
753                        name.as_str()
754                    };
755
756                    let package_url = npm_purl(actual_package_name, None)?;
757
758                    let is_opt = if let Some(meta) = optional_meta {
759                        meta.get(name).copied()
760                    } else {
761                        Some(is_optional)
762                    };
763
764                    Some(Dependency {
765                        purl: Some(package_url),
766                        extracted_requirement: Some(truncate_field(version_str.to_string())),
767                        scope: Some(scope.to_string()),
768                        is_runtime: Some(is_runtime),
769                        is_optional: is_opt,
770                        is_pinned: Some(false),
771                        is_direct: Some(true),
772                        resolved_package: None,
773                        extra_data: None,
774                    })
775                })
776                .collect()
777        })
778}
779
780/// Extracts dependencies from the `dependencies` or `devDependencies` field in the JSON.
781fn extract_dependencies(json: &Value, is_optional: bool) -> Vec<Dependency> {
782    let field = if is_optional {
783        FIELD_DEV_DEPENDENCIES
784    } else {
785        FIELD_DEPENDENCIES
786    };
787
788    let scope = if is_optional {
789        "devDependencies"
790    } else {
791        "dependencies"
792    };
793
794    extract_dependency_group(json, field, scope, !is_optional, is_optional, None)
795}
796
797fn extract_peer_dependencies(json: &Value, meta: &HashMap<String, bool>) -> Vec<Dependency> {
798    extract_dependency_group(
799        json,
800        FIELD_PEER_DEPENDENCIES,
801        "peerDependencies",
802        true,
803        false,
804        Some(meta),
805    )
806}
807
808/// Extracts optional dependencies from the `optionalDependencies` field in the JSON.
809/// Optional dependencies are marked with is_optional: true, is_runtime: true, and scope "optionalDependencies".
810fn extract_optional_dependencies(json: &Value) -> Vec<Dependency> {
811    extract_dependency_group(
812        json,
813        FIELD_OPTIONAL_DEPENDENCIES,
814        "optionalDependencies",
815        true,
816        true,
817        None,
818    )
819}
820
821fn extract_bundled_dependencies(json: &Value) -> Vec<Dependency> {
822    if let Some(bundled) = json
823        .get(FIELD_BUNDLED_DEPENDENCIES)
824        .and_then(|v| v.as_array())
825    {
826        extract_bundled_list(bundled)
827    } else {
828        Vec::new()
829    }
830}
831
832/// Helper function to extract bundled dependencies from an array of package names.
833fn extract_bundled_list(bundled_array: &[Value]) -> Vec<Dependency> {
834    bundled_array
835        .iter()
836        .take(MAX_ITERATION_COUNT)
837        .filter_map(|value| {
838            let name = value.as_str()?;
839            // Create PURL without version for bundled dependencies
840            let package_url = npm_purl(name, None)?;
841
842            Some(Dependency {
843                purl: Some(package_url),
844                extracted_requirement: None,
845                scope: Some("bundledDependencies".to_string()),
846                is_runtime: Some(true),
847                is_optional: Some(false),
848                is_pinned: Some(false),
849                is_direct: Some(true),
850                resolved_package: None,
851                extra_data: None,
852            })
853        })
854        .collect()
855}
856
857/// Extracts Yarn resolutions from the `resolutions` field.
858/// Returns resolutions as a HashMap to be stored in extra_data.
859fn extract_resolutions(json: &Value) -> Option<HashMap<String, serde_json::Value>> {
860    json.get(FIELD_RESOLUTIONS)
861        .and_then(|resolutions| resolutions.as_object())
862        .map(|resolutions_obj| {
863            let mut extra_data = HashMap::new();
864            extra_data.insert(
865                "resolutions".to_string(),
866                serde_json::Value::Object(resolutions_obj.clone()),
867            );
868            extra_data
869        })
870}
871
872fn extract_peer_dependencies_meta(json: &Value) -> HashMap<String, bool> {
873    json.get(FIELD_PEER_DEPENDENCIES_META)
874        .and_then(|meta| meta.as_object())
875        .map_or_else(HashMap::new, |meta_obj| {
876            meta_obj
877                .iter()
878                .take(MAX_ITERATION_COUNT)
879                .filter_map(|(package_name, meta_value)| {
880                    meta_value.as_object().and_then(|obj| {
881                        obj.get("optional")
882                            .and_then(|opt| opt.as_bool())
883                            .map(|optional| (package_name.clone(), optional))
884                    })
885                })
886                .collect()
887        })
888}
889
890fn extract_dependencies_meta(json: &Value) -> Option<serde_json::Value> {
891    json.get(FIELD_DEPENDENCIES_META).cloned()
892}
893
894fn extract_overrides(json: &Value) -> Option<serde_json::Value> {
895    json.get(FIELD_OVERRIDES).cloned()
896}
897
898fn extract_description(json: &Value) -> Option<String> {
899    json.get(FIELD_DESCRIPTION)
900        .and_then(|v| v.as_str())
901        .map(|s| truncate_field(s.to_string()))
902}
903
904fn extract_homepage_url(json: &Value) -> Option<String> {
905    match json.get(FIELD_HOMEPAGE) {
906        Some(Value::String(homepage)) => normalize_non_empty_string(homepage).map(truncate_field),
907        _ => None,
908    }
909}
910
/// Trims surrounding whitespace from `value` and returns the result as an owned string.
///
/// Returns `None` when nothing remains after trimming.
fn normalize_non_empty_string(value: &str) -> Option<String> {
    match value.trim() {
        "" => None,
        trimmed => Some(trimmed.to_string()),
    }
}
919
920fn normalize_optional_party_url(value: &str) -> Option<String> {
921    let normalized = normalize_non_empty_string(value)?;
922
923    if normalized.eq_ignore_ascii_case("none") {
924        None
925    } else {
926        Some(normalized)
927    }
928}
929
930fn extract_keywords_as_vec(json: &Value) -> Vec<String> {
931    json.get(FIELD_KEYWORDS)
932        .and_then(|v| {
933            if let Some(str) = v.as_str() {
934                Some(vec![str.to_string()])
935            } else if let Some(arr) = v.as_array() {
936                let keywords: Vec<String> = arr
937                    .iter()
938                    .take(MAX_ITERATION_COUNT)
939                    .filter_map(|kw| kw.as_str())
940                    .map(|s| truncate_field(s.to_string()))
941                    .collect();
942                if keywords.is_empty() {
943                    None
944                } else {
945                    Some(keywords)
946                }
947            } else {
948                None
949            }
950        })
951        .unwrap_or_default()
952}
953
954fn extract_engines(json: &Value) -> Option<serde_json::Value> {
955    json.get(FIELD_ENGINES).cloned()
956}
957
958fn extract_raw_extra_data_field(json: &Value, field: &str) -> Option<serde_json::Value> {
959    json.get(field).cloned()
960}
961
962fn extract_package_manager(json: &Value) -> Option<String> {
963    json.get(FIELD_PACKAGE_MANAGER)
964        .and_then(|v| v.as_str())
965        .map(|s| truncate_field(s.to_string()))
966}
967
968fn extract_workspaces(json: &Value) -> Option<serde_json::Value> {
969    json.get(FIELD_WORKSPACES).cloned()
970}
971
972fn extract_private(json: &Value) -> Option<bool> {
973    json.get(FIELD_PRIVATE).and_then(|v| v.as_bool())
974}
975
976fn extract_bugs(json: &Value) -> Option<String> {
977    match json.get(FIELD_BUGS) {
978        Some(bugs) => {
979            if let Some(url) = bugs.as_str() {
980                normalize_non_empty_string(url).map(truncate_field)
981            } else if let Some(obj) = bugs.as_object() {
982                obj.get("url")
983                    .and_then(|v| v.as_str())
984                    .and_then(normalize_non_empty_string)
985                    .map(truncate_field)
986            } else {
987                None
988            }
989        }
990        None => None,
991    }
992}
993
994fn extract_dist_hashes(dist: &Value) -> (Option<String>, Option<String>, Option<String>) {
995    let mut sha1 = dist
996        .get("shasum")
997        .and_then(|v| v.as_str())
998        .and_then(normalize_non_empty_string);
999    let mut sha256 = None;
1000    let mut sha512 = None;
1001
1002    if let Some(integrity) = dist.get("integrity").and_then(|v| v.as_str())
1003        && let Some((algo, hex_digest)) = parse_sri(integrity)
1004    {
1005        match algo.as_str() {
1006            "sha1" => {
1007                if sha1.is_none() {
1008                    sha1 = Some(hex_digest);
1009                }
1010            }
1011            "sha256" => sha256 = Some(hex_digest),
1012            "sha512" => sha512 = Some(hex_digest),
1013            _ => {}
1014        }
1015    }
1016
1017    (sha1, sha256, sha512)
1018}
1019
1020fn extract_dist_tarball(dist: &Value) -> Option<String> {
1021    dist.get("tarball")
1022        .or_else(|| dist.get("dnl_url"))
1023        .and_then(|v| v.as_str())
1024        .map(normalize_npm_registry_tarball_url)
1025        .map(truncate_field)
1026}
1027
/// Rewrites URLs starting with `http://registry.npmjs.org/` to use `https://`.
///
/// Every other URL is returned unchanged as an owned string.
fn normalize_npm_registry_tarball_url(url: &str) -> String {
    match url.strip_prefix("http://registry.npmjs.org/") {
        Some(path) => format!("https://registry.npmjs.org/{path}"),
        None => url.to_string(),
    }
}
1035
1036fn combine_extra_data(
1037    extra_data: Option<HashMap<String, serde_json::Value>>,
1038    additional_data: HashMap<String, serde_json::Value>,
1039) -> HashMap<String, serde_json::Value> {
1040    let mut combined = extra_data.unwrap_or_default();
1041    for (key, value) in additional_data {
1042        combined.insert(key, value);
1043    }
1044    combined
1045}
1046
// Invokes the crate-level `register_parser!` macro with this parser's metadata: a
// human-readable description, the `**/package.json` path glob, the "npm" and "JavaScript"
// labels, and a link to the npm package.json reference documentation.
// NOTE(review): the meaning of each positional argument is defined by the macro — confirm there.
crate::register_parser!(
    "npm package.json manifest",
    &["**/package.json"],
    "npm",
    "JavaScript",
    Some("https://docs.npmjs.com/cli/v10/configuring-npm/package-json"),
);