1use crate::models::{
25 DatasourceId, Dependency, PackageData, PackageType, Party, Sha1Digest, Sha256Digest,
26 Sha512Digest,
27};
28use crate::parser_warn as warn;
29use crate::parsers::utils::{MAX_ITERATION_COUNT, npm_purl, parse_sri, truncate_field};
30use serde_json::Value;
31use std::collections::HashMap;
32use std::path::Path;
33
34use super::PackageParser;
35use super::license_normalization::normalize_spdx_declared_license;
36use super::metadata::ParserMetadata;
37
// Top-level `package.json` keys read by this parser, grouped by purpose.

// Identity and descriptive metadata.
const FIELD_NAME: &str = "name";
const FIELD_VERSION: &str = "version";
const FIELD_LICENSE: &str = "license";
const FIELD_LICENSES: &str = "licenses";
const FIELD_HOMEPAGE: &str = "homepage";
const FIELD_REPOSITORY: &str = "repository";

// People attached to the package.
const FIELD_AUTHOR: &str = "author";
const FIELD_CONTRIBUTORS: &str = "contributors";
const FIELD_MAINTAINERS: &str = "maintainers";

// Dependency tables.
const FIELD_DEPENDENCIES: &str = "dependencies";
const FIELD_DEV_DEPENDENCIES: &str = "devDependencies";
const FIELD_PEER_DEPENDENCIES: &str = "peerDependencies";
const FIELD_OPTIONAL_DEPENDENCIES: &str = "optionalDependencies";
const FIELD_BUNDLED_DEPENDENCIES: &str = "bundledDependencies";
const FIELD_RESOLUTIONS: &str = "resolutions";

// Free-form descriptive fields.
const FIELD_DESCRIPTION: &str = "description";
const FIELD_KEYWORDS: &str = "keywords";

// Values copied into `extra_data`.
const FIELD_ENGINES: &str = "engines";
const FIELD_OS: &str = "os";
const FIELD_CPU: &str = "cpu";
const FIELD_LIBC: &str = "libc";
const FIELD_DEPRECATED: &str = "deprecated";
const FIELD_HAS_BIN: &str = "hasBin";
const FIELD_PACKAGE_MANAGER: &str = "packageManager";
const FIELD_WORKSPACES: &str = "workspaces";
const FIELD_PRIVATE: &str = "private";

// Miscellaneous objects.
const FIELD_BUGS: &str = "bugs";
const FIELD_DIST: &str = "dist";
const FIELD_OVERRIDES: &str = "overrides";
const FIELD_PEER_DEPENDENCIES_META: &str = "peerDependenciesMeta";
const FIELD_DEPENDENCIES_META: &str = "dependenciesMeta";
69
70pub struct NpmParser;
75
76impl PackageParser for NpmParser {
77 const PACKAGE_TYPE: PackageType = PackageType::Npm;
78
79 fn metadata() -> Vec<ParserMetadata> {
80 vec![ParserMetadata {
81 description: "npm package.json manifest",
82 file_patterns: &["**/package.json"],
83 package_type: "npm",
84 primary_language: "JavaScript",
85 documentation_url: Some("https://docs.npmjs.com/cli/v10/configuring-npm/package-json"),
86 }]
87 }
88
89 fn extract_packages(path: &Path) -> Vec<PackageData> {
90 let (json, _field_lines) = match read_and_parse_json_with_lines(path) {
91 Ok((json, lines)) => (json, lines),
92 Err(e) => {
93 warn!("Failed to read or parse package.json at {:?}: {}", path, e);
94 return vec![default_package_data()];
95 }
96 };
97
98 let name = extract_non_empty_string(&json, FIELD_NAME);
99 let version = extract_non_empty_string(&json, FIELD_VERSION);
100 let namespace = extract_namespace(&name);
101 let package_name = extract_package_name(&name);
102 let description = extract_description(&json);
103
104 let extracted_license_statement = extract_license_statement(&json);
105 let (declared_license_expression, declared_license_expression_spdx, license_detections) =
106 normalize_spdx_declared_license(extract_declared_license_candidate(&json).as_deref());
107 let peer_dependencies_meta = extract_peer_dependencies_meta(&json);
108 let dependencies = extract_dependencies(&json, false);
109 let dev_dependencies = extract_dependencies(&json, true);
110 let peer_dependencies = extract_peer_dependencies(&json, &peer_dependencies_meta);
111 let optional_dependencies = extract_optional_dependencies(&json);
112 let bundled_dependencies = extract_bundled_dependencies(&json);
113 let purl = create_package_url(&name, &version, &namespace);
114 let keywords_vec = extract_keywords_as_vec(&json);
115
116 let mut extra_data_map = HashMap::new();
117
118 if let Some(resolutions) = extract_resolutions(&json) {
119 extra_data_map = combine_extra_data(Some(extra_data_map), resolutions);
120 }
121
122 if let Some(engines) = extract_engines(&json) {
123 extra_data_map.insert("engines".to_string(), engines);
124 }
125
126 for field in [
127 FIELD_OS,
128 FIELD_CPU,
129 FIELD_LIBC,
130 FIELD_DEPRECATED,
131 FIELD_HAS_BIN,
132 ] {
133 if let Some(value) = extract_raw_extra_data_field(&json, field) {
134 extra_data_map.insert(field.to_string(), value);
135 }
136 }
137
138 if let Some(package_manager) = extract_package_manager(&json) {
139 extra_data_map.insert(
140 "packageManager".to_string(),
141 serde_json::Value::String(package_manager),
142 );
143 }
144
145 if let Some(workspaces) = extract_workspaces(&json) {
146 extra_data_map.insert("workspaces".to_string(), workspaces);
147 }
148
149 if let Some(overrides) = extract_overrides(&json) {
150 extra_data_map.insert("overrides".to_string(), overrides);
151 }
152
153 if let Some(private) = extract_private(&json) {
154 extra_data_map.insert("private".to_string(), serde_json::Value::Bool(private));
155 }
156
157 if let Some(dependencies_meta) = extract_dependencies_meta(&json) {
158 extra_data_map.insert("dependenciesMeta".to_string(), dependencies_meta);
159 }
160
161 let extra_data = if extra_data_map.is_empty() {
162 None
163 } else {
164 Some(extra_data_map)
165 };
166
167 let (dist_sha1, dist_sha256, dist_sha512) = match json.get(FIELD_DIST) {
168 Some(dist) => extract_dist_hashes(dist),
169 None => (None, None, None),
170 };
171
172 let download_url = json
173 .get(FIELD_DIST)
174 .and_then(extract_dist_tarball)
175 .or_else(|| generate_registry_download_url(&namespace, &package_name, &version));
176
177 let api_data_url = generate_npm_api_url(&namespace, &package_name, &version);
178 let repository_homepage_url = generate_repository_homepage_url(&namespace, &package_name);
179 let repository_download_url =
180 generate_repository_download_url(&namespace, &package_name, &version);
181 let vcs_url = extract_vcs_url(&json);
182
183 vec![PackageData {
184 package_type: Some(Self::PACKAGE_TYPE),
185 namespace,
186 name: package_name,
187 version,
188 qualifiers: None,
189 subpath: None,
190 primary_language: Some("JavaScript".to_string()),
191 description,
192 release_date: None,
193 parties: extract_parties(&json),
194 keywords: keywords_vec,
195 homepage_url: extract_homepage_url(&json),
196 download_url,
197 size: None,
198 sha1: dist_sha1.and_then(|h| Sha1Digest::from_hex(&h).ok()),
199 md5: None,
200 sha256: dist_sha256.and_then(|h| Sha256Digest::from_hex(&h).ok()),
201 sha512: dist_sha512.and_then(|h| Sha512Digest::from_hex(&h).ok()),
202 bug_tracking_url: extract_bugs(&json),
203 code_view_url: None,
204 vcs_url,
205 copyright: None,
206 holder: None,
207 declared_license_expression,
208 declared_license_expression_spdx,
209 license_detections,
210 other_license_expression: None,
211 other_license_expression_spdx: None,
212 other_license_detections: Vec::new(),
213 extracted_license_statement,
214 notice_text: None,
215 source_packages: Vec::new(),
216 file_references: Vec::new(),
217 is_private: json
218 .get("private")
219 .and_then(|v| v.as_bool())
220 .unwrap_or(false),
221 is_virtual: false,
222 extra_data,
223 dependencies: [
224 dependencies,
225 dev_dependencies,
226 peer_dependencies,
227 optional_dependencies,
228 bundled_dependencies,
229 ]
230 .concat(),
231 repository_homepage_url,
232 repository_download_url,
233 api_data_url,
234 datasource_id: Some(DatasourceId::NpmPackageJson),
235 purl,
236 }]
237 }
238
239 fn is_match(path: &Path) -> bool {
240 path.file_name().is_some_and(|name| name == "package.json")
241 }
242}
243
244fn read_and_parse_json_with_lines(path: &Path) -> Result<(Value, HashMap<String, usize>), String> {
246 let content = crate::parsers::utils::read_file_to_string(path, None)
248 .map_err(|e| format!("Failed to read file: {}", e))?;
249
250 let json: Value =
252 serde_json::from_str(&content).map_err(|e| format!("Failed to parse JSON: {}", e))?;
253
254 let mut field_lines = HashMap::new();
256 for (line_num, line) in content.lines().enumerate().take(MAX_ITERATION_COUNT) {
257 let trimmed = line.trim();
258 if let Some(field_name) = extract_field_name(trimmed) {
259 field_lines.insert(field_name, line_num + 1);
260 }
261 }
262
263 Ok((json, field_lines))
264}
265
/// Returns the text between the leading double quote of `line` and the next
/// double quote (or the end of the line when there is none).
///
/// Yields `None` when the trimmed line does not start with `"` or when the
/// quoted text is empty. Escaped quotes are not interpreted.
fn extract_field_name(line: &str) -> Option<String> {
    let after_quote = line.trim().strip_prefix('"')?;
    let key = after_quote.split('"').next().unwrap_or(after_quote);
    (!key.is_empty()).then(|| key.to_owned())
}
292
/// Returns the part of `name` before its first `/` (e.g. `@scope` for
/// `@scope/pkg`), or `None` when the name is absent or contains no `/`.
fn extract_namespace(name: &Option<String>) -> Option<String> {
    let (scope, _rest) = name.as_deref()?.split_once('/')?;
    Some(scope.to_owned())
}
302
/// Returns the unscoped package name: the second `/`-separated segment of
/// `name` when there is one, otherwise the whole name.
fn extract_package_name(name: &Option<String>) -> Option<String> {
    let full = name.as_deref()?;
    // Without a `/` there is no second segment, so the full name is used.
    let bare = full.split('/').nth(1).unwrap_or(full);
    Some(bare.to_owned())
}
312
313fn create_package_url(
314 name: &Option<String>,
315 version: &Option<String>,
316 _namespace: &Option<String>,
317) -> Option<String> {
318 let name = name.as_ref()?;
321 npm_purl(name, version.as_deref())
322}
323
324fn extract_license_statement(json: &Value) -> Option<String> {
325 let mut statements = Vec::new();
326
327 if let Some(license_value) = json.get(FIELD_LICENSE) {
328 if let Some(license_str) = license_value.as_str() {
329 statements.push(format!("- {}", license_str));
330 } else if let Some(license_obj) = license_value.as_object()
331 && let Some(type_val) = license_obj.get("type").and_then(|v| v.as_str())
332 {
333 statements.push(format!("- type: {}", type_val));
334 if let Some(url_val) = license_obj.get("url").and_then(|v| v.as_str()) {
335 statements.push(format!(" url: {}", url_val));
336 }
337 }
338 }
339
340 if let Some(licenses) = json.get(FIELD_LICENSES).and_then(|v| v.as_array()) {
341 for license in licenses.iter().take(MAX_ITERATION_COUNT) {
342 if let Some(license_obj) = license.as_object()
343 && let Some(type_val) = license_obj.get("type").and_then(|v| v.as_str())
344 {
345 statements.push(format!("- type: {}", type_val));
346 if let Some(url_val) = license_obj.get("url").and_then(|v| v.as_str()) {
347 statements.push(format!(" url: {}", url_val));
348 }
349 }
350 }
351 }
352
353 if statements.is_empty() {
354 None
355 } else {
356 Some(truncate_field(format!("{}\n", statements.join("\n"))))
357 }
358}
359
360fn extract_declared_license_candidate(json: &Value) -> Option<String> {
361 json.get(FIELD_LICENSE)
362 .and_then(|value| value.as_str())
363 .map(str::trim)
364 .filter(|value| !value.is_empty())
365 .map(|s| truncate_field(s.to_string()))
366}
367
368fn extract_vcs_url(json: &Value) -> Option<String> {
372 let (vcs_tool, vcs_repository) = match json.get(FIELD_REPOSITORY) {
373 Some(Value::String(url)) => {
374 let normalized = normalize_repo_url(url);
375 if normalized.is_empty() {
376 return None;
377 }
378 (None, normalized)
379 }
380 Some(Value::Object(obj)) => {
381 let repo_url = obj.get("url").and_then(|u| u.as_str()).unwrap_or("");
382 let normalized = normalize_repo_url(repo_url);
383 if normalized.is_empty() {
384 return None;
385 }
386 let tool = obj
387 .get("type")
388 .and_then(|t| t.as_str())
389 .unwrap_or("git")
390 .to_string();
391 let tool_for_prefix = if normalized.starts_with("git://")
392 || normalized.starts_with("git+")
393 || normalized.starts_with("hg://")
394 || normalized.starts_with("hg+")
395 || normalized.starts_with("svn://")
396 || normalized.starts_with("svn+")
397 {
398 None
399 } else {
400 Some(tool)
401 };
402 (tool_for_prefix, normalized)
403 }
404 _ => return None,
405 };
406
407 if vcs_repository.is_empty() {
408 return None;
409 }
410
411 let mut vcs_url = vcs_tool.map_or_else(
412 || vcs_repository.clone(),
413 |tool| format!("{}+{}", tool, vcs_repository),
414 );
415
416 if let Some(vcs_revision) = json
417 .get("gitHead")
418 .and_then(|v| v.as_str())
419 .and_then(normalize_non_empty_string)
420 {
421 vcs_url.push('@');
422 vcs_url.push_str(&vcs_revision);
423 }
424
425 if let Some(Value::Object(obj)) = json.get(FIELD_REPOSITORY)
426 && let Some(directory) = obj.get("directory").and_then(|d| d.as_str())
427 {
428 vcs_url.push('#');
429 vcs_url.push_str(directory);
430 }
431
432 Some(truncate_field(vcs_url))
433}
434
/// Expands npm repository shorthands into full URLs.
///
/// * Blank input yields an empty string.
/// * URLs starting with a recognised scheme are returned trimmed but unchanged.
/// * `git@host:path` becomes `https://host/path`.
/// * `github:`, `gitlab:`, `bitbucket:` and `gist:` prefixes map to their hosts.
/// * A bare `owner/repo` (exactly one `/`, no `:`) is treated as a GitHub repo.
/// * Anything else is returned trimmed.
fn normalize_repo_url(url: &str) -> String {
    const KNOWN_SCHEMES: [&str; 12] = [
        "https://",
        "http://",
        "git://",
        "git+git://",
        "git+https://",
        "git+http://",
        "hg://",
        "hg+http://",
        "hg+https://",
        "svn://",
        "svn+http://",
        "svn+https://",
    ];

    let trimmed = url.trim();
    if trimmed.is_empty() {
        return String::new();
    }

    if KNOWN_SCHEMES.iter().any(|scheme| trimmed.starts_with(scheme)) {
        return trimmed.to_owned();
    }

    // SCP-like syntax: git@host:owner/repo
    if let Some(rest) = trimmed.strip_prefix("git@") {
        if let Some((host, path)) = rest.split_once(':') {
            return format!("https://{}/{}", host, path);
        }
    }

    match trimmed.split_once(':') {
        Some(("github", repo)) => format!("https://github.com/{}", repo),
        Some(("gitlab", repo)) => format!("https://gitlab.com/{}", repo),
        Some(("bitbucket", repo)) => format!("https://bitbucket.org/{}", repo),
        Some(("gist", repo)) => format!("https://gist.github.com/{}", repo),
        // Unknown `prefix:` forms are passed through untouched.
        Some(_) => trimmed.to_owned(),
        None if trimmed.matches('/').count() == 1 => format!("https://github.com/{}", trimmed),
        None => trimmed.to_owned(),
    }
}
489
490fn extract_parties(json: &Value) -> Vec<Party> {
492 let mut parties = Vec::new();
493
494 if let Some(author) = json.get(FIELD_AUTHOR) {
496 if let Some(author_list) = extract_parties_from_array(author) {
497 for mut party in author_list {
499 if party.role.is_none() {
500 party.role = Some("author".to_string());
501 }
502 parties.push(party);
503 }
504 } else if let Some(mut party) = extract_party_from_field(author) {
505 party.role = Some("author".to_string());
507 parties.push(party);
508 }
509 }
510
511 if let Some(contributors) = json.get(FIELD_CONTRIBUTORS)
513 && let Some(mut party_list) = extract_parties_from_array(contributors)
514 {
515 for party in &mut party_list {
516 if party.role.is_none() {
517 party.role = Some("contributor".to_string());
518 }
519 }
520 parties.extend(party_list);
521 }
522
523 if let Some(maintainers) = json.get(FIELD_MAINTAINERS)
525 && let Some(mut party_list) = extract_parties_from_array(maintainers)
526 {
527 for party in &mut party_list {
528 if party.role.is_none() {
529 party.role = Some("maintainer".to_string());
530 }
531 }
532 parties.extend(party_list);
533 }
534
535 parties
536}
537
538fn extract_party_from_field(field: &Value) -> Option<Party> {
540 match field {
541 Value::String(s) => {
542 if let Some(email) = extract_email_from_string(s) {
543 Some(Party {
544 r#type: Some("person".to_string()),
545 role: None,
546 name: extract_name_from_author_string(s).map(truncate_field),
547 email: Some(truncate_field(email)),
548 url: None,
549 organization: None,
550 organization_url: None,
551 timezone: None,
552 })
553 } else {
554 Some(Party {
555 r#type: Some("person".to_string()),
556 role: None,
557 name: Some(truncate_field(s.clone())),
558 email: None,
559 url: None,
560 organization: None,
561 organization_url: None,
562 timezone: None,
563 })
564 }
565 }
566 Value::Object(obj) => Some(Party {
567 r#type: Some("person".to_string()),
568 role: obj
569 .get("role")
570 .and_then(|v| v.as_str())
571 .map(|s| truncate_field(s.to_string())),
572 name: obj
573 .get("name")
574 .and_then(|v| v.as_str())
575 .map(|s| truncate_field(s.to_string())),
576 email: obj
577 .get("email")
578 .and_then(|v| v.as_str())
579 .map(|s| truncate_field(s.to_string())),
580 url: obj
581 .get("url")
582 .and_then(|v| v.as_str())
583 .and_then(normalize_optional_party_url)
584 .map(truncate_field),
585 organization: None,
586 organization_url: None,
587 timezone: None,
588 }),
589 _ => None,
590 }
591}
592
593fn extract_parties_from_array(array: &Value) -> Option<Vec<Party>> {
595 if let Value::Array(items) = array {
596 let parties = items
597 .iter()
598 .take(MAX_ITERATION_COUNT)
599 .filter_map(extract_party_from_field)
600 .collect::<Vec<_>>();
601 if !parties.is_empty() {
602 return Some(parties);
603 }
604 }
605 None
606}
607
/// Extracts the text between the first `<` and the next `>` that follows it.
///
/// The closing bracket is searched for *after* the opening one, so a stray `>`
/// earlier in the string (e.g. `"A > B <a@b.c>"`) no longer hides the email.
/// Returns `None` when there is no `<` or no `>` after it.
fn extract_email_from_string(author_str: &str) -> Option<String> {
    let (_, after_open) = author_str.split_once('<')?;
    let (email, _) = after_open.split_once('>')?;
    Some(email.to_string())
}
618
/// Returns the trimmed name portion of an author string.
///
/// With a `<` present, the name is the text before it (`None` when blank).
/// Without one, the whole trimmed string is returned, even when empty.
fn extract_name_from_author_string(author_str: &str) -> Option<String> {
    match author_str.split_once('<') {
        Some((before_email, _)) => {
            let candidate = before_email.trim();
            (!candidate.is_empty()).then(|| candidate.to_string())
        }
        None => Some(author_str.trim().to_string()),
    }
}
631
632fn default_package_data() -> PackageData {
633 PackageData {
634 package_type: Some(NpmParser::PACKAGE_TYPE),
635 primary_language: Some("JavaScript".to_string()),
636 datasource_id: Some(DatasourceId::NpmPackageJson),
637 ..Default::default()
638 }
639}
640
/// Parses an npm alias requirement of the form `npm:<package>@<constraint>`.
///
/// Returns `(package, constraint)`, where `package` may be scoped
/// (`@scope/name`). Returns `None` for anything that is not an `npm:` alias
/// with a version separator.
///
/// Only the `npm:` protocol is recognised: other specs that merely contain both
/// `:` and `@` (for example `git+ssh://git@github.com:user/repo.git`) are not
/// aliases and must not be mistaken for one. The leading `@` of a scope is
/// never taken as the version separator, so `npm:@scope/pkg` yields `None`
/// rather than an empty package name.
fn parse_alias_adapter(version_str: &str) -> Option<(&str, &str)> {
    let spec = version_str.strip_prefix("npm:")?;

    // Skip the scope marker when looking for the `@` that precedes the version.
    let search_from = usize::from(spec.starts_with('@'));
    let separator = spec[search_from..].rfind('@')? + search_from;

    Some((&spec[..separator], &spec[separator + 1..]))
}
649
650fn extract_non_empty_string(json: &Value, field: &str) -> Option<String> {
651 json.get(field)
652 .and_then(|value| value.as_str())
653 .map(str::trim)
654 .filter(|value| !value.is_empty())
655 .map(|s| truncate_field(s.to_string()))
656}
657
/// Builds the npm registry API URL for a package, optionally for one version.
///
/// With a namespace, `<namespace>/<name>` is used with every `/` replaced by
/// `%2f`. Returns `None` when `name` is absent.
fn generate_npm_api_url(
    namespace: &Option<String>,
    name: &Option<String>,
    version: &Option<String>,
) -> Option<String> {
    const REGISTRY: &str = "https://registry.npmjs.org";

    let package = name.as_deref()?;

    let encoded_name = match namespace {
        Some(scope) => format!("{}/{}", scope, package).replace('/', "%2f"),
        None => package.to_owned(),
    };

    Some(match version {
        Some(release) => format!("{}/{}/{}", REGISTRY, encoded_name, release),
        None => format!("{}/{}", REGISTRY, encoded_name),
    })
}
680
/// Joins namespace and name into `<namespace>/<name>`, or returns just the name
/// when there is no namespace. Returns `None` when `name` is absent.
fn build_registry_package_path(
    namespace: &Option<String>,
    name: &Option<String>,
) -> Option<String> {
    let name = name.as_ref()?;
    let path = match namespace {
        Some(namespace) => format!("{namespace}/{name}"),
        None => name.clone(),
    };
    Some(path)
}
691
692fn generate_repository_homepage_url(
693 namespace: &Option<String>,
694 name: &Option<String>,
695) -> Option<String> {
696 build_registry_package_path(namespace, name)
697 .map(|package_path| format!("https://www.npmjs.com/package/{package_path}"))
698}
699
700fn generate_registry_download_url(
701 namespace: &Option<String>,
702 name: &Option<String>,
703 version: &Option<String>,
704) -> Option<String> {
705 match (
706 build_registry_package_path(namespace, name),
707 name.as_ref(),
708 version.as_ref(),
709 ) {
710 (Some(package_path), Some(name), Some(version)) => Some(format!(
711 "https://registry.npmjs.org/{}/-/{}-{}.tgz",
712 package_path, name, version
713 )),
714 _ => None,
715 }
716}
717
718fn generate_repository_download_url(
719 namespace: &Option<String>,
720 name: &Option<String>,
721 version: &Option<String>,
722) -> Option<String> {
723 generate_registry_download_url(namespace, name, version)
724}
725
726fn extract_dependency_group(
727 json: &Value,
728 field: &str,
729 scope: &str,
730 is_runtime: bool,
731 is_optional: bool,
732 optional_meta: Option<&HashMap<String, bool>>,
733) -> Vec<Dependency> {
734 json.get(field)
735 .and_then(|deps| deps.as_object())
736 .map_or_else(Vec::new, |deps| {
737 deps.iter()
738 .take(MAX_ITERATION_COUNT)
739 .filter_map(|(name, version)| {
740 let version_str = version.as_str()?;
741
742 if version_str.starts_with("workspace:") {
743 let package_url = npm_purl(name, None)?;
744 let is_opt = if let Some(meta) = optional_meta {
745 meta.get(name).copied()
746 } else {
747 Some(is_optional)
748 };
749 return Some(Dependency {
750 purl: Some(package_url),
751 extracted_requirement: Some(truncate_field(version_str.to_string())),
752 scope: Some(scope.to_string()),
753 is_runtime: Some(is_runtime),
754 is_optional: is_opt,
755 is_pinned: Some(false),
756 is_direct: Some(true),
757 resolved_package: None,
758 extra_data: None,
759 });
760 }
761
762 let actual_package_name = if let Some((actual_package_name, _constraint)) =
763 parse_alias_adapter(version_str)
764 {
765 actual_package_name
766 } else {
767 name.as_str()
768 };
769
770 let package_url = npm_purl(actual_package_name, None)?;
771
772 let is_opt = if let Some(meta) = optional_meta {
773 meta.get(name).copied()
774 } else {
775 Some(is_optional)
776 };
777
778 Some(Dependency {
779 purl: Some(package_url),
780 extracted_requirement: Some(truncate_field(version_str.to_string())),
781 scope: Some(scope.to_string()),
782 is_runtime: Some(is_runtime),
783 is_optional: is_opt,
784 is_pinned: Some(false),
785 is_direct: Some(true),
786 resolved_package: None,
787 extra_data: None,
788 })
789 })
790 .collect()
791 })
792}
793
794fn extract_dependencies(json: &Value, is_optional: bool) -> Vec<Dependency> {
796 let field = if is_optional {
797 FIELD_DEV_DEPENDENCIES
798 } else {
799 FIELD_DEPENDENCIES
800 };
801
802 let scope = if is_optional {
803 "devDependencies"
804 } else {
805 "dependencies"
806 };
807
808 extract_dependency_group(json, field, scope, !is_optional, is_optional, None)
809}
810
811fn extract_peer_dependencies(json: &Value, meta: &HashMap<String, bool>) -> Vec<Dependency> {
812 extract_dependency_group(
813 json,
814 FIELD_PEER_DEPENDENCIES,
815 "peerDependencies",
816 true,
817 false,
818 Some(meta),
819 )
820}
821
822fn extract_optional_dependencies(json: &Value) -> Vec<Dependency> {
825 extract_dependency_group(
826 json,
827 FIELD_OPTIONAL_DEPENDENCIES,
828 "optionalDependencies",
829 true,
830 true,
831 None,
832 )
833}
834
835fn extract_bundled_dependencies(json: &Value) -> Vec<Dependency> {
836 if let Some(bundled) = json
837 .get(FIELD_BUNDLED_DEPENDENCIES)
838 .and_then(|v| v.as_array())
839 {
840 extract_bundled_list(bundled)
841 } else {
842 Vec::new()
843 }
844}
845
846fn extract_bundled_list(bundled_array: &[Value]) -> Vec<Dependency> {
848 bundled_array
849 .iter()
850 .take(MAX_ITERATION_COUNT)
851 .filter_map(|value| {
852 let name = value.as_str()?;
853 let package_url = npm_purl(name, None)?;
855
856 Some(Dependency {
857 purl: Some(package_url),
858 extracted_requirement: None,
859 scope: Some("bundledDependencies".to_string()),
860 is_runtime: Some(true),
861 is_optional: Some(false),
862 is_pinned: Some(false),
863 is_direct: Some(true),
864 resolved_package: None,
865 extra_data: None,
866 })
867 })
868 .collect()
869}
870
871fn extract_resolutions(json: &Value) -> Option<HashMap<String, serde_json::Value>> {
874 json.get(FIELD_RESOLUTIONS)
875 .and_then(|resolutions| resolutions.as_object())
876 .map(|resolutions_obj| {
877 let mut extra_data = HashMap::new();
878 extra_data.insert(
879 "resolutions".to_string(),
880 serde_json::Value::Object(resolutions_obj.clone()),
881 );
882 extra_data
883 })
884}
885
886fn extract_peer_dependencies_meta(json: &Value) -> HashMap<String, bool> {
887 json.get(FIELD_PEER_DEPENDENCIES_META)
888 .and_then(|meta| meta.as_object())
889 .map_or_else(HashMap::new, |meta_obj| {
890 meta_obj
891 .iter()
892 .take(MAX_ITERATION_COUNT)
893 .filter_map(|(package_name, meta_value)| {
894 meta_value.as_object().and_then(|obj| {
895 obj.get("optional")
896 .and_then(|opt| opt.as_bool())
897 .map(|optional| (package_name.clone(), optional))
898 })
899 })
900 .collect()
901 })
902}
903
904fn extract_dependencies_meta(json: &Value) -> Option<serde_json::Value> {
905 json.get(FIELD_DEPENDENCIES_META).cloned()
906}
907
908fn extract_overrides(json: &Value) -> Option<serde_json::Value> {
909 json.get(FIELD_OVERRIDES).cloned()
910}
911
912fn extract_description(json: &Value) -> Option<String> {
913 json.get(FIELD_DESCRIPTION)
914 .and_then(|v| v.as_str())
915 .map(|s| truncate_field(s.to_string()))
916}
917
918fn extract_homepage_url(json: &Value) -> Option<String> {
919 match json.get(FIELD_HOMEPAGE) {
920 Some(Value::String(homepage)) => normalize_non_empty_string(homepage).map(truncate_field),
921 _ => None,
922 }
923}
924
/// Trims `value` and returns it as an owned string, or `None` when nothing but
/// whitespace remains.
fn normalize_non_empty_string(value: &str) -> Option<String> {
    Some(value.trim())
        .filter(|text| !text.is_empty())
        .map(str::to_owned)
}
933
934fn normalize_optional_party_url(value: &str) -> Option<String> {
935 let normalized = normalize_non_empty_string(value)?;
936
937 if normalized.eq_ignore_ascii_case("none") {
938 None
939 } else {
940 Some(normalized)
941 }
942}
943
944fn extract_keywords_as_vec(json: &Value) -> Vec<String> {
945 json.get(FIELD_KEYWORDS)
946 .and_then(|v| {
947 if let Some(str) = v.as_str() {
948 Some(vec![str.to_string()])
949 } else if let Some(arr) = v.as_array() {
950 let keywords: Vec<String> = arr
951 .iter()
952 .take(MAX_ITERATION_COUNT)
953 .filter_map(|kw| kw.as_str())
954 .map(|s| truncate_field(s.to_string()))
955 .collect();
956 if keywords.is_empty() {
957 None
958 } else {
959 Some(keywords)
960 }
961 } else {
962 None
963 }
964 })
965 .unwrap_or_default()
966}
967
968fn extract_engines(json: &Value) -> Option<serde_json::Value> {
969 json.get(FIELD_ENGINES).cloned()
970}
971
972fn extract_raw_extra_data_field(json: &Value, field: &str) -> Option<serde_json::Value> {
973 json.get(field).cloned()
974}
975
976fn extract_package_manager(json: &Value) -> Option<String> {
977 json.get(FIELD_PACKAGE_MANAGER)
978 .and_then(|v| v.as_str())
979 .map(|s| truncate_field(s.to_string()))
980}
981
982fn extract_workspaces(json: &Value) -> Option<serde_json::Value> {
983 json.get(FIELD_WORKSPACES).cloned()
984}
985
986fn extract_private(json: &Value) -> Option<bool> {
987 json.get(FIELD_PRIVATE).and_then(|v| v.as_bool())
988}
989
990fn extract_bugs(json: &Value) -> Option<String> {
991 match json.get(FIELD_BUGS) {
992 Some(bugs) => {
993 if let Some(url) = bugs.as_str() {
994 normalize_non_empty_string(url).map(truncate_field)
995 } else if let Some(obj) = bugs.as_object() {
996 obj.get("url")
997 .and_then(|v| v.as_str())
998 .and_then(normalize_non_empty_string)
999 .map(truncate_field)
1000 } else {
1001 None
1002 }
1003 }
1004 None => None,
1005 }
1006}
1007
1008fn extract_dist_hashes(dist: &Value) -> (Option<String>, Option<String>, Option<String>) {
1009 let mut sha1 = dist
1010 .get("shasum")
1011 .and_then(|v| v.as_str())
1012 .and_then(normalize_non_empty_string);
1013 let mut sha256 = None;
1014 let mut sha512 = None;
1015
1016 if let Some(integrity) = dist.get("integrity").and_then(|v| v.as_str())
1017 && let Some((algo, hex_digest)) = parse_sri(integrity)
1018 {
1019 match algo.as_str() {
1020 "sha1" if sha1.is_none() => sha1 = Some(hex_digest),
1021 "sha1" => {}
1022 "sha256" => sha256 = Some(hex_digest),
1023 "sha512" => sha512 = Some(hex_digest),
1024 _ => {}
1025 }
1026 }
1027
1028 (sha1, sha256, sha512)
1029}
1030
1031fn extract_dist_tarball(dist: &Value) -> Option<String> {
1032 dist.get("tarball")
1033 .or_else(|| dist.get("dnl_url"))
1034 .and_then(|v| v.as_str())
1035 .map(normalize_npm_registry_tarball_url)
1036 .map(truncate_field)
1037}
1038
/// Rewrites `http://registry.npmjs.org/...` to `https://registry.npmjs.org/...`
/// and leaves every other URL unchanged.
fn normalize_npm_registry_tarball_url(url: &str) -> String {
    match url.strip_prefix("http://registry.npmjs.org/") {
        Some(path) => format!("https://registry.npmjs.org/{path}"),
        None => url.to_owned(),
    }
}
1046
1047fn combine_extra_data(
1048 extra_data: Option<HashMap<String, serde_json::Value>>,
1049 additional_data: HashMap<String, serde_json::Value>,
1050) -> HashMap<String, serde_json::Value> {
1051 let mut combined = extra_data.unwrap_or_default();
1052 for (key, value) in additional_data {
1053 combined.insert(key, value);
1054 }
1055 combined
1056}