1use std::collections::{HashMap, HashSet};
2use std::path::{Path, PathBuf};
3use std::process::Command;
4
5use serde::Deserialize;
6
7use super::types::{CrawledPackage, CrawlerOptions};
8
/// Batch size that this module's unit tests place in `CrawlerOptions::batch_size`.
///
/// Gated on `cfg(test)`, so it does not exist in non-test builds.
#[cfg(test)]
const DEFAULT_BATCH_SIZE: usize = 100;
12
/// Directory names that the workspace search for nested `node_modules` folders never enters.
///
/// Matching is by exact directory name; `node_modules` itself and dot-directories are
/// filtered separately by the search code.
const SKIP_DIRS: &[&str] = &["dist", "build", "coverage", "tmp", "temp", "__pycache__", "vendor"];
23
24#[derive(Deserialize)]
30struct PackageJsonPartial {
31 name: Option<String>,
32 version: Option<String>,
33}
34
35pub async fn read_package_json(pkg_json_path: &Path) -> Option<(String, String)> {
37 let content = tokio::fs::read_to_string(pkg_json_path).await.ok()?;
38 let pkg: PackageJsonPartial = serde_json::from_str(&content).ok()?;
39 let name = pkg.name?;
40 let version = pkg.version?;
41 if name.is_empty() || version.is_empty() {
42 return None;
43 }
44 Some((name, version))
45}
46
/// Splits an npm package name into `(namespace, name)`.
///
/// A name is treated as scoped only when it starts with `@` and contains a `/`; the
/// namespace keeps its leading `@` and the split happens at the first `/`. Every other
/// input is returned unchanged as the name with no namespace.
pub fn parse_package_name(full_name: &str) -> (Option<String>, String) {
    match full_name.split_once('/') {
        Some((scope, bare_name)) if scope.starts_with('@') => {
            (Some(scope.to_owned()), bare_name.to_owned())
        }
        _ => (None, full_name.to_owned()),
    }
}
66
/// Formats an npm package URL: `pkg:npm/<namespace>/<name>@<version>`, or
/// `pkg:npm/<name>@<version>` when there is no namespace.
///
/// The pieces are inserted verbatim; nothing is percent-encoded or validated.
pub fn build_npm_purl(namespace: Option<&str>, name: &str, version: &str) -> String {
    let qualified_name = namespace.map_or_else(|| name.to_owned(), |ns| format!("{ns}/{name}"));
    format!("pkg:npm/{qualified_name}@{version}")
}
78
/// Asks npm for its global `node_modules` directory by running `npm root -g`.
///
/// Despite the function name, the value returned is whatever `npm root -g` prints
/// (trimmed), not the output of `npm prefix`.
///
/// This call blocks the current thread until the child process exits.
///
/// # Errors
///
/// Returns a human-readable message when
/// * the `npm` process cannot be started,
/// * it exits with a non-zero status, or
/// * it exits successfully but prints nothing — an empty path is useless to callers and
///   the pnpm/yarn/bun lookups in this module reject it the same way.
pub fn get_npm_global_prefix() -> Result<String, String> {
    use std::process::Stdio;

    let output = Command::new("npm")
        .args(["root", "-g"])
        // No stdin so the child can never wait on interactive input.
        .stdin(Stdio::null())
        .stdout(Stdio::piped())
        .stderr(Stdio::piped())
        .output()
        .map_err(|e| format!("Failed to run `npm root -g`: {e}"))?;

    if !output.status.success() {
        return Err(
            "Failed to determine npm global prefix. Ensure npm is installed and in PATH."
                .to_string(),
        );
    }

    let root = String::from_utf8_lossy(&output.stdout).trim().to_string();
    if root.is_empty() {
        return Err("`npm root -g` succeeded but did not print a path.".to_string());
    }
    Ok(root)
}
102
/// Locates yarn's global `node_modules` directory.
///
/// Runs `yarn global dir` and appends `node_modules` to the directory it prints.
/// Returns `None` when yarn cannot be started, exits with a failure status, or prints
/// only whitespace. Blocks the current thread while the child process runs.
pub fn get_yarn_global_prefix() -> Option<String> {
    use std::process::Stdio;

    let output = Command::new("yarn")
        .args(["global", "dir"])
        .stdin(Stdio::null())
        .stdout(Stdio::piped())
        .stderr(Stdio::piped())
        .output()
        .ok()
        .filter(|finished| finished.status.success())?;

    let stdout = String::from_utf8_lossy(&output.stdout);
    let yarn_dir = stdout.trim();
    if yarn_dir.is_empty() {
        return None;
    }

    let modules_dir = Path::new(yarn_dir).join("node_modules");
    Some(modules_dir.to_string_lossy().into_owned())
}
123
/// Locates pnpm's global `node_modules` directory by running `pnpm root -g`.
///
/// Returns the trimmed output, or `None` when pnpm cannot be started, exits with a
/// failure status, or prints only whitespace. Blocks the current thread while the child
/// process runs.
pub fn get_pnpm_global_prefix() -> Option<String> {
    use std::process::Stdio;

    let mut pnpm = Command::new("pnpm");
    pnpm.args(["root", "-g"])
        .stdin(Stdio::null())
        .stdout(Stdio::piped())
        .stderr(Stdio::piped());

    let output = match pnpm.output() {
        Ok(finished) if finished.status.success() => finished,
        _ => return None,
    };

    let root = String::from_utf8_lossy(&output.stdout).trim().to_owned();
    Some(root).filter(|path| !path.is_empty())
}
144
/// Derives bun's global `node_modules` directory from `bun pm bin -g`.
///
/// The command prints bun's global bin directory; this function takes that directory's
/// parent and appends `install/global/node_modules` component by component.
///
/// Returns `None` when bun cannot be started, exits with a failure status, prints only
/// whitespace, or prints a path that has no parent. Blocks the current thread while the
/// child process runs.
pub fn get_bun_global_prefix() -> Option<String> {
    use std::process::Stdio;

    let output = Command::new("bun")
        .args(["pm", "bin", "-g"])
        .stdin(Stdio::null())
        .stdout(Stdio::piped())
        .stderr(Stdio::piped())
        .output()
        .ok()
        .filter(|finished| finished.status.success())?;

    let stdout = String::from_utf8_lossy(&output.stdout);
    let bin_dir = stdout.trim();
    if bin_dir.is_empty() {
        return None;
    }

    let mut modules_dir: PathBuf = Path::new(bin_dir).parent()?.to_path_buf();
    modules_dir.extend(["install", "global", "node_modules"]);
    Some(modules_dir.to_string_lossy().into_owned())
}
175
/// Crawler that discovers npm-style packages inside `node_modules` directories.
///
/// The type holds no data; every method works purely from its arguments. `Debug`,
/// `Clone` and `Copy` are derived so the crawler can be logged, embedded in other
/// derived types and passed around freely.
#[derive(Debug, Clone, Copy)]
pub struct NpmCrawler;
182
183impl NpmCrawler {
184 pub fn new() -> Self {
186 Self
187 }
188
189 pub async fn get_node_modules_paths(&self, options: &CrawlerOptions) -> Result<Vec<PathBuf>, std::io::Error> {
199 if options.global || options.global_prefix.is_some() {
200 if let Some(ref custom) = options.global_prefix {
201 return Ok(vec![custom.clone()]);
202 }
203 return Ok(self.get_global_node_modules_paths());
204 }
205
206 Ok(self.find_local_node_modules_dirs(&options.cwd).await)
207 }
208
209 pub async fn crawl_all(&self, options: &CrawlerOptions) -> Vec<CrawledPackage> {
211 let mut packages = Vec::new();
212 let mut seen = HashSet::new();
213
214 let nm_paths = self.get_node_modules_paths(options).await.unwrap_or_default();
215
216 for nm_path in &nm_paths {
217 let found = self.scan_node_modules(nm_path, &mut seen).await;
218 packages.extend(found);
219 }
220
221 packages
222 }
223
224 pub async fn find_by_purls(
230 &self,
231 node_modules_path: &Path,
232 purls: &[String],
233 ) -> Result<HashMap<String, CrawledPackage>, std::io::Error> {
234 let mut result: HashMap<String, CrawledPackage> = HashMap::new();
235
236 struct Target {
238 namespace: Option<String>,
239 name: String,
240 version: String,
241 #[allow(dead_code)] purl: String,
242 dir_key: String,
243 }
244
245 let purl_set: HashSet<&str> = purls.iter().map(|s| s.as_str()).collect();
246 let mut targets: Vec<Target> = Vec::new();
247
248 for purl in purls {
249 if let Some((ns, name, version)) = Self::parse_purl_components(purl) {
250 let dir_key = match &ns {
251 Some(ns_str) => format!("{ns_str}/{name}"),
252 None => name.clone(),
253 };
254 targets.push(Target {
255 namespace: ns,
256 name,
257 version,
258 purl: purl.clone(),
259 dir_key,
260 });
261 }
262 }
263
264 for target in &targets {
265 let pkg_path = node_modules_path.join(&target.dir_key);
266 let pkg_json_path = pkg_path.join("package.json");
267
268 if let Some((_, version)) = read_package_json(&pkg_json_path).await {
269 if version == target.version {
270 let purl = build_npm_purl(
271 target.namespace.as_deref(),
272 &target.name,
273 &version,
274 );
275 if purl_set.contains(purl.as_str()) {
276 result.insert(
277 purl.clone(),
278 CrawledPackage {
279 name: target.name.clone(),
280 version,
281 namespace: target.namespace.clone(),
282 purl,
283 path: pkg_path.clone(),
284 },
285 );
286 }
287 }
288 }
289 }
290
291 Ok(result)
292 }
293
294 fn get_global_node_modules_paths(&self) -> Vec<PathBuf> {
300 let mut paths = Vec::new();
301
302 if let Ok(npm_path) = get_npm_global_prefix() {
303 paths.push(PathBuf::from(npm_path));
304 }
305 if let Some(pnpm_path) = get_pnpm_global_prefix() {
306 paths.push(PathBuf::from(pnpm_path));
307 }
308 if let Some(yarn_path) = get_yarn_global_prefix() {
309 paths.push(PathBuf::from(yarn_path));
310 }
311 if let Some(bun_path) = get_bun_global_prefix() {
312 paths.push(PathBuf::from(bun_path));
313 }
314
315 paths
316 }
317
318 async fn find_local_node_modules_dirs(&self, start_path: &Path) -> Vec<PathBuf> {
326 let mut results = Vec::new();
327
328 let direct = start_path.join("node_modules");
330 if is_dir(&direct).await {
331 results.push(direct);
332 }
333
334 Self::find_workspace_node_modules(start_path, &mut results).await;
336
337 results
338 }
339
340 fn find_workspace_node_modules<'a>(
343 dir: &'a Path,
344 results: &'a mut Vec<PathBuf>,
345 ) -> std::pin::Pin<Box<dyn std::future::Future<Output = ()> + 'a>> {
346 Box::pin(async move {
347 let mut entries = match tokio::fs::read_dir(dir).await {
348 Ok(rd) => rd,
349 Err(_) => return,
350 };
351
352 let mut entry_list = Vec::new();
353 while let Ok(Some(entry)) = entries.next_entry().await {
354 entry_list.push(entry);
355 }
356
357 for entry in entry_list {
358 let file_type = match entry.file_type().await {
359 Ok(ft) => ft,
360 Err(_) => continue,
361 };
362
363 if !file_type.is_dir() {
364 continue;
365 }
366
367 let name = entry.file_name();
368 let name_str = name.to_string_lossy();
369
370 if name_str == "node_modules"
372 || name_str.starts_with('.')
373 || SKIP_DIRS.contains(&name_str.as_ref())
374 {
375 continue;
376 }
377
378 let full_path = dir.join(&name);
379
380 let sub_nm = full_path.join("node_modules");
382 if is_dir(&sub_nm).await {
383 results.push(sub_nm);
384 }
385
386 Self::find_workspace_node_modules(&full_path, results).await;
388 }
389 })
390 }
391
392 async fn scan_node_modules(
398 &self,
399 node_modules_path: &Path,
400 seen: &mut HashSet<String>,
401 ) -> Vec<CrawledPackage> {
402 let mut results = Vec::new();
403
404 let mut entries = match tokio::fs::read_dir(node_modules_path).await {
405 Ok(rd) => rd,
406 Err(_) => return results,
407 };
408
409 let mut entry_list = Vec::new();
410 while let Ok(Some(entry)) = entries.next_entry().await {
411 entry_list.push(entry);
412 }
413
414 for entry in entry_list {
415 let name = entry.file_name();
416 let name_str = name.to_string_lossy().to_string();
417
418 if name_str.starts_with('.') || name_str == "node_modules" {
420 continue;
421 }
422
423 let file_type = match entry.file_type().await {
424 Ok(ft) => ft,
425 Err(_) => continue,
426 };
427
428 if !file_type.is_dir() && !file_type.is_symlink() {
430 continue;
431 }
432
433 let entry_path = node_modules_path.join(&name_str);
434
435 if name_str.starts_with('@') {
436 let scoped =
438 Self::scan_scoped_packages(&entry_path, seen).await;
439 results.extend(scoped);
440 } else {
441 if let Some(pkg) = Self::check_package(&entry_path, seen).await {
443 results.push(pkg);
444 }
445 if file_type.is_dir() {
447 let nested =
448 Self::scan_nested_node_modules(&entry_path, seen).await;
449 results.extend(nested);
450 }
451 }
452 }
453
454 results
455 }
456
457 fn scan_scoped_packages<'a>(
459 scope_path: &'a Path,
460 seen: &'a mut HashSet<String>,
461 ) -> std::pin::Pin<Box<dyn std::future::Future<Output = Vec<CrawledPackage>> + 'a>> {
462 Box::pin(async move {
463 let mut results = Vec::new();
464
465 let mut entries = match tokio::fs::read_dir(scope_path).await {
466 Ok(rd) => rd,
467 Err(_) => return results,
468 };
469
470 let mut entry_list = Vec::new();
471 while let Ok(Some(entry)) = entries.next_entry().await {
472 entry_list.push(entry);
473 }
474
475 for entry in entry_list {
476 let name = entry.file_name();
477 let name_str = name.to_string_lossy().to_string();
478
479 if name_str.starts_with('.') {
480 continue;
481 }
482
483 let file_type = match entry.file_type().await {
484 Ok(ft) => ft,
485 Err(_) => continue,
486 };
487
488 if !file_type.is_dir() && !file_type.is_symlink() {
489 continue;
490 }
491
492 let pkg_path = scope_path.join(&name_str);
493 if let Some(pkg) = Self::check_package(&pkg_path, seen).await {
494 results.push(pkg);
495 }
496
497 if file_type.is_dir() {
499 let nested =
500 Self::scan_nested_node_modules(&pkg_path, seen).await;
501 results.extend(nested);
502 }
503 }
504
505 results
506 })
507 }
508
509 fn scan_nested_node_modules<'a>(
511 pkg_path: &'a Path,
512 seen: &'a mut HashSet<String>,
513 ) -> std::pin::Pin<Box<dyn std::future::Future<Output = Vec<CrawledPackage>> + 'a>> {
514 Box::pin(async move {
515 let nested_nm = pkg_path.join("node_modules");
516
517 let mut entries = match tokio::fs::read_dir(&nested_nm).await {
518 Ok(rd) => rd,
519 Err(_) => return Vec::new(),
520 };
521
522 let mut results = Vec::new();
523
524 let mut entry_list = Vec::new();
525 while let Ok(Some(entry)) = entries.next_entry().await {
526 entry_list.push(entry);
527 }
528
529 for entry in entry_list {
530 let name = entry.file_name();
531 let name_str = name.to_string_lossy().to_string();
532
533 if name_str.starts_with('.') || name_str == "node_modules" {
534 continue;
535 }
536
537 let file_type = match entry.file_type().await {
538 Ok(ft) => ft,
539 Err(_) => continue,
540 };
541
542 if !file_type.is_dir() && !file_type.is_symlink() {
543 continue;
544 }
545
546 let entry_path = nested_nm.join(&name_str);
547
548 if name_str.starts_with('@') {
549 let scoped =
550 Self::scan_scoped_packages(&entry_path, seen).await;
551 results.extend(scoped);
552 } else {
553 if let Some(pkg) = Self::check_package(&entry_path, seen).await {
554 results.push(pkg);
555 }
556 let deeper =
558 Self::scan_nested_node_modules(&entry_path, seen).await;
559 results.extend(deeper);
560 }
561 }
562
563 results
564 })
565 }
566
567 async fn check_package(
570 pkg_path: &Path,
571 seen: &mut HashSet<String>,
572 ) -> Option<CrawledPackage> {
573 let pkg_json_path = pkg_path.join("package.json");
574 let (full_name, version) = read_package_json(&pkg_json_path).await?;
575 let (namespace, name) = parse_package_name(&full_name);
576 let purl = build_npm_purl(namespace.as_deref(), &name, &version);
577
578 if seen.contains(&purl) {
579 return None;
580 }
581 seen.insert(purl.clone());
582
583 Some(CrawledPackage {
584 name,
585 version,
586 namespace,
587 purl,
588 path: pkg_path.to_path_buf(),
589 })
590 }
591
592 fn parse_purl_components(purl: &str) -> Option<(Option<String>, String, String)> {
598 let base = match purl.find('?') {
600 Some(idx) => &purl[..idx],
601 None => purl,
602 };
603
604 let rest = base.strip_prefix("pkg:npm/")?;
605 let at_idx = rest.rfind('@')?;
606 let name_part = &rest[..at_idx];
607 let version = &rest[at_idx + 1..];
608
609 if name_part.is_empty() || version.is_empty() {
610 return None;
611 }
612
613 if name_part.starts_with('@') {
614 let slash_idx = name_part.find('/')?;
615 let namespace = name_part[..slash_idx].to_string();
616 let name = name_part[slash_idx + 1..].to_string();
617 if name.is_empty() {
618 return None;
619 }
620 Some((Some(namespace), name, version.to_string()))
621 } else {
622 Some((None, name_part.to_string(), version.to_string()))
623 }
624 }
625}
626
627impl Default for NpmCrawler {
628 fn default() -> Self {
629 Self::new()
630 }
631}
632
633async fn is_dir(path: &Path) -> bool {
639 tokio::fs::metadata(path)
640 .await
641 .map(|m| m.is_dir())
642 .unwrap_or(false)
643}
644
#[cfg(test)]
mod tests {
    use super::*;

    /// Creates `pkg_dir` (with parents) and writes a `package.json` containing only
    /// `name` and `version`.
    async fn write_manifest(pkg_dir: &Path, name: &str, version: &str) {
        tokio::fs::create_dir_all(pkg_dir).await.unwrap();
        let manifest = format!(r#"{{"name": "{name}", "version": "{version}"}}"#);
        tokio::fs::write(pkg_dir.join("package.json"), manifest)
            .await
            .unwrap();
    }

    /// Options for a non-global crawl rooted at `cwd`.
    fn local_options(cwd: &Path) -> CrawlerOptions {
        CrawlerOptions {
            cwd: cwd.to_path_buf(),
            global: false,
            global_prefix: None,
            batch_size: DEFAULT_BATCH_SIZE,
        }
    }

    #[test]
    fn test_parse_package_name_scoped() {
        assert_eq!(
            parse_package_name("@types/node"),
            (Some("@types".to_string()), "node".to_string())
        );
    }

    #[test]
    fn test_parse_package_name_unscoped() {
        assert_eq!(parse_package_name("lodash"), (None, "lodash".to_string()));
    }

    #[test]
    fn test_build_npm_purl_scoped() {
        let purl = build_npm_purl(Some("@types"), "node", "20.0.0");
        assert_eq!(purl, "pkg:npm/@types/node@20.0.0");
    }

    #[test]
    fn test_build_npm_purl_unscoped() {
        let purl = build_npm_purl(None, "lodash", "4.17.21");
        assert_eq!(purl, "pkg:npm/lodash@4.17.21");
    }

    #[test]
    fn test_parse_purl_components_scoped() {
        assert_eq!(
            NpmCrawler::parse_purl_components("pkg:npm/@types/node@20.0.0"),
            Some((
                Some("@types".to_string()),
                "node".to_string(),
                "20.0.0".to_string()
            ))
        );
    }

    #[test]
    fn test_parse_purl_components_unscoped() {
        assert_eq!(
            NpmCrawler::parse_purl_components("pkg:npm/lodash@4.17.21"),
            Some((None, "lodash".to_string(), "4.17.21".to_string()))
        );
    }

    #[test]
    fn test_parse_purl_components_invalid() {
        for rejected in ["pkg:pypi/requests@2.0", "not-a-purl"] {
            assert!(NpmCrawler::parse_purl_components(rejected).is_none());
        }
    }

    #[tokio::test]
    async fn test_read_package_json_valid() {
        let dir = tempfile::tempdir().unwrap();
        write_manifest(dir.path(), "test-pkg", "1.0.0").await;

        let parsed = read_package_json(&dir.path().join("package.json")).await;
        assert_eq!(parsed, Some(("test-pkg".to_string(), "1.0.0".to_string())));
    }

    #[tokio::test]
    async fn test_read_package_json_missing() {
        let dir = tempfile::tempdir().unwrap();
        let absent = dir.path().join("package.json");
        assert!(read_package_json(&absent).await.is_none());
    }

    #[tokio::test]
    async fn test_read_package_json_invalid() {
        let dir = tempfile::tempdir().unwrap();
        let pkg_json = dir.path().join("package.json");
        tokio::fs::write(&pkg_json, "not json").await.unwrap();
        assert!(read_package_json(&pkg_json).await.is_none());
    }

    #[tokio::test]
    async fn test_crawl_all_basic() {
        let dir = tempfile::tempdir().unwrap();
        write_manifest(&dir.path().join("node_modules").join("foo"), "foo", "1.2.3").await;

        let packages = NpmCrawler::new()
            .crawl_all(&local_options(dir.path()))
            .await;

        assert_eq!(packages.len(), 1);
        let pkg = &packages[0];
        assert_eq!(pkg.name, "foo");
        assert_eq!(pkg.version, "1.2.3");
        assert_eq!(pkg.purl, "pkg:npm/foo@1.2.3");
        assert!(pkg.namespace.is_none());
    }

    #[tokio::test]
    async fn test_crawl_all_scoped() {
        let dir = tempfile::tempdir().unwrap();
        let scoped_dir = dir.path().join("node_modules").join("@types").join("node");
        write_manifest(&scoped_dir, "@types/node", "20.0.0").await;

        let packages = NpmCrawler::new()
            .crawl_all(&local_options(dir.path()))
            .await;

        assert_eq!(packages.len(), 1);
        let pkg = &packages[0];
        assert_eq!(pkg.name, "node");
        assert_eq!(pkg.namespace.as_deref(), Some("@types"));
        assert_eq!(pkg.purl, "pkg:npm/@types/node@20.0.0");
    }

    #[tokio::test]
    async fn test_find_by_purls() {
        let dir = tempfile::tempdir().unwrap();
        let nm = dir.path().join("node_modules");
        write_manifest(&nm.join("foo"), "foo", "1.0.0").await;
        write_manifest(&nm.join("@types").join("node"), "@types/node", "20.0.0").await;

        let purls: Vec<String> = [
            "pkg:npm/foo@1.0.0",
            "pkg:npm/@types/node@20.0.0",
            "pkg:npm/not-installed@0.0.1",
        ]
        .iter()
        .map(|purl| purl.to_string())
        .collect();

        let found = NpmCrawler::new().find_by_purls(&nm, &purls).await.unwrap();

        assert_eq!(found.len(), 2);
        assert!(found.contains_key("pkg:npm/foo@1.0.0"));
        assert!(found.contains_key("pkg:npm/@types/node@20.0.0"));
        assert!(!found.contains_key("pkg:npm/not-installed@0.0.1"));
    }
}