Skip to main content

socket_patch_core/crawlers/
npm_crawler.rs

1use std::collections::{HashMap, HashSet};
2use std::path::{Path, PathBuf};
3use std::process::Command;
4
5use serde::Deserialize;
6
7use super::types::{CrawledPackage, CrawlerOptions};
8
/// Batch size placed into `CrawlerOptions` by the unit tests in this file.
///
/// Only compiled in test builds.
#[cfg(test)]
const DEFAULT_BATCH_SIZE: usize = 100;

/// Directory names that are never descended into while searching a project
/// tree for workspace `node_modules` folders.
const SKIP_DIRS: &[&str] = &[
    "dist", "build", "coverage", "tmp", "temp", "__pycache__", "vendor",
];
23
24// ---------------------------------------------------------------------------
25// Helper: read and parse package.json
26// ---------------------------------------------------------------------------
27
28/// Minimal fields we need from package.json.
29#[derive(Deserialize)]
30struct PackageJsonPartial {
31    name: Option<String>,
32    version: Option<String>,
33}
34
35/// Read and parse a `package.json` file, returning `(name, version)` if valid.
36pub async fn read_package_json(pkg_json_path: &Path) -> Option<(String, String)> {
37    let content = tokio::fs::read_to_string(pkg_json_path).await.ok()?;
38    let pkg: PackageJsonPartial = serde_json::from_str(&content).ok()?;
39    let name = pkg.name?;
40    let version = pkg.version?;
41    if name.is_empty() || version.is_empty() {
42        return None;
43    }
44    Some((name, version))
45}
46
47// ---------------------------------------------------------------------------
48// Helper: parse package name into (namespace, name)
49// ---------------------------------------------------------------------------
50
/// Split a full npm package name into its optional scope and its bare name.
///
/// A name is treated as scoped only when it starts with `@` *and* contains a
/// `/`; the split happens at the first `/` and the scope keeps its leading `@`.
/// Anything else is returned unchanged with no scope.
///
/// Examples:
/// - `"@types/node"` -> `(Some("@types"), "node")`
/// - `"lodash"` -> `(None, "lodash")`
/// - `"@scope"` (no slash) -> `(None, "@scope")`
pub fn parse_package_name(full_name: &str) -> (Option<String>, String) {
    if !full_name.starts_with('@') {
        return (None, full_name.to_owned());
    }

    match full_name.split_once('/') {
        Some((scope, bare)) => (Some(scope.to_owned()), bare.to_owned()),
        None => (None, full_name.to_owned()),
    }
}
66
67// ---------------------------------------------------------------------------
68// Helper: build PURL
69// ---------------------------------------------------------------------------
70
/// Assemble the PURL string for an npm package.
///
/// The result is `pkg:npm/<namespace>/<name>@<version>` when a namespace is
/// given and `pkg:npm/<name>@<version>` otherwise. The parts are inserted
/// verbatim; no escaping or validation is performed.
pub fn build_npm_purl(namespace: Option<&str>, name: &str, version: &str) -> String {
    let mut purl = String::from("pkg:npm/");
    if let Some(scope) = namespace {
        purl.push_str(scope);
        purl.push('/');
    }
    purl.push_str(name);
    purl.push('@');
    purl.push_str(version);
    purl
}
78
79// ---------------------------------------------------------------------------
80// Global prefix detection helpers
81// ---------------------------------------------------------------------------
82
/// Ask npm where its global `node_modules` directory lives (`npm root -g`).
///
/// # Errors
///
/// Returns a human-readable message when the `npm` process cannot be started
/// or when it exits with a non-zero status.
///
/// On success the trimmed stdout of the command is returned as-is; it is not
/// checked for emptiness or existence on disk.
pub fn get_npm_global_prefix() -> Result<String, String> {
    use std::process::Stdio;

    let mut npm = Command::new("npm");
    npm.args(["root", "-g"])
        .stdin(Stdio::null())
        .stdout(Stdio::piped())
        .stderr(Stdio::piped());

    match npm.output() {
        Err(e) => Err(format!("Failed to run `npm root -g`: {e}")),
        Ok(out) if !out.status.success() => Err(String::from(
            "Failed to determine npm global prefix. Ensure npm is installed and in PATH.",
        )),
        Ok(out) => Ok(String::from_utf8_lossy(&out.stdout).trim().to_string()),
    }
}
102
/// Locate yarn's global `node_modules` directory.
///
/// Runs `yarn global dir` and appends `node_modules` to the directory it
/// prints. Returns `None` when yarn cannot be started, exits unsuccessfully,
/// or prints nothing.
pub fn get_yarn_global_prefix() -> Option<String> {
    use std::process::Stdio;

    let out = Command::new("yarn")
        .args(["global", "dir"])
        .stdin(Stdio::null())
        .stdout(Stdio::piped())
        .stderr(Stdio::piped())
        .output()
        .ok()
        .filter(|out| out.status.success())?;

    let stdout = String::from_utf8_lossy(&out.stdout);
    let global_dir = stdout.trim();
    if global_dir.is_empty() {
        return None;
    }

    let node_modules = PathBuf::from(global_dir).join("node_modules");
    Some(node_modules.to_string_lossy().to_string())
}
123
/// Ask pnpm where its global `node_modules` directory lives (`pnpm root -g`).
///
/// Returns the trimmed stdout of the command, or `None` when pnpm cannot be
/// started, exits unsuccessfully, or prints nothing.
pub fn get_pnpm_global_prefix() -> Option<String> {
    use std::process::Stdio;

    let outcome = Command::new("pnpm")
        .args(["root", "-g"])
        .stdin(Stdio::null())
        .stdout(Stdio::piped())
        .stderr(Stdio::piped())
        .output();

    match outcome {
        Ok(out) if out.status.success() => {
            let root = String::from_utf8_lossy(&out.stdout).trim().to_string();
            if root.is_empty() {
                None
            } else {
                Some(root)
            }
        }
        _ => None,
    }
}
144
/// Derive bun's global `node_modules` directory from `bun pm bin -g`.
///
/// The command prints bun's global bin directory. This function takes that
/// directory's parent and appends `install/global/node_modules` to it.
/// Returns `None` when bun cannot be started, exits unsuccessfully, prints
/// nothing, or the printed path has no parent.
pub fn get_bun_global_prefix() -> Option<String> {
    use std::process::Stdio;

    let out = Command::new("bun")
        .args(["pm", "bin", "-g"])
        .stdin(Stdio::null())
        .stdout(Stdio::piped())
        .stderr(Stdio::piped())
        .output()
        .ok()?;
    if !out.status.success() {
        return None;
    }

    let stdout = String::from_utf8_lossy(&out.stdout);
    let bin_dir = stdout.trim();
    if bin_dir.is_empty() {
        return None;
    }

    // Step up from the bin directory, then down into the global install tree.
    let mut node_modules = PathBuf::from(bin_dir).parent()?.to_path_buf();
    node_modules.extend(["install", "global", "node_modules"]);
    Some(node_modules.to_string_lossy().to_string())
}
175
176// ---------------------------------------------------------------------------
177// Helpers: synchronous wildcard directory resolver
178// ---------------------------------------------------------------------------
179
/// Resolve a path with `"*"` wildcard segments synchronously.
///
/// Starting at `base`, each entry of `segments` is either a literal directory
/// name, which is joined onto the current path, or `"*"`, which fans out over
/// every directory entry of the current path. The function returns every
/// resulting path that exists and is a directory; a `base` that is not a
/// directory yields an empty vector.
///
/// Symlinks to directories are followed for both literal and wildcard
/// segments. The wildcard branch deliberately calls `std::fs::metadata` on the
/// entry's path: `DirEntry::metadata` does *not* traverse symlinks, so using it
/// would silently drop symlinked directories.
fn find_node_dirs_sync(base: &Path, segments: &[&str]) -> Vec<PathBuf> {
    // `Path::is_dir` follows symlinks and is false for missing paths.
    if !base.is_dir() {
        return Vec::new();
    }

    let (first, rest) = match segments.split_first() {
        Some(parts) => parts,
        // Pattern fully consumed: `base` itself is a match.
        None => return vec![base.to_path_buf()],
    };

    if *first != "*" {
        return find_node_dirs_sync(&base.join(first), rest);
    }

    let entries = match std::fs::read_dir(base) {
        Ok(entries) => entries,
        // Unreadable directory: best-effort, contribute nothing.
        Err(_) => return Vec::new(),
    };

    let mut results = Vec::new();
    for entry in entries.flatten() {
        let candidate = entry.path();
        // Resolve symlinks so that linked directories take part in the match.
        let is_dir = std::fs::metadata(&candidate)
            .map(|m| m.is_dir())
            .unwrap_or(false);
        if is_dir {
            results.extend(find_node_dirs_sync(&candidate, rest));
        }
    }
    results
}
214
215// ---------------------------------------------------------------------------
216// NpmCrawler
217// ---------------------------------------------------------------------------
218
/// NPM ecosystem crawler for discovering packages in `node_modules`.
///
/// The type is a stateless unit struct, so it is cheap to copy and can be
/// printed for diagnostics.
#[derive(Debug, Clone, Copy)]
pub struct NpmCrawler;
221
222impl NpmCrawler {
223    /// Create a new `NpmCrawler`.
224    pub fn new() -> Self {
225        Self
226    }
227
228    // ------------------------------------------------------------------
229    // Public API
230    // ------------------------------------------------------------------
231
232    /// Get `node_modules` paths based on options.
233    ///
234    /// In global mode returns well-known global paths; in local mode walks
235    /// the project tree looking for `node_modules` directories (including
236    /// workspace packages).
237    pub async fn get_node_modules_paths(&self, options: &CrawlerOptions) -> Result<Vec<PathBuf>, std::io::Error> {
238        if options.global || options.global_prefix.is_some() {
239            if let Some(ref custom) = options.global_prefix {
240                return Ok(vec![custom.clone()]);
241            }
242            return Ok(self.get_global_node_modules_paths());
243        }
244
245        Ok(self.find_local_node_modules_dirs(&options.cwd).await)
246    }
247
248    /// Crawl all discovered `node_modules` and return every package found.
249    pub async fn crawl_all(&self, options: &CrawlerOptions) -> Vec<CrawledPackage> {
250        let mut packages = Vec::new();
251        let mut seen = HashSet::new();
252
253        let nm_paths = self.get_node_modules_paths(options).await.unwrap_or_default();
254
255        for nm_path in &nm_paths {
256            let found = self.scan_node_modules(nm_path, &mut seen).await;
257            packages.extend(found);
258        }
259
260        packages
261    }
262
263    /// Find specific packages by PURL inside a single `node_modules` tree.
264    ///
265    /// This is an efficient O(n) lookup where n = number of PURLs: we parse
266    /// each PURL to derive the expected directory path, then do a direct stat
267    /// + `package.json` read.
268    pub async fn find_by_purls(
269        &self,
270        node_modules_path: &Path,
271        purls: &[String],
272    ) -> Result<HashMap<String, CrawledPackage>, std::io::Error> {
273        let mut result: HashMap<String, CrawledPackage> = HashMap::new();
274
275        // Parse each PURL to extract the directory key and expected version.
276        struct Target {
277            namespace: Option<String>,
278            name: String,
279            version: String,
280            #[allow(dead_code)] purl: String,
281            dir_key: String,
282        }
283
284        let purl_set: HashSet<&str> = purls.iter().map(|s| s.as_str()).collect();
285        let mut targets: Vec<Target> = Vec::new();
286
287        for purl in purls {
288            if let Some((ns, name, version)) = Self::parse_purl_components(purl) {
289                let dir_key = match &ns {
290                    Some(ns_str) => format!("{ns_str}/{name}"),
291                    None => name.clone(),
292                };
293                targets.push(Target {
294                    namespace: ns,
295                    name,
296                    version,
297                    purl: purl.clone(),
298                    dir_key,
299                });
300            }
301        }
302
303        for target in &targets {
304            let pkg_path = node_modules_path.join(&target.dir_key);
305            let pkg_json_path = pkg_path.join("package.json");
306
307            if let Some((_, version)) = read_package_json(&pkg_json_path).await {
308                if version == target.version {
309                    let purl = build_npm_purl(
310                        target.namespace.as_deref(),
311                        &target.name,
312                        &version,
313                    );
314                    if purl_set.contains(purl.as_str()) {
315                        result.insert(
316                            purl.clone(),
317                            CrawledPackage {
318                                name: target.name.clone(),
319                                version,
320                                namespace: target.namespace.clone(),
321                                purl,
322                                path: pkg_path.clone(),
323                            },
324                        );
325                    }
326                }
327            }
328        }
329
330        Ok(result)
331    }
332
333    // ------------------------------------------------------------------
334    // Private helpers – global paths
335    // ------------------------------------------------------------------
336
337    /// Collect global `node_modules` paths from all known package managers.
338    fn get_global_node_modules_paths(&self) -> Vec<PathBuf> {
339        let mut seen = HashSet::new();
340        let mut paths = Vec::new();
341
342        let mut add = |p: PathBuf| {
343            if p.is_dir() && seen.insert(p.clone()) {
344                paths.push(p);
345            }
346        };
347
348        if let Ok(npm_path) = get_npm_global_prefix() {
349            add(PathBuf::from(npm_path));
350        }
351        if let Some(pnpm_path) = get_pnpm_global_prefix() {
352            add(PathBuf::from(pnpm_path));
353        }
354        if let Some(yarn_path) = get_yarn_global_prefix() {
355            add(PathBuf::from(yarn_path));
356        }
357        if let Some(bun_path) = get_bun_global_prefix() {
358            add(PathBuf::from(bun_path));
359        }
360
361        // macOS-specific fallback paths
362        if cfg!(target_os = "macos") {
363            let home = std::env::var("HOME").unwrap_or_default();
364
365            // Homebrew Apple Silicon
366            add(PathBuf::from("/opt/homebrew/lib/node_modules"));
367            // Homebrew Intel / default npm
368            add(PathBuf::from("/usr/local/lib/node_modules"));
369
370            if !home.is_empty() {
371                // nvm
372                for p in find_node_dirs_sync(
373                    &PathBuf::from(&home).join(".nvm/versions/node"),
374                    &["*", "lib", "node_modules"],
375                ) {
376                    add(p);
377                }
378                // volta
379                for p in find_node_dirs_sync(
380                    &PathBuf::from(&home).join(".volta/tools/image/node"),
381                    &["*", "lib", "node_modules"],
382                ) {
383                    add(p);
384                }
385                // fnm
386                for p in find_node_dirs_sync(
387                    &PathBuf::from(&home).join(".fnm/node-versions"),
388                    &["*", "installation", "lib", "node_modules"],
389                ) {
390                    add(p);
391                }
392            }
393        }
394
395        paths
396    }
397
398    // ------------------------------------------------------------------
399    // Private helpers – local node_modules discovery
400    // ------------------------------------------------------------------
401
402    /// Find `node_modules` directories within the project root.
403    /// Recursively searches for workspace `node_modules` but stays within the
404    /// project.
405    async fn find_local_node_modules_dirs(&self, start_path: &Path) -> Vec<PathBuf> {
406        let mut results = Vec::new();
407
408        // Direct node_modules in start_path
409        let direct = start_path.join("node_modules");
410        if is_dir(&direct).await {
411            results.push(direct);
412        }
413
414        // Recursively search for workspace node_modules
415        Self::find_workspace_node_modules(start_path, &mut results).await;
416
417        results
418    }
419
420    /// Recursively find `node_modules` in subdirectories (for monorepos / workspaces).
421    /// Skips symlinks, hidden dirs, and well-known non-workspace dirs.
422    fn find_workspace_node_modules<'a>(
423        dir: &'a Path,
424        results: &'a mut Vec<PathBuf>,
425    ) -> std::pin::Pin<Box<dyn std::future::Future<Output = ()> + 'a>> {
426        Box::pin(async move {
427            let mut entries = match tokio::fs::read_dir(dir).await {
428                Ok(rd) => rd,
429                Err(_) => return,
430            };
431
432            let mut entry_list = Vec::new();
433            while let Ok(Some(entry)) = entries.next_entry().await {
434                entry_list.push(entry);
435            }
436
437            for entry in entry_list {
438                let file_type = match entry.file_type().await {
439                    Ok(ft) => ft,
440                    Err(_) => continue,
441                };
442
443                if !file_type.is_dir() {
444                    continue;
445                }
446
447                let name = entry.file_name();
448                let name_str = name.to_string_lossy();
449
450                // Skip node_modules, hidden dirs, and well-known build dirs
451                if name_str == "node_modules"
452                    || name_str.starts_with('.')
453                    || SKIP_DIRS.contains(&name_str.as_ref())
454                {
455                    continue;
456                }
457
458                let full_path = dir.join(&name);
459
460                // Check if this subdirectory has its own node_modules
461                let sub_nm = full_path.join("node_modules");
462                if is_dir(&sub_nm).await {
463                    results.push(sub_nm);
464                }
465
466                // Recurse
467                Self::find_workspace_node_modules(&full_path, results).await;
468            }
469        })
470    }
471
472    // ------------------------------------------------------------------
473    // Private helpers – scanning
474    // ------------------------------------------------------------------
475
476    /// Scan a `node_modules` directory, returning all valid packages found.
477    async fn scan_node_modules(
478        &self,
479        node_modules_path: &Path,
480        seen: &mut HashSet<String>,
481    ) -> Vec<CrawledPackage> {
482        let mut results = Vec::new();
483
484        let mut entries = match tokio::fs::read_dir(node_modules_path).await {
485            Ok(rd) => rd,
486            Err(_) => return results,
487        };
488
489        let mut entry_list = Vec::new();
490        while let Ok(Some(entry)) = entries.next_entry().await {
491            entry_list.push(entry);
492        }
493
494        for entry in entry_list {
495            let name = entry.file_name();
496            let name_str = name.to_string_lossy().to_string();
497
498            // Skip hidden files and node_modules
499            if name_str.starts_with('.') || name_str == "node_modules" {
500                continue;
501            }
502
503            let file_type = match entry.file_type().await {
504                Ok(ft) => ft,
505                Err(_) => continue,
506            };
507
508            // Allow both directories and symlinks (pnpm uses symlinks)
509            if !file_type.is_dir() && !file_type.is_symlink() {
510                continue;
511            }
512
513            let entry_path = node_modules_path.join(&name_str);
514
515            if name_str.starts_with('@') {
516                // Scoped packages
517                let scoped =
518                    Self::scan_scoped_packages(&entry_path, seen).await;
519                results.extend(scoped);
520            } else {
521                // Regular package
522                if let Some(pkg) = Self::check_package(&entry_path, seen).await {
523                    results.push(pkg);
524                }
525                // Nested node_modules only for real directories (not symlinks)
526                if file_type.is_dir() {
527                    let nested =
528                        Self::scan_nested_node_modules(&entry_path, seen).await;
529                    results.extend(nested);
530                }
531            }
532        }
533
534        results
535    }
536
537    /// Scan a scoped packages directory (`@scope/`).
538    fn scan_scoped_packages<'a>(
539        scope_path: &'a Path,
540        seen: &'a mut HashSet<String>,
541    ) -> std::pin::Pin<Box<dyn std::future::Future<Output = Vec<CrawledPackage>> + 'a>> {
542        Box::pin(async move {
543            let mut results = Vec::new();
544
545            let mut entries = match tokio::fs::read_dir(scope_path).await {
546                Ok(rd) => rd,
547                Err(_) => return results,
548            };
549
550            let mut entry_list = Vec::new();
551            while let Ok(Some(entry)) = entries.next_entry().await {
552                entry_list.push(entry);
553            }
554
555            for entry in entry_list {
556                let name = entry.file_name();
557                let name_str = name.to_string_lossy().to_string();
558
559                if name_str.starts_with('.') {
560                    continue;
561                }
562
563                let file_type = match entry.file_type().await {
564                    Ok(ft) => ft,
565                    Err(_) => continue,
566                };
567
568                if !file_type.is_dir() && !file_type.is_symlink() {
569                    continue;
570                }
571
572                let pkg_path = scope_path.join(&name_str);
573                if let Some(pkg) = Self::check_package(&pkg_path, seen).await {
574                    results.push(pkg);
575                }
576
577                // Nested node_modules only for real directories
578                if file_type.is_dir() {
579                    let nested =
580                        Self::scan_nested_node_modules(&pkg_path, seen).await;
581                    results.extend(nested);
582                }
583            }
584
585            results
586        })
587    }
588
589    /// Scan nested `node_modules` inside a package (if it exists).
590    fn scan_nested_node_modules<'a>(
591        pkg_path: &'a Path,
592        seen: &'a mut HashSet<String>,
593    ) -> std::pin::Pin<Box<dyn std::future::Future<Output = Vec<CrawledPackage>> + 'a>> {
594        Box::pin(async move {
595            let nested_nm = pkg_path.join("node_modules");
596
597            let mut entries = match tokio::fs::read_dir(&nested_nm).await {
598                Ok(rd) => rd,
599                Err(_) => return Vec::new(),
600            };
601
602            let mut results = Vec::new();
603
604            let mut entry_list = Vec::new();
605            while let Ok(Some(entry)) = entries.next_entry().await {
606                entry_list.push(entry);
607            }
608
609            for entry in entry_list {
610                let name = entry.file_name();
611                let name_str = name.to_string_lossy().to_string();
612
613                if name_str.starts_with('.') || name_str == "node_modules" {
614                    continue;
615                }
616
617                let file_type = match entry.file_type().await {
618                    Ok(ft) => ft,
619                    Err(_) => continue,
620                };
621
622                if !file_type.is_dir() && !file_type.is_symlink() {
623                    continue;
624                }
625
626                let entry_path = nested_nm.join(&name_str);
627
628                if name_str.starts_with('@') {
629                    let scoped =
630                        Self::scan_scoped_packages(&entry_path, seen).await;
631                    results.extend(scoped);
632                } else {
633                    if let Some(pkg) = Self::check_package(&entry_path, seen).await {
634                        results.push(pkg);
635                    }
636                    // Recursively check deeper nested node_modules
637                    let deeper =
638                        Self::scan_nested_node_modules(&entry_path, seen).await;
639                    results.extend(deeper);
640                }
641            }
642
643            results
644        })
645    }
646
647    /// Check a package directory and return `CrawledPackage` if valid.
648    /// Deduplicates by PURL via the `seen` set.
649    async fn check_package(
650        pkg_path: &Path,
651        seen: &mut HashSet<String>,
652    ) -> Option<CrawledPackage> {
653        let pkg_json_path = pkg_path.join("package.json");
654        let (full_name, version) = read_package_json(&pkg_json_path).await?;
655        let (namespace, name) = parse_package_name(&full_name);
656        let purl = build_npm_purl(namespace.as_deref(), &name, &version);
657
658        if seen.contains(&purl) {
659            return None;
660        }
661        seen.insert(purl.clone());
662
663        Some(CrawledPackage {
664            name,
665            version,
666            namespace,
667            purl,
668            path: pkg_path.to_path_buf(),
669        })
670    }
671
672    // ------------------------------------------------------------------
673    // Private helpers – PURL parsing
674    // ------------------------------------------------------------------
675
676    /// Parse a PURL string to extract namespace, name, and version.
677    fn parse_purl_components(purl: &str) -> Option<(Option<String>, String, String)> {
678        // Strip qualifiers
679        let base = match purl.find('?') {
680            Some(idx) => &purl[..idx],
681            None => purl,
682        };
683
684        let rest = base.strip_prefix("pkg:npm/")?;
685        let at_idx = rest.rfind('@')?;
686        let name_part = &rest[..at_idx];
687        let version = &rest[at_idx + 1..];
688
689        if name_part.is_empty() || version.is_empty() {
690            return None;
691        }
692
693        if name_part.starts_with('@') {
694            let slash_idx = name_part.find('/')?;
695            let namespace = name_part[..slash_idx].to_string();
696            let name = name_part[slash_idx + 1..].to_string();
697            if name.is_empty() {
698                return None;
699            }
700            Some((Some(namespace), name, version.to_string()))
701        } else {
702            Some((None, name_part.to_string(), version.to_string()))
703        }
704    }
705}
706
707impl Default for NpmCrawler {
708    fn default() -> Self {
709        Self::new()
710    }
711}
712
713// ---------------------------------------------------------------------------
714// Utility
715// ---------------------------------------------------------------------------
716
717/// Check whether a path is a directory (follows symlinks).
718async fn is_dir(path: &Path) -> bool {
719    tokio::fs::metadata(path)
720        .await
721        .map(|m| m.is_dir())
722        .unwrap_or(false)
723}
724
#[cfg(test)]
mod tests {
    use super::*;

    // ----- fixtures -------------------------------------------------------

    /// Create `pkg_dir` (if needed) and write a minimal `package.json` into it.
    async fn install_fake_package(pkg_dir: &Path, name: &str, version: &str) {
        tokio::fs::create_dir_all(pkg_dir).await.unwrap();
        let manifest = format!(r#"{{"name": "{name}", "version": "{version}"}}"#);
        tokio::fs::write(pkg_dir.join("package.json"), manifest)
            .await
            .unwrap();
    }

    /// Options for a local (non-global) crawl rooted at `cwd`.
    fn local_options(cwd: &Path) -> CrawlerOptions {
        CrawlerOptions {
            cwd: cwd.to_path_buf(),
            global: false,
            global_prefix: None,
            batch_size: DEFAULT_BATCH_SIZE,
        }
    }

    // ----- name / PURL helpers -------------------------------------------

    #[test]
    fn test_parse_package_name_scoped() {
        assert_eq!(
            parse_package_name("@types/node"),
            (Some("@types".to_string()), "node".to_string())
        );
    }

    #[test]
    fn test_parse_package_name_unscoped() {
        assert_eq!(parse_package_name("lodash"), (None, "lodash".to_string()));
    }

    #[test]
    fn test_build_npm_purl_scoped() {
        let purl = build_npm_purl(Some("@types"), "node", "20.0.0");
        assert_eq!(purl, "pkg:npm/@types/node@20.0.0");
    }

    #[test]
    fn test_build_npm_purl_unscoped() {
        let purl = build_npm_purl(None, "lodash", "4.17.21");
        assert_eq!(purl, "pkg:npm/lodash@4.17.21");
    }

    #[test]
    fn test_parse_purl_components_scoped() {
        assert_eq!(
            NpmCrawler::parse_purl_components("pkg:npm/@types/node@20.0.0"),
            Some((
                Some("@types".to_string()),
                "node".to_string(),
                "20.0.0".to_string()
            ))
        );
    }

    #[test]
    fn test_parse_purl_components_unscoped() {
        assert_eq!(
            NpmCrawler::parse_purl_components("pkg:npm/lodash@4.17.21"),
            Some((None, "lodash".to_string(), "4.17.21".to_string()))
        );
    }

    #[test]
    fn test_parse_purl_components_invalid() {
        for bad in ["pkg:pypi/requests@2.0", "not-a-purl"].iter() {
            assert!(NpmCrawler::parse_purl_components(bad).is_none());
        }
    }

    // ----- package.json reading ------------------------------------------

    #[tokio::test]
    async fn test_read_package_json_valid() {
        let dir = tempfile::tempdir().unwrap();
        install_fake_package(dir.path(), "test-pkg", "1.0.0").await;

        let parsed = read_package_json(&dir.path().join("package.json")).await;
        assert_eq!(
            parsed,
            Some(("test-pkg".to_string(), "1.0.0".to_string()))
        );
    }

    #[tokio::test]
    async fn test_read_package_json_missing() {
        let dir = tempfile::tempdir().unwrap();
        let absent = dir.path().join("package.json");
        assert!(read_package_json(&absent).await.is_none());
    }

    #[tokio::test]
    async fn test_read_package_json_invalid() {
        let dir = tempfile::tempdir().unwrap();
        let manifest = dir.path().join("package.json");
        tokio::fs::write(&manifest, "not json").await.unwrap();
        assert!(read_package_json(&manifest).await.is_none());
    }

    // ----- crawling -------------------------------------------------------

    #[tokio::test]
    async fn test_crawl_all_basic() {
        let dir = tempfile::tempdir().unwrap();
        install_fake_package(&dir.path().join("node_modules").join("foo"), "foo", "1.2.3").await;

        let packages = NpmCrawler::new()
            .crawl_all(&local_options(dir.path()))
            .await;

        assert_eq!(packages.len(), 1);
        let pkg = &packages[0];
        assert_eq!(pkg.name, "foo");
        assert_eq!(pkg.version, "1.2.3");
        assert_eq!(pkg.purl, "pkg:npm/foo@1.2.3");
        assert!(pkg.namespace.is_none());
    }

    #[tokio::test]
    async fn test_crawl_all_scoped() {
        let dir = tempfile::tempdir().unwrap();
        let scoped_dir = dir.path().join("node_modules").join("@types").join("node");
        install_fake_package(&scoped_dir, "@types/node", "20.0.0").await;

        let packages = NpmCrawler::new()
            .crawl_all(&local_options(dir.path()))
            .await;

        assert_eq!(packages.len(), 1);
        let pkg = &packages[0];
        assert_eq!(pkg.name, "node");
        assert_eq!(pkg.namespace.as_deref(), Some("@types"));
        assert_eq!(pkg.purl, "pkg:npm/@types/node@20.0.0");
    }

    // ----- wildcard directory resolution ---------------------------------

    #[test]
    fn test_find_node_dirs_sync_wildcard() {
        // nvm-like layout: <base>/<version>/lib/node_modules
        let dir = tempfile::tempdir().unwrap();
        let expected: Vec<PathBuf> = ["v18.0.0/lib/node_modules", "v20.1.0/lib/node_modules"]
            .iter()
            .map(|rel| dir.path().join(rel))
            .collect();
        for path in &expected {
            std::fs::create_dir_all(path).unwrap();
        }

        let results = find_node_dirs_sync(dir.path(), &["*", "lib", "node_modules"]);
        assert_eq!(results.len(), 2);
        for path in &expected {
            assert!(results.contains(path));
        }
    }

    #[test]
    fn test_find_node_dirs_sync_empty() {
        // A base path that does not exist produces no matches.
        let missing = Path::new("/nonexistent/path/xyz");
        assert!(find_node_dirs_sync(missing, &["*", "lib"]).is_empty());
    }

    #[test]
    fn test_find_node_dirs_sync_literal() {
        // Pattern made of literal segments only.
        let dir = tempfile::tempdir().unwrap();
        let target = dir.path().join("lib/node_modules");
        std::fs::create_dir_all(&target).unwrap();

        let results = find_node_dirs_sync(dir.path(), &["lib", "node_modules"]);
        assert_eq!(results.len(), 1);
        assert_eq!(results[0], target);
    }

    #[cfg(target_os = "macos")]
    #[test]
    fn test_macos_get_global_node_modules_paths_no_panic() {
        // Must not panic even when no package manager is installed.
        let _paths = NpmCrawler::new().get_global_node_modules_paths();
    }

    // ----- PURL lookup ----------------------------------------------------

    #[tokio::test]
    async fn test_find_by_purls() {
        let dir = tempfile::tempdir().unwrap();
        let nm = dir.path().join("node_modules");

        // Installed: foo@1.0.0 and @types/node@20.0.0
        install_fake_package(&nm.join("foo"), "foo", "1.0.0").await;
        install_fake_package(&nm.join("@types").join("node"), "@types/node", "20.0.0").await;

        let purls: Vec<String> = [
            "pkg:npm/foo@1.0.0",
            "pkg:npm/@types/node@20.0.0",
            "pkg:npm/not-installed@0.0.1",
        ]
        .iter()
        .map(|p| p.to_string())
        .collect();

        let result = NpmCrawler::new().find_by_purls(&nm, &purls).await.unwrap();

        assert_eq!(result.len(), 2);
        assert!(result.contains_key("pkg:npm/foo@1.0.0"));
        assert!(result.contains_key("pkg:npm/@types/node@20.0.0"));
        assert!(!result.contains_key("pkg:npm/not-installed@0.0.1"));
    }
}
950        assert!(result.contains_key("pkg:npm/foo@1.0.0"));
951        assert!(result.contains_key("pkg:npm/@types/node@20.0.0"));
952        assert!(!result.contains_key("pkg:npm/not-installed@0.0.1"));
953    }
954}