Skip to main content

socket_patch_core/crawlers/
npm_crawler.rs

1use std::collections::{HashMap, HashSet};
2use std::path::{Path, PathBuf};
3
4use serde::Deserialize;
5
6use super::types::{CrawledPackage, CrawlerOptions};
7
/// Default batch size for crawling.
///
/// Compiled only in test builds (`#[cfg(test)]`); the unit tests in this
/// file use it to fill `CrawlerOptions::batch_size`.
#[cfg(test)]
const DEFAULT_BATCH_SIZE: usize = 100;
11
/// Directories to skip when searching for workspace node_modules.
///
/// Entries are compared against a directory's exact name (not a path or a
/// glob) during the recursive workspace walk; a matching directory is
/// neither inspected for its own `node_modules` nor descended into.
const SKIP_DIRS: &[&str] = &[
    "dist",
    "build",
    "coverage",
    "tmp",
    "temp",
    "__pycache__",
    "vendor",
];
22
23// ---------------------------------------------------------------------------
24// Helper: read and parse package.json
25// ---------------------------------------------------------------------------
26
/// Minimal fields we need from package.json.
///
/// Both fields are optional so that a manifest lacking either one still
/// deserializes; `read_package_json` is what rejects such manifests.
#[derive(Deserialize)]
struct PackageJsonPartial {
    /// Package name as written in the manifest (e.g. `lodash`, `@types/node`).
    name: Option<String>,
    /// Version string as written in the manifest.
    version: Option<String>,
}
33
34/// Read and parse a `package.json` file, returning `(name, version)` if valid.
35pub async fn read_package_json(pkg_json_path: &Path) -> Option<(String, String)> {
36    let content = tokio::fs::read_to_string(pkg_json_path).await.ok()?;
37    let pkg: PackageJsonPartial = serde_json::from_str(&content).ok()?;
38    let name = pkg.name?;
39    let version = pkg.version?;
40    if name.is_empty() || version.is_empty() {
41        return None;
42    }
43    Some((name, version))
44}
45
46// ---------------------------------------------------------------------------
47// Helper: parse package name into (namespace, name)
48// ---------------------------------------------------------------------------
49
/// Split a full npm package name into an optional scope and the bare name.
///
/// A name is treated as scoped only when it starts with `@` *and* contains
/// a `/`; the split happens at the first `/`. Anything else is returned
/// unchanged with no namespace.
///
/// Examples:
/// - `"@types/node"` -> `(Some("@types"), "node")`
/// - `"lodash"` -> `(None, "lodash")`
pub fn parse_package_name(full_name: &str) -> (Option<String>, String) {
    let scoped = full_name
        .starts_with('@')
        .then(|| full_name.split_once('/'))
        .flatten();

    match scoped {
        Some((scope, bare)) => (Some(scope.to_owned()), bare.to_owned()),
        None => (None, full_name.to_owned()),
    }
}
65
66// ---------------------------------------------------------------------------
67// Helper: build PURL
68// ---------------------------------------------------------------------------
69
/// Format the PURL string for an npm package.
///
/// With a namespace the result is `pkg:npm/<namespace>/<name>@<version>`,
/// otherwise `pkg:npm/<name>@<version>`. The parts are interpolated as-is,
/// with no escaping.
pub fn build_npm_purl(namespace: Option<&str>, name: &str, version: &str) -> String {
    if let Some(scope) = namespace {
        return format!("pkg:npm/{scope}/{name}@{version}");
    }
    format!("pkg:npm/{name}@{version}")
}
77
78// ---------------------------------------------------------------------------
79// Global prefix detection helpers
80// ---------------------------------------------------------------------------
81
82use crate::utils::process::{CommandRunner, SystemCommandRunner};
83
84/// Get the npm global `node_modules` path via `npm root -g`.
85pub fn get_npm_global_prefix() -> Result<String, String> {
86    get_npm_global_prefix_with(&SystemCommandRunner)
87}
88
89/// Version of `get_npm_global_prefix` that accepts an injected
90/// `CommandRunner`. Tests use this with a `MockCommandRunner` to
91/// exercise the success arm (binary present, stdout parsed) without
92/// requiring npm on the host's PATH.
93pub fn get_npm_global_prefix_with(runner: &dyn CommandRunner) -> Result<String, String> {
94    parse_npm_root_output(runner.run("npm", &["root", "-g"]).as_deref().unwrap_or("")).ok_or_else(
95        || {
96            "Failed to determine npm global prefix. Ensure npm is installed and in PATH."
97                .to_string()
98        },
99    )
100}
101
/// Interpret the stdout of `npm root -g`.
///
/// Surrounding whitespace is trimmed; the remaining text is returned as the
/// path, or `None` if nothing remains. Kept free of process spawning so it
/// can be unit-tested directly.
pub fn parse_npm_root_output(stdout: &str) -> Option<String> {
    let trimmed = stdout.trim();
    (!trimmed.is_empty()).then(|| trimmed.to_owned())
}
113
114/// Get the yarn global `node_modules` path via `yarn global dir`.
115pub fn get_yarn_global_prefix() -> Option<String> {
116    get_yarn_global_prefix_with(&SystemCommandRunner)
117}
118
119/// Version of `get_yarn_global_prefix` that accepts an injected
120/// `CommandRunner`. See `get_npm_global_prefix_with`.
121pub fn get_yarn_global_prefix_with(runner: &dyn CommandRunner) -> Option<String> {
122    parse_yarn_dir_output(
123        runner
124            .run("yarn", &["global", "dir"])
125            .as_deref()
126            .unwrap_or(""),
127    )
128}
129
/// Interpret the stdout of `yarn global dir`.
///
/// The trimmed output is taken as yarn's global directory and
/// `node_modules` is appended to it. Returns `None` if the trimmed output is
/// empty. Kept free of process spawning so it can be unit-tested directly.
pub fn parse_yarn_dir_output(stdout: &str) -> Option<String> {
    let global_dir = stdout.trim();
    if global_dir.is_empty() {
        return None;
    }

    let node_modules = Path::new(global_dir).join("node_modules");
    Some(node_modules.to_string_lossy().into_owned())
}
145
146/// Get the pnpm global `node_modules` path via `pnpm root -g`.
147pub fn get_pnpm_global_prefix() -> Option<String> {
148    get_pnpm_global_prefix_with(&SystemCommandRunner)
149}
150
151/// Version of `get_pnpm_global_prefix` that accepts an injected
152/// `CommandRunner`. See `get_npm_global_prefix_with`.
153pub fn get_pnpm_global_prefix_with(runner: &dyn CommandRunner) -> Option<String> {
154    parse_pnpm_root_output(runner.run("pnpm", &["root", "-g"]).as_deref().unwrap_or(""))
155}
156
/// Interpret the stdout of `pnpm root -g`.
///
/// Surrounding whitespace is trimmed; the remaining text is returned as the
/// path, or `None` if nothing remains.
pub fn parse_pnpm_root_output(stdout: &str) -> Option<String> {
    match stdout.trim() {
        "" => None,
        root => Some(root.to_owned()),
    }
}
166
167/// Get the bun global `node_modules` path via `bun pm bin -g`.
168pub fn get_bun_global_prefix() -> Option<String> {
169    get_bun_global_prefix_with(&SystemCommandRunner)
170}
171
172/// Version of `get_bun_global_prefix` that accepts an injected
173/// `CommandRunner`. See `get_npm_global_prefix_with`.
174pub fn get_bun_global_prefix_with(runner: &dyn CommandRunner) -> Option<String> {
175    parse_bun_bin_output(
176        runner
177            .run("bun", &["pm", "bin", "-g"])
178            .as_deref()
179            .unwrap_or(""),
180    )
181}
182
/// Interpret the stdout of `bun pm bin -g`.
///
/// The trimmed output is bun's global *bin* directory. Its parent is taken
/// as the bun root and `install/global/node_modules` is appended to it, so
/// `"/Users/foo/.bun/bin\n"` becomes
/// `Some("/Users/foo/.bun/install/global/node_modules")`.
///
/// Returns `None` when the trimmed output is empty or the path has no
/// parent (e.g. a bare filesystem root). Kept free of process spawning so it
/// can be unit-tested directly.
pub fn parse_bun_bin_output(stdout: &str) -> Option<String> {
    let bin_dir = stdout.trim();
    if bin_dir.is_empty() {
        return None;
    }

    let bun_root = Path::new(bin_dir).parent()?;
    let global_nm = ["install", "global", "node_modules"]
        .iter()
        .fold(bun_root.to_path_buf(), |acc, segment| acc.join(segment));

    Some(global_nm.to_string_lossy().into_owned())
}
207
208// ---------------------------------------------------------------------------
209// Helpers: synchronous wildcard directory resolver
210// ---------------------------------------------------------------------------
211
/// Expand a path pattern made of literal and `"*"` segments, synchronously.
///
/// Starting at `base`, each segment is consumed in turn: a literal segment is
/// joined onto the current directory, while `"*"` fans out into every child
/// that is a directory. The result lists every directory reached once all
/// segments are consumed; an empty `segments` yields `base` itself. A `base`
/// that is not a directory yields nothing.
///
/// Production callers live inside the `#[cfg(target_os = "macos")]` block of
/// `get_global_node_modules_paths` (nvm/volta/fnm fallbacks).
/// `#[allow(dead_code)]` keeps the function available to the inline tests on
/// every target without tripping the dead-code lint on non-macOS builds.
#[allow(dead_code)]
fn find_node_dirs_sync(base: &Path, segments: &[&str]) -> Vec<PathBuf> {
    if !base.is_dir() {
        return Vec::new();
    }

    let Some((&head, tail)) = segments.split_first() else {
        return vec![base.to_path_buf()];
    };

    if head != "*" {
        return find_node_dirs_sync(&base.join(head), tail);
    }

    let Ok(entries) = std::fs::read_dir(base) else {
        return Vec::new();
    };

    entries
        .flatten()
        .map(|entry| base.join(entry.file_name()))
        // Stat the joined path with `std::fs::metadata` so symlinks are
        // resolved: `DirEntry::metadata()` would stat the link itself and
        // miss symlinked version directories (fnm's layout, nvm aliases).
        .filter(|child| {
            std::fs::metadata(child)
                .map(|meta| meta.is_dir())
                .unwrap_or(false)
        })
        .flat_map(|child| find_node_dirs_sync(&child, tail))
        .collect()
}
257
258// ---------------------------------------------------------------------------
259// NpmCrawler
260// ---------------------------------------------------------------------------
261
/// NPM ecosystem crawler for discovering packages in `node_modules`.
///
/// A field-less unit struct: it carries no state of its own, so every
/// instance is interchangeable.
pub struct NpmCrawler;
264
265impl NpmCrawler {
266    /// Create a new `NpmCrawler`.
267    pub fn new() -> Self {
268        Self
269    }
270
271    // ------------------------------------------------------------------
272    // Public API
273    // ------------------------------------------------------------------
274
275    /// Get `node_modules` paths based on options.
276    ///
277    /// In global mode returns well-known global paths; in local mode walks
278    /// the project tree looking for `node_modules` directories (including
279    /// workspace packages).
280    pub async fn get_node_modules_paths(
281        &self,
282        options: &CrawlerOptions,
283    ) -> Result<Vec<PathBuf>, std::io::Error> {
284        if options.global || options.global_prefix.is_some() {
285            if let Some(ref custom) = options.global_prefix {
286                return Ok(vec![custom.clone()]);
287            }
288            return Ok(self.get_global_node_modules_paths());
289        }
290
291        Ok(self.find_local_node_modules_dirs(&options.cwd).await)
292    }
293
294    /// Crawl all discovered `node_modules` and return every package found.
295    pub async fn crawl_all(&self, options: &CrawlerOptions) -> Vec<CrawledPackage> {
296        let mut packages = Vec::new();
297        let mut seen = HashSet::new();
298
299        let nm_paths = self
300            .get_node_modules_paths(options)
301            .await
302            .unwrap_or_default();
303
304        for nm_path in &nm_paths {
305            let found = self.scan_node_modules(nm_path, &mut seen).await;
306            packages.extend(found);
307        }
308
309        packages
310    }
311
312    /// Find specific packages by PURL inside a single `node_modules` tree.
313    ///
314    /// This is an efficient O(n) lookup where n = number of PURLs: we parse
315    /// each PURL to derive the expected directory path, then do a direct stat
316    /// + `package.json` read.
317    pub async fn find_by_purls(
318        &self,
319        node_modules_path: &Path,
320        purls: &[String],
321    ) -> Result<HashMap<String, CrawledPackage>, std::io::Error> {
322        let mut result: HashMap<String, CrawledPackage> = HashMap::new();
323
324        // Parse each PURL to extract the directory key and expected version.
325        struct Target {
326            namespace: Option<String>,
327            name: String,
328            version: String,
329            #[allow(dead_code)]
330            purl: String,
331            dir_key: String,
332        }
333
334        let purl_set: HashSet<&str> = purls.iter().map(|s| s.as_str()).collect();
335        let mut targets: Vec<Target> = Vec::new();
336
337        for purl in purls {
338            if let Some((ns, name, version)) = Self::parse_purl_components(purl) {
339                let dir_key = match &ns {
340                    Some(ns_str) => format!("{ns_str}/{name}"),
341                    None => name.clone(),
342                };
343                targets.push(Target {
344                    namespace: ns,
345                    name,
346                    version,
347                    purl: purl.clone(),
348                    dir_key,
349                });
350            }
351        }
352
353        for target in &targets {
354            let pkg_path = node_modules_path.join(&target.dir_key);
355            let pkg_json_path = pkg_path.join("package.json");
356
357            if let Some((_, version)) = read_package_json(&pkg_json_path).await {
358                if version == target.version {
359                    let purl = build_npm_purl(target.namespace.as_deref(), &target.name, &version);
360                    if purl_set.contains(purl.as_str()) {
361                        result.insert(
362                            purl.clone(),
363                            CrawledPackage {
364                                name: target.name.clone(),
365                                version,
366                                namespace: target.namespace.clone(),
367                                purl,
368                                path: pkg_path.clone(),
369                            },
370                        );
371                    }
372                }
373            }
374        }
375
376        Ok(result)
377    }
378
379    // ------------------------------------------------------------------
380    // Private helpers – global paths
381    // ------------------------------------------------------------------
382
383    /// Collect global `node_modules` paths from all known package managers.
384    fn get_global_node_modules_paths(&self) -> Vec<PathBuf> {
385        let mut seen = HashSet::new();
386        let mut paths = Vec::new();
387
388        let mut add = |p: PathBuf| {
389            if p.is_dir() && seen.insert(p.clone()) {
390                paths.push(p);
391            }
392        };
393
394        if let Ok(npm_path) = get_npm_global_prefix() {
395            add(PathBuf::from(npm_path));
396        }
397        if let Some(pnpm_path) = get_pnpm_global_prefix() {
398            add(PathBuf::from(pnpm_path));
399        }
400        if let Some(yarn_path) = get_yarn_global_prefix() {
401            add(PathBuf::from(yarn_path));
402        }
403        if let Some(bun_path) = get_bun_global_prefix() {
404            add(PathBuf::from(bun_path));
405        }
406
407        // macOS-specific fallback paths
408        #[cfg(target_os = "macos")]
409        {
410            let home = std::env::var("HOME").unwrap_or_default();
411
412            // Homebrew Apple Silicon
413            add(PathBuf::from("/opt/homebrew/lib/node_modules"));
414            // Homebrew Intel / default npm
415            add(PathBuf::from("/usr/local/lib/node_modules"));
416
417            if !home.is_empty() {
418                // nvm
419                for p in find_node_dirs_sync(
420                    &PathBuf::from(&home).join(".nvm/versions/node"),
421                    &["*", "lib", "node_modules"],
422                ) {
423                    add(p);
424                }
425                // volta
426                for p in find_node_dirs_sync(
427                    &PathBuf::from(&home).join(".volta/tools/image/node"),
428                    &["*", "lib", "node_modules"],
429                ) {
430                    add(p);
431                }
432                // fnm
433                for p in find_node_dirs_sync(
434                    &PathBuf::from(&home).join(".fnm/node-versions"),
435                    &["*", "installation", "lib", "node_modules"],
436                ) {
437                    add(p);
438                }
439            }
440        }
441
442        paths
443    }
444
445    // ------------------------------------------------------------------
446    // Private helpers – local node_modules discovery
447    // ------------------------------------------------------------------
448
449    /// Find `node_modules` directories within the project root.
450    /// Recursively searches for workspace `node_modules` but stays within the
451    /// project.
452    async fn find_local_node_modules_dirs(&self, start_path: &Path) -> Vec<PathBuf> {
453        let mut results = Vec::new();
454
455        // Direct node_modules in start_path
456        let direct = start_path.join("node_modules");
457        if is_dir(&direct).await {
458            results.push(direct);
459        }
460
461        // Recursively search for workspace node_modules
462        Self::find_workspace_node_modules(start_path, &mut results).await;
463
464        results
465    }
466
467    /// Recursively find `node_modules` in subdirectories (for monorepos / workspaces).
468    /// Skips symlinks, hidden dirs, and well-known non-workspace dirs.
469    fn find_workspace_node_modules<'a>(
470        dir: &'a Path,
471        results: &'a mut Vec<PathBuf>,
472    ) -> std::pin::Pin<Box<dyn std::future::Future<Output = ()> + 'a>> {
473        Box::pin(async move {
474            for entry in crate::utils::fs::list_dir_entries(dir).await {
475                let Some(file_type) = crate::utils::fs::entry_file_type(&entry).await else {
476                    continue;
477                };
478                if !file_type.is_dir() {
479                    continue;
480                }
481
482                let name = entry.file_name();
483                let name_str = name.to_string_lossy();
484
485                // Skip node_modules, hidden dirs, and well-known build dirs
486                if name_str == "node_modules"
487                    || name_str.starts_with('.')
488                    || SKIP_DIRS.contains(&name_str.as_ref())
489                {
490                    continue;
491                }
492
493                let full_path = dir.join(&name);
494
495                // Check if this subdirectory has its own node_modules
496                let sub_nm = full_path.join("node_modules");
497                if is_dir(&sub_nm).await {
498                    results.push(sub_nm);
499                }
500
501                // Recurse
502                Self::find_workspace_node_modules(&full_path, results).await;
503            }
504        })
505    }
506
507    // ------------------------------------------------------------------
508    // Private helpers – scanning
509    // ------------------------------------------------------------------
510
511    /// Scan a `node_modules` directory, returning all valid packages found.
512    async fn scan_node_modules(
513        &self,
514        node_modules_path: &Path,
515        seen: &mut HashSet<String>,
516    ) -> Vec<CrawledPackage> {
517        let mut results = Vec::new();
518
519        for entry in crate::utils::fs::list_dir_entries(node_modules_path).await {
520            let name = entry.file_name();
521            let name_str = name.to_string_lossy().to_string();
522
523            // Skip hidden files and node_modules
524            if name_str.starts_with('.') || name_str == "node_modules" {
525                continue;
526            }
527
528            let Some(file_type) = crate::utils::fs::entry_file_type(&entry).await else {
529                continue;
530            };
531
532            // Allow both directories and symlinks (pnpm uses symlinks)
533            if !file_type.is_dir() && !file_type.is_symlink() {
534                continue;
535            }
536
537            let entry_path = node_modules_path.join(&name_str);
538
539            if name_str.starts_with('@') {
540                // Scoped packages
541                let scoped = Self::scan_scoped_packages(&entry_path, seen).await;
542                results.extend(scoped);
543            } else {
544                // Regular package
545                if let Some(pkg) = Self::check_package(&entry_path, seen).await {
546                    results.push(pkg);
547                }
548                // Nested node_modules only for real directories (not symlinks)
549                if file_type.is_dir() {
550                    let nested = Self::scan_nested_node_modules(&entry_path, seen).await;
551                    results.extend(nested);
552                }
553            }
554        }
555
556        results
557    }
558
559    /// Scan a scoped packages directory (`@scope/`).
560    fn scan_scoped_packages<'a>(
561        scope_path: &'a Path,
562        seen: &'a mut HashSet<String>,
563    ) -> std::pin::Pin<Box<dyn std::future::Future<Output = Vec<CrawledPackage>> + 'a>> {
564        Box::pin(async move {
565            let mut results = Vec::new();
566
567            for entry in crate::utils::fs::list_dir_entries(scope_path).await {
568                let name = entry.file_name();
569                let name_str = name.to_string_lossy().to_string();
570
571                if name_str.starts_with('.') {
572                    continue;
573                }
574
575                let Some(file_type) = crate::utils::fs::entry_file_type(&entry).await else {
576                    continue;
577                };
578
579                if !file_type.is_dir() && !file_type.is_symlink() {
580                    continue;
581                }
582
583                let pkg_path = scope_path.join(&name_str);
584                if let Some(pkg) = Self::check_package(&pkg_path, seen).await {
585                    results.push(pkg);
586                }
587
588                // Nested node_modules only for real directories
589                if file_type.is_dir() {
590                    let nested = Self::scan_nested_node_modules(&pkg_path, seen).await;
591                    results.extend(nested);
592                }
593            }
594
595            results
596        })
597    }
598
599    /// Scan nested `node_modules` inside a package (if it exists).
600    fn scan_nested_node_modules<'a>(
601        pkg_path: &'a Path,
602        seen: &'a mut HashSet<String>,
603    ) -> std::pin::Pin<Box<dyn std::future::Future<Output = Vec<CrawledPackage>> + 'a>> {
604        Box::pin(async move {
605            let nested_nm = pkg_path.join("node_modules");
606            let mut results = Vec::new();
607
608            for entry in crate::utils::fs::list_dir_entries(&nested_nm).await {
609                let name = entry.file_name();
610                let name_str = name.to_string_lossy().to_string();
611
612                if name_str.starts_with('.') || name_str == "node_modules" {
613                    continue;
614                }
615
616                let Some(file_type) = crate::utils::fs::entry_file_type(&entry).await else {
617                    continue;
618                };
619
620                if !file_type.is_dir() && !file_type.is_symlink() {
621                    continue;
622                }
623
624                let entry_path = nested_nm.join(&name_str);
625
626                if name_str.starts_with('@') {
627                    let scoped = Self::scan_scoped_packages(&entry_path, seen).await;
628                    results.extend(scoped);
629                } else {
630                    if let Some(pkg) = Self::check_package(&entry_path, seen).await {
631                        results.push(pkg);
632                    }
633                    // Recurse into deeper nested node_modules only for real
634                    // directories (not symlinks) — matching the invariant in
635                    // `scan_node_modules`/`scan_scoped_packages`. Following a
636                    // symlink here would walk into pnpm's content-addressed
637                    // store (or an `npm link` target outside the project).
638                    if file_type.is_dir() {
639                        let deeper = Self::scan_nested_node_modules(&entry_path, seen).await;
640                        results.extend(deeper);
641                    }
642                }
643            }
644
645            results
646        })
647    }
648
649    /// Check a package directory and return `CrawledPackage` if valid.
650    /// Deduplicates by PURL via the `seen` set.
651    async fn check_package(pkg_path: &Path, seen: &mut HashSet<String>) -> Option<CrawledPackage> {
652        let pkg_json_path = pkg_path.join("package.json");
653        let (full_name, version) = read_package_json(&pkg_json_path).await?;
654        let (namespace, name) = parse_package_name(&full_name);
655        let purl = build_npm_purl(namespace.as_deref(), &name, &version);
656
657        if seen.contains(&purl) {
658            return None;
659        }
660        seen.insert(purl.clone());
661
662        Some(CrawledPackage {
663            name,
664            version,
665            namespace,
666            purl,
667            path: pkg_path.to_path_buf(),
668        })
669    }
670
671    // ------------------------------------------------------------------
672    // Private helpers – PURL parsing
673    // ------------------------------------------------------------------
674
675    /// Parse a PURL string to extract namespace, name, and version.
676    fn parse_purl_components(purl: &str) -> Option<(Option<String>, String, String)> {
677        // Strip qualifiers
678        let base = match purl.find('?') {
679            Some(idx) => &purl[..idx],
680            None => purl,
681        };
682
683        let rest = base.strip_prefix("pkg:npm/")?;
684        let at_idx = rest.rfind('@')?;
685        let name_part = &rest[..at_idx];
686        let version = &rest[at_idx + 1..];
687
688        if name_part.is_empty() || version.is_empty() {
689            return None;
690        }
691
692        if name_part.starts_with('@') {
693            let slash_idx = name_part.find('/')?;
694            let namespace = name_part[..slash_idx].to_string();
695            let name = name_part[slash_idx + 1..].to_string();
696            if name.is_empty() {
697                return None;
698            }
699            Some((Some(namespace), name, version.to_string()))
700        } else {
701            Some((None, name_part.to_string(), version.to_string()))
702        }
703    }
704}
705
706impl Default for NpmCrawler {
707    fn default() -> Self {
708        Self::new()
709    }
710}
711
712// ---------------------------------------------------------------------------
713// Utility
714// ---------------------------------------------------------------------------
715
716/// Check whether a path is a directory (follows symlinks).
717async fn is_dir(path: &Path) -> bool {
718    tokio::fs::metadata(path)
719        .await
720        .map(|m| m.is_dir())
721        .unwrap_or(false)
722}
723
724#[cfg(test)]
725mod tests {
726    use super::*;
727
728    #[test]
729    fn test_parse_package_name_scoped() {
730        let (ns, name) = parse_package_name("@types/node");
731        assert_eq!(ns.as_deref(), Some("@types"));
732        assert_eq!(name, "node");
733    }
734
735    #[test]
736    fn test_parse_package_name_unscoped() {
737        let (ns, name) = parse_package_name("lodash");
738        assert!(ns.is_none());
739        assert_eq!(name, "lodash");
740    }
741
742    #[test]
743    fn test_build_npm_purl_scoped() {
744        assert_eq!(
745            build_npm_purl(Some("@types"), "node", "20.0.0"),
746            "pkg:npm/@types/node@20.0.0"
747        );
748    }
749
750    #[test]
751    fn test_build_npm_purl_unscoped() {
752        assert_eq!(
753            build_npm_purl(None, "lodash", "4.17.21"),
754            "pkg:npm/lodash@4.17.21"
755        );
756    }
757
758    #[test]
759    fn test_parse_purl_components_scoped() {
760        let (ns, name, ver) =
761            NpmCrawler::parse_purl_components("pkg:npm/@types/node@20.0.0").unwrap();
762        assert_eq!(ns.as_deref(), Some("@types"));
763        assert_eq!(name, "node");
764        assert_eq!(ver, "20.0.0");
765    }
766
767    #[test]
768    fn test_parse_purl_components_unscoped() {
769        let (ns, name, ver) = NpmCrawler::parse_purl_components("pkg:npm/lodash@4.17.21").unwrap();
770        assert!(ns.is_none());
771        assert_eq!(name, "lodash");
772        assert_eq!(ver, "4.17.21");
773    }
774
775    #[test]
776    fn test_parse_purl_components_invalid() {
777        assert!(NpmCrawler::parse_purl_components("pkg:pypi/requests@2.0").is_none());
778        assert!(NpmCrawler::parse_purl_components("not-a-purl").is_none());
779    }
780
781    #[tokio::test]
782    async fn test_read_package_json_valid() {
783        let dir = tempfile::tempdir().unwrap();
784        let pkg_json = dir.path().join("package.json");
785        tokio::fs::write(&pkg_json, r#"{"name": "test-pkg", "version": "1.0.0"}"#)
786            .await
787            .unwrap();
788
789        let result = read_package_json(&pkg_json).await;
790        assert!(result.is_some());
791        let (name, version) = result.unwrap();
792        assert_eq!(name, "test-pkg");
793        assert_eq!(version, "1.0.0");
794    }
795
796    #[tokio::test]
797    async fn test_read_package_json_missing() {
798        let dir = tempfile::tempdir().unwrap();
799        let pkg_json = dir.path().join("package.json");
800        assert!(read_package_json(&pkg_json).await.is_none());
801    }
802
803    #[tokio::test]
804    async fn test_read_package_json_invalid() {
805        let dir = tempfile::tempdir().unwrap();
806        let pkg_json = dir.path().join("package.json");
807        tokio::fs::write(&pkg_json, "not json").await.unwrap();
808        assert!(read_package_json(&pkg_json).await.is_none());
809    }
810
811    #[tokio::test]
812    async fn test_crawl_all_basic() {
813        let dir = tempfile::tempdir().unwrap();
814        let nm = dir.path().join("node_modules");
815        let pkg_dir = nm.join("foo");
816        tokio::fs::create_dir_all(&pkg_dir).await.unwrap();
817        tokio::fs::write(
818            pkg_dir.join("package.json"),
819            r#"{"name": "foo", "version": "1.2.3"}"#,
820        )
821        .await
822        .unwrap();
823
824        let crawler = NpmCrawler::new();
825        let options = CrawlerOptions {
826            cwd: dir.path().to_path_buf(),
827            global: false,
828            global_prefix: None,
829            batch_size: DEFAULT_BATCH_SIZE,
830        };
831
832        let packages = crawler.crawl_all(&options).await;
833        assert_eq!(packages.len(), 1);
834        assert_eq!(packages[0].name, "foo");
835        assert_eq!(packages[0].version, "1.2.3");
836        assert_eq!(packages[0].purl, "pkg:npm/foo@1.2.3");
837        assert!(packages[0].namespace.is_none());
838    }
839
840    #[tokio::test]
841    async fn test_crawl_all_scoped() {
842        let dir = tempfile::tempdir().unwrap();
843        let nm = dir.path().join("node_modules");
844        let scope_dir = nm.join("@types").join("node");
845        tokio::fs::create_dir_all(&scope_dir).await.unwrap();
846        tokio::fs::write(
847            scope_dir.join("package.json"),
848            r#"{"name": "@types/node", "version": "20.0.0"}"#,
849        )
850        .await
851        .unwrap();
852
853        let crawler = NpmCrawler::new();
854        let options = CrawlerOptions {
855            cwd: dir.path().to_path_buf(),
856            global: false,
857            global_prefix: None,
858            batch_size: DEFAULT_BATCH_SIZE,
859        };
860
861        let packages = crawler.crawl_all(&options).await;
862        assert_eq!(packages.len(), 1);
863        assert_eq!(packages[0].name, "node");
864        assert_eq!(packages[0].namespace.as_deref(), Some("@types"));
865        assert_eq!(packages[0].purl, "pkg:npm/@types/node@20.0.0");
866    }
867
868    #[test]
869    fn test_find_node_dirs_sync_wildcard() {
870        // Create an nvm-like layout: base/v18.0.0/lib/node_modules
871        let dir = tempfile::tempdir().unwrap();
872        let nm1 = dir.path().join("v18.0.0/lib/node_modules");
873        let nm2 = dir.path().join("v20.1.0/lib/node_modules");
874        std::fs::create_dir_all(&nm1).unwrap();
875        std::fs::create_dir_all(&nm2).unwrap();
876
877        let results = find_node_dirs_sync(dir.path(), &["*", "lib", "node_modules"]);
878        assert_eq!(results.len(), 2);
879        assert!(results.contains(&nm1));
880        assert!(results.contains(&nm2));
881    }
882
883    #[test]
884    fn test_find_node_dirs_sync_empty() {
885        // Non-existent base path should return empty
886        let results = find_node_dirs_sync(Path::new("/nonexistent/path/xyz"), &["*", "lib"]);
887        assert!(results.is_empty());
888    }
889
890    /// Regression: a wildcard segment that matches a *symlinked*
891    /// directory must be followed. `DirEntry::metadata()` stats the link
892    /// itself (reports `is_dir == false`), so the resolver previously
893    /// skipped symlinked version dirs — exactly the layout fnm produces
894    /// and the `current`/`default` aliases nvm creates. The fix stats the
895    /// joined path with `std::fs::metadata`, which resolves the target.
896    #[cfg(unix)]
897    #[test]
898    fn test_find_node_dirs_sync_follows_symlinked_segment() {
899        use std::os::unix::fs::symlink;
900
901        // Real version layout lives in its own tree, away from `base`,
902        // so the only way to reach it is through the symlink.
903        let real = tempfile::tempdir().unwrap();
904        let real_nm = real.path().join("lib").join("node_modules");
905        std::fs::create_dir_all(&real_nm).unwrap();
906
907        // `base` holds only a symlink standing in for a version dir.
908        let base = tempfile::tempdir().unwrap();
909        let alias = base.path().join("current");
910        symlink(real.path(), &alias).unwrap();
911
912        let results = find_node_dirs_sync(base.path(), &["*", "lib", "node_modules"]);
913        assert_eq!(
914            results.len(),
915            1,
916            "a symlinked version dir must be followed, not skipped"
917        );
918        assert_eq!(results[0], alias.join("lib").join("node_modules"));
919    }
920
921    #[test]
922    fn test_find_node_dirs_sync_literal() {
923        // All literal segments (no wildcard)
924        let dir = tempfile::tempdir().unwrap();
925        let target = dir.path().join("lib/node_modules");
926        std::fs::create_dir_all(&target).unwrap();
927
928        let results = find_node_dirs_sync(dir.path(), &["lib", "node_modules"]);
929        assert_eq!(results.len(), 1);
930        assert_eq!(results[0], target);
931    }
932
933    #[cfg(target_os = "macos")]
934    #[test]
935    fn test_macos_get_global_node_modules_paths_no_panic() {
936        let crawler = NpmCrawler::new();
937        // Should not panic, even if no package managers are installed
938        let _paths = crawler.get_global_node_modules_paths();
939    }
940
941    #[tokio::test]
942    async fn test_find_by_purls() {
943        let dir = tempfile::tempdir().unwrap();
944        let nm = dir.path().join("node_modules");
945
946        // Create foo@1.0.0
947        let foo_dir = nm.join("foo");
948        tokio::fs::create_dir_all(&foo_dir).await.unwrap();
949        tokio::fs::write(
950            foo_dir.join("package.json"),
951            r#"{"name": "foo", "version": "1.0.0"}"#,
952        )
953        .await
954        .unwrap();
955
956        // Create @types/node@20.0.0
957        let types_dir = nm.join("@types").join("node");
958        tokio::fs::create_dir_all(&types_dir).await.unwrap();
959        tokio::fs::write(
960            types_dir.join("package.json"),
961            r#"{"name": "@types/node", "version": "20.0.0"}"#,
962        )
963        .await
964        .unwrap();
965
966        let crawler = NpmCrawler::new();
967        let purls = vec![
968            "pkg:npm/foo@1.0.0".to_string(),
969            "pkg:npm/@types/node@20.0.0".to_string(),
970            "pkg:npm/not-installed@0.0.1".to_string(),
971        ];
972
973        let result = crawler.find_by_purls(&nm, &purls).await.unwrap();
974
975        assert_eq!(result.len(), 2);
976        assert!(result.contains_key("pkg:npm/foo@1.0.0"));
977        assert!(result.contains_key("pkg:npm/@types/node@20.0.0"));
978        assert!(!result.contains_key("pkg:npm/not-installed@0.0.1"));
979    }
980}