Skip to main content

socket_patch_core/crawlers/
npm_crawler.rs

1use std::collections::{HashMap, HashSet};
2use std::path::{Path, PathBuf};
3
4use serde::Deserialize;
5
6use super::types::{CrawledPackage, CrawlerOptions};
7
/// Batch size plugged into `CrawlerOptions::batch_size` by this module's
/// unit tests; compiled only for test builds.
#[cfg(test)]
const DEFAULT_BATCH_SIZE: usize = 100;
11
/// Directory names that are never descended into while looking for
/// workspace `node_modules` folders (build output, scratch space and
/// vendored trees).
const SKIP_DIRS: &[&str] = &[
    "dist",
    "build",
    "coverage",
    "tmp",
    "temp",
    "__pycache__",
    "vendor",
];
22
23// ---------------------------------------------------------------------------
24// Helper: read and parse package.json
25// ---------------------------------------------------------------------------
26
27/// Minimal fields we need from package.json.
28#[derive(Deserialize)]
29struct PackageJsonPartial {
30    name: Option<String>,
31    version: Option<String>,
32}
33
34/// Read and parse a `package.json` file, returning `(name, version)` if valid.
35pub async fn read_package_json(pkg_json_path: &Path) -> Option<(String, String)> {
36    let content = tokio::fs::read_to_string(pkg_json_path).await.ok()?;
37    let pkg: PackageJsonPartial = serde_json::from_str(&content).ok()?;
38    let name = pkg.name?;
39    let version = pkg.version?;
40    if name.is_empty() || version.is_empty() {
41        return None;
42    }
43    Some((name, version))
44}
45
46// ---------------------------------------------------------------------------
47// Helper: parse package name into (namespace, name)
48// ---------------------------------------------------------------------------
49
/// Split a full npm package name into its optional scope and its bare name.
///
/// A name is treated as scoped only when it starts with `@` *and* contains a
/// `/`; the split happens at the first `/` and the scope keeps its `@`.
/// Anything else is returned unchanged with no namespace.
///
/// Examples:
/// - `"@types/node"` -> `(Some("@types"), "node")`
/// - `"lodash"` -> `(None, "lodash")`
pub fn parse_package_name(full_name: &str) -> (Option<String>, String) {
    match full_name.split_once('/') {
        Some((scope, bare)) if full_name.starts_with('@') => {
            (Some(scope.to_string()), bare.to_string())
        }
        _ => (None, full_name.to_string()),
    }
}
65
66// ---------------------------------------------------------------------------
67// Helper: build PURL
68// ---------------------------------------------------------------------------
69
/// Format the PURL string for an npm package.
///
/// With a namespace the result is `pkg:npm/<namespace>/<name>@<version>`,
/// otherwise `pkg:npm/<name>@<version>`. The inputs are inserted verbatim.
pub fn build_npm_purl(namespace: Option<&str>, name: &str, version: &str) -> String {
    namespace.map_or_else(
        || format!("pkg:npm/{name}@{version}"),
        |scope| format!("pkg:npm/{scope}/{name}@{version}"),
    )
}
77
78// ---------------------------------------------------------------------------
79// Global prefix detection helpers
80// ---------------------------------------------------------------------------
81
82use crate::utils::process::{CommandRunner, SystemCommandRunner};
83
84/// Get the npm global `node_modules` path via `npm root -g`.
85pub fn get_npm_global_prefix() -> Result<String, String> {
86    get_npm_global_prefix_with(&SystemCommandRunner)
87}
88
89/// Version of `get_npm_global_prefix` that accepts an injected
90/// `CommandRunner`. Tests use this with a `MockCommandRunner` to
91/// exercise the success arm (binary present, stdout parsed) without
92/// requiring npm on the host's PATH.
93pub fn get_npm_global_prefix_with(runner: &dyn CommandRunner) -> Result<String, String> {
94    parse_npm_root_output(runner.run("npm", &["root", "-g"]).as_deref().unwrap_or(""))
95        .ok_or_else(|| {
96            "Failed to determine npm global prefix. Ensure npm is installed and in PATH."
97                .to_string()
98        })
99}
100
/// Interpret the stdout of `npm root -g`: the whitespace-trimmed text is the
/// path, and blank output means "no answer" (`None`). Kept free of any
/// process spawning so it can be unit-tested directly.
pub fn parse_npm_root_output(stdout: &str) -> Option<String> {
    let trimmed = stdout.trim();
    (!trimmed.is_empty()).then(|| trimmed.to_string())
}
112
113/// Get the yarn global `node_modules` path via `yarn global dir`.
114pub fn get_yarn_global_prefix() -> Option<String> {
115    get_yarn_global_prefix_with(&SystemCommandRunner)
116}
117
118/// Version of `get_yarn_global_prefix` that accepts an injected
119/// `CommandRunner`. See `get_npm_global_prefix_with`.
120pub fn get_yarn_global_prefix_with(runner: &dyn CommandRunner) -> Option<String> {
121    parse_yarn_dir_output(runner.run("yarn", &["global", "dir"]).as_deref().unwrap_or(""))
122}
123
/// Interpret the stdout of `yarn global dir`: the trimmed text is yarn's
/// global directory, and the result is that directory joined with
/// `node_modules`. Blank output gives `None`. Performs no I/O.
pub fn parse_yarn_dir_output(stdout: &str) -> Option<String> {
    match stdout.trim() {
        "" => None,
        global_dir => Some(
            Path::new(global_dir)
                .join("node_modules")
                .to_string_lossy()
                .into_owned(),
        ),
    }
}
134
135/// Get the pnpm global `node_modules` path via `pnpm root -g`.
136pub fn get_pnpm_global_prefix() -> Option<String> {
137    get_pnpm_global_prefix_with(&SystemCommandRunner)
138}
139
140/// Version of `get_pnpm_global_prefix` that accepts an injected
141/// `CommandRunner`. See `get_npm_global_prefix_with`.
142pub fn get_pnpm_global_prefix_with(runner: &dyn CommandRunner) -> Option<String> {
143    parse_pnpm_root_output(runner.run("pnpm", &["root", "-g"]).as_deref().unwrap_or(""))
144}
145
/// Interpret the stdout of `pnpm root -g`: the whitespace-trimmed text is
/// the path; blank output gives `None`.
pub fn parse_pnpm_root_output(stdout: &str) -> Option<String> {
    Some(stdout.trim())
        .filter(|trimmed| !trimmed.is_empty())
        .map(str::to_string)
}
155
156/// Get the bun global `node_modules` path via `bun pm bin -g`.
157pub fn get_bun_global_prefix() -> Option<String> {
158    get_bun_global_prefix_with(&SystemCommandRunner)
159}
160
161/// Version of `get_bun_global_prefix` that accepts an injected
162/// `CommandRunner`. See `get_npm_global_prefix_with`.
163pub fn get_bun_global_prefix_with(runner: &dyn CommandRunner) -> Option<String> {
164    parse_bun_bin_output(runner.run("bun", &["pm", "bin", "-g"]).as_deref().unwrap_or(""))
165}
166
/// Interpret the stdout of `bun pm bin -g` and derive bun's global
/// `node_modules` directory from it. Performs no I/O.
///
/// The trimmed output is taken as the global bin directory; its parent is
/// joined with `install/global/node_modules`. For example
/// `"/Users/foo/.bun/bin\n"` becomes
/// `Some("/Users/foo/.bun/install/global/node_modules")`.
///
/// Returns `None` for blank output or for a path that has no parent (such as
/// a bare root).
pub fn parse_bun_bin_output(stdout: &str) -> Option<String> {
    let bin_dir = match stdout.trim() {
        "" => return None,
        trimmed => Path::new(trimmed),
    };

    let global_nm = bin_dir
        .parent()?
        .join("install")
        .join("global")
        .join("node_modules");
    Some(global_nm.to_string_lossy().into_owned())
}
191
192// ---------------------------------------------------------------------------
193// Helpers: synchronous wildcard directory resolver
194// ---------------------------------------------------------------------------
195
/// Resolve a path with `"*"` wildcard segments synchronously.
///
/// Starting at `base`, each entry of `segments` is either a literal directory
/// name, which is appended to the current path, or `"*"`, which fans out over
/// every directory entry of the current path. The returned vector holds every
/// fully-resolved path that exists as a directory; it is empty when `base`
/// (or any intermediate path) is not a directory.
///
/// Symlinks are followed at every level: directory checks go through
/// `Path::is_dir`, which stats the link target. `DirEntry::metadata` is
/// deliberately *not* used for the wildcard filter because it does not
/// traverse symlinks and would silently drop symlinked version directories.
///
/// Production callers live inside `#[cfg(target_os = "macos")]` blocks of
/// `get_global_node_modules_paths` (Homebrew/nvm/volta/fnm fallbacks).
/// `#[allow(dead_code)]` keeps the function visible to the inline
/// `#[cfg(test)] mod tests` callers on every target without tripping
/// `-D dead_code` on non-macOS clippy runs.
#[allow(dead_code)]
fn find_node_dirs_sync(base: &Path, segments: &[&str]) -> Vec<PathBuf> {
    if !base.is_dir() {
        return Vec::new();
    }
    // No segments left: `base` itself is a match.
    let Some((&first, rest)) = segments.split_first() else {
        return vec![base.to_path_buf()];
    };

    if first != "*" {
        return find_node_dirs_sync(&base.join(first), rest);
    }

    // An unreadable directory simply contributes no matches.
    let Ok(entries) = std::fs::read_dir(base) else {
        return Vec::new();
    };
    entries
        .flatten()
        .map(|entry| entry.path())
        // Follows symlinks, unlike `DirEntry::metadata`.
        .filter(|candidate| candidate.is_dir())
        .flat_map(|candidate| find_node_dirs_sync(&candidate, rest))
        .collect()
}
237
238// ---------------------------------------------------------------------------
239// NpmCrawler
240// ---------------------------------------------------------------------------
241
/// NPM ecosystem crawler for discovering packages in `node_modules`.
///
/// The type carries no state; all configuration is passed per call through
/// `CrawlerOptions`. The derives make the handle printable in diagnostics and
/// freely copyable.
#[derive(Debug, Clone, Copy)]
pub struct NpmCrawler;
244
245impl NpmCrawler {
246    /// Create a new `NpmCrawler`.
247    pub fn new() -> Self {
248        Self
249    }
250
251    // ------------------------------------------------------------------
252    // Public API
253    // ------------------------------------------------------------------
254
255    /// Get `node_modules` paths based on options.
256    ///
257    /// In global mode returns well-known global paths; in local mode walks
258    /// the project tree looking for `node_modules` directories (including
259    /// workspace packages).
260    pub async fn get_node_modules_paths(&self, options: &CrawlerOptions) -> Result<Vec<PathBuf>, std::io::Error> {
261        if options.global || options.global_prefix.is_some() {
262            if let Some(ref custom) = options.global_prefix {
263                return Ok(vec![custom.clone()]);
264            }
265            return Ok(self.get_global_node_modules_paths());
266        }
267
268        Ok(self.find_local_node_modules_dirs(&options.cwd).await)
269    }
270
271    /// Crawl all discovered `node_modules` and return every package found.
272    pub async fn crawl_all(&self, options: &CrawlerOptions) -> Vec<CrawledPackage> {
273        let mut packages = Vec::new();
274        let mut seen = HashSet::new();
275
276        let nm_paths = self.get_node_modules_paths(options).await.unwrap_or_default();
277
278        for nm_path in &nm_paths {
279            let found = self.scan_node_modules(nm_path, &mut seen).await;
280            packages.extend(found);
281        }
282
283        packages
284    }
285
286    /// Find specific packages by PURL inside a single `node_modules` tree.
287    ///
288    /// This is an efficient O(n) lookup where n = number of PURLs: we parse
289    /// each PURL to derive the expected directory path, then do a direct stat
290    /// + `package.json` read.
291    pub async fn find_by_purls(
292        &self,
293        node_modules_path: &Path,
294        purls: &[String],
295    ) -> Result<HashMap<String, CrawledPackage>, std::io::Error> {
296        let mut result: HashMap<String, CrawledPackage> = HashMap::new();
297
298        // Parse each PURL to extract the directory key and expected version.
299        struct Target {
300            namespace: Option<String>,
301            name: String,
302            version: String,
303            #[allow(dead_code)] purl: String,
304            dir_key: String,
305        }
306
307        let purl_set: HashSet<&str> = purls.iter().map(|s| s.as_str()).collect();
308        let mut targets: Vec<Target> = Vec::new();
309
310        for purl in purls {
311            if let Some((ns, name, version)) = Self::parse_purl_components(purl) {
312                let dir_key = match &ns {
313                    Some(ns_str) => format!("{ns_str}/{name}"),
314                    None => name.clone(),
315                };
316                targets.push(Target {
317                    namespace: ns,
318                    name,
319                    version,
320                    purl: purl.clone(),
321                    dir_key,
322                });
323            }
324        }
325
326        for target in &targets {
327            let pkg_path = node_modules_path.join(&target.dir_key);
328            let pkg_json_path = pkg_path.join("package.json");
329
330            if let Some((_, version)) = read_package_json(&pkg_json_path).await {
331                if version == target.version {
332                    let purl = build_npm_purl(
333                        target.namespace.as_deref(),
334                        &target.name,
335                        &version,
336                    );
337                    if purl_set.contains(purl.as_str()) {
338                        result.insert(
339                            purl.clone(),
340                            CrawledPackage {
341                                name: target.name.clone(),
342                                version,
343                                namespace: target.namespace.clone(),
344                                purl,
345                                path: pkg_path.clone(),
346                            },
347                        );
348                    }
349                }
350            }
351        }
352
353        Ok(result)
354    }
355
356    // ------------------------------------------------------------------
357    // Private helpers – global paths
358    // ------------------------------------------------------------------
359
360    /// Collect global `node_modules` paths from all known package managers.
361    fn get_global_node_modules_paths(&self) -> Vec<PathBuf> {
362        let mut seen = HashSet::new();
363        let mut paths = Vec::new();
364
365        let mut add = |p: PathBuf| {
366            if p.is_dir() && seen.insert(p.clone()) {
367                paths.push(p);
368            }
369        };
370
371        if let Ok(npm_path) = get_npm_global_prefix() {
372            add(PathBuf::from(npm_path));
373        }
374        if let Some(pnpm_path) = get_pnpm_global_prefix() {
375            add(PathBuf::from(pnpm_path));
376        }
377        if let Some(yarn_path) = get_yarn_global_prefix() {
378            add(PathBuf::from(yarn_path));
379        }
380        if let Some(bun_path) = get_bun_global_prefix() {
381            add(PathBuf::from(bun_path));
382        }
383
384        // macOS-specific fallback paths
385        #[cfg(target_os = "macos")]
386        {
387            let home = std::env::var("HOME").unwrap_or_default();
388
389            // Homebrew Apple Silicon
390            add(PathBuf::from("/opt/homebrew/lib/node_modules"));
391            // Homebrew Intel / default npm
392            add(PathBuf::from("/usr/local/lib/node_modules"));
393
394            if !home.is_empty() {
395                // nvm
396                for p in find_node_dirs_sync(
397                    &PathBuf::from(&home).join(".nvm/versions/node"),
398                    &["*", "lib", "node_modules"],
399                ) {
400                    add(p);
401                }
402                // volta
403                for p in find_node_dirs_sync(
404                    &PathBuf::from(&home).join(".volta/tools/image/node"),
405                    &["*", "lib", "node_modules"],
406                ) {
407                    add(p);
408                }
409                // fnm
410                for p in find_node_dirs_sync(
411                    &PathBuf::from(&home).join(".fnm/node-versions"),
412                    &["*", "installation", "lib", "node_modules"],
413                ) {
414                    add(p);
415                }
416            }
417        }
418
419        paths
420    }
421
422    // ------------------------------------------------------------------
423    // Private helpers – local node_modules discovery
424    // ------------------------------------------------------------------
425
426    /// Find `node_modules` directories within the project root.
427    /// Recursively searches for workspace `node_modules` but stays within the
428    /// project.
429    async fn find_local_node_modules_dirs(&self, start_path: &Path) -> Vec<PathBuf> {
430        let mut results = Vec::new();
431
432        // Direct node_modules in start_path
433        let direct = start_path.join("node_modules");
434        if is_dir(&direct).await {
435            results.push(direct);
436        }
437
438        // Recursively search for workspace node_modules
439        Self::find_workspace_node_modules(start_path, &mut results).await;
440
441        results
442    }
443
444    /// Recursively find `node_modules` in subdirectories (for monorepos / workspaces).
445    /// Skips symlinks, hidden dirs, and well-known non-workspace dirs.
446    fn find_workspace_node_modules<'a>(
447        dir: &'a Path,
448        results: &'a mut Vec<PathBuf>,
449    ) -> std::pin::Pin<Box<dyn std::future::Future<Output = ()> + 'a>> {
450        Box::pin(async move {
451            for entry in crate::utils::fs::list_dir_entries(dir).await {
452                let Some(file_type) = crate::utils::fs::entry_file_type(&entry).await else {
453                    continue;
454                };
455                if !file_type.is_dir() {
456                    continue;
457                }
458
459                let name = entry.file_name();
460                let name_str = name.to_string_lossy();
461
462                // Skip node_modules, hidden dirs, and well-known build dirs
463                if name_str == "node_modules"
464                    || name_str.starts_with('.')
465                    || SKIP_DIRS.contains(&name_str.as_ref())
466                {
467                    continue;
468                }
469
470                let full_path = dir.join(&name);
471
472                // Check if this subdirectory has its own node_modules
473                let sub_nm = full_path.join("node_modules");
474                if is_dir(&sub_nm).await {
475                    results.push(sub_nm);
476                }
477
478                // Recurse
479                Self::find_workspace_node_modules(&full_path, results).await;
480            }
481        })
482    }
483
484    // ------------------------------------------------------------------
485    // Private helpers – scanning
486    // ------------------------------------------------------------------
487
488    /// Scan a `node_modules` directory, returning all valid packages found.
489    async fn scan_node_modules(
490        &self,
491        node_modules_path: &Path,
492        seen: &mut HashSet<String>,
493    ) -> Vec<CrawledPackage> {
494        let mut results = Vec::new();
495
496        for entry in crate::utils::fs::list_dir_entries(node_modules_path).await {
497            let name = entry.file_name();
498            let name_str = name.to_string_lossy().to_string();
499
500            // Skip hidden files and node_modules
501            if name_str.starts_with('.') || name_str == "node_modules" {
502                continue;
503            }
504
505            let Some(file_type) = crate::utils::fs::entry_file_type(&entry).await else {
506                continue;
507            };
508
509            // Allow both directories and symlinks (pnpm uses symlinks)
510            if !file_type.is_dir() && !file_type.is_symlink() {
511                continue;
512            }
513
514            let entry_path = node_modules_path.join(&name_str);
515
516            if name_str.starts_with('@') {
517                // Scoped packages
518                let scoped =
519                    Self::scan_scoped_packages(&entry_path, seen).await;
520                results.extend(scoped);
521            } else {
522                // Regular package
523                if let Some(pkg) = Self::check_package(&entry_path, seen).await {
524                    results.push(pkg);
525                }
526                // Nested node_modules only for real directories (not symlinks)
527                if file_type.is_dir() {
528                    let nested =
529                        Self::scan_nested_node_modules(&entry_path, seen).await;
530                    results.extend(nested);
531                }
532            }
533        }
534
535        results
536    }
537
538    /// Scan a scoped packages directory (`@scope/`).
539    fn scan_scoped_packages<'a>(
540        scope_path: &'a Path,
541        seen: &'a mut HashSet<String>,
542    ) -> std::pin::Pin<Box<dyn std::future::Future<Output = Vec<CrawledPackage>> + 'a>> {
543        Box::pin(async move {
544            let mut results = Vec::new();
545
546            for entry in crate::utils::fs::list_dir_entries(scope_path).await {
547                let name = entry.file_name();
548                let name_str = name.to_string_lossy().to_string();
549
550                if name_str.starts_with('.') {
551                    continue;
552                }
553
554                let Some(file_type) = crate::utils::fs::entry_file_type(&entry).await else {
555                    continue;
556                };
557
558                if !file_type.is_dir() && !file_type.is_symlink() {
559                    continue;
560                }
561
562                let pkg_path = scope_path.join(&name_str);
563                if let Some(pkg) = Self::check_package(&pkg_path, seen).await {
564                    results.push(pkg);
565                }
566
567                // Nested node_modules only for real directories
568                if file_type.is_dir() {
569                    let nested =
570                        Self::scan_nested_node_modules(&pkg_path, seen).await;
571                    results.extend(nested);
572                }
573            }
574
575            results
576        })
577    }
578
579    /// Scan nested `node_modules` inside a package (if it exists).
580    fn scan_nested_node_modules<'a>(
581        pkg_path: &'a Path,
582        seen: &'a mut HashSet<String>,
583    ) -> std::pin::Pin<Box<dyn std::future::Future<Output = Vec<CrawledPackage>> + 'a>> {
584        Box::pin(async move {
585            let nested_nm = pkg_path.join("node_modules");
586            let mut results = Vec::new();
587
588            for entry in crate::utils::fs::list_dir_entries(&nested_nm).await {
589                let name = entry.file_name();
590                let name_str = name.to_string_lossy().to_string();
591
592                if name_str.starts_with('.') || name_str == "node_modules" {
593                    continue;
594                }
595
596                let Some(file_type) = crate::utils::fs::entry_file_type(&entry).await else {
597                    continue;
598                };
599
600                if !file_type.is_dir() && !file_type.is_symlink() {
601                    continue;
602                }
603
604                let entry_path = nested_nm.join(&name_str);
605
606                if name_str.starts_with('@') {
607                    let scoped =
608                        Self::scan_scoped_packages(&entry_path, seen).await;
609                    results.extend(scoped);
610                } else {
611                    if let Some(pkg) = Self::check_package(&entry_path, seen).await {
612                        results.push(pkg);
613                    }
614                    // Recursively check deeper nested node_modules
615                    let deeper =
616                        Self::scan_nested_node_modules(&entry_path, seen).await;
617                    results.extend(deeper);
618                }
619            }
620
621            results
622        })
623    }
624
625    /// Check a package directory and return `CrawledPackage` if valid.
626    /// Deduplicates by PURL via the `seen` set.
627    async fn check_package(
628        pkg_path: &Path,
629        seen: &mut HashSet<String>,
630    ) -> Option<CrawledPackage> {
631        let pkg_json_path = pkg_path.join("package.json");
632        let (full_name, version) = read_package_json(&pkg_json_path).await?;
633        let (namespace, name) = parse_package_name(&full_name);
634        let purl = build_npm_purl(namespace.as_deref(), &name, &version);
635
636        if seen.contains(&purl) {
637            return None;
638        }
639        seen.insert(purl.clone());
640
641        Some(CrawledPackage {
642            name,
643            version,
644            namespace,
645            purl,
646            path: pkg_path.to_path_buf(),
647        })
648    }
649
650    // ------------------------------------------------------------------
651    // Private helpers – PURL parsing
652    // ------------------------------------------------------------------
653
654    /// Parse a PURL string to extract namespace, name, and version.
655    fn parse_purl_components(purl: &str) -> Option<(Option<String>, String, String)> {
656        // Strip qualifiers
657        let base = match purl.find('?') {
658            Some(idx) => &purl[..idx],
659            None => purl,
660        };
661
662        let rest = base.strip_prefix("pkg:npm/")?;
663        let at_idx = rest.rfind('@')?;
664        let name_part = &rest[..at_idx];
665        let version = &rest[at_idx + 1..];
666
667        if name_part.is_empty() || version.is_empty() {
668            return None;
669        }
670
671        if name_part.starts_with('@') {
672            let slash_idx = name_part.find('/')?;
673            let namespace = name_part[..slash_idx].to_string();
674            let name = name_part[slash_idx + 1..].to_string();
675            if name.is_empty() {
676                return None;
677            }
678            Some((Some(namespace), name, version.to_string()))
679        } else {
680            Some((None, name_part.to_string(), version.to_string()))
681        }
682    }
683}
684
685impl Default for NpmCrawler {
686    fn default() -> Self {
687        Self::new()
688    }
689}
690
691// ---------------------------------------------------------------------------
692// Utility
693// ---------------------------------------------------------------------------
694
695/// Check whether a path is a directory (follows symlinks).
696async fn is_dir(path: &Path) -> bool {
697    tokio::fs::metadata(path)
698        .await
699        .map(|m| m.is_dir())
700        .unwrap_or(false)
701}
702
#[cfg(test)]
mod tests {
    use super::*;

    // ------------------------------------------------------------------
    // Fixtures
    // ------------------------------------------------------------------

    /// Create `pkg_dir` (with parents) and write a minimal `package.json`
    /// declaring `name` and `version` into it.
    async fn write_manifest(pkg_dir: &Path, name: &str, version: &str) {
        tokio::fs::create_dir_all(pkg_dir).await.unwrap();
        let manifest = format!(r#"{{"name": "{name}", "version": "{version}"}}"#);
        tokio::fs::write(pkg_dir.join("package.json"), manifest)
            .await
            .unwrap();
    }

    /// Options for a local (non-global) crawl rooted at `cwd`.
    fn local_options(cwd: &Path) -> CrawlerOptions {
        CrawlerOptions {
            cwd: cwd.to_path_buf(),
            global: false,
            global_prefix: None,
            batch_size: DEFAULT_BATCH_SIZE,
        }
    }

    // ------------------------------------------------------------------
    // Pure helpers
    // ------------------------------------------------------------------

    #[test]
    fn test_parse_package_name_scoped() {
        let (namespace, bare) = parse_package_name("@types/node");
        assert_eq!(namespace, Some("@types".to_string()));
        assert_eq!(bare, "node");
    }

    #[test]
    fn test_parse_package_name_unscoped() {
        let (namespace, bare) = parse_package_name("lodash");
        assert_eq!(namespace, None);
        assert_eq!(bare, "lodash");
    }

    #[test]
    fn test_build_npm_purl_scoped() {
        let purl = build_npm_purl(Some("@types"), "node", "20.0.0");
        assert_eq!(purl, "pkg:npm/@types/node@20.0.0");
    }

    #[test]
    fn test_build_npm_purl_unscoped() {
        let purl = build_npm_purl(None, "lodash", "4.17.21");
        assert_eq!(purl, "pkg:npm/lodash@4.17.21");
    }

    #[test]
    fn test_parse_purl_components_scoped() {
        let parsed = NpmCrawler::parse_purl_components("pkg:npm/@types/node@20.0.0");
        let (namespace, name, version) = parsed.expect("scoped npm PURL should parse");
        assert_eq!(namespace.as_deref(), Some("@types"));
        assert_eq!(name, "node");
        assert_eq!(version, "20.0.0");
    }

    #[test]
    fn test_parse_purl_components_unscoped() {
        let parsed = NpmCrawler::parse_purl_components("pkg:npm/lodash@4.17.21");
        let (namespace, name, version) = parsed.expect("unscoped npm PURL should parse");
        assert!(namespace.is_none());
        assert_eq!(name, "lodash");
        assert_eq!(version, "4.17.21");
    }

    #[test]
    fn test_parse_purl_components_invalid() {
        for bad in ["pkg:pypi/requests@2.0", "not-a-purl"] {
            assert!(
                NpmCrawler::parse_purl_components(bad).is_none(),
                "{bad} should be rejected"
            );
        }
    }

    // ------------------------------------------------------------------
    // package.json reading
    // ------------------------------------------------------------------

    #[tokio::test]
    async fn test_read_package_json_valid() {
        let dir = tempfile::tempdir().unwrap();
        write_manifest(dir.path(), "test-pkg", "1.0.0").await;

        let parsed = read_package_json(&dir.path().join("package.json")).await;
        assert_eq!(parsed, Some(("test-pkg".to_string(), "1.0.0".to_string())));
    }

    #[tokio::test]
    async fn test_read_package_json_missing() {
        let dir = tempfile::tempdir().unwrap();
        let absent = dir.path().join("package.json");
        assert!(read_package_json(&absent).await.is_none());
    }

    #[tokio::test]
    async fn test_read_package_json_invalid() {
        let dir = tempfile::tempdir().unwrap();
        let manifest = dir.path().join("package.json");
        tokio::fs::write(&manifest, "not json").await.unwrap();
        assert!(read_package_json(&manifest).await.is_none());
    }

    // ------------------------------------------------------------------
    // Crawling
    // ------------------------------------------------------------------

    #[tokio::test]
    async fn test_crawl_all_basic() {
        let dir = tempfile::tempdir().unwrap();
        write_manifest(&dir.path().join("node_modules").join("foo"), "foo", "1.2.3").await;

        let packages = NpmCrawler::new()
            .crawl_all(&local_options(dir.path()))
            .await;

        assert_eq!(packages.len(), 1);
        let pkg = &packages[0];
        assert_eq!(pkg.name, "foo");
        assert_eq!(pkg.version, "1.2.3");
        assert_eq!(pkg.purl, "pkg:npm/foo@1.2.3");
        assert!(pkg.namespace.is_none());
    }

    #[tokio::test]
    async fn test_crawl_all_scoped() {
        let dir = tempfile::tempdir().unwrap();
        let pkg_dir = dir.path().join("node_modules").join("@types").join("node");
        write_manifest(&pkg_dir, "@types/node", "20.0.0").await;

        let packages = NpmCrawler::new()
            .crawl_all(&local_options(dir.path()))
            .await;

        assert_eq!(packages.len(), 1);
        let pkg = &packages[0];
        assert_eq!(pkg.name, "node");
        assert_eq!(pkg.namespace.as_deref(), Some("@types"));
        assert_eq!(pkg.purl, "pkg:npm/@types/node@20.0.0");
    }

    // ------------------------------------------------------------------
    // Wildcard directory resolution
    // ------------------------------------------------------------------

    #[test]
    fn test_find_node_dirs_sync_wildcard() {
        // nvm-like layout: <base>/<version>/lib/node_modules
        let dir = tempfile::tempdir().unwrap();
        let expected: Vec<PathBuf> = ["v18.0.0", "v20.1.0"]
            .iter()
            .map(|version| dir.path().join(version).join("lib/node_modules"))
            .collect();
        for nm in &expected {
            std::fs::create_dir_all(nm).unwrap();
        }

        let results = find_node_dirs_sync(dir.path(), &["*", "lib", "node_modules"]);
        assert_eq!(results.len(), 2);
        for nm in &expected {
            assert!(results.contains(nm));
        }
    }

    #[test]
    fn test_find_node_dirs_sync_empty() {
        // A base path that does not exist resolves to nothing.
        let missing = Path::new("/nonexistent/path/xyz");
        assert!(find_node_dirs_sync(missing, &["*", "lib"]).is_empty());
    }

    #[test]
    fn test_find_node_dirs_sync_literal() {
        // Literal segments only, no wildcard involved.
        let dir = tempfile::tempdir().unwrap();
        let target = dir.path().join("lib/node_modules");
        std::fs::create_dir_all(&target).unwrap();

        let results = find_node_dirs_sync(dir.path(), &["lib", "node_modules"]);
        assert_eq!(results, vec![target]);
    }

    #[cfg(target_os = "macos")]
    #[test]
    fn test_macos_get_global_node_modules_paths_no_panic() {
        // Must complete without panicking even when no package manager is
        // installed; the returned paths themselves are not inspected.
        let _paths = NpmCrawler::new().get_global_node_modules_paths();
    }

    // ------------------------------------------------------------------
    // Targeted lookup
    // ------------------------------------------------------------------

    #[tokio::test]
    async fn test_find_by_purls() {
        let dir = tempfile::tempdir().unwrap();
        let nm = dir.path().join("node_modules");

        write_manifest(&nm.join("foo"), "foo", "1.0.0").await;
        write_manifest(&nm.join("@types").join("node"), "@types/node", "20.0.0").await;

        let purls: Vec<String> = [
            "pkg:npm/foo@1.0.0",
            "pkg:npm/@types/node@20.0.0",
            "pkg:npm/not-installed@0.0.1",
        ]
        .iter()
        .map(|purl| purl.to_string())
        .collect();

        let found = NpmCrawler::new().find_by_purls(&nm, &purls).await.unwrap();

        assert_eq!(found.len(), 2);
        assert!(found.contains_key("pkg:npm/foo@1.0.0"));
        assert!(found.contains_key("pkg:npm/@types/node@20.0.0"));
        assert!(!found.contains_key("pkg:npm/not-installed@0.0.1"));
    }
}