// socket_patch_core/crawlers/npm_crawler.rs

1use std::collections::{HashMap, HashSet};
2use std::path::{Path, PathBuf};
3use std::process::Command;
4
5use serde::Deserialize;
6
7use super::types::{CrawledPackage, CrawlerOptions};
8
/// Default batch size for crawling.
///
/// Compiled only for test builds; within this file the unit tests use it to
/// fill in the `batch_size` field of `CrawlerOptions`.
#[cfg(test)]
const DEFAULT_BATCH_SIZE: usize = 100;
12
/// Directories to skip when searching for workspace node_modules.
///
/// Entries are compared against a directory's file name with an exact,
/// case-sensitive match. `node_modules` itself and dot-prefixed directories
/// are excluded separately by the directory walker.
const SKIP_DIRS: &[&str] = &[
    "dist",
    "build",
    "coverage",
    "tmp",
    "temp",
    "__pycache__",
    "vendor",
];
23
24// ---------------------------------------------------------------------------
25// Helper: read and parse package.json
26// ---------------------------------------------------------------------------
27
/// Minimal fields we need from package.json.
///
/// Both fields are `Option`s so a manifest that omits either key still
/// deserializes; the reader decides how to treat missing values.
#[derive(Deserialize)]
struct PackageJsonPartial {
    /// Value of the `name` key, if present.
    name: Option<String>,
    /// Value of the `version` key, if present.
    version: Option<String>,
}
34
35/// Read and parse a `package.json` file, returning `(name, version)` if valid.
36pub async fn read_package_json(pkg_json_path: &Path) -> Option<(String, String)> {
37    let content = tokio::fs::read_to_string(pkg_json_path).await.ok()?;
38    let pkg: PackageJsonPartial = serde_json::from_str(&content).ok()?;
39    let name = pkg.name?;
40    let version = pkg.version?;
41    if name.is_empty() || version.is_empty() {
42        return None;
43    }
44    Some((name, version))
45}
46
47// ---------------------------------------------------------------------------
48// Helper: parse package name into (namespace, name)
49// ---------------------------------------------------------------------------
50
/// Split a full npm package name into its optional scope and bare name.
///
/// A name is treated as scoped only when it starts with `@` and contains a
/// `/`; the split happens at the first `/`. Anything else is returned
/// unchanged with no namespace.
///
/// Examples:
/// - `"@types/node"` -> `(Some("@types"), "node")`
/// - `"lodash"` -> `(None, "lodash")`
/// - `"@scope"` (no slash) -> `(None, "@scope")`
pub fn parse_package_name(full_name: &str) -> (Option<String>, String) {
    let scoped = if full_name.starts_with('@') {
        full_name.split_once('/')
    } else {
        None
    };

    match scoped {
        Some((scope, bare)) => (Some(scope.to_owned()), bare.to_owned()),
        None => (None, full_name.to_owned()),
    }
}
66
67// ---------------------------------------------------------------------------
68// Helper: build PURL
69// ---------------------------------------------------------------------------
70
/// Assemble the `pkg:npm/...` PURL string for a package.
///
/// With a namespace the result is `pkg:npm/{namespace}/{name}@{version}`;
/// without one it is `pkg:npm/{name}@{version}`. Inputs are inserted verbatim
/// (no escaping or normalization is applied).
pub fn build_npm_purl(namespace: Option<&str>, name: &str, version: &str) -> String {
    let scope_prefix = namespace.map(|ns| format!("{ns}/")).unwrap_or_default();
    format!("pkg:npm/{scope_prefix}{name}@{version}")
}
78
79// ---------------------------------------------------------------------------
80// Global prefix detection helpers
81// ---------------------------------------------------------------------------
82
/// Get the npm global `node_modules` path via `npm root -g`.
///
/// The command's stdout is decoded lossily as UTF-8 and trimmed.
///
/// # Errors
///
/// Returns a human-readable message when:
/// - the `npm` process cannot be spawned,
/// - the command exits with a non-success status, or
/// - the command succeeds but prints nothing. An empty path is rejected
///   because joining onto it would silently produce paths relative to the
///   current directory; the yarn/pnpm/bun helpers reject empty output too.
pub fn get_npm_global_prefix() -> Result<String, String> {
    let output = Command::new("npm")
        .args(["root", "-g"])
        .stdin(std::process::Stdio::null())
        .stdout(std::process::Stdio::piped())
        .stderr(std::process::Stdio::piped())
        .output()
        .map_err(|e| format!("Failed to run `npm root -g`: {e}"))?;

    if !output.status.success() {
        return Err(
            "Failed to determine npm global prefix. Ensure npm is installed and in PATH."
                .to_string(),
        );
    }

    let root = String::from_utf8_lossy(&output.stdout).trim().to_string();
    if root.is_empty() {
        return Err("`npm root -g` succeeded but did not print a path.".to_string());
    }

    Ok(root)
}
102
/// Resolve yarn's global `node_modules` directory using `yarn global dir`.
///
/// The directory printed by the command is trimmed and `node_modules` is
/// appended to it. Returns `None` when the process cannot be spawned, exits
/// unsuccessfully, or prints nothing.
pub fn get_yarn_global_prefix() -> Option<String> {
    let result = Command::new("yarn")
        .arg("global")
        .arg("dir")
        .stdin(std::process::Stdio::null())
        .stdout(std::process::Stdio::piped())
        .stderr(std::process::Stdio::piped())
        .output()
        .ok()
        .filter(|out| out.status.success())?;

    let stdout = String::from_utf8_lossy(&result.stdout);
    let global_dir = stdout.trim();
    if global_dir.is_empty() {
        return None;
    }

    let node_modules = Path::new(global_dir).join("node_modules");
    Some(node_modules.to_string_lossy().into_owned())
}
123
/// Resolve pnpm's global `node_modules` directory using `pnpm root -g`.
///
/// Returns the trimmed stdout of the command, or `None` when the process
/// cannot be spawned, exits unsuccessfully, or prints nothing.
pub fn get_pnpm_global_prefix() -> Option<String> {
    let mut cmd = Command::new("pnpm");
    cmd.args(["root", "-g"])
        .stdin(std::process::Stdio::null())
        .stdout(std::process::Stdio::piped())
        .stderr(std::process::Stdio::piped());

    match cmd.output() {
        Ok(out) if out.status.success() => {
            let root = String::from_utf8_lossy(&out.stdout).trim().to_owned();
            if root.is_empty() {
                None
            } else {
                Some(root)
            }
        }
        _ => None,
    }
}
144
/// Resolve bun's global `node_modules` directory using `bun pm bin -g`.
///
/// The command prints bun's global bin directory; this function takes that
/// directory's parent and appends `install/global/node_modules`. Returns
/// `None` when the process cannot be spawned, exits unsuccessfully, prints
/// nothing, or the printed path has no parent.
pub fn get_bun_global_prefix() -> Option<String> {
    let result = Command::new("bun")
        .args(["pm", "bin", "-g"])
        .stdin(std::process::Stdio::null())
        .stdout(std::process::Stdio::piped())
        .stderr(std::process::Stdio::piped())
        .output()
        .ok()
        .filter(|out| out.status.success())?;

    let stdout = String::from_utf8_lossy(&result.stdout);
    let bin_dir = stdout.trim();
    if bin_dir.is_empty() {
        return None;
    }

    // The bin directory's parent is used as the base for the global install tree.
    let mut global_nm = Path::new(bin_dir).parent()?.to_path_buf();
    for segment in ["install", "global", "node_modules"].iter() {
        global_nm.push(segment);
    }

    Some(global_nm.to_string_lossy().into_owned())
}
175
176// ---------------------------------------------------------------------------
177// NpmCrawler
178// ---------------------------------------------------------------------------
179
/// NPM ecosystem crawler for discovering packages in `node_modules`.
///
/// This is a unit struct: it carries no state, and everything a crawl needs
/// is passed to its methods as arguments.
pub struct NpmCrawler;
182
183impl NpmCrawler {
184    /// Create a new `NpmCrawler`.
185    pub fn new() -> Self {
186        Self
187    }
188
189    // ------------------------------------------------------------------
190    // Public API
191    // ------------------------------------------------------------------
192
193    /// Get `node_modules` paths based on options.
194    ///
195    /// In global mode returns well-known global paths; in local mode walks
196    /// the project tree looking for `node_modules` directories (including
197    /// workspace packages).
198    pub async fn get_node_modules_paths(&self, options: &CrawlerOptions) -> Result<Vec<PathBuf>, std::io::Error> {
199        if options.global || options.global_prefix.is_some() {
200            if let Some(ref custom) = options.global_prefix {
201                return Ok(vec![custom.clone()]);
202            }
203            return Ok(self.get_global_node_modules_paths());
204        }
205
206        Ok(self.find_local_node_modules_dirs(&options.cwd).await)
207    }
208
209    /// Crawl all discovered `node_modules` and return every package found.
210    pub async fn crawl_all(&self, options: &CrawlerOptions) -> Vec<CrawledPackage> {
211        let mut packages = Vec::new();
212        let mut seen = HashSet::new();
213
214        let nm_paths = self.get_node_modules_paths(options).await.unwrap_or_default();
215
216        for nm_path in &nm_paths {
217            let found = self.scan_node_modules(nm_path, &mut seen).await;
218            packages.extend(found);
219        }
220
221        packages
222    }
223
224    /// Find specific packages by PURL inside a single `node_modules` tree.
225    ///
226    /// This is an efficient O(n) lookup where n = number of PURLs: we parse
227    /// each PURL to derive the expected directory path, then do a direct stat
228    /// + `package.json` read.
229    pub async fn find_by_purls(
230        &self,
231        node_modules_path: &Path,
232        purls: &[String],
233    ) -> Result<HashMap<String, CrawledPackage>, std::io::Error> {
234        let mut result: HashMap<String, CrawledPackage> = HashMap::new();
235
236        // Parse each PURL to extract the directory key and expected version.
237        struct Target {
238            namespace: Option<String>,
239            name: String,
240            version: String,
241            #[allow(dead_code)] purl: String,
242            dir_key: String,
243        }
244
245        let purl_set: HashSet<&str> = purls.iter().map(|s| s.as_str()).collect();
246        let mut targets: Vec<Target> = Vec::new();
247
248        for purl in purls {
249            if let Some((ns, name, version)) = Self::parse_purl_components(purl) {
250                let dir_key = match &ns {
251                    Some(ns_str) => format!("{ns_str}/{name}"),
252                    None => name.clone(),
253                };
254                targets.push(Target {
255                    namespace: ns,
256                    name,
257                    version,
258                    purl: purl.clone(),
259                    dir_key,
260                });
261            }
262        }
263
264        for target in &targets {
265            let pkg_path = node_modules_path.join(&target.dir_key);
266            let pkg_json_path = pkg_path.join("package.json");
267
268            if let Some((_, version)) = read_package_json(&pkg_json_path).await {
269                if version == target.version {
270                    let purl = build_npm_purl(
271                        target.namespace.as_deref(),
272                        &target.name,
273                        &version,
274                    );
275                    if purl_set.contains(purl.as_str()) {
276                        result.insert(
277                            purl.clone(),
278                            CrawledPackage {
279                                name: target.name.clone(),
280                                version,
281                                namespace: target.namespace.clone(),
282                                purl,
283                                path: pkg_path.clone(),
284                            },
285                        );
286                    }
287                }
288            }
289        }
290
291        Ok(result)
292    }
293
294    // ------------------------------------------------------------------
295    // Private helpers – global paths
296    // ------------------------------------------------------------------
297
298    /// Collect global `node_modules` paths from all known package managers.
299    fn get_global_node_modules_paths(&self) -> Vec<PathBuf> {
300        let mut paths = Vec::new();
301
302        if let Ok(npm_path) = get_npm_global_prefix() {
303            paths.push(PathBuf::from(npm_path));
304        }
305        if let Some(pnpm_path) = get_pnpm_global_prefix() {
306            paths.push(PathBuf::from(pnpm_path));
307        }
308        if let Some(yarn_path) = get_yarn_global_prefix() {
309            paths.push(PathBuf::from(yarn_path));
310        }
311        if let Some(bun_path) = get_bun_global_prefix() {
312            paths.push(PathBuf::from(bun_path));
313        }
314
315        paths
316    }
317
318    // ------------------------------------------------------------------
319    // Private helpers – local node_modules discovery
320    // ------------------------------------------------------------------
321
322    /// Find `node_modules` directories within the project root.
323    /// Recursively searches for workspace `node_modules` but stays within the
324    /// project.
325    async fn find_local_node_modules_dirs(&self, start_path: &Path) -> Vec<PathBuf> {
326        let mut results = Vec::new();
327
328        // Direct node_modules in start_path
329        let direct = start_path.join("node_modules");
330        if is_dir(&direct).await {
331            results.push(direct);
332        }
333
334        // Recursively search for workspace node_modules
335        Self::find_workspace_node_modules(start_path, &mut results).await;
336
337        results
338    }
339
340    /// Recursively find `node_modules` in subdirectories (for monorepos / workspaces).
341    /// Skips symlinks, hidden dirs, and well-known non-workspace dirs.
342    fn find_workspace_node_modules<'a>(
343        dir: &'a Path,
344        results: &'a mut Vec<PathBuf>,
345    ) -> std::pin::Pin<Box<dyn std::future::Future<Output = ()> + 'a>> {
346        Box::pin(async move {
347            let mut entries = match tokio::fs::read_dir(dir).await {
348                Ok(rd) => rd,
349                Err(_) => return,
350            };
351
352            let mut entry_list = Vec::new();
353            while let Ok(Some(entry)) = entries.next_entry().await {
354                entry_list.push(entry);
355            }
356
357            for entry in entry_list {
358                let file_type = match entry.file_type().await {
359                    Ok(ft) => ft,
360                    Err(_) => continue,
361                };
362
363                if !file_type.is_dir() {
364                    continue;
365                }
366
367                let name = entry.file_name();
368                let name_str = name.to_string_lossy();
369
370                // Skip node_modules, hidden dirs, and well-known build dirs
371                if name_str == "node_modules"
372                    || name_str.starts_with('.')
373                    || SKIP_DIRS.contains(&name_str.as_ref())
374                {
375                    continue;
376                }
377
378                let full_path = dir.join(&name);
379
380                // Check if this subdirectory has its own node_modules
381                let sub_nm = full_path.join("node_modules");
382                if is_dir(&sub_nm).await {
383                    results.push(sub_nm);
384                }
385
386                // Recurse
387                Self::find_workspace_node_modules(&full_path, results).await;
388            }
389        })
390    }
391
392    // ------------------------------------------------------------------
393    // Private helpers – scanning
394    // ------------------------------------------------------------------
395
396    /// Scan a `node_modules` directory, returning all valid packages found.
397    async fn scan_node_modules(
398        &self,
399        node_modules_path: &Path,
400        seen: &mut HashSet<String>,
401    ) -> Vec<CrawledPackage> {
402        let mut results = Vec::new();
403
404        let mut entries = match tokio::fs::read_dir(node_modules_path).await {
405            Ok(rd) => rd,
406            Err(_) => return results,
407        };
408
409        let mut entry_list = Vec::new();
410        while let Ok(Some(entry)) = entries.next_entry().await {
411            entry_list.push(entry);
412        }
413
414        for entry in entry_list {
415            let name = entry.file_name();
416            let name_str = name.to_string_lossy().to_string();
417
418            // Skip hidden files and node_modules
419            if name_str.starts_with('.') || name_str == "node_modules" {
420                continue;
421            }
422
423            let file_type = match entry.file_type().await {
424                Ok(ft) => ft,
425                Err(_) => continue,
426            };
427
428            // Allow both directories and symlinks (pnpm uses symlinks)
429            if !file_type.is_dir() && !file_type.is_symlink() {
430                continue;
431            }
432
433            let entry_path = node_modules_path.join(&name_str);
434
435            if name_str.starts_with('@') {
436                // Scoped packages
437                let scoped =
438                    Self::scan_scoped_packages(&entry_path, seen).await;
439                results.extend(scoped);
440            } else {
441                // Regular package
442                if let Some(pkg) = Self::check_package(&entry_path, seen).await {
443                    results.push(pkg);
444                }
445                // Nested node_modules only for real directories (not symlinks)
446                if file_type.is_dir() {
447                    let nested =
448                        Self::scan_nested_node_modules(&entry_path, seen).await;
449                    results.extend(nested);
450                }
451            }
452        }
453
454        results
455    }
456
457    /// Scan a scoped packages directory (`@scope/`).
458    fn scan_scoped_packages<'a>(
459        scope_path: &'a Path,
460        seen: &'a mut HashSet<String>,
461    ) -> std::pin::Pin<Box<dyn std::future::Future<Output = Vec<CrawledPackage>> + 'a>> {
462        Box::pin(async move {
463            let mut results = Vec::new();
464
465            let mut entries = match tokio::fs::read_dir(scope_path).await {
466                Ok(rd) => rd,
467                Err(_) => return results,
468            };
469
470            let mut entry_list = Vec::new();
471            while let Ok(Some(entry)) = entries.next_entry().await {
472                entry_list.push(entry);
473            }
474
475            for entry in entry_list {
476                let name = entry.file_name();
477                let name_str = name.to_string_lossy().to_string();
478
479                if name_str.starts_with('.') {
480                    continue;
481                }
482
483                let file_type = match entry.file_type().await {
484                    Ok(ft) => ft,
485                    Err(_) => continue,
486                };
487
488                if !file_type.is_dir() && !file_type.is_symlink() {
489                    continue;
490                }
491
492                let pkg_path = scope_path.join(&name_str);
493                if let Some(pkg) = Self::check_package(&pkg_path, seen).await {
494                    results.push(pkg);
495                }
496
497                // Nested node_modules only for real directories
498                if file_type.is_dir() {
499                    let nested =
500                        Self::scan_nested_node_modules(&pkg_path, seen).await;
501                    results.extend(nested);
502                }
503            }
504
505            results
506        })
507    }
508
509    /// Scan nested `node_modules` inside a package (if it exists).
510    fn scan_nested_node_modules<'a>(
511        pkg_path: &'a Path,
512        seen: &'a mut HashSet<String>,
513    ) -> std::pin::Pin<Box<dyn std::future::Future<Output = Vec<CrawledPackage>> + 'a>> {
514        Box::pin(async move {
515            let nested_nm = pkg_path.join("node_modules");
516
517            let mut entries = match tokio::fs::read_dir(&nested_nm).await {
518                Ok(rd) => rd,
519                Err(_) => return Vec::new(),
520            };
521
522            let mut results = Vec::new();
523
524            let mut entry_list = Vec::new();
525            while let Ok(Some(entry)) = entries.next_entry().await {
526                entry_list.push(entry);
527            }
528
529            for entry in entry_list {
530                let name = entry.file_name();
531                let name_str = name.to_string_lossy().to_string();
532
533                if name_str.starts_with('.') || name_str == "node_modules" {
534                    continue;
535                }
536
537                let file_type = match entry.file_type().await {
538                    Ok(ft) => ft,
539                    Err(_) => continue,
540                };
541
542                if !file_type.is_dir() && !file_type.is_symlink() {
543                    continue;
544                }
545
546                let entry_path = nested_nm.join(&name_str);
547
548                if name_str.starts_with('@') {
549                    let scoped =
550                        Self::scan_scoped_packages(&entry_path, seen).await;
551                    results.extend(scoped);
552                } else {
553                    if let Some(pkg) = Self::check_package(&entry_path, seen).await {
554                        results.push(pkg);
555                    }
556                    // Recursively check deeper nested node_modules
557                    let deeper =
558                        Self::scan_nested_node_modules(&entry_path, seen).await;
559                    results.extend(deeper);
560                }
561            }
562
563            results
564        })
565    }
566
567    /// Check a package directory and return `CrawledPackage` if valid.
568    /// Deduplicates by PURL via the `seen` set.
569    async fn check_package(
570        pkg_path: &Path,
571        seen: &mut HashSet<String>,
572    ) -> Option<CrawledPackage> {
573        let pkg_json_path = pkg_path.join("package.json");
574        let (full_name, version) = read_package_json(&pkg_json_path).await?;
575        let (namespace, name) = parse_package_name(&full_name);
576        let purl = build_npm_purl(namespace.as_deref(), &name, &version);
577
578        if seen.contains(&purl) {
579            return None;
580        }
581        seen.insert(purl.clone());
582
583        Some(CrawledPackage {
584            name,
585            version,
586            namespace,
587            purl,
588            path: pkg_path.to_path_buf(),
589        })
590    }
591
592    // ------------------------------------------------------------------
593    // Private helpers – PURL parsing
594    // ------------------------------------------------------------------
595
596    /// Parse a PURL string to extract namespace, name, and version.
597    fn parse_purl_components(purl: &str) -> Option<(Option<String>, String, String)> {
598        // Strip qualifiers
599        let base = match purl.find('?') {
600            Some(idx) => &purl[..idx],
601            None => purl,
602        };
603
604        let rest = base.strip_prefix("pkg:npm/")?;
605        let at_idx = rest.rfind('@')?;
606        let name_part = &rest[..at_idx];
607        let version = &rest[at_idx + 1..];
608
609        if name_part.is_empty() || version.is_empty() {
610            return None;
611        }
612
613        if name_part.starts_with('@') {
614            let slash_idx = name_part.find('/')?;
615            let namespace = name_part[..slash_idx].to_string();
616            let name = name_part[slash_idx + 1..].to_string();
617            if name.is_empty() {
618                return None;
619            }
620            Some((Some(namespace), name, version.to_string()))
621        } else {
622            Some((None, name_part.to_string(), version.to_string()))
623        }
624    }
625}
626
627impl Default for NpmCrawler {
628    fn default() -> Self {
629        Self::new()
630    }
631}
632
633// ---------------------------------------------------------------------------
634// Utility
635// ---------------------------------------------------------------------------
636
637/// Check whether a path is a directory (follows symlinks).
638async fn is_dir(path: &Path) -> bool {
639    tokio::fs::metadata(path)
640        .await
641        .map(|m| m.is_dir())
642        .unwrap_or(false)
643}
644
/// Unit tests for the npm crawler helpers and `NpmCrawler` itself.
///
/// Filesystem-backed tests build a throwaway project inside a temporary
/// directory so they never touch a real `node_modules` tree.
#[cfg(test)]
mod tests {
    use super::*;

    // ------------------------------------------------------------------
    // Fixtures
    // ------------------------------------------------------------------

    /// Create `dir` (with parents) and write a `package.json` declaring
    /// `name` and `version` into it.
    async fn write_package(dir: &Path, name: &str, version: &str) {
        tokio::fs::create_dir_all(dir).await.unwrap();
        let manifest = format!(r#"{{"name": "{name}", "version": "{version}"}}"#);
        tokio::fs::write(dir.join("package.json"), manifest)
            .await
            .unwrap();
    }

    /// Crawler options for a local (non-global) crawl rooted at `cwd`.
    fn local_options(cwd: &Path) -> CrawlerOptions {
        CrawlerOptions {
            cwd: cwd.to_path_buf(),
            global: false,
            global_prefix: None,
            batch_size: DEFAULT_BATCH_SIZE,
        }
    }

    // ------------------------------------------------------------------
    // parse_package_name / build_npm_purl
    // ------------------------------------------------------------------

    #[test]
    fn test_parse_package_name_scoped() {
        let (ns, name) = parse_package_name("@types/node");
        assert_eq!(ns.as_deref(), Some("@types"));
        assert_eq!(name, "node");
    }

    #[test]
    fn test_parse_package_name_unscoped() {
        let (ns, name) = parse_package_name("lodash");
        assert!(ns.is_none());
        assert_eq!(name, "lodash");
    }

    #[test]
    fn test_parse_package_name_scope_without_slash() {
        // A leading `@` without a `/` is not a scoped name.
        let (ns, name) = parse_package_name("@scope");
        assert!(ns.is_none());
        assert_eq!(name, "@scope");
    }

    #[test]
    fn test_build_npm_purl_scoped() {
        assert_eq!(
            build_npm_purl(Some("@types"), "node", "20.0.0"),
            "pkg:npm/@types/node@20.0.0"
        );
    }

    #[test]
    fn test_build_npm_purl_unscoped() {
        assert_eq!(
            build_npm_purl(None, "lodash", "4.17.21"),
            "pkg:npm/lodash@4.17.21"
        );
    }

    // ------------------------------------------------------------------
    // parse_purl_components
    // ------------------------------------------------------------------

    #[test]
    fn test_parse_purl_components_scoped() {
        let (ns, name, ver) =
            NpmCrawler::parse_purl_components("pkg:npm/@types/node@20.0.0").unwrap();
        assert_eq!(ns.as_deref(), Some("@types"));
        assert_eq!(name, "node");
        assert_eq!(ver, "20.0.0");
    }

    #[test]
    fn test_parse_purl_components_unscoped() {
        let (ns, name, ver) =
            NpmCrawler::parse_purl_components("pkg:npm/lodash@4.17.21").unwrap();
        assert!(ns.is_none());
        assert_eq!(name, "lodash");
        assert_eq!(ver, "4.17.21");
    }

    #[test]
    fn test_parse_purl_components_strips_qualifiers() {
        let (ns, name, ver) =
            NpmCrawler::parse_purl_components("pkg:npm/lodash@4.17.21?foo=bar").unwrap();
        assert!(ns.is_none());
        assert_eq!(name, "lodash");
        assert_eq!(ver, "4.17.21");
    }

    #[test]
    fn test_parse_purl_components_invalid() {
        assert!(NpmCrawler::parse_purl_components("pkg:pypi/requests@2.0").is_none());
        assert!(NpmCrawler::parse_purl_components("not-a-purl").is_none());
    }

    #[test]
    fn test_parse_purl_components_missing_parts() {
        // No version at all.
        assert!(NpmCrawler::parse_purl_components("pkg:npm/lodash").is_none());
        // Scope present but package name empty.
        assert!(NpmCrawler::parse_purl_components("pkg:npm/@scope/@1.0.0").is_none());
    }

    // ------------------------------------------------------------------
    // read_package_json
    // ------------------------------------------------------------------

    #[tokio::test]
    async fn test_read_package_json_valid() {
        let dir = tempfile::tempdir().unwrap();
        write_package(dir.path(), "test-pkg", "1.0.0").await;

        let result = read_package_json(&dir.path().join("package.json")).await;
        assert!(result.is_some());
        let (name, version) = result.unwrap();
        assert_eq!(name, "test-pkg");
        assert_eq!(version, "1.0.0");
    }

    #[tokio::test]
    async fn test_read_package_json_missing() {
        let dir = tempfile::tempdir().unwrap();
        let pkg_json = dir.path().join("package.json");
        assert!(read_package_json(&pkg_json).await.is_none());
    }

    #[tokio::test]
    async fn test_read_package_json_invalid() {
        let dir = tempfile::tempdir().unwrap();
        let pkg_json = dir.path().join("package.json");
        tokio::fs::write(&pkg_json, "not json").await.unwrap();
        assert!(read_package_json(&pkg_json).await.is_none());
    }

    #[tokio::test]
    async fn test_read_package_json_missing_or_empty_fields() {
        let dir = tempfile::tempdir().unwrap();
        let pkg_json = dir.path().join("package.json");

        // `version` key absent.
        tokio::fs::write(&pkg_json, r#"{"name": "only-name"}"#)
            .await
            .unwrap();
        assert!(read_package_json(&pkg_json).await.is_none());

        // `version` key present but empty.
        tokio::fs::write(&pkg_json, r#"{"name": "x", "version": ""}"#)
            .await
            .unwrap();
        assert!(read_package_json(&pkg_json).await.is_none());
    }

    // ------------------------------------------------------------------
    // crawl_all
    // ------------------------------------------------------------------

    #[tokio::test]
    async fn test_crawl_all_basic() {
        let dir = tempfile::tempdir().unwrap();
        let nm = dir.path().join("node_modules");
        write_package(&nm.join("foo"), "foo", "1.2.3").await;

        let crawler = NpmCrawler::new();
        let packages = crawler.crawl_all(&local_options(dir.path())).await;

        assert_eq!(packages.len(), 1);
        assert_eq!(packages[0].name, "foo");
        assert_eq!(packages[0].version, "1.2.3");
        assert_eq!(packages[0].purl, "pkg:npm/foo@1.2.3");
        assert!(packages[0].namespace.is_none());
    }

    #[tokio::test]
    async fn test_crawl_all_scoped() {
        let dir = tempfile::tempdir().unwrap();
        let nm = dir.path().join("node_modules");
        write_package(&nm.join("@types").join("node"), "@types/node", "20.0.0").await;

        let crawler = NpmCrawler::new();
        let packages = crawler.crawl_all(&local_options(dir.path())).await;

        assert_eq!(packages.len(), 1);
        assert_eq!(packages[0].name, "node");
        assert_eq!(packages[0].namespace.as_deref(), Some("@types"));
        assert_eq!(packages[0].purl, "pkg:npm/@types/node@20.0.0");
    }

    #[tokio::test]
    async fn test_crawl_all_deduplicates_nested_copies() {
        let dir = tempfile::tempdir().unwrap();
        let nm = dir.path().join("node_modules");

        // foo@1.0.0 is installed twice: hoisted and nested under bar.
        write_package(&nm.join("foo"), "foo", "1.0.0").await;
        write_package(&nm.join("bar"), "bar", "2.0.0").await;
        write_package(&nm.join("bar").join("node_modules").join("foo"), "foo", "1.0.0").await;

        let crawler = NpmCrawler::new();
        let packages = crawler.crawl_all(&local_options(dir.path())).await;

        let mut purls: Vec<&str> = packages.iter().map(|p| p.purl.as_str()).collect();
        purls.sort_unstable();
        assert_eq!(purls, vec!["pkg:npm/bar@2.0.0", "pkg:npm/foo@1.0.0"]);
    }

    // ------------------------------------------------------------------
    // find_by_purls
    // ------------------------------------------------------------------

    #[tokio::test]
    async fn test_find_by_purls() {
        let dir = tempfile::tempdir().unwrap();
        let nm = dir.path().join("node_modules");

        write_package(&nm.join("foo"), "foo", "1.0.0").await;
        write_package(&nm.join("@types").join("node"), "@types/node", "20.0.0").await;

        let crawler = NpmCrawler::new();
        let purls = vec![
            "pkg:npm/foo@1.0.0".to_string(),
            "pkg:npm/@types/node@20.0.0".to_string(),
            "pkg:npm/not-installed@0.0.1".to_string(),
        ];

        let result = crawler.find_by_purls(&nm, &purls).await.unwrap();

        assert_eq!(result.len(), 2);
        assert!(result.contains_key("pkg:npm/foo@1.0.0"));
        assert!(result.contains_key("pkg:npm/@types/node@20.0.0"));
        assert!(!result.contains_key("pkg:npm/not-installed@0.0.1"));
    }

    #[tokio::test]
    async fn test_find_by_purls_version_mismatch() {
        let dir = tempfile::tempdir().unwrap();
        let nm = dir.path().join("node_modules");
        write_package(&nm.join("foo"), "foo", "1.0.0").await;

        let crawler = NpmCrawler::new();
        let purls = vec!["pkg:npm/foo@2.0.0".to_string()];

        let result = crawler.find_by_purls(&nm, &purls).await.unwrap();
        assert!(result.is_empty());
    }
}