Skip to main content

socket_patch_core/crawlers/
python_crawler.rs

1use std::collections::{HashMap, HashSet};
2use std::path::{Path, PathBuf};
3use std::process::{Command, Stdio};
4
5use super::types::{CrawledPackage, CrawlerOptions};
6
7// ---------------------------------------------------------------------------
8// Python command discovery
9// ---------------------------------------------------------------------------
10
/// Find a working Python command on the system.
///
/// Tries `python3`, `python`, and `py` (Windows launcher) in order and
/// returns the first one for which `<cmd> --version` both starts *and* exits
/// successfully.
///
/// Requiring a successful exit status (rather than merely a successful spawn)
/// skips placeholder executables that exist on `PATH` but are not a usable
/// interpreter, e.g. a launcher stub that exits with an error code.
///
/// Returns `None` when no candidate works.
pub fn find_python_command() -> Option<&'static str> {
    const CANDIDATES: [&str; 3] = ["python3", "python", "py"];

    CANDIDATES.iter().copied().find(|&cmd| {
        Command::new(cmd)
            .arg("--version")
            // Fully detach stdio: we only care about the exit status.
            .stdin(Stdio::null())
            .stdout(Stdio::null())
            .stderr(Stdio::null())
            .status()
            .map(|status| status.success())
            .unwrap_or(false)
    })
}
26
/// Default batch size for crawling.
///
/// Not referenced by any code visible in this file; the leading underscore
/// keeps the compiler from warning about the unused constant.
const _DEFAULT_BATCH_SIZE: usize = 100;
29
30// ---------------------------------------------------------------------------
31// PEP 503 name canonicalization
32// ---------------------------------------------------------------------------
33
/// Canonicalize a Python package name per PEP 503.
///
/// The input is trimmed and lowercased, then every run of the separator
/// characters `-`, `_` and `.` is collapsed into a single `-`.
/// Leading and trailing separator runs are kept (as one `-` each).
pub fn canonicalize_pypi_name(name: &str) -> String {
    let lowered = name.trim().to_lowercase();
    let mut canonical = String::with_capacity(lowered.len());

    for ch in lowered.chars() {
        match ch {
            // `canonical` can only end in '-' if the previous character was a
            // separator, so this check collapses consecutive separators.
            '-' | '_' | '.' => {
                if !canonical.ends_with('-') {
                    canonical.push('-');
                }
            }
            other => canonical.push(other),
        }
    }

    canonical
}
57
58// ---------------------------------------------------------------------------
59// Helpers: read Python metadata from dist-info
60// ---------------------------------------------------------------------------
61
62/// Read `Name` and `Version` from a `.dist-info/METADATA` file.
63pub async fn read_python_metadata(dist_info_path: &Path) -> Option<(String, String)> {
64    let metadata_path = dist_info_path.join("METADATA");
65    let content = tokio::fs::read_to_string(&metadata_path).await.ok()?;
66
67    let mut name: Option<String> = None;
68    let mut version: Option<String> = None;
69
70    for line in content.lines() {
71        if name.is_some() && version.is_some() {
72            break;
73        }
74        if let Some(rest) = line.strip_prefix("Name:") {
75            name = Some(rest.trim().to_string());
76        } else if let Some(rest) = line.strip_prefix("Version:") {
77            version = Some(rest.trim().to_string());
78        }
79        // Stop at first empty line (end of headers)
80        if line.trim().is_empty() && (name.is_some() || version.is_some()) {
81            break;
82        }
83    }
84
85    match (name, version) {
86        (Some(n), Some(v)) if !n.is_empty() && !v.is_empty() => Some((n, v)),
87        _ => None,
88    }
89}
90
91// ---------------------------------------------------------------------------
92// Helpers: find Python directories with wildcard matching
93// ---------------------------------------------------------------------------
94
95/// Find directories matching a path pattern with wildcard segments.
96///
97/// Supported wildcards:
98/// - `"python3.*"` — matches directory entries starting with `python3.`
99/// - `"*"` — matches any directory entry
100///
101/// All other segments are treated as literal path components.
102pub async fn find_python_dirs(base_path: &Path, segments: &[&str]) -> Vec<PathBuf> {
103    let mut results = Vec::new();
104
105    // Check that base_path is a directory
106    match tokio::fs::metadata(base_path).await {
107        Ok(m) if m.is_dir() => {}
108        _ => return results,
109    }
110
111    if segments.is_empty() {
112        results.push(base_path.to_path_buf());
113        return results;
114    }
115
116    let first = segments[0];
117    let rest = &segments[1..];
118
119    if first == "python3.*" {
120        // Wildcard: list directory and match python3.X entries
121        if let Ok(mut entries) = tokio::fs::read_dir(base_path).await {
122            while let Ok(Some(entry)) = entries.next_entry().await {
123                let ft = match entry.file_type().await {
124                    Ok(ft) => ft,
125                    Err(_) => continue,
126                };
127                if !ft.is_dir() {
128                    continue;
129                }
130                let name = entry.file_name();
131                let name_str = name.to_string_lossy();
132                if name_str.starts_with("python3.") {
133                    let sub = Box::pin(find_python_dirs(
134                        &base_path.join(entry.file_name()),
135                        rest,
136                    ))
137                    .await;
138                    results.extend(sub);
139                }
140            }
141        }
142    } else if first == "*" {
143        // Generic wildcard: match any directory entry
144        if let Ok(mut entries) = tokio::fs::read_dir(base_path).await {
145            while let Ok(Some(entry)) = entries.next_entry().await {
146                let ft = match entry.file_type().await {
147                    Ok(ft) => ft,
148                    Err(_) => continue,
149                };
150                if !ft.is_dir() {
151                    continue;
152                }
153                let sub = Box::pin(find_python_dirs(
154                    &base_path.join(entry.file_name()),
155                    rest,
156                ))
157                .await;
158                results.extend(sub);
159            }
160        }
161    } else {
162        // Literal segment: just check if it exists
163        let sub =
164            Box::pin(find_python_dirs(&base_path.join(first), rest)).await;
165        results.extend(sub);
166    }
167
168    results
169}
170
171// ---------------------------------------------------------------------------
172// Helpers: site-packages discovery
173// ---------------------------------------------------------------------------
174
175/// Find `site-packages` (or `dist-packages`) directories under a base dir.
176///
177/// Handles both Unix (`lib/python3.X/site-packages`) and macOS/Linux layouts.
178pub async fn find_site_packages_under(
179    base_dir: &Path,
180    sub_dir_type: &str, // "site-packages" or "dist-packages"
181) -> Vec<PathBuf> {
182    if cfg!(windows) {
183        find_python_dirs(base_dir, &["Lib", sub_dir_type]).await
184    } else {
185        find_python_dirs(base_dir, &["lib", "python3.*", sub_dir_type]).await
186    }
187}
188
189/// Find local virtual environment `site-packages` directories.
190///
191/// Checks (in order):
192/// 1. `VIRTUAL_ENV` environment variable
193/// 2. `.venv` directory in `cwd`
194/// 3. `venv` directory in `cwd`
195pub async fn find_local_venv_site_packages(cwd: &Path) -> Vec<PathBuf> {
196    let mut results = Vec::new();
197
198    // 1. Check VIRTUAL_ENV env var
199    if let Ok(virtual_env) = std::env::var("VIRTUAL_ENV") {
200        let venv_path = PathBuf::from(&virtual_env);
201        let matches = find_site_packages_under(&venv_path, "site-packages").await;
202        results.extend(matches);
203        if !results.is_empty() {
204            return results;
205        }
206    }
207
208    // 2. Check .venv and venv in cwd
209    for venv_dir in &[".venv", "venv"] {
210        let venv_path = cwd.join(venv_dir);
211        let matches = find_site_packages_under(&venv_path, "site-packages").await;
212        results.extend(matches);
213    }
214
215    results
216}
217
218/// Get global/system Python `site-packages` directories.
219///
220/// Queries `python3` for site-packages paths, then checks well-known system
221/// locations including Homebrew, conda, uv tools, pip --user, etc.
222pub async fn get_global_python_site_packages() -> Vec<PathBuf> {
223    let mut results = Vec::new();
224    let mut seen = HashSet::new();
225
226    let add_path = |p: PathBuf, seen: &mut HashSet<PathBuf>, results: &mut Vec<PathBuf>| {
227        let resolved = if p.is_absolute() {
228            p
229        } else {
230            std::path::absolute(&p).unwrap_or(p)
231        };
232        if seen.insert(resolved.clone()) {
233            results.push(resolved);
234        }
235    };
236
237    // 1. Ask Python for site-packages
238    if let Some(python_cmd) = find_python_command() {
239        if let Ok(output) = Command::new(python_cmd)
240            .args([
241                "-c",
242                "import site; print('\\n'.join(site.getsitepackages())); print(site.getusersitepackages())",
243            ])
244            .stdin(Stdio::null())
245            .stdout(Stdio::piped())
246            .stderr(Stdio::piped())
247            .output()
248        {
249            if output.status.success() {
250                let stdout = String::from_utf8_lossy(&output.stdout);
251                for line in stdout.lines() {
252                    let p = line.trim();
253                    if !p.is_empty() {
254                        add_path(PathBuf::from(p), &mut seen, &mut results);
255                    }
256                }
257            }
258        }
259    }
260
261    // 2. Well-known system paths
262    let home_dir = std::env::var("HOME")
263        .or_else(|_| std::env::var("USERPROFILE"))
264        .unwrap_or_else(|_| "~".to_string());
265
266    // Helper closure to scan base/lib/python3.*/[dist|site]-packages
267    async fn scan_well_known(
268        base: &Path,
269        pkg_type: &str,
270        seen: &mut HashSet<PathBuf>,
271        results: &mut Vec<PathBuf>,
272    ) {
273        let matches = find_python_dirs(base, &["lib", "python3.*", pkg_type]).await;
274        for m in matches {
275            let resolved = if m.is_absolute() {
276                m
277            } else {
278                std::path::absolute(&m).unwrap_or(m)
279            };
280            if seen.insert(resolved.clone()) {
281                results.push(resolved);
282            }
283        }
284    }
285
286    if !cfg!(windows) {
287        // Debian/Ubuntu
288        scan_well_known(Path::new("/usr"), "dist-packages", &mut seen, &mut results).await;
289        scan_well_known(Path::new("/usr"), "site-packages", &mut seen, &mut results).await;
290        // Debian pip / most distros / macOS
291        scan_well_known(
292            Path::new("/usr/local"),
293            "dist-packages",
294            &mut seen,
295            &mut results,
296        )
297        .await;
298        scan_well_known(
299            Path::new("/usr/local"),
300            "site-packages",
301            &mut seen,
302            &mut results,
303        )
304        .await;
305        // pip --user on Unix
306        let user_local = PathBuf::from(&home_dir).join(".local");
307        scan_well_known(&user_local, "site-packages", &mut seen, &mut results).await;
308    }
309
310    // macOS-specific
311    if cfg!(target_os = "macos") {
312        scan_well_known(
313            Path::new("/opt/homebrew"),
314            "site-packages",
315            &mut seen,
316            &mut results,
317        )
318        .await;
319
320        // Python.org framework
321        let fw_matches = find_python_dirs(
322            Path::new("/Library/Frameworks/Python.framework/Versions"),
323            &["python3.*", "lib", "python3.*", "site-packages"],
324        )
325        .await;
326        for m in fw_matches {
327            add_path(m, &mut seen, &mut results);
328        }
329
330        let fw_matches2 = find_python_dirs(
331            Path::new("/Library/Frameworks/Python.framework"),
332            &["Versions", "*", "lib", "python3.*", "site-packages"],
333        )
334        .await;
335        for m in fw_matches2 {
336            add_path(m, &mut seen, &mut results);
337        }
338    }
339
340    // Windows-specific
341    if cfg!(windows) {
342        // pip --user on Windows: %APPDATA%\Python\PythonXY\site-packages
343        if let Ok(appdata) = std::env::var("APPDATA") {
344            let appdata_python = PathBuf::from(&appdata).join("Python");
345            if let Ok(mut entries) = tokio::fs::read_dir(&appdata_python).await {
346                while let Ok(Some(entry)) = entries.next_entry().await {
347                    let p = appdata_python.join(entry.file_name()).join("site-packages");
348                    if tokio::fs::metadata(&p).await.is_ok() {
349                        add_path(p, &mut seen, &mut results);
350                    }
351                }
352            }
353        }
354        // Common Windows Python install locations
355        for base in &["C:\\Python", "C:\\Program Files\\Python"] {
356            if let Ok(mut entries) = tokio::fs::read_dir(base).await {
357                while let Ok(Some(entry)) = entries.next_entry().await {
358                    let sp = PathBuf::from(base)
359                        .join(entry.file_name())
360                        .join("Lib")
361                        .join("site-packages");
362                    if tokio::fs::metadata(&sp).await.is_ok() {
363                        add_path(sp, &mut seen, &mut results);
364                    }
365                }
366            }
367        }
368        // Microsoft Store / python.org via LocalAppData
369        if let Ok(local) = std::env::var("LOCALAPPDATA") {
370            let programs_python = PathBuf::from(&local).join("Programs").join("Python");
371            if let Ok(mut entries) = tokio::fs::read_dir(&programs_python).await {
372                while let Ok(Some(entry)) = entries.next_entry().await {
373                    let sp = programs_python
374                        .join(entry.file_name())
375                        .join("Lib")
376                        .join("site-packages");
377                    if tokio::fs::metadata(&sp).await.is_ok() {
378                        add_path(sp, &mut seen, &mut results);
379                    }
380                }
381            }
382        }
383    }
384
385    // Conda
386    let anaconda = PathBuf::from(&home_dir).join("anaconda3");
387    scan_well_known(&anaconda, "site-packages", &mut seen, &mut results).await;
388    let miniconda = PathBuf::from(&home_dir).join("miniconda3");
389    scan_well_known(&miniconda, "site-packages", &mut seen, &mut results).await;
390
391    // uv tools
392    if cfg!(target_os = "macos") {
393        let uv_base = PathBuf::from(&home_dir)
394            .join("Library")
395            .join("Application Support")
396            .join("uv")
397            .join("tools");
398        let uv_matches =
399            find_python_dirs(&uv_base, &["*", "lib", "python3.*", "site-packages"]).await;
400        for m in uv_matches {
401            add_path(m, &mut seen, &mut results);
402        }
403    } else if cfg!(windows) {
404        // %LOCALAPPDATA%\uv\tools
405        if let Ok(local) = std::env::var("LOCALAPPDATA") {
406            let uv_base = PathBuf::from(local).join("uv").join("tools");
407            let uv_matches =
408                find_python_dirs(&uv_base, &["*", "Lib", "site-packages"]).await;
409            for m in uv_matches {
410                add_path(m, &mut seen, &mut results);
411            }
412        }
413    } else {
414        let uv_base = PathBuf::from(&home_dir)
415            .join(".local")
416            .join("share")
417            .join("uv")
418            .join("tools");
419        let uv_matches =
420            find_python_dirs(&uv_base, &["*", "lib", "python3.*", "site-packages"]).await;
421        for m in uv_matches {
422            add_path(m, &mut seen, &mut results);
423        }
424    }
425
426    results
427}
428
429// ---------------------------------------------------------------------------
430// PythonCrawler
431// ---------------------------------------------------------------------------
432
433/// Python ecosystem crawler for discovering packages in `site-packages`.
434pub struct PythonCrawler;
435
436impl PythonCrawler {
437    /// Create a new `PythonCrawler`.
438    pub fn new() -> Self {
439        Self
440    }
441
442    /// Get `site-packages` paths based on options.
443    pub async fn get_site_packages_paths(&self, options: &CrawlerOptions) -> Result<Vec<PathBuf>, std::io::Error> {
444        if options.global || options.global_prefix.is_some() {
445            if let Some(ref custom) = options.global_prefix {
446                return Ok(vec![custom.clone()]);
447            }
448            return Ok(get_global_python_site_packages().await);
449        }
450        Ok(find_local_venv_site_packages(&options.cwd).await)
451    }
452
453    /// Crawl all discovered `site-packages` and return every package found.
454    pub async fn crawl_all(&self, options: &CrawlerOptions) -> Vec<CrawledPackage> {
455        let mut packages = Vec::new();
456        let mut seen = HashSet::new();
457
458        let sp_paths = self.get_site_packages_paths(options).await.unwrap_or_default();
459
460        for sp_path in &sp_paths {
461            let found = self.scan_site_packages(sp_path, &mut seen).await;
462            packages.extend(found);
463        }
464
465        packages
466    }
467
468    /// Find specific packages by PURL.
469    ///
470    /// Accepts base PURLs (no qualifiers) — the caller should strip qualifiers
471    /// before calling.
472    pub async fn find_by_purls(
473        &self,
474        site_packages_path: &Path,
475        purls: &[String],
476    ) -> Result<HashMap<String, CrawledPackage>, std::io::Error> {
477        let mut result = HashMap::new();
478
479        // Build lookup: canonicalized-name@version -> purl
480        let mut purl_lookup: HashMap<String, &str> = HashMap::new();
481        for purl in purls {
482            if let Some((name, version)) = Self::parse_pypi_purl(purl) {
483                let key = format!("{}@{}", canonicalize_pypi_name(&name), version);
484                purl_lookup.insert(key, purl.as_str());
485            }
486        }
487
488        if purl_lookup.is_empty() {
489            return Ok(result);
490        }
491
492        // Scan all .dist-info dirs
493        let entries = match tokio::fs::read_dir(site_packages_path).await {
494            Ok(rd) => {
495                let mut entries = rd;
496                let mut v = Vec::new();
497                while let Ok(Some(entry)) = entries.next_entry().await {
498                    v.push(entry);
499                }
500                v
501            }
502            Err(_) => return Ok(result),
503        };
504
505        for entry in entries {
506            let name = entry.file_name();
507            let name_str = name.to_string_lossy();
508            if !name_str.ends_with(".dist-info") {
509                continue;
510            }
511
512            let dist_info_path = site_packages_path.join(&*name_str);
513            if let Some((raw_name, version)) = read_python_metadata(&dist_info_path).await {
514                let canon_name = canonicalize_pypi_name(&raw_name);
515                let key = format!("{canon_name}@{version}");
516
517                if let Some(&matched_purl) = purl_lookup.get(&key) {
518                    result.insert(
519                        matched_purl.to_string(),
520                        CrawledPackage {
521                            name: canon_name,
522                            version,
523                            namespace: None,
524                            purl: matched_purl.to_string(),
525                            path: site_packages_path.to_path_buf(),
526                        },
527                    );
528                }
529            }
530        }
531
532        Ok(result)
533    }
534
535    // ------------------------------------------------------------------
536    // Private helpers
537    // ------------------------------------------------------------------
538
539    /// Scan a `site-packages` directory for `.dist-info` directories.
540    async fn scan_site_packages(
541        &self,
542        site_packages_path: &Path,
543        seen: &mut HashSet<String>,
544    ) -> Vec<CrawledPackage> {
545        let mut results = Vec::new();
546
547        let entries = match tokio::fs::read_dir(site_packages_path).await {
548            Ok(rd) => {
549                let mut entries = rd;
550                let mut v = Vec::new();
551                while let Ok(Some(entry)) = entries.next_entry().await {
552                    v.push(entry);
553                }
554                v
555            }
556            Err(_) => return results,
557        };
558
559        for entry in entries {
560            let name = entry.file_name();
561            let name_str = name.to_string_lossy();
562            if !name_str.ends_with(".dist-info") {
563                continue;
564            }
565
566            let dist_info_path = site_packages_path.join(&*name_str);
567            if let Some((raw_name, version)) = read_python_metadata(&dist_info_path).await {
568                let canon_name = canonicalize_pypi_name(&raw_name);
569                let purl = format!("pkg:pypi/{canon_name}@{version}");
570
571                if seen.contains(&purl) {
572                    continue;
573                }
574                seen.insert(purl.clone());
575
576                results.push(CrawledPackage {
577                    name: canon_name,
578                    version,
579                    namespace: None,
580                    purl,
581                    path: site_packages_path.to_path_buf(),
582                });
583            }
584        }
585
586        results
587    }
588
589    /// Parse a PyPI PURL string to extract name and version.
590    /// Strips qualifiers before parsing.
591    fn parse_pypi_purl(purl: &str) -> Option<(String, String)> {
592        // Strip qualifiers
593        let base = match purl.find('?') {
594            Some(idx) => &purl[..idx],
595            None => purl,
596        };
597
598        let rest = base.strip_prefix("pkg:pypi/")?;
599        let at_idx = rest.rfind('@')?;
600        let name = &rest[..at_idx];
601        let version = &rest[at_idx + 1..];
602
603        if name.is_empty() || version.is_empty() {
604            return None;
605        }
606
607        Some((name.to_string(), version.to_string()))
608    }
609}
610
611impl Default for PythonCrawler {
612    fn default() -> Self {
613        Self::new()
614    }
615}
616
#[cfg(test)]
mod tests {
    use super::*;

    /// Create `<site_packages>/requests-2.28.0.dist-info/METADATA` with the
    /// given file content.
    async fn write_requests_dist_info(site_packages: &Path, metadata: &str) -> PathBuf {
        let dist_info = site_packages.join("requests-2.28.0.dist-info");
        tokio::fs::create_dir_all(&dist_info).await.unwrap();
        tokio::fs::write(dist_info.join("METADATA"), metadata)
            .await
            .unwrap();
        dist_info
    }

    /// Assert that `packages` is exactly the single `requests 2.28.0` package.
    fn assert_only_requests(packages: &[CrawledPackage]) {
        assert_eq!(packages.len(), 1);
        assert_eq!(packages[0].name, "requests");
        assert_eq!(packages[0].version, "2.28.0");
        assert_eq!(packages[0].purl, "pkg:pypi/requests@2.28.0");
        assert!(packages[0].namespace.is_none());
    }

    #[test]
    fn test_canonicalize_pypi_name_basic() {
        assert_eq!(canonicalize_pypi_name("Requests"), "requests");
        assert_eq!(canonicalize_pypi_name("my_package"), "my-package");
        assert_eq!(canonicalize_pypi_name("My.Package"), "my-package");
        assert_eq!(canonicalize_pypi_name("My-._Package"), "my-package");
    }

    #[test]
    fn test_canonicalize_pypi_name_runs() {
        // Runs of separators collapse to single -
        assert_eq!(canonicalize_pypi_name("a__b"), "a-b");
        assert_eq!(canonicalize_pypi_name("a-.-b"), "a-b");
        assert_eq!(canonicalize_pypi_name("a_._-b"), "a-b");
    }

    #[test]
    fn test_canonicalize_pypi_name_trim() {
        assert_eq!(canonicalize_pypi_name("  requests  "), "requests");
    }

    #[test]
    fn test_parse_pypi_purl() {
        let (name, ver) = PythonCrawler::parse_pypi_purl("pkg:pypi/requests@2.28.0").unwrap();
        assert_eq!(name, "requests");
        assert_eq!(ver, "2.28.0");
    }

    #[test]
    fn test_parse_pypi_purl_with_qualifiers() {
        let (name, ver) =
            PythonCrawler::parse_pypi_purl("pkg:pypi/requests@2.28.0?artifact_id=abc").unwrap();
        assert_eq!(name, "requests");
        assert_eq!(ver, "2.28.0");
    }

    #[test]
    fn test_parse_pypi_purl_invalid() {
        assert!(PythonCrawler::parse_pypi_purl("pkg:npm/lodash@4.17.21").is_none());
        assert!(PythonCrawler::parse_pypi_purl("not-a-purl").is_none());
    }

    #[tokio::test]
    async fn test_read_python_metadata_valid() {
        let dir = tempfile::tempdir().unwrap();
        let dist_info = write_requests_dist_info(
            dir.path(),
            "Metadata-Version: 2.1\nName: Requests\nVersion: 2.28.0\n\nSome description",
        )
        .await;

        let (name, version) = read_python_metadata(&dist_info)
            .await
            .expect("metadata with Name and Version should parse");
        assert_eq!(name, "Requests");
        assert_eq!(version, "2.28.0");
    }

    #[tokio::test]
    async fn test_read_python_metadata_missing() {
        let dir = tempfile::tempdir().unwrap();
        let dist_info = dir.path().join("nonexistent.dist-info");
        assert!(read_python_metadata(&dist_info).await.is_none());
    }

    #[tokio::test]
    async fn test_find_python_dirs_literal() {
        let dir = tempfile::tempdir().unwrap();
        let target = dir.path().join("lib").join("python3.11").join("site-packages");
        tokio::fs::create_dir_all(&target).await.unwrap();

        let results =
            find_python_dirs(dir.path(), &["lib", "python3.*", "site-packages"]).await;
        assert_eq!(results.len(), 1);
        assert_eq!(results[0], target);
    }

    #[tokio::test]
    async fn test_find_python_dirs_wildcard() {
        let dir = tempfile::tempdir().unwrap();
        let sp1 = dir.path().join("lib").join("python3.10").join("site-packages");
        let sp2 = dir.path().join("lib").join("python3.11").join("site-packages");
        tokio::fs::create_dir_all(&sp1).await.unwrap();
        tokio::fs::create_dir_all(&sp2).await.unwrap();

        // Also create a non-matching dir
        let non_match = dir.path().join("lib").join("ruby3.0").join("site-packages");
        tokio::fs::create_dir_all(&non_match).await.unwrap();

        let results =
            find_python_dirs(dir.path(), &["lib", "python3.*", "site-packages"]).await;
        assert_eq!(results.len(), 2);
    }

    #[tokio::test]
    async fn test_find_python_dirs_star_wildcard() {
        let dir = tempfile::tempdir().unwrap();
        let sp1 = dir
            .path()
            .join("tools")
            .join("mytool")
            .join("lib")
            .join("python3.11")
            .join("site-packages");
        tokio::fs::create_dir_all(&sp1).await.unwrap();

        let results = find_python_dirs(
            dir.path(),
            &["tools", "*", "lib", "python3.*", "site-packages"],
        )
        .await;
        assert_eq!(results.len(), 1);
        assert_eq!(results[0], sp1);
    }

    #[tokio::test]
    async fn test_crawl_all_python() {
        let dir = tempfile::tempdir().unwrap();
        let venv = dir.path().join(".venv");
        let sp = if cfg!(windows) {
            venv.join("Lib").join("site-packages")
        } else {
            venv.join("lib").join("python3.11").join("site-packages")
        };
        tokio::fs::create_dir_all(&sp).await.unwrap();

        // Create a dist-info dir with METADATA
        write_requests_dist_info(
            &sp,
            "Metadata-Version: 2.1\nName: Requests\nVersion: 2.28.0\n",
        )
        .await;

        let crawler = PythonCrawler::new();

        // An explicit prefix bypasses all environment-dependent discovery, so
        // this part is deterministic on every machine.
        let explicit = CrawlerOptions {
            cwd: dir.path().to_path_buf(),
            global: false,
            global_prefix: Some(sp.clone()),
            batch_size: 100,
        };
        assert_only_requests(&crawler.crawl_all(&explicit).await);

        // Local discovery prefers an activated `VIRTUAL_ENV` over `cwd/.venv`,
        // so it can only be checked when the test run itself is not inside an
        // activated environment.
        if std::env::var_os("VIRTUAL_ENV").is_none() {
            let local = CrawlerOptions {
                cwd: dir.path().to_path_buf(),
                global: false,
                global_prefix: None,
                batch_size: 100,
            };
            assert_only_requests(&crawler.crawl_all(&local).await);
        }
    }

    #[test]
    fn test_find_python_command() {
        // Python may not be installed, so `None` is acceptable; if a command
        // is returned it must be one of the known candidates.
        if let Some(c) = find_python_command() {
            assert!(
                ["python3", "python", "py"].contains(&c),
                "unexpected command: {c}"
            );
        }
    }

    #[test]
    fn test_home_dir_detection() {
        // Verify the fallback chain works: HOME -> USERPROFILE -> "~".
        // The expectation is derived from the environment instead of assuming
        // that the machine running the tests has a home directory configured.
        let home = std::env::var("HOME")
            .or_else(|_| std::env::var("USERPROFILE"))
            .unwrap_or_else(|_| "~".to_string());

        match (std::env::var("HOME"), std::env::var("USERPROFILE")) {
            (Ok(from_home), _) => assert_eq!(home, from_home),
            (Err(_), Ok(from_profile)) => assert_eq!(home, from_profile),
            (Err(_), Err(_)) => assert_eq!(home, "~"),
        }
    }

    #[tokio::test]
    async fn test_find_by_purls_python() {
        let dir = tempfile::tempdir().unwrap();
        let sp = dir.path().to_path_buf();

        // Create dist-info
        write_requests_dist_info(
            &sp,
            "Metadata-Version: 2.1\nName: Requests\nVersion: 2.28.0\n",
        )
        .await;

        let crawler = PythonCrawler::new();
        let purls = vec![
            "pkg:pypi/requests@2.28.0".to_string(),
            "pkg:pypi/flask@3.0.0".to_string(),
        ];

        let result = crawler.find_by_purls(&sp, &purls).await.unwrap();
        assert_eq!(result.len(), 1);
        assert!(result.contains_key("pkg:pypi/requests@2.28.0"));
        assert!(!result.contains_key("pkg:pypi/flask@3.0.0"));
    }
}