1use std::collections::{HashMap, HashSet};
2use std::path::{Path, PathBuf};
3
4use super::types::{CrawledPackage, CrawlerOptions};
5use crate::utils::process::{CommandRunner, SystemCommandRunner};
6
7pub fn find_python_command() -> Option<&'static str> {
16 find_python_command_with(&SystemCommandRunner)
17}
18
19pub fn find_python_command_with(runner: &dyn CommandRunner) -> Option<&'static str> {
24 ["python3", "python", "py"]
25 .into_iter()
26 .find(|cmd| runner.run(cmd, &["--version"]).is_some())
27}
28
/// Default batch size of 100.
///
/// Nothing in the visible part of this file reads this constant; the leading
/// underscore keeps the compiler from warning about that.
/// NOTE(review): presumably mirrors the `batch_size` crawler option — confirm.
const _DEFAULT_BATCH_SIZE: usize = 100;
31
/// Normalizes a package name for comparison.
///
/// The input is trimmed and lowercased, then every run of the separator
/// characters `-`, `_` and `.` is collapsed into a single `-`. Leading and
/// trailing separator runs are kept (as one `-` each), not removed.
pub fn canonicalize_pypi_name(name: &str) -> String {
    let lowered = name.trim().to_lowercase();
    let mut canonical = String::with_capacity(lowered.len());

    for ch in lowered.chars() {
        if matches!(ch, '-' | '_' | '.') {
            // Only separators ever push '-', so a trailing '-' means we are
            // already inside a separator run and must not emit another one.
            if !canonical.ends_with('-') {
                canonical.push('-');
            }
        } else {
            canonical.push(ch);
        }
    }

    canonical
}
59
60pub async fn read_python_metadata(dist_info_path: &Path) -> Option<(String, String)> {
74 if let Some(found) = parse_metadata_headers(dist_info_path).await {
75 return Some(found);
76 }
77
78 let is_dir = tokio::fs::metadata(dist_info_path)
79 .await
80 .map(|m| m.is_dir())
81 .unwrap_or(false);
82 if !is_dir {
83 return None;
84 }
85 let dir_name = dist_info_path.file_name()?.to_string_lossy();
86 parse_dist_info_dir_name(&dir_name)
87}
88
89async fn parse_metadata_headers(dist_info_path: &Path) -> Option<(String, String)> {
94 let metadata_path = dist_info_path.join("METADATA");
95 let content = tokio::fs::read_to_string(&metadata_path).await.ok()?;
96
97 let mut name: Option<String> = None;
98 let mut version: Option<String> = None;
99
100 for line in content.lines() {
101 if name.is_some() && version.is_some() {
102 break;
103 }
104 if let Some(rest) = line.strip_prefix("Name:") {
105 name = Some(rest.trim().to_string());
106 } else if let Some(rest) = line.strip_prefix("Version:") {
107 version = Some(rest.trim().to_string());
108 }
109 if line.trim().is_empty() && (name.is_some() || version.is_some()) {
111 break;
112 }
113 }
114
115 match (name, version) {
116 (Some(n), Some(v)) if !n.is_empty() && !v.is_empty() => Some((n, v)),
117 _ => None,
118 }
119}
120
/// Splits a `<name>-<version>.dist-info` directory name into `(name, version)`.
///
/// The split happens at the last `-` before the `.dist-info` suffix, so names
/// that themselves contain `-` stay intact. Returns `None` when the suffix is
/// missing, there is no `-`, or either side of the split is empty.
fn parse_dist_info_dir_name(dir_name: &str) -> Option<(String, String)> {
    let (name, version) = dir_name.strip_suffix(".dist-info")?.rsplit_once('-')?;

    if name.is_empty() || version.is_empty() {
        None
    } else {
        Some((name.to_owned(), version.to_owned()))
    }
}
138
139pub async fn find_python_dirs(base_path: &Path, segments: &[&str]) -> Vec<PathBuf> {
151 let mut results = Vec::new();
152
153 match tokio::fs::metadata(base_path).await {
155 Ok(m) if m.is_dir() => {}
156 _ => return results,
157 }
158
159 if segments.is_empty() {
160 results.push(base_path.to_path_buf());
161 return results;
162 }
163
164 let first = segments[0];
165 let rest = &segments[1..];
166
167 if first == "python3.*" {
168 for entry in crate::utils::fs::list_dir_entries(base_path).await {
170 if !crate::utils::fs::entry_is_dir(&entry).await {
171 continue;
172 }
173 let name = entry.file_name();
174 let name_str = name.to_string_lossy();
175 if name_str.starts_with("python3.") {
176 let sub =
177 Box::pin(find_python_dirs(&base_path.join(entry.file_name()), rest)).await;
178 results.extend(sub);
179 }
180 }
181 } else if first == "*" {
182 for entry in crate::utils::fs::list_dir_entries(base_path).await {
184 if !crate::utils::fs::entry_is_dir(&entry).await {
185 continue;
186 }
187 let sub = Box::pin(find_python_dirs(&base_path.join(entry.file_name()), rest)).await;
188 results.extend(sub);
189 }
190 } else {
191 let sub = Box::pin(find_python_dirs(&base_path.join(first), rest)).await;
193 results.extend(sub);
194 }
195
196 results
197}
198
199pub async fn find_site_packages_under(
207 base_dir: &Path,
208 sub_dir_type: &str, ) -> Vec<PathBuf> {
210 #[cfg(windows)]
211 {
212 find_python_dirs(base_dir, &["Lib", sub_dir_type]).await
213 }
214 #[cfg(not(windows))]
215 {
216 find_python_dirs(base_dir, &["lib", "python3.*", sub_dir_type]).await
217 }
218}
219
220pub async fn find_local_venv_site_packages(cwd: &Path) -> Vec<PathBuf> {
227 let mut results = Vec::new();
228
229 if let Ok(virtual_env) = std::env::var("VIRTUAL_ENV") {
231 let venv_path = PathBuf::from(&virtual_env);
232 let matches = find_site_packages_under(&venv_path, "site-packages").await;
233 results.extend(matches);
234 if !results.is_empty() {
235 return results;
236 }
237 }
238
239 for venv_dir in &[".venv", "venv"] {
241 let venv_path = cwd.join(venv_dir);
242 let matches = find_site_packages_under(&venv_path, "site-packages").await;
243 results.extend(matches);
244 }
245
246 results
247}
248
249pub async fn get_global_python_site_packages() -> Vec<PathBuf> {
254 let mut results = Vec::new();
255 let mut seen = HashSet::new();
256
257 let add_path = |p: PathBuf, seen: &mut HashSet<PathBuf>, results: &mut Vec<PathBuf>| {
258 let resolved = if p.is_absolute() {
259 p
260 } else {
261 std::path::absolute(&p).unwrap_or(p)
262 };
263 if seen.insert(resolved.clone()) {
264 results.push(resolved);
265 }
266 };
267
268 if let Some(python_cmd) = find_python_command() {
270 let runner = SystemCommandRunner;
271 if let Some(stdout) = runner.run(
272 python_cmd,
273 &[
274 "-c",
275 "import site; print('\\n'.join(site.getsitepackages())); print(site.getusersitepackages())",
276 ],
277 ) {
278 for p in parse_python_site_packages_output(&stdout) {
279 add_path(p, &mut seen, &mut results);
280 }
281 }
282 }
283
284 let home_dir = std::env::var("HOME")
286 .or_else(|_| std::env::var("USERPROFILE"))
287 .unwrap_or_else(|_| "~".to_string());
288
289 async fn scan_well_known(
291 base: &Path,
292 pkg_type: &str,
293 seen: &mut HashSet<PathBuf>,
294 results: &mut Vec<PathBuf>,
295 ) {
296 let matches = find_python_dirs(base, &["lib", "python3.*", pkg_type]).await;
297 for m in matches {
298 let resolved = if m.is_absolute() {
299 m
300 } else {
301 std::path::absolute(&m).unwrap_or(m)
302 };
303 if seen.insert(resolved.clone()) {
304 results.push(resolved);
305 }
306 }
307 }
308
309 #[cfg(not(windows))]
310 {
311 scan_well_known(Path::new("/usr"), "dist-packages", &mut seen, &mut results).await;
313 scan_well_known(Path::new("/usr"), "site-packages", &mut seen, &mut results).await;
314 scan_well_known(
316 Path::new("/usr/local"),
317 "dist-packages",
318 &mut seen,
319 &mut results,
320 )
321 .await;
322 scan_well_known(
323 Path::new("/usr/local"),
324 "site-packages",
325 &mut seen,
326 &mut results,
327 )
328 .await;
329 let user_local = PathBuf::from(&home_dir).join(".local");
331 scan_well_known(&user_local, "site-packages", &mut seen, &mut results).await;
332 }
333
334 #[cfg(target_os = "macos")]
336 {
337 scan_well_known(
338 Path::new("/opt/homebrew"),
339 "site-packages",
340 &mut seen,
341 &mut results,
342 )
343 .await;
344
345 let fw_matches = find_python_dirs(
349 Path::new("/Library/Frameworks/Python.framework"),
350 &["Versions", "*", "lib", "python3.*", "site-packages"],
351 )
352 .await;
353 for m in fw_matches {
354 add_path(m, &mut seen, &mut results);
355 }
356 }
357
358 #[cfg(windows)]
360 {
361 if let Ok(appdata) = std::env::var("APPDATA") {
363 let appdata_python = PathBuf::from(&appdata).join("Python");
364 for entry in crate::utils::fs::list_dir_entries(&appdata_python).await {
365 let p = appdata_python.join(entry.file_name()).join("site-packages");
366 if tokio::fs::metadata(&p).await.is_ok() {
367 add_path(p, &mut seen, &mut results);
368 }
369 }
370 }
371 for base in &["C:\\Python", "C:\\Program Files\\Python"] {
373 for entry in crate::utils::fs::list_dir_entries(Path::new(base)).await {
374 let sp = PathBuf::from(base)
375 .join(entry.file_name())
376 .join("Lib")
377 .join("site-packages");
378 if tokio::fs::metadata(&sp).await.is_ok() {
379 add_path(sp, &mut seen, &mut results);
380 }
381 }
382 }
383 if let Ok(local) = std::env::var("LOCALAPPDATA") {
385 let programs_python = PathBuf::from(&local).join("Programs").join("Python");
386 for entry in crate::utils::fs::list_dir_entries(&programs_python).await {
387 let sp = programs_python
388 .join(entry.file_name())
389 .join("Lib")
390 .join("site-packages");
391 if tokio::fs::metadata(&sp).await.is_ok() {
392 add_path(sp, &mut seen, &mut results);
393 }
394 }
395 }
396 }
397
398 #[cfg(not(windows))]
400 {
401 let pyenv_root = std::env::var("PYENV_ROOT")
402 .map(PathBuf::from)
403 .unwrap_or_else(|_| PathBuf::from(&home_dir).join(".pyenv"));
404 let pyenv_versions = pyenv_root.join("versions");
405 let pyenv_matches =
406 find_python_dirs(&pyenv_versions, &["*", "lib", "python3.*", "site-packages"]).await;
407 for m in pyenv_matches {
408 add_path(m, &mut seen, &mut results);
409 }
410 }
411
412 let anaconda = PathBuf::from(&home_dir).join("anaconda3");
414 scan_well_known(&anaconda, "site-packages", &mut seen, &mut results).await;
415 let miniconda = PathBuf::from(&home_dir).join("miniconda3");
416 scan_well_known(&miniconda, "site-packages", &mut seen, &mut results).await;
417
418 #[cfg(target_os = "macos")]
420 {
421 let uv_base = PathBuf::from(&home_dir)
422 .join("Library")
423 .join("Application Support")
424 .join("uv")
425 .join("tools");
426 let uv_matches =
427 find_python_dirs(&uv_base, &["*", "lib", "python3.*", "site-packages"]).await;
428 for m in uv_matches {
429 add_path(m, &mut seen, &mut results);
430 }
431 }
432 #[cfg(windows)]
433 {
434 if let Ok(local) = std::env::var("LOCALAPPDATA") {
436 let uv_base = PathBuf::from(local).join("uv").join("tools");
437 let uv_matches = find_python_dirs(&uv_base, &["*", "Lib", "site-packages"]).await;
438 for m in uv_matches {
439 add_path(m, &mut seen, &mut results);
440 }
441 }
442 }
443 #[cfg(all(not(target_os = "macos"), not(windows)))]
444 {
445 let uv_base = PathBuf::from(&home_dir)
446 .join(".local")
447 .join("share")
448 .join("uv")
449 .join("tools");
450 let uv_matches =
451 find_python_dirs(&uv_base, &["*", "lib", "python3.*", "site-packages"]).await;
452 for m in uv_matches {
453 add_path(m, &mut seen, &mut results);
454 }
455 }
456
457 #[cfg(not(windows))]
466 {
467 let uv_python = PathBuf::from(&home_dir)
468 .join(".local")
469 .join("share")
470 .join("uv")
471 .join("python");
472 let uv_matches =
473 find_python_dirs(&uv_python, &["*", "lib", "python3.*", "site-packages"]).await;
474 for m in uv_matches {
475 add_path(m, &mut seen, &mut results);
476 }
477 }
478 #[cfg(windows)]
479 {
480 if let Ok(local) = std::env::var("LOCALAPPDATA") {
481 let uv_python = PathBuf::from(local).join("uv").join("python");
482 let uv_matches = find_python_dirs(&uv_python, &["*", "Lib", "site-packages"]).await;
483 for m in uv_matches {
484 add_path(m, &mut seen, &mut results);
485 }
486 }
487 }
488
489 results
490}
491
492async fn is_python_project(cwd: &Path) -> bool {
507 let markers = [
508 "pyproject.toml",
509 "setup.py",
510 "setup.cfg",
511 "requirements.txt",
512 "uv.lock",
513 ];
514 for m in &markers {
515 if tokio::fs::metadata(cwd.join(m)).await.is_ok() {
516 return true;
517 }
518 }
519 false
520}
521
/// Stateless crawler that discovers installed Python packages by reading
/// `.dist-info` directories inside `site-packages` folders.
#[derive(Debug)]
pub struct PythonCrawler;
528
529impl PythonCrawler {
530 pub fn new() -> Self {
532 Self
533 }
534
535 pub async fn get_site_packages_paths(
552 &self,
553 options: &CrawlerOptions,
554 ) -> Result<Vec<PathBuf>, std::io::Error> {
555 if options.global || options.global_prefix.is_some() {
556 if let Some(ref custom) = options.global_prefix {
557 return Ok(vec![custom.clone()]);
558 }
559 return Ok(get_global_python_site_packages().await);
560 }
561 let venv_paths = find_local_venv_site_packages(&options.cwd).await;
562 if !venv_paths.is_empty() {
563 return Ok(venv_paths);
564 }
565 if is_python_project(&options.cwd).await {
566 return Ok(get_global_python_site_packages().await);
567 }
568 Ok(Vec::new())
569 }
570
571 pub async fn crawl_all(&self, options: &CrawlerOptions) -> Vec<CrawledPackage> {
573 let mut packages = Vec::new();
574 let mut seen = HashSet::new();
575
576 let sp_paths = self
577 .get_site_packages_paths(options)
578 .await
579 .unwrap_or_default();
580
581 for sp_path in &sp_paths {
582 let found = self.scan_site_packages(sp_path, &mut seen).await;
583 packages.extend(found);
584 }
585
586 packages
587 }
588
589 pub async fn find_by_purls(
594 &self,
595 site_packages_path: &Path,
596 purls: &[String],
597 ) -> Result<HashMap<String, CrawledPackage>, std::io::Error> {
598 let mut result = HashMap::new();
599
600 let mut purl_lookup: HashMap<String, &str> = HashMap::new();
602 for purl in purls {
603 if let Some((name, version)) = Self::parse_pypi_purl(purl) {
604 let key = format!("{}@{}", canonicalize_pypi_name(&name), version);
605 purl_lookup.insert(key, purl.as_str());
606 }
607 }
608
609 if purl_lookup.is_empty() {
610 return Ok(result);
611 }
612
613 for entry in crate::utils::fs::list_dir_entries(site_packages_path).await {
615 let name = entry.file_name();
616 let name_str = name.to_string_lossy();
617 if !name_str.ends_with(".dist-info") {
618 continue;
619 }
620
621 let dist_info_path = site_packages_path.join(&*name_str);
622 if let Some((raw_name, version)) = read_python_metadata(&dist_info_path).await {
623 let canon_name = canonicalize_pypi_name(&raw_name);
624 let key = format!("{canon_name}@{version}");
625
626 if let Some(&matched_purl) = purl_lookup.get(&key) {
627 result.insert(
628 matched_purl.to_string(),
629 CrawledPackage {
630 name: canon_name,
631 version,
632 namespace: None,
633 purl: matched_purl.to_string(),
634 path: site_packages_path.to_path_buf(),
635 },
636 );
637 }
638 }
639 }
640
641 Ok(result)
642 }
643
644 async fn scan_site_packages(
650 &self,
651 site_packages_path: &Path,
652 seen: &mut HashSet<String>,
653 ) -> Vec<CrawledPackage> {
654 let mut results = Vec::new();
655
656 for entry in crate::utils::fs::list_dir_entries(site_packages_path).await {
657 let name = entry.file_name();
658 let name_str = name.to_string_lossy();
659 if !name_str.ends_with(".dist-info") {
660 continue;
661 }
662
663 let dist_info_path = site_packages_path.join(&*name_str);
664 if let Some((raw_name, version)) = read_python_metadata(&dist_info_path).await {
665 let canon_name = canonicalize_pypi_name(&raw_name);
666 let purl = format!("pkg:pypi/{canon_name}@{version}");
667
668 if seen.contains(&purl) {
669 continue;
670 }
671 seen.insert(purl.clone());
672
673 results.push(CrawledPackage {
674 name: canon_name,
675 version,
676 namespace: None,
677 purl,
678 path: site_packages_path.to_path_buf(),
679 });
680 }
681 }
682
683 results
684 }
685
/// Extracts `(name, version)` from a `pkg:pypi/<name>@<version>` purl.
///
/// Anything from the first `?` (qualifiers) or `#` (subpath) onwards is
/// ignored, so neither can leak into the version. The name is returned as
/// written, without canonicalization. Returns `None` for non-PyPI purls, purls
/// without a version, and purls with an empty name or version.
fn parse_pypi_purl(purl: &str) -> Option<(String, String)> {
    // Cut off "?qualifiers" and "#subpath", whichever starts first.
    let core = purl
        .find(|c: char| c == '?' || c == '#')
        .map_or(purl, |idx| &purl[..idx]);

    let (name, version) = core.strip_prefix("pkg:pypi/")?.rsplit_once('@')?;

    if name.is_empty() || version.is_empty() {
        return None;
    }

    Some((name.to_string(), version.to_string()))
}
706}
707
708impl Default for PythonCrawler {
709 fn default() -> Self {
710 Self::new()
711 }
712}
713
/// Turns line-oriented interpreter output into a list of paths.
///
/// Each line is trimmed; lines that are empty after trimming are dropped, and
/// the rest are returned in their original order. No existence check is done.
pub fn parse_python_site_packages_output(stdout: &str) -> Vec<PathBuf> {
    let mut paths = Vec::new();
    for raw_line in stdout.lines() {
        let line = raw_line.trim();
        if !line.is_empty() {
            paths.push(PathBuf::from(line));
        }
    }
    paths
}
727
728#[cfg(test)]
729mod tests {
730 use super::*;
731
/// Case folding and single-separator replacement.
#[test]
fn test_canonicalize_pypi_name_basic() {
    let cases = [
        ("Requests", "requests"),
        ("my_package", "my-package"),
        ("My.Package", "my-package"),
        ("My-._Package", "my-package"),
    ];
    for (input, expected) in cases {
        assert_eq!(canonicalize_pypi_name(input), expected);
    }
}
739
/// Runs of mixed separators collapse into one dash.
#[test]
fn test_canonicalize_pypi_name_runs() {
    for input in ["a__b", "a-.-b", "a_._-b"] {
        assert_eq!(canonicalize_pypi_name(input), "a-b");
    }
}
747
/// Surrounding whitespace is removed before normalization.
#[test]
fn test_canonicalize_pypi_name_trim() {
    let canonical = canonicalize_pypi_name(" requests ");
    assert_eq!(canonical, "requests");
}
752
/// A plain PyPI purl yields its name and version.
#[test]
fn test_parse_pypi_purl() {
    let parsed = PythonCrawler::parse_pypi_purl("pkg:pypi/requests@2.28.0");
    assert_eq!(
        parsed,
        Some(("requests".to_string(), "2.28.0".to_string()))
    );
}
759
/// Qualifiers after `?` do not end up in the version.
#[test]
fn test_parse_pypi_purl_with_qualifiers() {
    let parsed = PythonCrawler::parse_pypi_purl("pkg:pypi/requests@2.28.0?artifact_id=abc");
    assert_eq!(
        parsed,
        Some(("requests".to_string(), "2.28.0".to_string()))
    );
}
767
/// Non-PyPI purls and arbitrary strings are rejected.
#[test]
fn test_parse_pypi_purl_invalid() {
    for rejected in ["pkg:npm/lodash@4.17.21", "not-a-purl"] {
        assert!(PythonCrawler::parse_pypi_purl(rejected).is_none());
    }
}
773
774 #[tokio::test]
775 async fn test_read_python_metadata_valid() {
776 let dir = tempfile::tempdir().unwrap();
777 let dist_info = dir.path().join("requests-2.28.0.dist-info");
778 tokio::fs::create_dir_all(&dist_info).await.unwrap();
779 tokio::fs::write(
780 dist_info.join("METADATA"),
781 "Metadata-Version: 2.1\nName: Requests\nVersion: 2.28.0\n\nSome description",
782 )
783 .await
784 .unwrap();
785
786 let result = read_python_metadata(&dist_info).await;
787 assert!(result.is_some());
788 let (name, version) = result.unwrap();
789 assert_eq!(name, "Requests");
790 assert_eq!(version, "2.28.0");
791 }
792
793 #[tokio::test]
794 async fn test_read_python_metadata_missing() {
795 let dir = tempfile::tempdir().unwrap();
796 let dist_info = dir.path().join("nonexistent.dist-info");
797 assert!(read_python_metadata(&dist_info).await.is_none());
798 }
799
/// Directory names split at the last dash; malformed names are rejected.
#[test]
fn test_parse_dist_info_dir_name() {
    let accepted = [
        ("flask_sqlalchemy-3.0.5.dist-info", "flask_sqlalchemy", "3.0.5"),
        ("Flask-SQLAlchemy-3.0.5.dist-info", "Flask-SQLAlchemy", "3.0.5"),
        ("requests-2.28.0.dist-info", "requests", "2.28.0"),
    ];
    for (dir_name, name, version) in accepted {
        assert_eq!(
            parse_dist_info_dir_name(dir_name),
            Some((name.to_string(), version.to_string()))
        );
    }

    let rejected = [
        "noversion.dist-info",
        "requests-2.28.0.egg-info",
        "-1.0.dist-info",
    ];
    for dir_name in rejected {
        assert!(parse_dist_info_dir_name(dir_name).is_none());
    }
}
822
823 #[tokio::test]
827 async fn test_read_python_metadata_falls_back_to_dir_name() {
828 let dir = tempfile::tempdir().unwrap();
829 let dist_info = dir.path().join("requests-2.28.0.dist-info");
830 tokio::fs::create_dir_all(&dist_info).await.unwrap();
831 let (name, version) = read_python_metadata(&dist_info).await.unwrap();
833 assert_eq!(name, "requests");
834 assert_eq!(version, "2.28.0");
835 }
836
837 #[tokio::test]
840 async fn test_read_python_metadata_falls_back_on_malformed() {
841 let dir = tempfile::tempdir().unwrap();
842 let dist_info = dir.path().join("urllib3-2.0.7.dist-info");
843 tokio::fs::create_dir_all(&dist_info).await.unwrap();
844 tokio::fs::write(
845 dist_info.join("METADATA"),
846 "Metadata-Version: 2.1\nName: urllib3\n\nDescription body, no Version header\n",
847 )
848 .await
849 .unwrap();
850 let (name, version) = read_python_metadata(&dist_info).await.unwrap();
851 assert_eq!(name, "urllib3");
852 assert_eq!(version, "2.0.7");
853 }
854
855 #[tokio::test]
858 async fn test_read_python_metadata_ignores_stray_file() {
859 let dir = tempfile::tempdir().unwrap();
860 let stray = dir.path().join("ghost-1.0.dist-info");
861 tokio::fs::write(&stray, b"not a dir").await.unwrap();
862 assert!(read_python_metadata(&stray).await.is_none());
863 }
864
865 #[tokio::test]
868 async fn test_crawl_all_recovers_metadata_less_package() {
869 let dir = tempfile::tempdir().unwrap();
870 let venv = dir.path().join(".venv");
871 #[cfg(windows)]
872 let sp = venv.join("Lib").join("site-packages");
873 #[cfg(not(windows))]
874 let sp = venv.join("lib").join("python3.11").join("site-packages");
875 tokio::fs::create_dir_all(&sp).await.unwrap();
876 tokio::fs::create_dir_all(sp.join("flask_sqlalchemy-3.0.5.dist-info"))
878 .await
879 .unwrap();
880
881 let crawler = PythonCrawler::new();
882 let options = CrawlerOptions {
883 cwd: dir.path().to_path_buf(),
884 global: false,
885 global_prefix: None,
886 batch_size: 100,
887 };
888 let packages = crawler.crawl_all(&options).await;
889 assert_eq!(packages.len(), 1);
890 assert_eq!(packages[0].name, "flask-sqlalchemy");
891 assert_eq!(packages[0].version, "3.0.5");
892 assert_eq!(packages[0].purl, "pkg:pypi/flask-sqlalchemy@3.0.5");
893 }
894
895 #[tokio::test]
900 async fn test_find_python_dirs_framework_versions_layout() {
901 let dir = tempfile::tempdir().unwrap();
902 let sp = dir
903 .path()
904 .join("Versions")
905 .join("3.11")
906 .join("lib")
907 .join("python3.11")
908 .join("site-packages");
909 tokio::fs::create_dir_all(&sp).await.unwrap();
910
911 let ok = find_python_dirs(
913 &dir.path().join("Versions"),
914 &["*", "lib", "python3.*", "site-packages"],
915 )
916 .await;
917 assert_eq!(ok.len(), 1);
918 assert_eq!(ok[0], sp);
919
920 let buggy = find_python_dirs(
922 &dir.path().join("Versions"),
923 &["python3.*", "lib", "python3.*", "site-packages"],
924 )
925 .await;
926 assert!(buggy.is_empty());
927 }
928
929 #[tokio::test]
930 async fn test_find_python_dirs_literal() {
931 let dir = tempfile::tempdir().unwrap();
932 let target = dir
933 .path()
934 .join("lib")
935 .join("python3.11")
936 .join("site-packages");
937 tokio::fs::create_dir_all(&target).await.unwrap();
938
939 let results = find_python_dirs(dir.path(), &["lib", "python3.*", "site-packages"]).await;
940 assert_eq!(results.len(), 1);
941 assert_eq!(results[0], target);
942 }
943
944 #[tokio::test]
945 async fn test_find_python_dirs_wildcard() {
946 let dir = tempfile::tempdir().unwrap();
947 let sp1 = dir
948 .path()
949 .join("lib")
950 .join("python3.10")
951 .join("site-packages");
952 let sp2 = dir
953 .path()
954 .join("lib")
955 .join("python3.11")
956 .join("site-packages");
957 tokio::fs::create_dir_all(&sp1).await.unwrap();
958 tokio::fs::create_dir_all(&sp2).await.unwrap();
959
960 let non_match = dir.path().join("lib").join("ruby3.0").join("site-packages");
962 tokio::fs::create_dir_all(&non_match).await.unwrap();
963
964 let results = find_python_dirs(dir.path(), &["lib", "python3.*", "site-packages"]).await;
965 assert_eq!(results.len(), 2);
966 }
967
968 #[tokio::test]
969 async fn test_find_python_dirs_star_wildcard() {
970 let dir = tempfile::tempdir().unwrap();
971 let sp1 = dir
972 .path()
973 .join("tools")
974 .join("mytool")
975 .join("lib")
976 .join("python3.11")
977 .join("site-packages");
978 tokio::fs::create_dir_all(&sp1).await.unwrap();
979
980 let results = find_python_dirs(
981 dir.path(),
982 &["tools", "*", "lib", "python3.*", "site-packages"],
983 )
984 .await;
985 assert_eq!(results.len(), 1);
986 assert_eq!(results[0], sp1);
987 }
988
989 #[tokio::test]
990 async fn test_find_python_dirs_pyenv_layout() {
991 let dir = tempfile::tempdir().unwrap();
993 let sp1 = dir
994 .path()
995 .join("versions")
996 .join("3.11.5")
997 .join("lib")
998 .join("python3.11")
999 .join("site-packages");
1000 let sp2 = dir
1001 .path()
1002 .join("versions")
1003 .join("3.12.0")
1004 .join("lib")
1005 .join("python3.12")
1006 .join("site-packages");
1007 tokio::fs::create_dir_all(&sp1).await.unwrap();
1008 tokio::fs::create_dir_all(&sp2).await.unwrap();
1009
1010 let results = find_python_dirs(
1011 &dir.path().join("versions"),
1012 &["*", "lib", "python3.*", "site-packages"],
1013 )
1014 .await;
1015 assert_eq!(results.len(), 2);
1016 assert!(results.contains(&sp1));
1017 assert!(results.contains(&sp2));
1018 }
1019
1020 #[tokio::test]
1021 async fn test_crawl_all_python() {
1022 let dir = tempfile::tempdir().unwrap();
1023 let venv = dir.path().join(".venv");
1024 #[cfg(windows)]
1025 let sp = venv.join("Lib").join("site-packages");
1026 #[cfg(not(windows))]
1027 let sp = venv.join("lib").join("python3.11").join("site-packages");
1028 tokio::fs::create_dir_all(&sp).await.unwrap();
1029
1030 let dist_info = sp.join("requests-2.28.0.dist-info");
1032 tokio::fs::create_dir_all(&dist_info).await.unwrap();
1033 tokio::fs::write(
1034 dist_info.join("METADATA"),
1035 "Metadata-Version: 2.1\nName: Requests\nVersion: 2.28.0\n",
1036 )
1037 .await
1038 .unwrap();
1039
1040 let crawler = PythonCrawler::new();
1041 let options = CrawlerOptions {
1042 cwd: dir.path().to_path_buf(),
1043 global: false,
1044 global_prefix: None,
1045 batch_size: 100,
1046 };
1047
1048 let packages = crawler.crawl_all(&options).await;
1049 assert_eq!(packages.len(), 1);
1050 assert_eq!(packages[0].name, "requests");
1051 assert_eq!(packages[0].version, "2.28.0");
1052 assert_eq!(packages[0].purl, "pkg:pypi/requests@2.28.0");
1053 assert!(packages[0].namespace.is_none());
1054 }
1055
/// Whatever launcher is detected on this machine must be one of the known
/// candidates; having no Python installed is acceptable.
#[test]
fn test_find_python_command() {
    let known = ["python3", "python", "py"];
    if let Some(c) = find_python_command() {
        assert!(known.contains(&c), "unexpected command: {c}");
    }
}
1070
/// Checks that the environment running the tests exposes a home directory via
/// `HOME` or `USERPROFILE`.
///
/// NOTE(review): this depends on the test environment, not on crate code; it
/// fails wherever neither variable is set.
#[test]
fn test_home_dir_detection() {
    let home = match std::env::var("HOME") {
        Ok(value) => value,
        Err(_) => std::env::var("USERPROFILE").unwrap_or_else(|_| "~".to_string()),
    };
    assert_ne!(home, "~", "expected a real home directory");
    assert!(!home.is_empty());
}
1081
1082 #[tokio::test]
1083 async fn test_find_by_purls_python() {
1084 let dir = tempfile::tempdir().unwrap();
1085 let sp = dir.path().to_path_buf();
1086
1087 let dist_info = sp.join("requests-2.28.0.dist-info");
1089 tokio::fs::create_dir_all(&dist_info).await.unwrap();
1090 tokio::fs::write(
1091 dist_info.join("METADATA"),
1092 "Metadata-Version: 2.1\nName: Requests\nVersion: 2.28.0\n",
1093 )
1094 .await
1095 .unwrap();
1096
1097 let crawler = PythonCrawler::new();
1098 let purls = vec![
1099 "pkg:pypi/requests@2.28.0".to_string(),
1100 "pkg:pypi/flask@3.0.0".to_string(),
1101 ];
1102
1103 let result = crawler.find_by_purls(&sp, &purls).await.unwrap();
1104 assert_eq!(result.len(), 1);
1105 assert!(result.contains_key("pkg:pypi/requests@2.28.0"));
1106 assert!(!result.contains_key("pkg:pypi/flask@3.0.0"));
1107 }
1108}