socket_patch_core/crawlers/
ruby_crawler.rs1use std::collections::{HashMap, HashSet};
2use std::path::{Path, PathBuf};
3
4use super::types::{CrawledPackage, CrawlerOptions};
5
/// Stateless crawler that locates installed Ruby gems on disk.
///
/// Gem directories are resolved by [`RubyCrawler::get_gem_paths`]; each
/// `<name>-<version>[-<platform>]` directory inside them is reported as a
/// `CrawledPackage`.
///
/// The type has no fields, so it is trivially `Copy` and can be printed with
/// `{:?}` when embedded in larger diagnostic structures.
#[derive(Debug, Clone, Copy)]
pub struct RubyCrawler;
9
10impl RubyCrawler {
11 pub fn new() -> Self {
13 Self
14 }
15
16 pub async fn get_gem_paths(
29 &self,
30 options: &CrawlerOptions,
31 ) -> Result<Vec<PathBuf>, std::io::Error> {
32 if options.global || options.global_prefix.is_some() {
33 if let Some(ref custom) = options.global_prefix {
34 return Ok(vec![custom.clone()]);
35 }
36 return Ok(Self::get_global_gem_paths().await);
37 }
38
39 let vendor_gems = Self::get_vendor_bundle_paths(&options.cwd).await;
41 if !vendor_gems.is_empty() {
42 return Ok(vendor_gems);
43 }
44
45 let has_gemfile = tokio::fs::metadata(options.cwd.join("Gemfile"))
47 .await
48 .is_ok();
49 let has_gemfile_lock = tokio::fs::metadata(options.cwd.join("Gemfile.lock"))
50 .await
51 .is_ok();
52
53 if has_gemfile || has_gemfile_lock {
54 let mut paths = Vec::new();
56 if let Some(gemdir) = Self::run_gem_env("gemdir").await {
57 let gems_path = PathBuf::from(gemdir).join("gems");
58 if is_dir(&gems_path).await {
59 paths.push(gems_path);
60 }
61 }
62 if !paths.is_empty() {
63 return Ok(paths);
64 }
65 }
66
67 Ok(Vec::new())
69 }
70
71 pub async fn crawl_all(&self, options: &CrawlerOptions) -> Vec<CrawledPackage> {
73 let mut packages = Vec::new();
74 let mut seen = HashSet::new();
75
76 let gem_paths = self.get_gem_paths(options).await.unwrap_or_default();
77
78 for gem_path in &gem_paths {
79 let found = self.scan_gem_dir(gem_path, &mut seen).await;
80 packages.extend(found);
81 }
82
83 packages
84 }
85
86 pub async fn find_by_purls(
90 &self,
91 gem_path: &Path,
92 purls: &[String],
93 ) -> Result<HashMap<String, CrawledPackage>, std::io::Error> {
94 let mut result: HashMap<String, CrawledPackage> = HashMap::new();
95
96 for purl in purls {
97 if let Some((name, version)) = crate::utils::purl::parse_gem_purl(purl) {
98 if let Some(gem_dir) = self.locate_gem_dir(gem_path, name, version).await {
102 result.insert(
103 purl.clone(),
104 CrawledPackage {
105 name: name.to_string(),
106 version: version.to_string(),
107 namespace: None,
108 purl: purl.clone(),
109 path: gem_dir,
110 },
111 );
112 }
113 }
114 }
115
116 Ok(result)
117 }
118
119 async fn get_vendor_bundle_paths(cwd: &Path) -> Vec<PathBuf> {
125 let vendor_ruby = cwd.join("vendor").join("bundle").join("ruby");
126 let mut paths = Vec::new();
127
128 for entry in crate::utils::fs::list_dir_entries(&vendor_ruby).await {
129 if !crate::utils::fs::entry_is_dir(&entry).await {
130 continue;
131 }
132 let gems_dir = vendor_ruby.join(entry.file_name()).join("gems");
133 if is_dir(&gems_dir).await {
134 paths.push(gems_dir);
135 }
136 }
137 paths
138 }
139
140 async fn get_global_gem_paths() -> Vec<PathBuf> {
142 let mut paths = Vec::new();
143 let mut seen = HashSet::new();
144
145 if let Some(gemdir) = Self::run_gem_env("gemdir").await {
147 let gems_path = PathBuf::from(gemdir).join("gems");
148 if is_dir(&gems_path).await && seen.insert(gems_path.clone()) {
149 paths.push(gems_path);
150 }
151 }
152
153 if let Some(gempath) = Self::run_gem_env("gempath").await {
155 for segment in gempath.split(':') {
156 let segment = segment.trim();
157 if segment.is_empty() {
158 continue;
159 }
160 let gems_path = PathBuf::from(segment).join("gems");
161 if is_dir(&gems_path).await && seen.insert(gems_path.clone()) {
162 paths.push(gems_path);
163 }
164 }
165 }
166
167 let home = std::env::var("HOME")
169 .or_else(|_| std::env::var("USERPROFILE"))
170 .unwrap_or_else(|_| "~".to_string());
171 let home = PathBuf::from(home);
172
173 let fallback_globs = [
174 home.join(".gem").join("ruby"),
175 home.join(".rbenv").join("versions"),
176 home.join(".rvm").join("gems"),
177 ];
178
179 for base in &fallback_globs {
180 for entry in crate::utils::fs::list_dir_entries(base).await {
181 if !crate::utils::fs::entry_is_dir(&entry).await {
182 continue;
183 }
184
185 let entry_path = base.join(entry.file_name());
186
187 let gems_dir = entry_path.join("gems");
189 if is_dir(&gems_dir).await && seen.insert(gems_dir.clone()) {
190 paths.push(gems_dir);
191 continue;
192 }
193
194 let lib_ruby_gems = entry_path.join("lib").join("ruby").join("gems");
196 for sub_entry in crate::utils::fs::list_dir_entries(&lib_ruby_gems).await {
197 let gems_dir = lib_ruby_gems.join(sub_entry.file_name()).join("gems");
198 if is_dir(&gems_dir).await && seen.insert(gems_dir.clone()) {
199 paths.push(gems_dir);
200 }
201 }
202 }
203 }
204
205 let system_bases = [
207 PathBuf::from("/usr/lib/ruby/gems"),
208 PathBuf::from("/usr/local/lib/ruby/gems"),
209 PathBuf::from("/opt/homebrew/lib/ruby/gems"),
210 ];
211
212 for base in &system_bases {
213 for entry in crate::utils::fs::list_dir_entries(base).await {
214 let gems_dir = base.join(entry.file_name()).join("gems");
215 if is_dir(&gems_dir).await && seen.insert(gems_dir.clone()) {
216 paths.push(gems_dir);
217 }
218 }
219 }
220
221 paths
222 }
223
224 async fn run_gem_env(key: &str) -> Option<String> {
226 Self::run_gem_env_with(&crate::utils::process::SystemCommandRunner, key)
227 }
228
229 fn run_gem_env_with(
234 runner: &dyn crate::utils::process::CommandRunner,
235 key: &str,
236 ) -> Option<String> {
237 parse_gem_env_output(runner.run("gem", &["env", key]).as_deref().unwrap_or(""))
238 }
239
240 async fn scan_gem_dir(
242 &self,
243 gem_path: &Path,
244 seen: &mut HashSet<String>,
245 ) -> Vec<CrawledPackage> {
246 let mut results = Vec::new();
247
248 for entry in crate::utils::fs::list_dir_entries(gem_path).await {
249 if !crate::utils::fs::entry_is_dir(&entry).await {
250 continue;
251 }
252
253 let dir_name = entry.file_name();
254 let dir_name_str = dir_name.to_string_lossy();
255
256 if dir_name_str.starts_with('.') {
258 continue;
259 }
260
261 let gem_dir = gem_path.join(&*dir_name_str);
262
263 if let Some((name, version)) = Self::parse_dir_name_version(&dir_name_str) {
265 if !self.verify_gem_at_path(&gem_dir).await {
267 continue;
268 }
269
270 let purl = crate::utils::purl::build_gem_purl(&name, &version);
271
272 if seen.contains(&purl) {
273 continue;
274 }
275 seen.insert(purl.clone());
276
277 results.push(CrawledPackage {
278 name,
279 version,
280 namespace: None,
281 purl,
282 path: gem_dir,
283 });
284 }
285 }
286
287 results
288 }
289
290 async fn verify_gem_at_path(&self, path: &Path) -> bool {
293 if !is_dir(path).await {
294 return false;
295 }
296
297 if is_dir(&path.join("lib")).await {
299 return true;
300 }
301
302 for entry in crate::utils::fs::list_dir_entries(path).await {
304 if let Some(name) = entry.file_name().to_str() {
305 if name.ends_with(".gemspec") {
306 return true;
307 }
308 }
309 }
310
311 false
312 }
313
314 fn parse_dir_name_version(dir_name: &str) -> Option<(String, String)> {
326 let idx = dir_name
327 .match_indices('-')
328 .find(|(i, _)| dir_name[i + 1..].starts_with(|c: char| c.is_ascii_digit()))
329 .map(|(i, _)| i)?;
330 let name = &dir_name[..idx];
331 let rest = &dir_name[idx + 1..];
332 let version = rest.split('-').next().unwrap_or(rest);
334 if name.is_empty() || version.is_empty() {
335 return None;
336 }
337 Some((name.to_string(), version.to_string()))
338 }
339
340 async fn locate_gem_dir(&self, gem_path: &Path, name: &str, version: &str) -> Option<PathBuf> {
348 let exact = gem_path.join(format!("{name}-{version}"));
349 if self.verify_gem_at_path(&exact).await {
350 return Some(exact);
351 }
352 let prefix = format!("{name}-{version}-");
353 for entry in crate::utils::fs::list_dir_entries(gem_path).await {
354 let file_name = entry.file_name();
355 let dir_name = file_name.to_string_lossy();
356 if dir_name.starts_with(&prefix) {
357 let dir = gem_path.join(&*dir_name);
358 if self.verify_gem_at_path(&dir).await {
359 return Some(dir);
360 }
361 }
362 }
363 None
364 }
365}
366
367impl Default for RubyCrawler {
368 fn default() -> Self {
369 Self::new()
370 }
371}
372
/// Normalises the stdout of a `gem env <key>` invocation.
///
/// Leading and trailing whitespace is removed; output that is empty after
/// trimming yields `None`.
pub fn parse_gem_env_output(stdout: &str) -> Option<String> {
    let trimmed = stdout.trim();
    match trimmed {
        "" => None,
        value => Some(value.to_string()),
    }
}
384
385async fn is_dir(path: &Path) -> bool {
387 tokio::fs::metadata(path)
388 .await
389 .map(|m| m.is_dir())
390 .unwrap_or(false)
391}
392
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for the value `parse_dir_name_version` returns on success.
    fn parsed(name: &str, version: &str) -> Option<(String, String)> {
        Some((name.to_string(), version.to_string()))
    }

    /// Creates `<root>/<dir_name>/lib`, which is enough for the directory to
    /// pass `verify_gem_at_path`, and returns the gem directory.
    async fn make_gem(root: &Path, dir_name: &str) -> PathBuf {
        let gem_dir = root.join(dir_name);
        tokio::fs::create_dir_all(gem_dir.join("lib"))
            .await
            .unwrap();
        gem_dir
    }

    /// Options that make the crawler scan exactly `root` via `global_prefix`.
    fn prefix_options(root: &Path) -> CrawlerOptions {
        CrawlerOptions {
            cwd: root.to_path_buf(),
            global: false,
            global_prefix: Some(root.to_path_buf()),
            batch_size: 100,
        }
    }

    #[test]
    fn test_parse_gem_dir_name() {
        assert_eq!(
            RubyCrawler::parse_dir_name_version("rails-7.1.0"),
            parsed("rails", "7.1.0")
        );
        assert_eq!(
            RubyCrawler::parse_dir_name_version("nokogiri-1.16.5"),
            parsed("nokogiri", "1.16.5")
        );
        assert_eq!(
            RubyCrawler::parse_dir_name_version("activerecord-7.1.3.2"),
            parsed("activerecord", "7.1.3.2")
        );
        assert_eq!(
            RubyCrawler::parse_dir_name_version("net-http-0.4.1"),
            parsed("net-http", "0.4.1")
        );
        assert!(RubyCrawler::parse_dir_name_version("no-version-here").is_none());
        assert!(RubyCrawler::parse_dir_name_version("noversion").is_none());
    }

    #[test]
    fn test_parse_gem_dir_name_platform_gems() {
        assert_eq!(
            RubyCrawler::parse_dir_name_version("nokogiri-1.16.5-x86_64-linux"),
            parsed("nokogiri", "1.16.5")
        );
        assert_eq!(
            RubyCrawler::parse_dir_name_version("nokogiri-1.16.5-arm64-darwin"),
            parsed("nokogiri", "1.16.5")
        );
        assert_eq!(
            RubyCrawler::parse_dir_name_version("sassc-2.4.0-java"),
            parsed("sassc", "2.4.0")
        );
        // The platform itself may contain a hyphen followed by a digit.
        assert_eq!(
            RubyCrawler::parse_dir_name_version("nokogiri-1.16.5-universal-darwin-21"),
            parsed("nokogiri", "1.16.5")
        );
        // Hyphenated gem name combined with a platform suffix.
        assert_eq!(
            RubyCrawler::parse_dir_name_version("libv8-node-18.16.0.0-x86_64-linux"),
            parsed("libv8-node", "18.16.0.0")
        );
    }

    #[tokio::test]
    async fn test_find_by_purls_gem() {
        let dir = tempfile::tempdir().unwrap();
        make_gem(dir.path(), "rails-7.1.0").await;

        let crawler = RubyCrawler::new();
        let purls = vec![
            "pkg:gem/rails@7.1.0".to_string(),
            "pkg:gem/nokogiri@1.16.5".to_string(),
        ];
        let result = crawler.find_by_purls(dir.path(), &purls).await.unwrap();

        assert_eq!(result.len(), 1);
        assert!(result.contains_key("pkg:gem/rails@7.1.0"));
        assert!(!result.contains_key("pkg:gem/nokogiri@1.16.5"));
    }

    #[tokio::test]
    async fn test_crawl_all_gems() {
        let dir = tempfile::tempdir().unwrap();
        make_gem(dir.path(), "rails-7.1.0").await;
        make_gem(dir.path(), "nokogiri-1.16.5").await;

        let crawler = RubyCrawler::new();
        let options = prefix_options(dir.path());

        let packages = crawler.crawl_all(&options).await;
        assert_eq!(packages.len(), 2);

        let purls: HashSet<_> = packages.iter().map(|p| p.purl.as_str()).collect();
        assert!(purls.contains("pkg:gem/rails@7.1.0"));
        assert!(purls.contains("pkg:gem/nokogiri@1.16.5"));
    }

    #[tokio::test]
    async fn test_get_gem_paths_with_vendor_bundle() {
        let dir = tempfile::tempdir().unwrap();
        let vendor_gems = dir
            .path()
            .join("vendor")
            .join("bundle")
            .join("ruby")
            .join("3.2.0")
            .join("gems");
        tokio::fs::create_dir_all(&vendor_gems).await.unwrap();

        let paths = RubyCrawler::get_vendor_bundle_paths(dir.path()).await;
        assert_eq!(paths.len(), 1);
        assert_eq!(paths[0], vendor_gems);
    }

    #[tokio::test]
    async fn test_deduplication() {
        let dir = tempfile::tempdir().unwrap();

        // A pure-Ruby build and a platform build of the same release map to
        // the same purl, so only one of them may be reported. Which directory
        // wins depends on listing order, hence only the purl is asserted.
        make_gem(dir.path(), "rails-7.1.0").await;
        make_gem(dir.path(), "rails-7.1.0-x86_64-linux").await;

        let crawler = RubyCrawler::new();
        let options = prefix_options(dir.path());

        let packages = crawler.crawl_all(&options).await;
        assert_eq!(packages.len(), 1);
        assert_eq!(packages[0].purl, "pkg:gem/rails@7.1.0");
    }

    #[tokio::test]
    async fn test_verify_gem_with_gemspec() {
        let dir = tempfile::tempdir().unwrap();
        let gem_dir = dir.path().join("rails-7.1.0");
        // No `lib` directory here: the gemspec alone must be sufficient.
        tokio::fs::create_dir_all(&gem_dir).await.unwrap();
        tokio::fs::write(gem_dir.join("rails.gemspec"), "# gemspec")
            .await
            .unwrap();

        let crawler = RubyCrawler::new();
        assert!(crawler.verify_gem_at_path(&gem_dir).await);
    }

    #[tokio::test]
    async fn test_verify_gem_empty_dir_fails() {
        let dir = tempfile::tempdir().unwrap();
        let gem_dir = dir.path().join("rails-7.1.0");
        tokio::fs::create_dir_all(&gem_dir).await.unwrap();

        let crawler = RubyCrawler::new();
        assert!(!crawler.verify_gem_at_path(&gem_dir).await);
    }

    #[test]
    fn test_parse_dir_name_version_empty_name_guard() {
        // A leading hyphen leaves nothing for the name part.
        assert_eq!(RubyCrawler::parse_dir_name_version("-1.0.0"), None);
    }

    #[tokio::test]
    async fn find_by_purls_resolves_platform_suffixed_dir() {
        let dir = tempfile::tempdir().unwrap();
        let plat_dir = make_gem(dir.path(), "nokogiri-1.16.5-x86_64-linux").await;

        let crawler = RubyCrawler::new();
        let purls = vec!["pkg:gem/nokogiri@1.16.5".to_string()];
        let result = crawler.find_by_purls(dir.path(), &purls).await.unwrap();

        assert_eq!(result.len(), 1);
        let pkg = result.get("pkg:gem/nokogiri@1.16.5").unwrap();
        assert_eq!(pkg.version, "1.16.5");
        assert_eq!(pkg.path, plat_dir);
    }

    #[tokio::test]
    async fn find_by_purls_rejects_version_prefix_collision() {
        let dir = tempfile::tempdir().unwrap();
        make_gem(dir.path(), "foo-1.0.0-x86_64-linux").await;

        let crawler = RubyCrawler::new();
        let purls = vec!["pkg:gem/foo@1.0".to_string()];
        let result = crawler.find_by_purls(dir.path(), &purls).await.unwrap();
        assert!(
            result.is_empty(),
            "1.0 must not match foo-1.0.0-*; got {result:?}"
        );
    }

    #[tokio::test]
    async fn crawl_all_strips_platform_suffix() {
        let dir = tempfile::tempdir().unwrap();
        let plat_dir = make_gem(dir.path(), "nokogiri-1.16.5-arm64-darwin").await;

        let crawler = RubyCrawler::new();
        let options = prefix_options(dir.path());
        let packages = crawler.crawl_all(&options).await;
        assert_eq!(packages.len(), 1);
        assert_eq!(packages[0].purl, "pkg:gem/nokogiri@1.16.5");
        assert_eq!(packages[0].version, "1.16.5");
        assert_eq!(packages[0].path, plat_dir);
    }

    #[tokio::test]
    async fn locate_gem_dir_prefers_exact_over_platform() {
        let dir = tempfile::tempdir().unwrap();
        let exact = make_gem(dir.path(), "rails-7.1.0").await;
        make_gem(dir.path(), "rails-7.1.0-x86_64-linux").await;

        let crawler = RubyCrawler::new();
        let purls = vec!["pkg:gem/rails@7.1.0".to_string()];
        let result = crawler.find_by_purls(dir.path(), &purls).await.unwrap();
        assert_eq!(result.get("pkg:gem/rails@7.1.0").unwrap().path, exact);
    }
}