socket_patch_core/crawlers/
ruby_crawler.rs1use std::collections::{HashMap, HashSet};
2use std::path::{Path, PathBuf};
3
4use super::types::{CrawledPackage, CrawlerOptions};
5
/// Crawler that locates installed Ruby gems on disk.
///
/// The type is a stateless unit struct; all behaviour lives in its methods.
/// `Debug`, `Clone` and `Copy` are derived so the crawler can be logged and
/// passed around freely by callers.
#[derive(Debug, Clone, Copy)]
pub struct RubyCrawler;
9
10impl RubyCrawler {
11 pub fn new() -> Self {
13 Self
14 }
15
16 pub async fn get_gem_paths(
29 &self,
30 options: &CrawlerOptions,
31 ) -> Result<Vec<PathBuf>, std::io::Error> {
32 if options.global || options.global_prefix.is_some() {
33 if let Some(ref custom) = options.global_prefix {
34 return Ok(vec![custom.clone()]);
35 }
36 return Ok(Self::get_global_gem_paths().await);
37 }
38
39 let vendor_gems = Self::get_vendor_bundle_paths(&options.cwd).await;
41 if !vendor_gems.is_empty() {
42 return Ok(vendor_gems);
43 }
44
45 let has_gemfile = tokio::fs::metadata(options.cwd.join("Gemfile"))
47 .await
48 .is_ok();
49 let has_gemfile_lock = tokio::fs::metadata(options.cwd.join("Gemfile.lock"))
50 .await
51 .is_ok();
52
53 if has_gemfile || has_gemfile_lock {
54 let mut paths = Vec::new();
56 if let Some(gemdir) = Self::run_gem_env("gemdir").await {
57 let gems_path = PathBuf::from(gemdir).join("gems");
58 if is_dir(&gems_path).await {
59 paths.push(gems_path);
60 }
61 }
62 if !paths.is_empty() {
63 return Ok(paths);
64 }
65 }
66
67 Ok(Vec::new())
69 }
70
71 pub async fn crawl_all(&self, options: &CrawlerOptions) -> Vec<CrawledPackage> {
73 let mut packages = Vec::new();
74 let mut seen = HashSet::new();
75
76 let gem_paths = self.get_gem_paths(options).await.unwrap_or_default();
77
78 for gem_path in &gem_paths {
79 let found = self.scan_gem_dir(gem_path, &mut seen).await;
80 packages.extend(found);
81 }
82
83 packages
84 }
85
86 pub async fn find_by_purls(
90 &self,
91 gem_path: &Path,
92 purls: &[String],
93 ) -> Result<HashMap<String, CrawledPackage>, std::io::Error> {
94 let mut result: HashMap<String, CrawledPackage> = HashMap::new();
95
96 for purl in purls {
97 if let Some((name, version)) = crate::utils::purl::parse_gem_purl(purl) {
98 let gem_dir = gem_path.join(format!("{name}-{version}"));
99 if self.verify_gem_at_path(&gem_dir).await {
100 result.insert(
101 purl.clone(),
102 CrawledPackage {
103 name: name.to_string(),
104 version: version.to_string(),
105 namespace: None,
106 purl: purl.clone(),
107 path: gem_dir,
108 },
109 );
110 }
111 }
112 }
113
114 Ok(result)
115 }
116
117 async fn get_vendor_bundle_paths(cwd: &Path) -> Vec<PathBuf> {
123 let vendor_ruby = cwd.join("vendor").join("bundle").join("ruby");
124 let mut paths = Vec::new();
125
126 for entry in crate::utils::fs::list_dir_entries(&vendor_ruby).await {
127 if !crate::utils::fs::entry_is_dir(&entry).await {
128 continue;
129 }
130 let gems_dir = vendor_ruby.join(entry.file_name()).join("gems");
131 if is_dir(&gems_dir).await {
132 paths.push(gems_dir);
133 }
134 }
135 paths
136 }
137
138 async fn get_global_gem_paths() -> Vec<PathBuf> {
140 let mut paths = Vec::new();
141 let mut seen = HashSet::new();
142
143 if let Some(gemdir) = Self::run_gem_env("gemdir").await {
145 let gems_path = PathBuf::from(gemdir).join("gems");
146 if is_dir(&gems_path).await && seen.insert(gems_path.clone()) {
147 paths.push(gems_path);
148 }
149 }
150
151 if let Some(gempath) = Self::run_gem_env("gempath").await {
153 for segment in gempath.split(':') {
154 let segment = segment.trim();
155 if segment.is_empty() {
156 continue;
157 }
158 let gems_path = PathBuf::from(segment).join("gems");
159 if is_dir(&gems_path).await && seen.insert(gems_path.clone()) {
160 paths.push(gems_path);
161 }
162 }
163 }
164
165 let home = std::env::var("HOME")
167 .or_else(|_| std::env::var("USERPROFILE"))
168 .unwrap_or_else(|_| "~".to_string());
169 let home = PathBuf::from(home);
170
171 let fallback_globs = [
172 home.join(".gem").join("ruby"),
173 home.join(".rbenv").join("versions"),
174 home.join(".rvm").join("gems"),
175 ];
176
177 for base in &fallback_globs {
178 for entry in crate::utils::fs::list_dir_entries(base).await {
179 if !crate::utils::fs::entry_is_dir(&entry).await {
180 continue;
181 }
182
183 let entry_path = base.join(entry.file_name());
184
185 let gems_dir = entry_path.join("gems");
187 if is_dir(&gems_dir).await && seen.insert(gems_dir.clone()) {
188 paths.push(gems_dir);
189 continue;
190 }
191
192 let lib_ruby_gems = entry_path.join("lib").join("ruby").join("gems");
194 for sub_entry in crate::utils::fs::list_dir_entries(&lib_ruby_gems).await {
195 let gems_dir = lib_ruby_gems.join(sub_entry.file_name()).join("gems");
196 if is_dir(&gems_dir).await && seen.insert(gems_dir.clone()) {
197 paths.push(gems_dir);
198 }
199 }
200 }
201 }
202
203 let system_bases = [
205 PathBuf::from("/usr/lib/ruby/gems"),
206 PathBuf::from("/usr/local/lib/ruby/gems"),
207 PathBuf::from("/opt/homebrew/lib/ruby/gems"),
208 ];
209
210 for base in &system_bases {
211 for entry in crate::utils::fs::list_dir_entries(base).await {
212 let gems_dir = base.join(entry.file_name()).join("gems");
213 if is_dir(&gems_dir).await && seen.insert(gems_dir.clone()) {
214 paths.push(gems_dir);
215 }
216 }
217 }
218
219 paths
220 }
221
222 async fn run_gem_env(key: &str) -> Option<String> {
224 Self::run_gem_env_with(&crate::utils::process::SystemCommandRunner, key)
225 }
226
227 fn run_gem_env_with(
232 runner: &dyn crate::utils::process::CommandRunner,
233 key: &str,
234 ) -> Option<String> {
235 parse_gem_env_output(runner.run("gem", &["env", key]).as_deref().unwrap_or(""))
236 }
237
238 async fn scan_gem_dir(
240 &self,
241 gem_path: &Path,
242 seen: &mut HashSet<String>,
243 ) -> Vec<CrawledPackage> {
244 let mut results = Vec::new();
245
246 for entry in crate::utils::fs::list_dir_entries(gem_path).await {
247 if !crate::utils::fs::entry_is_dir(&entry).await {
248 continue;
249 }
250
251 let dir_name = entry.file_name();
252 let dir_name_str = dir_name.to_string_lossy();
253
254 if dir_name_str.starts_with('.') {
256 continue;
257 }
258
259 let gem_dir = gem_path.join(&*dir_name_str);
260
261 if let Some((name, version)) = Self::parse_dir_name_version(&dir_name_str) {
263 if !self.verify_gem_at_path(&gem_dir).await {
265 continue;
266 }
267
268 let purl = crate::utils::purl::build_gem_purl(&name, &version);
269
270 if seen.contains(&purl) {
271 continue;
272 }
273 seen.insert(purl.clone());
274
275 results.push(CrawledPackage {
276 name,
277 version,
278 namespace: None,
279 purl,
280 path: gem_dir,
281 });
282 }
283 }
284
285 results
286 }
287
288 async fn verify_gem_at_path(&self, path: &Path) -> bool {
291 if !is_dir(path).await {
292 return false;
293 }
294
295 if is_dir(&path.join("lib")).await {
297 return true;
298 }
299
300 for entry in crate::utils::fs::list_dir_entries(path).await {
302 if let Some(name) = entry.file_name().to_str() {
303 if name.ends_with(".gemspec") {
304 return true;
305 }
306 }
307 }
308
309 false
310 }
311
/// Splits a gem directory name of the form `<name>-<version>`.
///
/// The split happens at the last hyphen that is immediately followed by an
/// ASCII digit, so hyphenated names (`net-http-0.4.1`) and versions with a
/// non-numeric suffix (`1.16.5-x86_64-linux`) stay intact.
///
/// Returns `None` when no such hyphen exists or when the name part is empty.
fn parse_dir_name_version(dir_name: &str) -> Option<(String, String)> {
    let bytes = dir_name.as_bytes();

    // Searching from the right, the first match is the last qualifying hyphen.
    let (cut, _) = dir_name
        .rmatch_indices('-')
        .find(|&(pos, _)| bytes.get(pos + 1).map_or(false, |b| b.is_ascii_digit()))?;

    let (name, rest) = dir_name.split_at(cut);
    let version = &rest[1..]; // drop the separating hyphen

    if name.is_empty() || version.is_empty() {
        None
    } else {
        Some((name.to_owned(), version.to_owned()))
    }
}
332}
333
334impl Default for RubyCrawler {
335 fn default() -> Self {
336 Self::new()
337 }
338}
339
/// Normalises the stdout of a `gem env <key>` invocation.
///
/// Surrounding whitespace is trimmed; output that is empty after trimming
/// yields `None`.
pub fn parse_gem_env_output(stdout: &str) -> Option<String> {
    match stdout.trim() {
        "" => None,
        trimmed => Some(trimmed.to_owned()),
    }
}
351
352async fn is_dir(path: &Path) -> bool {
354 tokio::fs::metadata(path)
355 .await
356 .map(|m| m.is_dir())
357 .unwrap_or(false)
358}
359
#[cfg(test)]
mod tests {
    use super::*;

    /// Directory names split at the last hyphen that is followed by a digit.
    #[test]
    fn test_parse_gem_dir_name() {
        assert_eq!(
            RubyCrawler::parse_dir_name_version("rails-7.1.0"),
            Some(("rails".to_string(), "7.1.0".to_string()))
        );
        assert_eq!(
            RubyCrawler::parse_dir_name_version("nokogiri-1.16.5"),
            Some(("nokogiri".to_string(), "1.16.5".to_string()))
        );
        assert_eq!(
            RubyCrawler::parse_dir_name_version("activerecord-7.1.3.2"),
            Some(("activerecord".to_string(), "7.1.3.2".to_string()))
        );
        assert_eq!(
            RubyCrawler::parse_dir_name_version("net-http-0.4.1"),
            Some(("net-http".to_string(), "0.4.1".to_string()))
        );
        // A non-numeric suffix after the version stays part of the version.
        assert_eq!(
            RubyCrawler::parse_dir_name_version("nokogiri-1.16.5-x86_64-linux"),
            Some(("nokogiri".to_string(), "1.16.5-x86_64-linux".to_string()))
        );
        assert!(RubyCrawler::parse_dir_name_version("no-version-here").is_none());
        assert!(RubyCrawler::parse_dir_name_version("noversion").is_none());
    }

    /// Only purls whose gem directory exists and verifies are returned.
    #[tokio::test]
    async fn test_find_by_purls_gem() {
        let dir = tempfile::tempdir().unwrap();
        let rails_dir = dir.path().join("rails-7.1.0");
        tokio::fs::create_dir_all(rails_dir.join("lib")).await.unwrap();

        let crawler = RubyCrawler::new();
        let purls = vec![
            "pkg:gem/rails@7.1.0".to_string(),
            "pkg:gem/nokogiri@1.16.5".to_string(),
        ];
        let result = crawler.find_by_purls(dir.path(), &purls).await.unwrap();

        assert_eq!(result.len(), 1);
        assert!(result.contains_key("pkg:gem/rails@7.1.0"));
        assert!(!result.contains_key("pkg:gem/nokogiri@1.16.5"));
    }

    /// `crawl_all` reports every verified gem below the configured prefix.
    #[tokio::test]
    async fn test_crawl_all_gems() {
        let dir = tempfile::tempdir().unwrap();

        let rails_dir = dir.path().join("rails-7.1.0");
        tokio::fs::create_dir_all(rails_dir.join("lib")).await.unwrap();

        let nokogiri_dir = dir.path().join("nokogiri-1.16.5");
        tokio::fs::create_dir_all(nokogiri_dir.join("lib")).await.unwrap();

        let crawler = RubyCrawler::new();
        let options = CrawlerOptions {
            cwd: dir.path().to_path_buf(),
            global: false,
            global_prefix: Some(dir.path().to_path_buf()),
            batch_size: 100,
        };

        let packages = crawler.crawl_all(&options).await;
        assert_eq!(packages.len(), 2);

        let purls: HashSet<_> = packages.iter().map(|p| p.purl.as_str()).collect();
        assert!(purls.contains("pkg:gem/rails@7.1.0"));
        assert!(purls.contains("pkg:gem/nokogiri@1.16.5"));
    }

    /// A `vendor/bundle/ruby/<version>/gems` directory is discovered.
    #[tokio::test]
    async fn test_get_gem_paths_with_vendor_bundle() {
        let dir = tempfile::tempdir().unwrap();
        let vendor_gems = dir
            .path()
            .join("vendor")
            .join("bundle")
            .join("ruby")
            .join("3.2.0")
            .join("gems");
        tokio::fs::create_dir_all(&vendor_gems).await.unwrap();

        let paths = RubyCrawler::get_vendor_bundle_paths(dir.path()).await;
        assert_eq!(paths.len(), 1);
        assert_eq!(paths[0], vendor_gems);
    }

    /// The same gem installed in two directories is reported only once when
    /// both scans share one `seen` set.
    #[tokio::test]
    async fn test_deduplication() {
        let first = tempfile::tempdir().unwrap();
        let second = tempfile::tempdir().unwrap();
        for root in &[first.path(), second.path()] {
            tokio::fs::create_dir_all(root.join("rails-7.1.0").join("lib"))
                .await
                .unwrap();
        }

        let crawler = RubyCrawler::new();
        let mut seen = HashSet::new();

        let from_first = crawler.scan_gem_dir(first.path(), &mut seen).await;
        assert_eq!(from_first.len(), 1);
        assert_eq!(from_first[0].purl, "pkg:gem/rails@7.1.0");

        // The second directory holds the same purl, so nothing new is reported.
        let from_second = crawler.scan_gem_dir(second.path(), &mut seen).await;
        assert!(from_second.is_empty());
        assert_eq!(seen.len(), 1);
    }

    /// A directory without `lib` still verifies when it holds a gemspec.
    #[tokio::test]
    async fn test_verify_gem_with_gemspec() {
        let dir = tempfile::tempdir().unwrap();
        let gem_dir = dir.path().join("rails-7.1.0");
        tokio::fs::create_dir_all(&gem_dir).await.unwrap();
        tokio::fs::write(gem_dir.join("rails.gemspec"), "# gemspec")
            .await
            .unwrap();

        let crawler = RubyCrawler::new();
        assert!(crawler.verify_gem_at_path(&gem_dir).await);
    }

    /// An empty directory is not accepted as a gem.
    #[tokio::test]
    async fn test_verify_gem_empty_dir_fails() {
        let dir = tempfile::tempdir().unwrap();
        let gem_dir = dir.path().join("rails-7.1.0");
        tokio::fs::create_dir_all(&gem_dir).await.unwrap();

        let crawler = RubyCrawler::new();
        assert!(!crawler.verify_gem_at_path(&gem_dir).await);
    }

    /// A leading hyphen would give an empty name, which is rejected.
    #[test]
    fn test_parse_dir_name_version_empty_name_guard() {
        assert_eq!(RubyCrawler::parse_dir_name_version("-1.0.0"), None);
    }
}