socket_patch_core/crawlers/
ruby_crawler.rs1use std::collections::{HashMap, HashSet};
2use std::path::{Path, PathBuf};
3
4use super::types::{CrawledPackage, CrawlerOptions};
5
/// Crawler that locates installed Ruby gems on the local filesystem.
///
/// The type is a stateless unit struct; all inputs are passed to its methods.
/// `Debug` and `Clone` are derived so the crawler can be logged and stored
/// inside other derivable types.
#[derive(Debug, Clone)]
pub struct RubyCrawler;
9
10impl RubyCrawler {
11 pub fn new() -> Self {
13 Self
14 }
15
16 pub async fn get_gem_paths(
29 &self,
30 options: &CrawlerOptions,
31 ) -> Result<Vec<PathBuf>, std::io::Error> {
32 if options.global || options.global_prefix.is_some() {
33 if let Some(ref custom) = options.global_prefix {
34 return Ok(vec![custom.clone()]);
35 }
36 return Ok(Self::get_global_gem_paths().await);
37 }
38
39 let vendor_gems = Self::get_vendor_bundle_paths(&options.cwd).await;
41 if !vendor_gems.is_empty() {
42 return Ok(vendor_gems);
43 }
44
45 let has_gemfile = tokio::fs::metadata(options.cwd.join("Gemfile"))
47 .await
48 .is_ok();
49 let has_gemfile_lock = tokio::fs::metadata(options.cwd.join("Gemfile.lock"))
50 .await
51 .is_ok();
52
53 if has_gemfile || has_gemfile_lock {
54 let mut paths = Vec::new();
56 if let Some(gemdir) = Self::run_gem_env("gemdir").await {
57 let gems_path = PathBuf::from(gemdir).join("gems");
58 if is_dir(&gems_path).await {
59 paths.push(gems_path);
60 }
61 }
62 if !paths.is_empty() {
63 return Ok(paths);
64 }
65 }
66
67 Ok(Vec::new())
69 }
70
71 pub async fn crawl_all(&self, options: &CrawlerOptions) -> Vec<CrawledPackage> {
73 let mut packages = Vec::new();
74 let mut seen = HashSet::new();
75
76 let gem_paths = self.get_gem_paths(options).await.unwrap_or_default();
77
78 for gem_path in &gem_paths {
79 let found = self.scan_gem_dir(gem_path, &mut seen).await;
80 packages.extend(found);
81 }
82
83 packages
84 }
85
86 pub async fn find_by_purls(
90 &self,
91 gem_path: &Path,
92 purls: &[String],
93 ) -> Result<HashMap<String, CrawledPackage>, std::io::Error> {
94 let mut result: HashMap<String, CrawledPackage> = HashMap::new();
95
96 for purl in purls {
97 if let Some((name, version)) = crate::utils::purl::parse_gem_purl(purl) {
98 let gem_dir = gem_path.join(format!("{name}-{version}"));
99 if self.verify_gem_at_path(&gem_dir).await {
100 result.insert(
101 purl.clone(),
102 CrawledPackage {
103 name: name.to_string(),
104 version: version.to_string(),
105 namespace: None,
106 purl: purl.clone(),
107 path: gem_dir,
108 },
109 );
110 }
111 }
112 }
113
114 Ok(result)
115 }
116
117 async fn get_vendor_bundle_paths(cwd: &Path) -> Vec<PathBuf> {
123 let vendor_ruby = cwd.join("vendor").join("bundle").join("ruby");
124 let mut paths = Vec::new();
125
126 let mut entries = match tokio::fs::read_dir(&vendor_ruby).await {
127 Ok(rd) => rd,
128 Err(_) => return paths,
129 };
130
131 while let Ok(Some(entry)) = entries.next_entry().await {
132 let ft = match entry.file_type().await {
133 Ok(ft) => ft,
134 Err(_) => continue,
135 };
136 if ft.is_dir() {
137 let gems_dir = vendor_ruby.join(entry.file_name()).join("gems");
138 if is_dir(&gems_dir).await {
139 paths.push(gems_dir);
140 }
141 }
142 }
143
144 paths
145 }
146
147 async fn get_global_gem_paths() -> Vec<PathBuf> {
149 let mut paths = Vec::new();
150 let mut seen = HashSet::new();
151
152 if let Some(gemdir) = Self::run_gem_env("gemdir").await {
154 let gems_path = PathBuf::from(gemdir).join("gems");
155 if is_dir(&gems_path).await && seen.insert(gems_path.clone()) {
156 paths.push(gems_path);
157 }
158 }
159
160 if let Some(gempath) = Self::run_gem_env("gempath").await {
162 for segment in gempath.split(':') {
163 let segment = segment.trim();
164 if segment.is_empty() {
165 continue;
166 }
167 let gems_path = PathBuf::from(segment).join("gems");
168 if is_dir(&gems_path).await && seen.insert(gems_path.clone()) {
169 paths.push(gems_path);
170 }
171 }
172 }
173
174 let home = std::env::var("HOME")
176 .or_else(|_| std::env::var("USERPROFILE"))
177 .unwrap_or_else(|_| "~".to_string());
178 let home = PathBuf::from(home);
179
180 let fallback_globs = [
181 home.join(".gem").join("ruby"),
182 home.join(".rbenv").join("versions"),
183 home.join(".rvm").join("gems"),
184 ];
185
186 for base in &fallback_globs {
187 if let Ok(mut entries) = tokio::fs::read_dir(base).await {
188 while let Ok(Some(entry)) = entries.next_entry().await {
189 let ft = match entry.file_type().await {
190 Ok(ft) => ft,
191 Err(_) => continue,
192 };
193 if !ft.is_dir() {
194 continue;
195 }
196
197 let entry_path = base.join(entry.file_name());
198
199 let gems_dir = entry_path.join("gems");
201 if is_dir(&gems_dir).await && seen.insert(gems_dir.clone()) {
202 paths.push(gems_dir);
203 continue;
204 }
205
206 let lib_ruby_gems = entry_path.join("lib").join("ruby").join("gems");
208 if let Ok(mut sub_entries) = tokio::fs::read_dir(&lib_ruby_gems).await {
209 while let Ok(Some(sub_entry)) = sub_entries.next_entry().await {
210 let gems_dir = lib_ruby_gems.join(sub_entry.file_name()).join("gems");
211 if is_dir(&gems_dir).await && seen.insert(gems_dir.clone()) {
212 paths.push(gems_dir);
213 }
214 }
215 }
216 }
217 }
218 }
219
220 let system_bases = [
222 PathBuf::from("/usr/lib/ruby/gems"),
223 PathBuf::from("/usr/local/lib/ruby/gems"),
224 PathBuf::from("/opt/homebrew/lib/ruby/gems"),
225 ];
226
227 for base in &system_bases {
228 if let Ok(mut entries) = tokio::fs::read_dir(base).await {
229 while let Ok(Some(entry)) = entries.next_entry().await {
230 let gems_dir = base.join(entry.file_name()).join("gems");
231 if is_dir(&gems_dir).await && seen.insert(gems_dir.clone()) {
232 paths.push(gems_dir);
233 }
234 }
235 }
236 }
237
238 paths
239 }
240
/// Runs `gem env <key>` and returns its trimmed standard output.
///
/// Returns `None` when the command cannot be started, exits unsuccessfully,
/// or prints nothing but whitespace. Output that is not valid UTF-8 is
/// converted lossily.
///
/// NOTE(review): `Command::output` is a synchronous call made from an
/// `async fn`; consider offloading it if this runs on a shared async runtime.
async fn run_gem_env(key: &str) -> Option<String> {
    let mut command = std::process::Command::new("gem");
    command.arg("env").arg(key);

    let captured = match command.output() {
        Ok(captured) if captured.status.success() => captured,
        _ => return None,
    };

    let text = String::from_utf8_lossy(&captured.stdout);
    let value = text.trim();
    if value.is_empty() {
        return None;
    }
    Some(value.to_string())
}
259
260 async fn scan_gem_dir(
262 &self,
263 gem_path: &Path,
264 seen: &mut HashSet<String>,
265 ) -> Vec<CrawledPackage> {
266 let mut results = Vec::new();
267
268 let mut entries = match tokio::fs::read_dir(gem_path).await {
269 Ok(rd) => rd,
270 Err(_) => return results,
271 };
272
273 let mut entry_list = Vec::new();
274 while let Ok(Some(entry)) = entries.next_entry().await {
275 entry_list.push(entry);
276 }
277
278 for entry in entry_list {
279 let ft = match entry.file_type().await {
280 Ok(ft) => ft,
281 Err(_) => continue,
282 };
283 if !ft.is_dir() {
284 continue;
285 }
286
287 let dir_name = entry.file_name();
288 let dir_name_str = dir_name.to_string_lossy();
289
290 if dir_name_str.starts_with('.') {
292 continue;
293 }
294
295 let gem_dir = gem_path.join(&*dir_name_str);
296
297 if let Some((name, version)) = Self::parse_dir_name_version(&dir_name_str) {
299 if !self.verify_gem_at_path(&gem_dir).await {
301 continue;
302 }
303
304 let purl = crate::utils::purl::build_gem_purl(&name, &version);
305
306 if seen.contains(&purl) {
307 continue;
308 }
309 seen.insert(purl.clone());
310
311 results.push(CrawledPackage {
312 name,
313 version,
314 namespace: None,
315 purl,
316 path: gem_dir,
317 });
318 }
319 }
320
321 results
322 }
323
324 async fn verify_gem_at_path(&self, path: &Path) -> bool {
327 if !is_dir(path).await {
328 return false;
329 }
330
331 if is_dir(&path.join("lib")).await {
333 return true;
334 }
335
336 if let Ok(mut entries) = tokio::fs::read_dir(path).await {
338 while let Ok(Some(entry)) = entries.next_entry().await {
339 if let Some(name) = entry.file_name().to_str() {
340 if name.ends_with(".gemspec") {
341 return true;
342 }
343 }
344 }
345 }
346
347 false
348 }
349
/// Splits a gem directory name such as `net-http-0.4.1` into `(name, version)`.
///
/// The split happens at the last hyphen that is immediately followed by an
/// ASCII digit, so hyphenated gem names are kept intact. Returns `None` when
/// no such hyphen exists or when the name part would be empty.
///
/// Because the last qualifying hyphen wins, a suffix segment that itself
/// starts with a digit is treated as the version.
fn parse_dir_name_version(dir_name: &str) -> Option<(String, String)> {
    // Searching from the right, the first qualifying hyphen is the last one overall.
    let (split_at, _) = dir_name.rmatch_indices('-').find(|&(pos, _)| {
        dir_name[pos + 1..].starts_with(|c: char| c.is_ascii_digit())
    })?;

    let name = &dir_name[..split_at];
    let version = &dir_name[split_at + 1..];
    if name.is_empty() || version.is_empty() {
        return None;
    }
    Some((name.to_owned(), version.to_owned()))
}
370}
371
372impl Default for RubyCrawler {
373 fn default() -> Self {
374 Self::new()
375 }
376}
377
378async fn is_dir(path: &Path) -> bool {
380 tokio::fs::metadata(path)
381 .await
382 .map(|m| m.is_dir())
383 .unwrap_or(false)
384}
385
386#[cfg(test)]
387mod tests {
388 use super::*;
389
/// Directory names split into `(name, version)`; names without a version are rejected.
#[test]
fn test_parse_gem_dir_name() {
    // (directory name, expected gem name, expected version)
    let parsed_cases = [
        ("rails-7.1.0", "rails", "7.1.0"),
        ("nokogiri-1.16.5", "nokogiri", "1.16.5"),
        ("activerecord-7.1.3.2", "activerecord", "7.1.3.2"),
        ("net-http-0.4.1", "net-http", "0.4.1"),
    ];
    for &(dir_name, name, version) in &parsed_cases {
        assert_eq!(
            RubyCrawler::parse_dir_name_version(dir_name),
            Some((name.to_string(), version.to_string()))
        );
    }

    let rejected_cases = ["no-version-here", "noversion"];
    for &dir_name in &rejected_cases {
        assert!(RubyCrawler::parse_dir_name_version(dir_name).is_none());
    }
}
411
412 #[tokio::test]
413 async fn test_find_by_purls_gem() {
414 let dir = tempfile::tempdir().unwrap();
415 let rails_dir = dir.path().join("rails-7.1.0");
416 tokio::fs::create_dir_all(rails_dir.join("lib")).await.unwrap();
417
418 let crawler = RubyCrawler::new();
419 let purls = vec![
420 "pkg:gem/rails@7.1.0".to_string(),
421 "pkg:gem/nokogiri@1.16.5".to_string(),
422 ];
423 let result = crawler.find_by_purls(dir.path(), &purls).await.unwrap();
424
425 assert_eq!(result.len(), 1);
426 assert!(result.contains_key("pkg:gem/rails@7.1.0"));
427 assert!(!result.contains_key("pkg:gem/nokogiri@1.16.5"));
428 }
429
430 #[tokio::test]
431 async fn test_crawl_all_gems() {
432 let dir = tempfile::tempdir().unwrap();
433
434 let rails_dir = dir.path().join("rails-7.1.0");
436 tokio::fs::create_dir_all(rails_dir.join("lib")).await.unwrap();
437
438 let nokogiri_dir = dir.path().join("nokogiri-1.16.5");
439 tokio::fs::create_dir_all(nokogiri_dir.join("lib")).await.unwrap();
440
441 let crawler = RubyCrawler::new();
442 let options = CrawlerOptions {
443 cwd: dir.path().to_path_buf(),
444 global: false,
445 global_prefix: Some(dir.path().to_path_buf()),
446 batch_size: 100,
447 };
448
449 let packages = crawler.crawl_all(&options).await;
450 assert_eq!(packages.len(), 2);
451
452 let purls: HashSet<_> = packages.iter().map(|p| p.purl.as_str()).collect();
453 assert!(purls.contains("pkg:gem/rails@7.1.0"));
454 assert!(purls.contains("pkg:gem/nokogiri@1.16.5"));
455 }
456
457 #[tokio::test]
458 async fn test_get_gem_paths_with_vendor_bundle() {
459 let dir = tempfile::tempdir().unwrap();
460 let vendor_gems = dir
461 .path()
462 .join("vendor")
463 .join("bundle")
464 .join("ruby")
465 .join("3.2.0")
466 .join("gems");
467 tokio::fs::create_dir_all(&vendor_gems).await.unwrap();
468
469 let paths = RubyCrawler::get_vendor_bundle_paths(dir.path()).await;
470 assert_eq!(paths.len(), 1);
471 assert_eq!(paths[0], vendor_gems);
472 }
473
474 #[tokio::test]
475 async fn test_deduplication() {
476 let dir = tempfile::tempdir().unwrap();
477
478 let rails_dir = dir.path().join("rails-7.1.0");
480 tokio::fs::create_dir_all(rails_dir.join("lib")).await.unwrap();
481
482 let crawler = RubyCrawler::new();
483 let options = CrawlerOptions {
484 cwd: dir.path().to_path_buf(),
485 global: false,
486 global_prefix: Some(dir.path().to_path_buf()),
487 batch_size: 100,
488 };
489
490 let packages = crawler.crawl_all(&options).await;
491 assert_eq!(packages.len(), 1);
492 assert_eq!(packages[0].purl, "pkg:gem/rails@7.1.0");
493 }
494
495 #[tokio::test]
496 async fn test_verify_gem_with_gemspec() {
497 let dir = tempfile::tempdir().unwrap();
498 let gem_dir = dir.path().join("rails-7.1.0");
499 tokio::fs::create_dir_all(&gem_dir).await.unwrap();
500 tokio::fs::write(gem_dir.join("rails.gemspec"), "# gemspec")
501 .await
502 .unwrap();
503
504 let crawler = RubyCrawler::new();
505 assert!(crawler.verify_gem_at_path(&gem_dir).await);
506 }
507
508 #[tokio::test]
509 async fn test_verify_gem_empty_dir_fails() {
510 let dir = tempfile::tempdir().unwrap();
511 let gem_dir = dir.path().join("rails-7.1.0");
512 tokio::fs::create_dir_all(&gem_dir).await.unwrap();
513
514 let crawler = RubyCrawler::new();
515 assert!(!crawler.verify_gem_at_path(&gem_dir).await);
516 }
517}