1use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Party};
28use crate::parsers::utils::split_name_email;
29use flate2::read::GzDecoder;
30use log::warn;
31use packageurl::PackageUrl;
32use regex::Regex;
33use std::collections::HashMap;
34use std::fs::{self, File};
35use std::io::Read;
36use std::path::{Path, PathBuf};
37use tar::Archive;
38
39use super::PackageParser;
40
41const PACKAGE_TYPE: PackageType = PackageType::Gem;
42
/// Removes every trailing `.freeze` from `s`.
///
/// Repeated suffixes (`x.freeze.freeze`) are all removed; text without the
/// suffix is returned unchanged. The result borrows from the input.
pub fn strip_freeze_suffix(s: &str) -> &str {
    let mut remainder = s;
    while let Some(shorter) = remainder.strip_suffix(".freeze") {
        remainder = shorter;
    }
    remainder
}
60
/// A block that is open while a Gemfile is scanned line by line; the Gemfile
/// parser in this file keeps these on a stack.
61enum GemfileBlock {
/// Group names collected from the `:symbol` arguments of a `group ... do` line.
62 Group(Vec<String>),
/// Source URL captured from a `source "..." do` line.
63 Source(String),
64}
65
/// Marker type whose `PackageParser` implementation handles files named `Gemfile`.
66pub struct GemfileParser;
75
76impl PackageParser for GemfileParser {
77 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
78
79 fn extract_packages(path: &Path) -> Vec<PackageData> {
80 let content = match fs::read_to_string(path) {
81 Ok(c) => c,
82 Err(e) => {
83 warn!("Failed to read Gemfile at {:?}: {}", path, e);
84 return vec![default_package_data_with_datasource(DatasourceId::Gemfile)];
85 }
86 };
87
88 vec![parse_gemfile(&content)]
89 }
90
91 fn is_match(path: &Path) -> bool {
92 path.file_name()
93 .and_then(|n| n.to_str())
94 .is_some_and(|name| name == "Gemfile")
95 || path
96 .to_str()
97 .is_some_and(|p| p.contains("data.gz-extract/") && p.ends_with("/Gemfile"))
98 }
99}
100
101fn parse_gemfile(content: &str) -> PackageData {
103 let mut dependencies = Vec::new();
104 let mut block_stack = Vec::new();
105 let mut default_source = None;
106 let mut sources = Vec::new();
107
108 let gem_regex = match Regex::new(
111 r#"^\s*gem\s+["']([^"']+)["'](?:\.freeze)?(?:\s*,\s*["']([^"']+)["'](?:\.freeze)?)?(?:\s*,\s*["']([^"']+)["'](?:\.freeze)?)?(?:\s*,\s*(.+))?"#,
112 ) {
113 Ok(r) => r,
114 Err(e) => {
115 warn!("Failed to compile gem regex: {}", e);
116 return default_package_data_with_datasource(DatasourceId::Gemfile);
117 }
118 };
119
120 let group_start_regex = match Regex::new(r"^\s*group\s+(.+?)\s+do\s*$") {
122 Ok(r) => r,
123 Err(e) => {
124 warn!("Failed to compile group regex: {}", e);
125 return default_package_data_with_datasource(DatasourceId::Gemfile);
126 }
127 };
128
129 let group_end_regex = match Regex::new(r"^\s*end\s*$") {
130 Ok(r) => r,
131 Err(e) => {
132 warn!("Failed to compile end regex: {}", e);
133 return default_package_data_with_datasource(DatasourceId::Gemfile);
134 }
135 };
136
137 let source_block_start_regex = match Regex::new(r#"^\s*source\s+["']([^"']+)["']\s+do\s*$"#) {
138 Ok(r) => r,
139 Err(e) => {
140 warn!("Failed to compile source block regex: {}", e);
141 return default_package_data_with_datasource(DatasourceId::Gemfile);
142 }
143 };
144
145 let source_regex = match Regex::new(r#"^\s*source\s+["']([^"']+)["']\s*$"#) {
146 Ok(r) => r,
147 Err(e) => {
148 warn!("Failed to compile source regex: {}", e);
149 return default_package_data_with_datasource(DatasourceId::Gemfile);
150 }
151 };
152
153 let symbol_regex = match Regex::new(r":(\w+)") {
155 Ok(r) => r,
156 Err(e) => {
157 warn!("Failed to compile symbol regex: {}", e);
158 return default_package_data_with_datasource(DatasourceId::Gemfile);
159 }
160 };
161
162 for line in content.lines() {
163 let trimmed = line.trim();
164
165 if trimmed.is_empty() || trimmed.starts_with('#') {
167 continue;
168 }
169
170 if let Some(caps) = group_start_regex.captures(trimmed) {
172 let groups_str = caps.get(1).map(|m| m.as_str()).unwrap_or("");
173 let mut current_groups = Vec::new();
174 for cap in symbol_regex.captures_iter(groups_str) {
175 if let Some(group_name) = cap.get(1) {
176 current_groups.push(group_name.as_str().to_string());
177 }
178 }
179 block_stack.push(GemfileBlock::Group(current_groups));
180 continue;
181 }
182
183 if let Some(caps) = source_block_start_regex.captures(trimmed) {
184 let source = caps
185 .get(1)
186 .map(|m| m.as_str().to_string())
187 .unwrap_or_default();
188 if !source.is_empty() {
189 push_unique_string(&mut sources, source.clone());
190 block_stack.push(GemfileBlock::Source(source));
191 }
192 continue;
193 }
194
195 if let Some(caps) = source_regex.captures(trimmed) {
196 if let Some(source) = caps.get(1).map(|m| m.as_str().to_string()) {
197 push_unique_string(&mut sources, source.clone());
198 default_source = Some(source);
199 }
200 continue;
201 }
202
203 if group_end_regex.is_match(trimmed) {
205 block_stack.pop();
206 continue;
207 }
208
209 if let Some(caps) = gem_regex.captures(trimmed) {
211 let name = strip_freeze_suffix(caps.get(1).map(|m| m.as_str()).unwrap_or(""));
212 if name.is_empty() {
213 continue;
214 }
215
216 let mut version_parts = Vec::new();
218 if let Some(v) = caps.get(2) {
219 version_parts.push(strip_freeze_suffix(v.as_str()).to_string());
220 }
221 if let Some(v) = caps.get(3) {
222 let v_str = strip_freeze_suffix(v.as_str());
223 if looks_like_version_constraint(v_str) {
225 version_parts.push(v_str.to_string());
226 }
227 }
228
229 let extracted_requirement = if version_parts.is_empty() {
230 None
231 } else {
232 Some(version_parts.join(", "))
233 };
234
235 let current_groups = current_group_names(&block_stack);
236
237 let (scope, is_runtime, is_optional) = if current_groups.is_empty() {
240 (None, true, false)
242 } else if current_groups.iter().any(|g| g == "development") {
243 (Some("development".to_string()), false, true)
244 } else if current_groups.iter().any(|g| g == "test") {
245 (Some("test".to_string()), false, true)
246 } else {
247 let group = current_groups.first().cloned();
249 (group, true, false)
250 };
251
252 let purl = create_gem_purl(name, None);
254 let inherited_source = current_source(&block_stack, default_source.as_deref());
255 let extra_data = build_gemfile_dependency_extra_data(
256 caps.get(4).map(|m| m.as_str()),
257 inherited_source.as_deref(),
258 );
259
260 dependencies.push(Dependency {
261 purl,
262 extracted_requirement,
263 scope,
264 is_runtime: Some(is_runtime),
265 is_optional: Some(is_optional),
266 is_pinned: None,
267 is_direct: Some(true),
268 resolved_package: None,
269 extra_data,
270 });
271 }
272 }
273
274 let extra_data = if sources.is_empty() {
275 None
276 } else {
277 Some(HashMap::from([(
278 "sources".to_string(),
279 serde_json::Value::Array(sources.into_iter().map(serde_json::Value::String).collect()),
280 )]))
281 };
282
283 PackageData {
284 package_type: Some(PACKAGE_TYPE),
285 primary_language: Some("Ruby".to_string()),
286 dependencies,
287 extra_data,
288 datasource_id: Some(DatasourceId::Gemfile),
289 ..default_package_data()
290 }
291}
292
293fn current_group_names(block_stack: &[GemfileBlock]) -> Vec<String> {
294 block_stack
295 .iter()
296 .rev()
297 .find_map(|block| match block {
298 GemfileBlock::Group(groups) => Some(groups.clone()),
299 GemfileBlock::Source(_) => None,
300 })
301 .unwrap_or_default()
302}
303
304fn current_source(block_stack: &[GemfileBlock], default_source: Option<&str>) -> Option<String> {
305 block_stack
306 .iter()
307 .rev()
308 .find_map(|block| match block {
309 GemfileBlock::Source(source) => Some(source.clone()),
310 GemfileBlock::Group(_) => None,
311 })
312 .or_else(|| default_source.map(str::to_string))
313}
314
/// Appends `value` to `values` unless an equal string is already present.
fn push_unique_string(values: &mut Vec<String>, value: String) {
    let already_present = values.iter().any(|existing| *existing == value);
    if already_present {
        return;
    }
    values.push(value);
}
320
321fn build_gemfile_dependency_extra_data(
322 options: Option<&str>,
323 inherited_source: Option<&str>,
324) -> Option<HashMap<String, serde_json::Value>> {
325 let mut extra = HashMap::new();
326 let options = options.unwrap_or("");
327
328 if let Some(git) = extract_gemfile_quoted_option(options, "git") {
329 extra.insert(
330 "source_type".to_string(),
331 serde_json::Value::String("GIT".to_string()),
332 );
333 extra.insert("git".to_string(), serde_json::Value::String(git.clone()));
334 extra.insert("remote".to_string(), serde_json::Value::String(git));
335 }
336
337 if let Some(path) = extract_gemfile_quoted_option(options, "path") {
338 extra.insert(
339 "source_type".to_string(),
340 serde_json::Value::String("PATH".to_string()),
341 );
342 extra.insert("path".to_string(), serde_json::Value::String(path));
343 }
344
345 for key in ["branch", "ref", "tag"] {
346 if let Some(value) = extract_gemfile_quoted_option(options, key) {
347 extra.insert(key.to_string(), serde_json::Value::String(value));
348 }
349 }
350
351 let direct_source = extract_gemfile_quoted_option(options, "source");
352 if let Some(source) = direct_source {
353 extra.insert("source".to_string(), serde_json::Value::String(source));
354 } else if !extra.contains_key("source_type")
355 && let Some(source) = inherited_source
356 {
357 extra.insert(
358 "source".to_string(),
359 serde_json::Value::String(source.to_string()),
360 );
361 }
362
363 (!extra.is_empty()).then_some(extra)
364}
365
366fn extract_gemfile_quoted_option(options: &str, key: &str) -> Option<String> {
367 if options.is_empty() {
368 return None;
369 }
370
371 let pattern = format!(r#"(?:^|,\s*){}\s*:\s*["']([^"']+)["']"#, regex::escape(key));
372 Regex::new(&pattern)
373 .ok()
374 .and_then(|regex| regex.captures(options))
375 .and_then(|captures| captures.get(1).map(|m| m.as_str().to_string()))
376}
377
/// Returns `true` when `s` starts with `~`, `>`, `<`, `=`, `!` or an ASCII
/// digit. Empty input yields `false`.
fn looks_like_version_constraint(s: &str) -> bool {
    match s.chars().next() {
        Some('~' | '>' | '<' | '=' | '!') => true,
        Some(first) => first.is_ascii_digit(),
        None => false,
    }
}
387
/// Marker type whose `PackageParser` implementation handles files named `Gemfile.lock`.
388pub struct GemfileLockParser;
397
398impl PackageParser for GemfileLockParser {
399 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
400
401 fn extract_packages(path: &Path) -> Vec<PackageData> {
402 let content = match fs::read_to_string(path) {
403 Ok(c) => c,
404 Err(e) => {
405 warn!("Failed to read Gemfile.lock at {:?}: {}", path, e);
406 return vec![default_package_data_with_datasource(
407 DatasourceId::GemfileLock,
408 )];
409 }
410 };
411
412 vec![parse_gemfile_lock(&content)]
413 }
414
415 fn is_match(path: &Path) -> bool {
416 path.file_name()
417 .and_then(|n| n.to_str())
418 .is_some_and(|name| name == "Gemfile.lock")
419 || path
420 .to_str()
421 .is_some_and(|p| p.contains("data.gz-extract/") && p.ends_with("/Gemfile.lock"))
422 }
423}
424
/// Which part of a `Gemfile.lock` the line-by-line parser is currently in.
425#[derive(Debug, Clone, PartialEq)]
427enum ParseState {
/// Initial state, before any section header; lines are ignored.
428 None,
/// Inside a `GEM` section, before its `specs:` line (option lines are read here).
429 Gem,
/// Inside a `GIT` section, before its `specs:` line.
430 Git,
/// Inside a `PATH` section, before its `specs:` line.
431 Path,
/// Inside an `SVN` section, before its `specs:` line.
432 Svn,
/// After a `specs:` line of a GEM/GIT/PATH/SVN section; spec entries are read here.
433 Specs,
/// Inside the `PLATFORMS` section.
434 Platforms,
/// Inside the `BUNDLED WITH` section.
435 BundledWith,
/// Inside the `DEPENDENCIES` section.
436 Dependencies,
437}
438
/// Everything the lockfile parser collects about one gem, keyed by name in its gem map.
439#[derive(Debug, Clone, Default)]
446struct GemInfo {
/// Gem name as written in the lockfile.
447 name: String,
/// Version part of a spec entry's parenthesised text, if any.
448 version: Option<String>,
/// Platform part of a spec entry's parenthesised text (after the first `-`), if any.
449 platform: Option<String>,
/// Header of the section the spec came from: "GEM", "GIT", "PATH" or "SVN".
450 gem_type: String,
/// Value of the section's `remote:` option, if one was seen.
451 remote: Option<String>,
/// Value of the section's `revision:` option.
452 revision: Option<String>,
/// Value of the section's `ref:` option.
453 ref_field: Option<String>,
/// Value of the section's `branch:` option.
454 branch: Option<String>,
/// Value of the section's `tag:` option.
455 tag: Option<String>,
/// Whether the gem's DEPENDENCIES entry ends with `!`.
456 pinned: bool,
/// Constraint text taken from the gem's DEPENDENCIES entries.
457 requirements: Vec<String>,
458}
459
460fn parse_gemfile_lock(content: &str) -> PackageData {
462 let mut state = ParseState::None;
463 let mut dependencies = Vec::new();
464 let mut gems: HashMap<String, GemInfo> = HashMap::new();
465 let mut platforms: Vec<String> = Vec::new();
466 let mut bundler_version: Option<String> = None;
467 let mut current_gem_type = String::new();
468 let mut current_remote: Option<String> = None;
469 let mut current_options: HashMap<String, String> = HashMap::new();
470
471 let deps_regex = match Regex::new(r"^ {2}([^ \)\(,!:]+)(?: \(([^)]+)\))?(!)?$") {
473 Ok(r) => r,
474 Err(e) => {
475 warn!("Failed to compile deps regex: {}", e);
476 return default_package_data_with_datasource(DatasourceId::GemfileLock);
477 }
478 };
479
480 let spec_deps_regex = match Regex::new(r"^ {4}([^ \)\(,!:]+)(?: \(([^)]+)\))?$") {
482 Ok(r) => r,
483 Err(e) => {
484 warn!("Failed to compile spec_deps regex: {}", e);
485 return default_package_data_with_datasource(DatasourceId::GemfileLock);
486 }
487 };
488
489 let options_regex = match Regex::new(r"^ {2}([a-z]+): (.+)$") {
491 Ok(r) => r,
492 Err(e) => {
493 warn!("Failed to compile options regex: {}", e);
494 return default_package_data_with_datasource(DatasourceId::GemfileLock);
495 }
496 };
497
498 let version_regex = match Regex::new(r"^\s+(\d+(?:\.\d+)+)\s*$") {
500 Ok(r) => r,
501 Err(e) => {
502 warn!("Failed to compile version regex: {}", e);
503 return default_package_data_with_datasource(DatasourceId::GemfileLock);
504 }
505 };
506
507 for line in content.lines() {
508 let trimmed = line.trim_end();
509
510 if trimmed.is_empty() {
512 current_options.clear();
513 continue;
514 }
515
516 match trimmed {
518 "GEM" => {
519 state = ParseState::Gem;
520 current_gem_type = "GEM".to_string();
521 current_remote = None;
522 current_options.clear();
523 continue;
524 }
525 "GIT" => {
526 state = ParseState::Git;
527 current_gem_type = "GIT".to_string();
528 current_remote = None;
529 current_options.clear();
530 continue;
531 }
532 "PATH" => {
533 state = ParseState::Path;
534 current_gem_type = "PATH".to_string();
535 current_remote = None;
536 current_options.clear();
537 continue;
538 }
539 "SVN" => {
540 state = ParseState::Svn;
541 current_gem_type = "SVN".to_string();
542 current_remote = None;
543 current_options.clear();
544 continue;
545 }
546 "PLATFORMS" => {
547 state = ParseState::Platforms;
548 continue;
549 }
550 "BUNDLED WITH" => {
551 state = ParseState::BundledWith;
552 continue;
553 }
554 "DEPENDENCIES" => {
555 state = ParseState::Dependencies;
556 continue;
557 }
558 _ => {}
559 }
560
561 if trimmed.trim() == "specs:" {
565 state = match state {
566 ParseState::Gem | ParseState::Git | ParseState::Path | ParseState::Svn => {
567 ParseState::Specs
568 }
569 _ => state,
570 };
571 continue;
572 }
573
574 match state {
576 ParseState::Gem | ParseState::Git | ParseState::Path | ParseState::Svn => {
577 if let Some(caps) = options_regex.captures(line) {
579 let key = caps.get(1).map(|m| m.as_str()).unwrap_or("");
580 let value = caps.get(2).map(|m| m.as_str()).unwrap_or("");
581 current_options.insert(key.to_string(), value.to_string());
582 if key == "remote" {
583 current_remote = Some(value.to_string());
584 }
585 }
586 }
587 ParseState::Specs => {
588 if let Some(caps) = spec_deps_regex.captures(line) {
590 let name = caps.get(1).map(|m| m.as_str()).unwrap_or("").to_string();
591 let version_str = caps.get(2).map(|m| m.as_str()).unwrap_or("");
592
593 let (version, platform) = parse_version_platform(version_str);
595
596 if !name.is_empty() {
597 let gem_info = GemInfo {
598 name: name.clone(),
599 version,
600 platform,
601 gem_type: current_gem_type.clone(),
602 remote: current_remote.clone(),
603 revision: current_options.get("revision").cloned(),
604 ref_field: current_options.get("ref").cloned(),
605 branch: current_options.get("branch").cloned(),
606 tag: current_options.get("tag").cloned(),
607 pinned: false,
608 requirements: Vec::new(),
609 };
610 gems.insert(name, gem_info);
611 }
612 }
613 }
614 ParseState::Platforms => {
615 let platform = trimmed.trim();
617 if !platform.is_empty() {
618 platforms.push(platform.to_string());
619 }
620 }
621 ParseState::BundledWith => {
622 if let Some(caps) = version_regex.captures(line) {
624 bundler_version = caps.get(1).map(|m| m.as_str().to_string());
625 }
626 }
627 ParseState::Dependencies => {
628 if let Some(caps) = deps_regex.captures(line) {
630 let name = caps.get(1).map(|m| m.as_str()).unwrap_or("").to_string();
631 let version_constraint = caps.get(2).map(|m| m.as_str().to_string());
632 let pinned = caps.get(3).is_some();
633
634 if !name.is_empty() {
635 if let Some(gem) = gems.get_mut(&name) {
637 gem.pinned = pinned;
638 if let Some(vc) = &version_constraint {
639 gem.requirements.push(vc.clone());
640 }
641 } else {
642 let gem_info = GemInfo {
643 name: name.clone(),
644 version: None,
645 platform: None,
646 gem_type: "GEM".to_string(),
647 remote: None,
648 revision: None,
649 ref_field: None,
650 branch: None,
651 tag: None,
652 pinned,
653 requirements: version_constraint.into_iter().collect(),
654 };
655 gems.insert(name, gem_info);
656 }
657 }
658 }
659 }
660 ParseState::None => {}
661 }
662 }
663
664 let primary_gem = gems.values().find(|gem| gem.gem_type == "PATH").cloned();
665
666 let (
667 package_name,
668 package_version,
669 repository_homepage_url,
670 repository_download_url,
671 api_data_url,
672 download_url,
673 ) = if let Some(ref pg) = primary_gem {
674 let urls = get_rubygems_urls(&pg.name, pg.version.as_deref(), pg.platform.as_deref());
675 (
676 Some(pg.name.clone()),
677 pg.version.clone(),
678 urls.0,
679 urls.1,
680 urls.2,
681 urls.3,
682 )
683 } else {
684 (None, None, None, None, None, None)
685 };
686
687 for (_, gem) in gems {
688 if let Some(ref pg) = primary_gem
689 && gem.name == pg.name
690 {
691 continue;
692 }
693
694 let version_for_purl = gem.version.as_deref();
695 let purl = create_gem_purl(&gem.name, version_for_purl);
696
697 let extracted_requirement = if !gem.requirements.is_empty() {
698 Some(gem.requirements.join(", "))
699 } else {
700 gem.version.clone()
701 };
702
703 let extra_data = build_gem_source_extra_data(&gem);
704
705 dependencies.push(Dependency {
706 purl,
707 extracted_requirement,
708 scope: Some("dependencies".to_string()),
709 is_runtime: Some(true),
710 is_optional: Some(false),
711 is_pinned: Some(gem.pinned),
712 is_direct: Some(true),
713 resolved_package: None,
714 extra_data,
715 });
716 }
717
718 dependencies.sort_by(|left, right| {
719 left.purl
720 .as_deref()
721 .cmp(&right.purl.as_deref())
722 .then_with(|| {
723 left.extracted_requirement
724 .as_deref()
725 .cmp(&right.extracted_requirement.as_deref())
726 })
727 });
728
729 let mut extra_data = HashMap::new();
731 if !platforms.is_empty() {
732 extra_data.insert(
733 "platforms".to_string(),
734 serde_json::Value::Array(
735 platforms
736 .into_iter()
737 .map(serde_json::Value::String)
738 .collect(),
739 ),
740 );
741 }
742 if let Some(bv) = bundler_version {
743 extra_data.insert("bundler_version".to_string(), serde_json::Value::String(bv));
744 }
745
746 let purl = package_name
747 .as_deref()
748 .map(|n| create_gem_purl(n, package_version.as_deref()))
749 .unwrap_or(None);
750
751 PackageData {
752 package_type: Some(PACKAGE_TYPE),
753 name: package_name,
754 version: package_version,
755 primary_language: Some("Ruby".to_string()),
756 download_url,
757 dependencies,
758 repository_homepage_url,
759 repository_download_url,
760 api_data_url,
761 extra_data: if extra_data.is_empty() {
762 None
763 } else {
764 Some(extra_data)
765 },
766 datasource_id: Some(DatasourceId::GemfileLock),
767 purl,
768 ..default_package_data()
769 }
770}
771
772fn build_gem_source_extra_data(gem: &GemInfo) -> Option<HashMap<String, serde_json::Value>> {
773 if gem.gem_type != "GIT" && gem.gem_type != "PATH" && gem.gem_type != "SVN" {
774 return None;
775 }
776
777 let mut extra = HashMap::new();
778 extra.insert(
779 "source_type".to_string(),
780 serde_json::Value::String(gem.gem_type.clone()),
781 );
782
783 if let Some(ref remote) = gem.remote {
784 extra.insert(
785 "remote".to_string(),
786 serde_json::Value::String(remote.clone()),
787 );
788 }
789 if let Some(ref revision) = gem.revision {
790 extra.insert(
791 "revision".to_string(),
792 serde_json::Value::String(revision.clone()),
793 );
794 }
795 if let Some(ref ref_field) = gem.ref_field {
796 extra.insert(
797 "ref".to_string(),
798 serde_json::Value::String(ref_field.clone()),
799 );
800 }
801 if let Some(ref branch) = gem.branch {
802 extra.insert(
803 "branch".to_string(),
804 serde_json::Value::String(branch.clone()),
805 );
806 }
807 if let Some(ref tag) = gem.tag {
808 extra.insert("tag".to_string(), serde_json::Value::String(tag.clone()));
809 }
810
811 Some(extra)
812}
813
/// Splits a spec's parenthesised text into `(version, platform)` at the first
/// `-`. Without a `-` the whole text is the version; empty input yields
/// `(None, None)`.
fn parse_version_platform(s: &str) -> (Option<String>, Option<String>) {
    if s.is_empty() {
        return (None, None);
    }
    match s.split_once('-') {
        Some((version, platform)) => (Some(version.to_owned()), Some(platform.to_owned())),
        None => (Some(s.to_owned()), None),
    }
}
828
829fn create_gem_purl(name: &str, version: Option<&str>) -> Option<String> {
831 let mut purl = match PackageUrl::new(PACKAGE_TYPE.as_str(), name) {
832 Ok(p) => p,
833 Err(e) => {
834 warn!("Failed to create PURL for gem '{}': {}", name, e);
835 return None;
836 }
837 };
838
839 if let Some(v) = version
840 && let Err(e) = purl.with_version(v)
841 {
842 warn!("Failed to set version '{}' for gem '{}': {}", v, name, e);
843 }
844
845 Some(purl.to_string())
846}
847
848fn rubygems_homepage_url(name: &str, version: Option<&str>) -> Option<String> {
849 if name.is_empty() {
850 return None;
851 }
852
853 if let Some(v) = version {
854 let v = v.trim().trim_matches('/');
855 Some(format!("https://rubygems.org/gems/{}/versions/{}", name, v))
856 } else {
857 Some(format!("https://rubygems.org/gems/{}", name))
858 }
859}
860
861fn rubygems_download_url(
862 name: &str,
863 version: Option<&str>,
864 platform: Option<&str>,
865) -> Option<String> {
866 if name.is_empty() || version.is_none() {
867 return None;
868 }
869
870 let name = name.trim().trim_matches('/');
871 let version = version?.trim().trim_matches('/');
872
873 let version_plat = if let Some(p) = platform {
874 if p != "ruby" {
875 format!("{}-{}", version, p)
876 } else {
877 version.to_string()
878 }
879 } else {
880 version.to_string()
881 };
882
883 Some(format!(
884 "https://rubygems.org/downloads/{}-{}.gem",
885 name, version_plat
886 ))
887}
888
/// Returns the rubygems.org API URL for a gem: the v2 per-version endpoint
/// when `version` is given, otherwise the v1 versions listing. Returns `None`
/// for an empty name.
fn rubygems_api_url(name: &str, version: Option<&str>) -> Option<String> {
    if name.is_empty() {
        return None;
    }

    let url = match version {
        Some(v) => format!(
            "https://rubygems.org/api/v2/rubygems/{}/versions/{}.json",
            name, v
        ),
        None => format!("https://rubygems.org/api/v1/versions/{}.json", name),
    };
    Some(url)
}
906
907fn get_rubygems_urls(
908 name: &str,
909 version: Option<&str>,
910 platform: Option<&str>,
911) -> (
912 Option<String>,
913 Option<String>,
914 Option<String>,
915 Option<String>,
916) {
917 let repository_homepage_url = rubygems_homepage_url(name, version);
918 let repository_download_url = rubygems_download_url(name, version, platform);
919 let api_data_url = rubygems_api_url(name, version);
920 let download_url = repository_download_url.clone();
921
922 (
923 repository_homepage_url,
924 repository_download_url,
925 api_data_url,
926 download_url,
927 )
928}
929
930fn default_package_data() -> PackageData {
932 PackageData {
933 package_type: Some(PACKAGE_TYPE),
934 primary_language: Some("Ruby".to_string()),
935 ..Default::default()
936 }
937}
938
939fn default_package_data_with_datasource(datasource_id: DatasourceId) -> PackageData {
940 PackageData {
941 datasource_id: Some(datasource_id),
942 ..default_package_data()
943 }
944}
945
/// Marker type whose `PackageParser` implementation handles `*.gemspec` files.
946pub struct GemspecParser;
956
957impl PackageParser for GemspecParser {
958 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
959
960 fn extract_packages(path: &Path) -> Vec<PackageData> {
961 let content = match fs::read_to_string(path) {
962 Ok(c) => c,
963 Err(e) => {
964 warn!("Failed to read .gemspec at {:?}: {}", path, e);
965 return vec![default_package_data_with_datasource(DatasourceId::Gemspec)];
966 }
967 };
968
969 vec![parse_gemspec_with_context(&content, path.parent())]
970 }
971
972 fn is_match(path: &Path) -> bool {
973 path.extension()
974 .and_then(|ext| ext.to_str())
975 .is_some_and(|ext| ext == "gemspec")
976 }
977}
978
979fn clean_gemspec_value(s: &str) -> String {
981 let s = strip_freeze_suffix(s).trim();
982
983 let s = if let Some(pos) = s.find(" #") {
984 s[..pos].trim()
985 } else {
986 s
987 };
988
989 let s = if let Some(stripped) = s.strip_prefix("%q{") {
990 stripped.strip_suffix('}').unwrap_or(stripped)
991 } else if let Some(stripped) = s.strip_prefix("%q<") {
992 stripped.strip_suffix('>').unwrap_or(stripped)
993 } else if let Some(stripped) = s.strip_prefix("%q[") {
994 stripped.strip_suffix(']').unwrap_or(stripped)
995 } else if let Some(stripped) = s.strip_prefix("%q(") {
996 stripped.strip_suffix(')').unwrap_or(stripped)
997 } else {
998 s
999 };
1000
1001 let s = s
1002 .trim_start_matches('"')
1003 .trim_end_matches('"')
1004 .trim_start_matches('\'')
1005 .trim_end_matches('\'');
1006 let s = strip_freeze_suffix(s).trim();
1007 s.to_string()
1008}
1009
1010fn extract_ruby_array(s: &str) -> Vec<String> {
1012 let s = strip_freeze_suffix(s.trim());
1013 let s = s.trim_start_matches('[').trim_end_matches(']');
1014 let item_re = match Regex::new(r#"["']([^"']*?)["'](?:\.freeze)?"#) {
1015 Ok(r) => r,
1016 Err(_) => return Vec::new(),
1017 };
1018 item_re
1019 .captures_iter(s)
1020 .filter_map(|cap| cap.get(1).map(|m| m.as_str().to_string()))
1021 .collect()
1022}
1023
1024fn extract_all_ruby_values(s: &str) -> Vec<String> {
1025 let value_re = match Regex::new(r#"%q[\{<\[(]([^\}>\])]+)[\}>\])]|["']([^"']+)["']"#) {
1026 Ok(r) => r,
1027 Err(_) => return Vec::new(),
1028 };
1029
1030 value_re
1031 .captures_iter(s)
1032 .filter_map(|caps| caps.get(1).or_else(|| caps.get(2)))
1033 .map(|m| clean_gemspec_value(m.as_str()))
1034 .collect()
1035}
1036
1037fn extract_first_ruby_value(s: &str) -> Option<String> {
1038 extract_all_ruby_values(s).into_iter().next()
1039}
1040
/// Returns the trimmed text after the first top-level comma of an argument
/// list, or `""` if there is none.
///
/// Commas inside quotes, or nested in `[]`, `{}`, `<>` or `()`, do not count.
/// Inside quotes a backslash escapes the following character.
fn after_first_argument(args: &str) -> &str {
    let mut bracket_depth = 0usize;
    let mut paren_depth = 0usize;
    let mut open_quote: Option<char> = None;
    let mut skip_next = false;

    for (idx, ch) in args.char_indices() {
        if skip_next {
            skip_next = false;
            continue;
        }

        if let Some(quote) = open_quote {
            if ch == '\\' {
                skip_next = true;
            } else if ch == quote {
                open_quote = None;
            }
            continue;
        }

        match ch {
            '\'' | '"' => open_quote = Some(ch),
            '[' | '{' | '<' => bracket_depth += 1,
            ']' | '}' | '>' => bracket_depth = bracket_depth.saturating_sub(1),
            '(' => paren_depth += 1,
            ')' => paren_depth = paren_depth.saturating_sub(1),
            ',' if bracket_depth == 0 && paren_depth == 0 => return args[idx + 1..].trim(),
            _ => {}
        }
    }

    ""
}
1078
1079fn resolve_variable_version(var_name: &str, contexts: &[String]) -> Option<String> {
1084 let var_name = var_name.trim();
1085 if var_name.is_empty() {
1086 return None;
1087 }
1088
1089 for candidate in candidate_constant_names(var_name) {
1090 let escaped = regex::escape(&candidate);
1091 let pattern = format!(r#"(?m)^\s*{}\s*=\s*["']([^"']+)["']"#, escaped);
1092 let Ok(re) = Regex::new(&pattern) else {
1093 continue;
1094 };
1095
1096 for context in contexts {
1097 if let Some(caps) = re.captures(context) {
1098 return caps.get(1).map(|m| m.as_str().to_string());
1099 }
1100 }
1101 }
1102
1103 None
1104}
1105
1106fn resolve_variable_array(var_name: &str, contexts: &[String]) -> Option<Vec<String>> {
1107 let var_name = var_name.trim();
1108 if var_name.is_empty() {
1109 return None;
1110 }
1111
1112 for candidate in candidate_constant_names(var_name) {
1113 let escaped = regex::escape(&candidate);
1114 let pattern = format!(r#"(?m)^\s*{}\s*=\s*(\[[^\n]+\])"#, escaped);
1115 let Ok(re) = Regex::new(&pattern) else {
1116 continue;
1117 };
1118
1119 for context in contexts {
1120 if let Some(caps) = re.captures(context)
1121 && let Some(raw) = caps.get(1)
1122 {
1123 let values = extract_ruby_array(raw.as_str());
1124 if !values.is_empty() {
1125 return Some(values);
1126 }
1127 }
1128 }
1129 }
1130
1131 None
1132}
1133
/// Returns the names to look up for a constant reference: the name itself,
/// followed by its last `::`-separated segment when that differs.
fn candidate_constant_names(var_name: &str) -> Vec<String> {
    let last_segment = var_name
        .split("::")
        .last()
        .filter(|segment| *segment != var_name);

    std::iter::once(var_name)
        .chain(last_segment)
        .map(String::from)
        .collect()
}
1143
1144fn load_required_ruby_contexts(content: &str, base_dir: Option<&Path>) -> Vec<String> {
1145 let mut contexts = vec![content.to_string()];
1146 let Some(base_dir) = base_dir else {
1147 return contexts;
1148 };
1149
1150 let require_re = match Regex::new(r#"(?m)^\s*require(?:_relative)?\s+["']([^"']+)["']"#) {
1151 Ok(re) => re,
1152 Err(_) => return contexts,
1153 };
1154
1155 for caps in require_re.captures_iter(content) {
1156 let Some(required) = caps.get(1).map(|m| m.as_str()) else {
1157 continue;
1158 };
1159 for candidate in candidate_require_paths(base_dir, required) {
1160 if let Ok(required_content) = fs::read_to_string(&candidate) {
1161 contexts.push(required_content);
1162 break;
1163 }
1164 }
1165 }
1166
1167 contexts
1168}
1169
/// Returns the file paths a `require` target may refer to: the target
/// (with `::` turned into `/` and `.rb` appended if missing) directly under
/// `base_dir`, then under `base_dir/lib`.
fn candidate_require_paths(base_dir: &Path, required: &str) -> Vec<PathBuf> {
    let mut filename = required.replace("::", "/");
    if !filename.ends_with(".rb") {
        filename.push_str(".rb");
    }

    let direct = base_dir.join(&filename);
    let under_lib = base_dir.join("lib").join(&filename);
    vec![direct, under_lib]
}
1183
/// Returns `true` when `s` contains `::` or starts with an ASCII uppercase
/// letter, i.e. it reads like a Ruby constant rather than a literal.
fn looks_like_constant_reference(s: &str) -> bool {
    if s.contains("::") {
        return true;
    }
    matches!(s.chars().next(), Some(first) if first.is_ascii_uppercase())
}
1187
/// Test-only shorthand for `parse_gemspec_with_context` with no base
/// directory, so only `content` itself is available for constant lookups.
1188#[cfg(test)]
1190fn parse_gemspec(content: &str) -> PackageData {
1191 parse_gemspec_with_context(content, None)
1192}
1193
1194fn parse_gemspec_with_context(content: &str, base_dir: Option<&Path>) -> PackageData {
1195 let contexts = load_required_ruby_contexts(content, base_dir);
1196
1197 let field_re = match Regex::new(
1200 r#"(?m)^\s*\w+\.(name|version|summary|description|homepage|license)\s*=\s*(.+)$"#,
1201 ) {
1202 Ok(r) => r,
1203 Err(e) => {
1204 warn!("Failed to compile gemspec field regex: {}", e);
1205 return default_package_data_with_datasource(DatasourceId::Gemspec);
1206 }
1207 };
1208
1209 let licenses_re = match Regex::new(r#"(?m)^\s*\w+\.licenses\s*=\s*(.+)$"#) {
1210 Ok(r) => r,
1211 Err(e) => {
1212 warn!("Failed to compile licenses regex: {}", e);
1213 return default_package_data_with_datasource(DatasourceId::Gemspec);
1214 }
1215 };
1216
1217 let authors_re = match Regex::new(r#"(?m)^\s*\w+\.(?:authors|author)\s*=\s*(.+)$"#) {
1218 Ok(r) => r,
1219 Err(e) => {
1220 warn!("Failed to compile authors regex: {}", e);
1221 return default_package_data_with_datasource(DatasourceId::Gemspec);
1222 }
1223 };
1224
1225 let email_re = match Regex::new(r#"(?m)^\s*\w+\.email\s*=\s*(.+)$"#) {
1226 Ok(r) => r,
1227 Err(e) => {
1228 warn!("Failed to compile email regex: {}", e);
1229 return default_package_data_with_datasource(DatasourceId::Gemspec);
1230 }
1231 };
1232
1233 let dependency_call_re = match Regex::new(
1234 r#"(?m)^\s*\w+\.(add_(?:development_|runtime_)?dependency)\s*\(?(.+?)\)?\s*$"#,
1235 ) {
1236 Ok(r) => r,
1237 Err(e) => {
1238 warn!("Failed to compile gemspec dependency regex: {}", e);
1239 return default_package_data_with_datasource(DatasourceId::Gemspec);
1240 }
1241 };
1242
1243 let mut name: Option<String> = None;
1244 let mut version: Option<String> = None;
1245 let mut summary: Option<String> = None;
1246 let mut description: Option<String> = None;
1247 let mut homepage: Option<String> = None;
1248 let mut license: Option<String> = None;
1249 let mut licenses: Vec<String> = Vec::new();
1250 let mut authors: Vec<String> = Vec::new();
1251 let mut emails: Vec<String> = Vec::new();
1252 let mut dependencies: Vec<Dependency> = Vec::new();
1253
1254 for caps in field_re.captures_iter(content) {
1256 let field_name = match caps.get(1) {
1257 Some(m) => m.as_str(),
1258 None => continue,
1259 };
1260 let raw_value = match caps.get(2) {
1261 Some(m) => m.as_str().trim(),
1262 None => continue,
1263 };
1264
1265 match field_name {
1266 "name" => {
1267 let cleaned = clean_gemspec_value(raw_value);
1268 name = if looks_like_constant_reference(&cleaned) {
1269 resolve_variable_version(&cleaned, &contexts).or(Some(cleaned))
1270 } else {
1271 Some(cleaned)
1272 }
1273 }
1274 "version" => {
1275 let cleaned = clean_gemspec_value(raw_value);
1276 if looks_like_constant_reference(&cleaned) {
1278 version = resolve_variable_version(&cleaned, &contexts).or(Some(cleaned));
1279 } else {
1280 version = Some(cleaned);
1281 }
1282 }
1283 "summary" => {
1284 let cleaned = clean_gemspec_value(raw_value);
1285 summary = if looks_like_constant_reference(&cleaned) {
1286 resolve_variable_version(&cleaned, &contexts).or(Some(cleaned))
1287 } else {
1288 Some(cleaned)
1289 }
1290 }
1291 "description" => description = Some(clean_gemspec_value(raw_value)),
1292 "homepage" => {
1293 let cleaned = clean_gemspec_value(raw_value);
1294 homepage = if looks_like_constant_reference(&cleaned) {
1295 resolve_variable_version(&cleaned, &contexts).or(Some(cleaned))
1296 } else {
1297 Some(cleaned)
1298 }
1299 }
1300 "license" => license = Some(clean_gemspec_value(raw_value)),
1301 _ => {}
1302 }
1303 }
1304
1305 for caps in licenses_re.captures_iter(content) {
1307 if let Some(raw) = caps.get(1) {
1308 licenses = extract_ruby_array(raw.as_str());
1309 }
1310 }
1311
1312 for caps in authors_re.captures_iter(content) {
1314 if let Some(raw) = caps.get(1) {
1315 let raw_str = raw.as_str().trim();
1316 if raw_str.starts_with('[') {
1317 authors = extract_ruby_array(raw_str);
1318 } else if looks_like_constant_reference(raw_str) {
1319 authors = resolve_variable_array(raw_str, &contexts)
1320 .unwrap_or_else(|| vec![clean_gemspec_value(raw_str)]);
1321 } else {
1322 authors.push(clean_gemspec_value(raw_str));
1323 }
1324 }
1325 }
1326
1327 for caps in email_re.captures_iter(content) {
1329 if let Some(raw) = caps.get(1) {
1330 let raw_str = raw.as_str().trim();
1331 if raw_str.starts_with('[') {
1332 emails = extract_ruby_array(raw_str);
1333 } else if looks_like_constant_reference(raw_str) {
1334 emails = resolve_variable_array(raw_str, &contexts)
1335 .unwrap_or_else(|| vec![clean_gemspec_value(raw_str)]);
1336 } else {
1337 emails.push(clean_gemspec_value(raw_str));
1338 }
1339 }
1340 }
1341
1342 let mut parties: Vec<Party> = Vec::new();
1344
1345 if authors.len() == 1 && emails.len() == 1 {
1346 let email_str = emails.first().map(String::as_str);
1347 let (parsed_email_name, parsed_email) = match email_str {
1348 Some(e) => split_name_email(e),
1349 None => (None, None),
1350 };
1351
1352 parties.push(Party {
1353 r#type: Some("person".to_string()),
1354 role: Some("author".to_string()),
1355 name: authors.first().cloned().or(parsed_email_name),
1356 email: parsed_email.or_else(|| {
1357 email_str
1358 .filter(|e| e.contains('@') && !e.contains('<'))
1359 .map(|e| e.to_string())
1360 }),
1361 url: None,
1362 organization: None,
1363 organization_url: None,
1364 timezone: None,
1365 });
1366 } else {
1367 for author_name in authors {
1368 parties.push(Party {
1369 r#type: Some("person".to_string()),
1370 role: Some("author".to_string()),
1371 name: Some(author_name),
1372 email: None,
1373 url: None,
1374 organization: None,
1375 organization_url: None,
1376 timezone: None,
1377 });
1378 }
1379
1380 for email_str in emails {
1381 let (parsed_email_name, parsed_email) = if email_str.contains('<') {
1382 split_name_email(&email_str)
1383 } else {
1384 (None, None)
1385 };
1386 parties.push(Party {
1387 r#type: Some("person".to_string()),
1388 role: Some("author".to_string()),
1389 name: parsed_email_name,
1390 email: parsed_email.or_else(|| email_str.contains('@').then_some(email_str)),
1391 url: None,
1392 organization: None,
1393 organization_url: None,
1394 timezone: None,
1395 });
1396 }
1397 }
1398
1399 for caps in dependency_call_re.captures_iter(content) {
1400 let method = match caps.get(1) {
1401 Some(m) => m.as_str(),
1402 None => continue,
1403 };
1404 let args = match caps.get(2) {
1405 Some(m) => m.as_str(),
1406 None => continue,
1407 };
1408
1409 let Some(dep_name) = extract_first_ruby_value(args) else {
1410 continue;
1411 };
1412 let version_parts = extract_all_ruby_values(after_first_argument(args));
1413 let extracted_requirement = if version_parts.is_empty() {
1414 None
1415 } else {
1416 Some(version_parts.join(", "))
1417 };
1418 let purl = create_gem_purl(&dep_name, None);
1419 let is_development = method == "add_development_dependency";
1420 let scope = if is_development {
1421 "development"
1422 } else {
1423 "runtime"
1424 };
1425
1426 dependencies.push(Dependency {
1427 purl,
1428 extracted_requirement,
1429 scope: Some(scope.to_string()),
1430 is_runtime: Some(!is_development),
1431 is_optional: Some(is_development),
1432 is_pinned: None,
1433 is_direct: Some(true),
1434 resolved_package: None,
1435 extra_data: None,
1436 });
1437 }
1438
1439 let extracted_license_statement = if !licenses.is_empty() {
1441 Some(licenses.join(" AND "))
1442 } else {
1443 license
1444 };
1445
1446 let declared_license_expression = None;
1447 let declared_license_expression_spdx = None;
1448
1449 let final_description = description.or(summary);
1451
1452 let purl = name
1454 .as_deref()
1455 .map(|n| create_gem_purl(n, version.as_deref()))
1456 .unwrap_or(None);
1457
1458 let (repository_homepage_url, repository_download_url, api_data_url, download_url) =
1459 if let Some(n) = name.as_deref() {
1460 get_rubygems_urls(n, version.as_deref(), None)
1461 } else {
1462 (None, None, None, None)
1463 };
1464
1465 PackageData {
1466 package_type: Some(PACKAGE_TYPE),
1467 name,
1468 version,
1469 primary_language: Some("Ruby".to_string()),
1470 description: final_description,
1471 homepage_url: homepage,
1472 download_url,
1473 declared_license_expression,
1474 declared_license_expression_spdx,
1475 extracted_license_statement,
1476 parties,
1477 dependencies,
1478 repository_homepage_url,
1479 repository_download_url,
1480 api_data_url,
1481 datasource_id: Some(DatasourceId::Gemspec),
1482 purl,
1483 ..default_package_data()
1484 }
1485}
1486
1487const MAX_ARCHIVE_SIZE: u64 = 100 * 1024 * 1024; const MAX_FILE_SIZE: u64 = 50 * 1024 * 1024; const MAX_COMPRESSION_RATIO: f64 = 100.0; pub struct GemArchiveParser;
1502
1503impl PackageParser for GemArchiveParser {
1504 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1505
1506 fn extract_packages(path: &Path) -> Vec<PackageData> {
1507 vec![match extract_gem_archive(path) {
1508 Ok(data) => data,
1509 Err(e) => {
1510 warn!("Failed to extract .gem archive at {:?}: {}", path, e);
1511 default_package_data_with_datasource(DatasourceId::GemArchive)
1512 }
1513 }]
1514 }
1515
1516 fn is_match(path: &Path) -> bool {
1517 path.extension()
1518 .and_then(|ext| ext.to_str())
1519 .is_some_and(|ext| ext == "gem")
1520 }
1521}
1522
1523fn extract_gem_archive(path: &Path) -> Result<PackageData, String> {
1524 let file_metadata =
1525 fs::metadata(path).map_err(|e| format!("Failed to read file metadata: {}", e))?;
1526 let archive_size = file_metadata.len();
1527
1528 if archive_size > MAX_ARCHIVE_SIZE {
1529 return Err(format!(
1530 "Archive too large: {} bytes (limit: {} bytes)",
1531 archive_size, MAX_ARCHIVE_SIZE
1532 ));
1533 }
1534
1535 let file = File::open(path).map_err(|e| format!("Failed to open archive: {}", e))?;
1536 let mut archive = Archive::new(file);
1537
1538 for entry_result in archive
1539 .entries()
1540 .map_err(|e| format!("Failed to read tar entries: {}", e))?
1541 {
1542 let entry = entry_result.map_err(|e| format!("Failed to read tar entry: {}", e))?;
1543 let entry_path = entry
1544 .path()
1545 .map_err(|e| format!("Failed to get entry path: {}", e))?;
1546
1547 if entry_path.to_str() == Some("metadata.gz") {
1548 let entry_size = entry.size();
1549 if entry_size > MAX_FILE_SIZE {
1550 return Err(format!(
1551 "metadata.gz too large: {} bytes (limit: {} bytes)",
1552 entry_size, MAX_FILE_SIZE
1553 ));
1554 }
1555
1556 let mut decoder = GzDecoder::new(entry);
1557 let mut content = String::new();
1558 decoder
1559 .read_to_string(&mut content)
1560 .map_err(|e| format!("Failed to decompress metadata.gz: {}", e))?;
1561
1562 let uncompressed_size = content.len() as u64;
1563 if entry_size > 0 {
1564 let ratio = uncompressed_size as f64 / entry_size as f64;
1565 if ratio > MAX_COMPRESSION_RATIO {
1566 return Err(format!(
1567 "Suspicious compression ratio: {:.2}:1 (limit: {:.0}:1)",
1568 ratio, MAX_COMPRESSION_RATIO
1569 ));
1570 }
1571 }
1572 if uncompressed_size > MAX_FILE_SIZE {
1573 return Err(format!(
1574 "Decompressed metadata too large: {} bytes (limit: {} bytes)",
1575 uncompressed_size, MAX_FILE_SIZE
1576 ));
1577 }
1578
1579 return parse_gem_metadata_yaml(&content, DatasourceId::GemArchive);
1580 }
1581 }
1582
1583 Err("metadata.gz not found in .gem archive".to_string())
1584}
1585
1586fn parse_gem_metadata_yaml(
1587 content: &str,
1588 datasource_id: DatasourceId,
1589) -> Result<PackageData, String> {
1590 let cleaned = clean_ruby_yaml_tags(content);
1594
1595 let yaml: serde_yaml::Value =
1596 serde_yaml::from_str(&cleaned).map_err(|e| format!("Failed to parse YAML: {}", e))?;
1597
1598 let name = yaml_string(&yaml, "name");
1599 let version = yaml.get("version").and_then(|v| {
1600 if v.is_string() {
1602 v.as_str().map(|s| s.to_string())
1603 } else {
1604 yaml_string(v, "version")
1605 }
1606 });
1607 let description = yaml_string(&yaml, "description").or_else(|| yaml_string(&yaml, "summary"));
1608 let homepage = yaml_string(&yaml, "homepage");
1609 let summary = yaml_string(&yaml, "summary");
1610
1611 let licenses: Vec<String> = yaml
1613 .get("licenses")
1614 .and_then(|v| v.as_sequence())
1615 .map(|seq| {
1616 seq.iter()
1617 .filter_map(|item| item.as_str().map(|s| s.to_string()))
1618 .collect()
1619 })
1620 .unwrap_or_default();
1621
1622 let extracted_license_statement = if !licenses.is_empty() {
1624 Some(licenses.join(" AND "))
1625 } else {
1626 None
1627 };
1628
1629 let license_expression = None;
1630 let license_expression_spdx = None;
1631
1632 let authors: Vec<String> = yaml
1634 .get("authors")
1635 .and_then(|v| v.as_sequence())
1636 .map(|seq| {
1637 seq.iter()
1638 .filter_map(|item| item.as_str().map(|s| s.to_string()))
1639 .collect()
1640 })
1641 .unwrap_or_default();
1642
1643 let emails: Vec<String> = yaml
1644 .get("email")
1645 .map(|v| {
1646 if let Some(seq) = v.as_sequence() {
1647 seq.iter()
1648 .filter_map(|item| item.as_str().map(|s| s.to_string()))
1649 .collect()
1650 } else if let Some(s) = v.as_str() {
1651 vec![s.to_string()]
1652 } else {
1653 Vec::new()
1654 }
1655 })
1656 .unwrap_or_default();
1657
1658 let mut parties: Vec<Party> = Vec::new();
1660 let max_len = authors.len().max(emails.len());
1661 for i in 0..max_len {
1662 let author_name = authors.get(i).map(|s| s.as_str());
1663 let email_str = emails.get(i).map(|s| s.as_str());
1664
1665 let (parsed_email_name, parsed_email) = match email_str {
1666 Some(e) if e.contains('<') => split_name_email(e),
1667 None => (None, None),
1668 _ => (None, None),
1669 };
1670
1671 let party_name = author_name.map(|s| s.to_string()).or(parsed_email_name);
1672
1673 parties.push(Party {
1674 r#type: Some("person".to_string()),
1675 role: Some("author".to_string()),
1676 name: party_name,
1677 email: parsed_email.or_else(|| {
1678 email_str
1679 .filter(|e| e.contains('@') && !e.contains('<'))
1680 .map(|e| e.to_string())
1681 }),
1682 url: None,
1683 organization: None,
1684 organization_url: None,
1685 timezone: None,
1686 });
1687 }
1688
1689 let dependencies = parse_gem_yaml_dependencies(&yaml);
1691
1692 let metadata = yaml.get("metadata");
1693
1694 let bug_tracking_url = metadata.and_then(|m| yaml_string(m, "bug_tracking_uri"));
1695
1696 let code_view_url = metadata.and_then(|m| yaml_string(m, "source_code_uri"));
1697
1698 let vcs_url = code_view_url
1699 .clone()
1700 .or_else(|| metadata.and_then(|m| yaml_string(m, "homepage_uri")));
1701
1702 let file_references = metadata
1703 .and_then(|m| m.get("files"))
1704 .and_then(|f| f.as_sequence())
1705 .map(|seq| {
1706 seq.iter()
1707 .filter_map(|v| v.as_str())
1708 .map(|s| crate::models::FileReference {
1709 path: s.to_string(),
1710 size: None,
1711 sha1: None,
1712 md5: None,
1713 sha256: None,
1714 sha512: None,
1715 extra_data: None,
1716 })
1717 .collect::<Vec<_>>()
1718 })
1719 .unwrap_or_default();
1720
1721 let release_date = yaml_string(&yaml, "date").and_then(|d| {
1722 if d.len() >= 10 {
1723 Some(d[..10].to_string())
1724 } else {
1725 None
1726 }
1727 });
1728
1729 let purl = name
1730 .as_deref()
1731 .map(|n| create_gem_purl(n, version.as_deref()))
1732 .unwrap_or(None);
1733
1734 let platform = yaml_string(&yaml, "platform");
1735 let (repository_homepage_url, repository_download_url, api_data_url, download_url) =
1736 if let Some(n) = name.as_deref() {
1737 get_rubygems_urls(n, version.as_deref(), platform.as_deref())
1738 } else {
1739 (None, None, None, None)
1740 };
1741
1742 let qualifiers = if let Some(ref p) = platform {
1743 if p != "ruby" {
1744 let mut q = HashMap::new();
1745 q.insert("platform".to_string(), p.clone());
1746 Some(q)
1747 } else {
1748 None
1749 }
1750 } else {
1751 None
1752 };
1753
1754 Ok(PackageData {
1755 package_type: Some(PACKAGE_TYPE),
1756 name,
1757 version,
1758 qualifiers,
1759 primary_language: Some("Ruby".to_string()),
1760 description: description.or(summary),
1761 release_date,
1762 homepage_url: homepage,
1763 download_url,
1764 bug_tracking_url,
1765 code_view_url,
1766 declared_license_expression: license_expression,
1767 declared_license_expression_spdx: license_expression_spdx,
1768 extracted_license_statement,
1769 file_references,
1770 parties,
1771 dependencies,
1772 repository_homepage_url,
1773 repository_download_url,
1774 api_data_url,
1775 datasource_id: Some(datasource_id),
1776 purl,
1777 vcs_url,
1778 ..default_package_data()
1779 })
1780}
1781
1782fn clean_ruby_yaml_tags(content: &str) -> String {
1784 let tag_re = match Regex::new(r"!ruby/\S+") {
1785 Ok(r) => r,
1786 Err(_) => return content.to_string(),
1787 };
1788 tag_re.replace_all(content, "").to_string()
1789}
1790
1791fn yaml_string(yaml: &serde_yaml::Value, key: &str) -> Option<String> {
1792 yaml.get(key)
1793 .and_then(|v| v.as_str())
1794 .filter(|s| !s.is_empty())
1795 .map(|s| s.to_string())
1796}
1797
1798fn parse_gem_yaml_dependencies(yaml: &serde_yaml::Value) -> Vec<Dependency> {
1799 let mut dependencies = Vec::new();
1800
1801 let deps_seq = match yaml.get("dependencies").and_then(|v| v.as_sequence()) {
1802 Some(seq) => seq,
1803 None => return dependencies,
1804 };
1805
1806 for dep_value in deps_seq {
1807 let dep_name = match yaml_string(dep_value, "name") {
1808 Some(n) => n,
1809 None => continue,
1810 };
1811
1812 let dep_type = yaml_string(dep_value, "type");
1813 let is_development = dep_type.as_deref() == Some(":development");
1814
1815 let requirements = dep_value
1817 .get("requirement")
1818 .or_else(|| dep_value.get("version_requirements"))
1819 .and_then(|req| req.get("requirements"))
1820 .and_then(|reqs| reqs.as_sequence());
1821
1822 let extracted_requirement = requirements.map(|reqs| {
1823 let parts: Vec<String> = reqs
1824 .iter()
1825 .filter_map(|req| {
1826 let seq = req.as_sequence()?;
1827 if seq.len() >= 2 {
1828 let op = seq[0].as_str().unwrap_or("");
1829 let ver = seq[1].get("version").and_then(|v| v.as_str()).unwrap_or("");
1830 if op == ">=" && ver == "0" {
1831 None
1833 } else if op.is_empty() || ver.is_empty() {
1834 None
1835 } else {
1836 Some(format!("{} {}", op, ver))
1837 }
1838 } else {
1839 None
1840 }
1841 })
1842 .collect();
1843 parts.join(", ")
1844 });
1845
1846 let extracted_requirement = extracted_requirement
1847 .filter(|s| !s.is_empty())
1848 .or_else(|| Some(String::new()));
1849
1850 let (scope, is_runtime, is_optional) = if is_development {
1851 (Some("development".to_string()), false, true)
1852 } else {
1853 (Some("runtime".to_string()), true, false)
1854 };
1855
1856 let purl = create_gem_purl(&dep_name, None);
1857
1858 dependencies.push(Dependency {
1859 purl,
1860 extracted_requirement,
1861 scope,
1862 is_runtime: Some(is_runtime),
1863 is_optional: Some(is_optional),
1864 is_pinned: None,
1865 is_direct: Some(true),
1866 resolved_package: None,
1867 extra_data: None,
1868 });
1869 }
1870
1871 dependencies
1872}
1873
1874pub struct GemMetadataExtractedParser;
1879
1880impl PackageParser for GemMetadataExtractedParser {
1881 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1882
1883 fn extract_packages(path: &Path) -> Vec<PackageData> {
1884 vec![match extract_gem_metadata_extracted(path) {
1885 Ok(data) => data,
1886 Err(e) => {
1887 warn!("Failed to extract gem metadata from {:?}: {}", path, e);
1888 default_package_data_with_datasource(DatasourceId::GemArchiveExtracted)
1889 }
1890 }]
1891 }
1892
1893 fn is_match(path: &Path) -> bool {
1894 path.to_str()
1895 .is_some_and(|p| p.contains("metadata.gz-extract"))
1896 }
1897}
1898
1899fn extract_gem_metadata_extracted(path: &Path) -> Result<PackageData, String> {
1900 let content = fs::read_to_string(path)
1901 .map_err(|e| format!("Failed to read metadata.gz-extract file: {}", e))?;
1902
1903 parse_gem_metadata_yaml(&content, DatasourceId::GemArchiveExtracted)
1904}
1905
// Parser registrations for the Ruby formats handled in this file.
//
// NOTE(review): the `register_parser!` definition is not visible here. Judging
// by the values, each invocation passes a human-readable description, a list
// of path glob patterns, the package type, the primary language, and an
// optional documentation URL — confirm against the macro definition.

// Gemfile manifests, including those found under `data.gz-extract/`.
crate::register_parser!(
    "Ruby Gemfile manifest",
    &["**/Gemfile", "**/data.gz-extract/Gemfile"],
    "gem",
    "Ruby",
    Some("https://bundler.io/man/gemfile.5.html"),
);

// Gemfile.lock lockfiles, including those found under `data.gz-extract/`.
crate::register_parser!(
    "Ruby Gemfile.lock lockfile",
    &["**/Gemfile.lock", "**/data.gz-extract/Gemfile.lock"],
    "gem",
    "Ruby",
    Some("https://bundler.io/man/gemfile.5.html"),
);

// `.gemspec` manifests anywhere, plus the `data.gz-extract/` and
// `specifications/` locations listed explicitly.
crate::register_parser!(
    "Ruby .gemspec manifest",
    &[
        "**/*.gemspec",
        "**/data.gz-extract/*.gemspec",
        "**/specifications/*.gemspec"
    ],
    "gem",
    "Ruby",
    Some("https://guides.rubygems.org/specification-reference/"),
);

// Packaged `.gem` archives.
crate::register_parser!(
    "Ruby .gem archive",
    &["**/*.gem"],
    "gem",
    "Ruby",
    Some("https://guides.rubygems.org/specification-reference/"),
);

// Gem metadata files named `metadata.gz-extract`.
crate::register_parser!(
    "Ruby gem metadata (extracted)",
    &["**/metadata.gz-extract"],
    "gem",
    "Ruby",
    Some("https://guides.rubygems.org/specification-reference/"),
);
1950
#[cfg(test)]
mod tests {
    use super::{clean_gemspec_value, parse_gemspec};

    /// A `%q{` literal with no closing brace still yields its text.
    #[test]
    fn test_clean_gemspec_value_handles_unterminated_percent_q() {
        let cleaned = clean_gemspec_value("%q{Arel is a SQL AST manager for Ruby. It");
        assert_eq!(cleaned, "Arel is a SQL AST manager for Ruby. It");
    }

    /// Both `add_runtime_dependency` and `add_dependency` produce
    /// runtime-scoped dependencies carrying their version requirement.
    #[test]
    fn test_parse_gemspec_runtime_dependency_scope() {
        let content = r#"
Gem::Specification.new do |spec|
  spec.name = "demo"
  spec.version = "1.0.0"
  spec.add_runtime_dependency "rack", "~> 3.0"
  spec.add_dependency "thor", ">= 1.0"
end
"#;

        let package_data = parse_gemspec(content);

        // Comparing the whole list checks the dependency count as well as
        // each scope and requirement.
        let observed: Vec<(Option<&str>, Option<&str>)> = package_data
            .dependencies
            .iter()
            .map(|dep| (dep.scope.as_deref(), dep.extracted_requirement.as_deref()))
            .collect();

        assert_eq!(
            observed,
            vec![
                (Some("runtime"), Some("~> 3.0")),
                (Some("runtime"), Some(">= 1.0")),
            ]
        );
    }
}