1use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Party};
28use crate::parser_warn as warn;
29use crate::parsers::utils::split_name_email;
30use flate2::read::GzDecoder;
31use packageurl::PackageUrl;
32use regex::Regex;
33use std::collections::HashMap;
34use std::fs::{self, File};
35use std::io::Read;
36use std::path::{Path, PathBuf};
37use tar::Archive;
38
39use super::PackageParser;
40use super::license_normalization::normalize_spdx_declared_license;
41
/// Package type reported by the `PackageParser` implementations in this file
/// and used as the type component when building gem package URLs.
const PACKAGE_TYPE: PackageType = PackageType::Gem;
43
/// Returns `s` with every trailing `.freeze` occurrence removed.
///
/// The suffix is removed repeatedly, so `"a.freeze.freeze"` yields `"a"`.
/// Input without the suffix is returned unchanged.
pub fn strip_freeze_suffix(s: &str) -> &str {
    let mut rest = s;
    while let Some(head) = rest.strip_suffix(".freeze") {
        rest = head;
    }
    rest
}
61
/// A `... do` block that the Gemfile parser keeps on its block stack while
/// reading the lines nested inside it.
enum GemfileBlock {
    /// A `group ... do` block; holds the group names taken from the `:symbol`
    /// arguments on the opening line.
    Group(Vec<String>),
    /// A `source "..." do` block; holds the quoted source string from the
    /// opening line.
    Source(String),
}
66
/// Parser for files named `Gemfile` (see its `PackageParser` implementation
/// for the exact matching rules).
pub struct GemfileParser;
76
77impl PackageParser for GemfileParser {
78 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
79
80 fn extract_packages(path: &Path) -> Vec<PackageData> {
81 let datasource_id = gemfile_datasource_id(path);
82 let content = match fs::read_to_string(path) {
83 Ok(c) => c,
84 Err(e) => {
85 warn!("Failed to read Gemfile at {:?}: {}", path, e);
86 return vec![default_package_data_with_datasource(datasource_id)];
87 }
88 };
89
90 let mut package_data = parse_gemfile(&content);
91 package_data.datasource_id = Some(datasource_id);
92 vec![package_data]
93 }
94
95 fn is_match(path: &Path) -> bool {
96 path.file_name()
97 .and_then(|n| n.to_str())
98 .is_some_and(|name| name == "Gemfile")
99 || path
100 .to_str()
101 .is_some_and(|p| p.contains("data.gz-extract/") && p.ends_with("/Gemfile"))
102 }
103}
104
105fn parse_gemfile(content: &str) -> PackageData {
107 let mut dependencies = Vec::new();
108 let mut block_stack = Vec::new();
109 let mut default_source = None;
110 let mut sources = Vec::new();
111
112 let gem_regex = match Regex::new(
115 r#"^\s*gem\s+["']([^"']+)["'](?:\.freeze)?(?:\s*,\s*["']([^"']+)["'](?:\.freeze)?)?(?:\s*,\s*["']([^"']+)["'](?:\.freeze)?)?(?:\s*,\s*(.+))?"#,
116 ) {
117 Ok(r) => r,
118 Err(e) => {
119 warn!("Failed to compile gem regex: {}", e);
120 return default_package_data_with_datasource(DatasourceId::Gemfile);
121 }
122 };
123
124 let group_start_regex = match Regex::new(r"^\s*group\s+(.+?)\s+do\s*$") {
126 Ok(r) => r,
127 Err(e) => {
128 warn!("Failed to compile group regex: {}", e);
129 return default_package_data_with_datasource(DatasourceId::Gemfile);
130 }
131 };
132
133 let group_end_regex = match Regex::new(r"^\s*end\s*$") {
134 Ok(r) => r,
135 Err(e) => {
136 warn!("Failed to compile end regex: {}", e);
137 return default_package_data_with_datasource(DatasourceId::Gemfile);
138 }
139 };
140
141 let source_block_start_regex = match Regex::new(r#"^\s*source\s+["']([^"']+)["']\s+do\s*$"#) {
142 Ok(r) => r,
143 Err(e) => {
144 warn!("Failed to compile source block regex: {}", e);
145 return default_package_data_with_datasource(DatasourceId::Gemfile);
146 }
147 };
148
149 let source_regex = match Regex::new(r#"^\s*source\s+["']([^"']+)["']\s*$"#) {
150 Ok(r) => r,
151 Err(e) => {
152 warn!("Failed to compile source regex: {}", e);
153 return default_package_data_with_datasource(DatasourceId::Gemfile);
154 }
155 };
156
157 let symbol_regex = match Regex::new(r":(\w+)") {
159 Ok(r) => r,
160 Err(e) => {
161 warn!("Failed to compile symbol regex: {}", e);
162 return default_package_data_with_datasource(DatasourceId::Gemfile);
163 }
164 };
165
166 for line in content.lines() {
167 let trimmed = line.trim();
168
169 if trimmed.is_empty() || trimmed.starts_with('#') {
171 continue;
172 }
173
174 if let Some(caps) = group_start_regex.captures(trimmed) {
176 let groups_str = caps.get(1).map(|m| m.as_str()).unwrap_or("");
177 let mut current_groups = Vec::new();
178 for cap in symbol_regex.captures_iter(groups_str) {
179 if let Some(group_name) = cap.get(1) {
180 current_groups.push(group_name.as_str().to_string());
181 }
182 }
183 block_stack.push(GemfileBlock::Group(current_groups));
184 continue;
185 }
186
187 if let Some(caps) = source_block_start_regex.captures(trimmed) {
188 let source = caps
189 .get(1)
190 .map(|m| m.as_str().to_string())
191 .unwrap_or_default();
192 if !source.is_empty() {
193 push_unique_string(&mut sources, source.clone());
194 block_stack.push(GemfileBlock::Source(source));
195 }
196 continue;
197 }
198
199 if let Some(caps) = source_regex.captures(trimmed) {
200 if let Some(source) = caps.get(1).map(|m| m.as_str().to_string()) {
201 push_unique_string(&mut sources, source.clone());
202 default_source = Some(source);
203 }
204 continue;
205 }
206
207 if group_end_regex.is_match(trimmed) {
209 block_stack.pop();
210 continue;
211 }
212
213 if let Some(caps) = gem_regex.captures(trimmed) {
215 let name = strip_freeze_suffix(caps.get(1).map(|m| m.as_str()).unwrap_or(""));
216 if name.is_empty() {
217 continue;
218 }
219
220 let mut version_parts = Vec::new();
222 if let Some(v) = caps.get(2) {
223 version_parts.push(strip_freeze_suffix(v.as_str()).to_string());
224 }
225 if let Some(v) = caps.get(3) {
226 let v_str = strip_freeze_suffix(v.as_str());
227 if looks_like_version_constraint(v_str) {
229 version_parts.push(v_str.to_string());
230 }
231 }
232
233 let extracted_requirement = if version_parts.is_empty() {
234 None
235 } else {
236 Some(version_parts.join(", "))
237 };
238
239 let current_groups = current_group_names(&block_stack);
240
241 let (scope, is_runtime, is_optional) = if current_groups.is_empty() {
244 (None, true, false)
246 } else if current_groups.iter().any(|g| g == "development") {
247 (Some("development".to_string()), false, true)
248 } else if current_groups.iter().any(|g| g == "test") {
249 (Some("test".to_string()), false, true)
250 } else {
251 let group = current_groups.first().cloned();
253 (group, true, false)
254 };
255
256 let purl = create_gem_purl(name, None);
258 let inherited_source = current_source(&block_stack, default_source.as_deref());
259 let extra_data = build_gemfile_dependency_extra_data(
260 caps.get(4).map(|m| m.as_str()),
261 inherited_source.as_deref(),
262 );
263
264 dependencies.push(Dependency {
265 purl,
266 extracted_requirement,
267 scope,
268 is_runtime: Some(is_runtime),
269 is_optional: Some(is_optional),
270 is_pinned: None,
271 is_direct: Some(true),
272 resolved_package: None,
273 extra_data,
274 });
275 }
276 }
277
278 let extra_data = if sources.is_empty() {
279 None
280 } else {
281 Some(HashMap::from([(
282 "sources".to_string(),
283 serde_json::Value::Array(sources.into_iter().map(serde_json::Value::String).collect()),
284 )]))
285 };
286
287 PackageData {
288 package_type: Some(PACKAGE_TYPE),
289 primary_language: Some("Ruby".to_string()),
290 dependencies,
291 extra_data,
292 datasource_id: Some(DatasourceId::Gemfile),
293 ..default_package_data()
294 }
295}
296
297fn current_group_names(block_stack: &[GemfileBlock]) -> Vec<String> {
298 block_stack
299 .iter()
300 .rev()
301 .find_map(|block| match block {
302 GemfileBlock::Group(groups) => Some(groups.clone()),
303 GemfileBlock::Source(_) => None,
304 })
305 .unwrap_or_default()
306}
307
308fn current_source(block_stack: &[GemfileBlock], default_source: Option<&str>) -> Option<String> {
309 block_stack
310 .iter()
311 .rev()
312 .find_map(|block| match block {
313 GemfileBlock::Source(source) => Some(source.clone()),
314 GemfileBlock::Group(_) => None,
315 })
316 .or_else(|| default_source.map(str::to_string))
317}
318
/// Appends `value` to `values` unless an equal string is already present.
fn push_unique_string(values: &mut Vec<String>, value: String) {
    let already_present = values.iter().any(|existing| *existing == value);
    if already_present {
        return;
    }
    values.push(value);
}
324
325fn build_gemfile_dependency_extra_data(
326 options: Option<&str>,
327 inherited_source: Option<&str>,
328) -> Option<HashMap<String, serde_json::Value>> {
329 let mut extra = HashMap::new();
330 let options = options.unwrap_or("");
331
332 if let Some(git) = extract_gemfile_quoted_option(options, "git") {
333 extra.insert(
334 "source_type".to_string(),
335 serde_json::Value::String("GIT".to_string()),
336 );
337 extra.insert("git".to_string(), serde_json::Value::String(git.clone()));
338 extra.insert("remote".to_string(), serde_json::Value::String(git));
339 }
340
341 if let Some(path) = extract_gemfile_quoted_option(options, "path") {
342 extra.insert(
343 "source_type".to_string(),
344 serde_json::Value::String("PATH".to_string()),
345 );
346 extra.insert("path".to_string(), serde_json::Value::String(path));
347 }
348
349 for key in ["branch", "ref", "tag"] {
350 if let Some(value) = extract_gemfile_quoted_option(options, key) {
351 extra.insert(key.to_string(), serde_json::Value::String(value));
352 }
353 }
354
355 let direct_source = extract_gemfile_quoted_option(options, "source");
356 if let Some(source) = direct_source {
357 extra.insert("source".to_string(), serde_json::Value::String(source));
358 } else if !extra.contains_key("source_type")
359 && let Some(source) = inherited_source
360 {
361 extra.insert(
362 "source".to_string(),
363 serde_json::Value::String(source.to_string()),
364 );
365 }
366
367 (!extra.is_empty()).then_some(extra)
368}
369
370fn extract_gemfile_quoted_option(options: &str, key: &str) -> Option<String> {
371 if options.is_empty() {
372 return None;
373 }
374
375 let pattern = format!(r#"(?:^|,\s*){}\s*:\s*["']([^"']+)["']"#, regex::escape(key));
376 Regex::new(&pattern)
377 .ok()
378 .and_then(|regex| regex.captures(options))
379 .and_then(|captures| captures.get(1).map(|m| m.as_str().to_string()))
380}
381
/// Returns `true` when the first character of `s` is one of `~`, `>`, `<`,
/// `=`, `!` or an ASCII digit. An empty string yields `false`.
fn looks_like_version_constraint(s: &str) -> bool {
    match s.chars().next() {
        Some(first) => matches!(first, '~' | '>' | '<' | '=' | '!') || first.is_ascii_digit(),
        None => false,
    }
}
391
/// Parser for files named `Gemfile.lock` (see its `PackageParser`
/// implementation for the exact matching rules).
pub struct GemfileLockParser;
401
402impl PackageParser for GemfileLockParser {
403 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
404
405 fn extract_packages(path: &Path) -> Vec<PackageData> {
406 let datasource_id = gemfile_lock_datasource_id(path);
407 let content = match fs::read_to_string(path) {
408 Ok(c) => c,
409 Err(e) => {
410 warn!("Failed to read Gemfile.lock at {:?}: {}", path, e);
411 return vec![default_package_data_with_datasource(datasource_id)];
412 }
413 };
414
415 let mut package_data = parse_gemfile_lock(&content);
416 package_data.datasource_id = Some(datasource_id);
417 vec![package_data]
418 }
419
420 fn is_match(path: &Path) -> bool {
421 path.file_name()
422 .and_then(|n| n.to_str())
423 .is_some_and(|name| name == "Gemfile.lock")
424 || path
425 .to_str()
426 .is_some_and(|p| p.contains("data.gz-extract/") && p.ends_with("/Gemfile.lock"))
427 }
428}
429
/// Section of a `Gemfile.lock` that the line-based parser is currently inside.
#[derive(Debug, Clone, PartialEq)]
enum ParseState {
    /// No section header has been seen yet; lines are ignored.
    None,
    /// Inside a `GEM` section, before its `specs:` line (option lines are read).
    Gem,
    /// Inside a `GIT` section, before its `specs:` line (option lines are read).
    Git,
    /// Inside a `PATH` section, before its `specs:` line (option lines are read).
    Path,
    /// Inside an `SVN` section, before its `specs:` line (option lines are read).
    Svn,
    /// After the `specs:` line of a source section; gem entries are read.
    Specs,
    /// Inside the `PLATFORMS` section.
    Platforms,
    /// Inside the `BUNDLED WITH` section.
    BundledWith,
    /// Inside the `DEPENDENCIES` section.
    Dependencies,
}
443
/// Information collected about one gem while parsing a `Gemfile.lock`.
#[derive(Debug, Clone, Default)]
struct GemInfo {
    /// Gem name.
    name: String,
    /// Version part of the parenthesised value on the gem's spec line, if any.
    version: Option<String>,
    /// Platform part (text after the first `-`) of that value, if any.
    platform: Option<String>,
    /// Header of the section the gem was listed in: `GEM`, `GIT`, `PATH` or `SVN`.
    gem_type: String,
    /// Value of the `remote` option of the gem's source section.
    remote: Option<String>,
    /// Value of the `revision` option of the gem's source section.
    revision: Option<String>,
    /// Value of the `ref` option of the gem's source section.
    ref_field: Option<String>,
    /// Value of the `branch` option of the gem's source section.
    branch: Option<String>,
    /// Value of the `tag` option of the gem's source section.
    tag: Option<String>,
    /// Whether the gem's `DEPENDENCIES` entry ends with `!`.
    pinned: bool,
    /// Version constraints listed for the gem in the `DEPENDENCIES` section.
    requirements: Vec<String>,
}
464
465fn parse_gemfile_lock(content: &str) -> PackageData {
467 let mut state = ParseState::None;
468 let mut dependencies = Vec::new();
469 let mut gems: HashMap<String, GemInfo> = HashMap::new();
470 let mut platforms: Vec<String> = Vec::new();
471 let mut bundler_version: Option<String> = None;
472 let mut current_gem_type = String::new();
473 let mut current_remote: Option<String> = None;
474 let mut current_options: HashMap<String, String> = HashMap::new();
475
476 let deps_regex = match Regex::new(r"^ {2}([^ \)\(,!:]+)(?: \(([^)]+)\))?(!)?$") {
478 Ok(r) => r,
479 Err(e) => {
480 warn!("Failed to compile deps regex: {}", e);
481 return default_package_data_with_datasource(DatasourceId::GemfileLock);
482 }
483 };
484
485 let spec_deps_regex = match Regex::new(r"^ {4}([^ \)\(,!:]+)(?: \(([^)]+)\))?$") {
487 Ok(r) => r,
488 Err(e) => {
489 warn!("Failed to compile spec_deps regex: {}", e);
490 return default_package_data_with_datasource(DatasourceId::GemfileLock);
491 }
492 };
493
494 let options_regex = match Regex::new(r"^ {2}([a-z]+): (.+)$") {
496 Ok(r) => r,
497 Err(e) => {
498 warn!("Failed to compile options regex: {}", e);
499 return default_package_data_with_datasource(DatasourceId::GemfileLock);
500 }
501 };
502
503 let version_regex = match Regex::new(r"^\s+(\d+(?:\.\d+)+)\s*$") {
505 Ok(r) => r,
506 Err(e) => {
507 warn!("Failed to compile version regex: {}", e);
508 return default_package_data_with_datasource(DatasourceId::GemfileLock);
509 }
510 };
511
512 for line in content.lines() {
513 let trimmed = line.trim_end();
514
515 if trimmed.is_empty() {
517 current_options.clear();
518 continue;
519 }
520
521 match trimmed {
523 "GEM" => {
524 state = ParseState::Gem;
525 current_gem_type = "GEM".to_string();
526 current_remote = None;
527 current_options.clear();
528 continue;
529 }
530 "GIT" => {
531 state = ParseState::Git;
532 current_gem_type = "GIT".to_string();
533 current_remote = None;
534 current_options.clear();
535 continue;
536 }
537 "PATH" => {
538 state = ParseState::Path;
539 current_gem_type = "PATH".to_string();
540 current_remote = None;
541 current_options.clear();
542 continue;
543 }
544 "SVN" => {
545 state = ParseState::Svn;
546 current_gem_type = "SVN".to_string();
547 current_remote = None;
548 current_options.clear();
549 continue;
550 }
551 "PLATFORMS" => {
552 state = ParseState::Platforms;
553 continue;
554 }
555 "BUNDLED WITH" => {
556 state = ParseState::BundledWith;
557 continue;
558 }
559 "DEPENDENCIES" => {
560 state = ParseState::Dependencies;
561 continue;
562 }
563 _ => {}
564 }
565
566 if trimmed.trim() == "specs:" {
570 state = match state {
571 ParseState::Gem | ParseState::Git | ParseState::Path | ParseState::Svn => {
572 ParseState::Specs
573 }
574 _ => state,
575 };
576 continue;
577 }
578
579 match state {
581 ParseState::Gem | ParseState::Git | ParseState::Path | ParseState::Svn => {
582 if let Some(caps) = options_regex.captures(line) {
584 let key = caps.get(1).map(|m| m.as_str()).unwrap_or("");
585 let value = caps.get(2).map(|m| m.as_str()).unwrap_or("");
586 current_options.insert(key.to_string(), value.to_string());
587 if key == "remote" {
588 current_remote = Some(value.to_string());
589 }
590 }
591 }
592 ParseState::Specs => {
593 if let Some(caps) = spec_deps_regex.captures(line) {
595 let name = caps.get(1).map(|m| m.as_str()).unwrap_or("").to_string();
596 let version_str = caps.get(2).map(|m| m.as_str()).unwrap_or("");
597
598 let (version, platform) = parse_version_platform(version_str);
600
601 if !name.is_empty() {
602 let gem_info = GemInfo {
603 name: name.clone(),
604 version,
605 platform,
606 gem_type: current_gem_type.clone(),
607 remote: current_remote.clone(),
608 revision: current_options.get("revision").cloned(),
609 ref_field: current_options.get("ref").cloned(),
610 branch: current_options.get("branch").cloned(),
611 tag: current_options.get("tag").cloned(),
612 pinned: false,
613 requirements: Vec::new(),
614 };
615 gems.insert(name, gem_info);
616 }
617 }
618 }
619 ParseState::Platforms => {
620 let platform = trimmed.trim();
622 if !platform.is_empty() {
623 platforms.push(platform.to_string());
624 }
625 }
626 ParseState::BundledWith => {
627 if let Some(caps) = version_regex.captures(line) {
629 bundler_version = caps.get(1).map(|m| m.as_str().to_string());
630 }
631 }
632 ParseState::Dependencies => {
633 if let Some(caps) = deps_regex.captures(line) {
635 let name = caps.get(1).map(|m| m.as_str()).unwrap_or("").to_string();
636 let version_constraint = caps.get(2).map(|m| m.as_str().to_string());
637 let pinned = caps.get(3).is_some();
638
639 if !name.is_empty() {
640 if let Some(gem) = gems.get_mut(&name) {
642 gem.pinned = pinned;
643 if let Some(vc) = &version_constraint {
644 gem.requirements.push(vc.clone());
645 }
646 } else {
647 let gem_info = GemInfo {
648 name: name.clone(),
649 version: None,
650 platform: None,
651 gem_type: "GEM".to_string(),
652 remote: None,
653 revision: None,
654 ref_field: None,
655 branch: None,
656 tag: None,
657 pinned,
658 requirements: version_constraint.into_iter().collect(),
659 };
660 gems.insert(name, gem_info);
661 }
662 }
663 }
664 }
665 ParseState::None => {}
666 }
667 }
668
669 let primary_gem = gems.values().find(|gem| gem.gem_type == "PATH").cloned();
670
671 let (
672 package_name,
673 package_version,
674 repository_homepage_url,
675 repository_download_url,
676 api_data_url,
677 download_url,
678 ) = if let Some(ref pg) = primary_gem {
679 let urls = get_rubygems_urls(&pg.name, pg.version.as_deref(), pg.platform.as_deref());
680 (
681 Some(pg.name.clone()),
682 pg.version.clone(),
683 urls.0,
684 urls.1,
685 urls.2,
686 urls.3,
687 )
688 } else {
689 (None, None, None, None, None, None)
690 };
691
692 for (_, gem) in gems {
693 if let Some(ref pg) = primary_gem
694 && gem.name == pg.name
695 {
696 continue;
697 }
698
699 let version_for_purl = gem.version.as_deref();
700 let purl = create_gem_purl(&gem.name, version_for_purl);
701
702 let extracted_requirement = if !gem.requirements.is_empty() {
703 Some(gem.requirements.join(", "))
704 } else {
705 gem.version.clone()
706 };
707
708 let extra_data = build_gem_source_extra_data(&gem);
709
710 dependencies.push(Dependency {
711 purl,
712 extracted_requirement,
713 scope: Some("dependencies".to_string()),
714 is_runtime: Some(true),
715 is_optional: Some(false),
716 is_pinned: Some(gem.pinned),
717 is_direct: Some(true),
718 resolved_package: None,
719 extra_data,
720 });
721 }
722
723 dependencies.sort_by(|left, right| {
724 left.purl
725 .as_deref()
726 .cmp(&right.purl.as_deref())
727 .then_with(|| {
728 left.extracted_requirement
729 .as_deref()
730 .cmp(&right.extracted_requirement.as_deref())
731 })
732 });
733
734 let mut extra_data = HashMap::new();
736 if !platforms.is_empty() {
737 extra_data.insert(
738 "platforms".to_string(),
739 serde_json::Value::Array(
740 platforms
741 .into_iter()
742 .map(serde_json::Value::String)
743 .collect(),
744 ),
745 );
746 }
747 if let Some(bv) = bundler_version {
748 extra_data.insert("bundler_version".to_string(), serde_json::Value::String(bv));
749 }
750
751 let purl = package_name
752 .as_deref()
753 .map(|n| create_gem_purl(n, package_version.as_deref()))
754 .unwrap_or(None);
755
756 PackageData {
757 package_type: Some(PACKAGE_TYPE),
758 name: package_name,
759 version: package_version,
760 primary_language: Some("Ruby".to_string()),
761 download_url,
762 dependencies,
763 repository_homepage_url,
764 repository_download_url,
765 api_data_url,
766 extra_data: if extra_data.is_empty() {
767 None
768 } else {
769 Some(extra_data)
770 },
771 datasource_id: Some(DatasourceId::GemfileLock),
772 purl,
773 ..default_package_data()
774 }
775}
776
777fn build_gem_source_extra_data(gem: &GemInfo) -> Option<HashMap<String, serde_json::Value>> {
778 if gem.gem_type != "GIT" && gem.gem_type != "PATH" && gem.gem_type != "SVN" {
779 return None;
780 }
781
782 let mut extra = HashMap::new();
783 extra.insert(
784 "source_type".to_string(),
785 serde_json::Value::String(gem.gem_type.clone()),
786 );
787
788 if let Some(ref remote) = gem.remote {
789 extra.insert(
790 "remote".to_string(),
791 serde_json::Value::String(remote.clone()),
792 );
793 }
794 if let Some(ref revision) = gem.revision {
795 extra.insert(
796 "revision".to_string(),
797 serde_json::Value::String(revision.clone()),
798 );
799 }
800 if let Some(ref ref_field) = gem.ref_field {
801 extra.insert(
802 "ref".to_string(),
803 serde_json::Value::String(ref_field.clone()),
804 );
805 }
806 if let Some(ref branch) = gem.branch {
807 extra.insert(
808 "branch".to_string(),
809 serde_json::Value::String(branch.clone()),
810 );
811 }
812 if let Some(ref tag) = gem.tag {
813 extra.insert("tag".to_string(), serde_json::Value::String(tag.clone()));
814 }
815
816 Some(extra)
817}
818
/// Splits a lockfile version string at its first `-` into `(version, platform)`.
///
/// An empty input yields `(None, None)`; input without `-` yields the whole
/// string as the version and no platform.
fn parse_version_platform(s: &str) -> (Option<String>, Option<String>) {
    if s.is_empty() {
        return (None, None);
    }

    let (version, platform) = match s.split_once('-') {
        Some((version, platform)) => (version, Some(platform.to_string())),
        None => (s, None),
    };
    (Some(version.to_string()), platform)
}
833
834fn create_gem_purl(name: &str, version: Option<&str>) -> Option<String> {
836 let mut purl = match PackageUrl::new(PACKAGE_TYPE.as_str(), name) {
837 Ok(p) => p,
838 Err(e) => {
839 warn!("Failed to create PURL for gem '{}': {}", name, e);
840 return None;
841 }
842 };
843
844 if let Some(v) = version
845 && let Err(e) = purl.with_version(v)
846 {
847 warn!("Failed to set version '{}' for gem '{}': {}", v, name, e);
848 }
849
850 Some(purl.to_string())
851}
852
/// Returns the rubygems.org page URL for gem `name`, or `None` when `name` is
/// empty.
///
/// With a version, the URL points at that version's page; surrounding
/// whitespace and `/` characters are removed from the version first.
fn rubygems_homepage_url(name: &str, version: Option<&str>) -> Option<String> {
    if name.is_empty() {
        return None;
    }

    let url = match version {
        Some(raw) => {
            let cleaned = raw.trim().trim_matches('/');
            format!("https://rubygems.org/gems/{}/versions/{}", name, cleaned)
        }
        None => format!("https://rubygems.org/gems/{}", name),
    };
    Some(url)
}
865
/// Returns the rubygems.org download URL of the `.gem` file for `name` at
/// `version`, or `None` when `name` is empty or no version is given.
///
/// Surrounding whitespace and `/` characters are removed from the name and
/// version. A platform other than `ruby` is appended to the version as
/// `-<platform>`.
fn rubygems_download_url(
    name: &str,
    version: Option<&str>,
    platform: Option<&str>,
) -> Option<String> {
    if name.is_empty() {
        return None;
    }
    let version = version?.trim().trim_matches('/');
    let name = name.trim().trim_matches('/');

    let version_plat = match platform {
        Some(p) if p != "ruby" => format!("{}-{}", version, p),
        _ => version.to_string(),
    };

    Some(format!(
        "https://rubygems.org/downloads/{}-{}.gem",
        name, version_plat
    ))
}
893
/// Returns the rubygems.org API URL for gem `name`, or `None` when `name` is
/// empty.
///
/// With a version, the v2 per-version endpoint is used; without one, the v1
/// versions endpoint.
fn rubygems_api_url(name: &str, version: Option<&str>) -> Option<String> {
    if name.is_empty() {
        return None;
    }

    let url = match version {
        Some(v) => format!(
            "https://rubygems.org/api/v2/rubygems/{}/versions/{}.json",
            name, v
        ),
        None => format!("https://rubygems.org/api/v1/versions/{}.json", name),
    };
    Some(url)
}
911
912fn get_rubygems_urls(
913 name: &str,
914 version: Option<&str>,
915 platform: Option<&str>,
916) -> (
917 Option<String>,
918 Option<String>,
919 Option<String>,
920 Option<String>,
921) {
922 let repository_homepage_url = rubygems_homepage_url(name, version);
923 let repository_download_url = rubygems_download_url(name, version, platform);
924 let api_data_url = rubygems_api_url(name, version);
925 let download_url = repository_download_url.clone();
926
927 (
928 repository_homepage_url,
929 repository_download_url,
930 api_data_url,
931 download_url,
932 )
933}
934
935fn default_package_data() -> PackageData {
937 PackageData {
938 package_type: Some(PACKAGE_TYPE),
939 primary_language: Some("Ruby".to_string()),
940 ..Default::default()
941 }
942}
943
944fn default_package_data_with_datasource(datasource_id: DatasourceId) -> PackageData {
945 PackageData {
946 datasource_id: Some(datasource_id),
947 ..default_package_data()
948 }
949}
950
/// Parser for files with the `.gemspec` extension.
pub struct GemspecParser;
961
962impl PackageParser for GemspecParser {
963 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
964
965 fn extract_packages(path: &Path) -> Vec<PackageData> {
966 let datasource_id = gemspec_datasource_id(path);
967 let content = match fs::read_to_string(path) {
968 Ok(c) => c,
969 Err(e) => {
970 warn!("Failed to read .gemspec at {:?}: {}", path, e);
971 return vec![default_package_data_with_datasource(datasource_id)];
972 }
973 };
974
975 let mut package_data = parse_gemspec_with_context(&content, path.parent());
976 package_data.datasource_id = Some(datasource_id);
977 vec![package_data]
978 }
979
980 fn is_match(path: &Path) -> bool {
981 path.extension()
982 .and_then(|ext| ext.to_str())
983 .is_some_and(|ext| ext == "gemspec")
984 }
985}
986
/// Renders `path` as a string (lossily for non-UTF-8 paths) with every
/// backslash replaced by a forward slash.
fn normalized_ruby_path(path: &Path) -> String {
    let lossy = path.to_string_lossy();
    lossy
        .chars()
        .map(|c| if c == '\\' { '/' } else { c })
        .collect()
}
990
991fn gemfile_datasource_id(path: &Path) -> DatasourceId {
992 if normalized_ruby_path(path).contains("/data.gz-extract/") {
993 DatasourceId::GemfileExtracted
994 } else {
995 DatasourceId::Gemfile
996 }
997}
998
999fn gemfile_lock_datasource_id(path: &Path) -> DatasourceId {
1000 if normalized_ruby_path(path).contains("/data.gz-extract/") {
1001 DatasourceId::GemfileLockExtracted
1002 } else {
1003 DatasourceId::GemfileLock
1004 }
1005}
1006
1007fn gemspec_datasource_id(path: &Path) -> DatasourceId {
1008 let normalized = normalized_ruby_path(path);
1009 if normalized.contains("/data.gz-extract/") {
1010 DatasourceId::GemspecExtracted
1011 } else if normalized.contains("/specifications/") {
1012 DatasourceId::GemGemspecInstalledSpecifications
1013 } else {
1014 DatasourceId::Gemspec
1015 }
1016}
1017
1018fn clean_gemspec_value(s: &str) -> String {
1020 let s = strip_freeze_suffix(s).trim();
1021
1022 let s = if let Some(pos) = s.find(" #") {
1023 s[..pos].trim()
1024 } else {
1025 s
1026 };
1027
1028 let s = if let Some(stripped) = s.strip_prefix("%q{") {
1029 stripped.strip_suffix('}').unwrap_or(stripped)
1030 } else if let Some(stripped) = s.strip_prefix("%q<") {
1031 stripped.strip_suffix('>').unwrap_or(stripped)
1032 } else if let Some(stripped) = s.strip_prefix("%q[") {
1033 stripped.strip_suffix(']').unwrap_or(stripped)
1034 } else if let Some(stripped) = s.strip_prefix("%q(") {
1035 stripped.strip_suffix(')').unwrap_or(stripped)
1036 } else {
1037 s
1038 };
1039
1040 let s = s
1041 .trim_start_matches('"')
1042 .trim_end_matches('"')
1043 .trim_start_matches('\'')
1044 .trim_end_matches('\'');
1045 let s = strip_freeze_suffix(s).trim();
1046 s.to_string()
1047}
1048
1049fn extract_ruby_array(s: &str) -> Vec<String> {
1051 let s = strip_freeze_suffix(s.trim());
1052 let s = s.trim_start_matches('[').trim_end_matches(']');
1053 let item_re = match Regex::new(r#"["']([^"']*?)["'](?:\.freeze)?"#) {
1054 Ok(r) => r,
1055 Err(_) => return Vec::new(),
1056 };
1057 item_re
1058 .captures_iter(s)
1059 .filter_map(|cap| cap.get(1).map(|m| m.as_str().to_string()))
1060 .collect()
1061}
1062
1063fn extract_all_ruby_values(s: &str) -> Vec<String> {
1064 let value_re = match Regex::new(r#"%q[\{<\[(]([^\}>\])]+)[\}>\])]|["']([^"']+)["']"#) {
1065 Ok(r) => r,
1066 Err(_) => return Vec::new(),
1067 };
1068
1069 value_re
1070 .captures_iter(s)
1071 .filter_map(|caps| caps.get(1).or_else(|| caps.get(2)))
1072 .map(|m| clean_gemspec_value(m.as_str()))
1073 .collect()
1074}
1075
1076fn extract_first_ruby_value(s: &str) -> Option<String> {
1077 extract_all_ruby_values(s).into_iter().next()
1078}
1079
/// Returns the trimmed text that follows the first top-level comma in `args`,
/// or `""` when there is no such comma.
///
/// A comma is top-level when it is outside single-/double-quoted strings and
/// not nested inside `[]`, `{}`, `<>` or `()`. Inside a quoted string a
/// backslash causes the following character to be skipped.
fn after_first_argument(args: &str) -> &str {
    let mut nesting = 0usize;
    let mut parens = 0usize;
    let mut open_quote: Option<char> = None;
    let mut skip_next = false;

    for (offset, c) in args.char_indices() {
        if skip_next {
            // Character escaped by a backslash inside a quoted string.
            skip_next = false;
            continue;
        }

        if let Some(quote) = open_quote {
            if c == '\\' {
                skip_next = true;
            } else if c == quote {
                open_quote = None;
            }
            continue;
        }

        match c {
            '\'' | '"' => open_quote = Some(c),
            '[' | '{' | '<' => nesting += 1,
            ']' | '}' | '>' => nesting = nesting.saturating_sub(1),
            '(' => parens += 1,
            ')' => parens = parens.saturating_sub(1),
            ',' if nesting == 0 && parens == 0 => return args[offset + 1..].trim(),
            _ => {}
        }
    }

    ""
}
1117
1118fn resolve_variable_version(var_name: &str, contexts: &[String]) -> Option<String> {
1123 let var_name = var_name.trim();
1124 if var_name.is_empty() {
1125 return None;
1126 }
1127
1128 for candidate in candidate_constant_names(var_name) {
1129 let escaped = regex::escape(&candidate);
1130 let pattern = format!(r#"(?m)^\s*{}\s*=\s*["']([^"']+)["']"#, escaped);
1131 let Ok(re) = Regex::new(&pattern) else {
1132 continue;
1133 };
1134
1135 for context in contexts {
1136 if let Some(caps) = re.captures(context) {
1137 return caps.get(1).map(|m| m.as_str().to_string());
1138 }
1139 }
1140 }
1141
1142 None
1143}
1144
1145fn resolve_variable_array(var_name: &str, contexts: &[String]) -> Option<Vec<String>> {
1146 let var_name = var_name.trim();
1147 if var_name.is_empty() {
1148 return None;
1149 }
1150
1151 for candidate in candidate_constant_names(var_name) {
1152 let escaped = regex::escape(&candidate);
1153 let pattern = format!(r#"(?m)^\s*{}\s*=\s*(\[[^\n]+\])"#, escaped);
1154 let Ok(re) = Regex::new(&pattern) else {
1155 continue;
1156 };
1157
1158 for context in contexts {
1159 if let Some(caps) = re.captures(context)
1160 && let Some(raw) = caps.get(1)
1161 {
1162 let values = extract_ruby_array(raw.as_str());
1163 if !values.is_empty() {
1164 return Some(values);
1165 }
1166 }
1167 }
1168 }
1169
1170 None
1171}
1172
/// Returns the names to try when resolving a Ruby constant: `var_name` itself
/// and, when it is namespaced with `::`, its last segment.
fn candidate_constant_names(var_name: &str) -> Vec<String> {
    let mut names = vec![var_name.to_string()];

    let unqualified = var_name
        .rsplit("::")
        .next()
        .filter(|last| *last != var_name);
    names.extend(unqualified.map(str::to_string));

    names
}
1182
1183fn load_required_ruby_contexts(content: &str, base_dir: Option<&Path>) -> Vec<String> {
1184 let mut contexts = vec![content.to_string()];
1185 let Some(base_dir) = base_dir else {
1186 return contexts;
1187 };
1188
1189 let require_re = match Regex::new(r#"(?m)^\s*require(?:_relative)?\s+["']([^"']+)["']"#) {
1190 Ok(re) => re,
1191 Err(_) => return contexts,
1192 };
1193
1194 for caps in require_re.captures_iter(content) {
1195 let Some(required) = caps.get(1).map(|m| m.as_str()) else {
1196 continue;
1197 };
1198 for candidate in candidate_require_paths(base_dir, required) {
1199 if let Ok(required_content) = fs::read_to_string(&candidate) {
1200 contexts.push(required_content);
1201 break;
1202 }
1203 }
1204 }
1205
1206 contexts
1207}
1208
/// Returns the file paths to try for a required name: `<base_dir>/<file>` and
/// `<base_dir>/lib/<file>`, in that order.
///
/// `<file>` is `required` with `::` replaced by `/` and `.rb` appended unless
/// it already ends with it.
fn candidate_require_paths(base_dir: &Path, required: &str) -> Vec<PathBuf> {
    let mut filename = required.replace("::", "/");
    if !filename.ends_with(".rb") {
        filename.push_str(".rb");
    }

    let direct = base_dir.join(&filename);
    let under_lib = base_dir.join("lib").join(&filename);
    vec![direct, under_lib]
}
1222
/// Returns `true` when `s` contains `::` or starts with an ASCII uppercase
/// letter.
fn looks_like_constant_reference(s: &str) -> bool {
    if s.contains("::") {
        return true;
    }
    matches!(s.chars().next(), Some(first) if first.is_ascii_uppercase())
}
1226
/// Test-only convenience wrapper: parses gemspec `content` without a base
/// directory.
#[cfg(test)]
fn parse_gemspec(content: &str) -> PackageData {
    parse_gemspec_with_context(content, None)
}
1232
1233fn parse_gemspec_with_context(content: &str, base_dir: Option<&Path>) -> PackageData {
1234 let contexts = load_required_ruby_contexts(content, base_dir);
1235
1236 let field_re = match Regex::new(
1239 r#"(?m)^\s*\w+\.(name|version|summary|description|homepage|license)\s*=\s*(.+)$"#,
1240 ) {
1241 Ok(r) => r,
1242 Err(e) => {
1243 warn!("Failed to compile gemspec field regex: {}", e);
1244 return default_package_data_with_datasource(DatasourceId::Gemspec);
1245 }
1246 };
1247
1248 let licenses_re = match Regex::new(r#"(?m)^\s*\w+\.licenses\s*=\s*(.+)$"#) {
1249 Ok(r) => r,
1250 Err(e) => {
1251 warn!("Failed to compile licenses regex: {}", e);
1252 return default_package_data_with_datasource(DatasourceId::Gemspec);
1253 }
1254 };
1255
1256 let authors_re = match Regex::new(r#"(?m)^\s*\w+\.(?:authors|author)\s*=\s*(.+)$"#) {
1257 Ok(r) => r,
1258 Err(e) => {
1259 warn!("Failed to compile authors regex: {}", e);
1260 return default_package_data_with_datasource(DatasourceId::Gemspec);
1261 }
1262 };
1263
1264 let email_re = match Regex::new(r#"(?m)^\s*\w+\.email\s*=\s*(.+)$"#) {
1265 Ok(r) => r,
1266 Err(e) => {
1267 warn!("Failed to compile email regex: {}", e);
1268 return default_package_data_with_datasource(DatasourceId::Gemspec);
1269 }
1270 };
1271
1272 let dependency_call_re = match Regex::new(
1273 r#"(?m)^\s*\w+\.(add_(?:development_|runtime_)?dependency)\s*\(?(.+?)\)?\s*$"#,
1274 ) {
1275 Ok(r) => r,
1276 Err(e) => {
1277 warn!("Failed to compile gemspec dependency regex: {}", e);
1278 return default_package_data_with_datasource(DatasourceId::Gemspec);
1279 }
1280 };
1281
1282 let mut name: Option<String> = None;
1283 let mut version: Option<String> = None;
1284 let mut summary: Option<String> = None;
1285 let mut description: Option<String> = None;
1286 let mut homepage: Option<String> = None;
1287 let mut license: Option<String> = None;
1288 let mut licenses: Vec<String> = Vec::new();
1289 let mut authors: Vec<String> = Vec::new();
1290 let mut emails: Vec<String> = Vec::new();
1291 let mut dependencies: Vec<Dependency> = Vec::new();
1292
1293 for caps in field_re.captures_iter(content) {
1295 let field_name = match caps.get(1) {
1296 Some(m) => m.as_str(),
1297 None => continue,
1298 };
1299 let raw_value = match caps.get(2) {
1300 Some(m) => m.as_str().trim(),
1301 None => continue,
1302 };
1303
1304 match field_name {
1305 "name" => {
1306 let cleaned = clean_gemspec_value(raw_value);
1307 name = if looks_like_constant_reference(&cleaned) {
1308 resolve_variable_version(&cleaned, &contexts).or(Some(cleaned))
1309 } else {
1310 Some(cleaned)
1311 }
1312 }
1313 "version" => {
1314 let cleaned = clean_gemspec_value(raw_value);
1315 if looks_like_constant_reference(&cleaned) {
1317 version = resolve_variable_version(&cleaned, &contexts).or(Some(cleaned));
1318 } else {
1319 version = Some(cleaned);
1320 }
1321 }
1322 "summary" => {
1323 let cleaned = clean_gemspec_value(raw_value);
1324 summary = if looks_like_constant_reference(&cleaned) {
1325 resolve_variable_version(&cleaned, &contexts).or(Some(cleaned))
1326 } else {
1327 Some(cleaned)
1328 }
1329 }
1330 "description" => description = Some(clean_gemspec_value(raw_value)),
1331 "homepage" => {
1332 let cleaned = clean_gemspec_value(raw_value);
1333 homepage = if looks_like_constant_reference(&cleaned) {
1334 resolve_variable_version(&cleaned, &contexts).or(Some(cleaned))
1335 } else {
1336 Some(cleaned)
1337 }
1338 }
1339 "license" => license = Some(clean_gemspec_value(raw_value)),
1340 _ => {}
1341 }
1342 }
1343
1344 for caps in licenses_re.captures_iter(content) {
1346 if let Some(raw) = caps.get(1) {
1347 licenses = extract_ruby_array(raw.as_str());
1348 }
1349 }
1350
1351 for caps in authors_re.captures_iter(content) {
1353 if let Some(raw) = caps.get(1) {
1354 let raw_str = raw.as_str().trim();
1355 if raw_str.starts_with('[') {
1356 authors = extract_ruby_array(raw_str);
1357 } else if looks_like_constant_reference(raw_str) {
1358 authors = resolve_variable_array(raw_str, &contexts)
1359 .unwrap_or_else(|| vec![clean_gemspec_value(raw_str)]);
1360 } else {
1361 authors.push(clean_gemspec_value(raw_str));
1362 }
1363 }
1364 }
1365
1366 for caps in email_re.captures_iter(content) {
1368 if let Some(raw) = caps.get(1) {
1369 let raw_str = raw.as_str().trim();
1370 if raw_str.starts_with('[') {
1371 emails = extract_ruby_array(raw_str);
1372 } else if looks_like_constant_reference(raw_str) {
1373 emails = resolve_variable_array(raw_str, &contexts)
1374 .unwrap_or_else(|| vec![clean_gemspec_value(raw_str)]);
1375 } else {
1376 emails.push(clean_gemspec_value(raw_str));
1377 }
1378 }
1379 }
1380
1381 let mut parties: Vec<Party> = Vec::new();
1383
1384 if authors.len() == 1 && emails.len() == 1 {
1385 let email_str = emails.first().map(String::as_str);
1386 let (parsed_email_name, parsed_email) = match email_str {
1387 Some(e) => split_name_email(e),
1388 None => (None, None),
1389 };
1390
1391 parties.push(Party {
1392 r#type: Some("person".to_string()),
1393 role: Some("author".to_string()),
1394 name: authors.first().cloned().or(parsed_email_name),
1395 email: parsed_email.or_else(|| {
1396 email_str
1397 .filter(|e| e.contains('@') && !e.contains('<'))
1398 .map(|e| e.to_string())
1399 }),
1400 url: None,
1401 organization: None,
1402 organization_url: None,
1403 timezone: None,
1404 });
1405 } else {
1406 for author_name in authors {
1407 parties.push(Party {
1408 r#type: Some("person".to_string()),
1409 role: Some("author".to_string()),
1410 name: Some(author_name),
1411 email: None,
1412 url: None,
1413 organization: None,
1414 organization_url: None,
1415 timezone: None,
1416 });
1417 }
1418
1419 for email_str in emails {
1420 let (parsed_email_name, parsed_email) = if email_str.contains('<') {
1421 split_name_email(&email_str)
1422 } else {
1423 (None, None)
1424 };
1425 parties.push(Party {
1426 r#type: Some("person".to_string()),
1427 role: Some("author".to_string()),
1428 name: parsed_email_name,
1429 email: parsed_email.or_else(|| email_str.contains('@').then_some(email_str)),
1430 url: None,
1431 organization: None,
1432 organization_url: None,
1433 timezone: None,
1434 });
1435 }
1436 }
1437
1438 for caps in dependency_call_re.captures_iter(content) {
1439 let method = match caps.get(1) {
1440 Some(m) => m.as_str(),
1441 None => continue,
1442 };
1443 let args = match caps.get(2) {
1444 Some(m) => m.as_str(),
1445 None => continue,
1446 };
1447
1448 let Some(dep_name) = extract_first_ruby_value(args) else {
1449 continue;
1450 };
1451 let version_parts = extract_all_ruby_values(after_first_argument(args));
1452 let extracted_requirement = if version_parts.is_empty() {
1453 None
1454 } else {
1455 Some(version_parts.join(", "))
1456 };
1457 let purl = create_gem_purl(&dep_name, None);
1458 let is_development = method == "add_development_dependency";
1459 let scope = if is_development {
1460 "development"
1461 } else {
1462 "runtime"
1463 };
1464
1465 dependencies.push(Dependency {
1466 purl,
1467 extracted_requirement,
1468 scope: Some(scope.to_string()),
1469 is_runtime: Some(!is_development),
1470 is_optional: Some(is_development),
1471 is_pinned: None,
1472 is_direct: Some(true),
1473 resolved_package: None,
1474 extra_data: None,
1475 });
1476 }
1477
1478 let extracted_license_statement = if !licenses.is_empty() {
1480 Some(licenses.join(" AND "))
1481 } else {
1482 license
1483 };
1484
1485 let (declared_license_expression, declared_license_expression_spdx, license_detections) =
1486 normalize_spdx_declared_license(extracted_license_statement.as_deref());
1487
1488 let final_description = description.or(summary);
1490
1491 let purl = name
1493 .as_deref()
1494 .map(|n| create_gem_purl(n, version.as_deref()))
1495 .unwrap_or(None);
1496
1497 let (repository_homepage_url, repository_download_url, api_data_url, download_url) =
1498 if let Some(n) = name.as_deref() {
1499 get_rubygems_urls(n, version.as_deref(), None)
1500 } else {
1501 (None, None, None, None)
1502 };
1503
1504 PackageData {
1505 package_type: Some(PACKAGE_TYPE),
1506 name,
1507 version,
1508 primary_language: Some("Ruby".to_string()),
1509 description: final_description,
1510 homepage_url: homepage,
1511 download_url,
1512 declared_license_expression,
1513 declared_license_expression_spdx,
1514 license_detections,
1515 extracted_license_statement,
1516 parties,
1517 dependencies,
1518 repository_homepage_url,
1519 repository_download_url,
1520 api_data_url,
1521 datasource_id: Some(DatasourceId::Gemspec),
1522 purl,
1523 ..default_package_data()
1524 }
1525}
1526
/// Largest `.gem` archive, in bytes, that `extract_gem_archive` accepts (100 MiB).
const MAX_ARCHIVE_SIZE: u64 = 100 * 1024 * 1024;

/// Upper bound, in bytes, applied to `metadata.gz` both as stored in the
/// archive and after decompression (50 MiB).
const MAX_FILE_SIZE: u64 = 50 * 1024 * 1024;

/// Highest decompressed-to-compressed size ratio accepted for `metadata.gz`.
const MAX_COMPRESSION_RATIO: f64 = 100.0;

/// Marker type whose [`PackageParser`] implementation handles `.gem` archives.
pub struct GemArchiveParser;
1542
1543impl PackageParser for GemArchiveParser {
1544 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1545
1546 fn extract_packages(path: &Path) -> Vec<PackageData> {
1547 vec![match extract_gem_archive(path) {
1548 Ok(data) => data,
1549 Err(e) => {
1550 warn!("Failed to extract .gem archive at {:?}: {}", path, e);
1551 default_package_data_with_datasource(DatasourceId::GemArchive)
1552 }
1553 }]
1554 }
1555
1556 fn is_match(path: &Path) -> bool {
1557 path.extension()
1558 .and_then(|ext| ext.to_str())
1559 .is_some_and(|ext| ext == "gem")
1560 }
1561}
1562
1563fn extract_gem_archive(path: &Path) -> Result<PackageData, String> {
1564 let file_metadata =
1565 fs::metadata(path).map_err(|e| format!("Failed to read file metadata: {}", e))?;
1566 let archive_size = file_metadata.len();
1567
1568 if archive_size > MAX_ARCHIVE_SIZE {
1569 return Err(format!(
1570 "Archive too large: {} bytes (limit: {} bytes)",
1571 archive_size, MAX_ARCHIVE_SIZE
1572 ));
1573 }
1574
1575 let file = File::open(path).map_err(|e| format!("Failed to open archive: {}", e))?;
1576 let mut archive = Archive::new(file);
1577
1578 for entry_result in archive
1579 .entries()
1580 .map_err(|e| format!("Failed to read tar entries: {}", e))?
1581 {
1582 let entry = entry_result.map_err(|e| format!("Failed to read tar entry: {}", e))?;
1583 let entry_path = entry
1584 .path()
1585 .map_err(|e| format!("Failed to get entry path: {}", e))?;
1586
1587 if entry_path.to_str() == Some("metadata.gz") {
1588 let entry_size = entry.size();
1589 if entry_size > MAX_FILE_SIZE {
1590 return Err(format!(
1591 "metadata.gz too large: {} bytes (limit: {} bytes)",
1592 entry_size, MAX_FILE_SIZE
1593 ));
1594 }
1595
1596 let mut decoder = GzDecoder::new(entry);
1597 let mut content = String::new();
1598 decoder
1599 .read_to_string(&mut content)
1600 .map_err(|e| format!("Failed to decompress metadata.gz: {}", e))?;
1601
1602 let uncompressed_size = content.len() as u64;
1603 if entry_size > 0 {
1604 let ratio = uncompressed_size as f64 / entry_size as f64;
1605 if ratio > MAX_COMPRESSION_RATIO {
1606 return Err(format!(
1607 "Suspicious compression ratio: {:.2}:1 (limit: {:.0}:1)",
1608 ratio, MAX_COMPRESSION_RATIO
1609 ));
1610 }
1611 }
1612 if uncompressed_size > MAX_FILE_SIZE {
1613 return Err(format!(
1614 "Decompressed metadata too large: {} bytes (limit: {} bytes)",
1615 uncompressed_size, MAX_FILE_SIZE
1616 ));
1617 }
1618
1619 return parse_gem_metadata_yaml(&content, DatasourceId::GemArchive);
1620 }
1621 }
1622
1623 Err("metadata.gz not found in .gem archive".to_string())
1624}
1625
1626fn parse_gem_metadata_yaml(
1627 content: &str,
1628 datasource_id: DatasourceId,
1629) -> Result<PackageData, String> {
1630 let cleaned = clean_ruby_yaml_tags(content);
1634
1635 let yaml: yaml_serde::Value =
1636 yaml_serde::from_str(&cleaned).map_err(|e| format!("Failed to parse YAML: {}", e))?;
1637
1638 let name = yaml_string(&yaml, "name");
1639 let version = yaml.get("version").and_then(|v| {
1640 if v.is_string() {
1642 v.as_str().map(|s| s.to_string())
1643 } else {
1644 yaml_string(v, "version")
1645 }
1646 });
1647 let description = yaml_string(&yaml, "description").or_else(|| yaml_string(&yaml, "summary"));
1648 let homepage = yaml_string(&yaml, "homepage");
1649 let summary = yaml_string(&yaml, "summary");
1650
1651 let licenses: Vec<String> = yaml
1653 .get("licenses")
1654 .and_then(|v| v.as_sequence())
1655 .map(|seq| {
1656 seq.iter()
1657 .filter_map(|item| item.as_str().map(|s| s.to_string()))
1658 .collect()
1659 })
1660 .unwrap_or_default();
1661
1662 let extracted_license_statement = if !licenses.is_empty() {
1664 Some(licenses.join(" AND "))
1665 } else {
1666 None
1667 };
1668
1669 let (license_expression, license_expression_spdx, license_detections) =
1670 normalize_spdx_declared_license(extracted_license_statement.as_deref());
1671
1672 let authors: Vec<String> = yaml
1674 .get("authors")
1675 .and_then(|v| v.as_sequence())
1676 .map(|seq| {
1677 seq.iter()
1678 .filter_map(|item| item.as_str().map(|s| s.to_string()))
1679 .collect()
1680 })
1681 .unwrap_or_default();
1682
1683 let emails: Vec<String> = yaml
1684 .get("email")
1685 .map(|v| {
1686 if let Some(seq) = v.as_sequence() {
1687 seq.iter()
1688 .filter_map(|item| item.as_str().map(|s| s.to_string()))
1689 .collect()
1690 } else if let Some(s) = v.as_str() {
1691 vec![s.to_string()]
1692 } else {
1693 Vec::new()
1694 }
1695 })
1696 .unwrap_or_default();
1697
1698 let mut parties: Vec<Party> = Vec::new();
1700 let max_len = authors.len().max(emails.len());
1701 for i in 0..max_len {
1702 let author_name = authors.get(i).map(|s| s.as_str());
1703 let email_str = emails.get(i).map(|s| s.as_str());
1704
1705 let (parsed_email_name, parsed_email) = match email_str {
1706 Some(e) if e.contains('<') => split_name_email(e),
1707 None => (None, None),
1708 _ => (None, None),
1709 };
1710
1711 let party_name = author_name.map(|s| s.to_string()).or(parsed_email_name);
1712
1713 parties.push(Party {
1714 r#type: Some("person".to_string()),
1715 role: Some("author".to_string()),
1716 name: party_name,
1717 email: parsed_email.or_else(|| {
1718 email_str
1719 .filter(|e| e.contains('@') && !e.contains('<'))
1720 .map(|e| e.to_string())
1721 }),
1722 url: None,
1723 organization: None,
1724 organization_url: None,
1725 timezone: None,
1726 });
1727 }
1728
1729 let dependencies = parse_gem_yaml_dependencies(&yaml);
1731
1732 let metadata = yaml.get("metadata");
1733
1734 let bug_tracking_url = metadata.and_then(|m| yaml_string(m, "bug_tracking_uri"));
1735
1736 let code_view_url = metadata.and_then(|m| yaml_string(m, "source_code_uri"));
1737
1738 let vcs_url = code_view_url
1739 .clone()
1740 .or_else(|| metadata.and_then(|m| yaml_string(m, "homepage_uri")));
1741
1742 let file_references = metadata
1743 .and_then(|m| m.get("files"))
1744 .and_then(|f| f.as_sequence())
1745 .map(|seq| {
1746 seq.iter()
1747 .filter_map(|v| v.as_str())
1748 .map(|s| crate::models::FileReference {
1749 path: s.to_string(),
1750 size: None,
1751 sha1: None,
1752 md5: None,
1753 sha256: None,
1754 sha512: None,
1755 extra_data: None,
1756 })
1757 .collect::<Vec<_>>()
1758 })
1759 .unwrap_or_default();
1760
1761 let release_date = yaml_string(&yaml, "date").and_then(|d| {
1762 if d.len() >= 10 {
1763 Some(d[..10].to_string())
1764 } else {
1765 None
1766 }
1767 });
1768
1769 let purl = name
1770 .as_deref()
1771 .map(|n| create_gem_purl(n, version.as_deref()))
1772 .unwrap_or(None);
1773
1774 let platform = yaml_string(&yaml, "platform");
1775 let (repository_homepage_url, repository_download_url, api_data_url, download_url) =
1776 if let Some(n) = name.as_deref() {
1777 get_rubygems_urls(n, version.as_deref(), platform.as_deref())
1778 } else {
1779 (None, None, None, None)
1780 };
1781
1782 let qualifiers = if let Some(ref p) = platform {
1783 if p != "ruby" {
1784 let mut q = HashMap::new();
1785 q.insert("platform".to_string(), p.clone());
1786 Some(q)
1787 } else {
1788 None
1789 }
1790 } else {
1791 None
1792 };
1793
1794 Ok(PackageData {
1795 package_type: Some(PACKAGE_TYPE),
1796 name,
1797 version,
1798 qualifiers,
1799 primary_language: Some("Ruby".to_string()),
1800 description: description.or(summary),
1801 release_date,
1802 homepage_url: homepage,
1803 download_url,
1804 bug_tracking_url,
1805 code_view_url,
1806 declared_license_expression: license_expression,
1807 declared_license_expression_spdx: license_expression_spdx,
1808 license_detections,
1809 extracted_license_statement,
1810 file_references,
1811 parties,
1812 dependencies,
1813 repository_homepage_url,
1814 repository_download_url,
1815 api_data_url,
1816 datasource_id: Some(datasource_id),
1817 purl,
1818 vcs_url,
1819 ..default_package_data()
1820 })
1821}
1822
1823fn clean_ruby_yaml_tags(content: &str) -> String {
1825 let tag_re = match Regex::new(r"!ruby/\S+") {
1826 Ok(r) => r,
1827 Err(_) => return content.to_string(),
1828 };
1829 tag_re.replace_all(content, "").to_string()
1830}
1831
1832fn yaml_string(yaml: &yaml_serde::Value, key: &str) -> Option<String> {
1833 yaml.get(key)
1834 .and_then(|v| v.as_str())
1835 .filter(|s| !s.is_empty())
1836 .map(|s| s.to_string())
1837}
1838
1839fn parse_gem_yaml_dependencies(yaml: &yaml_serde::Value) -> Vec<Dependency> {
1840 let mut dependencies = Vec::new();
1841
1842 let deps_seq = match yaml.get("dependencies").and_then(|v| v.as_sequence()) {
1843 Some(seq) => seq,
1844 None => return dependencies,
1845 };
1846
1847 for dep_value in deps_seq {
1848 let dep_name = match yaml_string(dep_value, "name") {
1849 Some(n) => n,
1850 None => continue,
1851 };
1852
1853 let dep_type = yaml_string(dep_value, "type");
1854 let is_development = dep_type.as_deref() == Some(":development");
1855
1856 let requirements = dep_value
1858 .get("requirement")
1859 .or_else(|| dep_value.get("version_requirements"))
1860 .and_then(|req| req.get("requirements"))
1861 .and_then(|reqs| reqs.as_sequence());
1862
1863 let extracted_requirement = requirements.map(|reqs| {
1864 let parts: Vec<String> = reqs
1865 .iter()
1866 .filter_map(|req| {
1867 let seq = req.as_sequence()?;
1868 if seq.len() >= 2 {
1869 let op = seq[0].as_str().unwrap_or("");
1870 let ver = seq[1].get("version").and_then(|v| v.as_str()).unwrap_or("");
1871 if op == ">=" && ver == "0" {
1872 None
1874 } else if op.is_empty() || ver.is_empty() {
1875 None
1876 } else {
1877 Some(format!("{} {}", op, ver))
1878 }
1879 } else {
1880 None
1881 }
1882 })
1883 .collect();
1884 parts.join(", ")
1885 });
1886
1887 let extracted_requirement = extracted_requirement
1888 .filter(|s| !s.is_empty())
1889 .or_else(|| Some(String::new()));
1890
1891 let (scope, is_runtime, is_optional) = if is_development {
1892 (Some("development".to_string()), false, true)
1893 } else {
1894 (Some("runtime".to_string()), true, false)
1895 };
1896
1897 let purl = create_gem_purl(&dep_name, None);
1898
1899 dependencies.push(Dependency {
1900 purl,
1901 extracted_requirement,
1902 scope,
1903 is_runtime: Some(is_runtime),
1904 is_optional: Some(is_optional),
1905 is_pinned: None,
1906 is_direct: Some(true),
1907 resolved_package: None,
1908 extra_data: None,
1909 });
1910 }
1911
1912 dependencies
1913}
1914
1915pub struct GemMetadataExtractedParser;
1920
1921impl PackageParser for GemMetadataExtractedParser {
1922 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1923
1924 fn extract_packages(path: &Path) -> Vec<PackageData> {
1925 vec![match extract_gem_metadata_extracted(path) {
1926 Ok(data) => data,
1927 Err(e) => {
1928 warn!("Failed to extract gem metadata from {:?}: {}", path, e);
1929 default_package_data_with_datasource(DatasourceId::GemArchiveExtracted)
1930 }
1931 }]
1932 }
1933
1934 fn is_match(path: &Path) -> bool {
1935 path.to_str()
1936 .is_some_and(|p| p.contains("metadata.gz-extract"))
1937 }
1938}
1939
1940fn extract_gem_metadata_extracted(path: &Path) -> Result<PackageData, String> {
1941 let content = fs::read_to_string(path)
1942 .map_err(|e| format!("Failed to read metadata.gz-extract file: {}", e))?;
1943
1944 parse_gem_metadata_yaml(&content, DatasourceId::GemArchiveExtracted)
1945}
1946
// Parser registrations for the Ruby ecosystem.
// NOTE(review): the argument roles below are inferred from the values passed
// (description, path glob patterns, package type, primary language, optional
// documentation URL) — confirm against the `register_parser!` definition.

// Gemfile manifests, including those found under `data.gz-extract/`.
crate::register_parser!(
    "Ruby Gemfile manifest",
    &["**/Gemfile", "**/data.gz-extract/Gemfile"],
    "gem",
    "Ruby",
    Some("https://bundler.io/man/gemfile.5.html"),
);

// Gemfile.lock lockfiles, including those found under `data.gz-extract/`.
crate::register_parser!(
    "Ruby Gemfile.lock lockfile",
    &["**/Gemfile.lock", "**/data.gz-extract/Gemfile.lock"],
    "gem",
    "Ruby",
    Some("https://bundler.io/man/gemfile.5.html"),
);

// `.gemspec` manifests, wherever they live (also under `data.gz-extract/`
// and `specifications/`).
crate::register_parser!(
    "Ruby .gemspec manifest",
    &[
        "**/*.gemspec",
        "**/data.gz-extract/*.gemspec",
        "**/specifications/*.gemspec"
    ],
    "gem",
    "Ruby",
    Some("https://guides.rubygems.org/specification-reference/"),
);

// Packed `.gem` archives.
crate::register_parser!(
    "Ruby .gem archive",
    &["**/*.gem"],
    "gem",
    "Ruby",
    Some("https://guides.rubygems.org/specification-reference/"),
);

// Gem metadata that has already been extracted from a `.gem` archive.
crate::register_parser!(
    "Ruby gem metadata (extracted)",
    &["**/metadata.gz-extract"],
    "gem",
    "Ruby",
    Some("https://guides.rubygems.org/specification-reference/"),
);
1991
#[cfg(test)]
mod tests {
    use super::parse_gemspec;

    /// A `%q{` literal whose closing brace is missing on the line still
    /// yields its text without the `%q{` prefix.
    #[test]
    fn test_clean_gemspec_value_handles_unterminated_percent_q() {
        let cleaned = super::clean_gemspec_value("%q{Arel is a SQL AST manager for Ruby. It");
        assert_eq!(cleaned, "Arel is a SQL AST manager for Ruby. It");
    }

    /// Both `add_runtime_dependency` and plain `add_dependency` produce
    /// runtime-scoped dependencies that carry their version requirement.
    #[test]
    fn test_parse_gemspec_runtime_dependency_scope() {
        let content = r#"
Gem::Specification.new do |spec|
  spec.name = "demo"
  spec.version = "1.0.0"
  spec.add_runtime_dependency "rack", "~> 3.0"
  spec.add_dependency "thor", ">= 1.0"
end
"#;

        let package_data = parse_gemspec(content);

        // Requirements in declaration order: rack first, then thor.
        let expected_requirements = ["~> 3.0", ">= 1.0"];
        assert_eq!(package_data.dependencies.len(), 2);
        for (dependency, requirement) in package_data
            .dependencies
            .iter()
            .zip(expected_requirements)
        {
            assert_eq!(dependency.scope, Some("runtime".to_string()));
            assert_eq!(
                dependency.extracted_requirement,
                Some(requirement.to_string())
            );
        }
    }
}