1use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Party};
28use crate::parser_warn as warn;
29use crate::parsers::utils::split_name_email;
30use flate2::read::GzDecoder;
31use packageurl::PackageUrl;
32use regex::Regex;
33use std::collections::HashMap;
34use std::fs::{self, File};
35use std::io::Read;
36use std::path::{Path, PathBuf};
37use tar::Archive;
38
39use super::PackageParser;
40use super::license_normalization::normalize_spdx_declared_license;
41
42const PACKAGE_TYPE: PackageType = PackageType::Gem;
43
/// Removes every trailing `.freeze` from `s`.
///
/// Repeated suffixes are all removed, so `"a.freeze.freeze"` yields `"a"`.
/// A `.freeze` that is not at the very end of the string is left alone.
pub fn strip_freeze_suffix(s: &str) -> &str {
    let mut remaining = s;
    while let Some(shorter) = remaining.strip_suffix(".freeze") {
        remaining = shorter;
    }
    remaining
}
61
/// A `... do` block that is currently open while a Gemfile is being scanned.
enum GemfileBlock {
    /// A `group ... do` block, carrying the group names listed on its opening line.
    Group(Vec<String>),
    /// A `source "..." do` block, carrying the quoted source string.
    Source(String),
}
66
67pub struct GemfileParser;
76
77impl PackageParser for GemfileParser {
78 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
79
80 fn extract_packages(path: &Path) -> Vec<PackageData> {
81 let content = match fs::read_to_string(path) {
82 Ok(c) => c,
83 Err(e) => {
84 warn!("Failed to read Gemfile at {:?}: {}", path, e);
85 return vec![default_package_data_with_datasource(DatasourceId::Gemfile)];
86 }
87 };
88
89 vec![parse_gemfile(&content)]
90 }
91
92 fn is_match(path: &Path) -> bool {
93 path.file_name()
94 .and_then(|n| n.to_str())
95 .is_some_and(|name| name == "Gemfile")
96 || path
97 .to_str()
98 .is_some_and(|p| p.contains("data.gz-extract/") && p.ends_with("/Gemfile"))
99 }
100}
101
102fn parse_gemfile(content: &str) -> PackageData {
104 let mut dependencies = Vec::new();
105 let mut block_stack = Vec::new();
106 let mut default_source = None;
107 let mut sources = Vec::new();
108
109 let gem_regex = match Regex::new(
112 r#"^\s*gem\s+["']([^"']+)["'](?:\.freeze)?(?:\s*,\s*["']([^"']+)["'](?:\.freeze)?)?(?:\s*,\s*["']([^"']+)["'](?:\.freeze)?)?(?:\s*,\s*(.+))?"#,
113 ) {
114 Ok(r) => r,
115 Err(e) => {
116 warn!("Failed to compile gem regex: {}", e);
117 return default_package_data_with_datasource(DatasourceId::Gemfile);
118 }
119 };
120
121 let group_start_regex = match Regex::new(r"^\s*group\s+(.+?)\s+do\s*$") {
123 Ok(r) => r,
124 Err(e) => {
125 warn!("Failed to compile group regex: {}", e);
126 return default_package_data_with_datasource(DatasourceId::Gemfile);
127 }
128 };
129
130 let group_end_regex = match Regex::new(r"^\s*end\s*$") {
131 Ok(r) => r,
132 Err(e) => {
133 warn!("Failed to compile end regex: {}", e);
134 return default_package_data_with_datasource(DatasourceId::Gemfile);
135 }
136 };
137
138 let source_block_start_regex = match Regex::new(r#"^\s*source\s+["']([^"']+)["']\s+do\s*$"#) {
139 Ok(r) => r,
140 Err(e) => {
141 warn!("Failed to compile source block regex: {}", e);
142 return default_package_data_with_datasource(DatasourceId::Gemfile);
143 }
144 };
145
146 let source_regex = match Regex::new(r#"^\s*source\s+["']([^"']+)["']\s*$"#) {
147 Ok(r) => r,
148 Err(e) => {
149 warn!("Failed to compile source regex: {}", e);
150 return default_package_data_with_datasource(DatasourceId::Gemfile);
151 }
152 };
153
154 let symbol_regex = match Regex::new(r":(\w+)") {
156 Ok(r) => r,
157 Err(e) => {
158 warn!("Failed to compile symbol regex: {}", e);
159 return default_package_data_with_datasource(DatasourceId::Gemfile);
160 }
161 };
162
163 for line in content.lines() {
164 let trimmed = line.trim();
165
166 if trimmed.is_empty() || trimmed.starts_with('#') {
168 continue;
169 }
170
171 if let Some(caps) = group_start_regex.captures(trimmed) {
173 let groups_str = caps.get(1).map(|m| m.as_str()).unwrap_or("");
174 let mut current_groups = Vec::new();
175 for cap in symbol_regex.captures_iter(groups_str) {
176 if let Some(group_name) = cap.get(1) {
177 current_groups.push(group_name.as_str().to_string());
178 }
179 }
180 block_stack.push(GemfileBlock::Group(current_groups));
181 continue;
182 }
183
184 if let Some(caps) = source_block_start_regex.captures(trimmed) {
185 let source = caps
186 .get(1)
187 .map(|m| m.as_str().to_string())
188 .unwrap_or_default();
189 if !source.is_empty() {
190 push_unique_string(&mut sources, source.clone());
191 block_stack.push(GemfileBlock::Source(source));
192 }
193 continue;
194 }
195
196 if let Some(caps) = source_regex.captures(trimmed) {
197 if let Some(source) = caps.get(1).map(|m| m.as_str().to_string()) {
198 push_unique_string(&mut sources, source.clone());
199 default_source = Some(source);
200 }
201 continue;
202 }
203
204 if group_end_regex.is_match(trimmed) {
206 block_stack.pop();
207 continue;
208 }
209
210 if let Some(caps) = gem_regex.captures(trimmed) {
212 let name = strip_freeze_suffix(caps.get(1).map(|m| m.as_str()).unwrap_or(""));
213 if name.is_empty() {
214 continue;
215 }
216
217 let mut version_parts = Vec::new();
219 if let Some(v) = caps.get(2) {
220 version_parts.push(strip_freeze_suffix(v.as_str()).to_string());
221 }
222 if let Some(v) = caps.get(3) {
223 let v_str = strip_freeze_suffix(v.as_str());
224 if looks_like_version_constraint(v_str) {
226 version_parts.push(v_str.to_string());
227 }
228 }
229
230 let extracted_requirement = if version_parts.is_empty() {
231 None
232 } else {
233 Some(version_parts.join(", "))
234 };
235
236 let current_groups = current_group_names(&block_stack);
237
238 let (scope, is_runtime, is_optional) = if current_groups.is_empty() {
241 (None, true, false)
243 } else if current_groups.iter().any(|g| g == "development") {
244 (Some("development".to_string()), false, true)
245 } else if current_groups.iter().any(|g| g == "test") {
246 (Some("test".to_string()), false, true)
247 } else {
248 let group = current_groups.first().cloned();
250 (group, true, false)
251 };
252
253 let purl = create_gem_purl(name, None);
255 let inherited_source = current_source(&block_stack, default_source.as_deref());
256 let extra_data = build_gemfile_dependency_extra_data(
257 caps.get(4).map(|m| m.as_str()),
258 inherited_source.as_deref(),
259 );
260
261 dependencies.push(Dependency {
262 purl,
263 extracted_requirement,
264 scope,
265 is_runtime: Some(is_runtime),
266 is_optional: Some(is_optional),
267 is_pinned: None,
268 is_direct: Some(true),
269 resolved_package: None,
270 extra_data,
271 });
272 }
273 }
274
275 let extra_data = if sources.is_empty() {
276 None
277 } else {
278 Some(HashMap::from([(
279 "sources".to_string(),
280 serde_json::Value::Array(sources.into_iter().map(serde_json::Value::String).collect()),
281 )]))
282 };
283
284 PackageData {
285 package_type: Some(PACKAGE_TYPE),
286 primary_language: Some("Ruby".to_string()),
287 dependencies,
288 extra_data,
289 datasource_id: Some(DatasourceId::Gemfile),
290 ..default_package_data()
291 }
292}
293
294fn current_group_names(block_stack: &[GemfileBlock]) -> Vec<String> {
295 block_stack
296 .iter()
297 .rev()
298 .find_map(|block| match block {
299 GemfileBlock::Group(groups) => Some(groups.clone()),
300 GemfileBlock::Source(_) => None,
301 })
302 .unwrap_or_default()
303}
304
305fn current_source(block_stack: &[GemfileBlock], default_source: Option<&str>) -> Option<String> {
306 block_stack
307 .iter()
308 .rev()
309 .find_map(|block| match block {
310 GemfileBlock::Source(source) => Some(source.clone()),
311 GemfileBlock::Group(_) => None,
312 })
313 .or_else(|| default_source.map(str::to_string))
314}
315
/// Appends `value` to `values` unless an equal string is already present.
fn push_unique_string(values: &mut Vec<String>, value: String) {
    let already_present = values.iter().any(|existing| *existing == value);
    if already_present {
        return;
    }
    values.push(value);
}
321
322fn build_gemfile_dependency_extra_data(
323 options: Option<&str>,
324 inherited_source: Option<&str>,
325) -> Option<HashMap<String, serde_json::Value>> {
326 let mut extra = HashMap::new();
327 let options = options.unwrap_or("");
328
329 if let Some(git) = extract_gemfile_quoted_option(options, "git") {
330 extra.insert(
331 "source_type".to_string(),
332 serde_json::Value::String("GIT".to_string()),
333 );
334 extra.insert("git".to_string(), serde_json::Value::String(git.clone()));
335 extra.insert("remote".to_string(), serde_json::Value::String(git));
336 }
337
338 if let Some(path) = extract_gemfile_quoted_option(options, "path") {
339 extra.insert(
340 "source_type".to_string(),
341 serde_json::Value::String("PATH".to_string()),
342 );
343 extra.insert("path".to_string(), serde_json::Value::String(path));
344 }
345
346 for key in ["branch", "ref", "tag"] {
347 if let Some(value) = extract_gemfile_quoted_option(options, key) {
348 extra.insert(key.to_string(), serde_json::Value::String(value));
349 }
350 }
351
352 let direct_source = extract_gemfile_quoted_option(options, "source");
353 if let Some(source) = direct_source {
354 extra.insert("source".to_string(), serde_json::Value::String(source));
355 } else if !extra.contains_key("source_type")
356 && let Some(source) = inherited_source
357 {
358 extra.insert(
359 "source".to_string(),
360 serde_json::Value::String(source.to_string()),
361 );
362 }
363
364 (!extra.is_empty()).then_some(extra)
365}
366
367fn extract_gemfile_quoted_option(options: &str, key: &str) -> Option<String> {
368 if options.is_empty() {
369 return None;
370 }
371
372 let pattern = format!(r#"(?:^|,\s*){}\s*:\s*["']([^"']+)["']"#, regex::escape(key));
373 Regex::new(&pattern)
374 .ok()
375 .and_then(|regex| regex.captures(options))
376 .and_then(|captures| captures.get(1).map(|m| m.as_str().to_string()))
377}
378
/// Reports whether `s` starts like a version requirement.
///
/// That is the case when its first character is one of `~`, `>`, `<`, `=`,
/// `!` or an ASCII digit. An empty string never qualifies.
fn looks_like_version_constraint(s: &str) -> bool {
    match s.chars().next() {
        Some('~' | '>' | '<' | '=' | '!') => true,
        Some(first) => first.is_ascii_digit(),
        None => false,
    }
}
388
389pub struct GemfileLockParser;
398
399impl PackageParser for GemfileLockParser {
400 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
401
402 fn extract_packages(path: &Path) -> Vec<PackageData> {
403 let content = match fs::read_to_string(path) {
404 Ok(c) => c,
405 Err(e) => {
406 warn!("Failed to read Gemfile.lock at {:?}: {}", path, e);
407 return vec![default_package_data_with_datasource(
408 DatasourceId::GemfileLock,
409 )];
410 }
411 };
412
413 vec![parse_gemfile_lock(&content)]
414 }
415
416 fn is_match(path: &Path) -> bool {
417 path.file_name()
418 .and_then(|n| n.to_str())
419 .is_some_and(|name| name == "Gemfile.lock")
420 || path
421 .to_str()
422 .is_some_and(|p| p.contains("data.gz-extract/") && p.ends_with("/Gemfile.lock"))
423 }
424}
425
/// Section of a `Gemfile.lock` the line scanner is currently inside.
#[derive(Debug, Clone, PartialEq)]
enum ParseState {
    /// No section header has been seen yet.
    None,
    /// Inside a `GEM` section, before its `specs:` line.
    Gem,
    /// Inside a `GIT` section, before its `specs:` line.
    Git,
    /// Inside a `PATH` section, before its `specs:` line.
    Path,
    /// Inside an `SVN` section, before its `specs:` line.
    Svn,
    /// After the `specs:` line of a GEM/GIT/PATH/SVN section.
    Specs,
    /// Inside the `PLATFORMS` section.
    Platforms,
    /// Inside the `BUNDLED WITH` section.
    BundledWith,
    /// Inside the `DEPENDENCIES` section.
    Dependencies,
}
439
/// Everything collected about one gem while reading a `Gemfile.lock`.
#[derive(Debug, Clone, Default)]
struct GemInfo {
    /// Gem name.
    name: String,
    /// Version taken from the gem's spec line, if any.
    version: Option<String>,
    /// Platform suffix split off the spec-line version, if any.
    platform: Option<String>,
    /// Header of the section the gem was listed under (`GEM`, `GIT`, `PATH`, `SVN`).
    gem_type: String,
    /// Value of the section's `remote:` option.
    remote: Option<String>,
    /// Value of the section's `revision:` option.
    revision: Option<String>,
    /// Value of the section's `ref:` option.
    ref_field: Option<String>,
    /// Value of the section's `branch:` option.
    branch: Option<String>,
    /// Value of the section's `tag:` option.
    tag: Option<String>,
    /// Whether the gem's `DEPENDENCIES` entry ends with `!`.
    pinned: bool,
    /// Version constraints listed for the gem in `DEPENDENCIES`.
    requirements: Vec<String>,
}
460
461fn parse_gemfile_lock(content: &str) -> PackageData {
463 let mut state = ParseState::None;
464 let mut dependencies = Vec::new();
465 let mut gems: HashMap<String, GemInfo> = HashMap::new();
466 let mut platforms: Vec<String> = Vec::new();
467 let mut bundler_version: Option<String> = None;
468 let mut current_gem_type = String::new();
469 let mut current_remote: Option<String> = None;
470 let mut current_options: HashMap<String, String> = HashMap::new();
471
472 let deps_regex = match Regex::new(r"^ {2}([^ \)\(,!:]+)(?: \(([^)]+)\))?(!)?$") {
474 Ok(r) => r,
475 Err(e) => {
476 warn!("Failed to compile deps regex: {}", e);
477 return default_package_data_with_datasource(DatasourceId::GemfileLock);
478 }
479 };
480
481 let spec_deps_regex = match Regex::new(r"^ {4}([^ \)\(,!:]+)(?: \(([^)]+)\))?$") {
483 Ok(r) => r,
484 Err(e) => {
485 warn!("Failed to compile spec_deps regex: {}", e);
486 return default_package_data_with_datasource(DatasourceId::GemfileLock);
487 }
488 };
489
490 let options_regex = match Regex::new(r"^ {2}([a-z]+): (.+)$") {
492 Ok(r) => r,
493 Err(e) => {
494 warn!("Failed to compile options regex: {}", e);
495 return default_package_data_with_datasource(DatasourceId::GemfileLock);
496 }
497 };
498
499 let version_regex = match Regex::new(r"^\s+(\d+(?:\.\d+)+)\s*$") {
501 Ok(r) => r,
502 Err(e) => {
503 warn!("Failed to compile version regex: {}", e);
504 return default_package_data_with_datasource(DatasourceId::GemfileLock);
505 }
506 };
507
508 for line in content.lines() {
509 let trimmed = line.trim_end();
510
511 if trimmed.is_empty() {
513 current_options.clear();
514 continue;
515 }
516
517 match trimmed {
519 "GEM" => {
520 state = ParseState::Gem;
521 current_gem_type = "GEM".to_string();
522 current_remote = None;
523 current_options.clear();
524 continue;
525 }
526 "GIT" => {
527 state = ParseState::Git;
528 current_gem_type = "GIT".to_string();
529 current_remote = None;
530 current_options.clear();
531 continue;
532 }
533 "PATH" => {
534 state = ParseState::Path;
535 current_gem_type = "PATH".to_string();
536 current_remote = None;
537 current_options.clear();
538 continue;
539 }
540 "SVN" => {
541 state = ParseState::Svn;
542 current_gem_type = "SVN".to_string();
543 current_remote = None;
544 current_options.clear();
545 continue;
546 }
547 "PLATFORMS" => {
548 state = ParseState::Platforms;
549 continue;
550 }
551 "BUNDLED WITH" => {
552 state = ParseState::BundledWith;
553 continue;
554 }
555 "DEPENDENCIES" => {
556 state = ParseState::Dependencies;
557 continue;
558 }
559 _ => {}
560 }
561
562 if trimmed.trim() == "specs:" {
566 state = match state {
567 ParseState::Gem | ParseState::Git | ParseState::Path | ParseState::Svn => {
568 ParseState::Specs
569 }
570 _ => state,
571 };
572 continue;
573 }
574
575 match state {
577 ParseState::Gem | ParseState::Git | ParseState::Path | ParseState::Svn => {
578 if let Some(caps) = options_regex.captures(line) {
580 let key = caps.get(1).map(|m| m.as_str()).unwrap_or("");
581 let value = caps.get(2).map(|m| m.as_str()).unwrap_or("");
582 current_options.insert(key.to_string(), value.to_string());
583 if key == "remote" {
584 current_remote = Some(value.to_string());
585 }
586 }
587 }
588 ParseState::Specs => {
589 if let Some(caps) = spec_deps_regex.captures(line) {
591 let name = caps.get(1).map(|m| m.as_str()).unwrap_or("").to_string();
592 let version_str = caps.get(2).map(|m| m.as_str()).unwrap_or("");
593
594 let (version, platform) = parse_version_platform(version_str);
596
597 if !name.is_empty() {
598 let gem_info = GemInfo {
599 name: name.clone(),
600 version,
601 platform,
602 gem_type: current_gem_type.clone(),
603 remote: current_remote.clone(),
604 revision: current_options.get("revision").cloned(),
605 ref_field: current_options.get("ref").cloned(),
606 branch: current_options.get("branch").cloned(),
607 tag: current_options.get("tag").cloned(),
608 pinned: false,
609 requirements: Vec::new(),
610 };
611 gems.insert(name, gem_info);
612 }
613 }
614 }
615 ParseState::Platforms => {
616 let platform = trimmed.trim();
618 if !platform.is_empty() {
619 platforms.push(platform.to_string());
620 }
621 }
622 ParseState::BundledWith => {
623 if let Some(caps) = version_regex.captures(line) {
625 bundler_version = caps.get(1).map(|m| m.as_str().to_string());
626 }
627 }
628 ParseState::Dependencies => {
629 if let Some(caps) = deps_regex.captures(line) {
631 let name = caps.get(1).map(|m| m.as_str()).unwrap_or("").to_string();
632 let version_constraint = caps.get(2).map(|m| m.as_str().to_string());
633 let pinned = caps.get(3).is_some();
634
635 if !name.is_empty() {
636 if let Some(gem) = gems.get_mut(&name) {
638 gem.pinned = pinned;
639 if let Some(vc) = &version_constraint {
640 gem.requirements.push(vc.clone());
641 }
642 } else {
643 let gem_info = GemInfo {
644 name: name.clone(),
645 version: None,
646 platform: None,
647 gem_type: "GEM".to_string(),
648 remote: None,
649 revision: None,
650 ref_field: None,
651 branch: None,
652 tag: None,
653 pinned,
654 requirements: version_constraint.into_iter().collect(),
655 };
656 gems.insert(name, gem_info);
657 }
658 }
659 }
660 }
661 ParseState::None => {}
662 }
663 }
664
665 let primary_gem = gems.values().find(|gem| gem.gem_type == "PATH").cloned();
666
667 let (
668 package_name,
669 package_version,
670 repository_homepage_url,
671 repository_download_url,
672 api_data_url,
673 download_url,
674 ) = if let Some(ref pg) = primary_gem {
675 let urls = get_rubygems_urls(&pg.name, pg.version.as_deref(), pg.platform.as_deref());
676 (
677 Some(pg.name.clone()),
678 pg.version.clone(),
679 urls.0,
680 urls.1,
681 urls.2,
682 urls.3,
683 )
684 } else {
685 (None, None, None, None, None, None)
686 };
687
688 for (_, gem) in gems {
689 if let Some(ref pg) = primary_gem
690 && gem.name == pg.name
691 {
692 continue;
693 }
694
695 let version_for_purl = gem.version.as_deref();
696 let purl = create_gem_purl(&gem.name, version_for_purl);
697
698 let extracted_requirement = if !gem.requirements.is_empty() {
699 Some(gem.requirements.join(", "))
700 } else {
701 gem.version.clone()
702 };
703
704 let extra_data = build_gem_source_extra_data(&gem);
705
706 dependencies.push(Dependency {
707 purl,
708 extracted_requirement,
709 scope: Some("dependencies".to_string()),
710 is_runtime: Some(true),
711 is_optional: Some(false),
712 is_pinned: Some(gem.pinned),
713 is_direct: Some(true),
714 resolved_package: None,
715 extra_data,
716 });
717 }
718
719 dependencies.sort_by(|left, right| {
720 left.purl
721 .as_deref()
722 .cmp(&right.purl.as_deref())
723 .then_with(|| {
724 left.extracted_requirement
725 .as_deref()
726 .cmp(&right.extracted_requirement.as_deref())
727 })
728 });
729
730 let mut extra_data = HashMap::new();
732 if !platforms.is_empty() {
733 extra_data.insert(
734 "platforms".to_string(),
735 serde_json::Value::Array(
736 platforms
737 .into_iter()
738 .map(serde_json::Value::String)
739 .collect(),
740 ),
741 );
742 }
743 if let Some(bv) = bundler_version {
744 extra_data.insert("bundler_version".to_string(), serde_json::Value::String(bv));
745 }
746
747 let purl = package_name
748 .as_deref()
749 .map(|n| create_gem_purl(n, package_version.as_deref()))
750 .unwrap_or(None);
751
752 PackageData {
753 package_type: Some(PACKAGE_TYPE),
754 name: package_name,
755 version: package_version,
756 primary_language: Some("Ruby".to_string()),
757 download_url,
758 dependencies,
759 repository_homepage_url,
760 repository_download_url,
761 api_data_url,
762 extra_data: if extra_data.is_empty() {
763 None
764 } else {
765 Some(extra_data)
766 },
767 datasource_id: Some(DatasourceId::GemfileLock),
768 purl,
769 ..default_package_data()
770 }
771}
772
773fn build_gem_source_extra_data(gem: &GemInfo) -> Option<HashMap<String, serde_json::Value>> {
774 if gem.gem_type != "GIT" && gem.gem_type != "PATH" && gem.gem_type != "SVN" {
775 return None;
776 }
777
778 let mut extra = HashMap::new();
779 extra.insert(
780 "source_type".to_string(),
781 serde_json::Value::String(gem.gem_type.clone()),
782 );
783
784 if let Some(ref remote) = gem.remote {
785 extra.insert(
786 "remote".to_string(),
787 serde_json::Value::String(remote.clone()),
788 );
789 }
790 if let Some(ref revision) = gem.revision {
791 extra.insert(
792 "revision".to_string(),
793 serde_json::Value::String(revision.clone()),
794 );
795 }
796 if let Some(ref ref_field) = gem.ref_field {
797 extra.insert(
798 "ref".to_string(),
799 serde_json::Value::String(ref_field.clone()),
800 );
801 }
802 if let Some(ref branch) = gem.branch {
803 extra.insert(
804 "branch".to_string(),
805 serde_json::Value::String(branch.clone()),
806 );
807 }
808 if let Some(ref tag) = gem.tag {
809 extra.insert("tag".to_string(), serde_json::Value::String(tag.clone()));
810 }
811
812 Some(extra)
813}
814
/// Splits a lock-file version string into `(version, platform)`.
///
/// The split happens at the first `-`: everything before it is the version
/// and everything after it the platform. Without a `-` the whole string is
/// the version. An empty string yields `(None, None)`.
fn parse_version_platform(s: &str) -> (Option<String>, Option<String>) {
    if s.is_empty() {
        return (None, None);
    }

    match s.split_once('-') {
        Some((version, platform)) => (Some(version.to_string()), Some(platform.to_string())),
        None => (Some(s.to_string()), None),
    }
}
829
830fn create_gem_purl(name: &str, version: Option<&str>) -> Option<String> {
832 let mut purl = match PackageUrl::new(PACKAGE_TYPE.as_str(), name) {
833 Ok(p) => p,
834 Err(e) => {
835 warn!("Failed to create PURL for gem '{}': {}", name, e);
836 return None;
837 }
838 };
839
840 if let Some(v) = version
841 && let Err(e) = purl.with_version(v)
842 {
843 warn!("Failed to set version '{}' for gem '{}': {}", v, name, e);
844 }
845
846 Some(purl.to_string())
847}
848
/// Builds the rubygems.org page URL for a gem.
///
/// With a version the URL points at that version's page; the version is
/// trimmed of surrounding whitespace and then of surrounding `/`. Returns
/// `None` when `name` is empty.
fn rubygems_homepage_url(name: &str, version: Option<&str>) -> Option<String> {
    if name.is_empty() {
        return None;
    }

    let url = match version {
        Some(raw_version) => format!(
            "https://rubygems.org/gems/{}/versions/{}",
            name,
            raw_version.trim().trim_matches('/')
        ),
        None => format!("https://rubygems.org/gems/{}", name),
    };
    Some(url)
}
861
/// Builds the rubygems.org download URL of a `.gem` file.
///
/// `name` and `version` are trimmed of surrounding whitespace and then of
/// surrounding `/`. A platform other than `"ruby"` is appended to the
/// version as `-<platform>`. Returns `None` when `name` is empty or no
/// version is given.
fn rubygems_download_url(
    name: &str,
    version: Option<&str>,
    platform: Option<&str>,
) -> Option<String> {
    let raw_version = version?;
    if name.is_empty() {
        return None;
    }

    let gem_name = name.trim().trim_matches('/');
    let gem_version = raw_version.trim().trim_matches('/');

    let versioned = match platform {
        Some(p) if p != "ruby" => format!("{}-{}", gem_version, p),
        _ => gem_version.to_string(),
    };

    Some(format!(
        "https://rubygems.org/downloads/{}-{}.gem",
        gem_name, versioned
    ))
}
889
/// Builds the rubygems.org API URL for a gem.
///
/// With a version the v2 per-version endpoint is used, otherwise the v1
/// versions endpoint. Returns `None` when `name` is empty.
fn rubygems_api_url(name: &str, version: Option<&str>) -> Option<String> {
    if name.is_empty() {
        return None;
    }

    let url = match version {
        Some(v) => format!(
            "https://rubygems.org/api/v2/rubygems/{}/versions/{}.json",
            name, v
        ),
        None => format!("https://rubygems.org/api/v1/versions/{}.json", name),
    };
    Some(url)
}
907
908fn get_rubygems_urls(
909 name: &str,
910 version: Option<&str>,
911 platform: Option<&str>,
912) -> (
913 Option<String>,
914 Option<String>,
915 Option<String>,
916 Option<String>,
917) {
918 let repository_homepage_url = rubygems_homepage_url(name, version);
919 let repository_download_url = rubygems_download_url(name, version, platform);
920 let api_data_url = rubygems_api_url(name, version);
921 let download_url = repository_download_url.clone();
922
923 (
924 repository_homepage_url,
925 repository_download_url,
926 api_data_url,
927 download_url,
928 )
929}
930
931fn default_package_data() -> PackageData {
933 PackageData {
934 package_type: Some(PACKAGE_TYPE),
935 primary_language: Some("Ruby".to_string()),
936 ..Default::default()
937 }
938}
939
940fn default_package_data_with_datasource(datasource_id: DatasourceId) -> PackageData {
941 PackageData {
942 datasource_id: Some(datasource_id),
943 ..default_package_data()
944 }
945}
946
947pub struct GemspecParser;
957
958impl PackageParser for GemspecParser {
959 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
960
961 fn extract_packages(path: &Path) -> Vec<PackageData> {
962 let content = match fs::read_to_string(path) {
963 Ok(c) => c,
964 Err(e) => {
965 warn!("Failed to read .gemspec at {:?}: {}", path, e);
966 return vec![default_package_data_with_datasource(DatasourceId::Gemspec)];
967 }
968 };
969
970 vec![parse_gemspec_with_context(&content, path.parent())]
971 }
972
973 fn is_match(path: &Path) -> bool {
974 path.extension()
975 .and_then(|ext| ext.to_str())
976 .is_some_and(|ext| ext == "gemspec")
977 }
978}
979
980fn clean_gemspec_value(s: &str) -> String {
982 let s = strip_freeze_suffix(s).trim();
983
984 let s = if let Some(pos) = s.find(" #") {
985 s[..pos].trim()
986 } else {
987 s
988 };
989
990 let s = if let Some(stripped) = s.strip_prefix("%q{") {
991 stripped.strip_suffix('}').unwrap_or(stripped)
992 } else if let Some(stripped) = s.strip_prefix("%q<") {
993 stripped.strip_suffix('>').unwrap_or(stripped)
994 } else if let Some(stripped) = s.strip_prefix("%q[") {
995 stripped.strip_suffix(']').unwrap_or(stripped)
996 } else if let Some(stripped) = s.strip_prefix("%q(") {
997 stripped.strip_suffix(')').unwrap_or(stripped)
998 } else {
999 s
1000 };
1001
1002 let s = s
1003 .trim_start_matches('"')
1004 .trim_end_matches('"')
1005 .trim_start_matches('\'')
1006 .trim_end_matches('\'');
1007 let s = strip_freeze_suffix(s).trim();
1008 s.to_string()
1009}
1010
1011fn extract_ruby_array(s: &str) -> Vec<String> {
1013 let s = strip_freeze_suffix(s.trim());
1014 let s = s.trim_start_matches('[').trim_end_matches(']');
1015 let item_re = match Regex::new(r#"["']([^"']*?)["'](?:\.freeze)?"#) {
1016 Ok(r) => r,
1017 Err(_) => return Vec::new(),
1018 };
1019 item_re
1020 .captures_iter(s)
1021 .filter_map(|cap| cap.get(1).map(|m| m.as_str().to_string()))
1022 .collect()
1023}
1024
1025fn extract_all_ruby_values(s: &str) -> Vec<String> {
1026 let value_re = match Regex::new(r#"%q[\{<\[(]([^\}>\])]+)[\}>\])]|["']([^"']+)["']"#) {
1027 Ok(r) => r,
1028 Err(_) => return Vec::new(),
1029 };
1030
1031 value_re
1032 .captures_iter(s)
1033 .filter_map(|caps| caps.get(1).or_else(|| caps.get(2)))
1034 .map(|m| clean_gemspec_value(m.as_str()))
1035 .collect()
1036}
1037
1038fn extract_first_ruby_value(s: &str) -> Option<String> {
1039 extract_all_ruby_values(s).into_iter().next()
1040}
1041
/// Returns the trimmed text that follows the first top-level comma of `args`.
///
/// A comma is top-level when it is outside single/double quotes and outside
/// any `[]`, `{}`, `<>` or `()` nesting. Inside quotes a backslash skips the
/// character after it. Returns `""` when there is no top-level comma.
fn after_first_argument(args: &str) -> &str {
    let mut open_brackets = 0usize;
    let mut open_parens = 0usize;
    let mut active_quote: Option<char> = None;
    let mut chars = args.char_indices();

    while let Some((offset, ch)) = chars.next() {
        if let Some(quote) = active_quote {
            if ch == '\\' {
                // Skip the escaped character so an escaped quote does not
                // end the string.
                chars.next();
            } else if ch == quote {
                active_quote = None;
            }
            continue;
        }

        match ch {
            '\'' | '"' => active_quote = Some(ch),
            '[' | '{' | '<' => open_brackets += 1,
            ']' | '}' | '>' => open_brackets = open_brackets.saturating_sub(1),
            '(' => open_parens += 1,
            ')' => open_parens = open_parens.saturating_sub(1),
            ',' if open_brackets == 0 && open_parens == 0 => {
                return args[offset + 1..].trim();
            }
            _ => {}
        }
    }

    ""
}
1079
1080fn resolve_variable_version(var_name: &str, contexts: &[String]) -> Option<String> {
1085 let var_name = var_name.trim();
1086 if var_name.is_empty() {
1087 return None;
1088 }
1089
1090 for candidate in candidate_constant_names(var_name) {
1091 let escaped = regex::escape(&candidate);
1092 let pattern = format!(r#"(?m)^\s*{}\s*=\s*["']([^"']+)["']"#, escaped);
1093 let Ok(re) = Regex::new(&pattern) else {
1094 continue;
1095 };
1096
1097 for context in contexts {
1098 if let Some(caps) = re.captures(context) {
1099 return caps.get(1).map(|m| m.as_str().to_string());
1100 }
1101 }
1102 }
1103
1104 None
1105}
1106
1107fn resolve_variable_array(var_name: &str, contexts: &[String]) -> Option<Vec<String>> {
1108 let var_name = var_name.trim();
1109 if var_name.is_empty() {
1110 return None;
1111 }
1112
1113 for candidate in candidate_constant_names(var_name) {
1114 let escaped = regex::escape(&candidate);
1115 let pattern = format!(r#"(?m)^\s*{}\s*=\s*(\[[^\n]+\])"#, escaped);
1116 let Ok(re) = Regex::new(&pattern) else {
1117 continue;
1118 };
1119
1120 for context in contexts {
1121 if let Some(caps) = re.captures(context)
1122 && let Some(raw) = caps.get(1)
1123 {
1124 let values = extract_ruby_array(raw.as_str());
1125 if !values.is_empty() {
1126 return Some(values);
1127 }
1128 }
1129 }
1130 }
1131
1132 None
1133}
1134
/// Lists the spellings to try when resolving a constant reference.
///
/// The name as given always comes first. If it contains `::`, the segment
/// after the last `::` is added as a second candidate.
fn candidate_constant_names(var_name: &str) -> Vec<String> {
    let mut candidates = vec![var_name.to_string()];

    match var_name.rsplit("::").next() {
        Some(tail) if tail != var_name => candidates.push(tail.to_string()),
        _ => {}
    }

    candidates
}
1144
1145fn load_required_ruby_contexts(content: &str, base_dir: Option<&Path>) -> Vec<String> {
1146 let mut contexts = vec![content.to_string()];
1147 let Some(base_dir) = base_dir else {
1148 return contexts;
1149 };
1150
1151 let require_re = match Regex::new(r#"(?m)^\s*require(?:_relative)?\s+["']([^"']+)["']"#) {
1152 Ok(re) => re,
1153 Err(_) => return contexts,
1154 };
1155
1156 for caps in require_re.captures_iter(content) {
1157 let Some(required) = caps.get(1).map(|m| m.as_str()) else {
1158 continue;
1159 };
1160 for candidate in candidate_require_paths(base_dir, required) {
1161 if let Ok(required_content) = fs::read_to_string(&candidate) {
1162 contexts.push(required_content);
1163 break;
1164 }
1165 }
1166 }
1167
1168 contexts
1169}
1170
/// Lists the file paths a Ruby `require` argument may refer to.
///
/// `::` in `required` is replaced by `/` and a `.rb` extension is appended
/// unless already present. The result is looked for directly under
/// `base_dir` and under `base_dir/lib`, in that order.
///
/// NOTE(review): `required` is joined as-is, so an absolute path or `..`
/// segments would resolve outside `base_dir` — confirm this is acceptable
/// when scanning untrusted gemspecs.
fn candidate_require_paths(base_dir: &Path, required: &str) -> Vec<PathBuf> {
    let mut filename = required.replace("::", "/");
    if !filename.ends_with(".rb") {
        filename.push_str(".rb");
    }

    let beside_gemspec = base_dir.join(&filename);
    let under_lib = base_dir.join("lib").join(&filename);
    vec![beside_gemspec, under_lib]
}
1184
/// Reports whether `s` looks like a reference to a Ruby constant: it either
/// starts with an ASCII uppercase letter or contains `::`.
fn looks_like_constant_reference(s: &str) -> bool {
    match s.chars().next() {
        Some(first) if first.is_ascii_uppercase() => true,
        _ => s.contains("::"),
    }
}
1188
/// Test-only helper: parses gemspec text without a base directory.
#[cfg(test)]
fn parse_gemspec(content: &str) -> PackageData {
    let no_base_dir: Option<&Path> = None;
    parse_gemspec_with_context(content, no_base_dir)
}
1194
1195fn parse_gemspec_with_context(content: &str, base_dir: Option<&Path>) -> PackageData {
1196 let contexts = load_required_ruby_contexts(content, base_dir);
1197
1198 let field_re = match Regex::new(
1201 r#"(?m)^\s*\w+\.(name|version|summary|description|homepage|license)\s*=\s*(.+)$"#,
1202 ) {
1203 Ok(r) => r,
1204 Err(e) => {
1205 warn!("Failed to compile gemspec field regex: {}", e);
1206 return default_package_data_with_datasource(DatasourceId::Gemspec);
1207 }
1208 };
1209
1210 let licenses_re = match Regex::new(r#"(?m)^\s*\w+\.licenses\s*=\s*(.+)$"#) {
1211 Ok(r) => r,
1212 Err(e) => {
1213 warn!("Failed to compile licenses regex: {}", e);
1214 return default_package_data_with_datasource(DatasourceId::Gemspec);
1215 }
1216 };
1217
1218 let authors_re = match Regex::new(r#"(?m)^\s*\w+\.(?:authors|author)\s*=\s*(.+)$"#) {
1219 Ok(r) => r,
1220 Err(e) => {
1221 warn!("Failed to compile authors regex: {}", e);
1222 return default_package_data_with_datasource(DatasourceId::Gemspec);
1223 }
1224 };
1225
1226 let email_re = match Regex::new(r#"(?m)^\s*\w+\.email\s*=\s*(.+)$"#) {
1227 Ok(r) => r,
1228 Err(e) => {
1229 warn!("Failed to compile email regex: {}", e);
1230 return default_package_data_with_datasource(DatasourceId::Gemspec);
1231 }
1232 };
1233
1234 let dependency_call_re = match Regex::new(
1235 r#"(?m)^\s*\w+\.(add_(?:development_|runtime_)?dependency)\s*\(?(.+?)\)?\s*$"#,
1236 ) {
1237 Ok(r) => r,
1238 Err(e) => {
1239 warn!("Failed to compile gemspec dependency regex: {}", e);
1240 return default_package_data_with_datasource(DatasourceId::Gemspec);
1241 }
1242 };
1243
1244 let mut name: Option<String> = None;
1245 let mut version: Option<String> = None;
1246 let mut summary: Option<String> = None;
1247 let mut description: Option<String> = None;
1248 let mut homepage: Option<String> = None;
1249 let mut license: Option<String> = None;
1250 let mut licenses: Vec<String> = Vec::new();
1251 let mut authors: Vec<String> = Vec::new();
1252 let mut emails: Vec<String> = Vec::new();
1253 let mut dependencies: Vec<Dependency> = Vec::new();
1254
1255 for caps in field_re.captures_iter(content) {
1257 let field_name = match caps.get(1) {
1258 Some(m) => m.as_str(),
1259 None => continue,
1260 };
1261 let raw_value = match caps.get(2) {
1262 Some(m) => m.as_str().trim(),
1263 None => continue,
1264 };
1265
1266 match field_name {
1267 "name" => {
1268 let cleaned = clean_gemspec_value(raw_value);
1269 name = if looks_like_constant_reference(&cleaned) {
1270 resolve_variable_version(&cleaned, &contexts).or(Some(cleaned))
1271 } else {
1272 Some(cleaned)
1273 }
1274 }
1275 "version" => {
1276 let cleaned = clean_gemspec_value(raw_value);
1277 if looks_like_constant_reference(&cleaned) {
1279 version = resolve_variable_version(&cleaned, &contexts).or(Some(cleaned));
1280 } else {
1281 version = Some(cleaned);
1282 }
1283 }
1284 "summary" => {
1285 let cleaned = clean_gemspec_value(raw_value);
1286 summary = if looks_like_constant_reference(&cleaned) {
1287 resolve_variable_version(&cleaned, &contexts).or(Some(cleaned))
1288 } else {
1289 Some(cleaned)
1290 }
1291 }
1292 "description" => description = Some(clean_gemspec_value(raw_value)),
1293 "homepage" => {
1294 let cleaned = clean_gemspec_value(raw_value);
1295 homepage = if looks_like_constant_reference(&cleaned) {
1296 resolve_variable_version(&cleaned, &contexts).or(Some(cleaned))
1297 } else {
1298 Some(cleaned)
1299 }
1300 }
1301 "license" => license = Some(clean_gemspec_value(raw_value)),
1302 _ => {}
1303 }
1304 }
1305
1306 for caps in licenses_re.captures_iter(content) {
1308 if let Some(raw) = caps.get(1) {
1309 licenses = extract_ruby_array(raw.as_str());
1310 }
1311 }
1312
1313 for caps in authors_re.captures_iter(content) {
1315 if let Some(raw) = caps.get(1) {
1316 let raw_str = raw.as_str().trim();
1317 if raw_str.starts_with('[') {
1318 authors = extract_ruby_array(raw_str);
1319 } else if looks_like_constant_reference(raw_str) {
1320 authors = resolve_variable_array(raw_str, &contexts)
1321 .unwrap_or_else(|| vec![clean_gemspec_value(raw_str)]);
1322 } else {
1323 authors.push(clean_gemspec_value(raw_str));
1324 }
1325 }
1326 }
1327
1328 for caps in email_re.captures_iter(content) {
1330 if let Some(raw) = caps.get(1) {
1331 let raw_str = raw.as_str().trim();
1332 if raw_str.starts_with('[') {
1333 emails = extract_ruby_array(raw_str);
1334 } else if looks_like_constant_reference(raw_str) {
1335 emails = resolve_variable_array(raw_str, &contexts)
1336 .unwrap_or_else(|| vec![clean_gemspec_value(raw_str)]);
1337 } else {
1338 emails.push(clean_gemspec_value(raw_str));
1339 }
1340 }
1341 }
1342
1343 let mut parties: Vec<Party> = Vec::new();
1345
1346 if authors.len() == 1 && emails.len() == 1 {
1347 let email_str = emails.first().map(String::as_str);
1348 let (parsed_email_name, parsed_email) = match email_str {
1349 Some(e) => split_name_email(e),
1350 None => (None, None),
1351 };
1352
1353 parties.push(Party {
1354 r#type: Some("person".to_string()),
1355 role: Some("author".to_string()),
1356 name: authors.first().cloned().or(parsed_email_name),
1357 email: parsed_email.or_else(|| {
1358 email_str
1359 .filter(|e| e.contains('@') && !e.contains('<'))
1360 .map(|e| e.to_string())
1361 }),
1362 url: None,
1363 organization: None,
1364 organization_url: None,
1365 timezone: None,
1366 });
1367 } else {
1368 for author_name in authors {
1369 parties.push(Party {
1370 r#type: Some("person".to_string()),
1371 role: Some("author".to_string()),
1372 name: Some(author_name),
1373 email: None,
1374 url: None,
1375 organization: None,
1376 organization_url: None,
1377 timezone: None,
1378 });
1379 }
1380
1381 for email_str in emails {
1382 let (parsed_email_name, parsed_email) = if email_str.contains('<') {
1383 split_name_email(&email_str)
1384 } else {
1385 (None, None)
1386 };
1387 parties.push(Party {
1388 r#type: Some("person".to_string()),
1389 role: Some("author".to_string()),
1390 name: parsed_email_name,
1391 email: parsed_email.or_else(|| email_str.contains('@').then_some(email_str)),
1392 url: None,
1393 organization: None,
1394 organization_url: None,
1395 timezone: None,
1396 });
1397 }
1398 }
1399
1400 for caps in dependency_call_re.captures_iter(content) {
1401 let method = match caps.get(1) {
1402 Some(m) => m.as_str(),
1403 None => continue,
1404 };
1405 let args = match caps.get(2) {
1406 Some(m) => m.as_str(),
1407 None => continue,
1408 };
1409
1410 let Some(dep_name) = extract_first_ruby_value(args) else {
1411 continue;
1412 };
1413 let version_parts = extract_all_ruby_values(after_first_argument(args));
1414 let extracted_requirement = if version_parts.is_empty() {
1415 None
1416 } else {
1417 Some(version_parts.join(", "))
1418 };
1419 let purl = create_gem_purl(&dep_name, None);
1420 let is_development = method == "add_development_dependency";
1421 let scope = if is_development {
1422 "development"
1423 } else {
1424 "runtime"
1425 };
1426
1427 dependencies.push(Dependency {
1428 purl,
1429 extracted_requirement,
1430 scope: Some(scope.to_string()),
1431 is_runtime: Some(!is_development),
1432 is_optional: Some(is_development),
1433 is_pinned: None,
1434 is_direct: Some(true),
1435 resolved_package: None,
1436 extra_data: None,
1437 });
1438 }
1439
1440 let extracted_license_statement = if !licenses.is_empty() {
1442 Some(licenses.join(" AND "))
1443 } else {
1444 license
1445 };
1446
1447 let (declared_license_expression, declared_license_expression_spdx, license_detections) =
1448 normalize_spdx_declared_license(extracted_license_statement.as_deref());
1449
1450 let final_description = description.or(summary);
1452
1453 let purl = name
1455 .as_deref()
1456 .map(|n| create_gem_purl(n, version.as_deref()))
1457 .unwrap_or(None);
1458
1459 let (repository_homepage_url, repository_download_url, api_data_url, download_url) =
1460 if let Some(n) = name.as_deref() {
1461 get_rubygems_urls(n, version.as_deref(), None)
1462 } else {
1463 (None, None, None, None)
1464 };
1465
1466 PackageData {
1467 package_type: Some(PACKAGE_TYPE),
1468 name,
1469 version,
1470 primary_language: Some("Ruby".to_string()),
1471 description: final_description,
1472 homepage_url: homepage,
1473 download_url,
1474 declared_license_expression,
1475 declared_license_expression_spdx,
1476 license_detections,
1477 extracted_license_statement,
1478 parties,
1479 dependencies,
1480 repository_homepage_url,
1481 repository_download_url,
1482 api_data_url,
1483 datasource_id: Some(DatasourceId::Gemspec),
1484 purl,
1485 ..default_package_data()
1486 }
1487}
1488
/// Largest `.gem` archive, in bytes, that `extract_gem_archive` accepts (100 MiB).
const MAX_ARCHIVE_SIZE: u64 = 100 * 1024 * 1024;

/// Size limit, in bytes, applied to the `metadata.gz` entry and to its decompressed
/// contents (50 MiB).
const MAX_FILE_SIZE: u64 = 50 * 1024 * 1024;

/// Highest decompressed-to-compressed size ratio tolerated for `metadata.gz`.
const MAX_COMPRESSION_RATIO: f64 = 100.0;

/// Parser for packaged `.gem` archives (files whose extension is `gem`).
pub struct GemArchiveParser;
1504
1505impl PackageParser for GemArchiveParser {
1506 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1507
1508 fn extract_packages(path: &Path) -> Vec<PackageData> {
1509 vec![match extract_gem_archive(path) {
1510 Ok(data) => data,
1511 Err(e) => {
1512 warn!("Failed to extract .gem archive at {:?}: {}", path, e);
1513 default_package_data_with_datasource(DatasourceId::GemArchive)
1514 }
1515 }]
1516 }
1517
1518 fn is_match(path: &Path) -> bool {
1519 path.extension()
1520 .and_then(|ext| ext.to_str())
1521 .is_some_and(|ext| ext == "gem")
1522 }
1523}
1524
1525fn extract_gem_archive(path: &Path) -> Result<PackageData, String> {
1526 let file_metadata =
1527 fs::metadata(path).map_err(|e| format!("Failed to read file metadata: {}", e))?;
1528 let archive_size = file_metadata.len();
1529
1530 if archive_size > MAX_ARCHIVE_SIZE {
1531 return Err(format!(
1532 "Archive too large: {} bytes (limit: {} bytes)",
1533 archive_size, MAX_ARCHIVE_SIZE
1534 ));
1535 }
1536
1537 let file = File::open(path).map_err(|e| format!("Failed to open archive: {}", e))?;
1538 let mut archive = Archive::new(file);
1539
1540 for entry_result in archive
1541 .entries()
1542 .map_err(|e| format!("Failed to read tar entries: {}", e))?
1543 {
1544 let entry = entry_result.map_err(|e| format!("Failed to read tar entry: {}", e))?;
1545 let entry_path = entry
1546 .path()
1547 .map_err(|e| format!("Failed to get entry path: {}", e))?;
1548
1549 if entry_path.to_str() == Some("metadata.gz") {
1550 let entry_size = entry.size();
1551 if entry_size > MAX_FILE_SIZE {
1552 return Err(format!(
1553 "metadata.gz too large: {} bytes (limit: {} bytes)",
1554 entry_size, MAX_FILE_SIZE
1555 ));
1556 }
1557
1558 let mut decoder = GzDecoder::new(entry);
1559 let mut content = String::new();
1560 decoder
1561 .read_to_string(&mut content)
1562 .map_err(|e| format!("Failed to decompress metadata.gz: {}", e))?;
1563
1564 let uncompressed_size = content.len() as u64;
1565 if entry_size > 0 {
1566 let ratio = uncompressed_size as f64 / entry_size as f64;
1567 if ratio > MAX_COMPRESSION_RATIO {
1568 return Err(format!(
1569 "Suspicious compression ratio: {:.2}:1 (limit: {:.0}:1)",
1570 ratio, MAX_COMPRESSION_RATIO
1571 ));
1572 }
1573 }
1574 if uncompressed_size > MAX_FILE_SIZE {
1575 return Err(format!(
1576 "Decompressed metadata too large: {} bytes (limit: {} bytes)",
1577 uncompressed_size, MAX_FILE_SIZE
1578 ));
1579 }
1580
1581 return parse_gem_metadata_yaml(&content, DatasourceId::GemArchive);
1582 }
1583 }
1584
1585 Err("metadata.gz not found in .gem archive".to_string())
1586}
1587
1588fn parse_gem_metadata_yaml(
1589 content: &str,
1590 datasource_id: DatasourceId,
1591) -> Result<PackageData, String> {
1592 let cleaned = clean_ruby_yaml_tags(content);
1596
1597 let yaml: serde_yaml::Value =
1598 serde_yaml::from_str(&cleaned).map_err(|e| format!("Failed to parse YAML: {}", e))?;
1599
1600 let name = yaml_string(&yaml, "name");
1601 let version = yaml.get("version").and_then(|v| {
1602 if v.is_string() {
1604 v.as_str().map(|s| s.to_string())
1605 } else {
1606 yaml_string(v, "version")
1607 }
1608 });
1609 let description = yaml_string(&yaml, "description").or_else(|| yaml_string(&yaml, "summary"));
1610 let homepage = yaml_string(&yaml, "homepage");
1611 let summary = yaml_string(&yaml, "summary");
1612
1613 let licenses: Vec<String> = yaml
1615 .get("licenses")
1616 .and_then(|v| v.as_sequence())
1617 .map(|seq| {
1618 seq.iter()
1619 .filter_map(|item| item.as_str().map(|s| s.to_string()))
1620 .collect()
1621 })
1622 .unwrap_or_default();
1623
1624 let extracted_license_statement = if !licenses.is_empty() {
1626 Some(licenses.join(" AND "))
1627 } else {
1628 None
1629 };
1630
1631 let (license_expression, license_expression_spdx, license_detections) =
1632 normalize_spdx_declared_license(extracted_license_statement.as_deref());
1633
1634 let authors: Vec<String> = yaml
1636 .get("authors")
1637 .and_then(|v| v.as_sequence())
1638 .map(|seq| {
1639 seq.iter()
1640 .filter_map(|item| item.as_str().map(|s| s.to_string()))
1641 .collect()
1642 })
1643 .unwrap_or_default();
1644
1645 let emails: Vec<String> = yaml
1646 .get("email")
1647 .map(|v| {
1648 if let Some(seq) = v.as_sequence() {
1649 seq.iter()
1650 .filter_map(|item| item.as_str().map(|s| s.to_string()))
1651 .collect()
1652 } else if let Some(s) = v.as_str() {
1653 vec![s.to_string()]
1654 } else {
1655 Vec::new()
1656 }
1657 })
1658 .unwrap_or_default();
1659
1660 let mut parties: Vec<Party> = Vec::new();
1662 let max_len = authors.len().max(emails.len());
1663 for i in 0..max_len {
1664 let author_name = authors.get(i).map(|s| s.as_str());
1665 let email_str = emails.get(i).map(|s| s.as_str());
1666
1667 let (parsed_email_name, parsed_email) = match email_str {
1668 Some(e) if e.contains('<') => split_name_email(e),
1669 None => (None, None),
1670 _ => (None, None),
1671 };
1672
1673 let party_name = author_name.map(|s| s.to_string()).or(parsed_email_name);
1674
1675 parties.push(Party {
1676 r#type: Some("person".to_string()),
1677 role: Some("author".to_string()),
1678 name: party_name,
1679 email: parsed_email.or_else(|| {
1680 email_str
1681 .filter(|e| e.contains('@') && !e.contains('<'))
1682 .map(|e| e.to_string())
1683 }),
1684 url: None,
1685 organization: None,
1686 organization_url: None,
1687 timezone: None,
1688 });
1689 }
1690
1691 let dependencies = parse_gem_yaml_dependencies(&yaml);
1693
1694 let metadata = yaml.get("metadata");
1695
1696 let bug_tracking_url = metadata.and_then(|m| yaml_string(m, "bug_tracking_uri"));
1697
1698 let code_view_url = metadata.and_then(|m| yaml_string(m, "source_code_uri"));
1699
1700 let vcs_url = code_view_url
1701 .clone()
1702 .or_else(|| metadata.and_then(|m| yaml_string(m, "homepage_uri")));
1703
1704 let file_references = metadata
1705 .and_then(|m| m.get("files"))
1706 .and_then(|f| f.as_sequence())
1707 .map(|seq| {
1708 seq.iter()
1709 .filter_map(|v| v.as_str())
1710 .map(|s| crate::models::FileReference {
1711 path: s.to_string(),
1712 size: None,
1713 sha1: None,
1714 md5: None,
1715 sha256: None,
1716 sha512: None,
1717 extra_data: None,
1718 })
1719 .collect::<Vec<_>>()
1720 })
1721 .unwrap_or_default();
1722
1723 let release_date = yaml_string(&yaml, "date").and_then(|d| {
1724 if d.len() >= 10 {
1725 Some(d[..10].to_string())
1726 } else {
1727 None
1728 }
1729 });
1730
1731 let purl = name
1732 .as_deref()
1733 .map(|n| create_gem_purl(n, version.as_deref()))
1734 .unwrap_or(None);
1735
1736 let platform = yaml_string(&yaml, "platform");
1737 let (repository_homepage_url, repository_download_url, api_data_url, download_url) =
1738 if let Some(n) = name.as_deref() {
1739 get_rubygems_urls(n, version.as_deref(), platform.as_deref())
1740 } else {
1741 (None, None, None, None)
1742 };
1743
1744 let qualifiers = if let Some(ref p) = platform {
1745 if p != "ruby" {
1746 let mut q = HashMap::new();
1747 q.insert("platform".to_string(), p.clone());
1748 Some(q)
1749 } else {
1750 None
1751 }
1752 } else {
1753 None
1754 };
1755
1756 Ok(PackageData {
1757 package_type: Some(PACKAGE_TYPE),
1758 name,
1759 version,
1760 qualifiers,
1761 primary_language: Some("Ruby".to_string()),
1762 description: description.or(summary),
1763 release_date,
1764 homepage_url: homepage,
1765 download_url,
1766 bug_tracking_url,
1767 code_view_url,
1768 declared_license_expression: license_expression,
1769 declared_license_expression_spdx: license_expression_spdx,
1770 license_detections,
1771 extracted_license_statement,
1772 file_references,
1773 parties,
1774 dependencies,
1775 repository_homepage_url,
1776 repository_download_url,
1777 api_data_url,
1778 datasource_id: Some(datasource_id),
1779 purl,
1780 vcs_url,
1781 ..default_package_data()
1782 })
1783}
1784
1785fn clean_ruby_yaml_tags(content: &str) -> String {
1787 let tag_re = match Regex::new(r"!ruby/\S+") {
1788 Ok(r) => r,
1789 Err(_) => return content.to_string(),
1790 };
1791 tag_re.replace_all(content, "").to_string()
1792}
1793
1794fn yaml_string(yaml: &serde_yaml::Value, key: &str) -> Option<String> {
1795 yaml.get(key)
1796 .and_then(|v| v.as_str())
1797 .filter(|s| !s.is_empty())
1798 .map(|s| s.to_string())
1799}
1800
1801fn parse_gem_yaml_dependencies(yaml: &serde_yaml::Value) -> Vec<Dependency> {
1802 let mut dependencies = Vec::new();
1803
1804 let deps_seq = match yaml.get("dependencies").and_then(|v| v.as_sequence()) {
1805 Some(seq) => seq,
1806 None => return dependencies,
1807 };
1808
1809 for dep_value in deps_seq {
1810 let dep_name = match yaml_string(dep_value, "name") {
1811 Some(n) => n,
1812 None => continue,
1813 };
1814
1815 let dep_type = yaml_string(dep_value, "type");
1816 let is_development = dep_type.as_deref() == Some(":development");
1817
1818 let requirements = dep_value
1820 .get("requirement")
1821 .or_else(|| dep_value.get("version_requirements"))
1822 .and_then(|req| req.get("requirements"))
1823 .and_then(|reqs| reqs.as_sequence());
1824
1825 let extracted_requirement = requirements.map(|reqs| {
1826 let parts: Vec<String> = reqs
1827 .iter()
1828 .filter_map(|req| {
1829 let seq = req.as_sequence()?;
1830 if seq.len() >= 2 {
1831 let op = seq[0].as_str().unwrap_or("");
1832 let ver = seq[1].get("version").and_then(|v| v.as_str()).unwrap_or("");
1833 if op == ">=" && ver == "0" {
1834 None
1836 } else if op.is_empty() || ver.is_empty() {
1837 None
1838 } else {
1839 Some(format!("{} {}", op, ver))
1840 }
1841 } else {
1842 None
1843 }
1844 })
1845 .collect();
1846 parts.join(", ")
1847 });
1848
1849 let extracted_requirement = extracted_requirement
1850 .filter(|s| !s.is_empty())
1851 .or_else(|| Some(String::new()));
1852
1853 let (scope, is_runtime, is_optional) = if is_development {
1854 (Some("development".to_string()), false, true)
1855 } else {
1856 (Some("runtime".to_string()), true, false)
1857 };
1858
1859 let purl = create_gem_purl(&dep_name, None);
1860
1861 dependencies.push(Dependency {
1862 purl,
1863 extracted_requirement,
1864 scope,
1865 is_runtime: Some(is_runtime),
1866 is_optional: Some(is_optional),
1867 is_pinned: None,
1868 is_direct: Some(true),
1869 resolved_package: None,
1870 extra_data: None,
1871 });
1872 }
1873
1874 dependencies
1875}
1876
1877pub struct GemMetadataExtractedParser;
1882
1883impl PackageParser for GemMetadataExtractedParser {
1884 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1885
1886 fn extract_packages(path: &Path) -> Vec<PackageData> {
1887 vec![match extract_gem_metadata_extracted(path) {
1888 Ok(data) => data,
1889 Err(e) => {
1890 warn!("Failed to extract gem metadata from {:?}: {}", path, e);
1891 default_package_data_with_datasource(DatasourceId::GemArchiveExtracted)
1892 }
1893 }]
1894 }
1895
1896 fn is_match(path: &Path) -> bool {
1897 path.to_str()
1898 .is_some_and(|p| p.contains("metadata.gz-extract"))
1899 }
1900}
1901
1902fn extract_gem_metadata_extracted(path: &Path) -> Result<PackageData, String> {
1903 let content = fs::read_to_string(path)
1904 .map_err(|e| format!("Failed to read metadata.gz-extract file: {}", e))?;
1905
1906 parse_gem_metadata_yaml(&content, DatasourceId::GemArchiveExtracted)
1907}
1908
// Parser registrations for the Ruby gem ecosystem.
// NOTE(review): argument meanings below are inferred from the values passed
// (description, path glob patterns, package type, primary language, documentation
// URL) — confirm against the `register_parser!` macro definition.

// Gemfile manifests, including those found under a `data.gz-extract` directory.
crate::register_parser!(
    "Ruby Gemfile manifest",
    &["**/Gemfile", "**/data.gz-extract/Gemfile"],
    "gem",
    "Ruby",
    Some("https://bundler.io/man/gemfile.5.html"),
);

// Gemfile.lock lockfiles, including those found under a `data.gz-extract` directory.
crate::register_parser!(
    "Ruby Gemfile.lock lockfile",
    &["**/Gemfile.lock", "**/data.gz-extract/Gemfile.lock"],
    "gem",
    "Ruby",
    Some("https://bundler.io/man/gemfile.5.html"),
);

// .gemspec manifests, wherever they live (including `data.gz-extract` and
// `specifications` directories).
crate::register_parser!(
    "Ruby .gemspec manifest",
    &[
        "**/*.gemspec",
        "**/data.gz-extract/*.gemspec",
        "**/specifications/*.gemspec"
    ],
    "gem",
    "Ruby",
    Some("https://guides.rubygems.org/specification-reference/"),
);

// Packaged .gem archives.
crate::register_parser!(
    "Ruby .gem archive",
    &["**/*.gem"],
    "gem",
    "Ruby",
    Some("https://guides.rubygems.org/specification-reference/"),
);

// Gem metadata already extracted from `metadata.gz`.
crate::register_parser!(
    "Ruby gem metadata (extracted)",
    &["**/metadata.gz-extract"],
    "gem",
    "Ruby",
    Some("https://guides.rubygems.org/specification-reference/"),
);
1953
// Unit tests for the gemspec parsing helpers.
#[cfg(test)]
mod tests {
    use super::parse_gemspec;

    // A `%q{` literal that is never closed on the same line should still have its
    // opening delimiter stripped, leaving only the text.
    #[test]
    fn test_clean_gemspec_value_handles_unterminated_percent_q() {
        assert_eq!(
            super::clean_gemspec_value("%q{Arel is a SQL AST manager for Ruby. It"),
            "Arel is a SQL AST manager for Ruby. It"
        );
    }

    // Both `add_runtime_dependency` and plain `add_dependency` must be reported with
    // the "runtime" scope and keep their version requirement.
    #[test]
    fn test_parse_gemspec_runtime_dependency_scope() {
        let content = r#"
Gem::Specification.new do |spec|
 spec.name = "demo"
 spec.version = "1.0.0"
 spec.add_runtime_dependency "rack", "~> 3.0"
 spec.add_dependency "thor", ">= 1.0"
end
"#;

        let package_data = parse_gemspec(content);
        // Dependencies are expected in declaration order: rack first, then thor.
        assert_eq!(package_data.dependencies.len(), 2);
        assert_eq!(
            package_data.dependencies[0].scope,
            Some("runtime".to_string())
        );
        assert_eq!(
            package_data.dependencies[0].extracted_requirement,
            Some("~> 3.0".to_string())
        );
        assert_eq!(
            package_data.dependencies[1].scope,
            Some("runtime".to_string())
        );
        assert_eq!(
            package_data.dependencies[1].extracted_requirement,
            Some(">= 1.0".to_string())
        );
    }
}