1use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Party};
31use crate::parser_warn as warn;
32use crate::parsers::utils::{
33 MAX_ITERATION_COUNT, read_file_to_string, split_name_email, truncate_field,
34};
35use flate2::read::GzDecoder;
36use packageurl::PackageUrl;
37use regex::Regex;
38use std::collections::HashMap;
39use std::fs::{self, File};
40use std::io::Read;
41use std::path::{Path, PathBuf};
42use tar::Archive;
43
44use super::PackageParser;
45use super::license_normalization::normalize_spdx_declared_license;
46
47const PACKAGE_TYPE: PackageType = PackageType::Gem;
48
/// Returns `s` with every trailing `.freeze` removed.
///
/// The suffix is stripped repeatedly, so `"x.freeze.freeze"` becomes `"x"`.
/// Input that does not end in `.freeze` is returned unchanged.
pub fn strip_freeze_suffix(s: &str) -> &str {
    let mut rest = s;
    while let Some(shorter) = rest.strip_suffix(".freeze") {
        rest = shorter;
    }
    rest
}
66
/// A block that is currently open while a Gemfile is scanned line by line.
enum GemfileBlock {
    /// A `group ... do` block, carrying the group names parsed from its header.
    Group(Vec<String>),
    /// A `source "..." do` block, carrying the quoted source string from its header.
    Source(String),
}
71
/// Parser for files named `Gemfile`.
pub struct GemfileParser;
81
82impl PackageParser for GemfileParser {
83 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
84
85 fn extract_packages(path: &Path) -> Vec<PackageData> {
86 let datasource_id = gemfile_datasource_id(path);
87 let content = match read_file_to_string(path, None) {
88 Ok(c) => c,
89 Err(e) => {
90 warn!("Failed to read Gemfile at {:?}: {}", path, e);
91 return vec![default_package_data_with_datasource(datasource_id)];
92 }
93 };
94
95 let mut package_data = parse_gemfile(&content);
96 package_data.datasource_id = Some(datasource_id);
97 vec![package_data]
98 }
99
100 fn is_match(path: &Path) -> bool {
101 path.file_name()
102 .and_then(|n| n.to_str())
103 .is_some_and(|name| name == "Gemfile")
104 || path
105 .to_str()
106 .is_some_and(|p| p.contains("data.gz-extract/") && p.ends_with("/Gemfile"))
107 }
108}
109
/// Parses the text of a Gemfile into a `PackageData` record.
///
/// The content is scanned line by line (at most `MAX_ITERATION_COUNT` lines).
/// Each non-comment line is classified, in this order, as a `group ... do`
/// header, a `source "..." do` header, a plain `source "..."` line, an `end`
/// line, or a `gem ...` declaration; anything else is ignored.
///
/// Every `gem` line becomes one `Dependency`. All source strings seen are
/// collected, de-duplicated, under the `"sources"` key of the package's
/// `extra_data`.
///
/// If any of the internal regexes fails to compile, a warning is logged and a
/// default record with the `Gemfile` datasource id is returned.
fn parse_gemfile(content: &str) -> PackageData {
    let mut dependencies = Vec::new();
    // Stack of currently open `group`/`source` blocks, innermost last.
    let mut block_stack = Vec::new();
    // Most recent top-level `source "..."` line, if any.
    let mut default_source = None;
    let mut sources = Vec::new();

    // Captures: 1 = gem name, 2 and 3 = optional quoted arguments after the
    // name, 4 = the unparsed remainder of the argument list.
    let gem_regex = match Regex::new(
        r#"^\s*gem\s+["']([^"']+)["'](?:\.freeze)?(?:\s*,\s*["']([^"']+)["'](?:\.freeze)?)?(?:\s*,\s*["']([^"']+)["'](?:\.freeze)?)?(?:\s*,\s*(.+))?"#,
    ) {
        Ok(r) => r,
        Err(e) => {
            warn!("Failed to compile gem regex: {}", e);
            return default_package_data_with_datasource(DatasourceId::Gemfile);
        }
    };

    // Capture 1 = everything between `group` and the trailing `do`.
    let group_start_regex = match Regex::new(r"^\s*group\s+(.+?)\s+do\s*$") {
        Ok(r) => r,
        Err(e) => {
            warn!("Failed to compile group regex: {}", e);
            return default_package_data_with_datasource(DatasourceId::Gemfile);
        }
    };

    let group_end_regex = match Regex::new(r"^\s*end\s*$") {
        Ok(r) => r,
        Err(e) => {
            warn!("Failed to compile end regex: {}", e);
            return default_package_data_with_datasource(DatasourceId::Gemfile);
        }
    };

    let source_block_start_regex = match Regex::new(r#"^\s*source\s+["']([^"']+)["']\s+do\s*$"#) {
        Ok(r) => r,
        Err(e) => {
            warn!("Failed to compile source block regex: {}", e);
            return default_package_data_with_datasource(DatasourceId::Gemfile);
        }
    };

    let source_regex = match Regex::new(r#"^\s*source\s+["']([^"']+)["']\s*$"#) {
        Ok(r) => r,
        Err(e) => {
            warn!("Failed to compile source regex: {}", e);
            return default_package_data_with_datasource(DatasourceId::Gemfile);
        }
    };

    // Matches `:name` symbols inside a group header.
    let symbol_regex = match Regex::new(r":(\w+)") {
        Ok(r) => r,
        Err(e) => {
            warn!("Failed to compile symbol regex: {}", e);
            return default_package_data_with_datasource(DatasourceId::Gemfile);
        }
    };

    for line in content.lines().take(MAX_ITERATION_COUNT) {
        let trimmed = line.trim();

        // Skip blank lines and whole-line comments.
        if trimmed.is_empty() || trimmed.starts_with('#') {
            continue;
        }

        if let Some(caps) = group_start_regex.captures(trimmed) {
            let groups_str = caps.get(1).map(|m| m.as_str()).unwrap_or("");
            let mut current_groups = Vec::new();
            for cap in symbol_regex.captures_iter(groups_str) {
                if let Some(group_name) = cap.get(1) {
                    current_groups.push(group_name.as_str().to_string());
                }
            }
            block_stack.push(GemfileBlock::Group(current_groups));
            continue;
        }

        if let Some(caps) = source_block_start_regex.captures(trimmed) {
            let source = caps
                .get(1)
                .map(|m| m.as_str().to_string())
                .unwrap_or_default();
            if !source.is_empty() {
                push_unique_string(&mut sources, source.clone());
                block_stack.push(GemfileBlock::Source(source));
            }
            continue;
        }

        if let Some(caps) = source_regex.captures(trimmed) {
            if let Some(source) = caps.get(1).map(|m| m.as_str().to_string()) {
                push_unique_string(&mut sources, source.clone());
                default_source = Some(source);
            }
            continue;
        }

        // Any bare `end` closes the innermost tracked block. Only `group` and
        // `source` headers push onto the stack, so an `end` belonging to some
        // other construct also pops here.
        if group_end_regex.is_match(trimmed) {
            block_stack.pop();
            continue;
        }

        if let Some(caps) = gem_regex.captures(trimmed) {
            let name = strip_freeze_suffix(caps.get(1).map(|m| m.as_str()).unwrap_or(""));
            if name.is_empty() {
                continue;
            }

            let mut version_parts = Vec::new();
            // The first quoted argument after the name is always kept.
            if let Some(v) = caps.get(2) {
                version_parts.push(strip_freeze_suffix(v.as_str()).to_string());
            }
            // The second one is kept only when it looks like a constraint.
            if let Some(v) = caps.get(3) {
                let v_str = strip_freeze_suffix(v.as_str());
                if looks_like_version_constraint(v_str) {
                    version_parts.push(v_str.to_string());
                }
            }

            let extracted_requirement = if version_parts.is_empty() {
                None
            } else {
                Some(version_parts.join(", "))
            };

            let current_groups = current_group_names(&block_stack);

            // No group: runtime and required. `development` takes precedence
            // over `test`; both are non-runtime and optional. Any other group
            // uses its first name as the scope and stays runtime/required.
            let (scope, is_runtime, is_optional) = if current_groups.is_empty() {
                (None, true, false)
            } else if current_groups.iter().any(|g| g == "development") {
                (Some("development".to_string()), false, true)
            } else if current_groups.iter().any(|g| g == "test") {
                (Some("test".to_string()), false, true)
            } else {
                let group = current_groups.first().cloned();
                (group, true, false)
            };

            // The PURL is built without a version.
            let purl = create_gem_purl(name, None);
            let inherited_source = current_source(&block_stack, default_source.as_deref());
            let extra_data = build_gemfile_dependency_extra_data(
                caps.get(4).map(|m| m.as_str()),
                inherited_source.as_deref(),
            );

            dependencies.push(Dependency {
                purl,
                extracted_requirement,
                scope,
                is_runtime: Some(is_runtime),
                is_optional: Some(is_optional),
                is_pinned: None,
                is_direct: Some(true),
                resolved_package: None,
                extra_data,
            });
        }
    }

    let extra_data = if sources.is_empty() {
        None
    } else {
        Some(HashMap::from([(
            "sources".to_string(),
            serde_json::Value::Array(sources.into_iter().map(serde_json::Value::String).collect()),
        )]))
    };

    PackageData {
        package_type: Some(PACKAGE_TYPE),
        primary_language: Some("Ruby".to_string()),
        dependencies,
        extra_data,
        datasource_id: Some(DatasourceId::Gemfile),
        ..default_package_data()
    }
}
301
302fn current_group_names(block_stack: &[GemfileBlock]) -> Vec<String> {
303 block_stack
304 .iter()
305 .rev()
306 .find_map(|block| match block {
307 GemfileBlock::Group(groups) => Some(groups.clone()),
308 GemfileBlock::Source(_) => None,
309 })
310 .unwrap_or_default()
311}
312
313fn current_source(block_stack: &[GemfileBlock], default_source: Option<&str>) -> Option<String> {
314 block_stack
315 .iter()
316 .rev()
317 .find_map(|block| match block {
318 GemfileBlock::Source(source) => Some(source.clone()),
319 GemfileBlock::Group(_) => None,
320 })
321 .or_else(|| default_source.map(str::to_string))
322}
323
/// Appends `value` to `values` unless an equal string is already present.
fn push_unique_string(values: &mut Vec<String>, value: String) {
    let already_present = values.iter().any(|existing| *existing == value);
    if already_present {
        return;
    }
    values.push(value);
}
329
330fn build_gemfile_dependency_extra_data(
331 options: Option<&str>,
332 inherited_source: Option<&str>,
333) -> Option<HashMap<String, serde_json::Value>> {
334 let mut extra = HashMap::new();
335 let options = options.unwrap_or("");
336
337 if let Some(git) = extract_gemfile_quoted_option(options, "git") {
338 extra.insert(
339 "source_type".to_string(),
340 serde_json::Value::String("GIT".to_string()),
341 );
342 extra.insert("git".to_string(), serde_json::Value::String(git.clone()));
343 extra.insert("remote".to_string(), serde_json::Value::String(git));
344 }
345
346 if let Some(path) = extract_gemfile_quoted_option(options, "path") {
347 extra.insert(
348 "source_type".to_string(),
349 serde_json::Value::String("PATH".to_string()),
350 );
351 extra.insert("path".to_string(), serde_json::Value::String(path));
352 }
353
354 for key in ["branch", "ref", "tag"] {
355 if let Some(value) = extract_gemfile_quoted_option(options, key) {
356 extra.insert(key.to_string(), serde_json::Value::String(value));
357 }
358 }
359
360 let direct_source = extract_gemfile_quoted_option(options, "source");
361 if let Some(source) = direct_source {
362 extra.insert("source".to_string(), serde_json::Value::String(source));
363 } else if !extra.contains_key("source_type")
364 && let Some(source) = inherited_source
365 {
366 extra.insert(
367 "source".to_string(),
368 serde_json::Value::String(source.to_string()),
369 );
370 }
371
372 (!extra.is_empty()).then_some(extra)
373}
374
375fn extract_gemfile_quoted_option(options: &str, key: &str) -> Option<String> {
376 if options.is_empty() {
377 return None;
378 }
379
380 let pattern = format!(r#"(?:^|,\s*){}\s*:\s*["']([^"']+)["']"#, regex::escape(key));
381 Regex::new(&pattern)
382 .ok()
383 .and_then(|regex| regex.captures(options))
384 .and_then(|captures| captures.get(1).map(|m| m.as_str().to_string()))
385}
386
/// Returns `true` when `s` starts with one of `~`, `>`, `<`, `=`, `!` or an
/// ASCII digit. The empty string yields `false`.
fn looks_like_version_constraint(s: &str) -> bool {
    match s.chars().next() {
        Some('~' | '>' | '<' | '=' | '!') => true,
        Some(first) => first.is_ascii_digit(),
        None => false,
    }
}
396
/// Parser for files named `Gemfile.lock`.
pub struct GemfileLockParser;
406
407impl PackageParser for GemfileLockParser {
408 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
409
410 fn extract_packages(path: &Path) -> Vec<PackageData> {
411 let datasource_id = gemfile_lock_datasource_id(path);
412 let content = match read_file_to_string(path, None) {
413 Ok(c) => c,
414 Err(e) => {
415 warn!("Failed to read Gemfile.lock at {:?}: {}", path, e);
416 return vec![default_package_data_with_datasource(datasource_id)];
417 }
418 };
419
420 let mut package_data = parse_gemfile_lock(&content);
421 package_data.datasource_id = Some(datasource_id);
422 vec![package_data]
423 }
424
425 fn is_match(path: &Path) -> bool {
426 path.file_name()
427 .and_then(|n| n.to_str())
428 .is_some_and(|name| name == "Gemfile.lock")
429 || path
430 .to_str()
431 .is_some_and(|p| p.contains("data.gz-extract/") && p.ends_with("/Gemfile.lock"))
432 }
433}
434
/// Section of a `Gemfile.lock` the line-by-line parser is currently inside.
#[derive(Debug, Clone, PartialEq)]
enum ParseState {
    /// No section header has been seen yet.
    None,
    /// Inside a `GEM` section, before its `specs:` line.
    Gem,
    /// Inside a `GIT` section, before its `specs:` line.
    Git,
    /// Inside a `PATH` section, before its `specs:` line.
    Path,
    /// Inside an `SVN` section, before its `specs:` line.
    Svn,
    /// After the `specs:` line of a `GEM`/`GIT`/`PATH`/`SVN` section.
    Specs,
    /// Inside the `PLATFORMS` section.
    Platforms,
    /// Inside the `BUNDLED WITH` section.
    BundledWith,
    /// Inside the `DEPENDENCIES` section.
    Dependencies,
}
448
/// Everything collected about one gem while parsing a `Gemfile.lock`.
#[derive(Debug, Clone, Default)]
struct GemInfo {
    /// Gem name as written in the lockfile.
    name: String,
    /// Version part of the parenthesised text on the gem's `specs:` line.
    version: Option<String>,
    /// Platform part of that same text (whatever follows the first `-`).
    platform: Option<String>,
    /// Header keyword of the section the gem was listed in
    /// (`GEM`, `GIT`, `PATH` or `SVN`).
    gem_type: String,
    /// Value of the section's `remote:` option, if any.
    remote: Option<String>,
    /// Value of the section's `revision:` option, if any.
    revision: Option<String>,
    /// Value of the section's `ref:` option, if any.
    ref_field: Option<String>,
    /// Value of the section's `branch:` option, if any.
    branch: Option<String>,
    /// Value of the section's `tag:` option, if any.
    tag: Option<String>,
    /// Whether the gem's `DEPENDENCIES` entry ends with `!`.
    pinned: bool,
    /// Constraints taken from the gem's `DEPENDENCIES` entries.
    requirements: Vec<String>,
}
469
470fn select_primary_path_gem(gems: &HashMap<String, GemInfo>) -> Option<GemInfo> {
471 let mut path_gems: Vec<&GemInfo> = gems.values().filter(|gem| gem.gem_type == "PATH").collect();
472 path_gems.sort_by(|left, right| {
473 left.remote
474 .as_deref()
475 .cmp(&right.remote.as_deref())
476 .then_with(|| left.name.cmp(&right.name))
477 });
478
479 path_gems
480 .iter()
481 .copied()
482 .find(|gem| gem.pinned && gem.remote.as_deref() == Some("."))
483 .or_else(|| path_gems.iter().copied().find(|gem| gem.pinned))
484 .or_else(|| {
485 path_gems
486 .iter()
487 .copied()
488 .find(|gem| gem.remote.as_deref() == Some("."))
489 })
490 .or_else(|| path_gems.first().copied())
491 .cloned()
492}
493
494fn parse_gemfile_lock(content: &str) -> PackageData {
496 let mut state = ParseState::None;
497 let mut dependencies = Vec::new();
498 let mut gems: HashMap<String, GemInfo> = HashMap::new();
499 let mut platforms: Vec<String> = Vec::new();
500 let mut bundler_version: Option<String> = None;
501 let mut current_gem_type = String::new();
502 let mut current_remote: Option<String> = None;
503 let mut current_options: HashMap<String, String> = HashMap::new();
504
505 let deps_regex = match Regex::new(r"^ {2}([^ \)\(,!:]+)(?: \(([^)]+)\))?(!)?$") {
507 Ok(r) => r,
508 Err(e) => {
509 warn!("Failed to compile deps regex: {}", e);
510 return default_package_data_with_datasource(DatasourceId::GemfileLock);
511 }
512 };
513
514 let spec_deps_regex = match Regex::new(r"^ {4}([^ \)\(,!:]+)(?: \(([^)]+)\))?$") {
516 Ok(r) => r,
517 Err(e) => {
518 warn!("Failed to compile spec_deps regex: {}", e);
519 return default_package_data_with_datasource(DatasourceId::GemfileLock);
520 }
521 };
522
523 let options_regex = match Regex::new(r"^ {2}([a-z]+): (.+)$") {
525 Ok(r) => r,
526 Err(e) => {
527 warn!("Failed to compile options regex: {}", e);
528 return default_package_data_with_datasource(DatasourceId::GemfileLock);
529 }
530 };
531
532 let version_regex = match Regex::new(r"^\s+(\d+(?:\.\d+)+)\s*$") {
534 Ok(r) => r,
535 Err(e) => {
536 warn!("Failed to compile version regex: {}", e);
537 return default_package_data_with_datasource(DatasourceId::GemfileLock);
538 }
539 };
540
541 for line in content.lines().take(MAX_ITERATION_COUNT) {
542 let trimmed = line.trim_end();
543
544 if trimmed.is_empty() {
546 current_options.clear();
547 continue;
548 }
549
550 match trimmed {
552 "GEM" => {
553 state = ParseState::Gem;
554 current_gem_type = "GEM".to_string();
555 current_remote = None;
556 current_options.clear();
557 continue;
558 }
559 "GIT" => {
560 state = ParseState::Git;
561 current_gem_type = "GIT".to_string();
562 current_remote = None;
563 current_options.clear();
564 continue;
565 }
566 "PATH" => {
567 state = ParseState::Path;
568 current_gem_type = "PATH".to_string();
569 current_remote = None;
570 current_options.clear();
571 continue;
572 }
573 "SVN" => {
574 state = ParseState::Svn;
575 current_gem_type = "SVN".to_string();
576 current_remote = None;
577 current_options.clear();
578 continue;
579 }
580 "PLATFORMS" => {
581 state = ParseState::Platforms;
582 continue;
583 }
584 "BUNDLED WITH" => {
585 state = ParseState::BundledWith;
586 continue;
587 }
588 "DEPENDENCIES" => {
589 state = ParseState::Dependencies;
590 continue;
591 }
592 _ => {}
593 }
594
595 if trimmed.trim() == "specs:" {
599 state = match state {
600 ParseState::Gem | ParseState::Git | ParseState::Path | ParseState::Svn => {
601 ParseState::Specs
602 }
603 _ => state,
604 };
605 continue;
606 }
607
608 match state {
610 ParseState::Gem | ParseState::Git | ParseState::Path | ParseState::Svn => {
611 if let Some(caps) = options_regex.captures(line) {
613 let key = caps.get(1).map(|m| m.as_str()).unwrap_or("");
614 let value = caps.get(2).map(|m| m.as_str()).unwrap_or("");
615 current_options.insert(key.to_string(), value.to_string());
616 if key == "remote" {
617 current_remote = Some(value.to_string());
618 }
619 }
620 }
621 ParseState::Specs => {
622 if let Some(caps) = spec_deps_regex.captures(line) {
624 let name = caps.get(1).map(|m| m.as_str()).unwrap_or("").to_string();
625 let version_str = caps.get(2).map(|m| m.as_str()).unwrap_or("");
626
627 let (version, platform) = parse_version_platform(version_str);
629
630 if !name.is_empty() {
631 let gem_info = GemInfo {
632 name: name.clone(),
633 version,
634 platform,
635 gem_type: current_gem_type.clone(),
636 remote: current_remote.clone(),
637 revision: current_options.get("revision").cloned(),
638 ref_field: current_options.get("ref").cloned(),
639 branch: current_options.get("branch").cloned(),
640 tag: current_options.get("tag").cloned(),
641 pinned: false,
642 requirements: Vec::new(),
643 };
644 gems.insert(name, gem_info);
645 }
646 }
647 }
648 ParseState::Platforms => {
649 let platform = trimmed.trim();
651 if !platform.is_empty() {
652 platforms.push(platform.to_string());
653 }
654 }
655 ParseState::BundledWith => {
656 if let Some(caps) = version_regex.captures(line) {
658 bundler_version = caps.get(1).map(|m| m.as_str().to_string());
659 }
660 }
661 ParseState::Dependencies => {
662 if let Some(caps) = deps_regex.captures(line) {
664 let name = caps.get(1).map(|m| m.as_str()).unwrap_or("").to_string();
665 let version_constraint = caps.get(2).map(|m| m.as_str().to_string());
666 let pinned = caps.get(3).is_some();
667
668 if !name.is_empty() {
669 if let Some(gem) = gems.get_mut(&name) {
671 gem.pinned = pinned;
672 if let Some(vc) = &version_constraint {
673 gem.requirements.push(vc.clone());
674 }
675 } else {
676 let gem_info = GemInfo {
677 name: name.clone(),
678 version: None,
679 platform: None,
680 gem_type: "GEM".to_string(),
681 remote: None,
682 revision: None,
683 ref_field: None,
684 branch: None,
685 tag: None,
686 pinned,
687 requirements: version_constraint.into_iter().collect(),
688 };
689 gems.insert(name, gem_info);
690 }
691 }
692 }
693 }
694 ParseState::None => {}
695 }
696 }
697
698 let primary_gem = select_primary_path_gem(&gems);
699
700 let (
701 package_name,
702 package_version,
703 repository_homepage_url,
704 repository_download_url,
705 api_data_url,
706 download_url,
707 ) = if let Some(ref pg) = primary_gem {
708 let urls = get_rubygems_urls(&pg.name, pg.version.as_deref(), pg.platform.as_deref());
709 (
710 Some(pg.name.clone()),
711 pg.version.clone(),
712 urls.0,
713 urls.1,
714 urls.2,
715 urls.3,
716 )
717 } else {
718 (None, None, None, None, None, None)
719 };
720
721 for (_, gem) in gems {
722 if let Some(ref pg) = primary_gem
723 && gem.name == pg.name
724 {
725 continue;
726 }
727
728 let version_for_purl = gem.version.as_deref();
729 let purl = create_gem_purl(&gem.name, version_for_purl);
730
731 let extracted_requirement = if !gem.requirements.is_empty() {
732 Some(gem.requirements.join(", "))
733 } else {
734 gem.version.clone()
735 };
736
737 let extra_data = build_gem_source_extra_data(&gem);
738
739 dependencies.push(Dependency {
740 purl,
741 extracted_requirement,
742 scope: Some("dependencies".to_string()),
743 is_runtime: Some(true),
744 is_optional: Some(false),
745 is_pinned: Some(gem.pinned),
746 is_direct: Some(true),
747 resolved_package: None,
748 extra_data,
749 });
750 }
751
752 dependencies.sort_by(|left, right| {
753 left.purl
754 .as_deref()
755 .cmp(&right.purl.as_deref())
756 .then_with(|| {
757 left.extracted_requirement
758 .as_deref()
759 .cmp(&right.extracted_requirement.as_deref())
760 })
761 });
762
763 let mut extra_data = HashMap::new();
765 if !platforms.is_empty() {
766 extra_data.insert(
767 "platforms".to_string(),
768 serde_json::Value::Array(
769 platforms
770 .into_iter()
771 .map(serde_json::Value::String)
772 .collect(),
773 ),
774 );
775 }
776 if let Some(bv) = bundler_version {
777 extra_data.insert("bundler_version".to_string(), serde_json::Value::String(bv));
778 }
779
780 let purl = package_name
781 .as_deref()
782 .map(|n| create_gem_purl(n, package_version.as_deref()))
783 .unwrap_or(None);
784
785 PackageData {
786 package_type: Some(PACKAGE_TYPE),
787 name: package_name,
788 version: package_version,
789 primary_language: Some("Ruby".to_string()),
790 download_url,
791 dependencies,
792 repository_homepage_url,
793 repository_download_url,
794 api_data_url,
795 extra_data: if extra_data.is_empty() {
796 None
797 } else {
798 Some(extra_data)
799 },
800 datasource_id: Some(DatasourceId::GemfileLock),
801 purl,
802 ..default_package_data()
803 }
804}
805
806fn build_gem_source_extra_data(gem: &GemInfo) -> Option<HashMap<String, serde_json::Value>> {
807 if gem.gem_type != "GIT" && gem.gem_type != "PATH" && gem.gem_type != "SVN" {
808 return None;
809 }
810
811 let mut extra = HashMap::new();
812 extra.insert(
813 "source_type".to_string(),
814 serde_json::Value::String(gem.gem_type.clone()),
815 );
816
817 if let Some(ref remote) = gem.remote {
818 extra.insert(
819 "remote".to_string(),
820 serde_json::Value::String(remote.clone()),
821 );
822 }
823 if let Some(ref revision) = gem.revision {
824 extra.insert(
825 "revision".to_string(),
826 serde_json::Value::String(revision.clone()),
827 );
828 }
829 if let Some(ref ref_field) = gem.ref_field {
830 extra.insert(
831 "ref".to_string(),
832 serde_json::Value::String(ref_field.clone()),
833 );
834 }
835 if let Some(ref branch) = gem.branch {
836 extra.insert(
837 "branch".to_string(),
838 serde_json::Value::String(branch.clone()),
839 );
840 }
841 if let Some(ref tag) = gem.tag {
842 extra.insert("tag".to_string(), serde_json::Value::String(tag.clone()));
843 }
844
845 Some(extra)
846}
847
/// Splits the parenthesised text of a lockfile spec into `(version, platform)`.
///
/// The text is split at the first `-`: the part before it is the version, the
/// rest is the platform. Without a `-` the whole text is the version. An empty
/// input yields `(None, None)`.
fn parse_version_platform(s: &str) -> (Option<String>, Option<String>) {
    if s.is_empty() {
        return (None, None);
    }

    match s.split_once('-') {
        Some((version, platform)) => (Some(version.to_owned()), Some(platform.to_owned())),
        None => (Some(s.to_owned()), None),
    }
}
862
863fn create_gem_purl(name: &str, version: Option<&str>) -> Option<String> {
865 let mut purl = match PackageUrl::new(PACKAGE_TYPE.as_str(), name) {
866 Ok(p) => p,
867 Err(e) => {
868 warn!("Failed to create PURL for gem '{}': {}", name, e);
869 return None;
870 }
871 };
872
873 if let Some(v) = version
874 && let Err(e) = purl.with_version(v)
875 {
876 warn!("Failed to set version '{}' for gem '{}': {}", v, name, e);
877 }
878
879 Some(purl.to_string())
880}
881
/// Builds the rubygems.org page URL for a gem.
///
/// With a version the URL points at that version's page; the version is
/// trimmed of surrounding whitespace and `/` first. Returns `None` when `name`
/// is empty.
fn rubygems_homepage_url(name: &str, version: Option<&str>) -> Option<String> {
    if name.is_empty() {
        return None;
    }

    let gem_page = format!("https://rubygems.org/gems/{}", name);
    let url = match version {
        Some(v) => format!("{}/versions/{}", gem_page, v.trim().trim_matches('/')),
        None => gem_page,
    };
    Some(url)
}
894
/// Builds the rubygems.org download URL of a `.gem` archive.
///
/// Name and version are trimmed of surrounding whitespace and `/`. A platform
/// other than `ruby` is appended to the version as `-<platform>`. Returns
/// `None` when `name` is empty or `version` is missing.
fn rubygems_download_url(
    name: &str,
    version: Option<&str>,
    platform: Option<&str>,
) -> Option<String> {
    if name.is_empty() {
        return None;
    }
    let version = version?.trim().trim_matches('/');
    let name = name.trim().trim_matches('/');

    let versioned = match platform {
        Some(p) if p != "ruby" => format!("{}-{}", version, p),
        _ => version.to_string(),
    };

    Some(format!(
        "https://rubygems.org/downloads/{}-{}.gem",
        name, versioned
    ))
}
922
/// Builds the rubygems.org JSON API URL for a gem.
///
/// With a version the v2 per-version endpoint is used, otherwise the v1
/// versions listing. Returns `None` when `name` is empty.
fn rubygems_api_url(name: &str, version: Option<&str>) -> Option<String> {
    if name.is_empty() {
        return None;
    }

    let url = match version {
        Some(v) => format!(
            "https://rubygems.org/api/v2/rubygems/{}/versions/{}.json",
            name, v
        ),
        None => format!(
            "https://rubygems.org/api/v1/versions/{}.json",
            name
        ),
    };
    Some(url)
}
940
941fn get_rubygems_urls(
942 name: &str,
943 version: Option<&str>,
944 platform: Option<&str>,
945) -> (
946 Option<String>,
947 Option<String>,
948 Option<String>,
949 Option<String>,
950) {
951 let repository_homepage_url = rubygems_homepage_url(name, version);
952 let repository_download_url = rubygems_download_url(name, version, platform);
953 let api_data_url = rubygems_api_url(name, version);
954 let download_url = repository_download_url.clone();
955
956 (
957 repository_homepage_url,
958 repository_download_url,
959 api_data_url,
960 download_url,
961 )
962}
963
964fn default_package_data() -> PackageData {
966 PackageData {
967 package_type: Some(PACKAGE_TYPE),
968 primary_language: Some("Ruby".to_string()),
969 ..Default::default()
970 }
971}
972
973fn default_package_data_with_datasource(datasource_id: DatasourceId) -> PackageData {
974 PackageData {
975 datasource_id: Some(datasource_id),
976 ..default_package_data()
977 }
978}
979
/// Parser for files with the `.gemspec` extension.
pub struct GemspecParser;
990
991impl PackageParser for GemspecParser {
992 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
993
994 fn extract_packages(path: &Path) -> Vec<PackageData> {
995 let datasource_id = gemspec_datasource_id(path);
996 let content = match read_file_to_string(path, None) {
997 Ok(c) => c,
998 Err(e) => {
999 warn!("Failed to read .gemspec at {:?}: {}", path, e);
1000 return vec![default_package_data_with_datasource(datasource_id)];
1001 }
1002 };
1003
1004 let mut package_data = parse_gemspec_with_context(&content, path.parent());
1005 package_data.datasource_id = Some(datasource_id);
1006 vec![package_data]
1007 }
1008
1009 fn is_match(path: &Path) -> bool {
1010 path.extension()
1011 .and_then(|ext| ext.to_str())
1012 .is_some_and(|ext| ext == "gemspec")
1013 }
1014}
1015
/// Renders `path` as a (lossy) string with every backslash replaced by `/`.
fn normalized_ruby_path(path: &Path) -> String {
    path.to_string_lossy()
        .chars()
        .map(|c| if c == '\\' { '/' } else { c })
        .collect()
}
1019
1020fn gemfile_datasource_id(path: &Path) -> DatasourceId {
1021 if normalized_ruby_path(path).contains("/data.gz-extract/") {
1022 DatasourceId::GemfileExtracted
1023 } else {
1024 DatasourceId::Gemfile
1025 }
1026}
1027
1028fn gemfile_lock_datasource_id(path: &Path) -> DatasourceId {
1029 if normalized_ruby_path(path).contains("/data.gz-extract/") {
1030 DatasourceId::GemfileLockExtracted
1031 } else {
1032 DatasourceId::GemfileLock
1033 }
1034}
1035
1036fn gemspec_datasource_id(path: &Path) -> DatasourceId {
1037 let normalized = normalized_ruby_path(path);
1038 if normalized.contains("/data.gz-extract/") {
1039 DatasourceId::GemspecExtracted
1040 } else if normalized.contains("/specifications/") {
1041 DatasourceId::GemGemspecInstalledSpecifications
1042 } else {
1043 DatasourceId::Gemspec
1044 }
1045}
1046
1047fn clean_gemspec_value(s: &str) -> String {
1049 let s = strip_freeze_suffix(s).trim();
1050
1051 let s = if let Some(pos) = s.find(" #") {
1052 s[..pos].trim()
1053 } else {
1054 s
1055 };
1056
1057 let s = if let Some(stripped) = s.strip_prefix("%q{") {
1058 stripped.strip_suffix('}').unwrap_or(stripped)
1059 } else if let Some(stripped) = s.strip_prefix("%q<") {
1060 stripped.strip_suffix('>').unwrap_or(stripped)
1061 } else if let Some(stripped) = s.strip_prefix("%q[") {
1062 stripped.strip_suffix(']').unwrap_or(stripped)
1063 } else if let Some(stripped) = s.strip_prefix("%q(") {
1064 stripped.strip_suffix(')').unwrap_or(stripped)
1065 } else {
1066 s
1067 };
1068
1069 let s = s
1070 .trim_start_matches('"')
1071 .trim_end_matches('"')
1072 .trim_start_matches('\'')
1073 .trim_end_matches('\'');
1074 let s = strip_freeze_suffix(s).trim();
1075 s.to_string()
1076}
1077
1078fn extract_ruby_array(s: &str) -> Vec<String> {
1080 let s = strip_freeze_suffix(s.trim());
1081 let s = s.trim_start_matches('[').trim_end_matches(']');
1082 let item_re = match Regex::new(r#"["']([^"']*?)["'](?:\.freeze)?"#) {
1083 Ok(r) => r,
1084 Err(_) => return Vec::new(),
1085 };
1086 item_re
1087 .captures_iter(s)
1088 .filter_map(|cap| cap.get(1).map(|m| m.as_str().to_string()))
1089 .collect()
1090}
1091
1092fn extract_all_ruby_values(s: &str) -> Vec<String> {
1093 let value_re = match Regex::new(r#"%q[\{<\[(]([^\}>\])]+)[\}>\])]|["']([^"']+)["']"#) {
1094 Ok(r) => r,
1095 Err(_) => return Vec::new(),
1096 };
1097
1098 value_re
1099 .captures_iter(s)
1100 .filter_map(|caps| caps.get(1).or_else(|| caps.get(2)))
1101 .map(|m| clean_gemspec_value(m.as_str()))
1102 .collect()
1103}
1104
1105fn extract_first_ruby_value(s: &str) -> Option<String> {
1106 extract_all_ruby_values(s).into_iter().next()
1107}
1108
/// Returns the text after the first top-level comma in `args`, trimmed.
///
/// Commas inside quotes, inside `[]`/`{}`/`<>` or inside `()` are skipped.
/// Within a quoted string a backslash escapes the character that follows it.
/// Returns `""` when there is no top-level comma.
fn after_first_argument(args: &str) -> &str {
    let mut bracket_depth = 0usize;
    let mut paren_depth = 0usize;
    let mut open_quote: Option<char> = None;
    // Set after a backslash inside a quote: the next character is ignored.
    let mut skip_next = false;

    for (offset, c) in args.char_indices() {
        if skip_next {
            skip_next = false;
            continue;
        }

        if let Some(quote) = open_quote {
            if c == '\\' {
                skip_next = true;
            } else if c == quote {
                open_quote = None;
            }
            continue;
        }

        match c {
            '\'' | '"' => open_quote = Some(c),
            '[' | '{' | '<' => bracket_depth += 1,
            ']' | '}' | '>' => bracket_depth = bracket_depth.saturating_sub(1),
            '(' => paren_depth += 1,
            ')' => paren_depth = paren_depth.saturating_sub(1),
            // `,` is one byte wide, so `offset + 1` is a valid boundary.
            ',' if bracket_depth == 0 && paren_depth == 0 => return args[offset + 1..].trim(),
            _ => {}
        }
    }

    ""
}
1146
1147fn resolve_variable_version(var_name: &str, contexts: &[String]) -> Option<String> {
1152 let var_name = var_name.trim();
1153 if var_name.is_empty() {
1154 return None;
1155 }
1156
1157 for candidate in candidate_constant_names(var_name) {
1158 let escaped = regex::escape(&candidate);
1159 let pattern = format!(r#"(?m)^\s*{}\s*=\s*["']([^"']+)["']"#, escaped);
1160 let Ok(re) = Regex::new(&pattern) else {
1161 continue;
1162 };
1163
1164 for context in contexts {
1165 if let Some(caps) = re.captures(context) {
1166 return caps.get(1).map(|m| m.as_str().to_string());
1167 }
1168 }
1169 }
1170
1171 None
1172}
1173
1174fn resolve_variable_array(var_name: &str, contexts: &[String]) -> Option<Vec<String>> {
1175 let var_name = var_name.trim();
1176 if var_name.is_empty() {
1177 return None;
1178 }
1179
1180 for candidate in candidate_constant_names(var_name) {
1181 let escaped = regex::escape(&candidate);
1182 let pattern = format!(r#"(?m)^\s*{}\s*=\s*(\[[^\n]+\])"#, escaped);
1183 let Ok(re) = Regex::new(&pattern) else {
1184 continue;
1185 };
1186
1187 for context in contexts {
1188 if let Some(caps) = re.captures(context)
1189 && let Some(raw) = caps.get(1)
1190 {
1191 let values = extract_ruby_array(raw.as_str());
1192 if !values.is_empty() {
1193 return Some(values);
1194 }
1195 }
1196 }
1197 }
1198
1199 None
1200}
1201
/// Lists the names under which a constant may be defined: the name as given,
/// followed by its last `::`-separated segment when that differs.
fn candidate_constant_names(var_name: &str) -> Vec<String> {
    let mut names = Vec::with_capacity(2);
    names.push(var_name.to_owned());

    // `split` always yields at least one piece, so `last` is never `None`.
    let unqualified = var_name.split("::").last().unwrap_or(var_name);
    if unqualified != var_name {
        names.push(unqualified.to_owned());
    }

    names
}
1211
/// Returns `true` when `s` is shaped like a local variable name: a first
/// character that is `_` or an ASCII lowercase letter, followed only by `_`
/// and ASCII alphanumerics. The empty string yields `false`.
fn looks_like_local_variable_reference(s: &str) -> bool {
    let mut rest = s.chars();
    match rest.next() {
        Some(first) if first == '_' || first.is_ascii_lowercase() => {
            rest.all(|c| c == '_' || c.is_ascii_alphanumeric())
        }
        _ => false,
    }
}
1217
/// Determines the directory that file reads must stay inside.
///
/// When the canonical `base_dir` lies inside the canonical current working
/// directory, the working directory is the root. Otherwise (or when the
/// working directory cannot be determined) the canonical `base_dir` itself is
/// used. Returns `None` when `base_dir` is `None` or cannot be canonicalized.
fn resolve_ruby_read_root(base_dir: Option<&Path>) -> Option<PathBuf> {
    let base_dir = base_dir?;

    let enclosing_cwd = || -> Option<PathBuf> {
        let cwd = std::env::current_dir().ok()?.canonicalize().ok()?;
        let base = base_dir.canonicalize().ok()?;
        if base.starts_with(&cwd) {
            Some(cwd)
        } else {
            None
        }
    };

    match enclosing_cwd() {
        Some(cwd) => Some(cwd),
        None => base_dir.canonicalize().ok(),
    }
}
1232
/// Canonicalizes `path` and returns it only if it lies inside `allowed_root`.
///
/// Returns `None` when the path cannot be canonicalized or when the canonical
/// path does not start with `allowed_root`.
fn resolve_ruby_read_path(path: PathBuf, allowed_root: &Path) -> Option<PathBuf> {
    match path.canonicalize() {
        Ok(resolved) if resolved.starts_with(allowed_root) => Some(resolved),
        _ => None,
    }
}
1239
1240fn resolve_file_read_argument(args: &str, base_dir: Option<&Path>) -> Option<String> {
1241 let base_dir = base_dir?;
1242 let allowed_root = resolve_ruby_read_root(base_dir.into())?;
1243 let relative_path = extract_first_ruby_value(args)?;
1244 if relative_path.is_empty() {
1245 return None;
1246 }
1247
1248 let candidate = Path::new(&relative_path);
1249 let path = if candidate.is_absolute() {
1250 candidate.to_path_buf()
1251 } else {
1252 base_dir.join(candidate)
1253 };
1254
1255 let safe_path = resolve_ruby_read_path(path, &allowed_root)?;
1256
1257 fs::read_to_string(safe_path)
1258 .ok()
1259 .map(|content| content.trim().to_string())
1260 .filter(|content| !content.is_empty())
1261}
1262
1263fn resolve_scalar_expression(
1264 expression: &str,
1265 base_dir: Option<&Path>,
1266 contexts: &[String],
1267) -> Option<String> {
1268 let expression = if let Some(pos) = expression.find(" #") {
1269 expression[..pos].trim()
1270 } else {
1271 expression.trim()
1272 };
1273
1274 let file_read_re = Regex::new(r#"^File\.read\((.+)\)(?:\.strip)?(?:\.freeze)?$"#).ok()?;
1275 if let Some(caps) = file_read_re.captures(expression) {
1276 return caps
1277 .get(1)
1278 .and_then(|m| resolve_file_read_argument(m.as_str(), base_dir));
1279 }
1280
1281 if let Some(value) = extract_first_ruby_value(expression) {
1282 return Some(value);
1283 }
1284
1285 let cleaned = clean_gemspec_value(expression);
1286 if looks_like_constant_reference(&cleaned) {
1287 return resolve_variable_version(&cleaned, contexts).or(Some(cleaned));
1288 }
1289
1290 None
1291}
1292
1293fn resolve_local_variable_value(
1294 var_name: &str,
1295 content: &str,
1296 base_dir: Option<&Path>,
1297 contexts: &[String],
1298) -> Option<String> {
1299 let escaped = regex::escape(var_name.trim());
1300 let pattern = format!(r#"(?m)^\s*{}\s*=\s*(.+)$"#, escaped);
1301 let re = Regex::new(&pattern).ok()?;
1302
1303 re.captures_iter(content).find_map(|caps| {
1304 caps.get(1)
1305 .and_then(|m| resolve_scalar_expression(m.as_str(), base_dir, contexts))
1306 })
1307}
1308
1309fn resolve_gemspec_scalar_value(
1310 raw_value: &str,
1311 content: &str,
1312 base_dir: Option<&Path>,
1313 contexts: &[String],
1314) -> Option<String> {
1315 let cleaned = truncate_field(clean_gemspec_value(raw_value));
1316 if cleaned.is_empty() {
1317 return None;
1318 }
1319
1320 if looks_like_constant_reference(&cleaned) {
1321 return resolve_variable_version(&cleaned, contexts)
1322 .map(truncate_field)
1323 .or(Some(cleaned));
1324 }
1325
1326 if looks_like_local_variable_reference(&cleaned) {
1327 return resolve_local_variable_value(&cleaned, content, base_dir, contexts)
1328 .map(truncate_field)
1329 .or(Some(cleaned));
1330 }
1331
1332 Some(cleaned)
1333}
1334
1335fn load_required_ruby_contexts(content: &str, base_dir: Option<&Path>) -> Vec<String> {
1336 let mut contexts = vec![content.to_string()];
1337 let Some(base_dir) = base_dir else {
1338 return contexts;
1339 };
1340 let allowed_root = resolve_ruby_read_root(Some(base_dir));
1341
1342 let require_re = match Regex::new(r#"(?m)^\s*require(?:_relative)?\s+["']([^"']+)["']"#) {
1343 Ok(re) => re,
1344 Err(_) => return contexts,
1345 };
1346
1347 for caps in require_re.captures_iter(content) {
1348 let Some(required) = caps.get(1).map(|m| m.as_str()) else {
1349 continue;
1350 };
1351 for candidate in candidate_require_paths(base_dir, required) {
1352 let Some(safe_candidate) = allowed_root
1353 .as_deref()
1354 .and_then(|root| resolve_ruby_read_path(candidate, root))
1355 else {
1356 continue;
1357 };
1358 if let Ok(required_content) = read_file_to_string(&safe_candidate, None) {
1359 contexts.push(required_content);
1360 break;
1361 }
1362 }
1363 }
1364
1365 contexts
1366}
1367
/// Lists the files a `require` argument may refer to, in lookup order.
///
/// `::` in `required` is replaced by `/` and a `.rb` extension is appended
/// when missing. The result is that file name joined onto `base_dir` and onto
/// `base_dir/lib`, in that order.
fn candidate_require_paths(base_dir: &Path, required: &str) -> Vec<PathBuf> {
    let mut filename = required.replace("::", "/");
    if !filename.ends_with(".rb") {
        filename.push_str(".rb");
    }

    let lib_dir = base_dir.join("lib");
    vec![base_dir.join(&filename), lib_dir.join(&filename)]
}
1381
/// Heuristic: does `s` look like a Ruby constant reference?
///
/// True when `s` contains `::` or begins with an ASCII uppercase letter.
fn looks_like_constant_reference(s: &str) -> bool {
    if s.contains("::") {
        return true;
    }
    matches!(s.chars().next(), Some(first) if first.is_ascii_uppercase())
}
1385
/// Test-only shortcut: parses gemspec text without a base directory.
#[cfg(test)]
fn parse_gemspec(content: &str) -> PackageData {
    let no_base_dir: Option<&Path> = None;
    parse_gemspec_with_context(content, no_base_dir)
}
1391
1392fn parse_gemspec_with_context(content: &str, base_dir: Option<&Path>) -> PackageData {
1393 let contexts = load_required_ruby_contexts(content, base_dir);
1394
1395 let field_re = match Regex::new(
1398 r#"(?m)^\s*\w+\.(name|version|summary|description|homepage|license)\s*=\s*(.+)$"#,
1399 ) {
1400 Ok(r) => r,
1401 Err(e) => {
1402 warn!("Failed to compile gemspec field regex: {}", e);
1403 return default_package_data_with_datasource(DatasourceId::Gemspec);
1404 }
1405 };
1406
1407 let licenses_re = match Regex::new(r#"(?m)^\s*\w+\.licenses\s*=\s*(.+)$"#) {
1408 Ok(r) => r,
1409 Err(e) => {
1410 warn!("Failed to compile licenses regex: {}", e);
1411 return default_package_data_with_datasource(DatasourceId::Gemspec);
1412 }
1413 };
1414
1415 let authors_re = match Regex::new(r#"(?m)^\s*\w+\.(?:authors|author)\s*=\s*(.+)$"#) {
1416 Ok(r) => r,
1417 Err(e) => {
1418 warn!("Failed to compile authors regex: {}", e);
1419 return default_package_data_with_datasource(DatasourceId::Gemspec);
1420 }
1421 };
1422
1423 let email_re = match Regex::new(r#"(?m)^\s*\w+\.email\s*=\s*(.+)$"#) {
1424 Ok(r) => r,
1425 Err(e) => {
1426 warn!("Failed to compile email regex: {}", e);
1427 return default_package_data_with_datasource(DatasourceId::Gemspec);
1428 }
1429 };
1430
1431 let dependency_call_re = match Regex::new(
1432 r#"(?m)^\s*\w+\.(add_(?:development_|runtime_)?dependency)\s*\(?(.+?)\)?\s*$"#,
1433 ) {
1434 Ok(r) => r,
1435 Err(e) => {
1436 warn!("Failed to compile gemspec dependency regex: {}", e);
1437 return default_package_data_with_datasource(DatasourceId::Gemspec);
1438 }
1439 };
1440
1441 let mut name: Option<String> = None;
1442 let mut version: Option<String> = None;
1443 let mut summary: Option<String> = None;
1444 let mut description: Option<String> = None;
1445 let mut homepage: Option<String> = None;
1446 let mut license: Option<String> = None;
1447 let mut licenses: Vec<String> = Vec::new();
1448 let mut authors: Vec<String> = Vec::new();
1449 let mut emails: Vec<String> = Vec::new();
1450 let mut dependencies: Vec<Dependency> = Vec::new();
1451
1452 for caps in field_re.captures_iter(content).take(MAX_ITERATION_COUNT) {
1454 let field_name = match caps.get(1) {
1455 Some(m) => m.as_str(),
1456 None => continue,
1457 };
1458 let raw_value = match caps.get(2) {
1459 Some(m) => m.as_str().trim(),
1460 None => continue,
1461 };
1462
1463 match field_name {
1464 "name" => name = resolve_gemspec_scalar_value(raw_value, content, base_dir, &contexts),
1465 "version" => {
1466 version = resolve_gemspec_scalar_value(raw_value, content, base_dir, &contexts);
1467 }
1468 "summary" => {
1469 summary = resolve_gemspec_scalar_value(raw_value, content, base_dir, &contexts)
1470 }
1471 "description" => description = Some(truncate_field(clean_gemspec_value(raw_value))),
1472 "homepage" => {
1473 homepage = resolve_gemspec_scalar_value(raw_value, content, base_dir, &contexts)
1474 }
1475 "license" => license = Some(truncate_field(clean_gemspec_value(raw_value))),
1476 _ => {}
1477 }
1478 }
1479
1480 for caps in licenses_re.captures_iter(content).take(MAX_ITERATION_COUNT) {
1482 if let Some(raw) = caps.get(1) {
1483 licenses = extract_ruby_array(raw.as_str());
1484 }
1485 }
1486
1487 for caps in authors_re.captures_iter(content).take(MAX_ITERATION_COUNT) {
1489 if let Some(raw) = caps.get(1) {
1490 let raw_str = raw.as_str().trim();
1491 if raw_str.starts_with('[') {
1492 authors = extract_ruby_array(raw_str);
1493 } else if looks_like_constant_reference(raw_str) {
1494 authors = resolve_variable_array(raw_str, &contexts)
1495 .unwrap_or_else(|| vec![clean_gemspec_value(raw_str)]);
1496 } else {
1497 authors.push(clean_gemspec_value(raw_str));
1498 }
1499 }
1500 }
1501
1502 for caps in email_re.captures_iter(content).take(MAX_ITERATION_COUNT) {
1504 if let Some(raw) = caps.get(1) {
1505 let raw_str = raw.as_str().trim();
1506 if raw_str.starts_with('[') {
1507 emails = extract_ruby_array(raw_str);
1508 } else if looks_like_constant_reference(raw_str) {
1509 emails = resolve_variable_array(raw_str, &contexts)
1510 .unwrap_or_else(|| vec![clean_gemspec_value(raw_str)]);
1511 } else {
1512 emails.push(clean_gemspec_value(raw_str));
1513 }
1514 }
1515 }
1516
1517 let mut parties: Vec<Party> = Vec::new();
1519
1520 if authors.len() == 1 && emails.len() == 1 {
1521 let email_str = emails.first().map(String::as_str);
1522 let (parsed_email_name, parsed_email) = match email_str {
1523 Some(e) => split_name_email(e),
1524 None => (None, None),
1525 };
1526
1527 parties.push(Party {
1528 r#type: Some("person".to_string()),
1529 role: Some("author".to_string()),
1530 name: authors.first().cloned().or(parsed_email_name),
1531 email: parsed_email.or_else(|| {
1532 email_str
1533 .filter(|e| e.contains('@') && !e.contains('<'))
1534 .map(|e| e.to_string())
1535 }),
1536 url: None,
1537 organization: None,
1538 organization_url: None,
1539 timezone: None,
1540 });
1541 } else {
1542 for author_name in authors {
1543 parties.push(Party {
1544 r#type: Some("person".to_string()),
1545 role: Some("author".to_string()),
1546 name: Some(author_name),
1547 email: None,
1548 url: None,
1549 organization: None,
1550 organization_url: None,
1551 timezone: None,
1552 });
1553 }
1554
1555 for email_str in emails {
1556 let (parsed_email_name, parsed_email) = if email_str.contains('<') {
1557 split_name_email(&email_str)
1558 } else {
1559 (None, None)
1560 };
1561 parties.push(Party {
1562 r#type: Some("person".to_string()),
1563 role: Some("author".to_string()),
1564 name: parsed_email_name,
1565 email: parsed_email.or_else(|| email_str.contains('@').then_some(email_str)),
1566 url: None,
1567 organization: None,
1568 organization_url: None,
1569 timezone: None,
1570 });
1571 }
1572 }
1573
1574 for caps in dependency_call_re
1575 .captures_iter(content)
1576 .take(MAX_ITERATION_COUNT)
1577 {
1578 let method = match caps.get(1) {
1579 Some(m) => m.as_str(),
1580 None => continue,
1581 };
1582 let args = match caps.get(2) {
1583 Some(m) => m.as_str(),
1584 None => continue,
1585 };
1586
1587 let Some(dep_name) = extract_first_ruby_value(args).map(truncate_field) else {
1588 continue;
1589 };
1590 let version_parts = extract_all_ruby_values(after_first_argument(args));
1591 let extracted_requirement = if version_parts.is_empty() {
1592 None
1593 } else {
1594 Some(version_parts.join(", "))
1595 };
1596 let purl = create_gem_purl(&dep_name, None);
1597 let is_development = method == "add_development_dependency";
1598 let scope = if is_development {
1599 "development"
1600 } else {
1601 "runtime"
1602 };
1603
1604 dependencies.push(Dependency {
1605 purl,
1606 extracted_requirement,
1607 scope: Some(scope.to_string()),
1608 is_runtime: Some(!is_development),
1609 is_optional: Some(is_development),
1610 is_pinned: None,
1611 is_direct: Some(true),
1612 resolved_package: None,
1613 extra_data: None,
1614 });
1615 }
1616
1617 let extracted_license_statement = if !licenses.is_empty() {
1619 Some(licenses.join(" AND "))
1620 } else {
1621 license
1622 };
1623
1624 let (declared_license_expression, declared_license_expression_spdx, license_detections) =
1625 normalize_spdx_declared_license(extracted_license_statement.as_deref());
1626
1627 let final_description = description.or(summary);
1629
1630 let purl = name
1632 .as_deref()
1633 .map(|n| create_gem_purl(n, version.as_deref()))
1634 .unwrap_or(None);
1635
1636 let (repository_homepage_url, repository_download_url, api_data_url, download_url) =
1637 if let Some(n) = name.as_deref() {
1638 get_rubygems_urls(n, version.as_deref(), None)
1639 } else {
1640 (None, None, None, None)
1641 };
1642
1643 PackageData {
1644 package_type: Some(PACKAGE_TYPE),
1645 name,
1646 version,
1647 primary_language: Some("Ruby".to_string()),
1648 description: final_description,
1649 homepage_url: homepage,
1650 download_url,
1651 declared_license_expression,
1652 declared_license_expression_spdx,
1653 license_detections,
1654 extracted_license_statement,
1655 parties,
1656 dependencies,
1657 repository_homepage_url,
1658 repository_download_url,
1659 api_data_url,
1660 datasource_id: Some(DatasourceId::Gemspec),
1661 purl,
1662 ..default_package_data()
1663 }
1664}
1665
/// Largest `.gem` archive, in bytes, that will be opened (100 MiB).
const MAX_ARCHIVE_SIZE: u64 = 100 * 1024 * 1024;

/// Byte limit (50 MiB) applied to the `metadata.gz` entry, both to its size
/// inside the archive and to its decompressed content.
const MAX_FILE_SIZE: u64 = 50 * 1024 * 1024;

/// Highest accepted decompressed-to-compressed size ratio for `metadata.gz`.
const MAX_COMPRESSION_RATIO: f64 = 100.0;

/// Parser for packaged `.gem` archives.
pub struct GemArchiveParser;
1681
1682impl PackageParser for GemArchiveParser {
1683 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1684
1685 fn extract_packages(path: &Path) -> Vec<PackageData> {
1686 vec![match extract_gem_archive(path) {
1687 Ok(data) => data,
1688 Err(e) => {
1689 warn!("Failed to extract .gem archive at {:?}: {}", path, e);
1690 default_package_data_with_datasource(DatasourceId::GemArchive)
1691 }
1692 }]
1693 }
1694
1695 fn is_match(path: &Path) -> bool {
1696 path.extension()
1697 .and_then(|ext| ext.to_str())
1698 .is_some_and(|ext| ext == "gem")
1699 }
1700}
1701
1702fn extract_gem_archive(path: &Path) -> Result<PackageData, String> {
1703 let file_metadata =
1704 fs::metadata(path).map_err(|e| format!("Failed to read file metadata: {}", e))?;
1705 let archive_size = file_metadata.len();
1706
1707 if archive_size > MAX_ARCHIVE_SIZE {
1708 return Err(format!(
1709 "Archive too large: {} bytes (limit: {} bytes)",
1710 archive_size, MAX_ARCHIVE_SIZE
1711 ));
1712 }
1713
1714 let file = File::open(path).map_err(|e| format!("Failed to open archive: {}", e))?;
1715 let mut archive = Archive::new(file);
1716
1717 let mut entry_count: usize = 0;
1718 for entry_result in archive
1719 .entries()
1720 .map_err(|e| format!("Failed to read tar entries: {}", e))?
1721 {
1722 entry_count += 1;
1723 if entry_count > MAX_ITERATION_COUNT {
1724 warn!(
1725 "Exceeded max tar entry count ({}) in .gem archive, stopping iteration",
1726 MAX_ITERATION_COUNT
1727 );
1728 break;
1729 }
1730
1731 let entry = entry_result.map_err(|e| format!("Failed to read tar entry: {}", e))?;
1732 let entry_path = entry
1733 .path()
1734 .map_err(|e| format!("Failed to get entry path: {}", e))?;
1735 let entry_str = entry_path.to_string_lossy();
1736 if entry_str.contains("..") {
1737 warn!("Skipping tar entry with path traversal: {}", entry_str);
1738 continue;
1739 }
1740
1741 if entry_path.to_str() == Some("metadata.gz") {
1742 let entry_size = entry.size();
1743 if entry_size > MAX_FILE_SIZE {
1744 return Err(format!(
1745 "metadata.gz too large: {} bytes (limit: {} bytes)",
1746 entry_size, MAX_FILE_SIZE
1747 ));
1748 }
1749
1750 let mut decoder = GzDecoder::new(entry);
1751 let mut content = Vec::new();
1752 let mut limited = std::io::Read::take(&mut decoder, MAX_FILE_SIZE + 1);
1753 limited
1754 .read_to_end(&mut content)
1755 .map_err(|e| format!("Failed to decompress metadata.gz: {}", e))?;
1756
1757 if content.len() > MAX_FILE_SIZE as usize {
1758 return Err(format!(
1759 "Decompressed metadata too large: exceeds {} byte limit",
1760 MAX_FILE_SIZE
1761 ));
1762 }
1763
1764 let content = match String::from_utf8(content) {
1765 Ok(s) => s,
1766 Err(err) => {
1767 let bytes = err.into_bytes();
1768 warn!("Invalid UTF-8 in gem metadata; using lossy conversion");
1769 String::from_utf8_lossy(&bytes).into_owned()
1770 }
1771 };
1772
1773 let uncompressed_size = content.len() as u64;
1774 if entry_size > 0 {
1775 let ratio = uncompressed_size as f64 / entry_size as f64;
1776 if ratio > MAX_COMPRESSION_RATIO {
1777 return Err(format!(
1778 "Suspicious compression ratio: {:.2}:1 (limit: {:.0}:1)",
1779 ratio, MAX_COMPRESSION_RATIO
1780 ));
1781 }
1782 }
1783
1784 return parse_gem_metadata_yaml(&content, DatasourceId::GemArchive);
1785 }
1786 }
1787
1788 Err("metadata.gz not found in .gem archive".to_string())
1789}
1790
1791fn parse_gem_metadata_yaml(
1792 content: &str,
1793 datasource_id: DatasourceId,
1794) -> Result<PackageData, String> {
1795 let cleaned = clean_ruby_yaml_tags(content);
1799
1800 let yaml: yaml_serde::Value =
1801 yaml_serde::from_str(&cleaned).map_err(|e| format!("Failed to parse YAML: {}", e))?;
1802
1803 let name = yaml_string(&yaml, "name").map(truncate_field);
1804 let version = yaml.get("version").and_then(|v| {
1805 if v.is_string() {
1806 v.as_str().map(|s| truncate_field(s.to_string()))
1807 } else {
1808 yaml_string(v, "version").map(truncate_field)
1809 }
1810 });
1811 let description = yaml_string(&yaml, "description")
1812 .or_else(|| yaml_string(&yaml, "summary"))
1813 .map(truncate_field);
1814 let homepage = yaml_string(&yaml, "homepage").map(truncate_field);
1815 let summary = yaml_string(&yaml, "summary").map(truncate_field);
1816
1817 let licenses: Vec<String> = yaml
1819 .get("licenses")
1820 .and_then(|v| v.as_sequence())
1821 .map(|seq| {
1822 seq.iter()
1823 .filter_map(|item| item.as_str().map(|s| truncate_field(s.to_string())))
1824 .collect()
1825 })
1826 .unwrap_or_default();
1827
1828 let extracted_license_statement = if !licenses.is_empty() {
1830 Some(licenses.join(" AND "))
1831 } else {
1832 None
1833 };
1834
1835 let (license_expression, license_expression_spdx, license_detections) =
1836 normalize_spdx_declared_license(extracted_license_statement.as_deref());
1837
1838 let authors: Vec<String> = yaml
1840 .get("authors")
1841 .and_then(|v| v.as_sequence())
1842 .map(|seq| {
1843 seq.iter()
1844 .filter_map(|item| item.as_str().map(|s| truncate_field(s.to_string())))
1845 .collect()
1846 })
1847 .unwrap_or_default();
1848
1849 let emails: Vec<String> = yaml
1850 .get("email")
1851 .map(|v| {
1852 if let Some(seq) = v.as_sequence() {
1853 seq.iter()
1854 .filter_map(|item| item.as_str().map(|s| truncate_field(s.to_string())))
1855 .collect()
1856 } else if let Some(s) = v.as_str() {
1857 vec![truncate_field(s.to_string())]
1858 } else {
1859 Vec::new()
1860 }
1861 })
1862 .unwrap_or_default();
1863
1864 let mut parties: Vec<Party> = Vec::new();
1866 let max_len = authors.len().max(emails.len());
1867 for i in 0..max_len {
1868 let author_name = authors.get(i).map(|s| s.as_str());
1869 let email_str = emails.get(i).map(|s| s.as_str());
1870
1871 let (parsed_email_name, parsed_email) = match email_str {
1872 Some(e) if e.contains('<') => split_name_email(e),
1873 None => (None, None),
1874 _ => (None, None),
1875 };
1876
1877 let party_name = author_name.map(|s| s.to_string()).or(parsed_email_name);
1878
1879 parties.push(Party {
1880 r#type: Some("person".to_string()),
1881 role: Some("author".to_string()),
1882 name: party_name,
1883 email: parsed_email.or_else(|| {
1884 email_str
1885 .filter(|e| e.contains('@') && !e.contains('<'))
1886 .map(|e| e.to_string())
1887 }),
1888 url: None,
1889 organization: None,
1890 organization_url: None,
1891 timezone: None,
1892 });
1893 }
1894
1895 let dependencies = parse_gem_yaml_dependencies(&yaml);
1897
1898 let metadata = yaml.get("metadata");
1899
1900 let bug_tracking_url = metadata
1901 .and_then(|m| yaml_string(m, "bug_tracking_uri"))
1902 .map(truncate_field);
1903
1904 let code_view_url = metadata
1905 .and_then(|m| yaml_string(m, "source_code_uri"))
1906 .map(truncate_field);
1907
1908 let vcs_url = code_view_url.clone().or_else(|| {
1909 metadata
1910 .and_then(|m| yaml_string(m, "homepage_uri"))
1911 .map(truncate_field)
1912 });
1913
1914 let file_references = metadata
1915 .and_then(|m| m.get("files"))
1916 .and_then(|f| f.as_sequence())
1917 .map(|seq| {
1918 seq.iter()
1919 .filter_map(|v| v.as_str())
1920 .map(|s| crate::models::FileReference {
1921 path: s.to_string(),
1922 size: None,
1923 sha1: None,
1924 md5: None,
1925 sha256: None,
1926 sha512: None,
1927 extra_data: None,
1928 })
1929 .collect::<Vec<_>>()
1930 })
1931 .unwrap_or_default();
1932
1933 let release_date = yaml_string(&yaml, "date").and_then(|d| {
1934 if d.len() >= 10 {
1935 Some(d[..10].to_string())
1936 } else {
1937 None
1938 }
1939 });
1940
1941 let purl = name
1942 .as_deref()
1943 .map(|n| create_gem_purl(n, version.as_deref()))
1944 .unwrap_or(None);
1945
1946 let platform = yaml_string(&yaml, "platform").map(truncate_field);
1947 let (repository_homepage_url, repository_download_url, api_data_url, download_url) =
1948 if let Some(n) = name.as_deref() {
1949 get_rubygems_urls(n, version.as_deref(), platform.as_deref())
1950 } else {
1951 (None, None, None, None)
1952 };
1953
1954 let qualifiers = if let Some(ref p) = platform {
1955 if p != "ruby" {
1956 let mut q = HashMap::new();
1957 q.insert("platform".to_string(), p.clone());
1958 Some(q)
1959 } else {
1960 None
1961 }
1962 } else {
1963 None
1964 };
1965
1966 Ok(PackageData {
1967 package_type: Some(PACKAGE_TYPE),
1968 name,
1969 version,
1970 qualifiers,
1971 primary_language: Some("Ruby".to_string()),
1972 description: description.or(summary),
1973 release_date,
1974 homepage_url: homepage,
1975 download_url,
1976 bug_tracking_url,
1977 code_view_url,
1978 declared_license_expression: license_expression,
1979 declared_license_expression_spdx: license_expression_spdx,
1980 license_detections,
1981 extracted_license_statement,
1982 file_references,
1983 parties,
1984 dependencies,
1985 repository_homepage_url,
1986 repository_download_url,
1987 api_data_url,
1988 datasource_id: Some(datasource_id),
1989 purl,
1990 vcs_url,
1991 ..default_package_data()
1992 })
1993}
1994
1995fn clean_ruby_yaml_tags(content: &str) -> String {
1997 let tag_re = match Regex::new(r"!ruby/\S+") {
1998 Ok(r) => r,
1999 Err(_) => return content.to_string(),
2000 };
2001 tag_re.replace_all(content, "").to_string()
2002}
2003
2004fn yaml_string(yaml: &yaml_serde::Value, key: &str) -> Option<String> {
2005 yaml.get(key)
2006 .and_then(|v| v.as_str())
2007 .filter(|s| !s.is_empty())
2008 .map(|s| s.to_string())
2009}
2010
2011fn parse_gem_yaml_dependencies(yaml: &yaml_serde::Value) -> Vec<Dependency> {
2012 let mut dependencies = Vec::new();
2013
2014 let deps_seq = match yaml.get("dependencies").and_then(|v| v.as_sequence()) {
2015 Some(seq) => seq,
2016 None => return dependencies,
2017 };
2018
2019 for dep_value in deps_seq.iter().take(MAX_ITERATION_COUNT) {
2020 let dep_name = match yaml_string(dep_value, "name").map(truncate_field) {
2021 Some(n) => n,
2022 None => continue,
2023 };
2024
2025 let dep_type = yaml_string(dep_value, "type");
2026 let is_development = dep_type.as_deref() == Some(":development");
2027
2028 let requirements = dep_value
2030 .get("requirement")
2031 .or_else(|| dep_value.get("version_requirements"))
2032 .and_then(|req| req.get("requirements"))
2033 .and_then(|reqs| reqs.as_sequence());
2034
2035 let extracted_requirement = requirements.map(|reqs| {
2036 let parts: Vec<String> = reqs
2037 .iter()
2038 .filter_map(|req| {
2039 let seq = req.as_sequence()?;
2040 if seq.len() >= 2 {
2041 let op = seq[0].as_str().unwrap_or("");
2042 let ver = seq[1].get("version").and_then(|v| v.as_str()).unwrap_or("");
2043 if op == ">=" && ver == "0" {
2044 None
2046 } else if op.is_empty() || ver.is_empty() {
2047 None
2048 } else {
2049 Some(format!("{} {}", op, ver))
2050 }
2051 } else {
2052 None
2053 }
2054 })
2055 .collect();
2056 parts.join(", ")
2057 });
2058
2059 let extracted_requirement = extracted_requirement
2060 .filter(|s| !s.is_empty())
2061 .or_else(|| Some(String::new()));
2062
2063 let (scope, is_runtime, is_optional) = if is_development {
2064 (Some("development".to_string()), false, true)
2065 } else {
2066 (Some("runtime".to_string()), true, false)
2067 };
2068
2069 let purl = create_gem_purl(&dep_name, None);
2070
2071 dependencies.push(Dependency {
2072 purl,
2073 extracted_requirement,
2074 scope,
2075 is_runtime: Some(is_runtime),
2076 is_optional: Some(is_optional),
2077 is_pinned: None,
2078 is_direct: Some(true),
2079 resolved_package: None,
2080 extra_data: None,
2081 });
2082 }
2083
2084 dependencies
2085}
2086
2087pub struct GemMetadataExtractedParser;
2092
2093impl PackageParser for GemMetadataExtractedParser {
2094 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
2095
2096 fn extract_packages(path: &Path) -> Vec<PackageData> {
2097 vec![match extract_gem_metadata_extracted(path) {
2098 Ok(data) => data,
2099 Err(e) => {
2100 warn!("Failed to extract gem metadata from {:?}: {}", path, e);
2101 default_package_data_with_datasource(DatasourceId::GemArchiveExtracted)
2102 }
2103 }]
2104 }
2105
2106 fn is_match(path: &Path) -> bool {
2107 path.to_str()
2108 .is_some_and(|p| p.contains("metadata.gz-extract"))
2109 }
2110}
2111
2112fn extract_gem_metadata_extracted(path: &Path) -> Result<PackageData, String> {
2113 let content = read_file_to_string(path, None)
2114 .map_err(|e| format!("Failed to read metadata.gz-extract file: {}", e))?;
2115
2116 parse_gem_metadata_yaml(&content, DatasourceId::GemArchiveExtracted)
2117}
2118
// Parser registrations for the Ruby ecosystem handled by this file.
//
// NOTE(review): `register_parser!` is defined elsewhere. Judging by the values
// passed, the positional arguments are: a human-readable description, the
// path glob patterns, the package type ("gem"), the primary language
// ("Ruby"), and an optional documentation URL — confirm against the macro.

// Gemfile manifests, including ones found inside an extracted `data.gz`.
crate::register_parser!(
    "Ruby Gemfile manifest",
    &["**/Gemfile", "**/data.gz-extract/Gemfile"],
    "gem",
    "Ruby",
    Some("https://bundler.io/man/gemfile.5.html"),
);

// Gemfile.lock lockfiles, including ones found inside an extracted `data.gz`.
crate::register_parser!(
    "Ruby Gemfile.lock lockfile",
    &["**/Gemfile.lock", "**/data.gz-extract/Gemfile.lock"],
    "gem",
    "Ruby",
    Some("https://bundler.io/man/gemfile.5.html"),
);

// `.gemspec` manifests anywhere, in extracted archives, or under a
// `specifications/` directory.
crate::register_parser!(
    "Ruby .gemspec manifest",
    &[
        "**/*.gemspec",
        "**/data.gz-extract/*.gemspec",
        "**/specifications/*.gemspec"
    ],
    "gem",
    "Ruby",
    Some("https://guides.rubygems.org/specification-reference/"),
);

// Packaged `.gem` archives.
crate::register_parser!(
    "Ruby .gem archive",
    &["**/*.gem"],
    "gem",
    "Ruby",
    Some("https://guides.rubygems.org/specification-reference/"),
);

// Metadata already extracted from a `.gem` archive.
crate::register_parser!(
    "Ruby gem metadata (extracted)",
    &["**/metadata.gz-extract"],
    "gem",
    "Ruby",
    Some("https://guides.rubygems.org/specification-reference/"),
);
2163
#[cfg(test)]
mod tests {
    use super::parse_gemspec;

    /// An unterminated `%q{` literal is cleaned to its inner text.
    #[test]
    fn test_clean_gemspec_value_handles_unterminated_percent_q() {
        let cleaned = super::clean_gemspec_value("%q{Arel is a SQL AST manager for Ruby. It");
        assert_eq!(cleaned, "Arel is a SQL AST manager for Ruby. It");
    }

    /// `add_runtime_dependency` and `add_dependency` both yield the runtime
    /// scope and keep their version constraint.
    #[test]
    fn test_parse_gemspec_runtime_dependency_scope() {
        let content = r#"
Gem::Specification.new do |spec|
  spec.name = "demo"
  spec.version = "1.0.0"
  spec.add_runtime_dependency "rack", "~> 3.0"
  spec.add_dependency "thor", ">= 1.0"
end
"#;

        let package_data = parse_gemspec(content);

        // Expected constraints, in declaration order.
        let expected_requirements = ["~> 3.0", ">= 1.0"];
        assert_eq!(package_data.dependencies.len(), 2);

        for (dependency, requirement) in package_data
            .dependencies
            .iter()
            .zip(expected_requirements)
        {
            assert_eq!(dependency.scope, Some("runtime".to_string()));
            assert_eq!(
                dependency.extracted_requirement,
                Some(requirement.to_string())
            );
        }
    }
}