1use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Party};
31use crate::parser_warn as warn;
32use crate::parsers::utils::{
33 MAX_ITERATION_COUNT, read_file_to_string, split_name_email, truncate_field,
34};
35use flate2::read::GzDecoder;
36use packageurl::PackageUrl;
37use regex::Regex;
38use std::collections::HashMap;
39use std::fs::{self, File};
40use std::io::Read;
41use std::path::{Path, PathBuf};
42use tar::Archive;
43
44use super::PackageParser;
45use super::license_normalization::normalize_spdx_declared_license;
46
/// Package type reported by every parser in this module (`PackageType::Gem`).
const PACKAGE_TYPE: PackageType = PackageType::Gem;
48
/// Returns `s` without any trailing `.freeze` calls.
///
/// The suffix is removed repeatedly, so `x.freeze.freeze` yields `x`.
pub fn strip_freeze_suffix(s: &str) -> &str {
    let mut rest = s;
    while let Some(shorter) = rest.strip_suffix(".freeze") {
        rest = shorter;
    }
    rest
}
63
/// A block that is currently open while a Gemfile is scanned line by line.
enum GemfileBlock {
    /// A `group ... do` block; holds the group names captured from its symbols.
    Group(Vec<String>),
    /// A `source "<value>" do` block; holds the quoted source value.
    Source(String),
}
68
/// Parser for files named `Gemfile`.
pub struct GemfileParser;
78
79impl PackageParser for GemfileParser {
80 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
81
82 fn extract_packages(path: &Path) -> Vec<PackageData> {
83 let datasource_id = gemfile_datasource_id(path);
84 let content = match read_file_to_string(path, None) {
85 Ok(c) => c,
86 Err(e) => {
87 warn!("Failed to read Gemfile at {:?}: {}", path, e);
88 return vec![default_package_data_with_datasource(datasource_id)];
89 }
90 };
91
92 let mut package_data = parse_gemfile(&content);
93 package_data.datasource_id = Some(datasource_id);
94 vec![package_data]
95 }
96
97 fn is_match(path: &Path) -> bool {
98 path.file_name()
99 .and_then(|n| n.to_str())
100 .is_some_and(|name| name == "Gemfile")
101 || path
102 .to_str()
103 .is_some_and(|p| p.contains("data.gz-extract/") && p.ends_with("/Gemfile"))
104 }
105}
106
107fn parse_gemfile(content: &str) -> PackageData {
109 let mut dependencies = Vec::new();
110 let mut block_stack = Vec::new();
111 let mut default_source = None;
112 let mut sources = Vec::new();
113
114 let gem_regex = match Regex::new(
117 r#"^\s*gem\s+["']([^"']+)["'](?:\.freeze)?(?:\s*,\s*["']([^"']+)["'](?:\.freeze)?)?(?:\s*,\s*["']([^"']+)["'](?:\.freeze)?)?(?:\s*,\s*(.+))?"#,
118 ) {
119 Ok(r) => r,
120 Err(e) => {
121 warn!("Failed to compile gem regex: {}", e);
122 return default_package_data_with_datasource(DatasourceId::Gemfile);
123 }
124 };
125
126 let group_start_regex = match Regex::new(r"^\s*group\s+(.+?)\s+do\s*$") {
128 Ok(r) => r,
129 Err(e) => {
130 warn!("Failed to compile group regex: {}", e);
131 return default_package_data_with_datasource(DatasourceId::Gemfile);
132 }
133 };
134
135 let group_end_regex = match Regex::new(r"^\s*end\s*$") {
136 Ok(r) => r,
137 Err(e) => {
138 warn!("Failed to compile end regex: {}", e);
139 return default_package_data_with_datasource(DatasourceId::Gemfile);
140 }
141 };
142
143 let source_block_start_regex = match Regex::new(r#"^\s*source\s+["']([^"']+)["']\s+do\s*$"#) {
144 Ok(r) => r,
145 Err(e) => {
146 warn!("Failed to compile source block regex: {}", e);
147 return default_package_data_with_datasource(DatasourceId::Gemfile);
148 }
149 };
150
151 let source_regex = match Regex::new(r#"^\s*source\s+["']([^"']+)["']\s*$"#) {
152 Ok(r) => r,
153 Err(e) => {
154 warn!("Failed to compile source regex: {}", e);
155 return default_package_data_with_datasource(DatasourceId::Gemfile);
156 }
157 };
158
159 let symbol_regex = match Regex::new(r":(\w+)") {
161 Ok(r) => r,
162 Err(e) => {
163 warn!("Failed to compile symbol regex: {}", e);
164 return default_package_data_with_datasource(DatasourceId::Gemfile);
165 }
166 };
167
168 for line in content.lines().take(MAX_ITERATION_COUNT) {
169 let trimmed = line.trim();
170
171 if trimmed.is_empty() || trimmed.starts_with('#') {
173 continue;
174 }
175
176 if let Some(caps) = group_start_regex.captures(trimmed) {
178 let groups_str = caps.get(1).map(|m| m.as_str()).unwrap_or("");
179 let mut current_groups = Vec::new();
180 for cap in symbol_regex.captures_iter(groups_str) {
181 if let Some(group_name) = cap.get(1) {
182 current_groups.push(group_name.as_str().to_string());
183 }
184 }
185 block_stack.push(GemfileBlock::Group(current_groups));
186 continue;
187 }
188
189 if let Some(caps) = source_block_start_regex.captures(trimmed) {
190 let source = caps
191 .get(1)
192 .map(|m| m.as_str().to_string())
193 .unwrap_or_default();
194 if !source.is_empty() {
195 push_unique_string(&mut sources, source.clone());
196 block_stack.push(GemfileBlock::Source(source));
197 }
198 continue;
199 }
200
201 if let Some(caps) = source_regex.captures(trimmed) {
202 if let Some(source) = caps.get(1).map(|m| m.as_str().to_string()) {
203 push_unique_string(&mut sources, source.clone());
204 default_source = Some(source);
205 }
206 continue;
207 }
208
209 if group_end_regex.is_match(trimmed) {
211 block_stack.pop();
212 continue;
213 }
214
215 if let Some(caps) = gem_regex.captures(trimmed) {
217 let name = strip_freeze_suffix(caps.get(1).map(|m| m.as_str()).unwrap_or(""));
218 if name.is_empty() {
219 continue;
220 }
221
222 let mut version_parts = Vec::new();
224 if let Some(v) = caps.get(2) {
225 version_parts.push(strip_freeze_suffix(v.as_str()).to_string());
226 }
227 if let Some(v) = caps.get(3) {
228 let v_str = strip_freeze_suffix(v.as_str());
229 if looks_like_version_constraint(v_str) {
231 version_parts.push(v_str.to_string());
232 }
233 }
234
235 let extracted_requirement = if version_parts.is_empty() {
236 None
237 } else {
238 Some(version_parts.join(", "))
239 };
240
241 let current_groups = current_group_names(&block_stack);
242
243 let (scope, is_runtime, is_optional) = if current_groups.is_empty() {
246 (None, true, false)
248 } else if current_groups.iter().any(|g| g == "development") {
249 (Some("development".to_string()), false, true)
250 } else if current_groups.iter().any(|g| g == "test") {
251 (Some("test".to_string()), false, true)
252 } else {
253 let group = current_groups.first().cloned();
255 (group, true, false)
256 };
257
258 let purl = create_gem_purl(name, None);
260 let inherited_source = current_source(&block_stack, default_source.as_deref());
261 let extra_data = build_gemfile_dependency_extra_data(
262 caps.get(4).map(|m| m.as_str()),
263 inherited_source.as_deref(),
264 );
265
266 dependencies.push(Dependency {
267 purl,
268 extracted_requirement,
269 scope,
270 is_runtime: Some(is_runtime),
271 is_optional: Some(is_optional),
272 is_pinned: None,
273 is_direct: Some(true),
274 resolved_package: None,
275 extra_data,
276 });
277 }
278 }
279
280 let extra_data = if sources.is_empty() {
281 None
282 } else {
283 Some(HashMap::from([(
284 "sources".to_string(),
285 serde_json::Value::Array(sources.into_iter().map(serde_json::Value::String).collect()),
286 )]))
287 };
288
289 PackageData {
290 package_type: Some(PACKAGE_TYPE),
291 primary_language: Some("Ruby".to_string()),
292 dependencies,
293 extra_data,
294 datasource_id: Some(DatasourceId::Gemfile),
295 ..default_package_data()
296 }
297}
298
299fn current_group_names(block_stack: &[GemfileBlock]) -> Vec<String> {
300 block_stack
301 .iter()
302 .rev()
303 .find_map(|block| match block {
304 GemfileBlock::Group(groups) => Some(groups.clone()),
305 GemfileBlock::Source(_) => None,
306 })
307 .unwrap_or_default()
308}
309
310fn current_source(block_stack: &[GemfileBlock], default_source: Option<&str>) -> Option<String> {
311 block_stack
312 .iter()
313 .rev()
314 .find_map(|block| match block {
315 GemfileBlock::Source(source) => Some(source.clone()),
316 GemfileBlock::Group(_) => None,
317 })
318 .or_else(|| default_source.map(str::to_string))
319}
320
/// Appends `value` to `values` unless an equal string is already present.
fn push_unique_string(values: &mut Vec<String>, value: String) {
    let already_present = values.iter().any(|existing| *existing == value);
    if already_present {
        return;
    }
    values.push(value);
}
326
327fn build_gemfile_dependency_extra_data(
328 options: Option<&str>,
329 inherited_source: Option<&str>,
330) -> Option<HashMap<String, serde_json::Value>> {
331 let mut extra = HashMap::new();
332 let options = options.unwrap_or("");
333
334 if let Some(git) = extract_gemfile_quoted_option(options, "git") {
335 extra.insert(
336 "source_type".to_string(),
337 serde_json::Value::String("GIT".to_string()),
338 );
339 extra.insert("git".to_string(), serde_json::Value::String(git.clone()));
340 extra.insert("remote".to_string(), serde_json::Value::String(git));
341 }
342
343 if let Some(path) = extract_gemfile_quoted_option(options, "path") {
344 extra.insert(
345 "source_type".to_string(),
346 serde_json::Value::String("PATH".to_string()),
347 );
348 extra.insert("path".to_string(), serde_json::Value::String(path));
349 }
350
351 for key in ["branch", "ref", "tag"] {
352 if let Some(value) = extract_gemfile_quoted_option(options, key) {
353 extra.insert(key.to_string(), serde_json::Value::String(value));
354 }
355 }
356
357 let direct_source = extract_gemfile_quoted_option(options, "source");
358 if let Some(source) = direct_source {
359 extra.insert("source".to_string(), serde_json::Value::String(source));
360 } else if !extra.contains_key("source_type")
361 && let Some(source) = inherited_source
362 {
363 extra.insert(
364 "source".to_string(),
365 serde_json::Value::String(source.to_string()),
366 );
367 }
368
369 (!extra.is_empty()).then_some(extra)
370}
371
372fn extract_gemfile_quoted_option(options: &str, key: &str) -> Option<String> {
373 if options.is_empty() {
374 return None;
375 }
376
377 let pattern = format!(r#"(?:^|,\s*){}\s*:\s*["']([^"']+)["']"#, regex::escape(key));
378 Regex::new(&pattern)
379 .ok()
380 .and_then(|regex| regex.captures(options))
381 .and_then(|captures| captures.get(1).map(|m| m.as_str().to_string()))
382}
383
/// Returns `true` when `s` starts like a version requirement: one of the
/// operator characters `~`, `>`, `<`, `=`, `!`, or an ASCII digit.
fn looks_like_version_constraint(s: &str) -> bool {
    match s.chars().next() {
        Some(first) => matches!(first, '~' | '>' | '<' | '=' | '!') || first.is_ascii_digit(),
        None => false,
    }
}
393
/// Parser for files named `Gemfile.lock`.
pub struct GemfileLockParser;
403
404impl PackageParser for GemfileLockParser {
405 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
406
407 fn extract_packages(path: &Path) -> Vec<PackageData> {
408 let datasource_id = gemfile_lock_datasource_id(path);
409 let content = match read_file_to_string(path, None) {
410 Ok(c) => c,
411 Err(e) => {
412 warn!("Failed to read Gemfile.lock at {:?}: {}", path, e);
413 return vec![default_package_data_with_datasource(datasource_id)];
414 }
415 };
416
417 let mut package_data = parse_gemfile_lock(&content);
418 package_data.datasource_id = Some(datasource_id);
419 vec![package_data]
420 }
421
422 fn is_match(path: &Path) -> bool {
423 path.file_name()
424 .and_then(|n| n.to_str())
425 .is_some_and(|name| name == "Gemfile.lock")
426 || path
427 .to_str()
428 .is_some_and(|p| p.contains("data.gz-extract/") && p.ends_with("/Gemfile.lock"))
429 }
430}
431
/// Section of a `Gemfile.lock` that the line scanner is currently inside.
#[derive(Debug, Clone, PartialEq)]
enum ParseState {
    /// No section header has been seen yet.
    None,
    /// Inside a `GEM` section, before its `specs:` line.
    Gem,
    /// Inside a `GIT` section, before its `specs:` line.
    Git,
    /// Inside a `PATH` section, before its `specs:` line.
    Path,
    /// Inside an `SVN` section, before its `specs:` line.
    Svn,
    /// After the `specs:` line of a GEM/GIT/PATH/SVN section.
    Specs,
    /// Inside the `PLATFORMS` section.
    Platforms,
    /// Inside the `BUNDLED WITH` section.
    BundledWith,
    /// Inside the `DEPENDENCIES` section.
    Dependencies,
}
445
/// Everything collected about one gem while reading a `Gemfile.lock`.
#[derive(Debug, Clone, Default)]
struct GemInfo {
    /// Gem name as written in the lockfile.
    name: String,
    /// Version part of the parenthesised spec entry, if any.
    version: Option<String>,
    /// Platform part of the spec entry (the text after the first `-`), if any.
    platform: Option<String>,
    /// Header of the section the gem was listed under: `GEM`, `GIT`, `PATH` or `SVN`.
    gem_type: String,
    /// Value of the section's `remote:` option.
    remote: Option<String>,
    /// Value of the section's `revision:` option.
    revision: Option<String>,
    /// Value of the section's `ref:` option.
    ref_field: Option<String>,
    /// Value of the section's `branch:` option.
    branch: Option<String>,
    /// Value of the section's `tag:` option.
    tag: Option<String>,
    /// `true` when the gem's `DEPENDENCIES` entry ends with `!`.
    pinned: bool,
    /// Parenthesised constraints taken from the gem's `DEPENDENCIES` entry.
    requirements: Vec<String>,
}
466
467fn select_primary_path_gem(gems: &HashMap<String, GemInfo>) -> Option<GemInfo> {
468 let mut path_gems: Vec<&GemInfo> = gems.values().filter(|gem| gem.gem_type == "PATH").collect();
469 path_gems.sort_by(|left, right| {
470 left.remote
471 .as_deref()
472 .cmp(&right.remote.as_deref())
473 .then_with(|| left.name.cmp(&right.name))
474 });
475
476 path_gems
477 .iter()
478 .copied()
479 .find(|gem| gem.pinned && gem.remote.as_deref() == Some("."))
480 .or_else(|| path_gems.iter().copied().find(|gem| gem.pinned))
481 .or_else(|| {
482 path_gems
483 .iter()
484 .copied()
485 .find(|gem| gem.remote.as_deref() == Some("."))
486 })
487 .or_else(|| path_gems.first().copied())
488 .cloned()
489}
490
491fn parse_gemfile_lock(content: &str) -> PackageData {
493 let mut state = ParseState::None;
494 let mut dependencies = Vec::new();
495 let mut gems: HashMap<String, GemInfo> = HashMap::new();
496 let mut platforms: Vec<String> = Vec::new();
497 let mut bundler_version: Option<String> = None;
498 let mut current_gem_type = String::new();
499 let mut current_remote: Option<String> = None;
500 let mut current_options: HashMap<String, String> = HashMap::new();
501
502 let deps_regex = match Regex::new(r"^ {2}([^ \)\(,!:]+)(?: \(([^)]+)\))?(!)?$") {
504 Ok(r) => r,
505 Err(e) => {
506 warn!("Failed to compile deps regex: {}", e);
507 return default_package_data_with_datasource(DatasourceId::GemfileLock);
508 }
509 };
510
511 let spec_deps_regex = match Regex::new(r"^ {4}([^ \)\(,!:]+)(?: \(([^)]+)\))?$") {
513 Ok(r) => r,
514 Err(e) => {
515 warn!("Failed to compile spec_deps regex: {}", e);
516 return default_package_data_with_datasource(DatasourceId::GemfileLock);
517 }
518 };
519
520 let options_regex = match Regex::new(r"^ {2}([a-z]+): (.+)$") {
522 Ok(r) => r,
523 Err(e) => {
524 warn!("Failed to compile options regex: {}", e);
525 return default_package_data_with_datasource(DatasourceId::GemfileLock);
526 }
527 };
528
529 let version_regex = match Regex::new(r"^\s+(\d+(?:\.\d+)+)\s*$") {
531 Ok(r) => r,
532 Err(e) => {
533 warn!("Failed to compile version regex: {}", e);
534 return default_package_data_with_datasource(DatasourceId::GemfileLock);
535 }
536 };
537
538 for line in content.lines().take(MAX_ITERATION_COUNT) {
539 let trimmed = line.trim_end();
540
541 if trimmed.is_empty() {
543 current_options.clear();
544 continue;
545 }
546
547 match trimmed {
549 "GEM" => {
550 state = ParseState::Gem;
551 current_gem_type = "GEM".to_string();
552 current_remote = None;
553 current_options.clear();
554 continue;
555 }
556 "GIT" => {
557 state = ParseState::Git;
558 current_gem_type = "GIT".to_string();
559 current_remote = None;
560 current_options.clear();
561 continue;
562 }
563 "PATH" => {
564 state = ParseState::Path;
565 current_gem_type = "PATH".to_string();
566 current_remote = None;
567 current_options.clear();
568 continue;
569 }
570 "SVN" => {
571 state = ParseState::Svn;
572 current_gem_type = "SVN".to_string();
573 current_remote = None;
574 current_options.clear();
575 continue;
576 }
577 "PLATFORMS" => {
578 state = ParseState::Platforms;
579 continue;
580 }
581 "BUNDLED WITH" => {
582 state = ParseState::BundledWith;
583 continue;
584 }
585 "DEPENDENCIES" => {
586 state = ParseState::Dependencies;
587 continue;
588 }
589 _ => {}
590 }
591
592 if trimmed.trim() == "specs:" {
596 state = match state {
597 ParseState::Gem | ParseState::Git | ParseState::Path | ParseState::Svn => {
598 ParseState::Specs
599 }
600 _ => state,
601 };
602 continue;
603 }
604
605 match state {
607 ParseState::Gem | ParseState::Git | ParseState::Path | ParseState::Svn => {
608 if let Some(caps) = options_regex.captures(line) {
610 let key = caps.get(1).map(|m| m.as_str()).unwrap_or("");
611 let value = caps.get(2).map(|m| m.as_str()).unwrap_or("");
612 current_options.insert(key.to_string(), value.to_string());
613 if key == "remote" {
614 current_remote = Some(value.to_string());
615 }
616 }
617 }
618 ParseState::Specs => {
619 if let Some(caps) = spec_deps_regex.captures(line) {
621 let name = caps.get(1).map(|m| m.as_str()).unwrap_or("").to_string();
622 let version_str = caps.get(2).map(|m| m.as_str()).unwrap_or("");
623
624 let (version, platform) = parse_version_platform(version_str);
626
627 if !name.is_empty() {
628 let gem_info = GemInfo {
629 name: name.clone(),
630 version,
631 platform,
632 gem_type: current_gem_type.clone(),
633 remote: current_remote.clone(),
634 revision: current_options.get("revision").cloned(),
635 ref_field: current_options.get("ref").cloned(),
636 branch: current_options.get("branch").cloned(),
637 tag: current_options.get("tag").cloned(),
638 pinned: false,
639 requirements: Vec::new(),
640 };
641 gems.insert(name, gem_info);
642 }
643 }
644 }
645 ParseState::Platforms => {
646 let platform = trimmed.trim();
648 if !platform.is_empty() {
649 platforms.push(platform.to_string());
650 }
651 }
652 ParseState::BundledWith => {
653 if let Some(caps) = version_regex.captures(line) {
655 bundler_version = caps.get(1).map(|m| m.as_str().to_string());
656 }
657 }
658 ParseState::Dependencies => {
659 if let Some(caps) = deps_regex.captures(line) {
661 let name = caps.get(1).map(|m| m.as_str()).unwrap_or("").to_string();
662 let version_constraint = caps.get(2).map(|m| m.as_str().to_string());
663 let pinned = caps.get(3).is_some();
664
665 if !name.is_empty() {
666 if let Some(gem) = gems.get_mut(&name) {
668 gem.pinned = pinned;
669 if let Some(vc) = &version_constraint {
670 gem.requirements.push(vc.clone());
671 }
672 } else {
673 let gem_info = GemInfo {
674 name: name.clone(),
675 version: None,
676 platform: None,
677 gem_type: "GEM".to_string(),
678 remote: None,
679 revision: None,
680 ref_field: None,
681 branch: None,
682 tag: None,
683 pinned,
684 requirements: version_constraint.into_iter().collect(),
685 };
686 gems.insert(name, gem_info);
687 }
688 }
689 }
690 }
691 ParseState::None => {}
692 }
693 }
694
695 let primary_gem = select_primary_path_gem(&gems);
696
697 let (
698 package_name,
699 package_version,
700 repository_homepage_url,
701 repository_download_url,
702 api_data_url,
703 download_url,
704 ) = if let Some(ref pg) = primary_gem {
705 let urls = get_rubygems_urls(&pg.name, pg.version.as_deref(), pg.platform.as_deref());
706 (
707 Some(pg.name.clone()),
708 pg.version.clone(),
709 urls.0,
710 urls.1,
711 urls.2,
712 urls.3,
713 )
714 } else {
715 (None, None, None, None, None, None)
716 };
717
718 for (_, gem) in gems {
719 if let Some(ref pg) = primary_gem
720 && gem.name == pg.name
721 {
722 continue;
723 }
724
725 let version_for_purl = gem.version.as_deref();
726 let purl = create_gem_purl(&gem.name, version_for_purl);
727
728 let extracted_requirement = if !gem.requirements.is_empty() {
729 Some(gem.requirements.join(", "))
730 } else {
731 gem.version.clone()
732 };
733
734 let extra_data = build_gem_source_extra_data(&gem);
735
736 dependencies.push(Dependency {
737 purl,
738 extracted_requirement,
739 scope: Some("dependencies".to_string()),
740 is_runtime: Some(true),
741 is_optional: Some(false),
742 is_pinned: Some(gem.pinned),
743 is_direct: Some(true),
744 resolved_package: None,
745 extra_data,
746 });
747 }
748
749 dependencies.sort_by(|left, right| {
750 left.purl
751 .as_deref()
752 .cmp(&right.purl.as_deref())
753 .then_with(|| {
754 left.extracted_requirement
755 .as_deref()
756 .cmp(&right.extracted_requirement.as_deref())
757 })
758 });
759
760 let mut extra_data = HashMap::new();
762 if !platforms.is_empty() {
763 extra_data.insert(
764 "platforms".to_string(),
765 serde_json::Value::Array(
766 platforms
767 .into_iter()
768 .map(serde_json::Value::String)
769 .collect(),
770 ),
771 );
772 }
773 if let Some(bv) = bundler_version {
774 extra_data.insert("bundler_version".to_string(), serde_json::Value::String(bv));
775 }
776
777 let purl = package_name
778 .as_deref()
779 .map(|n| create_gem_purl(n, package_version.as_deref()))
780 .unwrap_or(None);
781
782 PackageData {
783 package_type: Some(PACKAGE_TYPE),
784 name: package_name,
785 version: package_version,
786 primary_language: Some("Ruby".to_string()),
787 download_url,
788 dependencies,
789 repository_homepage_url,
790 repository_download_url,
791 api_data_url,
792 extra_data: if extra_data.is_empty() {
793 None
794 } else {
795 Some(extra_data)
796 },
797 datasource_id: Some(DatasourceId::GemfileLock),
798 purl,
799 ..default_package_data()
800 }
801}
802
803fn build_gem_source_extra_data(gem: &GemInfo) -> Option<HashMap<String, serde_json::Value>> {
804 if gem.gem_type != "GIT" && gem.gem_type != "PATH" && gem.gem_type != "SVN" {
805 return None;
806 }
807
808 let mut extra = HashMap::new();
809 extra.insert(
810 "source_type".to_string(),
811 serde_json::Value::String(gem.gem_type.clone()),
812 );
813
814 if let Some(ref remote) = gem.remote {
815 extra.insert(
816 "remote".to_string(),
817 serde_json::Value::String(remote.clone()),
818 );
819 }
820 if let Some(ref revision) = gem.revision {
821 extra.insert(
822 "revision".to_string(),
823 serde_json::Value::String(revision.clone()),
824 );
825 }
826 if let Some(ref ref_field) = gem.ref_field {
827 extra.insert(
828 "ref".to_string(),
829 serde_json::Value::String(ref_field.clone()),
830 );
831 }
832 if let Some(ref branch) = gem.branch {
833 extra.insert(
834 "branch".to_string(),
835 serde_json::Value::String(branch.clone()),
836 );
837 }
838 if let Some(ref tag) = gem.tag {
839 extra.insert("tag".to_string(), serde_json::Value::String(tag.clone()));
840 }
841
842 Some(extra)
843}
844
/// Splits a lockfile spec such as `1.2.3-java` at the first `-` into
/// `(version, platform)`.
///
/// An empty input yields `(None, None)`; input without `-` yields the whole
/// string as the version and no platform.
fn parse_version_platform(s: &str) -> (Option<String>, Option<String>) {
    if s.is_empty() {
        return (None, None);
    }

    match s.split_once('-') {
        Some((version, platform)) => (Some(version.to_owned()), Some(platform.to_owned())),
        None => (Some(s.to_owned()), None),
    }
}
859
860fn create_gem_purl(name: &str, version: Option<&str>) -> Option<String> {
862 let mut purl = match PackageUrl::new(PACKAGE_TYPE.as_str(), name) {
863 Ok(p) => p,
864 Err(e) => {
865 warn!("Failed to create PURL for gem '{}': {}", name, e);
866 return None;
867 }
868 };
869
870 if let Some(v) = version
871 && let Err(e) = purl.with_version(v)
872 {
873 warn!("Failed to set version '{}' for gem '{}': {}", v, name, e);
874 }
875
876 Some(purl.to_string())
877}
878
/// Returns the rubygems.org page for `name`, or for a specific `version` of
/// it when one is given (surrounding whitespace and `/` are removed from the
/// version). Returns `None` for an empty name.
fn rubygems_homepage_url(name: &str, version: Option<&str>) -> Option<String> {
    if name.is_empty() {
        return None;
    }

    let url = match version {
        Some(v) => format!(
            "https://rubygems.org/gems/{}/versions/{}",
            name,
            v.trim().trim_matches('/')
        ),
        None => format!("https://rubygems.org/gems/{}", name),
    };
    Some(url)
}
891
/// Returns the rubygems.org download URL of the `.gem` archive.
///
/// Requires a non-empty `name` and a `version`; otherwise returns `None`.
/// Name and version have surrounding whitespace and `/` removed. A platform
/// other than `ruby` is appended to the version as `-<platform>`.
fn rubygems_download_url(
    name: &str,
    version: Option<&str>,
    platform: Option<&str>,
) -> Option<String> {
    if name.is_empty() {
        return None;
    }
    let version = version?.trim().trim_matches('/');
    let name = name.trim().trim_matches('/');

    let version_plat = match platform.filter(|p| *p != "ruby") {
        Some(p) => format!("{}-{}", version, p),
        None => version.to_string(),
    };

    Some(format!(
        "https://rubygems.org/downloads/{}-{}.gem",
        name, version_plat
    ))
}
919
/// Returns the rubygems.org API URL for `name`: the v2 endpoint for a single
/// version when `version` is given, otherwise the v1 versions listing.
/// Returns `None` for an empty name.
fn rubygems_api_url(name: &str, version: Option<&str>) -> Option<String> {
    if name.is_empty() {
        return None;
    }

    let url = match version {
        Some(v) => format!(
            "https://rubygems.org/api/v2/rubygems/{}/versions/{}.json",
            name, v
        ),
        None => format!(
            "https://rubygems.org/api/v1/versions/{}.json",
            name
        ),
    };
    Some(url)
}
937
938fn get_rubygems_urls(
939 name: &str,
940 version: Option<&str>,
941 platform: Option<&str>,
942) -> (
943 Option<String>,
944 Option<String>,
945 Option<String>,
946 Option<String>,
947) {
948 let repository_homepage_url = rubygems_homepage_url(name, version);
949 let repository_download_url = rubygems_download_url(name, version, platform);
950 let api_data_url = rubygems_api_url(name, version);
951 let download_url = repository_download_url.clone();
952
953 (
954 repository_homepage_url,
955 repository_download_url,
956 api_data_url,
957 download_url,
958 )
959}
960
961fn default_package_data() -> PackageData {
963 PackageData {
964 package_type: Some(PACKAGE_TYPE),
965 primary_language: Some("Ruby".to_string()),
966 ..Default::default()
967 }
968}
969
970fn default_package_data_with_datasource(datasource_id: DatasourceId) -> PackageData {
971 PackageData {
972 datasource_id: Some(datasource_id),
973 ..default_package_data()
974 }
975}
976
/// Parser for files with the `.gemspec` extension.
pub struct GemspecParser;
987
988impl PackageParser for GemspecParser {
989 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
990
991 fn extract_packages(path: &Path) -> Vec<PackageData> {
992 let datasource_id = gemspec_datasource_id(path);
993 let content = match read_file_to_string(path, None) {
994 Ok(c) => c,
995 Err(e) => {
996 warn!("Failed to read .gemspec at {:?}: {}", path, e);
997 return vec![default_package_data_with_datasource(datasource_id)];
998 }
999 };
1000
1001 let mut package_data = parse_gemspec_with_context(&content, path.parent());
1002 package_data.datasource_id = Some(datasource_id);
1003 vec![package_data]
1004 }
1005
1006 fn is_match(path: &Path) -> bool {
1007 path.extension()
1008 .and_then(|ext| ext.to_str())
1009 .is_some_and(|ext| ext == "gemspec")
1010 }
1011}
1012
/// Renders `path` as a string (lossily for non-UTF-8 paths) with every
/// backslash turned into a forward slash.
fn normalized_ruby_path(path: &Path) -> String {
    path.to_string_lossy()
        .chars()
        .map(|c| if c == '\\' { '/' } else { c })
        .collect()
}
1016
1017fn gemfile_datasource_id(path: &Path) -> DatasourceId {
1018 if normalized_ruby_path(path).contains("/data.gz-extract/") {
1019 DatasourceId::GemfileExtracted
1020 } else {
1021 DatasourceId::Gemfile
1022 }
1023}
1024
1025fn gemfile_lock_datasource_id(path: &Path) -> DatasourceId {
1026 if normalized_ruby_path(path).contains("/data.gz-extract/") {
1027 DatasourceId::GemfileLockExtracted
1028 } else {
1029 DatasourceId::GemfileLock
1030 }
1031}
1032
1033fn gemspec_datasource_id(path: &Path) -> DatasourceId {
1034 let normalized = normalized_ruby_path(path);
1035 if normalized.contains("/data.gz-extract/") {
1036 DatasourceId::GemspecExtracted
1037 } else if normalized.contains("/specifications/") {
1038 DatasourceId::GemGemspecInstalledSpecifications
1039 } else {
1040 DatasourceId::Gemspec
1041 }
1042}
1043
1044fn clean_gemspec_value(s: &str) -> String {
1046 let s = strip_freeze_suffix(s).trim();
1047
1048 let s = if let Some(pos) = s.find(" #") {
1049 s[..pos].trim()
1050 } else {
1051 s
1052 };
1053
1054 let s = if let Some(stripped) = s.strip_prefix("%q{") {
1055 stripped.strip_suffix('}').unwrap_or(stripped)
1056 } else if let Some(stripped) = s.strip_prefix("%q<") {
1057 stripped.strip_suffix('>').unwrap_or(stripped)
1058 } else if let Some(stripped) = s.strip_prefix("%q[") {
1059 stripped.strip_suffix(']').unwrap_or(stripped)
1060 } else if let Some(stripped) = s.strip_prefix("%q(") {
1061 stripped.strip_suffix(')').unwrap_or(stripped)
1062 } else {
1063 s
1064 };
1065
1066 let s = s
1067 .trim_start_matches('"')
1068 .trim_end_matches('"')
1069 .trim_start_matches('\'')
1070 .trim_end_matches('\'');
1071 let s = strip_freeze_suffix(s).trim();
1072 s.to_string()
1073}
1074
1075fn extract_ruby_array(s: &str) -> Vec<String> {
1077 let s = strip_freeze_suffix(s.trim());
1078 let s = s.trim_start_matches('[').trim_end_matches(']');
1079 let item_re = match Regex::new(r#"["']([^"']*?)["'](?:\.freeze)?"#) {
1080 Ok(r) => r,
1081 Err(_) => return Vec::new(),
1082 };
1083 item_re
1084 .captures_iter(s)
1085 .filter_map(|cap| cap.get(1).map(|m| m.as_str().to_string()))
1086 .collect()
1087}
1088
1089fn extract_all_ruby_values(s: &str) -> Vec<String> {
1090 let value_re = match Regex::new(r#"%q[\{<\[(]([^\}>\])]+)[\}>\])]|["']([^"']+)["']"#) {
1091 Ok(r) => r,
1092 Err(_) => return Vec::new(),
1093 };
1094
1095 value_re
1096 .captures_iter(s)
1097 .filter_map(|caps| caps.get(1).or_else(|| caps.get(2)))
1098 .map(|m| clean_gemspec_value(m.as_str()))
1099 .collect()
1100}
1101
1102fn extract_first_ruby_value(s: &str) -> Option<String> {
1103 extract_all_ruby_values(s).into_iter().next()
1104}
1105
/// Returns the trimmed text following the first top-level comma of an
/// argument list, or `""` when there is none.
///
/// Commas inside quotes, inside `[]`, `{}` or `<>`, or inside parentheses do
/// not count. Within a quoted string a backslash skips the next character.
fn after_first_argument(args: &str) -> &str {
    let mut open_brackets = 0usize;
    let mut open_parens = 0usize;
    let mut quote: Option<char> = None;
    let mut cursor = args.char_indices();

    while let Some((pos, ch)) = cursor.next() {
        if let Some(closing) = quote {
            if ch == '\\' {
                // Escaped character: consume it without inspecting it.
                cursor.next();
            } else if ch == closing {
                quote = None;
            }
            continue;
        }

        match ch {
            '\'' | '"' => quote = Some(ch),
            '[' | '{' | '<' => open_brackets += 1,
            ']' | '}' | '>' => open_brackets = open_brackets.saturating_sub(1),
            '(' => open_parens += 1,
            ')' => open_parens = open_parens.saturating_sub(1),
            ',' if open_brackets == 0 && open_parens == 0 => {
                return args[pos + 1..].trim();
            }
            _ => {}
        }
    }

    ""
}
1143
1144fn resolve_variable_version(var_name: &str, contexts: &[String]) -> Option<String> {
1149 let var_name = var_name.trim();
1150 if var_name.is_empty() {
1151 return None;
1152 }
1153
1154 for candidate in candidate_constant_names(var_name) {
1155 let escaped = regex::escape(&candidate);
1156 let pattern = format!(r#"(?m)^\s*{}\s*=\s*["']([^"']+)["']"#, escaped);
1157 let Ok(re) = Regex::new(&pattern) else {
1158 continue;
1159 };
1160
1161 for context in contexts {
1162 if let Some(caps) = re.captures(context) {
1163 return caps.get(1).map(|m| m.as_str().to_string());
1164 }
1165 }
1166 }
1167
1168 None
1169}
1170
1171fn resolve_variable_array(var_name: &str, contexts: &[String]) -> Option<Vec<String>> {
1172 let var_name = var_name.trim();
1173 if var_name.is_empty() {
1174 return None;
1175 }
1176
1177 for candidate in candidate_constant_names(var_name) {
1178 let escaped = regex::escape(&candidate);
1179 let pattern = format!(r#"(?m)^\s*{}\s*=\s*(\[[^\n]+\])"#, escaped);
1180 let Ok(re) = Regex::new(&pattern) else {
1181 continue;
1182 };
1183
1184 for context in contexts {
1185 if let Some(caps) = re.captures(context)
1186 && let Some(raw) = caps.get(1)
1187 {
1188 let values = extract_ruby_array(raw.as_str());
1189 if !values.is_empty() {
1190 return Some(values);
1191 }
1192 }
1193 }
1194 }
1195
1196 None
1197}
1198
/// Lists the names to try when resolving a constant: the name as given,
/// followed by its last `::` segment when that differs.
fn candidate_constant_names(var_name: &str) -> Vec<String> {
    let mut names = Vec::with_capacity(2);
    names.push(var_name.to_string());

    let unqualified = var_name
        .split("::")
        .last()
        .filter(|tail| *tail != var_name);
    if let Some(tail) = unqualified {
        names.push(tail.to_string());
    }

    names
}
1208
/// Returns `true` when `s` has the shape of a Ruby local variable name: it
/// starts with `_` or an ASCII lowercase letter and continues with `_` or
/// ASCII alphanumerics only.
fn looks_like_local_variable_reference(s: &str) -> bool {
    let Some(first) = s.chars().next() else {
        return false;
    };
    if first != '_' && !first.is_ascii_lowercase() {
        return false;
    }
    s.chars()
        .skip(1)
        .all(|c| c == '_' || c.is_ascii_alphanumeric())
}
1214
/// Determines the directory that file reads triggered by a gemspec must stay
/// inside.
///
/// If the canonical `base_dir` lies within the canonical current working
/// directory, the working directory is the root; otherwise the canonical
/// `base_dir` itself is. Returns `None` when `base_dir` is `None` or cannot
/// be canonicalized.
fn resolve_ruby_read_root(base_dir: Option<&Path>) -> Option<PathBuf> {
    let base_dir = base_dir?;

    if let Ok(cwd) = std::env::current_dir() {
        if let Ok(canonical_cwd) = cwd.canonicalize() {
            if let Ok(canonical_base) = base_dir.canonicalize() {
                if canonical_base.starts_with(&canonical_cwd) {
                    return Some(canonical_cwd);
                }
            }
        }
    }

    base_dir.canonicalize().ok()
}
1229
/// Canonicalizes `path` and returns it only if the result lies inside
/// `allowed_root`; returns `None` when it does not, or when the path cannot
/// be canonicalized.
fn resolve_ruby_read_path(path: PathBuf, allowed_root: &Path) -> Option<PathBuf> {
    match path.canonicalize() {
        Ok(resolved) if resolved.starts_with(allowed_root) => Some(resolved),
        _ => None,
    }
}
1236
1237fn resolve_file_read_argument(args: &str, base_dir: Option<&Path>) -> Option<String> {
1238 let base_dir = base_dir?;
1239 let allowed_root = resolve_ruby_read_root(base_dir.into())?;
1240 let relative_path = extract_first_ruby_value(args)?;
1241 if relative_path.is_empty() {
1242 return None;
1243 }
1244
1245 let candidate = Path::new(&relative_path);
1246 let path = if candidate.is_absolute() {
1247 candidate.to_path_buf()
1248 } else {
1249 base_dir.join(candidate)
1250 };
1251
1252 let safe_path = resolve_ruby_read_path(path, &allowed_root)?;
1253
1254 fs::read_to_string(safe_path)
1255 .ok()
1256 .map(|content| content.trim().to_string())
1257 .filter(|content| !content.is_empty())
1258}
1259
1260fn resolve_scalar_expression(
1261 expression: &str,
1262 base_dir: Option<&Path>,
1263 contexts: &[String],
1264) -> Option<String> {
1265 let expression = if let Some(pos) = expression.find(" #") {
1266 expression[..pos].trim()
1267 } else {
1268 expression.trim()
1269 };
1270
1271 let file_read_re = Regex::new(r#"^File\.read\((.+)\)(?:\.strip)?(?:\.freeze)?$"#).ok()?;
1272 if let Some(caps) = file_read_re.captures(expression) {
1273 return caps
1274 .get(1)
1275 .and_then(|m| resolve_file_read_argument(m.as_str(), base_dir));
1276 }
1277
1278 if let Some(value) = extract_first_ruby_value(expression) {
1279 return Some(value);
1280 }
1281
1282 let cleaned = clean_gemspec_value(expression);
1283 if looks_like_constant_reference(&cleaned) {
1284 return resolve_variable_version(&cleaned, contexts).or(Some(cleaned));
1285 }
1286
1287 None
1288}
1289
1290fn resolve_local_variable_value(
1291 var_name: &str,
1292 content: &str,
1293 base_dir: Option<&Path>,
1294 contexts: &[String],
1295) -> Option<String> {
1296 let escaped = regex::escape(var_name.trim());
1297 let pattern = format!(r#"(?m)^\s*{}\s*=\s*(.+)$"#, escaped);
1298 let re = Regex::new(&pattern).ok()?;
1299
1300 re.captures_iter(content).find_map(|caps| {
1301 caps.get(1)
1302 .and_then(|m| resolve_scalar_expression(m.as_str(), base_dir, contexts))
1303 })
1304}
1305
1306fn resolve_gemspec_scalar_value(
1307 raw_value: &str,
1308 content: &str,
1309 base_dir: Option<&Path>,
1310 contexts: &[String],
1311) -> Option<String> {
1312 let cleaned = truncate_field(clean_gemspec_value(raw_value));
1313 if cleaned.is_empty() {
1314 return None;
1315 }
1316
1317 if looks_like_constant_reference(&cleaned) {
1318 return resolve_variable_version(&cleaned, contexts)
1319 .map(truncate_field)
1320 .or(Some(cleaned));
1321 }
1322
1323 if looks_like_local_variable_reference(&cleaned) {
1324 return resolve_local_variable_value(&cleaned, content, base_dir, contexts)
1325 .map(truncate_field)
1326 .or(Some(cleaned));
1327 }
1328
1329 Some(cleaned)
1330}
1331
1332fn load_required_ruby_contexts(content: &str, base_dir: Option<&Path>) -> Vec<String> {
1333 let mut contexts = vec![content.to_string()];
1334 let Some(base_dir) = base_dir else {
1335 return contexts;
1336 };
1337 let allowed_root = resolve_ruby_read_root(Some(base_dir));
1338
1339 let require_re = match Regex::new(r#"(?m)^\s*require(?:_relative)?\s+["']([^"']+)["']"#) {
1340 Ok(re) => re,
1341 Err(_) => return contexts,
1342 };
1343
1344 for caps in require_re.captures_iter(content) {
1345 let Some(required) = caps.get(1).map(|m| m.as_str()) else {
1346 continue;
1347 };
1348 for candidate in candidate_require_paths(base_dir, required) {
1349 let Some(safe_candidate) = allowed_root
1350 .as_deref()
1351 .and_then(|root| resolve_ruby_read_path(candidate, root))
1352 else {
1353 continue;
1354 };
1355 if let Ok(required_content) = read_file_to_string(&safe_candidate, None) {
1356 contexts.push(required_content);
1357 break;
1358 }
1359 }
1360 }
1361
1362 contexts
1363}
1364
/// Lists the files a Ruby `require` argument may refer to, in lookup order.
///
/// `::` separators in `required` are turned into `/` and a `.rb` extension is
/// appended unless already present. The resulting file name is looked for
/// directly under `base_dir` first, then under `base_dir/lib`.
fn candidate_require_paths(base_dir: &Path, required: &str) -> Vec<PathBuf> {
    let mut filename = required.replace("::", "/");
    if !filename.ends_with(".rb") {
        filename.push_str(".rb");
    }

    let lib_dir = base_dir.join("lib");
    [base_dir, lib_dir.as_path()]
        .iter()
        .map(|dir| dir.join(&filename))
        .collect()
}
1378
/// Heuristic check for a Ruby constant reference: the text either contains a
/// `::` namespace separator or begins with an ASCII uppercase letter.
fn looks_like_constant_reference(s: &str) -> bool {
    if s.contains("::") {
        return true;
    }
    matches!(s.chars().next(), Some(first) if first.is_ascii_uppercase())
}
1382
/// Test-only shortcut: parses gemspec text without a base directory, i.e.
/// calls [`parse_gemspec_with_context`] with `base_dir` set to `None`.
#[cfg(test)]
fn parse_gemspec(content: &str) -> PackageData {
    parse_gemspec_with_context(content, None)
}
1388
/// Parses the text of a `.gemspec` file into a [`PackageData`] record.
///
/// Extraction is regex-based and line-oriented: scalar fields, `licenses`,
/// `author(s)`, `email` and `add_*dependency` calls are each matched by their
/// own multi-line regex, and every scan is capped at `MAX_ITERATION_COUNT`
/// matches.
///
/// * `content` - raw gemspec source text.
/// * `base_dir` - optional directory handed to the helpers that load
///   `require`d files and resolve file reads / local variables.
///
/// The returned package always carries `DatasourceId::Gemspec`. If any of the
/// regexes fails to compile, a warning is logged and a default package for
/// that datasource is returned instead.
fn parse_gemspec_with_context(content: &str, base_dir: Option<&Path>) -> PackageData {
    // The gemspec text plus the contents of any `require`d files that were loaded.
    let contexts = load_required_ruby_contexts(content, base_dir);

    // `<receiver>.<field> = <value>`: capture 1 is the field name, capture 2 the raw value.
    let field_re = match Regex::new(
        r#"(?m)^\s*\w+\.(name|version|summary|description|homepage|license)\s*=\s*(.+)$"#,
    ) {
        Ok(r) => r,
        Err(e) => {
            warn!("Failed to compile gemspec field regex: {}", e);
            return default_package_data_with_datasource(DatasourceId::Gemspec);
        }
    };

    // `<receiver>.licenses = <value>` (plural form, handled separately from `license`).
    let licenses_re = match Regex::new(r#"(?m)^\s*\w+\.licenses\s*=\s*(.+)$"#) {
        Ok(r) => r,
        Err(e) => {
            warn!("Failed to compile licenses regex: {}", e);
            return default_package_data_with_datasource(DatasourceId::Gemspec);
        }
    };

    // `<receiver>.authors = ...` or `<receiver>.author = ...`.
    let authors_re = match Regex::new(r#"(?m)^\s*\w+\.(?:authors|author)\s*=\s*(.+)$"#) {
        Ok(r) => r,
        Err(e) => {
            warn!("Failed to compile authors regex: {}", e);
            return default_package_data_with_datasource(DatasourceId::Gemspec);
        }
    };

    // `<receiver>.email = ...`.
    let email_re = match Regex::new(r#"(?m)^\s*\w+\.email\s*=\s*(.+)$"#) {
        Ok(r) => r,
        Err(e) => {
            warn!("Failed to compile email regex: {}", e);
            return default_package_data_with_datasource(DatasourceId::Gemspec);
        }
    };

    // `add_dependency`, `add_runtime_dependency` and `add_development_dependency`
    // calls, with or without parentheses: capture 1 is the method, capture 2 its arguments.
    let dependency_call_re = match Regex::new(
        r#"(?m)^\s*\w+\.(add_(?:development_|runtime_)?dependency)\s*\(?(.+?)\)?\s*$"#,
    ) {
        Ok(r) => r,
        Err(e) => {
            warn!("Failed to compile gemspec dependency regex: {}", e);
            return default_package_data_with_datasource(DatasourceId::Gemspec);
        }
    };

    let mut name: Option<String> = None;
    let mut version: Option<String> = None;
    let mut summary: Option<String> = None;
    let mut description: Option<String> = None;
    let mut homepage: Option<String> = None;
    let mut license: Option<String> = None;
    let mut licenses: Vec<String> = Vec::new();
    let mut authors: Vec<String> = Vec::new();
    let mut emails: Vec<String> = Vec::new();
    let mut dependencies: Vec<Dependency> = Vec::new();

    // Scalar fields. Each match overwrites the previous value, so the last
    // assignment of a field in the file wins.
    for caps in field_re.captures_iter(content).take(MAX_ITERATION_COUNT) {
        let field_name = match caps.get(1) {
            Some(m) => m.as_str(),
            None => continue,
        };
        let raw_value = match caps.get(2) {
            Some(m) => m.as_str().trim(),
            None => continue,
        };

        match field_name {
            "name" => name = resolve_gemspec_scalar_value(raw_value, content, base_dir, &contexts),
            "version" => {
                version = resolve_gemspec_scalar_value(raw_value, content, base_dir, &contexts);
            }
            "summary" => {
                summary = resolve_gemspec_scalar_value(raw_value, content, base_dir, &contexts)
            }
            // `description` and `license` are only cleaned and truncated; no
            // constant or variable resolution is attempted for them.
            "description" => description = Some(truncate_field(clean_gemspec_value(raw_value))),
            "homepage" => {
                homepage = resolve_gemspec_scalar_value(raw_value, content, base_dir, &contexts)
            }
            "license" => license = Some(truncate_field(clean_gemspec_value(raw_value))),
            _ => {}
        }
    }

    // Plural `licenses`: the last matching assignment replaces earlier ones.
    for caps in licenses_re.captures_iter(content).take(MAX_ITERATION_COUNT) {
        if let Some(raw) = caps.get(1) {
            licenses = extract_ruby_array(raw.as_str());
        }
    }

    // Authors: an array literal or a resolvable constant replaces the list,
    // any other value is appended as a single cleaned entry.
    for caps in authors_re.captures_iter(content).take(MAX_ITERATION_COUNT) {
        if let Some(raw) = caps.get(1) {
            let raw_str = raw.as_str().trim();
            if raw_str.starts_with('[') {
                authors = extract_ruby_array(raw_str);
            } else if looks_like_constant_reference(raw_str) {
                authors = resolve_variable_array(raw_str, &contexts)
                    .unwrap_or_else(|| vec![clean_gemspec_value(raw_str)]);
            } else {
                authors.push(clean_gemspec_value(raw_str));
            }
        }
    }

    // Emails follow the same three-way handling as authors.
    for caps in email_re.captures_iter(content).take(MAX_ITERATION_COUNT) {
        if let Some(raw) = caps.get(1) {
            let raw_str = raw.as_str().trim();
            if raw_str.starts_with('[') {
                emails = extract_ruby_array(raw_str);
            } else if looks_like_constant_reference(raw_str) {
                emails = resolve_variable_array(raw_str, &contexts)
                    .unwrap_or_else(|| vec![clean_gemspec_value(raw_str)]);
            } else {
                emails.push(clean_gemspec_value(raw_str));
            }
        }
    }

    let mut parties: Vec<Party> = Vec::new();

    if authors.len() == 1 && emails.len() == 1 {
        // Exactly one author and one email: merge them into a single party.
        let email_str = emails.first().map(String::as_str);
        let (parsed_email_name, parsed_email) = match email_str {
            Some(e) => split_name_email(e),
            None => (None, None),
        };

        parties.push(Party {
            r#type: Some("person".to_string()),
            role: Some("author".to_string()),
            name: authors.first().cloned().or(parsed_email_name),
            // Fall back to the raw string only when it is a bare address
            // (contains `@` and no `<`).
            email: parsed_email.or_else(|| {
                email_str
                    .filter(|e| e.contains('@') && !e.contains('<'))
                    .map(|e| e.to_string())
            }),
            url: None,
            organization: None,
            organization_url: None,
            timezone: None,
        });
    } else {
        // Any other combination: authors and emails are emitted as separate
        // parties, with no attempt to pair them up.
        for author_name in authors {
            parties.push(Party {
                r#type: Some("person".to_string()),
                role: Some("author".to_string()),
                name: Some(author_name),
                email: None,
                url: None,
                organization: None,
                organization_url: None,
                timezone: None,
            });
        }

        for email_str in emails {
            // Only strings containing `<` are split into name and address.
            let (parsed_email_name, parsed_email) = if email_str.contains('<') {
                split_name_email(&email_str)
            } else {
                (None, None)
            };
            parties.push(Party {
                r#type: Some("person".to_string()),
                role: Some("author".to_string()),
                name: parsed_email_name,
                email: parsed_email.or_else(|| email_str.contains('@').then_some(email_str)),
                url: None,
                organization: None,
                organization_url: None,
                timezone: None,
            });
        }
    }

    // Dependency declarations: the first argument is the gem name, all
    // remaining values are joined into the requirement string.
    for caps in dependency_call_re
        .captures_iter(content)
        .take(MAX_ITERATION_COUNT)
    {
        let method = match caps.get(1) {
            Some(m) => m.as_str(),
            None => continue,
        };
        let args = match caps.get(2) {
            Some(m) => m.as_str(),
            None => continue,
        };

        let Some(dep_name) = extract_first_ruby_value(args).map(truncate_field) else {
            continue;
        };
        let version_parts = extract_all_ruby_values(after_first_argument(args));
        let extracted_requirement = if version_parts.is_empty() {
            None
        } else {
            Some(version_parts.join(", "))
        };
        let purl = create_gem_purl(&dep_name, None);
        // Only `add_development_dependency` is a development dependency;
        // `add_dependency` and `add_runtime_dependency` are both runtime.
        let is_development = method == "add_development_dependency";
        let scope = if is_development {
            "development"
        } else {
            "runtime"
        };

        dependencies.push(Dependency {
            purl,
            extracted_requirement,
            scope: Some(scope.to_string()),
            is_runtime: Some(!is_development),
            is_optional: Some(is_development),
            is_pinned: None,
            is_direct: Some(true),
            resolved_package: None,
            extra_data: None,
        });
    }

    // A non-empty `licenses` list takes precedence over the singular `license`.
    let extracted_license_statement = if !licenses.is_empty() {
        Some(licenses.join(" AND "))
    } else {
        license
    };

    let (declared_license_expression, declared_license_expression_spdx, license_detections) =
        normalize_spdx_declared_license(extracted_license_statement.as_deref());

    // The summary is only used when no description was found.
    let final_description = description.or(summary);

    let purl = name
        .as_deref()
        .map(|n| create_gem_purl(n, version.as_deref()))
        .unwrap_or(None);

    // Repository URLs can only be derived when a name is known.
    let (repository_homepage_url, repository_download_url, api_data_url, download_url) =
        if let Some(n) = name.as_deref() {
            get_rubygems_urls(n, version.as_deref(), None)
        } else {
            (None, None, None, None)
        };

    PackageData {
        package_type: Some(PACKAGE_TYPE),
        name,
        version,
        primary_language: Some("Ruby".to_string()),
        description: final_description,
        homepage_url: homepage,
        download_url,
        declared_license_expression,
        declared_license_expression_spdx,
        license_detections,
        extracted_license_statement,
        parties,
        dependencies,
        repository_homepage_url,
        repository_download_url,
        api_data_url,
        datasource_id: Some(DatasourceId::Gemspec),
        purl,
        ..default_package_data()
    }
}
1662
/// Largest `.gem` file, in bytes, that `extract_gem_archive` will open (100 MiB).
const MAX_ARCHIVE_SIZE: u64 = 100 * 1024 * 1024;

/// Limit, in bytes, applied both to the `metadata.gz` tar entry and to its
/// decompressed content (50 MiB).
const MAX_FILE_SIZE: u64 = 50 * 1024 * 1024;

/// Highest accepted ratio of decompressed to compressed `metadata.gz` size;
/// anything above is rejected as a suspicious compression ratio.
const MAX_COMPRESSION_RATIO: f64 = 100.0;

/// Parser for packaged `.gem` archives (files with the `gem` extension).
pub struct GemArchiveParser;
1678
1679impl PackageParser for GemArchiveParser {
1680 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1681
1682 fn extract_packages(path: &Path) -> Vec<PackageData> {
1683 vec![match extract_gem_archive(path) {
1684 Ok(data) => data,
1685 Err(e) => {
1686 warn!("Failed to extract .gem archive at {:?}: {}", path, e);
1687 default_package_data_with_datasource(DatasourceId::GemArchive)
1688 }
1689 }]
1690 }
1691
1692 fn is_match(path: &Path) -> bool {
1693 path.extension()
1694 .and_then(|ext| ext.to_str())
1695 .is_some_and(|ext| ext == "gem")
1696 }
1697}
1698
1699fn extract_gem_archive(path: &Path) -> Result<PackageData, String> {
1700 let file_metadata =
1701 fs::metadata(path).map_err(|e| format!("Failed to read file metadata: {}", e))?;
1702 let archive_size = file_metadata.len();
1703
1704 if archive_size > MAX_ARCHIVE_SIZE {
1705 return Err(format!(
1706 "Archive too large: {} bytes (limit: {} bytes)",
1707 archive_size, MAX_ARCHIVE_SIZE
1708 ));
1709 }
1710
1711 let file = File::open(path).map_err(|e| format!("Failed to open archive: {}", e))?;
1712 let mut archive = Archive::new(file);
1713
1714 let mut entry_count: usize = 0;
1715 for entry_result in archive
1716 .entries()
1717 .map_err(|e| format!("Failed to read tar entries: {}", e))?
1718 {
1719 entry_count += 1;
1720 if entry_count > MAX_ITERATION_COUNT {
1721 warn!(
1722 "Exceeded max tar entry count ({}) in .gem archive, stopping iteration",
1723 MAX_ITERATION_COUNT
1724 );
1725 break;
1726 }
1727
1728 let entry = entry_result.map_err(|e| format!("Failed to read tar entry: {}", e))?;
1729 let entry_path = entry
1730 .path()
1731 .map_err(|e| format!("Failed to get entry path: {}", e))?;
1732 let entry_str = entry_path.to_string_lossy();
1733 if entry_str.contains("..") {
1734 warn!("Skipping tar entry with path traversal: {}", entry_str);
1735 continue;
1736 }
1737
1738 if entry_path.to_str() == Some("metadata.gz") {
1739 let entry_size = entry.size();
1740 if entry_size > MAX_FILE_SIZE {
1741 return Err(format!(
1742 "metadata.gz too large: {} bytes (limit: {} bytes)",
1743 entry_size, MAX_FILE_SIZE
1744 ));
1745 }
1746
1747 let mut decoder = GzDecoder::new(entry);
1748 let mut content = Vec::new();
1749 let mut limited = std::io::Read::take(&mut decoder, MAX_FILE_SIZE + 1);
1750 limited
1751 .read_to_end(&mut content)
1752 .map_err(|e| format!("Failed to decompress metadata.gz: {}", e))?;
1753
1754 if content.len() > MAX_FILE_SIZE as usize {
1755 return Err(format!(
1756 "Decompressed metadata too large: exceeds {} byte limit",
1757 MAX_FILE_SIZE
1758 ));
1759 }
1760
1761 let content = match String::from_utf8(content) {
1762 Ok(s) => s,
1763 Err(err) => {
1764 let bytes = err.into_bytes();
1765 warn!("Invalid UTF-8 in gem metadata; using lossy conversion");
1766 String::from_utf8_lossy(&bytes).into_owned()
1767 }
1768 };
1769
1770 let uncompressed_size = content.len() as u64;
1771 if entry_size > 0 {
1772 let ratio = uncompressed_size as f64 / entry_size as f64;
1773 if ratio > MAX_COMPRESSION_RATIO {
1774 return Err(format!(
1775 "Suspicious compression ratio: {:.2}:1 (limit: {:.0}:1)",
1776 ratio, MAX_COMPRESSION_RATIO
1777 ));
1778 }
1779 }
1780
1781 return parse_gem_metadata_yaml(&content, DatasourceId::GemArchive);
1782 }
1783 }
1784
1785 Err("metadata.gz not found in .gem archive".to_string())
1786}
1787
1788fn parse_gem_metadata_yaml(
1789 content: &str,
1790 datasource_id: DatasourceId,
1791) -> Result<PackageData, String> {
1792 let cleaned = clean_ruby_yaml_tags(content);
1796
1797 let yaml: yaml_serde::Value =
1798 yaml_serde::from_str(&cleaned).map_err(|e| format!("Failed to parse YAML: {}", e))?;
1799
1800 let name = yaml_string(&yaml, "name").map(truncate_field);
1801 let version = yaml.get("version").and_then(|v| {
1802 if v.is_string() {
1803 v.as_str().map(|s| truncate_field(s.to_string()))
1804 } else {
1805 yaml_string(v, "version").map(truncate_field)
1806 }
1807 });
1808 let description = yaml_string(&yaml, "description")
1809 .or_else(|| yaml_string(&yaml, "summary"))
1810 .map(truncate_field);
1811 let homepage = yaml_string(&yaml, "homepage").map(truncate_field);
1812 let summary = yaml_string(&yaml, "summary").map(truncate_field);
1813
1814 let licenses: Vec<String> = yaml
1816 .get("licenses")
1817 .and_then(|v| v.as_sequence())
1818 .map(|seq| {
1819 seq.iter()
1820 .filter_map(|item| item.as_str().map(|s| truncate_field(s.to_string())))
1821 .collect()
1822 })
1823 .unwrap_or_default();
1824
1825 let extracted_license_statement = if !licenses.is_empty() {
1827 Some(licenses.join(" AND "))
1828 } else {
1829 None
1830 };
1831
1832 let (license_expression, license_expression_spdx, license_detections) =
1833 normalize_spdx_declared_license(extracted_license_statement.as_deref());
1834
1835 let authors: Vec<String> = yaml
1837 .get("authors")
1838 .and_then(|v| v.as_sequence())
1839 .map(|seq| {
1840 seq.iter()
1841 .filter_map(|item| item.as_str().map(|s| truncate_field(s.to_string())))
1842 .collect()
1843 })
1844 .unwrap_or_default();
1845
1846 let emails: Vec<String> = yaml
1847 .get("email")
1848 .map(|v| {
1849 if let Some(seq) = v.as_sequence() {
1850 seq.iter()
1851 .filter_map(|item| item.as_str().map(|s| truncate_field(s.to_string())))
1852 .collect()
1853 } else if let Some(s) = v.as_str() {
1854 vec![truncate_field(s.to_string())]
1855 } else {
1856 Vec::new()
1857 }
1858 })
1859 .unwrap_or_default();
1860
1861 let mut parties: Vec<Party> = Vec::new();
1863 let max_len = authors.len().max(emails.len());
1864 for i in 0..max_len {
1865 let author_name = authors.get(i).map(|s| s.as_str());
1866 let email_str = emails.get(i).map(|s| s.as_str());
1867
1868 let (parsed_email_name, parsed_email) = match email_str {
1869 Some(e) if e.contains('<') => split_name_email(e),
1870 None => (None, None),
1871 _ => (None, None),
1872 };
1873
1874 let party_name = author_name.map(|s| s.to_string()).or(parsed_email_name);
1875
1876 parties.push(Party {
1877 r#type: Some("person".to_string()),
1878 role: Some("author".to_string()),
1879 name: party_name,
1880 email: parsed_email.or_else(|| {
1881 email_str
1882 .filter(|e| e.contains('@') && !e.contains('<'))
1883 .map(|e| e.to_string())
1884 }),
1885 url: None,
1886 organization: None,
1887 organization_url: None,
1888 timezone: None,
1889 });
1890 }
1891
1892 let dependencies = parse_gem_yaml_dependencies(&yaml);
1894
1895 let metadata = yaml.get("metadata");
1896
1897 let bug_tracking_url = metadata
1898 .and_then(|m| yaml_string(m, "bug_tracking_uri"))
1899 .map(truncate_field);
1900
1901 let code_view_url = metadata
1902 .and_then(|m| yaml_string(m, "source_code_uri"))
1903 .map(truncate_field);
1904
1905 let vcs_url = code_view_url.clone().or_else(|| {
1906 metadata
1907 .and_then(|m| yaml_string(m, "homepage_uri"))
1908 .map(truncate_field)
1909 });
1910
1911 let file_references = metadata
1912 .and_then(|m| m.get("files"))
1913 .and_then(|f| f.as_sequence())
1914 .map(|seq| {
1915 seq.iter()
1916 .filter_map(|v| v.as_str())
1917 .map(|s| crate::models::FileReference {
1918 path: s.to_string(),
1919 size: None,
1920 sha1: None,
1921 md5: None,
1922 sha256: None,
1923 sha512: None,
1924 extra_data: None,
1925 })
1926 .collect::<Vec<_>>()
1927 })
1928 .unwrap_or_default();
1929
1930 let release_date = yaml_string(&yaml, "date").and_then(|d| {
1931 if d.len() >= 10 {
1932 Some(d[..10].to_string())
1933 } else {
1934 None
1935 }
1936 });
1937
1938 let purl = name
1939 .as_deref()
1940 .map(|n| create_gem_purl(n, version.as_deref()))
1941 .unwrap_or(None);
1942
1943 let platform = yaml_string(&yaml, "platform").map(truncate_field);
1944 let (repository_homepage_url, repository_download_url, api_data_url, download_url) =
1945 if let Some(n) = name.as_deref() {
1946 get_rubygems_urls(n, version.as_deref(), platform.as_deref())
1947 } else {
1948 (None, None, None, None)
1949 };
1950
1951 let qualifiers = if let Some(ref p) = platform {
1952 if p != "ruby" {
1953 let mut q = HashMap::new();
1954 q.insert("platform".to_string(), p.clone());
1955 Some(q)
1956 } else {
1957 None
1958 }
1959 } else {
1960 None
1961 };
1962
1963 Ok(PackageData {
1964 package_type: Some(PACKAGE_TYPE),
1965 name,
1966 version,
1967 qualifiers,
1968 primary_language: Some("Ruby".to_string()),
1969 description: description.or(summary),
1970 release_date,
1971 homepage_url: homepage,
1972 download_url,
1973 bug_tracking_url,
1974 code_view_url,
1975 declared_license_expression: license_expression,
1976 declared_license_expression_spdx: license_expression_spdx,
1977 license_detections,
1978 extracted_license_statement,
1979 file_references,
1980 parties,
1981 dependencies,
1982 repository_homepage_url,
1983 repository_download_url,
1984 api_data_url,
1985 datasource_id: Some(datasource_id),
1986 purl,
1987 vcs_url,
1988 ..default_package_data()
1989 })
1990}
1991
1992fn clean_ruby_yaml_tags(content: &str) -> String {
1994 let tag_re = match Regex::new(r"!ruby/\S+") {
1995 Ok(r) => r,
1996 Err(_) => return content.to_string(),
1997 };
1998 tag_re.replace_all(content, "").to_string()
1999}
2000
2001fn yaml_string(yaml: &yaml_serde::Value, key: &str) -> Option<String> {
2002 yaml.get(key)
2003 .and_then(|v| v.as_str())
2004 .filter(|s| !s.is_empty())
2005 .map(|s| s.to_string())
2006}
2007
2008fn parse_gem_yaml_dependencies(yaml: &yaml_serde::Value) -> Vec<Dependency> {
2009 let mut dependencies = Vec::new();
2010
2011 let deps_seq = match yaml.get("dependencies").and_then(|v| v.as_sequence()) {
2012 Some(seq) => seq,
2013 None => return dependencies,
2014 };
2015
2016 for dep_value in deps_seq.iter().take(MAX_ITERATION_COUNT) {
2017 let dep_name = match yaml_string(dep_value, "name").map(truncate_field) {
2018 Some(n) => n,
2019 None => continue,
2020 };
2021
2022 let dep_type = yaml_string(dep_value, "type");
2023 let is_development = dep_type.as_deref() == Some(":development");
2024
2025 let requirements = dep_value
2027 .get("requirement")
2028 .or_else(|| dep_value.get("version_requirements"))
2029 .and_then(|req| req.get("requirements"))
2030 .and_then(|reqs| reqs.as_sequence());
2031
2032 let extracted_requirement = requirements.map(|reqs| {
2033 let parts: Vec<String> = reqs
2034 .iter()
2035 .filter_map(|req| {
2036 let seq = req.as_sequence()?;
2037 if seq.len() >= 2 {
2038 let op = seq[0].as_str().unwrap_or("");
2039 let ver = seq[1].get("version").and_then(|v| v.as_str()).unwrap_or("");
2040 if op == ">=" && ver == "0" {
2041 None
2043 } else if op.is_empty() || ver.is_empty() {
2044 None
2045 } else {
2046 Some(format!("{} {}", op, ver))
2047 }
2048 } else {
2049 None
2050 }
2051 })
2052 .collect();
2053 parts.join(", ")
2054 });
2055
2056 let extracted_requirement = extracted_requirement
2057 .filter(|s| !s.is_empty())
2058 .or_else(|| Some(String::new()));
2059
2060 let (scope, is_runtime, is_optional) = if is_development {
2061 (Some("development".to_string()), false, true)
2062 } else {
2063 (Some("runtime".to_string()), true, false)
2064 };
2065
2066 let purl = create_gem_purl(&dep_name, None);
2067
2068 dependencies.push(Dependency {
2069 purl,
2070 extracted_requirement,
2071 scope,
2072 is_runtime: Some(is_runtime),
2073 is_optional: Some(is_optional),
2074 is_pinned: None,
2075 is_direct: Some(true),
2076 resolved_package: None,
2077 extra_data: None,
2078 });
2079 }
2080
2081 dependencies
2082}
2083
/// Parser for already-extracted gem metadata files, i.e. paths containing
/// `metadata.gz-extract`, whose content is parsed as gem specification YAML.
pub struct GemMetadataExtractedParser;
2089
2090impl PackageParser for GemMetadataExtractedParser {
2091 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
2092
2093 fn extract_packages(path: &Path) -> Vec<PackageData> {
2094 vec![match extract_gem_metadata_extracted(path) {
2095 Ok(data) => data,
2096 Err(e) => {
2097 warn!("Failed to extract gem metadata from {:?}: {}", path, e);
2098 default_package_data_with_datasource(DatasourceId::GemArchiveExtracted)
2099 }
2100 }]
2101 }
2102
2103 fn is_match(path: &Path) -> bool {
2104 path.to_str()
2105 .is_some_and(|p| p.contains("metadata.gz-extract"))
2106 }
2107}
2108
2109fn extract_gem_metadata_extracted(path: &Path) -> Result<PackageData, String> {
2110 let content = read_file_to_string(path, None)
2111 .map_err(|e| format!("Failed to read metadata.gz-extract file: {}", e))?;
2112
2113 parse_gem_metadata_yaml(&content, DatasourceId::GemArchiveExtracted)
2114}
2115
// Parser registrations for the Ruby ecosystem.
// NOTE(review): the arguments appear to be, in order: a human-readable
// description, the path patterns handled, the package type, the primary
// language and an optional documentation URL - confirm against the
// `register_parser!` macro definition.

// Gemfile manifests, including ones found inside an extracted `data.gz`.
crate::register_parser!(
    "Ruby Gemfile manifest",
    &["**/Gemfile", "**/data.gz-extract/Gemfile"],
    "gem",
    "Ruby",
    Some("https://bundler.io/man/gemfile.5.html"),
);

// Gemfile.lock lockfiles, including ones found inside an extracted `data.gz`.
crate::register_parser!(
    "Ruby Gemfile.lock lockfile",
    &["**/Gemfile.lock", "**/data.gz-extract/Gemfile.lock"],
    "gem",
    "Ruby",
    Some("https://bundler.io/man/gemfile.5.html"),
);

// `.gemspec` manifests anywhere in the tree.
crate::register_parser!(
    "Ruby .gemspec manifest",
    &[
        "**/*.gemspec",
        "**/data.gz-extract/*.gemspec",
        "**/specifications/*.gemspec"
    ],
    "gem",
    "Ruby",
    Some("https://guides.rubygems.org/specification-reference/"),
);

// Packaged `.gem` archives.
crate::register_parser!(
    "Ruby .gem archive",
    &["**/*.gem"],
    "gem",
    "Ruby",
    Some("https://guides.rubygems.org/specification-reference/"),
);

// Gem metadata that has already been extracted from an archive.
crate::register_parser!(
    "Ruby gem metadata (extracted)",
    &["**/metadata.gz-extract"],
    "gem",
    "Ruby",
    Some("https://guides.rubygems.org/specification-reference/"),
);
2160
#[cfg(test)]
mod tests {
    use super::parse_gemspec;

    // A `%q{` literal that is never closed on the same line must still yield
    // the text after the opening delimiter.
    #[test]
    fn test_clean_gemspec_value_handles_unterminated_percent_q() {
        assert_eq!(
            super::clean_gemspec_value("%q{Arel is a SQL AST manager for Ruby. It"),
            "Arel is a SQL AST manager for Ruby. It"
        );
    }

    // Both `add_runtime_dependency` and plain `add_dependency` must be
    // reported with the `runtime` scope and their version requirement.
    #[test]
    fn test_parse_gemspec_runtime_dependency_scope() {
        let content = r#"
Gem::Specification.new do |spec|
  spec.name = "demo"
  spec.version = "1.0.0"
  spec.add_runtime_dependency "rack", "~> 3.0"
  spec.add_dependency "thor", ">= 1.0"
end
"#;

        let package_data = parse_gemspec(content);
        assert_eq!(package_data.dependencies.len(), 2);
        assert_eq!(
            package_data.dependencies[0].scope,
            Some("runtime".to_string())
        );
        assert_eq!(
            package_data.dependencies[0].extracted_requirement,
            Some("~> 3.0".to_string())
        );
        assert_eq!(
            package_data.dependencies[1].scope,
            Some("runtime".to_string())
        );
        assert_eq!(
            package_data.dependencies[1].extracted_requirement,
            Some(">= 1.0".to_string())
        );
    }
}