1use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Party};
31use crate::parser_warn as warn;
32use crate::parsers::utils::{
33 MAX_ITERATION_COUNT, read_file_to_string, split_name_email, truncate_field,
34};
35use flate2::read::GzDecoder;
36use packageurl::PackageUrl;
37use regex::Regex;
38use std::collections::HashMap;
39use std::fs::{self, File};
40use std::io::Read;
41use std::path::{Path, PathBuf};
42use tar::Archive;
43
44use super::PackageParser;
45use super::license_normalization::normalize_spdx_declared_license;
46
47const PACKAGE_TYPE: PackageType = PackageType::Gem;
48
/// Removes every trailing `.freeze` from `s` and returns the remaining prefix.
///
/// Repeated suffixes (`"x.freeze.freeze"`) are all removed; a string without
/// the suffix is returned unchanged.
pub fn strip_freeze_suffix(s: &str) -> &str {
    let mut remaining = s;
    while let Some(shorter) = remaining.strip_suffix(".freeze") {
        remaining = shorter;
    }
    remaining
}
63
/// A block opened by a `... do` line that is still open while a Gemfile is scanned.
enum GemfileBlock {
    /// `group ... do`: the `:symbol` group names found on the opening line.
    Group(Vec<String>),
    /// `source "..." do`: the quoted source string from the opening line.
    Source(String),
}
68
69pub struct GemfileParser;
78
79impl PackageParser for GemfileParser {
80 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
81
82 fn extract_packages(path: &Path) -> Vec<PackageData> {
83 let datasource_id = gemfile_datasource_id(path);
84 let content = match read_file_to_string(path, None) {
85 Ok(c) => c,
86 Err(e) => {
87 warn!("Failed to read Gemfile at {:?}: {}", path, e);
88 return vec![default_package_data_with_datasource(datasource_id)];
89 }
90 };
91
92 let mut package_data = parse_gemfile(&content);
93 package_data.datasource_id = Some(datasource_id);
94 vec![package_data]
95 }
96
97 fn is_match(path: &Path) -> bool {
98 path.file_name()
99 .and_then(|n| n.to_str())
100 .is_some_and(|name| name == "Gemfile")
101 || path
102 .to_str()
103 .is_some_and(|p| p.contains("data.gz-extract/") && p.ends_with("/Gemfile"))
104 }
105}
106
107fn parse_gemfile(content: &str) -> PackageData {
109 let mut dependencies = Vec::new();
110 let mut block_stack = Vec::new();
111 let mut default_source = None;
112 let mut sources = Vec::new();
113
114 let gem_regex = match Regex::new(
117 r#"^\s*gem\s+["']([^"']+)["'](?:\.freeze)?(?:\s*,\s*["']([^"']+)["'](?:\.freeze)?)?(?:\s*,\s*["']([^"']+)["'](?:\.freeze)?)?(?:\s*,\s*(.+))?"#,
118 ) {
119 Ok(r) => r,
120 Err(e) => {
121 warn!("Failed to compile gem regex: {}", e);
122 return default_package_data_with_datasource(DatasourceId::Gemfile);
123 }
124 };
125
126 let group_start_regex = match Regex::new(r"^\s*group\s+(.+?)\s+do\s*$") {
128 Ok(r) => r,
129 Err(e) => {
130 warn!("Failed to compile group regex: {}", e);
131 return default_package_data_with_datasource(DatasourceId::Gemfile);
132 }
133 };
134
135 let group_end_regex = match Regex::new(r"^\s*end\s*$") {
136 Ok(r) => r,
137 Err(e) => {
138 warn!("Failed to compile end regex: {}", e);
139 return default_package_data_with_datasource(DatasourceId::Gemfile);
140 }
141 };
142
143 let source_block_start_regex = match Regex::new(r#"^\s*source\s+["']([^"']+)["']\s+do\s*$"#) {
144 Ok(r) => r,
145 Err(e) => {
146 warn!("Failed to compile source block regex: {}", e);
147 return default_package_data_with_datasource(DatasourceId::Gemfile);
148 }
149 };
150
151 let source_regex = match Regex::new(r#"^\s*source\s+["']([^"']+)["']\s*$"#) {
152 Ok(r) => r,
153 Err(e) => {
154 warn!("Failed to compile source regex: {}", e);
155 return default_package_data_with_datasource(DatasourceId::Gemfile);
156 }
157 };
158
159 let symbol_regex = match Regex::new(r":(\w+)") {
161 Ok(r) => r,
162 Err(e) => {
163 warn!("Failed to compile symbol regex: {}", e);
164 return default_package_data_with_datasource(DatasourceId::Gemfile);
165 }
166 };
167
168 for line in content.lines().take(MAX_ITERATION_COUNT) {
169 let trimmed = line.trim();
170
171 if trimmed.is_empty() || trimmed.starts_with('#') {
173 continue;
174 }
175
176 if let Some(caps) = group_start_regex.captures(trimmed) {
178 let groups_str = caps.get(1).map(|m| m.as_str()).unwrap_or("");
179 let mut current_groups = Vec::new();
180 for cap in symbol_regex.captures_iter(groups_str) {
181 if let Some(group_name) = cap.get(1) {
182 current_groups.push(group_name.as_str().to_string());
183 }
184 }
185 block_stack.push(GemfileBlock::Group(current_groups));
186 continue;
187 }
188
189 if let Some(caps) = source_block_start_regex.captures(trimmed) {
190 let source = caps
191 .get(1)
192 .map(|m| m.as_str().to_string())
193 .unwrap_or_default();
194 if !source.is_empty() {
195 push_unique_string(&mut sources, source.clone());
196 block_stack.push(GemfileBlock::Source(source));
197 }
198 continue;
199 }
200
201 if let Some(caps) = source_regex.captures(trimmed) {
202 if let Some(source) = caps.get(1).map(|m| m.as_str().to_string()) {
203 push_unique_string(&mut sources, source.clone());
204 default_source = Some(source);
205 }
206 continue;
207 }
208
209 if group_end_regex.is_match(trimmed) {
211 block_stack.pop();
212 continue;
213 }
214
215 if let Some(caps) = gem_regex.captures(trimmed) {
217 let name = strip_freeze_suffix(caps.get(1).map(|m| m.as_str()).unwrap_or(""));
218 if name.is_empty() {
219 continue;
220 }
221
222 let mut version_parts = Vec::new();
224 if let Some(v) = caps.get(2) {
225 version_parts.push(strip_freeze_suffix(v.as_str()).to_string());
226 }
227 if let Some(v) = caps.get(3) {
228 let v_str = strip_freeze_suffix(v.as_str());
229 if looks_like_version_constraint(v_str) {
231 version_parts.push(v_str.to_string());
232 }
233 }
234
235 let extracted_requirement = if version_parts.is_empty() {
236 None
237 } else {
238 Some(version_parts.join(", "))
239 };
240
241 let current_groups = current_group_names(&block_stack);
242
243 let (scope, is_runtime, is_optional) = if current_groups.is_empty() {
246 (None, true, false)
248 } else if current_groups.iter().any(|g| g == "development") {
249 (Some("development".to_string()), false, true)
250 } else if current_groups.iter().any(|g| g == "test") {
251 (Some("test".to_string()), false, true)
252 } else {
253 let group = current_groups.first().cloned();
255 (group, true, false)
256 };
257
258 let purl = create_gem_purl(name, None);
260 let inherited_source = current_source(&block_stack, default_source.as_deref());
261 let extra_data = build_gemfile_dependency_extra_data(
262 caps.get(4).map(|m| m.as_str()),
263 inherited_source.as_deref(),
264 );
265
266 dependencies.push(Dependency {
267 purl,
268 extracted_requirement,
269 scope,
270 is_runtime: Some(is_runtime),
271 is_optional: Some(is_optional),
272 is_pinned: None,
273 is_direct: Some(true),
274 resolved_package: None,
275 extra_data,
276 });
277 }
278 }
279
280 let extra_data = if sources.is_empty() {
281 None
282 } else {
283 Some(HashMap::from([(
284 "sources".to_string(),
285 serde_json::Value::Array(sources.into_iter().map(serde_json::Value::String).collect()),
286 )]))
287 };
288
289 PackageData {
290 package_type: Some(PACKAGE_TYPE),
291 primary_language: Some("Ruby".to_string()),
292 dependencies,
293 extra_data,
294 datasource_id: Some(DatasourceId::Gemfile),
295 ..default_package_data()
296 }
297}
298
299fn current_group_names(block_stack: &[GemfileBlock]) -> Vec<String> {
300 block_stack
301 .iter()
302 .rev()
303 .find_map(|block| match block {
304 GemfileBlock::Group(groups) => Some(groups.clone()),
305 GemfileBlock::Source(_) => None,
306 })
307 .unwrap_or_default()
308}
309
310fn current_source(block_stack: &[GemfileBlock], default_source: Option<&str>) -> Option<String> {
311 block_stack
312 .iter()
313 .rev()
314 .find_map(|block| match block {
315 GemfileBlock::Source(source) => Some(source.clone()),
316 GemfileBlock::Group(_) => None,
317 })
318 .or_else(|| default_source.map(str::to_string))
319}
320
/// Appends `value` to `values` unless an equal string is already present.
fn push_unique_string(values: &mut Vec<String>, value: String) {
    let already_present = values.iter().any(|existing| *existing == value);
    if already_present {
        return;
    }
    values.push(value);
}
326
327fn build_gemfile_dependency_extra_data(
328 options: Option<&str>,
329 inherited_source: Option<&str>,
330) -> Option<HashMap<String, serde_json::Value>> {
331 let mut extra = HashMap::new();
332 let options = options.unwrap_or("");
333
334 if let Some(git) = extract_gemfile_quoted_option(options, "git") {
335 extra.insert(
336 "source_type".to_string(),
337 serde_json::Value::String("GIT".to_string()),
338 );
339 extra.insert("git".to_string(), serde_json::Value::String(git.clone()));
340 extra.insert("remote".to_string(), serde_json::Value::String(git));
341 }
342
343 if let Some(path) = extract_gemfile_quoted_option(options, "path") {
344 extra.insert(
345 "source_type".to_string(),
346 serde_json::Value::String("PATH".to_string()),
347 );
348 extra.insert("path".to_string(), serde_json::Value::String(path));
349 }
350
351 for key in ["branch", "ref", "tag"] {
352 if let Some(value) = extract_gemfile_quoted_option(options, key) {
353 extra.insert(key.to_string(), serde_json::Value::String(value));
354 }
355 }
356
357 let direct_source = extract_gemfile_quoted_option(options, "source");
358 if let Some(source) = direct_source {
359 extra.insert("source".to_string(), serde_json::Value::String(source));
360 } else if !extra.contains_key("source_type")
361 && let Some(source) = inherited_source
362 {
363 extra.insert(
364 "source".to_string(),
365 serde_json::Value::String(source.to_string()),
366 );
367 }
368
369 (!extra.is_empty()).then_some(extra)
370}
371
372fn extract_gemfile_quoted_option(options: &str, key: &str) -> Option<String> {
373 if options.is_empty() {
374 return None;
375 }
376
377 let pattern = format!(r#"(?:^|,\s*){}\s*:\s*["']([^"']+)["']"#, regex::escape(key));
378 Regex::new(&pattern)
379 .ok()
380 .and_then(|regex| regex.captures(options))
381 .and_then(|captures| captures.get(1).map(|m| m.as_str().to_string()))
382}
383
/// Returns `true` when `s` starts like a version requirement: one of the
/// operator characters `~`, `>`, `<`, `=`, `!`, or an ASCII digit.
fn looks_like_version_constraint(s: &str) -> bool {
    match s.chars().next() {
        Some('~' | '>' | '<' | '=' | '!') => true,
        Some(first) => first.is_ascii_digit(),
        None => false,
    }
}
393
394pub struct GemfileLockParser;
403
404impl PackageParser for GemfileLockParser {
405 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
406
407 fn extract_packages(path: &Path) -> Vec<PackageData> {
408 let datasource_id = gemfile_lock_datasource_id(path);
409 let content = match read_file_to_string(path, None) {
410 Ok(c) => c,
411 Err(e) => {
412 warn!("Failed to read Gemfile.lock at {:?}: {}", path, e);
413 return vec![default_package_data_with_datasource(datasource_id)];
414 }
415 };
416
417 let mut package_data = parse_gemfile_lock(&content);
418 package_data.datasource_id = Some(datasource_id);
419 vec![package_data]
420 }
421
422 fn is_match(path: &Path) -> bool {
423 path.file_name()
424 .and_then(|n| n.to_str())
425 .is_some_and(|name| name == "Gemfile.lock")
426 || path
427 .to_str()
428 .is_some_and(|p| p.contains("data.gz-extract/") && p.ends_with("/Gemfile.lock"))
429 }
430}
431
/// Section of a `Gemfile.lock` that the line scanner is currently inside.
#[derive(Debug, Clone, PartialEq)]
enum ParseState {
    /// Before the first recognised section header.
    None,
    /// Inside a `GEM` section, before its `specs:` line.
    Gem,
    /// Inside a `GIT` section, before its `specs:` line.
    Git,
    /// Inside a `PATH` section, before its `specs:` line.
    Path,
    /// Inside a `SVN` section, before its `specs:` line.
    Svn,
    /// After the `specs:` line of a GEM/GIT/PATH/SVN section.
    Specs,
    /// Inside the `PLATFORMS` section.
    Platforms,
    /// Inside the `BUNDLED WITH` section.
    BundledWith,
    /// Inside the `DEPENDENCIES` section.
    Dependencies,
}
445
/// Everything collected about one gem while scanning a `Gemfile.lock`.
#[derive(Debug, Clone, Default)]
struct GemInfo {
    /// Gem name as written in the lockfile.
    name: String,
    /// Version from the `specs:` entry, without any platform suffix.
    version: Option<String>,
    /// Platform suffix split off the `specs:` version, if there was one.
    platform: Option<String>,
    /// Header of the section that listed the gem: `GEM`, `GIT`, `PATH` or `SVN`.
    gem_type: String,
    /// Value of the section's `remote:` option.
    remote: Option<String>,
    /// Value of the section's `revision:` option.
    revision: Option<String>,
    /// Value of the section's `ref:` option.
    ref_field: Option<String>,
    /// Value of the section's `branch:` option.
    branch: Option<String>,
    /// Value of the section's `tag:` option.
    tag: Option<String>,
    /// `true` when the `DEPENDENCIES` entry for this gem ends with `!`.
    pinned: bool,
    /// Version constraints listed for this gem in `DEPENDENCIES`.
    requirements: Vec<String>,
}
466
467fn select_primary_path_gem(gems: &HashMap<String, GemInfo>) -> Option<GemInfo> {
468 let mut path_gems: Vec<&GemInfo> = gems.values().filter(|gem| gem.gem_type == "PATH").collect();
469 path_gems.sort_by(|left, right| {
470 left.remote
471 .as_deref()
472 .cmp(&right.remote.as_deref())
473 .then_with(|| left.name.cmp(&right.name))
474 });
475
476 path_gems
477 .iter()
478 .copied()
479 .find(|gem| gem.pinned && gem.remote.as_deref() == Some("."))
480 .or_else(|| path_gems.iter().copied().find(|gem| gem.pinned))
481 .or_else(|| {
482 path_gems
483 .iter()
484 .copied()
485 .find(|gem| gem.remote.as_deref() == Some("."))
486 })
487 .or_else(|| path_gems.first().copied())
488 .cloned()
489}
490
491fn parse_gemfile_lock(content: &str) -> PackageData {
493 let mut state = ParseState::None;
494 let mut dependencies = Vec::new();
495 let mut gems: HashMap<String, GemInfo> = HashMap::new();
496 let mut platforms: Vec<String> = Vec::new();
497 let mut bundler_version: Option<String> = None;
498 let mut current_gem_type = String::new();
499 let mut current_remote: Option<String> = None;
500 let mut current_options: HashMap<String, String> = HashMap::new();
501
502 let deps_regex = match Regex::new(r"^ {2}([^ \)\(,!:]+)(?: \(([^)]+)\))?(!)?$") {
504 Ok(r) => r,
505 Err(e) => {
506 warn!("Failed to compile deps regex: {}", e);
507 return default_package_data_with_datasource(DatasourceId::GemfileLock);
508 }
509 };
510
511 let spec_deps_regex = match Regex::new(r"^ {4}([^ \)\(,!:]+)(?: \(([^)]+)\))?$") {
513 Ok(r) => r,
514 Err(e) => {
515 warn!("Failed to compile spec_deps regex: {}", e);
516 return default_package_data_with_datasource(DatasourceId::GemfileLock);
517 }
518 };
519
520 let options_regex = match Regex::new(r"^ {2}([a-z]+): (.+)$") {
522 Ok(r) => r,
523 Err(e) => {
524 warn!("Failed to compile options regex: {}", e);
525 return default_package_data_with_datasource(DatasourceId::GemfileLock);
526 }
527 };
528
529 let version_regex = match Regex::new(r"^\s+(\d+(?:\.\d+)+)\s*$") {
531 Ok(r) => r,
532 Err(e) => {
533 warn!("Failed to compile version regex: {}", e);
534 return default_package_data_with_datasource(DatasourceId::GemfileLock);
535 }
536 };
537
538 for line in content.lines().take(MAX_ITERATION_COUNT) {
539 let trimmed = line.trim_end();
540
541 if trimmed.is_empty() {
543 current_options.clear();
544 continue;
545 }
546
547 match trimmed {
549 "GEM" => {
550 state = ParseState::Gem;
551 current_gem_type = "GEM".to_string();
552 current_remote = None;
553 current_options.clear();
554 continue;
555 }
556 "GIT" => {
557 state = ParseState::Git;
558 current_gem_type = "GIT".to_string();
559 current_remote = None;
560 current_options.clear();
561 continue;
562 }
563 "PATH" => {
564 state = ParseState::Path;
565 current_gem_type = "PATH".to_string();
566 current_remote = None;
567 current_options.clear();
568 continue;
569 }
570 "SVN" => {
571 state = ParseState::Svn;
572 current_gem_type = "SVN".to_string();
573 current_remote = None;
574 current_options.clear();
575 continue;
576 }
577 "PLATFORMS" => {
578 state = ParseState::Platforms;
579 continue;
580 }
581 "BUNDLED WITH" => {
582 state = ParseState::BundledWith;
583 continue;
584 }
585 "DEPENDENCIES" => {
586 state = ParseState::Dependencies;
587 continue;
588 }
589 _ => {}
590 }
591
592 if trimmed.trim() == "specs:" {
596 state = match state {
597 ParseState::Gem | ParseState::Git | ParseState::Path | ParseState::Svn => {
598 ParseState::Specs
599 }
600 _ => state,
601 };
602 continue;
603 }
604
605 match state {
607 ParseState::Gem | ParseState::Git | ParseState::Path | ParseState::Svn => {
608 if let Some(caps) = options_regex.captures(line) {
610 let key = caps.get(1).map(|m| m.as_str()).unwrap_or("");
611 let value = caps.get(2).map(|m| m.as_str()).unwrap_or("");
612 current_options.insert(key.to_string(), value.to_string());
613 if key == "remote" {
614 current_remote = Some(value.to_string());
615 }
616 }
617 }
618 ParseState::Specs => {
619 if let Some(caps) = spec_deps_regex.captures(line) {
621 let name = caps.get(1).map(|m| m.as_str()).unwrap_or("").to_string();
622 let version_str = caps.get(2).map(|m| m.as_str()).unwrap_or("");
623
624 let (version, platform) = parse_version_platform(version_str);
626
627 if !name.is_empty() {
628 let gem_info = GemInfo {
629 name: name.clone(),
630 version,
631 platform,
632 gem_type: current_gem_type.clone(),
633 remote: current_remote.clone(),
634 revision: current_options.get("revision").cloned(),
635 ref_field: current_options.get("ref").cloned(),
636 branch: current_options.get("branch").cloned(),
637 tag: current_options.get("tag").cloned(),
638 pinned: false,
639 requirements: Vec::new(),
640 };
641 gems.insert(name, gem_info);
642 }
643 }
644 }
645 ParseState::Platforms => {
646 let platform = trimmed.trim();
648 if !platform.is_empty() {
649 platforms.push(platform.to_string());
650 }
651 }
652 ParseState::BundledWith => {
653 if let Some(caps) = version_regex.captures(line) {
655 bundler_version = caps.get(1).map(|m| m.as_str().to_string());
656 }
657 }
658 ParseState::Dependencies => {
659 if let Some(caps) = deps_regex.captures(line) {
661 let name = caps.get(1).map(|m| m.as_str()).unwrap_or("").to_string();
662 let version_constraint = caps.get(2).map(|m| m.as_str().to_string());
663 let pinned = caps.get(3).is_some();
664
665 if !name.is_empty() {
666 if let Some(gem) = gems.get_mut(&name) {
668 gem.pinned = pinned;
669 if let Some(vc) = &version_constraint {
670 gem.requirements.push(vc.clone());
671 }
672 } else {
673 let gem_info = GemInfo {
674 name: name.clone(),
675 version: None,
676 platform: None,
677 gem_type: "GEM".to_string(),
678 remote: None,
679 revision: None,
680 ref_field: None,
681 branch: None,
682 tag: None,
683 pinned,
684 requirements: version_constraint.into_iter().collect(),
685 };
686 gems.insert(name, gem_info);
687 }
688 }
689 }
690 }
691 ParseState::None => {}
692 }
693 }
694
695 let primary_gem = select_primary_path_gem(&gems);
696
697 let (
698 package_name,
699 package_version,
700 repository_homepage_url,
701 repository_download_url,
702 api_data_url,
703 download_url,
704 ) = if let Some(ref pg) = primary_gem {
705 let urls = get_rubygems_urls(&pg.name, pg.version.as_deref(), pg.platform.as_deref());
706 (
707 Some(pg.name.clone()),
708 pg.version.clone(),
709 urls.0,
710 urls.1,
711 urls.2,
712 urls.3,
713 )
714 } else {
715 (None, None, None, None, None, None)
716 };
717
718 for (_, gem) in gems {
719 if let Some(ref pg) = primary_gem
720 && gem.name == pg.name
721 {
722 continue;
723 }
724
725 let version_for_purl = gem.version.as_deref();
726 let purl = create_gem_purl(&gem.name, version_for_purl);
727
728 let extracted_requirement = if !gem.requirements.is_empty() {
729 Some(gem.requirements.join(", "))
730 } else {
731 gem.version.clone()
732 };
733
734 let extra_data = build_gem_source_extra_data(&gem);
735
736 dependencies.push(Dependency {
737 purl,
738 extracted_requirement,
739 scope: Some("dependencies".to_string()),
740 is_runtime: Some(true),
741 is_optional: Some(false),
742 is_pinned: Some(gem.pinned),
743 is_direct: Some(true),
744 resolved_package: None,
745 extra_data,
746 });
747 }
748
749 dependencies.sort_by(|left, right| {
750 left.purl
751 .as_deref()
752 .cmp(&right.purl.as_deref())
753 .then_with(|| {
754 left.extracted_requirement
755 .as_deref()
756 .cmp(&right.extracted_requirement.as_deref())
757 })
758 });
759
760 let mut extra_data = HashMap::new();
762 if !platforms.is_empty() {
763 extra_data.insert(
764 "platforms".to_string(),
765 serde_json::Value::Array(
766 platforms
767 .into_iter()
768 .map(serde_json::Value::String)
769 .collect(),
770 ),
771 );
772 }
773 if let Some(bv) = bundler_version {
774 extra_data.insert("bundler_version".to_string(), serde_json::Value::String(bv));
775 }
776
777 let purl = package_name
778 .as_deref()
779 .map(|n| create_gem_purl(n, package_version.as_deref()))
780 .unwrap_or(None);
781
782 PackageData {
783 package_type: Some(PACKAGE_TYPE),
784 name: package_name,
785 version: package_version,
786 primary_language: Some("Ruby".to_string()),
787 download_url,
788 dependencies,
789 repository_homepage_url,
790 repository_download_url,
791 api_data_url,
792 extra_data: if extra_data.is_empty() {
793 None
794 } else {
795 Some(extra_data)
796 },
797 datasource_id: Some(DatasourceId::GemfileLock),
798 purl,
799 ..default_package_data()
800 }
801}
802
803fn build_gem_source_extra_data(gem: &GemInfo) -> Option<HashMap<String, serde_json::Value>> {
804 if gem.gem_type != "GIT" && gem.gem_type != "PATH" && gem.gem_type != "SVN" {
805 return None;
806 }
807
808 let mut extra = HashMap::new();
809 extra.insert(
810 "source_type".to_string(),
811 serde_json::Value::String(gem.gem_type.clone()),
812 );
813
814 if let Some(ref remote) = gem.remote {
815 extra.insert(
816 "remote".to_string(),
817 serde_json::Value::String(remote.clone()),
818 );
819 }
820 if let Some(ref revision) = gem.revision {
821 extra.insert(
822 "revision".to_string(),
823 serde_json::Value::String(revision.clone()),
824 );
825 }
826 if let Some(ref ref_field) = gem.ref_field {
827 extra.insert(
828 "ref".to_string(),
829 serde_json::Value::String(ref_field.clone()),
830 );
831 }
832 if let Some(ref branch) = gem.branch {
833 extra.insert(
834 "branch".to_string(),
835 serde_json::Value::String(branch.clone()),
836 );
837 }
838 if let Some(ref tag) = gem.tag {
839 extra.insert("tag".to_string(), serde_json::Value::String(tag.clone()));
840 }
841
842 Some(extra)
843}
844
/// Splits a lockfile version string such as `1.15.0-x86_64-linux` at its first
/// `-` into `(version, platform)`.
///
/// An empty input yields `(None, None)`; an input without `-` yields the whole
/// string as the version and no platform.
fn parse_version_platform(s: &str) -> (Option<String>, Option<String>) {
    if s.is_empty() {
        return (None, None);
    }

    match s.split_once('-') {
        Some((version, platform)) => (Some(version.to_string()), Some(platform.to_string())),
        None => (Some(s.to_string()), None),
    }
}
859
860fn create_gem_purl(name: &str, version: Option<&str>) -> Option<String> {
862 let mut purl = match PackageUrl::new(PACKAGE_TYPE.as_str(), name) {
863 Ok(p) => p,
864 Err(e) => {
865 warn!("Failed to create PURL for gem '{}': {}", name, e);
866 return None;
867 }
868 };
869
870 if let Some(v) = version
871 && let Err(e) = purl.with_version(v)
872 {
873 warn!("Failed to set version '{}' for gem '{}': {}", v, name, e);
874 }
875
876 Some(purl.to_string())
877}
878
/// Returns the rubygems.org page URL for a gem, or `None` for an empty `name`.
///
/// With a version the URL points at that version's page; the version has
/// surrounding whitespace and then surrounding `/` characters removed first.
fn rubygems_homepage_url(name: &str, version: Option<&str>) -> Option<String> {
    if name.is_empty() {
        return None;
    }

    let url = match version {
        Some(raw) => {
            let v = raw.trim().trim_matches('/');
            format!("https://rubygems.org/gems/{}/versions/{}", name, v)
        }
        None => format!("https://rubygems.org/gems/{}", name),
    };
    Some(url)
}
891
/// Returns the rubygems.org download URL of a gem archive.
///
/// `name` and `version` have surrounding whitespace and then surrounding `/`
/// characters removed. A `platform` other than `ruby` is appended to the
/// version as `-<platform>`.
///
/// Returns `None` when `version` is `None`, or when the name or the version is
/// empty after normalization, since no valid archive name can be formed then.
fn rubygems_download_url(
    name: &str,
    version: Option<&str>,
    platform: Option<&str>,
) -> Option<String> {
    let name = name.trim().trim_matches('/');
    let version = version?.trim().trim_matches('/');
    // Validate after normalization so that inputs like "/" or "  " are rejected too.
    if name.is_empty() || version.is_empty() {
        return None;
    }

    let version_plat = match platform {
        Some(p) if p != "ruby" => format!("{}-{}", version, p),
        _ => version.to_string(),
    };

    Some(format!(
        "https://rubygems.org/downloads/{}-{}.gem",
        name, version_plat
    ))
}
919
/// Returns the rubygems.org API URL for a gem, or `None` for an empty `name`.
///
/// With a version the v2 per-version endpoint is used, otherwise the v1
/// versions listing.
fn rubygems_api_url(name: &str, version: Option<&str>) -> Option<String> {
    if name.is_empty() {
        return None;
    }

    let url = match version {
        Some(v) => format!(
            "https://rubygems.org/api/v2/rubygems/{}/versions/{}.json",
            name, v
        ),
        None => format!(
            "https://rubygems.org/api/v1/versions/{}.json",
            name
        ),
    };
    Some(url)
}
937
938fn get_rubygems_urls(
939 name: &str,
940 version: Option<&str>,
941 platform: Option<&str>,
942) -> (
943 Option<String>,
944 Option<String>,
945 Option<String>,
946 Option<String>,
947) {
948 let repository_homepage_url = rubygems_homepage_url(name, version);
949 let repository_download_url = rubygems_download_url(name, version, platform);
950 let api_data_url = rubygems_api_url(name, version);
951 let download_url = repository_download_url.clone();
952
953 (
954 repository_homepage_url,
955 repository_download_url,
956 api_data_url,
957 download_url,
958 )
959}
960
961fn default_package_data() -> PackageData {
963 PackageData {
964 package_type: Some(PACKAGE_TYPE),
965 primary_language: Some("Ruby".to_string()),
966 ..Default::default()
967 }
968}
969
970fn default_package_data_with_datasource(datasource_id: DatasourceId) -> PackageData {
971 PackageData {
972 datasource_id: Some(datasource_id),
973 ..default_package_data()
974 }
975}
976
977pub struct GemspecParser;
987
988impl PackageParser for GemspecParser {
989 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
990
991 fn extract_packages(path: &Path) -> Vec<PackageData> {
992 let datasource_id = gemspec_datasource_id(path);
993 let content = match read_file_to_string(path, None) {
994 Ok(c) => c,
995 Err(e) => {
996 warn!("Failed to read .gemspec at {:?}: {}", path, e);
997 return vec![default_package_data_with_datasource(datasource_id)];
998 }
999 };
1000
1001 let mut package_data = parse_gemspec_with_context(&content, path.parent());
1002 package_data.datasource_id = Some(datasource_id);
1003 vec![package_data]
1004 }
1005
1006 fn is_match(path: &Path) -> bool {
1007 path.extension()
1008 .and_then(|ext| ext.to_str())
1009 .is_some_and(|ext| ext == "gemspec")
1010 }
1011}
1012
/// Renders `path` as a string (lossily for non-UTF-8 names) with every
/// backslash replaced by a forward slash.
fn normalized_ruby_path(path: &Path) -> String {
    let lossy = path.to_string_lossy();
    lossy
        .chars()
        .map(|c| if c == '\\' { '/' } else { c })
        .collect()
}
1016
1017fn gemfile_datasource_id(path: &Path) -> DatasourceId {
1018 if normalized_ruby_path(path).contains("/data.gz-extract/") {
1019 DatasourceId::GemfileExtracted
1020 } else {
1021 DatasourceId::Gemfile
1022 }
1023}
1024
1025fn gemfile_lock_datasource_id(path: &Path) -> DatasourceId {
1026 if normalized_ruby_path(path).contains("/data.gz-extract/") {
1027 DatasourceId::GemfileLockExtracted
1028 } else {
1029 DatasourceId::GemfileLock
1030 }
1031}
1032
1033fn gemspec_datasource_id(path: &Path) -> DatasourceId {
1034 let normalized = normalized_ruby_path(path);
1035 if normalized.contains("/data.gz-extract/") {
1036 DatasourceId::GemspecExtracted
1037 } else if normalized.contains("/specifications/") {
1038 DatasourceId::GemGemspecInstalledSpecifications
1039 } else {
1040 DatasourceId::Gemspec
1041 }
1042}
1043
1044fn clean_gemspec_value(s: &str) -> String {
1046 let s = strip_freeze_suffix(s).trim();
1047
1048 let s = if let Some(pos) = s.find(" #") {
1049 s[..pos].trim()
1050 } else {
1051 s
1052 };
1053
1054 let s = if let Some(stripped) = s.strip_prefix("%q{") {
1055 stripped.strip_suffix('}').unwrap_or(stripped)
1056 } else if let Some(stripped) = s.strip_prefix("%q<") {
1057 stripped.strip_suffix('>').unwrap_or(stripped)
1058 } else if let Some(stripped) = s.strip_prefix("%q[") {
1059 stripped.strip_suffix(']').unwrap_or(stripped)
1060 } else if let Some(stripped) = s.strip_prefix("%q(") {
1061 stripped.strip_suffix(')').unwrap_or(stripped)
1062 } else {
1063 s
1064 };
1065
1066 let s = s
1067 .trim_start_matches('"')
1068 .trim_end_matches('"')
1069 .trim_start_matches('\'')
1070 .trim_end_matches('\'');
1071 let s = strip_freeze_suffix(s).trim();
1072 s.to_string()
1073}
1074
1075fn extract_ruby_array(s: &str) -> Vec<String> {
1077 let s = strip_freeze_suffix(s.trim());
1078 let s = s.trim_start_matches('[').trim_end_matches(']');
1079 let item_re = match Regex::new(r#"["']([^"']*?)["'](?:\.freeze)?"#) {
1080 Ok(r) => r,
1081 Err(_) => return Vec::new(),
1082 };
1083 item_re
1084 .captures_iter(s)
1085 .filter_map(|cap| cap.get(1).map(|m| m.as_str().to_string()))
1086 .collect()
1087}
1088
1089fn extract_all_ruby_values(s: &str) -> Vec<String> {
1090 let value_re = match Regex::new(r#"%q[\{<\[(]([^\}>\])]+)[\}>\])]|["']([^"']+)["']"#) {
1091 Ok(r) => r,
1092 Err(_) => return Vec::new(),
1093 };
1094
1095 value_re
1096 .captures_iter(s)
1097 .filter_map(|caps| caps.get(1).or_else(|| caps.get(2)))
1098 .map(|m| clean_gemspec_value(m.as_str()))
1099 .collect()
1100}
1101
1102fn extract_first_ruby_value(s: &str) -> Option<String> {
1103 extract_all_ruby_values(s).into_iter().next()
1104}
1105
/// Returns the trimmed text following the first top-level comma of an
/// argument list, or `""` when there is no such comma.
///
/// Commas inside quotes, inside `[]` / `{}` / `<>` and inside `()` are not
/// counted. Inside quotes a backslash escapes the character after it.
fn after_first_argument(args: &str) -> &str {
    let mut bracket_nesting = 0usize;
    let mut paren_nesting = 0usize;
    let mut open_quote: Option<char> = None;
    let mut cursor = args.char_indices();

    while let Some((offset, current)) = cursor.next() {
        if let Some(delimiter) = open_quote {
            if current == '\\' {
                // Skip the escaped character so it cannot close the quote.
                cursor.next();
            } else if current == delimiter {
                open_quote = None;
            }
            continue;
        }

        match current {
            '\'' | '"' => open_quote = Some(current),
            '[' | '{' | '<' => bracket_nesting += 1,
            ']' | '}' | '>' => bracket_nesting = bracket_nesting.saturating_sub(1),
            '(' => paren_nesting += 1,
            ')' => paren_nesting = paren_nesting.saturating_sub(1),
            ',' if bracket_nesting == 0 && paren_nesting == 0 => {
                return args[offset + 1..].trim();
            }
            _ => {}
        }
    }

    ""
}
1143
1144fn resolve_variable_version(var_name: &str, contexts: &[String]) -> Option<String> {
1149 let var_name = var_name.trim();
1150 if var_name.is_empty() {
1151 return None;
1152 }
1153
1154 for candidate in candidate_constant_names(var_name) {
1155 let escaped = regex::escape(&candidate);
1156 let pattern = format!(r#"(?m)^\s*{}\s*=\s*(.+)$"#, escaped);
1157 let Ok(re) = Regex::new(&pattern) else {
1158 continue;
1159 };
1160
1161 for context in contexts {
1162 if let Some(caps) = re.captures(context)
1163 && let Some(expression) = caps.get(1)
1164 && let Some(resolved) =
1165 resolve_scalar_expression(expression.as_str(), None, contexts)
1166 {
1167 return Some(resolved);
1168 }
1169 }
1170 }
1171
1172 None
1173}
1174
1175fn resolve_variable_array(var_name: &str, contexts: &[String]) -> Option<Vec<String>> {
1176 let var_name = var_name.trim();
1177 if var_name.is_empty() {
1178 return None;
1179 }
1180
1181 for candidate in candidate_constant_names(var_name) {
1182 let escaped = regex::escape(&candidate);
1183 let pattern = format!(r#"(?m)^\s*{}\s*=\s*(\[[^\n]+\])"#, escaped);
1184 let Ok(re) = Regex::new(&pattern) else {
1185 continue;
1186 };
1187
1188 for context in contexts {
1189 if let Some(caps) = re.captures(context)
1190 && let Some(raw) = caps.get(1)
1191 {
1192 let values = extract_ruby_array(raw.as_str());
1193 if !values.is_empty() {
1194 return Some(values);
1195 }
1196 }
1197 }
1198 }
1199
1200 None
1201}
1202
/// Lists the names to search for when resolving `var_name`: the name itself
/// and, if it contains `::`, the segment after the last `::`.
fn candidate_constant_names(var_name: &str) -> Vec<String> {
    let mut candidates = Vec::with_capacity(2);
    candidates.push(var_name.to_string());

    if let Some((_, last_segment)) = var_name.rsplit_once("::") {
        candidates.push(last_segment.to_string());
    }

    candidates
}
1212
/// Returns `true` when `s` has the shape of a Ruby local variable name: it
/// starts with `_` or an ASCII lowercase letter and continues with `_` or
/// ASCII alphanumerics only.
fn looks_like_local_variable_reference(s: &str) -> bool {
    let mut rest = s.chars();
    let Some(first) = rest.next() else {
        return false;
    };

    (first == '_' || first.is_ascii_lowercase())
        && rest.all(|c| c == '_' || c.is_ascii_alphanumeric())
}
1218
/// Determines the directory that `File.read` arguments must stay inside.
///
/// When the canonical `base_dir` lies inside the canonical current working
/// directory, the working directory is the root; otherwise the canonical
/// `base_dir` itself is. Returns `None` when `base_dir` is `None` or cannot be
/// canonicalized.
fn resolve_ruby_read_root(base_dir: Option<&Path>) -> Option<PathBuf> {
    let base_dir = base_dir?;

    let canonical_cwd = std::env::current_dir()
        .ok()
        .and_then(|cwd| cwd.canonicalize().ok());
    let canonical_base = base_dir.canonicalize().ok();

    match (canonical_cwd, canonical_base) {
        (Some(cwd), Some(base)) if base.starts_with(&cwd) => Some(cwd),
        (_, base) => base,
    }
}
1233
/// Canonicalizes `path` and returns it only when it lies inside
/// `allowed_root`.
///
/// Returns `None` when the path cannot be canonicalized (for example because
/// it does not exist) or when it resolves to a location outside the root.
fn resolve_ruby_read_path(path: PathBuf, allowed_root: &Path) -> Option<PathBuf> {
    match path.canonicalize() {
        Ok(resolved) if resolved.starts_with(allowed_root) => Some(resolved),
        _ => None,
    }
}
1240
1241fn resolve_file_read_argument(args: &str, base_dir: Option<&Path>) -> Option<String> {
1242 let base_dir = base_dir?;
1243 let allowed_root = resolve_ruby_read_root(base_dir.into())?;
1244 let relative_path = extract_first_ruby_value(args)?;
1245 if relative_path.is_empty() {
1246 return None;
1247 }
1248
1249 let candidate = Path::new(&relative_path);
1250 let path = if candidate.is_absolute() {
1251 candidate.to_path_buf()
1252 } else {
1253 base_dir.join(candidate)
1254 };
1255
1256 let safe_path = resolve_ruby_read_path(path, &allowed_root)?;
1257
1258 fs::read_to_string(safe_path)
1259 .ok()
1260 .map(|content| content.trim().to_string())
1261 .filter(|content| !content.is_empty())
1262}
1263
1264fn resolve_scalar_expression(
1265 expression: &str,
1266 base_dir: Option<&Path>,
1267 contexts: &[String],
1268) -> Option<String> {
1269 let expression = if let Some(pos) = expression.find(" #") {
1270 expression[..pos].trim()
1271 } else {
1272 expression.trim()
1273 };
1274
1275 let file_read_re = Regex::new(r#"^File\.read\((.+)\)(?:\.strip)?(?:\.freeze)?$"#).ok()?;
1276 if let Some(caps) = file_read_re.captures(expression) {
1277 return caps
1278 .get(1)
1279 .and_then(|m| resolve_file_read_argument(m.as_str(), base_dir));
1280 }
1281
1282 if let Some(joined) = resolve_joined_constant_string(expression, contexts) {
1283 return Some(joined);
1284 }
1285
1286 if let Some(value) = extract_first_ruby_value(expression) {
1287 return Some(interpolate_ruby_constant_string(&value, contexts));
1288 }
1289
1290 let cleaned = clean_gemspec_value(expression);
1291 if looks_like_constant_reference(&cleaned) {
1292 return resolve_variable_version(&cleaned, contexts).or(Some(cleaned));
1293 }
1294
1295 None
1296}
1297
1298fn resolve_joined_constant_string(expression: &str, contexts: &[String]) -> Option<String> {
1299 let expression = strip_freeze_suffix(expression.trim());
1300 if !expression.starts_with('[') {
1301 return None;
1302 }
1303 let join_index = expression.find("].join(")?;
1304 let body = &expression[1..join_index];
1305 let separator_expr = expression[join_index + 7..].strip_suffix(')')?.trim();
1306 let separator = extract_first_ruby_value(separator_expr)?;
1307
1308 let mut parts = Vec::new();
1309 for item in body.split(',').take(MAX_ITERATION_COUNT) {
1310 let resolved = resolve_scalar_expression(item.trim(), None, contexts)?;
1311 parts.push(resolved);
1312 }
1313
1314 Some(parts.join(&separator))
1315}
1316
1317fn interpolate_ruby_constant_string(value: &str, contexts: &[String]) -> String {
1318 if !value.contains("#{") {
1319 return value.to_string();
1320 }
1321
1322 let Ok(interpolation_re) = Regex::new(r#"#\{([^}]+)\}"#) else {
1323 return value.to_string();
1324 };
1325 interpolation_re
1326 .replace_all(value, |captures: ®ex::Captures<'_>| {
1327 let reference = captures
1328 .get(1)
1329 .map(|m| m.as_str().trim())
1330 .unwrap_or_default();
1331 resolve_variable_version(reference, contexts).unwrap_or_else(|| {
1332 captures
1333 .get(0)
1334 .map(|value| value.as_str().to_string())
1335 .unwrap_or_default()
1336 })
1337 })
1338 .into_owned()
1339}
1340
1341fn resolve_local_variable_value(
1342 var_name: &str,
1343 content: &str,
1344 base_dir: Option<&Path>,
1345 contexts: &[String],
1346) -> Option<String> {
1347 let escaped = regex::escape(var_name.trim());
1348 let pattern = format!(r#"(?m)^\s*{}\s*=\s*(.+)$"#, escaped);
1349 let re = Regex::new(&pattern).ok()?;
1350
1351 re.captures_iter(content).find_map(|caps| {
1352 caps.get(1)
1353 .and_then(|m| resolve_scalar_expression(m.as_str(), base_dir, contexts))
1354 })
1355}
1356
1357fn resolve_gemspec_scalar_value(
1358 raw_value: &str,
1359 content: &str,
1360 base_dir: Option<&Path>,
1361 contexts: &[String],
1362) -> Option<String> {
1363 let cleaned = truncate_field(clean_gemspec_value(raw_value));
1364 if cleaned.is_empty() {
1365 return None;
1366 }
1367
1368 if looks_like_constant_reference(&cleaned) {
1369 return resolve_variable_version(&cleaned, contexts)
1370 .map(truncate_field)
1371 .or(Some(cleaned));
1372 }
1373
1374 if looks_like_local_variable_reference(&cleaned) {
1375 return resolve_local_variable_value(&cleaned, content, base_dir, contexts)
1376 .map(truncate_field)
1377 .or(Some(cleaned));
1378 }
1379
1380 Some(cleaned)
1381}
1382
1383fn load_required_ruby_contexts(content: &str, base_dir: Option<&Path>) -> Vec<String> {
1384 let mut contexts = vec![content.to_string()];
1385 let Some(base_dir) = base_dir else {
1386 return contexts;
1387 };
1388 let allowed_root = resolve_ruby_read_root(Some(base_dir));
1389
1390 let require_re = match Regex::new(r#"(?m)^\s*require(?:_relative)?\s+["']([^"']+)["']"#) {
1391 Ok(re) => re,
1392 Err(_) => return contexts,
1393 };
1394
1395 for caps in require_re.captures_iter(content) {
1396 let Some(required) = caps.get(1).map(|m| m.as_str()) else {
1397 continue;
1398 };
1399 for candidate in candidate_require_paths(base_dir, required) {
1400 let Some(safe_candidate) = allowed_root
1401 .as_deref()
1402 .and_then(|root| resolve_ruby_read_path(candidate, root))
1403 else {
1404 continue;
1405 };
1406 if let Ok(required_content) = read_file_to_string(&safe_candidate, None) {
1407 contexts.push(required_content);
1408 break;
1409 }
1410 }
1411 }
1412
1413 contexts
1414}
1415
/// Lists the files a Ruby `require` argument may refer to, in lookup order.
///
/// `::` in `required` is replaced by `/` and a `.rb` extension is appended
/// when missing. The resulting relative name is tried directly under
/// `base_dir` first, then under `base_dir/lib`.
fn candidate_require_paths(base_dir: &Path, required: &str) -> Vec<PathBuf> {
    let mut filename = required.replace("::", "/");
    if !filename.ends_with(".rb") {
        filename.push_str(".rb");
    }

    let lib_dir = base_dir.join("lib");
    vec![base_dir.join(&filename), lib_dir.join(&filename)]
}
1429
/// Reports whether `s` looks like a Ruby constant reference: it either
/// contains the `::` namespace separator or starts with an ASCII uppercase
/// letter.
fn looks_like_constant_reference(s: &str) -> bool {
    if s.contains("::") {
        return true;
    }
    matches!(s.chars().next(), Some('A'..='Z'))
}
1433
/// Test-only convenience wrapper: parses gemspec text with no base directory,
/// so no files are read from disk.
#[cfg(test)]
fn parse_gemspec(content: &str) -> PackageData {
    let no_base_dir: Option<&Path> = None;
    parse_gemspec_with_context(content, no_base_dir)
}
1439
1440fn parse_gemspec_with_context(content: &str, base_dir: Option<&Path>) -> PackageData {
1441 let contexts = load_required_ruby_contexts(content, base_dir);
1442
1443 let field_re = match Regex::new(
1446 r#"(?m)^\s*\w+\.(name|version|summary|description|homepage|license)\s*=\s*(.+)$"#,
1447 ) {
1448 Ok(r) => r,
1449 Err(e) => {
1450 warn!("Failed to compile gemspec field regex: {}", e);
1451 return default_package_data_with_datasource(DatasourceId::Gemspec);
1452 }
1453 };
1454
1455 let licenses_re = match Regex::new(r#"(?m)^\s*\w+\.licenses\s*=\s*(.+)$"#) {
1456 Ok(r) => r,
1457 Err(e) => {
1458 warn!("Failed to compile licenses regex: {}", e);
1459 return default_package_data_with_datasource(DatasourceId::Gemspec);
1460 }
1461 };
1462
1463 let authors_re = match Regex::new(r#"(?m)^\s*\w+\.(?:authors|author)\s*=\s*(.+)$"#) {
1464 Ok(r) => r,
1465 Err(e) => {
1466 warn!("Failed to compile authors regex: {}", e);
1467 return default_package_data_with_datasource(DatasourceId::Gemspec);
1468 }
1469 };
1470
1471 let email_re = match Regex::new(r#"(?m)^\s*\w+\.email\s*=\s*(.+)$"#) {
1472 Ok(r) => r,
1473 Err(e) => {
1474 warn!("Failed to compile email regex: {}", e);
1475 return default_package_data_with_datasource(DatasourceId::Gemspec);
1476 }
1477 };
1478
1479 let dependency_call_re = match Regex::new(
1480 r#"(?m)^\s*\w+\.(add_(?:development_|runtime_)?dependency)\s*\(?(.+?)\)?\s*$"#,
1481 ) {
1482 Ok(r) => r,
1483 Err(e) => {
1484 warn!("Failed to compile gemspec dependency regex: {}", e);
1485 return default_package_data_with_datasource(DatasourceId::Gemspec);
1486 }
1487 };
1488
1489 let mut name: Option<String> = None;
1490 let mut version: Option<String> = None;
1491 let mut summary: Option<String> = None;
1492 let mut description: Option<String> = None;
1493 let mut homepage: Option<String> = None;
1494 let mut license: Option<String> = None;
1495 let mut licenses: Vec<String> = Vec::new();
1496 let mut authors: Vec<String> = Vec::new();
1497 let mut emails: Vec<String> = Vec::new();
1498 let mut dependencies: Vec<Dependency> = Vec::new();
1499
1500 for caps in field_re.captures_iter(content).take(MAX_ITERATION_COUNT) {
1502 let field_name = match caps.get(1) {
1503 Some(m) => m.as_str(),
1504 None => continue,
1505 };
1506 let raw_value = match caps.get(2) {
1507 Some(m) => m.as_str().trim(),
1508 None => continue,
1509 };
1510
1511 match field_name {
1512 "name" => name = resolve_gemspec_scalar_value(raw_value, content, base_dir, &contexts),
1513 "version" => {
1514 version = resolve_gemspec_scalar_value(raw_value, content, base_dir, &contexts);
1515 }
1516 "summary" => {
1517 summary = resolve_gemspec_scalar_value(raw_value, content, base_dir, &contexts)
1518 }
1519 "description" => description = Some(truncate_field(clean_gemspec_value(raw_value))),
1520 "homepage" => {
1521 homepage = resolve_gemspec_scalar_value(raw_value, content, base_dir, &contexts)
1522 }
1523 "license" => license = Some(truncate_field(clean_gemspec_value(raw_value))),
1524 _ => {}
1525 }
1526 }
1527
1528 for caps in licenses_re.captures_iter(content).take(MAX_ITERATION_COUNT) {
1530 if let Some(raw) = caps.get(1) {
1531 licenses = extract_ruby_array(raw.as_str());
1532 }
1533 }
1534
1535 for caps in authors_re.captures_iter(content).take(MAX_ITERATION_COUNT) {
1537 if let Some(raw) = caps.get(1) {
1538 let raw_str = raw.as_str().trim();
1539 if raw_str.starts_with('[') {
1540 authors = extract_ruby_array(raw_str);
1541 } else if looks_like_constant_reference(raw_str) {
1542 authors = resolve_variable_array(raw_str, &contexts)
1543 .unwrap_or_else(|| vec![clean_gemspec_value(raw_str)]);
1544 } else {
1545 authors.push(clean_gemspec_value(raw_str));
1546 }
1547 }
1548 }
1549
1550 for caps in email_re.captures_iter(content).take(MAX_ITERATION_COUNT) {
1552 if let Some(raw) = caps.get(1) {
1553 let raw_str = raw.as_str().trim();
1554 if raw_str.starts_with('[') {
1555 emails = extract_ruby_array(raw_str);
1556 } else if looks_like_constant_reference(raw_str) {
1557 emails = resolve_variable_array(raw_str, &contexts)
1558 .unwrap_or_else(|| vec![clean_gemspec_value(raw_str)]);
1559 } else {
1560 emails.push(clean_gemspec_value(raw_str));
1561 }
1562 }
1563 }
1564
1565 let mut parties: Vec<Party> = Vec::new();
1567
1568 if authors.len() == 1 && emails.len() == 1 {
1569 let email_str = emails.first().map(String::as_str);
1570 let (parsed_email_name, parsed_email) = match email_str {
1571 Some(e) => split_name_email(e),
1572 None => (None, None),
1573 };
1574
1575 parties.push(Party {
1576 r#type: Some("person".to_string()),
1577 role: Some("author".to_string()),
1578 name: authors.first().cloned().or(parsed_email_name),
1579 email: parsed_email.or_else(|| {
1580 email_str
1581 .filter(|e| e.contains('@') && !e.contains('<'))
1582 .map(|e| e.to_string())
1583 }),
1584 url: None,
1585 organization: None,
1586 organization_url: None,
1587 timezone: None,
1588 });
1589 } else {
1590 for author_name in authors {
1591 parties.push(Party {
1592 r#type: Some("person".to_string()),
1593 role: Some("author".to_string()),
1594 name: Some(author_name),
1595 email: None,
1596 url: None,
1597 organization: None,
1598 organization_url: None,
1599 timezone: None,
1600 });
1601 }
1602
1603 for email_str in emails {
1604 let (parsed_email_name, parsed_email) = if email_str.contains('<') {
1605 split_name_email(&email_str)
1606 } else {
1607 (None, None)
1608 };
1609 parties.push(Party {
1610 r#type: Some("person".to_string()),
1611 role: Some("author".to_string()),
1612 name: parsed_email_name,
1613 email: parsed_email.or_else(|| email_str.contains('@').then_some(email_str)),
1614 url: None,
1615 organization: None,
1616 organization_url: None,
1617 timezone: None,
1618 });
1619 }
1620 }
1621
1622 for caps in dependency_call_re
1623 .captures_iter(content)
1624 .take(MAX_ITERATION_COUNT)
1625 {
1626 let method = match caps.get(1) {
1627 Some(m) => m.as_str(),
1628 None => continue,
1629 };
1630 let args = match caps.get(2) {
1631 Some(m) => m.as_str(),
1632 None => continue,
1633 };
1634
1635 let Some(dep_name) = extract_first_ruby_value(args).map(truncate_field) else {
1636 continue;
1637 };
1638 let version_parts = extract_all_ruby_values(after_first_argument(args));
1639 let extracted_requirement = if version_parts.is_empty() {
1640 None
1641 } else {
1642 Some(version_parts.join(", "))
1643 };
1644 let purl = create_gem_purl(&dep_name, None);
1645 let is_development = method == "add_development_dependency";
1646 let scope = if is_development {
1647 "development"
1648 } else {
1649 "runtime"
1650 };
1651
1652 dependencies.push(Dependency {
1653 purl,
1654 extracted_requirement,
1655 scope: Some(scope.to_string()),
1656 is_runtime: Some(!is_development),
1657 is_optional: Some(is_development),
1658 is_pinned: None,
1659 is_direct: Some(true),
1660 resolved_package: None,
1661 extra_data: None,
1662 });
1663 }
1664
1665 let extracted_license_statement = if !licenses.is_empty() {
1667 Some(licenses.join(" AND "))
1668 } else {
1669 license
1670 };
1671
1672 let (declared_license_expression, declared_license_expression_spdx, license_detections) =
1673 normalize_spdx_declared_license(extracted_license_statement.as_deref());
1674
1675 let final_description = description.or(summary);
1677
1678 let purl = name
1680 .as_deref()
1681 .map(|n| create_gem_purl(n, version.as_deref()))
1682 .unwrap_or(None);
1683
1684 let (repository_homepage_url, repository_download_url, api_data_url, download_url) =
1685 if let Some(n) = name.as_deref() {
1686 get_rubygems_urls(n, version.as_deref(), None)
1687 } else {
1688 (None, None, None, None)
1689 };
1690
1691 PackageData {
1692 package_type: Some(PACKAGE_TYPE),
1693 name,
1694 version,
1695 primary_language: Some("Ruby".to_string()),
1696 description: final_description,
1697 homepage_url: homepage,
1698 download_url,
1699 declared_license_expression,
1700 declared_license_expression_spdx,
1701 license_detections,
1702 extracted_license_statement,
1703 parties,
1704 dependencies,
1705 repository_homepage_url,
1706 repository_download_url,
1707 api_data_url,
1708 datasource_id: Some(DatasourceId::Gemspec),
1709 purl,
1710 ..default_package_data()
1711 }
1712}
1713
/// Largest `.gem` archive, measured by its on-disk size in bytes, that
/// `extract_gem_archive` will open (100 MiB).
const MAX_ARCHIVE_SIZE: u64 = 100 * 1024 * 1024;

/// Upper bound, in bytes, applied to `metadata.gz` both as stored in the
/// archive and after decompression (50 MiB).
const MAX_FILE_SIZE: u64 = 50 * 1024 * 1024;

/// Highest accepted ratio of decompressed to compressed `metadata.gz` size;
/// anything above it is rejected by `extract_gem_archive` as suspicious.
const MAX_COMPRESSION_RATIO: f64 = 100.0;

/// Parser for `.gem` archive files (see its `PackageParser` implementation).
pub struct GemArchiveParser;
1729
1730impl PackageParser for GemArchiveParser {
1731 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1732
1733 fn extract_packages(path: &Path) -> Vec<PackageData> {
1734 vec![match extract_gem_archive(path) {
1735 Ok(data) => data,
1736 Err(e) => {
1737 warn!("Failed to extract .gem archive at {:?}: {}", path, e);
1738 default_package_data_with_datasource(DatasourceId::GemArchive)
1739 }
1740 }]
1741 }
1742
1743 fn is_match(path: &Path) -> bool {
1744 path.extension()
1745 .and_then(|ext| ext.to_str())
1746 .is_some_and(|ext| ext == "gem")
1747 }
1748}
1749
1750fn extract_gem_archive(path: &Path) -> Result<PackageData, String> {
1751 let file_metadata =
1752 fs::metadata(path).map_err(|e| format!("Failed to read file metadata: {}", e))?;
1753 let archive_size = file_metadata.len();
1754
1755 if archive_size > MAX_ARCHIVE_SIZE {
1756 return Err(format!(
1757 "Archive too large: {} bytes (limit: {} bytes)",
1758 archive_size, MAX_ARCHIVE_SIZE
1759 ));
1760 }
1761
1762 let file = File::open(path).map_err(|e| format!("Failed to open archive: {}", e))?;
1763 let mut archive = Archive::new(file);
1764
1765 let mut entry_count: usize = 0;
1766 for entry_result in archive
1767 .entries()
1768 .map_err(|e| format!("Failed to read tar entries: {}", e))?
1769 {
1770 entry_count += 1;
1771 if entry_count > MAX_ITERATION_COUNT {
1772 warn!(
1773 "Exceeded max tar entry count ({}) in .gem archive, stopping iteration",
1774 MAX_ITERATION_COUNT
1775 );
1776 break;
1777 }
1778
1779 let entry = entry_result.map_err(|e| format!("Failed to read tar entry: {}", e))?;
1780 let entry_path = entry
1781 .path()
1782 .map_err(|e| format!("Failed to get entry path: {}", e))?;
1783 let entry_str = entry_path.to_string_lossy();
1784 if entry_str.contains("..") {
1785 warn!("Skipping tar entry with path traversal: {}", entry_str);
1786 continue;
1787 }
1788
1789 if entry_path.to_str() == Some("metadata.gz") {
1790 let entry_size = entry.size();
1791 if entry_size > MAX_FILE_SIZE {
1792 return Err(format!(
1793 "metadata.gz too large: {} bytes (limit: {} bytes)",
1794 entry_size, MAX_FILE_SIZE
1795 ));
1796 }
1797
1798 let mut decoder = GzDecoder::new(entry);
1799 let mut content = Vec::new();
1800 let mut limited = std::io::Read::take(&mut decoder, MAX_FILE_SIZE + 1);
1801 limited
1802 .read_to_end(&mut content)
1803 .map_err(|e| format!("Failed to decompress metadata.gz: {}", e))?;
1804
1805 if content.len() > MAX_FILE_SIZE as usize {
1806 return Err(format!(
1807 "Decompressed metadata too large: exceeds {} byte limit",
1808 MAX_FILE_SIZE
1809 ));
1810 }
1811
1812 let content = match String::from_utf8(content) {
1813 Ok(s) => s,
1814 Err(err) => {
1815 let bytes = err.into_bytes();
1816 warn!("Invalid UTF-8 in gem metadata; using lossy conversion");
1817 String::from_utf8_lossy(&bytes).into_owned()
1818 }
1819 };
1820
1821 let uncompressed_size = content.len() as u64;
1822 if entry_size > 0 {
1823 let ratio = uncompressed_size as f64 / entry_size as f64;
1824 if ratio > MAX_COMPRESSION_RATIO {
1825 return Err(format!(
1826 "Suspicious compression ratio: {:.2}:1 (limit: {:.0}:1)",
1827 ratio, MAX_COMPRESSION_RATIO
1828 ));
1829 }
1830 }
1831
1832 return parse_gem_metadata_yaml(&content, DatasourceId::GemArchive);
1833 }
1834 }
1835
1836 Err("metadata.gz not found in .gem archive".to_string())
1837}
1838
1839fn parse_gem_metadata_yaml(
1840 content: &str,
1841 datasource_id: DatasourceId,
1842) -> Result<PackageData, String> {
1843 let cleaned = clean_ruby_yaml_tags(content);
1847
1848 let yaml: yaml_serde::Value =
1849 yaml_serde::from_str(&cleaned).map_err(|e| format!("Failed to parse YAML: {}", e))?;
1850
1851 let name = yaml_string(&yaml, "name").map(truncate_field);
1852 let version = yaml.get("version").and_then(|v| {
1853 if v.is_string() {
1854 v.as_str().map(|s| truncate_field(s.to_string()))
1855 } else {
1856 yaml_string(v, "version").map(truncate_field)
1857 }
1858 });
1859 let description = yaml_string(&yaml, "description")
1860 .or_else(|| yaml_string(&yaml, "summary"))
1861 .map(truncate_field);
1862 let homepage = yaml_string(&yaml, "homepage").map(truncate_field);
1863 let summary = yaml_string(&yaml, "summary").map(truncate_field);
1864
1865 let licenses: Vec<String> = yaml
1867 .get("licenses")
1868 .and_then(|v| v.as_sequence())
1869 .map(|seq| {
1870 seq.iter()
1871 .filter_map(|item| item.as_str().map(|s| truncate_field(s.to_string())))
1872 .collect()
1873 })
1874 .unwrap_or_default();
1875
1876 let extracted_license_statement = if !licenses.is_empty() {
1878 Some(licenses.join(" AND "))
1879 } else {
1880 None
1881 };
1882
1883 let (license_expression, license_expression_spdx, license_detections) =
1884 normalize_spdx_declared_license(extracted_license_statement.as_deref());
1885
1886 let authors: Vec<String> = yaml
1888 .get("authors")
1889 .and_then(|v| v.as_sequence())
1890 .map(|seq| {
1891 seq.iter()
1892 .filter_map(|item| item.as_str().map(|s| truncate_field(s.to_string())))
1893 .collect()
1894 })
1895 .unwrap_or_default();
1896
1897 let emails: Vec<String> = yaml
1898 .get("email")
1899 .map(|v| {
1900 if let Some(seq) = v.as_sequence() {
1901 seq.iter()
1902 .filter_map(|item| item.as_str().map(|s| truncate_field(s.to_string())))
1903 .collect()
1904 } else if let Some(s) = v.as_str() {
1905 vec![truncate_field(s.to_string())]
1906 } else {
1907 Vec::new()
1908 }
1909 })
1910 .unwrap_or_default();
1911
1912 let mut parties: Vec<Party> = Vec::new();
1914 let max_len = authors.len().max(emails.len());
1915 for i in 0..max_len {
1916 let author_name = authors.get(i).map(|s| s.as_str());
1917 let email_str = emails.get(i).map(|s| s.as_str());
1918
1919 let (parsed_email_name, parsed_email) = match email_str {
1920 Some(e) if e.contains('<') => split_name_email(e),
1921 None => (None, None),
1922 _ => (None, None),
1923 };
1924
1925 let party_name = author_name.map(|s| s.to_string()).or(parsed_email_name);
1926
1927 parties.push(Party {
1928 r#type: Some("person".to_string()),
1929 role: Some("author".to_string()),
1930 name: party_name,
1931 email: parsed_email.or_else(|| {
1932 email_str
1933 .filter(|e| e.contains('@') && !e.contains('<'))
1934 .map(|e| e.to_string())
1935 }),
1936 url: None,
1937 organization: None,
1938 organization_url: None,
1939 timezone: None,
1940 });
1941 }
1942
1943 let dependencies = parse_gem_yaml_dependencies(&yaml);
1945
1946 let metadata = yaml.get("metadata");
1947
1948 let bug_tracking_url = metadata
1949 .and_then(|m| yaml_string(m, "bug_tracking_uri"))
1950 .map(truncate_field);
1951
1952 let code_view_url = metadata
1953 .and_then(|m| yaml_string(m, "source_code_uri"))
1954 .map(truncate_field);
1955
1956 let vcs_url = code_view_url.clone().or_else(|| {
1957 metadata
1958 .and_then(|m| yaml_string(m, "homepage_uri"))
1959 .map(truncate_field)
1960 });
1961
1962 let file_references = metadata
1963 .and_then(|m| m.get("files"))
1964 .and_then(|f| f.as_sequence())
1965 .map(|seq| {
1966 seq.iter()
1967 .filter_map(|v| v.as_str())
1968 .map(|s| crate::models::FileReference {
1969 path: s.to_string(),
1970 size: None,
1971 sha1: None,
1972 md5: None,
1973 sha256: None,
1974 sha512: None,
1975 extra_data: None,
1976 })
1977 .collect::<Vec<_>>()
1978 })
1979 .unwrap_or_default();
1980
1981 let release_date = yaml_string(&yaml, "date").and_then(|d| {
1982 if d.len() >= 10 {
1983 Some(d[..10].to_string())
1984 } else {
1985 None
1986 }
1987 });
1988
1989 let purl = name
1990 .as_deref()
1991 .map(|n| create_gem_purl(n, version.as_deref()))
1992 .unwrap_or(None);
1993
1994 let platform = yaml_string(&yaml, "platform").map(truncate_field);
1995 let (repository_homepage_url, repository_download_url, api_data_url, download_url) =
1996 if let Some(n) = name.as_deref() {
1997 get_rubygems_urls(n, version.as_deref(), platform.as_deref())
1998 } else {
1999 (None, None, None, None)
2000 };
2001
2002 let qualifiers = if let Some(ref p) = platform {
2003 if p != "ruby" {
2004 let mut q = HashMap::new();
2005 q.insert("platform".to_string(), p.clone());
2006 Some(q)
2007 } else {
2008 None
2009 }
2010 } else {
2011 None
2012 };
2013
2014 Ok(PackageData {
2015 package_type: Some(PACKAGE_TYPE),
2016 name,
2017 version,
2018 qualifiers,
2019 primary_language: Some("Ruby".to_string()),
2020 description: description.or(summary),
2021 release_date,
2022 homepage_url: homepage,
2023 download_url,
2024 bug_tracking_url,
2025 code_view_url,
2026 declared_license_expression: license_expression,
2027 declared_license_expression_spdx: license_expression_spdx,
2028 license_detections,
2029 extracted_license_statement,
2030 file_references,
2031 parties,
2032 dependencies,
2033 repository_homepage_url,
2034 repository_download_url,
2035 api_data_url,
2036 datasource_id: Some(datasource_id),
2037 purl,
2038 vcs_url,
2039 ..default_package_data()
2040 })
2041}
2042
2043fn clean_ruby_yaml_tags(content: &str) -> String {
2045 let tag_re = match Regex::new(r"!ruby/\S+") {
2046 Ok(r) => r,
2047 Err(_) => return content.to_string(),
2048 };
2049 tag_re.replace_all(content, "").to_string()
2050}
2051
2052fn yaml_string(yaml: &yaml_serde::Value, key: &str) -> Option<String> {
2053 yaml.get(key)
2054 .and_then(|v| v.as_str())
2055 .filter(|s| !s.is_empty())
2056 .map(|s| s.to_string())
2057}
2058
2059fn parse_gem_yaml_dependencies(yaml: &yaml_serde::Value) -> Vec<Dependency> {
2060 let mut dependencies = Vec::new();
2061
2062 let deps_seq = match yaml.get("dependencies").and_then(|v| v.as_sequence()) {
2063 Some(seq) => seq,
2064 None => return dependencies,
2065 };
2066
2067 for dep_value in deps_seq.iter().take(MAX_ITERATION_COUNT) {
2068 let dep_name = match yaml_string(dep_value, "name").map(truncate_field) {
2069 Some(n) => n,
2070 None => continue,
2071 };
2072
2073 let dep_type = yaml_string(dep_value, "type");
2074 let is_development = dep_type.as_deref() == Some(":development");
2075
2076 let requirements = dep_value
2078 .get("requirement")
2079 .or_else(|| dep_value.get("version_requirements"))
2080 .and_then(|req| req.get("requirements"))
2081 .and_then(|reqs| reqs.as_sequence());
2082
2083 let extracted_requirement = requirements.map(|reqs| {
2084 let parts: Vec<String> = reqs
2085 .iter()
2086 .filter_map(|req| {
2087 let seq = req.as_sequence()?;
2088 if seq.len() >= 2 {
2089 let op = seq[0].as_str().unwrap_or("");
2090 let ver = seq[1].get("version").and_then(|v| v.as_str()).unwrap_or("");
2091 if op == ">=" && ver == "0" {
2092 None
2094 } else if op.is_empty() || ver.is_empty() {
2095 None
2096 } else {
2097 Some(format!("{} {}", op, ver))
2098 }
2099 } else {
2100 None
2101 }
2102 })
2103 .collect();
2104 parts.join(", ")
2105 });
2106
2107 let extracted_requirement = extracted_requirement
2108 .filter(|s| !s.is_empty())
2109 .or_else(|| Some(String::new()));
2110
2111 let (scope, is_runtime, is_optional) = if is_development {
2112 (Some("development".to_string()), false, true)
2113 } else {
2114 (Some("runtime".to_string()), true, false)
2115 };
2116
2117 let purl = create_gem_purl(&dep_name, None);
2118
2119 dependencies.push(Dependency {
2120 purl,
2121 extracted_requirement,
2122 scope,
2123 is_runtime: Some(is_runtime),
2124 is_optional: Some(is_optional),
2125 is_pinned: None,
2126 is_direct: Some(true),
2127 resolved_package: None,
2128 extra_data: None,
2129 });
2130 }
2131
2132 dependencies
2133}
2134
2135pub struct GemMetadataExtractedParser;
2140
2141impl PackageParser for GemMetadataExtractedParser {
2142 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
2143
2144 fn extract_packages(path: &Path) -> Vec<PackageData> {
2145 vec![match extract_gem_metadata_extracted(path) {
2146 Ok(data) => data,
2147 Err(e) => {
2148 warn!("Failed to extract gem metadata from {:?}: {}", path, e);
2149 default_package_data_with_datasource(DatasourceId::GemArchiveExtracted)
2150 }
2151 }]
2152 }
2153
2154 fn is_match(path: &Path) -> bool {
2155 path.to_str()
2156 .is_some_and(|p| p.contains("metadata.gz-extract"))
2157 }
2158}
2159
2160fn extract_gem_metadata_extracted(path: &Path) -> Result<PackageData, String> {
2161 let content = read_file_to_string(path, None)
2162 .map_err(|e| format!("Failed to read metadata.gz-extract file: {}", e))?;
2163
2164 parse_gem_metadata_yaml(&content, DatasourceId::GemArchiveExtracted)
2165}
2166
// Parser registrations for the Ruby ecosystem.
// NOTE(review): the arguments appear to be, in order: a human-readable
// description, the path glob patterns handled, the package type, the primary
// language, and an optional documentation URL. Confirm against the
// `register_parser!` macro definition.

// Gemfile manifests, including ones found under `data.gz-extract/`.
crate::register_parser!(
    "Ruby Gemfile manifest",
    &["**/Gemfile", "**/data.gz-extract/Gemfile"],
    "gem",
    "Ruby",
    Some("https://bundler.io/man/gemfile.5.html"),
);

// Gemfile.lock lockfiles, including ones found under `data.gz-extract/`.
crate::register_parser!(
    "Ruby Gemfile.lock lockfile",
    &["**/Gemfile.lock", "**/data.gz-extract/Gemfile.lock"],
    "gem",
    "Ruby",
    Some("https://bundler.io/man/gemfile.5.html"),
);

// `.gemspec` manifests, wherever they appear.
crate::register_parser!(
    "Ruby .gemspec manifest",
    &[
        "**/*.gemspec",
        "**/data.gz-extract/*.gemspec",
        "**/specifications/*.gemspec"
    ],
    "gem",
    "Ruby",
    Some("https://guides.rubygems.org/specification-reference/"),
);

// Packaged `.gem` archives.
crate::register_parser!(
    "Ruby .gem archive",
    &["**/*.gem"],
    "gem",
    "Ruby",
    Some("https://guides.rubygems.org/specification-reference/"),
);

// Gem metadata already extracted from an archive's `metadata.gz`.
crate::register_parser!(
    "Ruby gem metadata (extracted)",
    &["**/metadata.gz-extract"],
    "gem",
    "Ruby",
    Some("https://guides.rubygems.org/specification-reference/"),
);
2211
/// Unit tests for the gemspec parsing helpers.
#[cfg(test)]
mod tests {
    use super::parse_gemspec;

    /// A `%q{` literal with no closing brace is cleaned down to its text.
    #[test]
    fn test_clean_gemspec_value_handles_unterminated_percent_q() {
        assert_eq!(
            super::clean_gemspec_value("%q{Arel is a SQL AST manager for Ruby. It"),
            "Arel is a SQL AST manager for Ruby. It"
        );
    }

    /// `add_runtime_dependency` and `add_dependency` both yield dependencies
    /// scoped "runtime" that carry their version requirement.
    #[test]
    fn test_parse_gemspec_runtime_dependency_scope() {
        let content = r#"
Gem::Specification.new do |spec|
  spec.name = "demo"
  spec.version = "1.0.0"
  spec.add_runtime_dependency "rack", "~> 3.0"
  spec.add_dependency "thor", ">= 1.0"
end
"#;

        let package_data = parse_gemspec(content);
        // Dependencies appear in source order: rack first, then thor.
        assert_eq!(package_data.dependencies.len(), 2);
        assert_eq!(
            package_data.dependencies[0].scope,
            Some("runtime".to_string())
        );
        assert_eq!(
            package_data.dependencies[0].extracted_requirement,
            Some("~> 3.0".to_string())
        );
        assert_eq!(
            package_data.dependencies[1].scope,
            Some("runtime".to_string())
        );
        assert_eq!(
            package_data.dependencies[1].extracted_requirement,
            Some(">= 1.0".to_string())
        );
    }
}