1use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Party};
31use crate::parser_warn as warn;
32use crate::parsers::utils::{
33 MAX_ITERATION_COUNT, read_file_to_string, split_name_email, truncate_field,
34};
35use flate2::read::GzDecoder;
36use packageurl::PackageUrl;
37use regex::Regex;
38use std::collections::HashMap;
39use std::fs::{self, File};
40use std::io::Read;
41use std::path::{Path, PathBuf};
42use tar::Archive;
43
44use super::PackageParser;
45use super::license_normalization::normalize_spdx_declared_license;
46use super::metadata::ParserMetadata;
47
48const PACKAGE_TYPE: PackageType = PackageType::Gem;
49
/// Removes every trailing `.freeze` from `s`.
///
/// The suffix is stripped repeatedly, so `"x.freeze.freeze"` becomes `"x"`.
/// Input without the suffix is returned unchanged.
pub fn strip_freeze_suffix(s: &str) -> &str {
    let mut remaining = s;
    while let Some(head) = remaining.strip_suffix(".freeze") {
        remaining = head;
    }
    remaining
}
64
/// A `do ... end` block that is currently open while scanning a Gemfile.
///
/// `parse_gemfile` keeps a stack of these so that `gem` lines can inherit the
/// settings of the blocks enclosing them.
enum GemfileBlock {
    /// `group :a, :b do` — holds the symbol names listed on the `group` line.
    Group(Vec<String>),
    /// `source "url" do` — holds the quoted source URL.
    Source(String),
}
69
70pub struct GemfileParser;
79
80impl PackageParser for GemfileParser {
81 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
82
83 fn metadata() -> Vec<ParserMetadata> {
84 vec![ParserMetadata {
85 description: "Ruby Gemfile manifest",
86 file_patterns: &["**/Gemfile", "**/data.gz-extract/Gemfile"],
87 package_type: "gem",
88 primary_language: "Ruby",
89 documentation_url: Some("https://bundler.io/man/gemfile.5.html"),
90 }]
91 }
92
93 fn extract_packages(path: &Path) -> Vec<PackageData> {
94 let datasource_id = gemfile_datasource_id(path);
95 let content = match read_file_to_string(path, None) {
96 Ok(c) => c,
97 Err(e) => {
98 warn!("Failed to read Gemfile at {:?}: {}", path, e);
99 return vec![default_package_data_with_datasource(datasource_id)];
100 }
101 };
102
103 let mut package_data = parse_gemfile(&content);
104 package_data.datasource_id = Some(datasource_id);
105 vec![package_data]
106 }
107
108 fn is_match(path: &Path) -> bool {
109 path.file_name()
110 .and_then(|n| n.to_str())
111 .is_some_and(|name| name == "Gemfile")
112 || path
113 .to_str()
114 .is_some_and(|p| p.contains("data.gz-extract/") && p.ends_with("/Gemfile"))
115 }
116}
117
118fn parse_gemfile(content: &str) -> PackageData {
120 let mut dependencies = Vec::new();
121 let mut block_stack = Vec::new();
122 let mut default_source = None;
123 let mut sources = Vec::new();
124
125 let gem_regex = match Regex::new(
128 r#"^\s*gem\s+["']([^"']+)["'](?:\.freeze)?(?:\s*,\s*["']([^"']+)["'](?:\.freeze)?)?(?:\s*,\s*["']([^"']+)["'](?:\.freeze)?)?(?:\s*,\s*(.+))?"#,
129 ) {
130 Ok(r) => r,
131 Err(e) => {
132 warn!("Failed to compile gem regex: {}", e);
133 return default_package_data_with_datasource(DatasourceId::Gemfile);
134 }
135 };
136
137 let group_start_regex = match Regex::new(r"^\s*group\s+(.+?)\s+do\s*$") {
139 Ok(r) => r,
140 Err(e) => {
141 warn!("Failed to compile group regex: {}", e);
142 return default_package_data_with_datasource(DatasourceId::Gemfile);
143 }
144 };
145
146 let group_end_regex = match Regex::new(r"^\s*end\s*$") {
147 Ok(r) => r,
148 Err(e) => {
149 warn!("Failed to compile end regex: {}", e);
150 return default_package_data_with_datasource(DatasourceId::Gemfile);
151 }
152 };
153
154 let source_block_start_regex = match Regex::new(r#"^\s*source\s+["']([^"']+)["']\s+do\s*$"#) {
155 Ok(r) => r,
156 Err(e) => {
157 warn!("Failed to compile source block regex: {}", e);
158 return default_package_data_with_datasource(DatasourceId::Gemfile);
159 }
160 };
161
162 let source_regex = match Regex::new(r#"^\s*source\s+["']([^"']+)["']\s*$"#) {
163 Ok(r) => r,
164 Err(e) => {
165 warn!("Failed to compile source regex: {}", e);
166 return default_package_data_with_datasource(DatasourceId::Gemfile);
167 }
168 };
169
170 let symbol_regex = match Regex::new(r":(\w+)") {
172 Ok(r) => r,
173 Err(e) => {
174 warn!("Failed to compile symbol regex: {}", e);
175 return default_package_data_with_datasource(DatasourceId::Gemfile);
176 }
177 };
178
179 for line in content.lines().take(MAX_ITERATION_COUNT) {
180 let trimmed = line.trim();
181
182 if trimmed.is_empty() || trimmed.starts_with('#') {
184 continue;
185 }
186
187 if let Some(caps) = group_start_regex.captures(trimmed) {
189 let groups_str = caps.get(1).map(|m| m.as_str()).unwrap_or("");
190 let mut current_groups = Vec::new();
191 for cap in symbol_regex.captures_iter(groups_str) {
192 if let Some(group_name) = cap.get(1) {
193 current_groups.push(group_name.as_str().to_string());
194 }
195 }
196 block_stack.push(GemfileBlock::Group(current_groups));
197 continue;
198 }
199
200 if let Some(caps) = source_block_start_regex.captures(trimmed) {
201 let source = caps
202 .get(1)
203 .map(|m| m.as_str().to_string())
204 .unwrap_or_default();
205 if !source.is_empty() {
206 push_unique_string(&mut sources, source.clone());
207 block_stack.push(GemfileBlock::Source(source));
208 }
209 continue;
210 }
211
212 if let Some(caps) = source_regex.captures(trimmed) {
213 if let Some(source) = caps.get(1).map(|m| m.as_str().to_string()) {
214 push_unique_string(&mut sources, source.clone());
215 default_source = Some(source);
216 }
217 continue;
218 }
219
220 if group_end_regex.is_match(trimmed) {
222 block_stack.pop();
223 continue;
224 }
225
226 if let Some(caps) = gem_regex.captures(trimmed) {
228 let name = strip_freeze_suffix(caps.get(1).map(|m| m.as_str()).unwrap_or(""));
229 if name.is_empty() {
230 continue;
231 }
232
233 let mut version_parts = Vec::new();
235 if let Some(v) = caps.get(2) {
236 version_parts.push(strip_freeze_suffix(v.as_str()).to_string());
237 }
238 if let Some(v) = caps.get(3) {
239 let v_str = strip_freeze_suffix(v.as_str());
240 if looks_like_version_constraint(v_str) {
242 version_parts.push(v_str.to_string());
243 }
244 }
245
246 let extracted_requirement = if version_parts.is_empty() {
247 None
248 } else {
249 Some(version_parts.join(", "))
250 };
251
252 let current_groups = current_group_names(&block_stack);
253
254 let (scope, is_runtime, is_optional) = if current_groups.is_empty() {
257 (None, true, false)
259 } else if current_groups.iter().any(|g| g == "development") {
260 (Some("development".to_string()), false, true)
261 } else if current_groups.iter().any(|g| g == "test") {
262 (Some("test".to_string()), false, true)
263 } else {
264 let group = current_groups.first().cloned();
266 (group, true, false)
267 };
268
269 let purl = create_gem_purl(name, None);
271 let inherited_source = current_source(&block_stack, default_source.as_deref());
272 let extra_data = build_gemfile_dependency_extra_data(
273 caps.get(4).map(|m| m.as_str()),
274 inherited_source.as_deref(),
275 );
276
277 dependencies.push(Dependency {
278 purl,
279 extracted_requirement,
280 scope,
281 is_runtime: Some(is_runtime),
282 is_optional: Some(is_optional),
283 is_pinned: None,
284 is_direct: Some(true),
285 resolved_package: None,
286 extra_data,
287 });
288 }
289 }
290
291 let extra_data = if sources.is_empty() {
292 None
293 } else {
294 Some(HashMap::from([(
295 "sources".to_string(),
296 serde_json::Value::Array(sources.into_iter().map(serde_json::Value::String).collect()),
297 )]))
298 };
299
300 PackageData {
301 package_type: Some(PACKAGE_TYPE),
302 primary_language: Some("Ruby".to_string()),
303 dependencies,
304 extra_data,
305 datasource_id: Some(DatasourceId::Gemfile),
306 ..default_package_data()
307 }
308}
309
310fn current_group_names(block_stack: &[GemfileBlock]) -> Vec<String> {
311 block_stack
312 .iter()
313 .rev()
314 .find_map(|block| match block {
315 GemfileBlock::Group(groups) => Some(groups.clone()),
316 GemfileBlock::Source(_) => None,
317 })
318 .unwrap_or_default()
319}
320
321fn current_source(block_stack: &[GemfileBlock], default_source: Option<&str>) -> Option<String> {
322 block_stack
323 .iter()
324 .rev()
325 .find_map(|block| match block {
326 GemfileBlock::Source(source) => Some(source.clone()),
327 GemfileBlock::Group(_) => None,
328 })
329 .or_else(|| default_source.map(str::to_string))
330}
331
/// Appends `value` to `values` unless an equal string is already present.
fn push_unique_string(values: &mut Vec<String>, value: String) {
    let already_present = values.iter().any(|existing| *existing == value);
    if already_present {
        return;
    }
    values.push(value);
}
337
338fn build_gemfile_dependency_extra_data(
339 options: Option<&str>,
340 inherited_source: Option<&str>,
341) -> Option<HashMap<String, serde_json::Value>> {
342 let mut extra = HashMap::new();
343 let options = options.unwrap_or("");
344
345 if let Some(git) = extract_gemfile_quoted_option(options, "git") {
346 extra.insert(
347 "source_type".to_string(),
348 serde_json::Value::String("GIT".to_string()),
349 );
350 extra.insert("git".to_string(), serde_json::Value::String(git.clone()));
351 extra.insert("remote".to_string(), serde_json::Value::String(git));
352 }
353
354 if let Some(path) = extract_gemfile_quoted_option(options, "path") {
355 extra.insert(
356 "source_type".to_string(),
357 serde_json::Value::String("PATH".to_string()),
358 );
359 extra.insert("path".to_string(), serde_json::Value::String(path));
360 }
361
362 for key in ["branch", "ref", "tag"] {
363 if let Some(value) = extract_gemfile_quoted_option(options, key) {
364 extra.insert(key.to_string(), serde_json::Value::String(value));
365 }
366 }
367
368 let direct_source = extract_gemfile_quoted_option(options, "source");
369 if let Some(source) = direct_source {
370 extra.insert("source".to_string(), serde_json::Value::String(source));
371 } else if !extra.contains_key("source_type")
372 && let Some(source) = inherited_source
373 {
374 extra.insert(
375 "source".to_string(),
376 serde_json::Value::String(source.to_string()),
377 );
378 }
379
380 (!extra.is_empty()).then_some(extra)
381}
382
383fn extract_gemfile_quoted_option(options: &str, key: &str) -> Option<String> {
384 if options.is_empty() {
385 return None;
386 }
387
388 let pattern = format!(r#"(?:^|,\s*){}\s*:\s*["']([^"']+)["']"#, regex::escape(key));
389 Regex::new(&pattern)
390 .ok()
391 .and_then(|regex| regex.captures(options))
392 .and_then(|captures| captures.get(1).map(|m| m.as_str().to_string()))
393}
394
/// Returns true when `s` begins like a version constraint.
///
/// That is, when its first character is one of `~`, `>`, `<`, `=`, `!` or an
/// ASCII digit. Empty input returns false.
fn looks_like_version_constraint(s: &str) -> bool {
    matches!(
        s.chars().next(),
        Some('~' | '>' | '<' | '=' | '!' | '0'..='9')
    )
}
404
405pub struct GemfileLockParser;
414
415impl PackageParser for GemfileLockParser {
416 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
417
418 fn metadata() -> Vec<ParserMetadata> {
419 vec![ParserMetadata {
420 description: "Ruby Gemfile.lock lockfile",
421 file_patterns: &["**/Gemfile.lock", "**/data.gz-extract/Gemfile.lock"],
422 package_type: "gem",
423 primary_language: "Ruby",
424 documentation_url: Some("https://bundler.io/man/gemfile.5.html"),
425 }]
426 }
427
428 fn extract_packages(path: &Path) -> Vec<PackageData> {
429 let datasource_id = gemfile_lock_datasource_id(path);
430 let content = match read_file_to_string(path, None) {
431 Ok(c) => c,
432 Err(e) => {
433 warn!("Failed to read Gemfile.lock at {:?}: {}", path, e);
434 return vec![default_package_data_with_datasource(datasource_id)];
435 }
436 };
437
438 let mut package_data = parse_gemfile_lock(&content);
439 package_data.datasource_id = Some(datasource_id);
440 vec![package_data]
441 }
442
443 fn is_match(path: &Path) -> bool {
444 path.file_name()
445 .and_then(|n| n.to_str())
446 .is_some_and(|name| name == "Gemfile.lock")
447 || path
448 .to_str()
449 .is_some_and(|p| p.contains("data.gz-extract/") && p.ends_with("/Gemfile.lock"))
450 }
451}
452
/// The section of a `Gemfile.lock` that `parse_gemfile_lock` is currently reading.
#[derive(Debug, Clone, PartialEq)]
enum ParseState {
    /// No section has been entered yet.
    None,
    /// Inside a `GEM` section, before its `specs:` line.
    Gem,
    /// Inside a `GIT` section, before its `specs:` line.
    Git,
    /// Inside a `PATH` section, before its `specs:` line.
    Path,
    /// Inside an `SVN` section, before its `specs:` line.
    Svn,
    /// After the `specs:` line of a GEM/GIT/PATH/SVN section.
    Specs,
    /// Inside the `PLATFORMS` section.
    Platforms,
    /// Inside the `BUNDLED WITH` section.
    BundledWith,
    /// Inside the `DEPENDENCIES` section.
    Dependencies,
}
466
/// Everything `parse_gemfile_lock` collects about a single gem.
#[derive(Debug, Clone, Default)]
struct GemInfo {
    /// Gem name as written in the lockfile.
    name: String,
    /// Version part of the parenthesised spec text, if any.
    version: Option<String>,
    /// Platform part of the parenthesised spec text (everything after the first `-`).
    platform: Option<String>,
    /// Name of the section the gem was listed in: "GEM", "GIT", "PATH" or "SVN".
    gem_type: String,
    /// Value of the section's `remote:` option.
    remote: Option<String>,
    /// Value of the section's `revision:` option.
    revision: Option<String>,
    /// Value of the section's `ref:` option.
    ref_field: Option<String>,
    /// Value of the section's `branch:` option.
    branch: Option<String>,
    /// Value of the section's `tag:` option.
    tag: Option<String>,
    /// True when the DEPENDENCIES entry ends with `!`.
    pinned: bool,
    /// Parenthesised constraints taken from DEPENDENCIES entries.
    requirements: Vec<String>,
}
487
488fn select_primary_path_gem(gems: &HashMap<String, GemInfo>) -> Option<GemInfo> {
489 let mut path_gems: Vec<&GemInfo> = gems.values().filter(|gem| gem.gem_type == "PATH").collect();
490 path_gems.sort_by(|left, right| {
491 left.remote
492 .as_deref()
493 .cmp(&right.remote.as_deref())
494 .then_with(|| left.name.cmp(&right.name))
495 });
496
497 path_gems
498 .iter()
499 .copied()
500 .find(|gem| gem.pinned && gem.remote.as_deref() == Some("."))
501 .or_else(|| path_gems.iter().copied().find(|gem| gem.pinned))
502 .or_else(|| {
503 path_gems
504 .iter()
505 .copied()
506 .find(|gem| gem.remote.as_deref() == Some("."))
507 })
508 .or_else(|| path_gems.first().copied())
509 .cloned()
510}
511
512fn parse_gemfile_lock(content: &str) -> PackageData {
514 let mut state = ParseState::None;
515 let mut dependencies = Vec::new();
516 let mut gems: HashMap<String, GemInfo> = HashMap::new();
517 let mut platforms: Vec<String> = Vec::new();
518 let mut bundler_version: Option<String> = None;
519 let mut current_gem_type = String::new();
520 let mut current_remote: Option<String> = None;
521 let mut current_options: HashMap<String, String> = HashMap::new();
522
523 let deps_regex = match Regex::new(r"^ {2}([^ \)\(,!:]+)(?: \(([^)]+)\))?(!)?$") {
525 Ok(r) => r,
526 Err(e) => {
527 warn!("Failed to compile deps regex: {}", e);
528 return default_package_data_with_datasource(DatasourceId::GemfileLock);
529 }
530 };
531
532 let spec_deps_regex = match Regex::new(r"^ {4}([^ \)\(,!:]+)(?: \(([^)]+)\))?$") {
534 Ok(r) => r,
535 Err(e) => {
536 warn!("Failed to compile spec_deps regex: {}", e);
537 return default_package_data_with_datasource(DatasourceId::GemfileLock);
538 }
539 };
540
541 let options_regex = match Regex::new(r"^ {2}([a-z]+): (.+)$") {
543 Ok(r) => r,
544 Err(e) => {
545 warn!("Failed to compile options regex: {}", e);
546 return default_package_data_with_datasource(DatasourceId::GemfileLock);
547 }
548 };
549
550 let version_regex = match Regex::new(r"^\s+(\d+(?:\.\d+)+)\s*$") {
552 Ok(r) => r,
553 Err(e) => {
554 warn!("Failed to compile version regex: {}", e);
555 return default_package_data_with_datasource(DatasourceId::GemfileLock);
556 }
557 };
558
559 for line in content.lines().take(MAX_ITERATION_COUNT) {
560 let trimmed = line.trim_end();
561
562 if trimmed.is_empty() {
564 current_options.clear();
565 continue;
566 }
567
568 match trimmed {
570 "GEM" => {
571 state = ParseState::Gem;
572 current_gem_type = "GEM".to_string();
573 current_remote = None;
574 current_options.clear();
575 continue;
576 }
577 "GIT" => {
578 state = ParseState::Git;
579 current_gem_type = "GIT".to_string();
580 current_remote = None;
581 current_options.clear();
582 continue;
583 }
584 "PATH" => {
585 state = ParseState::Path;
586 current_gem_type = "PATH".to_string();
587 current_remote = None;
588 current_options.clear();
589 continue;
590 }
591 "SVN" => {
592 state = ParseState::Svn;
593 current_gem_type = "SVN".to_string();
594 current_remote = None;
595 current_options.clear();
596 continue;
597 }
598 "PLATFORMS" => {
599 state = ParseState::Platforms;
600 continue;
601 }
602 "BUNDLED WITH" => {
603 state = ParseState::BundledWith;
604 continue;
605 }
606 "DEPENDENCIES" => {
607 state = ParseState::Dependencies;
608 continue;
609 }
610 _ => {}
611 }
612
613 if trimmed.trim() == "specs:" {
617 state = match state {
618 ParseState::Gem | ParseState::Git | ParseState::Path | ParseState::Svn => {
619 ParseState::Specs
620 }
621 _ => state,
622 };
623 continue;
624 }
625
626 match state {
628 ParseState::Gem | ParseState::Git | ParseState::Path | ParseState::Svn => {
629 if let Some(caps) = options_regex.captures(line) {
631 let key = caps.get(1).map(|m| m.as_str()).unwrap_or("");
632 let value = caps.get(2).map(|m| m.as_str()).unwrap_or("");
633 current_options.insert(key.to_string(), value.to_string());
634 if key == "remote" {
635 current_remote = Some(value.to_string());
636 }
637 }
638 }
639 ParseState::Specs => {
640 if let Some(caps) = spec_deps_regex.captures(line) {
642 let name = caps.get(1).map(|m| m.as_str()).unwrap_or("").to_string();
643 let version_str = caps.get(2).map(|m| m.as_str()).unwrap_or("");
644
645 let (version, platform) = parse_version_platform(version_str);
647
648 if !name.is_empty() {
649 let gem_info = GemInfo {
650 name: name.clone(),
651 version,
652 platform,
653 gem_type: current_gem_type.clone(),
654 remote: current_remote.clone(),
655 revision: current_options.get("revision").cloned(),
656 ref_field: current_options.get("ref").cloned(),
657 branch: current_options.get("branch").cloned(),
658 tag: current_options.get("tag").cloned(),
659 pinned: false,
660 requirements: Vec::new(),
661 };
662 gems.insert(name, gem_info);
663 }
664 }
665 }
666 ParseState::Platforms => {
667 let platform = trimmed.trim();
669 if !platform.is_empty() {
670 platforms.push(platform.to_string());
671 }
672 }
673 ParseState::BundledWith => {
674 if let Some(caps) = version_regex.captures(line) {
676 bundler_version = caps.get(1).map(|m| m.as_str().to_string());
677 }
678 }
679 ParseState::Dependencies => {
680 if let Some(caps) = deps_regex.captures(line) {
682 let name = caps.get(1).map(|m| m.as_str()).unwrap_or("").to_string();
683 let version_constraint = caps.get(2).map(|m| m.as_str().to_string());
684 let pinned = caps.get(3).is_some();
685
686 if !name.is_empty() {
687 if let Some(gem) = gems.get_mut(&name) {
689 gem.pinned = pinned;
690 if let Some(vc) = &version_constraint {
691 gem.requirements.push(vc.clone());
692 }
693 } else {
694 let gem_info = GemInfo {
695 name: name.clone(),
696 version: None,
697 platform: None,
698 gem_type: "GEM".to_string(),
699 remote: None,
700 revision: None,
701 ref_field: None,
702 branch: None,
703 tag: None,
704 pinned,
705 requirements: version_constraint.into_iter().collect(),
706 };
707 gems.insert(name, gem_info);
708 }
709 }
710 }
711 }
712 ParseState::None => {}
713 }
714 }
715
716 let primary_gem = select_primary_path_gem(&gems);
717
718 let (
719 package_name,
720 package_version,
721 repository_homepage_url,
722 repository_download_url,
723 api_data_url,
724 download_url,
725 ) = if let Some(ref pg) = primary_gem {
726 let urls = get_rubygems_urls(&pg.name, pg.version.as_deref(), pg.platform.as_deref());
727 (
728 Some(pg.name.clone()),
729 pg.version.clone(),
730 urls.0,
731 urls.1,
732 urls.2,
733 urls.3,
734 )
735 } else {
736 (None, None, None, None, None, None)
737 };
738
739 for (_, gem) in gems {
740 if let Some(ref pg) = primary_gem
741 && gem.name == pg.name
742 {
743 continue;
744 }
745
746 let version_for_purl = gem.version.as_deref();
747 let purl = create_gem_purl(&gem.name, version_for_purl);
748
749 let extracted_requirement = if !gem.requirements.is_empty() {
750 Some(gem.requirements.join(", "))
751 } else {
752 gem.version.clone()
753 };
754
755 let extra_data = build_gem_source_extra_data(&gem);
756
757 dependencies.push(Dependency {
758 purl,
759 extracted_requirement,
760 scope: Some("dependencies".to_string()),
761 is_runtime: Some(true),
762 is_optional: Some(false),
763 is_pinned: Some(gem.pinned),
764 is_direct: Some(true),
765 resolved_package: None,
766 extra_data,
767 });
768 }
769
770 dependencies.sort_by(|left, right| {
771 left.purl
772 .as_deref()
773 .cmp(&right.purl.as_deref())
774 .then_with(|| {
775 left.extracted_requirement
776 .as_deref()
777 .cmp(&right.extracted_requirement.as_deref())
778 })
779 });
780
781 let mut extra_data = HashMap::new();
783 if !platforms.is_empty() {
784 extra_data.insert(
785 "platforms".to_string(),
786 serde_json::Value::Array(
787 platforms
788 .into_iter()
789 .map(serde_json::Value::String)
790 .collect(),
791 ),
792 );
793 }
794 if let Some(bv) = bundler_version {
795 extra_data.insert("bundler_version".to_string(), serde_json::Value::String(bv));
796 }
797
798 let purl = package_name
799 .as_deref()
800 .map(|n| create_gem_purl(n, package_version.as_deref()))
801 .unwrap_or(None);
802
803 PackageData {
804 package_type: Some(PACKAGE_TYPE),
805 name: package_name,
806 version: package_version,
807 primary_language: Some("Ruby".to_string()),
808 download_url,
809 dependencies,
810 repository_homepage_url,
811 repository_download_url,
812 api_data_url,
813 extra_data: if extra_data.is_empty() {
814 None
815 } else {
816 Some(extra_data)
817 },
818 datasource_id: Some(DatasourceId::GemfileLock),
819 purl,
820 ..default_package_data()
821 }
822}
823
824fn build_gem_source_extra_data(gem: &GemInfo) -> Option<HashMap<String, serde_json::Value>> {
825 if gem.gem_type != "GIT" && gem.gem_type != "PATH" && gem.gem_type != "SVN" {
826 return None;
827 }
828
829 let mut extra = HashMap::new();
830 extra.insert(
831 "source_type".to_string(),
832 serde_json::Value::String(gem.gem_type.clone()),
833 );
834
835 if let Some(ref remote) = gem.remote {
836 extra.insert(
837 "remote".to_string(),
838 serde_json::Value::String(remote.clone()),
839 );
840 }
841 if let Some(ref revision) = gem.revision {
842 extra.insert(
843 "revision".to_string(),
844 serde_json::Value::String(revision.clone()),
845 );
846 }
847 if let Some(ref ref_field) = gem.ref_field {
848 extra.insert(
849 "ref".to_string(),
850 serde_json::Value::String(ref_field.clone()),
851 );
852 }
853 if let Some(ref branch) = gem.branch {
854 extra.insert(
855 "branch".to_string(),
856 serde_json::Value::String(branch.clone()),
857 );
858 }
859 if let Some(ref tag) = gem.tag {
860 extra.insert("tag".to_string(), serde_json::Value::String(tag.clone()));
861 }
862
863 Some(extra)
864}
865
/// Splits lockfile spec text such as `1.2.3-x86_64-linux` into version and platform.
///
/// The split happens at the first `-`. Text without a `-` is returned as the
/// version with no platform; empty text yields `(None, None)`.
fn parse_version_platform(s: &str) -> (Option<String>, Option<String>) {
    if s.is_empty() {
        return (None, None);
    }
    match s.split_once('-') {
        Some((version, platform)) => (Some(version.to_owned()), Some(platform.to_owned())),
        None => (Some(s.to_owned()), None),
    }
}
880
881fn create_gem_purl(name: &str, version: Option<&str>) -> Option<String> {
883 let mut purl = match PackageUrl::new(PACKAGE_TYPE.as_str(), name) {
884 Ok(p) => p,
885 Err(e) => {
886 warn!("Failed to create PURL for gem '{}': {}", name, e);
887 return None;
888 }
889 };
890
891 if let Some(v) = version
892 && let Err(e) = purl.with_version(v)
893 {
894 warn!("Failed to set version '{}' for gem '{}': {}", v, name, e);
895 }
896
897 Some(purl.to_string())
898}
899
/// Returns the rubygems.org page URL for a gem, or `None` when `name` is empty.
///
/// With a version the URL points at that version's page; the version is
/// trimmed of surrounding whitespace and `/` characters first.
fn rubygems_homepage_url(name: &str, version: Option<&str>) -> Option<String> {
    if name.is_empty() {
        return None;
    }

    let mut url = format!("https://rubygems.org/gems/{}", name);
    if let Some(raw_version) = version {
        url.push_str("/versions/");
        url.push_str(raw_version.trim().trim_matches('/'));
    }
    Some(url)
}
912
/// Returns the rubygems.org `.gem` download URL.
///
/// Returns `None` when `name` is empty or no version is given. Name and
/// version are trimmed of surrounding whitespace and `/` characters. A
/// platform other than `"ruby"` is appended to the version as `-<platform>`.
fn rubygems_download_url(
    name: &str,
    version: Option<&str>,
    platform: Option<&str>,
) -> Option<String> {
    let raw_version = version.filter(|_| !name.is_empty())?;

    let gem_name = name.trim().trim_matches('/');
    let gem_version = raw_version.trim().trim_matches('/');

    let file_stem = match platform {
        Some(p) if p != "ruby" => format!("{}-{}-{}", gem_name, gem_version, p),
        _ => format!("{}-{}", gem_name, gem_version),
    };

    Some(format!("https://rubygems.org/downloads/{}.gem", file_stem))
}
940
/// Returns the rubygems.org API URL for a gem, or `None` when `name` is empty.
///
/// With a version the v2 per-version endpoint is used; without one, the v1
/// versions listing for the gem.
fn rubygems_api_url(name: &str, version: Option<&str>) -> Option<String> {
    if name.is_empty() {
        return None;
    }

    let url = match version {
        Some(v) => format!(
            "https://rubygems.org/api/v2/rubygems/{}/versions/{}.json",
            name, v
        ),
        None => format!("https://rubygems.org/api/v1/versions/{}.json", name),
    };
    Some(url)
}
958
959fn get_rubygems_urls(
960 name: &str,
961 version: Option<&str>,
962 platform: Option<&str>,
963) -> (
964 Option<String>,
965 Option<String>,
966 Option<String>,
967 Option<String>,
968) {
969 let repository_homepage_url = rubygems_homepage_url(name, version);
970 let repository_download_url = rubygems_download_url(name, version, platform);
971 let api_data_url = rubygems_api_url(name, version);
972 let download_url = repository_download_url.clone();
973
974 (
975 repository_homepage_url,
976 repository_download_url,
977 api_data_url,
978 download_url,
979 )
980}
981
982fn default_package_data() -> PackageData {
984 PackageData {
985 package_type: Some(PACKAGE_TYPE),
986 primary_language: Some("Ruby".to_string()),
987 ..Default::default()
988 }
989}
990
991fn default_package_data_with_datasource(datasource_id: DatasourceId) -> PackageData {
992 PackageData {
993 datasource_id: Some(datasource_id),
994 ..default_package_data()
995 }
996}
997
998pub struct GemspecParser;
1008
1009impl PackageParser for GemspecParser {
1010 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1011
1012 fn metadata() -> Vec<ParserMetadata> {
1013 vec![ParserMetadata {
1014 description: "Ruby .gemspec manifest",
1015 file_patterns: &[
1016 "**/*.gemspec",
1017 "**/data.gz-extract/*.gemspec",
1018 "**/specifications/*.gemspec",
1019 ],
1020 package_type: "gem",
1021 primary_language: "Ruby",
1022 documentation_url: Some("https://guides.rubygems.org/specification-reference/"),
1023 }]
1024 }
1025
1026 fn extract_packages(path: &Path) -> Vec<PackageData> {
1027 let datasource_id = gemspec_datasource_id(path);
1028 let content = match read_file_to_string(path, None) {
1029 Ok(c) => c,
1030 Err(e) => {
1031 warn!("Failed to read .gemspec at {:?}: {}", path, e);
1032 return vec![default_package_data_with_datasource(datasource_id)];
1033 }
1034 };
1035
1036 let mut package_data = parse_gemspec_with_context(&content, path.parent());
1037 package_data.datasource_id = Some(datasource_id);
1038 vec![package_data]
1039 }
1040
1041 fn is_match(path: &Path) -> bool {
1042 path.extension()
1043 .and_then(|ext| ext.to_str())
1044 .is_some_and(|ext| ext == "gemspec")
1045 }
1046}
1047
/// Renders `path` as a string with every backslash replaced by `/`.
///
/// Invalid UTF-8 is converted lossily.
fn normalized_ruby_path(path: &Path) -> String {
    path.to_string_lossy()
        .chars()
        .map(|c| if c == '\\' { '/' } else { c })
        .collect()
}
1051
1052fn gemfile_datasource_id(path: &Path) -> DatasourceId {
1053 if normalized_ruby_path(path).contains("/data.gz-extract/") {
1054 DatasourceId::GemfileExtracted
1055 } else {
1056 DatasourceId::Gemfile
1057 }
1058}
1059
1060fn gemfile_lock_datasource_id(path: &Path) -> DatasourceId {
1061 if normalized_ruby_path(path).contains("/data.gz-extract/") {
1062 DatasourceId::GemfileLockExtracted
1063 } else {
1064 DatasourceId::GemfileLock
1065 }
1066}
1067
1068fn gemspec_datasource_id(path: &Path) -> DatasourceId {
1069 let normalized = normalized_ruby_path(path);
1070 if normalized.contains("/data.gz-extract/") {
1071 DatasourceId::GemspecExtracted
1072 } else if normalized.contains("/specifications/") {
1073 DatasourceId::GemGemspecInstalledSpecifications
1074 } else {
1075 DatasourceId::Gemspec
1076 }
1077}
1078
1079fn clean_gemspec_value(s: &str) -> String {
1081 let s = strip_freeze_suffix(s).trim();
1082
1083 let s = if let Some(pos) = s.find(" #") {
1084 s[..pos].trim()
1085 } else {
1086 s
1087 };
1088
1089 let s = if let Some(stripped) = s.strip_prefix("%q{") {
1090 stripped.strip_suffix('}').unwrap_or(stripped)
1091 } else if let Some(stripped) = s.strip_prefix("%q<") {
1092 stripped.strip_suffix('>').unwrap_or(stripped)
1093 } else if let Some(stripped) = s.strip_prefix("%q[") {
1094 stripped.strip_suffix(']').unwrap_or(stripped)
1095 } else if let Some(stripped) = s.strip_prefix("%q(") {
1096 stripped.strip_suffix(')').unwrap_or(stripped)
1097 } else {
1098 s
1099 };
1100
1101 let s = s
1102 .trim_start_matches('"')
1103 .trim_end_matches('"')
1104 .trim_start_matches('\'')
1105 .trim_end_matches('\'');
1106 let s = strip_freeze_suffix(s).trim();
1107 s.to_string()
1108}
1109
1110fn extract_ruby_array(s: &str) -> Vec<String> {
1112 let s = strip_freeze_suffix(s.trim());
1113 let s = s.trim_start_matches('[').trim_end_matches(']');
1114 let item_re = match Regex::new(r#"["']([^"']*?)["'](?:\.freeze)?"#) {
1115 Ok(r) => r,
1116 Err(_) => return Vec::new(),
1117 };
1118 item_re
1119 .captures_iter(s)
1120 .filter_map(|cap| cap.get(1).map(|m| m.as_str().to_string()))
1121 .collect()
1122}
1123
1124fn extract_all_ruby_values(s: &str) -> Vec<String> {
1125 let value_re = match Regex::new(r#"%q[\{<\[(]([^\}>\])]+)[\}>\])]|["']([^"']+)["']"#) {
1126 Ok(r) => r,
1127 Err(_) => return Vec::new(),
1128 };
1129
1130 value_re
1131 .captures_iter(s)
1132 .filter_map(|caps| caps.get(1).or_else(|| caps.get(2)))
1133 .map(|m| clean_gemspec_value(m.as_str()))
1134 .collect()
1135}
1136
1137fn extract_first_ruby_value(s: &str) -> Option<String> {
1138 extract_all_ruby_values(s).into_iter().next()
1139}
1140
/// Returns the trimmed text following the first top-level comma in `args`.
///
/// Commas inside quotes, inside `[]`/`{}`/`<>` or inside `()` are not
/// top-level. Within quotes a backslash escapes the next character. Returns
/// `""` when there is no top-level comma.
fn after_first_argument(args: &str) -> &str {
    let mut open_brackets = 0usize;
    let mut open_parens = 0usize;
    let mut active_quote: Option<char> = None;
    let mut cursor = args.char_indices();

    while let Some((offset, ch)) = cursor.next() {
        if let Some(quote) = active_quote {
            if ch == '\\' {
                // Skip the escaped character so it cannot close the quote.
                cursor.next();
            } else if ch == quote {
                active_quote = None;
            }
            continue;
        }

        match ch {
            '\'' | '"' => active_quote = Some(ch),
            '[' | '{' | '<' => open_brackets += 1,
            ']' | '}' | '>' => open_brackets = open_brackets.saturating_sub(1),
            '(' => open_parens += 1,
            ')' => open_parens = open_parens.saturating_sub(1),
            ',' if open_brackets == 0 && open_parens == 0 => {
                return args[offset + 1..].trim();
            }
            _ => {}
        }
    }

    ""
}
1178
1179fn resolve_variable_version(var_name: &str, contexts: &[String]) -> Option<String> {
1184 let var_name = var_name.trim();
1185 if var_name.is_empty() {
1186 return None;
1187 }
1188
1189 for candidate in candidate_constant_names(var_name) {
1190 let escaped = regex::escape(&candidate);
1191 let pattern = format!(r#"(?m)^\s*{}\s*=\s*(.+)$"#, escaped);
1192 let Ok(re) = Regex::new(&pattern) else {
1193 continue;
1194 };
1195
1196 for context in contexts {
1197 if let Some(caps) = re.captures(context)
1198 && let Some(expression) = caps.get(1)
1199 && let Some(resolved) =
1200 resolve_scalar_expression(expression.as_str(), None, contexts)
1201 {
1202 return Some(resolved);
1203 }
1204 }
1205 }
1206
1207 None
1208}
1209
1210fn resolve_variable_array(var_name: &str, contexts: &[String]) -> Option<Vec<String>> {
1211 let var_name = var_name.trim();
1212 if var_name.is_empty() {
1213 return None;
1214 }
1215
1216 for candidate in candidate_constant_names(var_name) {
1217 let escaped = regex::escape(&candidate);
1218 let pattern = format!(r#"(?m)^\s*{}\s*=\s*(\[[^\n]+\])"#, escaped);
1219 let Ok(re) = Regex::new(&pattern) else {
1220 continue;
1221 };
1222
1223 for context in contexts {
1224 if let Some(caps) = re.captures(context)
1225 && let Some(raw) = caps.get(1)
1226 {
1227 let values = extract_ruby_array(raw.as_str());
1228 if !values.is_empty() {
1229 return Some(values);
1230 }
1231 }
1232 }
1233 }
1234
1235 None
1236}
1237
/// Lists the names under which a constant may be assigned.
///
/// The result always starts with `var_name` itself. For a namespaced name
/// such as `Foo::VERSION`, the text after the last `::` is added as a second
/// entry.
fn candidate_constant_names(var_name: &str) -> Vec<String> {
    let unqualified = var_name
        .split("::")
        .last()
        .filter(|tail| *tail != var_name);

    std::iter::once(var_name)
        .chain(unqualified)
        .map(str::to_string)
        .collect()
}
1247
/// Returns true when `s` has the shape of a Ruby local variable name.
///
/// That is: it starts with `_` or an ASCII lowercase letter, and every other
/// character is `_` or ASCII alphanumeric. Empty input returns false.
fn looks_like_local_variable_reference(s: &str) -> bool {
    let Some(first) = s.chars().next() else {
        return false;
    };
    if first != '_' && !first.is_ascii_lowercase() {
        return false;
    }
    s.chars()
        .skip(1)
        .all(|c| c == '_' || c.is_ascii_alphanumeric())
}
1253
/// Determines the directory inside which file reads are allowed.
///
/// Returns the canonical current working directory when the canonical
/// `base_dir` lies inside it. Otherwise falls back to the canonical
/// `base_dir`. Returns `None` when `base_dir` is `None` or cannot be
/// canonicalised.
fn resolve_ruby_read_root(base_dir: Option<&Path>) -> Option<PathBuf> {
    let base_dir = base_dir?;

    // The working directory, but only if it encloses `base_dir`.
    let enclosing_cwd = || -> Option<PathBuf> {
        let cwd = std::env::current_dir().ok()?.canonicalize().ok()?;
        let base = base_dir.canonicalize().ok()?;
        if base.starts_with(&cwd) { Some(cwd) } else { None }
    };

    match enclosing_cwd() {
        Some(root) => Some(root),
        None => base_dir.canonicalize().ok(),
    }
}
1268
/// Canonicalises `path` and returns it only if it lies inside `allowed_root`.
///
/// Returns `None` when the path cannot be canonicalised or falls outside the
/// root.
fn resolve_ruby_read_path(path: PathBuf, allowed_root: &Path) -> Option<PathBuf> {
    let canonical = path.canonicalize().ok()?;
    if canonical.starts_with(allowed_root) {
        Some(canonical)
    } else {
        None
    }
}
1275
1276fn resolve_file_read_argument(args: &str, base_dir: Option<&Path>) -> Option<String> {
1277 let base_dir = base_dir?;
1278 let allowed_root = resolve_ruby_read_root(base_dir.into())?;
1279 let relative_path = extract_first_ruby_value(args)?;
1280 if relative_path.is_empty() {
1281 return None;
1282 }
1283
1284 let candidate = Path::new(&relative_path);
1285 let path = if candidate.is_absolute() {
1286 candidate.to_path_buf()
1287 } else {
1288 base_dir.join(candidate)
1289 };
1290
1291 let safe_path = resolve_ruby_read_path(path, &allowed_root)?;
1292
1293 fs::read_to_string(safe_path)
1294 .ok()
1295 .map(|content| content.trim().to_string())
1296 .filter(|content| !content.is_empty())
1297}
1298
1299fn resolve_scalar_expression(
1300 expression: &str,
1301 base_dir: Option<&Path>,
1302 contexts: &[String],
1303) -> Option<String> {
1304 let expression = if let Some(pos) = expression.find(" #") {
1305 expression[..pos].trim()
1306 } else {
1307 expression.trim()
1308 };
1309
1310 let file_read_re = Regex::new(r#"^File\.read\((.+)\)(?:\.strip)?(?:\.freeze)?$"#).ok()?;
1311 if let Some(caps) = file_read_re.captures(expression) {
1312 return caps
1313 .get(1)
1314 .and_then(|m| resolve_file_read_argument(m.as_str(), base_dir));
1315 }
1316
1317 if let Some(joined) = resolve_joined_constant_string(expression, contexts) {
1318 return Some(joined);
1319 }
1320
1321 if let Some(value) = extract_first_ruby_value(expression) {
1322 return Some(interpolate_ruby_constant_string(&value, contexts));
1323 }
1324
1325 let cleaned = clean_gemspec_value(expression);
1326 if looks_like_constant_reference(&cleaned) {
1327 return resolve_variable_version(&cleaned, contexts).or(Some(cleaned));
1328 }
1329
1330 None
1331}
1332
1333fn resolve_joined_constant_string(expression: &str, contexts: &[String]) -> Option<String> {
1334 let expression = strip_freeze_suffix(expression.trim());
1335 if !expression.starts_with('[') {
1336 return None;
1337 }
1338 let join_index = expression.find("].join(")?;
1339 let body = &expression[1..join_index];
1340 let separator_expr = expression[join_index + 7..].strip_suffix(')')?.trim();
1341 let separator = extract_first_ruby_value(separator_expr)?;
1342
1343 let mut parts = Vec::new();
1344 for item in body.split(',').take(MAX_ITERATION_COUNT) {
1345 let resolved = resolve_scalar_expression(item.trim(), None, contexts)?;
1346 parts.push(resolved);
1347 }
1348
1349 Some(parts.join(&separator))
1350}
1351
1352fn interpolate_ruby_constant_string(value: &str, contexts: &[String]) -> String {
1353 if !value.contains("#{") {
1354 return value.to_string();
1355 }
1356
1357 let Ok(interpolation_re) = Regex::new(r#"#\{([^}]+)\}"#) else {
1358 return value.to_string();
1359 };
1360 interpolation_re
1361 .replace_all(value, |captures: ®ex::Captures<'_>| {
1362 let reference = captures
1363 .get(1)
1364 .map(|m| m.as_str().trim())
1365 .unwrap_or_default();
1366 resolve_variable_version(reference, contexts).unwrap_or_else(|| {
1367 captures
1368 .get(0)
1369 .map(|value| value.as_str().to_string())
1370 .unwrap_or_default()
1371 })
1372 })
1373 .into_owned()
1374}
1375
1376fn resolve_local_variable_value(
1377 var_name: &str,
1378 content: &str,
1379 base_dir: Option<&Path>,
1380 contexts: &[String],
1381) -> Option<String> {
1382 let escaped = regex::escape(var_name.trim());
1383 let pattern = format!(r#"(?m)^\s*{}\s*=\s*(.+)$"#, escaped);
1384 let re = Regex::new(&pattern).ok()?;
1385
1386 re.captures_iter(content).find_map(|caps| {
1387 caps.get(1)
1388 .and_then(|m| resolve_scalar_expression(m.as_str(), base_dir, contexts))
1389 })
1390}
1391
1392fn resolve_gemspec_scalar_value(
1393 raw_value: &str,
1394 content: &str,
1395 base_dir: Option<&Path>,
1396 contexts: &[String],
1397) -> Option<String> {
1398 let cleaned = truncate_field(clean_gemspec_value(raw_value));
1399 if cleaned.is_empty() {
1400 return None;
1401 }
1402
1403 if looks_like_constant_reference(&cleaned) {
1404 return resolve_variable_version(&cleaned, contexts)
1405 .map(truncate_field)
1406 .or(Some(cleaned));
1407 }
1408
1409 if looks_like_local_variable_reference(&cleaned) {
1410 return resolve_local_variable_value(&cleaned, content, base_dir, contexts)
1411 .map(truncate_field)
1412 .or(Some(cleaned));
1413 }
1414
1415 Some(cleaned)
1416}
1417
1418fn load_required_ruby_contexts(content: &str, base_dir: Option<&Path>) -> Vec<String> {
1419 let mut contexts = vec![content.to_string()];
1420 let Some(base_dir) = base_dir else {
1421 return contexts;
1422 };
1423 let allowed_root = resolve_ruby_read_root(Some(base_dir));
1424
1425 let require_re = match Regex::new(r#"(?m)^\s*require(?:_relative)?\s+["']([^"']+)["']"#) {
1426 Ok(re) => re,
1427 Err(_) => return contexts,
1428 };
1429
1430 for caps in require_re.captures_iter(content) {
1431 let Some(required) = caps.get(1).map(|m| m.as_str()) else {
1432 continue;
1433 };
1434 for candidate in candidate_require_paths(base_dir, required) {
1435 let Some(safe_candidate) = allowed_root
1436 .as_deref()
1437 .and_then(|root| resolve_ruby_read_path(candidate, root))
1438 else {
1439 continue;
1440 };
1441 if let Ok(required_content) = read_file_to_string(&safe_candidate, None) {
1442 contexts.push(required_content);
1443 break;
1444 }
1445 }
1446 }
1447
1448 contexts
1449}
1450
/// Maps a Ruby `require` argument to the files it may refer to.
///
/// `::` is rewritten to `/` and a `.rb` extension is appended when missing.
/// The resulting file name is looked up directly under `base_dir` first and
/// under `base_dir/lib` second.
fn candidate_require_paths(base_dir: &Path, required: &str) -> Vec<PathBuf> {
    let mut filename = required.replace("::", "/");
    if !filename.ends_with(".rb") {
        filename.push_str(".rb");
    }

    [base_dir.to_path_buf(), base_dir.join("lib")]
        .iter()
        .map(|dir| dir.join(&filename))
        .collect()
}
1464
/// Returns `true` when `s` contains `::` or begins with an ASCII uppercase letter.
fn looks_like_constant_reference(s: &str) -> bool {
    if s.contains("::") {
        return true;
    }
    matches!(s.chars().next(), Some(first) if first.is_ascii_uppercase())
}
1468
/// Test-only shortcut that parses gemspec text with no base directory.
#[cfg(test)]
fn parse_gemspec(content: &str) -> PackageData {
    let no_base_dir: Option<&Path> = None;
    parse_gemspec_with_context(content, no_base_dir)
}
1474
1475fn parse_gemspec_with_context(content: &str, base_dir: Option<&Path>) -> PackageData {
1476 let contexts = load_required_ruby_contexts(content, base_dir);
1477
1478 let field_re = match Regex::new(
1481 r#"(?m)^\s*\w+\.(name|version|summary|description|homepage|license)\s*=\s*(.+)$"#,
1482 ) {
1483 Ok(r) => r,
1484 Err(e) => {
1485 warn!("Failed to compile gemspec field regex: {}", e);
1486 return default_package_data_with_datasource(DatasourceId::Gemspec);
1487 }
1488 };
1489
1490 let licenses_re = match Regex::new(r#"(?m)^\s*\w+\.licenses\s*=\s*(.+)$"#) {
1491 Ok(r) => r,
1492 Err(e) => {
1493 warn!("Failed to compile licenses regex: {}", e);
1494 return default_package_data_with_datasource(DatasourceId::Gemspec);
1495 }
1496 };
1497
1498 let authors_re = match Regex::new(r#"(?m)^\s*\w+\.(?:authors|author)\s*=\s*(.+)$"#) {
1499 Ok(r) => r,
1500 Err(e) => {
1501 warn!("Failed to compile authors regex: {}", e);
1502 return default_package_data_with_datasource(DatasourceId::Gemspec);
1503 }
1504 };
1505
1506 let email_re = match Regex::new(r#"(?m)^\s*\w+\.email\s*=\s*(.+)$"#) {
1507 Ok(r) => r,
1508 Err(e) => {
1509 warn!("Failed to compile email regex: {}", e);
1510 return default_package_data_with_datasource(DatasourceId::Gemspec);
1511 }
1512 };
1513
1514 let dependency_call_re = match Regex::new(
1515 r#"(?m)^\s*\w+\.(add_(?:development_|runtime_)?dependency)\s*\(?(.+?)\)?\s*$"#,
1516 ) {
1517 Ok(r) => r,
1518 Err(e) => {
1519 warn!("Failed to compile gemspec dependency regex: {}", e);
1520 return default_package_data_with_datasource(DatasourceId::Gemspec);
1521 }
1522 };
1523
1524 let mut name: Option<String> = None;
1525 let mut version: Option<String> = None;
1526 let mut summary: Option<String> = None;
1527 let mut description: Option<String> = None;
1528 let mut homepage: Option<String> = None;
1529 let mut license: Option<String> = None;
1530 let mut licenses: Vec<String> = Vec::new();
1531 let mut authors: Vec<String> = Vec::new();
1532 let mut emails: Vec<String> = Vec::new();
1533 let mut dependencies: Vec<Dependency> = Vec::new();
1534
1535 for caps in field_re.captures_iter(content).take(MAX_ITERATION_COUNT) {
1537 let field_name = match caps.get(1) {
1538 Some(m) => m.as_str(),
1539 None => continue,
1540 };
1541 let raw_value = match caps.get(2) {
1542 Some(m) => m.as_str().trim(),
1543 None => continue,
1544 };
1545
1546 match field_name {
1547 "name" => name = resolve_gemspec_scalar_value(raw_value, content, base_dir, &contexts),
1548 "version" => {
1549 version = resolve_gemspec_scalar_value(raw_value, content, base_dir, &contexts);
1550 }
1551 "summary" => {
1552 summary = resolve_gemspec_scalar_value(raw_value, content, base_dir, &contexts)
1553 }
1554 "description" => description = Some(truncate_field(clean_gemspec_value(raw_value))),
1555 "homepage" => {
1556 homepage = resolve_gemspec_scalar_value(raw_value, content, base_dir, &contexts)
1557 }
1558 "license" => license = Some(truncate_field(clean_gemspec_value(raw_value))),
1559 _ => {}
1560 }
1561 }
1562
1563 for caps in licenses_re.captures_iter(content).take(MAX_ITERATION_COUNT) {
1565 if let Some(raw) = caps.get(1) {
1566 licenses = extract_ruby_array(raw.as_str());
1567 }
1568 }
1569
1570 for caps in authors_re.captures_iter(content).take(MAX_ITERATION_COUNT) {
1572 if let Some(raw) = caps.get(1) {
1573 let raw_str = raw.as_str().trim();
1574 if raw_str.starts_with('[') {
1575 authors = extract_ruby_array(raw_str);
1576 } else if looks_like_constant_reference(raw_str) {
1577 authors = resolve_variable_array(raw_str, &contexts)
1578 .unwrap_or_else(|| vec![clean_gemspec_value(raw_str)]);
1579 } else {
1580 authors.push(clean_gemspec_value(raw_str));
1581 }
1582 }
1583 }
1584
1585 for caps in email_re.captures_iter(content).take(MAX_ITERATION_COUNT) {
1587 if let Some(raw) = caps.get(1) {
1588 let raw_str = raw.as_str().trim();
1589 if raw_str.starts_with('[') {
1590 emails = extract_ruby_array(raw_str);
1591 } else if looks_like_constant_reference(raw_str) {
1592 emails = resolve_variable_array(raw_str, &contexts)
1593 .unwrap_or_else(|| vec![clean_gemspec_value(raw_str)]);
1594 } else {
1595 emails.push(clean_gemspec_value(raw_str));
1596 }
1597 }
1598 }
1599
1600 let mut parties: Vec<Party> = Vec::new();
1602
1603 if authors.len() == 1 && emails.len() == 1 {
1604 let email_str = emails.first().map(String::as_str);
1605 let (parsed_email_name, parsed_email) = match email_str {
1606 Some(e) => split_name_email(e),
1607 None => (None, None),
1608 };
1609
1610 parties.push(Party {
1611 r#type: Some("person".to_string()),
1612 role: Some("author".to_string()),
1613 name: authors.first().cloned().or(parsed_email_name),
1614 email: parsed_email.or_else(|| {
1615 email_str
1616 .filter(|e| e.contains('@') && !e.contains('<'))
1617 .map(|e| e.to_string())
1618 }),
1619 url: None,
1620 organization: None,
1621 organization_url: None,
1622 timezone: None,
1623 });
1624 } else {
1625 for author_name in authors {
1626 parties.push(Party {
1627 r#type: Some("person".to_string()),
1628 role: Some("author".to_string()),
1629 name: Some(author_name),
1630 email: None,
1631 url: None,
1632 organization: None,
1633 organization_url: None,
1634 timezone: None,
1635 });
1636 }
1637
1638 for email_str in emails {
1639 let (parsed_email_name, parsed_email) = if email_str.contains('<') {
1640 split_name_email(&email_str)
1641 } else {
1642 (None, None)
1643 };
1644 parties.push(Party {
1645 r#type: Some("person".to_string()),
1646 role: Some("author".to_string()),
1647 name: parsed_email_name,
1648 email: parsed_email.or_else(|| email_str.contains('@').then_some(email_str)),
1649 url: None,
1650 organization: None,
1651 organization_url: None,
1652 timezone: None,
1653 });
1654 }
1655 }
1656
1657 for caps in dependency_call_re
1658 .captures_iter(content)
1659 .take(MAX_ITERATION_COUNT)
1660 {
1661 let method = match caps.get(1) {
1662 Some(m) => m.as_str(),
1663 None => continue,
1664 };
1665 let args = match caps.get(2) {
1666 Some(m) => m.as_str(),
1667 None => continue,
1668 };
1669
1670 let Some(dep_name) = extract_first_ruby_value(args).map(truncate_field) else {
1671 continue;
1672 };
1673 let version_parts = extract_all_ruby_values(after_first_argument(args));
1674 let extracted_requirement = if version_parts.is_empty() {
1675 None
1676 } else {
1677 Some(version_parts.join(", "))
1678 };
1679 let purl = create_gem_purl(&dep_name, None);
1680 let is_development = method == "add_development_dependency";
1681 let scope = if is_development {
1682 "development"
1683 } else {
1684 "runtime"
1685 };
1686
1687 dependencies.push(Dependency {
1688 purl,
1689 extracted_requirement,
1690 scope: Some(scope.to_string()),
1691 is_runtime: Some(!is_development),
1692 is_optional: Some(is_development),
1693 is_pinned: None,
1694 is_direct: Some(true),
1695 resolved_package: None,
1696 extra_data: None,
1697 });
1698 }
1699
1700 let extracted_license_statement = if !licenses.is_empty() {
1702 Some(licenses.join(" AND "))
1703 } else {
1704 license
1705 };
1706
1707 let (declared_license_expression, declared_license_expression_spdx, license_detections) =
1708 normalize_spdx_declared_license(extracted_license_statement.as_deref());
1709
1710 let final_description = description.or(summary);
1712
1713 let purl = name
1715 .as_deref()
1716 .map(|n| create_gem_purl(n, version.as_deref()))
1717 .unwrap_or(None);
1718
1719 let (repository_homepage_url, repository_download_url, api_data_url, download_url) =
1720 if let Some(n) = name.as_deref() {
1721 get_rubygems_urls(n, version.as_deref(), None)
1722 } else {
1723 (None, None, None, None)
1724 };
1725
1726 PackageData {
1727 package_type: Some(PACKAGE_TYPE),
1728 name,
1729 version,
1730 primary_language: Some("Ruby".to_string()),
1731 description: final_description,
1732 homepage_url: homepage,
1733 download_url,
1734 declared_license_expression,
1735 declared_license_expression_spdx,
1736 license_detections,
1737 extracted_license_statement,
1738 parties,
1739 dependencies,
1740 repository_homepage_url,
1741 repository_download_url,
1742 api_data_url,
1743 datasource_id: Some(DatasourceId::Gemspec),
1744 purl,
1745 ..default_package_data()
1746 }
1747}
1748
/// Largest `.gem` file, in bytes, that `extract_gem_archive` will open (100 MiB).
const MAX_ARCHIVE_SIZE: u64 = 100 * 1024 * 1024;

/// Upper bound, in bytes, applied both to the `metadata.gz` tar entry and to
/// its decompressed contents (50 MiB).
const MAX_FILE_SIZE: u64 = 50 * 1024 * 1024;

/// Highest accepted ratio of decompressed size to compressed size for `metadata.gz`.
const MAX_COMPRESSION_RATIO: f64 = 100.0;

/// Parser for `.gem` archives; see its `PackageParser` implementation.
pub struct GemArchiveParser;
1764
1765impl PackageParser for GemArchiveParser {
1766 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1767
1768 fn metadata() -> Vec<ParserMetadata> {
1769 vec![ParserMetadata {
1770 description: "Ruby .gem archive",
1771 file_patterns: &["**/*.gem"],
1772 package_type: "gem",
1773 primary_language: "Ruby",
1774 documentation_url: Some("https://guides.rubygems.org/specification-reference/"),
1775 }]
1776 }
1777
1778 fn extract_packages(path: &Path) -> Vec<PackageData> {
1779 vec![match extract_gem_archive(path) {
1780 Ok(data) => data,
1781 Err(e) => {
1782 warn!("Failed to extract .gem archive at {:?}: {}", path, e);
1783 default_package_data_with_datasource(DatasourceId::GemArchive)
1784 }
1785 }]
1786 }
1787
1788 fn is_match(path: &Path) -> bool {
1789 path.extension()
1790 .and_then(|ext| ext.to_str())
1791 .is_some_and(|ext| ext == "gem")
1792 }
1793}
1794
1795fn extract_gem_archive(path: &Path) -> Result<PackageData, String> {
1796 let file_metadata =
1797 fs::metadata(path).map_err(|e| format!("Failed to read file metadata: {}", e))?;
1798 let archive_size = file_metadata.len();
1799
1800 if archive_size > MAX_ARCHIVE_SIZE {
1801 return Err(format!(
1802 "Archive too large: {} bytes (limit: {} bytes)",
1803 archive_size, MAX_ARCHIVE_SIZE
1804 ));
1805 }
1806
1807 let file = File::open(path).map_err(|e| format!("Failed to open archive: {}", e))?;
1808 let mut archive = Archive::new(file);
1809
1810 let mut entry_count: usize = 0;
1811 for entry_result in archive
1812 .entries()
1813 .map_err(|e| format!("Failed to read tar entries: {}", e))?
1814 {
1815 entry_count += 1;
1816 if entry_count > MAX_ITERATION_COUNT {
1817 warn!(
1818 "Exceeded max tar entry count ({}) in .gem archive, stopping iteration",
1819 MAX_ITERATION_COUNT
1820 );
1821 break;
1822 }
1823
1824 let entry = entry_result.map_err(|e| format!("Failed to read tar entry: {}", e))?;
1825 let entry_path = entry
1826 .path()
1827 .map_err(|e| format!("Failed to get entry path: {}", e))?;
1828 let entry_str = entry_path.to_string_lossy();
1829 if entry_str.contains("..") {
1830 warn!("Skipping tar entry with path traversal: {}", entry_str);
1831 continue;
1832 }
1833
1834 if entry_path.to_str() == Some("metadata.gz") {
1835 let entry_size = entry.size();
1836 if entry_size > MAX_FILE_SIZE {
1837 return Err(format!(
1838 "metadata.gz too large: {} bytes (limit: {} bytes)",
1839 entry_size, MAX_FILE_SIZE
1840 ));
1841 }
1842
1843 let mut decoder = GzDecoder::new(entry);
1844 let mut content = Vec::new();
1845 let mut limited = std::io::Read::take(&mut decoder, MAX_FILE_SIZE + 1);
1846 limited
1847 .read_to_end(&mut content)
1848 .map_err(|e| format!("Failed to decompress metadata.gz: {}", e))?;
1849
1850 if content.len() > MAX_FILE_SIZE as usize {
1851 return Err(format!(
1852 "Decompressed metadata too large: exceeds {} byte limit",
1853 MAX_FILE_SIZE
1854 ));
1855 }
1856
1857 let content = match String::from_utf8(content) {
1858 Ok(s) => s,
1859 Err(err) => {
1860 let bytes = err.into_bytes();
1861 warn!("Invalid UTF-8 in gem metadata; using lossy conversion");
1862 String::from_utf8_lossy(&bytes).into_owned()
1863 }
1864 };
1865
1866 let uncompressed_size = content.len() as u64;
1867 if entry_size > 0 {
1868 let ratio = uncompressed_size as f64 / entry_size as f64;
1869 if ratio > MAX_COMPRESSION_RATIO {
1870 return Err(format!(
1871 "Suspicious compression ratio: {:.2}:1 (limit: {:.0}:1)",
1872 ratio, MAX_COMPRESSION_RATIO
1873 ));
1874 }
1875 }
1876
1877 return parse_gem_metadata_yaml(&content, DatasourceId::GemArchive);
1878 }
1879 }
1880
1881 Err("metadata.gz not found in .gem archive".to_string())
1882}
1883
1884fn parse_gem_metadata_yaml(
1885 content: &str,
1886 datasource_id: DatasourceId,
1887) -> Result<PackageData, String> {
1888 let cleaned = clean_ruby_yaml_tags(content);
1892
1893 let yaml: yaml_serde::Value =
1894 yaml_serde::from_str(&cleaned).map_err(|e| format!("Failed to parse YAML: {}", e))?;
1895
1896 let name = yaml_string(&yaml, "name").map(truncate_field);
1897 let version = yaml.get("version").and_then(|v| {
1898 if v.is_string() {
1899 v.as_str().map(|s| truncate_field(s.to_string()))
1900 } else {
1901 yaml_string(v, "version").map(truncate_field)
1902 }
1903 });
1904 let description = yaml_string(&yaml, "description")
1905 .or_else(|| yaml_string(&yaml, "summary"))
1906 .map(truncate_field);
1907 let homepage = yaml_string(&yaml, "homepage").map(truncate_field);
1908 let summary = yaml_string(&yaml, "summary").map(truncate_field);
1909
1910 let licenses: Vec<String> = yaml
1912 .get("licenses")
1913 .and_then(|v| v.as_sequence())
1914 .map(|seq| {
1915 seq.iter()
1916 .filter_map(|item| item.as_str().map(|s| truncate_field(s.to_string())))
1917 .collect()
1918 })
1919 .unwrap_or_default();
1920
1921 let extracted_license_statement = if !licenses.is_empty() {
1923 Some(licenses.join(" AND "))
1924 } else {
1925 None
1926 };
1927
1928 let (license_expression, license_expression_spdx, license_detections) =
1929 normalize_spdx_declared_license(extracted_license_statement.as_deref());
1930
1931 let authors: Vec<String> = yaml
1933 .get("authors")
1934 .and_then(|v| v.as_sequence())
1935 .map(|seq| {
1936 seq.iter()
1937 .filter_map(|item| item.as_str().map(|s| truncate_field(s.to_string())))
1938 .collect()
1939 })
1940 .unwrap_or_default();
1941
1942 let emails: Vec<String> = yaml
1943 .get("email")
1944 .map(|v| {
1945 if let Some(seq) = v.as_sequence() {
1946 seq.iter()
1947 .filter_map(|item| item.as_str().map(|s| truncate_field(s.to_string())))
1948 .collect()
1949 } else if let Some(s) = v.as_str() {
1950 vec![truncate_field(s.to_string())]
1951 } else {
1952 Vec::new()
1953 }
1954 })
1955 .unwrap_or_default();
1956
1957 let mut parties: Vec<Party> = Vec::new();
1959 let max_len = authors.len().max(emails.len());
1960 for i in 0..max_len {
1961 let author_name = authors.get(i).map(|s| s.as_str());
1962 let email_str = emails.get(i).map(|s| s.as_str());
1963
1964 let (parsed_email_name, parsed_email) = match email_str {
1965 Some(e) if e.contains('<') => split_name_email(e),
1966 None => (None, None),
1967 _ => (None, None),
1968 };
1969
1970 let party_name = author_name.map(|s| s.to_string()).or(parsed_email_name);
1971
1972 parties.push(Party {
1973 r#type: Some("person".to_string()),
1974 role: Some("author".to_string()),
1975 name: party_name,
1976 email: parsed_email.or_else(|| {
1977 email_str
1978 .filter(|e| e.contains('@') && !e.contains('<'))
1979 .map(|e| e.to_string())
1980 }),
1981 url: None,
1982 organization: None,
1983 organization_url: None,
1984 timezone: None,
1985 });
1986 }
1987
1988 let dependencies = parse_gem_yaml_dependencies(&yaml);
1990
1991 let metadata = yaml.get("metadata");
1992
1993 let bug_tracking_url = metadata
1994 .and_then(|m| yaml_string(m, "bug_tracking_uri"))
1995 .map(truncate_field);
1996
1997 let code_view_url = metadata
1998 .and_then(|m| yaml_string(m, "source_code_uri"))
1999 .map(truncate_field);
2000
2001 let vcs_url = code_view_url.clone().or_else(|| {
2002 metadata
2003 .and_then(|m| yaml_string(m, "homepage_uri"))
2004 .map(truncate_field)
2005 });
2006
2007 let file_references = metadata
2008 .and_then(|m| m.get("files"))
2009 .and_then(|f| f.as_sequence())
2010 .map(|seq| {
2011 seq.iter()
2012 .filter_map(|v| v.as_str())
2013 .map(|s| crate::models::FileReference {
2014 path: s.to_string(),
2015 size: None,
2016 sha1: None,
2017 md5: None,
2018 sha256: None,
2019 sha512: None,
2020 extra_data: None,
2021 })
2022 .collect::<Vec<_>>()
2023 })
2024 .unwrap_or_default();
2025
2026 let release_date = yaml_string(&yaml, "date").and_then(|d| {
2027 if d.len() >= 10 {
2028 Some(d[..10].to_string())
2029 } else {
2030 None
2031 }
2032 });
2033
2034 let purl = name
2035 .as_deref()
2036 .map(|n| create_gem_purl(n, version.as_deref()))
2037 .unwrap_or(None);
2038
2039 let platform = yaml_string(&yaml, "platform").map(truncate_field);
2040 let (repository_homepage_url, repository_download_url, api_data_url, download_url) =
2041 if let Some(n) = name.as_deref() {
2042 get_rubygems_urls(n, version.as_deref(), platform.as_deref())
2043 } else {
2044 (None, None, None, None)
2045 };
2046
2047 let qualifiers = if let Some(ref p) = platform {
2048 if p != "ruby" {
2049 let mut q = HashMap::new();
2050 q.insert("platform".to_string(), p.clone());
2051 Some(q)
2052 } else {
2053 None
2054 }
2055 } else {
2056 None
2057 };
2058
2059 Ok(PackageData {
2060 package_type: Some(PACKAGE_TYPE),
2061 name,
2062 version,
2063 qualifiers,
2064 primary_language: Some("Ruby".to_string()),
2065 description: description.or(summary),
2066 release_date,
2067 homepage_url: homepage,
2068 download_url,
2069 bug_tracking_url,
2070 code_view_url,
2071 declared_license_expression: license_expression,
2072 declared_license_expression_spdx: license_expression_spdx,
2073 license_detections,
2074 extracted_license_statement,
2075 file_references,
2076 parties,
2077 dependencies,
2078 repository_homepage_url,
2079 repository_download_url,
2080 api_data_url,
2081 datasource_id: Some(datasource_id),
2082 purl,
2083 vcs_url,
2084 ..default_package_data()
2085 })
2086}
2087
2088fn clean_ruby_yaml_tags(content: &str) -> String {
2090 let tag_re = match Regex::new(r"!ruby/\S+") {
2091 Ok(r) => r,
2092 Err(_) => return content.to_string(),
2093 };
2094 tag_re.replace_all(content, "").to_string()
2095}
2096
2097fn yaml_string(yaml: &yaml_serde::Value, key: &str) -> Option<String> {
2098 yaml.get(key)
2099 .and_then(|v| v.as_str())
2100 .filter(|s| !s.is_empty())
2101 .map(|s| s.to_string())
2102}
2103
2104fn parse_gem_yaml_dependencies(yaml: &yaml_serde::Value) -> Vec<Dependency> {
2105 let mut dependencies = Vec::new();
2106
2107 let deps_seq = match yaml.get("dependencies").and_then(|v| v.as_sequence()) {
2108 Some(seq) => seq,
2109 None => return dependencies,
2110 };
2111
2112 for dep_value in deps_seq.iter().take(MAX_ITERATION_COUNT) {
2113 let dep_name = match yaml_string(dep_value, "name").map(truncate_field) {
2114 Some(n) => n,
2115 None => continue,
2116 };
2117
2118 let dep_type = yaml_string(dep_value, "type");
2119 let is_development = dep_type.as_deref() == Some(":development");
2120
2121 let requirements = dep_value
2123 .get("requirement")
2124 .or_else(|| dep_value.get("version_requirements"))
2125 .and_then(|req| req.get("requirements"))
2126 .and_then(|reqs| reqs.as_sequence());
2127
2128 let extracted_requirement = requirements.map(|reqs| {
2129 let parts: Vec<String> = reqs
2130 .iter()
2131 .filter_map(|req| {
2132 let seq = req.as_sequence()?;
2133 if seq.len() >= 2 {
2134 let op = seq[0].as_str().unwrap_or("");
2135 let ver = seq[1].get("version").and_then(|v| v.as_str()).unwrap_or("");
2136 if op == ">=" && ver == "0" {
2137 None
2139 } else if op.is_empty() || ver.is_empty() {
2140 None
2141 } else {
2142 Some(format!("{} {}", op, ver))
2143 }
2144 } else {
2145 None
2146 }
2147 })
2148 .collect();
2149 parts.join(", ")
2150 });
2151
2152 let extracted_requirement = extracted_requirement
2153 .filter(|s| !s.is_empty())
2154 .or_else(|| Some(String::new()));
2155
2156 let (scope, is_runtime, is_optional) = if is_development {
2157 (Some("development".to_string()), false, true)
2158 } else {
2159 (Some("runtime".to_string()), true, false)
2160 };
2161
2162 let purl = create_gem_purl(&dep_name, None);
2163
2164 dependencies.push(Dependency {
2165 purl,
2166 extracted_requirement,
2167 scope,
2168 is_runtime: Some(is_runtime),
2169 is_optional: Some(is_optional),
2170 is_pinned: None,
2171 is_direct: Some(true),
2172 resolved_package: None,
2173 extra_data: None,
2174 });
2175 }
2176
2177 dependencies
2178}
2179
2180pub struct GemMetadataExtractedParser;
2185
2186impl PackageParser for GemMetadataExtractedParser {
2187 const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
2188
2189 fn metadata() -> Vec<ParserMetadata> {
2190 vec![ParserMetadata {
2191 description: "Ruby gem metadata (extracted)",
2192 file_patterns: &["**/metadata.gz-extract"],
2193 package_type: "gem",
2194 primary_language: "Ruby",
2195 documentation_url: Some("https://guides.rubygems.org/specification-reference/"),
2196 }]
2197 }
2198
2199 fn extract_packages(path: &Path) -> Vec<PackageData> {
2200 vec![match extract_gem_metadata_extracted(path) {
2201 Ok(data) => data,
2202 Err(e) => {
2203 warn!("Failed to extract gem metadata from {:?}: {}", path, e);
2204 default_package_data_with_datasource(DatasourceId::GemArchiveExtracted)
2205 }
2206 }]
2207 }
2208
2209 fn is_match(path: &Path) -> bool {
2210 path.to_str()
2211 .is_some_and(|p| p.contains("metadata.gz-extract"))
2212 }
2213}
2214
2215fn extract_gem_metadata_extracted(path: &Path) -> Result<PackageData, String> {
2216 let content = read_file_to_string(path, None)
2217 .map_err(|e| format!("Failed to read metadata.gz-extract file: {}", e))?;
2218
2219 parse_gem_metadata_yaml(&content, DatasourceId::GemArchiveExtracted)
2220}
2221
#[cfg(test)]
mod tests {
    use super::parse_gemspec;

    // A `%q{` literal that is never closed on the same line must still yield
    // the text that follows the opener.
    #[test]
    fn test_clean_gemspec_value_handles_unterminated_percent_q() {
        assert_eq!(
            super::clean_gemspec_value("%q{Arel is a SQL AST manager for Ruby. It"),
            "Arel is a SQL AST manager for Ruby. It"
        );
    }

    // Both `add_runtime_dependency` and plain `add_dependency` must be reported
    // with the `runtime` scope, keeping their version constraints.
    #[test]
    fn test_parse_gemspec_runtime_dependency_scope() {
        let content = r#"
Gem::Specification.new do |spec|
  spec.name = "demo"
  spec.version = "1.0.0"
  spec.add_runtime_dependency "rack", "~> 3.0"
  spec.add_dependency "thor", ">= 1.0"
end
"#;

        let package_data = parse_gemspec(content);
        assert_eq!(package_data.dependencies.len(), 2);
        assert_eq!(
            package_data.dependencies[0].scope,
            Some("runtime".to_string())
        );
        assert_eq!(
            package_data.dependencies[0].extracted_requirement,
            Some("~> 3.0".to_string())
        );
        assert_eq!(
            package_data.dependencies[1].scope,
            Some("runtime".to_string())
        );
        assert_eq!(
            package_data.dependencies[1].extracted_requirement,
            Some(">= 1.0".to_string())
        );
    }
}