Skip to main content

provenant/parsers/
ruby.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4//! Parser for Ruby/RubyGems package manifests.
5//!
6//! Extracts package metadata, dependencies, and platform information from
7//! Gemfile and Gemfile.lock files used by Ruby/Bundler projects.
8//!
9//! # Supported Formats
10//! - Gemfile (manifest with Ruby DSL)
11//! - Gemfile.lock (lockfile with state machine sections)
12//! - *.gemspec (gem specification files)
13//! - *.gem (gem archive packages)
14//! - metadata.gz-extract (pre-extracted gem metadata)
15//!
16//! # Key Features
17//! - State machine parsing for Gemfile.lock sections (GEM, GIT, PATH, SVN, PLATFORMS, BUNDLED WITH, DEPENDENCIES)
18//! - Regex-based Ruby DSL parsing for Gemfile
19//! - Dependency group handling (:development, :test, etc.)
20//! - Platform-specific gem support
21//! - Pessimistic version operator (~>) support
22//! - Bug Fix #1: Strip .freeze suffix from strings
23//! - Bug Fix #4: Correct dependency scope mapping (:runtime → None, :development → "development")
24//!
25//! # Implementation Notes
26//! - Uses regex for pattern matching (not full Ruby AST)
27//! - Graceful error handling: logs warnings and returns default on parse failure
28//! - PURL type: "gem"
29
30use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Party};
31use crate::parser_warn as warn;
32use crate::parsers::utils::{
33    MAX_ITERATION_COUNT, read_file_to_string, split_name_email, truncate_field,
34};
35use flate2::read::GzDecoder;
36use packageurl::PackageUrl;
37use regex::Regex;
38use std::collections::HashMap;
39use std::fs::{self, File};
40use std::io::Read;
41use std::path::{Path, PathBuf};
42use tar::Archive;
43
44use super::PackageParser;
45use super::license_normalization::normalize_spdx_declared_license;
46
47const PACKAGE_TYPE: PackageType = PackageType::Gem;
48
49// =============================================================================
50// Bug Fix #1: Strip .freeze suffix from strings
51// =============================================================================
52
/// Removes every trailing `.freeze` call from a Ruby string expression.
///
/// Ruby's `.freeze` only marks a string as immutable, so it is dropped
/// before gem names and versions are recorded. Repeated suffixes are all
/// removed: `"name".freeze` yields `"name"` and `'1.0.0'.freeze.freeze`
/// yields `'1.0.0'`. Input without the suffix is returned unchanged.
pub fn strip_freeze_suffix(s: &str) -> &str {
    let mut remainder = s;
    while let Some(shorter) = remainder.strip_suffix(".freeze") {
        remainder = shorter;
    }
    remainder
}
63
/// A `do ... end` block that is open while a Gemfile is being scanned.
enum GemfileBlock {
    /// A `group ... do` block; holds the group names taken from its header.
    Group(Vec<String>),
    /// A `source "..." do` block; holds the quoted source from its header.
    Source(String),
}
68
69// =============================================================================
70// Gemfile Parser (Ruby DSL)
71// =============================================================================
72
73/// Ruby Gemfile parser for manifest files.
74///
75/// Parses Ruby DSL syntax to extract gem declarations, dependency groups,
76/// platform-specific gems, and version constraints.
77pub struct GemfileParser;
78
79impl PackageParser for GemfileParser {
80    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
81
82    fn extract_packages(path: &Path) -> Vec<PackageData> {
83        let datasource_id = gemfile_datasource_id(path);
84        let content = match read_file_to_string(path, None) {
85            Ok(c) => c,
86            Err(e) => {
87                warn!("Failed to read Gemfile at {:?}: {}", path, e);
88                return vec![default_package_data_with_datasource(datasource_id)];
89            }
90        };
91
92        let mut package_data = parse_gemfile(&content);
93        package_data.datasource_id = Some(datasource_id);
94        vec![package_data]
95    }
96
97    fn is_match(path: &Path) -> bool {
98        path.file_name()
99            .and_then(|n| n.to_str())
100            .is_some_and(|name| name == "Gemfile")
101            || path
102                .to_str()
103                .is_some_and(|p| p.contains("data.gz-extract/") && p.ends_with("/Gemfile"))
104    }
105}
106
107/// Parses Gemfile content and extracts dependencies with groups.
108fn parse_gemfile(content: &str) -> PackageData {
109    let mut dependencies = Vec::new();
110    let mut block_stack = Vec::new();
111    let mut default_source = None;
112    let mut sources = Vec::new();
113
114    // Regex patterns for Gemfile parsing
115    // gem "name", "version", options...
116    let gem_regex = match Regex::new(
117        r#"^\s*gem\s+["']([^"']+)["'](?:\.freeze)?(?:\s*,\s*["']([^"']+)["'](?:\.freeze)?)?(?:\s*,\s*["']([^"']+)["'](?:\.freeze)?)?(?:\s*,\s*(.+))?"#,
118    ) {
119        Ok(r) => r,
120        Err(e) => {
121            warn!("Failed to compile gem regex: {}", e);
122            return default_package_data_with_datasource(DatasourceId::Gemfile);
123        }
124    };
125
126    // group :name do ... end
127    let group_start_regex = match Regex::new(r"^\s*group\s+(.+?)\s+do\s*$") {
128        Ok(r) => r,
129        Err(e) => {
130            warn!("Failed to compile group regex: {}", e);
131            return default_package_data_with_datasource(DatasourceId::Gemfile);
132        }
133    };
134
135    let group_end_regex = match Regex::new(r"^\s*end\s*$") {
136        Ok(r) => r,
137        Err(e) => {
138            warn!("Failed to compile end regex: {}", e);
139            return default_package_data_with_datasource(DatasourceId::Gemfile);
140        }
141    };
142
143    let source_block_start_regex = match Regex::new(r#"^\s*source\s+["']([^"']+)["']\s+do\s*$"#) {
144        Ok(r) => r,
145        Err(e) => {
146            warn!("Failed to compile source block regex: {}", e);
147            return default_package_data_with_datasource(DatasourceId::Gemfile);
148        }
149    };
150
151    let source_regex = match Regex::new(r#"^\s*source\s+["']([^"']+)["']\s*$"#) {
152        Ok(r) => r,
153        Err(e) => {
154            warn!("Failed to compile source regex: {}", e);
155            return default_package_data_with_datasource(DatasourceId::Gemfile);
156        }
157    };
158
159    // Parse symbols like :development, :test
160    let symbol_regex = match Regex::new(r":(\w+)") {
161        Ok(r) => r,
162        Err(e) => {
163            warn!("Failed to compile symbol regex: {}", e);
164            return default_package_data_with_datasource(DatasourceId::Gemfile);
165        }
166    };
167
168    for line in content.lines().take(MAX_ITERATION_COUNT) {
169        let trimmed = line.trim();
170
171        // Skip comments and empty lines
172        if trimmed.is_empty() || trimmed.starts_with('#') {
173            continue;
174        }
175
176        // Check for group start
177        if let Some(caps) = group_start_regex.captures(trimmed) {
178            let groups_str = caps.get(1).map(|m| m.as_str()).unwrap_or("");
179            let mut current_groups = Vec::new();
180            for cap in symbol_regex.captures_iter(groups_str) {
181                if let Some(group_name) = cap.get(1) {
182                    current_groups.push(group_name.as_str().to_string());
183                }
184            }
185            block_stack.push(GemfileBlock::Group(current_groups));
186            continue;
187        }
188
189        if let Some(caps) = source_block_start_regex.captures(trimmed) {
190            let source = caps
191                .get(1)
192                .map(|m| m.as_str().to_string())
193                .unwrap_or_default();
194            if !source.is_empty() {
195                push_unique_string(&mut sources, source.clone());
196                block_stack.push(GemfileBlock::Source(source));
197            }
198            continue;
199        }
200
201        if let Some(caps) = source_regex.captures(trimmed) {
202            if let Some(source) = caps.get(1).map(|m| m.as_str().to_string()) {
203                push_unique_string(&mut sources, source.clone());
204                default_source = Some(source);
205            }
206            continue;
207        }
208
209        // Check for group end
210        if group_end_regex.is_match(trimmed) {
211            block_stack.pop();
212            continue;
213        }
214
215        // Parse gem declaration
216        if let Some(caps) = gem_regex.captures(trimmed) {
217            let name = strip_freeze_suffix(caps.get(1).map(|m| m.as_str()).unwrap_or(""));
218            if name.is_empty() {
219                continue;
220            }
221
222            // Collect version constraints
223            let mut version_parts = Vec::new();
224            if let Some(v) = caps.get(2) {
225                version_parts.push(strip_freeze_suffix(v.as_str()).to_string());
226            }
227            if let Some(v) = caps.get(3) {
228                let v_str = strip_freeze_suffix(v.as_str());
229                // Check if it looks like a version constraint
230                if looks_like_version_constraint(v_str) {
231                    version_parts.push(v_str.to_string());
232                }
233            }
234
235            let extracted_requirement = if version_parts.is_empty() {
236                None
237            } else {
238                Some(version_parts.join(", "))
239            };
240
241            let current_groups = current_group_names(&block_stack);
242
243            // Determine scope based on current group
244            // Bug Fix #4: :runtime → None, :development → "development"
245            let (scope, is_runtime, is_optional) = if current_groups.is_empty() {
246                // No group = runtime dependency
247                (None, true, false)
248            } else if current_groups.iter().any(|g| g == "development") {
249                (Some("development".to_string()), false, true)
250            } else if current_groups.iter().any(|g| g == "test") {
251                (Some("test".to_string()), false, true)
252            } else {
253                // Other groups (e.g., :production)
254                let group = current_groups.first().cloned();
255                (group, true, false)
256            };
257
258            // Create PURL
259            let purl = create_gem_purl(name, None);
260            let inherited_source = current_source(&block_stack, default_source.as_deref());
261            let extra_data = build_gemfile_dependency_extra_data(
262                caps.get(4).map(|m| m.as_str()),
263                inherited_source.as_deref(),
264            );
265
266            dependencies.push(Dependency {
267                purl,
268                extracted_requirement,
269                scope,
270                is_runtime: Some(is_runtime),
271                is_optional: Some(is_optional),
272                is_pinned: None,
273                is_direct: Some(true),
274                resolved_package: None,
275                extra_data,
276            });
277        }
278    }
279
280    let extra_data = if sources.is_empty() {
281        None
282    } else {
283        Some(HashMap::from([(
284            "sources".to_string(),
285            serde_json::Value::Array(sources.into_iter().map(serde_json::Value::String).collect()),
286        )]))
287    };
288
289    PackageData {
290        package_type: Some(PACKAGE_TYPE),
291        primary_language: Some("Ruby".to_string()),
292        dependencies,
293        extra_data,
294        datasource_id: Some(DatasourceId::Gemfile),
295        ..default_package_data()
296    }
297}
298
299fn current_group_names(block_stack: &[GemfileBlock]) -> Vec<String> {
300    block_stack
301        .iter()
302        .rev()
303        .find_map(|block| match block {
304            GemfileBlock::Group(groups) => Some(groups.clone()),
305            GemfileBlock::Source(_) => None,
306        })
307        .unwrap_or_default()
308}
309
310fn current_source(block_stack: &[GemfileBlock], default_source: Option<&str>) -> Option<String> {
311    block_stack
312        .iter()
313        .rev()
314        .find_map(|block| match block {
315            GemfileBlock::Source(source) => Some(source.clone()),
316            GemfileBlock::Group(_) => None,
317        })
318        .or_else(|| default_source.map(str::to_string))
319}
320
/// Appends `value` to `values` unless an equal string is already present.
fn push_unique_string(values: &mut Vec<String>, value: String) {
    let already_present = values.iter().any(|existing| *existing == value);
    if already_present {
        return;
    }
    values.push(value);
}
326
327fn build_gemfile_dependency_extra_data(
328    options: Option<&str>,
329    inherited_source: Option<&str>,
330) -> Option<HashMap<String, serde_json::Value>> {
331    let mut extra = HashMap::new();
332    let options = options.unwrap_or("");
333
334    if let Some(git) = extract_gemfile_quoted_option(options, "git") {
335        extra.insert(
336            "source_type".to_string(),
337            serde_json::Value::String("GIT".to_string()),
338        );
339        extra.insert("git".to_string(), serde_json::Value::String(git.clone()));
340        extra.insert("remote".to_string(), serde_json::Value::String(git));
341    }
342
343    if let Some(path) = extract_gemfile_quoted_option(options, "path") {
344        extra.insert(
345            "source_type".to_string(),
346            serde_json::Value::String("PATH".to_string()),
347        );
348        extra.insert("path".to_string(), serde_json::Value::String(path));
349    }
350
351    for key in ["branch", "ref", "tag"] {
352        if let Some(value) = extract_gemfile_quoted_option(options, key) {
353            extra.insert(key.to_string(), serde_json::Value::String(value));
354        }
355    }
356
357    let direct_source = extract_gemfile_quoted_option(options, "source");
358    if let Some(source) = direct_source {
359        extra.insert("source".to_string(), serde_json::Value::String(source));
360    } else if !extra.contains_key("source_type")
361        && let Some(source) = inherited_source
362    {
363        extra.insert(
364            "source".to_string(),
365            serde_json::Value::String(source.to_string()),
366        );
367    }
368
369    (!extra.is_empty()).then_some(extra)
370}
371
372fn extract_gemfile_quoted_option(options: &str, key: &str) -> Option<String> {
373    if options.is_empty() {
374        return None;
375    }
376
377    let pattern = format!(r#"(?:^|,\s*){}\s*:\s*["']([^"']+)["']"#, regex::escape(key));
378    Regex::new(&pattern)
379        .ok()
380        .and_then(|regex| regex.captures(options))
381        .and_then(|captures| captures.get(1).map(|m| m.as_str().to_string()))
382}
383
/// Reports whether `s` starts like a version constraint: with one of the
/// operator characters `~`, `>`, `<`, `=`, `!`, or with an ASCII digit.
/// An empty string is not a constraint.
fn looks_like_version_constraint(s: &str) -> bool {
    match s.chars().next() {
        Some('~' | '>' | '<' | '=' | '!') => true,
        Some(first) => first.is_ascii_digit(),
        None => false,
    }
}
393
394// =============================================================================
395// Gemfile.lock Parser (State Machine)
396// =============================================================================
397
398/// Ruby Gemfile.lock parser for lockfiles.
399///
400/// Uses a state machine to parse sections: GEM, GIT, PATH, SVN,
401/// PLATFORMS, BUNDLED WITH, DEPENDENCIES.
402pub struct GemfileLockParser;
403
404impl PackageParser for GemfileLockParser {
405    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
406
407    fn extract_packages(path: &Path) -> Vec<PackageData> {
408        let datasource_id = gemfile_lock_datasource_id(path);
409        let content = match read_file_to_string(path, None) {
410            Ok(c) => c,
411            Err(e) => {
412                warn!("Failed to read Gemfile.lock at {:?}: {}", path, e);
413                return vec![default_package_data_with_datasource(datasource_id)];
414            }
415        };
416
417        let mut package_data = parse_gemfile_lock(&content);
418        package_data.datasource_id = Some(datasource_id);
419        vec![package_data]
420    }
421
422    fn is_match(path: &Path) -> bool {
423        path.file_name()
424            .and_then(|n| n.to_str())
425            .is_some_and(|name| name == "Gemfile.lock")
426            || path
427                .to_str()
428                .is_some_and(|p| p.contains("data.gz-extract/") && p.ends_with("/Gemfile.lock"))
429    }
430}
431
/// Parse state for Gemfile.lock state machine.
///
/// The state is switched by section header lines and by the `specs:`
/// sub-header; it decides how each following line is interpreted.
#[derive(Debug, Clone, PartialEq)]
enum ParseState {
    /// No section header seen yet; lines are ignored.
    None,
    /// Inside a `GEM` section, before `specs:`; `key: value` options are read.
    Gem,
    /// Inside a `GIT` section, before `specs:`; `key: value` options are read.
    Git,
    /// Inside a `PATH` section, before `specs:`; `key: value` options are read.
    Path,
    /// Inside an `SVN` section, before `specs:`; `key: value` options are read.
    Svn,
    /// After a `specs:` line of a GEM/GIT/PATH/SVN section; gem entries are read.
    Specs,
    /// Inside `PLATFORMS`; each non-empty line is recorded as a platform.
    Platforms,
    /// Inside `BUNDLED WITH`; the Bundler version is read.
    BundledWith,
    /// Inside `DEPENDENCIES`; direct dependency entries are read.
    Dependencies,
}
445
/// Parsed gem information from Gemfile.lock.
///
/// All fields are actively used:
/// - `gem_type`, `remote`, `revision`, `ref_field`, `branch`, `tag`: Stored in extra_data for GIT/PATH/SVN sources
/// - `name`, `version`, `platform`, `pinned`: Used for dependency PURL and metadata generation
/// - `requirements`: Stored as extracted_requirement for version constraints
#[derive(Debug, Clone, Default)]
struct GemInfo {
    /// Gem name as written in the lockfile; also the key the gem is stored under.
    name: String,
    /// Version from a `specs:` entry; `None` when the entry has no
    /// parenthesised version or the gem only appears under DEPENDENCIES.
    version: Option<String>,
    /// Platform suffix split off the version (the text after the first `-`).
    platform: Option<String>,
    /// Section the gem was found in: "GEM", "GIT", "PATH" or "SVN".
    /// Gems that only appear under DEPENDENCIES are recorded as "GEM".
    gem_type: String,
    /// Value of the section's `remote:` option, if any.
    remote: Option<String>,
    /// Value of the section's `revision:` option, if any.
    revision: Option<String>,
    /// Value of the section's `ref:` option, if any.
    ref_field: Option<String>,
    /// Value of the section's `branch:` option, if any.
    branch: Option<String>,
    /// Value of the section's `tag:` option, if any.
    tag: Option<String>,
    /// Whether the gem's DEPENDENCIES entry carries the trailing `!` marker.
    pinned: bool,
    /// Constraint text taken from the parentheses of DEPENDENCIES entries.
    requirements: Vec<String>,
}
466
467fn select_primary_path_gem(gems: &HashMap<String, GemInfo>) -> Option<GemInfo> {
468    let mut path_gems: Vec<&GemInfo> = gems.values().filter(|gem| gem.gem_type == "PATH").collect();
469    path_gems.sort_by(|left, right| {
470        left.remote
471            .as_deref()
472            .cmp(&right.remote.as_deref())
473            .then_with(|| left.name.cmp(&right.name))
474    });
475
476    path_gems
477        .iter()
478        .copied()
479        .find(|gem| gem.pinned && gem.remote.as_deref() == Some("."))
480        .or_else(|| path_gems.iter().copied().find(|gem| gem.pinned))
481        .or_else(|| {
482            path_gems
483                .iter()
484                .copied()
485                .find(|gem| gem.remote.as_deref() == Some("."))
486        })
487        .or_else(|| path_gems.first().copied())
488        .cloned()
489}
490
491/// Parses Gemfile.lock content using a state machine.
492fn parse_gemfile_lock(content: &str) -> PackageData {
493    let mut state = ParseState::None;
494    let mut dependencies = Vec::new();
495    let mut gems: HashMap<String, GemInfo> = HashMap::new();
496    let mut platforms: Vec<String> = Vec::new();
497    let mut bundler_version: Option<String> = None;
498    let mut current_gem_type = String::new();
499    let mut current_remote: Option<String> = None;
500    let mut current_options: HashMap<String, String> = HashMap::new();
501
502    // DEPS pattern: 2 spaces at line start
503    let deps_regex = match Regex::new(r"^ {2}([^ \)\(,!:]+)(?: \(([^)]+)\))?(!)?$") {
504        Ok(r) => r,
505        Err(e) => {
506            warn!("Failed to compile deps regex: {}", e);
507            return default_package_data_with_datasource(DatasourceId::GemfileLock);
508        }
509    };
510
511    // SPEC_DEPS pattern: 4 spaces at line start
512    let spec_deps_regex = match Regex::new(r"^ {4}([^ \)\(,!:]+)(?: \(([^)]+)\))?$") {
513        Ok(r) => r,
514        Err(e) => {
515            warn!("Failed to compile spec_deps regex: {}", e);
516            return default_package_data_with_datasource(DatasourceId::GemfileLock);
517        }
518    };
519
520    // OPTIONS pattern: key: value
521    let options_regex = match Regex::new(r"^ {2}([a-z]+): (.+)$") {
522        Ok(r) => r,
523        Err(e) => {
524            warn!("Failed to compile options regex: {}", e);
525            return default_package_data_with_datasource(DatasourceId::GemfileLock);
526        }
527    };
528
529    // VERSION pattern for BUNDLED WITH
530    let version_regex = match Regex::new(r"^\s+(\d+(?:\.\d+)+)\s*$") {
531        Ok(r) => r,
532        Err(e) => {
533            warn!("Failed to compile version regex: {}", e);
534            return default_package_data_with_datasource(DatasourceId::GemfileLock);
535        }
536    };
537
538    for line in content.lines().take(MAX_ITERATION_COUNT) {
539        let trimmed = line.trim_end();
540
541        // Empty line resets state
542        if trimmed.is_empty() {
543            current_options.clear();
544            continue;
545        }
546
547        // Section headers (no leading whitespace) and sub-section headers
548        match trimmed {
549            "GEM" => {
550                state = ParseState::Gem;
551                current_gem_type = "GEM".to_string();
552                current_remote = None;
553                current_options.clear();
554                continue;
555            }
556            "GIT" => {
557                state = ParseState::Git;
558                current_gem_type = "GIT".to_string();
559                current_remote = None;
560                current_options.clear();
561                continue;
562            }
563            "PATH" => {
564                state = ParseState::Path;
565                current_gem_type = "PATH".to_string();
566                current_remote = None;
567                current_options.clear();
568                continue;
569            }
570            "SVN" => {
571                state = ParseState::Svn;
572                current_gem_type = "SVN".to_string();
573                current_remote = None;
574                current_options.clear();
575                continue;
576            }
577            "PLATFORMS" => {
578                state = ParseState::Platforms;
579                continue;
580            }
581            "BUNDLED WITH" => {
582                state = ParseState::BundledWith;
583                continue;
584            }
585            "DEPENDENCIES" => {
586                state = ParseState::Dependencies;
587                continue;
588            }
589            _ => {}
590        }
591
592        // Check for "  specs:" sub-section header (2-space indent) within
593        // GEM/GIT/PATH/SVN sections. This must be checked separately because
594        // the leading whitespace is preserved by trim_end().
595        if trimmed.trim() == "specs:" {
596            state = match state {
597                ParseState::Gem | ParseState::Git | ParseState::Path | ParseState::Svn => {
598                    ParseState::Specs
599                }
600                _ => state,
601            };
602            continue;
603        }
604
605        // Process based on current state
606        match state {
607            ParseState::Gem | ParseState::Git | ParseState::Path | ParseState::Svn => {
608                // Parse options (remote:, revision:, ref:, branch:, tag:)
609                if let Some(caps) = options_regex.captures(line) {
610                    let key = caps.get(1).map(|m| m.as_str()).unwrap_or("");
611                    let value = caps.get(2).map(|m| m.as_str()).unwrap_or("");
612                    current_options.insert(key.to_string(), value.to_string());
613                    if key == "remote" {
614                        current_remote = Some(value.to_string());
615                    }
616                }
617            }
618            ParseState::Specs => {
619                // Parse gem specs (4 spaces indent)
620                if let Some(caps) = spec_deps_regex.captures(line) {
621                    let name = caps.get(1).map(|m| m.as_str()).unwrap_or("").to_string();
622                    let version_str = caps.get(2).map(|m| m.as_str()).unwrap_or("");
623
624                    // Parse version and platform
625                    let (version, platform) = parse_version_platform(version_str);
626
627                    if !name.is_empty() {
628                        let gem_info = GemInfo {
629                            name: name.clone(),
630                            version,
631                            platform,
632                            gem_type: current_gem_type.clone(),
633                            remote: current_remote.clone(),
634                            revision: current_options.get("revision").cloned(),
635                            ref_field: current_options.get("ref").cloned(),
636                            branch: current_options.get("branch").cloned(),
637                            tag: current_options.get("tag").cloned(),
638                            pinned: false,
639                            requirements: Vec::new(),
640                        };
641                        gems.insert(name, gem_info);
642                    }
643                }
644            }
645            ParseState::Platforms => {
646                // Parse platform entries (2 spaces indent)
647                let platform = trimmed.trim();
648                if !platform.is_empty() {
649                    platforms.push(platform.to_string());
650                }
651            }
652            ParseState::BundledWith => {
653                // Parse bundler version
654                if let Some(caps) = version_regex.captures(line) {
655                    bundler_version = caps.get(1).map(|m| m.as_str().to_string());
656                }
657            }
658            ParseState::Dependencies => {
659                // Parse direct dependencies (2 spaces indent)
660                if let Some(caps) = deps_regex.captures(line) {
661                    let name = caps.get(1).map(|m| m.as_str()).unwrap_or("").to_string();
662                    let version_constraint = caps.get(2).map(|m| m.as_str().to_string());
663                    let pinned = caps.get(3).is_some();
664
665                    if !name.is_empty() {
666                        // Update gem info if exists, or create new
667                        if let Some(gem) = gems.get_mut(&name) {
668                            gem.pinned = pinned;
669                            if let Some(vc) = &version_constraint {
670                                gem.requirements.push(vc.clone());
671                            }
672                        } else {
673                            let gem_info = GemInfo {
674                                name: name.clone(),
675                                version: None,
676                                platform: None,
677                                gem_type: "GEM".to_string(),
678                                remote: None,
679                                revision: None,
680                                ref_field: None,
681                                branch: None,
682                                tag: None,
683                                pinned,
684                                requirements: version_constraint.into_iter().collect(),
685                            };
686                            gems.insert(name, gem_info);
687                        }
688                    }
689                }
690            }
691            ParseState::None => {}
692        }
693    }
694
695    let primary_gem = select_primary_path_gem(&gems);
696
697    let (
698        package_name,
699        package_version,
700        repository_homepage_url,
701        repository_download_url,
702        api_data_url,
703        download_url,
704    ) = if let Some(ref pg) = primary_gem {
705        let urls = get_rubygems_urls(&pg.name, pg.version.as_deref(), pg.platform.as_deref());
706        (
707            Some(pg.name.clone()),
708            pg.version.clone(),
709            urls.0,
710            urls.1,
711            urls.2,
712            urls.3,
713        )
714    } else {
715        (None, None, None, None, None, None)
716    };
717
718    for (_, gem) in gems {
719        if let Some(ref pg) = primary_gem
720            && gem.name == pg.name
721        {
722            continue;
723        }
724
725        let version_for_purl = gem.version.as_deref();
726        let purl = create_gem_purl(&gem.name, version_for_purl);
727
728        let extracted_requirement = if !gem.requirements.is_empty() {
729            Some(gem.requirements.join(", "))
730        } else {
731            gem.version.clone()
732        };
733
734        let extra_data = build_gem_source_extra_data(&gem);
735
736        dependencies.push(Dependency {
737            purl,
738            extracted_requirement,
739            scope: Some("dependencies".to_string()),
740            is_runtime: Some(true),
741            is_optional: Some(false),
742            is_pinned: Some(gem.pinned),
743            is_direct: Some(true),
744            resolved_package: None,
745            extra_data,
746        });
747    }
748
749    dependencies.sort_by(|left, right| {
750        left.purl
751            .as_deref()
752            .cmp(&right.purl.as_deref())
753            .then_with(|| {
754                left.extracted_requirement
755                    .as_deref()
756                    .cmp(&right.extracted_requirement.as_deref())
757            })
758    });
759
760    // Build extra_data
761    let mut extra_data = HashMap::new();
762    if !platforms.is_empty() {
763        extra_data.insert(
764            "platforms".to_string(),
765            serde_json::Value::Array(
766                platforms
767                    .into_iter()
768                    .map(serde_json::Value::String)
769                    .collect(),
770            ),
771        );
772    }
773    if let Some(bv) = bundler_version {
774        extra_data.insert("bundler_version".to_string(), serde_json::Value::String(bv));
775    }
776
777    let purl = package_name
778        .as_deref()
779        .map(|n| create_gem_purl(n, package_version.as_deref()))
780        .unwrap_or(None);
781
782    PackageData {
783        package_type: Some(PACKAGE_TYPE),
784        name: package_name,
785        version: package_version,
786        primary_language: Some("Ruby".to_string()),
787        download_url,
788        dependencies,
789        repository_homepage_url,
790        repository_download_url,
791        api_data_url,
792        extra_data: if extra_data.is_empty() {
793            None
794        } else {
795            Some(extra_data)
796        },
797        datasource_id: Some(DatasourceId::GemfileLock),
798        purl,
799        ..default_package_data()
800    }
801}
802
803fn build_gem_source_extra_data(gem: &GemInfo) -> Option<HashMap<String, serde_json::Value>> {
804    if gem.gem_type != "GIT" && gem.gem_type != "PATH" && gem.gem_type != "SVN" {
805        return None;
806    }
807
808    let mut extra = HashMap::new();
809    extra.insert(
810        "source_type".to_string(),
811        serde_json::Value::String(gem.gem_type.clone()),
812    );
813
814    if let Some(ref remote) = gem.remote {
815        extra.insert(
816            "remote".to_string(),
817            serde_json::Value::String(remote.clone()),
818        );
819    }
820    if let Some(ref revision) = gem.revision {
821        extra.insert(
822            "revision".to_string(),
823            serde_json::Value::String(revision.clone()),
824        );
825    }
826    if let Some(ref ref_field) = gem.ref_field {
827        extra.insert(
828            "ref".to_string(),
829            serde_json::Value::String(ref_field.clone()),
830        );
831    }
832    if let Some(ref branch) = gem.branch {
833        extra.insert(
834            "branch".to_string(),
835            serde_json::Value::String(branch.clone()),
836        );
837    }
838    if let Some(ref tag) = gem.tag {
839        extra.insert("tag".to_string(), serde_json::Value::String(tag.clone()));
840    }
841
842    Some(extra)
843}
844
/// Splits a lockfile version string into version and platform at the first `-`.
///
/// Examples: `"2.6.3"` gives `("2.6.3", None)`, `"2.6.3-java"` gives
/// `("2.6.3", Some("java"))`, and an empty string gives `(None, None)`.
fn parse_version_platform(s: &str) -> (Option<String>, Option<String>) {
    if s.is_empty() {
        return (None, None);
    }
    match s.split_once('-') {
        Some((version, platform)) => (Some(version.to_string()), Some(platform.to_string())),
        None => (Some(s.to_string()), None),
    }
}
859
860/// Creates a gem PURL.
861fn create_gem_purl(name: &str, version: Option<&str>) -> Option<String> {
862    let mut purl = match PackageUrl::new(PACKAGE_TYPE.as_str(), name) {
863        Ok(p) => p,
864        Err(e) => {
865            warn!("Failed to create PURL for gem '{}': {}", name, e);
866            return None;
867        }
868    };
869
870    if let Some(v) = version
871        && let Err(e) = purl.with_version(v)
872    {
873        warn!("Failed to set version '{}' for gem '{}': {}", v, name, e);
874    }
875
876    Some(purl.to_string())
877}
878
/// Builds the rubygems.org page URL for a gem.
///
/// Returns `None` for an empty name. With a version, the version-specific
/// page is used; surrounding whitespace and `/` are stripped from the version.
fn rubygems_homepage_url(name: &str, version: Option<&str>) -> Option<String> {
    if name.is_empty() {
        return None;
    }

    let url = match version {
        Some(raw) => format!(
            "https://rubygems.org/gems/{}/versions/{}",
            name,
            raw.trim().trim_matches('/')
        ),
        None => format!("https://rubygems.org/gems/{}", name),
    };
    Some(url)
}
891
/// Builds the rubygems.org download URL of a `.gem` file.
///
/// Returns `None` when the name is empty or no version is known. Name and
/// version are stripped of surrounding whitespace and `/`. A platform other
/// than `"ruby"` is appended to the version as `-<platform>`.
fn rubygems_download_url(
    name: &str,
    version: Option<&str>,
    platform: Option<&str>,
) -> Option<String> {
    if name.is_empty() {
        return None;
    }
    let version = version?.trim().trim_matches('/');
    let name = name.trim().trim_matches('/');

    let file_version = match platform {
        Some(p) if p != "ruby" => format!("{}-{}", version, p),
        _ => version.to_string(),
    };

    Some(format!(
        "https://rubygems.org/downloads/{}-{}.gem",
        name, file_version
    ))
}
919
/// Builds the rubygems.org API URL describing a gem.
///
/// Returns `None` for an empty name. With a version the v2 per-version
/// endpoint is used, otherwise the v1 versions listing.
fn rubygems_api_url(name: &str, version: Option<&str>) -> Option<String> {
    if name.is_empty() {
        return None;
    }

    let url = match version {
        Some(v) => format!(
            "https://rubygems.org/api/v2/rubygems/{}/versions/{}.json",
            name, v
        ),
        None => format!(
            "https://rubygems.org/api/v1/versions/{}.json",
            name
        ),
    };
    Some(url)
}
937
938fn get_rubygems_urls(
939    name: &str,
940    version: Option<&str>,
941    platform: Option<&str>,
942) -> (
943    Option<String>,
944    Option<String>,
945    Option<String>,
946    Option<String>,
947) {
948    let repository_homepage_url = rubygems_homepage_url(name, version);
949    let repository_download_url = rubygems_download_url(name, version, platform);
950    let api_data_url = rubygems_api_url(name, version);
951    let download_url = repository_download_url.clone();
952
953    (
954        repository_homepage_url,
955        repository_download_url,
956        api_data_url,
957        download_url,
958    )
959}
960
961/// Returns a default PackageData with gem-specific settings.
962fn default_package_data() -> PackageData {
963    PackageData {
964        package_type: Some(PACKAGE_TYPE),
965        primary_language: Some("Ruby".to_string()),
966        ..Default::default()
967    }
968}
969
970fn default_package_data_with_datasource(datasource_id: DatasourceId) -> PackageData {
971    PackageData {
972        datasource_id: Some(datasource_id),
973        ..default_package_data()
974    }
975}
976
977// =============================================================================
978// Gemspec Parser (Ruby DSL)
979// =============================================================================
980
981/// Ruby .gemspec file parser.
982///
983/// Parses `Gem::Specification.new` blocks using regex-based extraction.
984/// Handles frozen strings (Bug #1), variable version resolution (Bug #2),
985/// and RFC 5322 email parsing (Bug #6).
986pub struct GemspecParser;
987
988impl PackageParser for GemspecParser {
989    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
990
991    fn extract_packages(path: &Path) -> Vec<PackageData> {
992        let datasource_id = gemspec_datasource_id(path);
993        let content = match read_file_to_string(path, None) {
994            Ok(c) => c,
995            Err(e) => {
996                warn!("Failed to read .gemspec at {:?}: {}", path, e);
997                return vec![default_package_data_with_datasource(datasource_id)];
998            }
999        };
1000
1001        let mut package_data = parse_gemspec_with_context(&content, path.parent());
1002        package_data.datasource_id = Some(datasource_id);
1003        vec![package_data]
1004    }
1005
1006    fn is_match(path: &Path) -> bool {
1007        path.extension()
1008            .and_then(|ext| ext.to_str())
1009            .is_some_and(|ext| ext == "gemspec")
1010    }
1011}
1012
/// Renders a path as text with every backslash turned into a forward slash,
/// so substring checks on path segments work for either separator style.
fn normalized_ruby_path(path: &Path) -> String {
    let lossy = path.to_string_lossy();
    lossy
        .chars()
        .map(|c| if c == '\\' { '/' } else { c })
        .collect()
}
1016
1017fn gemfile_datasource_id(path: &Path) -> DatasourceId {
1018    if normalized_ruby_path(path).contains("/data.gz-extract/") {
1019        DatasourceId::GemfileExtracted
1020    } else {
1021        DatasourceId::Gemfile
1022    }
1023}
1024
1025fn gemfile_lock_datasource_id(path: &Path) -> DatasourceId {
1026    if normalized_ruby_path(path).contains("/data.gz-extract/") {
1027        DatasourceId::GemfileLockExtracted
1028    } else {
1029        DatasourceId::GemfileLock
1030    }
1031}
1032
1033fn gemspec_datasource_id(path: &Path) -> DatasourceId {
1034    let normalized = normalized_ruby_path(path);
1035    if normalized.contains("/data.gz-extract/") {
1036        DatasourceId::GemspecExtracted
1037    } else if normalized.contains("/specifications/") {
1038        DatasourceId::GemGemspecInstalledSpecifications
1039    } else {
1040        DatasourceId::Gemspec
1041    }
1042}
1043
1044/// Cleans a value extracted from gemspec by stripping quotes, .freeze, %q{}, and brackets.
1045fn clean_gemspec_value(s: &str) -> String {
1046    let s = strip_freeze_suffix(s).trim();
1047
1048    let s = if let Some(pos) = s.find(" #") {
1049        s[..pos].trim()
1050    } else {
1051        s
1052    };
1053
1054    let s = if let Some(stripped) = s.strip_prefix("%q{") {
1055        stripped.strip_suffix('}').unwrap_or(stripped)
1056    } else if let Some(stripped) = s.strip_prefix("%q<") {
1057        stripped.strip_suffix('>').unwrap_or(stripped)
1058    } else if let Some(stripped) = s.strip_prefix("%q[") {
1059        stripped.strip_suffix(']').unwrap_or(stripped)
1060    } else if let Some(stripped) = s.strip_prefix("%q(") {
1061        stripped.strip_suffix(')').unwrap_or(stripped)
1062    } else {
1063        s
1064    };
1065
1066    let s = s
1067        .trim_start_matches('"')
1068        .trim_end_matches('"')
1069        .trim_start_matches('\'')
1070        .trim_end_matches('\'');
1071    let s = strip_freeze_suffix(s).trim();
1072    s.to_string()
1073}
1074
1075/// Extracts items from a Ruby array literal like `["a", "b", "c"]`.
1076fn extract_ruby_array(s: &str) -> Vec<String> {
1077    let s = strip_freeze_suffix(s.trim());
1078    let s = s.trim_start_matches('[').trim_end_matches(']');
1079    let item_re = match Regex::new(r#"["']([^"']*?)["'](?:\.freeze)?"#) {
1080        Ok(r) => r,
1081        Err(_) => return Vec::new(),
1082    };
1083    item_re
1084        .captures_iter(s)
1085        .filter_map(|cap| cap.get(1).map(|m| m.as_str().to_string()))
1086        .collect()
1087}
1088
1089fn extract_all_ruby_values(s: &str) -> Vec<String> {
1090    let value_re = match Regex::new(r#"%q[\{<\[(]([^\}>\])]+)[\}>\])]|["']([^"']+)["']"#) {
1091        Ok(r) => r,
1092        Err(_) => return Vec::new(),
1093    };
1094
1095    value_re
1096        .captures_iter(s)
1097        .filter_map(|caps| caps.get(1).or_else(|| caps.get(2)))
1098        .map(|m| clean_gemspec_value(m.as_str()))
1099        .collect()
1100}
1101
1102fn extract_first_ruby_value(s: &str) -> Option<String> {
1103    extract_all_ruby_values(s).into_iter().next()
1104}
1105
/// Returns the trimmed text following the first top-level comma in `args`.
///
/// Commas inside quotes, inside `[]` / `{}` / `<>` or inside `()` do not
/// count. Within a quoted string a backslash escapes the next character.
/// Returns `""` when there is no top-level comma.
fn after_first_argument(args: &str) -> &str {
    let mut bracket_level = 0usize;
    let mut paren_level = 0usize;
    let mut open_quote: Option<char> = None;
    let mut skip_escaped = false;

    for (offset, c) in args.char_indices() {
        // Character right after a backslash inside a quoted string.
        if skip_escaped {
            skip_escaped = false;
            continue;
        }

        if let Some(quote) = open_quote {
            if c == '\\' {
                skip_escaped = true;
            } else if c == quote {
                open_quote = None;
            }
            continue;
        }

        match c {
            '\'' | '"' => open_quote = Some(c),
            '[' | '{' | '<' => bracket_level += 1,
            ']' | '}' | '>' => bracket_level = bracket_level.saturating_sub(1),
            '(' => paren_level += 1,
            ')' => paren_level = paren_level.saturating_sub(1),
            ',' if bracket_level == 0 && paren_level == 0 => {
                return args[offset + 1..].trim();
            }
            _ => {}
        }
    }

    ""
}
1143
1144/// Bug #2: Resolves variable version references like `CSV::VERSION` or `RAILS_VERSION`.
1145///
1146/// Scans the file content for constant definitions matching the variable name
1147/// and returns the resolved string value.
1148fn resolve_variable_version(var_name: &str, contexts: &[String]) -> Option<String> {
1149    let var_name = var_name.trim();
1150    if var_name.is_empty() {
1151        return None;
1152    }
1153
1154    for candidate in candidate_constant_names(var_name) {
1155        let escaped = regex::escape(&candidate);
1156        let pattern = format!(r#"(?m)^\s*{}\s*=\s*(.+)$"#, escaped);
1157        let Ok(re) = Regex::new(&pattern) else {
1158            continue;
1159        };
1160
1161        for context in contexts {
1162            if let Some(caps) = re.captures(context)
1163                && let Some(expression) = caps.get(1)
1164                && let Some(resolved) =
1165                    resolve_scalar_expression(expression.as_str(), None, contexts)
1166            {
1167                return Some(resolved);
1168            }
1169        }
1170    }
1171
1172    None
1173}
1174
1175fn resolve_variable_array(var_name: &str, contexts: &[String]) -> Option<Vec<String>> {
1176    let var_name = var_name.trim();
1177    if var_name.is_empty() {
1178        return None;
1179    }
1180
1181    for candidate in candidate_constant_names(var_name) {
1182        let escaped = regex::escape(&candidate);
1183        let pattern = format!(r#"(?m)^\s*{}\s*=\s*(\[[^\n]+\])"#, escaped);
1184        let Ok(re) = Regex::new(&pattern) else {
1185            continue;
1186        };
1187
1188        for context in contexts {
1189            if let Some(caps) = re.captures(context)
1190                && let Some(raw) = caps.get(1)
1191            {
1192                let values = extract_ruby_array(raw.as_str());
1193                if !values.is_empty() {
1194                    return Some(values);
1195                }
1196            }
1197        }
1198    }
1199
1200    None
1201}
1202
/// Lists the names under which a constant may be defined: the name as given
/// and, when it is `::`-qualified, its last segment.
fn candidate_constant_names(var_name: &str) -> Vec<String> {
    let unqualified = var_name
        .split("::")
        .last()
        .filter(|segment| *segment != var_name);

    std::iter::once(var_name)
        .chain(unqualified)
        .map(String::from)
        .collect()
}
1212
/// True when `s` has the shape of a Ruby local variable name: it starts with
/// `_` or an ASCII lowercase letter and continues with `_` or ASCII
/// alphanumerics only. The empty string is rejected.
fn looks_like_local_variable_reference(s: &str) -> bool {
    let mut rest = s.chars();
    match rest.next() {
        Some(first) if first == '_' || first.is_ascii_lowercase() => {
            rest.all(|c| c == '_' || c.is_ascii_alphanumeric())
        }
        _ => false,
    }
}
1218
/// Determines the directory that follow-up file reads must stay inside.
///
/// When the canonical `base_dir` lies within the canonical current working
/// directory, the working directory is the root. Otherwise (or when the
/// working directory is unavailable) the canonical `base_dir` itself is.
/// Returns `None` without a base directory or when it cannot be canonicalized.
fn resolve_ruby_read_root(base_dir: Option<&Path>) -> Option<PathBuf> {
    let base_dir = base_dir?;

    let canonical_base = base_dir.canonicalize().ok();
    let canonical_cwd = std::env::current_dir()
        .ok()
        .and_then(|cwd| cwd.canonicalize().ok());

    match (canonical_cwd, canonical_base) {
        (Some(cwd), Some(base)) if base.starts_with(&cwd) => Some(cwd),
        (_, base) => base,
    }
}
1233
/// Canonicalizes `path` and returns it only when the result lies inside
/// `allowed_root`. Paths that cannot be canonicalized yield `None`.
fn resolve_ruby_read_path(path: PathBuf, allowed_root: &Path) -> Option<PathBuf> {
    match path.canonicalize() {
        Ok(resolved) if resolved.starts_with(allowed_root) => Some(resolved),
        _ => None,
    }
}
1240
1241fn resolve_file_read_argument(args: &str, base_dir: Option<&Path>) -> Option<String> {
1242    let base_dir = base_dir?;
1243    let allowed_root = resolve_ruby_read_root(base_dir.into())?;
1244    let relative_path = extract_first_ruby_value(args)?;
1245    if relative_path.is_empty() {
1246        return None;
1247    }
1248
1249    let candidate = Path::new(&relative_path);
1250    let path = if candidate.is_absolute() {
1251        candidate.to_path_buf()
1252    } else {
1253        base_dir.join(candidate)
1254    };
1255
1256    let safe_path = resolve_ruby_read_path(path, &allowed_root)?;
1257
1258    fs::read_to_string(safe_path)
1259        .ok()
1260        .map(|content| content.trim().to_string())
1261        .filter(|content| !content.is_empty())
1262}
1263
1264fn resolve_scalar_expression(
1265    expression: &str,
1266    base_dir: Option<&Path>,
1267    contexts: &[String],
1268) -> Option<String> {
1269    let expression = if let Some(pos) = expression.find(" #") {
1270        expression[..pos].trim()
1271    } else {
1272        expression.trim()
1273    };
1274
1275    let file_read_re = Regex::new(r#"^File\.read\((.+)\)(?:\.strip)?(?:\.freeze)?$"#).ok()?;
1276    if let Some(caps) = file_read_re.captures(expression) {
1277        return caps
1278            .get(1)
1279            .and_then(|m| resolve_file_read_argument(m.as_str(), base_dir));
1280    }
1281
1282    if let Some(joined) = resolve_joined_constant_string(expression, contexts) {
1283        return Some(joined);
1284    }
1285
1286    if let Some(value) = extract_first_ruby_value(expression) {
1287        return Some(interpolate_ruby_constant_string(&value, contexts));
1288    }
1289
1290    let cleaned = clean_gemspec_value(expression);
1291    if looks_like_constant_reference(&cleaned) {
1292        return resolve_variable_version(&cleaned, contexts).or(Some(cleaned));
1293    }
1294
1295    None
1296}
1297
1298fn resolve_joined_constant_string(expression: &str, contexts: &[String]) -> Option<String> {
1299    let expression = strip_freeze_suffix(expression.trim());
1300    if !expression.starts_with('[') {
1301        return None;
1302    }
1303    let join_index = expression.find("].join(")?;
1304    let body = &expression[1..join_index];
1305    let separator_expr = expression[join_index + 7..].strip_suffix(')')?.trim();
1306    let separator = extract_first_ruby_value(separator_expr)?;
1307
1308    let mut parts = Vec::new();
1309    for item in body.split(',').take(MAX_ITERATION_COUNT) {
1310        let resolved = resolve_scalar_expression(item.trim(), None, contexts)?;
1311        parts.push(resolved);
1312    }
1313
1314    Some(parts.join(&separator))
1315}
1316
1317fn interpolate_ruby_constant_string(value: &str, contexts: &[String]) -> String {
1318    if !value.contains("#{") {
1319        return value.to_string();
1320    }
1321
1322    let Ok(interpolation_re) = Regex::new(r#"#\{([^}]+)\}"#) else {
1323        return value.to_string();
1324    };
1325    interpolation_re
1326        .replace_all(value, |captures: &regex::Captures<'_>| {
1327            let reference = captures
1328                .get(1)
1329                .map(|m| m.as_str().trim())
1330                .unwrap_or_default();
1331            resolve_variable_version(reference, contexts).unwrap_or_else(|| {
1332                captures
1333                    .get(0)
1334                    .map(|value| value.as_str().to_string())
1335                    .unwrap_or_default()
1336            })
1337        })
1338        .into_owned()
1339}
1340
1341fn resolve_local_variable_value(
1342    var_name: &str,
1343    content: &str,
1344    base_dir: Option<&Path>,
1345    contexts: &[String],
1346) -> Option<String> {
1347    let escaped = regex::escape(var_name.trim());
1348    let pattern = format!(r#"(?m)^\s*{}\s*=\s*(.+)$"#, escaped);
1349    let re = Regex::new(&pattern).ok()?;
1350
1351    re.captures_iter(content).find_map(|caps| {
1352        caps.get(1)
1353            .and_then(|m| resolve_scalar_expression(m.as_str(), base_dir, contexts))
1354    })
1355}
1356
1357fn resolve_gemspec_scalar_value(
1358    raw_value: &str,
1359    content: &str,
1360    base_dir: Option<&Path>,
1361    contexts: &[String],
1362) -> Option<String> {
1363    let cleaned = truncate_field(clean_gemspec_value(raw_value));
1364    if cleaned.is_empty() {
1365        return None;
1366    }
1367
1368    if looks_like_constant_reference(&cleaned) {
1369        return resolve_variable_version(&cleaned, contexts)
1370            .map(truncate_field)
1371            .or(Some(cleaned));
1372    }
1373
1374    if looks_like_local_variable_reference(&cleaned) {
1375        return resolve_local_variable_value(&cleaned, content, base_dir, contexts)
1376            .map(truncate_field)
1377            .or(Some(cleaned));
1378    }
1379
1380    Some(cleaned)
1381}
1382
1383fn load_required_ruby_contexts(content: &str, base_dir: Option<&Path>) -> Vec<String> {
1384    let mut contexts = vec![content.to_string()];
1385    let Some(base_dir) = base_dir else {
1386        return contexts;
1387    };
1388    let allowed_root = resolve_ruby_read_root(Some(base_dir));
1389
1390    let require_re = match Regex::new(r#"(?m)^\s*require(?:_relative)?\s+["']([^"']+)["']"#) {
1391        Ok(re) => re,
1392        Err(_) => return contexts,
1393    };
1394
1395    for caps in require_re.captures_iter(content) {
1396        let Some(required) = caps.get(1).map(|m| m.as_str()) else {
1397            continue;
1398        };
1399        for candidate in candidate_require_paths(base_dir, required) {
1400            let Some(safe_candidate) = allowed_root
1401                .as_deref()
1402                .and_then(|root| resolve_ruby_read_path(candidate, root))
1403            else {
1404                continue;
1405            };
1406            if let Ok(required_content) = read_file_to_string(&safe_candidate, None) {
1407                contexts.push(required_content);
1408                break;
1409            }
1410        }
1411    }
1412
1413    contexts
1414}
1415
/// Lists where a `require`d name may live relative to `base_dir`.
///
/// `::` in the name becomes `/`, and `.rb` is appended unless already there.
/// The file is looked for directly under `base_dir` first, then under
/// `base_dir/lib`.
fn candidate_require_paths(base_dir: &Path, required: &str) -> Vec<PathBuf> {
    let mut file_name = required.replace("::", "/");
    if !file_name.ends_with(".rb") {
        file_name.push_str(".rb");
    }

    let lib_dir = base_dir.join("lib");
    [base_dir, lib_dir.as_path()]
        .iter()
        .map(|dir| dir.join(&file_name))
        .collect()
}
1429
/// True when `s` contains `::` or begins with an ASCII uppercase letter.
fn looks_like_constant_reference(s: &str) -> bool {
    let starts_uppercase = matches!(s.chars().next(), Some('A'..='Z'));
    starts_uppercase || s.contains("::")
}
1433
/// Test-only convenience: parses gemspec text with no base directory, so no
/// files besides the given content are consulted.
#[cfg(test)]
fn parse_gemspec(content: &str) -> PackageData {
    let no_base_dir: Option<&Path> = None;
    parse_gemspec_with_context(content, no_base_dir)
}
1439
1440fn parse_gemspec_with_context(content: &str, base_dir: Option<&Path>) -> PackageData {
1441    let contexts = load_required_ruby_contexts(content, base_dir);
1442
1443    // Regex for spec.name = "value" or s.name = "value"
1444    // The spec variable name varies: spec, s, gem, etc.
1445    let field_re = match Regex::new(
1446        r#"(?m)^\s*\w+\.(name|version|summary|description|homepage|license)\s*=\s*(.+)$"#,
1447    ) {
1448        Ok(r) => r,
1449        Err(e) => {
1450            warn!("Failed to compile gemspec field regex: {}", e);
1451            return default_package_data_with_datasource(DatasourceId::Gemspec);
1452        }
1453    };
1454
1455    let licenses_re = match Regex::new(r#"(?m)^\s*\w+\.licenses\s*=\s*(.+)$"#) {
1456        Ok(r) => r,
1457        Err(e) => {
1458            warn!("Failed to compile licenses regex: {}", e);
1459            return default_package_data_with_datasource(DatasourceId::Gemspec);
1460        }
1461    };
1462
1463    let authors_re = match Regex::new(r#"(?m)^\s*\w+\.(?:authors|author)\s*=\s*(.+)$"#) {
1464        Ok(r) => r,
1465        Err(e) => {
1466            warn!("Failed to compile authors regex: {}", e);
1467            return default_package_data_with_datasource(DatasourceId::Gemspec);
1468        }
1469    };
1470
1471    let email_re = match Regex::new(r#"(?m)^\s*\w+\.email\s*=\s*(.+)$"#) {
1472        Ok(r) => r,
1473        Err(e) => {
1474            warn!("Failed to compile email regex: {}", e);
1475            return default_package_data_with_datasource(DatasourceId::Gemspec);
1476        }
1477    };
1478
1479    let dependency_call_re = match Regex::new(
1480        r#"(?m)^\s*\w+\.(add_(?:development_|runtime_)?dependency)\s*\(?(.+?)\)?\s*$"#,
1481    ) {
1482        Ok(r) => r,
1483        Err(e) => {
1484            warn!("Failed to compile gemspec dependency regex: {}", e);
1485            return default_package_data_with_datasource(DatasourceId::Gemspec);
1486        }
1487    };
1488
1489    let mut name: Option<String> = None;
1490    let mut version: Option<String> = None;
1491    let mut summary: Option<String> = None;
1492    let mut description: Option<String> = None;
1493    let mut homepage: Option<String> = None;
1494    let mut license: Option<String> = None;
1495    let mut licenses: Vec<String> = Vec::new();
1496    let mut authors: Vec<String> = Vec::new();
1497    let mut emails: Vec<String> = Vec::new();
1498    let mut dependencies: Vec<Dependency> = Vec::new();
1499
1500    // Extract basic fields
1501    for caps in field_re.captures_iter(content).take(MAX_ITERATION_COUNT) {
1502        let field_name = match caps.get(1) {
1503            Some(m) => m.as_str(),
1504            None => continue,
1505        };
1506        let raw_value = match caps.get(2) {
1507            Some(m) => m.as_str().trim(),
1508            None => continue,
1509        };
1510
1511        match field_name {
1512            "name" => name = resolve_gemspec_scalar_value(raw_value, content, base_dir, &contexts),
1513            "version" => {
1514                version = resolve_gemspec_scalar_value(raw_value, content, base_dir, &contexts);
1515            }
1516            "summary" => {
1517                summary = resolve_gemspec_scalar_value(raw_value, content, base_dir, &contexts)
1518            }
1519            "description" => description = Some(truncate_field(clean_gemspec_value(raw_value))),
1520            "homepage" => {
1521                homepage = resolve_gemspec_scalar_value(raw_value, content, base_dir, &contexts)
1522            }
1523            "license" => license = Some(truncate_field(clean_gemspec_value(raw_value))),
1524            _ => {}
1525        }
1526    }
1527
1528    // Extract licenses (plural)
1529    for caps in licenses_re.captures_iter(content).take(MAX_ITERATION_COUNT) {
1530        if let Some(raw) = caps.get(1) {
1531            licenses = extract_ruby_array(raw.as_str());
1532        }
1533    }
1534
1535    // Extract authors
1536    for caps in authors_re.captures_iter(content).take(MAX_ITERATION_COUNT) {
1537        if let Some(raw) = caps.get(1) {
1538            let raw_str = raw.as_str().trim();
1539            if raw_str.starts_with('[') {
1540                authors = extract_ruby_array(raw_str);
1541            } else if looks_like_constant_reference(raw_str) {
1542                authors = resolve_variable_array(raw_str, &contexts)
1543                    .unwrap_or_else(|| vec![clean_gemspec_value(raw_str)]);
1544            } else {
1545                authors.push(clean_gemspec_value(raw_str));
1546            }
1547        }
1548    }
1549
1550    // Extract emails
1551    for caps in email_re.captures_iter(content).take(MAX_ITERATION_COUNT) {
1552        if let Some(raw) = caps.get(1) {
1553            let raw_str = raw.as_str().trim();
1554            if raw_str.starts_with('[') {
1555                emails = extract_ruby_array(raw_str);
1556            } else if looks_like_constant_reference(raw_str) {
1557                emails = resolve_variable_array(raw_str, &contexts)
1558                    .unwrap_or_else(|| vec![clean_gemspec_value(raw_str)]);
1559            } else {
1560                emails.push(clean_gemspec_value(raw_str));
1561            }
1562        }
1563    }
1564
1565    // Build parties from authors and emails
1566    let mut parties: Vec<Party> = Vec::new();
1567
1568    if authors.len() == 1 && emails.len() == 1 {
1569        let email_str = emails.first().map(String::as_str);
1570        let (parsed_email_name, parsed_email) = match email_str {
1571            Some(e) => split_name_email(e),
1572            None => (None, None),
1573        };
1574
1575        parties.push(Party {
1576            r#type: Some("person".to_string()),
1577            role: Some("author".to_string()),
1578            name: authors.first().cloned().or(parsed_email_name),
1579            email: parsed_email.or_else(|| {
1580                email_str
1581                    .filter(|e| e.contains('@') && !e.contains('<'))
1582                    .map(|e| e.to_string())
1583            }),
1584            url: None,
1585            organization: None,
1586            organization_url: None,
1587            timezone: None,
1588        });
1589    } else {
1590        for author_name in authors {
1591            parties.push(Party {
1592                r#type: Some("person".to_string()),
1593                role: Some("author".to_string()),
1594                name: Some(author_name),
1595                email: None,
1596                url: None,
1597                organization: None,
1598                organization_url: None,
1599                timezone: None,
1600            });
1601        }
1602
1603        for email_str in emails {
1604            let (parsed_email_name, parsed_email) = if email_str.contains('<') {
1605                split_name_email(&email_str)
1606            } else {
1607                (None, None)
1608            };
1609            parties.push(Party {
1610                r#type: Some("person".to_string()),
1611                role: Some("author".to_string()),
1612                name: parsed_email_name,
1613                email: parsed_email.or_else(|| email_str.contains('@').then_some(email_str)),
1614                url: None,
1615                organization: None,
1616                organization_url: None,
1617                timezone: None,
1618            });
1619        }
1620    }
1621
1622    for caps in dependency_call_re
1623        .captures_iter(content)
1624        .take(MAX_ITERATION_COUNT)
1625    {
1626        let method = match caps.get(1) {
1627            Some(m) => m.as_str(),
1628            None => continue,
1629        };
1630        let args = match caps.get(2) {
1631            Some(m) => m.as_str(),
1632            None => continue,
1633        };
1634
1635        let Some(dep_name) = extract_first_ruby_value(args).map(truncate_field) else {
1636            continue;
1637        };
1638        let version_parts = extract_all_ruby_values(after_first_argument(args));
1639        let extracted_requirement = if version_parts.is_empty() {
1640            None
1641        } else {
1642            Some(version_parts.join(", "))
1643        };
1644        let purl = create_gem_purl(&dep_name, None);
1645        let is_development = method == "add_development_dependency";
1646        let scope = if is_development {
1647            "development"
1648        } else {
1649            "runtime"
1650        };
1651
1652        dependencies.push(Dependency {
1653            purl,
1654            extracted_requirement,
1655            scope: Some(scope.to_string()),
1656            is_runtime: Some(!is_development),
1657            is_optional: Some(is_development),
1658            is_pinned: None,
1659            is_direct: Some(true),
1660            resolved_package: None,
1661            extra_data: None,
1662        });
1663    }
1664
1665    // Extract license statement only - detection happens in separate engine
1666    let extracted_license_statement = if !licenses.is_empty() {
1667        Some(licenses.join(" AND "))
1668    } else {
1669        license
1670    };
1671
1672    let (declared_license_expression, declared_license_expression_spdx, license_detections) =
1673        normalize_spdx_declared_license(extracted_license_statement.as_deref());
1674
1675    // Prefer description over summary
1676    let final_description = description.or(summary);
1677
1678    // Build PURL
1679    let purl = name
1680        .as_deref()
1681        .map(|n| create_gem_purl(n, version.as_deref()))
1682        .unwrap_or(None);
1683
1684    let (repository_homepage_url, repository_download_url, api_data_url, download_url) =
1685        if let Some(n) = name.as_deref() {
1686            get_rubygems_urls(n, version.as_deref(), None)
1687        } else {
1688            (None, None, None, None)
1689        };
1690
1691    PackageData {
1692        package_type: Some(PACKAGE_TYPE),
1693        name,
1694        version,
1695        primary_language: Some("Ruby".to_string()),
1696        description: final_description,
1697        homepage_url: homepage,
1698        download_url,
1699        declared_license_expression,
1700        declared_license_expression_spdx,
1701        license_detections,
1702        extracted_license_statement,
1703        parties,
1704        dependencies,
1705        repository_homepage_url,
1706        repository_download_url,
1707        api_data_url,
1708        datasource_id: Some(DatasourceId::Gemspec),
1709        purl,
1710        ..default_package_data()
1711    }
1712}
1713
1714// =============================================================================
1715// .gem Archive Parser (Wave 3)
1716// =============================================================================
1717
/// Largest `.gem` archive accepted, in bytes (100 MiB).
const MAX_ARCHIVE_SIZE: u64 = 100 * (1 << 20);
/// Largest size accepted for a single file taken from an archive, in bytes (50 MiB).
const MAX_FILE_SIZE: u64 = 50 * (1 << 20);
/// Compression ratio limit (100:1).
const MAX_COMPRESSION_RATIO: f64 = 100.0;
1721
1722/// Parser for .gem archive files.
1723///
1724/// Extracts metadata from Ruby .gem packages, which are tar archives
1725/// containing a gzip-compressed YAML metadata file (`metadata.gz`).
1726///
1727/// Includes safety checks against zip bombs and oversized archives.
1728pub struct GemArchiveParser;
1729
1730impl PackageParser for GemArchiveParser {
1731    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1732
1733    fn extract_packages(path: &Path) -> Vec<PackageData> {
1734        vec![match extract_gem_archive(path) {
1735            Ok(data) => data,
1736            Err(e) => {
1737                warn!("Failed to extract .gem archive at {:?}: {}", path, e);
1738                default_package_data_with_datasource(DatasourceId::GemArchive)
1739            }
1740        }]
1741    }
1742
1743    fn is_match(path: &Path) -> bool {
1744        path.extension()
1745            .and_then(|ext| ext.to_str())
1746            .is_some_and(|ext| ext == "gem")
1747    }
1748}
1749
1750fn extract_gem_archive(path: &Path) -> Result<PackageData, String> {
1751    let file_metadata =
1752        fs::metadata(path).map_err(|e| format!("Failed to read file metadata: {}", e))?;
1753    let archive_size = file_metadata.len();
1754
1755    if archive_size > MAX_ARCHIVE_SIZE {
1756        return Err(format!(
1757            "Archive too large: {} bytes (limit: {} bytes)",
1758            archive_size, MAX_ARCHIVE_SIZE
1759        ));
1760    }
1761
1762    let file = File::open(path).map_err(|e| format!("Failed to open archive: {}", e))?;
1763    let mut archive = Archive::new(file);
1764
1765    let mut entry_count: usize = 0;
1766    for entry_result in archive
1767        .entries()
1768        .map_err(|e| format!("Failed to read tar entries: {}", e))?
1769    {
1770        entry_count += 1;
1771        if entry_count > MAX_ITERATION_COUNT {
1772            warn!(
1773                "Exceeded max tar entry count ({}) in .gem archive, stopping iteration",
1774                MAX_ITERATION_COUNT
1775            );
1776            break;
1777        }
1778
1779        let entry = entry_result.map_err(|e| format!("Failed to read tar entry: {}", e))?;
1780        let entry_path = entry
1781            .path()
1782            .map_err(|e| format!("Failed to get entry path: {}", e))?;
1783        let entry_str = entry_path.to_string_lossy();
1784        if entry_str.contains("..") {
1785            warn!("Skipping tar entry with path traversal: {}", entry_str);
1786            continue;
1787        }
1788
1789        if entry_path.to_str() == Some("metadata.gz") {
1790            let entry_size = entry.size();
1791            if entry_size > MAX_FILE_SIZE {
1792                return Err(format!(
1793                    "metadata.gz too large: {} bytes (limit: {} bytes)",
1794                    entry_size, MAX_FILE_SIZE
1795                ));
1796            }
1797
1798            let mut decoder = GzDecoder::new(entry);
1799            let mut content = Vec::new();
1800            let mut limited = std::io::Read::take(&mut decoder, MAX_FILE_SIZE + 1);
1801            limited
1802                .read_to_end(&mut content)
1803                .map_err(|e| format!("Failed to decompress metadata.gz: {}", e))?;
1804
1805            if content.len() > MAX_FILE_SIZE as usize {
1806                return Err(format!(
1807                    "Decompressed metadata too large: exceeds {} byte limit",
1808                    MAX_FILE_SIZE
1809                ));
1810            }
1811
1812            let content = match String::from_utf8(content) {
1813                Ok(s) => s,
1814                Err(err) => {
1815                    let bytes = err.into_bytes();
1816                    warn!("Invalid UTF-8 in gem metadata; using lossy conversion");
1817                    String::from_utf8_lossy(&bytes).into_owned()
1818                }
1819            };
1820
1821            let uncompressed_size = content.len() as u64;
1822            if entry_size > 0 {
1823                let ratio = uncompressed_size as f64 / entry_size as f64;
1824                if ratio > MAX_COMPRESSION_RATIO {
1825                    return Err(format!(
1826                        "Suspicious compression ratio: {:.2}:1 (limit: {:.0}:1)",
1827                        ratio, MAX_COMPRESSION_RATIO
1828                    ));
1829                }
1830            }
1831
1832            return parse_gem_metadata_yaml(&content, DatasourceId::GemArchive);
1833        }
1834    }
1835
1836    Err("metadata.gz not found in .gem archive".to_string())
1837}
1838
1839fn parse_gem_metadata_yaml(
1840    content: &str,
1841    datasource_id: DatasourceId,
1842) -> Result<PackageData, String> {
1843    // Ruby YAML tagged types need to be handled:
1844    // --- !ruby/object:Gem::Specification
1845    // We strip Ruby-specific YAML tags since yaml_serde can't handle them
1846    let cleaned = clean_ruby_yaml_tags(content);
1847
1848    let yaml: yaml_serde::Value =
1849        yaml_serde::from_str(&cleaned).map_err(|e| format!("Failed to parse YAML: {}", e))?;
1850
1851    let name = yaml_string(&yaml, "name").map(truncate_field);
1852    let version = yaml.get("version").and_then(|v| {
1853        if v.is_string() {
1854            v.as_str().map(|s| truncate_field(s.to_string()))
1855        } else {
1856            yaml_string(v, "version").map(truncate_field)
1857        }
1858    });
1859    let description = yaml_string(&yaml, "description")
1860        .or_else(|| yaml_string(&yaml, "summary"))
1861        .map(truncate_field);
1862    let homepage = yaml_string(&yaml, "homepage").map(truncate_field);
1863    let summary = yaml_string(&yaml, "summary").map(truncate_field);
1864
1865    // Licenses
1866    let licenses: Vec<String> = yaml
1867        .get("licenses")
1868        .and_then(|v| v.as_sequence())
1869        .map(|seq| {
1870            seq.iter()
1871                .filter_map(|item| item.as_str().map(|s| truncate_field(s.to_string())))
1872                .collect()
1873        })
1874        .unwrap_or_default();
1875
1876    // Extract license statement only - detection happens in separate engine
1877    let extracted_license_statement = if !licenses.is_empty() {
1878        Some(licenses.join(" AND "))
1879    } else {
1880        None
1881    };
1882
1883    let (license_expression, license_expression_spdx, license_detections) =
1884        normalize_spdx_declared_license(extracted_license_statement.as_deref());
1885
1886    // Authors
1887    let authors: Vec<String> = yaml
1888        .get("authors")
1889        .and_then(|v| v.as_sequence())
1890        .map(|seq| {
1891            seq.iter()
1892                .filter_map(|item| item.as_str().map(|s| truncate_field(s.to_string())))
1893                .collect()
1894        })
1895        .unwrap_or_default();
1896
1897    let emails: Vec<String> = yaml
1898        .get("email")
1899        .map(|v| {
1900            if let Some(seq) = v.as_sequence() {
1901                seq.iter()
1902                    .filter_map(|item| item.as_str().map(|s| truncate_field(s.to_string())))
1903                    .collect()
1904            } else if let Some(s) = v.as_str() {
1905                vec![truncate_field(s.to_string())]
1906            } else {
1907                Vec::new()
1908            }
1909        })
1910        .unwrap_or_default();
1911
1912    // Build parties
1913    let mut parties: Vec<Party> = Vec::new();
1914    let max_len = authors.len().max(emails.len());
1915    for i in 0..max_len {
1916        let author_name = authors.get(i).map(|s| s.as_str());
1917        let email_str = emails.get(i).map(|s| s.as_str());
1918
1919        let (parsed_email_name, parsed_email) = match email_str {
1920            Some(e) if e.contains('<') => split_name_email(e),
1921            None => (None, None),
1922            _ => (None, None),
1923        };
1924
1925        let party_name = author_name.map(|s| s.to_string()).or(parsed_email_name);
1926
1927        parties.push(Party {
1928            r#type: Some("person".to_string()),
1929            role: Some("author".to_string()),
1930            name: party_name,
1931            email: parsed_email.or_else(|| {
1932                email_str
1933                    .filter(|e| e.contains('@') && !e.contains('<'))
1934                    .map(|e| e.to_string())
1935            }),
1936            url: None,
1937            organization: None,
1938            organization_url: None,
1939            timezone: None,
1940        });
1941    }
1942
1943    // Dependencies
1944    let dependencies = parse_gem_yaml_dependencies(&yaml);
1945
1946    let metadata = yaml.get("metadata");
1947
1948    let bug_tracking_url = metadata
1949        .and_then(|m| yaml_string(m, "bug_tracking_uri"))
1950        .map(truncate_field);
1951
1952    let code_view_url = metadata
1953        .and_then(|m| yaml_string(m, "source_code_uri"))
1954        .map(truncate_field);
1955
1956    let vcs_url = code_view_url.clone().or_else(|| {
1957        metadata
1958            .and_then(|m| yaml_string(m, "homepage_uri"))
1959            .map(truncate_field)
1960    });
1961
1962    let file_references = metadata
1963        .and_then(|m| m.get("files"))
1964        .and_then(|f| f.as_sequence())
1965        .map(|seq| {
1966            seq.iter()
1967                .filter_map(|v| v.as_str())
1968                .map(|s| crate::models::FileReference {
1969                    path: s.to_string(),
1970                    size: None,
1971                    sha1: None,
1972                    md5: None,
1973                    sha256: None,
1974                    sha512: None,
1975                    extra_data: None,
1976                })
1977                .collect::<Vec<_>>()
1978        })
1979        .unwrap_or_default();
1980
1981    let release_date = yaml_string(&yaml, "date").and_then(|d| {
1982        if d.len() >= 10 {
1983            Some(d[..10].to_string())
1984        } else {
1985            None
1986        }
1987    });
1988
1989    let purl = name
1990        .as_deref()
1991        .map(|n| create_gem_purl(n, version.as_deref()))
1992        .unwrap_or(None);
1993
1994    let platform = yaml_string(&yaml, "platform").map(truncate_field);
1995    let (repository_homepage_url, repository_download_url, api_data_url, download_url) =
1996        if let Some(n) = name.as_deref() {
1997            get_rubygems_urls(n, version.as_deref(), platform.as_deref())
1998        } else {
1999            (None, None, None, None)
2000        };
2001
2002    let qualifiers = if let Some(ref p) = platform {
2003        if p != "ruby" {
2004            let mut q = HashMap::new();
2005            q.insert("platform".to_string(), p.clone());
2006            Some(q)
2007        } else {
2008            None
2009        }
2010    } else {
2011        None
2012    };
2013
2014    Ok(PackageData {
2015        package_type: Some(PACKAGE_TYPE),
2016        name,
2017        version,
2018        qualifiers,
2019        primary_language: Some("Ruby".to_string()),
2020        description: description.or(summary),
2021        release_date,
2022        homepage_url: homepage,
2023        download_url,
2024        bug_tracking_url,
2025        code_view_url,
2026        declared_license_expression: license_expression,
2027        declared_license_expression_spdx: license_expression_spdx,
2028        license_detections,
2029        extracted_license_statement,
2030        file_references,
2031        parties,
2032        dependencies,
2033        repository_homepage_url,
2034        repository_download_url,
2035        api_data_url,
2036        datasource_id: Some(datasource_id),
2037        purl,
2038        vcs_url,
2039        ..default_package_data()
2040    })
2041}
2042
2043/// Strips Ruby-specific YAML tags that yaml_serde cannot handle.
2044fn clean_ruby_yaml_tags(content: &str) -> String {
2045    let tag_re = match Regex::new(r"!ruby/\S+") {
2046        Ok(r) => r,
2047        Err(_) => return content.to_string(),
2048    };
2049    tag_re.replace_all(content, "").to_string()
2050}
2051
2052fn yaml_string(yaml: &yaml_serde::Value, key: &str) -> Option<String> {
2053    yaml.get(key)
2054        .and_then(|v| v.as_str())
2055        .filter(|s| !s.is_empty())
2056        .map(|s| s.to_string())
2057}
2058
2059fn parse_gem_yaml_dependencies(yaml: &yaml_serde::Value) -> Vec<Dependency> {
2060    let mut dependencies = Vec::new();
2061
2062    let deps_seq = match yaml.get("dependencies").and_then(|v| v.as_sequence()) {
2063        Some(seq) => seq,
2064        None => return dependencies,
2065    };
2066
2067    for dep_value in deps_seq.iter().take(MAX_ITERATION_COUNT) {
2068        let dep_name = match yaml_string(dep_value, "name").map(truncate_field) {
2069            Some(n) => n,
2070            None => continue,
2071        };
2072
2073        let dep_type = yaml_string(dep_value, "type");
2074        let is_development = dep_type.as_deref() == Some(":development");
2075
2076        // Extract version requirements from the nested structure
2077        let requirements = dep_value
2078            .get("requirement")
2079            .or_else(|| dep_value.get("version_requirements"))
2080            .and_then(|req| req.get("requirements"))
2081            .and_then(|reqs| reqs.as_sequence());
2082
2083        let extracted_requirement = requirements.map(|reqs| {
2084            let parts: Vec<String> = reqs
2085                .iter()
2086                .filter_map(|req| {
2087                    let seq = req.as_sequence()?;
2088                    if seq.len() >= 2 {
2089                        let op = seq[0].as_str().unwrap_or("");
2090                        let ver = seq[1].get("version").and_then(|v| v.as_str()).unwrap_or("");
2091                        if op == ">=" && ver == "0" {
2092                            // ">= 0" means "any version" - skip
2093                            None
2094                        } else if op.is_empty() || ver.is_empty() {
2095                            None
2096                        } else {
2097                            Some(format!("{} {}", op, ver))
2098                        }
2099                    } else {
2100                        None
2101                    }
2102                })
2103                .collect();
2104            parts.join(", ")
2105        });
2106
2107        let extracted_requirement = extracted_requirement
2108            .filter(|s| !s.is_empty())
2109            .or_else(|| Some(String::new()));
2110
2111        let (scope, is_runtime, is_optional) = if is_development {
2112            (Some("development".to_string()), false, true)
2113        } else {
2114            (Some("runtime".to_string()), true, false)
2115        };
2116
2117        let purl = create_gem_purl(&dep_name, None);
2118
2119        dependencies.push(Dependency {
2120            purl,
2121            extracted_requirement,
2122            scope,
2123            is_runtime: Some(is_runtime),
2124            is_optional: Some(is_optional),
2125            is_pinned: None,
2126            is_direct: Some(true),
2127            resolved_package: None,
2128            extra_data: None,
2129        });
2130    }
2131
2132    dependencies
2133}
2134
2135// =============================================================================
2136// Gem Metadata Extracted Parser (metadata.gz-extract files)
2137// =============================================================================
2138
2139pub struct GemMetadataExtractedParser;
2140
2141impl PackageParser for GemMetadataExtractedParser {
2142    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
2143
2144    fn extract_packages(path: &Path) -> Vec<PackageData> {
2145        vec![match extract_gem_metadata_extracted(path) {
2146            Ok(data) => data,
2147            Err(e) => {
2148                warn!("Failed to extract gem metadata from {:?}: {}", path, e);
2149                default_package_data_with_datasource(DatasourceId::GemArchiveExtracted)
2150            }
2151        }]
2152    }
2153
2154    fn is_match(path: &Path) -> bool {
2155        path.to_str()
2156            .is_some_and(|p| p.contains("metadata.gz-extract"))
2157    }
2158}
2159
2160fn extract_gem_metadata_extracted(path: &Path) -> Result<PackageData, String> {
2161    let content = read_file_to_string(path, None)
2162        .map_err(|e| format!("Failed to read metadata.gz-extract file: {}", e))?;
2163
2164    parse_gem_metadata_yaml(&content, DatasourceId::GemArchiveExtracted)
2165}
2166
// Register parser with metadata
//
// Each invocation passes, in order: a human-readable description, a list of
// path glob patterns, the string "gem", the string "Ruby", and an optional
// documentation URL. How `register_parser!` interprets each position is
// defined by the macro itself, which is not visible in this file section.

// Gemfile manifests, including ones found under a `data.gz-extract` directory.
crate::register_parser!(
    "Ruby Gemfile manifest",
    &["**/Gemfile", "**/data.gz-extract/Gemfile"],
    "gem",
    "Ruby",
    Some("https://bundler.io/man/gemfile.5.html"),
);

// Gemfile.lock lockfiles.
// NOTE(review): the documentation URL is the same Gemfile man page used for
// the manifest registration above; confirm that is intended.
crate::register_parser!(
    "Ruby Gemfile.lock lockfile",
    &["**/Gemfile.lock", "**/data.gz-extract/Gemfile.lock"],
    "gem",
    "Ruby",
    Some("https://bundler.io/man/gemfile.5.html"),
);

// .gemspec files.
// NOTE(review): under usual glob semantics the second and third patterns look
// like subsets of `**/*.gemspec`; verify against the matcher the macro uses.
crate::register_parser!(
    "Ruby .gemspec manifest",
    &[
        "**/*.gemspec",
        "**/data.gz-extract/*.gemspec",
        "**/specifications/*.gemspec"
    ],
    "gem",
    "Ruby",
    Some("https://guides.rubygems.org/specification-reference/"),
);

// Packaged .gem archives.
crate::register_parser!(
    "Ruby .gem archive",
    &["**/*.gem"],
    "gem",
    "Ruby",
    Some("https://guides.rubygems.org/specification-reference/"),
);

// Gem metadata that has already been extracted from an archive.
crate::register_parser!(
    "Ruby gem metadata (extracted)",
    &["**/metadata.gz-extract"],
    "gem",
    "Ruby",
    Some("https://guides.rubygems.org/specification-reference/"),
);
2211
#[cfg(test)]
mod tests {
    use super::{clean_gemspec_value, parse_gemspec};

    /// A `%q{` literal that is never closed still yields its text content.
    #[test]
    fn test_clean_gemspec_value_handles_unterminated_percent_q() {
        let cleaned = clean_gemspec_value("%q{Arel is a SQL AST manager for Ruby. It");
        assert_eq!(cleaned, "Arel is a SQL AST manager for Ruby. It");
    }

    /// Both `add_runtime_dependency` and `add_dependency` map to the
    /// `runtime` scope and keep their version constraint.
    #[test]
    fn test_parse_gemspec_runtime_dependency_scope() {
        let content = r#"
Gem::Specification.new do |spec|
  spec.name = "demo"
  spec.version = "1.0.0"
  spec.add_runtime_dependency "rack", "~> 3.0"
  spec.add_dependency "thor", ">= 1.0"
end
"#;

        // (scope, extracted requirement) in declaration order.
        let expected = [("runtime", "~> 3.0"), ("runtime", ">= 1.0")];

        let package_data = parse_gemspec(content);
        assert_eq!(package_data.dependencies.len(), expected.len());

        for (dependency, (scope, requirement)) in package_data.dependencies.iter().zip(expected) {
            assert_eq!(dependency.scope.as_deref(), Some(scope));
            assert_eq!(dependency.extracted_requirement.as_deref(), Some(requirement));
        }
    }
}