Skip to main content

provenant/parsers/
ruby.rs

1//! Parser for Ruby/RubyGems package manifests.
2//!
3//! Extracts package metadata, dependencies, and platform information from
4//! Gemfile and Gemfile.lock files used by Ruby/Bundler projects.
5//!
6//! # Supported Formats
7//! - Gemfile (manifest with Ruby DSL)
8//! - Gemfile.lock (lockfile with state machine sections)
9//! - *.gemspec (gem specification files)
10//! - *.gem (gem archive packages)
11//! - metadata.gz-extract (pre-extracted gem metadata)
12//!
13//! # Key Features
14//! - State machine parsing for Gemfile.lock sections (GEM, GIT, PATH, SVN, PLATFORMS, BUNDLED WITH, DEPENDENCIES)
15//! - Regex-based Ruby DSL parsing for Gemfile
16//! - Dependency group handling (:development, :test, etc.)
17//! - Platform-specific gem support
18//! - Pessimistic version operator (~>) support
19//! - Bug Fix #1: Strip .freeze suffix from strings
20//! - Bug Fix #4: Correct dependency scope mapping (:runtime → None, :development → "development")
21//!
22//! # Implementation Notes
23//! - Uses regex for pattern matching (not full Ruby AST)
24//! - Graceful error handling: logs warnings and returns default on parse failure
25//! - PURL type: "gem"
26
27use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Party};
28use crate::parser_warn as warn;
29use crate::parsers::utils::{
30    MAX_ITERATION_COUNT, read_file_to_string, split_name_email, truncate_field,
31};
32use flate2::read::GzDecoder;
33use packageurl::PackageUrl;
34use regex::Regex;
35use std::collections::HashMap;
36use std::fs::{self, File};
37use std::io::Read;
38use std::path::{Path, PathBuf};
39use tar::Archive;
40
41use super::PackageParser;
42use super::license_normalization::normalize_spdx_declared_license;
43
44const PACKAGE_TYPE: PackageType = PackageType::Gem;
45
46// =============================================================================
47// Bug Fix #1: Strip .freeze suffix from strings
48// =============================================================================
49
/// Removes trailing `.freeze` calls from a Ruby string-literal expression.
///
/// Ruby code often writes `"name".freeze` to obtain an immutable string. Every
/// `.freeze` occurrence at the very end of `s` is dropped (a doubled
/// `.freeze.freeze` is removed entirely); any other input is returned as is.
///
/// # Examples
/// ```ignore
/// assert_eq!(strip_freeze_suffix("\"name\".freeze"), "\"name\"");
/// assert_eq!(strip_freeze_suffix("'1.0.0'.freeze"), "'1.0.0'");
/// ```
pub fn strip_freeze_suffix(s: &str) -> &str {
    let mut remaining = s;
    // Peel the suffix off repeatedly until the text no longer ends with it.
    while let Some(shorter) = remaining.strip_suffix(".freeze") {
        remaining = shorter;
    }
    remaining
}
63
/// A `... do` block that is currently open while a Gemfile is being scanned.
///
/// `parse_gemfile` keeps these on a stack so that each `gem` line can look up
/// the group names and the source that enclose it.
enum GemfileBlock {
    /// Opened by `group :a, :b do`; holds the symbol names found on that line.
    Group(Vec<String>),
    /// Opened by `source "url" do`; holds the quoted source string.
    Source(String),
}
68
69// =============================================================================
70// Gemfile Parser (Ruby DSL)
71// =============================================================================
72
73/// Ruby Gemfile parser for manifest files.
74///
75/// Parses Ruby DSL syntax to extract gem declarations, dependency groups,
76/// platform-specific gems, and version constraints.
77pub struct GemfileParser;
78
79impl PackageParser for GemfileParser {
80    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
81
82    fn extract_packages(path: &Path) -> Vec<PackageData> {
83        let datasource_id = gemfile_datasource_id(path);
84        let content = match read_file_to_string(path, None) {
85            Ok(c) => c,
86            Err(e) => {
87                warn!("Failed to read Gemfile at {:?}: {}", path, e);
88                return vec![default_package_data_with_datasource(datasource_id)];
89            }
90        };
91
92        let mut package_data = parse_gemfile(&content);
93        package_data.datasource_id = Some(datasource_id);
94        vec![package_data]
95    }
96
97    fn is_match(path: &Path) -> bool {
98        path.file_name()
99            .and_then(|n| n.to_str())
100            .is_some_and(|name| name == "Gemfile")
101            || path
102                .to_str()
103                .is_some_and(|p| p.contains("data.gz-extract/") && p.ends_with("/Gemfile"))
104    }
105}
106
107/// Parses Gemfile content and extracts dependencies with groups.
108fn parse_gemfile(content: &str) -> PackageData {
109    let mut dependencies = Vec::new();
110    let mut block_stack = Vec::new();
111    let mut default_source = None;
112    let mut sources = Vec::new();
113
114    // Regex patterns for Gemfile parsing
115    // gem "name", "version", options...
116    let gem_regex = match Regex::new(
117        r#"^\s*gem\s+["']([^"']+)["'](?:\.freeze)?(?:\s*,\s*["']([^"']+)["'](?:\.freeze)?)?(?:\s*,\s*["']([^"']+)["'](?:\.freeze)?)?(?:\s*,\s*(.+))?"#,
118    ) {
119        Ok(r) => r,
120        Err(e) => {
121            warn!("Failed to compile gem regex: {}", e);
122            return default_package_data_with_datasource(DatasourceId::Gemfile);
123        }
124    };
125
126    // group :name do ... end
127    let group_start_regex = match Regex::new(r"^\s*group\s+(.+?)\s+do\s*$") {
128        Ok(r) => r,
129        Err(e) => {
130            warn!("Failed to compile group regex: {}", e);
131            return default_package_data_with_datasource(DatasourceId::Gemfile);
132        }
133    };
134
135    let group_end_regex = match Regex::new(r"^\s*end\s*$") {
136        Ok(r) => r,
137        Err(e) => {
138            warn!("Failed to compile end regex: {}", e);
139            return default_package_data_with_datasource(DatasourceId::Gemfile);
140        }
141    };
142
143    let source_block_start_regex = match Regex::new(r#"^\s*source\s+["']([^"']+)["']\s+do\s*$"#) {
144        Ok(r) => r,
145        Err(e) => {
146            warn!("Failed to compile source block regex: {}", e);
147            return default_package_data_with_datasource(DatasourceId::Gemfile);
148        }
149    };
150
151    let source_regex = match Regex::new(r#"^\s*source\s+["']([^"']+)["']\s*$"#) {
152        Ok(r) => r,
153        Err(e) => {
154            warn!("Failed to compile source regex: {}", e);
155            return default_package_data_with_datasource(DatasourceId::Gemfile);
156        }
157    };
158
159    // Parse symbols like :development, :test
160    let symbol_regex = match Regex::new(r":(\w+)") {
161        Ok(r) => r,
162        Err(e) => {
163            warn!("Failed to compile symbol regex: {}", e);
164            return default_package_data_with_datasource(DatasourceId::Gemfile);
165        }
166    };
167
168    for line in content.lines().take(MAX_ITERATION_COUNT) {
169        let trimmed = line.trim();
170
171        // Skip comments and empty lines
172        if trimmed.is_empty() || trimmed.starts_with('#') {
173            continue;
174        }
175
176        // Check for group start
177        if let Some(caps) = group_start_regex.captures(trimmed) {
178            let groups_str = caps.get(1).map(|m| m.as_str()).unwrap_or("");
179            let mut current_groups = Vec::new();
180            for cap in symbol_regex.captures_iter(groups_str) {
181                if let Some(group_name) = cap.get(1) {
182                    current_groups.push(group_name.as_str().to_string());
183                }
184            }
185            block_stack.push(GemfileBlock::Group(current_groups));
186            continue;
187        }
188
189        if let Some(caps) = source_block_start_regex.captures(trimmed) {
190            let source = caps
191                .get(1)
192                .map(|m| m.as_str().to_string())
193                .unwrap_or_default();
194            if !source.is_empty() {
195                push_unique_string(&mut sources, source.clone());
196                block_stack.push(GemfileBlock::Source(source));
197            }
198            continue;
199        }
200
201        if let Some(caps) = source_regex.captures(trimmed) {
202            if let Some(source) = caps.get(1).map(|m| m.as_str().to_string()) {
203                push_unique_string(&mut sources, source.clone());
204                default_source = Some(source);
205            }
206            continue;
207        }
208
209        // Check for group end
210        if group_end_regex.is_match(trimmed) {
211            block_stack.pop();
212            continue;
213        }
214
215        // Parse gem declaration
216        if let Some(caps) = gem_regex.captures(trimmed) {
217            let name = strip_freeze_suffix(caps.get(1).map(|m| m.as_str()).unwrap_or(""));
218            if name.is_empty() {
219                continue;
220            }
221
222            // Collect version constraints
223            let mut version_parts = Vec::new();
224            if let Some(v) = caps.get(2) {
225                version_parts.push(strip_freeze_suffix(v.as_str()).to_string());
226            }
227            if let Some(v) = caps.get(3) {
228                let v_str = strip_freeze_suffix(v.as_str());
229                // Check if it looks like a version constraint
230                if looks_like_version_constraint(v_str) {
231                    version_parts.push(v_str.to_string());
232                }
233            }
234
235            let extracted_requirement = if version_parts.is_empty() {
236                None
237            } else {
238                Some(version_parts.join(", "))
239            };
240
241            let current_groups = current_group_names(&block_stack);
242
243            // Determine scope based on current group
244            // Bug Fix #4: :runtime → None, :development → "development"
245            let (scope, is_runtime, is_optional) = if current_groups.is_empty() {
246                // No group = runtime dependency
247                (None, true, false)
248            } else if current_groups.iter().any(|g| g == "development") {
249                (Some("development".to_string()), false, true)
250            } else if current_groups.iter().any(|g| g == "test") {
251                (Some("test".to_string()), false, true)
252            } else {
253                // Other groups (e.g., :production)
254                let group = current_groups.first().cloned();
255                (group, true, false)
256            };
257
258            // Create PURL
259            let purl = create_gem_purl(name, None);
260            let inherited_source = current_source(&block_stack, default_source.as_deref());
261            let extra_data = build_gemfile_dependency_extra_data(
262                caps.get(4).map(|m| m.as_str()),
263                inherited_source.as_deref(),
264            );
265
266            dependencies.push(Dependency {
267                purl,
268                extracted_requirement,
269                scope,
270                is_runtime: Some(is_runtime),
271                is_optional: Some(is_optional),
272                is_pinned: None,
273                is_direct: Some(true),
274                resolved_package: None,
275                extra_data,
276            });
277        }
278    }
279
280    let extra_data = if sources.is_empty() {
281        None
282    } else {
283        Some(HashMap::from([(
284            "sources".to_string(),
285            serde_json::Value::Array(sources.into_iter().map(serde_json::Value::String).collect()),
286        )]))
287    };
288
289    PackageData {
290        package_type: Some(PACKAGE_TYPE),
291        primary_language: Some("Ruby".to_string()),
292        dependencies,
293        extra_data,
294        datasource_id: Some(DatasourceId::Gemfile),
295        ..default_package_data()
296    }
297}
298
299fn current_group_names(block_stack: &[GemfileBlock]) -> Vec<String> {
300    block_stack
301        .iter()
302        .rev()
303        .find_map(|block| match block {
304            GemfileBlock::Group(groups) => Some(groups.clone()),
305            GemfileBlock::Source(_) => None,
306        })
307        .unwrap_or_default()
308}
309
310fn current_source(block_stack: &[GemfileBlock], default_source: Option<&str>) -> Option<String> {
311    block_stack
312        .iter()
313        .rev()
314        .find_map(|block| match block {
315            GemfileBlock::Source(source) => Some(source.clone()),
316            GemfileBlock::Group(_) => None,
317        })
318        .or_else(|| default_source.map(str::to_string))
319}
320
/// Appends `value` to `values` unless an equal string is already present,
/// so first-seen order is kept and duplicates are dropped.
fn push_unique_string(values: &mut Vec<String>, value: String) {
    let already_present = values.iter().any(|existing| *existing == value);
    if already_present {
        return;
    }
    values.push(value);
}
326
327fn build_gemfile_dependency_extra_data(
328    options: Option<&str>,
329    inherited_source: Option<&str>,
330) -> Option<HashMap<String, serde_json::Value>> {
331    let mut extra = HashMap::new();
332    let options = options.unwrap_or("");
333
334    if let Some(git) = extract_gemfile_quoted_option(options, "git") {
335        extra.insert(
336            "source_type".to_string(),
337            serde_json::Value::String("GIT".to_string()),
338        );
339        extra.insert("git".to_string(), serde_json::Value::String(git.clone()));
340        extra.insert("remote".to_string(), serde_json::Value::String(git));
341    }
342
343    if let Some(path) = extract_gemfile_quoted_option(options, "path") {
344        extra.insert(
345            "source_type".to_string(),
346            serde_json::Value::String("PATH".to_string()),
347        );
348        extra.insert("path".to_string(), serde_json::Value::String(path));
349    }
350
351    for key in ["branch", "ref", "tag"] {
352        if let Some(value) = extract_gemfile_quoted_option(options, key) {
353            extra.insert(key.to_string(), serde_json::Value::String(value));
354        }
355    }
356
357    let direct_source = extract_gemfile_quoted_option(options, "source");
358    if let Some(source) = direct_source {
359        extra.insert("source".to_string(), serde_json::Value::String(source));
360    } else if !extra.contains_key("source_type")
361        && let Some(source) = inherited_source
362    {
363        extra.insert(
364            "source".to_string(),
365            serde_json::Value::String(source.to_string()),
366        );
367    }
368
369    (!extra.is_empty()).then_some(extra)
370}
371
372fn extract_gemfile_quoted_option(options: &str, key: &str) -> Option<String> {
373    if options.is_empty() {
374        return None;
375    }
376
377    let pattern = format!(r#"(?:^|,\s*){}\s*:\s*["']([^"']+)["']"#, regex::escape(key));
378    Regex::new(&pattern)
379        .ok()
380        .and_then(|regex| regex.captures(options))
381        .and_then(|captures| captures.get(1).map(|m| m.as_str().to_string()))
382}
383
/// Reports whether `s` starts like a RubyGems version requirement.
///
/// Only the first character is inspected: a comparison-operator character
/// (`~`, `>`, `<`, `=`, `!`) or an ASCII digit qualifies. An empty string does
/// not.
fn looks_like_version_constraint(s: &str) -> bool {
    match s.chars().next() {
        Some('~' | '>' | '<' | '=' | '!') => true,
        Some(first) => first.is_ascii_digit(),
        None => false,
    }
}
393
394// =============================================================================
395// Gemfile.lock Parser (State Machine)
396// =============================================================================
397
398/// Ruby Gemfile.lock parser for lockfiles.
399///
400/// Uses a state machine to parse sections: GEM, GIT, PATH, SVN,
401/// PLATFORMS, BUNDLED WITH, DEPENDENCIES.
402pub struct GemfileLockParser;
403
404impl PackageParser for GemfileLockParser {
405    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
406
407    fn extract_packages(path: &Path) -> Vec<PackageData> {
408        let datasource_id = gemfile_lock_datasource_id(path);
409        let content = match read_file_to_string(path, None) {
410            Ok(c) => c,
411            Err(e) => {
412                warn!("Failed to read Gemfile.lock at {:?}: {}", path, e);
413                return vec![default_package_data_with_datasource(datasource_id)];
414            }
415        };
416
417        let mut package_data = parse_gemfile_lock(&content);
418        package_data.datasource_id = Some(datasource_id);
419        vec![package_data]
420    }
421
422    fn is_match(path: &Path) -> bool {
423        path.file_name()
424            .and_then(|n| n.to_str())
425            .is_some_and(|name| name == "Gemfile.lock")
426            || path
427                .to_str()
428                .is_some_and(|p| p.contains("data.gz-extract/") && p.ends_with("/Gemfile.lock"))
429    }
430}
431
/// Parse state for Gemfile.lock state machine.
///
/// The state is switched by the section header lines of the lockfile and by
/// the `specs:` sub-header; it decides how every other line is interpreted.
#[derive(Debug, Clone, PartialEq)]
enum ParseState {
    /// No section header has been seen yet; lines are ignored.
    None,
    /// Inside a `GEM` section, before its `specs:` line (option lines).
    Gem,
    /// Inside a `GIT` section, before its `specs:` line (option lines).
    Git,
    /// Inside a `PATH` section, before its `specs:` line (option lines).
    Path,
    /// Inside an `SVN` section, before its `specs:` line (option lines).
    Svn,
    /// After `specs:` within a GEM/GIT/PATH/SVN section (gem spec lines).
    Specs,
    /// Inside the `PLATFORMS` section.
    Platforms,
    /// Inside the `BUNDLED WITH` section.
    BundledWith,
    /// Inside the `DEPENDENCIES` section.
    Dependencies,
}
445
/// Parsed gem information from Gemfile.lock.
///
/// All fields are actively used:
/// - `gem_type`, `remote`, `revision`, `ref_field`, `branch`, `tag`: Stored in extra_data for GIT/PATH/SVN sources
/// - `name`, `version`, `platform`, `pinned`: Used for dependency PURL and metadata generation
/// - `requirements`: Stored as extracted_requirement for version constraints
#[derive(Debug, Clone, Default)]
struct GemInfo {
    // Gem name as written on the spec or dependency line.
    name: String,
    // Version part of the parenthesized text on a spec line, if any.
    version: Option<String>,
    // Platform suffix split off the version text (the part after the first '-').
    platform: Option<String>,
    // Section the gem was found in: "GEM", "GIT", "PATH" or "SVN".
    gem_type: String,
    // Value of the section's `remote:` option, if one was seen.
    remote: Option<String>,
    // Value of the section's `revision:` option.
    revision: Option<String>,
    // Value of the section's `ref:` option (`ref` is a Rust keyword).
    ref_field: Option<String>,
    // Value of the section's `branch:` option.
    branch: Option<String>,
    // Value of the section's `tag:` option.
    tag: Option<String>,
    // True when the gem's DEPENDENCIES entry ends with `!`.
    pinned: bool,
    // Parenthesized constraints collected from the DEPENDENCIES section.
    requirements: Vec<String>,
}
466
467fn select_primary_path_gem(gems: &HashMap<String, GemInfo>) -> Option<GemInfo> {
468    let mut path_gems: Vec<&GemInfo> = gems.values().filter(|gem| gem.gem_type == "PATH").collect();
469    path_gems.sort_by(|left, right| {
470        left.remote
471            .as_deref()
472            .cmp(&right.remote.as_deref())
473            .then_with(|| left.name.cmp(&right.name))
474    });
475
476    path_gems
477        .iter()
478        .copied()
479        .find(|gem| gem.pinned && gem.remote.as_deref() == Some("."))
480        .or_else(|| path_gems.iter().copied().find(|gem| gem.pinned))
481        .or_else(|| {
482            path_gems
483                .iter()
484                .copied()
485                .find(|gem| gem.remote.as_deref() == Some("."))
486        })
487        .or_else(|| path_gems.first().copied())
488        .cloned()
489}
490
/// Parses Gemfile.lock content using a state machine.
///
/// The content is read line by line (at most `MAX_ITERATION_COUNT` lines).
/// Section header lines (`GEM`, `GIT`, `PATH`, `SVN`, `PLATFORMS`,
/// `BUNDLED WITH`, `DEPENDENCIES`) and the `specs:` sub-header switch the
/// state; every other line is interpreted according to the current state:
///
/// - source sections: `key: value` option lines (`remote`, `revision`, ...),
/// - specs: 4-space-indented `name (version[-platform])` lines, one per gem,
/// - platforms: one platform name per line,
/// - bundled with: the Bundler version,
/// - dependencies: 2-space-indented `name (constraint)` lines, where a
///   trailing `!` marks the gem as pinned.
///
/// If a PATH gem can be selected by `select_primary_path_gem`, it becomes the
/// package itself (name, version, URLs, PURL) and is left out of the
/// dependency list. Every other collected gem becomes a `Dependency`; the
/// list is sorted by PURL and then by extracted requirement. Platforms and
/// the Bundler version are stored in `extra_data`.
///
/// If a pattern fails to compile, a default `PackageData` for the
/// Gemfile.lock datasource is returned.
fn parse_gemfile_lock(content: &str) -> PackageData {
    let mut state = ParseState::None;
    let mut dependencies = Vec::new();
    // Collected gems keyed by name; a later line for the same name replaces
    // (specs) or updates (dependencies) the earlier entry.
    let mut gems: HashMap<String, GemInfo> = HashMap::new();
    let mut platforms: Vec<String> = Vec::new();
    let mut bundler_version: Option<String> = None;
    // Context of the source section currently being read; copied into each
    // gem found under its `specs:`.
    let mut current_gem_type = String::new();
    let mut current_remote: Option<String> = None;
    let mut current_options: HashMap<String, String> = HashMap::new();

    // DEPS pattern: 2 spaces at line start, optional "(constraint)", optional "!"
    let deps_regex = match Regex::new(r"^ {2}([^ \)\(,!:]+)(?: \(([^)]+)\))?(!)?$") {
        Ok(r) => r,
        Err(e) => {
            warn!("Failed to compile deps regex: {}", e);
            return default_package_data_with_datasource(DatasourceId::GemfileLock);
        }
    };

    // SPEC_DEPS pattern: 4 spaces at line start (deeper-indented lines do not
    // match because the name may not start with a space)
    let spec_deps_regex = match Regex::new(r"^ {4}([^ \)\(,!:]+)(?: \(([^)]+)\))?$") {
        Ok(r) => r,
        Err(e) => {
            warn!("Failed to compile spec_deps regex: {}", e);
            return default_package_data_with_datasource(DatasourceId::GemfileLock);
        }
    };

    // OPTIONS pattern: key: value
    let options_regex = match Regex::new(r"^ {2}([a-z]+): (.+)$") {
        Ok(r) => r,
        Err(e) => {
            warn!("Failed to compile options regex: {}", e);
            return default_package_data_with_datasource(DatasourceId::GemfileLock);
        }
    };

    // VERSION pattern for BUNDLED WITH
    let version_regex = match Regex::new(r"^\s+(\d+(?:\.\d+)+)\s*$") {
        Ok(r) => r,
        Err(e) => {
            warn!("Failed to compile version regex: {}", e);
            return default_package_data_with_datasource(DatasourceId::GemfileLock);
        }
    };

    for line in content.lines().take(MAX_ITERATION_COUNT) {
        let trimmed = line.trim_end();

        // An empty line discards the collected section options; the parse
        // state itself is left unchanged until the next header line.
        if trimmed.is_empty() {
            current_options.clear();
            continue;
        }

        // Section headers (no leading whitespace) and sub-section headers
        match trimmed {
            "GEM" => {
                state = ParseState::Gem;
                current_gem_type = "GEM".to_string();
                current_remote = None;
                current_options.clear();
                continue;
            }
            "GIT" => {
                state = ParseState::Git;
                current_gem_type = "GIT".to_string();
                current_remote = None;
                current_options.clear();
                continue;
            }
            "PATH" => {
                state = ParseState::Path;
                current_gem_type = "PATH".to_string();
                current_remote = None;
                current_options.clear();
                continue;
            }
            "SVN" => {
                state = ParseState::Svn;
                current_gem_type = "SVN".to_string();
                current_remote = None;
                current_options.clear();
                continue;
            }
            "PLATFORMS" => {
                state = ParseState::Platforms;
                continue;
            }
            "BUNDLED WITH" => {
                state = ParseState::BundledWith;
                continue;
            }
            "DEPENDENCIES" => {
                state = ParseState::Dependencies;
                continue;
            }
            _ => {}
        }

        // Check for "  specs:" sub-section header (2-space indent) within
        // GEM/GIT/PATH/SVN sections. This must be checked separately because
        // the leading whitespace is preserved by trim_end().
        if trimmed.trim() == "specs:" {
            state = match state {
                ParseState::Gem | ParseState::Git | ParseState::Path | ParseState::Svn => {
                    ParseState::Specs
                }
                // Outside a source section the line is skipped without a state change.
                _ => state,
            };
            continue;
        }

        // Process based on current state
        match state {
            ParseState::Gem | ParseState::Git | ParseState::Path | ParseState::Svn => {
                // Parse options (remote:, revision:, ref:, branch:, tag:)
                if let Some(caps) = options_regex.captures(line) {
                    let key = caps.get(1).map(|m| m.as_str()).unwrap_or("");
                    let value = caps.get(2).map(|m| m.as_str()).unwrap_or("");
                    current_options.insert(key.to_string(), value.to_string());
                    if key == "remote" {
                        current_remote = Some(value.to_string());
                    }
                }
            }
            ParseState::Specs => {
                // Parse gem specs (4 spaces indent)
                if let Some(caps) = spec_deps_regex.captures(line) {
                    let name = caps.get(1).map(|m| m.as_str()).unwrap_or("").to_string();
                    let version_str = caps.get(2).map(|m| m.as_str()).unwrap_or("");

                    // Parse version and platform
                    let (version, platform) = parse_version_platform(version_str);

                    if !name.is_empty() {
                        let gem_info = GemInfo {
                            name: name.clone(),
                            version,
                            platform,
                            gem_type: current_gem_type.clone(),
                            remote: current_remote.clone(),
                            revision: current_options.get("revision").cloned(),
                            ref_field: current_options.get("ref").cloned(),
                            branch: current_options.get("branch").cloned(),
                            tag: current_options.get("tag").cloned(),
                            pinned: false,
                            requirements: Vec::new(),
                        };
                        gems.insert(name, gem_info);
                    }
                }
            }
            ParseState::Platforms => {
                // Parse platform entries (2 spaces indent)
                let platform = trimmed.trim();
                if !platform.is_empty() {
                    platforms.push(platform.to_string());
                }
            }
            ParseState::BundledWith => {
                // Parse bundler version
                if let Some(caps) = version_regex.captures(line) {
                    bundler_version = caps.get(1).map(|m| m.as_str().to_string());
                }
            }
            ParseState::Dependencies => {
                // Parse direct dependencies (2 spaces indent)
                if let Some(caps) = deps_regex.captures(line) {
                    let name = caps.get(1).map(|m| m.as_str()).unwrap_or("").to_string();
                    let version_constraint = caps.get(2).map(|m| m.as_str().to_string());
                    // A trailing "!" on the dependency line marks the gem as pinned.
                    let pinned = caps.get(3).is_some();

                    if !name.is_empty() {
                        // Update gem info if exists, or create new
                        if let Some(gem) = gems.get_mut(&name) {
                            gem.pinned = pinned;
                            if let Some(vc) = &version_constraint {
                                gem.requirements.push(vc.clone());
                            }
                        } else {
                            // Listed as a dependency but absent from every
                            // specs section: recorded as a plain GEM entry
                            // without a resolved version.
                            let gem_info = GemInfo {
                                name: name.clone(),
                                version: None,
                                platform: None,
                                gem_type: "GEM".to_string(),
                                remote: None,
                                revision: None,
                                ref_field: None,
                                branch: None,
                                tag: None,
                                pinned,
                                requirements: version_constraint.into_iter().collect(),
                            };
                            gems.insert(name, gem_info);
                        }
                    }
                }
            }
            ParseState::None => {}
        }
    }

    // A PATH gem, when one can be selected, describes the package itself.
    let primary_gem = select_primary_path_gem(&gems);

    let (
        package_name,
        package_version,
        repository_homepage_url,
        repository_download_url,
        api_data_url,
        download_url,
    ) = if let Some(ref pg) = primary_gem {
        let urls = get_rubygems_urls(&pg.name, pg.version.as_deref(), pg.platform.as_deref());
        (
            Some(pg.name.clone()),
            pg.version.clone(),
            urls.0,
            urls.1,
            urls.2,
            urls.3,
        )
    } else {
        (None, None, None, None, None, None)
    };

    for (_, gem) in gems {
        // The primary gem is the package, not one of its dependencies.
        if let Some(ref pg) = primary_gem
            && gem.name == pg.name
        {
            continue;
        }

        let version_for_purl = gem.version.as_deref();
        let purl = create_gem_purl(&gem.name, version_for_purl);

        // Prefer the constraints from DEPENDENCIES; fall back to the locked version.
        let extracted_requirement = if !gem.requirements.is_empty() {
            Some(gem.requirements.join(", "))
        } else {
            gem.version.clone()
        };

        let extra_data = build_gem_source_extra_data(&gem);

        dependencies.push(Dependency {
            purl,
            extracted_requirement,
            scope: Some("dependencies".to_string()),
            is_runtime: Some(true),
            is_optional: Some(false),
            is_pinned: Some(gem.pinned),
            is_direct: Some(true),
            resolved_package: None,
            extra_data,
        });
    }

    // HashMap iteration order is unspecified, so sort for stable output.
    dependencies.sort_by(|left, right| {
        left.purl
            .as_deref()
            .cmp(&right.purl.as_deref())
            .then_with(|| {
                left.extracted_requirement
                    .as_deref()
                    .cmp(&right.extracted_requirement.as_deref())
            })
    });

    // Build extra_data
    let mut extra_data = HashMap::new();
    if !platforms.is_empty() {
        extra_data.insert(
            "platforms".to_string(),
            serde_json::Value::Array(
                platforms
                    .into_iter()
                    .map(serde_json::Value::String)
                    .collect(),
            ),
        );
    }
    if let Some(bv) = bundler_version {
        extra_data.insert("bundler_version".to_string(), serde_json::Value::String(bv));
    }

    let purl = package_name
        .as_deref()
        .map(|n| create_gem_purl(n, package_version.as_deref()))
        .unwrap_or(None);

    PackageData {
        package_type: Some(PACKAGE_TYPE),
        name: package_name,
        version: package_version,
        primary_language: Some("Ruby".to_string()),
        download_url,
        dependencies,
        repository_homepage_url,
        repository_download_url,
        api_data_url,
        extra_data: if extra_data.is_empty() {
            None
        } else {
            Some(extra_data)
        },
        datasource_id: Some(DatasourceId::GemfileLock),
        purl,
        ..default_package_data()
    }
}
802
803fn build_gem_source_extra_data(gem: &GemInfo) -> Option<HashMap<String, serde_json::Value>> {
804    if gem.gem_type != "GIT" && gem.gem_type != "PATH" && gem.gem_type != "SVN" {
805        return None;
806    }
807
808    let mut extra = HashMap::new();
809    extra.insert(
810        "source_type".to_string(),
811        serde_json::Value::String(gem.gem_type.clone()),
812    );
813
814    if let Some(ref remote) = gem.remote {
815        extra.insert(
816            "remote".to_string(),
817            serde_json::Value::String(remote.clone()),
818        );
819    }
820    if let Some(ref revision) = gem.revision {
821        extra.insert(
822            "revision".to_string(),
823            serde_json::Value::String(revision.clone()),
824        );
825    }
826    if let Some(ref ref_field) = gem.ref_field {
827        extra.insert(
828            "ref".to_string(),
829            serde_json::Value::String(ref_field.clone()),
830        );
831    }
832    if let Some(ref branch) = gem.branch {
833        extra.insert(
834            "branch".to_string(),
835            serde_json::Value::String(branch.clone()),
836        );
837    }
838    if let Some(ref tag) = gem.tag {
839        extra.insert("tag".to_string(), serde_json::Value::String(tag.clone()));
840    }
841
842    Some(extra)
843}
844
/// Splits a lockfile version string into its version and platform parts.
///
/// The split happens at the first `-`: `"2.6.3"` gives `("2.6.3", None)`,
/// `"2.6.3-java"` gives `("2.6.3", Some("java"))`, and
/// `"1.0-x86_64-linux"` gives `("1.0", Some("x86_64-linux"))`. An empty input
/// gives `(None, None)`.
fn parse_version_platform(s: &str) -> (Option<String>, Option<String>) {
    if s.is_empty() {
        return (None, None);
    }

    match s.split_once('-') {
        Some((version, platform)) => (Some(version.to_owned()), Some(platform.to_owned())),
        None => (Some(s.to_owned()), None),
    }
}
859
860/// Creates a gem PURL.
861fn create_gem_purl(name: &str, version: Option<&str>) -> Option<String> {
862    let mut purl = match PackageUrl::new(PACKAGE_TYPE.as_str(), name) {
863        Ok(p) => p,
864        Err(e) => {
865            warn!("Failed to create PURL for gem '{}': {}", name, e);
866            return None;
867        }
868    };
869
870    if let Some(v) = version
871        && let Err(e) = purl.with_version(v)
872    {
873        warn!("Failed to set version '{}' for gem '{}': {}", v, name, e);
874    }
875
876    Some(purl.to_string())
877}
878
/// Builds the rubygems.org page URL for a gem, or for one specific version of it.
///
/// Returns `None` when `name` is empty. The version has surrounding whitespace
/// and `/` characters removed; if nothing is left (or no version was given) the
/// gem-level page is returned instead of a dangling `.../versions/` URL.
fn rubygems_homepage_url(name: &str, version: Option<&str>) -> Option<String> {
    if name.is_empty() {
        return None;
    }

    // A blank version carries no information, so treat it the same as `None`.
    let version = version
        .map(|v| v.trim().trim_matches('/'))
        .filter(|v| !v.is_empty());

    Some(match version {
        Some(v) => format!("https://rubygems.org/gems/{}/versions/{}", name, v),
        None => format!("https://rubygems.org/gems/{}", name),
    })
}
891
/// Builds the rubygems.org download URL of a `.gem` file.
///
/// `name` and `version` have surrounding whitespace and `/` characters removed
/// first. Returns `None` when there is no version or when either value is empty
/// after that clean-up. The platform is appended as `-<platform>` unless it is
/// absent, empty, or the generic `"ruby"` platform.
fn rubygems_download_url(
    name: &str,
    version: Option<&str>,
    platform: Option<&str>,
) -> Option<String> {
    let name = name.trim().trim_matches('/');
    let version = version?.trim().trim_matches('/');
    // Check emptiness only after trimming so blank inputs cannot yield `-.gem` URLs.
    if name.is_empty() || version.is_empty() {
        return None;
    }

    let version_plat = match platform.filter(|p| !p.is_empty() && *p != "ruby") {
        Some(p) => format!("{}-{}", version, p),
        None => version.to_string(),
    };

    Some(format!(
        "https://rubygems.org/downloads/{}-{}.gem",
        name, version_plat
    ))
}
919
/// Builds the rubygems.org JSON API URL for a gem.
///
/// Returns `None` when `name` is empty. With a usable version the v2
/// per-version endpoint is returned; without one (absent or blank after
/// trimming whitespace) the v1 versions listing for the gem is returned.
fn rubygems_api_url(name: &str, version: Option<&str>) -> Option<String> {
    if name.is_empty() {
        return None;
    }

    // A blank version would produce `.../versions/.json`; fall back to the listing.
    let version = version.map(str::trim).filter(|v| !v.is_empty());

    Some(match version {
        Some(v) => format!(
            "https://rubygems.org/api/v2/rubygems/{}/versions/{}.json",
            name, v
        ),
        None => format!("https://rubygems.org/api/v1/versions/{}.json", name),
    })
}
937
938fn get_rubygems_urls(
939    name: &str,
940    version: Option<&str>,
941    platform: Option<&str>,
942) -> (
943    Option<String>,
944    Option<String>,
945    Option<String>,
946    Option<String>,
947) {
948    let repository_homepage_url = rubygems_homepage_url(name, version);
949    let repository_download_url = rubygems_download_url(name, version, platform);
950    let api_data_url = rubygems_api_url(name, version);
951    let download_url = repository_download_url.clone();
952
953    (
954        repository_homepage_url,
955        repository_download_url,
956        api_data_url,
957        download_url,
958    )
959}
960
961/// Returns a default PackageData with gem-specific settings.
962fn default_package_data() -> PackageData {
963    PackageData {
964        package_type: Some(PACKAGE_TYPE),
965        primary_language: Some("Ruby".to_string()),
966        ..Default::default()
967    }
968}
969
970fn default_package_data_with_datasource(datasource_id: DatasourceId) -> PackageData {
971    PackageData {
972        datasource_id: Some(datasource_id),
973        ..default_package_data()
974    }
975}
976
977// =============================================================================
978// Gemspec Parser (Ruby DSL)
979// =============================================================================
980
981/// Ruby .gemspec file parser.
982///
983/// Parses `Gem::Specification.new` blocks using regex-based extraction.
984/// Handles frozen strings (Bug #1), variable version resolution (Bug #2),
985/// and RFC 5322 email parsing (Bug #6).
986pub struct GemspecParser;
987
988impl PackageParser for GemspecParser {
989    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
990
991    fn extract_packages(path: &Path) -> Vec<PackageData> {
992        let datasource_id = gemspec_datasource_id(path);
993        let content = match read_file_to_string(path, None) {
994            Ok(c) => c,
995            Err(e) => {
996                warn!("Failed to read .gemspec at {:?}: {}", path, e);
997                return vec![default_package_data_with_datasource(datasource_id)];
998            }
999        };
1000
1001        let mut package_data = parse_gemspec_with_context(&content, path.parent());
1002        package_data.datasource_id = Some(datasource_id);
1003        vec![package_data]
1004    }
1005
1006    fn is_match(path: &Path) -> bool {
1007        path.extension()
1008            .and_then(|ext| ext.to_str())
1009            .is_some_and(|ext| ext == "gemspec")
1010    }
1011}
1012
/// Renders `path` as text with every backslash turned into a forward slash, so
/// directory-marker checks behave the same for both separator styles.
fn normalized_ruby_path(path: &Path) -> String {
    path.to_string_lossy()
        .chars()
        .map(|c| if c == '\\' { '/' } else { c })
        .collect()
}
1016
1017fn gemfile_datasource_id(path: &Path) -> DatasourceId {
1018    if normalized_ruby_path(path).contains("/data.gz-extract/") {
1019        DatasourceId::GemfileExtracted
1020    } else {
1021        DatasourceId::Gemfile
1022    }
1023}
1024
1025fn gemfile_lock_datasource_id(path: &Path) -> DatasourceId {
1026    if normalized_ruby_path(path).contains("/data.gz-extract/") {
1027        DatasourceId::GemfileLockExtracted
1028    } else {
1029        DatasourceId::GemfileLock
1030    }
1031}
1032
1033fn gemspec_datasource_id(path: &Path) -> DatasourceId {
1034    let normalized = normalized_ruby_path(path);
1035    if normalized.contains("/data.gz-extract/") {
1036        DatasourceId::GemspecExtracted
1037    } else if normalized.contains("/specifications/") {
1038        DatasourceId::GemGemspecInstalledSpecifications
1039    } else {
1040        DatasourceId::Gemspec
1041    }
1042}
1043
1044/// Cleans a value extracted from gemspec by stripping quotes, .freeze, %q{}, and brackets.
1045fn clean_gemspec_value(s: &str) -> String {
1046    let s = strip_freeze_suffix(s).trim();
1047
1048    let s = if let Some(pos) = s.find(" #") {
1049        s[..pos].trim()
1050    } else {
1051        s
1052    };
1053
1054    let s = if let Some(stripped) = s.strip_prefix("%q{") {
1055        stripped.strip_suffix('}').unwrap_or(stripped)
1056    } else if let Some(stripped) = s.strip_prefix("%q<") {
1057        stripped.strip_suffix('>').unwrap_or(stripped)
1058    } else if let Some(stripped) = s.strip_prefix("%q[") {
1059        stripped.strip_suffix(']').unwrap_or(stripped)
1060    } else if let Some(stripped) = s.strip_prefix("%q(") {
1061        stripped.strip_suffix(')').unwrap_or(stripped)
1062    } else {
1063        s
1064    };
1065
1066    let s = s
1067        .trim_start_matches('"')
1068        .trim_end_matches('"')
1069        .trim_start_matches('\'')
1070        .trim_end_matches('\'');
1071    let s = strip_freeze_suffix(s).trim();
1072    s.to_string()
1073}
1074
1075/// Extracts items from a Ruby array literal like `["a", "b", "c"]`.
1076fn extract_ruby_array(s: &str) -> Vec<String> {
1077    let s = strip_freeze_suffix(s.trim());
1078    let s = s.trim_start_matches('[').trim_end_matches(']');
1079    let item_re = match Regex::new(r#"["']([^"']*?)["'](?:\.freeze)?"#) {
1080        Ok(r) => r,
1081        Err(_) => return Vec::new(),
1082    };
1083    item_re
1084        .captures_iter(s)
1085        .filter_map(|cap| cap.get(1).map(|m| m.as_str().to_string()))
1086        .collect()
1087}
1088
1089fn extract_all_ruby_values(s: &str) -> Vec<String> {
1090    let value_re = match Regex::new(r#"%q[\{<\[(]([^\}>\])]+)[\}>\])]|["']([^"']+)["']"#) {
1091        Ok(r) => r,
1092        Err(_) => return Vec::new(),
1093    };
1094
1095    value_re
1096        .captures_iter(s)
1097        .filter_map(|caps| caps.get(1).or_else(|| caps.get(2)))
1098        .map(|m| clean_gemspec_value(m.as_str()))
1099        .collect()
1100}
1101
1102fn extract_first_ruby_value(s: &str) -> Option<String> {
1103    extract_all_ruby_values(s).into_iter().next()
1104}
1105
/// Returns the text that follows the first top-level comma in an argument list,
/// trimmed of surrounding whitespace, or `""` when there is no such comma.
///
/// Commas inside single- or double-quoted strings are ignored (a backslash
/// inside a string skips the character after it), as are commas nested inside
/// `[]`, `{}`, `<>` or `()`.
fn after_first_argument(args: &str) -> &str {
    let mut open_brackets = 0usize;
    let mut open_parens = 0usize;
    let mut active_quote: Option<char> = None;
    let mut skip_escaped = false;

    for (offset, current) in args.char_indices() {
        // The previous character was a backslash inside a string.
        if skip_escaped {
            skip_escaped = false;
            continue;
        }

        if let Some(delimiter) = active_quote {
            if current == '\\' {
                skip_escaped = true;
            } else if current == delimiter {
                active_quote = None;
            }
            continue;
        }

        match current {
            '"' | '\'' => active_quote = Some(current),
            '[' | '{' | '<' => open_brackets += 1,
            ']' | '}' | '>' => open_brackets = open_brackets.saturating_sub(1),
            '(' => open_parens += 1,
            ')' => open_parens = open_parens.saturating_sub(1),
            ',' if open_brackets == 0 && open_parens == 0 => {
                return args[offset + current.len_utf8()..].trim();
            }
            _ => {}
        }
    }

    ""
}
1143
1144/// Bug #2: Resolves variable version references like `CSV::VERSION` or `RAILS_VERSION`.
1145///
1146/// Scans the file content for constant definitions matching the variable name
1147/// and returns the resolved string value.
1148fn resolve_variable_version(var_name: &str, contexts: &[String]) -> Option<String> {
1149    let var_name = var_name.trim();
1150    if var_name.is_empty() {
1151        return None;
1152    }
1153
1154    for candidate in candidate_constant_names(var_name) {
1155        let escaped = regex::escape(&candidate);
1156        let pattern = format!(r#"(?m)^\s*{}\s*=\s*["']([^"']+)["']"#, escaped);
1157        let Ok(re) = Regex::new(&pattern) else {
1158            continue;
1159        };
1160
1161        for context in contexts {
1162            if let Some(caps) = re.captures(context) {
1163                return caps.get(1).map(|m| m.as_str().to_string());
1164            }
1165        }
1166    }
1167
1168    None
1169}
1170
1171fn resolve_variable_array(var_name: &str, contexts: &[String]) -> Option<Vec<String>> {
1172    let var_name = var_name.trim();
1173    if var_name.is_empty() {
1174        return None;
1175    }
1176
1177    for candidate in candidate_constant_names(var_name) {
1178        let escaped = regex::escape(&candidate);
1179        let pattern = format!(r#"(?m)^\s*{}\s*=\s*(\[[^\n]+\])"#, escaped);
1180        let Ok(re) = Regex::new(&pattern) else {
1181            continue;
1182        };
1183
1184        for context in contexts {
1185            if let Some(caps) = re.captures(context)
1186                && let Some(raw) = caps.get(1)
1187            {
1188                let values = extract_ruby_array(raw.as_str());
1189                if !values.is_empty() {
1190                    return Some(values);
1191                }
1192            }
1193        }
1194    }
1195
1196    None
1197}
1198
/// Lists the names under which a constant reference may be defined.
///
/// The full reference always comes first. For a namespaced reference such as
/// `CSV::VERSION` the last segment (`VERSION`) is added as a second candidate.
/// An empty last segment (a reference ending in `::`) is skipped, because an
/// empty name would make the lookup pattern match any `= "..."` line.
fn candidate_constant_names(var_name: &str) -> Vec<String> {
    let mut names = vec![var_name.to_string()];
    names.extend(
        var_name
            .rsplit_once("::")
            .map(|(_, last)| last)
            .filter(|last| !last.is_empty())
            .map(str::to_string),
    );
    names
}
1208
/// Reports whether `s` has the shape of a bare Ruby local variable name: it
/// starts with `_` or an ASCII lowercase letter and continues with only `_` or
/// ASCII alphanumerics. The empty string is not a variable name.
fn looks_like_local_variable_reference(s: &str) -> bool {
    let Some(first) = s.chars().next() else {
        return false;
    };
    if first != '_' && !first.is_ascii_lowercase() {
        return false;
    }

    s.chars()
        .skip(1)
        .all(|c| c == '_' || c.is_ascii_alphanumeric())
}
1214
/// Determines the directory that file reads triggered by a gemspec must stay
/// inside.
///
/// When the canonical `base_dir` lies under the canonical current working
/// directory, the working directory is the root; otherwise the canonical
/// `base_dir` itself is. Returns `None` when no base directory is given or it
/// cannot be canonicalized.
fn resolve_ruby_read_root(base_dir: Option<&Path>) -> Option<PathBuf> {
    let canonical_base = base_dir?.canonicalize().ok()?;
    let canonical_cwd = std::env::current_dir()
        .ok()
        .and_then(|cwd| cwd.canonicalize().ok());

    match canonical_cwd {
        Some(cwd) if canonical_base.starts_with(&cwd) => Some(cwd),
        _ => Some(canonical_base),
    }
}
1229
/// Canonicalizes `path` and returns it only if the result lies inside
/// `allowed_root`. Paths that cannot be canonicalized or that fall outside the
/// root produce `None`.
fn resolve_ruby_read_path(path: PathBuf, allowed_root: &Path) -> Option<PathBuf> {
    match path.canonicalize() {
        Ok(resolved) if resolved.starts_with(allowed_root) => Some(resolved),
        _ => None,
    }
}
1236
1237fn resolve_file_read_argument(args: &str, base_dir: Option<&Path>) -> Option<String> {
1238    let base_dir = base_dir?;
1239    let allowed_root = resolve_ruby_read_root(base_dir.into())?;
1240    let relative_path = extract_first_ruby_value(args)?;
1241    if relative_path.is_empty() {
1242        return None;
1243    }
1244
1245    let candidate = Path::new(&relative_path);
1246    let path = if candidate.is_absolute() {
1247        candidate.to_path_buf()
1248    } else {
1249        base_dir.join(candidate)
1250    };
1251
1252    let safe_path = resolve_ruby_read_path(path, &allowed_root)?;
1253
1254    fs::read_to_string(safe_path)
1255        .ok()
1256        .map(|content| content.trim().to_string())
1257        .filter(|content| !content.is_empty())
1258}
1259
1260fn resolve_scalar_expression(
1261    expression: &str,
1262    base_dir: Option<&Path>,
1263    contexts: &[String],
1264) -> Option<String> {
1265    let expression = if let Some(pos) = expression.find(" #") {
1266        expression[..pos].trim()
1267    } else {
1268        expression.trim()
1269    };
1270
1271    let file_read_re = Regex::new(r#"^File\.read\((.+)\)(?:\.strip)?(?:\.freeze)?$"#).ok()?;
1272    if let Some(caps) = file_read_re.captures(expression) {
1273        return caps
1274            .get(1)
1275            .and_then(|m| resolve_file_read_argument(m.as_str(), base_dir));
1276    }
1277
1278    if let Some(value) = extract_first_ruby_value(expression) {
1279        return Some(value);
1280    }
1281
1282    let cleaned = clean_gemspec_value(expression);
1283    if looks_like_constant_reference(&cleaned) {
1284        return resolve_variable_version(&cleaned, contexts).or(Some(cleaned));
1285    }
1286
1287    None
1288}
1289
1290fn resolve_local_variable_value(
1291    var_name: &str,
1292    content: &str,
1293    base_dir: Option<&Path>,
1294    contexts: &[String],
1295) -> Option<String> {
1296    let escaped = regex::escape(var_name.trim());
1297    let pattern = format!(r#"(?m)^\s*{}\s*=\s*(.+)$"#, escaped);
1298    let re = Regex::new(&pattern).ok()?;
1299
1300    re.captures_iter(content).find_map(|caps| {
1301        caps.get(1)
1302            .and_then(|m| resolve_scalar_expression(m.as_str(), base_dir, contexts))
1303    })
1304}
1305
1306fn resolve_gemspec_scalar_value(
1307    raw_value: &str,
1308    content: &str,
1309    base_dir: Option<&Path>,
1310    contexts: &[String],
1311) -> Option<String> {
1312    let cleaned = truncate_field(clean_gemspec_value(raw_value));
1313    if cleaned.is_empty() {
1314        return None;
1315    }
1316
1317    if looks_like_constant_reference(&cleaned) {
1318        return resolve_variable_version(&cleaned, contexts)
1319            .map(truncate_field)
1320            .or(Some(cleaned));
1321    }
1322
1323    if looks_like_local_variable_reference(&cleaned) {
1324        return resolve_local_variable_value(&cleaned, content, base_dir, contexts)
1325            .map(truncate_field)
1326            .or(Some(cleaned));
1327    }
1328
1329    Some(cleaned)
1330}
1331
1332fn load_required_ruby_contexts(content: &str, base_dir: Option<&Path>) -> Vec<String> {
1333    let mut contexts = vec![content.to_string()];
1334    let Some(base_dir) = base_dir else {
1335        return contexts;
1336    };
1337    let allowed_root = resolve_ruby_read_root(Some(base_dir));
1338
1339    let require_re = match Regex::new(r#"(?m)^\s*require(?:_relative)?\s+["']([^"']+)["']"#) {
1340        Ok(re) => re,
1341        Err(_) => return contexts,
1342    };
1343
1344    for caps in require_re.captures_iter(content) {
1345        let Some(required) = caps.get(1).map(|m| m.as_str()) else {
1346            continue;
1347        };
1348        for candidate in candidate_require_paths(base_dir, required) {
1349            let Some(safe_candidate) = allowed_root
1350                .as_deref()
1351                .and_then(|root| resolve_ruby_read_path(candidate, root))
1352            else {
1353                continue;
1354            };
1355            if let Ok(required_content) = read_file_to_string(&safe_candidate, None) {
1356                contexts.push(required_content);
1357                break;
1358            }
1359        }
1360    }
1361
1362    contexts
1363}
1364
/// Lists the locations where a required Ruby file may live, in lookup order:
/// directly under `base_dir`, then under `base_dir/lib`.
///
/// `::` in the required name is converted to `/`, and `.rb` is appended unless
/// the name already ends with it.
fn candidate_require_paths(base_dir: &Path, required: &str) -> Vec<PathBuf> {
    let mut filename = required.replace("::", "/");
    if !filename.ends_with(".rb") {
        filename.push_str(".rb");
    }

    let beside_gemspec = base_dir.join(&filename);
    let under_lib = base_dir.join("lib").join(&filename);
    vec![beside_gemspec, under_lib]
}
1378
/// Reports whether `s` looks like a Ruby constant reference: it either starts
/// with an ASCII uppercase letter or contains a `::` namespace separator.
fn looks_like_constant_reference(s: &str) -> bool {
    let starts_uppercase = matches!(s.chars().next(), Some(c) if c.is_ascii_uppercase());
    starts_uppercase || s.contains("::")
}
1382
/// Parses a .gemspec file content and returns PackageData.
///
/// Test-only convenience wrapper around `parse_gemspec_with_context` that
/// passes no base directory, so only `content` itself is available for
/// resolving constants and no files are read from disk.
#[cfg(test)]
fn parse_gemspec(content: &str) -> PackageData {
    parse_gemspec_with_context(content, None)
}
1388
/// Parses the text of a `.gemspec` file into a `PackageData` record.
///
/// Extraction is line-oriented and regex-based: each pattern matches an
/// assignment or method call on an arbitrary receiver (`spec.`, `s.`, ...).
/// The following are collected:
/// - scalar fields `name`, `version`, `summary`, `description`, `homepage`,
///   `license` (when a field appears more than once, the last match wins);
/// - `licenses`, `authors`/`author` and `email` (string or array form);
/// - `add_dependency`, `add_runtime_dependency` and
///   `add_development_dependency` calls.
///
/// `base_dir` is the directory used to locate `require`d files and
/// `File.read` targets; pass `None` to resolve against `content` only.
///
/// The result always carries `DatasourceId::Gemspec`; callers that need a
/// different id overwrite it. If any of the internal patterns fails to compile,
/// a warning is logged and default package data is returned.
fn parse_gemspec_with_context(content: &str, base_dir: Option<&Path>) -> PackageData {
    // Texts searched when resolving constant references: the gemspec itself plus
    // any required files that could be loaded.
    let contexts = load_required_ruby_contexts(content, base_dir);

    // Regex for spec.name = "value" or s.name = "value"
    // The spec variable name varies: spec, s, gem, etc.
    let field_re = match Regex::new(
        r#"(?m)^\s*\w+\.(name|version|summary|description|homepage|license)\s*=\s*(.+)$"#,
    ) {
        Ok(r) => r,
        Err(e) => {
            warn!("Failed to compile gemspec field regex: {}", e);
            return default_package_data_with_datasource(DatasourceId::Gemspec);
        }
    };

    // Plural form: spec.licenses = [...]
    let licenses_re = match Regex::new(r#"(?m)^\s*\w+\.licenses\s*=\s*(.+)$"#) {
        Ok(r) => r,
        Err(e) => {
            warn!("Failed to compile licenses regex: {}", e);
            return default_package_data_with_datasource(DatasourceId::Gemspec);
        }
    };

    // Matches both spec.authors = ... and spec.author = ...
    let authors_re = match Regex::new(r#"(?m)^\s*\w+\.(?:authors|author)\s*=\s*(.+)$"#) {
        Ok(r) => r,
        Err(e) => {
            warn!("Failed to compile authors regex: {}", e);
            return default_package_data_with_datasource(DatasourceId::Gemspec);
        }
    };

    let email_re = match Regex::new(r#"(?m)^\s*\w+\.email\s*=\s*(.+)$"#) {
        Ok(r) => r,
        Err(e) => {
            warn!("Failed to compile email regex: {}", e);
            return default_package_data_with_datasource(DatasourceId::Gemspec);
        }
    };

    // Group 1 is the method name, group 2 the argument text; the surrounding
    // parentheses of the call are optional.
    let dependency_call_re = match Regex::new(
        r#"(?m)^\s*\w+\.(add_(?:development_|runtime_)?dependency)\s*\(?(.+?)\)?\s*$"#,
    ) {
        Ok(r) => r,
        Err(e) => {
            warn!("Failed to compile gemspec dependency regex: {}", e);
            return default_package_data_with_datasource(DatasourceId::Gemspec);
        }
    };

    let mut name: Option<String> = None;
    let mut version: Option<String> = None;
    let mut summary: Option<String> = None;
    let mut description: Option<String> = None;
    let mut homepage: Option<String> = None;
    let mut license: Option<String> = None;
    let mut licenses: Vec<String> = Vec::new();
    let mut authors: Vec<String> = Vec::new();
    let mut emails: Vec<String> = Vec::new();
    let mut dependencies: Vec<Dependency> = Vec::new();

    // Extract basic fields
    for caps in field_re.captures_iter(content).take(MAX_ITERATION_COUNT) {
        let field_name = match caps.get(1) {
            Some(m) => m.as_str(),
            None => continue,
        };
        let raw_value = match caps.get(2) {
            Some(m) => m.as_str().trim(),
            None => continue,
        };

        // name/version/summary/homepage go through constant and local-variable
        // resolution; description and license are only cleaned and truncated.
        match field_name {
            "name" => name = resolve_gemspec_scalar_value(raw_value, content, base_dir, &contexts),
            "version" => {
                version = resolve_gemspec_scalar_value(raw_value, content, base_dir, &contexts);
            }
            "summary" => {
                summary = resolve_gemspec_scalar_value(raw_value, content, base_dir, &contexts)
            }
            "description" => description = Some(truncate_field(clean_gemspec_value(raw_value))),
            "homepage" => {
                homepage = resolve_gemspec_scalar_value(raw_value, content, base_dir, &contexts)
            }
            "license" => license = Some(truncate_field(clean_gemspec_value(raw_value))),
            _ => {}
        }
    }

    // Extract licenses (plural)
    // Each match replaces the previous list, so the last assignment wins.
    for caps in licenses_re.captures_iter(content).take(MAX_ITERATION_COUNT) {
        if let Some(raw) = caps.get(1) {
            licenses = extract_ruby_array(raw.as_str());
        }
    }

    // Extract authors
    // Array literals and constant references replace the list; a plain scalar
    // value is appended to it.
    for caps in authors_re.captures_iter(content).take(MAX_ITERATION_COUNT) {
        if let Some(raw) = caps.get(1) {
            let raw_str = raw.as_str().trim();
            if raw_str.starts_with('[') {
                authors = extract_ruby_array(raw_str);
            } else if looks_like_constant_reference(raw_str) {
                authors = resolve_variable_array(raw_str, &contexts)
                    .unwrap_or_else(|| vec![clean_gemspec_value(raw_str)]);
            } else {
                authors.push(clean_gemspec_value(raw_str));
            }
        }
    }

    // Extract emails
    // Same replace-vs-append rules as for authors.
    for caps in email_re.captures_iter(content).take(MAX_ITERATION_COUNT) {
        if let Some(raw) = caps.get(1) {
            let raw_str = raw.as_str().trim();
            if raw_str.starts_with('[') {
                emails = extract_ruby_array(raw_str);
            } else if looks_like_constant_reference(raw_str) {
                emails = resolve_variable_array(raw_str, &contexts)
                    .unwrap_or_else(|| vec![clean_gemspec_value(raw_str)]);
            } else {
                emails.push(clean_gemspec_value(raw_str));
            }
        }
    }

    // Build parties from authors and emails
    let mut parties: Vec<Party> = Vec::new();

    if authors.len() == 1 && emails.len() == 1 {
        // Exactly one author and one email: merge them into a single party.
        let email_str = emails.first().map(String::as_str);
        let (parsed_email_name, parsed_email) = match email_str {
            Some(e) => split_name_email(e),
            None => (None, None),
        };

        parties.push(Party {
            r#type: Some("person".to_string()),
            role: Some("author".to_string()),
            name: authors.first().cloned().or(parsed_email_name),
            // Fall back to the raw string when it contains `@` and no `<`.
            email: parsed_email.or_else(|| {
                email_str
                    .filter(|e| e.contains('@') && !e.contains('<'))
                    .map(|e| e.to_string())
            }),
            url: None,
            organization: None,
            organization_url: None,
            timezone: None,
        });
    } else {
        // Otherwise authors and emails are not paired: one name-only party per
        // author, followed by one party per email.
        for author_name in authors {
            parties.push(Party {
                r#type: Some("person".to_string()),
                role: Some("author".to_string()),
                name: Some(author_name),
                email: None,
                url: None,
                organization: None,
                organization_url: None,
                timezone: None,
            });
        }

        for email_str in emails {
            // Only entries containing `<` are split into name and address.
            let (parsed_email_name, parsed_email) = if email_str.contains('<') {
                split_name_email(&email_str)
            } else {
                (None, None)
            };
            parties.push(Party {
                r#type: Some("person".to_string()),
                role: Some("author".to_string()),
                name: parsed_email_name,
                email: parsed_email.or_else(|| email_str.contains('@').then_some(email_str)),
                url: None,
                organization: None,
                organization_url: None,
                timezone: None,
            });
        }
    }

    // Extract dependency declarations
    for caps in dependency_call_re
        .captures_iter(content)
        .take(MAX_ITERATION_COUNT)
    {
        let method = match caps.get(1) {
            Some(m) => m.as_str(),
            None => continue,
        };
        let args = match caps.get(2) {
            Some(m) => m.as_str(),
            None => continue,
        };

        // The first string literal is the gem name; calls without one are skipped.
        let Some(dep_name) = extract_first_ruby_value(args).map(truncate_field) else {
            continue;
        };
        // Every string literal after the first argument is a version constraint.
        let version_parts = extract_all_ruby_values(after_first_argument(args));
        let extracted_requirement = if version_parts.is_empty() {
            None
        } else {
            Some(version_parts.join(", "))
        };
        // Dependency PURLs are built without a version.
        let purl = create_gem_purl(&dep_name, None);
        let is_development = method == "add_development_dependency";
        let scope = if is_development {
            "development"
        } else {
            "runtime"
        };

        dependencies.push(Dependency {
            purl,
            extracted_requirement,
            scope: Some(scope.to_string()),
            is_runtime: Some(!is_development),
            is_optional: Some(is_development),
            is_pinned: None,
            is_direct: Some(true),
            resolved_package: None,
            extra_data: None,
        });
    }

    // Extract license statement only - detection happens in separate engine
    // The plural `licenses` list takes precedence over the singular `license`.
    let extracted_license_statement = if !licenses.is_empty() {
        Some(licenses.join(" AND "))
    } else {
        license
    };

    let (declared_license_expression, declared_license_expression_spdx, license_detections) =
        normalize_spdx_declared_license(extracted_license_statement.as_deref());

    // Prefer description over summary
    let final_description = description.or(summary);

    // Build PURL
    let purl = name
        .as_deref()
        .map(|n| create_gem_purl(n, version.as_deref()))
        .unwrap_or(None);

    // No platform is passed here, so the download URL is the platform-less one.
    let (repository_homepage_url, repository_download_url, api_data_url, download_url) =
        if let Some(n) = name.as_deref() {
            get_rubygems_urls(n, version.as_deref(), None)
        } else {
            (None, None, None, None)
        };

    PackageData {
        package_type: Some(PACKAGE_TYPE),
        name,
        version,
        primary_language: Some("Ruby".to_string()),
        description: final_description,
        homepage_url: homepage,
        download_url,
        declared_license_expression,
        declared_license_expression_spdx,
        license_detections,
        extracted_license_statement,
        parties,
        dependencies,
        repository_homepage_url,
        repository_download_url,
        api_data_url,
        datasource_id: Some(DatasourceId::Gemspec),
        purl,
        ..default_package_data()
    }
}
1662
1663// =============================================================================
1664// .gem Archive Parser (Wave 3)
1665// =============================================================================
1666
/// Largest on-disk `.gem` archive, in bytes, that `extract_gem_archive` will open.
const MAX_ARCHIVE_SIZE: u64 = 100 * 1024 * 1024; // 100MB
/// Byte cap applied to `metadata.gz`, both as stored in the archive and after
/// decompression.
const MAX_FILE_SIZE: u64 = 50 * 1024 * 1024; // 50MB per file
/// Highest decompressed-to-stored size ratio accepted for `metadata.gz`.
const MAX_COMPRESSION_RATIO: f64 = 100.0; // 100:1 ratio
1670
1671/// Parser for .gem archive files.
1672///
1673/// Extracts metadata from Ruby .gem packages, which are tar archives
1674/// containing a gzip-compressed YAML metadata file (`metadata.gz`).
1675///
1676/// Includes safety checks against zip bombs and oversized archives.
1677pub struct GemArchiveParser;
1678
1679impl PackageParser for GemArchiveParser {
1680    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1681
1682    fn extract_packages(path: &Path) -> Vec<PackageData> {
1683        vec![match extract_gem_archive(path) {
1684            Ok(data) => data,
1685            Err(e) => {
1686                warn!("Failed to extract .gem archive at {:?}: {}", path, e);
1687                default_package_data_with_datasource(DatasourceId::GemArchive)
1688            }
1689        }]
1690    }
1691
1692    fn is_match(path: &Path) -> bool {
1693        path.extension()
1694            .and_then(|ext| ext.to_str())
1695            .is_some_and(|ext| ext == "gem")
1696    }
1697}
1698
/// Reads package metadata out of a `.gem` archive.
///
/// The archive is read as a tar file and scanned for an entry named exactly
/// `metadata.gz`. That entry is gunzipped and handed to
/// `parse_gem_metadata_yaml` with `DatasourceId::GemArchive`.
///
/// # Errors
///
/// Returns a descriptive message when:
/// - the file metadata or the archive itself cannot be read;
/// - the archive is larger than `MAX_ARCHIVE_SIZE`;
/// - `metadata.gz` is larger than `MAX_FILE_SIZE`, either as stored or after
///   decompression;
/// - the decompressed-to-stored size ratio exceeds `MAX_COMPRESSION_RATIO`;
/// - decompression or YAML parsing fails;
/// - no `metadata.gz` entry is found (including when the scan stops after
///   `MAX_ITERATION_COUNT` entries).
fn extract_gem_archive(path: &Path) -> Result<PackageData, String> {
    let file_metadata =
        fs::metadata(path).map_err(|e| format!("Failed to read file metadata: {}", e))?;
    let archive_size = file_metadata.len();

    // Reject oversized archives before opening them.
    if archive_size > MAX_ARCHIVE_SIZE {
        return Err(format!(
            "Archive too large: {} bytes (limit: {} bytes)",
            archive_size, MAX_ARCHIVE_SIZE
        ));
    }

    let file = File::open(path).map_err(|e| format!("Failed to open archive: {}", e))?;
    let mut archive = Archive::new(file);

    let mut entry_count: usize = 0;
    for entry_result in archive
        .entries()
        .map_err(|e| format!("Failed to read tar entries: {}", e))?
    {
        // Bound the number of entries examined.
        entry_count += 1;
        if entry_count > MAX_ITERATION_COUNT {
            warn!(
                "Exceeded max tar entry count ({}) in .gem archive, stopping iteration",
                MAX_ITERATION_COUNT
            );
            break;
        }

        let entry = entry_result.map_err(|e| format!("Failed to read tar entry: {}", e))?;
        let entry_path = entry
            .path()
            .map_err(|e| format!("Failed to get entry path: {}", e))?;
        let entry_str = entry_path.to_string_lossy();
        // Any entry whose path contains ".." is skipped, not treated as an error.
        if entry_str.contains("..") {
            warn!("Skipping tar entry with path traversal: {}", entry_str);
            continue;
        }

        if entry_path.to_str() == Some("metadata.gz") {
            // Size of the entry as stored in the tar (still gzip-compressed).
            let entry_size = entry.size();
            if entry_size > MAX_FILE_SIZE {
                return Err(format!(
                    "metadata.gz too large: {} bytes (limit: {} bytes)",
                    entry_size, MAX_FILE_SIZE
                ));
            }

            let mut decoder = GzDecoder::new(entry);
            let mut content = Vec::new();
            // Read at most one byte past the limit so that an oversized payload
            // is detectable without decompressing all of it.
            let mut limited = std::io::Read::take(&mut decoder, MAX_FILE_SIZE + 1);
            limited
                .read_to_end(&mut content)
                .map_err(|e| format!("Failed to decompress metadata.gz: {}", e))?;

            if content.len() > MAX_FILE_SIZE as usize {
                return Err(format!(
                    "Decompressed metadata too large: exceeds {} byte limit",
                    MAX_FILE_SIZE
                ));
            }

            // Invalid UTF-8 is tolerated: fall back to a lossy conversion.
            let content = match String::from_utf8(content) {
                Ok(s) => s,
                Err(err) => {
                    let bytes = err.into_bytes();
                    warn!("Invalid UTF-8 in gem metadata; using lossy conversion");
                    String::from_utf8_lossy(&bytes).into_owned()
                }
            };

            // Compare decompressed size with stored size; the check is skipped
            // for a zero-length entry to avoid dividing by zero.
            let uncompressed_size = content.len() as u64;
            if entry_size > 0 {
                let ratio = uncompressed_size as f64 / entry_size as f64;
                if ratio > MAX_COMPRESSION_RATIO {
                    return Err(format!(
                        "Suspicious compression ratio: {:.2}:1 (limit: {:.0}:1)",
                        ratio, MAX_COMPRESSION_RATIO
                    ));
                }
            }

            return parse_gem_metadata_yaml(&content, DatasourceId::GemArchive);
        }
    }

    Err("metadata.gz not found in .gem archive".to_string())
}
1787
1788fn parse_gem_metadata_yaml(
1789    content: &str,
1790    datasource_id: DatasourceId,
1791) -> Result<PackageData, String> {
1792    // Ruby YAML tagged types need to be handled:
1793    // --- !ruby/object:Gem::Specification
1794    // We strip Ruby-specific YAML tags since yaml_serde can't handle them
1795    let cleaned = clean_ruby_yaml_tags(content);
1796
1797    let yaml: yaml_serde::Value =
1798        yaml_serde::from_str(&cleaned).map_err(|e| format!("Failed to parse YAML: {}", e))?;
1799
1800    let name = yaml_string(&yaml, "name").map(truncate_field);
1801    let version = yaml.get("version").and_then(|v| {
1802        if v.is_string() {
1803            v.as_str().map(|s| truncate_field(s.to_string()))
1804        } else {
1805            yaml_string(v, "version").map(truncate_field)
1806        }
1807    });
1808    let description = yaml_string(&yaml, "description")
1809        .or_else(|| yaml_string(&yaml, "summary"))
1810        .map(truncate_field);
1811    let homepage = yaml_string(&yaml, "homepage").map(truncate_field);
1812    let summary = yaml_string(&yaml, "summary").map(truncate_field);
1813
1814    // Licenses
1815    let licenses: Vec<String> = yaml
1816        .get("licenses")
1817        .and_then(|v| v.as_sequence())
1818        .map(|seq| {
1819            seq.iter()
1820                .filter_map(|item| item.as_str().map(|s| truncate_field(s.to_string())))
1821                .collect()
1822        })
1823        .unwrap_or_default();
1824
1825    // Extract license statement only - detection happens in separate engine
1826    let extracted_license_statement = if !licenses.is_empty() {
1827        Some(licenses.join(" AND "))
1828    } else {
1829        None
1830    };
1831
1832    let (license_expression, license_expression_spdx, license_detections) =
1833        normalize_spdx_declared_license(extracted_license_statement.as_deref());
1834
1835    // Authors
1836    let authors: Vec<String> = yaml
1837        .get("authors")
1838        .and_then(|v| v.as_sequence())
1839        .map(|seq| {
1840            seq.iter()
1841                .filter_map(|item| item.as_str().map(|s| truncate_field(s.to_string())))
1842                .collect()
1843        })
1844        .unwrap_or_default();
1845
1846    let emails: Vec<String> = yaml
1847        .get("email")
1848        .map(|v| {
1849            if let Some(seq) = v.as_sequence() {
1850                seq.iter()
1851                    .filter_map(|item| item.as_str().map(|s| truncate_field(s.to_string())))
1852                    .collect()
1853            } else if let Some(s) = v.as_str() {
1854                vec![truncate_field(s.to_string())]
1855            } else {
1856                Vec::new()
1857            }
1858        })
1859        .unwrap_or_default();
1860
1861    // Build parties
1862    let mut parties: Vec<Party> = Vec::new();
1863    let max_len = authors.len().max(emails.len());
1864    for i in 0..max_len {
1865        let author_name = authors.get(i).map(|s| s.as_str());
1866        let email_str = emails.get(i).map(|s| s.as_str());
1867
1868        let (parsed_email_name, parsed_email) = match email_str {
1869            Some(e) if e.contains('<') => split_name_email(e),
1870            None => (None, None),
1871            _ => (None, None),
1872        };
1873
1874        let party_name = author_name.map(|s| s.to_string()).or(parsed_email_name);
1875
1876        parties.push(Party {
1877            r#type: Some("person".to_string()),
1878            role: Some("author".to_string()),
1879            name: party_name,
1880            email: parsed_email.or_else(|| {
1881                email_str
1882                    .filter(|e| e.contains('@') && !e.contains('<'))
1883                    .map(|e| e.to_string())
1884            }),
1885            url: None,
1886            organization: None,
1887            organization_url: None,
1888            timezone: None,
1889        });
1890    }
1891
1892    // Dependencies
1893    let dependencies = parse_gem_yaml_dependencies(&yaml);
1894
1895    let metadata = yaml.get("metadata");
1896
1897    let bug_tracking_url = metadata
1898        .and_then(|m| yaml_string(m, "bug_tracking_uri"))
1899        .map(truncate_field);
1900
1901    let code_view_url = metadata
1902        .and_then(|m| yaml_string(m, "source_code_uri"))
1903        .map(truncate_field);
1904
1905    let vcs_url = code_view_url.clone().or_else(|| {
1906        metadata
1907            .and_then(|m| yaml_string(m, "homepage_uri"))
1908            .map(truncate_field)
1909    });
1910
1911    let file_references = metadata
1912        .and_then(|m| m.get("files"))
1913        .and_then(|f| f.as_sequence())
1914        .map(|seq| {
1915            seq.iter()
1916                .filter_map(|v| v.as_str())
1917                .map(|s| crate::models::FileReference {
1918                    path: s.to_string(),
1919                    size: None,
1920                    sha1: None,
1921                    md5: None,
1922                    sha256: None,
1923                    sha512: None,
1924                    extra_data: None,
1925                })
1926                .collect::<Vec<_>>()
1927        })
1928        .unwrap_or_default();
1929
1930    let release_date = yaml_string(&yaml, "date").and_then(|d| {
1931        if d.len() >= 10 {
1932            Some(d[..10].to_string())
1933        } else {
1934            None
1935        }
1936    });
1937
1938    let purl = name
1939        .as_deref()
1940        .map(|n| create_gem_purl(n, version.as_deref()))
1941        .unwrap_or(None);
1942
1943    let platform = yaml_string(&yaml, "platform").map(truncate_field);
1944    let (repository_homepage_url, repository_download_url, api_data_url, download_url) =
1945        if let Some(n) = name.as_deref() {
1946            get_rubygems_urls(n, version.as_deref(), platform.as_deref())
1947        } else {
1948            (None, None, None, None)
1949        };
1950
1951    let qualifiers = if let Some(ref p) = platform {
1952        if p != "ruby" {
1953            let mut q = HashMap::new();
1954            q.insert("platform".to_string(), p.clone());
1955            Some(q)
1956        } else {
1957            None
1958        }
1959    } else {
1960        None
1961    };
1962
1963    Ok(PackageData {
1964        package_type: Some(PACKAGE_TYPE),
1965        name,
1966        version,
1967        qualifiers,
1968        primary_language: Some("Ruby".to_string()),
1969        description: description.or(summary),
1970        release_date,
1971        homepage_url: homepage,
1972        download_url,
1973        bug_tracking_url,
1974        code_view_url,
1975        declared_license_expression: license_expression,
1976        declared_license_expression_spdx: license_expression_spdx,
1977        license_detections,
1978        extracted_license_statement,
1979        file_references,
1980        parties,
1981        dependencies,
1982        repository_homepage_url,
1983        repository_download_url,
1984        api_data_url,
1985        datasource_id: Some(datasource_id),
1986        purl,
1987        vcs_url,
1988        ..default_package_data()
1989    })
1990}
1991
1992/// Strips Ruby-specific YAML tags that yaml_serde cannot handle.
1993fn clean_ruby_yaml_tags(content: &str) -> String {
1994    let tag_re = match Regex::new(r"!ruby/\S+") {
1995        Ok(r) => r,
1996        Err(_) => return content.to_string(),
1997    };
1998    tag_re.replace_all(content, "").to_string()
1999}
2000
2001fn yaml_string(yaml: &yaml_serde::Value, key: &str) -> Option<String> {
2002    yaml.get(key)
2003        .and_then(|v| v.as_str())
2004        .filter(|s| !s.is_empty())
2005        .map(|s| s.to_string())
2006}
2007
2008fn parse_gem_yaml_dependencies(yaml: &yaml_serde::Value) -> Vec<Dependency> {
2009    let mut dependencies = Vec::new();
2010
2011    let deps_seq = match yaml.get("dependencies").and_then(|v| v.as_sequence()) {
2012        Some(seq) => seq,
2013        None => return dependencies,
2014    };
2015
2016    for dep_value in deps_seq.iter().take(MAX_ITERATION_COUNT) {
2017        let dep_name = match yaml_string(dep_value, "name").map(truncate_field) {
2018            Some(n) => n,
2019            None => continue,
2020        };
2021
2022        let dep_type = yaml_string(dep_value, "type");
2023        let is_development = dep_type.as_deref() == Some(":development");
2024
2025        // Extract version requirements from the nested structure
2026        let requirements = dep_value
2027            .get("requirement")
2028            .or_else(|| dep_value.get("version_requirements"))
2029            .and_then(|req| req.get("requirements"))
2030            .and_then(|reqs| reqs.as_sequence());
2031
2032        let extracted_requirement = requirements.map(|reqs| {
2033            let parts: Vec<String> = reqs
2034                .iter()
2035                .filter_map(|req| {
2036                    let seq = req.as_sequence()?;
2037                    if seq.len() >= 2 {
2038                        let op = seq[0].as_str().unwrap_or("");
2039                        let ver = seq[1].get("version").and_then(|v| v.as_str()).unwrap_or("");
2040                        if op == ">=" && ver == "0" {
2041                            // ">= 0" means "any version" - skip
2042                            None
2043                        } else if op.is_empty() || ver.is_empty() {
2044                            None
2045                        } else {
2046                            Some(format!("{} {}", op, ver))
2047                        }
2048                    } else {
2049                        None
2050                    }
2051                })
2052                .collect();
2053            parts.join(", ")
2054        });
2055
2056        let extracted_requirement = extracted_requirement
2057            .filter(|s| !s.is_empty())
2058            .or_else(|| Some(String::new()));
2059
2060        let (scope, is_runtime, is_optional) = if is_development {
2061            (Some("development".to_string()), false, true)
2062        } else {
2063            (Some("runtime".to_string()), true, false)
2064        };
2065
2066        let purl = create_gem_purl(&dep_name, None);
2067
2068        dependencies.push(Dependency {
2069            purl,
2070            extracted_requirement,
2071            scope,
2072            is_runtime: Some(is_runtime),
2073            is_optional: Some(is_optional),
2074            is_pinned: None,
2075            is_direct: Some(true),
2076            resolved_package: None,
2077            extra_data: None,
2078        });
2079    }
2080
2081    dependencies
2082}
2083
2084// =============================================================================
2085// Gem Metadata Extracted Parser (metadata.gz-extract files)
2086// =============================================================================
2087
2088pub struct GemMetadataExtractedParser;
2089
2090impl PackageParser for GemMetadataExtractedParser {
2091    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
2092
2093    fn extract_packages(path: &Path) -> Vec<PackageData> {
2094        vec![match extract_gem_metadata_extracted(path) {
2095            Ok(data) => data,
2096            Err(e) => {
2097                warn!("Failed to extract gem metadata from {:?}: {}", path, e);
2098                default_package_data_with_datasource(DatasourceId::GemArchiveExtracted)
2099            }
2100        }]
2101    }
2102
2103    fn is_match(path: &Path) -> bool {
2104        path.to_str()
2105            .is_some_and(|p| p.contains("metadata.gz-extract"))
2106    }
2107}
2108
2109fn extract_gem_metadata_extracted(path: &Path) -> Result<PackageData, String> {
2110    let content = read_file_to_string(path, None)
2111        .map_err(|e| format!("Failed to read metadata.gz-extract file: {}", e))?;
2112
2113    parse_gem_metadata_yaml(&content, DatasourceId::GemArchiveExtracted)
2114}
2115
// Register parser metadata for each Ruby file format handled in this module.
// Each invocation passes a description, a list of path glob patterns, the string "gem",
// the string "Ruby", and an optional documentation URL.
// NOTE(review): the meaning of each positional argument is inferred from the values
// passed here; confirm against the `register_parser!` macro definition.

// Gemfile manifests, including ones found inside an extracted `data.gz-extract` directory.
crate::register_parser!(
    "Ruby Gemfile manifest",
    &["**/Gemfile", "**/data.gz-extract/Gemfile"],
    "gem",
    "Ruby",
    Some("https://bundler.io/man/gemfile.5.html"),
);

// Gemfile.lock lockfiles, including ones found inside an extracted `data.gz-extract` directory.
crate::register_parser!(
    "Ruby Gemfile.lock lockfile",
    &["**/Gemfile.lock", "**/data.gz-extract/Gemfile.lock"],
    "gem",
    "Ruby",
    Some("https://bundler.io/man/gemfile.5.html"),
);

// Gem specification files (`*.gemspec`).
crate::register_parser!(
    "Ruby .gemspec manifest",
    &[
        "**/*.gemspec",
        "**/data.gz-extract/*.gemspec",
        "**/specifications/*.gemspec"
    ],
    "gem",
    "Ruby",
    Some("https://guides.rubygems.org/specification-reference/"),
);

// Packaged gem archives (`*.gem`).
crate::register_parser!(
    "Ruby .gem archive",
    &["**/*.gem"],
    "gem",
    "Ruby",
    Some("https://guides.rubygems.org/specification-reference/"),
);

// Pre-extracted gem metadata files (`metadata.gz-extract`).
crate::register_parser!(
    "Ruby gem metadata (extracted)",
    &["**/metadata.gz-extract"],
    "gem",
    "Ruby",
    Some("https://guides.rubygems.org/specification-reference/"),
);
2160
#[cfg(test)]
mod tests {
    use super::{clean_gemspec_value, parse_gemspec};

    /// A `%q{` literal without its closing brace still yields the text after the opener.
    #[test]
    fn test_clean_gemspec_value_handles_unterminated_percent_q() {
        let cleaned = clean_gemspec_value("%q{Arel is a SQL AST manager for Ruby. It");
        assert_eq!(cleaned, "Arel is a SQL AST manager for Ruby. It");
    }

    /// Both `add_runtime_dependency` and `add_dependency` produce runtime-scoped
    /// dependencies that carry their version requirement.
    #[test]
    fn test_parse_gemspec_runtime_dependency_scope() {
        let content = r#"
Gem::Specification.new do |spec|
  spec.name = "demo"
  spec.version = "1.0.0"
  spec.add_runtime_dependency "rack", "~> 3.0"
  spec.add_dependency "thor", ">= 1.0"
end
"#;

        // Expected (scope, requirement) per dependency, in declaration order.
        let expected = [("runtime", "~> 3.0"), ("runtime", ">= 1.0")];

        let package_data = parse_gemspec(content);
        assert_eq!(package_data.dependencies.len(), expected.len());

        for (dependency, &(scope, requirement)) in
            package_data.dependencies.iter().zip(expected.iter())
        {
            assert_eq!(dependency.scope, Some(scope.to_string()));
            assert_eq!(
                dependency.extracted_requirement,
                Some(requirement.to_string())
            );
        }
    }
}