Skip to main content

provenant/parsers/
ruby.rs

1// SPDX-FileCopyrightText: Provenant contributors
2// SPDX-License-Identifier: Apache-2.0
3
4//! Parser for Ruby/RubyGems package manifests.
5//!
6//! Extracts package metadata, dependencies, and platform information from
7//! Gemfile and Gemfile.lock files used by Ruby/Bundler projects.
8//!
9//! # Supported Formats
10//! - Gemfile (manifest with Ruby DSL)
11//! - Gemfile.lock (lockfile with state machine sections)
12//! - *.gemspec (gem specification files)
13//! - *.gem (gem archive packages)
14//! - metadata.gz-extract (pre-extracted gem metadata)
15//!
16//! # Key Features
17//! - State machine parsing for Gemfile.lock sections (GEM, GIT, PATH, SVN, PLATFORMS, BUNDLED WITH, DEPENDENCIES)
18//! - Regex-based Ruby DSL parsing for Gemfile
19//! - Dependency group handling (:development, :test, etc.)
20//! - Platform-specific gem support
21//! - Pessimistic version operator (~>) support
22//! - Bug Fix #1: Strip .freeze suffix from strings
23//! - Bug Fix #4: Correct dependency scope mapping (:runtime → None, :development → "development")
24//!
25//! # Implementation Notes
26//! - Uses regex for pattern matching (not full Ruby AST)
27//! - Graceful error handling: logs warnings and returns default on parse failure
28//! - PURL type: "gem"
29
30use crate::models::{DatasourceId, Dependency, PackageData, PackageType, Party};
31use crate::parser_warn as warn;
32use crate::parsers::utils::{
33    MAX_ITERATION_COUNT, read_file_to_string, split_name_email, truncate_field,
34};
35use flate2::read::GzDecoder;
36use packageurl::PackageUrl;
37use regex::Regex;
38use std::collections::HashMap;
39use std::fs::{self, File};
40use std::io::Read;
41use std::path::{Path, PathBuf};
42use tar::Archive;
43
44use super::PackageParser;
45use super::license_normalization::normalize_spdx_declared_license;
46
47const PACKAGE_TYPE: PackageType = PackageType::Gem;
48
49// =============================================================================
50// Bug Fix #1: Strip .freeze suffix from strings
51// =============================================================================
52
/// Removes trailing `.freeze` calls from a Ruby string-literal expression.
///
/// Ruby's `.freeze` only marks the string immutable, so it carries no
/// information for gem names or versions. Every trailing occurrence is
/// dropped; text that does not end in `.freeze` is returned untouched.
///
/// # Examples
/// ```ignore
/// assert_eq!(strip_freeze_suffix("\"name\".freeze"), "\"name\"");
/// assert_eq!(strip_freeze_suffix("'1.0.0'.freeze"), "'1.0.0'");
/// ```
pub fn strip_freeze_suffix(s: &str) -> &str {
    const FREEZE_CALL: &str = ".freeze";

    let mut remaining = s;
    // Peel the suffix off repeatedly so chained calls collapse as well.
    while let Some(shorter) = remaining.strip_suffix(FREEZE_CALL) {
        remaining = shorter;
    }
    remaining
}
66
/// A `do ... end` block that is currently open while a Gemfile is scanned.
///
/// `parse_gemfile` keeps these on a stack; the innermost entry of each kind
/// decides the groups and the source applied to a `gem` line.
enum GemfileBlock {
    /// A `group ... do` block, holding the symbol names listed on its opening line.
    Group(Vec<String>),
    /// A `source "..." do` block, holding the quoted source string.
    Source(String),
}
71
72// =============================================================================
73// Gemfile Parser (Ruby DSL)
74// =============================================================================
75
76/// Ruby Gemfile parser for manifest files.
77///
78/// Parses Ruby DSL syntax to extract gem declarations, dependency groups,
79/// platform-specific gems, and version constraints.
80pub struct GemfileParser;
81
82impl PackageParser for GemfileParser {
83    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
84
85    fn extract_packages(path: &Path) -> Vec<PackageData> {
86        let datasource_id = gemfile_datasource_id(path);
87        let content = match read_file_to_string(path, None) {
88            Ok(c) => c,
89            Err(e) => {
90                warn!("Failed to read Gemfile at {:?}: {}", path, e);
91                return vec![default_package_data_with_datasource(datasource_id)];
92            }
93        };
94
95        let mut package_data = parse_gemfile(&content);
96        package_data.datasource_id = Some(datasource_id);
97        vec![package_data]
98    }
99
100    fn is_match(path: &Path) -> bool {
101        path.file_name()
102            .and_then(|n| n.to_str())
103            .is_some_and(|name| name == "Gemfile")
104            || path
105                .to_str()
106                .is_some_and(|p| p.contains("data.gz-extract/") && p.ends_with("/Gemfile"))
107    }
108}
109
110/// Parses Gemfile content and extracts dependencies with groups.
111fn parse_gemfile(content: &str) -> PackageData {
112    let mut dependencies = Vec::new();
113    let mut block_stack = Vec::new();
114    let mut default_source = None;
115    let mut sources = Vec::new();
116
117    // Regex patterns for Gemfile parsing
118    // gem "name", "version", options...
119    let gem_regex = match Regex::new(
120        r#"^\s*gem\s+["']([^"']+)["'](?:\.freeze)?(?:\s*,\s*["']([^"']+)["'](?:\.freeze)?)?(?:\s*,\s*["']([^"']+)["'](?:\.freeze)?)?(?:\s*,\s*(.+))?"#,
121    ) {
122        Ok(r) => r,
123        Err(e) => {
124            warn!("Failed to compile gem regex: {}", e);
125            return default_package_data_with_datasource(DatasourceId::Gemfile);
126        }
127    };
128
129    // group :name do ... end
130    let group_start_regex = match Regex::new(r"^\s*group\s+(.+?)\s+do\s*$") {
131        Ok(r) => r,
132        Err(e) => {
133            warn!("Failed to compile group regex: {}", e);
134            return default_package_data_with_datasource(DatasourceId::Gemfile);
135        }
136    };
137
138    let group_end_regex = match Regex::new(r"^\s*end\s*$") {
139        Ok(r) => r,
140        Err(e) => {
141            warn!("Failed to compile end regex: {}", e);
142            return default_package_data_with_datasource(DatasourceId::Gemfile);
143        }
144    };
145
146    let source_block_start_regex = match Regex::new(r#"^\s*source\s+["']([^"']+)["']\s+do\s*$"#) {
147        Ok(r) => r,
148        Err(e) => {
149            warn!("Failed to compile source block regex: {}", e);
150            return default_package_data_with_datasource(DatasourceId::Gemfile);
151        }
152    };
153
154    let source_regex = match Regex::new(r#"^\s*source\s+["']([^"']+)["']\s*$"#) {
155        Ok(r) => r,
156        Err(e) => {
157            warn!("Failed to compile source regex: {}", e);
158            return default_package_data_with_datasource(DatasourceId::Gemfile);
159        }
160    };
161
162    // Parse symbols like :development, :test
163    let symbol_regex = match Regex::new(r":(\w+)") {
164        Ok(r) => r,
165        Err(e) => {
166            warn!("Failed to compile symbol regex: {}", e);
167            return default_package_data_with_datasource(DatasourceId::Gemfile);
168        }
169    };
170
171    for line in content.lines().take(MAX_ITERATION_COUNT) {
172        let trimmed = line.trim();
173
174        // Skip comments and empty lines
175        if trimmed.is_empty() || trimmed.starts_with('#') {
176            continue;
177        }
178
179        // Check for group start
180        if let Some(caps) = group_start_regex.captures(trimmed) {
181            let groups_str = caps.get(1).map(|m| m.as_str()).unwrap_or("");
182            let mut current_groups = Vec::new();
183            for cap in symbol_regex.captures_iter(groups_str) {
184                if let Some(group_name) = cap.get(1) {
185                    current_groups.push(group_name.as_str().to_string());
186                }
187            }
188            block_stack.push(GemfileBlock::Group(current_groups));
189            continue;
190        }
191
192        if let Some(caps) = source_block_start_regex.captures(trimmed) {
193            let source = caps
194                .get(1)
195                .map(|m| m.as_str().to_string())
196                .unwrap_or_default();
197            if !source.is_empty() {
198                push_unique_string(&mut sources, source.clone());
199                block_stack.push(GemfileBlock::Source(source));
200            }
201            continue;
202        }
203
204        if let Some(caps) = source_regex.captures(trimmed) {
205            if let Some(source) = caps.get(1).map(|m| m.as_str().to_string()) {
206                push_unique_string(&mut sources, source.clone());
207                default_source = Some(source);
208            }
209            continue;
210        }
211
212        // Check for group end
213        if group_end_regex.is_match(trimmed) {
214            block_stack.pop();
215            continue;
216        }
217
218        // Parse gem declaration
219        if let Some(caps) = gem_regex.captures(trimmed) {
220            let name = strip_freeze_suffix(caps.get(1).map(|m| m.as_str()).unwrap_or(""));
221            if name.is_empty() {
222                continue;
223            }
224
225            // Collect version constraints
226            let mut version_parts = Vec::new();
227            if let Some(v) = caps.get(2) {
228                version_parts.push(strip_freeze_suffix(v.as_str()).to_string());
229            }
230            if let Some(v) = caps.get(3) {
231                let v_str = strip_freeze_suffix(v.as_str());
232                // Check if it looks like a version constraint
233                if looks_like_version_constraint(v_str) {
234                    version_parts.push(v_str.to_string());
235                }
236            }
237
238            let extracted_requirement = if version_parts.is_empty() {
239                None
240            } else {
241                Some(version_parts.join(", "))
242            };
243
244            let current_groups = current_group_names(&block_stack);
245
246            // Determine scope based on current group
247            // Bug Fix #4: :runtime → None, :development → "development"
248            let (scope, is_runtime, is_optional) = if current_groups.is_empty() {
249                // No group = runtime dependency
250                (None, true, false)
251            } else if current_groups.iter().any(|g| g == "development") {
252                (Some("development".to_string()), false, true)
253            } else if current_groups.iter().any(|g| g == "test") {
254                (Some("test".to_string()), false, true)
255            } else {
256                // Other groups (e.g., :production)
257                let group = current_groups.first().cloned();
258                (group, true, false)
259            };
260
261            // Create PURL
262            let purl = create_gem_purl(name, None);
263            let inherited_source = current_source(&block_stack, default_source.as_deref());
264            let extra_data = build_gemfile_dependency_extra_data(
265                caps.get(4).map(|m| m.as_str()),
266                inherited_source.as_deref(),
267            );
268
269            dependencies.push(Dependency {
270                purl,
271                extracted_requirement,
272                scope,
273                is_runtime: Some(is_runtime),
274                is_optional: Some(is_optional),
275                is_pinned: None,
276                is_direct: Some(true),
277                resolved_package: None,
278                extra_data,
279            });
280        }
281    }
282
283    let extra_data = if sources.is_empty() {
284        None
285    } else {
286        Some(HashMap::from([(
287            "sources".to_string(),
288            serde_json::Value::Array(sources.into_iter().map(serde_json::Value::String).collect()),
289        )]))
290    };
291
292    PackageData {
293        package_type: Some(PACKAGE_TYPE),
294        primary_language: Some("Ruby".to_string()),
295        dependencies,
296        extra_data,
297        datasource_id: Some(DatasourceId::Gemfile),
298        ..default_package_data()
299    }
300}
301
302fn current_group_names(block_stack: &[GemfileBlock]) -> Vec<String> {
303    block_stack
304        .iter()
305        .rev()
306        .find_map(|block| match block {
307            GemfileBlock::Group(groups) => Some(groups.clone()),
308            GemfileBlock::Source(_) => None,
309        })
310        .unwrap_or_default()
311}
312
313fn current_source(block_stack: &[GemfileBlock], default_source: Option<&str>) -> Option<String> {
314    block_stack
315        .iter()
316        .rev()
317        .find_map(|block| match block {
318            GemfileBlock::Source(source) => Some(source.clone()),
319            GemfileBlock::Group(_) => None,
320        })
321        .or_else(|| default_source.map(str::to_string))
322}
323
/// Appends `value` to `values` unless an equal string is already present,
/// so insertion order is kept and duplicates are dropped.
fn push_unique_string(values: &mut Vec<String>, value: String) {
    let already_present = values.iter().any(|existing| *existing == value);
    if already_present {
        return;
    }
    values.push(value);
}
329
330fn build_gemfile_dependency_extra_data(
331    options: Option<&str>,
332    inherited_source: Option<&str>,
333) -> Option<HashMap<String, serde_json::Value>> {
334    let mut extra = HashMap::new();
335    let options = options.unwrap_or("");
336
337    if let Some(git) = extract_gemfile_quoted_option(options, "git") {
338        extra.insert(
339            "source_type".to_string(),
340            serde_json::Value::String("GIT".to_string()),
341        );
342        extra.insert("git".to_string(), serde_json::Value::String(git.clone()));
343        extra.insert("remote".to_string(), serde_json::Value::String(git));
344    }
345
346    if let Some(path) = extract_gemfile_quoted_option(options, "path") {
347        extra.insert(
348            "source_type".to_string(),
349            serde_json::Value::String("PATH".to_string()),
350        );
351        extra.insert("path".to_string(), serde_json::Value::String(path));
352    }
353
354    for key in ["branch", "ref", "tag"] {
355        if let Some(value) = extract_gemfile_quoted_option(options, key) {
356            extra.insert(key.to_string(), serde_json::Value::String(value));
357        }
358    }
359
360    let direct_source = extract_gemfile_quoted_option(options, "source");
361    if let Some(source) = direct_source {
362        extra.insert("source".to_string(), serde_json::Value::String(source));
363    } else if !extra.contains_key("source_type")
364        && let Some(source) = inherited_source
365    {
366        extra.insert(
367            "source".to_string(),
368            serde_json::Value::String(source.to_string()),
369        );
370    }
371
372    (!extra.is_empty()).then_some(extra)
373}
374
375fn extract_gemfile_quoted_option(options: &str, key: &str) -> Option<String> {
376    if options.is_empty() {
377        return None;
378    }
379
380    let pattern = format!(r#"(?:^|,\s*){}\s*:\s*["']([^"']+)["']"#, regex::escape(key));
381    Regex::new(&pattern)
382        .ok()
383        .and_then(|regex| regex.captures(options))
384        .and_then(|captures| captures.get(1).map(|m| m.as_str().to_string()))
385}
386
/// Reports whether `s` plausibly starts a version constraint.
///
/// Only the first character is inspected: a comparison-operator character
/// (`~`, `>`, `<`, `=`, `!`) or an ASCII digit counts as a constraint start.
/// An empty string never matches.
fn looks_like_version_constraint(s: &str) -> bool {
    match s.chars().next() {
        Some('~' | '>' | '<' | '=' | '!') => true,
        Some(first) => first.is_ascii_digit(),
        None => false,
    }
}
396
397// =============================================================================
398// Gemfile.lock Parser (State Machine)
399// =============================================================================
400
401/// Ruby Gemfile.lock parser for lockfiles.
402///
403/// Uses a state machine to parse sections: GEM, GIT, PATH, SVN,
404/// PLATFORMS, BUNDLED WITH, DEPENDENCIES.
405pub struct GemfileLockParser;
406
407impl PackageParser for GemfileLockParser {
408    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
409
410    fn extract_packages(path: &Path) -> Vec<PackageData> {
411        let datasource_id = gemfile_lock_datasource_id(path);
412        let content = match read_file_to_string(path, None) {
413            Ok(c) => c,
414            Err(e) => {
415                warn!("Failed to read Gemfile.lock at {:?}: {}", path, e);
416                return vec![default_package_data_with_datasource(datasource_id)];
417            }
418        };
419
420        let mut package_data = parse_gemfile_lock(&content);
421        package_data.datasource_id = Some(datasource_id);
422        vec![package_data]
423    }
424
425    fn is_match(path: &Path) -> bool {
426        path.file_name()
427            .and_then(|n| n.to_str())
428            .is_some_and(|name| name == "Gemfile.lock")
429            || path
430                .to_str()
431                .is_some_and(|p| p.contains("data.gz-extract/") && p.ends_with("/Gemfile.lock"))
432    }
433}
434
/// Parse state for Gemfile.lock state machine.
///
/// The state is switched by the section header lines of the lockfile and
/// decides how each following line is interpreted by `parse_gemfile_lock`.
#[derive(Debug, Clone, PartialEq)]
enum ParseState {
    /// No section entered yet; lines are ignored.
    None,
    /// Inside a `GEM` section, before `specs:`; `key: value` options are read.
    Gem,
    /// Inside a `GIT` section, before `specs:`; `key: value` options are read.
    Git,
    /// Inside a `PATH` section, before `specs:`; `key: value` options are read.
    Path,
    /// Inside an `SVN` section, before `specs:`; `key: value` options are read.
    Svn,
    /// After the `specs:` line of a GEM/GIT/PATH/SVN section; 4-space
    /// indented `name (version)` lines are read as gems.
    Specs,
    /// Inside `PLATFORMS`; each line is recorded as a platform.
    Platforms,
    /// Inside `BUNDLED WITH`; the indented version number is read.
    BundledWith,
    /// Inside `DEPENDENCIES`; 2-space indented entries are read.
    Dependencies,
}
448
/// Parsed gem information from Gemfile.lock.
///
/// All fields are actively used:
/// - `gem_type`, `remote`, `revision`, `ref_field`, `branch`, `tag`: Stored in extra_data for GIT/PATH/SVN sources
/// - `name`, `version`, `platform`, `pinned`: Used for dependency PURL and metadata generation
/// - `requirements`: Stored as extracted_requirement for version constraints
#[derive(Debug, Clone, Default)]
struct GemInfo {
    /// Gem name as written on the spec or dependency line.
    name: String,
    /// Version part of the parenthesised text on a spec line, if any.
    version: Option<String>,
    /// Platform part (text after the first `-`) of that parenthesised text, if any.
    platform: Option<String>,
    /// Section the gem was listed under: "GEM", "GIT", "PATH" or "SVN".
    gem_type: String,
    /// Value of the `remote:` option of the enclosing section, if any.
    remote: Option<String>,
    /// Value of the `revision:` option of the enclosing section, if any.
    revision: Option<String>,
    /// Value of the `ref:` option of the enclosing section, if any.
    ref_field: Option<String>,
    /// Value of the `branch:` option of the enclosing section, if any.
    branch: Option<String>,
    /// Value of the `tag:` option of the enclosing section, if any.
    tag: Option<String>,
    /// `true` when the gem's DEPENDENCIES entry ends with `!`.
    pinned: bool,
    /// Constraints collected from the gem's DEPENDENCIES entries.
    requirements: Vec<String>,
}
469
470fn select_primary_path_gem(gems: &HashMap<String, GemInfo>) -> Option<GemInfo> {
471    let mut path_gems: Vec<&GemInfo> = gems.values().filter(|gem| gem.gem_type == "PATH").collect();
472    path_gems.sort_by(|left, right| {
473        left.remote
474            .as_deref()
475            .cmp(&right.remote.as_deref())
476            .then_with(|| left.name.cmp(&right.name))
477    });
478
479    path_gems
480        .iter()
481        .copied()
482        .find(|gem| gem.pinned && gem.remote.as_deref() == Some("."))
483        .or_else(|| path_gems.iter().copied().find(|gem| gem.pinned))
484        .or_else(|| {
485            path_gems
486                .iter()
487                .copied()
488                .find(|gem| gem.remote.as_deref() == Some("."))
489        })
490        .or_else(|| path_gems.first().copied())
491        .cloned()
492}
493
494/// Parses Gemfile.lock content using a state machine.
495fn parse_gemfile_lock(content: &str) -> PackageData {
496    let mut state = ParseState::None;
497    let mut dependencies = Vec::new();
498    let mut gems: HashMap<String, GemInfo> = HashMap::new();
499    let mut platforms: Vec<String> = Vec::new();
500    let mut bundler_version: Option<String> = None;
501    let mut current_gem_type = String::new();
502    let mut current_remote: Option<String> = None;
503    let mut current_options: HashMap<String, String> = HashMap::new();
504
505    // DEPS pattern: 2 spaces at line start
506    let deps_regex = match Regex::new(r"^ {2}([^ \)\(,!:]+)(?: \(([^)]+)\))?(!)?$") {
507        Ok(r) => r,
508        Err(e) => {
509            warn!("Failed to compile deps regex: {}", e);
510            return default_package_data_with_datasource(DatasourceId::GemfileLock);
511        }
512    };
513
514    // SPEC_DEPS pattern: 4 spaces at line start
515    let spec_deps_regex = match Regex::new(r"^ {4}([^ \)\(,!:]+)(?: \(([^)]+)\))?$") {
516        Ok(r) => r,
517        Err(e) => {
518            warn!("Failed to compile spec_deps regex: {}", e);
519            return default_package_data_with_datasource(DatasourceId::GemfileLock);
520        }
521    };
522
523    // OPTIONS pattern: key: value
524    let options_regex = match Regex::new(r"^ {2}([a-z]+): (.+)$") {
525        Ok(r) => r,
526        Err(e) => {
527            warn!("Failed to compile options regex: {}", e);
528            return default_package_data_with_datasource(DatasourceId::GemfileLock);
529        }
530    };
531
532    // VERSION pattern for BUNDLED WITH
533    let version_regex = match Regex::new(r"^\s+(\d+(?:\.\d+)+)\s*$") {
534        Ok(r) => r,
535        Err(e) => {
536            warn!("Failed to compile version regex: {}", e);
537            return default_package_data_with_datasource(DatasourceId::GemfileLock);
538        }
539    };
540
541    for line in content.lines().take(MAX_ITERATION_COUNT) {
542        let trimmed = line.trim_end();
543
544        // Empty line resets state
545        if trimmed.is_empty() {
546            current_options.clear();
547            continue;
548        }
549
550        // Section headers (no leading whitespace) and sub-section headers
551        match trimmed {
552            "GEM" => {
553                state = ParseState::Gem;
554                current_gem_type = "GEM".to_string();
555                current_remote = None;
556                current_options.clear();
557                continue;
558            }
559            "GIT" => {
560                state = ParseState::Git;
561                current_gem_type = "GIT".to_string();
562                current_remote = None;
563                current_options.clear();
564                continue;
565            }
566            "PATH" => {
567                state = ParseState::Path;
568                current_gem_type = "PATH".to_string();
569                current_remote = None;
570                current_options.clear();
571                continue;
572            }
573            "SVN" => {
574                state = ParseState::Svn;
575                current_gem_type = "SVN".to_string();
576                current_remote = None;
577                current_options.clear();
578                continue;
579            }
580            "PLATFORMS" => {
581                state = ParseState::Platforms;
582                continue;
583            }
584            "BUNDLED WITH" => {
585                state = ParseState::BundledWith;
586                continue;
587            }
588            "DEPENDENCIES" => {
589                state = ParseState::Dependencies;
590                continue;
591            }
592            _ => {}
593        }
594
595        // Check for "  specs:" sub-section header (2-space indent) within
596        // GEM/GIT/PATH/SVN sections. This must be checked separately because
597        // the leading whitespace is preserved by trim_end().
598        if trimmed.trim() == "specs:" {
599            state = match state {
600                ParseState::Gem | ParseState::Git | ParseState::Path | ParseState::Svn => {
601                    ParseState::Specs
602                }
603                _ => state,
604            };
605            continue;
606        }
607
608        // Process based on current state
609        match state {
610            ParseState::Gem | ParseState::Git | ParseState::Path | ParseState::Svn => {
611                // Parse options (remote:, revision:, ref:, branch:, tag:)
612                if let Some(caps) = options_regex.captures(line) {
613                    let key = caps.get(1).map(|m| m.as_str()).unwrap_or("");
614                    let value = caps.get(2).map(|m| m.as_str()).unwrap_or("");
615                    current_options.insert(key.to_string(), value.to_string());
616                    if key == "remote" {
617                        current_remote = Some(value.to_string());
618                    }
619                }
620            }
621            ParseState::Specs => {
622                // Parse gem specs (4 spaces indent)
623                if let Some(caps) = spec_deps_regex.captures(line) {
624                    let name = caps.get(1).map(|m| m.as_str()).unwrap_or("").to_string();
625                    let version_str = caps.get(2).map(|m| m.as_str()).unwrap_or("");
626
627                    // Parse version and platform
628                    let (version, platform) = parse_version_platform(version_str);
629
630                    if !name.is_empty() {
631                        let gem_info = GemInfo {
632                            name: name.clone(),
633                            version,
634                            platform,
635                            gem_type: current_gem_type.clone(),
636                            remote: current_remote.clone(),
637                            revision: current_options.get("revision").cloned(),
638                            ref_field: current_options.get("ref").cloned(),
639                            branch: current_options.get("branch").cloned(),
640                            tag: current_options.get("tag").cloned(),
641                            pinned: false,
642                            requirements: Vec::new(),
643                        };
644                        gems.insert(name, gem_info);
645                    }
646                }
647            }
648            ParseState::Platforms => {
649                // Parse platform entries (2 spaces indent)
650                let platform = trimmed.trim();
651                if !platform.is_empty() {
652                    platforms.push(platform.to_string());
653                }
654            }
655            ParseState::BundledWith => {
656                // Parse bundler version
657                if let Some(caps) = version_regex.captures(line) {
658                    bundler_version = caps.get(1).map(|m| m.as_str().to_string());
659                }
660            }
661            ParseState::Dependencies => {
662                // Parse direct dependencies (2 spaces indent)
663                if let Some(caps) = deps_regex.captures(line) {
664                    let name = caps.get(1).map(|m| m.as_str()).unwrap_or("").to_string();
665                    let version_constraint = caps.get(2).map(|m| m.as_str().to_string());
666                    let pinned = caps.get(3).is_some();
667
668                    if !name.is_empty() {
669                        // Update gem info if exists, or create new
670                        if let Some(gem) = gems.get_mut(&name) {
671                            gem.pinned = pinned;
672                            if let Some(vc) = &version_constraint {
673                                gem.requirements.push(vc.clone());
674                            }
675                        } else {
676                            let gem_info = GemInfo {
677                                name: name.clone(),
678                                version: None,
679                                platform: None,
680                                gem_type: "GEM".to_string(),
681                                remote: None,
682                                revision: None,
683                                ref_field: None,
684                                branch: None,
685                                tag: None,
686                                pinned,
687                                requirements: version_constraint.into_iter().collect(),
688                            };
689                            gems.insert(name, gem_info);
690                        }
691                    }
692                }
693            }
694            ParseState::None => {}
695        }
696    }
697
698    let primary_gem = select_primary_path_gem(&gems);
699
700    let (
701        package_name,
702        package_version,
703        repository_homepage_url,
704        repository_download_url,
705        api_data_url,
706        download_url,
707    ) = if let Some(ref pg) = primary_gem {
708        let urls = get_rubygems_urls(&pg.name, pg.version.as_deref(), pg.platform.as_deref());
709        (
710            Some(pg.name.clone()),
711            pg.version.clone(),
712            urls.0,
713            urls.1,
714            urls.2,
715            urls.3,
716        )
717    } else {
718        (None, None, None, None, None, None)
719    };
720
721    for (_, gem) in gems {
722        if let Some(ref pg) = primary_gem
723            && gem.name == pg.name
724        {
725            continue;
726        }
727
728        let version_for_purl = gem.version.as_deref();
729        let purl = create_gem_purl(&gem.name, version_for_purl);
730
731        let extracted_requirement = if !gem.requirements.is_empty() {
732            Some(gem.requirements.join(", "))
733        } else {
734            gem.version.clone()
735        };
736
737        let extra_data = build_gem_source_extra_data(&gem);
738
739        dependencies.push(Dependency {
740            purl,
741            extracted_requirement,
742            scope: Some("dependencies".to_string()),
743            is_runtime: Some(true),
744            is_optional: Some(false),
745            is_pinned: Some(gem.pinned),
746            is_direct: Some(true),
747            resolved_package: None,
748            extra_data,
749        });
750    }
751
752    dependencies.sort_by(|left, right| {
753        left.purl
754            .as_deref()
755            .cmp(&right.purl.as_deref())
756            .then_with(|| {
757                left.extracted_requirement
758                    .as_deref()
759                    .cmp(&right.extracted_requirement.as_deref())
760            })
761    });
762
763    // Build extra_data
764    let mut extra_data = HashMap::new();
765    if !platforms.is_empty() {
766        extra_data.insert(
767            "platforms".to_string(),
768            serde_json::Value::Array(
769                platforms
770                    .into_iter()
771                    .map(serde_json::Value::String)
772                    .collect(),
773            ),
774        );
775    }
776    if let Some(bv) = bundler_version {
777        extra_data.insert("bundler_version".to_string(), serde_json::Value::String(bv));
778    }
779
780    let purl = package_name
781        .as_deref()
782        .map(|n| create_gem_purl(n, package_version.as_deref()))
783        .unwrap_or(None);
784
785    PackageData {
786        package_type: Some(PACKAGE_TYPE),
787        name: package_name,
788        version: package_version,
789        primary_language: Some("Ruby".to_string()),
790        download_url,
791        dependencies,
792        repository_homepage_url,
793        repository_download_url,
794        api_data_url,
795        extra_data: if extra_data.is_empty() {
796            None
797        } else {
798            Some(extra_data)
799        },
800        datasource_id: Some(DatasourceId::GemfileLock),
801        purl,
802        ..default_package_data()
803    }
804}
805
806fn build_gem_source_extra_data(gem: &GemInfo) -> Option<HashMap<String, serde_json::Value>> {
807    if gem.gem_type != "GIT" && gem.gem_type != "PATH" && gem.gem_type != "SVN" {
808        return None;
809    }
810
811    let mut extra = HashMap::new();
812    extra.insert(
813        "source_type".to_string(),
814        serde_json::Value::String(gem.gem_type.clone()),
815    );
816
817    if let Some(ref remote) = gem.remote {
818        extra.insert(
819            "remote".to_string(),
820            serde_json::Value::String(remote.clone()),
821        );
822    }
823    if let Some(ref revision) = gem.revision {
824        extra.insert(
825            "revision".to_string(),
826            serde_json::Value::String(revision.clone()),
827        );
828    }
829    if let Some(ref ref_field) = gem.ref_field {
830        extra.insert(
831            "ref".to_string(),
832            serde_json::Value::String(ref_field.clone()),
833        );
834    }
835    if let Some(ref branch) = gem.branch {
836        extra.insert(
837            "branch".to_string(),
838            serde_json::Value::String(branch.clone()),
839        );
840    }
841    if let Some(ref tag) = gem.tag {
842        extra.insert("tag".to_string(), serde_json::Value::String(tag.clone()));
843    }
844
845    Some(extra)
846}
847
/// Splits the parenthesised text of a lockfile spec line into version and
/// platform.
///
/// The text is split at the first `-`: everything before it is the version,
/// everything after it is the platform. Without a `-` the whole text is the
/// version. An empty input yields `(None, None)`.
///
/// Examples: `"2.6.3"` -> `("2.6.3", None)`,
/// `"2.6.3-java"` -> `("2.6.3", Some("java"))`.
fn parse_version_platform(s: &str) -> (Option<String>, Option<String>) {
    if s.is_empty() {
        return (None, None);
    }

    match s.split_once('-') {
        Some((version, platform)) => (Some(version.to_owned()), Some(platform.to_owned())),
        None => (Some(s.to_owned()), None),
    }
}
862
863/// Creates a gem PURL.
864fn create_gem_purl(name: &str, version: Option<&str>) -> Option<String> {
865    let mut purl = match PackageUrl::new(PACKAGE_TYPE.as_str(), name) {
866        Ok(p) => p,
867        Err(e) => {
868            warn!("Failed to create PURL for gem '{}': {}", name, e);
869            return None;
870        }
871    };
872
873    if let Some(v) = version
874        && let Err(e) = purl.with_version(v)
875    {
876        warn!("Failed to set version '{}' for gem '{}': {}", v, name, e);
877    }
878
879    Some(purl.to_string())
880}
881
/// Builds the rubygems.org page URL for a gem.
///
/// With a version, the URL points at that version's page; the version is stripped of
/// surrounding whitespace and slashes first. Returns `None` for an empty name.
fn rubygems_homepage_url(name: &str, version: Option<&str>) -> Option<String> {
    if name.is_empty() {
        return None;
    }

    let url = match version {
        Some(raw) => {
            let cleaned = raw.trim().trim_matches('/');
            format!("https://rubygems.org/gems/{}/versions/{}", name, cleaned)
        }
        None => format!("https://rubygems.org/gems/{}", name),
    };
    Some(url)
}
894
/// Builds the rubygems.org download URL for a gem file.
///
/// The file name is `<name>-<version>.gem`, or `<name>-<version>-<platform>.gem` when a
/// platform other than `ruby` is given. Name and version are stripped of surrounding
/// whitespace and slashes before use.
///
/// Returns `None` when the version is missing, or when the name or version is empty
/// *after* normalization; validating the normalized values prevents malformed URLs such
/// as `.../downloads/-1.0.gem` for a blank name. An empty platform is treated like no
/// platform.
fn rubygems_download_url(
    name: &str,
    version: Option<&str>,
    platform: Option<&str>,
) -> Option<String> {
    let name = name.trim().trim_matches('/');
    let version = version?.trim().trim_matches('/');
    if name.is_empty() || version.is_empty() {
        return None;
    }

    // The default "ruby" platform is not part of the gem file name.
    let platform = platform
        .map(str::trim)
        .filter(|p| !p.is_empty() && *p != "ruby");

    let version_plat = match platform {
        Some(p) => format!("{}-{}", version, p),
        None => version.to_string(),
    };

    Some(format!(
        "https://rubygems.org/downloads/{}-{}.gem",
        name, version_plat
    ))
}
922
/// Builds the rubygems.org JSON API URL for a gem.
///
/// Uses the v2 per-version endpoint when a version is given and the v1 versions listing
/// otherwise. Returns `None` for an empty name.
fn rubygems_api_url(name: &str, version: Option<&str>) -> Option<String> {
    if name.is_empty() {
        return None;
    }

    let url = match version {
        Some(v) => format!(
            "https://rubygems.org/api/v2/rubygems/{}/versions/{}.json",
            name, v
        ),
        None => format!("https://rubygems.org/api/v1/versions/{}.json", name),
    };
    Some(url)
}
940
941fn get_rubygems_urls(
942    name: &str,
943    version: Option<&str>,
944    platform: Option<&str>,
945) -> (
946    Option<String>,
947    Option<String>,
948    Option<String>,
949    Option<String>,
950) {
951    let repository_homepage_url = rubygems_homepage_url(name, version);
952    let repository_download_url = rubygems_download_url(name, version, platform);
953    let api_data_url = rubygems_api_url(name, version);
954    let download_url = repository_download_url.clone();
955
956    (
957        repository_homepage_url,
958        repository_download_url,
959        api_data_url,
960        download_url,
961    )
962}
963
964/// Returns a default PackageData with gem-specific settings.
965fn default_package_data() -> PackageData {
966    PackageData {
967        package_type: Some(PACKAGE_TYPE),
968        primary_language: Some("Ruby".to_string()),
969        ..Default::default()
970    }
971}
972
973fn default_package_data_with_datasource(datasource_id: DatasourceId) -> PackageData {
974    PackageData {
975        datasource_id: Some(datasource_id),
976        ..default_package_data()
977    }
978}
979
980// =============================================================================
981// Gemspec Parser (Ruby DSL)
982// =============================================================================
983
984/// Ruby .gemspec file parser.
985///
986/// Parses `Gem::Specification.new` blocks using regex-based extraction.
987/// Handles frozen strings (Bug #1), variable version resolution (Bug #2),
988/// and RFC 5322 email parsing (Bug #6).
989pub struct GemspecParser;
990
991impl PackageParser for GemspecParser {
992    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
993
994    fn extract_packages(path: &Path) -> Vec<PackageData> {
995        let datasource_id = gemspec_datasource_id(path);
996        let content = match read_file_to_string(path, None) {
997            Ok(c) => c,
998            Err(e) => {
999                warn!("Failed to read .gemspec at {:?}: {}", path, e);
1000                return vec![default_package_data_with_datasource(datasource_id)];
1001            }
1002        };
1003
1004        let mut package_data = parse_gemspec_with_context(&content, path.parent());
1005        package_data.datasource_id = Some(datasource_id);
1006        vec![package_data]
1007    }
1008
1009    fn is_match(path: &Path) -> bool {
1010        path.extension()
1011            .and_then(|ext| ext.to_str())
1012            .is_some_and(|ext| ext == "gemspec")
1013    }
1014}
1015
/// Renders a path as text with every backslash turned into a forward slash, so that
/// path-segment checks work regardless of the separator used.
fn normalized_ruby_path(path: &Path) -> String {
    path.to_string_lossy()
        .chars()
        .map(|c| if c == '\\' { '/' } else { c })
        .collect()
}
1019
1020fn gemfile_datasource_id(path: &Path) -> DatasourceId {
1021    if normalized_ruby_path(path).contains("/data.gz-extract/") {
1022        DatasourceId::GemfileExtracted
1023    } else {
1024        DatasourceId::Gemfile
1025    }
1026}
1027
1028fn gemfile_lock_datasource_id(path: &Path) -> DatasourceId {
1029    if normalized_ruby_path(path).contains("/data.gz-extract/") {
1030        DatasourceId::GemfileLockExtracted
1031    } else {
1032        DatasourceId::GemfileLock
1033    }
1034}
1035
1036fn gemspec_datasource_id(path: &Path) -> DatasourceId {
1037    let normalized = normalized_ruby_path(path);
1038    if normalized.contains("/data.gz-extract/") {
1039        DatasourceId::GemspecExtracted
1040    } else if normalized.contains("/specifications/") {
1041        DatasourceId::GemGemspecInstalledSpecifications
1042    } else {
1043        DatasourceId::Gemspec
1044    }
1045}
1046
1047/// Cleans a value extracted from gemspec by stripping quotes, .freeze, %q{}, and brackets.
1048fn clean_gemspec_value(s: &str) -> String {
1049    let s = strip_freeze_suffix(s).trim();
1050
1051    let s = if let Some(pos) = s.find(" #") {
1052        s[..pos].trim()
1053    } else {
1054        s
1055    };
1056
1057    let s = if let Some(stripped) = s.strip_prefix("%q{") {
1058        stripped.strip_suffix('}').unwrap_or(stripped)
1059    } else if let Some(stripped) = s.strip_prefix("%q<") {
1060        stripped.strip_suffix('>').unwrap_or(stripped)
1061    } else if let Some(stripped) = s.strip_prefix("%q[") {
1062        stripped.strip_suffix(']').unwrap_or(stripped)
1063    } else if let Some(stripped) = s.strip_prefix("%q(") {
1064        stripped.strip_suffix(')').unwrap_or(stripped)
1065    } else {
1066        s
1067    };
1068
1069    let s = s
1070        .trim_start_matches('"')
1071        .trim_end_matches('"')
1072        .trim_start_matches('\'')
1073        .trim_end_matches('\'');
1074    let s = strip_freeze_suffix(s).trim();
1075    s.to_string()
1076}
1077
1078/// Extracts items from a Ruby array literal like `["a", "b", "c"]`.
1079fn extract_ruby_array(s: &str) -> Vec<String> {
1080    let s = strip_freeze_suffix(s.trim());
1081    let s = s.trim_start_matches('[').trim_end_matches(']');
1082    let item_re = match Regex::new(r#"["']([^"']*?)["'](?:\.freeze)?"#) {
1083        Ok(r) => r,
1084        Err(_) => return Vec::new(),
1085    };
1086    item_re
1087        .captures_iter(s)
1088        .filter_map(|cap| cap.get(1).map(|m| m.as_str().to_string()))
1089        .collect()
1090}
1091
1092fn extract_all_ruby_values(s: &str) -> Vec<String> {
1093    let value_re = match Regex::new(r#"%q[\{<\[(]([^\}>\])]+)[\}>\])]|["']([^"']+)["']"#) {
1094        Ok(r) => r,
1095        Err(_) => return Vec::new(),
1096    };
1097
1098    value_re
1099        .captures_iter(s)
1100        .filter_map(|caps| caps.get(1).or_else(|| caps.get(2)))
1101        .map(|m| clean_gemspec_value(m.as_str()))
1102        .collect()
1103}
1104
1105fn extract_first_ruby_value(s: &str) -> Option<String> {
1106    extract_all_ruby_values(s).into_iter().next()
1107}
1108
/// Returns the trimmed text following the first top-level comma in an argument list.
///
/// Commas inside quoted strings, brackets (`[]`, `{}`, `<>`) or parentheses do not
/// count, and a backslash inside a quoted string escapes the next character. Returns
/// an empty string when there is no second argument.
fn after_first_argument(args: &str) -> &str {
    let mut open_brackets = 0usize;
    let mut open_parens = 0usize;
    let mut active_quote: Option<char> = None;
    let mut cursor = args.char_indices();

    while let Some((offset, ch)) = cursor.next() {
        match active_quote {
            // Escaped character inside a string: consume it without inspection.
            Some(_) if ch == '\\' => {
                cursor.next();
            }
            Some(quote) if ch == quote => active_quote = None,
            Some(_) => {}
            None => match ch {
                '\'' | '"' => active_quote = Some(ch),
                '[' | '{' | '<' => open_brackets += 1,
                ']' | '}' | '>' => open_brackets = open_brackets.saturating_sub(1),
                '(' => open_parens += 1,
                ')' => open_parens = open_parens.saturating_sub(1),
                ',' if open_brackets == 0 && open_parens == 0 => {
                    return args[offset + 1..].trim();
                }
                _ => {}
            },
        }
    }

    ""
}
1146
1147/// Bug #2: Resolves variable version references like `CSV::VERSION` or `RAILS_VERSION`.
1148///
1149/// Scans the file content for constant definitions matching the variable name
1150/// and returns the resolved string value.
1151fn resolve_variable_version(var_name: &str, contexts: &[String]) -> Option<String> {
1152    let var_name = var_name.trim();
1153    if var_name.is_empty() {
1154        return None;
1155    }
1156
1157    for candidate in candidate_constant_names(var_name) {
1158        let escaped = regex::escape(&candidate);
1159        let pattern = format!(r#"(?m)^\s*{}\s*=\s*["']([^"']+)["']"#, escaped);
1160        let Ok(re) = Regex::new(&pattern) else {
1161            continue;
1162        };
1163
1164        for context in contexts {
1165            if let Some(caps) = re.captures(context) {
1166                return caps.get(1).map(|m| m.as_str().to_string());
1167            }
1168        }
1169    }
1170
1171    None
1172}
1173
1174fn resolve_variable_array(var_name: &str, contexts: &[String]) -> Option<Vec<String>> {
1175    let var_name = var_name.trim();
1176    if var_name.is_empty() {
1177        return None;
1178    }
1179
1180    for candidate in candidate_constant_names(var_name) {
1181        let escaped = regex::escape(&candidate);
1182        let pattern = format!(r#"(?m)^\s*{}\s*=\s*(\[[^\n]+\])"#, escaped);
1183        let Ok(re) = Regex::new(&pattern) else {
1184            continue;
1185        };
1186
1187        for context in contexts {
1188            if let Some(caps) = re.captures(context)
1189                && let Some(raw) = caps.get(1)
1190            {
1191                let values = extract_ruby_array(raw.as_str());
1192                if !values.is_empty() {
1193                    return Some(values);
1194                }
1195            }
1196        }
1197    }
1198
1199    None
1200}
1201
/// Lists the names under which a constant may be defined: the name as given, followed
/// by its final `::`-separated segment when that differs from the full name.
fn candidate_constant_names(var_name: &str) -> Vec<String> {
    let unqualified = var_name
        .split("::")
        .last()
        .filter(|segment| *segment != var_name);

    std::iter::once(var_name)
        .chain(unqualified)
        .map(str::to_string)
        .collect()
}
1211
/// Reports whether `s` has the shape of a Ruby local variable name: an underscore or
/// ASCII lowercase letter followed only by underscores and ASCII alphanumerics.
fn looks_like_local_variable_reference(s: &str) -> bool {
    let Some(first) = s.chars().next() else {
        return false;
    };

    let valid_start = first == '_' || first.is_ascii_lowercase();
    valid_start
        && s.chars()
            .skip(1)
            .all(|c| c == '_' || c.is_ascii_alphanumeric())
}
1217
/// Determines the directory that file reads triggered by a gemspec must stay within.
///
/// Prefers the canonical current working directory when the canonical `base_dir` lies
/// inside it; otherwise falls back to the canonical `base_dir` itself. Returns `None`
/// when no base directory is given or it cannot be canonicalized.
fn resolve_ruby_read_root(base_dir: Option<&Path>) -> Option<PathBuf> {
    let base_dir = base_dir?;

    let enclosing_cwd = || -> Option<PathBuf> {
        let cwd = std::env::current_dir().ok()?.canonicalize().ok()?;
        let base = base_dir.canonicalize().ok()?;
        base.starts_with(&cwd).then_some(cwd)
    };

    enclosing_cwd().or_else(|| base_dir.canonicalize().ok())
}
1232
/// Canonicalizes `path` and returns it only when the result lies under `allowed_root`.
///
/// Paths that cannot be canonicalized, or that resolve outside the root, yield `None`.
fn resolve_ruby_read_path(path: PathBuf, allowed_root: &Path) -> Option<PathBuf> {
    match path.canonicalize() {
        Ok(resolved) if resolved.starts_with(allowed_root) => Some(resolved),
        _ => None,
    }
}
1239
1240fn resolve_file_read_argument(args: &str, base_dir: Option<&Path>) -> Option<String> {
1241    let base_dir = base_dir?;
1242    let allowed_root = resolve_ruby_read_root(base_dir.into())?;
1243    let relative_path = extract_first_ruby_value(args)?;
1244    if relative_path.is_empty() {
1245        return None;
1246    }
1247
1248    let candidate = Path::new(&relative_path);
1249    let path = if candidate.is_absolute() {
1250        candidate.to_path_buf()
1251    } else {
1252        base_dir.join(candidate)
1253    };
1254
1255    let safe_path = resolve_ruby_read_path(path, &allowed_root)?;
1256
1257    fs::read_to_string(safe_path)
1258        .ok()
1259        .map(|content| content.trim().to_string())
1260        .filter(|content| !content.is_empty())
1261}
1262
1263fn resolve_scalar_expression(
1264    expression: &str,
1265    base_dir: Option<&Path>,
1266    contexts: &[String],
1267) -> Option<String> {
1268    let expression = if let Some(pos) = expression.find(" #") {
1269        expression[..pos].trim()
1270    } else {
1271        expression.trim()
1272    };
1273
1274    let file_read_re = Regex::new(r#"^File\.read\((.+)\)(?:\.strip)?(?:\.freeze)?$"#).ok()?;
1275    if let Some(caps) = file_read_re.captures(expression) {
1276        return caps
1277            .get(1)
1278            .and_then(|m| resolve_file_read_argument(m.as_str(), base_dir));
1279    }
1280
1281    if let Some(value) = extract_first_ruby_value(expression) {
1282        return Some(value);
1283    }
1284
1285    let cleaned = clean_gemspec_value(expression);
1286    if looks_like_constant_reference(&cleaned) {
1287        return resolve_variable_version(&cleaned, contexts).or(Some(cleaned));
1288    }
1289
1290    None
1291}
1292
1293fn resolve_local_variable_value(
1294    var_name: &str,
1295    content: &str,
1296    base_dir: Option<&Path>,
1297    contexts: &[String],
1298) -> Option<String> {
1299    let escaped = regex::escape(var_name.trim());
1300    let pattern = format!(r#"(?m)^\s*{}\s*=\s*(.+)$"#, escaped);
1301    let re = Regex::new(&pattern).ok()?;
1302
1303    re.captures_iter(content).find_map(|caps| {
1304        caps.get(1)
1305            .and_then(|m| resolve_scalar_expression(m.as_str(), base_dir, contexts))
1306    })
1307}
1308
1309fn resolve_gemspec_scalar_value(
1310    raw_value: &str,
1311    content: &str,
1312    base_dir: Option<&Path>,
1313    contexts: &[String],
1314) -> Option<String> {
1315    let cleaned = truncate_field(clean_gemspec_value(raw_value));
1316    if cleaned.is_empty() {
1317        return None;
1318    }
1319
1320    if looks_like_constant_reference(&cleaned) {
1321        return resolve_variable_version(&cleaned, contexts)
1322            .map(truncate_field)
1323            .or(Some(cleaned));
1324    }
1325
1326    if looks_like_local_variable_reference(&cleaned) {
1327        return resolve_local_variable_value(&cleaned, content, base_dir, contexts)
1328            .map(truncate_field)
1329            .or(Some(cleaned));
1330    }
1331
1332    Some(cleaned)
1333}
1334
1335fn load_required_ruby_contexts(content: &str, base_dir: Option<&Path>) -> Vec<String> {
1336    let mut contexts = vec![content.to_string()];
1337    let Some(base_dir) = base_dir else {
1338        return contexts;
1339    };
1340    let allowed_root = resolve_ruby_read_root(Some(base_dir));
1341
1342    let require_re = match Regex::new(r#"(?m)^\s*require(?:_relative)?\s+["']([^"']+)["']"#) {
1343        Ok(re) => re,
1344        Err(_) => return contexts,
1345    };
1346
1347    for caps in require_re.captures_iter(content) {
1348        let Some(required) = caps.get(1).map(|m| m.as_str()) else {
1349            continue;
1350        };
1351        for candidate in candidate_require_paths(base_dir, required) {
1352            let Some(safe_candidate) = allowed_root
1353                .as_deref()
1354                .and_then(|root| resolve_ruby_read_path(candidate, root))
1355            else {
1356                continue;
1357            };
1358            if let Ok(required_content) = read_file_to_string(&safe_candidate, None) {
1359                contexts.push(required_content);
1360                break;
1361            }
1362        }
1363    }
1364
1365    contexts
1366}
1367
/// Lists where a required Ruby file may live relative to `base_dir`.
///
/// `::` in the name becomes a path separator and `.rb` is appended when missing. The
/// file is looked for directly under `base_dir` first, then under `base_dir/lib`.
fn candidate_require_paths(base_dir: &Path, required: &str) -> Vec<PathBuf> {
    let mut filename = required.replace("::", "/");
    if !filename.ends_with(".rb") {
        filename.push_str(".rb");
    }

    [base_dir.to_path_buf(), base_dir.join("lib")]
        .into_iter()
        .map(|dir| dir.join(&filename))
        .collect()
}
1381
/// Reports whether `s` resembles a Ruby constant: it either starts with an ASCII
/// uppercase letter or contains a `::` namespace separator.
fn looks_like_constant_reference(s: &str) -> bool {
    let starts_uppercase = matches!(s.chars().next(), Some(c) if c.is_ascii_uppercase());
    starts_uppercase || s.contains("::")
}
1385
/// Test-only shortcut that parses gemspec text without a base directory, so no
/// `require`d files or `File.read` targets are consulted.
#[cfg(test)]
fn parse_gemspec(content: &str) -> PackageData {
    let no_base_dir: Option<&Path> = None;
    parse_gemspec_with_context(content, no_base_dir)
}
1391
1392fn parse_gemspec_with_context(content: &str, base_dir: Option<&Path>) -> PackageData {
1393    let contexts = load_required_ruby_contexts(content, base_dir);
1394
1395    // Regex for spec.name = "value" or s.name = "value"
1396    // The spec variable name varies: spec, s, gem, etc.
1397    let field_re = match Regex::new(
1398        r#"(?m)^\s*\w+\.(name|version|summary|description|homepage|license)\s*=\s*(.+)$"#,
1399    ) {
1400        Ok(r) => r,
1401        Err(e) => {
1402            warn!("Failed to compile gemspec field regex: {}", e);
1403            return default_package_data_with_datasource(DatasourceId::Gemspec);
1404        }
1405    };
1406
1407    let licenses_re = match Regex::new(r#"(?m)^\s*\w+\.licenses\s*=\s*(.+)$"#) {
1408        Ok(r) => r,
1409        Err(e) => {
1410            warn!("Failed to compile licenses regex: {}", e);
1411            return default_package_data_with_datasource(DatasourceId::Gemspec);
1412        }
1413    };
1414
1415    let authors_re = match Regex::new(r#"(?m)^\s*\w+\.(?:authors|author)\s*=\s*(.+)$"#) {
1416        Ok(r) => r,
1417        Err(e) => {
1418            warn!("Failed to compile authors regex: {}", e);
1419            return default_package_data_with_datasource(DatasourceId::Gemspec);
1420        }
1421    };
1422
1423    let email_re = match Regex::new(r#"(?m)^\s*\w+\.email\s*=\s*(.+)$"#) {
1424        Ok(r) => r,
1425        Err(e) => {
1426            warn!("Failed to compile email regex: {}", e);
1427            return default_package_data_with_datasource(DatasourceId::Gemspec);
1428        }
1429    };
1430
1431    let dependency_call_re = match Regex::new(
1432        r#"(?m)^\s*\w+\.(add_(?:development_|runtime_)?dependency)\s*\(?(.+?)\)?\s*$"#,
1433    ) {
1434        Ok(r) => r,
1435        Err(e) => {
1436            warn!("Failed to compile gemspec dependency regex: {}", e);
1437            return default_package_data_with_datasource(DatasourceId::Gemspec);
1438        }
1439    };
1440
1441    let mut name: Option<String> = None;
1442    let mut version: Option<String> = None;
1443    let mut summary: Option<String> = None;
1444    let mut description: Option<String> = None;
1445    let mut homepage: Option<String> = None;
1446    let mut license: Option<String> = None;
1447    let mut licenses: Vec<String> = Vec::new();
1448    let mut authors: Vec<String> = Vec::new();
1449    let mut emails: Vec<String> = Vec::new();
1450    let mut dependencies: Vec<Dependency> = Vec::new();
1451
1452    // Extract basic fields
1453    for caps in field_re.captures_iter(content).take(MAX_ITERATION_COUNT) {
1454        let field_name = match caps.get(1) {
1455            Some(m) => m.as_str(),
1456            None => continue,
1457        };
1458        let raw_value = match caps.get(2) {
1459            Some(m) => m.as_str().trim(),
1460            None => continue,
1461        };
1462
1463        match field_name {
1464            "name" => name = resolve_gemspec_scalar_value(raw_value, content, base_dir, &contexts),
1465            "version" => {
1466                version = resolve_gemspec_scalar_value(raw_value, content, base_dir, &contexts);
1467            }
1468            "summary" => {
1469                summary = resolve_gemspec_scalar_value(raw_value, content, base_dir, &contexts)
1470            }
1471            "description" => description = Some(truncate_field(clean_gemspec_value(raw_value))),
1472            "homepage" => {
1473                homepage = resolve_gemspec_scalar_value(raw_value, content, base_dir, &contexts)
1474            }
1475            "license" => license = Some(truncate_field(clean_gemspec_value(raw_value))),
1476            _ => {}
1477        }
1478    }
1479
1480    // Extract licenses (plural)
1481    for caps in licenses_re.captures_iter(content).take(MAX_ITERATION_COUNT) {
1482        if let Some(raw) = caps.get(1) {
1483            licenses = extract_ruby_array(raw.as_str());
1484        }
1485    }
1486
1487    // Extract authors
1488    for caps in authors_re.captures_iter(content).take(MAX_ITERATION_COUNT) {
1489        if let Some(raw) = caps.get(1) {
1490            let raw_str = raw.as_str().trim();
1491            if raw_str.starts_with('[') {
1492                authors = extract_ruby_array(raw_str);
1493            } else if looks_like_constant_reference(raw_str) {
1494                authors = resolve_variable_array(raw_str, &contexts)
1495                    .unwrap_or_else(|| vec![clean_gemspec_value(raw_str)]);
1496            } else {
1497                authors.push(clean_gemspec_value(raw_str));
1498            }
1499        }
1500    }
1501
1502    // Extract emails
1503    for caps in email_re.captures_iter(content).take(MAX_ITERATION_COUNT) {
1504        if let Some(raw) = caps.get(1) {
1505            let raw_str = raw.as_str().trim();
1506            if raw_str.starts_with('[') {
1507                emails = extract_ruby_array(raw_str);
1508            } else if looks_like_constant_reference(raw_str) {
1509                emails = resolve_variable_array(raw_str, &contexts)
1510                    .unwrap_or_else(|| vec![clean_gemspec_value(raw_str)]);
1511            } else {
1512                emails.push(clean_gemspec_value(raw_str));
1513            }
1514        }
1515    }
1516
1517    // Build parties from authors and emails
1518    let mut parties: Vec<Party> = Vec::new();
1519
1520    if authors.len() == 1 && emails.len() == 1 {
1521        let email_str = emails.first().map(String::as_str);
1522        let (parsed_email_name, parsed_email) = match email_str {
1523            Some(e) => split_name_email(e),
1524            None => (None, None),
1525        };
1526
1527        parties.push(Party {
1528            r#type: Some("person".to_string()),
1529            role: Some("author".to_string()),
1530            name: authors.first().cloned().or(parsed_email_name),
1531            email: parsed_email.or_else(|| {
1532                email_str
1533                    .filter(|e| e.contains('@') && !e.contains('<'))
1534                    .map(|e| e.to_string())
1535            }),
1536            url: None,
1537            organization: None,
1538            organization_url: None,
1539            timezone: None,
1540        });
1541    } else {
1542        for author_name in authors {
1543            parties.push(Party {
1544                r#type: Some("person".to_string()),
1545                role: Some("author".to_string()),
1546                name: Some(author_name),
1547                email: None,
1548                url: None,
1549                organization: None,
1550                organization_url: None,
1551                timezone: None,
1552            });
1553        }
1554
1555        for email_str in emails {
1556            let (parsed_email_name, parsed_email) = if email_str.contains('<') {
1557                split_name_email(&email_str)
1558            } else {
1559                (None, None)
1560            };
1561            parties.push(Party {
1562                r#type: Some("person".to_string()),
1563                role: Some("author".to_string()),
1564                name: parsed_email_name,
1565                email: parsed_email.or_else(|| email_str.contains('@').then_some(email_str)),
1566                url: None,
1567                organization: None,
1568                organization_url: None,
1569                timezone: None,
1570            });
1571        }
1572    }
1573
1574    for caps in dependency_call_re
1575        .captures_iter(content)
1576        .take(MAX_ITERATION_COUNT)
1577    {
1578        let method = match caps.get(1) {
1579            Some(m) => m.as_str(),
1580            None => continue,
1581        };
1582        let args = match caps.get(2) {
1583            Some(m) => m.as_str(),
1584            None => continue,
1585        };
1586
1587        let Some(dep_name) = extract_first_ruby_value(args).map(truncate_field) else {
1588            continue;
1589        };
1590        let version_parts = extract_all_ruby_values(after_first_argument(args));
1591        let extracted_requirement = if version_parts.is_empty() {
1592            None
1593        } else {
1594            Some(version_parts.join(", "))
1595        };
1596        let purl = create_gem_purl(&dep_name, None);
1597        let is_development = method == "add_development_dependency";
1598        let scope = if is_development {
1599            "development"
1600        } else {
1601            "runtime"
1602        };
1603
1604        dependencies.push(Dependency {
1605            purl,
1606            extracted_requirement,
1607            scope: Some(scope.to_string()),
1608            is_runtime: Some(!is_development),
1609            is_optional: Some(is_development),
1610            is_pinned: None,
1611            is_direct: Some(true),
1612            resolved_package: None,
1613            extra_data: None,
1614        });
1615    }
1616
1617    // Extract license statement only - detection happens in separate engine
1618    let extracted_license_statement = if !licenses.is_empty() {
1619        Some(licenses.join(" AND "))
1620    } else {
1621        license
1622    };
1623
1624    let (declared_license_expression, declared_license_expression_spdx, license_detections) =
1625        normalize_spdx_declared_license(extracted_license_statement.as_deref());
1626
1627    // Prefer description over summary
1628    let final_description = description.or(summary);
1629
1630    // Build PURL
1631    let purl = name
1632        .as_deref()
1633        .map(|n| create_gem_purl(n, version.as_deref()))
1634        .unwrap_or(None);
1635
1636    let (repository_homepage_url, repository_download_url, api_data_url, download_url) =
1637        if let Some(n) = name.as_deref() {
1638            get_rubygems_urls(n, version.as_deref(), None)
1639        } else {
1640            (None, None, None, None)
1641        };
1642
1643    PackageData {
1644        package_type: Some(PACKAGE_TYPE),
1645        name,
1646        version,
1647        primary_language: Some("Ruby".to_string()),
1648        description: final_description,
1649        homepage_url: homepage,
1650        download_url,
1651        declared_license_expression,
1652        declared_license_expression_spdx,
1653        license_detections,
1654        extracted_license_statement,
1655        parties,
1656        dependencies,
1657        repository_homepage_url,
1658        repository_download_url,
1659        api_data_url,
1660        datasource_id: Some(DatasourceId::Gemspec),
1661        purl,
1662        ..default_package_data()
1663    }
1664}
1665
1666// =============================================================================
1667// .gem Archive Parser (Wave 3)
1668// =============================================================================
1669
/// Largest `.gem` file, in bytes, that `extract_gem_archive` will open.
const MAX_ARCHIVE_SIZE: u64 = 100 * 1024 * 1024; // 100MB
/// Upper bound, in bytes, applied to `metadata.gz` both as stored in the tar entry and
/// after decompression.
const MAX_FILE_SIZE: u64 = 50 * 1024 * 1024; // 50MB per file
/// Highest accepted ratio of decompressed size to tar entry size for `metadata.gz`.
const MAX_COMPRESSION_RATIO: f64 = 100.0; // 100:1 ratio
1673
1674/// Parser for .gem archive files.
1675///
1676/// Extracts metadata from Ruby .gem packages, which are tar archives
1677/// containing a gzip-compressed YAML metadata file (`metadata.gz`).
1678///
1679/// Includes safety checks against zip bombs and oversized archives.
1680pub struct GemArchiveParser;
1681
1682impl PackageParser for GemArchiveParser {
1683    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
1684
1685    fn extract_packages(path: &Path) -> Vec<PackageData> {
1686        vec![match extract_gem_archive(path) {
1687            Ok(data) => data,
1688            Err(e) => {
1689                warn!("Failed to extract .gem archive at {:?}: {}", path, e);
1690                default_package_data_with_datasource(DatasourceId::GemArchive)
1691            }
1692        }]
1693    }
1694
1695    fn is_match(path: &Path) -> bool {
1696        path.extension()
1697            .and_then(|ext| ext.to_str())
1698            .is_some_and(|ext| ext == "gem")
1699    }
1700}
1701
/// Reads the `metadata.gz` member of a `.gem` tar archive and parses it as gem metadata.
///
/// # Errors
///
/// Returns a descriptive message when:
/// * the file's metadata cannot be read or the file cannot be opened,
/// * the archive is larger than `MAX_ARCHIVE_SIZE`,
/// * a tar entry or its path cannot be read,
/// * `metadata.gz` exceeds `MAX_FILE_SIZE` before or after decompression,
/// * decompression fails,
/// * the decompressed size exceeds `MAX_COMPRESSION_RATIO` times the entry size,
/// * no `metadata.gz` entry is found (including when iteration stops at
///   `MAX_ITERATION_COUNT` entries),
/// * `parse_gem_metadata_yaml` rejects the content.
fn extract_gem_archive(path: &Path) -> Result<PackageData, String> {
    // Reject oversized archives before opening them.
    let file_metadata =
        fs::metadata(path).map_err(|e| format!("Failed to read file metadata: {}", e))?;
    let archive_size = file_metadata.len();

    if archive_size > MAX_ARCHIVE_SIZE {
        return Err(format!(
            "Archive too large: {} bytes (limit: {} bytes)",
            archive_size, MAX_ARCHIVE_SIZE
        ));
    }

    let file = File::open(path).map_err(|e| format!("Failed to open archive: {}", e))?;
    let mut archive = Archive::new(file);

    // Bound the number of tar entries inspected.
    let mut entry_count: usize = 0;
    for entry_result in archive
        .entries()
        .map_err(|e| format!("Failed to read tar entries: {}", e))?
    {
        entry_count += 1;
        if entry_count > MAX_ITERATION_COUNT {
            warn!(
                "Exceeded max tar entry count ({}) in .gem archive, stopping iteration",
                MAX_ITERATION_COUNT
            );
            break;
        }

        let entry = entry_result.map_err(|e| format!("Failed to read tar entry: {}", e))?;
        let entry_path = entry
            .path()
            .map_err(|e| format!("Failed to get entry path: {}", e))?;
        let entry_str = entry_path.to_string_lossy();
        // Entries whose path contains ".." are skipped, not treated as fatal.
        if entry_str.contains("..") {
            warn!("Skipping tar entry with path traversal: {}", entry_str);
            continue;
        }

        // Only an entry named exactly "metadata.gz" is processed.
        if entry_path.to_str() == Some("metadata.gz") {
            // Size recorded for the tar entry, i.e. the compressed size.
            let entry_size = entry.size();
            if entry_size > MAX_FILE_SIZE {
                return Err(format!(
                    "metadata.gz too large: {} bytes (limit: {} bytes)",
                    entry_size, MAX_FILE_SIZE
                ));
            }

            // The entry is moved into the decoder here; the path borrowed from it above
            // is not used past this point.
            let mut decoder = GzDecoder::new(entry);
            let mut content = Vec::new();
            // Read at most one byte past the limit so an overrun can be detected
            // without decompressing an unbounded amount of data.
            let mut limited = std::io::Read::take(&mut decoder, MAX_FILE_SIZE + 1);
            limited
                .read_to_end(&mut content)
                .map_err(|e| format!("Failed to decompress metadata.gz: {}", e))?;

            if content.len() > MAX_FILE_SIZE as usize {
                return Err(format!(
                    "Decompressed metadata too large: exceeds {} byte limit",
                    MAX_FILE_SIZE
                ));
            }

            // Invalid UTF-8 is tolerated: fall back to a lossy conversion.
            let content = match String::from_utf8(content) {
                Ok(s) => s,
                Err(err) => {
                    let bytes = err.into_bytes();
                    warn!("Invalid UTF-8 in gem metadata; using lossy conversion");
                    String::from_utf8_lossy(&bytes).into_owned()
                }
            };

            // Compare the decoded text length with the tar entry size; a zero-sized
            // entry skips the check to avoid dividing by zero.
            let uncompressed_size = content.len() as u64;
            if entry_size > 0 {
                let ratio = uncompressed_size as f64 / entry_size as f64;
                if ratio > MAX_COMPRESSION_RATIO {
                    return Err(format!(
                        "Suspicious compression ratio: {:.2}:1 (limit: {:.0}:1)",
                        ratio, MAX_COMPRESSION_RATIO
                    ));
                }
            }

            return parse_gem_metadata_yaml(&content, DatasourceId::GemArchive);
        }
    }

    Err("metadata.gz not found in .gem archive".to_string())
}
1790
1791fn parse_gem_metadata_yaml(
1792    content: &str,
1793    datasource_id: DatasourceId,
1794) -> Result<PackageData, String> {
1795    // Ruby YAML tagged types need to be handled:
1796    // --- !ruby/object:Gem::Specification
1797    // We strip Ruby-specific YAML tags since yaml_serde can't handle them
1798    let cleaned = clean_ruby_yaml_tags(content);
1799
1800    let yaml: yaml_serde::Value =
1801        yaml_serde::from_str(&cleaned).map_err(|e| format!("Failed to parse YAML: {}", e))?;
1802
1803    let name = yaml_string(&yaml, "name").map(truncate_field);
1804    let version = yaml.get("version").and_then(|v| {
1805        if v.is_string() {
1806            v.as_str().map(|s| truncate_field(s.to_string()))
1807        } else {
1808            yaml_string(v, "version").map(truncate_field)
1809        }
1810    });
1811    let description = yaml_string(&yaml, "description")
1812        .or_else(|| yaml_string(&yaml, "summary"))
1813        .map(truncate_field);
1814    let homepage = yaml_string(&yaml, "homepage").map(truncate_field);
1815    let summary = yaml_string(&yaml, "summary").map(truncate_field);
1816
1817    // Licenses
1818    let licenses: Vec<String> = yaml
1819        .get("licenses")
1820        .and_then(|v| v.as_sequence())
1821        .map(|seq| {
1822            seq.iter()
1823                .filter_map(|item| item.as_str().map(|s| truncate_field(s.to_string())))
1824                .collect()
1825        })
1826        .unwrap_or_default();
1827
1828    // Extract license statement only - detection happens in separate engine
1829    let extracted_license_statement = if !licenses.is_empty() {
1830        Some(licenses.join(" AND "))
1831    } else {
1832        None
1833    };
1834
1835    let (license_expression, license_expression_spdx, license_detections) =
1836        normalize_spdx_declared_license(extracted_license_statement.as_deref());
1837
1838    // Authors
1839    let authors: Vec<String> = yaml
1840        .get("authors")
1841        .and_then(|v| v.as_sequence())
1842        .map(|seq| {
1843            seq.iter()
1844                .filter_map(|item| item.as_str().map(|s| truncate_field(s.to_string())))
1845                .collect()
1846        })
1847        .unwrap_or_default();
1848
1849    let emails: Vec<String> = yaml
1850        .get("email")
1851        .map(|v| {
1852            if let Some(seq) = v.as_sequence() {
1853                seq.iter()
1854                    .filter_map(|item| item.as_str().map(|s| truncate_field(s.to_string())))
1855                    .collect()
1856            } else if let Some(s) = v.as_str() {
1857                vec![truncate_field(s.to_string())]
1858            } else {
1859                Vec::new()
1860            }
1861        })
1862        .unwrap_or_default();
1863
1864    // Build parties
1865    let mut parties: Vec<Party> = Vec::new();
1866    let max_len = authors.len().max(emails.len());
1867    for i in 0..max_len {
1868        let author_name = authors.get(i).map(|s| s.as_str());
1869        let email_str = emails.get(i).map(|s| s.as_str());
1870
1871        let (parsed_email_name, parsed_email) = match email_str {
1872            Some(e) if e.contains('<') => split_name_email(e),
1873            None => (None, None),
1874            _ => (None, None),
1875        };
1876
1877        let party_name = author_name.map(|s| s.to_string()).or(parsed_email_name);
1878
1879        parties.push(Party {
1880            r#type: Some("person".to_string()),
1881            role: Some("author".to_string()),
1882            name: party_name,
1883            email: parsed_email.or_else(|| {
1884                email_str
1885                    .filter(|e| e.contains('@') && !e.contains('<'))
1886                    .map(|e| e.to_string())
1887            }),
1888            url: None,
1889            organization: None,
1890            organization_url: None,
1891            timezone: None,
1892        });
1893    }
1894
1895    // Dependencies
1896    let dependencies = parse_gem_yaml_dependencies(&yaml);
1897
1898    let metadata = yaml.get("metadata");
1899
1900    let bug_tracking_url = metadata
1901        .and_then(|m| yaml_string(m, "bug_tracking_uri"))
1902        .map(truncate_field);
1903
1904    let code_view_url = metadata
1905        .and_then(|m| yaml_string(m, "source_code_uri"))
1906        .map(truncate_field);
1907
1908    let vcs_url = code_view_url.clone().or_else(|| {
1909        metadata
1910            .and_then(|m| yaml_string(m, "homepage_uri"))
1911            .map(truncate_field)
1912    });
1913
1914    let file_references = metadata
1915        .and_then(|m| m.get("files"))
1916        .and_then(|f| f.as_sequence())
1917        .map(|seq| {
1918            seq.iter()
1919                .filter_map(|v| v.as_str())
1920                .map(|s| crate::models::FileReference {
1921                    path: s.to_string(),
1922                    size: None,
1923                    sha1: None,
1924                    md5: None,
1925                    sha256: None,
1926                    sha512: None,
1927                    extra_data: None,
1928                })
1929                .collect::<Vec<_>>()
1930        })
1931        .unwrap_or_default();
1932
1933    let release_date = yaml_string(&yaml, "date").and_then(|d| {
1934        if d.len() >= 10 {
1935            Some(d[..10].to_string())
1936        } else {
1937            None
1938        }
1939    });
1940
1941    let purl = name
1942        .as_deref()
1943        .map(|n| create_gem_purl(n, version.as_deref()))
1944        .unwrap_or(None);
1945
1946    let platform = yaml_string(&yaml, "platform").map(truncate_field);
1947    let (repository_homepage_url, repository_download_url, api_data_url, download_url) =
1948        if let Some(n) = name.as_deref() {
1949            get_rubygems_urls(n, version.as_deref(), platform.as_deref())
1950        } else {
1951            (None, None, None, None)
1952        };
1953
1954    let qualifiers = if let Some(ref p) = platform {
1955        if p != "ruby" {
1956            let mut q = HashMap::new();
1957            q.insert("platform".to_string(), p.clone());
1958            Some(q)
1959        } else {
1960            None
1961        }
1962    } else {
1963        None
1964    };
1965
1966    Ok(PackageData {
1967        package_type: Some(PACKAGE_TYPE),
1968        name,
1969        version,
1970        qualifiers,
1971        primary_language: Some("Ruby".to_string()),
1972        description: description.or(summary),
1973        release_date,
1974        homepage_url: homepage,
1975        download_url,
1976        bug_tracking_url,
1977        code_view_url,
1978        declared_license_expression: license_expression,
1979        declared_license_expression_spdx: license_expression_spdx,
1980        license_detections,
1981        extracted_license_statement,
1982        file_references,
1983        parties,
1984        dependencies,
1985        repository_homepage_url,
1986        repository_download_url,
1987        api_data_url,
1988        datasource_id: Some(datasource_id),
1989        purl,
1990        vcs_url,
1991        ..default_package_data()
1992    })
1993}
1994
1995/// Strips Ruby-specific YAML tags that yaml_serde cannot handle.
1996fn clean_ruby_yaml_tags(content: &str) -> String {
1997    let tag_re = match Regex::new(r"!ruby/\S+") {
1998        Ok(r) => r,
1999        Err(_) => return content.to_string(),
2000    };
2001    tag_re.replace_all(content, "").to_string()
2002}
2003
2004fn yaml_string(yaml: &yaml_serde::Value, key: &str) -> Option<String> {
2005    yaml.get(key)
2006        .and_then(|v| v.as_str())
2007        .filter(|s| !s.is_empty())
2008        .map(|s| s.to_string())
2009}
2010
2011fn parse_gem_yaml_dependencies(yaml: &yaml_serde::Value) -> Vec<Dependency> {
2012    let mut dependencies = Vec::new();
2013
2014    let deps_seq = match yaml.get("dependencies").and_then(|v| v.as_sequence()) {
2015        Some(seq) => seq,
2016        None => return dependencies,
2017    };
2018
2019    for dep_value in deps_seq.iter().take(MAX_ITERATION_COUNT) {
2020        let dep_name = match yaml_string(dep_value, "name").map(truncate_field) {
2021            Some(n) => n,
2022            None => continue,
2023        };
2024
2025        let dep_type = yaml_string(dep_value, "type");
2026        let is_development = dep_type.as_deref() == Some(":development");
2027
2028        // Extract version requirements from the nested structure
2029        let requirements = dep_value
2030            .get("requirement")
2031            .or_else(|| dep_value.get("version_requirements"))
2032            .and_then(|req| req.get("requirements"))
2033            .and_then(|reqs| reqs.as_sequence());
2034
2035        let extracted_requirement = requirements.map(|reqs| {
2036            let parts: Vec<String> = reqs
2037                .iter()
2038                .filter_map(|req| {
2039                    let seq = req.as_sequence()?;
2040                    if seq.len() >= 2 {
2041                        let op = seq[0].as_str().unwrap_or("");
2042                        let ver = seq[1].get("version").and_then(|v| v.as_str()).unwrap_or("");
2043                        if op == ">=" && ver == "0" {
2044                            // ">= 0" means "any version" - skip
2045                            None
2046                        } else if op.is_empty() || ver.is_empty() {
2047                            None
2048                        } else {
2049                            Some(format!("{} {}", op, ver))
2050                        }
2051                    } else {
2052                        None
2053                    }
2054                })
2055                .collect();
2056            parts.join(", ")
2057        });
2058
2059        let extracted_requirement = extracted_requirement
2060            .filter(|s| !s.is_empty())
2061            .or_else(|| Some(String::new()));
2062
2063        let (scope, is_runtime, is_optional) = if is_development {
2064            (Some("development".to_string()), false, true)
2065        } else {
2066            (Some("runtime".to_string()), true, false)
2067        };
2068
2069        let purl = create_gem_purl(&dep_name, None);
2070
2071        dependencies.push(Dependency {
2072            purl,
2073            extracted_requirement,
2074            scope,
2075            is_runtime: Some(is_runtime),
2076            is_optional: Some(is_optional),
2077            is_pinned: None,
2078            is_direct: Some(true),
2079            resolved_package: None,
2080            extra_data: None,
2081        });
2082    }
2083
2084    dependencies
2085}
2086
2087// =============================================================================
2088// Gem Metadata Extracted Parser (metadata.gz-extract files)
2089// =============================================================================
2090
2091pub struct GemMetadataExtractedParser;
2092
2093impl PackageParser for GemMetadataExtractedParser {
2094    const PACKAGE_TYPE: PackageType = PACKAGE_TYPE;
2095
2096    fn extract_packages(path: &Path) -> Vec<PackageData> {
2097        vec![match extract_gem_metadata_extracted(path) {
2098            Ok(data) => data,
2099            Err(e) => {
2100                warn!("Failed to extract gem metadata from {:?}: {}", path, e);
2101                default_package_data_with_datasource(DatasourceId::GemArchiveExtracted)
2102            }
2103        }]
2104    }
2105
2106    fn is_match(path: &Path) -> bool {
2107        path.to_str()
2108            .is_some_and(|p| p.contains("metadata.gz-extract"))
2109    }
2110}
2111
2112fn extract_gem_metadata_extracted(path: &Path) -> Result<PackageData, String> {
2113    let content = read_file_to_string(path, None)
2114        .map_err(|e| format!("Failed to read metadata.gz-extract file: {}", e))?;
2115
2116    parse_gem_metadata_yaml(&content, DatasourceId::GemArchiveExtracted)
2117}
2118
2119// Register parser with metadata
2120crate::register_parser!(
2121    "Ruby Gemfile manifest",
2122    &["**/Gemfile", "**/data.gz-extract/Gemfile"],
2123    "gem",
2124    "Ruby",
2125    Some("https://bundler.io/man/gemfile.5.html"),
2126);
2127
2128crate::register_parser!(
2129    "Ruby Gemfile.lock lockfile",
2130    &["**/Gemfile.lock", "**/data.gz-extract/Gemfile.lock"],
2131    "gem",
2132    "Ruby",
2133    Some("https://bundler.io/man/gemfile.5.html"),
2134);
2135
2136crate::register_parser!(
2137    "Ruby .gemspec manifest",
2138    &[
2139        "**/*.gemspec",
2140        "**/data.gz-extract/*.gemspec",
2141        "**/specifications/*.gemspec"
2142    ],
2143    "gem",
2144    "Ruby",
2145    Some("https://guides.rubygems.org/specification-reference/"),
2146);
2147
2148crate::register_parser!(
2149    "Ruby .gem archive",
2150    &["**/*.gem"],
2151    "gem",
2152    "Ruby",
2153    Some("https://guides.rubygems.org/specification-reference/"),
2154);
2155
2156crate::register_parser!(
2157    "Ruby gem metadata (extracted)",
2158    &["**/metadata.gz-extract"],
2159    "gem",
2160    "Ruby",
2161    Some("https://guides.rubygems.org/specification-reference/"),
2162);
2163
#[cfg(test)]
mod tests {
    use super::parse_gemspec;

    /// A `%q{` literal with no closing brace still yields its inner text.
    #[test]
    fn test_clean_gemspec_value_handles_unterminated_percent_q() {
        let cleaned = super::clean_gemspec_value("%q{Arel is a SQL AST manager for Ruby. It");
        assert_eq!(cleaned, "Arel is a SQL AST manager for Ruby. It");
    }

    /// Both `add_runtime_dependency` and `add_dependency` produce runtime-scoped
    /// dependencies that keep their version requirement.
    #[test]
    fn test_parse_gemspec_runtime_dependency_scope() {
        let manifest = r#"
Gem::Specification.new do |spec|
  spec.name = "demo"
  spec.version = "1.0.0"
  spec.add_runtime_dependency "rack", "~> 3.0"
  spec.add_dependency "thor", ">= 1.0"
end
"#;

        let package_data = parse_gemspec(manifest);
        let deps = &package_data.dependencies;
        assert_eq!(deps.len(), 2);

        // (scope, requirement) expected for each dependency, in declaration order.
        let expected = [("runtime", "~> 3.0"), ("runtime", ">= 1.0")];
        for (dep, (scope, requirement)) in deps.iter().zip(expected) {
            assert_eq!(dep.scope.as_deref(), Some(scope));
            assert_eq!(dep.extracted_requirement.as_deref(), Some(requirement));
        }
    }
}