Skip to main content

gem_audit/lockfile/
parser.rs

1use super::*;
2
/// The lockfile section the parser is currently inside.
///
/// A section begins at an unindented header line (for example `GEM`) and
/// stays active until the next unindented line is seen.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Section {
    /// No header seen yet, or the last unindented line was not a known header.
    None,
    /// Inside a `GIT` block.
    Git,
    /// Inside a `GEM` block.
    Gem,
    /// Inside a `PATH` block.
    Path,
    /// Inside the `PLATFORMS` block.
    Platforms,
    /// Inside the `DEPENDENCIES` block.
    Dependencies,
    /// Inside the `RUBY VERSION` block.
    RubyVersion,
    /// Inside the `BUNDLED WITH` block.
    BundledWith,
}
15
16/// Parse a Gemfile.lock string into a `Lockfile`.
17pub fn parse(input: &str) -> Result<Lockfile, ParseError> {
18    let mut sources: Vec<Source> = Vec::new();
19    let mut specs: Vec<GemSpec> = Vec::new();
20    let mut platforms: Vec<String> = Vec::new();
21    let mut dependencies: Vec<Dependency> = Vec::new();
22    let mut ruby_version: Option<String> = None;
23    let mut bundled_with: Option<String> = None;
24
25    let mut section = Section::None;
26    let mut in_specs = false;
27
28    // Current source being built
29    let mut current_remote: Option<String> = None;
30    let mut current_revision: Option<String> = None;
31    let mut current_branch: Option<String> = None;
32    let mut current_tag: Option<String> = None;
33
34    // Current gem spec being built
35    let mut current_spec: Option<GemSpec> = None;
36
37    let lines: Vec<&str> = input.lines().collect();
38
39    for (line_idx, &line) in lines.iter().enumerate() {
40        let _line_number = line_idx + 1;
41
42        // Empty line — finalize current spec if any
43        if line.trim().is_empty() {
44            if let Some(spec) = current_spec.take() {
45                specs.push(spec);
46            }
47            continue;
48        }
49
50        let indent = count_indent(line);
51        let trimmed = line.trim();
52
53        // Section headers (indent == 0)
54        if indent == 0 {
55            // Finalize any in-progress spec
56            if let Some(spec) = current_spec.take() {
57                specs.push(spec);
58            }
59            // Finalize any in-progress source
60            finalize_source(
61                &section,
62                &mut sources,
63                &mut current_remote,
64                &mut current_revision,
65                &mut current_branch,
66                &mut current_tag,
67            );
68
69            in_specs = false;
70            section = match trimmed {
71                "GIT" => Section::Git,
72                "GEM" => Section::Gem,
73                "PATH" => Section::Path,
74                "PLATFORMS" => Section::Platforms,
75                "DEPENDENCIES" => Section::Dependencies,
76                "RUBY VERSION" => Section::RubyVersion,
77                "BUNDLED WITH" => Section::BundledWith,
78                _ => Section::None,
79            };
80            continue;
81        }
82
83        match section {
84            Section::Git | Section::Gem | Section::Path => {
85                parse_source_line(
86                    trimmed,
87                    indent,
88                    &section,
89                    &mut in_specs,
90                    &mut current_remote,
91                    &mut current_revision,
92                    &mut current_branch,
93                    &mut current_tag,
94                    &mut current_spec,
95                    &mut specs,
96                    sources.len(),
97                );
98            }
99            Section::Platforms => {
100                if indent >= 2 {
101                    platforms.push(trimmed.to_string());
102                }
103            }
104            Section::Dependencies => {
105                if indent >= 2 {
106                    dependencies.push(parse_dependency_line(trimmed));
107                }
108            }
109            Section::RubyVersion => {
110                if indent >= 2 {
111                    ruby_version = Some(trimmed.to_string());
112                }
113            }
114            Section::BundledWith => {
115                if indent >= 2 {
116                    bundled_with = Some(trimmed.to_string());
117                }
118            }
119            Section::None => {}
120        }
121    }
122
123    // Finalize remaining state
124    if let Some(spec) = current_spec.take() {
125        specs.push(spec);
126    }
127    finalize_source(
128        &section,
129        &mut sources,
130        &mut current_remote,
131        &mut current_revision,
132        &mut current_branch,
133        &mut current_tag,
134    );
135
136    if sources.is_empty() && specs.is_empty() {
137        return Err(ParseError::Empty);
138    }
139
140    Ok(Lockfile {
141        sources,
142        specs,
143        platforms,
144        dependencies,
145        ruby_version,
146        bundled_with,
147    })
148}
149
/// Return the length, in bytes, of the leading whitespace of `line`.
///
/// Any character for which `char::is_whitespace` holds counts towards the
/// indent, not only ASCII spaces.
fn count_indent(line: &str) -> usize {
    line.chars()
        .take_while(|ch| ch.is_whitespace())
        .map(char::len_utf8)
        .sum()
}
154
155/// Finalize the current source section and add it to the sources list.
156fn finalize_source(
157    section: &Section,
158    sources: &mut Vec<Source>,
159    current_remote: &mut Option<String>,
160    current_revision: &mut Option<String>,
161    current_branch: &mut Option<String>,
162    current_tag: &mut Option<String>,
163) {
164    if let Some(remote) = current_remote.take() {
165        match section {
166            Section::Gem => {
167                sources.push(Source::Rubygems(RubygemsSource { remote }));
168            }
169            Section::Git => {
170                sources.push(Source::Git(GitSource {
171                    remote,
172                    revision: current_revision.take(),
173                    branch: current_branch.take(),
174                    tag: current_tag.take(),
175                }));
176            }
177            Section::Path => {
178                sources.push(Source::Path(PathSource { remote }));
179            }
180            _ => {}
181        }
182    }
183    *current_revision = None;
184    *current_branch = None;
185    *current_tag = None;
186}
187
188/// Parse a line inside a GEM/GIT/PATH section.
189#[allow(clippy::too_many_arguments)]
190fn parse_source_line(
191    trimmed: &str,
192    indent: usize,
193    _section: &Section,
194    in_specs: &mut bool,
195    current_remote: &mut Option<String>,
196    current_revision: &mut Option<String>,
197    current_branch: &mut Option<String>,
198    current_tag: &mut Option<String>,
199    current_spec: &mut Option<GemSpec>,
200    specs: &mut Vec<GemSpec>,
201    source_index: usize,
202) {
203    // Indent 2: attributes (remote:, revision:, specs:, branch:, tag:)
204    if indent == 2 {
205        if let Some(value) = trimmed.strip_prefix("remote:") {
206            *current_remote = Some(value.trim().to_string());
207            *in_specs = false;
208        } else if let Some(value) = trimmed.strip_prefix("revision:") {
209            *current_revision = Some(value.trim().to_string());
210        } else if let Some(value) = trimmed.strip_prefix("branch:") {
211            *current_branch = Some(value.trim().to_string());
212        } else if let Some(value) = trimmed.strip_prefix("tag:") {
213            *current_tag = Some(value.trim().to_string());
214        } else if trimmed == "specs:" {
215            *in_specs = true;
216        }
217        return;
218    }
219
220    if !*in_specs {
221        return;
222    }
223
224    // Indent 4: gem spec entry — "name (version)" or "name (version-platform)"
225    if indent == 4 {
226        // Finalize previous spec
227        if let Some(spec) = current_spec.take() {
228            specs.push(spec);
229        }
230
231        if let Some(spec) = parse_gem_spec_line(trimmed, source_index) {
232            *current_spec = Some(spec);
233        }
234        return;
235    }
236
237    // Indent 6: dependency of current spec — "name (constraint)" or "name"
238    if indent == 6
239        && let Some(spec) = current_spec
240    {
241        spec.dependencies.push(parse_gem_dependency(trimmed));
242    }
243}
244
245/// Parse a gem spec line like "actioncable (5.2.8)" or "nokogiri (1.13.10-x86_64-linux)".
246fn parse_gem_spec_line(trimmed: &str, source_index: usize) -> Option<GemSpec> {
247    let (name, rest) = trimmed.split_once(' ')?;
248    // rest should be "(version)" or "(version-platform)"
249    let version_str = rest.strip_prefix('(')?.strip_suffix(')')?;
250
251    let (version, platform) = parse_version_platform(version_str);
252
253    Some(GemSpec {
254        name: name.to_string(),
255        version,
256        platform,
257        dependencies: Vec::new(),
258        source_index,
259    })
260}
261
/// Split "1.13.10-x86_64-linux" into version "1.13.10" and platform "x86_64-linux".
///
/// The split happens at the first hyphen: everything before it is the
/// version and everything after it is the platform. This is the same rule
/// Bundler's own lockfile parser applies, so platforms outside any fixed
/// list (`x86_64-linux-gnu`, `aarch64-linux-musl`, `arm64-darwin-23`, ...)
/// are recognised too.
///
/// When there is no hyphen, or nothing follows it, the whole input is
/// returned as the version and the platform is `None`.
fn parse_version_platform(input: &str) -> (String, Option<String>) {
    match input.split_once('-') {
        Some((version, platform)) if !platform.is_empty() => {
            (version.to_string(), Some(platform.to_string()))
        }
        _ => (input.to_string(), None),
    }
}
319
320/// Parse a gem dependency line like "actionpack (= 5.2.8)" or "method_source" or "rack (~> 2.0, >= 2.0.8)".
321fn parse_gem_dependency(trimmed: &str) -> GemDependency {
322    if let Some(paren_start) = trimmed.find('(') {
323        let name = trimmed[..paren_start].trim();
324        let constraint = trimmed[paren_start + 1..]
325            .strip_suffix(')')
326            .unwrap_or(&trimmed[paren_start + 1..])
327            .trim();
328        GemDependency {
329            name: name.to_string(),
330            requirement: if constraint.is_empty() {
331                None
332            } else {
333                Some(constraint.to_string())
334            },
335        }
336    } else {
337        GemDependency {
338            name: trimmed.to_string(),
339            requirement: None,
340        }
341    }
342}
343
344/// Parse a DEPENDENCIES line like "rails (~> 5.2)" or "jquery-rails!" or "activerecord (= 3.2.10)".
345fn parse_dependency_line(trimmed: &str) -> Dependency {
346    let pinned = trimmed.ends_with('!');
347    let trimmed = if pinned {
348        trimmed.strip_suffix('!').unwrap().trim()
349    } else {
350        trimmed
351    };
352
353    if let Some(paren_start) = trimmed.find('(') {
354        let name = trimmed[..paren_start].trim();
355        let constraint = trimmed[paren_start + 1..]
356            .strip_suffix(')')
357            .unwrap_or(&trimmed[paren_start + 1..])
358            .trim();
359        Dependency {
360            name: name.to_string(),
361            requirement: if constraint.is_empty() {
362                None
363            } else {
364                Some(constraint.to_string())
365            },
366            pinned,
367        }
368    } else {
369        Dependency {
370            name: trimmed.to_string(),
371            requirement: None,
372            pinned,
373        }
374    }
375}
376
// Unit tests for the lockfile parser.
//
// Fixture-based tests embed the lockfiles under `tests/fixtures/` at compile
// time via `include_str!`; the remaining tests use inline lockfile text or
// call the line-level helpers directly.
#[cfg(test)]
mod tests {
    use super::*;

    // ========== Secure Lockfile ==========

    /// Top-level fields (sources, platforms, bundler version, dependencies)
    /// of the `secure` fixture.
    #[test]
    fn parse_secure_lockfile() {
        let input = include_str!("../../tests/fixtures/secure/Gemfile.lock");
        let lockfile = parse(input).unwrap();

        // Should have one GEM source
        assert_eq!(lockfile.sources.len(), 1);
        assert_eq!(
            lockfile.sources[0],
            Source::Rubygems(RubygemsSource {
                remote: "https://rubygems.org/".to_string(),
            })
        );

        // Check platforms
        assert_eq!(lockfile.platforms, vec!["ruby", "x86_64-linux"]);

        // Check bundled with
        assert_eq!(lockfile.bundled_with, Some("2.3.6".to_string()));

        // Check dependencies
        assert_eq!(lockfile.dependencies.len(), 2);
        assert_eq!(lockfile.dependencies[0].name, "rails");
        assert_eq!(
            lockfile.dependencies[0].requirement,
            Some("~> 5.2".to_string())
        );
        assert!(!lockfile.dependencies[0].pinned);
    }

    /// Individual specs of the `secure` fixture, including a gem that is
    /// listed both without and with a platform.
    #[test]
    fn parse_secure_specs() {
        let input = include_str!("../../tests/fixtures/secure/Gemfile.lock");
        let lockfile = parse(input).unwrap();

        // Check a specific gem
        let actioncable = lockfile.find_spec("actioncable").unwrap();
        assert_eq!(actioncable.version, "5.2.8");
        assert_eq!(actioncable.dependencies.len(), 3);
        assert_eq!(actioncable.dependencies[0].name, "actionpack");
        assert_eq!(
            actioncable.dependencies[0].requirement,
            Some("= 5.2.8".to_string())
        );

        // Check nokogiri with platform variant
        let nokogiri_specs = lockfile.find_specs("nokogiri");
        assert_eq!(nokogiri_specs.len(), 2);

        let nokogiri_plain = nokogiri_specs
            .iter()
            .find(|s| s.platform.is_none())
            .unwrap();
        assert_eq!(nokogiri_plain.version, "1.13.10");
        assert_eq!(nokogiri_plain.dependencies.len(), 2);

        let nokogiri_linux = nokogiri_specs
            .iter()
            .find(|s| s.platform.as_deref() == Some("x86_64-linux"))
            .unwrap();
        assert_eq!(nokogiri_linux.version, "1.13.10");
        assert_eq!(nokogiri_linux.dependencies.len(), 1); // only racc
    }

    /// Lower bound on the number of distinct gem names in the `secure` fixture.
    #[test]
    fn parse_secure_gem_count() {
        let input = include_str!("../../tests/fixtures/secure/Gemfile.lock");
        let lockfile = parse(input).unwrap();

        // Count unique gem names (some may have platform variants)
        let unique_names: std::collections::HashSet<&str> =
            lockfile.specs.iter().map(|s| s.name.as_str()).collect();

        // From the file: actioncable, actionmailer, actionpack, actionview,
        // activejob, activemodel, activerecord, activestorage, activesupport,
        // arel, builder, concurrent-ruby, crass, erubi, globalid, i18n, loofah,
        // mail, marcel, method_source, mini_mime, mini_portile2, minitest, nio4r,
        // nokogiri (x2 with platform), racc, rack, rack-test, rails,
        // rails-dom-testing, rails-html-sanitizer, railties, rake, sprockets,
        // sprockets-rails, thor, thread_safe, tzinfo, websocket-driver,
        // websocket-extensions
        assert!(unique_names.len() >= 30);
    }

    // ========== Insecure Sources Lockfile ==========

    /// Source order and contents of the `insecure_sources` fixture.
    #[test]
    fn parse_insecure_sources_lockfile() {
        let input = include_str!("../../tests/fixtures/insecure_sources/Gemfile.lock");
        let lockfile = parse(input).unwrap();

        // Should have two sources: GIT + GEM
        assert_eq!(lockfile.sources.len(), 2);

        // First source: GIT
        match &lockfile.sources[0] {
            Source::Git(git) => {
                assert_eq!(git.remote, "git://github.com/rails/jquery-rails.git");
                assert_eq!(
                    git.revision,
                    Some("a8b003d726522cf663611c114d8f0e79abf8d200".to_string())
                );
            }
            other => panic!("expected Git source, got {:?}", other),
        }

        // Second source: GEM with http (insecure)
        match &lockfile.sources[1] {
            Source::Rubygems(gem) => {
                assert_eq!(gem.remote, "http://rubygems.org/");
            }
            other => panic!("expected Rubygems source, got {:?}", other),
        }
    }

    /// A spec listed under the GIT block carries that block's source index.
    #[test]
    fn parse_insecure_git_source_specs() {
        let input = include_str!("../../tests/fixtures/insecure_sources/Gemfile.lock");
        let lockfile = parse(input).unwrap();

        // jquery-rails comes from GIT source (index 0)
        let jquery = lockfile.find_spec("jquery-rails").unwrap();
        assert_eq!(jquery.version, "4.4.0");
        assert_eq!(jquery.source_index, 0);
        assert_eq!(jquery.dependencies.len(), 3);
    }

    /// The trailing `!` in DEPENDENCIES sets `pinned`; entries without it do not.
    #[test]
    fn parse_insecure_pinned_dependency() {
        let input = include_str!("../../tests/fixtures/insecure_sources/Gemfile.lock");
        let lockfile = parse(input).unwrap();

        let jquery_dep = lockfile
            .dependencies
            .iter()
            .find(|d| d.name == "jquery-rails")
            .unwrap();
        assert!(jquery_dep.pinned);
        assert!(jquery_dep.requirement.is_none());

        let rails_dep = lockfile
            .dependencies
            .iter()
            .find(|d| d.name == "rails")
            .unwrap();
        assert!(!rails_dep.pinned);
        assert!(rails_dep.requirement.is_none());
    }

    // ========== Unpatched Gems Lockfile ==========

    /// Sources, bundler version, a spec and the single dependency of the
    /// `unpatched_gems` fixture.
    #[test]
    fn parse_unpatched_gems_lockfile() {
        let input = include_str!("../../tests/fixtures/unpatched_gems/Gemfile.lock");
        let lockfile = parse(input).unwrap();

        assert_eq!(lockfile.sources.len(), 1);
        assert_eq!(lockfile.bundled_with, Some("2.2.0".to_string()));

        let activerecord = lockfile.find_spec("activerecord").unwrap();
        assert_eq!(activerecord.version, "3.2.10");

        // DEPENDENCIES section has "activerecord (= 3.2.10)"
        assert_eq!(lockfile.dependencies.len(), 1);
        assert_eq!(lockfile.dependencies[0].name, "activerecord");
        assert_eq!(
            lockfile.dependencies[0].requirement,
            Some("= 3.2.10".to_string())
        );
    }

    // ========== Version-Platform Parsing ==========

    /// A version without a hyphen has no platform.
    #[test]
    fn parse_version_platform_plain() {
        let (v, p) = parse_version_platform("1.13.10");
        assert_eq!(v, "1.13.10");
        assert_eq!(p, None);
    }

    /// A platform that itself contains a hyphen is kept whole.
    #[test]
    fn parse_version_platform_with_linux() {
        let (v, p) = parse_version_platform("1.13.10-x86_64-linux");
        assert_eq!(v, "1.13.10");
        assert_eq!(p, Some("x86_64-linux".to_string()));
    }

    /// A single-word platform.
    #[test]
    fn parse_version_platform_java() {
        let (v, p) = parse_version_platform("9.2.14.0-java");
        assert_eq!(v, "9.2.14.0");
        assert_eq!(p, Some("java".to_string()));
    }

    /// An `arm64-darwin` platform.
    #[test]
    fn parse_version_platform_darwin() {
        let (v, p) = parse_version_platform("1.13.10-arm64-darwin");
        assert_eq!(v, "1.13.10");
        assert_eq!(p, Some("arm64-darwin".to_string()));
    }

    // ========== Dependency Line Parsing ==========

    /// DEPENDENCIES entry with a parenthesised requirement.
    #[test]
    fn parse_dependency_with_constraint() {
        let dep = parse_dependency_line("rails (~> 5.2)");
        assert_eq!(dep.name, "rails");
        assert_eq!(dep.requirement, Some("~> 5.2".to_string()));
        assert!(!dep.pinned);
    }

    /// DEPENDENCIES entry with a trailing `!`.
    #[test]
    fn parse_dependency_pinned() {
        let dep = parse_dependency_line("jquery-rails!");
        assert_eq!(dep.name, "jquery-rails");
        assert!(dep.requirement.is_none());
        assert!(dep.pinned);
    }

    /// DEPENDENCIES entry that is only a name.
    #[test]
    fn parse_dependency_plain() {
        let dep = parse_dependency_line("rails");
        assert_eq!(dep.name, "rails");
        assert!(dep.requirement.is_none());
        assert!(!dep.pinned);
    }

    // ========== Gem Dependency Parsing ==========

    /// Spec dependency with a single requirement.
    #[test]
    fn parse_gem_dep_with_constraint() {
        let dep = parse_gem_dependency("actionpack (= 5.2.8)");
        assert_eq!(dep.name, "actionpack");
        assert_eq!(dep.requirement, Some("= 5.2.8".to_string()));
    }

    /// Spec dependency with a comma-separated requirement, kept verbatim.
    #[test]
    fn parse_gem_dep_compound_constraint() {
        let dep = parse_gem_dependency("rack (~> 2.0, >= 2.0.8)");
        assert_eq!(dep.name, "rack");
        assert_eq!(dep.requirement, Some("~> 2.0, >= 2.0.8".to_string()));
    }

    /// Spec dependency that is only a name.
    #[test]
    fn parse_gem_dep_no_constraint() {
        let dep = parse_gem_dependency("method_source");
        assert_eq!(dep.name, "method_source");
        assert!(dep.requirement.is_none());
    }

    // ========== Edge Cases ==========

    /// Empty input yields an error rather than an empty lockfile.
    #[test]
    fn parse_empty_input() {
        let result = parse("");
        assert!(result.is_err());
    }

    /// Smallest useful lockfile: one source, one spec, one platform, one dependency.
    #[test]
    fn parse_minimal_lockfile() {
        let input = "\
GEM
  remote: https://rubygems.org/
  specs:
    rack (2.2.0)

PLATFORMS
  ruby

DEPENDENCIES
  rack
";
        let lockfile = parse(input).unwrap();
        assert_eq!(lockfile.specs.len(), 1);
        assert_eq!(lockfile.specs[0].name, "rack");
        assert_eq!(lockfile.specs[0].version, "2.2.0");
        assert_eq!(lockfile.platforms, vec!["ruby"]);
        assert_eq!(lockfile.dependencies.len(), 1);
    }

    // ========== PATH Source ==========

    /// A PATH block ahead of a GEM block becomes source 0 and owns its spec.
    #[test]
    fn parse_path_source() {
        let input = "\
PATH
  remote: .
  specs:
    my_gem (0.1.0)

GEM
  remote: https://rubygems.org/
  specs:
    rack (2.0.0)

PLATFORMS
  ruby

DEPENDENCIES
  my_gem!
  rack
";
        let lockfile = parse(input).unwrap();
        assert_eq!(lockfile.sources.len(), 2);
        match &lockfile.sources[0] {
            Source::Path(p) => assert_eq!(p.remote, "."),
            other => panic!("expected Path source, got {:?}", other),
        }
        let my_gem = lockfile.find_spec("my_gem").unwrap();
        assert_eq!(my_gem.version, "0.1.0");
        assert_eq!(my_gem.source_index, 0);
    }

    // ========== GIT with tag ==========

    /// `revision:` and `tag:` attributes of a GIT block are both captured.
    #[test]
    fn parse_git_source_with_tag() {
        let input = "\
GIT
  remote: https://github.com/foo/bar.git
  revision: abc123
  tag: v1.0.0
  specs:
    bar (1.0.0)

GEM
  remote: https://rubygems.org/
  specs:
    rack (2.0.0)

PLATFORMS
  ruby

DEPENDENCIES
  bar!
  rack
";
        let lockfile = parse(input).unwrap();
        match &lockfile.sources[0] {
            Source::Git(git) => {
                assert_eq!(git.tag, Some("v1.0.0".to_string()));
                assert_eq!(git.revision, Some("abc123".to_string()));
            }
            other => panic!("expected Git source, got {:?}", other),
        }
    }

    // ========== RUBY VERSION section ==========

    /// The values in this input are indented three spaces; the parser
    /// accepts any indent of two or more in these sections.
    #[test]
    fn parse_ruby_version_section() {
        let input = "\
GEM
  remote: https://rubygems.org/
  specs:
    rack (2.0.0)

PLATFORMS
  ruby

DEPENDENCIES
  rack

RUBY VERSION
   ruby 3.0.0p0

BUNDLED WITH
   2.3.6
";
        let lockfile = parse(input).unwrap();
        assert_eq!(lockfile.ruby_version, Some("ruby 3.0.0p0".to_string()));
    }

    /// Every spec's `source_index` must be a valid index into `sources`.
    #[test]
    fn all_specs_have_valid_source_index() {
        let input = include_str!("../../tests/fixtures/insecure_sources/Gemfile.lock");
        let lockfile = parse(input).unwrap();

        for spec in &lockfile.specs {
            assert!(
                spec.source_index < lockfile.sources.len(),
                "spec {} has source_index {} but only {} sources",
                spec.name,
                spec.source_index,
                lockfile.sources.len()
            );
        }
    }
}