Skip to main content

git_stub/
git_stub.rs

1// Copyright 2026 Oxide Computer Company
2
3//! Git stub types and operations.
4
5use crate::{GitCommitHash, GitStubParseError};
6use camino::{Utf8Component, Utf8Path, Utf8PathBuf};
7use std::{fmt, str::FromStr};
8
/// Represents a git stub: a reference to a file at a specific commit.
///
/// A git stub is stored as a string in the format `commit:path`, and can be
/// used to retrieve file contents via `git cat-file blob commit:path`.
///
/// Construct via [`FromStr`] (parsing) or [`GitStub::new`].
///
/// # Invariants
///
/// - The path is non-empty.
/// - The path uses forward slashes (backslashes are normalized on
///   construction).
/// - Every path component is a normal file or directory name (no `..`,
///   `.`, root `/`, or Windows prefixes).
///
/// # Equality and hashing
///
/// Two stubs compare equal (and hash identically) when their commit and
/// path match; whether the original input was in canonical form is not
/// part of a stub's identity.
///
/// # Examples
///
/// ```
/// use git_stub::GitStub;
///
/// let git_stub: GitStub =
///     "0123456789abcdef0123456789abcdef01234567:openapi/api.json"
///         .parse()
///         .unwrap();
///
/// assert_eq!(git_stub.path().as_str(), "openapi/api.json");
/// ```
#[derive(Clone, Debug)]
pub struct GitStub {
    /// The commit at which the file is referenced.
    commit: GitCommitHash,
    /// Path of the file within the repository, stored with forward slashes.
    path: Utf8PathBuf,
    /// Whether the input used to construct this `GitStub` was not in canonical
    /// form (e.g., had backslashes, an uppercase-hex commit hash, extra
    /// whitespace, or a missing trailing newline).
    ///
    /// This is bookkeeping about the input only; it is deliberately ignored
    /// by the `PartialEq` and `Hash` impls.
    needs_rewrite: bool,
}
45
46impl PartialEq for GitStub {
47    fn eq(&self, other: &Self) -> bool {
48        self.commit == other.commit && self.path == other.path
49    }
50}
51
// Marker impl: the hand-written `PartialEq` compares only `commit` and
// `path`, so `GitStub` can be used wherever full equality is required
// (e.g., as a `HashSet` element or `HashMap` key).
impl Eq for GitStub {}
53
54// Hash must be consistent with the custom PartialEq above: exclude
55// `needs_rewrite` so that two stubs with the same commit:path hash
56// identically regardless of how they were parsed.
57impl core::hash::Hash for GitStub {
58    fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
59        self.commit.hash(state);
60        self.path.hash(state);
61    }
62}
63
64impl GitStub {
65    /// Creates a new `GitStub` with the given commit hash and path.
66    ///
67    /// The path is normalized: backslashes are converted to forward slashes.
68    ///
69    /// Returns an error if:
70    /// - The path is empty.
71    /// - The path contains a newline character.
72    /// - Any path component is not a normal file or directory name (e.g.,
73    ///   `..`, `.`, root `/`, or a Windows prefix).
74    pub fn new(
75        commit: GitCommitHash,
76        path: Utf8PathBuf,
77    ) -> Result<Self, GitStubParseError> {
78        let raw = path.as_str();
79        let needs_rewrite = raw.contains('\\');
80        let normalized = raw.replace('\\', "/");
81        if normalized.is_empty() {
82            return Err(GitStubParseError::EmptyPath);
83        }
84        if normalized.contains('\n') {
85            return Err(GitStubParseError::NewlineInPath);
86        }
87        let path = Utf8PathBuf::from(normalized);
88
89        // Reject paths that contain anything other than plain file/directory
90        // names. This prevents path traversal (e.g., `../escape`) and
91        // absolute paths (e.g., `/etc/passwd`).
92        if let Some(component) = find_non_normal_component(&path) {
93            return Err(GitStubParseError::InvalidPathComponent {
94                path,
95                component,
96            });
97        }
98
99        Ok(GitStub { commit, path, needs_rewrite })
100    }
101
102    /// Returns the commit hash.
103    pub fn commit(&self) -> GitCommitHash {
104        self.commit
105    }
106
107    /// Returns the path within the repository.
108    pub fn path(&self) -> &Utf8Path {
109        &self.path
110    }
111
112    /// Returns the canonical file contents for this git stub.
113    ///
114    /// The canonical format is `commit:path\n` where:
115    /// - The path uses forward slashes (even on Windows).
116    /// - The file ends with a single newline.
117    pub fn to_file_contents(&self) -> String {
118        format!("{}\n", self)
119    }
120
121    /// Returns whether the input used to construct this `GitStub` was not in
122    /// canonical form.
123    ///
124    /// A Git stub needs rewriting if it doesn't match the canonical
125    /// format:
126    ///
127    /// - Missing trailing newline.
128    /// - Contains backslashes in the path.
129    /// - Has extra whitespace.
130    pub fn needs_rewrite(&self) -> bool {
131        self.needs_rewrite
132    }
133}
134
135impl fmt::Display for GitStub {
136    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> std::fmt::Result {
137        write!(f, "{}:{}", self.commit, self.path)
138    }
139}
140
141impl FromStr for GitStub {
142    type Err = GitStubParseError;
143
144    fn from_str(s: &str) -> Result<Self, Self::Err> {
145        // Check for non-canonical formatting before trimming. Canonical
146        // form is exactly `commit:path\n`: a single trailing newline and no
147        // other surrounding whitespace. (Backslash detection is handled
148        // separately by `new()`.)
149        let needs_rewrite = !s.ends_with('\n') || s.trim().len() + 1 != s.len();
150
151        let trimmed = s.trim();
152        if trimmed.is_empty() {
153            return Err(GitStubParseError::EmptyInput);
154        }
155        let (commit_str, path) = trimmed.split_once(':').ok_or_else(|| {
156            GitStubParseError::InvalidFormat(trimmed.to_owned())
157        })?;
158        let commit: GitCommitHash = commit_str.parse()?;
159        // Uppercase hex is accepted by the parser but Display emits
160        // lowercase, so the round-trip would differ. Flag it.
161        let has_uppercase_hex =
162            commit_str.bytes().any(|b| b.is_ascii_uppercase());
163        // GitStub::new handles backslash normalization and empty-path
164        // rejection.
165        let mut stub = GitStub::new(commit, Utf8PathBuf::from(path))?;
166        // Merge in the whitespace/newline/case canonicality check with
167        // whatever new() detected (e.g., backslashes in path).
168        stub.needs_rewrite =
169            stub.needs_rewrite || needs_rewrite || has_uppercase_hex;
170        Ok(stub)
171    }
172}
173
174/// Returns the first non-normal component in the path, if any.
175///
176/// A normal component is a plain file or directory name (not `..`, `.`,
177/// root `/`, or a Windows prefix).
178fn find_non_normal_component(path: &Utf8Path) -> Option<String> {
179    path.components().find_map(|component| match component {
180        Utf8Component::Normal(_) => None,
181        Utf8Component::Prefix(_)
182        | Utf8Component::RootDir
183        | Utf8Component::CurDir
184        | Utf8Component::ParentDir => Some(component.as_str().to_owned()),
185    })
186}
187
#[cfg(test)]
mod tests {
    use super::*;

    const VALID_SHA1: &str = "0123456789abcdef0123456789abcdef01234567";

    /// Returns [`VALID_SHA1`] as a parsed commit hash.
    fn sha1() -> crate::GitCommitHash {
        VALID_SHA1.parse().unwrap()
    }

    /// Calls [`GitStub::new`] with [`VALID_SHA1`] and the given path.
    fn new_stub(path: &str) -> Result<GitStub, GitStubParseError> {
        GitStub::new(sha1(), Utf8PathBuf::from(path))
    }

    /// Parses `input` as a [`GitStub`].
    fn parse(input: &str) -> Result<GitStub, GitStubParseError> {
        input.parse()
    }

    /// Asserts that `result` failed with `InvalidPathComponent`.
    #[track_caller]
    fn assert_invalid_component(
        result: Result<GitStub, GitStubParseError>,
        msg: &str,
    ) {
        assert!(
            matches!(
                result,
                Err(GitStubParseError::InvalidPathComponent { .. })
            ),
            "{msg}"
        );
    }

    #[test]
    fn test_git_stub_parse() {
        let git_stub =
            parse(&format!("{VALID_SHA1}:openapi/api/api-1.0.0-def456.json"))
                .unwrap();
        assert_eq!(git_stub.commit().to_string(), VALID_SHA1);
        assert_eq!(
            git_stub.path().as_str(),
            "openapi/api/api-1.0.0-def456.json"
        );
    }

    #[test]
    fn test_git_stub_parse_with_whitespace() {
        let git_stub =
            parse(&format!("  {VALID_SHA1}:path/file.json\n")).unwrap();
        assert_eq!(git_stub.commit().to_string(), VALID_SHA1);
        assert_eq!(git_stub.path().as_str(), "path/file.json");
    }

    #[test]
    fn test_git_stub_parse_invalid_no_colon() {
        assert!(matches!(
            parse("no-colon"),
            Err(GitStubParseError::InvalidFormat(_))
        ));
    }

    #[test]
    fn test_git_stub_parse_invalid_empty() {
        assert!(parse("").is_err());
    }

    #[test]
    fn test_git_stub_parse_invalid_commit_hash() {
        // Well-formed `commit:path`, but the commit hash is too short.
        assert!(matches!(
            parse("abc123:path/file.json"),
            Err(GitStubParseError::InvalidCommitHash(_))
        ));
    }

    #[test]
    fn test_git_stub_roundtrip() {
        let git_stub = new_stub("path/to/file.json").unwrap();
        let rendered = git_stub.to_string();
        assert_eq!(rendered, format!("{VALID_SHA1}:path/to/file.json"));
        let parsed = parse(&rendered).unwrap();
        assert_eq!(git_stub, parsed);
    }

    #[test]
    fn test_git_stub_to_file_contents() {
        let contents = new_stub("path/to/file.json").unwrap().to_file_contents();
        let expected = format!("{VALID_SHA1}:path/to/file.json\n");
        assert_eq!(contents, expected, "should have trailing newline");
    }

    #[test]
    fn test_git_stub_new_normalizes_backslashes() {
        // Constructing with backslashes must yield a forward-slash path.
        let git_stub = new_stub("path\\to\\file.json").unwrap();
        assert_eq!(
            git_stub.path().as_str(),
            "path/to/file.json",
            "constructor should normalize backslashes"
        );
        // The normalization must be visible through Display as well.
        let rendered = git_stub.to_string();
        assert!(
            !rendered.contains('\\'),
            "display should not contain backslashes"
        );
        assert!(rendered.contains("path/to/file.json"));
    }

    #[test]
    fn test_git_stub_new_rejects_empty_path() {
        assert!(
            matches!(new_stub(""), Err(GitStubParseError::EmptyPath)),
            "should reject empty path"
        );
    }

    #[test]
    fn test_git_stub_parse_normalizes_backslashes() {
        // The parsing path must normalize backslashes too.
        let git_stub =
            parse(&format!("{VALID_SHA1}:path\\to\\file.json")).unwrap();
        assert_eq!(
            git_stub.path().as_str(),
            "path/to/file.json",
            "backslashes should be normalized to forward slashes"
        );
    }

    #[test]
    fn test_git_stub_parse_error_variants() {
        // Empty input.
        assert!(matches!(parse(""), Err(GitStubParseError::EmptyInput)));

        // Whitespace-only input.
        assert!(matches!(
            parse("   \n  "),
            Err(GitStubParseError::EmptyInput)
        ));

        // Valid commit hash with nothing after the colon.
        assert!(matches!(
            parse(&format!("{VALID_SHA1}:")),
            Err(GitStubParseError::EmptyPath)
        ));
    }

    #[test]
    fn test_git_stub_needs_rewrite() {
        // Canonical format: forward slashes, single trailing newline.
        let stub = parse(&format!("{VALID_SHA1}:path/to/file.json\n")).unwrap();
        assert!(
            !stub.needs_rewrite(),
            "canonical format should not need rewrite"
        );

        // Each of these parses successfully but is not canonical.
        let non_canonical = [
            (
                format!("{VALID_SHA1}:path/to/file.json"),
                "missing trailing newline should need rewrite",
            ),
            (
                format!("{VALID_SHA1}:path/to/file.json\n\n"),
                "extra trailing newlines should need rewrite",
            ),
            (
                format!("  {VALID_SHA1}:path/to/file.json\n"),
                "leading whitespace should need rewrite",
            ),
            (
                format!("{VALID_SHA1}:path\\to\\file.json\n"),
                "backslashes in path should need rewrite",
            ),
        ];
        for (input, msg) in &non_canonical {
            let stub = parse(input).unwrap();
            assert!(stub.needs_rewrite(), "{msg}");
        }

        // CRLF line ending: flagged, and the `\r` must not reach the path.
        let stub =
            parse(&format!("{VALID_SHA1}:path/to/file.json\r\n")).unwrap();
        assert!(stub.needs_rewrite(), "CRLF should need rewrite");
        assert_eq!(
            stub.path().as_str(),
            "path/to/file.json",
            "CRLF should not leave \\r in the path"
        );
    }

    #[test]
    fn test_git_stub_new_needs_rewrite() {
        // A clean path passed to new() is already canonical.
        let clean = new_stub("path/to/file.json").unwrap();
        assert!(
            !clean.needs_rewrite(),
            "new() with canonical path should not need rewrite"
        );

        // Backslashes passed to new() mark the stub for rewriting.
        let backslashed = new_stub("path\\to\\file.json").unwrap();
        assert!(
            backslashed.needs_rewrite(),
            "new() with backslashes should need rewrite"
        );
    }

    #[test]
    fn test_git_stub_needs_rewrite_uppercase_hex() {
        // Uppercase hex parses fine but Display prints lowercase, so the
        // input would not round-trip byte-for-byte.
        let upper = "0123456789ABCDEF0123456789ABCDEF01234567";
        let input = format!("{upper}:path/to/file.json\n");
        let stub = parse(&input).unwrap();
        assert!(
            stub.needs_rewrite(),
            "uppercase hex in commit hash should need rewrite"
        );

        // Canonical output is the lowercase rendering.
        let canonical = stub.to_file_contents();
        assert_ne!(
            canonical, input,
            "canonical output should differ from uppercase input"
        );
        assert_eq!(
            canonical,
            format!("{}:path/to/file.json\n", upper.to_ascii_lowercase()),
        );

        // The same input with lowercase hex is already canonical.
        let stub2 = parse(&format!("{VALID_SHA1}:path/to/file.json\n")).unwrap();
        assert!(
            !stub2.needs_rewrite(),
            "lowercase hex should not need rewrite"
        );
    }

    #[test]
    fn test_git_stub_needs_rewrite_equality() {
        // Same commit:path must compare equal even when only one of the
        // two inputs was canonical.
        let a = parse(&format!("{VALID_SHA1}:path/to/file.json\n")).unwrap();
        let b = parse(&format!("  {VALID_SHA1}:path/to/file.json")).unwrap();
        assert!(!a.needs_rewrite());
        assert!(b.needs_rewrite());
        assert_eq!(a, b, "equality should ignore needs_rewrite");
    }

    #[test]
    fn test_git_stub_sha256_roundtrip() {
        let sha256 = concat!(
            "0123456789abcdef0123456789abcdef",
            "0123456789abcdef0123456789abcdef",
        );
        let stub = parse(&format!("{sha256}:openapi/api.json\n")).unwrap();

        assert!(
            matches!(stub.commit(), crate::GitCommitHash::Sha256(_)),
            "64-char hex should parse as SHA-256"
        );
        assert_eq!(stub.path().as_str(), "openapi/api.json");
        assert!(!stub.needs_rewrite());

        // Display output must parse back to an equal stub.
        let reparsed = parse(&stub.to_string()).unwrap();
        assert_eq!(stub, reparsed);
    }

    #[test]
    fn test_git_stub_path_containing_colon() {
        // Only the first colon separates commit from path (the parser
        // uses split_once(':')); any later colon belongs to the path.
        let stub =
            parse(&format!("{VALID_SHA1}:path/to/file:v2.json\n")).unwrap();
        assert_eq!(
            stub.path().as_str(),
            "path/to/file:v2.json",
            "colons after the first should be part of the path"
        );
        assert!(!stub.needs_rewrite());
    }

    #[test]
    fn test_git_stub_hash_consistency_with_eq() {
        use std::collections::HashSet;

        // Stubs that are equal (same commit:path) but disagree on
        // needs_rewrite must still hash to the same value.
        let a = parse(&format!("{VALID_SHA1}:path/to/file.json\n")).unwrap();
        let b = parse(&format!("  {VALID_SHA1}:path/to/file.json")).unwrap();
        assert_eq!(a, b);
        assert!(!a.needs_rewrite());
        assert!(b.needs_rewrite());

        let set = HashSet::from([a, b]);
        assert_eq!(set.len(), 1, "equal stubs must hash identically");
    }

    // --- Path component validation tests ---

    #[test]
    fn test_git_stub_rejects_parent_dir() {
        assert_invalid_component(
            new_stub("../escape/file.json"),
            "should reject path with .. component",
        );
    }

    #[test]
    fn test_git_stub_rejects_current_dir() {
        assert_invalid_component(
            new_stub("./path/file.json"),
            "should reject path with . component",
        );
    }

    #[test]
    fn test_git_stub_rejects_absolute_path() {
        assert_invalid_component(
            new_stub("/absolute/path/file.json"),
            "should reject absolute path",
        );
    }

    #[test]
    fn test_git_stub_rejects_embedded_parent_dir() {
        assert_invalid_component(
            new_stub("path/../../escape/file.json"),
            "should reject path with embedded .. components",
        );
    }

    #[test]
    fn test_git_stub_rejects_dot_only_path() {
        assert_invalid_component(
            new_stub("."),
            "should reject path that is just '.'",
        );
    }

    #[test]
    fn test_git_stub_rejects_dotdot_only_path() {
        assert_invalid_component(
            new_stub(".."),
            "should reject path that is just '..'",
        );
    }

    #[test]
    fn test_git_stub_rejects_backslash_parent_dir() {
        // Normalizing the backslashes turns this into "../escape/file.json".
        assert_invalid_component(
            new_stub("..\\escape\\file.json"),
            "should reject backslash-normalized path with .. component",
        );
    }

    #[test]
    fn test_git_stub_parse_rejects_parent_dir() {
        // The same validation applies on the parsing path.
        assert_invalid_component(
            parse(&format!("{VALID_SHA1}:../escape/file.json")),
            "parsing should reject path with .. component",
        );
    }

    #[test]
    fn test_git_stub_parse_rejects_absolute_path() {
        assert_invalid_component(
            parse(&format!("{VALID_SHA1}:/etc/passwd")),
            "parsing should reject absolute path",
        );
    }

    #[test]
    fn test_git_stub_parse_rejects_current_dir() {
        assert_invalid_component(
            parse(&format!("{VALID_SHA1}:./path/file.json")),
            "parsing should reject path with . component",
        );
    }

    #[test]
    fn test_git_stub_rejects_newline_in_path() {
        // Multi-line input (e.g., left behind by a merge conflict or an
        // accidental concatenation) puts a newline inside the path and
        // must therefore be rejected.
        let multi_line = format!(
            "{VALID_SHA1}:path/a.json\n{VALID_SHA1}:path/b.json\n"
        );
        assert!(
            matches!(parse(&multi_line), Err(GitStubParseError::NewlineInPath)),
            "multi-line input should be rejected"
        );

        // Constructing directly with an embedded newline fails the same way.
        assert!(
            matches!(
                new_stub("path/\n/file.json"),
                Err(GitStubParseError::NewlineInPath)
            ),
            "path with embedded newline should be rejected"
        );
    }
}