Skip to main content

git_lfs_pointer/
lib.rs

//! Parse and encode Git LFS pointer files.
//!
//! A pointer is a small UTF-8 text file that stands in for a large
//! file in a git repository. It carries the file's SHA-256 OID, its
//! size, and an optional list of extension records. This crate
//! handles parsing and encoding of that format, with no I/O, no
//! network, and no git dependency.
//!
//! The format is a sorted sequence of `key value` lines: the
//! `version` URL always first, then optional extension records
//! sorted by single-digit priority, then the `oid` and `size`
//! lines. The whole file must be under 1024 bytes (see
//! [`docs/spec.md`] for the full grammar).
//!
//! [`Pointer::parse`] is permissive (CRLF line endings, leading or
//! trailing whitespace around the input, a missing final newline,
//! unsorted extensions, and older version URLs all parse cleanly),
//! while [`Pointer::encode`] always emits the canonical form. Each
//! parsed pointer carries a `canonical` flag so callers like the
//! smudge filter can pass the original bytes through verbatim when
//! they already match; re-encoding a non-canonical pointer would
//! change its git blob hash.
//!
//! Parse errors split into [`DecodeError::NotAPointer`] (input
//! bears no LFS markers; callers should treat the bytes as opaque
//! content) and [`DecodeError::Malformed`] (input has pointer
//! shape but invalid contents; callers should surface the error).
//! [`DecodeError::is_not_a_pointer`] is the predicate that tells
//! the two apart.
//!
//! ```
//! use git_lfs_pointer::{Oid, Pointer};
//!
//! let oid: Oid = "4d7a214614ab2935c943f9e0ff69d22eadbb8f32b1258daaa5e2ca24d17e2393"
//!     .parse()
//!     .unwrap();
//! let pointer = Pointer::new(oid, 12345);
//!
//! let encoded = pointer.encode();
//! let parsed = Pointer::parse(encoded.as_bytes()).unwrap();
//! assert_eq!(parsed.oid, oid);
//! assert_eq!(parsed.size, 12345);
//! assert!(parsed.canonical);
//! ```
//!
//! [`docs/spec.md`]: https://gitlab.com/rustutils/git-lfs/-/blob/master/docs/spec.md
45
46mod oid;
47
48pub use oid::{EMPTY_HEX, Oid, OidParseError};
49
/// Canonical spec URL written on every encoded `version` line.
///
/// Older aliases are accepted when parsing but are never emitted.
pub const VERSION_LATEST: &str = "https://git-lfs.github.com/spec/v1";

/// Exclusive upper bound, in bytes, on the size of a pointer file.
///
/// The spec requires pointer files to be strictly smaller than this value;
/// input of this length or longer is not a pointer.
pub const MAX_POINTER_SIZE: usize = 1024;

/// Every spec URL accepted on a `version` line when parsing, oldest first.
const VERSION_ALIASES: &[&str] = &[
    "http://git-media.io/v/2",           // alpha
    "https://hawser.github.com/spec/v1", // pre-release
    VERSION_LATEST,                      // current
];
65
/// A parsed git-lfs pointer.
///
/// A pointer with `size == 0` is an *empty pointer*: it represents an empty
/// file and serializes to the empty byte string. The `oid` field of an empty
/// pointer is conventionally [`Oid::EMPTY`] (SHA-256 of zero bytes).
///
/// Values are produced by [`Pointer::new`], [`Pointer::empty`] and
/// [`Pointer::parse`]; every field is public, so a struct literal works too.
/// Equality is derived, so the `canonical` flag takes part in `==`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Pointer {
    /// SHA-256 of the file's content.
    pub oid: Oid,
    /// Size of the file in bytes. Zero marks the empty pointer.
    pub size: u64,
    /// Pointer extensions. May be empty.
    ///
    /// [`Pointer::parse`] stores these sorted by `priority` ascending.
    /// [`Pointer::encode`] sorts a copy before writing, so a hand-built
    /// value does not have to be pre-sorted.
    pub extensions: Vec<Extension>,
    /// `true` if this was decoded from input that exactly matched the
    /// canonical encoding, or if it was constructed programmatically.
    ///
    /// Re-encoding a non-canonical parse produces canonical bytes.
    pub canonical: bool,
}
85
/// A pointer extension.
///
/// Extensions appear between the `version` and `oid` lines in the
/// encoded form, sorted by `priority`. Priorities are single decimal
/// digits (0-9). See [`docs/extensions.md`] for the chain semantics.
///
/// Each extension is written as one line of the form
/// `ext-<priority>-<name> sha256:<oid>`.
///
/// [`docs/extensions.md`]: https://gitlab.com/rustutils/git-lfs/-/blob/master/docs/extensions.md
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Extension {
    /// Extension name, e.g. `lfs-folderstore`. ASCII alphanumeric or `_`.
    ///
    /// The parser only accepts names made of those characters; a name set
    /// by hand is written out as-is when encoding.
    pub name: String,
    /// Single decimal digit (0-9). Lower priorities run earlier in
    /// the extension chain.
    ///
    /// Parsing rejects two extensions that share a priority.
    pub priority: u8,
    /// SHA-256 of the data this extension saw as input during clean.
    pub oid: Oid,
}
103
104impl Pointer {
105    /// Build a non-empty pointer with no extensions.
106    pub fn new(oid: Oid, size: u64) -> Self {
107        Self {
108            oid,
109            size,
110            extensions: Vec::new(),
111            canonical: true,
112        }
113    }
114
115    /// The empty pointer: size 0, OID [`Oid::EMPTY`], no extensions.
116    ///
117    /// This is both the parse result for empty input and the pointer
118    /// representation of an empty file.
119    pub fn empty() -> Self {
120        Self {
121            oid: Oid::EMPTY,
122            size: 0,
123            extensions: Vec::new(),
124            canonical: true,
125        }
126    }
127
128    /// `true` if this is the empty pointer (size 0).
129    pub fn is_empty(&self) -> bool {
130        self.size == 0
131    }
132
133    /// Encode to canonical text form. The empty pointer encodes to `""`.
134    ///
135    /// Extensions are emitted sorted by priority. The version line is always
136    /// [`VERSION_LATEST`], regardless of what the source used.
137    pub fn encode(&self) -> String {
138        use std::fmt::Write as _;
139        if self.size == 0 {
140            return String::new();
141        }
142        let mut exts: Vec<&Extension> = self.extensions.iter().collect();
143        exts.sort_by_key(|e| e.priority);
144
145        let mut out = String::with_capacity(160 + 80 * exts.len());
146        writeln!(out, "version {VERSION_LATEST}").unwrap();
147        for ext in exts {
148            writeln!(out, "ext-{}-{} sha256:{}", ext.priority, ext.name, ext.oid).unwrap();
149        }
150        writeln!(out, "oid sha256:{}", self.oid).unwrap();
151        writeln!(out, "size {}", self.size).unwrap();
152        out
153    }
154
155    /// Parse a pointer from the raw bytes of a blob.
156    ///
157    /// Returns [`DecodeError::NotAPointer`] if the input doesn't look like a
158    /// pointer at all (callers like the smudge filter should pass the bytes
159    /// through unchanged), or [`DecodeError::Malformed`] if the input has
160    /// pointer shape but invalid contents (callers should error out).
161    pub fn parse(input: &[u8]) -> Result<Self, DecodeError> {
162        if input.is_empty() {
163            return Ok(Self::empty());
164        }
165        if input.len() >= MAX_POINTER_SIZE {
166            return Err(DecodeError::NotAPointer(NotAPointerReason::TooLarge {
167                size: input.len(),
168            }));
169        }
170        let text = std::str::from_utf8(input)
171            .map_err(|_| DecodeError::NotAPointer(NotAPointerReason::NotUtf8))?;
172        if !contains_spec_marker(text) {
173            return Err(DecodeError::NotAPointer(NotAPointerReason::MissingHeader));
174        }
175
176        let mut pointer = parse_lines(text.trim())?;
177        pointer.canonical = pointer.encode().as_bytes() == input;
178        Ok(pointer)
179    }
180}
181
/// Cheap pre-check: does `text` mention any of the substrings `git-lfs`,
/// `git-media` or `hawser`?
///
/// The match is a case-sensitive substring search anywhere in `text`.
fn contains_spec_marker(text: &str) -> bool {
    const MARKERS: [&str; 3] = ["git-lfs", "git-media", "hawser"];
    MARKERS.iter().any(|&marker| text.contains(marker))
}
185
186fn parse_lines(text: &str) -> Result<Pointer, DecodeError> {
187    const REQUIRED: [&str; 3] = ["version", "oid", "size"];
188    let mut filled: [Option<&str>; 3] = [None, None, None];
189    let mut consumed = 0usize;
190    let mut extensions: Vec<Extension> = Vec::new();
191
192    for (line_no, raw_line) in text.split('\n').enumerate() {
193        // Tolerate CRLF: bufio.Scanner does this in upstream, so we match.
194        let line = raw_line.strip_suffix('\r').unwrap_or(raw_line);
195        if line.is_empty() {
196            continue;
197        }
198
199        let (key, value) = line.split_once(' ').ok_or(DecodeError::NotAPointer(
200            NotAPointerReason::MalformedLine { line: line_no },
201        ))?;
202
203        if consumed == REQUIRED.len() {
204            return Err(DecodeError::NotAPointer(NotAPointerReason::ExtraLine {
205                line: line_no,
206                content: line.into(),
207            }));
208        }
209
210        let expected = REQUIRED[consumed];
211        if key == expected {
212            filled[consumed] = Some(value);
213            consumed += 1;
214            continue;
215        }
216
217        // Mismatch: try to parse as an extension.
218        if let Some((priority, name)) = parse_extension_key(key) {
219            let ext_oid = parse_oid_value(value).map_err(DecodeError::Malformed)?;
220            extensions.push(Extension {
221                name: name.to_owned(),
222                priority,
223                oid: ext_oid,
224            });
225            continue;
226        }
227
228        // Not a required key, not an extension. If this happens before the
229        // version line, treat as NotAPointer (matches upstream's
230        // StandardizeBadPointerError); otherwise it's a malformed pointer.
231        return Err(if expected == "version" {
232            DecodeError::NotAPointer(NotAPointerReason::NotVersionFirst { got: key.into() })
233        } else {
234            DecodeError::Malformed(MalformedReason::UnexpectedKey {
235                expected,
236                got: key.into(),
237            })
238        });
239    }
240
241    let version = filled[0].ok_or(DecodeError::NotAPointer(NotAPointerReason::MissingVersion))?;
242    if !VERSION_ALIASES.contains(&version) {
243        return Err(DecodeError::Malformed(MalformedReason::InvalidVersion(
244            version.into(),
245        )));
246    }
247
248    let oid_value =
249        filled[1].ok_or(DecodeError::Malformed(MalformedReason::MissingField("oid")))?;
250    let oid = parse_oid_value(oid_value).map_err(DecodeError::Malformed)?;
251
252    let size_value = filled[2].ok_or(DecodeError::Malformed(MalformedReason::MissingField(
253        "size",
254    )))?;
255    let size = parse_size(size_value).map_err(DecodeError::Malformed)?;
256
257    extensions.sort_by_key(|e| e.priority);
258    for w in extensions.windows(2) {
259        if w[0].priority == w[1].priority {
260            return Err(DecodeError::Malformed(
261                MalformedReason::DuplicateExtensionPriority(w[0].priority),
262            ));
263        }
264    }
265
266    Ok(Pointer {
267        oid,
268        size,
269        extensions,
270        canonical: true, // overwritten by Pointer::parse
271    })
272}
273
274fn parse_oid_value(value: &str) -> Result<Oid, MalformedReason> {
275    let (oid_type, hash) = value
276        .split_once(':')
277        .ok_or_else(|| MalformedReason::MalformedOidValue(value.into()))?;
278    if oid_type != "sha256" {
279        return Err(MalformedReason::UnsupportedOidType(oid_type.into()));
280    }
281    Oid::from_hex(hash).map_err(MalformedReason::InvalidOidHash)
282}
283
284fn parse_size(value: &str) -> Result<u64, MalformedReason> {
285    // u64 parse already rejects leading '-', '+', whitespace, and non-digits.
286    value
287        .parse::<u64>()
288        .map_err(|_| MalformedReason::InvalidSize(value.into()))
289}
290
/// Split an extension key of the form `ext-<digit>-<word>` into its parts.
///
/// Returns `Some((priority, name))` when `key` has exactly that shape: the
/// literal prefix `ext-`, one ASCII digit, a `-`, then a non-empty name made
/// only of ASCII alphanumerics or `_`. Anything else yields `None`.
fn parse_extension_key(key: &str) -> Option<(u8, &str)> {
    let tail = key.strip_prefix("ext-")?;

    // The pattern demands a digit, a dash and at least one name byte. Both
    // leading bytes are ASCII, so slicing at offset 2 is on a char boundary.
    let (priority, name) = match tail.as_bytes() {
        [digit, b'-', _, ..] if digit.is_ascii_digit() => (*digit - b'0', &tail[2..]),
        _ => return None,
    };

    let is_word = name
        .bytes()
        .all(|b| b == b'_' || b.is_ascii_alphanumeric());
    is_word.then_some((priority, name))
}
308
/// Why a [`Pointer::parse`] call failed.
///
/// Splits into "not a pointer" (input wasn't intended to be an LFS
/// pointer; callers should surface the bytes as opaque content) and
/// "malformed" (input claimed to be a pointer but didn't validate;
/// callers should surface the error).
///
/// The `Display` text is a fixed prefix followed by the inner reason's
/// own message.
#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)]
pub enum DecodeError {
    /// The input does not look like a pointer at all.
    ///
    /// The payload says which check failed.
    #[error("not a git-lfs pointer: {0}")]
    NotAPointer(NotAPointerReason),
    /// The input has pointer shape but is invalid.
    ///
    /// The payload says which validation failed.
    #[error("malformed git-lfs pointer: {0}")]
    Malformed(MalformedReason),
}
324
325impl DecodeError {
326    /// `true` if the input doesn't look like a pointer.
327    ///
328    /// The smudge filter should pass the bytes through unchanged in
329    /// this case.
330    pub fn is_not_a_pointer(&self) -> bool {
331        matches!(self, DecodeError::NotAPointer(_))
332    }
333}
334
/// Specific reason a [`DecodeError::NotAPointer`] was returned.
///
/// Each variant captures one shape that doesn't qualify as a pointer
/// at all; callers should treat the input as opaque content.
///
/// Where a variant carries a `line`, it is a zero-based index into the
/// `\n`-separated lines of the input after surrounding whitespace has been
/// trimmed; blank lines are counted.
#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)]
pub enum NotAPointerReason {
    /// Input is at or above the [`MAX_POINTER_SIZE`] cutoff.
    ///
    /// `size` is the length of the rejected input in bytes.
    #[error("size {size} bytes is not below the {MAX_POINTER_SIZE}-byte cutoff")]
    TooLarge { size: usize },
    /// Input bytes aren't valid UTF-8; pointer files are UTF-8 by spec.
    #[error("input is not valid UTF-8")]
    NotUtf8,
    /// Input doesn't contain any of the recognized git-lfs spec markers.
    #[error("missing git-lfs spec marker")]
    MissingHeader,
    /// A line is missing the `<key> <value>` separator (a single space).
    #[error("line {line} has no key/value separator")]
    MalformedLine { line: usize },
    /// The input ended without yielding a `version` line.
    #[error("missing version line")]
    MissingVersion,
    /// A line whose key is neither `version` nor a valid extension key
    /// appeared before any `version` line. `got` is that key.
    #[error("first key is {got:?}, expected version")]
    NotVersionFirst { got: String },
    /// Trailing content past the `size` line (only `version`, optional
    /// `ext-N-name`, `oid`, `size` are allowed).
    ///
    /// `content` is the offending line without its line ending.
    #[error("extra content on line {line}: {content:?}")]
    ExtraLine { line: usize, content: String },
}
364
/// Specific reason a [`DecodeError::Malformed`] was returned.
///
/// Each variant marks a pointer that passed the not-a-pointer shape
/// checks but failed deeper validation; callers should surface the
/// error to the user.
///
/// `String` payloads hold the offending text as it appeared in the input.
#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)]
pub enum MalformedReason {
    /// The `version` value isn't one of the recognized spec URLs.
    #[error("unrecognized version: {0:?}")]
    InvalidVersion(String),
    /// A line's key was unexpected at this point in the parse.
    ///
    /// `expected` is the required key the parser was waiting for (`oid` or
    /// `size`); `got` is the key found instead, which was not a valid
    /// extension key either.
    #[error("expected key {expected:?}, got {got:?}")]
    UnexpectedKey { expected: &'static str, got: String },
    /// A required line (`oid` or `size`) was missing.
    #[error("missing required {0:?} line")]
    MissingField(&'static str),
    /// An OID value isn't in `<type>:<hash>` form (it has no `:`).
    #[error("oid value {0:?} is not in the form <type>:<hash>")]
    MalformedOidValue(String),
    /// An OID's type prefix wasn't `sha256` (the only one we support).
    #[error("unsupported oid type {0:?}; only sha256 is supported")]
    UnsupportedOidType(String),
    /// An OID hash failed [`OidParseError`] validation (length or
    /// character set).
    #[error("invalid oid hash: {0}")]
    InvalidOidHash(#[source] OidParseError),
    /// The `size` value isn't a non-negative integer, or it is too large
    /// to fit in a `u64`.
    #[error("size value {0:?} is not a non-negative integer")]
    InvalidSize(String),
    /// Two extension records shared the same priority digit.
    #[error("duplicate extension priority {0}")]
    DuplicateExtensionPriority(u8),
}
398
#[cfg(test)]
mod tests {
    use super::*;

    const SAMPLE_OID_HEX: &str = "4d7a214614ab2935c943f9e0ff69d22eadbb8f32b1258daaa5e2ca24d17e2393";

    // ---------- fixtures ----------

    /// Decode a hex fixture into an [`Oid`], panicking on bad test data.
    fn sha(hex: &str) -> Oid {
        Oid::from_hex(hex).unwrap()
    }

    /// 64 copies of `digit`: a well-formed SHA-256 hex string.
    fn hex64(digit: char) -> String {
        digit.to_string().repeat(64)
    }

    /// One extension line: `<key> sha256:<64 x digit>\n`.
    fn ext_line(key: &str, digit: char) -> String {
        format!("{key} sha256:{}\n", hex64(digit))
    }

    /// An [`Extension`] whose OID is 64 copies of `digit`.
    fn ext(name: &str, priority: u8, digit: char) -> Extension {
        Extension {
            name: name.into(),
            priority,
            oid: sha(&hex64(digit)),
        }
    }

    /// Pointer text assembled from raw pieces; `ext_lines` is spliced in
    /// verbatim between the `version` and `oid` lines.
    fn pointer_text(version: &str, ext_lines: &str, oid_value: &str, size_value: &str) -> String {
        format!("version {version}\n{ext_lines}oid {oid_value}\nsize {size_value}\n")
    }

    /// `sha256:<SAMPLE_OID_HEX>`.
    fn sample_oid_value() -> String {
        format!("sha256:{SAMPLE_OID_HEX}")
    }

    /// The canonical pointer for the sample OID with size 12345.
    fn standard_text() -> String {
        pointer_text(VERSION_LATEST, "", &sample_oid_value(), "12345")
    }

    /// The standard pointer with the given extension lines inserted.
    fn text_with_exts(ext_lines: &[String]) -> String {
        pointer_text(
            VERSION_LATEST,
            &ext_lines.concat(),
            &sample_oid_value(),
            "12345",
        )
    }

    /// The three sample extension lines in canonical (priority) order.
    fn sorted_ext_lines() -> [String; 3] {
        [
            ext_line("ext-0-foo", 'f'),
            ext_line("ext-1-bar", 'b'),
            ext_line("ext-2-baz", 'a'),
        ]
    }

    /// Parse `text`, expecting success.
    fn decode(text: &str) -> Pointer {
        Pointer::parse(text.as_bytes()).unwrap()
    }

    /// Parse `text`, expecting failure.
    fn decode_err(text: &str) -> DecodeError {
        Pointer::parse(text.as_bytes()).unwrap_err()
    }

    // ---------- encode ----------

    #[test]
    fn encode_simple() {
        let pointer = Pointer::new(sha(SAMPLE_OID_HEX), 12345);
        assert_eq!(pointer.encode(), standard_text());
    }

    #[test]
    fn encode_empty() {
        // Per spec, the empty pointer encodes to the empty string.
        assert_eq!(Pointer::empty().encode(), "");
        // Any pointer with size 0 also encodes to "" (matches upstream).
        assert_eq!(Pointer::new(sha(SAMPLE_OID_HEX), 0).encode(), "");
    }

    #[test]
    fn encode_extensions_sorted_on_output() {
        let pointer = Pointer {
            oid: sha(SAMPLE_OID_HEX),
            size: 12345,
            extensions: vec![ext("baz", 2, 'a'), ext("foo", 0, 'f'), ext("bar", 1, 'b')],
            canonical: true,
        };
        assert_eq!(pointer.encode(), text_with_exts(&sorted_ext_lines()));
    }

    // ---------- parse: happy paths ----------

    #[test]
    fn parse_standard() {
        let pointer = decode(&standard_text());
        assert_eq!(pointer.oid, sha(SAMPLE_OID_HEX));
        assert_eq!(pointer.size, 12345);
        assert!(pointer.extensions.is_empty());
        assert!(pointer.canonical);
    }

    #[test]
    fn parse_empty_input_is_empty_pointer() {
        let pointer = Pointer::parse(b"").unwrap();
        assert_eq!(pointer, Pointer::empty());
        assert!(pointer.canonical);
    }

    #[test]
    fn parse_extensions_sorted() {
        let pointer = decode(&text_with_exts(&sorted_ext_lines()));
        assert_eq!(pointer.extensions.len(), 3);
        assert_eq!(pointer.extensions[0].name, "foo");
        assert_eq!(pointer.extensions[0].priority, 0);
        assert_eq!(pointer.extensions[1].name, "bar");
        assert_eq!(pointer.extensions[2].name, "baz");
        assert!(pointer.canonical);
    }

    #[test]
    fn parse_unsorted_extensions_sorts_and_marks_noncanonical() {
        // Same content, but ext-2 listed first.
        let input = text_with_exts(&[
            ext_line("ext-2-baz", 'a'),
            ext_line("ext-0-foo", 'f'),
            ext_line("ext-1-bar", 'b'),
        ]);
        let pointer = decode(&input);
        assert_eq!(pointer.extensions[0].priority, 0);
        assert_eq!(pointer.extensions[1].priority, 1);
        assert_eq!(pointer.extensions[2].priority, 2);
        assert!(!pointer.canonical);
    }

    #[test]
    fn parse_pre_release_version_alias() {
        let input = pointer_text(
            "https://hawser.github.com/spec/v1",
            "",
            &sample_oid_value(),
            "12345",
        );
        let pointer = decode(&input);
        assert_eq!(pointer.size, 12345);
        // Re-encoding rewrites version to latest, so input is NOT canonical.
        assert!(!pointer.canonical);
        assert!(
            pointer
                .encode()
                .starts_with(&format!("version {VERSION_LATEST}\n"))
        );
    }

    #[test]
    fn parse_round_trip() {
        let original = Pointer::new(sha(SAMPLE_OID_HEX), 12345);
        let reparsed = decode(&original.encode());
        assert_eq!(reparsed.oid, original.oid);
        assert_eq!(reparsed.size, original.size);
        assert!(reparsed.canonical);
    }

    // ---------- canonical bytes ----------

    #[test]
    fn canonical_examples() {
        // Standard form, with trailing \n.
        assert!(decode(&standard_text()).canonical);

        // Empty input.
        assert!(Pointer::parse(b"").unwrap().canonical);
    }

    #[test]
    fn non_canonical_examples() {
        let cases = [
            // missing trailing newline
            format!("version {VERSION_LATEST}\noid sha256:{SAMPLE_OID_HEX}\nsize 12345"),
            // CRLF line endings
            format!("version {VERSION_LATEST}\r\noid sha256:{SAMPLE_OID_HEX}\r\nsize 12345\r\n"),
            // trailing whitespace on a line
            format!("version {VERSION_LATEST}\noid sha256:{SAMPLE_OID_HEX}\nsize 12345   \n"),
        ];
        for case in &cases {
            let pointer = Pointer::parse(case.as_bytes())
                .unwrap_or_else(|e| panic!("failed to parse {case:?}: {e}"));
            assert!(!pointer.canonical, "expected non-canonical for {case:?}");
        }
    }

    // ---------- parse: NotAPointer ----------

    #[test]
    fn tiny_non_pointer_is_not_a_pointer() {
        let err = Pointer::parse(b"this is not a git-lfs file!").unwrap_err();
        assert!(err.is_not_a_pointer(), "expected NotAPointer, got {err:?}");
    }

    #[test]
    fn header_only_is_not_a_pointer() {
        // Mentions git-media so passes the marker check, but no key/value.
        let err = Pointer::parse(b"# git-media").unwrap_err();
        assert!(err.is_not_a_pointer(), "expected NotAPointer, got {err:?}");
    }

    #[test]
    fn oversized_input_is_not_a_pointer() {
        let big = vec![b'x'; MAX_POINTER_SIZE + 1];
        let err = Pointer::parse(&big).unwrap_err();
        assert!(matches!(
            err,
            DecodeError::NotAPointer(NotAPointerReason::TooLarge { .. })
        ));
    }

    #[test]
    fn exactly_max_size_is_not_a_pointer() {
        // Spec: pointer files must be *less than* 1024 bytes. At-cutoff is too large.
        let exact = vec![b'x'; MAX_POINTER_SIZE];
        let err = Pointer::parse(&exact).unwrap_err();
        assert!(matches!(
            err,
            DecodeError::NotAPointer(NotAPointerReason::TooLarge { .. })
        ));
    }

    #[test]
    fn equals_separator_is_not_a_pointer() {
        // From upstream's TestDecodeInvalid: bad `key value` format using '='.
        let input = format!("version={VERSION_LATEST}\noid=sha256:{SAMPLE_OID_HEX}\nsize=fif");
        assert!(decode_err(&input).is_not_a_pointer());
    }

    #[test]
    fn no_marker_is_not_a_pointer() {
        let err = Pointer::parse(b"version=http://wat.io/v/2\noid=foo\nsize=fif").unwrap_err();
        assert!(matches!(
            err,
            DecodeError::NotAPointer(NotAPointerReason::MissingHeader)
        ));
    }

    #[test]
    fn missing_version_first_is_not_a_pointer() {
        // OID line first, no version. From upstream's "no version" case.
        let err = decode_err(&format!("oid sha256:{SAMPLE_OID_HEX}\nsize 12345\n"));
        assert!(err.is_not_a_pointer(), "got {err:?}");
    }

    #[test]
    fn extra_line_after_size_is_not_a_pointer() {
        let input = standard_text() + "wat wat\n";
        assert!(matches!(
            decode_err(&input),
            DecodeError::NotAPointer(NotAPointerReason::ExtraLine { .. })
        ));
    }

    // ---------- parse: Malformed ----------

    #[test]
    fn invalid_version_is_malformed() {
        // Non-empty version that isn't an alias.
        let input = pointer_text(
            "http://git-media.io/v/whatever",
            "",
            &sample_oid_value(),
            "12345",
        );
        assert!(matches!(
            decode_err(&input),
            DecodeError::Malformed(MalformedReason::InvalidVersion(_))
        ));
    }

    #[test]
    fn missing_oid_is_malformed() {
        let err = decode_err(&format!("version {VERSION_LATEST}\nsize 12345\n"));
        assert!(matches!(err, DecodeError::Malformed(_)));
    }

    #[test]
    fn missing_size_is_malformed() {
        let input = format!("version {VERSION_LATEST}\noid sha256:{SAMPLE_OID_HEX}\n");
        assert!(matches!(
            decode_err(&input),
            DecodeError::Malformed(MalformedReason::MissingField("size"))
        ));
    }

    #[test]
    fn keys_out_of_order_is_malformed() {
        let input = format!("version {VERSION_LATEST}\nsize 12345\noid sha256:{SAMPLE_OID_HEX}\n");
        assert!(matches!(
            decode_err(&input),
            DecodeError::Malformed(MalformedReason::UnexpectedKey { .. })
        ));
    }

    #[test]
    fn bad_oid_hex_is_malformed() {
        let input = pointer_text(VERSION_LATEST, "", "sha256:boom", "12345");
        assert!(matches!(
            decode_err(&input),
            DecodeError::Malformed(MalformedReason::InvalidOidHash(_))
        ));
    }

    #[test]
    fn bad_oid_type_is_malformed() {
        let oid_value = format!("shazam:{SAMPLE_OID_HEX}");
        let input = pointer_text(VERSION_LATEST, "", &oid_value, "12345");
        assert!(matches!(
            decode_err(&input),
            DecodeError::Malformed(MalformedReason::UnsupportedOidType(_))
        ));
    }

    #[test]
    fn bad_size_is_malformed() {
        let input = pointer_text(VERSION_LATEST, "", &sample_oid_value(), "fif");
        assert!(matches!(
            decode_err(&input),
            DecodeError::Malformed(MalformedReason::InvalidSize(_))
        ));
    }

    #[test]
    fn negative_size_is_malformed() {
        let input = pointer_text(VERSION_LATEST, "", &sample_oid_value(), "-1");
        assert!(matches!(
            decode_err(&input),
            DecodeError::Malformed(MalformedReason::InvalidSize(_))
        ));
    }

    #[test]
    fn oid_with_trailing_garbage_is_malformed() {
        let oid_value = format!("sha256:{SAMPLE_OID_HEX}&");
        let input = pointer_text(VERSION_LATEST, "", &oid_value, "177735");
        assert!(matches!(
            decode_err(&input),
            DecodeError::Malformed(MalformedReason::InvalidOidHash(_))
        ));
    }

    // ---------- parse: extensions ----------

    #[test]
    fn ext_priority_over_9_is_malformed() {
        // ext-10-foo: priority must be a single digit (matches upstream regex).
        let err = decode_err(&text_with_exts(&[ext_line("ext-10-foo", 'f')]));
        assert!(matches!(err, DecodeError::Malformed(_)), "got {err:?}");
    }

    #[test]
    fn ext_with_non_digit_priority_is_malformed() {
        let err = decode_err(&text_with_exts(&[ext_line("ext-#-foo", 'f')]));
        assert!(matches!(err, DecodeError::Malformed(_)), "got {err:?}");
    }

    #[test]
    fn ext_with_non_word_name_is_malformed() {
        let err = decode_err(&text_with_exts(&[ext_line("ext-0-$$$$", 'f')]));
        assert!(matches!(err, DecodeError::Malformed(_)), "got {err:?}");
    }

    #[test]
    fn ext_bad_oid_is_malformed() {
        let input = text_with_exts(&["ext-0-foo sha256:boom\n".to_owned()]);
        assert!(matches!(
            decode_err(&input),
            DecodeError::Malformed(MalformedReason::InvalidOidHash(_))
        ));
    }

    #[test]
    fn ext_bad_oid_type_is_malformed() {
        let input = text_with_exts(&[format!("ext-0-foo boom:{}\n", hex64('f'))]);
        assert!(matches!(
            decode_err(&input),
            DecodeError::Malformed(MalformedReason::UnsupportedOidType(_))
        ));
    }

    #[test]
    fn duplicate_ext_priority_is_malformed() {
        let input = text_with_exts(&[ext_line("ext-0-foo", 'f'), ext_line("ext-0-bar", 'b')]);
        assert!(matches!(
            decode_err(&input),
            DecodeError::Malformed(MalformedReason::DuplicateExtensionPriority(0))
        ));
    }
}
820            err,
821            DecodeError::Malformed(MalformedReason::DuplicateExtensionPriority(0))
822        ));
823    }
824}