Skip to main content

grit_lib/
delta_encode.rs

1//! Encode Git pack binary deltas (format decoded by [`crate::unpack_objects::apply_delta`]).
2//!
3//! Used when writing `REF_DELTA` pack objects so repositories can store similar blobs
4//! compactly (see `git diff-delta` / `create_delta` in upstream Git).
5
6use crate::error::{Error, Result};
7
8fn write_delta_varint(out: &mut Vec<u8>, mut n: usize) {
9    loop {
10        let mut b = (n & 0x7f) as u8;
11        n >>= 7;
12        if n != 0 {
13            b |= 0x80;
14        }
15        out.push(b);
16        if n == 0 {
17            break;
18        }
19    }
20}
21
22/// Emit one `COPY` opcode (copy `size` bytes from `offset` in the base object).
23fn push_copy(out: &mut Vec<u8>, mut offset: usize, mut size: usize) -> Result<()> {
24    if size == 0 {
25        return Ok(());
26    }
27    // Git limits one COPY to 64 KiB; split larger runs.
28    while size > 0 {
29        let chunk = size.min(0x10000);
30        let mut op = 0x80u8;
31        let moff = offset;
32        let msize = chunk;
33
34        if moff & 0x0000_00ff != 0 {
35            op |= 0x01;
36        }
37        if moff & 0x0000_ff00 != 0 {
38            op |= 0x02;
39        }
40        if moff & 0x00ff_0000 != 0 {
41            op |= 0x04;
42        }
43        if moff & 0xff00_0000 != 0 {
44            op |= 0x08;
45        }
46
47        // 65536-byte copies omit size bytes; the decoder treats missing size as 0x10000.
48        if msize != 0x10000 {
49            if msize & 0x00ff != 0 {
50                op |= 0x10;
51            }
52            if msize & 0xff00 != 0 {
53                op |= 0x20;
54            }
55        }
56
57        out.push(op);
58        if op & 0x01 != 0 {
59            out.push(moff as u8);
60        }
61        if op & 0x02 != 0 {
62            out.push((moff >> 8) as u8);
63        }
64        if op & 0x04 != 0 {
65            out.push((moff >> 16) as u8);
66        }
67        if op & 0x08 != 0 {
68            out.push((moff >> 24) as u8);
69        }
70        if msize != 0x10000 {
71            if op & 0x10 != 0 {
72                out.push(msize as u8);
73            }
74            if op & 0x20 != 0 {
75                out.push((msize >> 8) as u8);
76            }
77        }
78
79        offset = offset.saturating_add(chunk);
80        size -= chunk;
81    }
82    Ok(())
83}
84
85fn push_insert(out: &mut Vec<u8>, mut data: &[u8]) {
86    while !data.is_empty() {
87        let n = data.len().min(127);
88        out.push(n as u8);
89        out.extend_from_slice(&data[..n]);
90        data = &data[n..];
91    }
92}
93
94/// Build a delta when `target` begins with the entire `base` buffer (strict extension).
95///
96/// This matches the common pack-objects case where one blob is a suffix of another.
97pub fn encode_prefix_extension_delta(base: &[u8], target: &[u8]) -> Result<Vec<u8>> {
98    if !target.starts_with(base) || target.len() <= base.len() {
99        return Err(Error::CorruptObject(
100            "encode_prefix_extension_delta: target must strictly extend base".into(),
101        ));
102    }
103    let mut out = Vec::new();
104    write_delta_varint(&mut out, base.len());
105    write_delta_varint(&mut out, target.len());
106    push_copy(&mut out, 0, base.len())?;
107    push_insert(&mut out, &target[base.len()..]);
108    Ok(out)
109}
110
111/// Encode a binary delta using the longest common prefix of `base` and `target`.
112///
113/// The reconstructed object is `target`; this matches Git's thin-pack deltas when successive
114/// versions share a long prefix but are not strict extensions (e.g. `…\\n9` vs `…\\n10`).
115pub fn encode_lcp_delta(base: &[u8], target: &[u8]) -> Result<Vec<u8>> {
116    let lcp = base
117        .iter()
118        .zip(target.iter())
119        .take_while(|(a, b)| a == b)
120        .count();
121    let mut out = Vec::new();
122    write_delta_varint(&mut out, base.len());
123    write_delta_varint(&mut out, target.len());
124    push_copy(&mut out, 0, lcp)?;
125    push_insert(&mut out, &target[lcp..]);
126    Ok(out)
127}
128
129#[cfg(test)]
130mod tests {
131    use super::*;
132    use crate::unpack_objects::apply_delta;
133
134    #[test]
135    fn roundtrip_prefix_delta() {
136        let base = b"hello world".repeat(100);
137        let mut target = base.clone();
138        target.extend_from_slice(b"\nextra suffix\n");
139        let delta = encode_prefix_extension_delta(&base, &target).unwrap();
140        let got = apply_delta(&base, &delta).unwrap();
141        assert_eq!(got, target);
142    }
143
144    #[test]
145    fn roundtrip_lcp_delta() {
146        let base = b"padding\n9";
147        let target = b"padding\n10";
148        let delta = encode_lcp_delta(base, target).unwrap();
149        let got = apply_delta(base, &delta).unwrap();
150        assert_eq!(got, target);
151    }
152}