Skip to main content

objects/delta/
delta_decoder.rs

1// SPDX-License-Identifier: Apache-2.0
2//! Delta decoder for Git-style compact copy instructions.
3//!
4//! Copy instruction format:
5//! ```text
//! Byte 0: 1sssoooo
//!   o bits (0-3): which offset bytes follow
//!   s bits (4-6): which size bytes follow (a decoded size of zero means 0x10000)
9//! [offset bytes, low to high]
10//! [size bytes, low to high]
11//! ```
12//!
13//! Insert instruction: `[length-1] [literal bytes]`
14
/// Default upper bound, in bytes, on the size of a decoded delta (128 MiB).
pub const MAX_DELTA_OUTPUT_SIZE: usize = 128 << 20;
17
18/// Errors that can occur while decoding a delta stream.
19#[derive(Debug, Clone, PartialEq, Eq, thiserror::Error)]
20pub enum DeltaError {
21    #[error("delta output exceeds max size {max_output} bytes (attempted {attempted} bytes)")]
22    OutputLimitExceeded { attempted: usize, max_output: usize },
23
24    #[error(
25        "delta literal at instruction {instruction_offset} requires {length} bytes, but only {available} remain"
26    )]
27    TruncatedLiteral {
28        instruction_offset: usize,
29        length: usize,
30        available: usize,
31    },
32
33    #[error(
34        "delta copy at instruction {instruction_offset} needs {expected_bytes} more bytes, but only {available} remain"
35    )]
36    TruncatedCopyInstruction {
37        instruction_offset: usize,
38        expected_bytes: usize,
39        available: usize,
40    },
41
42    #[error(
43        "delta copy at instruction {instruction_offset} references base range {copy_offset}..{copy_end}, but base length is {base_len}"
44    )]
45    InvalidBaseRange {
46        instruction_offset: usize,
47        copy_offset: usize,
48        copy_end: usize,
49        base_len: usize,
50    },
51
52    #[error("reserved delta instruction 0x80 at offset {instruction_offset}")]
53    ReservedInstruction { instruction_offset: usize },
54}
55
/// Stateless decoder for delta streams.
///
/// The type carries no data, so it is trivially copyable and every value
/// compares equal to every other.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct DeltaDecoder;
59
60impl DeltaDecoder {
61    /// Create a new delta decoder.
62    pub fn new() -> Self {
63        Self
64    }
65
66    /// Decode a delta to reconstruct the target from base.
67    pub fn decode(base: &[u8], delta: &[u8], max_output: usize) -> Result<Vec<u8>, DeltaError> {
68        let mut target = Vec::new();
69        let mut pos = 0;
70
71        while pos < delta.len() {
72            let instruction_offset = pos;
73            let header = delta[pos];
74            pos += 1;
75
76            if header & 0x80 == 0 {
77                // Insert instruction
78                let len = (header + 1) as usize;
79                if pos + len > delta.len() {
80                    return Err(DeltaError::TruncatedLiteral {
81                        instruction_offset,
82                        length: len,
83                        available: delta.len().saturating_sub(pos),
84                    });
85                }
86
87                Self::ensure_output_limit(target.len(), len, max_output)?;
88                target.extend_from_slice(&delta[pos..pos + len]);
89                pos += len;
90                continue;
91            }
92
93            // Copy instruction: 1oooosss [offset bytes] [size bytes]
94            // cmd=0x80 with no offset/size bits set is reserved
95            if header == 0x80 {
96                return Err(DeltaError::ReservedInstruction { instruction_offset });
97            }
98
99            // Count expected bytes from flag bits
100            // Bits 0-3: offset byte flags, bits 4-6: size byte flags
101            let expected = (header & 0x01 != 0) as usize
102                + (header & 0x02 != 0) as usize
103                + (header & 0x04 != 0) as usize
104                + (header & 0x08 != 0) as usize
105                + (header & 0x10 != 0) as usize
106                + (header & 0x20 != 0) as usize
107                + (header & 0x40 != 0) as usize;
108
109            if pos + expected > delta.len() {
110                return Err(DeltaError::TruncatedCopyInstruction {
111                    instruction_offset,
112                    expected_bytes: expected,
113                    available: delta.len().saturating_sub(pos),
114                });
115            }
116
117            // Decode offset (bits 0-3)
118            let mut offset: usize = 0;
119            if header & 0x01 != 0 {
120                offset |= delta[pos] as usize;
121                pos += 1;
122            }
123            if header & 0x02 != 0 {
124                offset |= (delta[pos] as usize) << 8;
125                pos += 1;
126            }
127            if header & 0x04 != 0 {
128                offset |= (delta[pos] as usize) << 16;
129                pos += 1;
130            }
131            if header & 0x08 != 0 {
132                offset |= (delta[pos] as usize) << 24;
133                pos += 1;
134            }
135
136            // Decode size (bits 4-6)
137            let mut length: usize = 0;
138            if header & 0x10 != 0 {
139                length |= delta[pos] as usize;
140                pos += 1;
141            }
142            if header & 0x20 != 0 {
143                length |= (delta[pos] as usize) << 8;
144                pos += 1;
145            }
146            if header & 0x40 != 0 {
147                length |= (delta[pos] as usize) << 16;
148                pos += 1;
149            }
150            // If no size bits set, size = 0x10000
151            if length == 0 {
152                length = 0x10000;
153            }
154
155            let copy_end = offset
156                .checked_add(length)
157                .ok_or(DeltaError::InvalidBaseRange {
158                    instruction_offset,
159                    copy_offset: offset,
160                    copy_end: usize::MAX,
161                    base_len: base.len(),
162                })?;
163
164            if copy_end > base.len() {
165                return Err(DeltaError::InvalidBaseRange {
166                    instruction_offset,
167                    copy_offset: offset,
168                    copy_end,
169                    base_len: base.len(),
170                });
171            }
172
173            Self::ensure_output_limit(target.len(), length, max_output)?;
174            target.extend_from_slice(&base[offset..copy_end]);
175        }
176
177        Ok(target)
178    }
179
180    fn ensure_output_limit(
181        current_len: usize,
182        append_len: usize,
183        max_output: usize,
184    ) -> Result<(), DeltaError> {
185        let attempted =
186            current_len
187                .checked_add(append_len)
188                .ok_or(DeltaError::OutputLimitExceeded {
189                    attempted: usize::MAX,
190                    max_output,
191                })?;
192
193        if attempted > max_output {
194            return Err(DeltaError::OutputLimitExceeded {
195                attempted,
196                max_output,
197            });
198        }
199
200        Ok(())
201    }
202}
203
204impl Default for DeltaDecoder {
205    fn default() -> Self {
206        Self::new()
207    }
208}