Skip to main content

delta/
types.rs

1use std::fmt;
2
3// ============================================================================
4// Constants (Ajtai, Burns, Fagin, Long — JACM 2002)
5//
6// Hash parameters (Section 2.1.3):
7//   p (SEED_LEN)  = minimum match length / fingerprint window
8//   b (HASH_BASE) = polynomial base for Karp-Rabin hash
9//   Q (HASH_MOD)  = Mersenne prime 2^61-1 for fingerprint arithmetic
10//   q (TABLE_SIZE) = hash table capacity; correcting uses checkpointing
11//                    (Section 8) to fit any |R| into fixed-size table
12// Delta commands: Section 2.1.1
13// ============================================================================
14
15pub const SEED_LEN: usize = 16;
16pub const TABLE_SIZE: usize = 1048573;      // largest prime < 2^20
17pub const MAX_TABLE_SIZE: usize = 1_073_741_827; // prime near 2^30; default ceiling for auto-sizing
18                                                  // Section 8: correcting uses checkpointing to fit any |R|
19pub const HASH_BASE: u64 = 263;
20pub const HASH_MOD: u64 = (1 << 61) - 1; // Mersenne prime 2^61-1
21pub const DELTA_MAGIC:    &[u8; 4] = b"DLT\x03";
22pub const DELTA_MAGIC_LARGE: &[u8; 4] = b"DLT\x04";
23pub const DELTA_FLAG_INPLACE:  u8 = 0x01;
24pub const DELTA_CMD_END:     u8 = 0;
25pub const DELTA_CMD_COPY:    u8 = 1;
26pub const DELTA_CMD_ADD:     u8 = 2;
27pub const DELTA_CMD_BIGCOPY: u8 = 3;
28pub const DELTA_CMD_BIGADD:  u8 = 4;
29pub const DELTA_CMD_MOVE:    u8 = 5;
30pub const DELTA_CMD_BIGMOVE: u8 = 6;
31pub const DELTA_CRC_SIZE:        usize = 8;
32pub const DELTA_U32_SIZE:        usize = 4;
33pub const DELTA_U64_SIZE:        usize = 8;
34pub const DELTA_HEADER_SIZE:     usize = 25; // magic(4)+flags(1)+version_size(4)+crcs(16)
35pub const DELTA_HEADER_SIZE_LARGE:  usize = 29; // magic(4)+flags(1)+version_size(8)+crcs(16)
36pub const DELTA_COPY_PAYLOAD:    usize = 12; // src(4)+dst(4)+len(4)
37pub const DELTA_ADD_HEADER:      usize = 8;  // dst(4)+len(4)
38pub const DELTA_BIGCOPY_PAYLOAD: usize = 24; // src(8)+dst(8)+len(8)
39pub const DELTA_BIGADD_HEADER:   usize = 16; // dst(8)+len(8)
40pub const DELTA_BUF_CAP: usize = 256;
41
42// ============================================================================
43// Delta Commands (Section 2.1.1)
44// ============================================================================
45
46/// Algorithm output: copy from reference or add literal bytes.
47#[derive(Clone, Debug, PartialEq, Eq)]
48pub enum Command {
49    Copy { offset: usize, length: usize },
50    Add { data: Vec<u8> },
51}
52
53impl fmt::Display for Command {
54    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
55        match self {
56            Command::Copy { offset, length } => write!(f, "COPY(off={}, len={})", offset, length),
57            Command::Add { data } => {
58                if data.len() <= 20 {
59                    write!(f, "ADD({:?})", data)
60                } else {
61                    write!(f, "ADD(len={})", data.len())
62                }
63            }
64        }
65    }
66}
67
68// ============================================================================
69// Placed Commands — ready for encoding and application
70// ============================================================================
71
72/// A command with explicit source and destination offsets.
73///
74/// For standard deltas, `Copy::src` is an offset into the reference and
75/// `Copy::dst` is the write position in the output.  For in-place deltas,
76/// both refer to positions in the shared working buffer.
77///
78/// `Move` copies from already-written output buffer at `src` to `dst`.
79/// Constraint: `src + length <= dst` (only previously written bytes).
80/// DLT\x04 only; always safe for in-place (no CRWI cycle possible).
81#[derive(Clone, Debug, PartialEq, Eq)]
82pub enum PlacedCommand {
83    Copy { src: usize, dst: usize, length: usize },
84    Add  { dst: usize, data: Vec<u8> },
85    Move { src: usize, dst: usize, length: usize },
86}
87
88impl fmt::Display for PlacedCommand {
89    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
90        match self {
91            PlacedCommand::Copy { src, dst, length } => {
92                write!(f, "COPY(src={}, dst={}, len={})", src, dst, length)
93            }
94            PlacedCommand::Add { dst, data } => {
95                if data.len() <= 20 {
96                    write!(f, "ADD(dst={}, {:?})", dst, data)
97                } else {
98                    write!(f, "ADD(dst={}, len={})", dst, data.len())
99                }
100            }
101            PlacedCommand::Move { src, dst, length } => {
102                write!(f, "MOVE(src={}, dst={}, len={})", src, dst, length)
103            }
104        }
105    }
106}
107
108// ============================================================================
109// Algorithm and Policy enums
110// ============================================================================
111
112/// Differencing algorithm selection.
113#[derive(Clone, Copy, Debug, PartialEq, Eq)]
114pub enum Algorithm {
115    /// Optimal under simple cost; O(|V|·|R|) time, O(|R|) space (Section 3).
116    Greedy,
117    /// Linear time and near-constant space; concurrent scan of R and V (Section 4).
118    Onepass,
119    /// Near-optimal, 1.5-pass; hash table with fingerprint checkpointing (Sections 7–8).
120    Correcting,
121}
122
123/// Cycle-breaking policy for in-place reordering (Section 4.3 of Burns et al. 2003).
124#[derive(Clone, Copy, Debug, PartialEq, Eq)]
125pub enum CyclePolicy {
126    /// Break each cycle at the copy with the shortest length, minimising literal bytes added.
127    Localmin,
128    /// Break each cycle at the first remaining vertex; simpler but ignores copy lengths.
129    Constant,
130}
131
132/// Tuning parameters for differencing algorithms.
133#[derive(Clone, Debug)]
134pub struct DiffOptions {
135    /// Seed length: minimum match length and fingerprint window (Section 2.1.3).
136    pub p: usize,
137    /// Hash table capacity floor; algorithms auto-size upward from input length.
138    pub q: usize,
139    /// Lookback buffer depth for the correcting algorithm (Section 5.2).
140    pub buf_cap: usize,
141    /// Print per-run statistics to stderr when true.
142    pub verbose: bool,
143    /// Use a Sleator-Tarjan splay tree instead of a hash table for R lookups.
144    pub use_splay: bool,
145    /// Auto-sizing ceiling; prevents unbounded memory use on very large inputs.
146    pub max_table: usize,
147}
148
149impl Default for DiffOptions {
150    fn default() -> Self {
151        Self {
152            p: SEED_LEN,
153            q: TABLE_SIZE,
154            buf_cap: DELTA_BUF_CAP,
155            verbose: false,
156            use_splay: false,
157            max_table: MAX_TABLE_SIZE,
158        }
159    }
160}
161
162// ============================================================================
163// Error type
164// ============================================================================
165
166/// Errors produced by delta encoding, decoding, and I/O.
167#[derive(Debug)]
168pub enum DeltaError {
169    /// The binary data does not match the expected delta format.
170    InvalidFormat(String),
171    /// The delta data ended before all commands were read.
172    UnexpectedEof,
173    /// An I/O error occurred while reading or writing a file.
174    IoError(std::io::Error),
175}
176
177impl fmt::Display for DeltaError {
178    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
179        match self {
180            DeltaError::InvalidFormat(msg) => write!(f, "invalid delta format: {}", msg),
181            DeltaError::UnexpectedEof => write!(f, "unexpected end of delta data"),
182            DeltaError::IoError(e) => write!(f, "I/O error: {}", e),
183        }
184    }
185}
186
187impl std::error::Error for DeltaError {}
188
189impl From<std::io::Error> for DeltaError {
190    fn from(e: std::io::Error) -> Self {
191        DeltaError::IoError(e)
192    }
193}
194
195// ============================================================================
196// Summary statistics
197// ============================================================================
198
199/// Summary statistics for a set of commands.
200#[derive(Debug)]
201pub struct DeltaSummary {
202    pub num_commands: usize,
203    pub num_copies: usize,
204    pub num_adds: usize,
205    pub copy_bytes: usize,
206    pub add_bytes: usize,
207    pub total_output_bytes: usize,
208}
209
210/// Compute summary statistics from algorithm-level commands.
211pub fn delta_summary(commands: &[Command]) -> DeltaSummary {
212    let mut num_copies = 0;
213    let mut num_adds = 0;
214    let mut copy_bytes = 0;
215    let mut add_bytes = 0;
216    for cmd in commands {
217        match cmd {
218            Command::Copy { length, .. } => {
219                num_copies += 1;
220                copy_bytes += length;
221            }
222            Command::Add { data } => {
223                num_adds += 1;
224                add_bytes += data.len();
225            }
226        }
227    }
228    DeltaSummary {
229        num_commands: commands.len(),
230        num_copies,
231        num_adds,
232        copy_bytes,
233        add_bytes,
234        total_output_bytes: copy_bytes + add_bytes,
235    }
236}
237
238/// Compute summary statistics from placed commands.
239pub fn placed_summary(commands: &[PlacedCommand]) -> DeltaSummary {
240    let mut num_copies = 0;
241    let mut num_adds = 0;
242    let mut copy_bytes = 0;
243    let mut add_bytes = 0;
244    for cmd in commands {
245        match cmd {
246            PlacedCommand::Copy { length, .. } => {
247                num_copies += 1;
248                copy_bytes += length;
249            }
250            PlacedCommand::Add { data, .. } => {
251                num_adds += 1;
252                add_bytes += data.len();
253            }
254            PlacedCommand::Move { length, .. } => {
255                // Move is counted with copies: it copies from already-written output.
256                num_copies += 1;
257                copy_bytes += length;
258            }
259        }
260    }
261    DeltaSummary {
262        num_commands: commands.len(),
263        num_copies,
264        num_adds,
265        copy_bytes,
266        add_bytes,
267        total_output_bytes: copy_bytes + add_bytes,
268    }
269}