delta/types.rs
1use std::fmt;
2
3// ============================================================================
4// Constants (Ajtai, Burns, Fagin, Long — JACM 2002)
5//
6// Hash parameters (Section 2.1.3):
7// p (SEED_LEN) = minimum match length / fingerprint window
8// b (HASH_BASE) = polynomial base for Karp-Rabin hash
9// Q (HASH_MOD) = Mersenne prime 2^61-1 for fingerprint arithmetic
10// q (TABLE_SIZE) = hash table capacity; correcting uses checkpointing
11// (Section 8) to fit any |R| into fixed-size table
12// Delta commands: Section 2.1.1
13// ============================================================================
14
15pub const SEED_LEN: usize = 16;
16pub const TABLE_SIZE: usize = 1048573; // largest prime < 2^20
17pub const MAX_TABLE_SIZE: usize = 1_073_741_827; // prime near 2^30; default ceiling for auto-sizing
18 // Section 8: correcting uses checkpointing to fit any |R|
19pub const HASH_BASE: u64 = 263;
20pub const HASH_MOD: u64 = (1 << 61) - 1; // Mersenne prime 2^61-1
21pub const DELTA_MAGIC: &[u8; 4] = b"DLT\x03";
22pub const DELTA_MAGIC_LARGE: &[u8; 4] = b"DLT\x04";
23pub const DELTA_FLAG_INPLACE: u8 = 0x01;
24pub const DELTA_CMD_END: u8 = 0;
25pub const DELTA_CMD_COPY: u8 = 1;
26pub const DELTA_CMD_ADD: u8 = 2;
27pub const DELTA_CMD_BIGCOPY: u8 = 3;
28pub const DELTA_CMD_BIGADD: u8 = 4;
29pub const DELTA_CMD_MOVE: u8 = 5;
30pub const DELTA_CMD_BIGMOVE: u8 = 6;
31pub const DELTA_CRC_SIZE: usize = 8;
32pub const DELTA_U32_SIZE: usize = 4;
33pub const DELTA_U64_SIZE: usize = 8;
34pub const DELTA_HEADER_SIZE: usize = 25; // magic(4)+flags(1)+version_size(4)+crcs(16)
35pub const DELTA_HEADER_SIZE_LARGE: usize = 29; // magic(4)+flags(1)+version_size(8)+crcs(16)
36pub const DELTA_COPY_PAYLOAD: usize = 12; // src(4)+dst(4)+len(4)
37pub const DELTA_ADD_HEADER: usize = 8; // dst(4)+len(4)
38pub const DELTA_BIGCOPY_PAYLOAD: usize = 24; // src(8)+dst(8)+len(8)
39pub const DELTA_BIGADD_HEADER: usize = 16; // dst(8)+len(8)
40pub const DELTA_BUF_CAP: usize = 256;
41
42// ============================================================================
43// Delta Commands (Section 2.1.1)
44// ============================================================================
45
46/// Algorithm output: copy from reference or add literal bytes.
47#[derive(Clone, Debug, PartialEq, Eq)]
48pub enum Command {
49 Copy { offset: usize, length: usize },
50 Add { data: Vec<u8> },
51}
52
53impl fmt::Display for Command {
54 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
55 match self {
56 Command::Copy { offset, length } => write!(f, "COPY(off={}, len={})", offset, length),
57 Command::Add { data } => {
58 if data.len() <= 20 {
59 write!(f, "ADD({:?})", data)
60 } else {
61 write!(f, "ADD(len={})", data.len())
62 }
63 }
64 }
65 }
66}
67
68// ============================================================================
69// Placed Commands — ready for encoding and application
70// ============================================================================
71
72/// A command with explicit source and destination offsets.
73///
74/// For standard deltas, `Copy::src` is an offset into the reference and
75/// `Copy::dst` is the write position in the output. For in-place deltas,
76/// both refer to positions in the shared working buffer.
77///
78/// `Move` copies from already-written output buffer at `src` to `dst`.
79/// Constraint: `src + length <= dst` (only previously written bytes).
80/// DLT\x04 only; always safe for in-place (no CRWI cycle possible).
81#[derive(Clone, Debug, PartialEq, Eq)]
82pub enum PlacedCommand {
83 Copy { src: usize, dst: usize, length: usize },
84 Add { dst: usize, data: Vec<u8> },
85 Move { src: usize, dst: usize, length: usize },
86}
87
88impl fmt::Display for PlacedCommand {
89 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
90 match self {
91 PlacedCommand::Copy { src, dst, length } => {
92 write!(f, "COPY(src={}, dst={}, len={})", src, dst, length)
93 }
94 PlacedCommand::Add { dst, data } => {
95 if data.len() <= 20 {
96 write!(f, "ADD(dst={}, {:?})", dst, data)
97 } else {
98 write!(f, "ADD(dst={}, len={})", dst, data.len())
99 }
100 }
101 PlacedCommand::Move { src, dst, length } => {
102 write!(f, "MOVE(src={}, dst={}, len={})", src, dst, length)
103 }
104 }
105 }
106}
107
108// ============================================================================
109// Algorithm and Policy enums
110// ============================================================================
111
112/// Differencing algorithm selection.
113#[derive(Clone, Copy, Debug, PartialEq, Eq)]
114pub enum Algorithm {
115 /// Optimal under simple cost; O(|V|·|R|) time, O(|R|) space (Section 3).
116 Greedy,
117 /// Linear time and near-constant space; concurrent scan of R and V (Section 4).
118 Onepass,
119 /// Near-optimal, 1.5-pass; hash table with fingerprint checkpointing (Sections 7–8).
120 Correcting,
121}
122
123/// Cycle-breaking policy for in-place reordering (Section 4.3 of Burns et al. 2003).
124#[derive(Clone, Copy, Debug, PartialEq, Eq)]
125pub enum CyclePolicy {
126 /// Break each cycle at the copy with the shortest length, minimising literal bytes added.
127 Localmin,
128 /// Break each cycle at the first remaining vertex; simpler but ignores copy lengths.
129 Constant,
130}
131
132/// Tuning parameters for differencing algorithms.
133#[derive(Clone, Debug)]
134pub struct DiffOptions {
135 /// Seed length: minimum match length and fingerprint window (Section 2.1.3).
136 pub p: usize,
137 /// Hash table capacity floor; algorithms auto-size upward from input length.
138 pub q: usize,
139 /// Lookback buffer depth for the correcting algorithm (Section 5.2).
140 pub buf_cap: usize,
141 /// Print per-run statistics to stderr when true.
142 pub verbose: bool,
143 /// Use a Sleator-Tarjan splay tree instead of a hash table for R lookups.
144 pub use_splay: bool,
145 /// Auto-sizing ceiling; prevents unbounded memory use on very large inputs.
146 pub max_table: usize,
147}
148
149impl Default for DiffOptions {
150 fn default() -> Self {
151 Self {
152 p: SEED_LEN,
153 q: TABLE_SIZE,
154 buf_cap: DELTA_BUF_CAP,
155 verbose: false,
156 use_splay: false,
157 max_table: MAX_TABLE_SIZE,
158 }
159 }
160}
161
162// ============================================================================
163// Error type
164// ============================================================================
165
166/// Errors produced by delta encoding, decoding, and I/O.
167#[derive(Debug)]
168pub enum DeltaError {
169 /// The binary data does not match the expected delta format.
170 InvalidFormat(String),
171 /// The delta data ended before all commands were read.
172 UnexpectedEof,
173 /// An I/O error occurred while reading or writing a file.
174 IoError(std::io::Error),
175}
176
177impl fmt::Display for DeltaError {
178 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
179 match self {
180 DeltaError::InvalidFormat(msg) => write!(f, "invalid delta format: {}", msg),
181 DeltaError::UnexpectedEof => write!(f, "unexpected end of delta data"),
182 DeltaError::IoError(e) => write!(f, "I/O error: {}", e),
183 }
184 }
185}
186
187impl std::error::Error for DeltaError {}
188
189impl From<std::io::Error> for DeltaError {
190 fn from(e: std::io::Error) -> Self {
191 DeltaError::IoError(e)
192 }
193}
194
195// ============================================================================
196// Summary statistics
197// ============================================================================
198
199/// Summary statistics for a set of commands.
200#[derive(Debug)]
201pub struct DeltaSummary {
202 pub num_commands: usize,
203 pub num_copies: usize,
204 pub num_adds: usize,
205 pub copy_bytes: usize,
206 pub add_bytes: usize,
207 pub total_output_bytes: usize,
208}
209
210/// Compute summary statistics from algorithm-level commands.
211pub fn delta_summary(commands: &[Command]) -> DeltaSummary {
212 let mut num_copies = 0;
213 let mut num_adds = 0;
214 let mut copy_bytes = 0;
215 let mut add_bytes = 0;
216 for cmd in commands {
217 match cmd {
218 Command::Copy { length, .. } => {
219 num_copies += 1;
220 copy_bytes += length;
221 }
222 Command::Add { data } => {
223 num_adds += 1;
224 add_bytes += data.len();
225 }
226 }
227 }
228 DeltaSummary {
229 num_commands: commands.len(),
230 num_copies,
231 num_adds,
232 copy_bytes,
233 add_bytes,
234 total_output_bytes: copy_bytes + add_bytes,
235 }
236}
237
238/// Compute summary statistics from placed commands.
239pub fn placed_summary(commands: &[PlacedCommand]) -> DeltaSummary {
240 let mut num_copies = 0;
241 let mut num_adds = 0;
242 let mut copy_bytes = 0;
243 let mut add_bytes = 0;
244 for cmd in commands {
245 match cmd {
246 PlacedCommand::Copy { length, .. } => {
247 num_copies += 1;
248 copy_bytes += length;
249 }
250 PlacedCommand::Add { data, .. } => {
251 num_adds += 1;
252 add_bytes += data.len();
253 }
254 PlacedCommand::Move { length, .. } => {
255 // Move is counted with copies: it copies from already-written output.
256 num_copies += 1;
257 copy_bytes += length;
258 }
259 }
260 }
261 DeltaSummary {
262 num_commands: commands.len(),
263 num_copies,
264 num_adds,
265 copy_bytes,
266 add_bytes,
267 total_output_bytes: copy_bytes + add_bytes,
268 }
269}