gdelta/lib.rs
1//! # gdelta
2//!
3//! A fast delta compression algorithm for similar data chunks.
4//!
5//! `GDelta` efficiently encodes the differences between similar data chunks using
6//! GEAR rolling hash for pattern matching and variable-length integer encoding
7//! for space efficiency.
8//!
9//! ## Quick Start
10//!
11//! ```
12//! use gdelta::{encode, decode};
13//!
14//! let base = b"Hello, World!";
15//! let new = b"Hello, Rust!";
16//!
17//! // Encode the delta
18//! let delta = encode(new, base).unwrap();
19//!
20//! // Decode to recover the new data
21//! let recovered = decode(&delta, base).unwrap();
22//! assert_eq!(recovered, new);
23//! ```
24//!
25//! ## Algorithm Details
26//!
27//! The algorithm works by:
28//! 1. Finding common prefix and suffix between base and new data
29//! 2. Building a hash table of the base data using GEAR rolling hash
30//! 3. Scanning the new data to find matches in the base
31//! 4. Encoding the result as copy and literal instructions
32//!
33//! ## Performance
34//!
35//! `GDelta` is optimized for:
36//! - Speed: Faster than Xdelta, Zdelta, Ddelta, and Edelta
37//! - Similar chunks: Best for data chunks 4KB - 64KB in size
38//! - Inter-chunk redundancy: Removes redundancy between similar chunks
39//!
40//! For maximum compression, combine `GDelta` with a general-purpose compressor
41//! like ZSTD or LZ4.
42
43#![forbid(unsafe_code)]
44#![warn(missing_docs)]
45#![warn(clippy::all)]
46
47mod buffer;
48mod delta;
49mod error;
50mod gear;
51mod varint;
52
53pub use error::{GDeltaError, Result};
54
55/// Encodes the delta between new data and base data.
56///
57/// This function computes a compact representation of the differences between
58/// `new_data` and `base_data`. The resulting delta can be later used with
59/// [`decode`] to reconstruct the new data.
60///
61/// # Arguments
62///
63/// * `new_data` - The target data to encode
64/// * `base_data` - The reference data to encode against
65///
66/// # Returns
67///
68/// A `Vec<u8>` containing the encoded delta, or a [`GDeltaError`] if encoding fails.
69///
70/// # Errors
71///
72/// Currently, encoding does not fail under normal circumstances. The `Result` type
73/// is used for API consistency with `decode` and to allow for future validation
74/// or error conditions without breaking the API.
75///
76/// # Examples
77///
78/// ```
79/// use gdelta::encode;
80///
81/// let base = b"The quick brown fox jumps over the lazy dog";
82/// let new = b"The quick brown cat jumps over the lazy dog";
83///
84/// let delta = encode(new, base).unwrap();
85/// println!("Delta size: {} bytes", delta.len());
86/// ```
87///
88/// # Performance
89///
90/// The encoding time is roughly proportional to the size of the new data,
91/// with additional overhead for building the hash table of the base data.
92/// Typical throughput is several hundred MB/s on modern hardware.
93pub fn encode(new_data: &[u8], base_data: &[u8]) -> Result<Vec<u8>> {
94 delta::encode(new_data, base_data)
95}
96
97/// Decodes delta data using the base data to reconstruct the original.
98///
99/// This function applies the delta (created by [`encode`]) to the base data
100/// to reconstruct the new data.
101///
102/// # Arguments
103///
104/// * `delta` - The encoded delta data
105/// * `base_data` - The same base data used during encoding
106///
107/// # Returns
108///
109/// A `Vec<u8>` containing the reconstructed data, or a [`GDeltaError`] if
110/// decoding fails (e.g., corrupted delta data).
111///
112/// # Errors
113///
114/// Returns `GDeltaError::InvalidDelta` if:
115/// - The delta data is corrupted or malformed
116/// - The instruction length exceeds the delta size
117/// - A copy instruction references data beyond the base data bounds
118///
119/// # Examples
120///
121/// ```
122/// use gdelta::{encode, decode};
123///
124/// let base = b"Hello, World!";
125/// let new = b"Hello, Rust!";
126///
127/// let delta = encode(new, base).unwrap();
128/// let recovered = decode(&delta, base).unwrap();
129///
130/// assert_eq!(recovered, new);
131/// ```
132///
133/// # Performance
134///
135/// Decoding is typically faster than encoding, as it only needs to follow
136/// the instructions in the delta without performing hash table lookups.
137pub fn decode(delta: &[u8], base_data: &[u8]) -> Result<Vec<u8>> {
138 delta::decode(delta, base_data)
139}
140
141#[cfg(test)]
142mod tests {
143 use super::*;
144
145 #[test]
146 fn test_encode_decode_identical() {
147 let data = b"Hello, World!";
148 let delta = encode(data, data).unwrap();
149 let recovered = decode(&delta[..], data).unwrap();
150 assert_eq!(recovered, data);
151 }
152
153 #[test]
154 fn test_encode_decode_different() {
155 let base = b"The quick brown fox jumps over the lazy dog";
156 let new = b"The quick brown cat jumps over the lazy dog";
157
158 let delta = encode(new, base).unwrap();
159 let recovered = decode(&delta[..], base).unwrap();
160 assert_eq!(recovered, new);
161 }
162
163 #[test]
164 fn test_encode_decode_empty() {
165 let base = b"Some data";
166 let new = b"";
167
168 let delta = encode(new, base).unwrap();
169 let recovered = decode(&delta[..], base).unwrap();
170 assert_eq!(recovered, new);
171 }
172
173 #[test]
174 #[allow(clippy::cast_possible_truncation)]
175 fn test_encode_decode_large() {
176 let mut base = vec![0u8; 100_000];
177 let mut new = vec![0u8; 100_000];
178
179 // Fill with pattern
180 for i in 0..base.len() {
181 base[i] = (i % 256) as u8;
182 new[i] = (i % 256) as u8;
183 }
184
185 // Make some changes
186 for i in (0..new.len()).step_by(488) {
187 if i < new.len() {
188 new[i] = new[i].wrapping_add(1);
189 }
190 }
191
192 let delta = encode(&new, &base).unwrap();
193 let recovered = decode(&delta, &base).unwrap();
194 assert_eq!(recovered, new);
195
196 // Delta should be smaller than new data
197 assert!(delta.len() < new.len());
198 }
199}