Skip to main content

delta/algorithm/
greedy.rs

1use std::collections::HashMap;
2
3use crate::hash::RollingHash;
4use crate::splay::SplayTree;
5use crate::types::{Command, DiffOptions};
6
7/// Greedy algorithm (Section 3.1, Figure 2).
8///
9/// Finds an optimal delta encoding under the simple cost measure
10/// (optimality proof: Section 3.3, Theorem 1).
11/// Uses a chained hash table (HashMap) or splay tree storing ALL offsets
12/// in R per fingerprint.
13/// Time: O(|V| * |R|) worst case. Space: O(|R|).
14pub fn diff_greedy(r: &[u8], v: &[u8], opts: &DiffOptions) -> Vec<Command> {
15    let p = opts.p;
16    let verbose = opts.verbose;
17    let use_splay = opts.use_splay;
18
19    let mut commands = Vec::new();
20    if v.is_empty() {
21        return commands;
22    }
23
24    // Step (1): Build lookup structure for R keyed by full fingerprint.
25    // Hash table (default) or splay tree (--splay).
26    let mut h_r: HashMap<u64, Vec<usize>> = HashMap::new();
27    let mut splay_r: SplayTree<Vec<usize>> = SplayTree::new();
28
29    if r.len() >= p {
30        let mut rh = RollingHash::new(r, 0, p);
31        if use_splay {
32            splay_r.insert_or_get(rh.value(), Vec::new()).push(0);
33        } else {
34            h_r.entry(rh.value()).or_default().push(0);
35        }
36        for a in 1..=(r.len() - p) {
37            rh.roll(r[a - 1], r[a + p - 1]);
38            if use_splay {
39                splay_r.insert_or_get(rh.value(), Vec::new()).push(a);
40            } else {
41                h_r.entry(rh.value()).or_default().push(a);
42            }
43        }
44    }
45
46    if verbose {
47        eprintln!(
48            "greedy: {}, |R|={}, |V|={}, seed_len={}",
49            if use_splay { "splay tree" } else { "hash table" },
50            r.len(), v.len(), p
51        );
52    }
53
54    // Step (2): initialize scan pointers
55    let mut v_c: usize = 0;
56    let mut v_s: usize = 0;
57
58    // Rolling hash for O(1) per-position V fingerprinting.
59    let mut rh_v: Option<RollingHash> = if v.len() >= p { Some(RollingHash::new(v, 0, p)) } else { None };
60    let mut rh_v_pos: usize = 0;
61
62    loop {
63        // Step (3): check for end of V
64        if v_c + p > v.len() {
65            break;
66        }
67
68        let fp_v = if let Some(ref mut rh) = rh_v {
69            if v_c == rh_v_pos {
70                rh.value()
71            } else if v_c == rh_v_pos + 1 {
72                rh.roll(v[v_c - 1], v[v_c + p - 1]);
73                rh_v_pos = v_c;
74                rh.value()
75            } else {
76                // Jump after match — reinitialize
77                *rh = RollingHash::new(v, v_c, p);
78                rh_v_pos = v_c;
79                rh.value()
80            }
81        } else {
82            break;
83        };
84
85        // Steps (4)+(5): find the longest matching substring
86        let mut best_rm: Option<usize> = None;
87        let mut best_len: usize = 0;
88
89        let offsets: Option<&[usize]> = if use_splay {
90            splay_r.find(fp_v).map(|v| v.as_slice())
91        } else {
92            h_r.get(&fp_v).map(|v| v.as_slice())
93        };
94
95        if let Some(offsets) = offsets {
96            for &r_cand in offsets {
97                // Verify the seed actually matches
98                if r[r_cand..r_cand + p] != v[v_c..v_c + p] {
99                    continue;
100                }
101                let mut ml = p;
102                while v_c + ml < v.len() && r_cand + ml < r.len() && v[v_c + ml] == r[r_cand + ml]
103                {
104                    ml += 1;
105                }
106                if ml > best_len {
107                    best_len = ml;
108                    best_rm = Some(r_cand);
109                }
110            }
111        }
112
113        if best_len < p {
114            v_c += 1;
115            continue;
116        }
117
118        // Step (6): encode
119        if v_s < v_c {
120            commands.push(Command::Add {
121                data: v[v_s..v_c].to_vec(),
122            });
123        }
124        commands.push(Command::Copy {
125            offset: best_rm.unwrap(),
126            length: best_len,
127        });
128        v_s = v_c + best_len;
129
130        // Step (7): advance past matched region
131        v_c += best_len;
132    }
133
134    // Step (8): trailing add
135    if v_s < v.len() {
136        commands.push(Command::Add {
137            data: v[v_s..].to_vec(),
138        });
139    }
140
141    if verbose {
142        super::print_command_stats(&commands);
143    }
144
145    commands
146}
147
148/// Convenience wrapper with default parameters.
149pub fn diff_greedy_default(r: &[u8], v: &[u8]) -> Vec<Command> {
150    diff_greedy(r, v, &DiffOptions::default())
151}