speck-core 0.2.0

Secure runtime package manager for MMU-less microcontrollers
Documentation
//! Delta construction using suffix array for optimal matches

use alloc::vec::Vec;
use crate::delta::{Delta, Op, MAX_HUNK_SIZE};
use crate::error::Result;

/// Builder for creating optimized binary deltas
#[derive(Debug)]
pub struct DeltaBuilder {
    min_match_len: usize,
    max_search_depth: usize,
}

impl Default for DeltaBuilder {
    fn default() -> Self {
        Self {
            min_match_len: 8,
            max_search_depth: 100,
        }
    }
}

impl DeltaBuilder {
    /// Create new builder with default settings
    pub fn new() -> Self {
        Self::default()
    }
    
    /// Set minimum match length (smaller = better compression, slower)
    pub fn min_match_len(mut self, len: usize) -> Self {
        self.min_match_len = len.max(4);
        self
    }
    
    /// Build delta from old to new
    pub fn build(&self, old: &[u8], new: &[u8]) -> Result<Delta> {
        if old.is_empty() {
            // Pure insert
            return Ok(Delta {
                source_size: 0,
                target_size: new.len() as u64,
                ops: vec![Op::Insert { data: new.to_vec() }],
            });
        }
        
        let mut ops = Vec::new();
        let mut new_pos: usize = 0;
        
        // Build suffix array for old data
        let sa = SuffixArray::new(old);
        
        while new_pos < new.len() {
            // Find best match
            let best = self.find_best_match(&sa, old, &new[new_pos..], new_pos);
            
            if let Some((src_off, match_len)) = best {
                if match_len >= self.min_match_len {
                    // Emit copy
                    ops.push(Op::Copy {
                        src_offset: src_off as u64,
                        length: match_len as u64,
                    });
                    new_pos += match_len;
                    continue;
                }
            }
            
            // No good match, gather literals
            let literal_start = new_pos;
            let mut literal_len = 1;
            
            while literal_len < MAX_HUNK_SIZE 
                && new_pos + literal_len < new.len() {
                // Check if starting a match would be better
                if literal_len >= self.min_match_len {
                    let lookahead = &new[new_pos + literal_len..];
                    if self.find_best_match(&sa, old, lookahead, new_pos + literal_len)
                        .map_or(false, |(_, len)| len >= self.min_match_len) {
                        break;
                    }
                }
                literal_len += 1;
            }
            
            ops.push(Op::Insert {
                data: new[literal_start..literal_start + literal_len].to_vec(),
            });
            new_pos += literal_len;
        }
        
        // Coalesce adjacent copies
        let ops = self.coalesce_copies(ops);
        
        Ok(Delta {
            source_size: old.len() as u64,
            target_size: new.len() as u64,
            ops,
        })
    }
    
    fn find_best_match(&self, sa: &SuffixArray, old: &[u8], new_suffix: &[u8], _new_pos: usize) -> Option<(usize, usize)> {
        if new_suffix.len() < self.min_match_len {
            return None;
        }
        
        // Binary search for best prefix match
        let mut best_len = 0;
        let mut best_pos = 0;
        
        // Simple approach: sample positions
        let step = (old.len() / self.max_search_depth).max(1);
        for i in (0..old.len()).step_by(step) {
            let len = common_prefix_length(&old[i..], new_suffix);
            if len > best_len && len >= self.min_match_len {
                best_len = len;
                best_pos = i;
                if best_len == new_suffix.len() {
                    break;
                }
            }
        }
        
        if best_len >= self.min_match_len {
            Some((best_pos, best_len))
        } else {
            None
        }
    }
    
    fn coalesce_copies(&self, ops: Vec<Op>) -> Vec<Op> {
        let mut result = Vec::with_capacity(ops.len());

        for op in ops {
            if let Some(last) = result.last_mut() {
                match (last, &op) {
                    (Op::Copy { src_offset, length }, Op::Copy {
                        src_offset: next_src,
                        length: next_len,
                    }) => {
                        // Check if adjacent in both source and destination
                        if *src_offset + *length == *next_src {
                            *length += *next_len;
                            continue;
                        }
                    }
                    (Op::Insert { data }, Op::Insert { data: next_data }) => {
                        if data.len() + next_data.len() <= MAX_HUNK_SIZE {
                            data.extend_from_slice(next_data);
                            continue;
                        }
                    }
                    _ => {}
                }
            }
            result.push(op);
        }

        result
    }
}

/// Simple suffix array implementation for substring search
struct SuffixArray<'a> {
    text: &'a [u8],
    sa: Vec<usize>,
}

impl<'a> SuffixArray<'a> {
    fn new(text: &'a [u8]) -> Self {
        let mut sa: Vec<usize> = (0..text.len()).collect();
        sa.sort_by(|&a, &b| text[a..].cmp(&text[b..]));
        Self { text, sa }
    }
}

fn common_prefix_length(a: &[u8], b: &[u8]) -> usize {
    a.iter().zip(b.iter()).take_while(|(x, y)| x == y).count()
}