use super::versioned::types::{ChunkId, ChunkOffset};
/// The kinds of edits that can be applied to a chunked file.
///
/// All `offset` fields are absolute byte offsets into the file. Variants
/// that carry the pre-edit bytes (`old_value`, `old_data`, `deleted_data`,
/// `truncated_data`) make the edit verifiable — see `Delta::verify_old_data`
/// — and presumably reversible by the applier (the applier is not visible
/// in this file; confirm against callers).
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DeltaType {
    /// Replace a single byte in place.
    ByteReplace {
        offset: usize,
        old_value: u8,
        new_value: u8,
    },
    /// Replace several scattered bytes in place; each tuple is
    /// `(offset, old_value, new_value)`.
    MultiByteReplace {
        changes: Vec<(usize, u8, u8)>,
    },
    /// Replace a contiguous byte range starting at `offset`. When
    /// `old_data` and `new_data` have different lengths, `analyze_delta`
    /// falls back to re-encoding every touched chunk.
    RangeReplace {
        offset: usize,
        old_data: Vec<u8>,
        new_data: Vec<u8>,
    },
    /// Insert `data` at `offset`, shifting all subsequent bytes right.
    Insert {
        offset: usize,
        data: Vec<u8>,
    },
    /// Remove `length` bytes starting at `offset`; `deleted_data` records
    /// the removed bytes.
    Delete {
        offset: usize,
        length: usize,
        deleted_data: Vec<u8>,
    },
    /// Append `data` at the end of the file.
    Append {
        data: Vec<u8>,
    },
    /// Shrink the file to `new_length` bytes; `truncated_data` records the
    /// bytes that were cut off.
    Truncate {
        new_length: usize,
        truncated_data: Vec<u8>,
    },
}
impl DeltaType {
    /// Returns `true` for edits that rewrite bytes in place and never move
    /// the position of any other byte in the file.
    pub fn is_non_shifting(&self) -> bool {
        matches!(
            self,
            DeltaType::ByteReplace { .. }
                | DeltaType::MultiByteReplace { .. }
                | DeltaType::RangeReplace { .. }
        )
    }

    /// Returns `true` for edits that change the total file length.
    ///
    /// NOTE(review): a `RangeReplace` with differently-sized `old_data` /
    /// `new_data` also changes the length (see `analyze_delta`) but is not
    /// reported here — confirm this asymmetry is intentional.
    pub fn changes_length(&self) -> bool {
        matches!(
            self,
            DeltaType::Insert { .. }
                | DeltaType::Delete { .. }
                | DeltaType::Append { .. }
                | DeltaType::Truncate { .. }
        )
    }

    /// Estimates how many `chunk_size`-byte chunks this edit touches.
    ///
    /// `chunk_size` must be non-zero (a zero chunk size divides by zero).
    pub fn affected_chunk_count(&self, chunk_size: usize) -> usize {
        match self {
            DeltaType::ByteReplace { .. } => 1,
            DeltaType::MultiByteReplace { changes } => {
                if changes.is_empty() {
                    return 0;
                }
                // Span of chunks between the lowest and highest edited
                // offset (inclusive). NOTE(review): this counts the chunk
                // *span*, not distinct chunks, so sparse edits over-count
                // relative to what `analyze_delta` actually reports.
                let min_offset = changes.iter().map(|(o, _, _)| *o).min().unwrap_or(0);
                let max_offset = changes.iter().map(|(o, _, _)| *o).max().unwrap_or(0);
                (max_offset / chunk_size) - (min_offset / chunk_size) + 1
            }
            DeltaType::RangeReplace {
                offset, new_data, ..
            } => {
                // The range occupies [offset, offset + len); its last byte
                // is at `offset + len - 1`. Using the inclusive last byte
                // (instead of the exclusive end) avoids counting an extra
                // chunk when the range ends exactly on a chunk boundary.
                // An empty range still counts its own chunk.
                let last = offset + new_data.len().max(1) - 1;
                (last / chunk_size) - (offset / chunk_size) + 1
            }
            DeltaType::Insert { offset: _, data } => {
                // Approximation: counts only the chunks the inserted bytes
                // span (at least one); chunks shifted after the insertion
                // point are not included here.
                data.len().div_ceil(chunk_size).max(1)
            }
            DeltaType::Delete { offset, length, .. } => {
                // Same inclusive-last-byte reasoning as RangeReplace above.
                let last = offset + (*length).max(1) - 1;
                (last / chunk_size) - (offset / chunk_size) + 1
            }
            DeltaType::Append { data } => data.len().div_ceil(chunk_size),
            DeltaType::Truncate { .. } => 1,
        }
    }
}
/// A single mutation request, pairing the raw [`DeltaType`] with
/// application metadata.
#[derive(Debug, Clone)]
pub struct Delta {
    /// The concrete edit to apply.
    pub delta_type: DeltaType,
    /// If `Some`, the delta presumably only applies when the file is at
    /// this version (optimistic concurrency) — the applier is not visible
    /// here; confirm against callers.
    pub expected_version: Option<u64>,
    /// When `true` (the default), the old bytes recorded in the delta are
    /// presumably checked against the file before applying — confirm.
    pub verify_old_data: bool,
}
impl Delta {
    /// Creates a delta with no version precondition and old-data
    /// verification enabled.
    pub fn new(delta_type: DeltaType) -> Self {
        Self {
            delta_type,
            expected_version: None,
            verify_old_data: true,
        }
    }

    /// Creates a delta that carries an expected file version; otherwise
    /// identical to [`Delta::new`].
    pub fn with_version(delta_type: DeltaType, expected_version: u64) -> Self {
        Self {
            expected_version: Some(expected_version),
            ..Self::new(delta_type)
        }
    }

    /// Disables old-data verification for this delta (builder-style).
    pub fn without_verification(self) -> Self {
        Self {
            verify_old_data: false,
            ..self
        }
    }
}
/// The outcome of [`analyze_delta`]: which chunks a delta touches and how.
#[derive(Debug, Clone)]
pub struct AffectedChunks {
    /// Chunks that can be patched in place via per-byte changes.
    pub modified_chunks: Vec<ChunkModification>,
    /// Chunks whose content must be fully re-encoded.
    pub reencoded_chunks: Vec<ChunkId>,
    /// Chunks that must be created.
    pub new_chunks: Vec<NewChunk>,
    /// Chunks that no longer exist after the delta.
    pub removed_chunks: Vec<ChunkId>,
}
/// An in-place patch for a single chunk.
#[derive(Debug, Clone)]
pub struct ChunkModification {
    /// Id of the chunk; falls back to the plain chunk index when no
    /// offset table is available (see `get_chunk_id`).
    pub chunk_id: ChunkId,
    /// Byte edits as `(offset_within_chunk, old_value, new_value)`.
    pub byte_changes: Vec<(usize, u8, u8)>,
    /// The chunk's offset-table entry, when a table was supplied.
    pub chunk_offset: Option<ChunkOffset>,
}
/// A chunk to be created as a result of applying a delta.
#[derive(Debug, Clone)]
pub struct NewChunk {
    /// The chunk's content. May be empty for Insert-driven growth, where
    /// the final content is not known at analysis time.
    pub data: Vec<u8>,
    /// Absolute byte offset of the chunk's start within the file.
    pub file_offset: usize,
}
impl AffectedChunks {
    /// Creates a result with no affected chunks of any kind.
    pub fn empty() -> Self {
        Self {
            modified_chunks: vec![],
            reencoded_chunks: vec![],
            new_chunks: vec![],
            removed_chunks: vec![],
        }
    }

    /// Returns `true` when no chunk is affected in any way.
    pub fn is_empty(&self) -> bool {
        self.total_affected() == 0
    }

    /// Total number of chunk entries across all four categories.
    pub fn total_affected(&self) -> usize {
        [
            self.modified_chunks.len(),
            self.reencoded_chunks.len(),
            self.new_chunks.len(),
            self.removed_chunks.len(),
        ]
        .iter()
        .sum()
    }
}
/// Determines which chunks are affected by `delta` for a file of
/// `file_size` bytes divided into `chunk_size`-byte chunks.
///
/// Returns, per chunk, whether it can be patched in place
/// (`modified_chunks`), must be re-encoded (`reencoded_chunks`), must be
/// created (`new_chunks`), or disappears (`removed_chunks`).
///
/// `chunk_offsets`, when present, maps chunk indices to their stored
/// [`ChunkOffset`] entries so stable chunk ids can be reported; otherwise
/// the chunk index itself serves as the id.
///
/// `chunk_size` must be non-zero (a zero chunk size divides by zero).
pub fn analyze_delta(
    delta: &Delta,
    file_size: usize,
    chunk_size: usize,
    chunk_offsets: Option<&[ChunkOffset]>,
) -> AffectedChunks {
    let mut result = AffectedChunks::empty();
    match &delta.delta_type {
        DeltaType::ByteReplace {
            offset,
            old_value,
            new_value,
        } => {
            // Writes past the end of the file are silently ignored.
            if *offset >= file_size {
                return result;
            }
            let chunk_idx = offset / chunk_size;
            result.modified_chunks.push(ChunkModification {
                chunk_id: get_chunk_id(chunk_idx, chunk_offsets),
                byte_changes: vec![(offset % chunk_size, *old_value, *new_value)],
                chunk_offset: chunk_offsets.and_then(|co| co.get(chunk_idx).copied()),
            });
        }
        DeltaType::MultiByteReplace { changes } => {
            // Group the per-byte edits by target chunk so each chunk is
            // reported once with all of its byte changes.
            let mut changes_by_chunk: std::collections::HashMap<usize, Vec<(usize, u8, u8)>> =
                std::collections::HashMap::new();
            for &(offset, old_val, new_val) in changes {
                if offset >= file_size {
                    continue; // out-of-range edits are dropped
                }
                changes_by_chunk
                    .entry(offset / chunk_size)
                    .or_default()
                    .push((offset % chunk_size, old_val, new_val));
            }
            for (chunk_idx, byte_changes) in changes_by_chunk {
                result.modified_chunks.push(ChunkModification {
                    chunk_id: get_chunk_id(chunk_idx, chunk_offsets),
                    byte_changes,
                    chunk_offset: chunk_offsets.and_then(|co| co.get(chunk_idx).copied()),
                });
            }
        }
        DeltaType::RangeReplace {
            offset,
            old_data,
            new_data,
        } => {
            if old_data.len() != new_data.len() {
                // Length-changing replace: the touched chunks cannot be
                // patched byte-by-byte and must be fully re-encoded. The
                // last affected byte sits at `offset + span - 1`, so a
                // range ending exactly on a chunk boundary does not spill
                // a spurious extra chunk into the list.
                let span = old_data.len().max(new_data.len());
                let start_chunk = offset / chunk_size;
                let end_chunk = (offset + span.max(1) - 1) / chunk_size;
                for chunk_idx in start_chunk..=end_chunk {
                    result
                        .reencoded_chunks
                        .push(get_chunk_id(chunk_idx, chunk_offsets));
                }
            } else {
                // Same-length replace: emit only the bytes that actually
                // differ, grouped by chunk (like MultiByteReplace).
                let mut changes_by_chunk: std::collections::HashMap<usize, Vec<(usize, u8, u8)>> =
                    std::collections::HashMap::new();
                for (i, (&old_byte, &new_byte)) in
                    old_data.iter().zip(new_data.iter()).enumerate()
                {
                    if old_byte != new_byte {
                        let file_offset = offset + i;
                        changes_by_chunk
                            .entry(file_offset / chunk_size)
                            .or_default()
                            .push((file_offset % chunk_size, old_byte, new_byte));
                    }
                }
                for (chunk_idx, byte_changes) in changes_by_chunk {
                    result.modified_chunks.push(ChunkModification {
                        chunk_id: get_chunk_id(chunk_idx, chunk_offsets),
                        byte_changes,
                        chunk_offset: chunk_offsets.and_then(|co| co.get(chunk_idx).copied()),
                    });
                }
            }
        }
        DeltaType::Insert { offset, data } => {
            // Insertion shifts every byte at or after `offset`, so all
            // chunks from the insertion point to the current end must be
            // re-encoded.
            let start_chunk = offset / chunk_size;
            let total_chunks = file_size.div_ceil(chunk_size);
            for chunk_idx in start_chunk..total_chunks {
                result
                    .reencoded_chunks
                    .push(get_chunk_id(chunk_idx, chunk_offsets));
            }
            // Growth past the old chunk count creates new chunks. Their
            // content is not known at analysis time (it is existing data
            // shifted right, possibly mixed with inserted bytes), so only
            // their placement is reported; the caller fills in the data.
            let new_total_chunks = (file_size + data.len()).div_ceil(chunk_size);
            for chunk_idx in total_chunks..new_total_chunks {
                result.new_chunks.push(NewChunk {
                    data: Vec::new(),
                    file_offset: chunk_idx * chunk_size,
                });
            }
        }
        DeltaType::Delete { offset, length, .. } => {
            // Deletion shifts every byte after the removed range, so all
            // chunks from the deletion point onward must be re-encoded.
            let start_chunk = offset / chunk_size;
            let total_chunks = file_size.div_ceil(chunk_size);
            for chunk_idx in start_chunk..total_chunks {
                result
                    .reencoded_chunks
                    .push(get_chunk_id(chunk_idx, chunk_offsets));
            }
            // Shrinkage below the old chunk count removes trailing chunks.
            let new_total_chunks = file_size.saturating_sub(*length).div_ceil(chunk_size);
            for chunk_idx in new_total_chunks..total_chunks {
                result
                    .removed_chunks
                    .push(get_chunk_id(chunk_idx, chunk_offsets));
            }
        }
        DeltaType::Append { data } => {
            let current_chunks = file_size.div_ceil(chunk_size);
            // Bytes already occupying the final chunk: 0 for an empty
            // file, `chunk_size` when the file is chunk-aligned.
            let bytes_in_last_chunk = if file_size == 0 {
                0
            } else {
                ((file_size - 1) % chunk_size) + 1
            };
            let remaining_in_last = if bytes_in_last_chunk > 0 {
                chunk_size - bytes_in_last_chunk
            } else {
                0
            };
            // A partially-filled final chunk absorbs the first appended
            // bytes and must be re-encoded.
            if remaining_in_last > 0 && !data.is_empty() && current_chunks > 0 {
                result
                    .reencoded_chunks
                    .push(get_chunk_id(current_chunks - 1, chunk_offsets));
            }
            // Whatever does not fit in the final chunk becomes new chunks.
            let data_for_new_chunks = data.len().saturating_sub(remaining_in_last);
            let new_chunk_count = data_for_new_chunks.div_ceil(chunk_size);
            for i in 0..new_chunk_count {
                let chunk_start = remaining_in_last + i * chunk_size;
                let chunk_end = (chunk_start + chunk_size).min(data.len());
                result.new_chunks.push(NewChunk {
                    data: data[chunk_start..chunk_end].to_vec(),
                    file_offset: file_size + chunk_start,
                });
            }
        }
        DeltaType::Truncate { new_length, .. } => {
            // Truncating to the current size or larger is a no-op.
            if *new_length >= file_size {
                return result;
            }
            let new_chunks = (*new_length).div_ceil(chunk_size);
            let old_chunks = file_size.div_ceil(chunk_size);
            // A now-partial final chunk must be re-encoded.
            if *new_length % chunk_size != 0 && new_chunks > 0 {
                result
                    .reencoded_chunks
                    .push(get_chunk_id(new_chunks - 1, chunk_offsets));
            }
            // Chunks lying entirely past the new length are removed.
            for chunk_idx in new_chunks..old_chunks {
                result
                    .removed_chunks
                    .push(get_chunk_id(chunk_idx, chunk_offsets));
            }
        }
    }
    result
}
/// Resolves a chunk index to its stable id via the offset table, or falls
/// back to the index itself when no table (or no entry) is available.
fn get_chunk_id(chunk_idx: usize, chunk_offsets: Option<&[ChunkOffset]>) -> ChunkId {
    match chunk_offsets.and_then(|offsets| offsets.get(chunk_idx)) {
        Some(entry) => entry.chunk_id,
        None => chunk_idx,
    }
}
/// VSA helpers for incrementally updating chunk encodings without
/// re-encoding an entire file.
///
/// These lean on the `embeddenator_vsa` algebra: `bind` composes a
/// position vector with a byte vector, `bundle` superimposes encoded
/// elements. `update_root_chunk` / `remove_chunk_from_root` additionally
/// assume `bind` is its own inverse (binding the root with a chunk cancels
/// that chunk's contribution) — confirm against the embeddenator_vsa docs.
pub mod vsa_delta {
    use embeddenator_vsa::{ReversibleVSAEncoder, SparseVec};

    /// Encodes `modified_data` as if it occupied file positions
    /// `start_position..start_position + len`: each byte is bound to its
    /// position vector and the results are bundled together.
    pub fn reencode_chunk(
        encoder: &mut ReversibleVSAEncoder,
        modified_data: &[u8],
        start_position: usize,
    ) -> SparseVec {
        // An empty chunk encodes to the empty vector.
        if modified_data.is_empty() {
            return SparseVec::new();
        }
        let max_pos = start_position + modified_data.len() - 1;
        // Grow the encoder's position table up front.
        // NOTE(review): when max_pos >= MAX_POSITIONS this growth step is
        // skipped, yet the loop below still requests those positions — and
        // `ensure_positions(max_pos)` may be off by one if it takes a
        // count rather than a max index. Confirm how the encoder handles
        // out-of-range positions.
        if max_pos < embeddenator_vsa::MAX_POSITIONS {
            encoder.ensure_positions(max_pos);
        }
        let byte_vectors = encoder.get_byte_vectors();
        // Seed the result with the first byte, then superimpose the rest.
        let pos_vec = encoder.get_position_vector_ref(start_position);
        let mut result = pos_vec.bind(&byte_vectors[modified_data[0] as usize]);
        for (i, &byte) in modified_data.iter().enumerate().skip(1) {
            let pos_vec = encoder.get_position_vector_ref(start_position + i);
            let encoded = pos_vec.bind(&byte_vectors[byte as usize]);
            result = result.bundle(&encoded);
        }
        result
    }

    /// Replaces `old_chunk` with `new_chunk` inside `root`: binding with
    /// the old chunk removes its contribution (assuming bind is
    /// self-inverse — see module docs), then the new chunk is bundled in.
    pub fn update_root_chunk(
        root: &SparseVec,
        old_chunk: &SparseVec,
        new_chunk: &SparseVec,
    ) -> SparseVec {
        let unbound = root.bind(old_chunk);
        unbound.bundle(new_chunk)
    }

    /// Encodes a brand-new chunk whose data starts at `start_position`;
    /// identical to [`reencode_chunk`].
    pub fn encode_new_chunk(
        encoder: &mut ReversibleVSAEncoder,
        data: &[u8],
        start_position: usize,
    ) -> SparseVec {
        reencode_chunk(encoder, data, start_position)
    }

    /// Adds a chunk's encoding to the root by superposition.
    pub fn add_chunk_to_root(root: &SparseVec, new_chunk: &SparseVec) -> SparseVec {
        root.bundle(new_chunk)
    }

    /// Removes a chunk's contribution from the root (assumes bind is
    /// self-inverse — see module docs).
    pub fn remove_chunk_from_root(root: &SparseVec, chunk: &SparseVec) -> SparseVec {
        root.bind(chunk)
    }
}
/// The applied outcome of a delta, pairing chunk ids with concrete bytes.
///
/// NOTE(review): no producer of this type is visible in this file; field
/// meanings are inferred from names and `empty` — confirm against callers.
#[derive(Debug)]
pub struct DeltaResult {
    /// Chunks rewritten in place, with their new content.
    pub modified_chunks: Vec<(ChunkId, Vec<u8>)>,
    /// Newly created chunks as `(data, file_offset)` pairs.
    pub new_chunks: Vec<(Vec<u8>, usize)>,
    /// Chunks removed by the delta.
    pub removed_chunks: Vec<ChunkId>,
    /// File size after the delta is applied.
    pub new_size: usize,
    /// Whether correction/auxiliary data must be refreshed afterwards.
    pub needs_correction_update: bool,
}
impl DeltaResult {
    /// A no-op result: no chunk changes and the size left at
    /// `current_size`.
    pub fn empty(current_size: usize) -> Self {
        Self {
            new_size: current_size,
            needs_correction_update: false,
            modified_chunks: vec![],
            new_chunks: vec![],
            removed_chunks: vec![],
        }
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    // ByteReplace at offset 100 with 64-byte chunks lands in chunk 1
    // (100 / 64) at in-chunk offset 36 (100 % 64).
    #[test]
    fn test_byte_replace_analysis() {
        let delta = Delta::new(DeltaType::ByteReplace {
            offset: 100,
            old_value: b'A',
            new_value: b'B',
        });
        let result = analyze_delta(&delta, 1000, 64, None);
        assert_eq!(result.modified_chunks.len(), 1);
        assert_eq!(result.modified_chunks[0].chunk_id, 1);
        assert_eq!(result.modified_chunks[0].byte_changes.len(), 1);
        assert_eq!(result.modified_chunks[0].byte_changes[0], (36, b'A', b'B'));
    }

    // Offsets 10 and 20 share chunk 0 while 100 is in chunk 1, so the
    // three edits collapse into two chunk modifications.
    #[test]
    fn test_multi_byte_replace_groups_by_chunk() {
        let delta = Delta::new(DeltaType::MultiByteReplace {
            changes: vec![
                (10, b'A', b'X'),
                (20, b'B', b'Y'),
                (100, b'C', b'Z'),
            ],
        });
        let result = analyze_delta(&delta, 1000, 64, None);
        assert_eq!(result.modified_chunks.len(), 2);
    }

    // Appending 200 bytes to a chunk-aligned 64-byte file leaves no room
    // in the last chunk, so all appended data becomes new chunks.
    #[test]
    fn test_append_creates_new_chunks() {
        let delta = Delta::new(DeltaType::Append {
            data: vec![0u8; 200],
        });
        let result = analyze_delta(&delta, 64, 64, None);
        assert!(result.new_chunks.len() >= 2);
    }

    // Truncating 256 -> 100 bytes keeps chunks 0-1 (chunk 1 becomes
    // partial) and removes chunks 2 and 3.
    #[test]
    fn test_truncate_removes_chunks() {
        let delta = Delta::new(DeltaType::Truncate {
            new_length: 100,
            truncated_data: vec![0u8; 156],
        });
        let result = analyze_delta(&delta, 256, 64, None);
        assert_eq!(result.removed_chunks.len(), 2);
    }

    // In-place replacements are non-shifting; inserts/appends are not.
    #[test]
    fn test_delta_is_non_shifting() {
        assert!(DeltaType::ByteReplace {
            offset: 0,
            old_value: 0,
            new_value: 1
        }
        .is_non_shifting());
        assert!(DeltaType::RangeReplace {
            offset: 0,
            old_data: vec![0],
            new_data: vec![1]
        }
        .is_non_shifting());
        assert!(!DeltaType::Insert {
            offset: 0,
            data: vec![1]
        }
        .is_non_shifting());
        assert!(!DeltaType::Append { data: vec![1] }.is_non_shifting());
    }

    // Encoding from position 0 should round-trip through the encoder's
    // decode; spot-check the first two bytes.
    #[test]
    fn test_vsa_reencode_chunk() {
        use embeddenator_vsa::ReversibleVSAEncoder;
        let mut encoder = ReversibleVSAEncoder::new();
        let modified_data = b"Hallo";
        let chunk = vsa_delta::reencode_chunk(&mut encoder, modified_data, 0);
        let decoded = encoder.decode(&chunk, 5);
        assert_eq!(decoded[1], b'a', "Modified byte should be 'a'");
        assert_eq!(decoded[0], b'H', "First byte should be 'H'");
    }

    // Swapping chunk1 for new_chunk1 in the root should leave the root
    // positively correlated with the replacement chunk.
    #[test]
    fn test_vsa_update_root_chunk() {
        use embeddenator_vsa::ReversibleVSAEncoder;
        let mut encoder = ReversibleVSAEncoder::new();
        let chunk1_data = b"AAAA";
        let chunk2_data = b"BBBB";
        let chunk1 = encoder.encode(chunk1_data);
        let chunk2 = vsa_delta::reencode_chunk(&mut encoder, chunk2_data, 4);
        let root = chunk1.bundle(&chunk2);
        let new_chunk1_data = b"CCCC";
        let new_chunk1 = encoder.encode(new_chunk1_data);
        let new_root = vsa_delta::update_root_chunk(&root, &chunk1, &new_chunk1);
        let chunk1_sim = new_root.cosine(&new_chunk1);
        assert!(
            chunk1_sim > 0.0,
            "New chunk should have positive correlation with root"
        );
    }

    // A chunk encoded at a non-zero start position should expose its first
    // byte when probed with the matching position vector.
    #[test]
    fn test_vsa_encode_new_chunk() {
        use embeddenator_vsa::ReversibleVSAEncoder;
        let mut encoder = ReversibleVSAEncoder::new();
        let data = b"New";
        let chunk = vsa_delta::encode_new_chunk(&mut encoder, data, 64);
        let self_sim = chunk.cosine(&chunk);
        assert!(self_sim > 0.99, "New chunk should have valid structure");
        let pos_vec_64 = encoder.get_position_vector_ref(64);
        let query = chunk.bind(pos_vec_64);
        let byte_vectors = encoder.get_byte_vectors();
        // Nearest-neighbour search over all byte vectors.
        let mut best_byte = 0u8;
        let mut best_sim = f64::NEG_INFINITY;
        for (i, bv) in byte_vectors.iter().enumerate() {
            let sim = query.cosine(bv);
            if sim > best_sim {
                best_sim = sim;
                best_byte = i as u8;
            }
        }
        assert_eq!(best_byte, b'N', "First byte at position 64 should be 'N'");
    }

    // Bundling two chunks into a root should keep both recoverable by
    // similarity.
    #[test]
    fn test_vsa_add_chunk_to_root() {
        use embeddenator_vsa::ReversibleVSAEncoder;
        let mut encoder = ReversibleVSAEncoder::new();
        let chunk1 = encoder.encode(b"Hello");
        let chunk2 = vsa_delta::encode_new_chunk(&mut encoder, b"World", 5);
        let root = vsa_delta::add_chunk_to_root(&chunk1, &chunk2);
        let sim1 = root.cosine(&chunk1);
        let sim2 = root.cosine(&chunk2);
        assert!(sim1 > 0.0, "Root should correlate with first chunk");
        assert!(sim2 > 0.0, "Root should correlate with second chunk");
    }
}