embeddenator_fs/fs/delta.rs

//! Delta Encoding for Incremental Engram Updates
//!
//! This module provides efficient incremental updates to holographic engrams
//! without full re-encoding. It leverages VSA's algebraic properties for
//! in-place modifications.
//!
//! # Architecture Alignment
//!
//! Per ADR-001 and ARCHITECTURE.md:
//! - Engrams use superposition storage (bundle all chunks into root)
//! - The correction layer (CORRECTION.md) already stores deltas efficiently
//! - Immutability is preserved through copy-on-write semantics
//!
//! # VSA Algebra for Delta Encoding
//!
//! Given position-aware encoding: `encoded_byte = bind(position_vec, byte_vec)`
//!
//! To modify byte at position P from old_value to new_value:
//! ```text
//! 1. old_encoded = bind(position_vec[P], byte_vec[old_value])
//! 2. new_encoded = bind(position_vec[P], byte_vec[new_value])
//! 3. chunk_new = (chunk_old ⊙ old_encoded) ⊕ new_encoded
//!              = unbind(chunk_old, old_encoded) ⊕ new_encoded
//! ```
//!
//! The correction layer automatically handles any accuracy loss from this operation.
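//!
//! As a minimal sketch in terms of the `SparseVec` `bind`/`bundle` API used
//! later in this module (the `position_vec`/`byte_vec` lookups are schematic):
//!
//! ```rust,ignore
//! // Patch one byte in place: unbind the old binding, bundle in the new one.
//! let old_encoded = position_vec[p].bind(&byte_vec[old_value as usize]);
//! let new_encoded = position_vec[p].bind(&byte_vec[new_value as usize]);
//! let chunk_new = chunk_old.bind(&old_encoded).bundle(&new_encoded);
//! ```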
//!
//! # Performance
//!
//! | Operation | Full Re-encode | Delta Encoding | Speedup |
//! |-----------|----------------|----------------|---------|
//! | 1 byte change | ~90ms | ~1ms | ~90x |
//! | 10 byte changes | ~90ms | ~10ms | ~9x |
//! | Chunk append | ~90ms × N | ~90ms × 1 | ~Nx |
//!
//! # Example
//!
//! ```rust,ignore
//! use embeddenator_fs::fs::delta::{Delta, DeltaType};
//!
//! let delta = Delta::new(DeltaType::ByteReplace {
//!     offset: 100,
//!     old_value: b'A',
//!     new_value: b'B',
//! });
//!
//! fs.apply_delta("file.txt", &delta)?;
//! ```

use super::versioned::types::{ChunkId, ChunkOffset};

/// A single modification operation
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum DeltaType {
    /// Replace a single byte at a specific offset
    ///
    /// Most efficient: O(1) VSA operations per byte
    ByteReplace {
        /// File offset of the byte to replace
        offset: usize,
        /// Original byte value (for verification/undo)
        old_value: u8,
        /// New byte value
        new_value: u8,
    },

    /// Replace multiple bytes at specified offsets
    ///
    /// More efficient than multiple ByteReplace for nearby changes
    MultiByteReplace {
        /// List of (offset, old_value, new_value)
        changes: Vec<(usize, u8, u8)>,
    },

    /// Replace a contiguous range of bytes
    ///
    /// Efficient for block-level modifications
    RangeReplace {
        /// Start offset in the file
        offset: usize,
        /// Original data (for verification/undo)
        old_data: Vec<u8>,
        /// New data (same length as old_data for an in-place replace;
        /// differing lengths force a full re-encode of the affected chunks)
        new_data: Vec<u8>,
    },

    /// Insert bytes at a position (shifts subsequent bytes)
    ///
    /// More expensive: requires re-encoding positions after insertion point
    Insert {
        /// Position to insert at
        offset: usize,
        /// Data to insert
        data: Vec<u8>,
    },

    /// Delete bytes from a position (shifts subsequent bytes)
    ///
    /// More expensive: requires re-encoding positions after deletion point
    Delete {
        /// Start position of deletion
        offset: usize,
        /// Number of bytes to delete
        length: usize,
        /// Deleted data (for undo)
        deleted_data: Vec<u8>,
    },

    /// Append bytes to the end of the file
    ///
    /// Efficient: only encodes new chunks, bundles into existing root
    Append {
        /// Data to append
        data: Vec<u8>,
    },

    /// Truncate file to a specific length
    ///
    /// Efficient: removes chunks from manifest, may need partial chunk re-encode
    Truncate {
        /// New file length
        new_length: usize,
        /// Truncated data (for undo)
        truncated_data: Vec<u8>,
    },
}

impl DeltaType {
    /// Returns true if this delta type can be applied without shifting positions
    ///
    /// Non-shifting deltas are much more efficient as they don't require
    /// re-encoding subsequent bytes.
    pub fn is_non_shifting(&self) -> bool {
        matches!(
            self,
            DeltaType::ByteReplace { .. }
                | DeltaType::MultiByteReplace { .. }
                | DeltaType::RangeReplace { .. }
        )
    }

    /// Returns true if this delta type changes the file length
    pub fn changes_length(&self) -> bool {
        matches!(
            self,
            DeltaType::Insert { .. }
                | DeltaType::Delete { .. }
                | DeltaType::Append { .. }
                | DeltaType::Truncate { .. }
        )
    }

    /// Estimate the number of chunks affected by this delta
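    ///
    /// For example, with 64-byte chunks a 10-byte range starting at offset 60
    /// straddles chunks 0 and 1:
    ///
    /// ```rust,ignore
    /// let d = DeltaType::RangeReplace {
    ///     offset: 60,
    ///     old_data: vec![0; 10],
    ///     new_data: vec![1; 10],
    /// };
    /// assert_eq!(d.affected_chunk_count(64), 2);
    /// ```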
    pub fn affected_chunk_count(&self, chunk_size: usize) -> usize {
        match self {
            DeltaType::ByteReplace { .. } => 1,
            DeltaType::MultiByteReplace { changes } => {
                if changes.is_empty() {
                    return 0;
                }
                let min_offset = changes.iter().map(|(o, _, _)| *o).min().unwrap_or(0);
                let max_offset = changes.iter().map(|(o, _, _)| *o).max().unwrap_or(0);
                (max_offset / chunk_size) - (min_offset / chunk_size) + 1
            }
            DeltaType::RangeReplace {
                offset, new_data, ..
            } => {
                if new_data.is_empty() {
                    return 0;
                }
                // Count the chunk of the last byte in the range, not one past it
                let last = offset + new_data.len() - 1;
                (last / chunk_size) - (offset / chunk_size) + 1
            }
            DeltaType::Insert { offset: _, data } => {
                // An insert shifts every byte after the insertion point, so the
                // true count runs to the end of the file; this returns a
                // conservative lower bound based on the inserted data alone
                let direct_chunks = data.len().div_ceil(chunk_size);
                direct_chunks.max(1)
            }
            DeltaType::Delete { offset, length, .. } => {
                if *length == 0 {
                    return 0;
                }
                // Count the chunk of the last deleted byte, not one past it
                let last = offset + length - 1;
                (last / chunk_size) - (offset / chunk_size) + 1
            }
            DeltaType::Append { data } => data.len().div_ceil(chunk_size),
            DeltaType::Truncate { .. } => 1, // At most affects final chunk
        }
    }
}

/// A complete delta operation with metadata
#[derive(Debug, Clone)]
pub struct Delta {
    /// The type of modification
    pub delta_type: DeltaType,

    /// Expected file version before applying delta (for optimistic locking)
    pub expected_version: Option<u64>,

    /// Whether to verify the old data matches before applying
    pub verify_old_data: bool,
}

impl Delta {
    /// Create a new delta operation
    pub fn new(delta_type: DeltaType) -> Self {
        Self {
            delta_type,
            expected_version: None,
            verify_old_data: true,
        }
    }

    /// Create a delta with version checking
    pub fn with_version(delta_type: DeltaType, expected_version: u64) -> Self {
        Self {
            delta_type,
            expected_version: Some(expected_version),
            verify_old_data: true,
        }
    }

    /// Disable old data verification (faster but less safe)
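    ///
    /// ```rust,ignore
    /// // Sketch: skip the old-data check on a hot path.
    /// let delta = Delta::new(DeltaType::Append { data: vec![0u8; 64] })
    ///     .without_verification();
    /// assert!(!delta.verify_old_data);
    /// ```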
    pub fn without_verification(mut self) -> Self {
        self.verify_old_data = false;
        self
    }
}

/// Result of analyzing which chunks are affected by a delta
#[derive(Debug, Clone)]
pub struct AffectedChunks {
    /// Chunks that need to be modified in-place (non-shifting deltas)
    pub modified_chunks: Vec<ChunkModification>,

    /// Chunks that need to be completely re-encoded (shifting deltas)
    pub reencoded_chunks: Vec<ChunkId>,

    /// New chunks to add (appends)
    pub new_chunks: Vec<NewChunk>,

    /// Chunks to remove from manifest (truncation)
    pub removed_chunks: Vec<ChunkId>,
}

/// Information about a chunk modification
#[derive(Debug, Clone)]
pub struct ChunkModification {
    /// The chunk being modified
    pub chunk_id: ChunkId,

    /// Byte changes within this chunk: (offset_within_chunk, old_value, new_value)
    pub byte_changes: Vec<(usize, u8, u8)>,

    /// The chunk's offset info (if available from manifest)
    pub chunk_offset: Option<ChunkOffset>,
}

/// Information about a new chunk to create
#[derive(Debug, Clone)]
pub struct NewChunk {
    /// The data for the new chunk
    pub data: Vec<u8>,

    /// Position in the file (byte offset)
    pub file_offset: usize,
}

impl AffectedChunks {
    /// Create an empty affected chunks result
    pub fn empty() -> Self {
        Self {
            modified_chunks: Vec::new(),
            reencoded_chunks: Vec::new(),
            new_chunks: Vec::new(),
            removed_chunks: Vec::new(),
        }
    }

    /// Check if any chunks are affected
    pub fn is_empty(&self) -> bool {
        self.modified_chunks.is_empty()
            && self.reencoded_chunks.is_empty()
            && self.new_chunks.is_empty()
            && self.removed_chunks.is_empty()
    }

    /// Total number of affected chunks
    pub fn total_affected(&self) -> usize {
        self.modified_chunks.len()
            + self.reencoded_chunks.len()
            + self.new_chunks.len()
            + self.removed_chunks.len()
    }
}

/// Analyze a delta to determine which chunks are affected
///
/// This function examines the delta and the file's chunk structure to
/// determine the minimal set of chunks that need modification.
///
/// # Arguments
/// * `delta` - The delta operation to analyze
/// * `file_size` - Current size of the file
/// * `chunk_size` - Size of each chunk (typically 64 for holographic, 4096 for standard)
/// * `chunk_offsets` - Optional chunk offset information from manifest
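///
/// # Example
///
/// ```rust,ignore
/// // A one-byte edit at offset 100 of a 1000-byte file with 64-byte chunks
/// // touches exactly one chunk: index 1, offset 36 within the chunk.
/// let delta = Delta::new(DeltaType::ByteReplace {
///     offset: 100,
///     old_value: b'A',
///     new_value: b'B',
/// });
/// let affected = analyze_delta(&delta, 1000, 64, None);
/// assert_eq!(affected.modified_chunks.len(), 1);
/// ```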
pub fn analyze_delta(
    delta: &Delta,
    file_size: usize,
    chunk_size: usize,
    chunk_offsets: Option<&[ChunkOffset]>,
) -> AffectedChunks {
    let mut result = AffectedChunks::empty();

    match &delta.delta_type {
        DeltaType::ByteReplace {
            offset,
            old_value,
            new_value,
        } => {
            if *offset >= file_size {
                return result; // Out of bounds
            }
            let chunk_idx = offset / chunk_size;
            let offset_in_chunk = offset % chunk_size;

            result.modified_chunks.push(ChunkModification {
                chunk_id: get_chunk_id(chunk_idx, chunk_offsets),
                byte_changes: vec![(offset_in_chunk, *old_value, *new_value)],
                chunk_offset: chunk_offsets.and_then(|co| co.get(chunk_idx).copied()),
            });
        }

        DeltaType::MultiByteReplace { changes } => {
            // Group changes by chunk
            let mut changes_by_chunk: std::collections::HashMap<usize, Vec<(usize, u8, u8)>> =
                std::collections::HashMap::new();

            for &(offset, old_val, new_val) in changes {
                if offset >= file_size {
                    continue;
                }
                let chunk_idx = offset / chunk_size;
                let offset_in_chunk = offset % chunk_size;
                changes_by_chunk.entry(chunk_idx).or_default().push((
                    offset_in_chunk,
                    old_val,
                    new_val,
                ));
            }

            for (chunk_idx, byte_changes) in changes_by_chunk {
                result.modified_chunks.push(ChunkModification {
                    chunk_id: get_chunk_id(chunk_idx, chunk_offsets),
                    byte_changes,
                    chunk_offset: chunk_offsets.and_then(|co| co.get(chunk_idx).copied()),
                });
            }
        }

        DeltaType::RangeReplace {
            offset,
            old_data,
            new_data,
        } => {
            if old_data.len() != new_data.len() {
                // Length-changing range replace is actually insert+delete
                // For now, treat as needing full re-encode of affected chunks
                let start_chunk = offset / chunk_size;
                let end_chunk = (offset + old_data.len().max(new_data.len())) / chunk_size;
                for chunk_idx in start_chunk..=end_chunk {
                    result
                        .reencoded_chunks
                        .push(get_chunk_id(chunk_idx, chunk_offsets));
                }
            } else {
                // Same length: can do byte-by-byte replacement
                let mut changes_by_chunk: std::collections::HashMap<usize, Vec<(usize, u8, u8)>> =
                    std::collections::HashMap::new();

                for (i, (&old_byte, &new_byte)) in old_data.iter().zip(new_data.iter()).enumerate()
                {
                    if old_byte != new_byte {
                        let file_offset = offset + i;
                        let chunk_idx = file_offset / chunk_size;
                        let offset_in_chunk = file_offset % chunk_size;
                        changes_by_chunk.entry(chunk_idx).or_default().push((
                            offset_in_chunk,
                            old_byte,
                            new_byte,
                        ));
                    }
                }

                for (chunk_idx, byte_changes) in changes_by_chunk {
                    result.modified_chunks.push(ChunkModification {
                        chunk_id: get_chunk_id(chunk_idx, chunk_offsets),
                        byte_changes,
                        chunk_offset: chunk_offsets.and_then(|co| co.get(chunk_idx).copied()),
                    });
                }
            }
        }

        DeltaType::Insert { offset, data } => {
            // Insertions require re-encoding from insertion point onward
            // because position vectors shift
            let start_chunk = offset / chunk_size;
            let total_chunks = file_size.div_ceil(chunk_size);

            for chunk_idx in start_chunk..total_chunks {
                result
                    .reencoded_chunks
                    .push(get_chunk_id(chunk_idx, chunk_offsets));
            }

            // Calculate new chunks needed for inserted data
            let new_total_size = file_size + data.len();
            let new_total_chunks = new_total_size.div_ceil(chunk_size);
            if new_total_chunks > total_chunks {
                for _ in total_chunks..new_total_chunks {
                    result.new_chunks.push(NewChunk {
                        data: Vec::new(), // Data will be filled during apply
                        file_offset: 0,   // Will be calculated during apply
                    });
                }
            }
        }

        DeltaType::Delete { offset, length, .. } => {
            // Deletions require re-encoding from deletion point onward
            let start_chunk = offset / chunk_size;
            let total_chunks = file_size.div_ceil(chunk_size);

            for chunk_idx in start_chunk..total_chunks {
                result
                    .reencoded_chunks
                    .push(get_chunk_id(chunk_idx, chunk_offsets));
            }

            // Some chunks may be removed entirely
            let new_total_size = file_size.saturating_sub(*length);
            let new_total_chunks = new_total_size.div_ceil(chunk_size);
            if new_total_chunks < total_chunks {
                for chunk_idx in new_total_chunks..total_chunks {
                    result
                        .removed_chunks
                        .push(get_chunk_id(chunk_idx, chunk_offsets));
                }
            }
        }

        DeltaType::Append { data } => {
            // Appends only need to encode new chunks
            let current_chunks = file_size.div_ceil(chunk_size);
            let bytes_in_last_chunk = if file_size == 0 {
                0
            } else {
                ((file_size - 1) % chunk_size) + 1
            };
            let remaining_in_last = if bytes_in_last_chunk > 0 {
                chunk_size - bytes_in_last_chunk
            } else {
                0
            };

            if remaining_in_last > 0 && !data.is_empty() {
                // Need to modify last chunk to append some data
                if current_chunks > 0 {
                    result
                        .reencoded_chunks
                        .push(get_chunk_id(current_chunks - 1, chunk_offsets));
                }
            }

            // New chunks for remaining data
            let data_for_new_chunks = if remaining_in_last >= data.len() {
                0
            } else {
                data.len() - remaining_in_last
            };
            let new_chunk_count = data_for_new_chunks.div_ceil(chunk_size);

            for i in 0..new_chunk_count {
                let chunk_start = remaining_in_last + i * chunk_size;
                let chunk_end = (chunk_start + chunk_size).min(data.len());
                result.new_chunks.push(NewChunk {
                    data: data[chunk_start..chunk_end].to_vec(),
                    file_offset: file_size + chunk_start,
                });
            }
        }

        DeltaType::Truncate { new_length, .. } => {
            if *new_length >= file_size {
                return result; // Nothing to truncate
            }

            let new_chunks = (*new_length).div_ceil(chunk_size);
            let old_chunks = file_size.div_ceil(chunk_size);

            // Last chunk may need modification if truncation is mid-chunk
            if *new_length % chunk_size != 0 && new_chunks > 0 {
                result
                    .reencoded_chunks
                    .push(get_chunk_id(new_chunks - 1, chunk_offsets));
            }

            // Remove chunks beyond new length
            for chunk_idx in new_chunks..old_chunks {
                result
                    .removed_chunks
                    .push(get_chunk_id(chunk_idx, chunk_offsets));
            }
        }
    }

    result
}

/// Helper to get chunk ID from index, using offset info if available
fn get_chunk_id(chunk_idx: usize, chunk_offsets: Option<&[ChunkOffset]>) -> ChunkId {
    chunk_offsets
        .and_then(|co| co.get(chunk_idx))
        .map(|c| c.chunk_id)
        .unwrap_or(chunk_idx)
}
/// VSA delta operations for incremental engram modification
///
/// # Architecture
///
/// Delta encoding works at the **chunk level**, not the individual byte level.
/// Due to VSA's holographic nature, modifying bytes within a bundled vector
/// introduces interference noise. The efficient approach, sketched after this
/// list, is:
///
/// 1. **Identify** affected chunks using `analyze_delta()`
/// 2. **Decode** each affected chunk to bytes (using the correction layer)
/// 3. **Apply** the byte changes to the decoded data
/// 4. **Re-encode** only the affected chunks
/// 5. **Update** the root engram by unbundling the old chunk and bundling the new one
///
/// This is much faster than a full file re-encode because:
/// - Only affected chunks are processed (typically 1-3 for local edits)
/// - The root engram update is just two VSA operations (unbundle + bundle)
/// - Corrections only need recalculation for modified chunks
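///
/// A minimal sketch of that pipeline, assuming `decode_chunk` and `old_chunk`
/// as hypothetical helpers and `ChunkId` as a plain chunk index
/// (`analyze_delta`, `reencode_chunk`, and `update_root_chunk` are defined in
/// this file):
///
/// ```rust,ignore
/// let affected = analyze_delta(&delta, file_size, chunk_size, Some(&offsets));
/// for m in &affected.modified_chunks {
///     // 2. Decode the affected chunk back to bytes (hypothetical helper).
///     let mut bytes = decode_chunk(m.chunk_id)?;
///     // 3. Apply the byte-level edits.
///     for &(off, _old, new) in &m.byte_changes {
///         bytes[off] = new;
///     }
///     // 4.-5. Re-encode only this chunk, then splice it into the root engram.
///     let new_chunk = vsa_delta::reencode_chunk(&mut encoder, &bytes, m.chunk_id * chunk_size);
///     root = vsa_delta::update_root_chunk(&root, &old_chunk(m.chunk_id), &new_chunk);
/// }
/// ```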
pub mod vsa_delta {
    use embeddenator_vsa::{ReversibleVSAEncoder, SparseVec};

    /// Re-encode a chunk after applying byte changes
    ///
    /// This is the primary delta operation: given the original chunk data
    /// with modifications applied, re-encode it to get a new chunk engram.
    ///
    /// # Arguments
    /// * `encoder` - The VSA encoder
    /// * `modified_data` - The chunk data with modifications already applied
    /// * `start_position` - The position offset for this chunk
    ///
    /// # Returns
    /// A new chunk engram encoding the modified data
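    ///
    /// A short usage sketch (`bytes` holds the chunk data with edits applied):
    ///
    /// ```rust,ignore
    /// // Re-encode a chunk that starts at file offset 128.
    /// let new_engram = reencode_chunk(&mut encoder, &bytes, 128);
    /// ```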
    pub fn reencode_chunk(
        encoder: &mut ReversibleVSAEncoder,
        modified_data: &[u8],
        start_position: usize,
    ) -> SparseVec {
        if modified_data.is_empty() {
            return SparseVec::new();
        }

        // Ensure position vectors exist
        let max_pos = start_position + modified_data.len() - 1;
        if max_pos < embeddenator_vsa::MAX_POSITIONS {
            encoder.ensure_positions(max_pos);
        }

        let byte_vectors = encoder.get_byte_vectors();

        // Encode first byte
        let pos_vec = encoder.get_position_vector_ref(start_position);
        let mut result = pos_vec.bind(&byte_vectors[modified_data[0] as usize]);

        // Bundle remaining bytes
        for (i, &byte) in modified_data.iter().enumerate().skip(1) {
            let pos_vec = encoder.get_position_vector_ref(start_position + i);
            let encoded = pos_vec.bind(&byte_vectors[byte as usize]);
            result = result.bundle(&encoded);
        }

        result
    }

    /// Update root engram by replacing a chunk
    ///
    /// Uses VSA algebra: root_new = (root_old ⊙ old_chunk) ⊕ new_chunk
    /// The bind (⊙) unbinds the old chunk, then bundle (⊕) adds the new.
    ///
    /// Note: For small files (single chunk), this is equivalent to just
    /// replacing the root with the new chunk.
    ///
    /// # Arguments
    /// * `root` - Current root engram
    /// * `old_chunk` - The chunk being replaced
    /// * `new_chunk` - The replacement chunk
    ///
    /// # Returns
    /// Updated root engram
    pub fn update_root_chunk(
        root: &SparseVec,
        old_chunk: &SparseVec,
        new_chunk: &SparseVec,
    ) -> SparseVec {
        // Unbind old chunk from root
        let unbound = root.bind(old_chunk);
        // Bundle new chunk
        unbound.bundle(new_chunk)
    }

    /// Encode new data into a fresh chunk
    ///
    /// Used when creating entirely new chunks for appends.
    pub fn encode_new_chunk(
        encoder: &mut ReversibleVSAEncoder,
        data: &[u8],
        start_position: usize,
    ) -> SparseVec {
        reencode_chunk(encoder, data, start_position)
    }

    /// Add a new chunk to the root engram
    ///
    /// Simply bundles the new chunk into the existing root.
    pub fn add_chunk_to_root(root: &SparseVec, new_chunk: &SparseVec) -> SparseVec {
        root.bundle(new_chunk)
    }

    /// Remove a chunk from the root engram
    ///
    /// Uses bind (self-inverse) to remove the chunk's contribution.
    /// Note: Due to VSA noise accumulation, this works best with
    /// correction layer support.
    pub fn remove_chunk_from_root(root: &SparseVec, chunk: &SparseVec) -> SparseVec {
        root.bind(chunk)
    }
}

/// Result of applying a delta operation
#[derive(Debug)]
pub struct DeltaResult {
    /// Modified chunks as (chunk_id, new chunk bytes) pairs
    pub modified_chunks: Vec<(ChunkId, Vec<u8>)>,

    /// New chunks to add (data, file_offset)
    pub new_chunks: Vec<(Vec<u8>, usize)>,

    /// Chunks to remove
    pub removed_chunks: Vec<ChunkId>,

    /// New file size after delta
    pub new_size: usize,

    /// Whether corrections need recalculation for modified chunks
    pub needs_correction_update: bool,
}

impl DeltaResult {
    /// Create an empty delta result
    pub fn empty(current_size: usize) -> Self {
        Self {
            modified_chunks: Vec::new(),
            new_chunks: Vec::new(),
            removed_chunks: Vec::new(),
            new_size: current_size,
            needs_correction_update: false,
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_byte_replace_analysis() {
        let delta = Delta::new(DeltaType::ByteReplace {
            offset: 100,
            old_value: b'A',
            new_value: b'B',
        });

        let result = analyze_delta(&delta, 1000, 64, None);

        assert_eq!(result.modified_chunks.len(), 1);
        assert_eq!(result.modified_chunks[0].chunk_id, 1); // 100 / 64 = 1
        assert_eq!(result.modified_chunks[0].byte_changes.len(), 1);
        assert_eq!(result.modified_chunks[0].byte_changes[0], (36, b'A', b'B'));
        // 100 % 64 = 36
    }

    #[test]
    fn test_multi_byte_replace_groups_by_chunk() {
        let delta = Delta::new(DeltaType::MultiByteReplace {
            changes: vec![
                (10, b'A', b'X'),  // Chunk 0
                (20, b'B', b'Y'),  // Chunk 0
                (100, b'C', b'Z'), // Chunk 1
            ],
        });

        let result = analyze_delta(&delta, 1000, 64, None);

        assert_eq!(result.modified_chunks.len(), 2);
    }

    #[test]
    fn test_append_creates_new_chunks() {
        let delta = Delta::new(DeltaType::Append {
            data: vec![0u8; 200], // 200 bytes
        });

        // File is 64 bytes (one full chunk); appending 200 bytes creates ceil(200/64) = 4 new chunks
        let result = analyze_delta(&delta, 64, 64, None);

        assert!(result.new_chunks.len() >= 2);
    }

    #[test]
    fn test_truncate_removes_chunks() {
        let delta = Delta::new(DeltaType::Truncate {
            new_length: 100,
            truncated_data: vec![0u8; 156], // Removing 156 bytes
        });

        // File is 256 bytes (4 chunks), truncating to 100 bytes (2 chunks)
        let result = analyze_delta(&delta, 256, 64, None);

        assert_eq!(result.removed_chunks.len(), 2); // Chunks 2 and 3
    }

    #[test]
    fn test_delta_is_non_shifting() {
        assert!(DeltaType::ByteReplace {
            offset: 0,
            old_value: 0,
            new_value: 1
        }
        .is_non_shifting());
        assert!(DeltaType::RangeReplace {
            offset: 0,
            old_data: vec![0],
            new_data: vec![1]
        }
        .is_non_shifting());
        assert!(!DeltaType::Insert {
            offset: 0,
            data: vec![1]
        }
        .is_non_shifting());
        assert!(!DeltaType::Append { data: vec![1] }.is_non_shifting());
    }

    #[test]
    fn test_vsa_reencode_chunk() {
        use embeddenator_vsa::ReversibleVSAEncoder;

        let mut encoder = ReversibleVSAEncoder::new();

        // Original data was b"Hello"; apply a one-byte edit ('e' -> 'a') and re-encode
        let modified_data = b"Hallo";

        // Re-encode with modifications
        let chunk = vsa_delta::reencode_chunk(&mut encoder, modified_data, 0);

        // Decode and verify
        let decoded = encoder.decode(&chunk, 5);

        // Should decode to "Hallo"
        assert_eq!(decoded[1], b'a', "Modified byte should be 'a'");
        assert_eq!(decoded[0], b'H', "First byte should be 'H'");
    }

    #[test]
    fn test_vsa_update_root_chunk() {
        use embeddenator_vsa::ReversibleVSAEncoder;

        let mut encoder = ReversibleVSAEncoder::new();

        // Create two chunks bundled into a root
        let chunk1_data = b"AAAA";
        let chunk2_data = b"BBBB";

        let chunk1 = encoder.encode(chunk1_data);
        let chunk2 = vsa_delta::reencode_chunk(&mut encoder, chunk2_data, 4);

        // Bundle them into root
        let root = chunk1.bundle(&chunk2);

        // Now modify chunk1 to "CCCC"
        let new_chunk1_data = b"CCCC";
        let new_chunk1 = encoder.encode(new_chunk1_data);

        // Update root
        let new_root = vsa_delta::update_root_chunk(&root, &chunk1, &new_chunk1);

        // Due to VSA noise from the unbundle/bundle, we check similarity
        // The important thing is the structure is maintained
        let chunk1_sim = new_root.cosine(&new_chunk1);
        assert!(
            chunk1_sim > 0.0,
            "New chunk should have positive correlation with root"
        );
    }

    #[test]
    fn test_vsa_encode_new_chunk() {
        use embeddenator_vsa::ReversibleVSAEncoder;

        let mut encoder = ReversibleVSAEncoder::new();

        // Encode a new chunk starting at position 64 (simulating second chunk)
        let data = b"New";
        let chunk = vsa_delta::encode_new_chunk(&mut encoder, data, 64);

        // Verify it's not empty by checking cosine with itself is 1.0
        let self_sim = chunk.cosine(&chunk);
        assert!(self_sim > 0.99, "New chunk should have valid structure");

        // Decode at the correct positions
        let pos_vec_64 = encoder.get_position_vector_ref(64);
        let query = chunk.bind(pos_vec_64);

        // Find best match
        let byte_vectors = encoder.get_byte_vectors();
        let mut best_byte = 0u8;
        let mut best_sim = f64::NEG_INFINITY;
        for (i, bv) in byte_vectors.iter().enumerate() {
            let sim = query.cosine(bv);
            if sim > best_sim {
                best_sim = sim;
                best_byte = i as u8;
            }
        }

        assert_eq!(best_byte, b'N', "First byte at position 64 should be 'N'");
    }

    #[test]
    fn test_vsa_add_chunk_to_root() {
        use embeddenator_vsa::ReversibleVSAEncoder;

        let mut encoder = ReversibleVSAEncoder::new();

        // Create initial root
        let chunk1 = encoder.encode(b"Hello");

        // Add a new chunk
        let chunk2 = vsa_delta::encode_new_chunk(&mut encoder, b"World", 5);
        let root = vsa_delta::add_chunk_to_root(&chunk1, &chunk2);

        // Root should have similarity with both chunks
        let sim1 = root.cosine(&chunk1);
        let sim2 = root.cosine(&chunk2);

        assert!(sim1 > 0.0, "Root should correlate with first chunk");
        assert!(sim2 > 0.0, "Root should correlate with second chunk");
    }
}
865}