infiniloom_engine/embedding/
manifest.rs

1//! Manifest storage and diffing for incremental updates
2//!
3//! The manifest tracks all chunks generated for a repository, enabling:
4//! - Incremental updates (only re-embed changed chunks)
5//! - Change detection (added, modified, removed)
6//! - Integrity verification (detect tampering)
7//!
8//! # Storage Format
9//!
10//! Manifests are stored in bincode format (5-10x faster than JSON) with:
11//! - BLAKE3 integrity checksum
12//! - Version compatibility checking
13//! - Settings validation
14
15use std::collections::BTreeMap;
16use std::path::Path;
17
18use bincode::Options;
19use serde::{Deserialize, Serialize};
20
21use super::error::EmbedError;
22use super::hasher::IncrementalHasher;
23use super::types::{ChunkKind, EmbedChunk, EmbedSettings};
24use crate::bincode_safe::deserialize_with_limit;
25
/// Current manifest format version
///
/// `EmbedManifest::new` stamps this value into every manifest it creates, and
/// `EmbedManifest::load` rejects files whose stored version is greater than it.
pub const MANIFEST_VERSION: u32 = 2;
28
/// Manifest tracking all chunks for incremental updates
///
/// # Determinism Note
///
/// The manifest binary file is **not byte-deterministic** across saves due to the
/// `updated_at` timestamp. However, the **checksum is deterministic** because it
/// excludes the timestamp from its calculation.
///
/// For comparing manifests:
/// - **Wrong**: Compare raw binary files (will differ due to timestamp)
/// - **Right**: Compare checksums via `manifest.checksum` (deterministic; filled in by `save`)
///
/// This design allows incremental updates while still detecting actual content changes.
///
/// # CI/CD Integration
///
/// `save` always overwrites `updated_at` with the current time, so clearing the
/// field beforehand does **not** make the file on disk byte-deterministic.
/// If you need a stable fingerprint (e.g., for Docker layer caching), compare
/// checksums instead of file hashes.
///
/// # Layout
///
/// The struct is written with bincode, which encodes fields positionally in
/// declaration order; reordering or inserting fields changes the file format.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EmbedManifest {
    /// Manifest format version
    pub version: u32,

    /// Relative repository path (from git root or CWD)
    ///
    /// Not part of the integrity checksum.
    pub repo_path: String,

    /// Git commit hash when manifest was created (for reference only)
    /// Note: We always serialize Option fields for bincode compatibility
    ///
    /// Not part of the integrity checksum.
    #[serde(default)]
    pub commit_hash: Option<String>,

    /// Timestamp of last update (Unix seconds)
    ///
    /// **Important**: This field is excluded from the integrity checksum calculation
    /// to allow the checksum to remain stable across re-saves of unchanged content.
    /// The binary file will differ byte-for-byte on each save, but the checksum will
    /// only change if actual chunk content changes.
    #[serde(default)]
    pub updated_at: Option<u64>,

    /// Settings used to generate chunks (part of integrity)
    pub settings: EmbedSettings,

    /// All chunks indexed by location key
    /// Using BTreeMap for deterministic iteration order (critical for cross-platform consistency)
    pub chunks: BTreeMap<String, ManifestEntry>,

    /// Integrity checksum (BLAKE3 of version + settings + sorted chunk entries)
    ///
    /// This field IS serialized together with the rest of the manifest: `save`
    /// fills it in just before writing, and `load` verifies the file against it
    /// when a value is present.
    #[serde(default)]
    pub checksum: Option<String>,
}
82
/// Entry in the manifest for a single chunk
///
/// All four fields are fed into the manifest integrity checksum. When diffing,
/// only `chunk_id` is compared to decide whether a location is unchanged.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct ManifestEntry {
    /// Content-addressable chunk ID (128-bit)
    pub chunk_id: String,

    /// Full content hash for collision detection (256-bit)
    pub full_hash: String,

    /// Token count
    pub tokens: u32,

    /// Line range (1-indexed, inclusive)
    pub lines: (u32, u32),
}
98
99impl EmbedManifest {
100    /// Create a new empty manifest
101    pub fn new(repo_path: String, settings: EmbedSettings) -> Self {
102        Self {
103            version: MANIFEST_VERSION,
104            repo_path,
105            commit_hash: None,
106            updated_at: None,
107            settings,
108            chunks: BTreeMap::new(),
109            checksum: None,
110        }
111    }
112
113    /// Generate deterministic location key for a chunk
114    ///
115    /// Format: `file::symbol::kind`
116    /// Uses `::` as separator (unlikely in paths/symbols)
117    pub fn location_key(file: &str, symbol: &str, kind: ChunkKind) -> String {
118        format!("{}::{}::{}", file, symbol, kind.name())
119    }
120
121    /// Compute integrity checksum over settings and chunk entries
122    fn compute_checksum(&self) -> String {
123        let mut hasher = IncrementalHasher::new();
124
125        // Hash manifest version
126        hasher.update_u32(self.version);
127
128        // Hash settings (affects chunk generation)
129        let settings_json = serde_json::to_string(&self.settings).unwrap_or_default();
130        hasher.update_str(&settings_json);
131
132        // Hash chunks in deterministic order (sorted by key)
133        let mut keys: Vec<_> = self.chunks.keys().collect();
134        keys.sort();
135
136        for key in keys {
137            if let Some(entry) = self.chunks.get(key) {
138                hasher.update_str(key);
139                hasher.update_str(&entry.chunk_id);
140                hasher.update_str(&entry.full_hash);
141                hasher.update_u32(entry.tokens);
142                hasher.update_u32(entry.lines.0);
143                hasher.update_u32(entry.lines.1);
144            }
145        }
146
147        hasher.finalize_hex()
148    }
149
150    /// Save manifest to file with integrity checksum
151    ///
152    /// # Behavior
153    ///
154    /// This method:
155    /// 1. Updates `updated_at` to the current timestamp
156    /// 2. Computes a new checksum (excluding timestamp)
157    /// 3. Serializes to bincode format
158    ///
159    /// # Determinism
160    ///
161    /// The resulting binary file is **not byte-deterministic** because the timestamp
162    /// changes on every save. However, the checksum **is deterministic** - it only
163    /// changes when actual chunk content or settings change.
164    ///
165    /// For deterministic testing, set `self.updated_at = None` before saving.
166    ///
167    /// # Note
168    ///
169    /// This method mutates `self` to set checksum and timestamp.
170    /// This avoids cloning the entire manifest (which can be large).
171    pub fn save(&mut self, path: &Path) -> Result<(), EmbedError> {
172        // Create parent directories
173        if let Some(parent) = path.parent() {
174            std::fs::create_dir_all(parent)
175                .map_err(|e| EmbedError::IoError { path: path.to_path_buf(), source: e })?;
176        }
177
178        // Update timestamp
179        self.updated_at = Some(
180            std::time::SystemTime::now()
181                .duration_since(std::time::UNIX_EPOCH)
182                .map(|d| d.as_secs())
183                .unwrap_or(0)
184        );
185
186        // Compute checksum (excludes timestamp for deterministic checksums across saves)
187        self.checksum = Some(self.compute_checksum());
188
189        // Use bincode for faster I/O (5-10x faster than JSON for large manifests)
190        // Note: Must use bincode::options() to match deserialize_with_limit() in load()
191        let bytes = bincode::options()
192            .serialize(self)
193            .map_err(|e| EmbedError::SerializationError { reason: e.to_string() })?;
194
195        std::fs::write(path, bytes)
196            .map_err(|e| EmbedError::IoError { path: path.to_path_buf(), source: e })?;
197
198        Ok(())
199    }
200
201    /// Load manifest from file with integrity verification
202    pub fn load(path: &Path) -> Result<Self, EmbedError> {
203        let bytes = std::fs::read(path)
204            .map_err(|e| EmbedError::IoError { path: path.to_path_buf(), source: e })?;
205
206        let mut manifest: Self = deserialize_with_limit(&bytes)
207            .map_err(|e| EmbedError::DeserializationError { reason: e.to_string() })?;
208
209        // Version check
210        if manifest.version > MANIFEST_VERSION {
211            return Err(EmbedError::ManifestVersionTooNew {
212                found: manifest.version,
213                max_supported: MANIFEST_VERSION,
214            });
215        }
216
217        // Integrity verification using constant-time comparison to prevent timing attacks
218        if let Some(stored_checksum) = manifest.checksum.take() {
219            let computed = manifest.compute_checksum();
220            if !constant_time_eq(stored_checksum.as_bytes(), computed.as_bytes()) {
221                return Err(EmbedError::ManifestCorrupted {
222                    path: path.to_path_buf(),
223                    expected: stored_checksum,
224                    actual: computed,
225                });
226            }
227        }
228
229        // Validate settings
230        manifest.settings.validate()?;
231
232        Ok(manifest)
233    }
234
235    /// Load manifest if it exists, otherwise return None
236    pub fn load_if_exists(path: &Path) -> Result<Option<Self>, EmbedError> {
237        if path.exists() {
238            Ok(Some(Self::load(path)?))
239        } else {
240            Ok(None)
241        }
242    }
243
244    /// Update manifest with current chunks, detecting collisions
245    pub fn update(&mut self, chunks: &[EmbedChunk]) -> Result<(), EmbedError> {
246        // Collision detection: track id -> full_hash mappings
247        // Using BTreeMap for deterministic iteration (critical for cross-platform consistency)
248        let mut id_to_hash: BTreeMap<&str, &str> = BTreeMap::new();
249
250        self.chunks.clear();
251
252        for chunk in chunks {
253            // Check for hash collision
254            if let Some(&existing_hash) = id_to_hash.get(chunk.id.as_str()) {
255                if existing_hash != chunk.full_hash.as_str() {
256                    return Err(EmbedError::HashCollision {
257                        id: chunk.id.clone(),
258                        hash1: existing_hash.to_string(),
259                        hash2: chunk.full_hash.clone(),
260                    });
261                }
262            }
263            id_to_hash.insert(&chunk.id, &chunk.full_hash);
264
265            let key = Self::location_key(
266                &chunk.source.file,
267                &chunk.source.symbol,
268                chunk.kind,
269            );
270
271            self.chunks.insert(key, ManifestEntry {
272                chunk_id: chunk.id.clone(),
273                full_hash: chunk.full_hash.clone(),
274                tokens: chunk.tokens,
275                lines: chunk.source.lines,
276            });
277        }
278
279        Ok(())
280    }
281
282    /// Compute diff between current chunks and manifest
283    pub fn diff(&self, current_chunks: &[EmbedChunk]) -> EmbedDiff {
284        let mut added = Vec::new();
285        let mut modified = Vec::new();
286        let mut removed = Vec::new();
287        let mut unchanged = Vec::new();
288
289        // Build map of current chunks by location key
290        // Using BTreeMap for deterministic iteration in "added" detection
291        let current_map: BTreeMap<String, &EmbedChunk> = current_chunks
292            .iter()
293            .map(|c| (Self::location_key(&c.source.file, &c.source.symbol, c.kind), c))
294            .collect();
295
296        // Find modified and unchanged (iterate manifest)
297        for (key, entry) in &self.chunks {
298            if let Some(current) = current_map.get(key) {
299                if current.id == entry.chunk_id {
300                    unchanged.push(current.id.clone());
301                } else {
302                    modified.push(ModifiedChunk {
303                        old_id: entry.chunk_id.clone(),
304                        new_id: current.id.clone(),
305                        chunk: (*current).clone(),
306                    });
307                }
308            } else {
309                // In manifest but not in current = removed
310                removed.push(RemovedChunk {
311                    id: entry.chunk_id.clone(),
312                    location_key: key.clone(),
313                });
314            }
315        }
316
317        // Find added (in current but not in manifest)
318        for (key, chunk) in &current_map {
319            if !self.chunks.contains_key(key) {
320                added.push((*chunk).clone());
321            }
322        }
323
324        let summary = DiffSummary {
325            added: added.len(),
326            modified: modified.len(),
327            removed: removed.len(),
328            unchanged: unchanged.len(),
329            total_chunks: current_chunks.len(),
330        };
331
332        EmbedDiff { summary, added, modified, removed, unchanged }
333    }
334
335    /// Check if settings match the manifest settings
336    pub fn settings_match(&self, settings: &EmbedSettings) -> bool {
337        &self.settings == settings
338    }
339
340    /// Get the number of chunks in the manifest
341    pub fn chunk_count(&self) -> usize {
342        self.chunks.len()
343    }
344}
345
/// Result of diffing current state against manifest
///
/// Produced by `EmbedManifest::diff`, which matches chunks by location key.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct EmbedDiff {
    /// Summary statistics
    pub summary: DiffSummary,

    /// New chunks (location key not in manifest)
    pub added: Vec<EmbedChunk>,

    /// Changed chunks (same location key, different chunk ID)
    pub modified: Vec<ModifiedChunk>,

    /// Deleted chunks (in manifest but not current)
    pub removed: Vec<RemovedChunk>,

    /// Unchanged chunk IDs (same location key, same chunk ID)
    pub unchanged: Vec<String>,
}
364
365impl EmbedDiff {
366    /// Check if there are any changes
367    pub fn has_changes(&self) -> bool {
368        self.summary.added > 0 || self.summary.modified > 0 || self.summary.removed > 0
369    }
370
371    /// Get all chunks that need to be upserted (added + modified)
372    pub fn chunks_to_upsert(&self) -> Vec<&EmbedChunk> {
373        let mut chunks: Vec<&EmbedChunk> = self.added.iter().collect();
374        chunks.extend(self.modified.iter().map(|m| &m.chunk));
375        chunks
376    }
377
378    /// Get all IDs that need to be deleted
379    pub fn ids_to_delete(&self) -> Vec<&str> {
380        let mut ids: Vec<&str> = self.removed.iter().map(|r| r.id.as_str()).collect();
381        // Also delete old IDs for modified chunks
382        ids.extend(self.modified.iter().map(|m| m.old_id.as_str()));
383        ids
384    }
385
386    /// Split diff into batches for vector DB operations
387    pub fn batches(&self, batch_size: usize) -> Vec<DiffBatch> {
388        let mut batches = Vec::new();
389        let mut batch_num = 0;
390
391        // Batch added chunks
392        for chunk in self.added.chunks(batch_size) {
393            batches.push(DiffBatch {
394                batch_number: batch_num,
395                operation: BatchOperation::Upsert,
396                chunks: chunk.to_vec(),
397                ids: Vec::new(),
398            });
399            batch_num += 1;
400        }
401
402        // Batch modified chunks
403        for chunk in self.modified.chunks(batch_size) {
404            batches.push(DiffBatch {
405                batch_number: batch_num,
406                operation: BatchOperation::Upsert,
407                chunks: chunk.iter().map(|m| m.chunk.clone()).collect(),
408                ids: chunk.iter().map(|m| m.old_id.clone()).collect(), // Old IDs to delete
409            });
410            batch_num += 1;
411        }
412
413        // Batch removed IDs
414        for ids in self.removed.chunks(batch_size) {
415            batches.push(DiffBatch {
416                batch_number: batch_num,
417                operation: BatchOperation::Delete,
418                chunks: Vec::new(),
419                ids: ids.iter().map(|r| r.id.clone()).collect(),
420            });
421            batch_num += 1;
422        }
423
424        batches
425    }
426}
427
/// Summary of changes between manifest and current state
///
/// `added`, `modified`, `removed` and `unchanged` are the lengths of the
/// corresponding vectors in the diff. `total_chunks` is the length of the input
/// slice, so it can exceed `added + modified + unchanged` when several input
/// chunks share one location key.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DiffSummary {
    /// Number of new chunks
    pub added: usize,

    /// Number of modified chunks
    pub modified: usize,

    /// Number of removed chunks
    pub removed: usize,

    /// Number of unchanged chunks
    pub unchanged: usize,

    /// Total chunks in current state
    pub total_chunks: usize,
}
446
/// A chunk that was modified (content changed)
///
/// Recorded when a location key exists in both the manifest and the current
/// chunks but the chunk IDs differ.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ModifiedChunk {
    /// Previous chunk ID (as stored in the manifest)
    pub old_id: String,

    /// New chunk ID (same value as `chunk.id`)
    pub new_id: String,

    /// The updated chunk
    pub chunk: EmbedChunk,
}
459
/// A chunk that was removed
///
/// Recorded when a location key exists in the manifest but not among the
/// current chunks.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct RemovedChunk {
    /// Chunk ID that was removed
    pub id: String,

    /// Location key for reference
    pub location_key: String,
}
469
/// Batch of operations for vector DB
///
/// As built by `EmbedDiff::batches`:
/// - `Upsert` batches for added chunks carry `chunks` and an empty `ids`
/// - `Upsert` batches for modified chunks carry `chunks` plus the old IDs in `ids`
/// - `Delete` batches carry only `ids`
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DiffBatch {
    /// Batch number (0-indexed)
    pub batch_number: usize,

    /// Operation type
    pub operation: BatchOperation,

    /// Chunks to upsert (for Upsert operation)
    pub chunks: Vec<EmbedChunk>,

    /// IDs to delete (for Delete operation, or old IDs for Upsert)
    pub ids: Vec<String>,
}
485
/// Type of batch operation
///
/// With `rename_all = "snake_case"`, formats that write variant names emit
/// `upsert` and `delete`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub enum BatchOperation {
    /// Insert or update chunks
    Upsert,
    /// Delete chunks by ID
    Delete,
}
495
/// Compare two byte slices without stopping at the first mismatch.
///
/// Slices of different length are reported as unequal immediately. For equal
/// lengths, every byte pair is XOR-ed and OR-ed into one accumulator, so the
/// loop visits all positions no matter where (or whether) the inputs differ.
#[inline]
fn constant_time_eq(a: &[u8], b: &[u8]) -> bool {
    a.len() == b.len()
        && a.iter()
            .zip(b)
            .fold(0u8, |diff_bits, (lhs, rhs)| diff_bits | (lhs ^ rhs))
            == 0
}
513
// Unit tests for manifest construction, persistence, diffing and batching.
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;
    use crate::embedding::types::{ChunkSource, ChunkContext, Visibility, RepoIdentifier};

    // Build a minimal function chunk. Only `id`, `file` and `symbol` vary;
    // `full_hash` is derived from `id`, everything else is fixed filler.
    fn create_test_chunk(id: &str, file: &str, symbol: &str) -> EmbedChunk {
        EmbedChunk {
            id: id.to_string(),
            full_hash: format!("{}_full", id),
            content: "fn test() {}".to_string(),
            tokens: 10,
            kind: ChunkKind::Function,
            source: ChunkSource {
                repo: RepoIdentifier::default(),
                file: file.to_string(),
                lines: (1, 5),
                symbol: symbol.to_string(),
                fqn: None,
                language: "rust".to_string(),
                parent: None,
                visibility: Visibility::Public,
                is_test: false,
            },
            context: ChunkContext::default(),
            part: None,
        }
    }

    // A fresh manifest carries the current version, the given path and no chunks.
    #[test]
    fn test_new_manifest() {
        let manifest = EmbedManifest::new(
            "my-repo".to_string(),
            EmbedSettings::default()
        );

        assert_eq!(manifest.version, MANIFEST_VERSION);
        assert_eq!(manifest.repo_path, "my-repo");
        assert!(manifest.chunks.is_empty());
    }

    // Location keys are `file::symbol::kind`.
    #[test]
    fn test_location_key() {
        let key = EmbedManifest::location_key("src/auth.rs", "validate", ChunkKind::Function);
        assert_eq!(key, "src/auth.rs::validate::function");
    }

    // Round-trip: a saved manifest loads back with the same path and chunk count.
    #[test]
    fn test_save_and_load() {
        let temp_dir = TempDir::new().unwrap();
        let manifest_path = temp_dir.path().join("test.bin");

        // Create and save manifest
        let mut manifest = EmbedManifest::new(
            "my-repo".to_string(),
            EmbedSettings::default()
        );

        let chunks = vec![
            create_test_chunk("ec_123", "src/foo.rs", "foo"),
            create_test_chunk("ec_456", "src/bar.rs", "bar"),
        ];
        manifest.update(&chunks).unwrap();
        manifest.save(&manifest_path).unwrap();

        // Load and verify
        let loaded = EmbedManifest::load(&manifest_path).unwrap();
        assert_eq!(loaded.repo_path, "my-repo");
        assert_eq!(loaded.chunks.len(), 2);
    }

    // Flipping one byte near the end of the file must make `load` fail.
    #[test]
    fn test_integrity_verification() {
        let temp_dir = TempDir::new().unwrap();
        let manifest_path = temp_dir.path().join("test.bin");

        // Create and save manifest
        let mut manifest = EmbedManifest::new(
            "my-repo".to_string(),
            EmbedSettings::default()
        );
        manifest.save(&manifest_path).unwrap();

        // Tamper with file
        // (if the file were shorter than 10 bytes nothing is modified and the
        // assertion below fails, so the test cannot pass vacuously)
        let mut bytes = std::fs::read(&manifest_path).unwrap();
        if bytes.len() >= 10 {
            let idx = bytes.len() - 10;
            bytes[idx] ^= 0xFF;
            std::fs::write(&manifest_path, bytes).unwrap();
        }

        // Should detect tampering
        // Either error is accepted: presumably the flipped byte may break
        // decoding before the checksum comparison is reached.
        let result = EmbedManifest::load(&manifest_path);
        assert!(matches!(result, Err(EmbedError::ManifestCorrupted { .. }) | Err(EmbedError::DeserializationError { .. })));
    }

    // Against an empty manifest, every current chunk counts as added.
    #[test]
    fn test_diff_added() {
        let manifest = EmbedManifest::new(
            "my-repo".to_string(),
            EmbedSettings::default()
        );

        let chunks = vec![
            create_test_chunk("ec_123", "src/foo.rs", "foo"),
        ];

        let diff = manifest.diff(&chunks);
        assert_eq!(diff.summary.added, 1);
        assert_eq!(diff.summary.modified, 0);
        assert_eq!(diff.summary.removed, 0);
    }

    // Same location key with a new chunk ID is reported as modified.
    #[test]
    fn test_diff_modified() {
        let mut manifest = EmbedManifest::new(
            "my-repo".to_string(),
            EmbedSettings::default()
        );

        let old_chunks = vec![
            create_test_chunk("ec_old", "src/foo.rs", "foo"),
        ];
        manifest.update(&old_chunks).unwrap();

        // Same location, different ID = modified
        let new_chunks = vec![
            create_test_chunk("ec_new", "src/foo.rs", "foo"),
        ];

        let diff = manifest.diff(&new_chunks);
        assert_eq!(diff.summary.added, 0);
        assert_eq!(diff.summary.modified, 1);
        assert_eq!(diff.summary.removed, 0);
        assert_eq!(diff.modified[0].old_id, "ec_old");
        assert_eq!(diff.modified[0].new_id, "ec_new");
    }

    // Manifest entries with no current counterpart are reported as removed.
    #[test]
    fn test_diff_removed() {
        let mut manifest = EmbedManifest::new(
            "my-repo".to_string(),
            EmbedSettings::default()
        );

        let old_chunks = vec![
            create_test_chunk("ec_123", "src/foo.rs", "foo"),
        ];
        manifest.update(&old_chunks).unwrap();

        // Empty current = all removed
        let diff = manifest.diff(&[]);
        assert_eq!(diff.summary.added, 0);
        assert_eq!(diff.summary.modified, 0);
        assert_eq!(diff.summary.removed, 1);
    }

    // Diffing against the very chunks the manifest was built from yields no changes.
    #[test]
    fn test_diff_unchanged() {
        let mut manifest = EmbedManifest::new(
            "my-repo".to_string(),
            EmbedSettings::default()
        );

        let chunks = vec![
            create_test_chunk("ec_123", "src/foo.rs", "foo"),
        ];
        manifest.update(&chunks).unwrap();

        // Same chunks = unchanged
        let diff = manifest.diff(&chunks);
        assert_eq!(diff.summary.unchanged, 1);
        assert!(!diff.has_changes());
    }

    // Five added chunks with batch size 2 split into batches of 2, 2 and 1.
    #[test]
    fn test_batches() {
        let manifest = EmbedManifest::new(
            "my-repo".to_string(),
            EmbedSettings::default()
        );

        let chunks: Vec<_> = (0..5)
            .map(|i| create_test_chunk(&format!("ec_{i}"), &format!("src/f{i}.rs"), &format!("f{i}")))
            .collect();

        let diff = manifest.diff(&chunks);
        let batches = diff.batches(2);

        // 5 chunks / batch size 2 = 3 batches
        assert_eq!(batches.len(), 3);
        assert_eq!(batches[0].chunks.len(), 2);
        assert_eq!(batches[1].chunks.len(), 2);
        assert_eq!(batches[2].chunks.len(), 1);
    }

    // `load_if_exists` yields None for a missing file and Some once it is saved.
    #[test]
    fn test_load_if_exists() {
        let temp_dir = TempDir::new().unwrap();
        let manifest_path = temp_dir.path().join("nonexistent.bin");

        // Non-existent returns None
        let result = EmbedManifest::load_if_exists(&manifest_path).unwrap();
        assert!(result.is_none());

        // Existing returns Some
        let mut manifest = EmbedManifest::new("test".to_string(), EmbedSettings::default());
        manifest.save(&manifest_path).unwrap();

        let result = EmbedManifest::load_if_exists(&manifest_path).unwrap();
        assert!(result.is_some());
    }

    // Two chunks sharing an ID but not a full hash must be rejected by `update`.
    #[test]
    fn test_collision_detection() {
        let mut manifest = EmbedManifest::new(
            "my-repo".to_string(),
            EmbedSettings::default()
        );

        // Create two chunks with same ID but different hashes
        let mut chunk1 = create_test_chunk("ec_same", "src/foo.rs", "foo");
        let mut chunk2 = create_test_chunk("ec_same", "src/bar.rs", "bar");
        chunk1.full_hash = "hash1".to_string();
        chunk2.full_hash = "hash2".to_string();

        let result = manifest.update(&[chunk1, chunk2]);
        assert!(matches!(result, Err(EmbedError::HashCollision { .. })));
    }

    // Settings comparison is by value: defaults match, a changed field does not.
    #[test]
    fn test_settings_match() {
        let manifest = EmbedManifest::new(
            "my-repo".to_string(),
            EmbedSettings::default()
        );

        assert!(manifest.settings_match(&EmbedSettings::default()));

        let mut different = EmbedSettings::default();
        different.max_tokens = 2000;
        assert!(!manifest.settings_match(&different));
    }
}
757}