embeddenator 0.20.0-alpha.1

Sparse ternary VSA holographic computing substrate
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
//! Tests for incremental update functionality (TASK-007)
//!
//! This test suite verifies that the incremental update API works correctly:
//! - Adding files to existing engrams
//! - Removing files (marking as deleted)
//! - Modifying existing files
//! - Compacting engrams to remove deleted files
//! - Hierarchical engram updates
//!
//! Key properties tested:
//! - Bit-perfect reconstruction after updates
//! - Correct manifest tracking of deleted files
//! - Associativity of bundle operations
//! - Space reclamation via compaction

use embeddenator::{EmbrFS, ReversibleVSAConfig};
use std::io::Write;
use tempfile::TempDir;

/// Helper: write `content` to a file called `name` inside `dir` and return its path.
///
/// Any missing parent directories of the target path are created first, so
/// `name` may contain subdirectory components. Panics if a filesystem
/// operation fails.
fn create_temp_file(dir: &TempDir, name: &str, content: &[u8]) -> std::path::PathBuf {
    let target = dir.path().join(name);
    if let Some(parent_dir) = target.parent() {
        std::fs::create_dir_all(parent_dir).unwrap();
    }
    // Create the file and write the payload in one fallible chain; the handle
    // is closed when the closure returns.
    std::fs::File::create(&target)
        .and_then(|mut handle| handle.write_all(content))
        .unwrap();
    target
}

/// Adds one file to a brand-new `EmbrFS` via `add_file`, then checks the
/// manifest entry, that the codebook is non-empty, and that extraction
/// reproduces the original bytes.
#[test]
fn test_add_single_file_to_empty_engram() {
    let config = ReversibleVSAConfig::default();
    let source_dir = TempDir::new().unwrap();
    let source_path = create_temp_file(&source_dir, "file1.txt", b"hello world");

    // Start from an empty engram and add the file incrementally.
    let mut embr = EmbrFS::new();
    embr.add_file(&source_path, "file1.txt".to_owned(), false, &config)
        .unwrap();

    // Exactly one live manifest entry describing the 11-byte input.
    assert_eq!(embr.manifest.files.len(), 1);
    let entry = &embr.manifest.files[0];
    assert_eq!(entry.path, "file1.txt");
    assert!(!entry.deleted);
    assert_eq!(entry.size, 11);

    // The codebook must not be empty after the add.
    assert!(embr.engram.codebook.len() > 0);

    // Round-trip through extraction into a separate directory.
    let out_dir = TempDir::new().unwrap();
    EmbrFS::extract(&embr.engram, &embr.manifest, out_dir.path(), false, &config).unwrap();

    let round_tripped = std::fs::read(out_dir.path().join("file1.txt")).unwrap();
    assert_eq!(round_tripped, b"hello world");
}

/// A file added with `add_file` after an initial `ingest_file` must grow the
/// manifest and chunk count, and both files must extract with their original
/// bytes.
#[test]
fn test_add_file_to_existing_engram() {
    let config = ReversibleVSAConfig::default();
    let source_dir = TempDir::new().unwrap();
    let first_path = create_temp_file(&source_dir, "file1.txt", b"first file");
    let second_path = create_temp_file(&source_dir, "file2.txt", b"second file");

    // Seed the engram with the first file and remember its chunk count.
    let mut embr = EmbrFS::new();
    embr.ingest_file(&first_path, "file1.txt".to_owned(), false, &config)
        .unwrap();
    let baseline_chunks = embr.manifest.total_chunks;

    // Incrementally add the second file on top.
    embr.add_file(&second_path, "file2.txt".to_owned(), false, &config)
        .unwrap();

    // Two manifest entries, and strictly more chunks than before the add.
    assert_eq!(embr.manifest.files.len(), 2);
    assert!(embr.manifest.total_chunks > baseline_chunks);

    // Both files must come back unchanged.
    let out_dir = TempDir::new().unwrap();
    EmbrFS::extract(&embr.engram, &embr.manifest, out_dir.path(), false, &config).unwrap();

    let first_out = std::fs::read(out_dir.path().join("file1.txt")).unwrap();
    let second_out = std::fs::read(out_dir.path().join("file2.txt")).unwrap();
    assert_eq!(first_out, b"first file");
    assert_eq!(second_out, b"second file");
}

/// Adding the same logical path twice must fail with
/// `std::io::ErrorKind::AlreadyExists`.
#[test]
fn test_add_file_duplicate_error() {
    let config = ReversibleVSAConfig::default();
    let source_dir = TempDir::new().unwrap();
    let source_path = create_temp_file(&source_dir, "file1.txt", b"content");

    let mut embr = EmbrFS::new();

    // The first insertion is expected to succeed.
    embr.add_file(&source_path, "file1.txt".to_owned(), false, &config)
        .unwrap();

    // A second insertion under the same logical path must be rejected.
    let second_attempt = embr.add_file(&source_path, "file1.txt".to_owned(), false, &config);
    assert!(second_attempt.is_err());
    let err = second_attempt.unwrap_err();
    assert_eq!(err.kind(), std::io::ErrorKind::AlreadyExists);
}

/// Removing a file should flag its manifest entry as deleted (the entry itself
/// stays in the manifest) and make extraction skip it, while the other file is
/// still restored intact.
#[test]
fn test_remove_file_marks_as_deleted() {
    let config = ReversibleVSAConfig::default();
    let source_dir = TempDir::new().unwrap();
    let doomed_path = create_temp_file(&source_dir, "file1.txt", b"to be removed");
    let kept_path = create_temp_file(&source_dir, "file2.txt", b"to be kept");

    // Ingest both files, then remove only the first.
    let mut embr = EmbrFS::new();
    embr.ingest_file(&doomed_path, "file1.txt".to_owned(), false, &config)
        .unwrap();
    embr.ingest_file(&kept_path, "file2.txt".to_owned(), false, &config)
        .unwrap();

    embr.remove_file("file1.txt", false).unwrap();

    // Both entries remain; only the first carries the deleted flag.
    let entries = &embr.manifest.files;
    assert_eq!(entries.len(), 2);
    assert!(entries[0].deleted);
    assert!(!entries[1].deleted);

    // Extraction must omit the removed file and restore the kept one.
    let out_dir = TempDir::new().unwrap();
    EmbrFS::extract(&embr.engram, &embr.manifest, out_dir.path(), false, &config).unwrap();

    let removed_out = out_dir.path().join("file1.txt");
    let kept_out = out_dir.path().join("file2.txt");
    assert!(!removed_out.exists());
    assert!(kept_out.exists());
    let kept_bytes = std::fs::read(&kept_out).unwrap();
    assert_eq!(kept_bytes, b"to be kept");
}

/// Removing a path from an empty `EmbrFS` must fail with
/// `std::io::ErrorKind::NotFound`.
#[test]
fn test_remove_nonexistent_file_error() {
    let mut embr = EmbrFS::new();

    let outcome = embr.remove_file("nonexistent.txt", false);

    assert!(outcome.is_err());
    let err = outcome.unwrap_err();
    assert_eq!(err.kind(), std::io::ErrorKind::NotFound);
}

/// Removing a file a second time, after it was already removed, must fail with
/// `std::io::ErrorKind::NotFound`.
#[test]
fn test_remove_already_deleted_file_error() {
    let config = ReversibleVSAConfig::default();
    let source_dir = TempDir::new().unwrap();
    let source_path = create_temp_file(&source_dir, "file1.txt", b"content");

    // Ingest the file and remove it once; this first removal must succeed.
    let mut embr = EmbrFS::new();
    embr.ingest_file(&source_path, "file1.txt".to_owned(), false, &config)
        .unwrap();
    embr.remove_file("file1.txt", false).unwrap();

    // The repeated removal is the operation under test.
    let repeat_outcome = embr.remove_file("file1.txt", false);
    assert!(repeat_outcome.is_err());
    let err = repeat_outcome.unwrap_err();
    assert_eq!(err.kind(), std::io::ErrorKind::NotFound);
}

/// Modifying an ingested file is expected to leave the old manifest entry
/// flagged as deleted, append a live entry under the same logical path, and
/// make extraction yield the new bytes.
#[test]
fn test_modify_file_updates_content() {
    let config = ReversibleVSAConfig::default();
    let source_dir = TempDir::new().unwrap();
    let original_path = create_temp_file(&source_dir, "file1.txt", b"original content");

    let mut embr = EmbrFS::new();
    embr.ingest_file(&original_path, "file1.txt".to_owned(), false, &config)
        .unwrap();

    // The replacement bytes live in a different on-disk file but are applied
    // to the same logical path.
    let replacement_path =
        create_temp_file(&source_dir, "file1_modified.txt", b"updated content");
    embr.modify_file(&replacement_path, "file1.txt".to_owned(), false, &config)
        .unwrap();

    // Old entry is kept but deleted; the new entry is live and keeps the path.
    let entries = &embr.manifest.files;
    assert_eq!(entries.len(), 2);
    assert!(entries[0].deleted);
    assert!(!entries[1].deleted);
    assert_eq!(entries[1].path, "file1.txt");

    // Extraction must produce the updated content.
    let out_dir = TempDir::new().unwrap();
    EmbrFS::extract(&embr.engram, &embr.manifest, out_dir.path(), false, &config).unwrap();

    let round_tripped = std::fs::read(out_dir.path().join("file1.txt")).unwrap();
    assert_eq!(round_tripped, b"updated content");
}

/// Modifying a logical path that was never added must return an error.
///
/// Only the presence of an error is checked; the error kind is not asserted.
#[test]
fn test_modify_nonexistent_file_error() {
    let config = ReversibleVSAConfig::default();
    let source_dir = TempDir::new().unwrap();
    let source_path = create_temp_file(&source_dir, "file1.txt", b"content");

    let mut embr = EmbrFS::new();

    // The on-disk file exists, but "nonexistent.txt" is unknown to the engram.
    let outcome = embr.modify_file(&source_path, "nonexistent.txt".to_owned(), false, &config);
    assert!(outcome.is_err());
}

/// After removing two of three ingested files, `compact` should drop their
/// manifest entries, reduce the total chunk count, and leave only the
/// surviving file extractable.
#[test]
fn test_compact_removes_deleted_files() {
    let config = ReversibleVSAConfig::default();
    let source_dir = TempDir::new().unwrap();
    let first_path = create_temp_file(&source_dir, "file1.txt", b"file to delete");
    let second_path = create_temp_file(&source_dir, "file2.txt", b"file to keep");
    let third_path = create_temp_file(&source_dir, "file3.txt", b"another deleted");

    // Ingest all three files and record the chunk count as a baseline.
    let mut embr = EmbrFS::new();
    embr.ingest_file(&first_path, "file1.txt".to_owned(), false, &config)
        .unwrap();
    embr.ingest_file(&second_path, "file2.txt".to_owned(), false, &config)
        .unwrap();
    embr.ingest_file(&third_path, "file3.txt".to_owned(), false, &config)
        .unwrap();
    let chunks_at_peak = embr.manifest.total_chunks;

    // Remove the first and third files, in that order.
    for doomed in ["file1.txt", "file3.txt"] {
        embr.remove_file(doomed, false).unwrap();
    }

    // Removal alone keeps all three manifest entries.
    assert_eq!(embr.manifest.files.len(), 3);

    embr.compact(false, &config).unwrap();

    // Compaction leaves a single live entry for the surviving file.
    assert_eq!(embr.manifest.files.len(), 1);
    let survivor = &embr.manifest.files[0];
    assert_eq!(survivor.path, "file2.txt");
    assert!(!survivor.deleted);

    // The chunk count must have gone down.
    assert!(embr.manifest.total_chunks < chunks_at_peak);

    // Only the surviving file may appear in the extraction output.
    let out_dir = TempDir::new().unwrap();
    EmbrFS::extract(&embr.engram, &embr.manifest, out_dir.path(), false, &config).unwrap();

    let out_root = out_dir.path();
    assert!(!out_root.join("file1.txt").exists());
    assert!(out_root.join("file2.txt").exists());
    assert!(!out_root.join("file3.txt").exists());

    let survivor_bytes = std::fs::read(out_root.join("file2.txt")).unwrap();
    assert_eq!(survivor_bytes, b"file to keep");
}

/// Compacting a freshly created, empty `EmbrFS` must succeed and leave the
/// manifest with no files and zero chunks.
#[test]
fn test_compact_empty_engram() {
    let config = ReversibleVSAConfig::default();
    let mut embr = EmbrFS::new();

    // Nothing has been ingested; compaction must still return Ok.
    embr.compact(false, &config).unwrap();

    let manifest = &embr.manifest;
    assert_eq!(manifest.files.len(), 0);
    assert_eq!(manifest.total_chunks, 0);
}

/// Compacting an engram with no deleted entries must keep its single file.
///
/// After compaction the chunk count may be at most one lower than before
/// (re-encoding may change it slightly). The lower bound uses
/// `saturating_sub`, so the comparison cannot underflow if the engram ever
/// reports zero chunks before compaction. A plain `chunks_before - 1` would
/// panic in debug builds and wrap to a huge value in release builds.
#[test]
fn test_compact_no_deleted_files() {
    let temp_dir = TempDir::new().unwrap();
    let file1 = create_temp_file(&temp_dir, "file1.txt", b"keep me");

    let mut fs = EmbrFS::new();
    let config = ReversibleVSAConfig::default();

    fs.ingest_file(&file1, "file1.txt".to_string(), false, &config).unwrap();
    let chunks_before = fs.manifest.total_chunks;

    // Compact without any deletions
    fs.compact(false, &config).unwrap();

    // Should still have same file
    assert_eq!(fs.manifest.files.len(), 1);
    assert_eq!(fs.manifest.files[0].path, "file1.txt");

    // Chunk count should be similar (may differ slightly due to re-encoding).
    // Underflow-safe lower bound: never subtract below zero.
    assert!(fs.manifest.total_chunks >= chunks_before.saturating_sub(1));
}

/// Interleaves adds and a removal (add 1, add 2, remove 1, add 3), then checks
/// extraction both before and after compaction.
#[test]
fn test_multiple_add_remove_cycle() {
    let source_dir = TempDir::new().unwrap();
    let config = ReversibleVSAConfig::default();
    let mut embr = EmbrFS::new();

    // Step 1: add file 1.
    let first_path = create_temp_file(&source_dir, "file1.txt", b"first");
    embr.add_file(&first_path, "file1.txt".to_owned(), false, &config)
        .unwrap();

    // Step 2: add file 2.
    let second_path = create_temp_file(&source_dir, "file2.txt", b"second");
    embr.add_file(&second_path, "file2.txt".to_owned(), false, &config)
        .unwrap();

    // Step 3: remove file 1.
    embr.remove_file("file1.txt", false).unwrap();

    // Step 4: add file 3.
    let third_path = create_temp_file(&source_dir, "file3.txt", b"third");
    embr.add_file(&third_path, "file3.txt".to_owned(), false, &config)
        .unwrap();

    // Before compaction the removed file still occupies a manifest entry.
    assert_eq!(embr.manifest.files.len(), 3);

    // First extraction: file 1 must be absent, files 2 and 3 present.
    let pre_compact_dir = TempDir::new().unwrap();
    EmbrFS::extract(&embr.engram, &embr.manifest, pre_compact_dir.path(), false, &config)
        .unwrap();

    let pre_root = pre_compact_dir.path();
    assert!(!pre_root.join("file1.txt").exists());
    assert!(pre_root.join("file2.txt").exists());
    assert!(pre_root.join("file3.txt").exists());

    // Compaction drops the removed entry.
    embr.compact(false, &config).unwrap();
    assert_eq!(embr.manifest.files.len(), 2);

    // Second extraction: remaining files must still match their sources.
    let post_compact_dir = TempDir::new().unwrap();
    EmbrFS::extract(&embr.engram, &embr.manifest, post_compact_dir.path(), false, &config)
        .unwrap();

    let second_out = std::fs::read(post_compact_dir.path().join("file2.txt")).unwrap();
    let third_out = std::fs::read(post_compact_dir.path().join("file3.txt")).unwrap();
    assert_eq!(second_out, b"second");
    assert_eq!(third_out, b"third");
}

/// Adds a 20,000-byte file and checks that it is stored as more than one chunk
/// and extracts back to identical bytes.
#[test]
fn test_add_large_file_incrementally() {
    let config = ReversibleVSAConfig::default();
    let source_dir = TempDir::new().unwrap();

    // 20,000 copies of b'X'; the test expects this to span several chunks.
    let payload: Vec<u8> = b"X".repeat(20_000);
    let source_path = create_temp_file(&source_dir, "large.bin", &payload);

    let mut embr = EmbrFS::new();
    embr.add_file(&source_path, "large.bin".to_owned(), false, &config)
        .unwrap();

    // The single manifest entry must reference more than one chunk.
    let chunk_count = embr.manifest.files[0].chunks.len();
    assert!(chunk_count > 1);

    // Extraction must reproduce the payload exactly.
    let out_dir = TempDir::new().unwrap();
    EmbrFS::extract(&embr.engram, &embr.manifest, out_dir.path(), false, &config).unwrap();

    let round_tripped = std::fs::read(out_dir.path().join("large.bin")).unwrap();
    assert_eq!(round_tripped, payload);
}

/// Replaces a 5-byte file with a 10,000-byte one through `modify_file` and
/// checks that extraction yields the larger content.
#[test]
fn test_modify_with_different_size() {
    let source_dir = TempDir::new().unwrap();
    let config = ReversibleVSAConfig::default();
    let mut embr = EmbrFS::new();

    // Initial version: a tiny file.
    let small_path = create_temp_file(&source_dir, "file.txt", b"small");
    embr.ingest_file(&small_path, "file.txt".to_owned(), false, &config)
        .unwrap();

    // Replacement version: 10,000 copies of b'L', applied to the same logical path.
    let replacement: Vec<u8> = b"L".repeat(10_000);
    let large_path = create_temp_file(&source_dir, "file_large.txt", &replacement);
    embr.modify_file(&large_path, "file.txt".to_owned(), false, &config)
        .unwrap();

    // Extraction must return the replacement bytes.
    let out_dir = TempDir::new().unwrap();
    EmbrFS::extract(&embr.engram, &embr.manifest, out_dir.path(), false, &config).unwrap();

    let round_tripped = std::fs::read(out_dir.path().join("file.txt")).unwrap();
    assert_eq!(round_tripped, replacement);
}

/// Adds a file containing every byte value 0..=255 once and checks that
/// extraction reproduces it bit-for-bit.
#[test]
fn test_add_binary_file() {
    let config = ReversibleVSAConfig::default();
    let source_dir = TempDir::new().unwrap();

    // One occurrence of each possible byte value, in ascending order.
    let every_byte: Vec<u8> = (u8::MIN..=u8::MAX).collect();
    let source_path = create_temp_file(&source_dir, "binary.bin", &every_byte);

    let mut embr = EmbrFS::new();
    embr.add_file(&source_path, "binary.bin".to_owned(), false, &config)
        .unwrap();

    // The extracted file must be identical to the input.
    let out_dir = TempDir::new().unwrap();
    EmbrFS::extract(&embr.engram, &embr.manifest, out_dir.path(), false, &config).unwrap();

    let round_tripped = std::fs::read(out_dir.path().join("binary.bin")).unwrap();
    assert_eq!(round_tripped, every_byte);
}

/// Ingests one file, compacts without deleting anything, and checks that the
/// file still extracts to its exact original bytes.
///
/// NOTE(review): correction stats are fetched before compaction but never
/// compared with anything; only the extraction result is asserted.
#[test]
fn test_compact_preserves_corrections() {
    let config = ReversibleVSAConfig::default();
    let source_dir = TempDir::new().unwrap();
    let source_path =
        create_temp_file(&source_dir, "file1.txt", b"test content with corrections");

    let mut embr = EmbrFS::new();
    embr.ingest_file(&source_path, "file1.txt".to_owned(), false, &config)
        .unwrap();

    // Snapshot of the correction stats prior to compaction (currently unused).
    let _stats_before = embr.correction_stats();

    embr.compact(false, &config).unwrap();

    // Extraction after compaction must still be exact.
    let out_dir = TempDir::new().unwrap();
    EmbrFS::extract(&embr.engram, &embr.manifest, out_dir.path(), false, &config).unwrap();

    let round_tripped = std::fs::read(out_dir.path().join("file1.txt")).unwrap();
    assert_eq!(round_tripped, b"test content with corrections");
}

/// Builds the same two-file engram twice, once with two `ingest_file` calls
/// and once with `ingest_file` followed by `add_file`, and checks that both
/// extract to the same bytes for each file.
#[test]
fn test_incremental_updates_maintain_determinism() {
    let config = ReversibleVSAConfig::default();
    let source_dir = TempDir::new().unwrap();
    let first_path = create_temp_file(&source_dir, "file1.txt", b"deterministic content");
    let second_path = create_temp_file(&source_dir, "file2.txt", b"more content");

    // Variant A: both files go through `ingest_file`.
    let mut ingested = EmbrFS::new();
    ingested
        .ingest_file(&first_path, "file1.txt".to_owned(), false, &config)
        .unwrap();
    ingested
        .ingest_file(&second_path, "file2.txt".to_owned(), false, &config)
        .unwrap();

    // Variant B: the second file is added incrementally with `add_file`.
    let mut incremental = EmbrFS::new();
    incremental
        .ingest_file(&first_path, "file1.txt".to_owned(), false, &config)
        .unwrap();
    incremental
        .add_file(&second_path, "file2.txt".to_owned(), false, &config)
        .unwrap();

    // Extract each variant into its own directory.
    let ingested_out = TempDir::new().unwrap();
    let incremental_out = TempDir::new().unwrap();

    EmbrFS::extract(
        &ingested.engram,
        &ingested.manifest,
        ingested_out.path(),
        false,
        &config,
    )
    .unwrap();
    EmbrFS::extract(
        &incremental.engram,
        &incremental.manifest,
        incremental_out.path(),
        false,
        &config,
    )
    .unwrap();

    let ingested_first = std::fs::read(ingested_out.path().join("file1.txt")).unwrap();
    let ingested_second = std::fs::read(ingested_out.path().join("file2.txt")).unwrap();
    let incremental_first = std::fs::read(incremental_out.path().join("file1.txt")).unwrap();
    let incremental_second = std::fs::read(incremental_out.path().join("file2.txt")).unwrap();

    // The two construction paths must agree file by file.
    assert_eq!(ingested_first, incremental_first);
    assert_eq!(ingested_second, incremental_second);
}

/// Runs add, remove, compact, then add of a different file, and checks that
/// only the second file remains in the manifest and in the extraction output.
#[test]
fn test_add_after_delete_and_compact() {
    let config = ReversibleVSAConfig::default();
    let source_dir = TempDir::new().unwrap();
    let first_path = create_temp_file(&source_dir, "file1.txt", b"first");
    let second_path = create_temp_file(&source_dir, "file2.txt", b"second");

    let mut embr = EmbrFS::new();

    // Add file 1, remove it, compact it away, then add file 2.
    embr.add_file(&first_path, "file1.txt".to_owned(), false, &config)
        .unwrap();
    embr.remove_file("file1.txt", false).unwrap();
    embr.compact(false, &config).unwrap();
    embr.add_file(&second_path, "file2.txt".to_owned(), false, &config)
        .unwrap();

    // The manifest must contain file 2 only.
    let entries = &embr.manifest.files;
    assert_eq!(entries.len(), 1);
    assert_eq!(entries[0].path, "file2.txt");

    // Extraction must not resurrect file 1 and must restore file 2.
    let out_dir = TempDir::new().unwrap();
    EmbrFS::extract(&embr.engram, &embr.manifest, out_dir.path(), false, &config).unwrap();

    assert!(!out_dir.path().join("file1.txt").exists());
    let second_out = std::fs::read(out_dir.path().join("file2.txt")).unwrap();
    assert_eq!(second_out, b"second");
}