use embeddenator::{EmbrFS, HierarchicalManifest, Manifest, ReversibleVSAConfig};
use std::fs::{self, File};
use std::path::Path;
use std::sync::{Arc, Barrier};
use std::thread;
use tempfile::TempDir;
/// Populates `dir` with three fixture files of increasing size.
///
/// * `small.txt`  - a short text string.
/// * `medium.txt` - the string `"Medium "` repeated 100 times.
/// * `large.bin`  - 10 000 bytes cycling through the values `0..=255`.
///
/// The files are written in that order; the first I/O error aborts the
/// function and is returned to the caller.
fn create_test_dataset(dir: &Path) -> std::io::Result<()> {
    let cycling_bytes: Vec<u8> = (0u8..=255).cycle().take(10_000).collect();

    let fixtures: [(&str, Vec<u8>); 3] = [
        ("small.txt", b"Small test file".to_vec()),
        ("medium.txt", "Medium ".repeat(100).into_bytes()),
        ("large.bin", cycling_bytes),
    ];

    for (file_name, contents) in fixtures {
        fs::write(dir.join(file_name), contents)?;
    }
    Ok(())
}
/// Builds the fixture dataset under `temp_dir`, ingests it into a fresh
/// `EmbrFS`, and saves the resulting engram and manifest.
///
/// Returns `(engram_path, manifest_path)`, pointing at `test.engram` and
/// `test.manifest.json` directly inside `temp_dir`. The dataset itself is
/// written to an `input` subdirectory. Any error from directory creation,
/// dataset generation, ingestion, or saving is propagated.
fn create_valid_engram_and_manifest(
    temp_dir: &TempDir,
) -> std::io::Result<(std::path::PathBuf, std::path::PathBuf)> {
    let root = temp_dir.path();

    let source_dir = root.join("input");
    fs::create_dir(&source_dir)?;
    create_test_dataset(&source_dir)?;

    let vsa_config = ReversibleVSAConfig::default();
    let mut embr = EmbrFS::new();
    embr.ingest_directory(&source_dir, false, &vsa_config)?;

    let artifacts = (root.join("test.engram"), root.join("test.manifest.json"));
    embr.save_engram(&artifacts.0)?;
    embr.save_manifest(&artifacts.1)?;
    Ok(artifacts)
}
/// Corrupts the file at `path` in place by inverting randomly chosen bytes.
///
/// Exactly `min(num_bytes, file_len)` *distinct* byte positions are XOR-ed
/// with `0xFF`. Positions are drawn without replacement (a partial
/// Fisher-Yates shuffle) so that no byte is flipped twice: a second flip
/// would restore the original value and silently undo the corruption.
///
/// An empty file is left untouched. I/O errors from reading or writing the
/// file are returned.
fn corrupt_file_random(path: &Path, num_bytes: usize) -> std::io::Result<()> {
    use rand::Rng;

    let mut data = fs::read(path)?;
    if data.is_empty() {
        return Ok(());
    }

    let mut rng = rand::thread_rng();
    // Cannot corrupt more distinct bytes than the file contains.
    let target = num_bytes.min(data.len());
    let mut positions: Vec<usize> = (0..data.len()).collect();

    for slot in 0..target {
        // Choose among the positions that have not been used yet and move the
        // winner into `slot`, so it can never be selected again.
        let pick = rng.gen_range(slot..positions.len());
        positions.swap(slot, pick);
        data[positions[slot]] ^= 0xFF;
    }

    fs::write(path, data)?;
    Ok(())
}
/// Shortens the file at `path` to its first `new_size` bytes.
///
/// If the file is already `new_size` bytes long or shorter it is not
/// rewritten. I/O errors from reading or writing the file are returned.
fn truncate_file(path: &Path, new_size: usize) -> std::io::Result<()> {
    let mut contents = fs::read(path)?;
    if contents.len() > new_size {
        contents.truncate(new_size);
        fs::write(path, &contents)?;
    }
    Ok(())
}
/// A saved engram that has been passed through `corrupt_file_random` with a
/// byte count of half its length must be rejected by `EmbrFS::load_engram`,
/// and the reported error must render to a non-empty message.
#[test]
fn test_corrupted_engram_recovery() {
    let scratch = TempDir::new().expect("Failed to create temp dir");
    let (engram_path, _) =
        create_valid_engram_and_manifest(&scratch).expect("Failed to create valid engram");

    let engram_len = fs::metadata(&engram_path).unwrap().len() as usize;
    corrupt_file_random(&engram_path, engram_len / 2).expect("Failed to corrupt file");

    let outcome = EmbrFS::load_engram(&engram_path);
    assert!(
        outcome.is_err(),
        "Loading heavily corrupted engram should fail, but succeeded"
    );

    if let Err(failure) = outcome {
        let rendered = failure.to_string();
        assert!(!rendered.is_empty(), "Error message should not be empty");
    } else {
        panic!("Expected error but got Ok");
    }
}
/// An engram cut down to a quarter of its saved size must fail to load, and
/// the error must render to a non-empty message.
#[test]
fn test_truncated_engram_file() {
    let scratch = TempDir::new().expect("Failed to create temp dir");
    let (engram_path, _) =
        create_valid_engram_and_manifest(&scratch).expect("Failed to create valid engram");

    let full_len = fs::metadata(&engram_path)
        .expect("Failed to get file metadata")
        .len() as usize;
    // Keep only the first quarter of the file.
    truncate_file(&engram_path, full_len / 4).expect("Failed to truncate file");

    let outcome = EmbrFS::load_engram(&engram_path);
    assert!(
        outcome.is_err(),
        "Loading truncated engram should fail, but succeeded"
    );

    match outcome {
        Ok(_) => panic!("Expected error but got Ok"),
        Err(failure) => assert!(
            !failure.to_string().is_empty(),
            "Error message should not be empty"
        ),
    }
}
/// A zero-byte file must not load as an engram.
#[test]
fn test_empty_engram_file() {
    let scratch = TempDir::new().expect("Failed to create temp dir");
    let blank_engram = scratch.path().join("empty.engram");

    // Creating the file is all that is needed; the handle is closed right away.
    drop(File::create(&blank_engram).expect("Failed to create empty file"));

    let outcome = EmbrFS::load_engram(&blank_engram);
    assert!(
        outcome.is_err(),
        "Loading empty engram should fail, but succeeded"
    );
}
/// A file containing plain English text instead of serialized engram data
/// must be rejected by `EmbrFS::load_engram`.
#[test]
fn test_non_bincode_engram_file() {
    let scratch = TempDir::new().expect("Failed to create temp dir");
    let bogus_engram = scratch.path().join("invalid.engram");

    let plain_text = "This is not a valid bincode engram";
    fs::write(&bogus_engram, plain_text).expect("Failed to write file");

    let outcome = EmbrFS::load_engram(&bogus_engram);
    assert!(
        outcome.is_err(),
        "Loading non-bincode engram should fail, but succeeded"
    );
}
/// A manifest whose JSON is cut off mid-object must fail to load, and the
/// error must render to a non-empty message.
#[test]
fn test_malformed_json_manifest() {
    let scratch = TempDir::new().expect("Failed to create temp dir");
    let manifest_path = scratch.path().join("malformed.manifest.json");

    // Deliberately unterminated: the array and both objects are left open.
    let broken_json = r#"{"files": [{"path": "test.txt""#;
    fs::write(&manifest_path, broken_json).expect("Failed to write malformed manifest");

    let outcome = EmbrFS::load_manifest(&manifest_path);
    assert!(
        outcome.is_err(),
        "Loading malformed manifest should fail, but succeeded"
    );

    match outcome.err() {
        Some(failure) => assert!(
            !failure.to_string().is_empty(),
            "Error message should not be empty"
        ),
        None => panic!("Expected error but got Ok"),
    }
}
/// A syntactically valid manifest that contains nothing but an empty `files`
/// array is expected to be rejected, with a non-empty error message.
#[test]
fn test_manifest_missing_required_fields() {
    let scratch = TempDir::new().expect("Failed to create temp dir");
    let manifest_path = scratch.path().join("incomplete.manifest.json");

    let files_only_json = r#"{"files": []}"#;
    fs::write(&manifest_path, files_only_json).expect("Failed to write incomplete manifest");

    let outcome = EmbrFS::load_manifest(&manifest_path);
    assert!(
        outcome.is_err(),
        "Loading incomplete manifest should fail, but succeeded"
    );

    if let Err(failure) = outcome {
        let rendered = failure.to_string();
        assert!(!rendered.is_empty(), "Error message should not be empty");
    } else {
        panic!("Expected error but got Ok");
    }
}
/// A manifest that supplies the string `"not_a_number"` for `total_chunks`
/// is expected to fail to load.
#[test]
fn test_manifest_invalid_field_types() {
    let scratch = TempDir::new().expect("Failed to create temp dir");
    let manifest_path = scratch.path().join("invalid_types.manifest.json");

    let wrongly_typed_json = r#"{"files": [], "total_chunks": "not_a_number"}"#;
    fs::write(&manifest_path, wrongly_typed_json).expect("Failed to write invalid manifest");

    let outcome = EmbrFS::load_manifest(&manifest_path);
    assert!(
        outcome.is_err(),
        "Loading manifest with invalid types should fail, but succeeded"
    );
}
/// Documents how a `HierarchicalManifest` carrying an unknown future
/// `version` behaves under plain `serde_json` deserialization.
///
/// Neither outcome is required. If the document deserializes, the version
/// number must come back unchanged, and a warning is printed to stderr noting
/// that no version validation exists. If deserialization is rejected, nothing
/// further is checked.
#[test]
fn test_hierarchical_manifest_version_mismatch() {
    let temp_dir = TempDir::new().expect("Failed to create temp dir");
    let manifest_path = temp_dir.path().join("future_version.json");

    let future_manifest = serde_json::json!({
        "version": 999,
        "levels": [],
        "sub_engrams": {}
    });
    fs::write(&manifest_path, future_manifest.to_string()).expect("Failed to write manifest");

    let result: Result<HierarchicalManifest, _> =
        serde_json::from_reader(File::open(&manifest_path).unwrap());

    // Bind the parsed value directly rather than checking `is_ok()` and then
    // calling `unwrap()`.
    if let Ok(manifest) = result {
        assert_eq!(
            manifest.version, 999,
            "Future version should be preserved"
        );
        eprintln!("Warning: No version validation currently implemented");
    }
}
/// A manifest that contains a directory-traversal style path
/// (`../../../../etc/passwd`) is still expected to load successfully.
///
/// The entry is appended to an otherwise valid manifest, which is written to
/// a second file and loaded back through `EmbrFS::load_manifest`.
#[test]
fn test_manifest_with_invalid_paths() {
    let scratch = TempDir::new().expect("Failed to create temp dir");
    let (_, manifest_path) =
        create_valid_engram_and_manifest(&scratch).expect("Failed to create valid manifest");

    let source_file = File::open(&manifest_path).unwrap();
    let mut manifest: Manifest =
        serde_json::from_reader(source_file).expect("Failed to read manifest");

    let traversal_entry = embeddenator::FileEntry {
        path: "../../../../etc/passwd".to_string(),
        is_text: true,
        size: 100,
        chunks: vec![999],
        deleted: false,
    };
    manifest.files.push(traversal_entry);

    let tampered_path = scratch.path().join("modified.manifest.json");
    let sink = File::create(&tampered_path).unwrap();
    serde_json::to_writer_pretty(sink, &manifest).expect("Failed to write modified manifest");

    let reloaded = EmbrFS::load_manifest(&tampered_path);
    assert!(
        reloaded.is_ok(),
        "Manifest with suspicious paths should deserialize"
    );
}
/// A zero-byte file must not load as a manifest.
#[test]
fn test_empty_manifest() {
    let scratch = TempDir::new().expect("Failed to create temp dir");
    let blank_manifest = scratch.path().join("empty.manifest.json");

    // Creating the file is all that is needed; the handle is closed right away.
    drop(File::create(&blank_manifest).expect("Failed to create empty file"));

    let outcome = EmbrFS::load_manifest(&blank_manifest);
    assert!(
        outcome.is_err(),
        "Loading empty manifest should fail, but succeeded"
    );
}
/// A manifest declaring a 10^12-byte file and a `total_chunks` of 10^12,
/// while listing 1000 chunk ids, is still expected to load successfully.
#[test]
fn test_extremely_large_chunk_count() {
    let scratch = TempDir::new().expect("Failed to create temp dir");
    let manifest_path = scratch.path().join("huge_chunks.manifest.json");

    let declared_magnitude = 1_000_000_000_000_u64;
    let chunk_ids = vec![0u32; 1000];
    let oversized_manifest = serde_json::json!({
        "files": [{
            "path": "fake.txt",
            "is_text": true,
            "size": declared_magnitude,
            "chunks": chunk_ids
        }],
        "total_chunks": declared_magnitude
    });
    fs::write(&manifest_path, oversized_manifest.to_string()).expect("Failed to write manifest");

    let outcome = EmbrFS::load_manifest(&manifest_path);
    assert!(
        outcome.is_ok(),
        "Loading manifest with large counts should succeed"
    );
}
/// Ingesting a directory that holds a single 10 MB file should succeed and
/// leave the engram's codebook non-empty.
#[test]
fn test_memory_limit_graceful_failure() {
    let scratch = TempDir::new().expect("Failed to create temp dir");
    let source_dir = scratch.path().join("input");
    fs::create_dir(&source_dir).expect("Failed to create input dir");

    // 10 000 000 bytes, all set to 0xAB.
    fs::write(source_dir.join("huge.bin"), vec![0xAB_u8; 10_000_000])
        .expect("Failed to write large file");

    let vsa_config = ReversibleVSAConfig::default();
    let mut embr = EmbrFS::new();
    let ingest_outcome = embr.ingest_directory(&source_dir, false, &vsa_config);
    assert!(
        ingest_outcome.is_ok(),
        "Ingesting 10MB file should succeed: {:?}",
        ingest_outcome.err()
    );

    let codebook_entries = embr.engram.codebook.len();
    assert!(codebook_entries > 0, "Codebook should contain chunks");
}
/// Ingestion should succeed for a file nested 100 directories deep
/// (`input/level_0/level_1/.../level_99/deep_file.txt`).
#[test]
fn test_very_deep_directory_structure() {
    let scratch = TempDir::new().expect("Failed to create temp dir");
    let source_dir = scratch.path().join("input");

    // Append one `level_N` component per nesting level.
    let deepest_dir = (0..100).fold(source_dir.clone(), |partial, depth| {
        partial.join(format!("level_{}", depth))
    });
    fs::create_dir_all(&deepest_dir).expect("Failed to create deep structure");
    fs::write(deepest_dir.join("deep_file.txt"), "Deep file content")
        .expect("Failed to write deep file");

    let vsa_config = ReversibleVSAConfig::default();
    let mut embr = EmbrFS::new();
    let ingest_outcome = embr.ingest_directory(&source_dir, false, &vsa_config);
    assert!(
        ingest_outcome.is_ok(),
        "Ingesting deep directory should succeed: {:?}",
        ingest_outcome.err()
    );
}
/// Five threads, released together by a barrier, each load the same engram
/// and manifest files; every load must succeed.
#[test]
fn test_concurrent_read_safety() {
    let scratch = TempDir::new().expect("Failed to create temp dir");
    let (engram_path, manifest_path) =
        create_valid_engram_and_manifest(&scratch).expect("Failed to create valid engram");

    let shared_engram = Arc::new(engram_path);
    let shared_manifest = Arc::new(manifest_path);
    let start_gate = Arc::new(Barrier::new(5));

    // All threads must be spawned before any is joined; otherwise the barrier
    // would never reach its count.
    let readers: Vec<_> = (0..5)
        .map(|thread_id| {
            let engram = Arc::clone(&shared_engram);
            let manifest = Arc::clone(&shared_manifest);
            let gate = Arc::clone(&start_gate);
            thread::spawn(move || {
                gate.wait();
                let engram_result = EmbrFS::load_engram(&*engram);
                let manifest_result = EmbrFS::load_manifest(&*manifest);
                (thread_id, engram_result.is_ok(), manifest_result.is_ok())
            })
        })
        .collect();

    for reader in readers {
        let (thread_id, engram_ok, manifest_ok) = reader.join().expect("Thread panicked");
        assert!(engram_ok, "Thread {} failed to load engram", thread_id);
        assert!(manifest_ok, "Thread {} failed to load manifest", thread_id);
    }
}
/// Three threads, released together by a barrier, each ingest their own input
/// directory and save an engram and manifest under thread-specific names.
/// Afterwards every expected output file must exist.
#[test]
fn test_concurrent_write_to_different_files() {
    let temp_dir = TempDir::new().expect("Failed to create temp dir");
    let root = temp_dir.path();
    let start_gate = Arc::new(Barrier::new(3));

    // All threads must be spawned before any is joined; otherwise the barrier
    // would never reach its count.
    let writers: Vec<_> = (0..3)
        .map(|thread_id| {
            let root = root.to_path_buf();
            let gate = Arc::clone(&start_gate);
            thread::spawn(move || {
                gate.wait();

                let source_dir = root.join(format!("input_{}", thread_id));
                fs::create_dir(&source_dir).expect("Failed to create input dir");
                fs::write(source_dir.join("file.txt"), format!("Thread {}", thread_id))
                    .expect("Failed to write file");

                let vsa_config = ReversibleVSAConfig::default();
                let mut embr = EmbrFS::new();
                embr.ingest_directory(&source_dir, false, &vsa_config)
                    .expect("Failed to ingest");

                let engram_out = root.join(format!("thread_{}.engram", thread_id));
                let manifest_out = root.join(format!("thread_{}.manifest.json", thread_id));
                embr.save_engram(&engram_out).expect("Failed to save engram");
                embr.save_manifest(&manifest_out)
                    .expect("Failed to save manifest");

                thread_id
            })
        })
        .collect();

    for writer in writers {
        let thread_id = writer.join().expect("Thread panicked");
        let expected_engram = root.join(format!("thread_{}.engram", thread_id));
        let expected_manifest = root.join(format!("thread_{}.manifest.json", thread_id));
        assert!(
            expected_engram.exists(),
            "Thread {} engram not created",
            thread_id
        );
        assert!(
            expected_manifest.exists(),
            "Thread {} manifest not created",
            thread_id
        );
    }
}
/// The same engram file must load before corruption and fail to load after
/// `corrupt_file_random` has been applied with a byte count of half its
/// length; the failure must render to a non-empty message.
#[test]
fn test_read_during_corruption_detection() {
    let scratch = TempDir::new().expect("Failed to create temp dir");
    let (engram_path, _) =
        create_valid_engram_and_manifest(&scratch).expect("Failed to create valid engram");

    // Baseline: the untouched file is readable.
    let pristine = EmbrFS::load_engram(&engram_path);
    assert!(pristine.is_ok(), "Original engram should load");

    let engram_len = fs::metadata(&engram_path).unwrap().len() as usize;
    corrupt_file_random(&engram_path, engram_len / 2).expect("Failed to corrupt");

    let damaged = EmbrFS::load_engram(&engram_path);
    assert!(
        damaged.is_err(),
        "Heavily corrupted engram should fail to load"
    );

    match damaged.err() {
        Some(failure) => assert!(
            !failure.to_string().is_empty(),
            "Error message should not be empty"
        ),
        None => panic!("Expected error but got Ok"),
    }
}
/// Loading an engram from a path that does not exist must fail, and the
/// error must render to a non-empty message.
#[test]
fn test_error_messages_contain_context() {
    let scratch = TempDir::new().expect("Failed to create temp dir");
    let missing_engram = scratch.path().join("does_not_exist.engram");

    let outcome = EmbrFS::load_engram(&missing_engram);
    assert!(outcome.is_err(), "Should fail for non-existent file");

    if let Err(failure) = outcome {
        let rendered = failure.to_string();
        assert!(!rendered.is_empty(), "Error message should not be empty");
    } else {
        panic!("Expected error but got Ok");
    }
}
/// Each of several invalid payloads (no bytes, 1000 bytes of `0xFF`, and
/// 10 bytes of `0x42`) must be rejected by `EmbrFS::load_engram` rather than
/// loading successfully.
#[test]
fn test_no_silent_failures_on_invalid_data() {
    let scratch = TempDir::new().expect("Failed to create temp dir");

    let invalid_payloads: [(&str, Vec<u8>); 3] = [
        ("empty.engram", Vec::new()),
        ("garbage.engram", vec![0xFF; 1000]),
        ("partial.engram", vec![0x42; 10]),
    ];

    for (filename, payload) in invalid_payloads {
        let target = scratch.path().join(filename);
        fs::write(&target, payload).expect("Failed to write test file");

        let outcome = EmbrFS::load_engram(&target);
        assert!(
            outcome.is_err(),
            "Loading {} should fail, but succeeded",
            filename
        );
    }
}
/// A manifest saved from the fixture dataset must load back with a positive
/// `total_chunks`, at least one file entry, and a non-empty path and chunk
/// list on every entry.
#[test]
fn test_manifest_load_preserves_all_data() {
    let scratch = TempDir::new().expect("Failed to create temp dir");
    let (_, manifest_path) =
        create_valid_engram_and_manifest(&scratch).expect("Failed to create valid manifest");

    let loaded = EmbrFS::load_manifest(&manifest_path).expect("Failed to load manifest");

    assert!(
        loaded.total_chunks > 0,
        "Manifest should have chunks recorded"
    );
    assert!(
        !loaded.files.is_empty(),
        "Manifest should have file entries"
    );

    // Per entry, the path is checked before the chunk list.
    for entry in loaded.files.iter() {
        assert!(
            !entry.path.is_empty(),
            "File entry should have non-empty path"
        );
        assert!(
            !entry.chunks.is_empty(),
            "File entry should reference chunks"
        );
    }
}