use serde::{Deserialize, Serialize};
use std::collections::HashSet;
use std::path::PathBuf;
use walcraft::{Size, Wal, WalBuilder};
/// Record type that these tests append to, and read back from, the WAL.
///
/// `write_logs` fills `payload` with 500 copies of `(seq % 256) as u8`, and
/// `assert_entries_valid` re-derives that byte from `seq` to check each entry.
#[derive(Debug, PartialEq, Serialize, Deserialize)]
struct Log {
    /// Position of the record in write order, starting at 0.
    seq: usize,
    /// Filler bytes whose value is derived from `seq`.
    payload: Vec<u8>,
}
/// Builds a [`Wal`] rooted at `dir` with the settings shared by every test.
///
/// The WAL uses 4 KB pages, a storage size of `storage_mb` megabytes and a
/// sync interval of 50 (the unit is defined by `walcraft`, not visible here).
///
/// # Panics
///
/// Panics if the builder returns an error.
fn build_wal(dir: &str, storage_mb: usize) -> Wal {
    let page = Size::Kb(4);
    let capacity = Size::Mb(storage_mb);

    let built = WalBuilder::new()
        .location(dir)
        .page_size(page)
        .storage_size(capacity)
        .sync_interval(50)
        .build();

    built.unwrap()
}
/// Starts from a clean `dir` and appends `count` [`Log`] records to a new WAL.
///
/// Record `i` carries `seq == i` and a 500-byte payload in which every byte
/// equals `(i % 256) as u8`. The WAL is flushed before returning.
///
/// # Panics
///
/// Panics if building the WAL, appending a record, or flushing fails.
fn write_logs(dir: &str, storage_mb: usize, count: usize) {
    // Best-effort cleanup: the directory may not exist on a first run, so the
    // result is intentionally discarded.
    let _ = std::fs::remove_dir_all(dir);

    let wal = build_wal(dir, storage_mb);
    for seq in 0..count {
        let fill = (seq % 256) as u8;
        let record = Log {
            seq,
            payload: vec![fill; 500],
        };
        wal.append_struct(record).unwrap();
    }
    wal.flush().unwrap();
}
/// Re-opens the WAL in `dir` and decodes every entry it yields into a [`Log`].
///
/// Entries are returned in the order the WAL iterator produces them.
///
/// # Panics
///
/// Panics if the WAL cannot be built or iterated, or if an entry does not
/// decode as a `Log`.
fn read_logs(dir: &str, storage_mb: usize) -> Vec<Log> {
    let wal = build_wal(dir, storage_mb);

    let mut logs = Vec::new();
    for entry in wal.iter().unwrap().into_iter() {
        logs.push(entry.to_struct::<Log>().unwrap());
    }
    logs
}
/// Asserts that `entries` look like an uncorrupted slice of what `write_logs`
/// produced.
///
/// Two properties are checked, in this order:
/// 1. `seq` is strictly increasing from one entry to the next (gaps are
///    allowed, duplicates and reordering are not);
/// 2. every payload is exactly 500 bytes long and consists solely of the byte
///    `(seq % 256) as u8`.
///
/// # Panics
///
/// Panics on the first violated property.
fn assert_entries_valid(entries: &[Log]) {
    // Pair every entry with its successor to compare neighbouring sequence numbers.
    let successors = entries.iter().skip(1);
    for (prev, next) in entries.iter().zip(successors) {
        assert!(
            next.seq > prev.seq,
            "Entries out of order: seq {} followed by {}",
            prev.seq,
            next.seq,
        );
    }

    for log in entries.iter() {
        let fill = (log.seq % 256) as u8;
        assert_eq!(log.payload.len(), 500);

        let uniform = !log.payload.iter().any(|&byte| byte != fill);
        assert!(uniform, "Payload mismatch at seq {}", log.seq);
    }
}
/// Lists the WAL segment file names found in `<dir>/logs`, sorted ascending.
///
/// Only names that start with `wal_` and end with `.bin` are returned; every
/// other directory entry is ignored.
///
/// # Panics
///
/// Panics if the directory cannot be read, if a directory entry cannot be
/// inspected, or if any file name in the directory is not valid UTF-8.
fn wal_files(dir: &str) -> Vec<String> {
    let mut logs_dir = PathBuf::from(dir);
    logs_dir.push("logs");

    let mut names = Vec::new();
    for entry in std::fs::read_dir(&logs_dir).unwrap() {
        let name = entry.unwrap().file_name().into_string().unwrap();
        let is_wal_segment = name.starts_with("wal_") && name.ends_with(".bin");
        if is_wal_segment {
            names.push(name);
        }
    }

    names.sort();
    names
}
/// Extracts the numeric segment id from a WAL file name of the form
/// `wal_<id>.bin` (for example `wal_0000000042.bin` yields `42`).
///
/// The prefix and suffix are checked explicitly instead of being sliced off by
/// byte offset, so a name that merely has the right length is rejected rather
/// than silently parsed.
///
/// # Panics
///
/// Panics with a message naming the offending file if `filename` does not
/// start with `wal_` and end with `.bin`, or if the part in between is not a
/// valid `u32`.
fn parse_file_id(filename: &str) -> u32 {
    let digits = filename
        .strip_prefix("wal_")
        .and_then(|rest| rest.strip_suffix(".bin"))
        .unwrap_or_else(|| {
            panic!(
                "WAL file name {:?} is not of the form wal_<id>.bin",
                filename
            )
        });

    digits.parse().unwrap_or_else(|err| {
        panic!(
            "WAL file name {:?} does not contain a valid u32 id: {}",
            filename, err
        )
    })
}
/// Reads `<dir>/meta.toml` and returns a copy of its `segments` array.
///
/// # Panics
///
/// Panics if the file cannot be read, is not valid TOML, or has no `segments`
/// entry that is an array.
fn read_meta_segments(dir: &str) -> Vec<toml::Value> {
    let mut meta_path = PathBuf::from(dir);
    meta_path.push("meta.toml");

    let raw = std::fs::read_to_string(&meta_path).unwrap();
    let parsed = raw.parse::<toml::Value>().unwrap();

    let segments = parsed["segments"].as_array().unwrap();
    segments.to_vec()
}
/// Writes 5,000 entries with an 8 MB storage size and expects them to be
/// spread over more than one `wal_*.bin` file while all of them remain
/// readable, in order and with intact payloads.
#[test]
fn data_spans_multiple_files() {
    let dir = "./tmp/testing_mf_span";
    let storage_mb = 8;
    let total_entries = 5_000;
    write_logs(dir, storage_mb, total_entries);

    let file_count = wal_files(dir).len();
    assert!(
        file_count > 1,
        "Expected multiple WAL files, got {}",
        file_count,
    );

    let entries = read_logs(dir, storage_mb);
    assert_eq!(entries.len(), total_entries);
    assert_entries_valid(&entries);
}
/// Writes 20,000 entries (500 payload bytes each, so roughly 10 MB of payload)
/// against a 4 MB storage size and expects garbage collection to have dropped
/// some, but not all, of them.
///
/// The surviving entries must still be ordered and intact, and the most
/// recently written entry must be the last one read back.
#[test]
fn gc_preserves_recent_entries() {
    let dir = "./tmp/testing_mf_gc";
    let storage_mb = 4;
    let total_entries = 20_000;

    write_logs(dir, storage_mb, total_entries);
    let entries = read_logs(dir, storage_mb);

    let survivors = entries.len();
    assert!(survivors != 0, "Expected surviving entries after GC");
    assert!(
        survivors < total_entries,
        "GC should have removed some entries, but all {} survived",
        total_entries,
    );

    assert_entries_valid(&entries);

    let newest = entries.last().unwrap();
    assert_eq!(newest.seq, total_entries - 1);
}
/// Checks that `meta.toml` and the `logs` directory agree after writing
/// 20,000 entries against a 4 MB storage size.
///
/// Verified, in order:
/// 1. every `file_id` listed in `meta.toml` has a matching file on disk;
/// 2. every `wal_*.bin` file on disk is listed in `meta.toml`;
/// 3. the sum of the recorded `file_size` values does not exceed the limit;
/// 4. the first listed segment has an id greater than 1, i.e. the oldest
///    files are expected to have been removed.
///
/// Marked `#[ignore]`, so it only runs when ignored tests are requested.
#[test]
#[ignore]
fn files_match_meta_after_gc() {
    let dir = "./tmp/testing_mf_meta";
    let storage_mb = 4;
    write_logs(dir, storage_mb, 20_000);

    let segments = read_meta_segments(dir);
    assert!(!segments.is_empty());

    // NOTE(review): the `as` casts below truncate silently if meta.toml ever
    // holds values outside the target integer range.
    let mut tracked_ids: HashSet<u32> = HashSet::new();
    for segment in &segments {
        tracked_ids.insert(segment["file_id"].as_integer().unwrap() as u32);
    }

    // File names are zero-padded to the decimal width of `u32::MAX`.
    let width = u32::MAX.to_string().len();
    let logs_dir = PathBuf::from(dir).join("logs");
    for id in tracked_ids.iter().copied() {
        let filename = format!("wal_{:0width$}.bin", id, width = width);
        let on_disk = logs_dir.join(&filename).exists();
        assert!(on_disk, "Segment {} in meta.toml has no file on disk", id);
    }

    for file in wal_files(dir) {
        let is_tracked = tracked_ids.contains(&parse_file_id(&file));
        assert!(is_tracked, "Orphan file {} not tracked in meta.toml", file);
    }

    let mut total_size: usize = 0;
    for segment in &segments {
        total_size += segment["file_size"].as_integer().unwrap() as usize;
    }
    let storage_limit = storage_mb * 1024 * 1024;
    assert!(
        total_size <= storage_limit,
        "Total storage {} bytes exceeds limit {} bytes",
        total_size,
        storage_limit,
    );

    let first_id = segments[0]["file_id"].as_integer().unwrap();
    assert!(
        first_id > 1,
        "GC should have removed oldest files, but first segment ID is {}",
        first_id,
    );
}