mod common;
use common::build_test_index;
use leann_core::index::{IndexMeta, IndexPaths};
use std::io::Read;
/// Checks that the meta JSON written for a test index contains the top-level
/// keys and the per-source keys this test treats as required by the Python side.
#[test]
fn test_meta_json_has_python_required_fields() {
    let tmp = tempfile::tempdir().unwrap();
    let index_path = build_test_index(10, tmp.path(), true, true).unwrap();
    let layout = IndexPaths::new(&index_path);
    let meta_text = std::fs::read_to_string(layout.meta_path()).unwrap();
    let meta: serde_json::Value = serde_json::from_str(&meta_text).unwrap();

    // Top-level keys: only presence is checked, not the values.
    let has_key = |key: &str| meta.get(key).is_some();
    assert!(has_key("backend_name"));
    assert!(has_key("embedding_model"));
    assert!(has_key("embedding_mode"));
    assert!(has_key("passage_sources"));
    assert!(has_key("dimensions"));
    assert!(has_key("version"));

    // At least one passage source must be listed; only the first is inspected.
    let sources = meta["passage_sources"].as_array().unwrap();
    assert!(!sources.is_empty());
    let first_source = &sources[0];

    let source_type = first_source.get("type").and_then(|value| value.as_str());
    assert_eq!(
        source_type,
        Some("jsonl"),
        "Python expects 'type' key (serde renames source_type → type)"
    );

    let source_has = |key: &str| first_source.get(key).is_some();
    assert!(source_has("path"), "Missing 'path' in source");
    assert!(source_has("index_path"), "Missing 'index_path' in source");
}
/// Checks the `is_compact` and `is_pruned` booleans in the meta JSON of an
/// index built with both boolean arguments of `build_test_index` set to `true`.
#[test]
fn test_meta_json_storage_flags() {
    let tmp = tempfile::tempdir().unwrap();
    let index_path = build_test_index(10, tmp.path(), true, true).unwrap();
    let layout = IndexPaths::new(&index_path);
    let meta_text = std::fs::read_to_string(layout.meta_path()).unwrap();
    let meta: serde_json::Value = serde_json::from_str(&meta_text).unwrap();

    // Yields `None` when the key is absent or is not a JSON boolean.
    let flag = |name: &str| meta.get(name).and_then(|value| value.as_bool());

    let is_compact = flag("is_compact");
    assert_eq!(is_compact, Some(true), "is_compact should be true");

    let is_pruned = flag("is_pruned");
    assert_eq!(
        is_pruned,
        Some(true),
        "is_pruned should be true (maps from is_recompute)"
    );
}
/// Loads the index metadata through `IndexMeta::load` and, when backend
/// kwargs are present, checks that the expected HNSW parameter keys exist.
#[test]
fn test_meta_json_backend_kwargs() {
    let tmp = tempfile::tempdir().unwrap();
    let index_path = build_test_index(10, tmp.path(), true, true).unwrap();
    let layout = IndexPaths::new(&index_path);
    let meta = IndexMeta::load(&layout.meta_path()).unwrap();

    let kwargs = &meta.backend_kwargs;
    // NOTE(review): an empty kwargs map skips every check below, so the test
    // passes vacuously in that case — confirm this leniency is intended.
    if kwargs.is_empty() {
        return;
    }

    assert!(kwargs.contains_key("M"));
    assert!(kwargs.contains_key("efConstruction"));
    assert!(kwargs.contains_key("distance_metric"));
}
/// Checks that every non-blank line of the passages file is a JSON value
/// carrying the `id`, `text` and `metadata` keys, and that the file holds one
/// such record per requested passage.
///
/// The count assertion stops the test from passing vacuously on an empty or
/// all-blank passages file.
#[test]
fn test_passages_jsonl_python_compatible() {
    let dir = tempfile::tempdir().unwrap();
    let index_path = build_test_index(15, dir.path(), true, true).unwrap();
    let paths = IndexPaths::new(&index_path);
    let content = std::fs::read_to_string(paths.passages_path()).unwrap();

    let mut passage_count = 0usize;
    // `i` is the zero-based line index in the file, blank lines included.
    for (i, line) in content.lines().enumerate() {
        if line.trim().is_empty() {
            continue;
        }
        let parsed: serde_json::Value =
            serde_json::from_str(line).unwrap_or_else(|e| panic!("Line {}: {}", i, e));
        for key in ["id", "text", "metadata"] {
            assert!(parsed.get(key).is_some(), "Line {}: missing '{}'", i, key);
        }
        passage_count += 1;
    }

    // Matches the sibling offset and id-map tests, which assert a record
    // count equal to the size passed to `build_test_index`.
    assert_eq!(
        passage_count, 15,
        "Expected 15 passages, found {}",
        passage_count
    );
}
/// Checks that the passage offset file is readable as text, holds one
/// unsigned integer per non-blank line, has exactly ten entries for a
/// ten-passage index, and that those entries strictly increase.
#[test]
fn test_passages_idx_is_text_format() {
    let tmp = tempfile::tempdir().unwrap();
    let index_path = build_test_index(10, tmp.path(), true, true).unwrap();
    let layout = IndexPaths::new(&index_path);
    let idx_text = std::fs::read_to_string(layout.offset_path()).unwrap();

    // Parse every non-blank line; a non-numeric entry panics and fails the test.
    let mut offsets: Vec<u64> = Vec::new();
    for entry in idx_text.lines().map(str::trim) {
        if !entry.is_empty() {
            offsets.push(entry.parse::<u64>().unwrap());
        }
    }
    assert_eq!(offsets.len(), 10, "Expected 10 offsets");

    // Compare each offset with its successor.
    for (earlier, later) in offsets.iter().zip(offsets.iter().skip(1)) {
        assert!(
            earlier < later,
            "Offsets should be strictly increasing: {:?}",
            offsets
        );
    }
}
// This test has an empty body, so it performs no checks and always passes.
// NOTE(review): the name suggests it is a placeholder for documenting the
// Python-side `.idx` format — confirm whether notes or assertions are missing.
#[test]
fn test_python_idx_format_documented() {
}
/// Checks that the index file starts with the four bytes `IHNf`, compared as
/// a little-endian `u32`.
#[test]
fn test_hnsw_index_faiss_fourcc() {
    let tmp = tempfile::tempdir().unwrap();
    let index_path = build_test_index(10, tmp.path(), true, true).unwrap();
    let layout = IndexPaths::new(&index_path);

    // Only the first four bytes are read; the rest of the file is untouched.
    let mut magic = [0u8; 4];
    std::fs::File::open(layout.index_file_path())
        .unwrap()
        .read_exact(&mut magic)
        .unwrap();

    let expected = u32::from_le_bytes(*b"IHNf");
    let fourcc = u32::from_le_bytes(magic);
    assert_eq!(
        fourcc, expected,
        "Expected FAISS FourCC 'IHNf' (0x{:08x}), got 0x{:08x}",
        expected, fourcc
    );
}
/// Checks fixed-offset header fields of the index file: dimensions
/// (little-endian `i32` at bytes 4..8), vector count (little-endian `i64` at
/// bytes 8..16) and the trained flag (single byte at offset 32).
///
/// A length guard runs first so that a truncated file fails with a
/// descriptive message rather than a bare slice-index panic.
#[test]
fn test_hnsw_index_header_fields() {
    // Offset of the last header byte this test inspects.
    // NOTE(review): bytes 16..32 are not examined here; presumably they are
    // the placeholder fields of the FAISS header — confirm against the writer.
    const IS_TRAINED_OFFSET: usize = 32;

    let dir = tempfile::tempdir().unwrap();
    let index_path = build_test_index(10, dir.path(), true, true).unwrap();
    let paths = IndexPaths::new(&index_path);
    let data = std::fs::read(paths.index_file_path()).unwrap();

    assert!(
        data.len() > IS_TRAINED_OFFSET,
        "Index file too short for header: {} bytes, need at least {}",
        data.len(),
        IS_TRAINED_OFFSET + 1
    );

    let d = i32::from_le_bytes(data[4..8].try_into().unwrap());
    let ntotal = i64::from_le_bytes(data[8..16].try_into().unwrap());
    let is_trained = data[IS_TRAINED_OFFSET];

    assert_eq!(d, 64, "Expected dimensions=64, got {}", d);
    assert_eq!(ntotal, 10, "Expected ntotal=10, got {}", ntotal);
    assert_eq!(is_trained, 1, "Expected is_trained=1, got {}", is_trained);
}
/// Checks that the id-map file lists exactly ten non-empty lines and that
/// line `n` (zero-based, counting non-empty lines only) is the decimal
/// string `n`.
#[test]
fn test_id_map_format() {
    let tmp = tempfile::tempdir().unwrap();
    let index_path = build_test_index(10, tmp.path(), true, true).unwrap();
    let layout = IndexPaths::new(&index_path);
    let id_map_text = std::fs::read_to_string(layout.id_map_path()).unwrap();

    let mut ids: Vec<&str> = Vec::new();
    for entry in id_map_text.lines() {
        if !entry.is_empty() {
            ids.push(entry);
        }
    }
    assert_eq!(ids.len(), 10);

    ids.iter()
        .enumerate()
        .for_each(|(position, id)| assert_eq!(*id, position.to_string()));
}
/// Builds a 20-vector index with both boolean options enabled, reads it back
/// with `read_hnsw_index`, and checks the count, the dimensions and that the
/// graph reports itself as compact.
#[test]
fn test_compact_index_roundtrip() {
    let tmp = tempfile::tempdir().unwrap();
    let index_path = build_test_index(20, tmp.path(), true, true).unwrap();
    let layout = IndexPaths::new(&index_path);

    // Load the whole file into memory and parse it from an in-memory cursor.
    let bytes = std::fs::read(layout.index_file_path()).unwrap();
    let mut reader = std::io::Cursor::new(&bytes);
    let graph = leann_core::hnsw::io::read_hnsw_index(&mut reader).unwrap();

    assert_eq!(graph.ntotal, 20);
    assert_eq!(graph.dimensions, 64);
    assert!(graph.is_compact());
}
/// Builds a 20-vector index with both boolean options disabled, reads it back
/// with `read_hnsw_index`, and checks the count, the dimensions and that the
/// graph does not report itself as compact.
#[test]
fn test_standard_index_roundtrip() {
    let tmp = tempfile::tempdir().unwrap();
    let index_path = build_test_index(20, tmp.path(), false, false).unwrap();
    let layout = IndexPaths::new(&index_path);

    // Load the whole file into memory and parse it from an in-memory cursor.
    let bytes = std::fs::read(layout.index_file_path()).unwrap();
    let mut reader = std::io::Cursor::new(&bytes);
    let graph = leann_core::hnsw::io::read_hnsw_index(&mut reader).unwrap();

    assert_eq!(graph.ntotal, 20);
    assert_eq!(graph.dimensions, 64);
    assert!(!graph.is_compact());
}