use std::{collections::BTreeMap, fs, path::Path, sync::Arc};
use tinyquant_core::{
codec::{Codebook, CodecConfig},
corpus::{CompressionPolicy, Corpus},
};
/// Loads the list of ids stored in `tests/fixtures/corpus/insertion_order.json`.
///
/// The path is resolved relative to the crate's manifest directory, and the
/// file is parsed as a JSON array of strings.
///
/// # Panics
///
/// Panics if the fixture file cannot be read, or if its contents do not
/// deserialize into a `Vec<String>`.
fn load_fixture() -> Vec<String> {
    let fixture_path = Path::new(env!("CARGO_MANIFEST_DIR"))
        .join("tests/fixtures/corpus/insertion_order.json");

    let contents = match fs::read_to_string(&fixture_path) {
        Ok(text) => text,
        Err(e) => panic!("fixture missing at {}: {e}", fixture_path.display()),
    };

    match serde_json::from_str::<Vec<String>>(&contents) {
        Ok(parsed) => parsed,
        Err(e) => panic!("malformed fixture JSON: {e}"),
    }
}
/// Checks that `Corpus::iter` yields ids in the order they were inserted.
///
/// Every id from the fixture is inserted, in fixture order, into a fresh
/// passthrough corpus using one shared vector; the ids produced by iteration
/// must then match the fixture position by position.
///
/// NOTE(review): going by the test name, the fixture presumably records the
/// order produced by the Python implementation — confirm against the fixture
/// generator.
#[test]
fn corpus_iter_matches_python_insertion_order() {
    let ids = load_fixture();
    assert_eq!(ids.len(), 100, "fixture must contain exactly 100 ids");

    // Codec setup: the codebook is trained on a simple ramp of 8 000 values.
    let dim = 8_u32;
    let config = CodecConfig::new(4, 42, dim, false).unwrap();
    let training: Vec<f32> = (0..8_000).map(|i| i as f32 * 0.001).collect();
    let codebook = Codebook::train(&training, &config).unwrap();

    let corpus_id = Arc::from("order-corpus");
    let metadata = BTreeMap::new();
    let mut corpus = Corpus::new(
        corpus_id,
        config,
        codebook,
        CompressionPolicy::Passthrough,
        metadata,
    );
    // Drain and discard any events already queued; this test does not inspect them.
    let _ = corpus.drain_events();

    // The same `dim`-length vector is stored under every id; only ordering matters here.
    let vector: Vec<f32> = (0..dim as usize).map(|i| i as f32 * 0.001).collect();
    for id in &ids {
        if let Err(e) = corpus.insert(Arc::from(id.as_str()), &vector, None, 0) {
            panic!("insert {id} failed: {e}");
        }
    }

    let observed: Vec<&str> = corpus.iter().map(|(id, _)| id.as_ref()).collect();
    assert_eq!(observed.len(), ids.len());

    // Compare position by position so a failure names the first diverging index.
    let pairs = observed.iter().copied().zip(ids.iter().map(String::as_str));
    for (i, (got, want)) in pairs.enumerate() {
        assert_eq!(got, want, "position {i}: got {got:?}, expected {want:?}");
    }
}