// tinyquant-core 0.0.0
//
// CPU-only vector quantization codec — core types, codec, corpus, and backend trait (no_std).
// Documentation
//! Insertion-order parity test — verifies that [`Corpus::iter`] yields
//! vectors in the exact order recorded in the Python-generated fixture.
//!
//! Fixture: `tests/fixtures/corpus/insertion_order.json`
//! Generated by: `np.random.default_rng(42).permutation(100)`
//! Format: a plain JSON array of 100 string ids.

use std::{collections::BTreeMap, fs, path::Path, sync::Arc};

use tinyquant_core::{
    codec::{Codebook, CodecConfig},
    corpus::{CompressionPolicy, Corpus},
};

/// Reads `tests/fixtures/corpus/insertion_order.json` (resolved relative to
/// `CARGO_MANIFEST_DIR`) and deserializes it as a JSON array of strings.
///
/// # Panics
///
/// Panics if the file cannot be read or if its contents do not parse as a
/// JSON array of strings; a broken fixture should fail the test immediately.
fn load_fixture() -> Vec<String> {
    let fixture_path =
        Path::new(env!("CARGO_MANIFEST_DIR")).join("tests/fixtures/corpus/insertion_order.json");

    let contents = match fs::read_to_string(&fixture_path) {
        Ok(text) => text,
        Err(e) => panic!("fixture missing at {}: {e}", fixture_path.display()),
    };

    match serde_json::from_str::<Vec<String>>(&contents) {
        Ok(parsed) => parsed,
        Err(e) => panic!("malformed fixture JSON: {e}"),
    }
}

/// Inserts every fixture id into a fresh corpus, in fixture order, and checks
/// that `Corpus::iter` hands the ids back in exactly that order.
#[test]
fn corpus_iter_matches_python_insertion_order() {
    let expected_ids = load_fixture();
    assert_eq!(expected_ids.len(), 100, "fixture must contain exactly 100 ids");

    // Keep the codec tiny so the test stays fast: 8 dimensions, and the final
    // `false` flag turns the residual off (per the original author's note).
    let dimension = 8_u32;
    let config = CodecConfig::new(4, 42, dimension, false).unwrap();

    // Training data is a simple linear ramp of 8 000 floats.
    let training: Vec<f32> = (0..8_000).map(|n| n as f32 * 0.001).collect();
    let codebook = Codebook::train(&training, &config).unwrap();

    // One small ramp vector of `dimension` floats; the same slice is reused
    // for every insert, since only the id ordering is under test here.
    let payload: Vec<f32> = (0..dimension as usize)
        .map(|n| n as f32 * 0.001)
        .collect();

    let mut corpus = Corpus::new(
        Arc::from("order-corpus"),
        config,
        codebook,
        CompressionPolicy::Passthrough,
        BTreeMap::new(),
    );
    // Discard whatever events construction queued; they are not inspected.
    let _ = corpus.drain_events();

    for id in expected_ids.iter() {
        corpus
            .insert(Arc::from(id.as_str()), &payload, None, 0)
            .unwrap_or_else(|e| panic!("insert {id} failed: {e}"));
    }

    // Snapshot the ids in the order the corpus iterates them.
    let observed: Vec<&str> = corpus.iter().map(|(id, _)| id.as_ref()).collect();

    // Check the length first, then compare position by position so that a
    // failure names the first index where the orders diverge.
    assert_eq!(observed.len(), expected_ids.len());
    observed
        .iter()
        .zip(expected_ids.iter())
        .enumerate()
        .for_each(|(i, (got, want))| {
            assert_eq!(
                *got,
                want.as_str(),
                "position {i}: got {got:?}, expected {want:?}"
            );
        });
}