use approx::assert_relative_eq;
use diskann::utils::IntoUsize;
use diskann_vector::{
Half, PreprocessedDistanceFunction, PureDistanceFunction,
distance::{Cosine, InnerProduct, SquaredL2},
};
use rand::{Rng, distr::Distribution};
use rand_distr::{Normal, Uniform};
use crate::model::{FixedChunkPQTable, pq::calculate_chunk_offsets_auto};
pub(crate) trait TestDistribution: Sized + Copy {
fn generate<R>(dim: usize, rng: &mut R) -> Vec<Self>
where
R: Rng;
}
impl TestDistribution for f32 {
fn generate<R>(dim: usize, rng: &mut R) -> Vec<Self>
where
R: Rng,
{
let distribution = Normal::<f32>::new(0.0, 10.0).unwrap();
(0..dim).map(|_| distribution.sample(rng)).collect()
}
}
impl TestDistribution for Half {
fn generate<R>(dim: usize, rng: &mut R) -> Vec<Self>
where
R: Rng,
{
let distribution = Normal::<f32>::new(0.0, 10.0).unwrap();
(0..dim)
.map(|_| Half::from_f32(distribution.sample(rng)))
.collect()
}
}
impl TestDistribution for i8 {
fn generate<R>(dim: usize, rng: &mut R) -> Vec<Self>
where
R: Rng,
{
let distribution = rand::distr::StandardUniform {};
(0..dim).map(|_| distribution.sample(rng)).collect()
}
}
impl TestDistribution for u8 {
fn generate<R>(dim: usize, rng: &mut R) -> Vec<Self>
where
R: Rng,
{
let distribution = rand::distr::StandardUniform {};
(0..dim).map(|_| distribution.sample(rng)).collect()
}
}
#[derive(Clone, Copy)]
pub(crate) struct RelativeAndAbsolute {
pub(crate) relative: f32,
pub(crate) absolute: f32,
}
#[derive(Clone, Copy)]
pub(crate) struct TableConfig {
pub(crate) dim: usize,
pub(crate) pq_chunks: usize,
pub(crate) num_pivots: usize,
pub(crate) start_value: f32,
}
pub(crate) fn generate_expected_vector(
code: &[u8],
offsets: &[usize],
start_value: f32,
) -> Vec<f32> {
assert_eq!(code.len() + 1, offsets.len());
let mut v = Vec::new();
for (i, c) in code.iter().enumerate() {
let len = offsets[i + 1] - offsets[i];
for _ in 0..len {
v.push(start_value + ((i + c.into_usize()) as f32))
}
}
v
}
pub(crate) fn seed_pivot_table(config: TableConfig) -> FixedChunkPQTable {
let offsets = calculate_chunk_offsets_auto(config.dim, config.pq_chunks);
let mut pivots = Vec::<f32>::new();
for i in 0..config.num_pivots {
for j in 0..config.pq_chunks {
let start = offsets[j];
let stop = offsets[j + 1];
let val = config.start_value + ((i + j) as f32);
for _ in 0..(stop - start) {
pivots.push(val);
}
}
}
assert_eq!(pivots.len(), config.dim * config.num_pivots);
let centroid = vec![0.0f32; config.dim];
FixedChunkPQTable::new(config.dim, pivots.into(), centroid.into(), offsets.into()).unwrap()
}
pub(crate) fn generate_random_code<R>(
num_pivots: usize,
num_pq_chunks: usize,
rng: &mut R,
) -> Vec<u8>
where
R: Rng,
{
assert!(num_pivots != 0);
let num_pivots: u8 = (num_pivots - 1).try_into().unwrap();
let dist = Uniform::try_from(0..=num_pivots).unwrap();
(0..num_pq_chunks).map(|_| dist.sample(rng)).collect()
}
pub(super) fn test_l2_inner<'a, T, F, R>(
create: impl Fn(&'a FixedChunkPQTable, &[T]) -> F,
table: &'a FixedChunkPQTable,
num_trials: usize,
config: TableConfig,
rng: &mut R,
errors: RelativeAndAbsolute,
) where
T: Into<f32> + TestDistribution,
F: for<'any> PreprocessedDistanceFunction<&'any [u8], f32>,
R: Rng,
{
for _ in 0..num_trials {
let input: Vec<T> = T::generate(config.dim, rng);
let mut input_f32: Vec<f32> = input.iter().map(|x| (*x).into()).collect();
table.preprocess_query(&mut input_f32);
let computer = create(table, &input);
for _ in 0..num_trials {
let code = generate_random_code(config.num_pivots, config.pq_chunks, rng);
let expected_vector =
generate_expected_vector(&code, table.get_chunk_offsets(), config.start_value);
let got = computer.evaluate_similarity(&code);
let expected = SquaredL2::evaluate(input_f32.as_slice(), expected_vector.as_slice());
assert_relative_eq!(
got,
expected,
epsilon = errors.absolute,
max_relative = errors.relative
);
}
}
}
pub(super) fn test_ip_inner<'a, T, F, R>(
create: impl Fn(&'a FixedChunkPQTable, &[T]) -> F,
table: &'a FixedChunkPQTable,
num_trials: usize,
config: TableConfig,
rng: &mut R,
errors: RelativeAndAbsolute,
) where
T: Into<f32> + TestDistribution,
F: for<'any> PreprocessedDistanceFunction<&'any [u8], f32>,
R: Rng,
{
for _ in 0..num_trials {
let input: Vec<T> = T::generate(config.dim, rng);
let input_f32: Vec<f32> = input.iter().map(|x| (*x).into()).collect();
let computer = create(table, &input);
for _ in 0..num_trials {
let code = generate_random_code(config.num_pivots, config.pq_chunks, rng);
let expected_vector =
generate_expected_vector(&code, table.get_chunk_offsets(), config.start_value);
let got = computer.evaluate_similarity(&code);
let expected = InnerProduct::evaluate(input_f32.as_slice(), expected_vector.as_slice());
assert_relative_eq!(
got,
expected,
epsilon = errors.absolute,
max_relative = errors.relative
);
}
}
}
pub(super) fn test_cosine_inner<'a, T, F, R>(
create: impl Fn(&'a FixedChunkPQTable, &[T]) -> F,
table: &'a FixedChunkPQTable,
num_trials: usize,
config: TableConfig,
rng: &mut R,
errors: RelativeAndAbsolute,
) where
T: Into<f32> + TestDistribution,
F: for<'any> PreprocessedDistanceFunction<&'any [u8], f32>,
R: Rng,
{
for _ in 0..num_trials {
let input: Vec<T> = T::generate(config.dim, rng);
let input_f32: Vec<f32> = input.iter().map(|x| (*x).into()).collect();
let computer = create(table, &input);
for _ in 0..num_trials {
let code = generate_random_code(config.num_pivots, config.pq_chunks, rng);
let expected_vector =
generate_expected_vector(&code, table.get_chunk_offsets(), config.start_value);
let got = computer.evaluate_similarity(&code);
let expected = Cosine::evaluate(input_f32.as_slice(), expected_vector.as_slice());
assert_relative_eq!(
got,
expected,
epsilon = errors.absolute,
max_relative = errors.relative
);
}
}
}