use std::ptr::NonNull;
use thiserror::Error;
use diskann_utils::views::MatrixView;
macro_rules! features {
(
#![$meta:meta]
$($item:item)*
) => {
$(
#[cfg($meta)]
#[cfg_attr(docsrs, doc(cfg($meta)))]
$item
)*
}
}
pub(crate) use features;
pub(crate) fn as_nonnull<T>(slice: &[T]) -> NonNull<T> {
unsafe { NonNull::new_unchecked(slice.as_ptr().cast_mut()) }
}
pub(crate) fn as_nonnull_mut<T>(slice: &mut [T]) -> NonNull<T> {
unsafe { NonNull::new_unchecked(slice.as_mut_ptr()) }
}
pub(crate) fn box_into_nonnull<T>(b: Box<[T]>) -> NonNull<T> {
unsafe { NonNull::new_unchecked(Box::into_raw(b).cast::<T>()) }
}
pub(crate) fn div_round_up<T>(x: T, y: T) -> T
where
T: std::ops::Div<Output = T>
+ std::ops::Rem<Output = T>
+ std::ops::Add<Output = T>
+ Default
+ std::cmp::Eq
+ From<u8>
+ Copy,
{
(x / y)
+ if (x % y) != T::default() {
T::from(1)
} else {
T::default()
}
}
pub(crate) fn is_strictly_monotonic<I>(mut itr: I) -> bool
where
I: Iterator,
I::Item: Ord,
{
let mut x = match itr.next() {
None => {
return true;
}
Some(x) => x,
};
for y in itr {
if y <= x {
return false;
}
x = y;
}
true
}
#[derive(Debug, Clone, Copy, Error)]
#[non_exhaustive]
#[error("data cannot be empty")]
pub(crate) struct CannotBeEmpty;
pub(crate) fn compute_means_and_average_norm<T>(data: MatrixView<T>) -> (Vec<f64>, f64)
where
T: Into<f64> + Copy,
{
let mut means: Vec<f64> = vec![0.0; data.ncols()];
let norm_sum = data.row_iter().fold(0.0f64, |accum: f64, row| {
std::iter::zip(means.iter_mut(), row.iter()).for_each(|(m, r)| {
let r: f64 = (*r).into();
*m += r;
});
let norm = row
.iter()
.map(|&x| {
let x: f64 = x.into();
let y = x;
y * y
})
.sum::<f64>()
.sqrt();
accum + norm
});
let mean_norm: f64 = norm_sum / (data.nrows() as f64);
means.iter_mut().for_each(|m| *m /= data.nrows() as f64);
(means, mean_norm)
}
pub(crate) fn compute_normalized_means<T>(data: MatrixView<T>) -> Result<Vec<f64>, CannotBeEmpty>
where
T: Into<f64> + Copy,
{
if data.nrows() == 0 {
return Err(CannotBeEmpty);
}
if data.ncols() == 0 {
return Ok(Vec::new());
}
let mut means: Vec<f64> = vec![0.0; data.ncols()];
let square = |x: &T| -> f64 {
let x: f64 = (*x).into();
x * x
};
data.row_iter().for_each(|row| {
let norm = row.iter().map(square).sum::<f64>().sqrt();
let inv_norm = if norm == 0.0 { 1.0 } else { 1.0 / norm };
std::iter::zip(means.iter_mut(), row.iter()).for_each(|(m, r)| {
let r: f64 = (*r).into() * inv_norm;
*m += r;
});
});
means.iter_mut().for_each(|m| *m /= data.nrows() as f64);
Ok(means)
}
pub(crate) fn compute_variances<T>(data: MatrixView<T>, means: &[f64]) -> Vec<f64>
where
T: Into<f64> + Copy,
{
assert_eq!(data.ncols(), means.len());
let mut variances: Vec<f64> = vec![0.0; data.ncols()];
data.row_iter().for_each(|row| {
variances
.iter_mut()
.zip(std::iter::zip(row.iter(), means.iter()))
.for_each(|(v, (r, c))| {
let r: f64 = (*r).into();
let d: f64 = r - *c;
*v += d * d;
});
});
variances.iter_mut().for_each(|v| *v /= data.nrows() as f64);
variances
}
#[cfg(test)]
mod tests {
use diskann_utils::views::Matrix;
use diskann_vector::{Norm, norm::FastL2Norm};
use rand::{SeedableRng, rngs::StdRng};
use super::*;
use crate::test_util::create_test_problem;
fn normalize(x: &mut [f32]) {
let norm: f32 = (FastL2Norm).evaluate(&*x);
x.iter_mut().for_each(|i| *i /= norm);
}
fn check_on_test_problem(nrows: usize, ncols: usize, seed: u64) {
let mut rng = StdRng::seed_from_u64(seed);
let test_problem = create_test_problem(nrows, ncols, &mut rng);
let (means, mean_norm) = compute_means_and_average_norm(test_problem.data.as_view());
for (i, (&expected, &got)) in
std::iter::zip(test_problem.means.iter(), means.iter()).enumerate()
{
assert_eq!(
expected, got,
"computed mean failed for dim {} (nrows = {}, ncols = {})",
i, nrows, ncols
);
}
assert_eq!(
test_problem.mean_norm, mean_norm,
"mean norm computation failed (nrows = {}, ncols = {})",
nrows, ncols
);
let variances = compute_variances(test_problem.data.as_view(), &means);
println!("expected = {:?}", test_problem.variances);
println!("got = {:?}", variances);
for (i, (&expected, &got)) in
std::iter::zip(test_problem.variances.iter(), variances.iter()).enumerate()
{
assert_eq!(
expected, got,
"computed variance failed for dim {} (nrows = {}, ncols = {})",
i, nrows, ncols
);
}
}
#[test]
fn test_mean_and_norm() {
check_on_test_problem(10, 16, 0x1d66d1dbf1cfedf4);
check_on_test_problem(11, 7, 0x571fdc0150fedc24);
}
fn check_normalized_on_test_problem(nrows: usize, ncols: usize, seed: u64) {
const ERROR_BOUND: f64 = 1.0e-6;
let mut rng = StdRng::seed_from_u64(seed);
let test_problem = create_test_problem(nrows, ncols, &mut rng);
let mut normalized_data = test_problem.data.clone();
normalized_data.row_iter_mut().for_each(normalize);
let (expected_means, expected_mean_norm) =
compute_means_and_average_norm(normalized_data.as_view());
let norm_error = (expected_mean_norm - 1.0).abs();
assert!(norm_error < 1e-7, "got a norm error of {}", norm_error);
let means = compute_normalized_means(test_problem.data.as_view()).unwrap();
for (i, (&expected, &got)) in
std::iter::zip(expected_means.iter(), means.iter()).enumerate()
{
let error = if got == 0.0 && expected == 0.0 {
0.0
} else {
(got - expected).abs() / expected.abs()
};
assert!(
error < ERROR_BOUND,
"got {}, expected {}, error = {}, bound = {} for dim {} (nrows = {}, ncols = {})",
got,
expected,
error,
ERROR_BOUND,
i,
nrows,
ncols
);
}
}
#[test]
fn test_normalized_mean_and_norm() {
check_normalized_on_test_problem(1, 20, 0x81daae2ac2d06d7c);
check_normalized_on_test_problem(10, 16, 0x1d66d1dbf1cfedf4);
check_normalized_on_test_problem(11, 7, 0x571fdc0150fedc24);
}
#[test]
fn test_normalized_means_corner_cases() {
let data = Matrix::new(1.0f32, 10, 0);
let means = compute_normalized_means(data.as_view()).unwrap();
assert!(means.is_empty());
let data = Matrix::new(1.0f32, 0, 10);
let _: CannotBeEmpty = compute_normalized_means(data.as_view()).unwrap_err();
}
#[test]
fn test_div_round_up() {
assert_eq!(div_round_up(10, 2), 5);
assert_eq!(div_round_up(10, 3), 4);
assert_eq!(div_round_up(10, 4), 3);
assert_eq!(div_round_up(10, 5), 2);
assert_eq!(div_round_up(10, 6), 2);
assert_eq!(div_round_up(10, 7), 2);
assert_eq!(div_round_up(10, 8), 2);
assert_eq!(div_round_up(10, 9), 2);
assert_eq!(div_round_up(10, 10), 1);
assert_eq!(div_round_up(10, 11), 1);
}
#[test]
fn test_is_strictly_monotonic() {
let x: &[usize] = &[];
assert!(
is_strictly_monotonic(x.iter()),
"empty ranges are monotonic"
);
let x: &[usize] = &[100];
assert!(
is_strictly_monotonic(x.iter()),
"ranges of length 1 are monotonic"
);
let x: &[usize] = &[100, 101];
assert!(is_strictly_monotonic(x.iter()));
let x: &[usize] = &[100, 102, 104, 105];
assert!(is_strictly_monotonic(x.iter()));
let x: &[usize] = &[100, 100, 105];
assert!(!is_strictly_monotonic(x.iter()));
let x: &[usize] = &[100, 90, 105];
assert!(!is_strictly_monotonic(x.iter()));
let x: &[usize] = &[100, 105, 105];
assert!(!is_strictly_monotonic(x.iter()));
let x: &[usize] = &[100, 105, 100];
assert!(!is_strictly_monotonic(x.iter()));
}
}