#![feature(iter_array_chunks)]
#![cfg_attr(target_arch = "x86_64", feature(stdarch_x86_mm_shuffle))]
#![feature(portable_simd)]
#![feature(thread_id_value)]
pub mod topk_selectors;
use pyo3::types::PyModuleMethods;
pub mod pylib;
use crate::pylib::DensePQHNSW as DensePQIndexPy;
use crate::pylib::DensePlainHNSW as DensePlainIndexPy;
use crate::pylib::DensePlainHNSWf16 as DensePlainIndexPyf16;
use crate::pylib::SparsePlainHNSW as SparsePlainIndexPy;
use crate::pylib::SparsePlainHNSWf16 as SparsePlainIndexPyf16;
use num_traits::{ToPrimitive, Zero};
use pyo3::prelude::PyModule;
use pyo3::{pymodule, Bound, PyResult};
pub mod clustering {
pub mod kmeans;
pub use kmeans::KMeans;
pub use kmeans::KMeansBuilder;
}
pub mod graph;
pub mod quantizers;
pub use quantizers::decoder;
pub use quantizers::encoder;
pub use quantizers::plain_quantizer;
pub use quantizers::pq;
pub use quantizers::quantizer;
pub use quantizers::sparse_plain_quantizer;
pub mod visited_set;
pub mod datasets {
pub mod dataset;
pub mod dense_dataset;
pub mod sparse_dataset;
pub mod utils;
}
pub use datasets::dataset::Dataset;
pub use datasets::dataset::GrowableDataset;
pub use datasets::dense_dataset::DenseDataset;
pub use datasets::dense_dataset::DenseDatasetIter;
pub use datasets::sparse_dataset::ParSparseDatasetIter;
pub use datasets::sparse_dataset::SparseDataset;
pub use datasets::sparse_dataset::SparseDatasetIter;
pub use datasets::utils::*;
type PlainDenseDataset<T> = DenseDataset<plain_quantizer::PlainQuantizer<T>>;
pub mod distances;
pub use distances::dot_product::*;
pub use distances::euclidean_distance::*;
pub use distances::simd::distances as simd_distances;
pub use distances::simd::transpose as simd_transpose;
pub use distances::simd::utils as simd_utils;
pub mod utils;
pub mod indexes;
pub use indexes::{graph_index, hnsw, hnsw_utils};
pub mod index_serializer;
pub use index_serializer::IndexSerializer;
use half::f16;
use serde::{Deserialize, Serialize};
#[derive(Default, Debug, Copy, Clone, Serialize, Deserialize, PartialEq)]
pub enum DistanceType {
#[default]
Euclidean,
DotProduct,
}
pub trait Float: Copy + Default + ToPrimitive + PartialOrd + Zero + Send + Sync {}
impl Float for f32 {}
impl Float for f16 {}
pub trait AsRefItem {
type Item;
fn as_ref_item(&self) -> &[Self::Item];
}
impl<U> AsRefItem for Vec<U> {
type Item = U;
#[inline(always)]
fn as_ref_item(&self) -> &[Self::Item] {
self.as_slice()
}
}
impl<U> AsRefItem for Box<[U]> {
type Item = U;
#[inline(always)]
fn as_ref_item(&self) -> &[Self::Item] {
self.as_ref()
}
}
impl<U> AsRefItem for &[U] {
type Item = U;
#[inline(always)]
fn as_ref_item(&self) -> &[Self::Item] {
self
}
}
impl<U> AsRefItem for &mut [U] {
type Item = U;
#[inline(always)]
fn as_ref_item(&self) -> &[Self::Item] {
self
}
}
pub trait Vector1D {
type ComponentsType;
type ValuesType;
fn len(&self) -> usize;
fn is_empty(&self) -> bool {
self.len() == 0
}
fn components_as_slice(&self) -> &[Self::ComponentsType];
fn values_as_slice(&self) -> &[Self::ValuesType];
}
static EMPTY_COMPONENTS: [(); 0] = [];
#[derive(Debug, Clone, PartialEq)]
pub struct DenseVector1D<T: AsRefItem> {
components: (),
values: T,
}
impl<T: AsRefItem> DenseVector1D<T> {
#[inline]
pub fn new(values: T) -> Self {
DenseVector1D {
components: (),
values,
}
}
}
impl<T: AsRefItem> Vector1D for DenseVector1D<T> {
type ComponentsType = ();
type ValuesType = T::Item;
#[inline(always)]
fn len(&self) -> usize {
self.values.as_ref_item().len()
}
#[inline(always)]
fn components_as_slice(&self) -> &[Self::ComponentsType] {
&EMPTY_COMPONENTS
}
#[inline(always)]
fn values_as_slice(&self) -> &[Self::ValuesType] {
self.values.as_ref_item()
}
}
#[derive(Debug, Clone, PartialEq)]
pub struct SparseVector1D<V, T>
where
V: AsRefItem<Item = u16>,
T: AsRefItem,
{
components: V,
values: T,
max_component_id: u16,
d: usize, }
impl<V, T> SparseVector1D<V, T>
where
V: AsRefItem<Item = u16>,
T: AsRefItem,
{
#[inline]
pub fn new(components: V, values: T, d: usize) -> Self {
let max_component_id = components.as_ref_item().iter().max().copied().unwrap_or(0);
SparseVector1D {
components,
values,
max_component_id,
d,
}
}
}
impl<V, T> Vector1D for SparseVector1D<V, T>
where
V: AsRefItem<Item = u16>,
T: AsRefItem,
{
type ComponentsType = V::Item;
type ValuesType = T::Item;
#[inline(always)]
fn len(&self) -> usize {
(self.max_component_id as usize) + 1
}
#[inline(always)]
fn components_as_slice(&self) -> &[Self::ComponentsType] {
self.components.as_ref_item()
}
#[inline(always)]
fn values_as_slice(&self) -> &[Self::ValuesType] {
self.values.as_ref_item()
}
}
#[pymodule]
pub fn kannolo(m: &Bound<'_, PyModule>) -> PyResult<()> {
m.add_class::<DensePlainIndexPy>()?;
m.add_class::<DensePlainIndexPyf16>()?;
m.add_class::<SparsePlainIndexPy>()?;
m.add_class::<SparsePlainIndexPyf16>()?;
m.add_class::<DensePQIndexPy>()?;
Ok(())
}