Skip to main content

diskann_benchmark_core/build/
ids.rs

1/*
2 * Copyright (c) Microsoft Corporation.
3 * Licensed under the MIT license.
4 */
5
6//! Tools for creating external IDs for index construction.
7
8use std::marker::PhantomData;
9
10use diskann::{ANNError, ANNErrorKind, ANNResult};
11use diskann_utils::future::AsyncFriendly;
12
13/// Convert an implicit data index to an external ID.
14///
15/// The [`crate::build::Build`] trait uses indices in a range `0..N` to refer to items
16/// for insertion. This trait provides a bridge between these indices and the actual external
17/// ID type used by a [`diskann::provider::DataProvider`].
18pub trait ToId<T>: std::fmt::Debug + AsyncFriendly {
19    /// Convert the given implicit index `i` to an external ID of type `T`.
20    fn to_id(&self, i: usize) -> ANNResult<T>;
21}
22
23/// An extension of [`ToId`] that tracks the number of IDs.
24pub trait ToIdSized<T>: ToId<T> {
25    /// Returns the number of IDs that can be produced.
26    ///
27    /// Implementations must ensure that for all `i` in `0..self.len()`, `self.to_id(i)` returns
28    /// a valid ID. Indexing outside of this range should return an error.
29    fn len(&self) -> usize;
30
31    /// Returns `true` only if `self.len() == 0`.
32    fn is_empty(&self) -> bool {
33        self.len() == 0
34    }
35}
36
37//----------//
38// Identity //
39//----------//
40
41/// A [`ToId`] implementation that treats the implicit index as the external ID.
42///
43/// This requires that the external ID type `T` can be constructed from a `usize`.
44pub struct Identity<T>(PhantomData<T>);
45
46impl<T> Identity<T> {
47    /// Construct a new [`Identity`].
48    pub fn new() -> Self {
49        Self(PhantomData)
50    }
51}
52
53impl<T> Clone for Identity<T> {
54    fn clone(&self) -> Self {
55        *self
56    }
57}
58
59impl<T> Copy for Identity<T> {}
60
61impl<T> Default for Identity<T> {
62    fn default() -> Self {
63        Self::new()
64    }
65}
66
67impl<T> std::fmt::Debug for Identity<T> {
68    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
69        f.debug_tuple(&format!("Identity<{}>", std::any::type_name::<T>()))
70            .field(&"_")
71            .finish()
72    }
73}
74
75impl<T> ToId<T> for Identity<T>
76where
77    T: TryFrom<usize, Error: std::error::Error + AsyncFriendly> + AsyncFriendly,
78{
79    fn to_id(&self, i: usize) -> ANNResult<T> {
80        T::try_from(i).map_err(ANNError::opaque)
81    }
82}
83
84//-------//
85// Slice //
86//-------//
87
88/// An implementation of [`ToId`] and [`ToIdSized`] that encodes external IDs as a slice.
89///
90/// ```rust
91/// use diskann_benchmark_core::build::ids::{Slice, ToId, ToIdSized};
92///
93/// let slice = Slice::new([10u32, 20, 30].into());
94/// assert_eq!(slice.len(), 3);
95/// assert_eq!(slice.to_id(0).unwrap(), 10);
96/// assert_eq!(slice.to_id(1).unwrap(), 20);
97/// assert_eq!(slice.to_id(2).unwrap(), 30);
98/// assert!(slice.to_id(3).is_err());
99/// ```
100#[derive(Debug)]
101pub struct Slice<T>(Box<[T]>);
102
103impl<T> Slice<T> {
104    /// Construct a new [`Slice`] from the given boxed slice.
105    ///
106    /// The contents of the [`Slice`] will be identical to the provided box.
107    pub fn new(slice: Box<[T]>) -> Self {
108        Self(slice)
109    }
110}
111
112impl<T> ToId<T> for Slice<T>
113where
114    T: Clone + std::fmt::Debug + AsyncFriendly,
115{
116    fn to_id(&self, i: usize) -> ANNResult<T> {
117        self.0.get(i).cloned().ok_or_else(|| {
118            ANNError::message(
119                ANNErrorKind::Opaque,
120                format!(
121                    "tried to index a slice of length {} at index {}",
122                    self.0.len(),
123                    i
124                ),
125            )
126        })
127    }
128}
129
130impl<T> ToIdSized<T> for Slice<T>
131where
132    T: Clone + std::fmt::Debug + AsyncFriendly,
133{
134    fn len(&self) -> usize {
135        self.0.len()
136    }
137}
138
139//-------//
140// Range //
141//-------//
142
143/// An implementation of [`ToId`] and [`ToIdSized`] that encodes external IDs as a range.
144///
145/// ```rust
146/// use diskann_benchmark_core::build::ids::{Range, ToId, ToIdSized};
147///
148/// let range = Range::new(100u32..105);
149/// assert_eq!(range.len(), 5);
150/// assert_eq!(range.to_id(0).unwrap(), 100);
151/// assert_eq!(range.to_id(1).unwrap(), 101);
152/// assert_eq!(range.to_id(4).unwrap(), 104);
153/// assert!(range.to_id(5).is_err());
154/// ```
155#[derive(Debug)]
156pub struct Range<T>(std::ops::Range<T>);
157
158impl<T> Range<T> {
159    /// Construct a new [`Range`] from the given standard library range.
160    pub fn new(range: std::ops::Range<T>) -> Self {
161        Self(range)
162    }
163}
164
165macro_rules! impl_range {
166    ($T:ty) => {
167        impl ToId<$T> for Range<$T> {
168            fn to_id(&self, i: usize) -> ANNResult<$T> {
169                self.0.clone().nth(i).ok_or_else(|| {
170                    ANNError::message(
171                        ANNErrorKind::Opaque,
172                        format!(
173                            "tried to index a range of length {} at index {}",
174                            self.0.len(),
175                            i
176                        ),
177                    )
178                })
179            }
180        }
181
182        impl ToIdSized<$T> for Range<$T> {
183            fn len(&self) -> usize {
184                self.0.len()
185            }
186        }
187    };
188}
189
190impl_range!(u32);
191
192///////////
193// Tests //
194///////////
195
196#[cfg(test)]
197mod tests {
198    use super::*;
199
200    fn assert_is_copy<T: Copy>(_x: T) {}
201
202    #[test]
203    fn identity_to_id_u32() {
204        let identity = Identity::<u32>::default();
205        assert_eq!(identity.to_id(0).unwrap(), 0u32);
206        assert_eq!(identity.to_id(42).unwrap(), 42u32);
207        assert_eq!(identity.to_id(u32::MAX as usize).unwrap(), u32::MAX);
208
209        assert_is_copy(identity);
210    }
211
212    #[test]
213    fn identity_to_id_u64() {
214        let identity = Identity::<u64>::new();
215        assert_eq!(identity.to_id(0).unwrap(), 0u64);
216        assert_eq!(identity.to_id(usize::MAX).unwrap(), usize::MAX as u64);
217    }
218
219    #[test]
220    fn identity_to_id_overflow() {
221        let identity = Identity::<u8>::new();
222        assert!(identity.to_id(256).is_err());
223    }
224
225    #[test]
226    fn identity_debug() {
227        let identity = Identity::<u32>::default();
228        let debug_str = format!("{:?}", identity);
229        assert!(debug_str.contains("Identity"));
230        assert!(debug_str.contains("u32"));
231    }
232
233    //-------------//
234    // Slice Tests //
235    //-------------//
236
237    #[test]
238    fn test_slice() {
239        let slice = Slice::new(vec![10u32, 20, 30, 40, 50].into_boxed_slice());
240
241        assert_eq!(slice.len(), 5);
242        assert!(!slice.is_empty());
243
244        assert_eq!(slice.to_id(0).unwrap(), 10);
245        assert_eq!(slice.to_id(1).unwrap(), 20);
246        assert_eq!(slice.to_id(2).unwrap(), 30);
247        assert_eq!(slice.to_id(3).unwrap(), 40);
248        assert_eq!(slice.to_id(4).unwrap(), 50);
249        assert!(slice.to_id(5).is_err());
250
251        // Empty Slice
252        let slice = Slice::<u32>::new(vec![].into_boxed_slice());
253        assert!(slice.is_empty());
254        assert_eq!(slice.len(), 0);
255    }
256
257    //-------------//
258    // Range Tests //
259    //-------------//
260
261    #[test]
262    fn test_range() {
263        let range = Range::new(100u32..105);
264
265        assert_eq!(range.len(), 5);
266        assert!(!range.is_empty());
267
268        assert_eq!(range.to_id(0).unwrap(), 100);
269        assert_eq!(range.to_id(1).unwrap(), 101);
270        assert_eq!(range.to_id(4).unwrap(), 104);
271
272        assert!(range.to_id(5).is_err());
273
274        // Empty Range
275        let range = Range::new(5u32..5);
276        assert_eq!(range.len(), 0);
277        assert!(range.is_empty());
278    }
279}