granne/index/
mod.rs

1use fxhash::FxBuildHasher;
2use madvise::{AccessPattern, AdviseMemory};
3use ordered_float::NotNan;
4use parking_lot;
5use pbr;
6use rayon::prelude::*;
7use std::cmp;
8use std::collections::{BinaryHeap, HashSet};
9use std::convert::TryFrom;
10use std::time;
11
12#[cfg(test)]
13mod tests;
14
15mod io;
16pub mod reorder;
17
18#[cfg(feature = "rw_granne")]
19pub mod rw;
20
21use crate::{
22    max_size_heap,
23    slice_vector::{FixedWidthSliceVector, MultiSetVector},
24    {ElementContainer, ExtendableElementContainer, Permutable},
25};
26
27type NeighborId = u32;
28const UNUSED: NeighborId = NeighborId::max_value();
29
30/** An index for fast approximate nearest neighbor search.
31 The index is built by using [`GranneBuilder`](struct.GranneBuilder.html) and can be stored to
32 disk.
33
34`Granne` can be created either from a [file](struct.Granne.html#method.from_file) or a
35[`u8` slice](struct.Granne.html#method.from_bytes).
36
37 */
38pub struct Granne<'a, Elements> {
39    layers: FileOrMemoryLayers<'a>,
40    elements: Elements,
41}
42
43impl<'a, Elements> Granne<'a, Elements> {
44    fn from_parts<L: Into<Layers<'a>>>(layers: L, elements: Elements) -> Self {
45        Self {
46            layers: FileOrMemoryLayers::Memory(layers.into()),
47            elements,
48        }
49    }
50}
51
52/// This trait is implemented for any `Granne` and contains methods that are common for all element
53/// types.
54pub trait Index {
55    /// Returns the number of elements in this index.
56    fn len(self: &Self) -> usize;
57
58    /// Returns the number of layers in this index.
59    fn num_layers(self: &Self) -> usize;
60
61    /// Returns the number of nodes in `layer`.
62    fn layer_len(self: &Self, layer: usize) -> usize;
63
64    /// Returns the neighbors of the node at `index` in `layer`.
65    fn get_neighbors(self: &Self, index: usize, layer: usize) -> Vec<usize>;
66
67    /// Write the index to `buffer`.
68    fn write_index<B: std::io::Write + std::io::Seek>(self: &Self, buffer: &mut B) -> std::io::Result<()>
69    where
70        Self: Sized;
71}
72
73impl<'a, Elements: ElementContainer> Index for Granne<'a, Elements> {
74    /// Returns the number of elements in this index.
75    /// Note that it might be less than the number of elements in `elements`.
76    fn len(self: &Self) -> usize {
77        let layers = self.layers.load();
78        if layers.len() > 0 {
79            self.layer_len(layers.len() - 1)
80        } else {
81            0
82        }
83    }
84
85    /// Returns the number of layers in this index.
86    fn num_layers(self: &Self) -> usize {
87        self.layers.load().len()
88    }
89
90    /// Returns the number of nodes in `layer`.
91    fn layer_len(self: &Self, layer: usize) -> usize {
92        self.layers.load().as_graph(layer).len()
93    }
94
95    /// Returns the neighbors of the node at `index` in `layer`.
96    fn get_neighbors(self: &Self, index: usize, layer: usize) -> Vec<usize> {
97        self.layers.load().as_graph(layer).get_neighbors(index)
98    }
99
100    /// Write the index to `buffer`.
101    fn write_index<B: std::io::Write + std::io::Seek>(self: &Self, buffer: &mut B) -> std::io::Result<()> {
102        io::write_index(&self.layers.load(), buffer)
103    }
104}
105
106impl<'a, Elements: ElementContainer> Granne<'a, Elements> {
107    /// Loads this index from bytes.
108    pub fn from_bytes(index: &'a [u8], elements: Elements) -> Self {
109        Self {
110            layers: FileOrMemoryLayers::Memory(io::load_layers(index)),
111            elements,
112        }
113    }
114
115    /// Loads the index from a file. The index will be memory mapped.
116    ///
117    /// ## Safety
118    ///
119    /// This is unsafe because the underlying file can be modified, which would result in undefined
120    /// behavior. The caller needs to guarantee that the file is not modified while being
121    /// memory-mapped.
122    pub unsafe fn from_file(file: &std::fs::File, elements: Elements) -> std::io::Result<Self> {
123        let file = memmap::Mmap::map(file)?;
124        file.advise_memory_access(AccessPattern::Random)?;
125
126        let index = Self {
127            layers: FileOrMemoryLayers::File(file),
128            elements,
129        };
130
131        // verify that it works
132        let _ = index.len();
133
134        Ok(index)
135    }
136
137    /// Searches for the `num_neighbors` neighbors closest to `element` in this index.
138    /// `max_search` controls the number of nodes visited during the search. Returns a
139    /// `Vec` containing the id and distance from `element`.
140    pub fn search(
141        self: &Self,
142        element: &Elements::Element,
143        max_search: usize,
144        num_neighbors: usize,
145    ) -> Vec<(usize, f32)> {
146        match self.layers.load() {
147            Layers::FixWidth(layers) => self.search_internal(&layers, element, max_search, num_neighbors),
148            Layers::Compressed(layers) => self.search_internal(&layers, element, max_search, num_neighbors),
149        }
150    }
151
152    /// Returns the element at `index`.
153    pub fn get_element(self: &Self, index: usize) -> Elements::Element {
154        self.elements.get(index)
155    }
156
157    /// Returns a reference to the elements of this index.
158    pub fn get_elements(self: &Self) -> &Elements {
159        &self.elements
160    }
161}
162
163impl<'a, Elements: ElementContainer + crate::io::Writeable> Granne<'a, Elements> {
164    /// Writes the elements of this index to `buffer`.
165    pub fn write_elements<B: std::io::Write>(self: &Self, buffer: &mut B) -> std::io::Result<usize> {
166        self.elements.write(buffer)
167    }
168}
169
170impl<'a, Elements> Granne<'a, &Elements>
171where
172    Elements: std::borrow::ToOwned,
173{
174    /// Creates an owned index from a borrowed one.
175    pub fn to_owned(self: &Self) -> Granne<'static, Elements::Owned> {
176        let layers = match self.layers.load() {
177            Layers::FixWidth(layers) => Layers::FixWidth(layers.into_iter().map(|layer| layer.into_owned()).collect()),
178            Layers::Compressed(layers) => {
179                Layers::Compressed(layers.into_iter().map(|layer| layer.into_owned()).collect())
180            }
181        };
182
183        Granne::from_parts(layers, self.elements.to_owned())
184    }
185}
186
187/// `BuildConfig` is used to configure a [`GranneBuilder`](struct.GranneBuilder.html).
188///
189/// # Examples
190/// ```
191/// # use granne::*;
192/// let config = BuildConfig::new()
193///     .num_neighbors(30)
194///     .layer_multiplier(15.0)
195///     .max_search(200);
196/// let mut builder = GranneBuilder::new(config, angular::Vectors::new());
197/// ```
198#[derive(Copy, Clone, Debug)]
199pub struct BuildConfig {
200    /// Each layer includes `layer_multiplier` times more elements than the previous layer.
201    layer_multiplier: f32,
202
203    /// Needs to be used when building before all elements have been inserted into the builder.
204    expected_num_elements: Option<usize>,
205
206    /// The maximum number of neighbors per node and layer.
207    num_neighbors: usize,
208
209    /// The `max_search` parameter used during build time (see
210    /// [`Granne::search`](struct.Granne.html#method.search)).
211    max_search: usize,
212
213    /// Whether to reinsert all the elements in each layers. Takes more time, but improves recall.
214    reinsert_elements: bool,
215
216    /// Whether to output progress information to STDOUT while building.
217    show_progress: bool,
218}
219
220impl Default for BuildConfig {
221    fn default() -> Self {
222        BuildConfig {
223            layer_multiplier: 15.0,
224            expected_num_elements: None,
225            num_neighbors: 30,
226            max_search: 200,
227            reinsert_elements: true,
228            show_progress: false,
229        }
230    }
231}
232
233impl BuildConfig {
234    /// Creates a `BuildConfig` for `GranneBuilder` with default settings.
235    pub fn new() -> Self {
236        Self::default()
237    }
238
239    /// Configures the maximum number of neighbors per node in each layer.
240    ///
241    /// Default: 30
242    pub fn num_neighbors(mut self: Self, num_neighbors: usize) -> Self {
243        self.num_neighbors = num_neighbors;
244        self
245    }
246
247    /// Configures the `max_search` parameter used during build time (see
248    /// [`Granne::search`](struct.Granne.html#method.search)). Larger values increase recall but
249    /// makes building slower.
250    ///
251    /// Default: 200
252    pub fn max_search(mut self: Self, max_search: usize) -> Self {
253        self.max_search = max_search;
254        self
255    }
256
257    /// Configures the expected number of elements in the final graph. This is only required
258    /// when building (`builder.build()`) before all elements have been inserted into the builder.
259    pub fn expected_num_elements(mut self: Self, expected_num_elements: usize) -> Self {
260        self.expected_num_elements = Some(expected_num_elements);
261        self
262    }
263
264    /// Configures the layer multiplier for the hierarchical graph:
265    /// each new layer will have `layer_multiplier` times more elements than the previous.
266    /// E.g. `layer_multiplier == 10.0` implies `n`, `10n`, `100n`, ... nodes per layer for
267    /// some `n < 10`.
268    ///
269    /// Default: 15.0
270    pub fn layer_multiplier(mut self: Self, layer_multiplier: f32) -> Self {
271        self.layer_multiplier = layer_multiplier;
272        self
273    }
274
275    /// Enables reinsertion of all the elements in each layers. Takes more time, but improves
276    /// recall.
277    ///
278    /// This option is enabled by default.
279    pub fn reinsert_elements(mut self: Self, yes: bool) -> Self {
280        self.reinsert_elements = yes;
281        self
282    }
283
284    /// Enables printing progress information to STDOUT while building.
285    ///
286    /// This option is disabled by default.
287    pub fn show_progress(mut self: Self, yes: bool) -> Self {
288        self.show_progress = yes;
289        self
290    }
291}
292
293/// A builder for creating an index to be searched using [`Granne`](struct.Granne.html). Configured
294/// by [`BuildConfig`](struct.BuildConfig.html).
295pub struct GranneBuilder<Elements: ElementContainer> {
296    elements: Elements,
297    layers: Vec<FixedWidthSliceVector<'static, NeighborId>>,
298    config: BuildConfig,
299}
300
301/// This trait is implemented for any `GranneBuilder` and contains methods that are common for all
302/// element types.
303pub trait Builder: Index {
304    /// Builds an index for approximate nearest neighbor search.
305    fn build(self: &mut Self);
306
307    /// Builds the search index for the first num_elements elements
308    /// Can be used for long-running jobs where intermediate steps needs to be stored
309    ///
310    /// Note: already indexed elements are not reindexed
311    fn build_partial(self: &mut Self, num_elements: usize);
312
313    /// Returns the number of elements.
314    fn num_elements(self: &Self) -> usize;
315}
316
317impl<Elements: ElementContainer + Sync> Index for GranneBuilder<Elements> {
318    /// Returns the number of indexed elements.
319    /// Note that it might be less than the number of elements in `elements`.
320    /// # Examples
321    /// ```
322    /// # use granne::*;
323    /// # let elements: angular::Vectors = test_helper::random_vectors(3, 1000);
324    /// assert_eq!(1000, elements.len());
325    /// let mut builder = GranneBuilder::new(BuildConfig::default(), elements);
326    /// builder.build_partial(100);
327    /// assert_eq!(100, builder.len());
328    /// assert_eq!(1000, builder.num_elements());
329    fn len(self: &Self) -> usize {
330        self.get_index().len()
331    }
332
333    /// Returns the number of layers in this index.
334    fn num_layers(self: &Self) -> usize {
335        self.get_index().num_layers()
336    }
337
338    /// Returns the number of nodes in `layer`.
339    fn layer_len(self: &Self, layer: usize) -> usize {
340        self.get_index().layer_len(layer)
341    }
342
343    /// Returns the neighbors of the node at `index` in `layer`.
344    fn get_neighbors(self: &Self, index: usize, layer: usize) -> Vec<usize> {
345        self.get_index().get_neighbors(index, layer)
346    }
347
348    /// Writes the index to `buffer`.
349    /// # Examples
350    /// ```
351    /// # use granne::*;
352    /// # let builder = GranneBuilder::new(BuildConfig::default(), angular::Vectors::new());
353    /// # let path = "/tmp/write_index.bin";
354    /// let mut file = std::fs::File::create(path)?;
355    /// builder.write_index(&mut file)?;
356    /// # Ok::<(), std::io::Error>(())
357    /// ```
358    fn write_index<B: std::io::Write + std::io::Seek>(self: &Self, buffer: &mut B) -> std::io::Result<()> {
359        let layers: Layers = Layers::FixWidth(self.layers.iter().map(|layer| layer.borrow()).collect());
360        io::write_index(&layers, buffer)
361    }
362}
363
364impl<Elements: ElementContainer + Sync> Builder for GranneBuilder<Elements> {
365    /// Builds an index for approximate nearest neighbor search.
366    fn build(self: &mut Self) {
367        self.build_partial(self.elements.len())
368    }
369
370    /// Builds the search index for the first num_elements elements.
371    /// Can be used for long-running jobs where intermediate steps needs to be stored.
372    ///
373    /// Note: already indexed elements are not reindexed.
374    fn build_partial(self: &mut Self, num_elements: usize) {
375        if num_elements == 0 {
376            return;
377        }
378
379        assert!(
380            num_elements >= self.layers.last().map_or(0, |layer| layer.len()),
381            "Cannot index fewer elements than already in index."
382        );
383        assert!(
384            num_elements <= self.elements.len(),
385            "Cannot index more elements than exist."
386        );
387
388        if !self.layers.is_empty() {
389            self.index_elements_in_last_layer(num_elements);
390        }
391
392        while self.len() < num_elements {
393            let new_layer = self.layers.last().map_or_else(
394                || FixedWidthSliceVector::with_width(self.config.num_neighbors),
395                |prev_layer| prev_layer.clone(),
396            );
397
398            self.layers.push(new_layer);
399
400            self.index_elements_in_last_layer(num_elements);
401        }
402    }
403
404    /// Returns the number of elements.
405    fn num_elements(self: &Self) -> usize {
406        self.elements.len()
407    }
408}
409
410impl<Elements: ElementContainer + Sync> GranneBuilder<Elements> {
411    /// Creates a new GranneBuilder with a `BuildConfig` and `elements`.
412    /// # Examples
413    ///
414    /// ```
415    /// # use granne::*;
416    /// let config = BuildConfig::default().num_neighbors(20).max_search(100).show_progress(true);
417    /// let mut builder = GranneBuilder::new(config, angular::Vectors::new());
418    /// ```
419    pub fn new(config: BuildConfig, elements: Elements) -> Self {
420        assert!(elements.len() < UNUSED as usize);
421        Self {
422            elements,
423            layers: Vec::new(),
424            config,
425        }
426    }
427
428    /// Creates a `GranneBuilder` by reading an already built index from `buffer` together with
429    /// `elements`.
430    pub fn from_bytes(config: BuildConfig, buffer: &[u8], elements: Elements) -> Self {
431        let mut builder = Self::new(config, elements);
432
433        let layers = io::load_layers(buffer);
434
435        match layers {
436            Layers::FixWidth(layers) => {
437                builder.layers = layers.iter().map(|l| l.borrow().into_owned()).collect();
438            }
439            Layers::Compressed(layers) => {
440                for layer in layers {
441                    builder.layers.push({
442                        let mut new_layer = FixedWidthSliceVector::with_width(builder.config.num_neighbors);
443                        new_layer.reserve(layer.len());
444
445                        let mut neighbors = Vec::new();
446                        for i in 0..layer.len() {
447                            layer.get_into(i, &mut neighbors);
448                            neighbors.resize(builder.config.num_neighbors, UNUSED);
449
450                            new_layer.push(&neighbors);
451                            neighbors.clear();
452                        }
453
454                        new_layer
455                    });
456                }
457            }
458        }
459
460        builder
461    }
462
463    /// Creates a `GranneBuilder` by reading an already built index from `buffer` together with
464    /// `elements`.
465    pub fn from_file(config: BuildConfig, file: &std::fs::File, elements: Elements) -> std::io::Result<Self> {
466        let bytes = unsafe { memmap::Mmap::map(file)? };
467
468        Ok(Self::from_bytes(config, &bytes[..], elements))
469    }
470
471    /// Returns a searchable index from this builder.
472    /// # Examples
473    /// ```
474    /// # use granne::*;
475    /// # let elements: angular::Vectors = test_helper::random_vectors(3, 1000);
476    /// # let element = elements.get_element(123).into_owned();
477    /// # let max_search = 10; let num_neighbors = 20;
478    /// let mut builder = GranneBuilder::new(BuildConfig::default(), elements);
479    /// builder.build();
480    /// let index = builder.get_index();
481    /// index.search(&element, max_search, num_neighbors);
482    /// ```
483    pub fn get_index(self: &Self) -> Granne<&Elements> {
484        Granne::from_parts(
485            self.layers.iter().map(|l| l.borrow()).collect::<Vec<_>>(),
486            &self.elements,
487        )
488    }
489
490    /// Returns a reference to the elements in this builder.
491    pub fn get_elements(self: &Self) -> &Elements {
492        &self.elements
493    }
494}
495
496impl<Elements: ElementContainer + crate::io::Writeable> GranneBuilder<Elements> {
497    /// Writes the elements of this builder to `buffer`.
498    /// # Examples
499    /// ```
500    /// # use granne::*;
501    /// # let builder = GranneBuilder::new(BuildConfig::default(), angular::Vectors::new());
502    /// # let path = "/tmp/write_elements.bin";
503    /// let mut file = std::fs::File::create(path)?;
504    /// builder.write_elements(&mut file)?;
505    /// # Ok::<(), std::io::Error>(())
506    /// ```
507    pub fn write_elements<B: std::io::Write>(self: &Self, buffer: &mut B) -> std::io::Result<usize> {
508        self.elements.write(buffer)
509    }
510}
511
512impl<Elements: ExtendableElementContainer> GranneBuilder<Elements> {
513    /// Push a new element into this builder. In order to insert it into the index
514    /// a call to `build` or `build_partial` is required.
515    /// # Examples
516    /// ```
517    /// # use granne::*;
518    /// # let element0 = test_helper::random_vector(3);
519    /// # let element1 = test_helper::random_vector(3);
520    /// let mut builder = GranneBuilder::new(BuildConfig::default(), angular::Vectors::new());
521    /// builder.push(element0);
522    /// builder.push(element1);
523    /// assert_eq!(0, builder.len());
524    /// builder.build();
525    /// assert_eq!(2, builder.len());
526    /// ```
527    pub fn push(self: &mut Self, element: Elements::InternalElement) {
528        assert!(self.elements.len() < UNUSED as usize - 1);
529        self.elements.push(element);
530    }
531}
532
533// implementation
534
535trait Graph {
536    fn get_neighbors(self: &Self, idx: usize) -> Vec<usize>;
537    fn len(self: &Self) -> usize;
538}
539
540impl<'a> Graph for FixedWidthSliceVector<'a, NeighborId> {
541    fn get_neighbors(self: &Self, idx: usize) -> Vec<usize> {
542        self.get(idx)
543            .iter()
544            .take_while(|&&x| x != UNUSED)
545            .map(|&x| usize::try_from(x).unwrap())
546            .collect()
547    }
548
549    fn len(self: &Self) -> usize {
550        self.len()
551    }
552}
553
554impl<'a> Graph for MultiSetVector<'a> {
555    fn get_neighbors(self: &Self, idx: usize) -> Vec<usize> {
556        self.get(idx).iter().map(|&x| usize::try_from(x).unwrap()).collect()
557    }
558
559    fn len(self: &Self) -> usize {
560        self.len()
561    }
562}
563
564impl<'a> Graph for [parking_lot::RwLock<&'a mut [NeighborId]>] {
565    fn get_neighbors(self: &Self, idx: usize) -> Vec<usize> {
566        self[idx]
567            .read()
568            .iter()
569            .take_while(|&&x| x != UNUSED)
570            .map(|&x| usize::try_from(x).unwrap())
571            .collect()
572    }
573
574    fn len(self: &Self) -> usize {
575        self.len()
576    }
577}
578
579enum FileOrMemoryLayers<'a> {
580    File(memmap::Mmap),
581    Memory(Layers<'a>),
582}
583
584impl<'a> FileOrMemoryLayers<'a> {
585    fn load<'b>(self: &'b Self) -> Layers<'b>
586    where
587        'a: 'b,
588    {
589        match self {
590            Self::File(mmap) => io::load_layers(&mmap[..]),
591            Self::Memory(layers) => layers.borrow(),
592        }
593    }
594}
595
596enum Layers<'a> {
597    FixWidth(Vec<FixedWidthSliceVector<'a, NeighborId>>),
598    Compressed(Vec<MultiSetVector<'a>>),
599}
600
601impl<'a> Layers<'a> {
602    fn len(self: &Self) -> usize {
603        match self {
604            Self::FixWidth(layers) => layers.len(),
605            Self::Compressed(layers) => layers.len(),
606        }
607    }
608
609    fn as_graph(self: &Self, layer: usize) -> &dyn Graph {
610        match self {
611            Self::FixWidth(layers) => &layers[layer],
612            Self::Compressed(layers) => &layers[layer],
613        }
614    }
615
616    fn borrow<'b>(self: &'b Self) -> Layers<'b>
617    where
618        'a: 'b,
619    {
620        match self {
621            Self::FixWidth(layers) => Layers::FixWidth(layers.iter().map(|l| l.borrow()).collect()),
622            Self::Compressed(layers) => Layers::Compressed(layers.iter().map(|l| l.borrow()).collect()),
623        }
624    }
625}
626
627impl<'a> From<Vec<FixedWidthSliceVector<'a, NeighborId>>> for Layers<'a> {
628    fn from(fix_width: Vec<FixedWidthSliceVector<'a, NeighborId>>) -> Self {
629        Self::FixWidth(fix_width)
630    }
631}
632
633/// Computes the number of elements that should be in layer `layer_idx`.
634fn compute_num_elements_in_layer(total_num_elements: usize, layer_multiplier: f32, layer_idx: usize) -> usize {
635    let layer_multiplier = layer_multiplier as f64;
636
637    cmp::min(
638        (total_num_elements as f64
639            / (layer_multiplier.powf((total_num_elements as f64).log(layer_multiplier).floor() - layer_idx as f64)))
640        .ceil() as usize,
641        total_num_elements,
642    )
643}
644
645impl<Elements: ElementContainer + Sync> GranneBuilder<Elements> {
646    fn index_elements_in_last_layer(self: &mut Self, max_num_elements: usize) {
647        let total_num_elements = self.config.expected_num_elements.unwrap_or(self.elements.len());
648        let ideal_num_elements_in_layer = compute_num_elements_in_layer(
649            cmp::max(total_num_elements, self.elements.len()),
650            self.config.layer_multiplier,
651            self.layers.len() - 1,
652        );
653
654        if ideal_num_elements_in_layer <= self.layers.last().unwrap().len() {
655            // nothing to index in this layer
656            return;
657        }
658
659        let num_elements_in_layer = cmp::min(max_num_elements, ideal_num_elements_in_layer);
660        let additional = ideal_num_elements_in_layer - self.layers.last().unwrap().len();
661
662        let mut config = self.config;
663
664        // if not last layer
665        if ideal_num_elements_in_layer < total_num_elements {
666            // use half num_neighbors on upper layers
667            config.num_neighbors = cmp::max(1, config.num_neighbors / 2);
668        }
669
670        let mut layer = self.layers.pop().unwrap();
671
672        layer.reserve_exact(additional);
673
674        let prev_layers = self.get_index();
675
676        if self.config.show_progress {
677            println!(
678                "Building layer {} with {} elements...",
679                self.layers.len(),
680                num_elements_in_layer
681            );
682        }
683
684        Self::index_elements(
685            &config,
686            &self.elements,
687            num_elements_in_layer,
688            &prev_layers,
689            &mut layer,
690            false,
691        );
692
693        if self.config.reinsert_elements {
694            if self.config.show_progress {
695                println!("Reinserting elements...");
696            }
697
698            // use half max_search when reindexing
699            config.max_search = std::cmp::max(1, config.max_search / 2);
700
701            // reinsert elements to improve index quality
702            Self::index_elements(
703                &config,
704                &self.elements,
705                num_elements_in_layer,
706                &prev_layers,
707                &mut layer,
708                true,
709            );
710        }
711
712        self.layers.push(layer);
713    }
714
715    /// Indexes elements in `layer`.
716    fn index_elements(
717        config: &BuildConfig,
718        elements: &Elements,
719        num_elements: usize,
720        prev_layers: &Granne<&Elements>,
721        layer: &mut FixedWidthSliceVector<'static, NeighborId>,
722        reinsert_elements: bool,
723    ) {
724        assert!(layer.len() <= num_elements);
725
726        let mut already_indexed = layer.len();
727        if reinsert_elements {
728            already_indexed = 0;
729        } else {
730            layer.resize(num_elements, UNUSED);
731        }
732
733        // set up progress bar
734        let step_size = cmp::max(100, num_elements / 400);
735        let (progress_bar, start_time) = {
736            if config.show_progress {
737                let mut progress_bar = pbr::ProgressBar::new(elements.len() as u64);
738                let info_text = format!("Layer {}: ", prev_layers.layers.load().len());
739                progress_bar.message(&info_text);
740                progress_bar.set((step_size * (already_indexed / step_size)) as u64);
741
742                // if too many elements were already indexed, the shown speed
743                // is misrepresenting and not of much help
744                if already_indexed > num_elements / 3 {
745                    progress_bar.show_speed = false;
746                    progress_bar.show_time_left = false;
747                }
748
749                (Some(parking_lot::Mutex::new(progress_bar)), Some(time::Instant::now()))
750            } else {
751                (None, None)
752            }
753        };
754
755        {
756            // create RwLocks for underlying nodes
757            let layer: Vec<parking_lot::RwLock<&mut [NeighborId]>> =
758                layer.iter_mut().map(parking_lot::RwLock::new).collect();
759
760            let insert_element = |(idx, _)| {
761                Self::index_element(config, elements, prev_layers, &layer, idx);
762
763                // This only shows approximate progress because of par_iter
764                if idx % step_size == 0 {
765                    if let Some(ref progress_bar) = progress_bar {
766                        progress_bar.lock().add(step_size as u64);
767                    }
768                }
769            };
770
771            #[cfg(feature = "singlethreaded")]
772            let layer_iter = layer.iter();
773            #[cfg(not(feature = "singlethreaded"))]
774            let layer_iter = layer.par_iter();
775
776            if reinsert_elements {
777                // reinserting elements is done in reverse order
778                layer_iter.enumerate().rev().for_each(insert_element);
779            } else {
780                // index elements, skipping already indexed
781                layer_iter.enumerate().skip(already_indexed).for_each(insert_element);
782            };
783        }
784
785        if let Some(progress_bar) = progress_bar {
786            progress_bar.lock().set(layer.len() as u64);
787        }
788
789        #[cfg(feature = "singlethreaded")]
790        let layer_iter_mut = layer.iter_mut();
791        #[cfg(not(feature = "singlethreaded"))]
792        let layer_iter_mut = layer.par_iter_mut();
793
794        // limit number of neighbors (i.e. apply heuristic for neighbor selection)
795        layer_iter_mut.enumerate().for_each(|(i, node)| {
796            Self::add_and_limit_neighbors(elements, node, i, &[], config.num_neighbors);
797        });
798
799        if let Some(start_time) = start_time {
800            println!("Time: {} s", start_time.elapsed().as_secs());
801        }
802    }
803
804    /// Indexes the element with index `idx`.
805    fn index_element(
806        config: &BuildConfig,
807        elements: &Elements,
808        prev_layers: &Granne<&Elements>,
809        layer: &[parking_lot::RwLock<&mut [NeighborId]>],
810        idx: usize,
811    ) {
812        // do not index elements that are zero
813        if elements.dist(idx, idx) > NotNan::new(100.0 * std::f32::EPSILON).unwrap() {
814            return;
815        }
816
817        let element = elements.get(idx);
818
819        let entrypoint = prev_layers.search(&element, 1, 1).first().map_or(0, |r| r.0);
820        let candidates = search_for_neighbors(layer, entrypoint, elements, &element, config.max_search);
821
822        let candidates: Vec<_> = candidates.into_iter().filter(|&(id, _)| id != idx).collect();
823
824        let neighbors = Self::select_neighbors(elements, candidates, config.num_neighbors);
825
826        // if the current element is a duplicate of too many of its potential neighbors, do not
827        // connect it to the graph, this effectively creates a dead node
828        if let Some((_, d)) = neighbors.get(config.num_neighbors / 2) {
829            if *d < NotNan::new(100.0 * std::f32::EPSILON).unwrap() {
830                return;
831            }
832        }
833
834        // if current node is empty, initialize it with the neighbors
835        if layer[idx].read()[0] == UNUSED {
836            Self::initialize_node(&layer[idx], &neighbors[..]);
837        } else {
838            for &(neighbor, d) in &neighbors {
839                Self::connect_nodes(elements, &layer[idx], idx, neighbor, d);
840            }
841        }
842
843        for (neighbor, d) in neighbors {
844            Self::connect_nodes(elements, &layer[neighbor], neighbor, idx, d);
845        }
846    }
847
848    /// Given a vec of `candidates`, selects the neighbors for an element.
849    fn select_neighbors(
850        elements: &Elements,
851        candidates: Vec<(usize, NotNan<f32>)>,
852        max_neighbors: usize,
853    ) -> Vec<(usize, NotNan<f32>)> {
854        if candidates.len() <= max_neighbors {
855            return candidates;
856        }
857
858        // candidates need to be sorted on distance from idx
859        debug_assert!(candidates
860            .iter()
861            .zip(candidates.iter().skip(1))
862            .all(|((_, d0), (_, d1))| d0 <= d1));
863
864        let mut neighbors: Vec<(usize, NotNan<f32>)> = Vec::new();
865
866        for (j, d) in candidates {
867            if neighbors.len() >= max_neighbors {
868                break;
869            }
870
871            // add j to neighbors if j is closer to idx,
872            // than to all previously added neighbors
873            let element = elements.get(j);
874            if neighbors
875                .iter()
876                .all(|&(n, _)| d <= elements.dist_to_element(n, &element))
877            {
878                neighbors.push((j, d));
879            }
880        }
881
882        neighbors
883    }
884
885    /// Sets neighbors for `node`.
886    fn initialize_node(node: &parking_lot::RwLock<&mut [NeighborId]>, neighbors: &[(usize, NotNan<f32>)]) {
887        debug_assert_eq!(&UNUSED, &node.read()[0]);
888
889        // Write Lock!
890        let mut node = node.write();
891
892        for (i, &(idx, _)) in neighbors.iter().enumerate().take(node.len()) {
893            node[i] = NeighborId::try_from(idx).unwrap();
894        }
895    }
896
897    /// Tries to add `j` as a neighbor to `i`. If the neighbor list is full, uses `select_neighbors`
898    /// to limit the number of neighbors.
899    fn connect_nodes(
900        elements: &Elements,
901        node: &parking_lot::RwLock<&mut [NeighborId]>,
902        i: usize,
903        j: usize,
904        d: NotNan<f32>,
905    ) {
906        if i == j {
907            return;
908        }
909
910        // Write Lock!
911        let mut node = node.write();
912
913        // Do not insert duplicates
914        let j_id = NeighborId::try_from(j).unwrap();
915        if let Some(free_pos) = node.iter().position(|x| *x == UNUSED || *x == j_id) {
916            node[free_pos] = j_id;
917        } else {
918            let num_neighbors = node.len();
919            Self::add_and_limit_neighbors(elements, &mut node, i, &[(j, d)], num_neighbors);
920        }
921    }
922
923    fn add_and_limit_neighbors(
924        elements: &Elements,
925        node: &mut [NeighborId],
926        node_id: usize,
927        extra: &[(usize, NotNan<f32>)],
928        num_neighbors: usize,
929    ) {
930        assert!(num_neighbors <= node.len());
931
932        let neighbors: Vec<usize> = node
933            .iter()
934            .take_while(|&&x| x != UNUSED)
935            .map(|&x| usize::try_from(x).unwrap())
936            .collect();
937
938        let dists = elements.dists(node_id, &neighbors);
939        let mut candidates: Vec<_> = neighbors.iter().copied().zip(dists.into_iter()).collect();
940
941        for &(j, d) in extra {
942            candidates.push((j, d));
943        }
944
945        candidates.sort_unstable_by_key(|&(_, d)| d);
946
947        let neighbors = Self::select_neighbors(elements, candidates, num_neighbors);
948
949        // set new neighbors and mark last positions as unused
950        for (k, n) in neighbors
951            .into_iter()
952            .map(|(n, _)| NeighborId::try_from(n).unwrap())
953            .chain(std::iter::repeat(UNUSED))
954            .enumerate()
955            .take(node.len())
956        {
957            node[k] = n;
958        }
959    }
960}
961
962impl<'a, Elements: ElementContainer> Granne<'a, Elements> {
963    fn search_internal(
964        self: &Self,
965        layers: &[impl Graph],
966        element: &Elements::Element,
967        max_search: usize,
968        num_neighbors: usize,
969    ) -> Vec<(usize, f32)> {
970        if let Some((bottom_layer, top_layers)) = layers.split_last() {
971            let entrypoint = find_entrypoint(top_layers, &self.elements, element);
972
973            search_for_neighbors(bottom_layer, entrypoint, &self.elements, element, max_search)
974                .into_iter()
975                .take(num_neighbors)
976                .map(|(i, d)| (i, d.into_inner()))
977                .collect()
978        } else {
979            Vec::new()
980        }
981    }
982}
983
984fn find_entrypoint<Layer: Graph, Elements: ElementContainer>(
985    layers: &[Layer],
986    elements: &Elements,
987    element: &Elements::Element,
988) -> usize {
989    let mut entrypoint = 0;
990    for layer in layers {
991        let res = search_for_neighbors(layer, entrypoint, elements, element, 1);
992
993        entrypoint = res[0].0;
994    }
995
996    entrypoint
997}
998
999fn search_for_neighbors<Layer: Graph + ?Sized, Elements: ElementContainer>(
1000    layer: &Layer,
1001    entrypoint: usize,
1002    elements: &Elements,
1003    goal: &Elements::Element,
1004    max_search: usize,
1005) -> Vec<(usize, NotNan<f32>)> {
1006    let mut res: max_size_heap::MaxSizeHeap<(NotNan<f32>, usize)> = max_size_heap::MaxSizeHeap::new(max_search); // TODO: should this really be max_search or num_neighbors?
1007    let mut pq: BinaryHeap<cmp::Reverse<_>> = BinaryHeap::new();
1008
1009    let num_neighbors = 20; //layer.at(0).len();
1010    let mut visited = HashSet::with_capacity_and_hasher(max_search * num_neighbors, FxBuildHasher::default());
1011
1012    let distance = elements.dist_to_element(entrypoint, goal);
1013
1014    pq.push(cmp::Reverse((distance, entrypoint)));
1015
1016    visited.insert(entrypoint);
1017
1018    while let Some(cmp::Reverse((d, idx))) = pq.pop() {
1019        if res.is_full() && d > res.peek().unwrap().0 {
1020            break;
1021        }
1022
1023        res.push((d, idx));
1024
1025        for neighbor_idx in layer.get_neighbors(idx) {
1026            if visited.insert(neighbor_idx) {
1027                let distance = elements.dist_to_element(neighbor_idx, goal);
1028
1029                if !res.is_full() || distance < res.peek().unwrap().0 {
1030                    pq.push(cmp::Reverse((distance, neighbor_idx)));
1031                }
1032            }
1033        }
1034    }
1035
1036    res.into_sorted_vec().into_iter().map(|(d, idx)| (idx, d)).collect()
1037}