1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
use std::{cmp::Reverse, collections::BinaryHeap};

use linfa::Float;
use ndarray::{ArrayBase, ArrayView2, Data, Ix2};
use noisy_float::NoisyFloat;
#[cfg(feature = "serde")]
use serde_crate::{Deserialize, Serialize};

use crate::{
    distance::Distance, heap_elem::MinHeapElem, BuildError, NearestNeighbour, NearestNeighbourBox,
    NearestNeighbourIndex, NnError, Point,
};

/// Spatial indexing structure created by [`LinearSearch`](LinearSearch)
#[derive(Debug, Clone, PartialEq)]
pub struct LinearSearchIndex<'a, F: Float, D: Distance<F>>(ArrayView2<'a, F>, D);

impl<'a, F: Float, D: Distance<F>> LinearSearchIndex<'a, F, D> {
    /// Creates a new `LinearSearchIndex`
    pub fn new<DT: Data<Elem = F>>(
        batch: &'a ArrayBase<DT, Ix2>,
        dist_fn: D,
    ) -> Result<Self, BuildError> {
        if batch.ncols() == 0 {
            Err(BuildError::ZeroDimension)
        } else {
            Ok(Self(batch.view(), dist_fn))
        }
    }
}

impl<'a, F: Float, D: Distance<F>> NearestNeighbourIndex<F> for LinearSearchIndex<'a, F, D> {
    fn k_nearest<'b>(
        &self,
        point: Point<'b, F>,
        k: usize,
    ) -> Result<Vec<(Point<F>, usize)>, NnError> {
        if self.0.ncols() != point.len() {
            Err(NnError::WrongDimension)
        } else {
            let mut heap = BinaryHeap::with_capacity(self.0.nrows());
            for (i, pt) in self.0.rows().into_iter().enumerate() {
                let dist = self.1.rdistance(point.reborrow(), pt.reborrow());
                heap.push(MinHeapElem {
                    elem: (pt.reborrow(), i),
                    dist: Reverse(NoisyFloat::new(dist)),
                });
            }

            Ok((0..k.min(heap.len()))
                .map(|_| heap.pop().unwrap().elem)
                .collect())
        }
    }

    fn within_range<'b>(
        &self,
        point: Point<'b, F>,
        range: F,
    ) -> Result<Vec<(Point<F>, usize)>, NnError> {
        if self.0.ncols() != point.len() {
            Err(NnError::WrongDimension)
        } else {
            let range = self.1.dist_to_rdist(range);
            Ok(self
                .0
                .rows()
                .into_iter()
                .enumerate()
                .filter(|(_, pt)| self.1.rdistance(point.reborrow(), pt.reborrow()) < range)
                .map(|(i, pt)| (pt, i))
                .collect())
        }
    }
}

/// Implementation of linear search, which is the simplest nearest neighbour algorithm. All queries
/// are implemented by scanning through every point, so all of them are `O(N)`. Calling
/// `from_batch` returns a [`LinearSearchIndex`](LinearSearchIndex).
#[derive(Default, Clone, Debug, PartialEq, Eq)]
#[cfg_attr(
    feature = "serde",
    derive(Serialize, Deserialize),
    serde(crate = "serde_crate")
)]
pub struct LinearSearch;

impl LinearSearch {
    /// Creates an instance of `LinearSearch`
    pub fn new() -> Self {
        Self
    }
}

impl NearestNeighbour for LinearSearch {
    fn from_batch_with_leaf_size<'a, F: Float, DT: Data<Elem = F>, D: 'a + Distance<F>>(
        &self,
        batch: &'a ArrayBase<DT, Ix2>,
        leaf_size: usize,
        dist_fn: D,
    ) -> Result<NearestNeighbourBox<'a, F>, BuildError> {
        if leaf_size == 0 {
            return Err(BuildError::EmptyLeaf);
        }
        LinearSearchIndex::new(batch, dist_fn).map(|v| Box::new(v) as NearestNeighbourBox<F>)
    }
}