mih_rs/
index.rs

1mod ops;
2mod siggen;
3mod sparsehash;
4
5use crate::CodeInt;
6
7/// Multi-index hashing for neighbor searches on binary codes in the Hamming space.
8///
9/// [`Index`] implements the multi-index hashing proposed by
10/// [Norouzi et al.](https://arxiv.org/abs/1307.2982), which provides
11/// fast and memory-efficient neighbor searches on binary codes in the Hamming space.
12///
13/// The following two search options are supported:
14///
15///  - *Range search* finds neighbor codes whose Hamming distances to a given code are within a radius.
16///  - *Top-K search* finds the top-K codes that are closest to a given code.
17///
18/// # Arguments
19///
20/// [`Index`] takes a generic type parameter of [`CodeInt`] to represent a binary code.
21///
22/// # Examples
23///
24/// ```
25/// use mih_rs::Index;
26///
27/// // Database of codes
28/// let codes: Vec<u64> = vec![
29///     0b1111111111111111111111011111111111111111111111111011101111111111, // #zeros = 3
30///     0b1111111111111111111111111111111101111111111011111111111111111111, // #zeros = 2
31///     0b1111111011011101111111111111111101111111111111111111111111111111, // #zeros = 4
32///     0b1111111111111101111111111111111111111000111111111110001111111110, // #zeros = 8
33///     0b1101111111111111111111111111111111111111111111111111111111111111, // #zeros = 1
34///     0b1111111111111111101111111011111111111111111101001110111111111111, // #zeros = 6
35///     0b1111111111111111111111111111111111101111111111111111011111111111, // #zeros = 2
36///     0b1110110101011011011111111111111101111111111111111000011111111111, // #zeros = 11
37/// ];
38///
39/// // Query code
40/// let qcode: u64 = 0b1111111111111111111111111111111111111111111111111111111111111111; // #zeros = 0
41///
42/// // Construct the index
43/// let index = Index::new(codes).unwrap();
44///
45/// // Find the ids of neighbor codes whose Hamming distances are within 2
46/// let mut searcher = index.range_searcher();
47/// let answers = searcher.run(qcode, 2);
48/// assert_eq!(answers, vec![1, 4, 6]);
49///
50/// // Find the ids of the top-4 nearest neighbor codes
51/// let mut searcher = index.topk_searcher();
52/// let answers = searcher.run(qcode, 4);
53/// assert_eq!(answers, vec![4, 1, 6, 0]);
54///
55/// // Serialization/Deserialization
56/// let mut data = vec![];
57/// index.serialize_into(&mut data).unwrap();
58/// let other = Index::<u64>::deserialize_from(&data[..]).unwrap();
59/// assert_eq!(index, other);
60/// ```
61#[derive(Clone, PartialEq, Eq, Debug)]
62pub struct Index<T: CodeInt> {
63    num_blocks: usize,
64    codes: Vec<T>,
65    tables: Vec<sparsehash::Table>,
66    masks: Vec<T>,
67    begs: Vec<usize>,
68}
69
70/// Range searcher created by [`Index::range_searcher()`].
71pub struct RangeSearcher<'a, T: CodeInt> {
72    index: &'a Index<T>,
73    siggen: siggen::SigGenerator64,
74    answers: Vec<u32>,
75}
76
77/// Top-K searcher created by [`Index::range_searcher()`].
78pub struct TopkSearcher<'a, T: CodeInt> {
79    index: &'a Index<T>,
80    siggen: siggen::SigGenerator64,
81    answers: Vec<u32>,
82    checked: std::collections::HashSet<usize>,
83}