pub struct Index<T: CodeInt> { /* private fields */ }Expand description
Multi-index hashing for neighbor searches on binary codes in the Hamming space.
Index implements the multi-index hashing proposed by
Norouzi et al., which provides
fast and memory-efficient neighbor searches on binary codes in the Hamming space.
The following two search options are supported:
- Range search finds neighbor codes whose Hamming distances to a given code are within a radius.
- Top-K search finds the top-K codes that are closest to a given code.
§Arguments
Index takes a generic type parameter of CodeInt to represent a binary code.
§Examples
use mih_rs::Index;
// Database of codes
let codes: Vec<u64> = vec![
0b1111111111111111111111011111111111111111111111111011101111111111, // #zeros = 3
0b1111111111111111111111111111111101111111111011111111111111111111, // #zeros = 2
0b1111111011011101111111111111111101111111111111111111111111111111, // #zeros = 4
0b1111111111111101111111111111111111111000111111111110001111111110, // #zeros = 8
0b1101111111111111111111111111111111111111111111111111111111111111, // #zeros = 1
0b1111111111111111101111111011111111111111111101001110111111111111, // #zeros = 6
0b1111111111111111111111111111111111101111111111111111011111111111, // #zeros = 2
0b1110110101011011011111111111111101111111111111111000011111111111, // #zeros = 11
];
// Query code
let qcode: u64 = 0b1111111111111111111111111111111111111111111111111111111111111111; // #zeros = 0
// Construct the index
let index = Index::new(codes).unwrap();
// Find the ids of neighbor codes whose Hamming distances are within 2
let mut searcher = index.range_searcher();
let answers = searcher.run(qcode, 2);
assert_eq!(answers, vec![1, 4, 6]);
// Find the ids of the top-4 nearest neighbor codes
let mut searcher = index.topk_searcher();
let answers = searcher.run(qcode, 4);
assert_eq!(answers, vec![4, 1, 6, 0]);
// Serialization/Deserialization
let mut data = vec![];
index.serialize_into(&mut data).unwrap();
let other = Index::<u64>::deserialize_from(&data[..]).unwrap();
assert_eq!(index, other);Implementations§
Source§impl<T: CodeInt> Index<T>
impl<T: CodeInt> Index<T>
Sourcepub fn new(codes: Vec<T>) -> Result<Self>
pub fn new(codes: Vec<T>) -> Result<Self>
Builds an index from binary codes.
The number of blocks for multi-index is set to the optimal one
estimated from the number of input codes.
The input database codes is stolen, but the reference can be gotten with Index::codes().
§Arguments
codes: Vector of binary codes of typeCodeInt.
§Errors
anyhow::Error will be returned when
- the
codesis empty, or - the number of entries in
codesis more thanu32::max_value().
Sourcepub fn with_blocks(codes: Vec<T>, num_blocks: usize) -> Result<Self>
pub fn with_blocks(codes: Vec<T>, num_blocks: usize) -> Result<Self>
Builds an index from binary codes with a manually specified number of blocks.
The input database codes is stolen, but the reference can be gotten with Index::codes().
§Arguments
codes: Vector of binary codes of typeCodeInt.num_blocks: The number of blocks for multi-index.
§Errors
anyhow::Error will be returned when
- the
codesis empty, - the number of entries in
codesis more thanu32::max_value(), or num_blocksis less than 2 or more than the number of dimensions in a binary code.
Sourcepub fn range_searcher(&self) -> RangeSearcher<'_, T>
pub fn range_searcher(&self) -> RangeSearcher<'_, T>
Returns a searcher RangeSearcher to find neighbor codes
whose Hamming distances to a query code are within a query radius.
§Examples
use mih_rs::Index;
let codes: Vec<u64> = vec![
0b1111111111111111111111011111111111111111111111111011101111111111, // #zeros = 3
0b1111111111111111111111111111111101111111111011111111111111111111, // #zeros = 2
0b1111111011011101111111111111111101111111111111111111111111111111, // #zeros = 4
0b1111111111111101111111111111111111111000111111111110001111111110, // #zeros = 8
0b1101111111111111111111111111111111111111111111111111111111111111, // #zeros = 1
0b1111111111111111101111111011111111111111111101001110111111111111, // #zeros = 6
0b1111111111111111111111111111111111101111111111111111011111111111, // #zeros = 2
0b1110110101011011011111111111111101111111111111111000011111111111, // #zeros = 11
];
let index = Index::new(codes).unwrap();
let mut searcher = index.range_searcher();
let qcode: u64 = 0b1111111111111111111111111111111111111111111111111111111111111111; // #zeros = 0
let answers = searcher.run(qcode, 2);
assert_eq!(answers, vec![1, 4, 6]);Sourcepub fn topk_searcher(&self) -> TopkSearcher<'_, T>
pub fn topk_searcher(&self) -> TopkSearcher<'_, T>
Returns a searcher TopkSearcher to find top-K codes that are closest to a query code.
§Examples
use mih_rs::Index;
let codes: Vec<u64> = vec![
0b1111111111111111111111011111111111111111111111111011101111111111, // #zeros = 3
0b1111111111111111111111111111111101111111111011111111111111111111, // #zeros = 2
0b1111111011011101111111111111111101111111111111111111111111111111, // #zeros = 4
0b1111111111111101111111111111111111111000111111111110001111111110, // #zeros = 8
0b1101111111111111111111111111111111111111111111111111111111111111, // #zeros = 1
0b1111111111111111101111111011111111111111111101001110111111111111, // #zeros = 6
0b1111111111111111111111111111111111101111111111111111011111111111, // #zeros = 2
0b1110110101011011011111111111111101111111111111111000011111111111, // #zeros = 11
];
let index = Index::new(codes).unwrap();
let mut searcher = index.topk_searcher();
let qcode: u64 = 0b1111111111111111111111111111111111111111111111111111111111111111; // #zeros = 0
let answers = searcher.run(qcode, 4);
assert_eq!(answers, vec![4, 1, 6, 0]);Sourcepub fn codes(&self) -> &[T]
pub fn codes(&self) -> &[T]
Gets the reference of the input database.
§Examples
use mih_rs::Index;
let codes: Vec<u64> = vec![
0b1111111111111111111111011111111111111111111111111011101111111111, // #zeros = 3
0b1111111111111111111111111111111101111111111011111111111111111111, // #zeros = 2
0b1111111011011101111111111111111101111111111111111111111111111111, // #zeros = 4
0b1111111111111101111111111111111111111000111111111110001111111110, // #zeros = 8
0b1101111111111111111111111111111111111111111111111111111111111111, // #zeros = 1
0b1111111111111111101111111011111111111111111101001110111111111111, // #zeros = 6
0b1111111111111111111111111111111111101111111111111111011111111111, // #zeros = 2
0b1110110101011011011111111111111101111111111111111000011111111111, // #zeros = 11
];
let index = Index::new(codes.clone()).unwrap();
assert_eq!(codes, index.codes());Sourcepub fn num_blocks(&self) -> usize
pub fn num_blocks(&self) -> usize
Gets the number of defined blocks in multi-index.
Sourcepub fn serialize_into<W: Write>(&self, writer: W) -> Result<()>
pub fn serialize_into<W: Write>(&self, writer: W) -> Result<()>
Serializes the index into the file.
Sourcepub fn deserialize_from<R: Read>(reader: R) -> Result<Self>
pub fn deserialize_from<R: Read>(reader: R) -> Result<Self>
Deserializes the index from the file.