1#![warn(missing_docs)]
21
22pub mod blocking;
23pub mod dense_simhash;
24pub mod lsh;
25pub mod minhash;
26pub mod simhash;
27
28pub use blocking::{BlockingConfig, MinHashTextLSH};
29pub use dense_simhash::DenseSimHashLSH;
30pub use lsh::{LSHIndex, MinHashLSH, SimHashLSH};
31pub use minhash::{MinHash, MinHashSignature};
32pub use simhash::{simhash_fingerprint, SimHashFingerprint};
33
34use std::fmt;
35use std::hash::Hasher;
36
/// Errors reported by this crate.
///
/// The enum is `#[non_exhaustive]`: code outside this crate that matches on it
/// must include a wildcard arm, which lets new variants be added later without
/// a breaking change.
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub enum Error {
    /// A parameter was invalid; the payload is a static message describing why.
    InvalidParam(&'static str),
    /// A dimension did not match the one that was expected.
    DimensionMismatch {
        /// The dimension that was required.
        expected: usize,
        /// The dimension that was actually supplied.
        got: usize,
    },
    /// The index is empty.
    EmptyIndex,
    /// The index has not been built.
    NotBuilt,
    /// An add was attempted after the index had been built.
    AddAfterBuild,
    /// The input contains non-finite values (NaN or infinity).
    NonFiniteInput,
}

impl fmt::Display for Error {
    /// Writes a short, lowercase, human-readable description of the error.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::InvalidParam(msg) => write!(f, "invalid parameter: {msg}"),
            Self::DimensionMismatch { expected, got } => {
                write!(f, "dimension mismatch (expected {expected}, got {got})")
            }
            Self::EmptyIndex => f.write_str("empty index"),
            Self::NotBuilt => f.write_str("index not built"),
            Self::AddAfterBuild => f.write_str("cannot add after build"),
            Self::NonFiniteInput => {
                f.write_str("input contains non-finite values (NaN or Inf)")
            }
        }
    }
}

// No variant wraps another error, so the default `source()` (returning `None`)
// is the right behavior and the impl body stays empty.
impl std::error::Error for Error {}
76
/// Streaming 64-bit FNV-1a hasher usable wherever a [`Hasher`] is expected.
///
/// The hash is a pure function of the bytes fed in, so results are
/// reproducible across runs.
pub(crate) struct Fnv1a64 {
    /// Running hash value; starts at the FNV offset basis.
    state: u64,
}

impl Fnv1a64 {
    /// 64-bit FNV offset basis (the hash of the empty input).
    const OFFSET_BASIS: u64 = 0xcbf2_9ce4_8422_2325;
    /// 64-bit FNV prime.
    const PRIME: u64 = 0x0000_0100_0000_01b3;

    /// Creates a hasher in its initial state.
    pub(crate) fn new() -> Self {
        Self {
            state: Self::OFFSET_BASIS,
        }
    }
}

impl Hasher for Fnv1a64 {
    /// Returns the hash of all bytes written so far without resetting the hasher.
    fn finish(&self) -> u64 {
        self.state
    }

    /// Mixes `bytes` into the running hash, one byte at a time.
    ///
    /// Each step XORs the byte into the state and then multiplies by the FNV
    /// prime with wrapping arithmetic. Because the state carries over between
    /// calls, writing a buffer in several pieces gives the same result as
    /// writing it in one call.
    fn write(&mut self, bytes: &[u8]) {
        self.state = bytes.iter().fold(self.state, |acc, &byte| {
            (acc ^ u64::from(byte)).wrapping_mul(Self::PRIME)
        });
    }
}
107
/// Advances a 64-bit linear congruential generator by one step.
///
/// Replaces `*state` with `state * MULTIPLIER + INCREMENT` (wrapping modulo
/// 2^64) and returns that new value. The sequence is fully determined by the
/// initial `*state`.
pub(crate) fn lcg_next(state: &mut u64) -> u64 {
    // These match the commonly cited MMIX LCG parameters.
    const MULTIPLIER: u64 = 6_364_136_223_846_793_005;
    const INCREMENT: u64 = 1_442_695_040_888_963_407;

    let advanced = state.wrapping_mul(MULTIPLIER).wrapping_add(INCREMENT);
    *state = advanced;
    advanced
}
115
116pub(crate) fn lcg_f32(state: &mut u64) -> f32 {
118 lcg_next(state);
119 let u = (*state >> 16) as u32;
120 (u as f32 / u32::MAX as f32) * 2.0 - 1.0
121}
122
/// Returns `true` when no element of `values` is NaN or infinite.
///
/// An empty slice yields `true`.
pub(crate) fn all_finite(values: &[f32]) -> bool {
    !values.iter().any(|v| v.is_nan() || v.is_infinite())
}