1#![allow(clippy::type_complexity)]
2
3#[cfg(not(windows))]
4#[cfg(test)]
5#[global_allocator]
6pub static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
7
8#[macro_use]
9pub mod documents;
10
11mod asc_desc;
12mod attribute_patterns;
13mod criterion;
14pub mod database_stats;
15pub mod disabled_typos_terms;
16mod error;
17mod external_documents_ids;
18pub mod facet;
19mod fields_ids_map;
20mod filter_parser;
21mod filterable_attributes_rules;
22mod flatten_serde_json;
23pub mod heed_codec;
24pub mod index;
25mod json_depth_checker;
26mod localized_attributes_rules;
27pub mod order_by_map;
28pub mod prompt;
29pub mod proximity;
30pub mod score_details;
31mod search;
32mod thread_pool_no_abort;
33pub mod update;
34pub mod vector;
35
36#[cfg(test)]
37#[macro_use]
38pub mod snapshot_tests;
39pub mod constants;
40mod fieldids_weights_map;
41pub mod progress;
42
43use std::collections::{BTreeMap, HashMap};
44use std::convert::{TryFrom, TryInto};
45use std::fmt;
46use std::hash::BuildHasherDefault;
47
48use charabia::normalizer::{CharNormalizer, CompatibilityDecompositionNormalizer};
49pub use filter_parser::{Condition, FilterCondition, Span, Token};
50use fxhash::{FxHasher32, FxHasher64};
51pub use grenad::CompressionType;
52pub use search::new::{
53 execute_search, filtered_universe, DefaultSearchLogger, GeoSortStrategy, SearchContext,
54 SearchLogger, VisualSearchLogger,
55};
56use serde_json::Value;
57pub use thread_pool_no_abort::{PanicCatched, ThreadPoolNoAbort, ThreadPoolNoAbortBuilder};
58pub use {charabia as tokenizer, heed, rhai};
59
60pub use self::asc_desc::{AscDesc, AscDescError, Member, SortError};
61pub use self::attribute_patterns::AttributePatterns;
62pub use self::attribute_patterns::PatternMatch;
63pub use self::criterion::{default_criteria, Criterion, CriterionError};
64pub use self::error::{
65 Error, FieldIdMapMissingEntry, InternalError, SerializationError, UserError,
66};
67pub use self::external_documents_ids::ExternalDocumentsIds;
68pub use self::fieldids_weights_map::FieldidsWeightsMap;
69pub use self::fields_ids_map::{FieldsIdsMap, GlobalFieldsIdsMap};
70pub use self::filterable_attributes_rules::{
71 FilterFeatures, FilterableAttributesFeatures, FilterableAttributesPatterns,
72 FilterableAttributesRule,
73};
74pub use self::heed_codec::{
75 BEU16StrCodec, BEU32StrCodec, BoRoaringBitmapCodec, BoRoaringBitmapLenCodec,
76 CboRoaringBitmapCodec, CboRoaringBitmapLenCodec, FieldIdWordCountCodec, ObkvCodec,
77 RoaringBitmapCodec, RoaringBitmapLenCodec, StrBEU32Codec, U8StrStrCodec,
78 UncheckedU8StrStrCodec,
79};
80pub use self::index::Index;
81pub use self::localized_attributes_rules::LocalizedAttributesRule;
82pub use self::search::facet::{FacetValueHit, SearchForFacetValues};
83pub use self::search::similar::Similar;
84pub use self::search::{
85 FacetDistribution, Filter, FormatOptions, MatchBounds, MatcherBuilder, MatchingWords, OrderBy,
86 Search, SearchResult, SemanticSearch, TermsMatchingStrategy, DEFAULT_VALUES_PER_FACET,
87};
88pub use self::update::ChannelCongestion;
89
90pub use arroy;
91
92pub type Result<T> = std::result::Result<T, error::Error>;
93
94pub type Attribute = u32;
95pub type BEU16 = heed::types::U16<heed::byteorder::BE>;
96pub type BEU32 = heed::types::U32<heed::byteorder::BE>;
97pub type BEU64 = heed::types::U64<heed::byteorder::BE>;
98pub type DocumentId = u32;
99pub type FastMap4<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher32>>;
100pub type FastMap8<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher64>>;
101pub type FieldDistribution = BTreeMap<String, u64>;
102pub type FieldId = u16;
103pub type Weight = u16;
104pub type Object = serde_json::Map<String, serde_json::Value>;
105pub type Position = u32;
106pub type RelativePosition = u16;
107pub type SmallString32 = smallstr::SmallString<[u8; 32]>;
108pub type Prefix = smallstr::SmallString<[u8; 16]>;
109pub type SmallVec16<T> = smallvec::SmallVec<[T; 16]>;
110pub type SmallVec32<T> = smallvec::SmallVec<[T; 32]>;
111pub type SmallVec8<T> = smallvec::SmallVec<[T; 8]>;
112
113pub type GeoPoint = rstar::primitives::GeomWithData<[f64; 3], (DocumentId, [f64; 2])>;
117
118const MAX_LMDB_KEY_LENGTH: usize = 500;
123
124pub const MAX_FACET_VALUE_LENGTH: usize = MAX_LMDB_KEY_LENGTH - 32;
129
130pub const MAX_WORD_LENGTH: usize = MAX_LMDB_KEY_LENGTH / 2;
132
133pub const MAX_POSITION_PER_ATTRIBUTE: u32 = u16::MAX as u32 + 1;
134
135#[derive(Clone)]
136pub struct TimeBudget {
137 started_at: std::time::Instant,
138 budget: std::time::Duration,
139
140 #[cfg(test)]
143 stop_after: Option<(std::sync::Arc<std::sync::atomic::AtomicUsize>, usize)>,
144}
145
146impl fmt::Debug for TimeBudget {
147 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
148 f.debug_struct("TimeBudget")
149 .field("started_at", &self.started_at)
150 .field("budget", &self.budget)
151 .field("left", &(self.budget - self.started_at.elapsed()))
152 .finish()
153 }
154}
155
156impl Default for TimeBudget {
157 fn default() -> Self {
158 Self::new(std::time::Duration::from_millis(1500))
159 }
160}
161
162impl TimeBudget {
163 pub fn new(budget: std::time::Duration) -> Self {
164 Self {
165 started_at: std::time::Instant::now(),
166 budget,
167
168 #[cfg(test)]
169 stop_after: None,
170 }
171 }
172
173 pub fn max() -> Self {
174 Self::new(std::time::Duration::from_secs(u64::MAX))
175 }
176
177 #[cfg(test)]
178 pub fn with_stop_after(mut self, stop_after: usize) -> Self {
179 use std::sync::atomic::AtomicUsize;
180 use std::sync::Arc;
181
182 self.stop_after = Some((Arc::new(AtomicUsize::new(0)), stop_after));
183 self
184 }
185
186 pub fn exceeded(&self) -> bool {
187 #[cfg(test)]
188 if let Some((current, stop_after)) = &self.stop_after {
189 let current = current.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
190 if current >= *stop_after {
191 return true;
192 } else {
193 return false;
195 }
196 }
197
198 self.started_at.elapsed() > self.budget
199 }
200}
201
202pub fn relative_from_absolute_position(absolute: Position) -> (FieldId, RelativePosition) {
206 ((absolute >> 16) as u16, (absolute & 0xFFFF) as u16)
207}
208
209pub fn absolute_from_relative_position(field_id: FieldId, relative: RelativePosition) -> Position {
211 ((field_id as u32) << 16) | (relative as u32)
212}
213pub fn bucketed_position(relative: u16) -> u16 {
218 if relative < 16 {
220 relative
221 } else if relative < 24 {
222 24
224 } else {
225 (relative as f64).log2().ceil().exp2() as u16
228 }
229}
230
231pub fn obkv_to_json(
233 displayed_fields: &[FieldId],
234 fields_ids_map: &FieldsIdsMap,
235 obkv: &obkv::KvReaderU16,
236) -> Result<Object> {
237 displayed_fields
238 .iter()
239 .copied()
240 .flat_map(|id| obkv.get(id).map(|value| (id, value)))
241 .map(|(id, value)| {
242 let name = fields_ids_map.name(id).ok_or(error::FieldIdMapMissingEntry::FieldId {
243 field_id: id,
244 process: "obkv_to_json",
245 })?;
246 let value = serde_json::from_slice(value).map_err(error::InternalError::SerdeJson)?;
247 Ok((name.to_owned(), value))
248 })
249 .collect()
250}
251
252pub fn all_obkv_to_json(obkv: &obkv::KvReaderU16, fields_ids_map: &FieldsIdsMap) -> Result<Object> {
254 let all_keys = obkv.iter().map(|(k, _v)| k).collect::<Vec<_>>();
255 obkv_to_json(all_keys.as_slice(), fields_ids_map, obkv)
256}
257
258pub fn json_to_string(value: &Value) -> Option<String> {
260 fn inner(value: &Value, output: &mut String) -> bool {
261 use std::fmt::Write;
262 match value {
263 Value::Null => false,
264 Value::Bool(boolean) => write!(output, "{}", boolean).is_ok(),
265 Value::Number(number) => write!(output, "{}", number).is_ok(),
266 Value::String(string) => write!(output, "{}", string).is_ok(),
267 Value::Array(array) => {
268 let mut count = 0;
269 for value in array {
270 if inner(value, output) {
271 output.push_str(". ");
272 count += 1;
273 }
274 }
275 count != 0
277 }
278 Value::Object(object) => {
279 let mut buffer = String::new();
280 let mut count = 0;
281 for (key, value) in object {
282 buffer.clear();
283 let _ = write!(&mut buffer, "{}: ", key);
284 if inner(value, &mut buffer) {
285 buffer.push_str(". ");
286 output.push_str(&buffer);
289 count += 1;
290 }
291 }
292 count != 0
294 }
295 }
296 }
297
298 let mut string = String::new();
299 if inner(value, &mut string) {
300 Some(string)
301 } else {
302 None
303 }
304}
305
306fn try_split_at<T>(slice: &[T], mid: usize) -> Option<(&[T], &[T])> {
308 if mid <= slice.len() {
309 Some(slice.split_at(mid))
310 } else {
311 None
312 }
313}
314
315fn try_split_array_at<T, const N: usize>(slice: &[T]) -> Option<([T; N], &[T])>
318where
319 [T; N]: for<'a> TryFrom<&'a [T]>,
320{
321 let (head, tail) = try_split_at(slice, N)?;
322 let head = head.try_into().ok()?;
323 Some((head, tail))
324}
325
326pub fn distance_between_two_points(a: &[f64; 2], b: &[f64; 2]) -> f64 {
329 let a = geoutils::Location::new(a[0], a[1]);
330 let b = geoutils::Location::new(b[0], b[1]);
331
332 a.haversine_distance_to(&b).meters()
333}
334
335pub fn lat_lng_to_xyz(coord: &[f64; 2]) -> [f64; 3] {
338 let [lat, lng] = coord.map(|f| f.to_radians());
339 let x = lat.cos() * lng.cos();
340 let y = lat.cos() * lng.sin();
341 let z = lat.sin();
342
343 [x, y, z]
344}
345
346pub fn is_faceted(field: &str, faceted_fields: impl IntoIterator<Item = impl AsRef<str>>) -> bool {
349 faceted_fields.into_iter().any(|facet| is_faceted_by(field, facet.as_ref()))
350}
351
352pub fn is_faceted_by(field: &str, facet: &str) -> bool {
379 field.starts_with(facet) && field[facet.len()..].chars().next().is_none_or(|c| c == '.')
380}
381
382pub fn normalize_facet(original: &str) -> String {
383 CompatibilityDecompositionNormalizer.normalize_str(original.trim()).to_lowercase()
384}
385
386#[cfg(test)]
387mod tests {
388 use serde_json::json;
389
390 use super::*;
391
392 #[test]
393 fn json_to_string_object() {
394 let value = json!({
395 "name": "John Doe",
396 "age": 43,
397 "not_there": null,
398 });
399
400 let string = json_to_string(&value).unwrap();
401 assert_eq!(string, "name: John Doe. age: 43. ");
402 }
403
404 #[test]
405 fn json_to_string_array() {
406 let value = json!([
407 { "name": "John Doe" },
408 43,
409 "hello",
410 [ "I", "am", "fine" ],
411 null,
412 ]);
413
414 let string = json_to_string(&value).unwrap();
415 assert_eq!(string, "name: John Doe. . 43. hello. I. am. fine. . ");
418 }
419
420 #[test]
421 fn test_relative_position_conversion() {
422 assert_eq!((0x0000, 0x0000), relative_from_absolute_position(0x00000000));
423 assert_eq!((0x0000, 0xFFFF), relative_from_absolute_position(0x0000FFFF));
424 assert_eq!((0xFFFF, 0x0000), relative_from_absolute_position(0xFFFF0000));
425 assert_eq!((0xFF00, 0xFF00), relative_from_absolute_position(0xFF00FF00));
426 assert_eq!((0xFF00, 0x00FF), relative_from_absolute_position(0xFF0000FF));
427 assert_eq!((0x1234, 0x5678), relative_from_absolute_position(0x12345678));
428 assert_eq!((0xFFFF, 0xFFFF), relative_from_absolute_position(0xFFFFFFFF));
429 }
430
431 #[test]
432 fn test_absolute_position_conversion() {
433 assert_eq!(0x00000000, absolute_from_relative_position(0x0000, 0x0000));
434 assert_eq!(0x0000FFFF, absolute_from_relative_position(0x0000, 0xFFFF));
435 assert_eq!(0xFFFF0000, absolute_from_relative_position(0xFFFF, 0x0000));
436 assert_eq!(0xFF00FF00, absolute_from_relative_position(0xFF00, 0xFF00));
437 assert_eq!(0xFF0000FF, absolute_from_relative_position(0xFF00, 0x00FF));
438 assert_eq!(0x12345678, absolute_from_relative_position(0x1234, 0x5678));
439 assert_eq!(0xFFFFFFFF, absolute_from_relative_position(0xFFFF, 0xFFFF));
440 }
441
442 #[test]
443 fn test_all_obkv_to_json() {
444 let mut fields_ids_map = FieldsIdsMap::new();
445 let id1 = fields_ids_map.insert("field1").unwrap();
446 let id2 = fields_ids_map.insert("field2").unwrap();
447
448 let mut writer = obkv::KvWriterU16::memory();
449 writer.insert(id1, b"1234").unwrap();
450 writer.insert(id2, b"4321").unwrap();
451 let contents = writer.into_inner().unwrap();
452 let obkv = obkv::KvReaderU16::from_slice(&contents);
453
454 let expected = json!({
455 "field1": 1234,
456 "field2": 4321,
457 });
458 let expected = expected.as_object().unwrap();
459 let actual = all_obkv_to_json(obkv, &fields_ids_map).unwrap();
460
461 assert_eq!(&actual, expected);
462 }
463}