stork_lib/
lib.rs

1#![warn(clippy::pedantic)]
2#![allow(clippy::missing_errors_doc)]
3#![allow(clippy::missing_panics_doc)]
4#![allow(clippy::must_use_candidate)]
5
6use bytes::Bytes;
7use lazy_static::lazy_static;
8
9use std::collections::HashMap;
10use std::convert::TryFrom;
11use std::sync::Mutex;
12use thiserror::Error;
13
14#[cfg(feature = "build-v3")]
15use {
16    num_format::{Locale, ToFormattedString},
17    std::fmt::Display,
18};
19
20pub type Fields = HashMap<String, String>;
21
22mod output;
23pub use output::{
24    Entry, Excerpt, HighlightRange, IndexMetadata, InternalWordAnnotation, Output, Result,
25};
26
27mod input;
28use input::{IndexVersioningError, VersionedIndex};
29
30mod stopwords;
31use stopwords::STOPWORDS as stopwords;
32
33mod config;
34pub use config::{Config, ConfigReadError};
35
36#[cfg(feature = "search-v2")]
37mod index_v2;
38#[cfg(feature = "search-v3")]
39mod index_v3;
40
41#[cfg(feature = "search-v2")]
42use {index_v2::search as V2Search, index_v2::Index as V2Index};
43
44#[cfg(feature = "search-v3")]
45use {index_v3::search as V3Search, index_v3::Index as V3Index};
46
47#[cfg(feature = "build-v3")]
48pub use index_v3::DocumentError;
49
50#[cfg(feature = "build-v3")]
51use {
52    index_v3::build as V3Build, index_v3::BuildResult as V3BuildResult,
53    index_v3::IndexGenerationError,
54};
55
// We can't pass a parsed index over the WASM boundary, so parsed indexes are
// stored here instead. `register_index` inserts an entry under the name it is
// given, and `search_from_cache` looks entries up by that same name.
lazy_static! {
    static ref INDEX_CACHE: Mutex<HashMap<String, ParsedIndex>> = Mutex::new(HashMap::new());
}
60
/// An error that may occur when trying to parse an index file.
///
/// Returned by [`index_from_bytes`] and [`register_index`], and wrapped by
/// [`SearchError::IndexParseError`].
#[derive(Error, Debug)]
pub enum IndexParseError {
    /// Converting the raw bytes into a `VersionedIndex` failed; the wrapped
    /// error's message is displayed as-is.
    #[error("{0}")]
    VersioningError(#[from] IndexVersioningError),

    /// The bytes were recognized as a versioned index, but no parser for that
    /// version is handled by this build (the fallback arm of
    /// [`index_from_bytes`]).
    #[error("Could not parse index, despite knowing the version.")]
    ParseError(),

    /// The V2 index parser rejected the payload; holds that error's message.
    #[error("{0}")]
    V2Error(String),

    /// The V3 index parser rejected the payload; holds that error's message.
    #[error("{0}")]
    V3Error(String),
}
78
/// A fully parsed search index, tagged by its format version.
///
/// Which variants exist depends on the enabled Cargo features, so code that
/// matches on this enum must gate its arms with the same `cfg` attributes.
#[derive(Debug)]
pub enum ParsedIndex {
    /// A parsed V2 index (only with the `search-v2` feature).
    #[cfg(feature = "search-v2")]
    V2(V2Index),

    /// A parsed V3 index (only with the `search-v3` feature).
    #[cfg(feature = "search-v3")]
    V3(V3Index),

    /// Placeholder variant that exists only when neither search feature is
    /// enabled, so the enum is never empty.
    #[cfg(not(any(feature = "search-v2", feature = "search-v3")))]
    Unknown,
}
90
91impl ParsedIndex {
92    pub fn get_metadata(&self) -> IndexMetadata {
93        match self {
94            #[cfg(feature = "search-v2")]
95            ParsedIndex::V2(_) => IndexMetadata {
96                index_version: "stork-2".to_string(),
97            },
98
99            #[cfg(feature = "search-v3")]
100            ParsedIndex::V3(_) => IndexMetadata {
101                index_version: "stork-3".to_string(),
102            },
103
104            #[cfg(not(any(feature = "search-v2", feature = "search-v3")))]
105            ParsedIndex::Unknown => IndexMetadata {
106                index_version: "unknown".to_string(),
107            },
108        }
109    }
110}
111
112#[allow(unreachable_patterns)]
113pub fn index_from_bytes(bytes: Bytes) -> core::result::Result<ParsedIndex, IndexParseError> {
114    let versioned = VersionedIndex::try_from(bytes)?;
115
116    match versioned {
117        #[cfg(feature = "search-v2")]
118        VersionedIndex::V2(bytes) => V2Index::try_from(bytes)
119            .map_err(|e| IndexParseError::V2Error(e.to_string()))
120            .map(ParsedIndex::V2),
121
122        #[cfg(feature = "search-v3")]
123        VersionedIndex::V3(bytes) => V3Index::try_from(bytes)
124            .map_err(|e| IndexParseError::V3Error(e.to_string()))
125            .map(ParsedIndex::V3),
126
127        _ => Err(IndexParseError::ParseError()),
128    }
129}
130#[derive(Debug, Error)]
131pub enum BuildError {
132    #[error("{0}")]
133    ConfigReadError(#[from] ConfigReadError),
134
135    #[error("The Stork binary was not compiled with the ability to build indexes. Please recompile with the `build_v3` feature enabled.")]
136    BinaryNotBuiltWithFeature,
137
138    #[error("{0}")]
139    #[cfg(feature = "build-v3")]
140    IndexGenerationError(#[from] IndexGenerationError),
141}
142
/// Summary statistics about a freshly built index, suitable for printing via
/// its `Display` implementation.
#[cfg(feature = "build-v3")]
#[derive(Debug)]
pub struct IndexDescription {
    /// Number of entries in the index (`entries_len()` of the built index).
    pub entries_count: usize,
    /// Number of search terms in the index (`search_term_count()` of the built index).
    pub tokens_count: usize,
    /// Length, in bytes, of the index once converted to `Bytes`.
    pub index_size_bytes: usize,
    /// Per-document errors reported by the build; displayed as warnings.
    pub warnings: Vec<DocumentError>,
}
151
#[cfg(feature = "build-v3")]
impl From<&V3BuildResult> for IndexDescription {
    /// Collects the summary statistics for a finished V3 build.
    ///
    /// The build result is only borrowed; its error list is cloned into
    /// `warnings`.
    fn from(build_result: &V3BuildResult) -> Self {
        let index = &build_result.index;

        let entries_count = index.entries_len();
        let tokens_count = index.search_term_count();
        // The size is measured by converting the index into `Bytes` and taking
        // the length of the result.
        let index_size_bytes = Bytes::from(index).len();
        let warnings = build_result.errors.clone();

        Self {
            entries_count,
            tokens_count,
            index_size_bytes,
            warnings,
        }
    }
}
163
#[cfg(feature = "build-v3")]
impl Display for IndexDescription {
    /// Writes any build warnings followed by a short statistics summary.
    ///
    /// The per-entry and per-search-term averages use integer division. When
    /// the corresponding count is zero the average is reported as `0`, so
    /// formatting the description of an empty index does not panic with a
    /// division by zero.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Warnings, when present, are printed first and separated from the
        // stats by a newline.
        let warnings = if self.warnings.is_empty() {
            String::new()
        } else {
            DocumentError::display_list(&self.warnings) + "\n"
        };

        // `checked_div` yields `None` for a zero divisor instead of panicking.
        let bytes_per_entry = self
            .index_size_bytes
            .checked_div(self.entries_count)
            .unwrap_or(0);
        let bytes_per_search_term = self
            .index_size_bytes
            .checked_div(self.tokens_count)
            .unwrap_or(0);

        write!(
            f,
            r#"{}Index stats:
  - {} entries
  - {} search terms
  - {} bytes per entry
  - {} bytes per search term"#,
            warnings,
            self.entries_count.to_formatted_string(&Locale::en),
            self.tokens_count.to_formatted_string(&Locale::en),
            bytes_per_entry.to_formatted_string(&Locale::en),
            bytes_per_search_term.to_formatted_string(&Locale::en),
        )
    }
}
185
/// The result of a successful [`build_index`] call.
#[cfg(feature = "build-v3")]
pub struct BuildOutput {
    /// The built index converted to `Bytes`.
    pub bytes: Bytes,
    /// Summary statistics and warnings for the built index.
    pub description: IndexDescription,
}
191
/// Fallback compiled when the `build-v3` feature is disabled.
///
/// Ignores the configuration and always returns
/// [`BuildError::BinaryNotBuiltWithFeature`].
#[cfg(not(feature = "build-v3"))]
pub fn build_index(_config: &Config) -> core::result::Result<(), BuildError> {
    Err(BuildError::BinaryNotBuiltWithFeature)
}
196
/// Builds a V3 index from `config`.
///
/// On success, returns the index converted to `Bytes` together with an
/// [`IndexDescription`] summarizing it.
///
/// # Errors
///
/// Returns a [`BuildError`] converted from whatever error the V3 builder
/// reports.
#[cfg(feature = "build-v3")]
pub fn build_index(config: &Config) -> core::result::Result<BuildOutput, BuildError> {
    let built = V3Build(config)?;

    // The description is computed before the bytes, matching field order here.
    Ok(BuildOutput {
        description: IndexDescription::from(&built),
        bytes: Bytes::from(&built.index),
    })
}
204
205pub fn register_index(
206    name: &str,
207    bytes: Bytes,
208) -> core::result::Result<IndexMetadata, IndexParseError> {
209    let parsed = index_from_bytes(bytes)?;
210    // todo: save deserialized index to cache
211    let metadata = parsed.get_metadata();
212    INDEX_CACHE.lock().unwrap().insert(name.to_string(), parsed);
213    Ok(metadata)
214}
215
/// An error that may occur when performing a search with [`search`] or
/// [`search_from_cache`].
#[derive(Debug, Error)]
pub enum SearchError {
    /// The supplied index bytes could not be parsed; the wrapped error's
    /// message is displayed as-is.
    #[error("{0}")]
    IndexParseError(#[from] IndexParseError),

    /// The parsed index has a version that this build has no search
    /// implementation for.
    #[error("The index is not supported. You might need to recompile Stork with a different set of features enabled.")]
    IndexVersionNotSupported,

    /// No index is registered in the cache under the given name (the field
    /// holds the name that was looked up).
    #[error(
        "Index `{0}` has not been registered. You need to register the index before performing searches with it."
    )]
    IndexNotInCache(String),
}
229
230pub fn search_from_cache(key: &str, query: &str) -> core::result::Result<Output, SearchError> {
231    let cache = INDEX_CACHE.lock().unwrap();
232    let parsed = match cache.get(key) {
233        Some(parsed) => parsed,
234        None => return Err(SearchError::IndexNotInCache(key.to_string())),
235    };
236
237    match parsed {
238        #[cfg(feature = "search-v2")]
239        ParsedIndex::V2(index) => Ok(V2Search(index, query)),
240
241        #[cfg(feature = "search-v3")]
242        ParsedIndex::V3(index) => Ok(V3Search(index, query)),
243
244        #[cfg(not(any(feature = "search-v2", feature = "search-v3")))]
245        ParsedIndex::Unknown => Err(SearchError::IndexVersionNotSupported),
246    }
247}
248
249#[allow(unused_variables)]
250pub fn search(index: Bytes, query: &str) -> core::result::Result<Output, SearchError> {
251    let index = index_from_bytes(index)?;
252
253    #[allow(unreachable_patterns)]
254    match index {
255        #[cfg(feature = "search-v3")]
256        ParsedIndex::V3(index) => Ok(V3Search(&index, query)),
257
258        #[cfg(feature = "search-v2")]
259        ParsedIndex::V2(index) => Ok(V2Search(&index, query)),
260
261        _ => Err(SearchError::IndexVersionNotSupported),
262    }
263}