use crate::model::errors::{LbResult, Unexpected};
use crate::model::file::File;
use crate::service::activity::RankingWeights;
use crate::service::events::Event;
use crate::Lb;
use serde::Serialize;
use std::ops::Range;
use std::sync::atomic::AtomicBool;
use std::sync::Arc;
use tantivy::collector::TopDocs;
use tantivy::query::QueryParser;
use tantivy::schema::{Schema, Value, STORED, STRING, TEXT};
use tantivy::{
    doc, Index, IndexReader, IndexWriter, ReloadPolicy, SnippetGenerator, TantivyDocument, Term,
};
use tokio::sync::RwLock;
use uuid::Uuid;
18
19const CONTENT_MAX_LEN_BYTES: usize = 128 * 1024; #[derive(Clone)]
22pub struct SearchIndex {
23 pub ready: Arc<AtomicBool>,
24
25 pub metadata_index: Arc<RwLock<SearchMetadata>>,
26 pub tantivy_index: Index,
27 pub tantivy_reader: IndexReader,
28}
29
/// Selects which indexes a call to `Lb::search` consults.
#[derive(Copy, Clone, Debug)]
pub enum SearchConfig {
    /// Match against file paths only.
    Paths,
    /// Match against document contents only.
    Documents,
    /// Path matches first, followed by content matches.
    PathsAndDocuments,
}
36
37#[derive(Debug)]
38pub enum SearchResult {
39 DocumentMatch { id: Uuid, path: String, content_matches: Vec<ContentMatch> },
40 PathMatch { id: Uuid, path: String, matched_indices: Vec<usize>, score: i64 },
41}
42
43impl Lb {
44 #[instrument(level = "debug", skip(self), err(Debug))]
62 pub async fn search(&self, input: &str, cfg: SearchConfig) -> LbResult<Vec<SearchResult>> {
63 if input.is_empty() {
65 return self.search.metadata_index.read().await.empty_search();
66 }
67
68 match cfg {
69 SearchConfig::Paths => {
70 let mut results = self.search.metadata_index.read().await.path_search(input)?;
71 results.truncate(5);
72 Ok(results)
73 }
74 SearchConfig::Documents => {
75 let mut results = self.search_content(input).await?;
76 results.truncate(10);
77 Ok(results)
78 }
79 SearchConfig::PathsAndDocuments => {
80 let mut results = self.search.metadata_index.read().await.path_search(input)?;
81 results.truncate(4);
82 results.append(&mut self.search_content(input).await?);
83 Ok(results)
84 }
85 }
86 }
87
88 async fn search_content(&self, input: &str) -> LbResult<Vec<SearchResult>> {
89 let searcher = self.search.tantivy_reader.searcher();
90 let schema = self.search.tantivy_index.schema();
91 let id_field = schema.get_field("id").unwrap();
92 let content = schema.get_field("content").unwrap();
93
94 let query_parser = QueryParser::for_index(&self.search.tantivy_index, vec![content]);
95 let mut results = vec![];
96
97 if let Ok(query) = query_parser.parse_query(input) {
98 let mut snippet_generator =
99 SnippetGenerator::create(&searcher, &query, content).map_unexpected()?;
100 snippet_generator.set_max_num_chars(100);
101
102 let top_docs = searcher
103 .search(&query, &TopDocs::with_limit(10))
104 .map_unexpected()?;
105
106 for (_score, doc_address) in top_docs {
107 let retrieved_doc: TantivyDocument = searcher.doc(doc_address).map_unexpected()?;
108 let id = Uuid::from_slice(
109 retrieved_doc
110 .get_first(id_field)
111 .map(|val| val.as_bytes().unwrap_or_default())
112 .unwrap_or_default(),
113 )
114 .map_unexpected()?;
115
116 let snippet = snippet_generator.snippet_from_doc(&retrieved_doc);
117 let path = self
118 .search
119 .metadata_index
120 .read()
121 .await
122 .paths
123 .iter()
124 .find(|(path_id, _)| *path_id == id)
125 .map(|(_, path)| path.to_string())
126 .unwrap_or_default();
127
128 results.push(SearchResult::DocumentMatch {
129 id,
130 path,
131 content_matches: vec![ContentMatch {
132 paragraph: snippet.fragment().to_string(),
133 matched_indices: Self::highlight_to_matches(snippet.highlighted()),
134 score: 0,
135 }],
136 });
137 }
138 }
139 Ok(results)
140 }
141
/// Expands highlight ranges into the flat list of every index they cover, in the order
/// the ranges are given.
fn highlight_to_matches(ranges: &[Range<usize>]) -> Vec<usize> {
    ranges.iter().cloned().flatten().collect()
}
152
153 #[instrument(level = "debug", skip(self), err(Debug))]
154 pub async fn build_index(&self) -> LbResult<()> {
155 if self.keychain.get_account().is_err() {
158 return Ok(());
159 }
160
161 let metadata_index = SearchMetadata::populate(self).await?;
162 *self.search.metadata_index.write().await = metadata_index.clone();
163 self.update_tantivy(vec![], metadata_index.files.iter().map(|f| f.id).collect())
164 .await;
165
166 Ok(())
167 }
168
169 #[instrument(level = "debug", skip(self))]
170 pub fn setup_search(&self) {
171 if self.config.background_work {
172 let lb = self.clone();
173 let mut rx = self.subscribe();
174 tokio::spawn(async move {
175 lb.build_index().await.unwrap();
176 loop {
177 let evt = match rx.recv().await {
178 Ok(evt) => evt,
179 Err(err) => {
180 error!("failed to receive from a channel {err}");
181 return;
182 }
183 };
184
185 match evt {
186 Event::MetadataChanged => {
187 if let Some(replacement_index) =
188 SearchMetadata::populate(&lb).await.log_and_ignore()
189 {
190 let current_index = lb.search.metadata_index.read().await.clone();
191 let deleted_ids = replacement_index.compute_deleted(¤t_index);
192 *lb.search.metadata_index.write().await = replacement_index;
193 lb.update_tantivy(vec![], deleted_ids).await;
194 }
195 }
196 Event::DocumentWritten(id, _) => {
197 lb.update_tantivy(vec![id], vec![id]).await;
198 }
199 _ => {}
200 };
201 }
202 });
203 }
204 }
205
206 async fn update_tantivy(&self, delete: Vec<Uuid>, add: Vec<Uuid>) {
207 let mut index_writer: IndexWriter = self.search.tantivy_index.writer(50_000_000).unwrap();
208 let schema = self.search.tantivy_index.schema();
209 let id_field = schema.get_field("id").unwrap();
210 let id_str = schema.get_field("id_str").unwrap();
211 let content = schema.get_field("content").unwrap();
212
213 for id in delete {
214 let term = Term::from_field_text(id_str, &id.to_string());
215 index_writer.delete_term(term);
216 }
217
218 for id in add {
219 let id_bytes = id.as_bytes().as_slice();
220 let id_string = id.to_string();
221 let Some(file) = self
222 .search
223 .metadata_index
224 .read()
225 .await
226 .files
227 .iter()
228 .find(|f| f.id == id)
229 .cloned()
230 else {
231 continue;
232 };
233
234 if !file.name.ends_with(".md") || file.is_folder() {
235 continue;
236 };
237
238 let doc = String::from_utf8(self.read_document(file.id, false).await.unwrap()).unwrap();
239
240 if doc.len() > CONTENT_MAX_LEN_BYTES {
241 continue;
242 };
243
244 index_writer
245 .add_document(doc!(
246 id_field => id_bytes,
247 id_str => id_string,
248 content => doc,
249 ))
250 .unwrap();
251 }
252
253 index_writer.commit().unwrap();
254 }
255}
256
257impl Default for SearchIndex {
258 fn default() -> Self {
259 let mut schema_builder = Schema::builder();
260 schema_builder.add_bytes_field("id", STORED);
261 schema_builder.add_text_field("id_str", TEXT | STORED);
262 schema_builder.add_text_field("content", TEXT | STORED);
263
264 let schema = schema_builder.build();
265
266 let index = Index::create_in_ram(schema.clone());
267
268 let reader = index
270 .reader_builder()
271 .reload_policy(ReloadPolicy::OnCommitWithDelay)
272 .try_into()
273 .unwrap();
274
275 Self {
276 ready: Default::default(),
277 tantivy_index: index,
278 tantivy_reader: reader,
279 metadata_index: Default::default(),
280 }
281 }
282}
283
284#[derive(Debug, Serialize)]
285pub struct ContentMatch {
286 pub paragraph: String,
287 pub matched_indices: Vec<usize>,
288 pub score: i64,
289}
290
291impl SearchResult {
292 pub fn id(&self) -> Uuid {
293 match self {
294 SearchResult::DocumentMatch { id, .. } | SearchResult::PathMatch { id, .. } => *id,
295 }
296 }
297
298 pub fn path(&self) -> &str {
299 match self {
300 SearchResult::DocumentMatch { path, .. } | SearchResult::PathMatch { path, .. } => path,
301 }
302 }
303
304 pub fn name(&self) -> &str {
305 match self {
306 SearchResult::DocumentMatch { path, .. } | SearchResult::PathMatch { path, .. } => {
307 path.split('/').next_back().unwrap_or_default()
308 }
309 }
310 }
311
312 pub fn score(&self) -> i64 {
313 match self {
314 SearchResult::DocumentMatch { content_matches, .. } => content_matches
315 .iter()
316 .map(|m| m.score)
317 .max()
318 .unwrap_or_default(),
319 SearchResult::PathMatch { score, .. } => *score,
320 }
321 }
322}
323
324#[derive(Default, Clone)]
325pub struct SearchMetadata {
326 files: Vec<File>,
327 paths: Vec<(Uuid, String)>,
328 suggested_docs: Vec<Uuid>,
329}
330
331impl SearchMetadata {
332 async fn populate(lb: &Lb) -> LbResult<Self> {
333 let files = lb.list_metadatas().await?;
334 let paths = lb.list_paths_with_ids(None).await?;
335 let suggested_docs = lb.suggested_docs(RankingWeights::default()).await?;
336
337 Ok(SearchMetadata { files, paths, suggested_docs })
338 }
339
340 fn compute_deleted(&self, old: &SearchMetadata) -> Vec<Uuid> {
341 let mut deleted_ids = vec![];
342
343 for old_file in &old.files {
344 if !self.files.iter().any(|new_f| new_f.id == old_file.id) {
345 deleted_ids.push(old_file.id);
346 }
347 }
348
349 deleted_ids
350 }
351
352 fn empty_search(&self) -> LbResult<Vec<SearchResult>> {
353 let mut results = vec![];
354
355 for id in &self.suggested_docs {
356 let path = self
357 .paths
358 .iter()
359 .find(|(path_id, _)| id == path_id)
360 .map(|(_, path)| path.clone())
361 .unwrap_or_default();
362
363 results.push(SearchResult::PathMatch {
364 id: *id,
365 path,
366 matched_indices: vec![],
367 score: 0,
368 });
369 }
370
371 Ok(results)
372 }
373
374 fn path_search(&self, query: &str) -> LbResult<Vec<SearchResult>> {
375 let mut results = self.path_candidates(query)?;
376 self.score_paths(&mut results);
377
378 results.sort_by_key(|r| -r.score());
379
380 if let Some(result) = self.id_match(query) {
381 results.insert(0, result);
382 }
383
384 Ok(results)
385 }
386
387 fn id_match(&self, query: &str) -> Option<SearchResult> {
388 if query.len() < 8 {
389 return None;
390 }
391
392 let query = if query.starts_with("lb://") {
393 query.replacen("lb://", "", 1)
394 } else {
395 query.to_string()
396 };
397
398 for (id, path) in &self.paths {
399 if id.to_string().contains(&query) {
400 return Some(SearchResult::PathMatch {
401 id: *id,
402 path: path.clone(),
403 matched_indices: vec![],
404 score: 100,
405 });
406 }
407 }
408
409 None
410 }
411
412 fn path_candidates(&self, query: &str) -> LbResult<Vec<SearchResult>> {
413 let mut search_results = vec![];
414
415 for (id, path) in &self.paths {
416 let mut matched_indices = vec![];
417
418 let mut query_iter = query.chars().rev();
419 let mut current_query_char = query_iter.next();
420
421 for (path_ind, path_char) in path.char_indices().rev() {
422 if let Some(qc) = current_query_char {
423 if qc.eq_ignore_ascii_case(&path_char) {
424 matched_indices.push(path_ind);
425 current_query_char = query_iter.next();
426 }
427 } else {
428 break;
429 }
430 }
431
432 if current_query_char.is_none() {
433 search_results.push(SearchResult::PathMatch {
434 id: *id,
435 path: path.clone(),
436 matched_indices,
437 score: 0,
438 });
439 }
440 }
441 Ok(search_results)
442 }
443
444 fn score_paths(&self, candidates: &mut [SearchResult]) {
445 let smaller_paths = 10;
447 let suggested = 10;
448 let filename = 30;
449 let editable = 3;
450
451 candidates.sort_by_key(|a| a.path().len());
452
453 for i in 0..smaller_paths {
455 if let Some(SearchResult::PathMatch { id: _, path: _, matched_indices: _, score }) =
456 candidates.get_mut(i)
457 {
458 *score = (smaller_paths - i) as i64;
459 }
460 }
461
462 for cand in candidates.iter_mut() {
464 if self.suggested_docs.contains(&cand.id()) {
465 if let SearchResult::PathMatch { id: _, path: _, matched_indices: _, score } = cand
466 {
467 *score += suggested;
468 }
469 }
470 }
471
472 for cand in candidates.iter_mut() {
474 if let SearchResult::PathMatch { id: _, path, matched_indices, score } = cand {
475 let mut name_match = 0;
476 let mut name_size = 0;
477
478 for (i, c) in path.char_indices().rev() {
479 if c == '/' {
480 break;
481 }
482 name_size += 1;
483 if matched_indices.contains(&i) {
484 name_match += 1;
485 }
486 }
487
488 let match_portion = name_match as f32 / name_size.max(1) as f32;
489 *score += (match_portion * filename as f32) as i64;
490 }
491 }
492
493 for cand in candidates.iter_mut() {
495 if let SearchResult::PathMatch { id: _, path, matched_indices: _, score } = cand {
496 if path.ends_with(".md") || path.ends_with(".svg") {
497 *score += editable;
498 }
499 }
500 }
501 }
502}