1use crate::model::errors::{LbResult, Unexpected};
2use crate::model::file::File;
3use crate::service::activity::RankingWeights;
4use crate::service::events::Event;
5use crate::{LocalLb, tokio_spawn};
6use serde::{Deserialize, Serialize};
7use std::ops::Range;
8use std::sync::Arc;
9use std::sync::atomic::AtomicBool;
10use tantivy::collector::TopDocs;
11use tantivy::query::QueryParser;
12use tantivy::schema::{INDEXED, STORED, Schema, TEXT, Value};
13use tantivy::snippet::SnippetGenerator;
14use tantivy::{Index, IndexReader, IndexWriter, ReloadPolicy, TantivyDocument, Term, doc};
15use tokio::sync::RwLock;
16use uuid::Uuid;
17
/// Upper bound, in bytes, on the size of a document that gets added to the
/// full-text index; `update_tantivy` skips anything larger.
const CONTENT_MAX_LEN_BYTES: usize = 128 * 1024;

/// State backing search: an in-memory metadata/path index plus a tantivy
/// full-text index and the reader used to query it.
#[derive(Clone)]
pub struct SearchIndex {
    /// Shared flag. NOTE(review): nothing in this part of the file reads or
    /// writes it; presumably signals that the initial index build finished —
    /// confirm against callers.
    pub ready: Arc<AtomicBool>,

    /// Files, paths and suggested documents used for path search and for
    /// resolving document ids to paths.
    pub metadata_index: Arc<RwLock<SearchMetadata>>,
    /// Tantivy index holding document contents (created in RAM by `Default`).
    pub tantivy_index: Index,
    /// Reader over `tantivy_index` from which searchers are obtained.
    pub tantivy_reader: IndexReader,
}
28
/// Selects which indexes `LocalLb::search` consults for a non-empty query.
#[derive(Copy, Clone, Debug, Serialize, Deserialize)]
pub enum SearchConfig {
    /// Path matches only (at most 5 results).
    Paths,
    /// Document content matches only (at most 10 results).
    Documents,
    /// Up to 4 path matches followed by the document content matches.
    PathsAndDocuments,
}
35
/// A single search hit, either on a document's content or on a file's path.
#[derive(Debug, Serialize, Deserialize)]
pub enum SearchResult {
    /// The query matched the content of document `id`; `path` is the document's
    /// path (empty when the id is not present in the path index).
    DocumentMatch { id: Uuid, path: String, content_matches: Vec<ContentMatch> },
    /// The query matched `path` (or the file's id). `matched_indices` holds the
    /// byte offsets into `path` of the matched characters; `score` ranks the hit,
    /// higher first.
    PathMatch { id: Uuid, path: String, matched_indices: Vec<usize>, score: i64 },
}
41
42impl LocalLb {
43 #[instrument(level = "debug", skip(self, input), err(Debug))]
61 pub async fn search(&self, input: &str, cfg: SearchConfig) -> LbResult<Vec<SearchResult>> {
62 if input.is_empty() {
64 return self.search.metadata_index.read().await.empty_search();
65 }
66
67 match cfg {
68 SearchConfig::Paths => {
69 let mut results = self.search.metadata_index.read().await.path_search(input)?;
70 results.truncate(5);
71 Ok(results)
72 }
73 SearchConfig::Documents => {
74 let mut results = self.search_content(input).await?;
75 results.truncate(10);
76 Ok(results)
77 }
78 SearchConfig::PathsAndDocuments => {
79 let mut results = self.search.metadata_index.read().await.path_search(input)?;
80 results.truncate(4);
81 results.append(&mut self.search_content(input).await?);
82 Ok(results)
83 }
84 }
85 }
86
87 async fn search_content(&self, input: &str) -> LbResult<Vec<SearchResult>> {
88 let searcher = self.search.tantivy_reader.searcher();
89 let schema = self.search.tantivy_index.schema();
90 let id_field = schema.get_field("id").unwrap();
91 let content = schema.get_field("content").unwrap();
92
93 let query_parser = QueryParser::for_index(&self.search.tantivy_index, vec![content]);
94 let mut results = vec![];
95
96 if let Ok(query) = query_parser.parse_query(input) {
97 let mut snippet_generator =
98 SnippetGenerator::create(&searcher, &query, content).map_unexpected()?;
99 snippet_generator.set_max_num_chars(100);
100
101 let top_docs = searcher
102 .search(&query, &TopDocs::with_limit(10))
103 .map_unexpected()?;
104
105 for (_score, doc_address) in top_docs {
106 let retrieved_doc: TantivyDocument = searcher.doc(doc_address).map_unexpected()?;
107 let id = Uuid::from_slice(
108 retrieved_doc
109 .get_first(id_field)
110 .map(|val| val.as_bytes().unwrap_or_default())
111 .unwrap_or_default(),
112 )
113 .map_unexpected()?;
114
115 let snippet = snippet_generator.snippet_from_doc(&retrieved_doc);
116 let path = self
117 .search
118 .metadata_index
119 .read()
120 .await
121 .paths
122 .iter()
123 .find(|(path_id, _)| *path_id == id)
124 .map(|(_, path)| path.to_string())
125 .unwrap_or_default();
126
127 results.push(SearchResult::DocumentMatch {
128 id,
129 path,
130 content_matches: vec![ContentMatch {
131 paragraph: snippet.fragment().to_string(),
132 matched_indices: Self::highlight_to_matches(snippet.highlighted()),
133 score: 0,
134 }],
135 });
136 }
137 }
138 Ok(results)
139 }
140
141 fn highlight_to_matches(ranges: &[Range<usize>]) -> Vec<usize> {
142 let mut matches = vec![];
143 for range in ranges {
144 for i in range.clone() {
145 matches.push(i);
146 }
147 }
148
149 matches
150 }
151
152 pub fn reload_search_index(&self) -> LbResult<()> {
153 self.search
154 .tantivy_reader
155 .reload()
156 .map_err(|e| crate::LbErrKind::Unexpected(format!("tantivy reload: {e}")).into())
157 }
158
159 #[instrument(level = "debug", skip(self), err(Debug))]
160 pub async fn build_index(&self) -> LbResult<()> {
161 if self.keychain.get_account().is_err() {
164 return Ok(());
165 }
166
167 let new_metadata = SearchMetadata::populate(self).await?;
168
169 let (deleted_ids, all_current_ids) = {
170 let mut current_metadata = self.search.metadata_index.write().await;
171 let deleted = new_metadata.compute_deleted(¤t_metadata);
172 let current = new_metadata.files.iter().map(|f| f.id).collect::<Vec<_>>();
173 *current_metadata = new_metadata;
174 (deleted, current)
175 };
176
177 self.update_tantivy(deleted_ids, all_current_ids).await;
178
179 Ok(())
180 }
181
182 #[instrument(level = "debug", skip(self))]
183 pub fn setup_search(&self) {
184 if self.config.background_work {
185 let lb = self.clone();
186 let mut rx = self.subscribe();
187 tokio_spawn!(async move {
188 lb.build_index().await.unwrap();
189 loop {
190 let evt = match rx.recv().await {
191 Ok(evt) => evt,
192 Err(err) => {
193 error!("failed to receive from a channel {err}");
194 return;
195 }
196 };
197
198 match evt {
199 Event::UserSignedIn => {
200 lb.build_index().await.log_and_ignore();
201 }
202 Event::MetadataChanged(_) => {
203 if let Some(replacement_index) =
204 SearchMetadata::populate(&lb).await.log_and_ignore()
205 {
206 let current_index = lb.search.metadata_index.read().await.clone();
207 let deleted_ids = replacement_index.compute_deleted(¤t_index);
208 *lb.search.metadata_index.write().await = replacement_index;
209 lb.update_tantivy(deleted_ids, vec![]).await;
210 }
211 }
212 Event::DocumentWritten(id, _) => {
213 lb.update_tantivy(vec![id], vec![id]).await;
214 }
215 _ => {}
216 };
217 }
218 });
219 }
220 }
221
222 async fn update_tantivy(&self, delete: Vec<Uuid>, add: Vec<Uuid>) {
223 let mut index_writer: IndexWriter = self.search.tantivy_index.writer(50_000_000).unwrap();
224 let schema = self.search.tantivy_index.schema();
225 let id_field = schema.get_field("id").unwrap();
226 let id_str = schema.get_field("id_str").unwrap();
227 let content = schema.get_field("content").unwrap();
228
229 for id in delete {
230 let term = Term::from_field_bytes(id_field, id.as_bytes());
231 index_writer.delete_term(term);
232 }
233
234 for id in add {
235 let id_bytes = id.as_bytes().as_slice();
236 let id_string = id.to_string();
237 let Some(file) = self
238 .search
239 .metadata_index
240 .read()
241 .await
242 .files
243 .iter()
244 .find(|f| f.id == id)
245 .cloned()
246 else {
247 continue;
248 };
249
250 if !file.name.ends_with(".md") || file.is_folder() {
251 continue;
252 };
253
254 let Ok(doc) = self.read_document(file.id, false).await else {
255 error!("failed to read doc");
256 continue;
257 };
258
259 if doc.len() > CONTENT_MAX_LEN_BYTES {
260 continue;
261 };
262
263 let Ok(doc) = String::from_utf8(doc) else {
264 continue;
265 };
266
267 index_writer
268 .add_document(doc!(
269 id_field => id_bytes,
270 id_str => id_string,
271 content => doc,
272 ))
273 .unwrap();
274 }
275
276 index_writer.commit().unwrap();
277 }
278}
279
280impl Default for SearchIndex {
281 fn default() -> Self {
282 let mut schema_builder = Schema::builder();
283 schema_builder.add_bytes_field("id", INDEXED | STORED);
284 schema_builder.add_text_field("id_str", TEXT | STORED);
285 schema_builder.add_text_field("content", TEXT | STORED);
286
287 let schema = schema_builder.build();
288
289 let index = Index::create_in_ram(schema.clone());
290
291 let reader = index
293 .reader_builder()
294 .reload_policy(ReloadPolicy::OnCommitWithDelay)
295 .try_into()
296 .unwrap();
297
298 Self {
299 ready: Default::default(),
300 tantivy_index: index,
301 tantivy_reader: reader,
302 metadata_index: Default::default(),
303 }
304 }
305}
306
/// One matching excerpt inside a document.
#[derive(Debug, Serialize, Deserialize)]
pub struct ContentMatch {
    /// The snippet text surrounding the match.
    pub paragraph: String,
    /// Every index covered by the snippet's highlighted ranges, i.e. the
    /// positions within `paragraph` that matched the query.
    pub matched_indices: Vec<usize>,
    /// Relevance of this excerpt; the content search currently always sets 0.
    pub score: i64,
}
313
314impl SearchResult {
315 pub fn id(&self) -> Uuid {
316 match self {
317 SearchResult::DocumentMatch { id, .. } | SearchResult::PathMatch { id, .. } => *id,
318 }
319 }
320
321 pub fn path(&self) -> &str {
322 match self {
323 SearchResult::DocumentMatch { path, .. } | SearchResult::PathMatch { path, .. } => path,
324 }
325 }
326
327 pub fn name(&self) -> &str {
328 match self {
329 SearchResult::DocumentMatch { path, .. } | SearchResult::PathMatch { path, .. } => {
330 path.split('/').next_back().unwrap_or_default()
331 }
332 }
333 }
334
335 pub fn score(&self) -> i64 {
336 match self {
337 SearchResult::DocumentMatch { content_matches, .. } => content_matches
338 .iter()
339 .map(|m| m.score)
340 .max()
341 .unwrap_or_default(),
342 SearchResult::PathMatch { score, .. } => *score,
343 }
344 }
345}
346
/// In-memory snapshot of file metadata used for path search and for resolving
/// ids to paths; rebuilt wholesale by `populate`.
#[derive(Default, Clone)]
pub struct SearchMetadata {
    /// All file metadata, as returned by `list_metadatas`.
    files: Vec<File>,
    /// `(id, path)` pairs, as returned by `list_paths_with_ids(None)`.
    paths: Vec<(Uuid, String)>,
    /// Ids returned by `suggested_docs` with default ranking weights; shown for
    /// an empty query and used as a ranking boost in path search.
    suggested_docs: Vec<Uuid>,
}
353
354impl SearchMetadata {
355 async fn populate(lb: &LocalLb) -> LbResult<Self> {
356 let files = lb.list_metadatas().await?;
357 let paths = lb.list_paths_with_ids(None).await?;
358 let suggested_docs = lb.suggested_docs(RankingWeights::default()).await?;
359
360 Ok(SearchMetadata { files, paths, suggested_docs })
361 }
362
363 fn compute_deleted(&self, old: &SearchMetadata) -> Vec<Uuid> {
364 let mut deleted_ids = vec![];
365
366 for old_file in &old.files {
367 if !self.files.iter().any(|new_f| new_f.id == old_file.id) {
368 deleted_ids.push(old_file.id);
369 }
370 }
371
372 deleted_ids
373 }
374
375 fn empty_search(&self) -> LbResult<Vec<SearchResult>> {
376 let mut results = vec![];
377
378 for id in &self.suggested_docs {
379 let path = self
380 .paths
381 .iter()
382 .find(|(path_id, _)| id == path_id)
383 .map(|(_, path)| path.clone())
384 .unwrap_or_default();
385
386 results.push(SearchResult::PathMatch {
387 id: *id,
388 path,
389 matched_indices: vec![],
390 score: 0,
391 });
392 }
393
394 Ok(results)
395 }
396
397 fn path_search(&self, query: &str) -> LbResult<Vec<SearchResult>> {
398 let mut results = self.path_candidates(query)?;
399 self.score_paths(&mut results);
400
401 results.sort_by_key(|r| -r.score());
402
403 if let Some(result) = self.id_match(query) {
404 results.insert(0, result);
405 }
406
407 Ok(results)
408 }
409
410 fn id_match(&self, query: &str) -> Option<SearchResult> {
411 if query.len() < 8 {
412 return None;
413 }
414
415 let query = if query.starts_with("lb://") {
416 query.replacen("lb://", "", 1)
417 } else {
418 query.to_string()
419 };
420
421 for (id, path) in &self.paths {
422 if id.to_string().contains(&query) {
423 return Some(SearchResult::PathMatch {
424 id: *id,
425 path: path.clone(),
426 matched_indices: vec![],
427 score: 100,
428 });
429 }
430 }
431
432 None
433 }
434
435 fn path_candidates(&self, query: &str) -> LbResult<Vec<SearchResult>> {
436 let mut search_results = vec![];
437
438 for (id, path) in &self.paths {
439 let mut matched_indices = vec![];
440
441 let mut query_iter = query.chars().rev();
442 let mut current_query_char = query_iter.next();
443
444 for (path_ind, path_char) in path.char_indices().rev() {
445 if let Some(qc) = current_query_char {
446 if qc.eq_ignore_ascii_case(&path_char) {
447 matched_indices.push(path_ind);
448 current_query_char = query_iter.next();
449 }
450 } else {
451 break;
452 }
453 }
454
455 if current_query_char.is_none() {
456 search_results.push(SearchResult::PathMatch {
457 id: *id,
458 path: path.clone(),
459 matched_indices,
460 score: 0,
461 });
462 }
463 }
464 Ok(search_results)
465 }
466
467 fn score_paths(&self, candidates: &mut [SearchResult]) {
468 let smaller_paths = 10;
470 let suggested = 10;
471 let filename = 30;
472 let editable = 3;
473
474 candidates.sort_by_key(|a| a.path().len());
475
476 for i in 0..smaller_paths {
478 if let Some(SearchResult::PathMatch { id: _, path: _, matched_indices: _, score }) =
479 candidates.get_mut(i)
480 {
481 *score = (smaller_paths - i) as i64;
482 }
483 }
484
485 for cand in candidates.iter_mut() {
487 if self.suggested_docs.contains(&cand.id()) {
488 if let SearchResult::PathMatch { id: _, path: _, matched_indices: _, score } = cand
489 {
490 *score += suggested;
491 }
492 }
493 }
494
495 for cand in candidates.iter_mut() {
497 if let SearchResult::PathMatch { id: _, path, matched_indices, score } = cand {
498 let mut name_match = 0;
499 let mut name_size = 0;
500
501 for (i, c) in path.char_indices().rev() {
502 if c == '/' {
503 break;
504 }
505 name_size += 1;
506 if matched_indices.contains(&i) {
507 name_match += 1;
508 }
509 }
510
511 let match_portion = name_match as f32 / name_size.max(1) as f32;
512 *score += (match_portion * filename as f32) as i64;
513 }
514 }
515
516 for cand in candidates.iter_mut() {
518 if let SearchResult::PathMatch { id: _, path, matched_indices: _, score } = cand {
519 if path.ends_with(".md") || path.ends_with(".svg") {
520 *score += editable;
521 }
522 }
523 }
524 }
525}