// binocular/search/matcher/fuzzy.rs

1use crate::infra::channel::{Receiver, Sender};
2use crate::search::types::{SearchItem, SearchResult};
3use nucleo::{Config, Injector, Matcher, Nucleo, Utf32String};
4use std::sync::atomic::{AtomicBool, Ordering};
5use std::sync::Arc;
6use std::time::Duration;
7
/// Soft cap on how many items the matcher loop injects per iteration.
///
/// The cap is checked after each whole batch has been pushed, so a single iteration may
/// overshoot it by up to one batch before the loop moves on to commands and matching.
const MAX_ITEMS_PER_TICK: usize = 100_000;
9
10fn push_batch_to_nucleo(
11    injector: &Injector<SearchItem>,
12    batch: Vec<SearchItem>,
13    use_filename_only: bool,
14) -> usize {
15    let count = batch.len();
16    for item in batch {
17        injector.push(item, |item_ref, cols: &mut [Utf32String]| {
18            cols[0] = Utf32String::from(item_ref.match_text(use_filename_only).as_ref())
19        });
20    }
21    count
22}
23
24pub struct MatcherState {
25    pub results: Vec<SearchResult>,
26    pub total_matches: u64,
27    pub total_items: u64,
28    pub working: bool,
29}
30
/// Control messages accepted by the matcher thread.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum MatcherCommand {
    /// Replace the active query string; ignored when equal to the current query.
    Query(String),
    /// Change the maximum number of results reported per state update; ignored when equal
    /// to the current limit.
    Resize(u32),
}
35
36pub fn spawn_matcher(
37    rx_items: impl Receiver<Vec<SearchItem>>,
38    rx_cmd: impl Receiver<MatcherCommand>,
39    stop: Arc<AtomicBool>,
40    tx_state: impl Sender<MatcherState>,
41    use_filename_only: bool,
42    is_content: bool,
43) -> std::thread::JoinHandle<()> {
44    std::thread::spawn(move || {
45        let mut nucleo = Nucleo::<SearchItem>::new(Config::DEFAULT, Arc::new(|| {}), None, 1);
46
47        let injector = nucleo.injector();
48
49        let mut current_query = String::new();
50        let mut item_limit = 100;
51        let mut search_complete = false;
52        let mut last_sent_total_matches: Option<u64> = None;
53        let mut last_sent_working: Option<bool> = None;
54        let mut last_items_received = std::time::Instant::now();
55        let mut idle_timed_out = false;
56
57        let mut indices_matcher = Matcher::new(Config::DEFAULT);
58
59        while !stop.load(Ordering::Relaxed) {
60            let mut items_processed = 0;
61
62            if !search_complete {
63                loop {
64                    match rx_items.try_recv() {
65                        Ok(Some(batch)) => {
66                            items_processed +=
67                                push_batch_to_nucleo(&injector, batch, use_filename_only);
68                            if items_processed > MAX_ITEMS_PER_TICK {
69                                break;
70                            }
71                        }
72                        Ok(None) => break,
73                        Err(_) => {
74                            search_complete = true;
75                            break;
76                        }
77                    }
78                }
79            }
80
81            let mut needs_reparse = false;
82            let mut resized = false;
83            while let Ok(Some(cmd)) = rx_cmd.try_recv() {
84                match cmd {
85                    MatcherCommand::Query(q) => {
86                        if q != current_query {
87                            current_query = q;
88                            needs_reparse = true;
89                        }
90                    }
91                    MatcherCommand::Resize(n) => {
92                        if n != item_limit {
93                            item_limit = n;
94                            resized = true;
95                        }
96                    }
97                }
98            }
99
100            if needs_reparse {
101                nucleo.pattern.reparse(
102                    0,
103                    &current_query,
104                    nucleo::pattern::CaseMatching::Smart,
105                    nucleo::pattern::Normalization::Smart,
106                    false,
107                );
108            }
109
110            let status = nucleo.tick(10);
111
112            let snapshot = nucleo.snapshot();
113            let total_matches = snapshot.matched_item_count() as u64;
114            let total_items = snapshot.item_count() as u64;
115
116            let end = (item_limit as u64).min(total_matches);
117            let matched_items = snapshot.matched_items(0..end as u32);
118
119            let results: Vec<SearchResult> = matched_items
120                .map(|item| {
121                    let mut indices = Vec::new();
122                    let match_text = item.data.match_text(use_filename_only);
123                    let utf32_text = Utf32String::from(match_text.as_ref());
124
125                    let pattern = nucleo.pattern.column_pattern(0);
126                    let _ =
127                        pattern.indices(utf32_text.slice(..), &mut indices_matcher, &mut indices);
128
129                    let column = if is_content {
130                        item.data.content_match_column(&indices)
131                    } else {
132                        None
133                    };
134
135                    SearchResult {
136                        item: item.data.clone(),
137                        indices,
138                        column,
139                    }
140                })
141                .collect();
142
143            if items_processed > 0 {
144                last_items_received = std::time::Instant::now();
145            } else if !search_complete
146                && !idle_timed_out
147                && last_items_received.elapsed() > Duration::from_millis(100)
148            {
149                idle_timed_out = true;
150            }
151
152            let working = (status.running || !search_complete) && !idle_timed_out;
153            let should_send = items_processed > 0
154                || needs_reparse
155                || resized
156                || last_sent_total_matches != Some(total_matches)
157                || last_sent_working != Some(working);
158
159            if should_send {
160                let _ = tx_state.try_send(MatcherState {
161                    results,
162                    total_matches,
163                    total_items,
164                    working,
165                });
166                last_sent_total_matches = Some(total_matches);
167                last_sent_working = Some(working);
168            }
169
170            if !status.running && items_processed == 0 {
171                std::thread::sleep(Duration::from_millis(10));
172            }
173        }
174    })
175}