bvr_core/matches/
mod.rs

1pub mod composite;
2
3use crate::buf::ContiguousSegmentIterator;
4use crate::cowvec::{CowVec, CowVecSnapshot, CowVecWriter};
5use crate::{LineIndex, Result};
6use regex::bytes::Regex;
7use std::sync::{atomic::AtomicBool, Arc};
8
9pub use composite::CompositeStrategy;
10
11struct LineMatchRemote {
12    buf: CowVecWriter<usize>,
13    completed: Arc<AtomicBool>,
14}
15
16impl LineMatchRemote {
17    pub fn search(mut self, mut iter: ContiguousSegmentIterator, regex: Regex) -> Result<()> {
18        while let Some((idx, start, buf)) = iter.next_buf() {
19            if !self.has_readers() {
20                break;
21            }
22
23            let mut buf_start = 0;
24            while let Some(res) = regex.find_at(buf, buf_start as usize) {
25                let match_start = res.start() as u64 + start;
26                let line_number = idx.line_of_data(match_start).unwrap();
27
28                if let Some(&last) = self.buf.last() {
29                    if last == line_number {
30                        continue;
31                    }
32                    debug_assert!(line_number > last);
33                }
34
35                self.buf.push(line_number);
36
37                buf_start = idx.data_of_line(line_number + 1).unwrap() - start;
38            }
39        }
40        Ok(())
41    }
42
43    pub fn has_readers(&self) -> bool {
44        Arc::strong_count(&self.completed) > 1
45    }
46}
47
48impl Drop for LineMatchRemote {
49    fn drop(&mut self) {
50        self.completed
51            .store(true, std::sync::atomic::Ordering::Relaxed);
52    }
53}
54
55#[derive(Clone)]
56pub enum LineSet {
57    All {
58        buf: LineIndex,
59    },
60    Subset {
61        buf: CowVec<usize>,
62        completed: Arc<AtomicBool>,
63        // Optimization field for composite filters
64        // Minimum length of all filters combined
65        min_len: usize,
66    },
67}
68
69impl LineSet {
70    #[inline]
71    pub fn empty() -> Self {
72        Self::Subset {
73            buf: CowVec::empty(),
74            completed: Arc::new(AtomicBool::new(true)),
75            min_len: 0,
76        }
77    }
78
79    pub fn all(buf: LineIndex) -> Self {
80        Self::All { buf }
81    }
82
83    pub fn is_all(&self) -> bool {
84        matches!(self, Self::All { .. })
85    }
86
87    #[inline]
88    pub fn search(iter: ContiguousSegmentIterator, regex: Regex) -> Self {
89        let (buf, writer) = CowVec::new();
90        let complete = Arc::new(AtomicBool::new(false));
91        std::thread::spawn({
92            let complete = complete.clone();
93            move || {
94                LineMatchRemote {
95                    buf: writer,
96                    completed: complete,
97                }
98                .search(iter, regex)
99            }
100        });
101        Self::Subset {
102            buf,
103            completed: complete,
104            min_len: 0,
105        }
106    }
107
108    #[inline]
109    pub fn compose(
110        mut filters: Vec<Self>,
111        complete: bool,
112        strategy: CompositeStrategy,
113    ) -> Result<Self> {
114        match filters.len() {
115            0 => Ok(Self::empty()),
116            1 => Ok(filters.remove(0)),
117            _ => {
118                let min_len = match strategy {
119                    CompositeStrategy::Intersection => 0,
120                    CompositeStrategy::Union => filters.iter().map(|f| f.len()).max().unwrap(),
121                };
122                let (buf, writer) = CowVec::new();
123                let completed = Arc::new(AtomicBool::new(false));
124                let task = {
125                    let completed = completed.clone();
126                    move || {
127                        composite::LineCompositeRemote {
128                            buf: writer,
129                            completed,
130                            strategy,
131                        }
132                        .compose(filters)
133                    }
134                };
135                if complete {
136                    task()?;
137                } else {
138                    std::thread::spawn(task);
139                }
140                Ok(Self::Subset {
141                    buf,
142                    completed,
143                    min_len,
144                })
145            }
146        }
147    }
148
149    #[cfg(test)]
150    pub(crate) fn into_inner(self) -> CowVec<usize> {
151        match self {
152            Self::All { .. } => unimplemented!(),
153            Self::Subset { buf, .. } => buf,
154        }
155    }
156
157    #[inline]
158    pub fn is_complete(&self) -> bool {
159        match self {
160            LineSet::All { buf } => buf.is_complete(),
161            LineSet::Subset { completed, .. } => {
162                completed.load(std::sync::atomic::Ordering::Relaxed)
163            }
164        }
165    }
166
167    pub fn get(&self, idx: usize) -> Option<usize> {
168        match self {
169            LineSet::All { buf } => {
170                if idx < buf.line_count() {
171                    Some(idx)
172                } else {
173                    None
174                }
175            }
176            LineSet::Subset { buf, .. } => buf.get(idx),
177        }
178    }
179
180    pub fn find(&self, line_number: usize) -> Option<usize> {
181        match self {
182            LineSet::All { buf } => {
183                if line_number < buf.line_count() {
184                    Some(line_number)
185                } else {
186                    None
187                }
188            }
189            LineSet::Subset { buf, .. } => {
190                let slice = buf.snapshot();
191                match *slice.as_slice() {
192                    [first, .., last] if (first..=last).contains(&line_number) => {
193                        slice.binary_search(&line_number).ok()
194                    }
195                    [item] if item == line_number => Some(0),
196                    _ => None,
197                }
198            }
199        }
200    }
201
202    pub fn has_line(&self, line_number: usize) -> bool {
203        self.find(line_number).is_some()
204    }
205
206    pub fn nearest_forward(&self, line_number: usize) -> Option<usize> {
207        match self {
208            LineSet::All { buf } => {
209                if line_number < buf.line_count() {
210                    Some((line_number + 1).min(buf.line_count()))
211                } else {
212                    None
213                }
214            }
215            LineSet::Subset { buf, .. } => {
216                let snap = buf.snapshot();
217                let slice = snap.as_slice();
218                match *slice {
219                    [_, ..] => Some(
220                        slice[match slice.binary_search(&line_number) {
221                            Ok(i) => i.saturating_add(1),
222                            Err(i) => i,
223                        }
224                        .min(slice.len() - 1)],
225                    ),
226                    [] => None,
227                }
228            }
229        }
230    }
231
232    pub fn nearest_backward(&self, line_number: usize) -> Option<usize> {
233        match self {
234            LineSet::All { .. } => line_number.checked_sub(1),
235            LineSet::Subset { buf, .. } => {
236                let snap = buf.snapshot();
237                let slice = snap.as_slice();
238                match *slice {
239                    [_, ..] => Some(
240                        slice[match slice.binary_search(&line_number) {
241                            Ok(i) | Err(i) => i,
242                        }
243                        .saturating_sub(1)
244                        .min(slice.len() - 1)],
245                    ),
246                    [] => None,
247                }
248            }
249        }
250    }
251
252    pub fn len(&self) -> usize {
253        match self {
254            LineSet::All { buf } => buf.line_count(),
255            LineSet::Subset { buf, min_len, .. } => buf.len().max(*min_len),
256        }
257    }
258
259    pub fn is_empty(&self) -> bool {
260        self.len() == 0
261    }
262
263    pub(crate) fn snapshot(&self) -> Option<CowVecSnapshot<usize>> {
264        match self {
265            LineSet::All { .. } => None,
266            LineSet::Subset { buf, .. } => Some(buf.snapshot()),
267        }
268    }
269}
270
271impl From<Vec<usize>> for LineSet {
272    fn from(vec: Vec<usize>) -> Self {
273        Self::Subset {
274            min_len: vec.len(),
275            buf: CowVec::from(vec),
276            completed: Arc::new(AtomicBool::new(true)),
277        }
278    }
279}