1#![allow(clippy::cast_precision_loss)]
3#![allow(clippy::needless_continue)]
4#![allow(clippy::must_use_candidate)]
5#![allow(clippy::len_without_is_empty)]
6
7use super::pattern::Pattern;
10use crate::types::NumEdits;
11use std::borrow::Cow;
12use std::collections::{BTreeMap, BTreeSet};
13
14#[derive(Debug, Clone, PartialEq)]
16pub struct FuzzyMatch<'a> {
17 pub insertions: NumEdits,
19 pub deletions: NumEdits,
21 pub substitutions: NumEdits,
23 pub swaps: NumEdits,
25 pub edits: NumEdits,
27 pub pattern_index: usize,
29 pub pattern: &'a Pattern,
31 pub start: usize,
33 pub end: usize,
35 pub similarity: f32,
37 pub text: &'a str,
39}
40
41#[derive(Debug, Clone, PartialEq)]
43pub struct UnmatchedSegment<'a> {
44 pub start: usize,
46 pub end: usize,
48 pub text: &'a str,
50}
51
52#[derive(Debug, Clone, PartialEq)]
54pub enum Segment<'a> {
55 Matched(FuzzyMatch<'a>),
57 Unmatched(UnmatchedSegment<'a>),
59}
60
61impl<'a> Segment<'a> {
62 #[must_use]
64 pub fn matched(&'a self) -> Option<&'a FuzzyMatch<'a>> {
65 if let Segment::Matched(m) = self {
66 Some(m)
67 } else {
68 None
69 }
70 }
71
72 #[must_use]
74 pub fn unmatched(&'a self) -> Option<&'a UnmatchedSegment<'a>> {
75 if let Segment::Unmatched(u) = self {
76 Some(u)
77 } else {
78 None
79 }
80 }
81
82 #[must_use]
84 pub fn len(&self) -> usize {
85 match self {
86 Segment::Matched(m) => m.text.len(),
87 Segment::Unmatched(u) => u.text.len(),
88 }
89 }
90
91 #[must_use]
93 pub fn is_empty(&self) -> bool {
94 self.len() == 0
95 }
96
97 #[must_use]
99 pub fn as_str(&self) -> &str {
100 match self {
101 Segment::Matched(m) => m.text,
102 Segment::Unmatched(u) => u.text,
103 }
104 }
105}
106
107#[derive(Debug, Clone, Ord, PartialOrd, Eq, PartialEq)]
109pub enum UniqueId {
110 Automatic(usize),
112 Custom(usize),
114}
115
116#[derive(Debug)]
118pub struct FuzzyMatches<'a> {
119 pub(crate) haystack: &'a str,
120 pub inner: Vec<FuzzyMatch<'a>>,
122}
123
124impl<'a> FuzzyMatches<'a> {
125 pub fn default_sort(&mut self) {
127 self.inner.sort_by(|a, b| {
128 b.similarity
129 .total_cmp(&a.similarity)
130 .then_with(|| b.pattern.len().cmp(&a.pattern.len()))
131 .then_with(|| b.text.len().cmp(&a.text.len()))
132 .then_with(|| a.start.cmp(&b.start))
133 });
134 }
135
136 pub fn greedy_sort(&mut self) {
138 self.inner.sort_by(|a, b| {
139 b.pattern
140 .len()
141 .cmp(&a.pattern.len())
142 .then_with(|| b.similarity.total_cmp(&a.similarity))
143 .then_with(|| a.start.cmp(&b.start))
144 });
145 }
146
147 pub fn coverage_weighted_sort(&mut self) {
149 self.inner.sort_by(|a, b| {
150 let score_a = a.similarity * a.similarity * a.pattern.len() as f32;
151 let score_b = b.similarity * b.similarity * b.pattern.len() as f32;
152 score_b
153 .total_cmp(&score_a)
154 .then_with(|| b.similarity.total_cmp(&a.similarity))
155 .then_with(|| a.start.cmp(&b.start))
156 });
157 }
158
159 pub fn non_overlapping(&mut self) {
161 let mut occupied: BTreeMap<usize, usize> = BTreeMap::new();
162 self.inner.retain(|m| {
163 let overlaps_before = occupied
164 .range(..=m.start)
165 .next_back()
166 .is_some_and(|(_, &end)| end > m.start);
167 let overlaps_after = occupied
168 .range(m.start..)
169 .next()
170 .is_some_and(|(&start, _)| start < m.end);
171
172 if !overlaps_before && !overlaps_after {
173 occupied.insert(m.start, m.end);
174 true
175 } else {
176 false
177 }
178 });
179 self.inner.sort_by_key(|m| m.start);
180 }
181
182 pub fn non_overlapping_unique(&mut self) {
184 let mut used_patterns: BTreeSet<UniqueId> = BTreeSet::new();
185 let mut occupied: BTreeMap<usize, usize> = BTreeMap::new();
186
187 self.inner.retain(|m| {
188 let unique_id = m
189 .pattern
190 .custom_unique_id
191 .map_or(UniqueId::Automatic(m.pattern_index), UniqueId::Custom);
192
193 if used_patterns.contains(&unique_id) {
194 return false;
195 }
196
197 let overlaps_before = occupied
198 .range(..=m.start)
199 .next_back()
200 .is_some_and(|(_, &end)| end > m.start);
201 let overlaps_after = occupied
202 .range(m.start..)
203 .next()
204 .is_some_and(|(&start, _)| start < m.end);
205
206 if !overlaps_before && !overlaps_after {
207 used_patterns.insert(unique_id);
208 occupied.insert(m.start, m.end);
209 true
210 } else {
211 false
212 }
213 });
214 self.inner.sort_by_key(|m| m.start);
215 }
216
217 #[must_use]
219 pub fn replace<F, S>(&self, callback: F) -> String
220 where
221 F: Fn(&FuzzyMatch<'a>) -> Option<S>,
222 S: Into<Cow<'a, str>>,
223 {
224 let mut result = String::new();
225 let mut last = 0;
226
227 for m in &self.inner {
228 if m.start >= last {
229 result.push_str(&self.haystack[last..m.start]);
230 last = m.end;
231
232 match callback(m) {
233 Some(repl) => result.push_str(&repl.into()),
234 None => result.push_str(m.text),
235 }
236 }
237 }
238 result.push_str(&self.haystack[last..]);
239 result
240 }
241
242 #[must_use]
244 pub fn strip_prefix(self) -> String {
245 let mut result = String::new();
246 let mut skipping = true;
247
248 for segment in self.segment_iter() {
249 match segment {
250 Segment::Matched(_) if skipping => {}
251 Segment::Matched(m) => result.push_str(m.text),
252 Segment::Unmatched(u) if skipping => {
253 if u.text.trim().is_empty() {
254 continue;
255 }
256 skipping = false;
257 result.push_str(u.text.trim_start());
258 }
259 Segment::Unmatched(u) => result.push_str(u.text),
260 }
261 }
262 result
263 }
264
265 #[must_use]
267 pub fn strip_postfix(self) -> String {
268 let segments: Vec<_> = self.segment_iter().collect();
269 let mut keep = 0;
270
271 for (i, seg) in segments.iter().enumerate() {
272 if let Segment::Unmatched(u) = seg
273 && !u.text.trim().is_empty()
274 {
275 keep = i + 1;
276 }
277 }
278
279 let mut result = String::new();
280 for (i, seg) in segments.into_iter().take(keep).enumerate() {
281 let is_last = i + 1 == keep;
282 match seg {
283 Segment::Matched(m) => result.push_str(m.text),
284 Segment::Unmatched(u) if is_last => result.push_str(u.text.trim_end()),
285 Segment::Unmatched(u) => result.push_str(u.text),
286 }
287 }
288 result
289 }
290
291 pub fn split(self) -> impl Iterator<Item = &'a str> + 'a {
293 let mut segments = self.segment_iter();
294 std::iter::from_fn(move || {
295 for seg in segments.by_ref() {
296 if let Segment::Unmatched(u) = seg {
297 return Some(u.text);
298 }
299 }
300 None
301 })
302 }
303
304 pub fn iter(&self) -> impl Iterator<Item = &FuzzyMatch<'a>> {
306 self.inner.iter()
307 }
308
309 pub fn iter_mut(&mut self) -> impl Iterator<Item = &mut FuzzyMatch<'a>> {
311 self.inner.iter_mut()
312 }
313
314 pub fn inner_mut(&mut self) -> &mut Vec<FuzzyMatch<'a>> {
316 &mut self.inner
317 }
318
319 #[must_use]
321 pub fn len(&self) -> usize {
322 self.inner.len()
323 }
324
325 #[must_use]
327 pub fn is_empty(&self) -> bool {
328 self.inner.is_empty()
329 }
330
331 pub fn retain<F>(&mut self, pred: F) -> &mut Self
333 where
334 F: Fn(&FuzzyMatch<'a>) -> bool,
335 {
336 self.inner.retain(pred);
337 self
338 }
339
340 #[must_use]
342 pub fn filter<F>(&self, pred: F) -> FuzzyMatches<'a>
343 where
344 F: Fn(&FuzzyMatch<'a>) -> bool,
345 {
346 FuzzyMatches {
347 haystack: self.haystack,
348 inner: self.inner.iter().filter(|m| pred(m)).cloned().collect(),
349 }
350 }
351
352 #[must_use]
354 pub fn matched_spans(&self) -> Vec<(usize, usize)> {
355 self.inner.iter().map(|m| (m.start, m.end)).collect()
356 }
357
358 #[must_use]
360 pub fn matched_strings(&self) -> Vec<&'a str> {
361 self.inner.iter().map(|m| m.text).collect()
362 }
363
364 pub fn segment_iter(self) -> impl Iterator<Item = Segment<'a>> {
366 let mut segments = Vec::new();
367 let mut last = 0;
368
369 for m in self.inner {
370 if m.start >= last {
371 if m.start > last {
372 segments.push(Segment::Unmatched(UnmatchedSegment {
373 start: last,
374 end: m.start,
375 text: &self.haystack[last..m.start],
376 }));
377 }
378 last = m.end;
379 segments.push(Segment::Matched(m));
380 }
381 }
382
383 if last < self.haystack.len() {
384 segments.push(Segment::Unmatched(UnmatchedSegment {
385 start: last,
386 end: self.haystack.len(),
387 text: &self.haystack[last..],
388 }));
389 }
390
391 segments.into_iter()
392 }
393
394 #[must_use]
396 pub fn segment_text(self) -> String {
397 const SPACE: [char; 2] = [' ', '\t'];
398 const NO_LEADING_SPACE: [char; 9] = [',', '.', '?', '!', ';', ':', '—', '-', '…'];
399
400 let mut result = String::new();
401 let mut prev_matched = false;
402
403 for segment in self.segment_iter() {
404 match segment {
405 Segment::Matched(m) => {
406 if prev_matched || (!result.is_empty() && !result.ends_with(SPACE)) {
407 result.push(' ');
408 }
409 prev_matched = true;
410 result.push_str(m.text);
411 }
412 Segment::Unmatched(u) => {
413 if prev_matched && !u.text.starts_with(NO_LEADING_SPACE) {
414 result.push(' ');
415 }
416 prev_matched = false;
417 result.push_str(u.text);
418 }
419 }
420 }
421 result
422 }
423}
424
425impl<'a, 'b> IntoIterator for &'b FuzzyMatches<'a> {
427 type Item = &'b FuzzyMatch<'a>;
428 type IntoIter = std::slice::Iter<'b, FuzzyMatch<'a>>;
429
430 fn into_iter(self) -> Self::IntoIter {
431 self.inner.iter()
432 }
433}
434
435impl<'a, 'b> IntoIterator for &'b mut FuzzyMatches<'a> {
436 type Item = &'b mut FuzzyMatch<'a>;
437 type IntoIter = std::slice::IterMut<'b, FuzzyMatch<'a>>;
438
439 fn into_iter(self) -> Self::IntoIter {
440 self.inner.iter_mut()
441 }
442}
443
444impl<'a> IntoIterator for FuzzyMatches<'a> {
445 type Item = FuzzyMatch<'a>;
446 type IntoIter = std::vec::IntoIter<FuzzyMatch<'a>>;
447
448 fn into_iter(self) -> Self::IntoIter {
449 self.inner.into_iter()
450 }
451}
452
453impl<'a> std::ops::Deref for FuzzyMatches<'a> {
454 type Target = [FuzzyMatch<'a>];
455
456 fn deref(&self) -> &Self::Target {
457 &self.inner
458 }
459}
460
461impl std::ops::DerefMut for FuzzyMatches<'_> {
462 fn deref_mut(&mut self) -> &mut Self::Target {
463 &mut self.inner
464 }
465}