#![allow(clippy::cast_precision_loss)]
#![allow(clippy::needless_continue)]
#![allow(clippy::must_use_candidate)]
#![allow(clippy::len_without_is_empty)]
use super::pattern::Pattern;
use crate::types::NumEdits;
use std::borrow::Cow;
use std::collections::{BTreeMap, BTreeSet};
#[derive(Debug, Clone, PartialEq)]
pub struct FuzzyMatch<'a> {
pub insertions: NumEdits,
pub deletions: NumEdits,
pub substitutions: NumEdits,
pub swaps: NumEdits,
pub edits: NumEdits,
pub pattern_index: usize,
pub pattern: &'a Pattern,
pub start: usize,
pub end: usize,
pub similarity: f32,
pub text: &'a str,
}
#[derive(Debug, Clone, PartialEq)]
pub struct UnmatchedSegment<'a> {
pub start: usize,
pub end: usize,
pub text: &'a str,
}
#[derive(Debug, Clone, PartialEq)]
pub enum Segment<'a> {
Matched(FuzzyMatch<'a>),
Unmatched(UnmatchedSegment<'a>),
}
impl<'a> Segment<'a> {
#[must_use]
pub fn matched(&'a self) -> Option<&'a FuzzyMatch<'a>> {
if let Segment::Matched(m) = self {
Some(m)
} else {
None
}
}
#[must_use]
pub fn unmatched(&'a self) -> Option<&'a UnmatchedSegment<'a>> {
if let Segment::Unmatched(u) = self {
Some(u)
} else {
None
}
}
#[must_use]
pub fn len(&self) -> usize {
match self {
Segment::Matched(m) => m.text.len(),
Segment::Unmatched(u) => u.text.len(),
}
}
#[must_use]
pub fn is_empty(&self) -> bool {
self.len() == 0
}
#[must_use]
pub fn as_str(&self) -> &str {
match self {
Segment::Matched(m) => m.text,
Segment::Unmatched(u) => u.text,
}
}
}
#[derive(Debug, Clone, Ord, PartialOrd, Eq, PartialEq)]
pub enum UniqueId {
Automatic(usize),
Custom(usize),
}
#[derive(Debug)]
pub struct FuzzyMatches<'a> {
pub(crate) haystack: &'a str,
pub inner: Vec<FuzzyMatch<'a>>,
}
impl<'a> FuzzyMatches<'a> {
pub fn default_sort(&mut self) {
self.inner.sort_by(|a, b| {
b.similarity
.total_cmp(&a.similarity)
.then_with(|| b.pattern.len().cmp(&a.pattern.len()))
.then_with(|| b.text.len().cmp(&a.text.len()))
.then_with(|| a.start.cmp(&b.start))
});
}
pub fn greedy_sort(&mut self) {
self.inner.sort_by(|a, b| {
b.pattern
.len()
.cmp(&a.pattern.len())
.then_with(|| b.similarity.total_cmp(&a.similarity))
.then_with(|| a.start.cmp(&b.start))
});
}
pub fn coverage_weighted_sort(&mut self) {
self.inner.sort_by(|a, b| {
let score_a = a.similarity * a.similarity * a.pattern.len() as f32;
let score_b = b.similarity * b.similarity * b.pattern.len() as f32;
score_b
.total_cmp(&score_a)
.then_with(|| b.similarity.total_cmp(&a.similarity))
.then_with(|| a.start.cmp(&b.start))
});
}
pub fn non_overlapping(&mut self) {
let mut occupied: BTreeMap<usize, usize> = BTreeMap::new();
self.inner.retain(|m| {
let overlaps_before = occupied
.range(..=m.start)
.next_back()
.is_some_and(|(_, &end)| end > m.start);
let overlaps_after = occupied
.range(m.start..)
.next()
.is_some_and(|(&start, _)| start < m.end);
if !overlaps_before && !overlaps_after {
occupied.insert(m.start, m.end);
true
} else {
false
}
});
self.inner.sort_by_key(|m| m.start);
}
pub fn non_overlapping_unique(&mut self) {
let mut used_patterns: BTreeSet<UniqueId> = BTreeSet::new();
let mut occupied: BTreeMap<usize, usize> = BTreeMap::new();
self.inner.retain(|m| {
let unique_id = m
.pattern
.custom_unique_id
.map_or(UniqueId::Automatic(m.pattern_index), UniqueId::Custom);
if used_patterns.contains(&unique_id) {
return false;
}
let overlaps_before = occupied
.range(..=m.start)
.next_back()
.is_some_and(|(_, &end)| end > m.start);
let overlaps_after = occupied
.range(m.start..)
.next()
.is_some_and(|(&start, _)| start < m.end);
if !overlaps_before && !overlaps_after {
used_patterns.insert(unique_id);
occupied.insert(m.start, m.end);
true
} else {
false
}
});
self.inner.sort_by_key(|m| m.start);
}
#[must_use]
pub fn replace<F, S>(&self, callback: F) -> String
where
F: Fn(&FuzzyMatch<'a>) -> Option<S>,
S: Into<Cow<'a, str>>,
{
let mut result = String::new();
let mut last = 0;
for m in &self.inner {
if m.start >= last {
result.push_str(&self.haystack[last..m.start]);
last = m.end;
match callback(m) {
Some(repl) => result.push_str(&repl.into()),
None => result.push_str(m.text),
}
}
}
result.push_str(&self.haystack[last..]);
result
}
#[must_use]
pub fn strip_prefix(self) -> String {
let mut result = String::new();
let mut skipping = true;
for segment in self.segment_iter() {
match segment {
Segment::Matched(_) if skipping => {}
Segment::Matched(m) => result.push_str(m.text),
Segment::Unmatched(u) if skipping => {
if u.text.trim().is_empty() {
continue;
}
skipping = false;
result.push_str(u.text.trim_start());
}
Segment::Unmatched(u) => result.push_str(u.text),
}
}
result
}
#[must_use]
pub fn strip_postfix(self) -> String {
let segments: Vec<_> = self.segment_iter().collect();
let mut keep = 0;
for (i, seg) in segments.iter().enumerate() {
if let Segment::Unmatched(u) = seg
&& !u.text.trim().is_empty()
{
keep = i + 1;
}
}
let mut result = String::new();
for (i, seg) in segments.into_iter().take(keep).enumerate() {
let is_last = i + 1 == keep;
match seg {
Segment::Matched(m) => result.push_str(m.text),
Segment::Unmatched(u) if is_last => result.push_str(u.text.trim_end()),
Segment::Unmatched(u) => result.push_str(u.text),
}
}
result
}
pub fn split(self) -> impl Iterator<Item = &'a str> + 'a {
let mut segments = self.segment_iter();
std::iter::from_fn(move || {
for seg in segments.by_ref() {
if let Segment::Unmatched(u) = seg {
return Some(u.text);
}
}
None
})
}
pub fn iter(&self) -> impl Iterator<Item = &FuzzyMatch<'a>> {
self.inner.iter()
}
pub fn iter_mut(&mut self) -> impl Iterator<Item = &mut FuzzyMatch<'a>> {
self.inner.iter_mut()
}
pub fn inner_mut(&mut self) -> &mut Vec<FuzzyMatch<'a>> {
&mut self.inner
}
#[must_use]
pub fn len(&self) -> usize {
self.inner.len()
}
#[must_use]
pub fn is_empty(&self) -> bool {
self.inner.is_empty()
}
pub fn retain<F>(&mut self, pred: F) -> &mut Self
where
F: Fn(&FuzzyMatch<'a>) -> bool,
{
self.inner.retain(pred);
self
}
#[must_use]
pub fn filter<F>(&self, pred: F) -> FuzzyMatches<'a>
where
F: Fn(&FuzzyMatch<'a>) -> bool,
{
FuzzyMatches {
haystack: self.haystack,
inner: self.inner.iter().filter(|m| pred(m)).cloned().collect(),
}
}
#[must_use]
pub fn matched_spans(&self) -> Vec<(usize, usize)> {
self.inner.iter().map(|m| (m.start, m.end)).collect()
}
#[must_use]
pub fn matched_strings(&self) -> Vec<&'a str> {
self.inner.iter().map(|m| m.text).collect()
}
pub fn segment_iter(self) -> impl Iterator<Item = Segment<'a>> {
let mut segments = Vec::new();
let mut last = 0;
for m in self.inner {
if m.start >= last {
if m.start > last {
segments.push(Segment::Unmatched(UnmatchedSegment {
start: last,
end: m.start,
text: &self.haystack[last..m.start],
}));
}
last = m.end;
segments.push(Segment::Matched(m));
}
}
if last < self.haystack.len() {
segments.push(Segment::Unmatched(UnmatchedSegment {
start: last,
end: self.haystack.len(),
text: &self.haystack[last..],
}));
}
segments.into_iter()
}
#[must_use]
pub fn segment_text(self) -> String {
const SPACE: [char; 2] = [' ', '\t'];
const NO_LEADING_SPACE: [char; 9] = [',', '.', '?', '!', ';', ':', '—', '-', '…'];
let mut result = String::new();
let mut prev_matched = false;
for segment in self.segment_iter() {
match segment {
Segment::Matched(m) => {
if prev_matched || (!result.is_empty() && !result.ends_with(SPACE)) {
result.push(' ');
}
prev_matched = true;
result.push_str(m.text);
}
Segment::Unmatched(u) => {
if prev_matched && !u.text.starts_with(NO_LEADING_SPACE) {
result.push(' ');
}
prev_matched = false;
result.push_str(u.text);
}
}
}
result
}
}
impl<'a, 'b> IntoIterator for &'b FuzzyMatches<'a> {
type Item = &'b FuzzyMatch<'a>;
type IntoIter = std::slice::Iter<'b, FuzzyMatch<'a>>;
fn into_iter(self) -> Self::IntoIter {
self.inner.iter()
}
}
impl<'a, 'b> IntoIterator for &'b mut FuzzyMatches<'a> {
type Item = &'b mut FuzzyMatch<'a>;
type IntoIter = std::slice::IterMut<'b, FuzzyMatch<'a>>;
fn into_iter(self) -> Self::IntoIter {
self.inner.iter_mut()
}
}
impl<'a> IntoIterator for FuzzyMatches<'a> {
type Item = FuzzyMatch<'a>;
type IntoIter = std::vec::IntoIter<FuzzyMatch<'a>>;
fn into_iter(self) -> Self::IntoIter {
self.inner.into_iter()
}
}
impl<'a> std::ops::Deref for FuzzyMatches<'a> {
type Target = [FuzzyMatch<'a>];
fn deref(&self) -> &Self::Target {
&self.inner
}
}
impl std::ops::DerefMut for FuzzyMatches<'_> {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.inner
}
}