1use std::cmp::max;
2
3use levenshtein::levenshtein;
4
5use crate::{Candidate, LTerm};
6
/// One extracted result, built from a `Candidate` via the `From` impl in this file.
///
/// Besides the derived `PartialEq`, this file also provides a comparison against a
/// `(raw, keyword, score)` tuple.
#[derive(PartialEq, Clone, Debug)]
pub struct ResultItem {
    /// The candidate's `raw` terms joined with single spaces.
    pub raw: String,
    /// The candidate's `lc_terms` joined with single spaces; this is the text
    /// `remove_duplicates` compares when looking for near-duplicates.
    /// NOTE(review): `lc_terms` presumably holds lowercased terms — confirm against `Candidate`.
    pub keyword: LTerm,
    /// The candidate's `score`, copied unchanged.
    pub score: f64,
}
17
18impl PartialEq<(&str, &str, f64)> for ResultItem {
19 fn eq(&self, (raw, key_phrase, score): &(&str, &str, f64)) -> bool {
20 self.raw.eq(raw) && self.keyword.eq(key_phrase) && self.score.eq(score)
21 }
22}
23
24impl From<Candidate<'_>> for ResultItem {
25 fn from(candidate: Candidate) -> Self {
26 ResultItem { raw: candidate.raw.join(" "), keyword: candidate.lc_terms.join(" "), score: candidate.score }
27 }
28}
29
30pub(crate) fn remove_duplicates(threshold: f64, results: Vec<ResultItem>, n: usize) -> Vec<ResultItem> {
31 let mut unique: Vec<ResultItem> = Vec::new();
32
33 for res in results {
34 if unique.len() >= n {
35 break;
36 }
37
38 let is_duplicate = unique.iter().any(|it| levenshtein_ratio(&it.keyword, &res.keyword) > threshold);
39
40 if !is_duplicate {
41 unique.push(res);
42 }
43 }
44
45 unique
46}
47
48fn levenshtein_ratio(seq1: &str, seq2: &str) -> f64 {
50 let distance = if seq1.len() <= seq2.len() { levenshtein(seq1, seq2) } else { levenshtein(seq2, seq1) };
51 let length = max(seq1.len(), seq2.len());
52 1.0 - (distance as f64 / length as f64)
53}