1use jaro_winkler::jaro_winkler;
2use levenshtein::levenshtein;
3use regex::Regex;
4use std::path::Path;
5
/// Absolute tolerance used when comparing floating-point similarity scores.
pub const ERROR_MARGIN: f64 = 1e-3;
8
/// Translates a glob pattern into the text of an equivalent regular expression.
///
/// * `*` becomes `.*` (any run of characters, possibly empty).
/// * `?` becomes `.` (exactly one character).
/// * The regex metacharacters `. + ( ) | ^ $ [ ] { } \` are prefixed with a
///   backslash so they match themselves.
/// * Every other character is copied through unchanged.
///
/// A backslash in the glob is always treated as a literal character; there is
/// no escape syntax, so `\*` yields an escaped backslash followed by `.*`.
/// The returned pattern carries no `^`/`$` anchors.
#[must_use]
pub fn glob_to_regex_pattern(pattern: &str) -> String {
    // The output is at least as long as the input, so reserve that much up front.
    let mut regex_pattern = String::with_capacity(pattern.len());

    for c in pattern.chars() {
        match c {
            '*' => regex_pattern.push_str(".*"),
            '?' => regex_pattern.push('.'),
            '.' | '+' | '(' | ')' | '|' | '^' | '$' | '[' | ']' | '{' | '}' | '\\' => {
                regex_pattern.push('\\');
                regex_pattern.push(c);
            }
            _ => regex_pattern.push(c),
        }
    }

    regex_pattern
}
40
41#[must_use]
51pub fn is_close_to_upper_bound(score: f64) -> bool {
52 (score - 1.0).abs() < ERROR_MARGIN
53}
54
55#[must_use]
70pub fn match_filename_with_glob_pattern(path: &Path, pattern: &str) -> bool {
71 let regex_pattern = glob_to_regex_pattern(pattern);
72 let re = Regex::new(®ex_pattern).unwrap_or(Regex::new(".*").unwrap());
73
74 if let Some(name) = path.file_name().map(|s| s.to_string_lossy().to_string()) {
75 if re.is_match(&name) {
76 return true;
77 }
78 }
79
80 false
81}
82
83#[allow(clippy::cast_precision_loss)]
106#[must_use]
107pub fn match_string(s1: &str, s2: &str) -> f64 {
108 let s1 = s1.to_lowercase();
109 let s2 = s2.to_lowercase();
110
111 if s1.is_empty() || s2.is_empty() {
112 return if s1.is_empty() == s2.is_empty() {
113 1.0
114 } else {
115 0.0
116 };
117 }
118
119 if s1.contains(&s2) || s2.contains(&s1) {
120 return 1.0;
121 }
122
123 let len1 = s1.chars().count();
124 let len2 = s2.chars().count();
125 let shorter_len = len1.min(len2);
126
127 if shorter_len == 0 {
128 return 0.0;
129 }
130
131 let distance = levenshtein(&s1, &s2) as f64;
132 let score = 1.0 - (distance / shorter_len as f64);
133
134 score.clamp(0.0, 1.0)
135}
136
137#[must_use]
160pub fn string_similarity<S1, S2>(s1: S1, s2: S2) -> f64
161where
162 S1: AsRef<str>,
163 S2: AsRef<str>,
164{
165 string_similarity_impl(s1.as_ref(), s2.as_ref())
166}
167
168fn string_similarity_impl(s1: &str, s2: &str) -> f64 {
170 let s1 = s1.trim().to_lowercase();
171 let s2 = s2.trim().to_lowercase();
172
173 if s1.is_empty() || s2.is_empty() {
174 return 0.0;
175 }
176
177 if s1.contains(&s2) || s2.contains(&s1) {
178 return 1.0;
179 }
180
181 jaro_winkler(&s1, &s2)
182}
183
/// Asserts that `match_string($s1, $s2)` is within `ERROR_MARGIN` of `$expected`.
///
/// Each argument is evaluated exactly once. On failure the panic message shows
/// the computed score as `Left` and the expected score as `Right`. A trailing
/// comma after the last argument is accepted.
///
/// The expansion refers to `$crate::pattern::match_string` and
/// `$crate::pattern::ERROR_MARGIN`.
#[macro_export]
macro_rules! assert_match_string {
    ($s1:expr, $s2:expr, $expected:expr $(,)?) => {{
        let actual = $crate::pattern::match_string($s1, $s2);
        // Bind once so an expression with side effects is not evaluated twice.
        let expected = $expected;
        assert!(
            (actual - expected).abs() < $crate::pattern::ERROR_MARGIN,
            "Left: {}\nRight: {}",
            actual,
            expected
        );
    }};
}
197
/// Asserts that `string_similarity($s1, $s2)` is within `ERROR_MARGIN` of
/// `$expected`.
///
/// Each argument is evaluated exactly once. On failure the panic message shows
/// the computed score as `Left` and the expected score as `Right`. A trailing
/// comma after the last argument is accepted.
///
/// The expansion refers to `$crate::pattern::string_similarity` and
/// `$crate::pattern::ERROR_MARGIN`.
#[macro_export]
macro_rules! assert_string_similarity {
    ($s1:expr, $s2:expr, $expected:expr $(,)?) => {{
        let actual = $crate::pattern::string_similarity($s1, $s2);
        // Bind once so an expression with side effects is not evaluated twice.
        let expected = $expected;
        assert!(
            (actual - expected).abs() < $crate::pattern::ERROR_MARGIN,
            "Left: {}\nRight: {}",
            actual,
            expected
        );
    }};
}
211
#[cfg(test)]
mod tests {
    use super::{glob_to_regex_pattern, match_filename_with_glob_pattern};
    use crate::{assert_match_string, pattern::is_close_to_upper_bound};
    use std::path::Path;

    // Wildcards are translated, regex metacharacters are escaped, and a
    // backslash in the glob is kept as a literal backslash.
    #[test]
    fn test_glob_to_regex() {
        assert_eq!(glob_to_regex_pattern("fish*.txt"), "fish.*\\.txt");
        assert_eq!(glob_to_regex_pattern("fish?txt"), "fish.txt");
        assert_eq!(glob_to_regex_pattern("fish+txt"), "fish\\+txt");
        assert_eq!(glob_to_regex_pattern("fish\\txt"), "fish\\\\txt");
        assert_eq!(glob_to_regex_pattern("fish\\(txt"), "fish\\\\\\(txt");
    }

    #[test]
    fn test_is_close_to_upper_bound() {
        assert!(is_close_to_upper_bound(1.0));
        assert!(is_close_to_upper_bound(0.9999));
    }

    // Assert the negative result directly instead of expecting `assert!` to
    // panic with a particular message.
    #[test]
    fn test_is_close_to_upper_bound_false() {
        assert!(!is_close_to_upper_bound(0.999));
        assert!(!is_close_to_upper_bound(0.5));
    }

    #[test]
    fn test_match_filename_with_glob_pattern() {
        assert!(match_filename_with_glob_pattern(
            Path::new("fish.txt"),
            "f*.txt"
        ));
        assert!(!match_filename_with_glob_pattern(
            Path::new("fish.txt"),
            "f*.jpg"
        ));
    }

    #[test]
    fn test_match_string() {
        assert_match_string!("kitten", "kissing", 0.333);
        assert_match_string!("Salvage Yard", "yard", 1.0);
        assert_match_string!("raiju", "yard", 0.0);
    }

    #[test]
    fn test_string_similarity() {
        assert_string_similarity!("kitten", "kissing", 0.714);
        assert_string_similarity!("Salvage Yard", "yard", 1.0);
        assert_string_similarity!("Salvage Yard", "yad", 0.472);
        assert_string_similarity!("raiju", "yard", 0.483);
    }
}