1use crate::config::SmartSelectionRule;
9use regex::Regex;
10
/// Finds the smart-selection rule match (URL, e-mail, path, ...) under a given column.
///
/// Holds the enabled rules whose patterns compiled, ordered from highest to lowest
/// precision so that lookups try the most specific rule first.
pub struct SmartSelectionMatcher {
    /// Compiled rules, sorted by descending precision when the matcher is built.
    rules: Vec<CompiledRule>,
}
16
/// One enabled smart-selection rule whose pattern compiled successfully.
struct CompiledRule {
    /// Rule name copied from the configuration entry.
    // Not read back by the code visible here, hence the lint allowance; presumably kept
    // for diagnostics.
    #[allow(dead_code)]
    name: String,
    /// Compiled form of the rule's pattern.
    regex: Regex,
    /// Numeric precision of the rule; higher values are tried first.
    precision: f64,
}
23
24impl SmartSelectionMatcher {
25 pub fn new(rules: &[SmartSelectionRule]) -> Self {
27 let mut compiled: Vec<CompiledRule> = rules
28 .iter()
29 .filter(|r| r.enabled)
30 .filter_map(|r| match Regex::new(&r.regex) {
31 Ok(regex) => Some(CompiledRule {
32 name: r.name.clone(),
33 regex,
34 precision: r.precision.value(),
35 }),
36 Err(e) => {
37 log::warn!(
38 "Failed to compile smart selection regex '{}': {}",
39 r.name,
40 e
41 );
42 None
43 }
44 })
45 .collect();
46
47 compiled.sort_by(|a, b| {
49 b.precision
50 .partial_cmp(&a.precision)
51 .unwrap_or(std::cmp::Ordering::Equal)
52 });
53
54 Self { rules: compiled }
55 }
56
57 pub fn find_match_at(&self, line: &str, col: usize) -> Option<(usize, usize)> {
66 let byte_offset = char_to_byte_offset(line, col)?;
68
69 for rule in &self.rules {
70 for mat in rule.regex.find_iter(line) {
72 let match_start_byte = mat.start();
73 let match_end_byte = mat.end();
74
75 if byte_offset >= match_start_byte && byte_offset < match_end_byte {
77 let start_col = byte_to_char_offset(line, match_start_byte)?;
79 let end_col = byte_to_char_offset(line, match_end_byte)?.saturating_sub(1);
80
81 return Some((start_col, end_col));
82 }
83 }
84 }
85
86 None
87 }
88}
89
/// Converts a zero-based character index into the byte offset of that character in `s`.
///
/// An index at or past the end of the string is clamped to `s.len()`. The function
/// therefore always returns `Some`; the `Option` return type is kept so existing callers
/// that use `?` continue to compile unchanged.
fn char_to_byte_offset(s: &str, char_offset: usize) -> Option<usize> {
    // `nth` yields `None` exactly when `char_offset` is past the last character, so a
    // single traversal is enough to decide between a real offset and the clamp value.
    let byte_idx = s
        .char_indices()
        .nth(char_offset)
        .map_or(s.len(), |(byte_idx, _)| byte_idx);

    Some(byte_idx)
}
104
/// Converts a byte offset in `s` into the number of characters that precede it.
///
/// Returns `None` when `byte_offset` is past the end of the string or does not fall on a
/// UTF-8 character boundary, instead of panicking on the slice.
fn byte_to_char_offset(s: &str, byte_offset: usize) -> Option<usize> {
    // `str::get` performs both the bounds check and the char-boundary check.
    s.get(..byte_offset).map(|prefix| prefix.chars().count())
}
112
/// Reports whether `ch` counts as part of a word for selection purposes.
///
/// Alphanumeric characters always qualify; any other character qualifies only if it
/// appears in `word_characters`.
pub fn is_word_char(ch: char, word_characters: &str) -> bool {
    if ch.is_alphanumeric() {
        return true;
    }

    word_characters.chars().any(|extra| extra == ch)
}
125
126pub fn find_word_boundaries(line: &str, col: usize, word_characters: &str) -> (usize, usize) {
130 let chars: Vec<char> = line.chars().collect();
131
132 if chars.is_empty() || col >= chars.len() {
133 return (col, col);
134 }
135
136 let mut start_col = col;
137 let mut end_col = col;
138
139 while start_col > 0 && is_word_char(chars[start_col - 1], word_characters) {
141 start_col -= 1;
142 }
143
144 if !is_word_char(chars[col], word_characters) {
146 return (col, col);
147 }
148
149 while end_col < chars.len() - 1 && is_word_char(chars[end_col + 1], word_characters) {
151 end_col += 1;
152 }
153
154 (start_col, end_col)
155}
156
/// Caches a compiled [`SmartSelectionMatcher`] so it is rebuilt only when the rules change.
pub struct SmartSelectionCache {
    /// The cached matcher; `None` until one has been built.
    matcher: Option<SmartSelectionMatcher>,
    /// Hash of the rule set the cached matcher was built from.
    rules_hash: u64,
}
164
impl Default for SmartSelectionCache {
    /// Equivalent to [`SmartSelectionCache::new`]: an empty cache with no matcher built yet.
    fn default() -> Self {
        Self::new()
    }
}
170
171impl SmartSelectionCache {
172 pub fn new() -> Self {
173 Self {
174 matcher: None,
175 rules_hash: 0,
176 }
177 }
178
179 pub fn get_matcher(&mut self, rules: &[SmartSelectionRule]) -> &SmartSelectionMatcher {
181 let hash = hash_rules(rules);
182
183 if self.rules_hash != hash || self.matcher.is_none() {
184 self.matcher = Some(SmartSelectionMatcher::new(rules));
185 self.rules_hash = hash;
186 }
187
188 self.matcher.as_ref().unwrap()
189 }
190}
191
192fn hash_rules(rules: &[SmartSelectionRule]) -> u64 {
194 use std::collections::hash_map::DefaultHasher;
195 use std::hash::{Hash, Hasher};
196
197 let mut hasher = DefaultHasher::new();
198 for rule in rules {
199 rule.name.hash(&mut hasher);
200 rule.regex.hash(&mut hasher);
201 rule.enabled.hash(&mut hasher);
202 std::mem::discriminant(&rule.precision).hash(&mut hasher);
204 }
205 hasher.finish()
206}
207
// Unit tests for rule matching and word-boundary detection. All columns below are
// zero-based character indices, and returned ranges are inclusive on both ends.
#[cfg(test)]
mod tests {
    use super::*;
    use crate::config::{SmartSelectionPrecision, SmartSelectionRule};

    /// Rule set shared by most tests: URL (very high), e-mail (high), file path (normal).
    fn test_rules() -> Vec<SmartSelectionRule> {
        vec![
            SmartSelectionRule::new(
                "HTTP URL",
                r"https?://[^\s]+",
                SmartSelectionPrecision::VeryHigh,
            ),
            SmartSelectionRule::new(
                "Email",
                r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b",
                SmartSelectionPrecision::High,
            ),
            SmartSelectionRule::new(
                "File path",
                r"~?/?(?:[a-zA-Z0-9._-]+/)+[a-zA-Z0-9._-]+/?",
                SmartSelectionPrecision::Normal,
            ),
        ]
    }

    #[test]
    fn test_find_url_match() {
        let matcher = SmartSelectionMatcher::new(&test_rules());
        let line = "Check out https://example.com/path for more info";

        // Cursor on the first character of the URL.
        let result = matcher.find_match_at(line, 10);
        assert_eq!(result, Some((10, 33)));

        // Cursor in the middle of the URL selects the same span.
        let result = matcher.find_match_at(line, 18);
        assert_eq!(result, Some((10, 33)));

        // Cursor on plain text outside any rule match.
        let result = matcher.find_match_at(line, 0);
        assert_eq!(result, None);
    }

    #[test]
    fn test_find_email_match() {
        let matcher = SmartSelectionMatcher::new(&test_rules());
        let line = "Contact user@example.com for help";

        // Cursor on the first character of the address.
        let result = matcher.find_match_at(line, 8);
        assert_eq!(result, Some((8, 23)));

        // Cursor on the '@' selects the same span.
        let result = matcher.find_match_at(line, 12);
        assert_eq!(result, Some((8, 23)));
    }

    #[test]
    fn test_find_path_match() {
        let matcher = SmartSelectionMatcher::new(&test_rules());
        let line = "Edit ~/Documents/file.txt and save";

        // Cursor inside the path; the match includes the leading "~/".
        let result = matcher.find_match_at(line, 7);
        assert_eq!(result, Some((5, 24)));
    }

    #[test]
    fn test_word_boundaries_default() {
        let line = "hello_world test-case foo.bar";
        let word_chars = "/-+\\~_.";

        // '_' is a configured word character, so the whole identifier is one word.
        let (start, end) = find_word_boundaries(line, 6, word_chars);
        assert_eq!(
            &line.chars().collect::<Vec<_>>()[start..=end]
                .iter()
                .collect::<String>(),
            "hello_world"
        );

        // '-' is a configured word character as well.
        let (start, end) = find_word_boundaries(line, 12, word_chars);
        assert_eq!(
            &line.chars().collect::<Vec<_>>()[start..=end]
                .iter()
                .collect::<String>(),
            "test-case"
        );
    }

    #[test]
    fn test_word_boundaries_empty_config() {
        let line = "hello_world test-case";
        let word_chars = "";

        // With no extra word characters, '_' splits "hello_world".
        let (start, end) = find_word_boundaries(line, 6, word_chars);
        assert_eq!(
            &line.chars().collect::<Vec<_>>()[start..=end]
                .iter()
                .collect::<String>(),
            "world"
        );

        let (start, end) = find_word_boundaries(line, 0, word_chars);
        assert_eq!(
            &line.chars().collect::<Vec<_>>()[start..=end]
                .iter()
                .collect::<String>(),
            "hello"
        );

        // '-' likewise splits "test-case".
        let (start, end) = find_word_boundaries(line, 12, word_chars);
        assert_eq!(
            &line.chars().collect::<Vec<_>>()[start..=end]
                .iter()
                .collect::<String>(),
            "test"
        );
    }

    #[test]
    fn test_is_word_char() {
        let word_chars = "/-+\\~_.";

        // Alphanumerics and configured characters are word characters.
        assert!(is_word_char('a', word_chars));
        assert!(is_word_char('Z', word_chars));
        assert!(is_word_char('5', word_chars));
        assert!(is_word_char('_', word_chars));
        assert!(is_word_char('-', word_chars));
        assert!(is_word_char('/', word_chars));
        assert!(is_word_char('.', word_chars));

        // Whitespace and unlisted punctuation are not.
        assert!(!is_word_char(' ', word_chars));
        assert!(!is_word_char('@', word_chars));
        assert!(!is_word_char('!', word_chars));
    }

    #[test]
    fn test_unicode_handling() {
        let matcher = SmartSelectionMatcher::new(&test_rules());
        let line = "日本語 https://example.com 中文";

        // Columns are counted in characters, so the multi-byte prefix occupies columns 0-3.
        let result = matcher.find_match_at(line, 4);
        assert_eq!(result, Some((4, 22)));
    }

    #[test]
    fn test_disabled_rule() {
        let mut rules = test_rules();
        // Disable the URL rule; none of the remaining rules matches this line.
        rules[0].enabled = false;
        let matcher = SmartSelectionMatcher::new(&rules);
        let line = "Check out https://example.com for more info";

        let result = matcher.find_match_at(line, 10);
        assert_eq!(result, None);
    }

    #[test]
    fn test_precision_ordering() {
        let rules = vec![
            // Listed first but lower precision: must not win over the e-mail rule.
            SmartSelectionRule::new("Whitespace-bounded", r"\S+", SmartSelectionPrecision::Low),
            SmartSelectionRule::new(
                "Email",
                r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b",
                SmartSelectionPrecision::High,
            ),
        ];

        let matcher = SmartSelectionMatcher::new(&rules);
        let line = "Contact user@example.com for help";

        let result = matcher.find_match_at(line, 12);
        assert_eq!(result, Some((8, 23)));
    }
}