Skip to main content

par_term/
smart_selection.rs

//! Smart selection module for pattern-based text selection.
//!
//! This module provides intelligent double-click selection based on regex patterns.
//! When the user double-clicks, the system first tries smart selection rules (sorted
//! by precision, highest first). If a pattern matches at the cursor position, that
//! text is selected. Otherwise, it falls back to word boundary selection.
7
8use crate::config::SmartSelectionRule;
9use regex::Regex;
10
/// Compiled smart selection rules with cached regex patterns.
///
/// Built once from the configured rules by [`SmartSelectionMatcher::new`], so the
/// regexes are not recompiled for every lookup.
pub struct SmartSelectionMatcher {
    /// Compiled rules sorted by precision (highest first); lookups try them in this order.
    rules: Vec<CompiledRule>,
}
16
/// One enabled rule whose pattern compiled successfully.
struct CompiledRule {
    /// Rule name copied from the configuration.
    // The name is stored when the rule is compiled but is not read back anywhere in
    // the code visible in this file, hence the lint allowance.
    #[allow(dead_code)]
    name: String,
    /// Compiled form of the rule's regex pattern.
    regex: Regex,
    /// Numeric precision of the rule; rules with higher values are tried first.
    precision: f64,
}
23
24impl SmartSelectionMatcher {
25    /// Create a new matcher from a list of smart selection rules
26    pub fn new(rules: &[SmartSelectionRule]) -> Self {
27        let mut compiled: Vec<CompiledRule> = rules
28            .iter()
29            .filter(|r| r.enabled)
30            .filter_map(|r| match Regex::new(&r.regex) {
31                Ok(regex) => Some(CompiledRule {
32                    name: r.name.clone(),
33                    regex,
34                    precision: r.precision.value(),
35                }),
36                Err(e) => {
37                    log::warn!(
38                        "Failed to compile smart selection regex '{}': {}",
39                        r.name,
40                        e
41                    );
42                    None
43                }
44            })
45            .collect();
46
47        // Sort by precision descending (highest first)
48        compiled.sort_by(|a, b| {
49            b.precision
50                .partial_cmp(&a.precision)
51                .unwrap_or(std::cmp::Ordering::Equal)
52        });
53
54        Self { rules: compiled }
55    }
56
57    /// Try to find a pattern match at the given character position in the line.
58    ///
59    /// Returns the start and end column indices (inclusive) if a match is found,
60    /// or None if no pattern matches at this position.
61    ///
62    /// # Arguments
63    /// * `line` - The full text of the line
64    /// * `col` - The column position (character index) where the cursor is
65    pub fn find_match_at(&self, line: &str, col: usize) -> Option<(usize, usize)> {
66        // Convert col to byte offset for regex matching
67        let byte_offset = char_to_byte_offset(line, col)?;
68
69        for rule in &self.rules {
70            // Find all matches in the line
71            for mat in rule.regex.find_iter(line) {
72                let match_start_byte = mat.start();
73                let match_end_byte = mat.end();
74
75                // Check if the cursor position is within this match
76                if byte_offset >= match_start_byte && byte_offset < match_end_byte {
77                    // Convert byte offsets back to character offsets
78                    let start_col = byte_to_char_offset(line, match_start_byte)?;
79                    let end_col = byte_to_char_offset(line, match_end_byte)?.saturating_sub(1);
80
81                    return Some((start_col, end_col));
82                }
83            }
84        }
85
86        None
87    }
88}
89
/// Convert a character offset to a byte offset in a UTF-8 string.
///
/// Returns the byte index at which the `char_offset`-th character starts. An offset
/// at or past the end of the string is clamped to `s.len()`.
///
/// The result is always `Some`; the `Option` return type is kept so existing callers
/// that use `?` on the result continue to compile unchanged.
fn char_to_byte_offset(s: &str, char_offset: usize) -> Option<usize> {
    // `nth` yields `None` exactly when `char_offset` is at or past the end of the
    // string, so no second pass over the string is needed to detect that case.
    Some(
        s.char_indices()
            .nth(char_offset)
            .map_or(s.len(), |(byte_idx, _)| byte_idx),
    )
}
104
/// Convert a byte offset to a character offset in a UTF-8 string.
///
/// Returns the number of characters that precede `byte_offset`, or `None` when the
/// offset is past the end of the string or does not fall on a character boundary.
fn byte_to_char_offset(s: &str, byte_offset: usize) -> Option<usize> {
    // `str::get` performs both the range check and the char-boundary check, so an
    // offset inside a multi-byte character yields `None` instead of panicking.
    s.get(..byte_offset).map(|prefix| prefix.chars().count())
}
112
/// Decide whether `ch` counts as part of a word for selection purposes.
///
/// A character belongs to a word when either:
/// - `char::is_alphanumeric` accepts it (this is Unicode-aware, so it covers more
///   than ASCII letters and digits), or
/// - it appears in the user-defined `word_characters` set.
///
/// Note: Unlike some terminals, underscore is NOT hardcoded as a word character.
/// It is included in the default word_characters setting (`/-+\~_.`) but can be
/// removed by the user for full control over word selection behavior.
pub fn is_word_char(ch: char, word_characters: &str) -> bool {
    if ch.is_alphanumeric() {
        return true;
    }
    word_characters.chars().any(|extra| extra == ch)
}
125
126/// Find word boundaries at the given position using configurable word characters.
127///
128/// Returns (start_col, end_col) as inclusive indices.
129pub fn find_word_boundaries(line: &str, col: usize, word_characters: &str) -> (usize, usize) {
130    let chars: Vec<char> = line.chars().collect();
131
132    if chars.is_empty() || col >= chars.len() {
133        return (col, col);
134    }
135
136    let mut start_col = col;
137    let mut end_col = col;
138
139    // Expand left
140    while start_col > 0 && is_word_char(chars[start_col - 1], word_characters) {
141        start_col -= 1;
142    }
143
144    // Make sure the clicked position is a word character, otherwise return single char
145    if !is_word_char(chars[col], word_characters) {
146        return (col, col);
147    }
148
149    // Expand right
150    while end_col < chars.len() - 1 && is_word_char(chars[end_col + 1], word_characters) {
151        end_col += 1;
152    }
153
154    (start_col, end_col)
155}
156
/// Cache for compiled smart selection matchers to avoid recompilation.
///
/// Holds at most one matcher together with the hash of the rules it was built from;
/// `get_matcher` rebuilds it when the hash of the supplied rules differs.
pub struct SmartSelectionCache {
    /// Cached matcher (recreated when rules change); `None` until the first lookup.
    matcher: Option<SmartSelectionMatcher>,
    /// Hash of the rules used to create the cached matcher (0 before any matcher is built).
    rules_hash: u64,
}
164
165impl Default for SmartSelectionCache {
166    fn default() -> Self {
167        Self::new()
168    }
169}
170
171impl SmartSelectionCache {
172    pub fn new() -> Self {
173        Self {
174            matcher: None,
175            rules_hash: 0,
176        }
177    }
178
179    /// Get or create a matcher for the given rules
180    pub fn get_matcher(&mut self, rules: &[SmartSelectionRule]) -> &SmartSelectionMatcher {
181        let hash = hash_rules(rules);
182
183        if self.rules_hash != hash || self.matcher.is_none() {
184            self.matcher = Some(SmartSelectionMatcher::new(rules));
185            self.rules_hash = hash;
186        }
187
188        self.matcher.as_ref().unwrap()
189    }
190}
191
192/// Simple hash for rules to detect changes
193fn hash_rules(rules: &[SmartSelectionRule]) -> u64 {
194    use std::collections::hash_map::DefaultHasher;
195    use std::hash::{Hash, Hasher};
196
197    let mut hasher = DefaultHasher::new();
198    for rule in rules {
199        rule.name.hash(&mut hasher);
200        rule.regex.hash(&mut hasher);
201        rule.enabled.hash(&mut hasher);
202        // Use precision ordinal for hashing
203        std::mem::discriminant(&rule.precision).hash(&mut hasher);
204    }
205    hasher.finish()
206}
207
#[cfg(test)]
mod tests {
    use super::*;
    use crate::config::{SmartSelectionPrecision, SmartSelectionRule};

    /// Email pattern shared by the default rule set and the precision-ordering test.
    const EMAIL_PATTERN: &str = r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b";

    /// Word characters used by the tests that exercise a non-empty configuration.
    const WORD_CHARS: &str = "/-+\\~_.";

    /// Rule set used by most tests: URL (very high), email (high), file path (normal).
    fn test_rules() -> Vec<SmartSelectionRule> {
        vec![
            SmartSelectionRule::new(
                "HTTP URL",
                r"https?://[^\s]+",
                SmartSelectionPrecision::VeryHigh,
            ),
            SmartSelectionRule::new("Email", EMAIL_PATTERN, SmartSelectionPrecision::High),
            SmartSelectionRule::new(
                "File path",
                r"~?/?(?:[a-zA-Z0-9._-]+/)+[a-zA-Z0-9._-]+/?",
                SmartSelectionPrecision::Normal,
            ),
        ]
    }

    /// Matcher built from `test_rules`.
    fn default_matcher() -> SmartSelectionMatcher {
        SmartSelectionMatcher::new(&test_rules())
    }

    /// Text selected by `find_word_boundaries` when clicking at `col`.
    fn word_at(line: &str, col: usize, word_chars: &str) -> String {
        let (start, end) = find_word_boundaries(line, col, word_chars);
        let chars: Vec<char> = line.chars().collect();
        chars[start..=end].iter().collect()
    }

    #[test]
    fn test_find_url_match() {
        let matcher = default_matcher();
        let line = "Check out https://example.com/path for more info";

        // Clicking on the 'h' of "https" or the 'e' of "example" selects the whole URL.
        for col in [10, 18] {
            assert_eq!(matcher.find_match_at(line, col), Some((10, 33)));
        }

        // The 'C' of "Check" is outside the URL.
        assert_eq!(matcher.find_match_at(line, 0), None);
    }

    #[test]
    fn test_find_email_match() {
        let matcher = default_matcher();
        let line = "Contact user@example.com for help";

        // Clicking on the 'u' of "user" or on the '@' selects the whole address.
        for col in [8, 12] {
            assert_eq!(matcher.find_match_at(line, col), Some((8, 23)));
        }
    }

    #[test]
    fn test_find_path_match() {
        let matcher = default_matcher();
        let line = "Edit ~/Documents/file.txt and save";

        // Clicking on the 'D' of "Documents" selects the whole path.
        assert_eq!(matcher.find_match_at(line, 7), Some((5, 24)));
    }

    #[test]
    fn test_word_boundaries_default() {
        let line = "hello_world test-case foo.bar";

        // Clicking on the 'w' of "world".
        assert_eq!(word_at(line, 6, WORD_CHARS), "hello_world");

        // Clicking on the first 't' of "test".
        assert_eq!(word_at(line, 12, WORD_CHARS), "test-case");
    }

    #[test]
    fn test_word_boundaries_empty_config() {
        let line = "hello_world test-case";

        // With no extra word characters only alphanumerics extend a word; underscore
        // is not hardcoded, so selection stops at '_' and at '-'.
        let cases = [
            (6, "world"), // 'w' of "world" stops at the underscore
            (0, "hello"), // 'h' of "hello" stops at the underscore
            (12, "test"), // 't' of "test" stops at the hyphen
        ];
        for (col, expected) in cases {
            assert_eq!(word_at(line, col, ""), expected);
        }
    }

    #[test]
    fn test_is_word_char() {
        for ch in ['a', 'Z', '5', '_', '-', '/', '.'] {
            assert!(is_word_char(ch, WORD_CHARS));
        }

        for ch in [' ', '@', '!'] {
            assert!(!is_word_char(ch, WORD_CHARS));
        }
    }

    #[test]
    fn test_unicode_handling() {
        let matcher = default_matcher();
        let line = "日本語 https://example.com 中文";

        // The URL starts at character position 4 (after "日本語 ") and is 19
        // characters long, so its inclusive end is 4 + 19 - 1 = 22.
        assert_eq!(matcher.find_match_at(line, 4), Some((4, 22)));
    }

    #[test]
    fn test_disabled_rule() {
        let mut rules = test_rules();
        rules[0].enabled = false; // Disable URL rule

        let matcher = SmartSelectionMatcher::new(&rules);
        let line = "Check out https://example.com for more info";

        // URL rule is disabled, so no match
        assert_eq!(matcher.find_match_at(line, 10), None);
    }

    #[test]
    fn test_precision_ordering() {
        // The broad low-precision rule is listed first on purpose: the matcher must
        // still prefer the high-precision email rule.
        let rules = vec![
            SmartSelectionRule::new("Whitespace-bounded", r"\S+", SmartSelectionPrecision::Low),
            SmartSelectionRule::new("Email", EMAIL_PATTERN, SmartSelectionPrecision::High),
        ];

        let matcher = SmartSelectionMatcher::new(&rules);
        let line = "Contact user@example.com for help";

        // Should match email (higher precision) not the whole word
        assert_eq!(matcher.find_match_at(line, 12), Some((8, 23)));
    }
}