Skip to main content

broot/pattern/
exact_pattern.rs

//! A simple exact pattern matcher for filename filtering and sorting.
//! It's not meant for file contents but for small strings (fewer than
//! 1000 chars) such as file names.
4
5use {
6    super::NameMatch,
7    smallvec::SmallVec,
8    std::{
9        fmt,
10        fs::File,
11        io::{
12            self,
13            BufRead,
14            BufReader,
15        },
16        path::Path,
17    },
18};
19
// Weights used when computing the score of a match
// (only the leftmost match of a candidate is ever scored).

/// Base score granted to any match.
const BONUS_MATCH: i32 = 50_000;
/// Added when the match starts at index 0 and the candidate has the same
/// byte length as the pattern.
const BONUS_EXACT: i32 = 1_000;
/// Added when the match starts at index 0 of the candidate.
const BONUS_START: i32 = 10;
/// Added when the match doesn't start at index 0 but immediately follows
/// a word separator.
const BONUS_START_WORD: i32 = 5;
/// Multiplied by the byte length of the candidate.
const BONUS_CANDIDATE_LENGTH: i32 = -1;
/// Multiplied by the byte index of the match when it doesn't start at
/// index 0.
const BONUS_DISTANCE_FROM_START: i32 = -1;
28
/// A pattern matching the strings which contain it verbatim
#[derive(Clone, Debug)]
pub struct ExactPattern {
    /// the searched text
    pattern: String,
    /// number of chars (not bytes) of `pattern`
    chars_count: usize,
}
35
36impl fmt::Display for ExactPattern {
37    fn fmt(
38        &self,
39        f: &mut fmt::Formatter<'_>,
40    ) -> fmt::Result {
41        self.pattern.fmt(f)
42    }
43}
44
/// Tell whether the byte is one of the ASCII characters considered as
/// separating words: underscore, space, hyphen or slash.
fn is_word_separator(c: u8) -> bool {
    b"_ -/".contains(&c)
}
48
49impl ExactPattern {
50    /// build a pattern which will later be usable for fuzzy search.
51    /// A pattern should be reused
52    pub fn from(pattern: &str) -> Self {
53        Self {
54            pattern: pattern.to_string(),
55            chars_count: pattern.chars().count(),
56        }
57    }
58
59    pub fn is_empty(&self) -> bool {
60        self.chars_count == 0
61    }
62
63    fn score(
64        &self,
65        start: usize,
66        candidate: &str,
67    ) -> i32 {
68        // start is the byte index
69        let mut score = BONUS_MATCH + BONUS_CANDIDATE_LENGTH * candidate.len() as i32;
70        if start == 0 {
71            score += BONUS_START;
72            if candidate.len() == self.pattern.len() {
73                score += BONUS_EXACT;
74            }
75        } else {
76            if is_word_separator(candidate.as_bytes()[start - 1]) {
77                score += BONUS_START_WORD;
78            }
79            score += BONUS_DISTANCE_FROM_START * start as i32;
80        }
81        score
82    }
83
84    /// return a match if the pattern can be found in the candidate string.
85    pub fn find(
86        &self,
87        candidate: &str,
88    ) -> Option<NameMatch> {
89        candidate.find(&self.pattern).map(|start| {
90            let score = self.score(start, candidate);
91            // we must find the start in chars, not bytes
92            for (char_idx, (byte_idx, _)) in candidate.char_indices().enumerate() {
93                if byte_idx == start {
94                    let mut pos = SmallVec::with_capacity(self.chars_count);
95                    for i in 0..self.chars_count {
96                        pos.push(i + char_idx);
97                    }
98                    return NameMatch { score, pos };
99                }
100            }
101            unreachable!(); // if there was a match, pos should have been reached
102        })
103    }
104
105    /// get the line of the first match, if any
106    /// (not used today, we use `content_pattern` to search in files)
107    pub fn try_get_match_line_count(
108        &self,
109        path: &Path,
110    ) -> io::Result<Option<usize>> {
111        let mut line_count = 1; // first line in text editors is 1
112        for line in BufReader::new(File::open(path)?).lines() {
113            let line = line?;
114            if line.contains(&self.pattern) {
115                return Ok(Some(line_count));
116            }
117            line_count = 1;
118        }
119        Ok(None)
120    }
121
122    /// get the line of the first match, if any
123    /// (not used today, we use `content_pattern` to search in files)
124    pub fn get_match_line_count(
125        &self,
126        path: &Path,
127    ) -> Option<usize> {
128        self.try_get_match_line_count(path).unwrap_or(None)
129    }
130
131    /// compute the score of the best match
132    pub fn score_of(
133        &self,
134        candidate: &str,
135    ) -> Option<i32> {
136        candidate
137            .find(&self.pattern)
138            .map(|start| self.score(start, candidate))
139    }
140}