broot 1.6.2 - Docs.rs

use {
    super::NameMatch,
    secular,
    smallvec::{smallvec, SmallVec},
    std::{
        cmp::Reverse,
        ops::Range,
    },
};

type CandChars = SmallVec<[char; 32]>;

static SEPARATORS: &[char] = &[',', ';'];

// weights used in match score computing
const BONUS_MATCH: i32 = 50_000;
const BONUS_CANDIDATE_LENGTH: i32 = -1; // per char

pub fn norm_chars(s: &str) -> Box<[char]> {
    secular::normalized_lower_lay_string(s)
        .chars()
        .collect::<Vec<char>>()
        .into_boxed_slice()
}

/// a list of tokens we want to find, non overlapping
/// and in any order, in strings
#[derive(Debug, Clone, PartialEq)]
pub struct TokPattern {
    toks: Vec<Box<[char]>>,
    sum_len: usize,
}

// scoring basis ?
// - number of parts of the candidats (separated by / for example)
//   that are touched by a tok ?
// - malus for adjacent ranges
// - bonus for ranges starting just after a separator
// - bonus for order ?
impl TokPattern {

    pub fn new(pattern: &str) -> Self {
        // we accept several separators. The first one
        // we encounter among the possible ones is the
        // separator of the whole. This allows using the
        // other char: In ";ab,er", the comma isn't seen
        // as a separator but as part of a tok
        let sep = pattern.chars().find(|c| SEPARATORS.contains(c));
        let mut toks: Vec<Box<[char]>> = if let Some(sep) = sep {
            pattern.split(sep)
                .filter(|s| !s.is_empty())
                .map(norm_chars)
                .collect()
        } else {
            if pattern.is_empty() {
                Vec::new()
            } else {
                vec![norm_chars(pattern)]
            }
        };
        // we sort the tokens from biggest to smallest
        // because the current algorithm stops at the
        // first match for any tok. Thus it would fail
        // to find "abc,b" in "abcdb" if it looked first
        // at the "b" token
        toks.sort_by_key(|t| Reverse(t.len()));
        let sum_len = toks.iter().map(|s| s.len()).sum();
        Self {
            toks,
            sum_len,
        }
    }

    /// an "empty" pattern is one which accepts everything because
    /// it has no discriminant
    pub fn is_empty(&self) -> bool {
        self.sum_len == 0
    }

    /// return either None (no match) or a vec whose size is the number
    /// of tokens
    fn find_ranges(&self, candidate: &str) -> Option<Vec<Range<usize>>> {
        if candidate.len() < self.sum_len || self.sum_len == 0 {
            return None;
        }
        let mut cand_chars: CandChars = SmallVec::with_capacity(candidate.len());
        cand_chars.extend(candidate.chars().map(secular::lower_lay_char));
        // we first look for the first tok, it's simpler
        let first_tok = &self.toks[0];
        let l = first_tok.len();
        let first_matching_range = (0..cand_chars.len()+1-l)
            .map(|idx| idx..idx+l)
            .find(|r| {
                &cand_chars[r.start..r.end] == first_tok.as_ref()
            });
        // we initialize the vec only when the first tok is found
        first_matching_range
            .and_then(|first_matching_range| {
                let mut matching_ranges = vec![first_matching_range];
                for tok in self.toks.iter().skip(1) {
                    let l = tok.len();
                    let matching_range = (0..cand_chars.len()+1-l)
                        .map(|idx| idx..idx+l)
                        .filter(|r| {
                            &cand_chars[r.start..r.end] == tok.as_ref()
                        })
                        .find(|r| {
                            // check we're not intersecting a previous range
                            for pr in &matching_ranges {
                                if pr.contains(&r.start) || pr.contains(&(r.end-1)) {
                                    return false;
                                }
                            }
                            true
                        });
                    if let Some(r) = matching_range {
                        matching_ranges.push(r);
                    } else {
                        return None;
                    }
                }
                Some(matching_ranges)
            })
    }

    fn score_of_matching(&self, candidate: &str) -> i32 {
        BONUS_MATCH + BONUS_CANDIDATE_LENGTH * candidate.len() as i32
    }

    /// note that it should not be called on empty patterns
    pub fn find(&self, candidate: &str) -> Option<NameMatch> {
        self.find_ranges(candidate)
            .map(|matching_ranges| {
                let mut pos = smallvec![0; self.sum_len];
                let mut i = 0;
                for r in matching_ranges {
                    for p in r {
                        pos[i] = p;
                        i += 1;
                    }
                }
                pos.sort_unstable();
                let score = self.score_of_matching(candidate);
                NameMatch { score, pos }
            })
    }

    /// compute the score of the best match
    /// Note that it should not be called on empty patterns
    pub fn score_of(&self, candidate: &str) -> Option<i32> {
        self.find_ranges(candidate)
            .map(|_| self.score_of_matching(candidate))
    }
}

#[cfg(test)]
mod tok_pattern_tests {

    use {
        super::*,
        crate::pattern::Pos,
    };

    /// check position of the match of the pattern in name
    fn check_pos(pattern: &str, name: &str, pos: &str) {
        println!("checking pattern={:?} name={:?}", pattern, name);
        let pat = TokPattern::new(pattern);
        let match_pos = pat.find(name).unwrap().pos;
        let target_pos: Pos = pos.chars()
            .enumerate()
            .filter(|(_, c)| *c=='^')
            .map(|(i, _)| i)
            .collect();
        assert_eq!(match_pos, target_pos);
    }

    #[test]
    fn check_match_pos() {
        check_pos(
            "m,",
            "miaou",
            "^   ",
        );
        check_pos(
            "bat",
            "cabat",
            "  ^^^",
        );
        check_pos(
            ";ba",
            "babababaaa",
            "^^        ",
        );
        check_pos(
            "ba,ca",
            "bababacaa",
            "^^    ^^ ",
        );
        check_pos(
            "sub,doc,2",
            "/home/user/path2/subpath/Documents/",
            "               ^ ^^^     ^^^",
        );
        check_pos(
            "ab,abc",
            "0123/abc/ab/cdg",
            "     ^^^ ^^    ",
        );
    }

    fn check_match(pattern: &str, name: &str, do_match: bool) {
        assert_eq!(
            TokPattern::new(pattern).find(name).is_some(),
            do_match,
        );
    }

    #[test]
    fn test_separators() {
        let a = TokPattern::new("ab;cd;ef");
        let b = TokPattern::new("ab,cd,ef");
        assert_eq!(a, b);
        let a = TokPattern::new(",ab;cd;ef");
        assert_eq!(a.toks.len(), 1);
        assert_eq!(a.toks[0].len(), 8);
        let a = TokPattern::new(";ab,cd,ef;");
        assert_eq!(a.toks.len(), 1);
        assert_eq!(a.toks[0].len(), 8);
    }

    #[test]
    fn test_match() {
        check_match("mia", "android/phonegap", false);
        check_match("mi", "a", false);
        check_match("mi", "π", false);
        check_match("mi", "miaou/a", true);
    }

    #[test]
    fn test_tok_repetitions() {
        check_match("sub", "rasub", true);
        check_match("sub,sub", "rasub", false);
        check_match("sub,sub", "rasubandsub", true);
        check_match("sub,sub,sub", "rasubandsub", false);
        check_match("ghi,abc,def,ccc", "abccc/Defghi", false);
        check_match("ghi,abc,def,ccc", "abcccc/Defghi", true);
    }

}