use crate::Scoring;
const DELIMITERS: [u8; 7] = [b' ', b'/', b'.', b',', b'_', b'-', b':'];
pub fn match_greedy<S1: AsRef<str>, S2: AsRef<str>>(
needle: S1,
haystack: S2,
scoring: &Scoring,
) -> (u16, Vec<usize>, bool) {
let needle = needle.as_ref().as_bytes();
let haystack = haystack.as_ref().as_bytes();
let mut score = 0;
let mut indices = vec![];
let mut haystack_idx = 0;
let mut delimiter_bonus_enabled = false;
let mut previous_haystack_is_lower = false;
let mut previous_haystack_is_delimiter = false;
'outer: for needle_idx in 0..needle.len() {
let needle_char = needle[needle_idx];
let needle_is_upper = (65..=90).contains(&needle_char);
let needle_is_lower = (97..=122).contains(&needle_char);
let needle_lower_char = if needle_is_upper {
needle_char + 32
} else {
needle_char
};
let needle_upper_char = if needle_is_lower {
needle_char - 32
} else {
needle_char
};
let haystack_start_idx = haystack_idx;
while haystack_idx <= (haystack.len() - needle.len() + needle_idx) {
let haystack_char = haystack[haystack_idx];
let haystack_is_delimiter = DELIMITERS.contains(&haystack_char);
let haystack_is_upper = (65..=90).contains(&haystack_char);
let haystack_is_lower = (97..=122).contains(&haystack_char);
if !haystack_is_delimiter {
delimiter_bonus_enabled = true;
}
if needle_lower_char != haystack_char && needle_upper_char != haystack_char {
previous_haystack_is_delimiter = delimiter_bonus_enabled && haystack_is_delimiter;
previous_haystack_is_lower = haystack_is_lower;
haystack_idx += 1;
continue;
}
score += scoring.match_score;
if haystack_idx != haystack_start_idx && needle_idx != 0 {
score = score.saturating_sub(
scoring.gap_open_penalty
+ scoring.gap_extend_penalty
* (haystack_idx - haystack_start_idx).saturating_sub(1) as u16,
);
}
if needle_char == haystack_char {
score += scoring.matching_case_bonus;
}
if haystack_is_upper && previous_haystack_is_lower {
score += scoring.capitalization_bonus;
}
if haystack_idx == 0 {
score += scoring.prefix_bonus;
}
if previous_haystack_is_delimiter && !haystack_is_delimiter {
score += scoring.delimiter_bonus;
}
previous_haystack_is_delimiter = delimiter_bonus_enabled && haystack_is_delimiter;
previous_haystack_is_lower = haystack_is_lower;
indices.push(haystack_idx);
haystack_idx += 1;
continue 'outer;
}
return (0, vec![], false);
}
let exact = haystack == needle;
if exact {
score += scoring.exact_match_bonus;
}
(score, indices, exact)
}
#[cfg(test)]
mod tests {
use super::*;
use crate::r#const::*;
const CHAR_SCORE: u16 = MATCH_SCORE + MATCHING_CASE_BONUS;
fn get_score(needle: &str, haystack: &str) -> u16 {
match_greedy(needle, haystack, &Scoring::default()).0
}
#[test]
fn test_score_basic() {
assert_eq!(get_score("b", "abc"), CHAR_SCORE);
assert_eq!(get_score("c", "abc"), CHAR_SCORE);
assert_eq!(
get_score("fbb", "barbazfoobarbaz"),
CHAR_SCORE - GAP_OPEN_PENALTY - GAP_EXTEND_PENALTY + CHAR_SCORE
- GAP_OPEN_PENALTY
- GAP_EXTEND_PENALTY
+ CHAR_SCORE
);
}
#[test]
fn test_no_match() {
assert_eq!(get_score("a", "b"), 0);
assert_eq!(get_score("ab", "ba"), 0);
}
#[test]
fn test_score_prefix() {
assert_eq!(get_score("a", "abc"), CHAR_SCORE + PREFIX_BONUS);
assert_eq!(get_score("a", "aabc"), CHAR_SCORE + PREFIX_BONUS);
assert_eq!(get_score("a", "babc"), CHAR_SCORE);
}
#[test]
fn test_score_exact_match() {
assert_eq!(
get_score("a", "a"),
CHAR_SCORE + EXACT_MATCH_BONUS + PREFIX_BONUS
);
assert_eq!(
get_score("abc", "abc"),
3 * CHAR_SCORE + EXACT_MATCH_BONUS + PREFIX_BONUS
);
}
#[test]
fn test_score_delimiter() {
assert_eq!(get_score("-", "a--bc"), CHAR_SCORE);
assert_eq!(get_score("b", "a-b"), CHAR_SCORE + DELIMITER_BONUS);
assert_eq!(get_score("a", "a-b-c"), CHAR_SCORE + PREFIX_BONUS);
assert_eq!(get_score("b", "a--b"), CHAR_SCORE + DELIMITER_BONUS);
assert_eq!(get_score("c", "a--bc"), CHAR_SCORE);
assert_eq!(get_score("a", "-a--bc"), CHAR_SCORE);
}
#[test]
fn test_score_no_delimiter_for_delimiter_chars() {
assert_eq!(get_score("-", "a-bc"), CHAR_SCORE);
assert_eq!(get_score("-", "a--bc"), CHAR_SCORE);
assert!(get_score("a_b", "a_bb") > get_score("a_b", "a__b"));
}
#[test]
fn test_score_affine_gap() {
assert_eq!(
get_score("test", "Uterst"),
CHAR_SCORE * 4 - GAP_OPEN_PENALTY
);
assert_eq!(
get_score("test", "Uterrst"),
CHAR_SCORE * 4 - GAP_OPEN_PENALTY - GAP_EXTEND_PENALTY
);
}
#[test]
fn test_score_capital_bonus() {
assert_eq!(get_score("a", "A"), MATCH_SCORE + PREFIX_BONUS);
assert_eq!(get_score("A", "Aa"), CHAR_SCORE + PREFIX_BONUS);
assert_eq!(
get_score("d", "forDist"),
MATCH_SCORE + CAPITALIZATION_BONUS
);
assert_eq!(get_score("D", "forDist"), CHAR_SCORE + CAPITALIZATION_BONUS);
assert_eq!(get_score("D", "foRDist"), CHAR_SCORE);
assert_eq!(get_score("D", "FOR_DIST"), CHAR_SCORE + DELIMITER_BONUS);
}
#[test]
fn test_score_prefix_beats_delimiter() {
assert!(get_score("swap", "swap(test)") > get_score("swap", "iter_swap(test)"));
assert!(get_score("_", "_private_member") > get_score("_", "public_member"));
}
}