neco-fuzzy 0.2.0

Minimal fuzzy score core for commands, paths, and short identifiers
Documentation
#[derive(Debug, Clone, Copy, PartialEq)]
pub(crate) struct CandidateChar {
    pub(crate) byte: usize,
    pub(crate) ch: char,
    pub(crate) boundary: f32,
}

pub(crate) fn compute_boundary(previous: Option<char>, current: char, char_index: usize) -> f32 {
    if char_index == 0 {
        return 1.0;
    }

    let Some(prev) = previous else {
        return 0.0;
    };

    if prev == '/' {
        return 1.0;
    }

    if matches!(prev, '_' | '-' | '.' | ' ') {
        return 0.76;
    }

    if prev.is_lowercase() && current.is_uppercase() {
        return 0.8;
    }

    if (prev.is_ascii_digit() && current.is_ascii_alphabetic())
        || (prev.is_ascii_alphabetic() && current.is_ascii_digit())
    {
        return 0.6;
    }

    0.0
}

pub(crate) type PreparedParts = (
    Vec<CandidateChar>,
    usize,
    Option<Vec<u8>>,
    Option<Vec<u8>>,
    u64,
);

pub(crate) fn build_prepared_parts(candidate: &str) -> PreparedParts {
    let mut chars = Vec::with_capacity(candidate.chars().count());
    let mut previous = None;
    for (char_index, (byte, ch)) in candidate.char_indices().enumerate() {
        chars.push(CandidateChar {
            byte,
            ch,
            boundary: compute_boundary(previous, ch, char_index),
        });
        previous = Some(ch);
    }

    let basename_start_byte = candidate.rfind('/').map_or(0, |index| index + 1);
    let basename_start_char = chars
        .iter()
        .position(|slot| slot.byte >= basename_start_byte)
        .unwrap_or(chars.len());
    let ascii_bytes = candidate.is_ascii().then(|| candidate.as_bytes().to_vec());
    let ascii_folded = ascii_bytes
        .as_ref()
        .map(|bytes| bytes.iter().map(u8::to_ascii_lowercase).collect());

    (
        chars,
        basename_start_char,
        ascii_bytes,
        ascii_folded,
        candidate_fingerprint(candidate),
    )
}

pub fn candidate_fingerprint(candidate: &str) -> u64 {
    let mut state = 0xcbf2_9ce4_8422_2325u64;
    for &byte in candidate.as_bytes() {
        state ^= u64::from(byte);
        state = state.wrapping_mul(0x0000_0100_0000_01b3);
    }
    state
}

#[cfg(test)]
mod tests {
    use super::{build_prepared_parts, candidate_fingerprint, compute_boundary};

    #[test]
    fn computes_expected_boundary_weights() {
        assert_eq!(compute_boundary(Some('a'), 'B', 1), 0.8);
        assert_eq!(compute_boundary(Some('1'), 'a', 1), 0.6);
        assert_eq!(compute_boundary(None, 'x', 0), 1.0);
        assert_eq!(compute_boundary(Some('/'), 'x', 5), 1.0);
        assert_eq!(compute_boundary(Some('a'), 'b', 1), 0.0);
    }

    #[test]
    fn prepared_parts_keep_basename_start_and_ascii_views() {
        let (chars, basename_start_char, ascii_bytes, ascii_folded, fingerprint) =
            build_prepared_parts("src/FooBar-v2.rs");

        assert_eq!(basename_start_char, 4);
        assert_eq!(ascii_bytes.as_deref(), Some(b"src/FooBar-v2.rs".as_slice()));
        assert_eq!(
            ascii_folded.as_deref(),
            Some(b"src/foobar-v2.rs".as_slice())
        );
        assert_eq!(fingerprint, 0xd662_cea5_1829_1d93);

        let boundaries: Vec<(usize, char, f32)> = chars
            .into_iter()
            .map(|slot| (slot.byte, slot.ch, slot.boundary))
            .collect();
        assert_eq!(
            boundaries,
            vec![
                (0, 's', 1.0),
                (1, 'r', 0.0),
                (2, 'c', 0.0),
                (3, '/', 0.0),
                (4, 'F', 1.0),
                (5, 'o', 0.0),
                (6, 'o', 0.0),
                (7, 'B', 0.8),
                (8, 'a', 0.0),
                (9, 'r', 0.0),
                (10, '-', 0.0),
                (11, 'v', 0.76),
                (12, '2', 0.6),
                (13, '.', 0.0),
                (14, 'r', 0.76),
                (15, 's', 0.0),
            ]
        );
    }

    #[test]
    fn fingerprint_matches_fnv1a64() {
        assert_eq!(candidate_fingerprint(""), 0xcbf2_9ce4_8422_2325);
        assert_eq!(candidate_fingerprint("abc"), 0xe71f_a219_0541_574b);
        assert_eq!(
            candidate_fingerprint("src/FooBar-v2.rs"),
            0xd662_cea5_1829_1d93
        );
    }
}