neco-fuzzy 0.2.0

Minimal fuzzy score core for commands, paths, and short identifiers
Documentation
/// Corpus-wide statistics computed once from a set of candidate strings.
///
/// Stores an inverse-document-frequency weight for every ASCII byte value,
/// plus the number of candidates and their mean length in bytes.
#[derive(Debug, Clone, PartialEq)]
pub struct CorpusStats {
    /// IDF weight per ASCII byte; `0.0` for bytes that occur in no candidate.
    ascii_idf: [f32; 128],
    /// Number of candidates the statistics were built from.
    candidate_count: usize,
    /// Mean candidate length in bytes (`str::len`); `0.0` for an empty corpus.
    mean_length: f32,
}

impl CorpusStats {
    /// Builds the statistics for `candidates`.
    ///
    /// Every candidate is treated as one document: an ASCII byte contributes
    /// at most once per candidate to its document count, no matter how often
    /// it repeats inside that candidate. The stored weight is `ln(1 / df)`,
    /// where `df` is the fraction of candidates containing the byte, so a
    /// byte present in every candidate gets weight `0.0`.
    ///
    /// Non-ASCII bytes are left out of the table but still count toward the
    /// mean length. An empty slice produces all-zero statistics.
    pub fn from_candidates(candidates: &[&str]) -> Self {
        let candidate_count = candidates.len();
        if candidate_count == 0 {
            return Self {
                ascii_idf: [0.0; 128],
                candidate_count: 0,
                mean_length: 0.0,
            };
        }

        let mut containing = [0usize; 128];
        let mut byte_total = 0usize;

        for text in candidates {
            byte_total += text.len();

            // Tracks which ASCII bytes were already counted for this candidate,
            // so repeats inside one candidate do not inflate the document count.
            let mut marked = [false; 128];
            for slot in text.bytes().filter(u8::is_ascii).map(usize::from) {
                if !marked[slot] {
                    marked[slot] = true;
                    containing[slot] += 1;
                }
            }
        }

        let total = candidate_count as f32;
        let mut ascii_idf = [0.0f32; 128];

        for (weight, &hits) in ascii_idf.iter_mut().zip(containing.iter()) {
            // Bytes never seen keep weight 0.0 rather than ln(1/0).
            if hits != 0 {
                let document_frequency = (hits as f32) / total;
                *weight = document_frequency.recip().ln();
            }
        }

        Self {
            ascii_idf,
            candidate_count,
            mean_length: (byte_total as f32) / total,
        }
    }

    /// Returns the IDF weight for `byte`, or `0.0` when `byte` is not ASCII.
    pub fn idf(&self, byte: u8) -> f32 {
        self.ascii_idf
            .get(usize::from(byte))
            .copied()
            .unwrap_or(0.0)
    }

    /// Returns how many candidates the statistics were built from.
    pub fn candidate_count(&self) -> usize {
        self.candidate_count
    }

    /// Returns the mean candidate length in bytes.
    pub fn mean_length(&self) -> f32 {
        self.mean_length
    }
}