dictx-index 0.1.0

Index builder and binary entry storage for DictX.
Documentation
/// Expands `input` with space-separated CJK n-grams for search indexing.
///
/// The result always starts with `input` verbatim. If `input` contains at
/// least one character accepted by [`is_cjk`], a single space is appended,
/// followed by every unigram, then every bigram, then every trigram of the
/// CJK characters, each n-gram terminated by one space (so the result ends
/// with a trailing space in that case).
///
/// Non-CJK characters are dropped *before* the n-grams are formed, so a
/// bigram or trigram may join CJK characters that were separated by other
/// text in `input`.
pub fn expand_for_search(input: &str) -> String {
    let mut expanded = String::with_capacity(input.len() * 2);
    expanded.push_str(input);

    let ideographs: Vec<char> = input.chars().filter(|&c| is_cjk(c)).collect();
    if ideographs.is_empty() {
        return expanded;
    }

    expanded.push(' ');
    // Window sizes 1, 2 and 3 produce the unigrams, bigrams and trigrams in
    // that order; `windows(n)` yields nothing when fewer than `n` characters
    // are available.
    for size in 1..=3 {
        for gram in ideographs.windows(size) {
            expanded.extend(gram);
            expanded.push(' ');
        }
    }
    expanded
}

/// Returns `true` if `ch` is a Han (CJK) ideograph.
///
/// Covers the CJK Unified Ideographs block, its Extensions A through I, and
/// both compatibility-ideograph blocks. Kana, Hangul, CJK punctuation and
/// unassigned gaps between the blocks are not matched.
pub fn is_cjk(ch: char) -> bool {
    matches!(
        ch,
        // CJK Unified Ideographs Extension A
        '\u{3400}'..='\u{4DBF}'
            // CJK Unified Ideographs
            | '\u{4E00}'..='\u{9FFF}'
            // CJK Compatibility Ideographs
            | '\u{F900}'..='\u{FAFF}'
            // Extension B
            | '\u{20000}'..='\u{2A6DF}'
            // Extension C
            | '\u{2A700}'..='\u{2B73F}'
            // Extension D
            | '\u{2B740}'..='\u{2B81F}'
            // Extension E
            | '\u{2B820}'..='\u{2CEAF}'
            // Extension F
            | '\u{2CEB0}'..='\u{2EBEF}'
            // Extension I
            | '\u{2EBF0}'..='\u{2EE5F}'
            // CJK Compatibility Ideographs Supplement
            | '\u{2F800}'..='\u{2FA1F}'
            // Extension G
            | '\u{30000}'..='\u{3134F}'
            // Extension H
            | '\u{31350}'..='\u{323AF}'
    )
}

#[cfg(test)]
mod tests {
    use super::*;

    /// The expansion must emit each CJK character and each adjacent pair as
    /// its own space-delimited token.
    ///
    /// Tokens are compared exactly rather than with `str::contains`, because
    /// a substring check would be satisfied by the echoed input alone (and
    /// `contains("")` is true for every string).
    #[test]
    fn expands_cjk_unigrams_and_bigrams() {
        let text = expand_for_search("苹果");
        let tokens: Vec<&str> = text.split_whitespace().collect();
        assert!(tokens.contains(&"苹"));
        assert!(tokens.contains(&"果"));
        // The bigram appears twice: once as the echoed input, once as an n-gram.
        assert_eq!(tokens.iter().filter(|t| **t == "苹果").count(), 2);
    }

    /// Three CJK characters additionally yield one trigram token.
    #[test]
    fn expands_cjk_trigrams() {
        let text = expand_for_search("苹果汁");
        let tokens: Vec<&str> = text.split_whitespace().collect();
        assert!(tokens.contains(&"果汁"));
        assert_eq!(tokens.iter().filter(|t| **t == "苹果汁").count(), 2);
    }

    /// Input without CJK characters is returned unchanged.
    #[test]
    fn leaves_non_cjk_input_untouched() {
        assert_eq!(expand_for_search("apple"), "apple");
        assert_eq!(expand_for_search(""), "");
    }
}