whichlang 0.1.1

A blazingly fast and lightweight language detection library for Rust.
Documentation
use std::io;
use std::io::BufRead;
use std::io::BufReader;
use std::io::BufWriter;
use std::io::Write;

use whichlang::emit_tokens;
use whichlang::DIMENSION;

fn main() -> io::Result<()> {
    let language_codes = whichlang::LANGUAGES
        .iter()
        .map(|lang| lang.three_letter_code())
        .collect::<Vec<&str>>();
    let mut line = String::new();
    let mut features: Vec<usize> = vec![0; whichlang::DIMENSION];
    let stdin = io::stdin();
    let mut stdinlocked = BufReader::new(stdin.lock());
    let stdout = io::stdout();
    let mut stdoutlock = BufWriter::new(stdout.lock());
    write!(&mut stdoutlock, "id,label")?;
    for i in 0..whichlang::DIMENSION {
        write!(&mut stdoutlock, ",feature{i}")?;
    }
    writeln!(&mut stdoutlock)?;
    loop {
        line.clear();
        if stdinlocked.read_line(&mut line)? == 0 {
            break;
        }
        let trimmed_line = line.trim_end().trim_matches('\\');
        features.fill(0);
        let id_label_sentence: Vec<&str> = trimmed_line.splitn(3, "\t").collect();
        if !language_codes.contains(&id_label_sentence[1]) {
            continue;
        }
        let sentence: &str = id_label_sentence[2];
        emit_tokens(sentence, |token| {
            features[token.to_hash() as usize % DIMENSION] += 1;
        });
        write!(
            &mut stdoutlock,
            "{},{}",
            id_label_sentence[0], id_label_sentence[1]
        )?;
        for &feature in &features {
            write!(&mut stdoutlock, ",{feature}")?;
        }
        writeln!(&mut stdoutlock)?;
        line.clear();
    }
    Ok(())
}