use std::path::Path;
use serde::Serialize;
/// Result of a single language-identification call.
///
/// Produced by `Detector::detect`; serializable through serde's `Serialize`.
#[derive(Debug, Clone, Serialize)]
pub struct Detection {
/// Label of the top prediction with any leading `__label__` prefix removed.
pub language: String,
/// The `prob` value the model reported for the top prediction.
pub confidence: f32,
}
/// Language detector backed by a loaded fastText model.
///
/// Build one with `Detector::open`, then call `Detector::detect` per input.
pub struct Detector {
// The loaded model; kept private so callers only go through `detect`.
model: fasttext::FastText,
}
impl Detector {
    /// Loads a fastText model from `model_path` and wraps it in a [`Detector`].
    ///
    /// # Errors
    ///
    /// Returns a human-readable message when:
    /// - `model_path` does not point at an existing regular file,
    /// - the fasttext crate fails to load the model, or
    /// - the loaded model reports itself as quantized (rejected on purpose,
    ///   see the message below for the rationale).
    pub fn open(model_path: &str) -> Result<Self, String> {
        // Check up front so a missing file yields a clear message instead of
        // whatever the loader would report.
        let model_exists = Path::new(model_path).is_file();
        if !model_exists {
            return Err(format!("model file not found: {model_path}"));
        }

        let model = match fasttext::FastText::load_model(model_path) {
            Ok(loaded) => loaded,
            Err(e) => return Err(format!("failed to load fastText model: {e}")),
        };

        if model.is_quant() {
            return Err(String::from(
                "quantized fastText models are not supported: the fasttext crate's \
                 quantized inference diverges from upstream (0.8.0); use the dense \
                 lid.176.bin instead",
            ));
        }

        Ok(Self { model })
    }

    /// Identifies the language of `text` and returns the top prediction.
    ///
    /// Newlines in `text` are replaced by spaces and surrounding whitespace is
    /// trimmed before the text is handed to the model.
    ///
    /// # Errors
    ///
    /// Returns `"no prediction"` when the model yields no predictions.
    pub fn detect(&self, text: &str) -> Result<Detection, String> {
        // Flatten to a single line before prediction.
        // NOTE(review): presumably fastText treats a newline as end of input — confirm.
        let single_line = text.replace('\n', " ");

        // NOTE(review): the arguments `1` and `0.0` look like "top-1" and
        // "no probability threshold" — confirm against the fasttext crate docs.
        let results = self.model.predict(single_line.trim(), 1, 0.0);

        let best = match results.first() {
            Some(prediction) => prediction,
            None => return Err("no prediction".to_string()),
        };

        // Drop the `__label__` marker when present; otherwise keep the label as-is.
        let raw_label: &str = &best.label;
        let language = match raw_label.strip_prefix("__label__") {
            Some(code) => code,
            None => raw_label,
        }
        .to_string();

        Ok(Detection {
            language,
            confidence: best.prob,
        })
    }
}