use fasttext::FastText;
use thiserror::Error;
#[derive(Error, Debug)]
pub enum FastTextError {
#[error("Failed to load FastText model: {0}")]
FastText(String),
}
/// Produces string tags for text using a wrapped [`FastText`] model.
pub struct Tagger {
    /// The underlying fastText model that predictions are requested from.
    model: FastText,
}
impl Tagger {
    /// Creates a tagger backed by the fastText model stored at `model_path`.
    ///
    /// # Errors
    ///
    /// Returns [`FastTextError::FastText`] carrying the stringified loader
    /// error when `load_model` fails.
    pub fn new(model_path: &str) -> Result<Self, FastTextError> {
        let mut model = FastText::new();
        model
            .load_model(model_path)
            .map_err(|e| FastTextError::FastText(e.to_string()))?;
        Ok(Self { model })
    }

    /// Predicts labels for `text` and returns them as tags.
    ///
    /// Line breaks (`\n` and `\r`) are replaced by spaces before the text is
    /// handed to the model. The model is queried with `5` and `0.0` as the
    /// second and third `predict` arguments (presumably the maximum number of
    /// predictions and the probability threshold; confirm against the
    /// `fasttext` crate).
    ///
    /// The leading `__label__` marker is stripped from every predicted label.
    /// The first prediction is emitted as `lang:<label>`; all following ones
    /// are emitted as the bare label.
    ///
    /// Returns an empty vector when the text is blank after sanitising or
    /// when the model reports an error; this method never fails.
    pub fn generate_tags(&self, text: String) -> Vec<String> {
        const LABEL_PREFIX: &str = "__label__";

        // Single pass over the input instead of one `replace` per character.
        let clean_text = text.replace(|c: char| matches!(c, '\n' | '\r'), " ");
        if clean_text.trim().is_empty() {
            return Vec::new();
        }

        self.model
            .predict(&clean_text, 5, 0.0)
            .map(|predictions| {
                predictions
                    .iter()
                    .enumerate()
                    .map(|(index, prediction)| {
                        let label: &str = &prediction.label;
                        // Only the leading marker is removed, so a label that
                        // happens to contain `__label__` further in is kept
                        // intact rather than silently mangled.
                        let raw_label = label.strip_prefix(LABEL_PREFIX).unwrap_or(label);
                        if index == 0 {
                            format!("lang:{}", raw_label)
                        } else {
                            raw_label.to_owned()
                        }
                    })
                    .collect::<Vec<String>>()
            })
            // Prediction failures are deliberately mapped to "no tags".
            .unwrap_or_default()
    }
}