use rayon::prelude::*;
use crate::classify::errors::Result;
use crate::classify::rules::RuleSet;
use crate::classify::tiers::exact::ExactMatcher;
use crate::classify::tiers::fuzzy::FuzzyClassifier;
use crate::classify::tiers::llm::LlmClassifier;
use crate::classify::tiers::regex_tier::RegexMatcher;
use crate::classify::tiers::ClassificationResult;
use crate::core::models::ClassificationMethod;
/// Settings that control how a `ClassificationEngine` is assembled.
#[derive(Debug, Clone)]
pub struct ClassificationEngineConfig {
    /// When `true`, `ClassificationEngine::new` builds the LLM tier;
    /// when `false`, the engine has no LLM tier.
    pub use_llm: bool,
    /// Model identifier handed to `LlmClassifier::new` when `use_llm` is set.
    pub llm_model: String,
    /// Confidence threshold carried alongside the other settings.
    // NOTE(review): the engine code in this file stores this value but does
    // not read it — confirm where the threshold is actually enforced.
    pub confidence_threshold: f64,
}

impl Default for ClassificationEngineConfig {
    /// Returns the baseline configuration: LLM tier disabled, model
    /// `"gpt-4o-mini"`, and a confidence threshold of `0.7`.
    fn default() -> Self {
        ClassificationEngineConfig {
            confidence_threshold: 0.7,
            llm_model: String::from("gpt-4o-mini"),
            use_llm: false,
        }
    }
}
/// Classifier that tries several tiers in turn: exact rules, regex rules,
/// fuzzy matching, and (when configured) an LLM.
///
/// All fields are private; build an instance with `ClassificationEngine::new`.
pub struct ClassificationEngine {
/// Exact-rule tier, built from the rule set and consulted first.
exact: ExactMatcher,
/// Regex-rule tier, built from the same rule set and consulted second.
regex: RegexMatcher,
/// Fuzzy tier, consulted after both rule tiers miss.
fuzzy: FuzzyClassifier,
/// LLM tier; `None` unless `config.use_llm` was set at construction time.
llm: Option<LlmClassifier>,
/// The configuration the engine was built with (exposed via `config()`).
config: ClassificationEngineConfig,
}
impl ClassificationEngine {
    /// Builds an engine from a rule set and a configuration.
    ///
    /// The exact and regex tiers are both constructed from `ruleset.rules`.
    /// When `config.use_llm` is set, an LLM tier is created for
    /// `config.llm_model`; the API key is read from the `OPENAI_API_KEY`
    /// environment variable and passed along as `None` if the variable
    /// cannot be read.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by `ExactMatcher::new` or
    /// `RegexMatcher::new`.
    pub fn new(ruleset: RuleSet, config: ClassificationEngineConfig) -> Result<Self> {
        let exact = ExactMatcher::new(&ruleset.rules)?;
        let regex = RegexMatcher::new(&ruleset.rules)?;
        let fuzzy = FuzzyClassifier;
        let llm = if config.use_llm {
            // A missing or unreadable key is deliberately not an error here;
            // the LLM classifier receives `None` and decides what to do.
            let api_key = std::env::var("OPENAI_API_KEY").ok();
            Some(LlmClassifier::new(&config.llm_model, api_key))
        } else {
            None
        };
        Ok(Self {
            exact,
            regex,
            fuzzy,
            llm,
            config,
        })
    }

    /// Returns the configuration this engine was built with.
    pub fn config(&self) -> &ClassificationEngineConfig {
        &self.config
    }

    /// Classifies `message` using only the synchronous tiers.
    ///
    /// Tiers are tried in order — exact rules, regex rules, then the fuzzy
    /// classifier — and the first hit wins. `is_merge` is forwarded to the
    /// fuzzy tier only. Rule-based hits copy category, subcategory and
    /// confidence from the matched rule and take their ticket id from
    /// `RegexMatcher::extract_ticket_id`; fuzzy hits keep their own ticket id
    /// and fall back to the extracted one when they have none.
    ///
    /// Returns `None` when no tier matches. The LLM tier is never consulted.
    pub fn classify_sync(&self, message: &str, is_merge: bool) -> Option<ClassificationResult> {
        if let Some(rule) = self.exact.classify(message) {
            return Some(ClassificationResult {
                category: rule.category.clone(),
                subcategory: rule.subcategory.clone(),
                confidence: rule.confidence,
                method: ClassificationMethod::ExactRule,
                ticket_id: RegexMatcher::extract_ticket_id(message),
            });
        }
        if let Some(rule) = self.regex.classify(message) {
            return Some(ClassificationResult {
                category: rule.category.clone(),
                subcategory: rule.subcategory.clone(),
                confidence: rule.confidence,
                method: ClassificationMethod::RegexRule,
                ticket_id: RegexMatcher::extract_ticket_id(message),
            });
        }
        self.fuzzy
            .classify(message, is_merge)
            .map(|result| Self::backfill_ticket_id(result, message))
    }

    /// Classifies `message`, always producing a result.
    ///
    /// The synchronous tiers (see `classify_sync`) run first. If they all
    /// miss and an LLM tier is configured, it is awaited next; its result
    /// gets a ticket id extracted from the message when it did not supply
    /// one, matching how fuzzy results are treated. If nothing matches, an
    /// unclassified result carrying the extracted ticket id is returned.
    pub async fn classify(&self, message: &str, is_merge: bool) -> ClassificationResult {
        if let Some(result) = self.classify_sync(message, is_merge) {
            return result;
        }
        if let Some(llm) = &self.llm {
            if let Some(result) = llm.classify(message).await {
                return Self::backfill_ticket_id(result, message);
            }
        }
        Self::unclassified_for(message)
    }

    /// Classifies a batch of `(message, is_merge)` pairs in parallel.
    ///
    /// Each pair goes through `classify_sync`, so the LLM tier is not used.
    /// Pairs that no tier matches yield an unclassified result that still
    /// carries any ticket id found in the message, consistent with the
    /// fallback produced by `classify`. One result is produced per input pair.
    pub fn classify_batch(&self, messages: &[(&str, bool)]) -> Vec<ClassificationResult> {
        messages
            .par_iter()
            .map(|(msg, is_merge)| {
                self.classify_sync(msg, *is_merge)
                    .unwrap_or_else(|| Self::unclassified_for(msg))
            })
            .collect()
    }

    /// Sets `ticket_id` from the message text when the tier left it empty;
    /// an id the tier already provided is kept untouched.
    fn backfill_ticket_id(mut result: ClassificationResult, message: &str) -> ClassificationResult {
        if result.ticket_id.is_none() {
            result.ticket_id = RegexMatcher::extract_ticket_id(message);
        }
        result
    }

    /// Builds the unclassified fallback result, tagged with whatever ticket
    /// id can be extracted from the message.
    fn unclassified_for(message: &str) -> ClassificationResult {
        let mut fallback = ClassificationResult::unclassified();
        fallback.ticket_id = RegexMatcher::extract_ticket_id(message);
        fallback
    }
}