use super::{
BackendHealth, BackendKind, ExtractedEntity, ExtractedRelationship, ExtractionBackend,
ExtractionHints, ExtractionOutput,
};
use crate::errors::AppError;
use async_trait::async_trait;
use serde::{Deserialize, Serialize};
/// Configuration for the LLM-labelled extraction backend.
///
/// Derives `Serialize`/`Deserialize`, so it can be read from or written to
/// any serde-supported format.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct LlmExtractorConfig {
/// Backend identifier. The values produced in this file are `"codex"`,
/// `"claude"`, `"opencode"` and `"none"`.
pub backend: String,
/// Optional model name; the constructors in this file leave it as `None`.
/// NOTE(review): not read by the code visible in this file — confirm who consumes it.
pub model: Option<String>,
/// Optional timeout in seconds; the constructors in this file use `Some(300)`.
/// NOTE(review): not read by the code visible in this file — presumably enforced elsewhere.
pub timeout_secs: Option<u64>,
}
impl Default for LlmExtractorConfig {
fn default() -> Self {
let backend = match detect_available_backend() {
Ok(LlmBackendKindFactory::Codex) | Ok(LlmBackendKindFactory::Auto) => {
"codex".to_string()
}
Ok(LlmBackendKindFactory::Claude) => "claude".to_string(),
Ok(LlmBackendKindFactory::Opencode) => "opencode".to_string(),
Ok(LlmBackendKindFactory::None) | Err(_) => "none".to_string(),
};
Self {
backend,
model: None,
timeout_secs: Some(300),
}
}
}
/// Extraction backend labelled with an LLM backend name.
///
/// In this file `config.backend` is only used to build the reported model name
/// and the health message.
pub struct LlmBackend {
/// Settings this backend was constructed with.
config: LlmExtractorConfig,
}
impl LlmBackend {
pub fn new(config: LlmExtractorConfig) -> Self {
Self { config }
}
#[deprecated(
since = "1.0.89",
note = "use LlmBackend::new(LlmExtractorConfig::default()) or factory_for_choice()"
)]
pub fn with_default_codex() -> Self {
Self::new(LlmExtractorConfig::default())
}
pub fn with_default_claude() -> Self {
Self::new(LlmExtractorConfig {
backend: "claude".to_string(),
model: None,
timeout_secs: Some(300),
})
}
}
#[async_trait]
impl ExtractionBackend for LlmBackend {
    /// Always reports [`BackendKind::Llm`].
    fn kind(&self) -> BackendKind {
        BackendKind::Llm
    }

    /// Returns the configured backend name with a `-headless` suffix,
    /// e.g. `"codex-headless"`.
    fn model_name(&self) -> String {
        format!("{}-headless", self.config.backend)
    }

    /// Extracts keyword-style entities and pairwise relationships from `content`.
    ///
    /// The extraction is a local heuristic:
    ///
    /// 1. Empty (after trimming) content yields an empty output.
    /// 2. Unless `hints.skip_relations` is set, content that contains no space
    ///    character or fewer than five whitespace-separated words also yields
    ///    an empty output.
    /// 3. The text is split on non-alphanumeric characters. Each token of at
    ///    least three characters that is not a stopword becomes a lower-cased
    ///    `"concept"` entity with confidence `0.5`. Duplicates are dropped and
    ///    first-seen order is kept.
    /// 4. Unless `hints.skip_relations` is set, every unordered pair of
    ///    entities is linked by a `"related"` relationship of strength `0.4`.
    ///    The number of relationships therefore grows quadratically with the
    ///    number of distinct entities.
    ///
    /// # Errors
    ///
    /// This implementation never returns `Err`; the `Result` is required by
    /// the trait signature.
    async fn extract(
        &self,
        content: &str,
        hints: &ExtractionHints,
    ) -> Result<ExtractionOutput, AppError> {
        let start = std::time::Instant::now();
        let trimmed = content.trim();
        if trimmed.is_empty() {
            return Ok(self.empty_output(start));
        }
        // Very short inputs are skipped only when relationships were requested.
        if !hints.skip_relations
            && (!trimmed.contains(' ') || trimmed.split_whitespace().count() < 5)
        {
            return Ok(self.empty_output(start));
        }

        // `seen` makes de-duplication O(1) per token; `entities` keeps the
        // first-seen order that the relationship pairing below relies on.
        let mut seen: std::collections::HashSet<String> = std::collections::HashSet::new();
        let mut entities: Vec<ExtractedEntity> = Vec::new();
        for token in trimmed.split(|c: char| !c.is_alphanumeric()) {
            if let Some(name) = Self::entity_name(token) {
                if seen.insert(name.clone()) {
                    entities.push(ExtractedEntity {
                        name,
                        entity_type: "concept".to_string(),
                        description: None,
                        confidence: Some(0.5),
                    });
                }
            }
        }

        let mut relationships: Vec<ExtractedRelationship> = Vec::new();
        if !hints.skip_relations {
            // Link each entity with every entity that follows it, so each
            // unordered pair appears exactly once.
            for (i, source) in entities.iter().enumerate() {
                for target in entities.iter().skip(i + 1) {
                    relationships.push(ExtractedRelationship {
                        source: source.name.clone(),
                        target: target.name.clone(),
                        relation: "related".to_string(),
                        strength: 0.4,
                    });
                }
            }
        }

        Ok(ExtractionOutput {
            entities,
            relationships,
            embedding: None,
            backend: self.kind().as_str().to_string(),
            elapsed_ms: Self::elapsed_ms(start),
        })
    }

    /// Reports the backend as healthy unconditionally; no probe is performed.
    async fn health(&self) -> Result<BackendHealth, AppError> {
        Ok(BackendHealth {
            kind: self.kind(),
            healthy: true,
            model_name: self.model_name(),
            message: format!("LLM backend ({}) ready", self.config.backend),
        })
    }
}

impl LlmBackend {
    /// Milliseconds elapsed since `start`, saturating at `u64::MAX` instead of
    /// silently truncating the `u128` returned by `Duration::as_millis`.
    fn elapsed_ms(start: std::time::Instant) -> u64 {
        u64::try_from(start.elapsed().as_millis()).unwrap_or(u64::MAX)
    }

    /// Output carrying only the backend label and the elapsed time; every
    /// other field keeps its `Default` value.
    fn empty_output(&self, start: std::time::Instant) -> ExtractionOutput {
        ExtractionOutput {
            backend: self.kind().as_str().to_string(),
            elapsed_ms: Self::elapsed_ms(start),
            ..Default::default()
        }
    }

    /// Normalizes one token into an entity name, or returns `None` when the
    /// token is too short or is a stopword.
    fn entity_name(token: &str) -> Option<String> {
        // Count characters rather than bytes so the length filter treats
        // accented and other multi-byte text the same as ASCII.
        if token.chars().count() < 3 {
            return None;
        }
        // Full Unicode lowercasing so that e.g. "Ação" and "ação" collapse
        // into a single entity.
        let lower = token.to_lowercase();
        if matches!(
            lower.as_str(),
            "the"
                | "and"
                | "for"
                | "with"
                | "from"
                | "this"
                | "that"
                | "into"
                | "sobre"
                | "para"
                | "como"
        ) {
            return None;
        }
        // Unicode lowercasing can introduce combining marks; keep names
        // restricted to alphanumerics and '-'.
        Some(lower.replace(|c: char| !c.is_alphanumeric() && c != '-', "-"))
    }
}
/// Identifies which backend factory to use.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum LlmBackendKindFactory {
/// Pick a backend at build time from what `detect_available_backend` reports.
Auto,
/// The `"codex"` backend.
Codex,
/// The `"claude"` backend.
Claude,
/// The `"opencode"` backend.
Opencode,
/// No backend; maps to the no-op `NullFactory`.
None,
}
/// Factory abstraction that builds the components for one backend kind.
///
/// Implementors must be `Send + Sync`.
pub trait LlmBackendFactory: Send + Sync {
/// Builds the extraction backend for this factory from `config`.
fn build_extraction_backend(
&self,
config: &LlmExtractorConfig,
) -> Result<Box<dyn ExtractionBackend>, AppError>;
/// Builds the embedder as a type-erased `Any` box.
///
/// Every implementation in this file returns a boxed `()` placeholder.
fn build_embedder(
&self,
config: &LlmExtractorConfig,
) -> Result<Box<dyn std::any::Any + Send + Sync>, AppError>;
/// Returns the kind this factory represents.
fn kind(&self) -> LlmBackendKindFactory;
}
pub struct CodexFactory;
impl LlmBackendFactory for CodexFactory {
fn build_extraction_backend(
&self,
config: &LlmExtractorConfig,
) -> Result<Box<dyn ExtractionBackend>, AppError> {
let mut cfg = config.clone();
cfg.backend = "codex".into();
Ok(Box::new(LlmBackend::new(cfg)))
}
fn build_embedder(
&self,
_config: &LlmExtractorConfig,
) -> Result<Box<dyn std::any::Any + Send + Sync>, AppError> {
Ok(Box::new(()))
}
fn kind(&self) -> LlmBackendKindFactory {
LlmBackendKindFactory::Codex
}
}
pub struct ClaudeFactory;
impl LlmBackendFactory for ClaudeFactory {
fn build_extraction_backend(
&self,
config: &LlmExtractorConfig,
) -> Result<Box<dyn ExtractionBackend>, AppError> {
let mut cfg = config.clone();
cfg.backend = "claude".into();
Ok(Box::new(LlmBackend::new(cfg)))
}
fn build_embedder(
&self,
_config: &LlmExtractorConfig,
) -> Result<Box<dyn std::any::Any + Send + Sync>, AppError> {
Ok(Box::new(()))
}
fn kind(&self) -> LlmBackendKindFactory {
LlmBackendKindFactory::Claude
}
}
/// Factory for a no-op backend that reports itself healthy and extracts nothing.
pub struct NullFactory;

impl LlmBackendFactory for NullFactory {
    /// Returns a backend whose `extract` always yields the default (empty)
    /// output. The configuration is ignored and the call never fails.
    fn build_extraction_backend(
        &self,
        _config: &LlmExtractorConfig,
    ) -> Result<Box<dyn ExtractionBackend>, AppError> {
        // Private to this function: callers only ever see the trait object.
        struct NullExtraction;

        #[async_trait]
        impl ExtractionBackend for NullExtraction {
            fn kind(&self) -> BackendKind {
                BackendKind::None
            }

            fn model_name(&self) -> String {
                String::from("null")
            }

            async fn extract(
                &self,
                _body: &str,
                _hints: &ExtractionHints,
            ) -> Result<ExtractionOutput, AppError> {
                let nothing = ExtractionOutput::default();
                Ok(nothing)
            }

            async fn health(&self) -> Result<BackendHealth, AppError> {
                let report = BackendHealth {
                    kind: BackendKind::None,
                    healthy: true,
                    model_name: String::from("null"),
                    message: String::from("no-op backend"),
                };
                Ok(report)
            }
        }

        let backend: Box<dyn ExtractionBackend> = Box::new(NullExtraction);
        Ok(backend)
    }

    /// Returns a boxed `()` placeholder; the configuration is ignored.
    fn build_embedder(
        &self,
        _config: &LlmExtractorConfig,
    ) -> Result<Box<dyn std::any::Any + Send + Sync>, AppError> {
        let placeholder: Box<dyn std::any::Any + Send + Sync> = Box::new(());
        Ok(placeholder)
    }

    /// Always [`LlmBackendKindFactory::None`].
    fn kind(&self) -> LlmBackendKindFactory {
        LlmBackendKindFactory::None
    }
}
pub struct OpencodeFactory;
impl LlmBackendFactory for OpencodeFactory {
fn build_extraction_backend(
&self,
config: &LlmExtractorConfig,
) -> Result<Box<dyn ExtractionBackend>, AppError> {
let mut cfg = config.clone();
cfg.backend = "opencode".into();
Ok(Box::new(LlmBackend::new(cfg)))
}
fn build_embedder(
&self,
_config: &LlmExtractorConfig,
) -> Result<Box<dyn std::any::Any + Send + Sync>, AppError> {
Ok(Box::new(()))
}
fn kind(&self) -> LlmBackendKindFactory {
LlmBackendKindFactory::Opencode
}
}
/// Factory that detects the installed backend each time something is built
/// and delegates to the matching concrete factory.
pub struct AutoFactory;

impl AutoFactory {
    /// Runs backend detection and returns the concrete factory to delegate to.
    ///
    /// An `Auto` detection result is mapped to [`CodexFactory`], so delegation
    /// can never loop back into `AutoFactory`. Detection errors are propagated.
    fn detected_factory() -> Result<Box<dyn LlmBackendFactory>, AppError> {
        let delegate: Box<dyn LlmBackendFactory> = match detect_available_backend()? {
            LlmBackendKindFactory::Codex | LlmBackendKindFactory::Auto => Box::new(CodexFactory),
            LlmBackendKindFactory::Claude => Box::new(ClaudeFactory),
            LlmBackendKindFactory::Opencode => Box::new(OpencodeFactory),
            LlmBackendKindFactory::None => Box::new(NullFactory),
        };
        Ok(delegate)
    }
}

impl LlmBackendFactory for AutoFactory {
    /// Builds the extraction backend of whichever backend is detected.
    fn build_extraction_backend(
        &self,
        config: &LlmExtractorConfig,
    ) -> Result<Box<dyn ExtractionBackend>, AppError> {
        let delegate = Self::detected_factory()?;
        delegate.build_extraction_backend(config)
    }

    /// Builds the embedder of whichever backend is detected.
    fn build_embedder(
        &self,
        config: &LlmExtractorConfig,
    ) -> Result<Box<dyn std::any::Any + Send + Sync>, AppError> {
        let delegate = Self::detected_factory()?;
        delegate.build_embedder(config)
    }

    /// Always [`LlmBackendKindFactory::Auto`], regardless of what detection
    /// would pick.
    fn kind(&self) -> LlmBackendKindFactory {
        LlmBackendKindFactory::Auto
    }
}
/// Detects which supported CLI is installed by scanning the directories in `PATH`.
///
/// Candidates are probed in priority order: `codex`, then `claude`, then
/// `opencode`. The first one found wins; when none is found the result is
/// [`LlmBackendKindFactory::None`]. A candidate counts as found when a regular
/// file with its name (or its name plus the platform executable suffix, e.g.
/// `.exe` on Windows) exists in one of the `PATH` directories. The file's
/// executable permission is not checked.
///
/// # Errors
///
/// The current implementation never returns `Err`; the `Result` return type is
/// kept so callers do not have to change.
pub fn detect_available_backend() -> Result<LlmBackendKindFactory, AppError> {
    /// Returns `true` when `name` resolves to a regular file in some `PATH` directory.
    fn has_in_path(name: &str) -> bool {
        // `var_os` (unlike `var`) still works when PATH holds non-Unicode
        // bytes, so an unusual directory name cannot hide every backend.
        let path_var = match std::env::var_os("PATH") {
            Some(value) => value,
            None => return false,
        };
        // Empty on Unix-like systems, ".exe" on Windows.
        let exe_suffix = std::env::consts::EXE_SUFFIX;
        let suffixed = format!("{}{}", name, exe_suffix);
        std::env::split_paths(&path_var).any(|dir| {
            dir.join(name).is_file() || (!exe_suffix.is_empty() && dir.join(&suffixed).is_file())
        })
    }

    if has_in_path("codex") {
        Ok(LlmBackendKindFactory::Codex)
    } else if has_in_path("claude") {
        Ok(LlmBackendKindFactory::Claude)
    } else if has_in_path("opencode") {
        Ok(LlmBackendKindFactory::Opencode)
    } else {
        Ok(LlmBackendKindFactory::None)
    }
}
/// Returns the boxed factory that corresponds to `choice`.
///
/// Every variant maps to exactly one factory, so the call never fails; the
/// `Result` wrapper is part of the existing signature.
pub fn factory_for_choice(
    choice: LlmBackendKindFactory,
) -> Result<Box<dyn LlmBackendFactory>, AppError> {
    let factory: Box<dyn LlmBackendFactory> = match choice {
        LlmBackendKindFactory::None => Box::new(NullFactory),
        LlmBackendKindFactory::Opencode => Box::new(OpencodeFactory),
        LlmBackendKindFactory::Claude => Box::new(ClaudeFactory),
        LlmBackendKindFactory::Codex => Box::new(CodexFactory),
        LlmBackendKindFactory::Auto => Box::new(AutoFactory),
    };
    Ok(factory)
}
#[cfg(test)]
mod factory_tests {
    use super::*;

    /// Detection must succeed regardless of what is installed on the host.
    #[test]
    fn detect_returns_known_kind() {
        let detected = detect_available_backend();
        assert!(detected.is_ok());
    }

    /// `factory_for_choice` hands back a factory reporting the requested kind.
    #[test]
    fn factory_for_choice_returns_boxed_factory() {
        let codex = factory_for_choice(LlmBackendKindFactory::Codex).expect("Codex factory");
        assert_eq!(codex.kind(), LlmBackendKindFactory::Codex);

        let null = factory_for_choice(LlmBackendKindFactory::None).expect("Null factory");
        assert_eq!(null.kind(), LlmBackendKindFactory::None);
    }

    /// The Opencode choice maps to a factory reporting the Opencode kind.
    #[test]
    fn opencode_factory_returns_correct_kind() {
        let opencode =
            factory_for_choice(LlmBackendKindFactory::Opencode).expect("Opencode factory");
        assert_eq!(opencode.kind(), LlmBackendKindFactory::Opencode);
    }

    /// The null backend is healthy and produces no entities or relationships.
    #[test]
    fn null_factory_extracts_nothing() {
        let config = LlmExtractorConfig::default();
        let backend = NullFactory
            .build_extraction_backend(&config)
            .expect("NullFactory always builds");

        // A single-threaded runtime is enough to drive the async trait methods.
        let runtime = tokio::runtime::Builder::new_current_thread()
            .enable_all()
            .build()
            .expect("test runtime");

        let health = runtime.block_on(backend.health()).expect("health ok");
        assert!(health.healthy);

        let hints = ExtractionHints::default();
        let output = runtime
            .block_on(backend.extract("any body", &hints))
            .expect("Null extract is Ok");
        assert!(output.entities.is_empty());
        assert!(output.relationships.is_empty());
    }
}