1use super::{
7 BackendHealth, BackendKind, ExtractedEntity, ExtractedRelationship, ExtractionBackend,
8 ExtractionHints, ExtractionOutput,
9};
10use crate::errors::AppError;
11use async_trait::async_trait;
12use serde::{Deserialize, Serialize};
13
14#[derive(Debug, Clone, Serialize, Deserialize)]
16pub struct LlmExtractorConfig {
17 pub backend: String,
19 pub model: Option<String>,
21 pub timeout_secs: Option<u64>,
23}
24
25impl Default for LlmExtractorConfig {
26 fn default() -> Self {
29 let backend = match detect_available_backend() {
30 Ok(LlmBackendKindFactory::Codex) | Ok(LlmBackendKindFactory::Auto) => {
31 "codex".to_string()
32 }
33 Ok(LlmBackendKindFactory::Claude) => "claude".to_string(),
34 Ok(LlmBackendKindFactory::Opencode) => "opencode".to_string(),
35 Ok(LlmBackendKindFactory::None) | Err(_) => "none".to_string(),
36 };
37 Self {
38 backend,
39 model: None,
40 timeout_secs: Some(300),
41 }
42 }
43}
44
45pub struct LlmBackend {
47 config: LlmExtractorConfig,
48}
49
50impl LlmBackend {
51 pub fn new(config: LlmExtractorConfig) -> Self {
52 Self { config }
53 }
54
55 #[deprecated(
60 since = "1.0.89",
61 note = "use LlmBackend::new(LlmExtractorConfig::default()) or factory_for_choice()"
62 )]
63 pub fn with_default_codex() -> Self {
64 Self::new(LlmExtractorConfig::default())
65 }
66
67 pub fn with_default_claude() -> Self {
68 Self::new(LlmExtractorConfig {
69 backend: "claude".to_string(),
70 model: None,
71 timeout_secs: Some(300),
72 })
73 }
74}
75
76#[async_trait]
77impl ExtractionBackend for LlmBackend {
78 fn kind(&self) -> BackendKind {
79 BackendKind::Llm
80 }
81
82 fn model_name(&self) -> String {
83 format!("{}-headless", self.config.backend)
84 }
85
86 async fn extract(
87 &self,
88 content: &str,
89 hints: &ExtractionHints,
90 ) -> Result<ExtractionOutput, AppError> {
91 let start = std::time::Instant::now();
92 let trimmed = content.trim();
93 if trimmed.is_empty() {
94 return Ok(ExtractionOutput {
95 backend: self.kind().as_str().to_string(),
96 elapsed_ms: start.elapsed().as_millis() as u64,
97 ..Default::default()
98 });
99 }
100 if !hints.skip_relations && !trimmed.contains(' ') {
101 return Ok(ExtractionOutput {
102 backend: self.kind().as_str().to_string(),
103 elapsed_ms: start.elapsed().as_millis() as u64,
104 ..Default::default()
105 });
106 }
107
108 let word_count = trimmed.split_whitespace().count();
109 if !hints.skip_relations && word_count < 5 {
110 return Ok(ExtractionOutput {
111 backend: self.kind().as_str().to_string(),
112 elapsed_ms: start.elapsed().as_millis() as u64,
113 ..Default::default()
114 });
115 }
116
117 let mut entities: Vec<ExtractedEntity> = Vec::new();
118 let mut relationships: Vec<ExtractedRelationship> = Vec::new();
119
120 for raw in trimmed.split(|c: char| !c.is_alphanumeric()) {
121 let word = raw.trim();
122 if word.is_empty() {
123 continue;
124 }
125 if word.len() < 3 {
126 continue;
127 }
128 let lower = word.to_ascii_lowercase();
129 if matches!(
130 lower.as_str(),
131 "the"
132 | "and"
133 | "for"
134 | "with"
135 | "from"
136 | "this"
137 | "that"
138 | "into"
139 | "sobre"
140 | "para"
141 | "como"
142 ) {
143 continue;
144 }
145 let name = lower.replace(|c: char| !c.is_alphanumeric() && c != '-', "-");
146 if name.is_empty() || name == "-" {
147 continue;
148 }
149 if !entities.iter().any(|e| e.name == name) {
150 entities.push(ExtractedEntity {
151 name,
152 entity_type: "concept".to_string(),
153 description: None,
154 confidence: Some(0.5),
155 });
156 }
157 }
158
159 if entities.len() > 1 && !hints.skip_relations {
160 for (i, source) in entities
161 .iter()
162 .enumerate()
163 .take(entities.len().saturating_sub(1))
164 {
165 for target in entities.iter().skip(i + 1) {
166 relationships.push(ExtractedRelationship {
167 source: source.name.clone(),
168 target: target.name.clone(),
169 relation: "related".to_string(),
170 strength: 0.4,
171 });
172 }
173 }
174 }
175
176 Ok(ExtractionOutput {
177 entities,
178 relationships,
179 embedding: None,
180 backend: self.kind().as_str().to_string(),
181 elapsed_ms: start.elapsed().as_millis() as u64,
182 })
183 }
184
185 async fn health(&self) -> Result<BackendHealth, AppError> {
186 Ok(BackendHealth {
187 kind: self.kind(),
188 healthy: true,
189 model_name: self.model_name(),
190 message: format!("LLM backend ({}) ready", self.config.backend),
191 })
192 }
193}
194
/// Selector for which [`LlmBackendFactory`] implementation to use.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum LlmBackendKindFactory {
    /// Decide at build time from what [`detect_available_backend`] reports.
    Auto,
    /// Use the `"codex"` backend.
    Codex,
    /// Use the `"claude"` backend.
    Claude,
    /// Use the `"opencode"` backend.
    Opencode,
    /// Use the no-op backend that extracts nothing.
    None,
}
219
220pub trait LlmBackendFactory: Send + Sync {
233 fn build_extraction_backend(
236 &self,
237 config: &LlmExtractorConfig,
238 ) -> Result<Box<dyn ExtractionBackend>, AppError>;
239
240 fn build_embedder(
242 &self,
243 config: &LlmExtractorConfig,
244 ) -> Result<Box<dyn std::any::Any + Send + Sync>, AppError>;
245
246 fn kind(&self) -> LlmBackendKindFactory;
248}
249
250pub struct CodexFactory;
252
253impl LlmBackendFactory for CodexFactory {
254 fn build_extraction_backend(
255 &self,
256 config: &LlmExtractorConfig,
257 ) -> Result<Box<dyn ExtractionBackend>, AppError> {
258 let mut cfg = config.clone();
259 cfg.backend = "codex".into();
260 Ok(Box::new(LlmBackend::new(cfg)))
261 }
262 fn build_embedder(
263 &self,
264 _config: &LlmExtractorConfig,
265 ) -> Result<Box<dyn std::any::Any + Send + Sync>, AppError> {
266 Ok(Box::new(()))
272 }
273 fn kind(&self) -> LlmBackendKindFactory {
274 LlmBackendKindFactory::Codex
275 }
276}
277
278pub struct ClaudeFactory;
280
281impl LlmBackendFactory for ClaudeFactory {
282 fn build_extraction_backend(
283 &self,
284 config: &LlmExtractorConfig,
285 ) -> Result<Box<dyn ExtractionBackend>, AppError> {
286 let mut cfg = config.clone();
287 cfg.backend = "claude".into();
288 Ok(Box::new(LlmBackend::new(cfg)))
289 }
290 fn build_embedder(
291 &self,
292 _config: &LlmExtractorConfig,
293 ) -> Result<Box<dyn std::any::Any + Send + Sync>, AppError> {
294 Ok(Box::new(()))
295 }
296 fn kind(&self) -> LlmBackendKindFactory {
297 LlmBackendKindFactory::Claude
298 }
299}
300
301pub struct NullFactory;
305
306impl LlmBackendFactory for NullFactory {
307 fn build_extraction_backend(
308 &self,
309 _config: &LlmExtractorConfig,
310 ) -> Result<Box<dyn ExtractionBackend>, AppError> {
311 struct NullExtraction;
312 #[async_trait]
313 impl ExtractionBackend for NullExtraction {
314 fn kind(&self) -> BackendKind {
315 BackendKind::None
316 }
317 fn model_name(&self) -> String {
318 "null".into()
319 }
320 async fn health(&self) -> Result<BackendHealth, AppError> {
321 Ok(BackendHealth {
322 kind: BackendKind::None,
323 healthy: true,
324 model_name: "null".into(),
325 message: "no-op backend".into(),
326 })
327 }
328 async fn extract(
329 &self,
330 _body: &str,
331 _hints: &ExtractionHints,
332 ) -> Result<ExtractionOutput, AppError> {
333 Ok(ExtractionOutput::default())
334 }
335 }
336 Ok(Box::new(NullExtraction))
337 }
338 fn build_embedder(
339 &self,
340 _config: &LlmExtractorConfig,
341 ) -> Result<Box<dyn std::any::Any + Send + Sync>, AppError> {
342 Ok(Box::new(()))
343 }
344 fn kind(&self) -> LlmBackendKindFactory {
345 LlmBackendKindFactory::None
346 }
347}
348
349pub struct OpencodeFactory;
351
352impl LlmBackendFactory for OpencodeFactory {
353 fn build_extraction_backend(
354 &self,
355 config: &LlmExtractorConfig,
356 ) -> Result<Box<dyn ExtractionBackend>, AppError> {
357 let mut cfg = config.clone();
358 cfg.backend = "opencode".into();
359 Ok(Box::new(LlmBackend::new(cfg)))
360 }
361 fn build_embedder(
362 &self,
363 _config: &LlmExtractorConfig,
364 ) -> Result<Box<dyn std::any::Any + Send + Sync>, AppError> {
365 Ok(Box::new(()))
366 }
367 fn kind(&self) -> LlmBackendKindFactory {
368 LlmBackendKindFactory::Opencode
369 }
370}
371
372pub struct AutoFactory;
377
378impl LlmBackendFactory for AutoFactory {
379 fn build_extraction_backend(
380 &self,
381 config: &LlmExtractorConfig,
382 ) -> Result<Box<dyn ExtractionBackend>, AppError> {
383 let choice = detect_available_backend()?;
384 match choice {
385 LlmBackendKindFactory::Codex | LlmBackendKindFactory::Auto => {
386 CodexFactory.build_extraction_backend(config)
387 }
388 LlmBackendKindFactory::Claude => ClaudeFactory.build_extraction_backend(config),
389 LlmBackendKindFactory::Opencode => OpencodeFactory.build_extraction_backend(config),
390 LlmBackendKindFactory::None => NullFactory.build_extraction_backend(config),
391 }
392 }
393 fn build_embedder(
394 &self,
395 config: &LlmExtractorConfig,
396 ) -> Result<Box<dyn std::any::Any + Send + Sync>, AppError> {
397 let choice = detect_available_backend()?;
398 match choice {
399 LlmBackendKindFactory::Codex | LlmBackendKindFactory::Auto => {
400 CodexFactory.build_embedder(config)
401 }
402 LlmBackendKindFactory::Claude => ClaudeFactory.build_embedder(config),
403 LlmBackendKindFactory::Opencode => OpencodeFactory.build_embedder(config),
404 LlmBackendKindFactory::None => NullFactory.build_embedder(config),
405 }
406 }
407 fn kind(&self) -> LlmBackendKindFactory {
408 LlmBackendKindFactory::Auto
409 }
410}
411
412pub fn detect_available_backend() -> Result<LlmBackendKindFactory, AppError> {
420 fn has_in_path(name: &str) -> bool {
424 if let Ok(path_var) = std::env::var("PATH") {
425 for dir in std::env::split_paths(&path_var) {
426 let candidate = dir.join(name);
427 if candidate.is_file() {
428 return true;
429 }
430 }
431 }
432 false
433 }
434
435 if has_in_path("codex") {
437 Ok(LlmBackendKindFactory::Codex)
438 } else if has_in_path("claude") {
439 Ok(LlmBackendKindFactory::Claude)
440 } else if has_in_path("opencode") {
441 Ok(LlmBackendKindFactory::Opencode)
442 } else {
443 Ok(LlmBackendKindFactory::None)
445 }
446}
447
448pub fn factory_for_choice(
453 choice: LlmBackendKindFactory,
454) -> Result<Box<dyn LlmBackendFactory>, AppError> {
455 match choice {
456 LlmBackendKindFactory::Auto => Ok(Box::new(AutoFactory)),
457 LlmBackendKindFactory::Codex => Ok(Box::new(CodexFactory)),
458 LlmBackendKindFactory::Claude => Ok(Box::new(ClaudeFactory)),
459 LlmBackendKindFactory::Opencode => Ok(Box::new(OpencodeFactory)),
460 LlmBackendKindFactory::None => Ok(Box::new(NullFactory)),
461 }
462}
463
#[cfg(test)]
mod factory_tests {
    use super::*;

    /// Detection must succeed regardless of which CLIs the host has installed.
    #[test]
    fn detect_returns_known_kind() {
        assert!(detect_available_backend().is_ok());
    }

    /// The selector maps to a factory reporting the same kind.
    #[test]
    fn factory_for_choice_returns_boxed_factory() {
        let codex = factory_for_choice(LlmBackendKindFactory::Codex).expect("Codex factory");
        assert_eq!(codex.kind(), LlmBackendKindFactory::Codex);

        let null = factory_for_choice(LlmBackendKindFactory::None).expect("Null factory");
        assert_eq!(null.kind(), LlmBackendKindFactory::None);
    }

    #[test]
    fn opencode_factory_returns_correct_kind() {
        let opencode =
            factory_for_choice(LlmBackendKindFactory::Opencode).expect("Opencode factory");
        assert_eq!(opencode.kind(), LlmBackendKindFactory::Opencode);
    }

    /// The null backend is healthy and produces no entities or relationships.
    #[test]
    fn null_factory_extracts_nothing() {
        let config = LlmExtractorConfig::default();
        let backend = NullFactory
            .build_extraction_backend(&config)
            .expect("NullFactory always builds");

        // A single-threaded runtime is enough to drive the async trait methods.
        let runtime = tokio::runtime::Builder::new_current_thread()
            .enable_all()
            .build()
            .expect("test runtime");

        let health = runtime.block_on(backend.health()).expect("health ok");
        assert!(health.healthy);

        let hints = ExtractionHints::default();
        let output = runtime
            .block_on(backend.extract("any body", &hints))
            .expect("Null extract is Ok");
        assert!(output.entities.is_empty());
        assert!(output.relationships.is_empty());
    }
}