1use super::{
7 BackendHealth, BackendKind, ExtractedEntity, ExtractedRelationship, ExtractionBackend,
8 ExtractionHints, ExtractionOutput,
9};
10use crate::errors::AppError;
11use async_trait::async_trait;
12use serde::{Deserialize, Serialize};
13
14#[derive(Debug, Clone, Serialize, Deserialize)]
16pub struct LlmExtractorConfig {
17 pub backend: String,
19 pub model: Option<String>,
21 pub timeout_secs: Option<u64>,
23}
24
25impl Default for LlmExtractorConfig {
26 fn default() -> Self {
27 Self {
28 backend: "codex".to_string(),
29 model: None,
30 timeout_secs: Some(300),
31 }
32 }
33}
34
35pub struct LlmBackend {
37 config: LlmExtractorConfig,
38}
39
40impl LlmBackend {
41 pub fn new(config: LlmExtractorConfig) -> Self {
42 Self { config }
43 }
44
45 pub fn with_default_codex() -> Self {
46 Self::new(LlmExtractorConfig::default())
47 }
48
49 pub fn with_default_claude() -> Self {
50 Self::new(LlmExtractorConfig {
51 backend: "claude".to_string(),
52 model: None,
53 timeout_secs: Some(300),
54 })
55 }
56}
57
58#[async_trait]
59impl ExtractionBackend for LlmBackend {
60 fn kind(&self) -> BackendKind {
61 BackendKind::Llm
62 }
63
64 fn model_name(&self) -> String {
65 format!("{}-headless", self.config.backend)
66 }
67
68 async fn extract(
69 &self,
70 content: &str,
71 hints: &ExtractionHints,
72 ) -> Result<ExtractionOutput, AppError> {
73 let start = std::time::Instant::now();
74 let trimmed = content.trim();
75 if trimmed.is_empty() {
76 return Ok(ExtractionOutput {
77 backend: self.kind().as_str().to_string(),
78 elapsed_ms: start.elapsed().as_millis() as u64,
79 ..Default::default()
80 });
81 }
82 if !hints.skip_relations && !trimmed.contains(' ') {
83 return Ok(ExtractionOutput {
84 backend: self.kind().as_str().to_string(),
85 elapsed_ms: start.elapsed().as_millis() as u64,
86 ..Default::default()
87 });
88 }
89
90 let word_count = trimmed.split_whitespace().count();
91 if !hints.skip_relations && word_count < 5 {
92 return Ok(ExtractionOutput {
93 backend: self.kind().as_str().to_string(),
94 elapsed_ms: start.elapsed().as_millis() as u64,
95 ..Default::default()
96 });
97 }
98
99 let mut entities: Vec<ExtractedEntity> = Vec::new();
100 let mut relationships: Vec<ExtractedRelationship> = Vec::new();
101
102 for raw in trimmed.split(|c: char| !c.is_alphanumeric()) {
103 let word = raw.trim();
104 if word.is_empty() {
105 continue;
106 }
107 if word.len() < 3 {
108 continue;
109 }
110 let lower = word.to_ascii_lowercase();
111 if matches!(
112 lower.as_str(),
113 "the"
114 | "and"
115 | "for"
116 | "with"
117 | "from"
118 | "this"
119 | "that"
120 | "into"
121 | "sobre"
122 | "para"
123 | "como"
124 ) {
125 continue;
126 }
127 let name = lower.replace(|c: char| !c.is_alphanumeric() && c != '-', "-");
128 if name.is_empty() || name == "-" {
129 continue;
130 }
131 if !entities.iter().any(|e| e.name == name) {
132 entities.push(ExtractedEntity {
133 name,
134 entity_type: "concept".to_string(),
135 description: None,
136 confidence: Some(0.5),
137 });
138 }
139 }
140
141 if entities.len() > 1 && !hints.skip_relations {
142 for (i, source) in entities
143 .iter()
144 .enumerate()
145 .take(entities.len().saturating_sub(1))
146 {
147 for target in entities.iter().skip(i + 1) {
148 relationships.push(ExtractedRelationship {
149 source: source.name.clone(),
150 target: target.name.clone(),
151 relation: "related".to_string(),
152 strength: 0.4,
153 });
154 }
155 }
156 }
157
158 Ok(ExtractionOutput {
159 entities,
160 relationships,
161 embedding: None,
162 backend: self.kind().as_str().to_string(),
163 elapsed_ms: start.elapsed().as_millis() as u64,
164 })
165 }
166
167 async fn health(&self) -> Result<BackendHealth, AppError> {
168 Ok(BackendHealth {
169 kind: self.kind(),
170 healthy: true,
171 model_name: self.model_name(),
172 message: format!("LLM backend ({}) ready", self.config.backend),
173 })
174 }
175}
176
/// Selector for which [`LlmBackendFactory`] implementation should be used.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum LlmBackendKindFactory {
    /// Decide at build time by probing the environment.
    Auto,
    /// Use the `codex` flavour.
    Codex,
    /// Use the `claude` flavour.
    Claude,
    /// Use the no-op backend.
    None,
}
199
200pub trait LlmBackendFactory: Send + Sync {
213 fn build_extraction_backend(
216 &self,
217 config: &LlmExtractorConfig,
218 ) -> Result<Box<dyn ExtractionBackend>, AppError>;
219
220 fn build_embedder(
222 &self,
223 config: &LlmExtractorConfig,
224 ) -> Result<Box<dyn std::any::Any + Send + Sync>, AppError>;
225
226 fn kind(&self) -> LlmBackendKindFactory;
228}
229
230pub struct CodexFactory;
232
233impl LlmBackendFactory for CodexFactory {
234 fn build_extraction_backend(
235 &self,
236 config: &LlmExtractorConfig,
237 ) -> Result<Box<dyn ExtractionBackend>, AppError> {
238 let mut cfg = config.clone();
239 cfg.backend = "codex".into();
240 Ok(Box::new(LlmBackend::new(cfg)))
241 }
242 fn build_embedder(
243 &self,
244 _config: &LlmExtractorConfig,
245 ) -> Result<Box<dyn std::any::Any + Send + Sync>, AppError> {
246 Ok(Box::new(()))
252 }
253 fn kind(&self) -> LlmBackendKindFactory {
254 LlmBackendKindFactory::Codex
255 }
256}
257
258pub struct ClaudeFactory;
260
261impl LlmBackendFactory for ClaudeFactory {
262 fn build_extraction_backend(
263 &self,
264 config: &LlmExtractorConfig,
265 ) -> Result<Box<dyn ExtractionBackend>, AppError> {
266 let mut cfg = config.clone();
267 cfg.backend = "claude".into();
268 Ok(Box::new(LlmBackend::new(cfg)))
269 }
270 fn build_embedder(
271 &self,
272 _config: &LlmExtractorConfig,
273 ) -> Result<Box<dyn std::any::Any + Send + Sync>, AppError> {
274 Ok(Box::new(()))
275 }
276 fn kind(&self) -> LlmBackendKindFactory {
277 LlmBackendKindFactory::Claude
278 }
279}
280
281pub struct NullFactory;
285
286impl LlmBackendFactory for NullFactory {
287 fn build_extraction_backend(
288 &self,
289 _config: &LlmExtractorConfig,
290 ) -> Result<Box<dyn ExtractionBackend>, AppError> {
291 struct NullExtraction;
292 #[async_trait]
293 impl ExtractionBackend for NullExtraction {
294 fn kind(&self) -> BackendKind {
295 BackendKind::None
296 }
297 fn model_name(&self) -> String {
298 "null".into()
299 }
300 async fn health(&self) -> Result<BackendHealth, AppError> {
301 Ok(BackendHealth {
302 kind: BackendKind::None,
303 healthy: true,
304 model_name: "null".into(),
305 message: "no-op backend".into(),
306 })
307 }
308 async fn extract(
309 &self,
310 _body: &str,
311 _hints: &ExtractionHints,
312 ) -> Result<ExtractionOutput, AppError> {
313 Ok(ExtractionOutput::default())
314 }
315 }
316 Ok(Box::new(NullExtraction))
317 }
318 fn build_embedder(
319 &self,
320 _config: &LlmExtractorConfig,
321 ) -> Result<Box<dyn std::any::Any + Send + Sync>, AppError> {
322 Ok(Box::new(()))
323 }
324 fn kind(&self) -> LlmBackendKindFactory {
325 LlmBackendKindFactory::None
326 }
327}
328
329pub struct AutoFactory;
334
335impl LlmBackendFactory for AutoFactory {
336 fn build_extraction_backend(
337 &self,
338 config: &LlmExtractorConfig,
339 ) -> Result<Box<dyn ExtractionBackend>, AppError> {
340 let choice = detect_available_backend()?;
341 match choice {
342 LlmBackendKindFactory::Codex | LlmBackendKindFactory::Auto => {
343 CodexFactory.build_extraction_backend(config)
344 }
345 LlmBackendKindFactory::Claude => ClaudeFactory.build_extraction_backend(config),
346 LlmBackendKindFactory::None => NullFactory.build_extraction_backend(config),
347 }
348 }
349 fn build_embedder(
350 &self,
351 config: &LlmExtractorConfig,
352 ) -> Result<Box<dyn std::any::Any + Send + Sync>, AppError> {
353 let choice = detect_available_backend()?;
354 match choice {
355 LlmBackendKindFactory::Codex | LlmBackendKindFactory::Auto => {
356 CodexFactory.build_embedder(config)
357 }
358 LlmBackendKindFactory::Claude => ClaudeFactory.build_embedder(config),
359 LlmBackendKindFactory::None => NullFactory.build_embedder(config),
360 }
361 }
362 fn kind(&self) -> LlmBackendKindFactory {
363 LlmBackendKindFactory::Auto
364 }
365}
366
367pub fn detect_available_backend() -> Result<LlmBackendKindFactory, AppError> {
375 fn has_in_path(name: &str) -> bool {
379 if let Ok(path_var) = std::env::var("PATH") {
380 for dir in std::env::split_paths(&path_var) {
381 let candidate = dir.join(name);
382 if candidate.is_file() {
383 return true;
384 }
385 }
386 }
387 false
388 }
389
390 if has_in_path("codex") {
392 Ok(LlmBackendKindFactory::Codex)
393 } else if has_in_path("claude") {
394 Ok(LlmBackendKindFactory::Claude)
395 } else {
396 Ok(LlmBackendKindFactory::None)
398 }
399}
400
401pub fn factory_for_choice(
406 choice: LlmBackendKindFactory,
407) -> Result<Box<dyn LlmBackendFactory>, AppError> {
408 match choice {
409 LlmBackendKindFactory::Auto => Ok(Box::new(AutoFactory)),
410 LlmBackendKindFactory::Codex => Ok(Box::new(CodexFactory)),
411 LlmBackendKindFactory::Claude => Ok(Box::new(ClaudeFactory)),
412 LlmBackendKindFactory::None => Ok(Box::new(NullFactory)),
413 }
414}
415
#[cfg(test)]
mod factory_tests {
    use super::*;

    /// Detection depends on the host `PATH`, so only environment-independent facts are
    /// asserted: the call succeeds and resolves to a concrete kind, never `Auto`.
    #[test]
    fn detect_returns_known_kind() {
        let kind = detect_available_backend().expect("detection does not fail");
        assert!(matches!(
            kind,
            LlmBackendKindFactory::Codex
                | LlmBackendKindFactory::Claude
                | LlmBackendKindFactory::None
        ));
    }

    /// Every selector must map to a factory that reports that same selector.
    #[test]
    fn factory_for_choice_returns_boxed_factory() {
        let selectors = [
            LlmBackendKindFactory::Auto,
            LlmBackendKindFactory::Codex,
            LlmBackendKindFactory::Claude,
            LlmBackendKindFactory::None,
        ];
        for &choice in &selectors {
            let factory = factory_for_choice(choice).expect("every selector maps to a factory");
            assert_eq!(factory.kind(), choice);
        }
    }

    /// The null backend is healthy and produces no entities or relationships.
    #[test]
    fn null_factory_extracts_nothing() {
        let backend = NullFactory
            .build_extraction_backend(&LlmExtractorConfig::default())
            .expect("NullFactory always builds");
        // The backend methods are async; drive them on a throwaway single-thread runtime.
        let rt = tokio::runtime::Builder::new_current_thread()
            .enable_all()
            .build()
            .expect("test runtime");

        let health = rt.block_on(backend.health()).expect("health ok");
        assert!(health.healthy);

        let out = rt
            .block_on(backend.extract("any body", &ExtractionHints::default()))
            .expect("Null extract is Ok");
        assert!(out.entities.is_empty());
        assert!(out.relationships.is_empty());
    }
}