1use super::{
7 BackendHealth, BackendKind, ExtractedEntity, ExtractedRelationship, ExtractionBackend,
8 ExtractionHints, ExtractionOutput,
9};
10use crate::errors::AppError;
11use async_trait::async_trait;
12use serde::{Deserialize, Serialize};
13
14#[derive(Debug, Clone, Serialize, Deserialize)]
16pub struct LlmExtractorConfig {
17 pub backend: String,
19 pub model: Option<String>,
21 pub timeout_secs: Option<u64>,
23}
24
25impl Default for LlmExtractorConfig {
26 fn default() -> Self {
29 let backend = match detect_available_backend() {
30 Ok(LlmBackendKindFactory::Codex) | Ok(LlmBackendKindFactory::Auto) => "codex".to_string(),
31 Ok(LlmBackendKindFactory::Claude) => "claude".to_string(),
32 Ok(LlmBackendKindFactory::None) | Err(_) => "none".to_string(),
33 };
34 Self {
35 backend,
36 model: None,
37 timeout_secs: Some(300),
38 }
39 }
40}
41
42pub struct LlmBackend {
44 config: LlmExtractorConfig,
45}
46
47impl LlmBackend {
48 pub fn new(config: LlmExtractorConfig) -> Self {
49 Self { config }
50 }
51
52 #[deprecated(since = "1.0.89", note = "use LlmBackend::new(LlmExtractorConfig::default()) or factory_for_choice()")]
57 pub fn with_default_codex() -> Self {
58 Self::new(LlmExtractorConfig::default())
59 }
60
61 pub fn with_default_claude() -> Self {
62 Self::new(LlmExtractorConfig {
63 backend: "claude".to_string(),
64 model: None,
65 timeout_secs: Some(300),
66 })
67 }
68}
69
70#[async_trait]
71impl ExtractionBackend for LlmBackend {
72 fn kind(&self) -> BackendKind {
73 BackendKind::Llm
74 }
75
76 fn model_name(&self) -> String {
77 format!("{}-headless", self.config.backend)
78 }
79
80 async fn extract(
81 &self,
82 content: &str,
83 hints: &ExtractionHints,
84 ) -> Result<ExtractionOutput, AppError> {
85 let start = std::time::Instant::now();
86 let trimmed = content.trim();
87 if trimmed.is_empty() {
88 return Ok(ExtractionOutput {
89 backend: self.kind().as_str().to_string(),
90 elapsed_ms: start.elapsed().as_millis() as u64,
91 ..Default::default()
92 });
93 }
94 if !hints.skip_relations && !trimmed.contains(' ') {
95 return Ok(ExtractionOutput {
96 backend: self.kind().as_str().to_string(),
97 elapsed_ms: start.elapsed().as_millis() as u64,
98 ..Default::default()
99 });
100 }
101
102 let word_count = trimmed.split_whitespace().count();
103 if !hints.skip_relations && word_count < 5 {
104 return Ok(ExtractionOutput {
105 backend: self.kind().as_str().to_string(),
106 elapsed_ms: start.elapsed().as_millis() as u64,
107 ..Default::default()
108 });
109 }
110
111 let mut entities: Vec<ExtractedEntity> = Vec::new();
112 let mut relationships: Vec<ExtractedRelationship> = Vec::new();
113
114 for raw in trimmed.split(|c: char| !c.is_alphanumeric()) {
115 let word = raw.trim();
116 if word.is_empty() {
117 continue;
118 }
119 if word.len() < 3 {
120 continue;
121 }
122 let lower = word.to_ascii_lowercase();
123 if matches!(
124 lower.as_str(),
125 "the"
126 | "and"
127 | "for"
128 | "with"
129 | "from"
130 | "this"
131 | "that"
132 | "into"
133 | "sobre"
134 | "para"
135 | "como"
136 ) {
137 continue;
138 }
139 let name = lower.replace(|c: char| !c.is_alphanumeric() && c != '-', "-");
140 if name.is_empty() || name == "-" {
141 continue;
142 }
143 if !entities.iter().any(|e| e.name == name) {
144 entities.push(ExtractedEntity {
145 name,
146 entity_type: "concept".to_string(),
147 description: None,
148 confidence: Some(0.5),
149 });
150 }
151 }
152
153 if entities.len() > 1 && !hints.skip_relations {
154 for (i, source) in entities
155 .iter()
156 .enumerate()
157 .take(entities.len().saturating_sub(1))
158 {
159 for target in entities.iter().skip(i + 1) {
160 relationships.push(ExtractedRelationship {
161 source: source.name.clone(),
162 target: target.name.clone(),
163 relation: "related".to_string(),
164 strength: 0.4,
165 });
166 }
167 }
168 }
169
170 Ok(ExtractionOutput {
171 entities,
172 relationships,
173 embedding: None,
174 backend: self.kind().as_str().to_string(),
175 elapsed_ms: start.elapsed().as_millis() as u64,
176 })
177 }
178
179 async fn health(&self) -> Result<BackendHealth, AppError> {
180 Ok(BackendHealth {
181 kind: self.kind(),
182 healthy: true,
183 model_name: self.model_name(),
184 message: format!("LLM backend ({}) ready", self.config.backend),
185 })
186 }
187}
188
/// Identifies which LLM backend factory to use.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum LlmBackendKindFactory {
    /// Choose a backend by probing what is available at build time.
    Auto,
    /// The Codex backend.
    Codex,
    /// The Claude backend.
    Claude,
    /// No backend.
    None,
}
211
212pub trait LlmBackendFactory: Send + Sync {
225 fn build_extraction_backend(
228 &self,
229 config: &LlmExtractorConfig,
230 ) -> Result<Box<dyn ExtractionBackend>, AppError>;
231
232 fn build_embedder(
234 &self,
235 config: &LlmExtractorConfig,
236 ) -> Result<Box<dyn std::any::Any + Send + Sync>, AppError>;
237
238 fn kind(&self) -> LlmBackendKindFactory;
240}
241
242pub struct CodexFactory;
244
245impl LlmBackendFactory for CodexFactory {
246 fn build_extraction_backend(
247 &self,
248 config: &LlmExtractorConfig,
249 ) -> Result<Box<dyn ExtractionBackend>, AppError> {
250 let mut cfg = config.clone();
251 cfg.backend = "codex".into();
252 Ok(Box::new(LlmBackend::new(cfg)))
253 }
254 fn build_embedder(
255 &self,
256 _config: &LlmExtractorConfig,
257 ) -> Result<Box<dyn std::any::Any + Send + Sync>, AppError> {
258 Ok(Box::new(()))
264 }
265 fn kind(&self) -> LlmBackendKindFactory {
266 LlmBackendKindFactory::Codex
267 }
268}
269
270pub struct ClaudeFactory;
272
273impl LlmBackendFactory for ClaudeFactory {
274 fn build_extraction_backend(
275 &self,
276 config: &LlmExtractorConfig,
277 ) -> Result<Box<dyn ExtractionBackend>, AppError> {
278 let mut cfg = config.clone();
279 cfg.backend = "claude".into();
280 Ok(Box::new(LlmBackend::new(cfg)))
281 }
282 fn build_embedder(
283 &self,
284 _config: &LlmExtractorConfig,
285 ) -> Result<Box<dyn std::any::Any + Send + Sync>, AppError> {
286 Ok(Box::new(()))
287 }
288 fn kind(&self) -> LlmBackendKindFactory {
289 LlmBackendKindFactory::Claude
290 }
291}
292
293pub struct NullFactory;
297
298impl LlmBackendFactory for NullFactory {
299 fn build_extraction_backend(
300 &self,
301 _config: &LlmExtractorConfig,
302 ) -> Result<Box<dyn ExtractionBackend>, AppError> {
303 struct NullExtraction;
304 #[async_trait]
305 impl ExtractionBackend for NullExtraction {
306 fn kind(&self) -> BackendKind {
307 BackendKind::None
308 }
309 fn model_name(&self) -> String {
310 "null".into()
311 }
312 async fn health(&self) -> Result<BackendHealth, AppError> {
313 Ok(BackendHealth {
314 kind: BackendKind::None,
315 healthy: true,
316 model_name: "null".into(),
317 message: "no-op backend".into(),
318 })
319 }
320 async fn extract(
321 &self,
322 _body: &str,
323 _hints: &ExtractionHints,
324 ) -> Result<ExtractionOutput, AppError> {
325 Ok(ExtractionOutput::default())
326 }
327 }
328 Ok(Box::new(NullExtraction))
329 }
330 fn build_embedder(
331 &self,
332 _config: &LlmExtractorConfig,
333 ) -> Result<Box<dyn std::any::Any + Send + Sync>, AppError> {
334 Ok(Box::new(()))
335 }
336 fn kind(&self) -> LlmBackendKindFactory {
337 LlmBackendKindFactory::None
338 }
339}
340
341pub struct AutoFactory;
346
347impl LlmBackendFactory for AutoFactory {
348 fn build_extraction_backend(
349 &self,
350 config: &LlmExtractorConfig,
351 ) -> Result<Box<dyn ExtractionBackend>, AppError> {
352 let choice = detect_available_backend()?;
353 match choice {
354 LlmBackendKindFactory::Codex | LlmBackendKindFactory::Auto => {
355 CodexFactory.build_extraction_backend(config)
356 }
357 LlmBackendKindFactory::Claude => ClaudeFactory.build_extraction_backend(config),
358 LlmBackendKindFactory::None => NullFactory.build_extraction_backend(config),
359 }
360 }
361 fn build_embedder(
362 &self,
363 config: &LlmExtractorConfig,
364 ) -> Result<Box<dyn std::any::Any + Send + Sync>, AppError> {
365 let choice = detect_available_backend()?;
366 match choice {
367 LlmBackendKindFactory::Codex | LlmBackendKindFactory::Auto => {
368 CodexFactory.build_embedder(config)
369 }
370 LlmBackendKindFactory::Claude => ClaudeFactory.build_embedder(config),
371 LlmBackendKindFactory::None => NullFactory.build_embedder(config),
372 }
373 }
374 fn kind(&self) -> LlmBackendKindFactory {
375 LlmBackendKindFactory::Auto
376 }
377}
378
379pub fn detect_available_backend() -> Result<LlmBackendKindFactory, AppError> {
387 fn has_in_path(name: &str) -> bool {
391 if let Ok(path_var) = std::env::var("PATH") {
392 for dir in std::env::split_paths(&path_var) {
393 let candidate = dir.join(name);
394 if candidate.is_file() {
395 return true;
396 }
397 }
398 }
399 false
400 }
401
402 if has_in_path("codex") {
404 Ok(LlmBackendKindFactory::Codex)
405 } else if has_in_path("claude") {
406 Ok(LlmBackendKindFactory::Claude)
407 } else {
408 Ok(LlmBackendKindFactory::None)
410 }
411}
412
413pub fn factory_for_choice(
418 choice: LlmBackendKindFactory,
419) -> Result<Box<dyn LlmBackendFactory>, AppError> {
420 match choice {
421 LlmBackendKindFactory::Auto => Ok(Box::new(AutoFactory)),
422 LlmBackendKindFactory::Codex => Ok(Box::new(CodexFactory)),
423 LlmBackendKindFactory::Claude => Ok(Box::new(ClaudeFactory)),
424 LlmBackendKindFactory::None => Ok(Box::new(NullFactory)),
425 }
426}
427
#[cfg(test)]
mod factory_tests {
    use super::*;

    /// Detection must succeed whatever tools happen to be installed.
    #[test]
    fn detect_returns_known_kind() {
        assert!(detect_available_backend().is_ok());
    }

    /// Each requested choice yields a factory reporting that same kind.
    #[test]
    fn factory_for_choice_returns_boxed_factory() {
        let codex = factory_for_choice(LlmBackendKindFactory::Codex).expect("Codex factory");
        assert_eq!(codex.kind(), LlmBackendKindFactory::Codex);

        let null = factory_for_choice(LlmBackendKindFactory::None).expect("Null factory");
        assert_eq!(null.kind(), LlmBackendKindFactory::None);
    }

    /// The null backend is healthy and produces no entities or relationships.
    #[test]
    fn null_factory_extracts_nothing() {
        let config = LlmExtractorConfig::default();
        let backend = NullFactory
            .build_extraction_backend(&config)
            .expect("NullFactory always builds");

        // A single-threaded runtime is enough to drive the async trait methods.
        let runtime = tokio::runtime::Builder::new_current_thread()
            .enable_all()
            .build()
            .expect("test runtime");

        let health = runtime.block_on(backend.health()).expect("health ok");
        assert!(health.healthy);

        let hints = ExtractionHints::default();
        let output = runtime
            .block_on(backend.extract("any body", &hints))
            .expect("Null extract is Ok");
        assert!(output.entities.is_empty());
        assert!(output.relationships.is_empty());
    }
}