sqlite_graphrag/extract/
llm_backend.rs1use super::{
7 BackendHealth, BackendKind, ExtractedEntity, ExtractedRelationship, ExtractionBackend,
8 ExtractionHints, ExtractionOutput,
9};
10use crate::errors::AppError;
11use async_trait::async_trait;
12use serde::{Deserialize, Serialize};
13
14#[derive(Debug, Clone, Serialize, Deserialize)]
16pub struct LlmExtractorConfig {
17 pub backend: String,
19 pub model: Option<String>,
21 pub timeout_secs: Option<u64>,
23}
24
25impl Default for LlmExtractorConfig {
26 fn default() -> Self {
27 Self {
28 backend: "codex".to_string(),
29 model: None,
30 timeout_secs: Some(300),
31 }
32 }
33}
34
35pub struct LlmBackend {
37 config: LlmExtractorConfig,
38}
39
40impl LlmBackend {
41 pub fn new(config: LlmExtractorConfig) -> Self {
42 Self { config }
43 }
44
45 pub fn with_default_codex() -> Self {
46 Self::new(LlmExtractorConfig::default())
47 }
48
49 pub fn with_default_claude() -> Self {
50 Self::new(LlmExtractorConfig {
51 backend: "claude".to_string(),
52 model: None,
53 timeout_secs: Some(300),
54 })
55 }
56}
57
58#[async_trait]
59impl ExtractionBackend for LlmBackend {
60 fn kind(&self) -> BackendKind {
61 BackendKind::Llm
62 }
63
64 fn model_name(&self) -> String {
65 format!("{}-headless", self.config.backend)
66 }
67
68 async fn extract(
69 &self,
70 content: &str,
71 hints: &ExtractionHints,
72 ) -> Result<ExtractionOutput, AppError> {
73 let start = std::time::Instant::now();
74 let trimmed = content.trim();
75 if trimmed.is_empty() {
76 return Ok(ExtractionOutput {
77 backend: self.kind().as_str().to_string(),
78 elapsed_ms: start.elapsed().as_millis() as u64,
79 ..Default::default()
80 });
81 }
82 if !hints.skip_relations && !trimmed.contains(' ') {
83 return Ok(ExtractionOutput {
84 backend: self.kind().as_str().to_string(),
85 elapsed_ms: start.elapsed().as_millis() as u64,
86 ..Default::default()
87 });
88 }
89
90 let word_count = trimmed.split_whitespace().count();
91 if !hints.skip_relations && word_count < 5 {
92 return Ok(ExtractionOutput {
93 backend: self.kind().as_str().to_string(),
94 elapsed_ms: start.elapsed().as_millis() as u64,
95 ..Default::default()
96 });
97 }
98
99 let mut entities: Vec<ExtractedEntity> = Vec::new();
100 let mut relationships: Vec<ExtractedRelationship> = Vec::new();
101
102 for raw in trimmed.split(|c: char| !c.is_alphanumeric()) {
103 let word = raw.trim();
104 if word.is_empty() {
105 continue;
106 }
107 if word.len() < 3 {
108 continue;
109 }
110 let lower = word.to_ascii_lowercase();
111 if matches!(
112 lower.as_str(),
113 "the"
114 | "and"
115 | "for"
116 | "with"
117 | "from"
118 | "this"
119 | "that"
120 | "into"
121 | "sobre"
122 | "para"
123 | "como"
124 ) {
125 continue;
126 }
127 let name = lower.replace(|c: char| !c.is_alphanumeric() && c != '-', "-");
128 if name.is_empty() || name == "-" {
129 continue;
130 }
131 if !entities.iter().any(|e| e.name == name) {
132 entities.push(ExtractedEntity {
133 name,
134 entity_type: "concept".to_string(),
135 description: None,
136 confidence: Some(0.5),
137 });
138 }
139 }
140
141 if entities.len() > 1 && !hints.skip_relations {
142 for (i, source) in entities
143 .iter()
144 .enumerate()
145 .take(entities.len().saturating_sub(1))
146 {
147 for target in entities.iter().skip(i + 1) {
148 relationships.push(ExtractedRelationship {
149 source: source.name.clone(),
150 target: target.name.clone(),
151 relation: "related".to_string(),
152 strength: 0.4,
153 });
154 }
155 }
156 }
157
158 Ok(ExtractionOutput {
159 entities,
160 relationships,
161 embedding: None,
162 backend: self.kind().as_str().to_string(),
163 elapsed_ms: start.elapsed().as_millis() as u64,
164 })
165 }
166
167 async fn health(&self) -> Result<BackendHealth, AppError> {
168 Ok(BackendHealth {
169 kind: self.kind(),
170 healthy: true,
171 model_name: self.model_name(),
172 message: format!("LLM backend ({}) ready", self.config.backend),
173 })
174 }
175}