Skip to main content

sqlite_graphrag/extract/
mod.rs

//! Extraction backend abstraction (v1.0.75 — G21 solution)
//!
//! Provides the [`ExtractionBackend`] trait with concrete implementations for
//! LLM-only (default in v1.0.75), Embedding (legacy), None (no extraction),
//! and Composite (orchestrates multiple backends in parallel).
//!
//! The trait enables backend-agnostic ingest/enrich/remember pipelines.
8
9pub mod codex_compat;
10
11use crate::errors::AppError;
12use async_trait::async_trait;
13use serde::{Deserialize, Serialize};
14use std::sync::Arc;
15
16/// Hint configuration forwarded to the extraction backend.
17#[derive(Debug, Clone, Default, Serialize, Deserialize)]
18pub struct ExtractionHints {
19    /// Memory name to be remembered (kebab-case)
20    pub memory_name: Option<String>,
21    /// Memory type to be remembered
22    pub memory_type: Option<String>,
23    /// Existing entity names to avoid duplicates
24    pub existing_entities: Vec<String>,
25    /// Whether to skip relation extraction
26    pub skip_relations: bool,
27    /// Backend-specific seed for determinism
28    pub seed: Option<u64>,
29}
30
31/// Entity extracted from content.
32#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
33pub struct ExtractedEntity {
34    pub name: String,
35    pub entity_type: String,
36    pub description: Option<String>,
37    pub confidence: Option<f32>,
38}
39
40/// Relationship extracted from content.
41#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
42pub struct ExtractedRelationship {
43    pub source: String,
44    pub target: String,
45    pub relation: String,
46    pub strength: f32,
47}
48
49/// Output of extraction backend.
50#[derive(Debug, Clone, Default, Serialize, Deserialize)]
51pub struct ExtractionOutput {
52    pub entities: Vec<ExtractedEntity>,
53    pub relationships: Vec<ExtractedRelationship>,
54    /// Optional embedding vector (only populated by EmbeddingBackend)
55    pub embedding: Option<Vec<f32>>,
56    /// Backend that produced this output
57    pub backend: String,
58    /// Latency in milliseconds
59    pub elapsed_ms: u64,
60}
61
62/// Backend kind enumeration used for selection and telemetry.
63#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
64#[serde(rename_all = "kebab-case")]
65pub enum BackendKind {
66    Llm,
67    Embedding,
68    None,
69    Composite,
70}
71
72impl BackendKind {
73    pub fn as_str(self) -> &'static str {
74        match self {
75            BackendKind::Llm => "llm",
76            BackendKind::Embedding => "embedding",
77            BackendKind::None => "none",
78            BackendKind::Composite => "composite",
79        }
80    }
81
82    pub fn parse(s: &str) -> Option<Self> {
83        match s.to_ascii_lowercase().as_str() {
84            "llm" => Some(BackendKind::Llm),
85            "embedding" => Some(BackendKind::Embedding),
86            "none" => Some(BackendKind::None),
87            "both" | "composite" => Some(BackendKind::Composite),
88            _ => None,
89        }
90    }
91}
92
93/// Trait abstraction for any extraction backend (LLM, Embedding, None, Composite).
94///
95/// G21 HIGH solution: the trait allows the rest of the codebase to remain
96/// agnostic of the underlying extraction mechanism. New backends can be added
97/// without touching call sites.
98#[async_trait]
99pub trait ExtractionBackend: Send + Sync {
100    /// Identify this backend (used in metrics, logs and ExtractionOutput)
101    fn kind(&self) -> BackendKind;
102
103    /// Identify the underlying model/CLI being used (e.g. "codex-0.137.0")
104    fn model_name(&self) -> String;
105
106    /// Extract entities and relationships from `content`.
107    ///
108    /// `hints` provides optional context (memory name, type, etc.).
109    /// Returns `ExtractionOutput` with entities, relationships, and optional embedding.
110    async fn extract(
111        &self,
112        content: &str,
113        hints: &ExtractionHints,
114    ) -> Result<ExtractionOutput, AppError>;
115
116    /// Health check: whether this backend is ready to operate.
117    async fn health(&self) -> Result<BackendHealth, AppError>;
118}
119
120/// Health status of a backend.
121#[derive(Debug, Clone, Serialize, Deserialize)]
122pub struct BackendHealth {
123    pub kind: BackendKind,
124    pub healthy: bool,
125    pub model_name: String,
126    pub message: String,
127}
128
129/// Type alias for shared backend references.
130pub type SharedBackend = Arc<dyn ExtractionBackend>;
131
132pub mod composite_backend;
133pub mod embedding_backend;
134pub mod llm_backend;
135pub mod llm_embedding;
136pub mod none_backend;
137
138pub use composite_backend::{backend_from_kind, default_backend, CompositeBackend};
139pub use embedding_backend::EmbeddingBackend;
140pub use llm_backend::{LlmBackend, LlmExtractorConfig};
141pub use llm_embedding::{EmbeddingFlavour, LlmEmbedding, EMBEDDING_DIM as LLM_EMBEDDING_DIM};
142pub use none_backend::NoneBackend;