graphrag_core/core/
traits.rs

1//! Core traits for GraphRAG system components
2//!
3//! This module defines the fundamental abstractions that enable modularity,
4//! testability, and flexibility throughout the GraphRAG system.
5//!
6//! ## Async Migration
7//!
8//! All core traits have been migrated to async/await patterns for:
9//! - Non-blocking I/O operations (LLM calls, database access, network requests)
10//! - Better resource utilization with concurrent processing
11//! - Improved throughput for high-load scenarios
12//! - Future-proof architecture for cloud deployments
13
14use crate::core::Result;
15use std::collections::HashMap;
16use std::future::Future;
17use std::pin::Pin;
18
19use async_trait::async_trait;
20
/// Optional key/value metadata attached to a stored vector.
///
/// `None` means no metadata was supplied; `Some` carries a string-to-string map.
pub type VectorMetadata = Option<HashMap<String, String>>;

/// A batch of vectors for bulk insertion.
///
/// Each element is an `(id, vector, metadata)` tuple, in the same order as the
/// parameters of [`VectorStore::add_vector`].
pub type VectorBatch = Vec<(String, Vec<f32>, VectorMetadata)>;
26
/// Core storage abstraction for persisting and retrieving entities, documents, and chunks
///
/// ## Synchronous Version
/// This trait provides synchronous operations for storage; [`AsyncStorage`]
/// declares the same operations as `async fn`s.
///
/// Every fallible method returns the crate-wide [`Result`] alias. No method
/// signature in this trait refers to the associated `Error` type.
pub trait Storage {
    /// The entity type this storage handles
    type Entity;
    /// The document type this storage handles
    type Document;
    /// The chunk type this storage handles
    type Chunk;
    /// The error type associated with this storage backend
    ///
    /// NOTE(review): not referenced by any method below — confirm whether
    /// methods were meant to return it.
    type Error: std::error::Error + Send + Sync + 'static;

    /// Store an entity and return its assigned ID
    fn store_entity(&mut self, entity: Self::Entity) -> Result<String>;

    /// Retrieve an entity by its ID
    ///
    /// NOTE(review): `Ok(None)` presumably means no entity has this ID —
    /// confirm with implementors.
    fn retrieve_entity(&self, id: &str) -> Result<Option<Self::Entity>>;

    /// Store a document and return its assigned ID
    fn store_document(&mut self, document: Self::Document) -> Result<String>;

    /// Retrieve a document by its ID
    fn retrieve_document(&self, id: &str) -> Result<Option<Self::Document>>;

    /// Store a chunk and return its assigned ID
    fn store_chunk(&mut self, chunk: Self::Chunk) -> Result<String>;

    /// Retrieve a chunk by its ID
    fn retrieve_chunk(&self, id: &str) -> Result<Option<Self::Chunk>>;

    /// List all entity IDs
    fn list_entities(&self) -> Result<Vec<String>>;

    /// Store several entities in one call and return their assigned IDs
    ///
    /// NOTE(review): the returned IDs are presumably in input order — confirm
    /// with implementors.
    fn store_entities_batch(&mut self, entities: Vec<Self::Entity>) -> Result<Vec<String>>;
}
65
/// Async storage abstraction for non-blocking storage operations
///
/// ## Async Version
/// This trait declares the same operations as [`Storage`] as `async fn`s and
/// adds two provided methods, [`health_check`](AsyncStorage::health_check) and
/// [`flush`](AsyncStorage::flush). Implementors and all associated data types
/// must be `Send + Sync`.
///
/// Every fallible method returns the crate-wide [`Result`] alias. No method
/// signature in this trait refers to the associated `Error` type.
// NOTE(review): `#[async_trait]` rewrites the `async fn`s below, so this allow
// may be redundant — confirm before removing.
#[allow(async_fn_in_trait)]
#[async_trait]
pub trait AsyncStorage: Send + Sync {
    /// The entity type this storage handles
    type Entity: Send + Sync;
    /// The document type this storage handles
    type Document: Send + Sync;
    /// The chunk type this storage handles
    type Chunk: Send + Sync;
    /// The error type associated with this storage backend
    type Error: std::error::Error + Send + Sync + 'static;

    /// Store an entity and return its assigned ID
    async fn store_entity(&mut self, entity: Self::Entity) -> Result<String>;

    /// Retrieve an entity by its ID
    ///
    /// NOTE(review): `Ok(None)` presumably means no entity has this ID —
    /// confirm with implementors.
    async fn retrieve_entity(&self, id: &str) -> Result<Option<Self::Entity>>;

    /// Store a document and return its assigned ID
    async fn store_document(&mut self, document: Self::Document) -> Result<String>;

    /// Retrieve a document by its ID
    async fn retrieve_document(&self, id: &str) -> Result<Option<Self::Document>>;

    /// Store a chunk and return its assigned ID
    async fn store_chunk(&mut self, chunk: Self::Chunk) -> Result<String>;

    /// Retrieve a chunk by its ID
    async fn retrieve_chunk(&self, id: &str) -> Result<Option<Self::Chunk>>;

    /// List all entity IDs
    async fn list_entities(&self) -> Result<Vec<String>>;

    /// Store several entities in one call and return their assigned IDs
    async fn store_entities_batch(&mut self, entities: Vec<Self::Entity>) -> Result<Vec<String>>;

    /// Health check for storage connection
    ///
    /// The provided implementation performs no check and always returns
    /// `Ok(true)`; override it for backends that can actually become unhealthy.
    async fn health_check(&self) -> Result<bool> {
        Ok(true)
    }

    /// Flush any pending operations
    ///
    /// The provided implementation is a no-op that returns `Ok(())`.
    async fn flush(&mut self) -> Result<()> {
        Ok(())
    }
}
116
/// Text embedding abstraction for converting text to vector representations
///
/// ## Synchronous Version
/// This trait provides synchronous operations for text embeddings;
/// [`AsyncEmbedder`] is the `async` counterpart.
///
/// Fallible methods return the crate-wide [`Result`] alias. No method signature
/// in this trait refers to the associated `Error` type.
pub trait Embedder {
    /// The error type associated with this embedder
    type Error: std::error::Error + Send + Sync + 'static;

    /// Generate the embedding vector for a single text
    fn embed(&self, text: &str) -> Result<Vec<f32>>;

    /// Generate embeddings for multiple texts in batch
    ///
    /// NOTE(review): the result presumably holds one vector per input text, in
    /// input order — confirm with implementors.
    fn embed_batch(&self, texts: &[&str]) -> Result<Vec<Vec<f32>>>;

    /// Get the dimensionality of embeddings produced by this embedder
    fn dimension(&self) -> usize;

    /// Check if the embedder is ready for use
    fn is_ready(&self) -> bool;
}
137
138/// Async text embedding abstraction for non-blocking embedding operations
139///
140/// ## Async Version
141/// This trait provides async operations for text embeddings with better throughput for large batches.
142#[allow(async_fn_in_trait)]
143#[async_trait]
144pub trait AsyncEmbedder: Send + Sync {
145    /// The error type returned by embedding operations
146    type Error: std::error::Error + Send + Sync + 'static;
147
148    /// Generate embeddings for a single text
149    async fn embed(&self, text: &str) -> Result<Vec<f32>>;
150
151    /// Generate embeddings for multiple texts in batch
152    async fn embed_batch(&self, texts: &[&str]) -> Result<Vec<Vec<f32>>>;
153
154    /// Generate embeddings for multiple texts with concurrency control
155    async fn embed_batch_concurrent(
156        &self,
157        texts: &[&str],
158        max_concurrent: usize,
159    ) -> Result<Vec<Vec<f32>>> {
160        if max_concurrent <= 1 {
161            return self.embed_batch(texts).await;
162        }
163
164        let chunks: Vec<_> = texts.chunks(max_concurrent).collect();
165        let mut results = Vec::with_capacity(texts.len());
166
167        for chunk in chunks {
168            let batch_results = self.embed_batch(chunk).await?;
169            results.extend(batch_results);
170        }
171
172        Ok(results)
173    }
174
175    /// Get the dimensionality of embeddings produced by this embedder
176    fn dimension(&self) -> usize;
177
178    /// Check if the embedder is ready for use
179    async fn is_ready(&self) -> bool;
180
181    /// Health check for embedding service
182    async fn health_check(&self) -> Result<bool> {
183        self.is_ready()
184            .await
185            .then_some(true)
186            .ok_or_else(|| crate::core::GraphRAGError::Retrieval {
187                message: "Embedding service health check failed".to_string(),
188            })
189    }
190}
191
192/// Vector similarity search abstraction for finding similar embeddings
193///
194/// ## Synchronous Version
195/// This trait provides synchronous operations for vector search.
196pub trait VectorStore {
197    /// The error type returned by vector store operations
198    type Error: std::error::Error + Send + Sync + 'static;
199
200    /// Add a vector with associated ID and metadata
201    fn add_vector(&mut self, id: String, vector: Vec<f32>, metadata: VectorMetadata) -> Result<()>;
202
203    /// Add multiple vectors in batch
204    fn add_vectors_batch(&mut self, vectors: VectorBatch) -> Result<()>;
205
206    /// Search for k most similar vectors
207    fn search(&self, query_vector: &[f32], k: usize) -> Result<Vec<SearchResult>>;
208
209    /// Search with distance threshold
210    fn search_with_threshold(
211        &self,
212        query_vector: &[f32],
213        k: usize,
214        threshold: f32,
215    ) -> Result<Vec<SearchResult>>;
216
217    /// Remove a vector by ID
218    fn remove_vector(&mut self, id: &str) -> Result<bool>;
219
220    /// Get vector count
221    fn len(&self) -> usize;
222
223    /// Check if empty
224    fn is_empty(&self) -> bool;
225}
226
227/// Async vector similarity search abstraction for non-blocking vector operations
228///
229/// ## Async Version
230/// This trait provides async operations for vector search with better concurrency and scalability.
231#[allow(async_fn_in_trait)]
232#[async_trait]
233pub trait AsyncVectorStore: Send + Sync {
234    /// The error type returned by vector store operations
235    type Error: std::error::Error + Send + Sync + 'static;
236
237    /// Add a vector with associated ID and metadata
238    async fn add_vector(
239        &mut self,
240        id: String,
241        vector: Vec<f32>,
242        metadata: VectorMetadata,
243    ) -> Result<()>;
244
245    /// Add multiple vectors in batch
246    async fn add_vectors_batch(&mut self, vectors: VectorBatch) -> Result<()>;
247
248    /// Add vectors with concurrency control for large batches
249    async fn add_vectors_batch_concurrent(
250        &mut self,
251        vectors: VectorBatch,
252        max_concurrent: usize,
253    ) -> Result<()> {
254        if max_concurrent <= 1 {
255            return self.add_vectors_batch(vectors).await;
256        }
257
258        for chunk in vectors.chunks(max_concurrent) {
259            self.add_vectors_batch(chunk.to_vec()).await?;
260        }
261
262        Ok(())
263    }
264
265    /// Search for k most similar vectors
266    async fn search(&self, query_vector: &[f32], k: usize) -> Result<Vec<SearchResult>>;
267
268    /// Search with distance threshold
269    async fn search_with_threshold(
270        &self,
271        query_vector: &[f32],
272        k: usize,
273        threshold: f32,
274    ) -> Result<Vec<SearchResult>>;
275
276    /// Search multiple queries concurrently
277    async fn search_batch(
278        &self,
279        query_vectors: &[Vec<f32>],
280        k: usize,
281    ) -> Result<Vec<Vec<SearchResult>>> {
282        let mut results = Vec::with_capacity(query_vectors.len());
283        for query in query_vectors {
284            let search_results = self.search(query, k).await?;
285            results.push(search_results);
286        }
287        Ok(results)
288    }
289
290    /// Remove a vector by ID
291    async fn remove_vector(&mut self, id: &str) -> Result<bool>;
292
293    /// Remove multiple vectors in batch
294    async fn remove_vectors_batch(&mut self, ids: &[&str]) -> Result<Vec<bool>> {
295        let mut results = Vec::with_capacity(ids.len());
296        for id in ids {
297            let removed = self.remove_vector(id).await?;
298            results.push(removed);
299        }
300        Ok(results)
301    }
302
303    /// Get vector count
304    async fn len(&self) -> usize;
305
306    /// Check if empty
307    async fn is_empty(&self) -> bool {
308        self.len().await == 0
309    }
310
311    /// Health check for vector store
312    async fn health_check(&self) -> Result<bool> {
313        Ok(true)
314    }
315
316    /// Build index for better search performance (if applicable)
317    async fn build_index(&mut self) -> Result<()> {
318        Ok(())
319    }
320}
321
/// A single match returned by a vector similarity search
///
/// Produced by the `search*` methods of [`VectorStore`] and
/// [`AsyncVectorStore`].
#[derive(Debug, Clone)]
pub struct SearchResult {
    /// Unique identifier of the matched vector
    pub id: String,
    /// Distance/similarity score (lower is more similar)
    pub distance: f32,
    /// Optional metadata associated with the vector
    ///
    /// Same shape as the [`VectorMetadata`] alias.
    pub metadata: Option<HashMap<String, String>>,
}
332
/// Entity extraction abstraction for identifying entities in text
///
/// ## Synchronous Version
/// This trait provides synchronous operations for entity extraction;
/// [`AsyncEntityExtractor`] is the `async` counterpart.
///
/// Fallible methods return the crate-wide [`Result`] alias. No method signature
/// in this trait refers to the associated `Error` type.
pub trait EntityExtractor {
    /// The entity type this extractor produces
    type Entity;
    /// The error type associated with this extractor
    type Error: std::error::Error + Send + Sync + 'static;

    /// Extract entities from text
    fn extract(&self, text: &str) -> Result<Vec<Self::Entity>>;

    /// Extract entities with confidence scores
    ///
    /// Each element pairs an entity with its `f32` confidence score.
    fn extract_with_confidence(&self, text: &str) -> Result<Vec<(Self::Entity, f32)>>;

    /// Set minimum confidence threshold
    ///
    /// NOTE(review): how the threshold affects `extract` and
    /// `extract_with_confidence` is up to the implementor — confirm there.
    fn set_confidence_threshold(&mut self, threshold: f32);
}
352
353/// Async entity extraction abstraction for non-blocking entity extraction
354///
355/// ## Async Version
356/// This trait provides async operations for entity extraction with better throughput for large texts.
357#[allow(async_fn_in_trait)]
358#[async_trait]
359pub trait AsyncEntityExtractor: Send + Sync {
360    /// The entity type this extractor produces
361    type Entity: Send + Sync;
362    /// The error type returned by extraction operations
363    type Error: std::error::Error + Send + Sync + 'static;
364
365    /// Extract entities from text
366    async fn extract(&self, text: &str) -> Result<Vec<Self::Entity>>;
367
368    /// Extract entities with confidence scores
369    async fn extract_with_confidence(&self, text: &str) -> Result<Vec<(Self::Entity, f32)>>;
370
371    /// Extract entities from multiple texts in batch
372    async fn extract_batch(&self, texts: &[&str]) -> Result<Vec<Vec<Self::Entity>>> {
373        let mut results = Vec::with_capacity(texts.len());
374        for text in texts {
375            let entities = self.extract(text).await?;
376            results.push(entities);
377        }
378        Ok(results)
379    }
380
381    /// Extract entities from multiple texts with concurrency control
382    async fn extract_batch_concurrent(
383        &self,
384        texts: &[&str],
385        max_concurrent: usize,
386    ) -> Result<Vec<Vec<Self::Entity>>> {
387        if max_concurrent <= 1 {
388            return self.extract_batch(texts).await;
389        }
390
391        let chunks: Vec<_> = texts.chunks(max_concurrent).collect();
392        let mut results = Vec::with_capacity(texts.len());
393
394        for chunk in chunks {
395            let batch_results = self.extract_batch(chunk).await?;
396            results.extend(batch_results);
397        }
398
399        Ok(results)
400    }
401
402    /// Set minimum confidence threshold
403    async fn set_confidence_threshold(&mut self, threshold: f32);
404
405    /// Get current confidence threshold
406    async fn get_confidence_threshold(&self) -> f32;
407
408    /// Health check for entity extractor
409    async fn health_check(&self) -> Result<bool> {
410        Ok(true)
411    }
412}
413
/// Text retrieval abstraction for finding relevant content
///
/// ## Synchronous Version
/// This trait provides synchronous operations for content retrieval;
/// [`AsyncRetriever`] is the `async` counterpart.
///
/// Note that the associated item type is itself named `Result`: in the
/// signatures below `Self::Result` is that associated type, while the bare
/// `Result<...>` is the crate-wide result alias. No method signature in this
/// trait refers to the associated `Error` type.
pub trait Retriever {
    /// The query type this retriever accepts
    type Query;
    /// The result type this retriever returns
    type Result;
    /// The error type associated with this retriever
    type Error: std::error::Error + Send + Sync + 'static;

    /// Perform a search query
    ///
    /// NOTE(review): `k` is presumably the maximum number of results to
    /// return — confirm with implementors.
    fn search(&self, query: Self::Query, k: usize) -> Result<Vec<Self::Result>>;

    /// Perform a search with additional context
    fn search_with_context(
        &self,
        query: Self::Query,
        context: &str,
        k: usize,
    ) -> Result<Vec<Self::Result>>;

    /// Update the retriever with new content
    fn update(&mut self, content: Vec<String>) -> Result<()>;
}
440
441/// Async text retrieval abstraction for non-blocking content retrieval
442///
443/// ## Async Version
444/// This trait provides async operations for content retrieval with better scalability and concurrency.
445#[allow(async_fn_in_trait)]
446#[async_trait]
447pub trait AsyncRetriever: Send + Sync {
448    /// The query type this retriever accepts
449    type Query: Send + Sync;
450    /// The result type this retriever returns
451    type Result: Send + Sync;
452    /// The error type returned by retrieval operations
453    type Error: std::error::Error + Send + Sync + 'static;
454
455    /// Perform a search query
456    async fn search(&self, query: Self::Query, k: usize) -> Result<Vec<Self::Result>>;
457
458    /// Perform a search with additional context
459    async fn search_with_context(
460        &self,
461        query: Self::Query,
462        context: &str,
463        k: usize,
464    ) -> Result<Vec<Self::Result>>;
465
466    /// Perform multiple search queries concurrently
467    async fn search_batch(
468        &self,
469        queries: Vec<Self::Query>,
470        k: usize,
471    ) -> Result<Vec<Vec<Self::Result>>> {
472        let mut results = Vec::with_capacity(queries.len());
473        for query in queries {
474            let search_results = self.search(query, k).await?;
475            results.push(search_results);
476        }
477        Ok(results)
478    }
479
480    /// Update the retriever with new content
481    async fn update(&mut self, content: Vec<String>) -> Result<()>;
482
483    /// Update the retriever with new content in batches
484    async fn update_batch(&mut self, content_batches: Vec<Vec<String>>) -> Result<()> {
485        for batch in content_batches {
486            self.update(batch).await?;
487        }
488        Ok(())
489    }
490
491    /// Refresh/rebuild the retrieval index
492    async fn refresh_index(&mut self) -> Result<()> {
493        Ok(())
494    }
495
496    /// Health check for retrieval system
497    async fn health_check(&self) -> Result<bool> {
498        Ok(true)
499    }
500
501    /// Get retrieval statistics
502    async fn get_stats(&self) -> Result<RetrievalStats> {
503        Ok(RetrievalStats::default())
504    }
505}
506
/// Statistics for retrieval operations
///
/// The derived `Default` sets every field to zero.
#[derive(Debug, Clone, Default)]
pub struct RetrievalStats {
    /// Total number of queries processed
    pub total_queries: u64,
    /// Average response time in milliseconds
    pub average_response_time_ms: f64,
    /// Size of the retrieval index
    pub index_size: usize,
    /// Cache hit rate as a fraction in the range 0.0 to 1.0
    pub cache_hit_rate: f64,
}
519
/// Large Language Model abstraction for text generation
///
/// ## Synchronous Version
/// This trait provides synchronous operations for text generation;
/// [`AsyncLanguageModel`] is the `async` counterpart.
///
/// Fallible methods return the crate-wide [`Result`] alias. No method signature
/// in this trait refers to the associated `Error` type.
pub trait LanguageModel {
    /// The error type associated with this model
    type Error: std::error::Error + Send + Sync + 'static;

    /// Generate text completion
    fn complete(&self, prompt: &str) -> Result<String>;

    /// Generate text with custom parameters
    ///
    /// See [`GenerationParams`] for the available settings.
    fn complete_with_params(&self, prompt: &str, params: GenerationParams) -> Result<String>;

    /// Check if the model is available
    fn is_available(&self) -> bool;

    /// Get model information
    fn model_info(&self) -> ModelInfo;
}
540
541/// Async Large Language Model abstraction for non-blocking text generation
542///
543/// ## Async Version
544/// This trait provides async operations for text generation with better throughput and concurrency.
545#[allow(async_fn_in_trait)]
546#[async_trait]
547pub trait AsyncLanguageModel: Send + Sync {
548    /// The error type returned by generation operations
549    type Error: std::error::Error + Send + Sync + 'static;
550
551    /// Generate text completion
552    async fn complete(&self, prompt: &str) -> Result<String>;
553
554    /// Generate text with custom parameters
555    async fn complete_with_params(&self, prompt: &str, params: GenerationParams) -> Result<String>;
556
557    /// Generate multiple text completions concurrently
558    async fn complete_batch(&self, prompts: &[&str]) -> Result<Vec<String>> {
559        let mut results = Vec::with_capacity(prompts.len());
560        for prompt in prompts {
561            let completion = self.complete(prompt).await?;
562            results.push(completion);
563        }
564        Ok(results)
565    }
566
567    /// Generate multiple text completions with concurrency control
568    async fn complete_batch_concurrent(
569        &self,
570        prompts: &[&str],
571        max_concurrent: usize,
572    ) -> Result<Vec<String>> {
573        if max_concurrent <= 1 {
574            return self.complete_batch(prompts).await;
575        }
576
577        let chunks: Vec<_> = prompts.chunks(max_concurrent).collect();
578        let mut results = Vec::with_capacity(prompts.len());
579
580        for chunk in chunks {
581            let batch_results = self.complete_batch(chunk).await?;
582            results.extend(batch_results);
583        }
584
585        Ok(results)
586    }
587
588    /// Generate streaming completion (if supported)
589    async fn complete_streaming(
590        &self,
591        prompt: &str,
592    ) -> Result<Pin<Box<dyn futures::Stream<Item = Result<String>> + Send>>> {
593        // Default implementation converts regular completion to stream
594        let result = self.complete(prompt).await?;
595        let stream = futures::stream::once(async move { Ok(result) });
596        Ok(Box::pin(stream))
597    }
598
599    /// Check if the model is available
600    async fn is_available(&self) -> bool;
601
602    /// Get model information
603    async fn model_info(&self) -> ModelInfo;
604
605    /// Health check for language model service
606    async fn health_check(&self) -> Result<bool> {
607        self.is_available().await.then_some(true).ok_or_else(|| {
608            crate::core::GraphRAGError::Generation {
609                message: "Language model health check failed".to_string(),
610            }
611        })
612    }
613
614    /// Get model usage statistics
615    async fn get_usage_stats(&self) -> Result<ModelUsageStats> {
616        Ok(ModelUsageStats::default())
617    }
618
619    /// Estimate tokens for prompt
620    async fn estimate_tokens(&self, prompt: &str) -> Result<usize> {
621        // Simple estimation: ~4 characters per token
622        Ok(prompt.len() / 4)
623    }
624}
625
/// Usage statistics for language model
///
/// The derived `Default` sets every field to zero.
#[derive(Debug, Clone, Default)]
pub struct ModelUsageStats {
    /// Total number of generation requests
    pub total_requests: u64,
    /// Total tokens processed across all requests
    pub total_tokens_processed: u64,
    /// Average response time in milliseconds
    pub average_response_time_ms: f64,
    /// Error rate as a fraction in the range 0.0 to 1.0
    pub error_rate: f64,
}
638
/// Parameters for text generation
///
/// Every field is optional.
/// NOTE(review): `None` presumably defers to the backend's own default —
/// confirm with implementors.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct GenerationParams {
    /// Maximum number of tokens to generate
    pub max_tokens: Option<usize>,
    /// Temperature for sampling (0.0 = deterministic, 1.0 = random)
    pub temperature: Option<f32>,
    /// Top-p nucleus sampling threshold
    pub top_p: Option<f32>,
    /// Sequences that will stop generation when encountered
    pub stop_sequences: Option<Vec<String>>,
}
651
/// Default generation settings: up to 1000 tokens, temperature 0.7, top-p 0.9,
/// and no stop sequences.
impl Default for GenerationParams {
    fn default() -> Self {
        Self {
            max_tokens: Some(1000),
            temperature: Some(0.7),
            top_p: Some(0.9),
            // No stop sequences are configured by default.
            stop_sequences: None,
        }
    }
}
662
/// Information about a language model
///
/// Returned by `LanguageModel::model_info` and
/// `AsyncLanguageModel::model_info`.
#[derive(Debug, Clone)]
pub struct ModelInfo {
    /// Name of the model
    pub name: String,
    /// Version of the model, if known
    pub version: Option<String>,
    /// Maximum context length in tokens, if known
    pub max_context_length: Option<usize>,
    /// Whether the model supports streaming responses
    pub supports_streaming: bool,
}
675
/// Graph operations abstraction for knowledge graph management
///
/// ## Synchronous Version
/// This trait provides synchronous operations for graph management;
/// [`AsyncGraphStore`] is the `async` counterpart.
///
/// Fallible methods return the crate-wide [`Result`] alias. No method signature
/// in this trait refers to the associated `Error` type.
pub trait GraphStore {
    /// The node type this graph store handles
    type Node;
    /// The edge type this graph store handles
    type Edge;
    /// The error type associated with this graph store
    type Error: std::error::Error + Send + Sync + 'static;

    /// Add a node to the graph
    ///
    /// NOTE(review): the returned `String` is presumably the new node's ID —
    /// confirm with implementors.
    fn add_node(&mut self, node: Self::Node) -> Result<String>;

    /// Add an edge between two nodes
    ///
    /// `from_id` and `to_id` identify the endpoints.
    /// NOTE(review): the returned `String` is presumably the new edge's ID —
    /// confirm with implementors.
    fn add_edge(&mut self, from_id: &str, to_id: &str, edge: Self::Edge) -> Result<String>;

    /// Find nodes by criteria
    ///
    /// NOTE(review): the format of `criteria` is not defined here — see the
    /// implementor's documentation.
    fn find_nodes(&self, criteria: &str) -> Result<Vec<Self::Node>>;

    /// Get neighbors of a node
    fn get_neighbors(&self, node_id: &str) -> Result<Vec<Self::Node>>;

    /// Perform graph traversal starting at `start_id`, bounded by `max_depth`
    fn traverse(&self, start_id: &str, max_depth: usize) -> Result<Vec<Self::Node>>;

    /// Get graph statistics
    fn stats(&self) -> GraphStats;
}
706
707/// Async graph operations abstraction for non-blocking graph management
708///
709/// ## Async Version
710/// This trait provides async operations for graph management with better scalability for large graphs.
711#[allow(async_fn_in_trait)]
712#[async_trait]
713pub trait AsyncGraphStore: Send + Sync {
714    /// The node type this graph store handles
715    type Node: Send + Sync;
716    /// The edge type this graph store handles
717    type Edge: Send + Sync;
718    /// The error type returned by graph operations
719    type Error: std::error::Error + Send + Sync + 'static;
720
721    /// Add a node to the graph
722    async fn add_node(&mut self, node: Self::Node) -> Result<String>;
723
724    /// Add multiple nodes in batch
725    async fn add_nodes_batch(&mut self, nodes: Vec<Self::Node>) -> Result<Vec<String>> {
726        let mut ids = Vec::with_capacity(nodes.len());
727        for node in nodes {
728            let id = self.add_node(node).await?;
729            ids.push(id);
730        }
731        Ok(ids)
732    }
733
734    /// Add an edge between two nodes
735    async fn add_edge(&mut self, from_id: &str, to_id: &str, edge: Self::Edge) -> Result<String>;
736
737    /// Add multiple edges in batch
738    async fn add_edges_batch(
739        &mut self,
740        edges: Vec<(String, String, Self::Edge)>,
741    ) -> Result<Vec<String>> {
742        let mut ids = Vec::with_capacity(edges.len());
743        for (from_id, to_id, edge) in edges {
744            let id = self.add_edge(&from_id, &to_id, edge).await?;
745            ids.push(id);
746        }
747        Ok(ids)
748    }
749
750    /// Find nodes by criteria
751    async fn find_nodes(&self, criteria: &str) -> Result<Vec<Self::Node>>;
752
753    /// Find nodes by multiple criteria concurrently
754    async fn find_nodes_batch(&self, criteria_list: &[&str]) -> Result<Vec<Vec<Self::Node>>> {
755        let mut results = Vec::with_capacity(criteria_list.len());
756        for criteria in criteria_list {
757            let nodes = self.find_nodes(criteria).await?;
758            results.push(nodes);
759        }
760        Ok(results)
761    }
762
763    /// Get neighbors of a node
764    async fn get_neighbors(&self, node_id: &str) -> Result<Vec<Self::Node>>;
765
766    /// Get neighbors of multiple nodes
767    async fn get_neighbors_batch(&self, node_ids: &[&str]) -> Result<Vec<Vec<Self::Node>>> {
768        let mut results = Vec::with_capacity(node_ids.len());
769        for node_id in node_ids {
770            let neighbors = self.get_neighbors(node_id).await?;
771            results.push(neighbors);
772        }
773        Ok(results)
774    }
775
776    /// Perform graph traversal
777    async fn traverse(&self, start_id: &str, max_depth: usize) -> Result<Vec<Self::Node>>;
778
779    /// Perform multiple graph traversals concurrently
780    async fn traverse_batch(
781        &self,
782        start_ids: &[&str],
783        max_depth: usize,
784    ) -> Result<Vec<Vec<Self::Node>>> {
785        let mut results = Vec::with_capacity(start_ids.len());
786        for start_id in start_ids {
787            let traversal = self.traverse(start_id, max_depth).await?;
788            results.push(traversal);
789        }
790        Ok(results)
791    }
792
793    /// Get graph statistics
794    async fn stats(&self) -> GraphStats;
795
796    /// Health check for graph store
797    async fn health_check(&self) -> Result<bool> {
798        Ok(true)
799    }
800
801    /// Optimize graph structure (rebuild indices, etc.)
802    async fn optimize(&mut self) -> Result<()> {
803        Ok(())
804    }
805
806    /// Export graph data
807    async fn export(&self) -> Result<Vec<u8>> {
808        Ok(Vec::new())
809    }
810
811    /// Import graph data
812    #[allow(clippy::disallowed_names)]
813    async fn import(&mut self, data: &[u8]) -> Result<()> {
814        let _ = data; // Unused parameter
815        Ok(())
816    }
817}
818
/// Statistics about a graph
///
/// Returned by `GraphStore::stats` and `AsyncGraphStore::stats`.
#[derive(Debug, Clone)]
pub struct GraphStats {
    /// Total number of nodes in the graph
    pub node_count: usize,
    /// Total number of edges in the graph
    pub edge_count: usize,
    /// Average degree (number of connections per node)
    pub average_degree: f32,
    /// Maximum depth when traversing from root nodes
    pub max_depth: usize,
}
831
/// Function calling abstraction for tool usage
///
/// ## Synchronous Version
/// This trait provides synchronous operations for function calling;
/// [`AsyncFunctionRegistry`] is the `async` counterpart.
///
/// Fallible methods return the crate-wide [`Result`] alias. No method signature
/// in this trait refers to the associated `Error` type.
pub trait FunctionRegistry {
    /// The function type this registry handles
    type Function;
    /// The result type returned by function calls
    type CallResult;
    /// The error type associated with this registry
    type Error: std::error::Error + Send + Sync + 'static;

    /// Register a new function under `name`
    fn register(&mut self, name: String, function: Self::Function) -> Result<()>;

    /// Call a function by name with arguments
    ///
    /// NOTE(review): `args` is passed as a single string; its encoding (e.g.
    /// JSON) is not defined here — confirm with implementors.
    fn call(&self, name: &str, args: &str) -> Result<Self::CallResult>;

    /// List the names of available functions
    fn list_functions(&self) -> Vec<String>;

    /// Check if a function exists
    fn has_function(&self, name: &str) -> bool;
}
856
857/// Async function calling abstraction for non-blocking tool usage
858///
859/// ## Async Version
860/// This trait provides async operations for function calling with better concurrency for tool usage.
861#[allow(async_fn_in_trait)]
862#[async_trait]
863pub trait AsyncFunctionRegistry: Send + Sync {
864    /// The function type this registry handles
865    type Function: Send + Sync;
866    /// The result type returned by function calls
867    type CallResult: Send + Sync;
868    /// The error type returned by function operations
869    type Error: std::error::Error + Send + Sync + 'static;
870
871    /// Register a new function
872    async fn register(&mut self, name: String, function: Self::Function) -> Result<()>;
873
874    /// Call a function by name with arguments
875    async fn call(&self, name: &str, args: &str) -> Result<Self::CallResult>;
876
877    /// Call multiple functions concurrently
878    async fn call_batch(&self, calls: &[(&str, &str)]) -> Result<Vec<Self::CallResult>> {
879        let mut results = Vec::with_capacity(calls.len());
880        for (name, args) in calls {
881            let result = self.call(name, args).await?;
882            results.push(result);
883        }
884        Ok(results)
885    }
886
887    /// List available functions
888    async fn list_functions(&self) -> Vec<String>;
889
890    /// Check if a function exists
891    async fn has_function(&self, name: &str) -> bool;
892
893    /// Get function metadata
894    async fn get_function_info(&self, name: &str) -> Result<Option<FunctionInfo>>;
895
896    /// Health check for function registry
897    async fn health_check(&self) -> Result<bool> {
898        Ok(true)
899    }
900
901    /// Validate function arguments before calling
902    async fn validate_args(&self, name: &str, args: &str) -> Result<bool> {
903        let _ = (name, args); // Unused parameters
904        Ok(true)
905    }
906}
907
/// Information about a registered function
///
/// Plain metadata record. Values compare field by field, so two descriptions
/// can be checked for equality directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct FunctionInfo {
    /// Name of the function
    pub name: String,
    /// Human-readable description of what the function does
    pub description: Option<String>,
    /// List of parameters the function accepts
    pub parameters: Vec<ParameterInfo>,
    /// Return type of the function
    pub return_type: Option<String>,
}

/// Information about a function parameter
///
/// Compares field by field, which is what makes `FunctionInfo` comparable.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParameterInfo {
    /// Name of the parameter
    pub name: String,
    /// Type of the parameter
    pub param_type: String,
    /// Human-readable description of the parameter
    pub description: Option<String>,
    /// Whether this parameter is required
    pub required: bool,
}
933
/// Configuration management abstraction
///
/// ## Synchronous Version
/// This trait provides synchronous operations for configuration management.
///
/// Fallible methods return the crate-level `Result` alias; the associated
/// `Error` type is declared but not referenced by any method signature here.
pub trait ConfigProvider {
    /// The configuration type this provider handles
    type Config;
    /// The error type returned by configuration operations
    type Error: std::error::Error + Send + Sync + 'static;

    /// Load configuration from source
    ///
    /// What the "source" is (file, environment, ...) is up to the implementor.
    fn load(&self) -> Result<Self::Config>;

    /// Save configuration to source
    ///
    /// Takes `&self`, so implementors that keep in-memory state need interior
    /// mutability to update it.
    fn save(&self, config: &Self::Config) -> Result<()>;

    /// Validate configuration
    ///
    /// Returns `Ok(())` when `config` is acceptable and an error otherwise.
    fn validate(&self, config: &Self::Config) -> Result<()>;

    /// Get default configuration
    ///
    /// Infallible: always produces a configuration value.
    fn default_config(&self) -> Self::Config;
}
956
957/// Async configuration management abstraction for non-blocking configuration operations
958///
959/// ## Async Version
960/// This trait provides async operations for configuration management with better I/O handling.
961#[allow(async_fn_in_trait)]
962#[async_trait]
963pub trait AsyncConfigProvider: Send + Sync {
964    /// The configuration type this provider handles
965    type Config: Send + Sync;
966    /// The error type returned by configuration operations
967    type Error: std::error::Error + Send + Sync + 'static;
968
969    /// Load configuration from source
970    async fn load(&self) -> Result<Self::Config>;
971
972    /// Save configuration to source
973    async fn save(&self, config: &Self::Config) -> Result<()>;
974
975    /// Validate configuration
976    async fn validate(&self, config: &Self::Config) -> Result<()>;
977
978    /// Get default configuration
979    async fn default_config(&self) -> Self::Config;
980
981    /// Watch for configuration changes
982    async fn watch_changes(
983        &self,
984    ) -> Result<Pin<Box<dyn futures::Stream<Item = Result<Self::Config>> + Send + 'static>>>
985    where
986        Self::Config: 'static,
987    {
988        // Default implementation - no change watching
989        let stream = futures::stream::empty::<Result<Self::Config>>();
990        Ok(Box::pin(stream))
991    }
992
993    /// Reload configuration from source
994    async fn reload(&self) -> Result<Self::Config> {
995        self.load().await
996    }
997
998    /// Health check for configuration source
999    async fn health_check(&self) -> Result<bool> {
1000        Ok(true)
1001    }
1002}
1003
/// Monitoring and metrics abstraction
///
/// ## Synchronous Version
/// This trait provides synchronous operations for metrics collection.
///
/// All recording methods take `&self` and return nothing, so a failure to
/// record cannot be reported to the caller through this interface.
pub trait MetricsCollector {
    /// Record a counter metric
    ///
    /// # Arguments
    /// * `name` - Metric name
    /// * `value` - Unsigned amount to record
    /// * `tags` - Optional `(key, value)` pairs attached to the sample
    fn counter(&self, name: &str, value: u64, tags: Option<&[(&str, &str)]>);

    /// Record a gauge metric
    ///
    /// # Arguments
    /// * `name` - Metric name
    /// * `value` - Value to record
    /// * `tags` - Optional `(key, value)` pairs attached to the sample
    fn gauge(&self, name: &str, value: f64, tags: Option<&[(&str, &str)]>);

    /// Record a histogram metric
    ///
    /// # Arguments
    /// * `name` - Metric name
    /// * `value` - Observed value
    /// * `tags` - Optional `(key, value)` pairs attached to the sample
    fn histogram(&self, name: &str, value: f64, tags: Option<&[(&str, &str)]>);

    /// Start a timer
    ///
    /// Returns a `Timer` handle; the elapsed duration is obtained by calling
    /// `finish` on that handle.
    fn timer(&self, name: &str) -> Timer;
}
1021
1022/// Async monitoring and metrics abstraction for non-blocking metrics collection
1023///
1024/// ## Async Version
1025/// This trait provides async operations for metrics collection with better throughput.
1026#[allow(async_fn_in_trait)]
1027#[async_trait]
1028pub trait AsyncMetricsCollector: Send + Sync {
1029    /// Record a counter metric
1030    async fn counter(&self, name: &str, value: u64, tags: Option<&[(&str, &str)]>);
1031
1032    /// Record a gauge metric
1033    async fn gauge(&self, name: &str, value: f64, tags: Option<&[(&str, &str)]>);
1034
1035    /// Record a histogram metric
1036    async fn histogram(&self, name: &str, value: f64, tags: Option<&[(&str, &str)]>);
1037
1038    /// Record multiple metrics in batch
1039    async fn record_batch(&self, metrics: &[MetricRecord]) {
1040        for metric in metrics {
1041            match metric {
1042                MetricRecord::Counter { name, value, tags } => {
1043                    let tags_refs: Option<Vec<(&str, &str)>> = tags
1044                        .as_ref()
1045                        .map(|t| t.iter().map(|(k, v)| (k.as_str(), v.as_str())).collect());
1046                    self.counter(name, *value, tags_refs.as_deref()).await;
1047                },
1048                MetricRecord::Gauge { name, value, tags } => {
1049                    let tags_refs: Option<Vec<(&str, &str)>> = tags
1050                        .as_ref()
1051                        .map(|t| t.iter().map(|(k, v)| (k.as_str(), v.as_str())).collect());
1052                    self.gauge(name, *value, tags_refs.as_deref()).await;
1053                },
1054                MetricRecord::Histogram { name, value, tags } => {
1055                    let tags_refs: Option<Vec<(&str, &str)>> = tags
1056                        .as_ref()
1057                        .map(|t| t.iter().map(|(k, v)| (k.as_str(), v.as_str())).collect());
1058                    self.histogram(name, *value, tags_refs.as_deref()).await;
1059                },
1060            }
1061        }
1062    }
1063
1064    /// Start an async timer
1065    async fn timer(&self, name: &str) -> AsyncTimer;
1066
1067    /// Health check for metrics collection
1068    async fn health_check(&self) -> Result<bool> {
1069        Ok(true)
1070    }
1071
1072    /// Flush pending metrics
1073    async fn flush(&self) -> Result<()> {
1074        Ok(())
1075    }
1076}
1077
/// Metric record for batch operations
///
/// Owned description of a single metric sample, one variant per metric kind.
/// Records compare by value (`PartialEq`); `Eq` is not derived because the
/// gauge and histogram values are `f64`.
#[derive(Debug, Clone, PartialEq)]
pub enum MetricRecord {
    /// Counter metric that increments over time
    Counter {
        /// Name of the metric
        name: String,
        /// Value to increment by
        value: u64,
        /// Optional tags for categorization
        tags: Option<Vec<(String, String)>>,
    },
    /// Gauge metric that can go up or down
    Gauge {
        /// Name of the metric
        name: String,
        /// Current value
        value: f64,
        /// Optional tags for categorization
        tags: Option<Vec<(String, String)>>,
    },
    /// Histogram metric for distribution tracking
    Histogram {
        /// Name of the metric
        name: String,
        /// Observed value
        value: f64,
        /// Optional tags for categorization
        tags: Option<Vec<(String, String)>>,
    },
}
1109
/// Async timer handle for measuring durations
///
/// Captures an `Instant` when constructed; `finish` reports the time elapsed
/// since that moment.
pub struct AsyncTimer {
    /// Name of the operation being timed
    name: String,
    /// Start time of the timer
    start: std::time::Instant,
}

impl AsyncTimer {
    /// Create a new async timer with the given name
    ///
    /// The clock starts at the moment of this call.
    pub fn new(name: String) -> Self {
        let start = std::time::Instant::now();
        Self { name, start }
    }

    /// Finish the timer and return the elapsed duration
    ///
    /// Consumes the timer. The body contains no await points.
    pub async fn finish(self) -> std::time::Duration {
        let Self { start, .. } = self;
        start.elapsed()
    }

    /// Get the name of the operation being timed
    pub fn name(&self) -> &str {
        self.name.as_str()
    }
}
1137
/// Timer handle for measuring durations
///
/// Captures an `Instant` when constructed; `finish` reports the time elapsed
/// since that moment. The operation name is readable through `name`, mirroring
/// the async timer handle.
pub struct Timer {
    /// Name of the operation being timed
    name: String,
    /// Start time of the timer
    start: std::time::Instant,
}

impl Timer {
    /// Create a new timer with the given name
    ///
    /// The clock starts at the moment of this call.
    pub fn new(name: String) -> Self {
        Self {
            name,
            start: std::time::Instant::now(),
        }
    }

    /// Finish the timer and return the elapsed duration
    ///
    /// Consumes the timer.
    pub fn finish(self) -> std::time::Duration {
        self.start.elapsed()
    }

    /// Get the name of the operation being timed
    pub fn name(&self) -> &str {
        &self.name
    }
}
1161
/// Serialization abstraction for different formats
///
/// ## Synchronous Version
/// This trait provides synchronous operations for serialization.
///
/// Both conversion methods are generic, so this trait cannot be used as a
/// `dyn Serializer` trait object. Fallible methods return the crate-level
/// `Result` alias; the associated `Error` type is declared but not referenced
/// by any method signature here.
pub trait Serializer {
    /// The error type returned by serialization operations
    type Error: std::error::Error + Send + Sync + 'static;

    /// Serialize data to string
    ///
    /// Accepts any `serde::Serialize` value and returns its textual encoding.
    fn serialize<T: serde::Serialize>(&self, data: &T) -> Result<String>;

    /// Deserialize data from string
    ///
    /// Produces an owned `T`; the result does not borrow from `data`.
    fn deserialize<T: serde::de::DeserializeOwned>(&self, data: &str) -> Result<T>;

    /// Get file extension for this format
    ///
    /// NOTE(review): whether the value includes a leading dot is not specified
    /// here — confirm against implementors.
    fn extension(&self) -> &'static str;
}
1179
1180/// Async serialization abstraction for non-blocking serialization operations
1181///
1182/// ## Async Version
1183/// This trait provides async operations for serialization with better I/O handling.
1184#[allow(async_fn_in_trait)]
1185#[async_trait]
1186pub trait AsyncSerializer: Send + Sync {
1187    /// The error type returned by serialization operations
1188    type Error: std::error::Error + Send + Sync + 'static;
1189
1190    /// Serialize data to string
1191    async fn serialize<T: serde::Serialize + Send + Sync>(&self, data: &T) -> Result<String>;
1192
1193    /// Deserialize data from string
1194    async fn deserialize<T: serde::de::DeserializeOwned + Send + Sync>(
1195        &self,
1196        data: &str,
1197    ) -> Result<T>;
1198
1199    /// Serialize data to bytes
1200    #[allow(clippy::disallowed_names)]
1201    async fn serialize_bytes<T: serde::Serialize + Send + Sync>(
1202        &self,
1203        data: &T,
1204    ) -> Result<Vec<u8>> {
1205        let string = self.serialize(data).await?;
1206        Ok(string.into_bytes())
1207    }
1208
1209    /// Deserialize data from bytes
1210    #[allow(clippy::disallowed_names)]
1211    async fn deserialize_bytes<T: serde::de::DeserializeOwned + Send + Sync>(
1212        &self,
1213        data: &[u8],
1214    ) -> Result<T> {
1215        let string = String::from_utf8(data.to_vec()).map_err(|e| {
1216            crate::core::GraphRAGError::Serialization {
1217                message: format!("Invalid UTF-8 data: {e}"),
1218            }
1219        })?;
1220        self.deserialize(&string).await
1221    }
1222
1223    /// Serialize multiple objects in batch
1224    #[allow(clippy::disallowed_names)]
1225    async fn serialize_batch<T: serde::Serialize + Send + Sync>(
1226        &self,
1227        data: &[T],
1228    ) -> Result<Vec<String>> {
1229        let mut results = Vec::with_capacity(data.len());
1230        for item in data {
1231            let serialized = self.serialize(item).await?;
1232            results.push(serialized);
1233        }
1234        Ok(results)
1235    }
1236
1237    /// Get file extension for this format
1238    fn extension(&self) -> &'static str;
1239
1240    /// Health check for serializer
1241    async fn health_check(&self) -> Result<bool> {
1242        Ok(true)
1243    }
1244}
1245
1246//
1247// COMPREHENSIVE ASYNC TRAIT EXPORTS AND ADAPTER UTILITIES
1248//
1249
1250/// Adapter to convert sync traits to async
1251pub mod sync_to_async {
1252    use super::*;
1253    use std::sync::Arc;
1254
1255    /// Adapter that wraps a sync Storage to implement AsyncStorage
1256    pub struct StorageAdapter<T>(pub Arc<tokio::sync::Mutex<T>>);
1257
1258    #[async_trait]
1259    impl<T> AsyncStorage for StorageAdapter<T>
1260    where
1261        T: Storage + Send + Sync + 'static,
1262        T::Entity: Send + Sync,
1263        T::Document: Send + Sync,
1264        T::Chunk: Send + Sync,
1265    {
1266        /// The entity type from the wrapped storage
1267        type Entity = T::Entity;
1268        /// The document type from the wrapped storage
1269        type Document = T::Document;
1270        /// The chunk type from the wrapped storage
1271        type Chunk = T::Chunk;
1272        /// The error type from the wrapped storage
1273        type Error = T::Error;
1274
1275        async fn store_entity(&mut self, entity: Self::Entity) -> Result<String> {
1276            let mut storage = self.0.lock().await;
1277            storage.store_entity(entity)
1278        }
1279
1280        async fn retrieve_entity(&self, id: &str) -> Result<Option<Self::Entity>> {
1281            let storage = self.0.lock().await;
1282            storage.retrieve_entity(id)
1283        }
1284
1285        async fn store_document(&mut self, document: Self::Document) -> Result<String> {
1286            let mut storage = self.0.lock().await;
1287            storage.store_document(document)
1288        }
1289
1290        async fn retrieve_document(&self, id: &str) -> Result<Option<Self::Document>> {
1291            let storage = self.0.lock().await;
1292            storage.retrieve_document(id)
1293        }
1294
1295        async fn store_chunk(&mut self, chunk: Self::Chunk) -> Result<String> {
1296            let mut storage = self.0.lock().await;
1297            storage.store_chunk(chunk)
1298        }
1299
1300        async fn retrieve_chunk(&self, id: &str) -> Result<Option<Self::Chunk>> {
1301            let storage = self.0.lock().await;
1302            storage.retrieve_chunk(id)
1303        }
1304
1305        async fn list_entities(&self) -> Result<Vec<String>> {
1306            let storage = self.0.lock().await;
1307            storage.list_entities()
1308        }
1309
1310        async fn store_entities_batch(
1311            &mut self,
1312            entities: Vec<Self::Entity>,
1313        ) -> Result<Vec<String>> {
1314            let mut storage = self.0.lock().await;
1315            storage.store_entities_batch(entities)
1316        }
1317    }
1318
1319    /// Adapter that wraps a sync LanguageModel to implement AsyncLanguageModel
1320    pub struct LanguageModelAdapter<T>(pub Arc<T>);
1321
1322    #[async_trait]
1323    impl<T> AsyncLanguageModel for LanguageModelAdapter<T>
1324    where
1325        T: LanguageModel + Send + Sync + 'static,
1326    {
1327        /// The error type from the wrapped language model
1328        type Error = T::Error;
1329
1330        async fn complete(&self, prompt: &str) -> Result<String> {
1331            self.0.complete(prompt)
1332        }
1333
1334        async fn complete_with_params(
1335            &self,
1336            prompt: &str,
1337            params: GenerationParams,
1338        ) -> Result<String> {
1339            self.0.complete_with_params(prompt, params)
1340        }
1341
1342        async fn is_available(&self) -> bool {
1343            self.0.is_available()
1344        }
1345
1346        async fn model_info(&self) -> ModelInfo {
1347            self.0.model_info()
1348        }
1349    }
1350}
1351
1352/// Comprehensive async trait utilities and helpers
1353pub mod async_utils {
1354    use super::*;
1355    use std::time::Duration;
1356
1357    /// Timeout wrapper for any async operation
1358    pub async fn with_timeout<F, T>(future: F, timeout: Duration) -> Result<T>
1359    where
1360        F: Future<Output = Result<T>>,
1361    {
1362        match tokio::time::timeout(timeout, future).await {
1363            Ok(result) => result,
1364            Err(_) => Err(crate::core::GraphRAGError::Timeout {
1365                operation: "async operation".to_string(),
1366                duration: timeout,
1367            }),
1368        }
1369    }
1370
1371    /// Retry wrapper for async operations
1372    pub async fn with_retry<F, T, E>(
1373        mut operation: F,
1374        max_retries: usize,
1375        delay: Duration,
1376    ) -> std::result::Result<T, E>
1377    where
1378        F: FnMut() -> Pin<Box<dyn Future<Output = std::result::Result<T, E>> + Send>>,
1379        E: std::fmt::Debug,
1380    {
1381        let mut attempts = 0;
1382        loop {
1383            match operation().await {
1384                Ok(result) => return Ok(result),
1385                Err(err) => {
1386                    attempts += 1;
1387                    if attempts >= max_retries {
1388                        return Err(err);
1389                    }
1390                    tokio::time::sleep(delay).await;
1391                },
1392            }
1393        }
1394    }
1395
1396    /// Batch processor for concurrent operations with rate limiting
1397    pub async fn process_batch_with_rate_limit<T, F, R>(
1398        items: Vec<T>,
1399        processor: F,
1400        max_concurrent: usize,
1401        rate_limit: Option<Duration>,
1402    ) -> Vec<Result<R>>
1403    where
1404        T: Send + 'static,
1405        F: Fn(T) -> Pin<Box<dyn Future<Output = Result<R>> + Send>> + Send + Sync + 'static,
1406        R: Send + 'static,
1407    {
1408        use futures::stream::{FuturesUnordered, StreamExt};
1409        use std::sync::Arc;
1410
1411        let processor = Arc::new(processor);
1412        let mut futures = FuturesUnordered::new();
1413        let mut results = Vec::with_capacity(items.len());
1414        let mut pending = 0;
1415
1416        for item in items {
1417            if pending >= max_concurrent {
1418                if let Some(result) = futures.next().await {
1419                    results.push(result);
1420                    pending -= 1;
1421                }
1422            }
1423
1424            let processor_clone = Arc::clone(&processor);
1425            futures.push(async move {
1426                if let Some(delay) = rate_limit {
1427                    tokio::time::sleep(delay).await;
1428                }
1429                processor_clone(item).await
1430            });
1431            pending += 1;
1432        }
1433
1434        while let Some(result) = futures.next().await {
1435            results.push(result);
1436        }
1437
1438        results
1439    }
1440}
1441
// Type aliases for common async trait objects

/// Type-erased async language model for dynamic dispatch
///
/// Boxed `AsyncLanguageModel` trait object whose `Error` is `GraphRAGError`.
pub type BoxedAsyncLanguageModel =
    Box<dyn AsyncLanguageModel<Error = crate::core::GraphRAGError> + Send + Sync>;
/// Type-erased async embedder for dynamic dispatch
///
/// Boxed `AsyncEmbedder` trait object whose `Error` is `GraphRAGError`.
pub type BoxedAsyncEmbedder =
    Box<dyn AsyncEmbedder<Error = crate::core::GraphRAGError> + Send + Sync>;
/// Type-erased async vector store for dynamic dispatch
///
/// Boxed `AsyncVectorStore` trait object whose `Error` is `GraphRAGError`.
pub type BoxedAsyncVectorStore =
    Box<dyn AsyncVectorStore<Error = crate::core::GraphRAGError> + Send + Sync>;
/// Type-erased async retriever for dynamic dispatch
///
/// Boxed `AsyncRetriever` trait object that takes `String` queries, yields
/// `crate::retrieval::SearchResult` values and uses `GraphRAGError` as its error.
pub type BoxedAsyncRetriever = Box<
    dyn AsyncRetriever<
            Query = String,
            Result = crate::retrieval::SearchResult,
            Error = crate::core::GraphRAGError,
        > + Send
        + Sync,
>;
graphrag_core/core/traits.rs

graphrag_core/core/
traits.rs