graphrag-core 0.2.0

Core portable library for GraphRAG - works on native and WASM
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
//! Service registry for dependency injection
//!
//! This module provides a dependency injection system that allows
//! components to be swapped out for testing or different implementations.

use crate::core::traits::*;
use crate::core::{GraphRAGError, Result};
use std::any::{Any, TypeId};
use std::collections::HashMap;
use std::sync::Arc;

/// Type-erased service container
type ServiceBox = Box<dyn Any + Send + Sync>;

/// Service registry for dependency injection
pub struct ServiceRegistry {
    services: HashMap<TypeId, ServiceBox>,
}

impl ServiceRegistry {
    /// Create a new empty service registry
    pub fn new() -> Self {
        Self {
            services: HashMap::new(),
        }
    }

    /// Register a service implementation
    pub fn register<T: Any + Send + Sync>(&mut self, service: T) {
        let type_id = TypeId::of::<T>();
        self.services.insert(type_id, Box::new(service));
    }

    /// Get a service by type
    pub fn get<T: Any + Send + Sync>(&self) -> Result<&T> {
        let type_id = TypeId::of::<T>();

        self.services
            .get(&type_id)
            .and_then(|service| service.downcast_ref::<T>())
            .ok_or_else(|| GraphRAGError::Config {
                message: format!("Service not registered: {}", std::any::type_name::<T>()),
            })
    }

    /// Get a mutable service by type
    pub fn get_mut<T: Any + Send + Sync>(&mut self) -> Result<&mut T> {
        let type_id = TypeId::of::<T>();

        self.services
            .get_mut(&type_id)
            .and_then(|service| service.downcast_mut::<T>())
            .ok_or_else(|| GraphRAGError::Config {
                message: format!("Service not registered: {}", std::any::type_name::<T>()),
            })
    }

    /// Check if a service is registered
    pub fn has<T: Any + Send + Sync>(&self) -> bool {
        let type_id = TypeId::of::<T>();
        self.services.contains_key(&type_id)
    }

    /// Remove a service
    pub fn remove<T: Any + Send + Sync>(&mut self) -> Option<T> {
        let type_id = TypeId::of::<T>();

        self.services
            .remove(&type_id)
            .and_then(|service| service.downcast::<T>().ok())
            .map(|boxed| *boxed)
    }

    /// Get the number of registered services
    pub fn len(&self) -> usize {
        self.services.len()
    }

    /// Check if the registry is empty
    pub fn is_empty(&self) -> bool {
        self.services.is_empty()
    }

    /// Clear all services
    pub fn clear(&mut self) {
        self.services.clear();
    }
}

impl Default for ServiceRegistry {
    fn default() -> Self {
        Self::new()
    }
}

/// Builder for creating and configuring service registries
pub struct RegistryBuilder {
    registry: ServiceRegistry,
}

impl RegistryBuilder {
    /// Create a new registry builder
    pub fn new() -> Self {
        Self {
            registry: ServiceRegistry::new(),
        }
    }

    /// Register a service and continue building
    pub fn with_service<T: Any + Send + Sync>(mut self, service: T) -> Self {
        self.registry.register(service);
        self
    }

    /// Register a storage implementation
    pub fn with_storage<S>(mut self, storage: S) -> Self
    where
        S: Storage + Any + Send + Sync,
    {
        self.registry.register(storage);
        self
    }

    /// Register an embedder implementation
    pub fn with_embedder<E>(mut self, embedder: E) -> Self
    where
        E: Embedder + Any + Send + Sync,
    {
        self.registry.register(embedder);
        self
    }

    /// Register a vector store implementation
    pub fn with_vector_store<V>(mut self, vector_store: V) -> Self
    where
        V: VectorStore + Any + Send + Sync,
    {
        self.registry.register(vector_store);
        self
    }

    /// Register an entity extractor implementation
    pub fn with_entity_extractor<E>(mut self, extractor: E) -> Self
    where
        E: EntityExtractor + Any + Send + Sync,
    {
        self.registry.register(extractor);
        self
    }

    /// Register a retriever implementation
    pub fn with_retriever<R>(mut self, retriever: R) -> Self
    where
        R: Retriever + Any + Send + Sync,
    {
        self.registry.register(retriever);
        self
    }

    /// Register a language model implementation
    pub fn with_language_model<L>(mut self, language_model: L) -> Self
    where
        L: LanguageModel + Any + Send + Sync,
    {
        self.registry.register(language_model);
        self
    }

    /// Register a graph store implementation
    pub fn with_graph_store<G>(mut self, graph_store: G) -> Self
    where
        G: GraphStore + Any + Send + Sync,
    {
        self.registry.register(graph_store);
        self
    }

    /// Register a function registry implementation
    pub fn with_function_registry<F>(mut self, function_registry: F) -> Self
    where
        F: FunctionRegistry + Any + Send + Sync,
    {
        self.registry.register(function_registry);
        self
    }

    /// Register a metrics collector implementation
    pub fn with_metrics_collector<M>(mut self, metrics: M) -> Self
    where
        M: MetricsCollector + Any + Send + Sync,
    {
        self.registry.register(metrics);
        self
    }

    /// Register a serializer implementation
    pub fn with_serializer<S>(mut self, serializer: S) -> Self
    where
        S: Serializer + Any + Send + Sync,
    {
        self.registry.register(serializer);
        self
    }

    /// Build the final registry
    pub fn build(self) -> ServiceRegistry {
        self.registry
    }

    /// Create a registry with default Ollama-based services
    #[cfg(feature = "ollama")]
    pub fn with_ollama_defaults() -> Self {
        #[cfg(feature = "memory-storage")]
        use crate::storage::MemoryStorage;

        let mut builder = Self::new();

        #[cfg(feature = "memory-storage")]
        {
            builder = builder.with_storage(MemoryStorage::new());
        }

        // Add other service implementations based on available features
        #[cfg(feature = "parallel-processing")]
        {
            use crate::parallel::ParallelProcessor;

            // Auto-detect number of threads (0 means use default)
            let num_threads = num_cpus::get();
            let parallel_processor = ParallelProcessor::new(num_threads);
            builder = builder.with_service(parallel_processor);
        }

        #[cfg(feature = "vector-hnsw")]
        {
            use crate::vector::VectorIndex;
            builder = builder.with_service(VectorIndex::new());
        }

        #[cfg(feature = "caching")]
        {
            // Add caching services when available
            // Note: Specific cache implementations would be added here
        }
        builder
    }

    /// Create a registry with memory-only services for testing
    ///
    /// This creates a registry with mock implementations suitable for unit testing:
    /// - MemoryStorage for document storage
    /// - MockEmbedder for embeddings (128-dimensional)
    /// - MockLanguageModel for text generation
    /// - MockVectorStore for vector similarity search
    /// - MockRetriever for content retrieval
    #[cfg(feature = "memory-storage")]
    pub fn with_test_defaults() -> Self {
        use crate::core::test_utils::{
            MockEmbedder, MockLanguageModel, MockRetriever, MockVectorStore,
        };
        use crate::storage::MemoryStorage;

        Self::new()
            .with_storage(MemoryStorage::new())
            .with_service(MockEmbedder::new(128))
            .with_service(MockLanguageModel::new())
            .with_service(MockVectorStore::new(128))
            .with_service(MockRetriever::new())
    }
}

impl Default for RegistryBuilder {
    fn default() -> Self {
        Self::new()
    }
}

/// Context object that provides access to services
#[derive(Clone)]
pub struct ServiceContext {
    registry: Arc<ServiceRegistry>,
}

impl ServiceContext {
    /// Create a new service context
    pub fn new(registry: ServiceRegistry) -> Self {
        Self {
            registry: Arc::new(registry),
        }
    }

    /// Get a service by type
    pub fn get<T: Any + Send + Sync>(&self) -> Result<&T> {
        // Safety: This is safe because we're getting an immutable reference
        // from an Arc, which ensures the registry stays alive
        unsafe {
            let ptr = self.registry.as_ref() as *const ServiceRegistry;
            (*ptr).get::<T>()
        }
    }
}

/// Configuration for service creation
#[derive(Debug, Clone)]
pub struct ServiceConfig {
    /// Base URL for Ollama API server
    pub ollama_base_url: Option<String>,
    /// Model name for text embeddings
    pub embedding_model: Option<String>,
    /// Model name for text generation
    pub language_model: Option<String>,
    /// Dimensionality of embedding vectors
    pub vector_dimension: Option<usize>,
    /// Minimum confidence threshold for entity extraction
    pub entity_confidence_threshold: Option<f32>,
    /// Enable parallel processing for batch operations
    pub enable_parallel_processing: bool,
    /// Enable function calling capabilities
    pub enable_function_calling: bool,
    /// Enable monitoring and metrics collection
    pub enable_monitoring: bool,
}

impl Default for ServiceConfig {
    fn default() -> Self {
        Self {
            ollama_base_url: Some("http://localhost:11434".to_string()),
            embedding_model: Some("nomic-embed-text:latest".to_string()),
            language_model: Some("llama3.2:latest".to_string()),
            vector_dimension: Some(384),
            entity_confidence_threshold: Some(0.7),
            enable_parallel_processing: true,
            enable_function_calling: false,
            enable_monitoring: false,
        }
    }
}

impl ServiceConfig {
    /// Create a registry builder from this configuration
    ///
    /// This method creates service instances based on the configuration and available features.
    /// Services are registered in the following order:
    ///
    /// 1. Storage (MemoryStorage with memory-storage feature)
    /// 2. Vector Store (when vector storage implementations are available)
    /// 3. Embedder (when embedding providers are available)
    /// 4. Entity Extractor (when NER models are available)
    /// 5. Retriever (when retrieval systems are implemented)
    /// 6. Language Model (when LLM clients are available)
    /// 7. Metrics Collector (when monitoring is enabled)
    ///
    /// # Example
    ///
    /// ```no_run
    /// use graphrag_core::core::registry::ServiceConfig;
    ///
    /// let config = ServiceConfig::default();
    /// let registry = config.build_registry().build();
    /// ```
    pub fn build_registry(&self) -> RegistryBuilder {
        let mut builder = RegistryBuilder::new();

        // 1. Storage Layer
        #[cfg(feature = "memory-storage")]
        {
            use crate::storage::MemoryStorage;
            builder = builder.with_storage(MemoryStorage::new());
        }

        // 2. Vector Store
        //
        // Vector storage has two parallel trait hierarchies:
        //
        // 1. vector::store::VectorStore (local module)
        //    - Domain-specific trait for GraphRAG vector operations
        //    - Implemented by: MemoryVectorStore, LanceDB, Qdrant
        //    - Used directly by retrieval and embedding systems
        //    - Methods: store_embedding, search_similar, batch operations
        //
        // 2. core::traits::AsyncVectorStore (generic trait)
        //    - Generic async interface for service registry
        //    - Designed for dependency injection and testing
        //    - Methods: store, search, delete, get, count, clear
        //    - Implemented by: MockVectorStore (test_utils)
        //
        // Current status: Both hierarchies work independently
        // - MemoryVectorStore works with retrieval systems ✓
        // - MockVectorStore works with service registry ✓
        //
        // Future unification (optional):
        // 2b. Vector Store (Optional)
        // If needed, create an adapter to bridge vector::VectorStore to AsyncVectorStore.
        // This would enable using production vector stores (LanceDB, Qdrant) through
        // the generic registry interface.
        #[cfg(feature = "vector-memory")]
        {
            if let Some(_dimension) = self.vector_dimension {
                use crate::vector::memory_store::MemoryVectorStore;
                let vector_store = MemoryVectorStore::new();
                builder = builder.with_service(vector_store);

                #[cfg(feature = "tracing")]
                tracing::info!("Registered MemoryVectorStore (dimension: {})", _dimension);
            }
        }

        // 3. Embedding Provider
        // Create embedder based on configuration and available features
        #[cfg(feature = "ollama")]
        {
            if let Some(model) = &self.embedding_model {
                if let Some(dimension) = self.vector_dimension {
                    use crate::core::ollama_adapters::OllamaEmbedderAdapter;

                    let embedder = OllamaEmbedderAdapter::new(model.clone(), dimension);
                    builder = builder.with_service(embedder);

                    #[cfg(feature = "tracing")]
                    tracing::info!(
                        "Registered Ollama embedder with model: {}, dimension: {}",
                        model,
                        dimension
                    );
                }
            }
        }

        // 4. Entity Extractor
        // Register entity extraction service using GraphIndexer
        #[cfg(all(feature = "async", feature = "lightrag"))]
        {
            if let Some(threshold) = self.entity_confidence_threshold {
                use crate::core::entity_adapters::GraphIndexerAdapter;

                // Create GraphIndexer adapter with default entity types
                let entity_types = vec![
                    "person".to_string(),
                    "organization".to_string(),
                    "location".to_string(),
                ];
                let extractor = GraphIndexerAdapter::new(entity_types, 3)
                    .map(|adapter| adapter.with_confidence_threshold(threshold));

                if let Ok(extractor) = extractor {
                    builder = builder.with_service(extractor);

                    #[cfg(feature = "tracing")]
                    tracing::info!(
                        "Registered GraphIndexer entity extractor with threshold: {}",
                        threshold
                    );
                }
            }
        }

        // 5. Retriever
        // Register retrieval system
        #[cfg(all(feature = "async", feature = "basic-retrieval"))]
        {
            use crate::config::Config;
            use crate::core::retrieval_adapters::RetrievalSystemAdapter;
            use crate::retrieval::RetrievalSystem;

            // Create a default config for retrieval system
            let config = Config::default();
            if let Ok(system) = RetrievalSystem::new(&config) {
                let retriever = RetrievalSystemAdapter::new(system);
                builder = builder.with_service(retriever);

                #[cfg(feature = "tracing")]
                tracing::info!("Registered RetrievalSystem");
            }
        }

        // 6. Language Model
        // Register LLM client for text generation
        #[cfg(feature = "ollama")]
        {
            if let (Some(base_url), Some(model)) = (&self.ollama_base_url, &self.language_model) {
                use crate::core::ollama_adapters::OllamaLanguageModelAdapter;
                use crate::ollama::OllamaConfig;

                // Build OllamaConfig from ServiceConfig
                let mut ollama_config = OllamaConfig::default();
                // Parse host and port from base_url (format: "http://localhost:11434")
                if let Some(url_parts) = base_url.split("://").nth(1) {
                    let parts: Vec<&str> = url_parts.split(':').collect();
                    if parts.len() >= 2 {
                        ollama_config.host = format!("http://{}", parts[0]);
                        if let Ok(port) = parts[1].parse::<u16>() {
                            ollama_config.port = port;
                        }
                    }
                }
                ollama_config.chat_model = model.clone();
                ollama_config.enabled = true;

                let language_model = OllamaLanguageModelAdapter::new(ollama_config);
                builder = builder.with_service(language_model);

                #[cfg(feature = "tracing")]
                tracing::info!(
                    "Registered Ollama language model: {} at {}",
                    model,
                    base_url
                );
            }
        }

        // 7. Metrics Collector
        // Register metrics collector when monitoring is enabled
        #[cfg(all(feature = "monitoring", feature = "dashmap"))]
        {
            if self.enable_monitoring {
                use crate::monitoring::MetricsCollector;

                let metrics = MetricsCollector::new();
                builder = builder.with_service(metrics);

                #[cfg(feature = "tracing")]
                tracing::info!("Registered MetricsCollector");
            }
        }

        // 8. Function Registry
        // Register function calling capabilities when enabled
        //
        // Note: The function_calling module provides a comprehensive FunctionCaller
        // implementation with the following characteristics:
        //
        // - Requires KnowledgeGraph context for function execution
        // - Uses json::JsonValue (json crate) instead of serde_json::Value
        // - Provides synchronous call() methods, not async
        // - Includes built-in function history and statistics
        // - Supports complex function orchestration with context passing
        //
        // Creating an adapter for AsyncFunctionRegistry would require:
        // 1. JSON format conversion (json::JsonValue <-> serde_json::Value)
        // 2. Async wrapper around synchronous call methods
        // 3. KnowledgeGraph injection mechanism (currently passed per-call)
        // 4. Context state management for stateless async trait
        //
        // For applications needing function calling:
        // - Use FunctionCaller directly from function_calling module
        // - It provides richer functionality than the generic AsyncFunctionRegistry trait
        // - Built-in support for GraphRAG-specific operations
        //
        // The AsyncFunctionRegistry trait is better suited for simpler,
        // stateless function registries without graph context requirements.
        #[cfg(feature = "function-calling")]
        {
            if self.enable_function_calling {
                #[cfg(feature = "tracing")]
                tracing::info!(
                    "Function calling enabled - use function_calling::FunctionCaller directly"
                );
            }
        }

        builder
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[derive(Debug)]
    struct TestService {
        value: String,
    }

    impl TestService {
        fn new(value: String) -> Self {
            Self { value }
        }
    }

    #[test]
    fn test_registry_basic_operations() {
        let mut registry = ServiceRegistry::new();

        // Test registration
        registry.register(TestService::new("test".to_string()));
        assert!(registry.has::<TestService>());
        assert_eq!(registry.len(), 1);

        // Test retrieval
        let service = registry.get::<TestService>().unwrap();
        assert_eq!(service.value, "test");

        // Test removal
        let removed = registry.remove::<TestService>().unwrap();
        assert_eq!(removed.value, "test");
        assert!(!registry.has::<TestService>());
        assert!(registry.is_empty());
    }

    #[test]
    fn test_registry_builder() {
        let registry = RegistryBuilder::new()
            .with_service(TestService::new("builder".to_string()))
            .build();

        assert!(registry.has::<TestService>());
        let service = registry.get::<TestService>().unwrap();
        assert_eq!(service.value, "builder");
    }

    #[test]
    fn test_service_context() {
        let mut registry = ServiceRegistry::new();
        registry.register(TestService::new("context".to_string()));

        let context = ServiceContext::new(registry);
        let service = context.get::<TestService>().unwrap();
        assert_eq!(service.value, "context");

        // Test cloning
        let cloned_context = context.clone();
        let service2 = cloned_context.get::<TestService>().unwrap();
        assert_eq!(service2.value, "context");
    }

    #[test]
    fn test_service_config_default() {
        let config = ServiceConfig::default();
        assert!(config.ollama_base_url.is_some());
        assert!(config.embedding_model.is_some());
        assert!(config.language_model.is_some());
        assert!(config.vector_dimension.is_some());
        assert!(config.entity_confidence_threshold.is_some());
        assert!(config.enable_parallel_processing);
    }

    #[test]
    #[cfg(feature = "ollama")]
    fn test_service_config_build_with_ollama() {
        let config = ServiceConfig {
            ollama_base_url: Some("http://localhost:11434".to_string()),
            embedding_model: Some("nomic-embed-text".to_string()),
            language_model: Some("llama3.2".to_string()),
            vector_dimension: Some(768),
            entity_confidence_threshold: Some(0.7),
            enable_parallel_processing: true,
            enable_function_calling: false,
            enable_monitoring: false,
        };

        let registry = config.build_registry().build();

        // Verify services are registered
        #[cfg(feature = "memory-storage")]
        {
            use crate::storage::MemoryStorage;
            assert!(registry.has::<MemoryStorage>());
        }

        // Note: We can't easily verify OllamaEmbedderAdapter and OllamaLanguageModelAdapter
        // are registered without making them pub, but the build succeeds which means
        // the registration code runs without errors
        assert!(!registry.is_empty());
    }

    #[test]
    #[cfg(feature = "vector-memory")]
    fn test_registry_with_vector_memory() {
        use crate::vector::memory_store::MemoryVectorStore;

        let config = ServiceConfig {
            ollama_base_url: None,
            embedding_model: None,
            language_model: None,
            vector_dimension: Some(384), // Set vector dimension to enable MemoryVectorStore
            entity_confidence_threshold: None,
            enable_parallel_processing: false,
            enable_function_calling: false,
            enable_monitoring: false,
        };

        let registry = config.build_registry().build();

        // When vector-memory feature is enabled and vector_dimension is set,
        // MemoryVectorStore should be registered
        assert!(
            registry.has::<MemoryVectorStore>(),
            "MemoryVectorStore should be registered when vector-memory feature is enabled"
        );

        // Verify we can retrieve it
        let vector_store = registry.get::<MemoryVectorStore>();
        assert!(
            vector_store.is_ok(),
            "Should be able to retrieve registered MemoryVectorStore"
        );
    }

    #[test]
    #[cfg(not(feature = "vector-memory"))]
    fn test_registry_without_vector_memory() {
        let config = ServiceConfig {
            ollama_base_url: None,
            embedding_model: None,
            language_model: None,
            vector_dimension: Some(384), // Even with dimension set...
            entity_confidence_threshold: None,
            enable_parallel_processing: false,
            enable_function_calling: false,
            enable_monitoring: false,
        };

        let _registry = config.build_registry().build();

        // When vector-memory feature is disabled, MemoryVectorStore should NOT be registered
        // (This test verifies the feature flag works correctly)
        // Note: We can't import MemoryVectorStore to test for absence since it might not be available,
        // but the build succeeds which means the #[cfg] gate works correctly
    }
}