Skip to main content

oxirs_vec/sparql_integration/
mod.rs

1//! SPARQL integration for vector search and hybrid symbolic-vector queries
2//!
3//! This module provides comprehensive SPARQL integration capabilities for vector operations,
4//! including cross-language search, federated queries, and custom function support.
5
6use crate::{
7    embeddings::{EmbeddingManager, EmbeddingStrategy},
8    graph_aware_search::{GraphAwareConfig, GraphAwareSearch},
9    VectorStore,
10};
11use anyhow::Result;
12use std::collections::HashMap;
13
14// Re-export main types and modules
15pub mod config;
16pub mod cross_language;
17pub mod federation;
18pub mod monitoring;
19pub mod multimodal_functions;
20pub mod query_executor;
21pub mod sparql_functions;
22
23// Tantivy text search integration (feature-gated)
24#[cfg(feature = "tantivy-search")]
25pub mod text_functions;
26
27pub use config::{
28    VectorOperation, VectorQuery, VectorQueryOptimizer, VectorQueryResult, VectorServiceArg,
29    VectorServiceConfig, VectorServiceFunction, VectorServiceParameter, VectorServiceResult,
30};
31pub use cross_language::CrossLanguageProcessor;
32pub use federation::{FederatedQueryResult, FederationManager};
33pub use monitoring::{PerformanceMonitor, PerformanceReport};
34pub use multimodal_functions::{
35    generate_multimodal_sparql_function, sparql_multimodal_search,
36    sparql_multimodal_search_from_args, MultimodalSearchConfig, SparqlMultimodalResult,
37};
38pub use query_executor::QueryExecutor;
39pub use sparql_functions::{CustomVectorFunction, SparqlVectorFunctions};
40
41#[cfg(feature = "tantivy-search")]
42pub use text_functions::{RdfLiteral, SearchStats, SparqlSearchResult, SparqlTextFunctions};
43
44/// Main SPARQL vector service implementation
45pub struct SparqlVectorService {
46    config: VectorServiceConfig,
47    query_executor: QueryExecutor,
48    sparql_functions: SparqlVectorFunctions,
49    federation_manager: Option<FederationManager>,
50    performance_monitor: Option<PerformanceMonitor>,
51}
52
53impl SparqlVectorService {
54    /// Create a new SPARQL vector service
55    pub fn new(config: VectorServiceConfig, embedding_strategy: EmbeddingStrategy) -> Result<Self> {
56        let vector_store = VectorStore::new();
57        let embedding_manager = EmbeddingManager::new(embedding_strategy, 1000)?;
58
59        let performance_monitor = if config.enable_monitoring {
60            Some(PerformanceMonitor::new())
61        } else {
62            None
63        };
64
65        let graph_aware_search = if config.enable_monitoring {
66            Some(GraphAwareSearch::new(GraphAwareConfig::default()))
67        } else {
68            None
69        };
70
71        let optimizer = VectorQueryOptimizer::default();
72        let query_executor = QueryExecutor::new(
73            vector_store,
74            embedding_manager,
75            optimizer,
76            performance_monitor.clone(),
77            graph_aware_search,
78        );
79
80        let sparql_functions = SparqlVectorFunctions::new();
81
82        Ok(Self {
83            config,
84            query_executor,
85            sparql_functions,
86            federation_manager: None,
87            performance_monitor,
88        })
89    }
90
91    /// Execute a SPARQL vector function
92    pub fn execute_function(
93        &mut self,
94        function_name: &str,
95        args: &[VectorServiceArg],
96    ) -> Result<VectorServiceResult> {
97        let start_time = std::time::Instant::now();
98
99        let result =
100            self.sparql_functions
101                .execute_function(function_name, args, &mut self.query_executor);
102
103        // Record performance metrics
104        if let Some(ref monitor) = self.performance_monitor {
105            let duration = start_time.elapsed();
106            monitor.record_query(duration, result.is_ok());
107            monitor.record_operation(&format!("function_{function_name}"), duration);
108        }
109
110        result
111    }
112
113    /// Execute an optimized vector query
114    pub fn execute_query(&mut self, query: &VectorQuery) -> Result<VectorQueryResult> {
115        self.query_executor.execute_optimized_query(query)
116    }
117
118    /// Register a custom SPARQL function
119    pub fn register_function(&mut self, function: VectorServiceFunction) {
120        self.sparql_functions.register_function(function);
121    }
122
123    /// Register a custom function implementation
124    pub fn register_custom_function(
125        &mut self,
126        name: String,
127        function: Box<dyn CustomVectorFunction>,
128    ) {
129        self.sparql_functions
130            .register_custom_function(name, function);
131    }
132
133    /// Enable federation with specified endpoints
134    pub fn enable_federation(&mut self, endpoint_urls: Vec<String>) {
135        self.federation_manager = Some(FederationManager::new(endpoint_urls));
136    }
137
138    /// Execute federated query
139    pub async fn execute_federated_query(
140        &mut self,
141        endpoints: &[String],
142        query: &VectorQuery,
143    ) -> Result<FederatedQueryResult> {
144        if let Some(ref mut manager) = self.federation_manager {
145            manager.execute_federated_query(endpoints, query).await
146        } else {
147            Err(anyhow::anyhow!("Federation not enabled"))
148        }
149    }
150
151    /// Get performance report
152    pub fn get_performance_report(&self) -> Option<PerformanceReport> {
153        self.performance_monitor
154            .as_ref()
155            .map(|m| m.generate_report())
156    }
157
158    /// Get function documentation
159    pub fn get_function_documentation(&self, name: &str) -> Option<String> {
160        self.sparql_functions.get_function_documentation(name)
161    }
162
163    /// Generate SPARQL function definitions
164    pub fn generate_sparql_definitions(&self) -> String {
165        self.sparql_functions.generate_sparql_definitions()
166    }
167
168    /// Check if a function is registered
169    pub fn is_function_registered(&self, name: &str) -> bool {
170        self.sparql_functions.is_function_registered(name)
171    }
172
173    /// Get all registered functions
174    pub fn get_all_functions(&self) -> &HashMap<String, VectorServiceFunction> {
175        self.sparql_functions.get_all_functions()
176    }
177
178    /// Clear query cache
179    pub fn clear_cache(&mut self) {
180        self.query_executor.clear_cache();
181    }
182
183    /// Get cache statistics
184    pub fn cache_stats(&self) -> (usize, usize) {
185        self.query_executor.cache_stats()
186    }
187
188    /// Update configuration
189    pub fn update_config(&mut self, config: VectorServiceConfig) {
190        self.config = config;
191    }
192
193    /// Get current configuration
194    pub fn get_config(&self) -> &VectorServiceConfig {
195        &self.config
196    }
197
198    /// Generate a SPARQL SERVICE query for a vector operation
199    pub fn generate_service_query(&self, operation: &VectorOperation) -> String {
200        operation.to_sparql_service_query(&self.config.service_uri)
201    }
202
203    /// Add a resource embedding to the service's vector store
204    pub fn add_resource_embedding(
205        &mut self,
206        uri: &str,
207        content: &crate::embeddings::EmbeddableContent,
208    ) -> Result<()> {
209        self.query_executor.add_resource_embedding(uri, content)
210    }
211}
212
213/// Builder for creating SPARQL vector service with custom configuration
214pub struct SparqlVectorServiceBuilder {
215    config: VectorServiceConfig,
216    embedding_strategy: Option<EmbeddingStrategy>,
217    federation_endpoints: Vec<String>,
218    custom_functions: Vec<(String, Box<dyn CustomVectorFunction>)>,
219}
220
221impl SparqlVectorServiceBuilder {
222    pub fn new() -> Self {
223        Self {
224            config: VectorServiceConfig::default(),
225            embedding_strategy: None,
226            federation_endpoints: Vec::new(),
227            custom_functions: Vec::new(),
228        }
229    }
230
231    pub fn with_config(mut self, config: VectorServiceConfig) -> Self {
232        self.config = config;
233        self
234    }
235
236    pub fn with_embedding_strategy(mut self, strategy: EmbeddingStrategy) -> Self {
237        self.embedding_strategy = Some(strategy);
238        self
239    }
240
241    pub fn with_federation_endpoints(mut self, endpoints: Vec<String>) -> Self {
242        self.federation_endpoints = endpoints;
243        self
244    }
245
246    pub fn with_custom_function(
247        mut self,
248        name: String,
249        function: Box<dyn CustomVectorFunction>,
250    ) -> Self {
251        self.custom_functions.push((name, function));
252        self
253    }
254
255    pub fn build(self) -> Result<SparqlVectorService> {
256        let embedding_strategy = self
257            .embedding_strategy
258            .unwrap_or(EmbeddingStrategy::SentenceTransformer);
259
260        let mut service = SparqlVectorService::new(self.config, embedding_strategy)?;
261
262        // Enable federation if endpoints provided
263        if !self.federation_endpoints.is_empty() {
264            service.enable_federation(self.federation_endpoints);
265        }
266
267        // Register custom functions
268        for (name, function) in self.custom_functions {
269            service.register_custom_function(name, function);
270        }
271
272        Ok(service)
273    }
274}
275
276impl Default for SparqlVectorServiceBuilder {
277    fn default() -> Self {
278        Self::new()
279    }
280}
281
282/// Convenience functions for common operations
283pub mod convenience {
284    use super::*;
285
286    /// Create a basic SPARQL vector service with default configuration
287    pub fn create_basic_service() -> Result<SparqlVectorService> {
288        SparqlVectorService::new(
289            VectorServiceConfig::default(),
290            EmbeddingStrategy::SentenceTransformer,
291        )
292    }
293
294    /// Create a high-performance SPARQL vector service
295    pub fn create_high_performance_service() -> Result<SparqlVectorService> {
296        let config = VectorServiceConfig {
297            enable_caching: true,
298            cache_size: 10000,
299            enable_optimization: true,
300            enable_monitoring: true,
301            ..Default::default()
302        };
303
304        SparqlVectorService::new(config, EmbeddingStrategy::SentenceTransformer)
305    }
306
307    /// Create a federated SPARQL vector service
308    pub fn create_federated_service(endpoints: Vec<String>) -> Result<SparqlVectorService> {
309        let mut service = create_basic_service()?;
310        service.enable_federation(endpoints);
311        Ok(service)
312    }
313
314    /// Execute a simple similarity query
315    pub fn execute_similarity_query(
316        service: &mut SparqlVectorService,
317        resource1: &str,
318        resource2: &str,
319    ) -> Result<f32> {
320        let args = vec![
321            VectorServiceArg::IRI(resource1.to_string()),
322            VectorServiceArg::IRI(resource2.to_string()),
323        ];
324
325        match service.execute_function("similarity", &args)? {
326            VectorServiceResult::Number(score) => Ok(score),
327            VectorServiceResult::SimilarityList(results) => {
328                Ok(results.first().map(|(_, score)| *score).unwrap_or(0.0))
329            }
330            _ => Err(anyhow::anyhow!(
331                "Unexpected result type for similarity query"
332            )),
333        }
334    }
335
336    /// Execute a simple search query
337    pub fn execute_search_query(
338        service: &mut SparqlVectorService,
339        query_text: &str,
340        limit: usize,
341        threshold: f32,
342    ) -> Result<Vec<(String, f32)>> {
343        let args = vec![
344            VectorServiceArg::String(query_text.to_string()),
345            VectorServiceArg::Number(limit as f32),
346            VectorServiceArg::Number(threshold),
347        ];
348
349        match service.execute_function("search", &args)? {
350            VectorServiceResult::SimilarityList(results) => Ok(results),
351            _ => Err(anyhow::anyhow!("Unexpected result type for search query")),
352        }
353    }
354}
355
#[cfg(test)]
mod tests {
    use super::*;
    use crate::embeddings::EmbeddingStrategy;

    /// A service can be constructed directly from a default configuration.
    #[test]
    fn test_service_creation() {
        let config = VectorServiceConfig::default();
        let service = SparqlVectorService::new(config, EmbeddingStrategy::TfIdf);
        assert!(service.is_ok());
    }

    /// The builder accepts a strategy plus federation endpoints and builds successfully.
    #[test]
    fn test_builder_pattern() {
        let service = SparqlVectorServiceBuilder::new()
            .with_embedding_strategy(EmbeddingStrategy::SentenceTransformer)
            .with_federation_endpoints(vec!["http://endpoint1.com".to_string()])
            .build();

        assert!(service.is_ok());
    }

    /// A freshly created service knows `similarity` and `search`, but not unknown names.
    #[test]
    fn test_function_registration() {
        let service = convenience::create_basic_service().unwrap();

        assert!(service.is_function_registered("similarity"));
        assert!(service.is_function_registered("search"));
        assert!(!service.is_function_registered("nonexistent"));
    }

    /// Every convenience constructor succeeds.
    #[test]
    fn test_convenience_functions() {
        let basic_service = convenience::create_basic_service();
        assert!(basic_service.is_ok());

        let hp_service = convenience::create_high_performance_service();
        assert!(hp_service.is_ok());

        let federated_service =
            convenience::create_federated_service(vec!["http://endpoint1.com".to_string()]);
        assert!(federated_service.is_ok());
    }

    /// `update_config` replaces the configuration returned by `get_config`.
    #[test]
    fn test_configuration_update() {
        let mut service = convenience::create_basic_service().unwrap();

        let new_config = VectorServiceConfig {
            default_threshold: 0.8,
            default_limit: 20,
            ..Default::default()
        };

        // `new_config` is not used afterwards, so it is moved rather than cloned.
        service.update_config(new_config);
        assert_eq!(service.get_config().default_threshold, 0.8);
        assert_eq!(service.get_config().default_limit, 20);
    }

    /// Documentation lookup and definition generation are synchronous calls, so
    /// this is a plain `#[test]` and does not start an async runtime.
    #[test]
    fn test_function_documentation() {
        let service = convenience::create_basic_service().unwrap();

        let doc = service.get_function_documentation("similarity");
        assert!(doc.is_some());
        assert!(doc.unwrap().contains("similarity"));

        let sparql_defs = service.generate_sparql_definitions();
        assert!(sparql_defs.contains("vec:similarity"));
        assert!(sparql_defs.contains("SELECT"));
    }
}