oxirs_vec/sparql_integration/
mod.rs

1//! SPARQL integration for vector search and hybrid symbolic-vector queries
2//!
3//! This module provides comprehensive SPARQL integration capabilities for vector operations,
4//! including cross-language search, federated queries, and custom function support.
5
6use crate::{
7    embeddings::{EmbeddingManager, EmbeddingStrategy},
8    graph_aware_search::{GraphAwareConfig, GraphAwareSearch},
9    VectorStore,
10};
11use anyhow::Result;
12use std::collections::HashMap;
13
14// Re-export main types and modules
15pub mod config;
16pub mod cross_language;
17pub mod federation;
18pub mod monitoring;
19pub mod query_executor;
20pub mod sparql_functions;
21
22pub use config::{
23    VectorOperation, VectorQuery, VectorQueryOptimizer, VectorQueryResult, VectorServiceArg,
24    VectorServiceConfig, VectorServiceFunction, VectorServiceParameter, VectorServiceResult,
25};
26pub use cross_language::CrossLanguageProcessor;
27pub use federation::{FederatedQueryResult, FederationManager};
28pub use monitoring::{PerformanceMonitor, PerformanceReport};
29pub use query_executor::QueryExecutor;
30pub use sparql_functions::{CustomVectorFunction, SparqlVectorFunctions};
31
32/// Main SPARQL vector service implementation
33pub struct SparqlVectorService {
34    config: VectorServiceConfig,
35    query_executor: QueryExecutor,
36    sparql_functions: SparqlVectorFunctions,
37    federation_manager: Option<FederationManager>,
38    performance_monitor: Option<PerformanceMonitor>,
39}
40
41impl SparqlVectorService {
42    /// Create a new SPARQL vector service
43    pub fn new(config: VectorServiceConfig, embedding_strategy: EmbeddingStrategy) -> Result<Self> {
44        let vector_store = VectorStore::new();
45        let embedding_manager = EmbeddingManager::new(embedding_strategy, 1000)?;
46
47        let performance_monitor = if config.enable_monitoring {
48            Some(PerformanceMonitor::new())
49        } else {
50            None
51        };
52
53        let graph_aware_search = if config.enable_monitoring {
54            Some(GraphAwareSearch::new(GraphAwareConfig::default()))
55        } else {
56            None
57        };
58
59        let optimizer = VectorQueryOptimizer::default();
60        let query_executor = QueryExecutor::new(
61            vector_store,
62            embedding_manager,
63            optimizer,
64            performance_monitor.clone(),
65            graph_aware_search,
66        );
67
68        let sparql_functions = SparqlVectorFunctions::new();
69
70        Ok(Self {
71            config,
72            query_executor,
73            sparql_functions,
74            federation_manager: None,
75            performance_monitor,
76        })
77    }
78
79    /// Execute a SPARQL vector function
80    pub fn execute_function(
81        &mut self,
82        function_name: &str,
83        args: &[VectorServiceArg],
84    ) -> Result<VectorServiceResult> {
85        let start_time = std::time::Instant::now();
86
87        let result =
88            self.sparql_functions
89                .execute_function(function_name, args, &mut self.query_executor);
90
91        // Record performance metrics
92        if let Some(ref monitor) = self.performance_monitor {
93            let duration = start_time.elapsed();
94            monitor.record_query(duration, result.is_ok());
95            monitor.record_operation(&format!("function_{function_name}"), duration);
96        }
97
98        result
99    }
100
101    /// Execute an optimized vector query
102    pub fn execute_query(&mut self, query: &VectorQuery) -> Result<VectorQueryResult> {
103        self.query_executor.execute_optimized_query(query)
104    }
105
106    /// Register a custom SPARQL function
107    pub fn register_function(&mut self, function: VectorServiceFunction) {
108        self.sparql_functions.register_function(function);
109    }
110
111    /// Register a custom function implementation
112    pub fn register_custom_function(
113        &mut self,
114        name: String,
115        function: Box<dyn CustomVectorFunction>,
116    ) {
117        self.sparql_functions
118            .register_custom_function(name, function);
119    }
120
121    /// Enable federation with specified endpoints
122    pub fn enable_federation(&mut self, endpoint_urls: Vec<String>) {
123        self.federation_manager = Some(FederationManager::new(endpoint_urls));
124    }
125
126    /// Execute federated query
127    pub async fn execute_federated_query(
128        &mut self,
129        endpoints: &[String],
130        query: &VectorQuery,
131    ) -> Result<FederatedQueryResult> {
132        if let Some(ref mut manager) = self.federation_manager {
133            manager.execute_federated_query(endpoints, query).await
134        } else {
135            Err(anyhow::anyhow!("Federation not enabled"))
136        }
137    }
138
139    /// Get performance report
140    pub fn get_performance_report(&self) -> Option<PerformanceReport> {
141        self.performance_monitor
142            .as_ref()
143            .map(|m| m.generate_report())
144    }
145
146    /// Get function documentation
147    pub fn get_function_documentation(&self, name: &str) -> Option<String> {
148        self.sparql_functions.get_function_documentation(name)
149    }
150
151    /// Generate SPARQL function definitions
152    pub fn generate_sparql_definitions(&self) -> String {
153        self.sparql_functions.generate_sparql_definitions()
154    }
155
156    /// Check if a function is registered
157    pub fn is_function_registered(&self, name: &str) -> bool {
158        self.sparql_functions.is_function_registered(name)
159    }
160
161    /// Get all registered functions
162    pub fn get_all_functions(&self) -> &HashMap<String, VectorServiceFunction> {
163        self.sparql_functions.get_all_functions()
164    }
165
166    /// Clear query cache
167    pub fn clear_cache(&mut self) {
168        self.query_executor.clear_cache();
169    }
170
171    /// Get cache statistics
172    pub fn cache_stats(&self) -> (usize, usize) {
173        self.query_executor.cache_stats()
174    }
175
176    /// Update configuration
177    pub fn update_config(&mut self, config: VectorServiceConfig) {
178        self.config = config;
179    }
180
181    /// Get current configuration
182    pub fn get_config(&self) -> &VectorServiceConfig {
183        &self.config
184    }
185
186    /// Generate a SPARQL SERVICE query for a vector operation
187    pub fn generate_service_query(&self, operation: &VectorOperation) -> String {
188        operation.to_sparql_service_query(&self.config.service_uri)
189    }
190
191    /// Add a resource embedding to the service's vector store
192    pub fn add_resource_embedding(
193        &mut self,
194        uri: &str,
195        content: &crate::embeddings::EmbeddableContent,
196    ) -> Result<()> {
197        self.query_executor.add_resource_embedding(uri, content)
198    }
199}
200
201/// Builder for creating SPARQL vector service with custom configuration
202pub struct SparqlVectorServiceBuilder {
203    config: VectorServiceConfig,
204    embedding_strategy: Option<EmbeddingStrategy>,
205    federation_endpoints: Vec<String>,
206    custom_functions: Vec<(String, Box<dyn CustomVectorFunction>)>,
207}
208
209impl SparqlVectorServiceBuilder {
210    pub fn new() -> Self {
211        Self {
212            config: VectorServiceConfig::default(),
213            embedding_strategy: None,
214            federation_endpoints: Vec::new(),
215            custom_functions: Vec::new(),
216        }
217    }
218
219    pub fn with_config(mut self, config: VectorServiceConfig) -> Self {
220        self.config = config;
221        self
222    }
223
224    pub fn with_embedding_strategy(mut self, strategy: EmbeddingStrategy) -> Self {
225        self.embedding_strategy = Some(strategy);
226        self
227    }
228
229    pub fn with_federation_endpoints(mut self, endpoints: Vec<String>) -> Self {
230        self.federation_endpoints = endpoints;
231        self
232    }
233
234    pub fn with_custom_function(
235        mut self,
236        name: String,
237        function: Box<dyn CustomVectorFunction>,
238    ) -> Self {
239        self.custom_functions.push((name, function));
240        self
241    }
242
243    pub fn build(self) -> Result<SparqlVectorService> {
244        let embedding_strategy = self
245            .embedding_strategy
246            .unwrap_or(EmbeddingStrategy::SentenceTransformer);
247
248        let mut service = SparqlVectorService::new(self.config, embedding_strategy)?;
249
250        // Enable federation if endpoints provided
251        if !self.federation_endpoints.is_empty() {
252            service.enable_federation(self.federation_endpoints);
253        }
254
255        // Register custom functions
256        for (name, function) in self.custom_functions {
257            service.register_custom_function(name, function);
258        }
259
260        Ok(service)
261    }
262}
263
264impl Default for SparqlVectorServiceBuilder {
265    fn default() -> Self {
266        Self::new()
267    }
268}
269
270/// Convenience functions for common operations
271pub mod convenience {
272    use super::*;
273
274    /// Create a basic SPARQL vector service with default configuration
275    pub fn create_basic_service() -> Result<SparqlVectorService> {
276        SparqlVectorService::new(
277            VectorServiceConfig::default(),
278            EmbeddingStrategy::SentenceTransformer,
279        )
280    }
281
282    /// Create a high-performance SPARQL vector service
283    pub fn create_high_performance_service() -> Result<SparqlVectorService> {
284        let config = VectorServiceConfig {
285            enable_caching: true,
286            cache_size: 10000,
287            enable_optimization: true,
288            enable_monitoring: true,
289            ..Default::default()
290        };
291
292        SparqlVectorService::new(config, EmbeddingStrategy::SentenceTransformer)
293    }
294
295    /// Create a federated SPARQL vector service
296    pub fn create_federated_service(endpoints: Vec<String>) -> Result<SparqlVectorService> {
297        let mut service = create_basic_service()?;
298        service.enable_federation(endpoints);
299        Ok(service)
300    }
301
302    /// Execute a simple similarity query
303    pub fn execute_similarity_query(
304        service: &mut SparqlVectorService,
305        resource1: &str,
306        resource2: &str,
307    ) -> Result<f32> {
308        let args = vec![
309            VectorServiceArg::IRI(resource1.to_string()),
310            VectorServiceArg::IRI(resource2.to_string()),
311        ];
312
313        match service.execute_function("similarity", &args)? {
314            VectorServiceResult::Number(score) => Ok(score),
315            VectorServiceResult::SimilarityList(results) => {
316                Ok(results.first().map(|(_, score)| *score).unwrap_or(0.0))
317            }
318            _ => Err(anyhow::anyhow!(
319                "Unexpected result type for similarity query"
320            )),
321        }
322    }
323
324    /// Execute a simple search query
325    pub fn execute_search_query(
326        service: &mut SparqlVectorService,
327        query_text: &str,
328        limit: usize,
329        threshold: f32,
330    ) -> Result<Vec<(String, f32)>> {
331        let args = vec![
332            VectorServiceArg::String(query_text.to_string()),
333            VectorServiceArg::Number(limit as f32),
334            VectorServiceArg::Number(threshold),
335        ];
336
337        match service.execute_function("search", &args)? {
338            VectorServiceResult::SimilarityList(results) => Ok(results),
339            _ => Err(anyhow::anyhow!("Unexpected result type for search query")),
340        }
341    }
342}
343
#[cfg(test)]
mod tests {
    use super::*;
    use crate::embeddings::EmbeddingStrategy;

    /// A service can be constructed directly from a default configuration.
    #[test]
    fn test_service_creation() {
        let config = VectorServiceConfig::default();
        let service = SparqlVectorService::new(config, EmbeddingStrategy::TfIdf);
        assert!(service.is_ok());
    }

    /// The builder accepts a strategy and federation endpoints and builds.
    #[test]
    fn test_builder_pattern() {
        let service = SparqlVectorServiceBuilder::new()
            .with_embedding_strategy(EmbeddingStrategy::SentenceTransformer)
            .with_federation_endpoints(vec!["http://endpoint1.com".to_string()])
            .build();

        assert!(service.is_ok());
    }

    /// A freshly created service already knows `similarity` and `search`.
    #[test]
    fn test_function_registration() {
        let service = convenience::create_basic_service().unwrap();

        assert!(service.is_function_registered("similarity"));
        assert!(service.is_function_registered("search"));
        assert!(!service.is_function_registered("nonexistent"));
    }

    /// Each convenience constructor succeeds.
    #[test]
    fn test_convenience_functions() {
        let basic_service = convenience::create_basic_service();
        assert!(basic_service.is_ok());

        let hp_service = convenience::create_high_performance_service();
        assert!(hp_service.is_ok());

        let federated_service =
            convenience::create_federated_service(vec!["http://endpoint1.com".to_string()]);
        assert!(federated_service.is_ok());
    }

    /// `update_config` replaces the configuration returned by `get_config`.
    #[test]
    fn test_configuration_update() {
        let mut service = convenience::create_basic_service().unwrap();

        let new_config = VectorServiceConfig {
            default_threshold: 0.8,
            default_limit: 20,
            ..Default::default()
        };

        // `new_config` is not used afterwards, so it is moved rather than cloned.
        service.update_config(new_config);
        assert_eq!(service.get_config().default_threshold, 0.8);
        assert_eq!(service.get_config().default_limit, 20);
    }

    /// Documentation lookup and SPARQL definition generation are synchronous,
    /// so this is a plain test that needs no async runtime.
    #[test]
    fn test_function_documentation() {
        let service = convenience::create_basic_service().unwrap();

        let doc = service.get_function_documentation("similarity");
        assert!(doc.is_some());
        assert!(doc.unwrap().contains("similarity"));

        let sparql_defs = service.generate_sparql_definitions();
        assert!(sparql_defs.contains("vec:similarity"));
        assert!(sparql_defs.contains("SELECT"));
    }
}
413        assert!(sparql_defs.contains("SELECT"));
414    }
415}