oxirs_vec/sparql_integration/
mod.rs1use crate::{
7 embeddings::{EmbeddingManager, EmbeddingStrategy},
8 graph_aware_search::{GraphAwareConfig, GraphAwareSearch},
9 VectorStore,
10};
11use anyhow::Result;
12use std::collections::HashMap;
13
14pub mod config;
16pub mod cross_language;
17pub mod federation;
18pub mod monitoring;
19pub mod query_executor;
20pub mod sparql_functions;
21
22pub use config::{
23 VectorOperation, VectorQuery, VectorQueryOptimizer, VectorQueryResult, VectorServiceArg,
24 VectorServiceConfig, VectorServiceFunction, VectorServiceParameter, VectorServiceResult,
25};
26pub use cross_language::CrossLanguageProcessor;
27pub use federation::{FederatedQueryResult, FederationManager};
28pub use monitoring::{PerformanceMonitor, PerformanceReport};
29pub use query_executor::QueryExecutor;
30pub use sparql_functions::{CustomVectorFunction, SparqlVectorFunctions};
31
32pub struct SparqlVectorService {
34 config: VectorServiceConfig,
35 query_executor: QueryExecutor,
36 sparql_functions: SparqlVectorFunctions,
37 federation_manager: Option<FederationManager>,
38 performance_monitor: Option<PerformanceMonitor>,
39}
40
41impl SparqlVectorService {
42 pub fn new(config: VectorServiceConfig, embedding_strategy: EmbeddingStrategy) -> Result<Self> {
44 let vector_store = VectorStore::new();
45 let embedding_manager = EmbeddingManager::new(embedding_strategy, 1000)?;
46
47 let performance_monitor = if config.enable_monitoring {
48 Some(PerformanceMonitor::new())
49 } else {
50 None
51 };
52
53 let graph_aware_search = if config.enable_monitoring {
54 Some(GraphAwareSearch::new(GraphAwareConfig::default()))
55 } else {
56 None
57 };
58
59 let optimizer = VectorQueryOptimizer::default();
60 let query_executor = QueryExecutor::new(
61 vector_store,
62 embedding_manager,
63 optimizer,
64 performance_monitor.clone(),
65 graph_aware_search,
66 );
67
68 let sparql_functions = SparqlVectorFunctions::new();
69
70 Ok(Self {
71 config,
72 query_executor,
73 sparql_functions,
74 federation_manager: None,
75 performance_monitor,
76 })
77 }
78
79 pub fn execute_function(
81 &mut self,
82 function_name: &str,
83 args: &[VectorServiceArg],
84 ) -> Result<VectorServiceResult> {
85 let start_time = std::time::Instant::now();
86
87 let result =
88 self.sparql_functions
89 .execute_function(function_name, args, &mut self.query_executor);
90
91 if let Some(ref monitor) = self.performance_monitor {
93 let duration = start_time.elapsed();
94 monitor.record_query(duration, result.is_ok());
95 monitor.record_operation(&format!("function_{function_name}"), duration);
96 }
97
98 result
99 }
100
101 pub fn execute_query(&mut self, query: &VectorQuery) -> Result<VectorQueryResult> {
103 self.query_executor.execute_optimized_query(query)
104 }
105
106 pub fn register_function(&mut self, function: VectorServiceFunction) {
108 self.sparql_functions.register_function(function);
109 }
110
111 pub fn register_custom_function(
113 &mut self,
114 name: String,
115 function: Box<dyn CustomVectorFunction>,
116 ) {
117 self.sparql_functions
118 .register_custom_function(name, function);
119 }
120
121 pub fn enable_federation(&mut self, endpoint_urls: Vec<String>) {
123 self.federation_manager = Some(FederationManager::new(endpoint_urls));
124 }
125
126 pub async fn execute_federated_query(
128 &mut self,
129 endpoints: &[String],
130 query: &VectorQuery,
131 ) -> Result<FederatedQueryResult> {
132 if let Some(ref mut manager) = self.federation_manager {
133 manager.execute_federated_query(endpoints, query).await
134 } else {
135 Err(anyhow::anyhow!("Federation not enabled"))
136 }
137 }
138
139 pub fn get_performance_report(&self) -> Option<PerformanceReport> {
141 self.performance_monitor
142 .as_ref()
143 .map(|m| m.generate_report())
144 }
145
146 pub fn get_function_documentation(&self, name: &str) -> Option<String> {
148 self.sparql_functions.get_function_documentation(name)
149 }
150
151 pub fn generate_sparql_definitions(&self) -> String {
153 self.sparql_functions.generate_sparql_definitions()
154 }
155
156 pub fn is_function_registered(&self, name: &str) -> bool {
158 self.sparql_functions.is_function_registered(name)
159 }
160
161 pub fn get_all_functions(&self) -> &HashMap<String, VectorServiceFunction> {
163 self.sparql_functions.get_all_functions()
164 }
165
166 pub fn clear_cache(&mut self) {
168 self.query_executor.clear_cache();
169 }
170
171 pub fn cache_stats(&self) -> (usize, usize) {
173 self.query_executor.cache_stats()
174 }
175
176 pub fn update_config(&mut self, config: VectorServiceConfig) {
178 self.config = config;
179 }
180
181 pub fn get_config(&self) -> &VectorServiceConfig {
183 &self.config
184 }
185
186 pub fn generate_service_query(&self, operation: &VectorOperation) -> String {
188 operation.to_sparql_service_query(&self.config.service_uri)
189 }
190
191 pub fn add_resource_embedding(
193 &mut self,
194 uri: &str,
195 content: &crate::embeddings::EmbeddableContent,
196 ) -> Result<()> {
197 self.query_executor.add_resource_embedding(uri, content)
198 }
199}
200
201pub struct SparqlVectorServiceBuilder {
203 config: VectorServiceConfig,
204 embedding_strategy: Option<EmbeddingStrategy>,
205 federation_endpoints: Vec<String>,
206 custom_functions: Vec<(String, Box<dyn CustomVectorFunction>)>,
207}
208
209impl SparqlVectorServiceBuilder {
210 pub fn new() -> Self {
211 Self {
212 config: VectorServiceConfig::default(),
213 embedding_strategy: None,
214 federation_endpoints: Vec::new(),
215 custom_functions: Vec::new(),
216 }
217 }
218
219 pub fn with_config(mut self, config: VectorServiceConfig) -> Self {
220 self.config = config;
221 self
222 }
223
224 pub fn with_embedding_strategy(mut self, strategy: EmbeddingStrategy) -> Self {
225 self.embedding_strategy = Some(strategy);
226 self
227 }
228
229 pub fn with_federation_endpoints(mut self, endpoints: Vec<String>) -> Self {
230 self.federation_endpoints = endpoints;
231 self
232 }
233
234 pub fn with_custom_function(
235 mut self,
236 name: String,
237 function: Box<dyn CustomVectorFunction>,
238 ) -> Self {
239 self.custom_functions.push((name, function));
240 self
241 }
242
243 pub fn build(self) -> Result<SparqlVectorService> {
244 let embedding_strategy = self
245 .embedding_strategy
246 .unwrap_or(EmbeddingStrategy::SentenceTransformer);
247
248 let mut service = SparqlVectorService::new(self.config, embedding_strategy)?;
249
250 if !self.federation_endpoints.is_empty() {
252 service.enable_federation(self.federation_endpoints);
253 }
254
255 for (name, function) in self.custom_functions {
257 service.register_custom_function(name, function);
258 }
259
260 Ok(service)
261 }
262}
263
264impl Default for SparqlVectorServiceBuilder {
265 fn default() -> Self {
266 Self::new()
267 }
268}
269
270pub mod convenience {
272 use super::*;
273
274 pub fn create_basic_service() -> Result<SparqlVectorService> {
276 SparqlVectorService::new(
277 VectorServiceConfig::default(),
278 EmbeddingStrategy::SentenceTransformer,
279 )
280 }
281
282 pub fn create_high_performance_service() -> Result<SparqlVectorService> {
284 let config = VectorServiceConfig {
285 enable_caching: true,
286 cache_size: 10000,
287 enable_optimization: true,
288 enable_monitoring: true,
289 ..Default::default()
290 };
291
292 SparqlVectorService::new(config, EmbeddingStrategy::SentenceTransformer)
293 }
294
295 pub fn create_federated_service(endpoints: Vec<String>) -> Result<SparqlVectorService> {
297 let mut service = create_basic_service()?;
298 service.enable_federation(endpoints);
299 Ok(service)
300 }
301
302 pub fn execute_similarity_query(
304 service: &mut SparqlVectorService,
305 resource1: &str,
306 resource2: &str,
307 ) -> Result<f32> {
308 let args = vec![
309 VectorServiceArg::IRI(resource1.to_string()),
310 VectorServiceArg::IRI(resource2.to_string()),
311 ];
312
313 match service.execute_function("similarity", &args)? {
314 VectorServiceResult::Number(score) => Ok(score),
315 VectorServiceResult::SimilarityList(results) => {
316 Ok(results.first().map(|(_, score)| *score).unwrap_or(0.0))
317 }
318 _ => Err(anyhow::anyhow!(
319 "Unexpected result type for similarity query"
320 )),
321 }
322 }
323
324 pub fn execute_search_query(
326 service: &mut SparqlVectorService,
327 query_text: &str,
328 limit: usize,
329 threshold: f32,
330 ) -> Result<Vec<(String, f32)>> {
331 let args = vec![
332 VectorServiceArg::String(query_text.to_string()),
333 VectorServiceArg::Number(limit as f32),
334 VectorServiceArg::Number(threshold),
335 ];
336
337 match service.execute_function("search", &args)? {
338 VectorServiceResult::SimilarityList(results) => Ok(results),
339 _ => Err(anyhow::anyhow!("Unexpected result type for search query")),
340 }
341 }
342}
343
#[cfg(test)]
mod tests {
    use super::*;
    use crate::embeddings::EmbeddingStrategy;

    /// A service can be constructed directly from a default configuration.
    #[test]
    fn test_service_creation() {
        let config = VectorServiceConfig::default();
        let service = SparqlVectorService::new(config, EmbeddingStrategy::TfIdf);
        assert!(service.is_ok());
    }

    /// The builder produces a service when given a strategy and endpoints.
    #[test]
    fn test_builder_pattern() {
        let service = SparqlVectorServiceBuilder::new()
            .with_embedding_strategy(EmbeddingStrategy::SentenceTransformer)
            .with_federation_endpoints(vec!["http://endpoint1.com".to_string()])
            .build();

        assert!(service.is_ok());
    }

    /// The built-in functions are registered and unknown names are not.
    #[test]
    fn test_function_registration() {
        let service = convenience::create_basic_service().unwrap();

        assert!(service.is_function_registered("similarity"));
        assert!(service.is_function_registered("search"));
        assert!(!service.is_function_registered("nonexistent"));
    }

    /// Every convenience constructor succeeds.
    #[test]
    fn test_convenience_functions() {
        let basic_service = convenience::create_basic_service();
        assert!(basic_service.is_ok());

        let hp_service = convenience::create_high_performance_service();
        assert!(hp_service.is_ok());

        let federated_service =
            convenience::create_federated_service(vec!["http://endpoint1.com".to_string()]);
        assert!(federated_service.is_ok());
    }

    /// `update_config` replaces the configuration returned by `get_config`.
    #[test]
    fn test_configuration_update() {
        let mut service = convenience::create_basic_service().unwrap();

        let new_config = VectorServiceConfig {
            default_threshold: 0.8,
            default_limit: 20,
            ..Default::default()
        };

        // `new_config` is not needed afterwards, so it is moved, not cloned.
        service.update_config(new_config);
        assert_eq!(service.get_config().default_threshold, 0.8);
        assert_eq!(service.get_config().default_limit, 20);
    }

    /// Documentation and SPARQL definitions are available for built-ins.
    ///
    /// Nothing here awaits, so this is a plain synchronous test with no
    /// async runtime.
    #[test]
    fn test_function_documentation() {
        let service = convenience::create_basic_service().unwrap();

        let doc = service.get_function_documentation("similarity");
        assert!(doc.is_some());
        assert!(doc.unwrap().contains("similarity"));

        let sparql_defs = service.generate_sparql_definitions();
        assert!(sparql_defs.contains("vec:similarity"));
        assert!(sparql_defs.contains("SELECT"));
    }
}
415}