oxirs_vec/sparql_integration/
mod.rs1use crate::{
7 embeddings::{EmbeddingManager, EmbeddingStrategy},
8 graph_aware_search::{GraphAwareConfig, GraphAwareSearch},
9 VectorStore,
10};
11use anyhow::Result;
12use std::collections::HashMap;
13
14pub mod config;
16pub mod cross_language;
17pub mod federation;
18pub mod monitoring;
19pub mod multimodal_functions;
20pub mod query_executor;
21pub mod sparql_functions;
22
23#[cfg(feature = "tantivy-search")]
25pub mod text_functions;
26
27pub use config::{
28 VectorOperation, VectorQuery, VectorQueryOptimizer, VectorQueryResult, VectorServiceArg,
29 VectorServiceConfig, VectorServiceFunction, VectorServiceParameter, VectorServiceResult,
30};
31pub use cross_language::CrossLanguageProcessor;
32pub use federation::{FederatedQueryResult, FederationManager};
33pub use monitoring::{PerformanceMonitor, PerformanceReport};
34pub use multimodal_functions::{
35 generate_multimodal_sparql_function, sparql_multimodal_search,
36 sparql_multimodal_search_from_args, MultimodalSearchConfig, SparqlMultimodalResult,
37};
38pub use query_executor::QueryExecutor;
39pub use sparql_functions::{CustomVectorFunction, SparqlVectorFunctions};
40
41#[cfg(feature = "tantivy-search")]
42pub use text_functions::{RdfLiteral, SearchStats, SparqlSearchResult, SparqlTextFunctions};
43
44pub struct SparqlVectorService {
46 config: VectorServiceConfig,
47 query_executor: QueryExecutor,
48 sparql_functions: SparqlVectorFunctions,
49 federation_manager: Option<FederationManager>,
50 performance_monitor: Option<PerformanceMonitor>,
51}
52
53impl SparqlVectorService {
54 pub fn new(config: VectorServiceConfig, embedding_strategy: EmbeddingStrategy) -> Result<Self> {
56 let vector_store = VectorStore::new();
57 let embedding_manager = EmbeddingManager::new(embedding_strategy, 1000)?;
58
59 let performance_monitor = if config.enable_monitoring {
60 Some(PerformanceMonitor::new())
61 } else {
62 None
63 };
64
65 let graph_aware_search = if config.enable_monitoring {
66 Some(GraphAwareSearch::new(GraphAwareConfig::default()))
67 } else {
68 None
69 };
70
71 let optimizer = VectorQueryOptimizer::default();
72 let query_executor = QueryExecutor::new(
73 vector_store,
74 embedding_manager,
75 optimizer,
76 performance_monitor.clone(),
77 graph_aware_search,
78 );
79
80 let sparql_functions = SparqlVectorFunctions::new();
81
82 Ok(Self {
83 config,
84 query_executor,
85 sparql_functions,
86 federation_manager: None,
87 performance_monitor,
88 })
89 }
90
91 pub fn execute_function(
93 &mut self,
94 function_name: &str,
95 args: &[VectorServiceArg],
96 ) -> Result<VectorServiceResult> {
97 let start_time = std::time::Instant::now();
98
99 let result =
100 self.sparql_functions
101 .execute_function(function_name, args, &mut self.query_executor);
102
103 if let Some(ref monitor) = self.performance_monitor {
105 let duration = start_time.elapsed();
106 monitor.record_query(duration, result.is_ok());
107 monitor.record_operation(&format!("function_{function_name}"), duration);
108 }
109
110 result
111 }
112
113 pub fn execute_query(&mut self, query: &VectorQuery) -> Result<VectorQueryResult> {
115 self.query_executor.execute_optimized_query(query)
116 }
117
118 pub fn register_function(&mut self, function: VectorServiceFunction) {
120 self.sparql_functions.register_function(function);
121 }
122
123 pub fn register_custom_function(
125 &mut self,
126 name: String,
127 function: Box<dyn CustomVectorFunction>,
128 ) {
129 self.sparql_functions
130 .register_custom_function(name, function);
131 }
132
133 pub fn enable_federation(&mut self, endpoint_urls: Vec<String>) {
135 self.federation_manager = Some(FederationManager::new(endpoint_urls));
136 }
137
138 pub async fn execute_federated_query(
140 &mut self,
141 endpoints: &[String],
142 query: &VectorQuery,
143 ) -> Result<FederatedQueryResult> {
144 if let Some(ref mut manager) = self.federation_manager {
145 manager.execute_federated_query(endpoints, query).await
146 } else {
147 Err(anyhow::anyhow!("Federation not enabled"))
148 }
149 }
150
151 pub fn get_performance_report(&self) -> Option<PerformanceReport> {
153 self.performance_monitor
154 .as_ref()
155 .map(|m| m.generate_report())
156 }
157
158 pub fn get_function_documentation(&self, name: &str) -> Option<String> {
160 self.sparql_functions.get_function_documentation(name)
161 }
162
163 pub fn generate_sparql_definitions(&self) -> String {
165 self.sparql_functions.generate_sparql_definitions()
166 }
167
168 pub fn is_function_registered(&self, name: &str) -> bool {
170 self.sparql_functions.is_function_registered(name)
171 }
172
173 pub fn get_all_functions(&self) -> &HashMap<String, VectorServiceFunction> {
175 self.sparql_functions.get_all_functions()
176 }
177
178 pub fn clear_cache(&mut self) {
180 self.query_executor.clear_cache();
181 }
182
183 pub fn cache_stats(&self) -> (usize, usize) {
185 self.query_executor.cache_stats()
186 }
187
188 pub fn update_config(&mut self, config: VectorServiceConfig) {
190 self.config = config;
191 }
192
193 pub fn get_config(&self) -> &VectorServiceConfig {
195 &self.config
196 }
197
198 pub fn generate_service_query(&self, operation: &VectorOperation) -> String {
200 operation.to_sparql_service_query(&self.config.service_uri)
201 }
202
203 pub fn add_resource_embedding(
205 &mut self,
206 uri: &str,
207 content: &crate::embeddings::EmbeddableContent,
208 ) -> Result<()> {
209 self.query_executor.add_resource_embedding(uri, content)
210 }
211}
212
213pub struct SparqlVectorServiceBuilder {
215 config: VectorServiceConfig,
216 embedding_strategy: Option<EmbeddingStrategy>,
217 federation_endpoints: Vec<String>,
218 custom_functions: Vec<(String, Box<dyn CustomVectorFunction>)>,
219}
220
221impl SparqlVectorServiceBuilder {
222 pub fn new() -> Self {
223 Self {
224 config: VectorServiceConfig::default(),
225 embedding_strategy: None,
226 federation_endpoints: Vec::new(),
227 custom_functions: Vec::new(),
228 }
229 }
230
231 pub fn with_config(mut self, config: VectorServiceConfig) -> Self {
232 self.config = config;
233 self
234 }
235
236 pub fn with_embedding_strategy(mut self, strategy: EmbeddingStrategy) -> Self {
237 self.embedding_strategy = Some(strategy);
238 self
239 }
240
241 pub fn with_federation_endpoints(mut self, endpoints: Vec<String>) -> Self {
242 self.federation_endpoints = endpoints;
243 self
244 }
245
246 pub fn with_custom_function(
247 mut self,
248 name: String,
249 function: Box<dyn CustomVectorFunction>,
250 ) -> Self {
251 self.custom_functions.push((name, function));
252 self
253 }
254
255 pub fn build(self) -> Result<SparqlVectorService> {
256 let embedding_strategy = self
257 .embedding_strategy
258 .unwrap_or(EmbeddingStrategy::SentenceTransformer);
259
260 let mut service = SparqlVectorService::new(self.config, embedding_strategy)?;
261
262 if !self.federation_endpoints.is_empty() {
264 service.enable_federation(self.federation_endpoints);
265 }
266
267 for (name, function) in self.custom_functions {
269 service.register_custom_function(name, function);
270 }
271
272 Ok(service)
273 }
274}
275
276impl Default for SparqlVectorServiceBuilder {
277 fn default() -> Self {
278 Self::new()
279 }
280}
281
282pub mod convenience {
284 use super::*;
285
286 pub fn create_basic_service() -> Result<SparqlVectorService> {
288 SparqlVectorService::new(
289 VectorServiceConfig::default(),
290 EmbeddingStrategy::SentenceTransformer,
291 )
292 }
293
294 pub fn create_high_performance_service() -> Result<SparqlVectorService> {
296 let config = VectorServiceConfig {
297 enable_caching: true,
298 cache_size: 10000,
299 enable_optimization: true,
300 enable_monitoring: true,
301 ..Default::default()
302 };
303
304 SparqlVectorService::new(config, EmbeddingStrategy::SentenceTransformer)
305 }
306
307 pub fn create_federated_service(endpoints: Vec<String>) -> Result<SparqlVectorService> {
309 let mut service = create_basic_service()?;
310 service.enable_federation(endpoints);
311 Ok(service)
312 }
313
314 pub fn execute_similarity_query(
316 service: &mut SparqlVectorService,
317 resource1: &str,
318 resource2: &str,
319 ) -> Result<f32> {
320 let args = vec![
321 VectorServiceArg::IRI(resource1.to_string()),
322 VectorServiceArg::IRI(resource2.to_string()),
323 ];
324
325 match service.execute_function("similarity", &args)? {
326 VectorServiceResult::Number(score) => Ok(score),
327 VectorServiceResult::SimilarityList(results) => {
328 Ok(results.first().map(|(_, score)| *score).unwrap_or(0.0))
329 }
330 _ => Err(anyhow::anyhow!(
331 "Unexpected result type for similarity query"
332 )),
333 }
334 }
335
336 pub fn execute_search_query(
338 service: &mut SparqlVectorService,
339 query_text: &str,
340 limit: usize,
341 threshold: f32,
342 ) -> Result<Vec<(String, f32)>> {
343 let args = vec![
344 VectorServiceArg::String(query_text.to_string()),
345 VectorServiceArg::Number(limit as f32),
346 VectorServiceArg::Number(threshold),
347 ];
348
349 match service.execute_function("search", &args)? {
350 VectorServiceResult::SimilarityList(results) => Ok(results),
351 _ => Err(anyhow::anyhow!("Unexpected result type for search query")),
352 }
353 }
354}
355
#[cfg(test)]
mod tests {
    use super::*;
    use crate::embeddings::EmbeddingStrategy;

    /// Direct construction with the default configuration must succeed.
    #[test]
    fn test_service_creation() {
        let config = VectorServiceConfig::default();
        // `expect` surfaces the underlying error on failure instead of a bare
        // "assertion failed".
        SparqlVectorService::new(config, EmbeddingStrategy::TfIdf)
            .expect("service creation with default config should succeed");
    }

    /// The builder must produce a service when given a strategy and endpoints.
    #[test]
    fn test_builder_pattern() {
        SparqlVectorServiceBuilder::new()
            .with_embedding_strategy(EmbeddingStrategy::SentenceTransformer)
            .with_federation_endpoints(vec!["http://endpoint1.com".to_string()])
            .build()
            .expect("builder should produce a service");
    }

    /// Built-in functions are registered; unknown names are not.
    #[test]
    fn test_function_registration() {
        let service = convenience::create_basic_service().unwrap();

        assert!(service.is_function_registered("similarity"));
        assert!(service.is_function_registered("search"));
        assert!(!service.is_function_registered("nonexistent"));
    }

    /// Every convenience constructor must succeed.
    #[test]
    fn test_convenience_functions() {
        convenience::create_basic_service().expect("basic service should be created");

        convenience::create_high_performance_service()
            .expect("high-performance service should be created");

        convenience::create_federated_service(vec!["http://endpoint1.com".to_string()])
            .expect("federated service should be created");
    }

    /// `update_config` must replace the stored configuration.
    #[test]
    fn test_configuration_update() {
        let mut service = convenience::create_basic_service().unwrap();

        let new_config = VectorServiceConfig {
            default_threshold: 0.8,
            default_limit: 20,
            ..Default::default()
        };

        // The config is not used afterwards, so it is moved rather than cloned.
        service.update_config(new_config);
        assert_eq!(service.get_config().default_threshold, 0.8);
        assert_eq!(service.get_config().default_limit, 20);
    }

    /// Documentation and generated SPARQL definitions mention `similarity`.
    ///
    /// Nothing here awaits, so this is a plain synchronous test rather than
    /// one that starts an async runtime.
    #[test]
    fn test_function_documentation() {
        let service = convenience::create_basic_service().unwrap();

        let doc = service
            .get_function_documentation("similarity")
            .expect("`similarity` should have documentation");
        assert!(doc.contains("similarity"));

        let sparql_defs = service.generate_sparql_definitions();
        assert!(sparql_defs.contains("vec:similarity"));
        assert!(sparql_defs.contains("SELECT"));
    }
}