oxirs_vec/sparql_integration/
config.rs

1//! Configuration types and defaults for SPARQL vector integration
2
3use crate::similarity::SimilarityMetric;
4use serde::{Deserialize, Serialize};
5
/// SPARQL vector service configuration.
///
/// Plain settings container for the vector SERVICE integration. The stock
/// values are provided by the `Default` implementation; the type can be
/// serialized and deserialized through `serde`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VectorServiceConfig {
    /// Service namespace URI
    pub service_uri: String,
    /// Default similarity threshold
    pub default_threshold: f32,
    /// Default number of results to return
    pub default_limit: usize,
    /// Default similarity metric
    pub default_metric: SimilarityMetric,
    /// Enable caching of vector search results
    pub enable_caching: bool,
    /// Cache size for search results
    // NOTE(review): presumably a maximum entry count rather than bytes — confirm against the cache implementation.
    pub cache_size: usize,
    /// Enable query optimization
    pub enable_optimization: bool,
    /// Enable result explanations
    pub enable_explanations: bool,
    /// Enable performance monitoring
    pub enable_monitoring: bool,
}
28
29impl Default for VectorServiceConfig {
30    fn default() -> Self {
31        Self {
32            service_uri: "http://oxirs.org/vec/".to_string(),
33            default_threshold: 0.7,
34            default_limit: 10,
35            default_metric: SimilarityMetric::Cosine,
36            enable_caching: true,
37            cache_size: 1000,
38            enable_optimization: true,
39            enable_explanations: false,
40            enable_monitoring: false,
41        }
42    }
43}
44
/// Vector service function registry entry.
///
/// Describes one function of the vector service: its name, how many
/// arguments it takes, a free-text description, and its parameter list.
#[derive(Debug, Clone)]
pub struct VectorServiceFunction {
    /// Function name.
    pub name: String,
    /// Number of arguments.
    // NOTE(review): presumably expected to agree with `parameters.len()` (or the count of
    // required parameters) — nothing in this type enforces it; confirm at the registration site.
    pub arity: usize,
    /// Human-readable description of the function.
    pub description: String,
    /// Descriptions of the individual parameters.
    pub parameters: Vec<VectorServiceParameter>,
}
53
/// Description of a single parameter of a [`VectorServiceFunction`].
#[derive(Debug, Clone)]
pub struct VectorServiceParameter {
    /// Parameter name.
    pub name: String,
    /// Kind of value the parameter takes.
    pub param_type: VectorParameterType,
    /// Whether the parameter is marked as required.
    pub required: bool,
    /// Human-readable description of the parameter.
    pub description: String,
}
61
/// Kinds of values a vector service parameter can take.
///
/// The variant names mirror those of [`VectorServiceArg`], which carries the
/// actual values.
#[derive(Debug, Clone)]
pub enum VectorParameterType {
    /// An IRI.
    IRI,
    /// A literal.
    Literal,
    /// A vector.
    Vector,
    /// A number.
    Number,
    /// A string.
    String,
}
70
/// Vector query optimizer for performance enhancement.
///
/// This type only holds the optimizer's switches and its cost model; the
/// optimization logic itself is not part of this definition.
#[derive(Debug, Clone)]
pub struct VectorQueryOptimizer {
    /// Switch for caching.
    pub enable_caching: bool,
    /// Switch for parallel execution.
    pub enable_parallel_execution: bool,
    /// Switch for index selection.
    pub enable_index_selection: bool,
    /// Cost figures used by the optimizer.
    pub cost_model: CostModel,
}
79
/// Cost figures for the operations a vector query may involve.
// NOTE(review): the values look like unitless relative weights (the defaults rate a
// linear search at 1.0) — confirm with the code that consumes them.
#[derive(Debug, Clone)]
pub struct CostModel {
    /// Cost of a linear search.
    pub linear_search_cost: f32,
    /// Cost of an index-backed search.
    pub index_search_cost: f32,
    /// Cost of generating an embedding.
    pub embedding_generation_cost: f32,
    /// Cost of a cache lookup.
    pub cache_lookup_cost: f32,
}
87
88impl Default for CostModel {
89    fn default() -> Self {
90        Self {
91            linear_search_cost: 1.0,
92            index_search_cost: 0.1,
93            embedding_generation_cost: 10.0,
94            cache_lookup_cost: 0.01,
95        }
96    }
97}
98
99impl Default for VectorQueryOptimizer {
100    fn default() -> Self {
101        Self {
102            enable_caching: true,
103            enable_parallel_execution: true,
104            enable_index_selection: true,
105            cost_model: CostModel::default(),
106        }
107    }
108}
109
/// Vector service argument types.
///
/// One value passed to a vector service operation, tagged with its kind.
#[derive(Debug, Clone)]
pub enum VectorServiceArg {
    /// An IRI, stored as its string form.
    IRI(String),
    /// A literal, stored as a string.
    Literal(String),
    /// A numeric value.
    Number(f32),
    /// A vector value.
    Vector(crate::Vector),
    /// A plain string.
    String(String),
}
119
/// Vector service result types.
///
/// The possible shapes of a value returned by a vector service operation.
#[derive(Debug, Clone)]
pub enum VectorServiceResult {
    /// A single numeric result.
    Number(f32),
    /// A single string result.
    String(String),
    /// A single vector result.
    Vector(crate::Vector),
    /// A boolean result.
    Boolean(bool),
    /// A list of `(String, f32)` pairs.
    // NOTE(review): presumably (resource identifier, similarity score) — confirm with producers.
    SimilarityList(Vec<(String, f32)>),
    /// Like `SimilarityList`, with an additional string-to-string map per entry.
    // NOTE(review): the map presumably carries per-result metadata/explanations — confirm.
    DetailedSimilarityList(Vec<(String, f32, std::collections::HashMap<String, String>)>),
    /// Groups of strings, one inner `Vec` per cluster.
    Clusters(Vec<Vec<String>>),
}
131
/// Vector query representation.
///
/// Pairs an operation name and its arguments with optional execution hints.
/// `VectorQuery::new` fills every hint with an "unset"/`false` value.
#[derive(Debug, Clone)]
pub struct VectorQuery {
    /// Name of the operation; `can_parallelize` matches on this string.
    pub operation_type: String,
    /// Arguments of the operation, in order.
    pub args: Vec<VectorServiceArg>,
    /// Free-form string metadata attached to the query.
    pub metadata: std::collections::HashMap<String, String>,
    /// Estimated number of results, if known.
    pub estimated_result_size: Option<usize>,
    /// Name of the index to prefer, if any.
    pub preferred_index: Option<String>,
    /// Whether the query is flagged to use the cache.
    pub use_cache: bool,
    /// Whether the query is flagged for parallel execution.
    pub parallel_execution: bool,
    /// Optional time limit for the query.
    pub timeout: Option<std::time::Duration>,
}
144
145impl VectorQuery {
146    pub fn new(operation_type: String, args: Vec<VectorServiceArg>) -> Self {
147        Self {
148            operation_type,
149            args,
150            metadata: std::collections::HashMap::new(),
151            estimated_result_size: None,
152            preferred_index: None,
153            use_cache: false,
154            parallel_execution: false,
155            timeout: None,
156        }
157    }
158
159    pub fn can_parallelize(&self) -> bool {
160        matches!(
161            self.operation_type.as_str(),
162            "similarity_search" | "batch_search" | "cluster_search"
163        )
164    }
165
166    pub fn cache_key(&self) -> String {
167        use std::collections::hash_map::DefaultHasher;
168        use std::hash::{Hash, Hasher};
169
170        let mut hasher = DefaultHasher::new();
171        self.operation_type.hash(&mut hasher);
172
173        // Simple hash of arguments (simplified for this implementation)
174        for arg in &self.args {
175            match arg {
176                VectorServiceArg::IRI(s)
177                | VectorServiceArg::Literal(s)
178                | VectorServiceArg::String(s) => {
179                    s.hash(&mut hasher);
180                }
181                VectorServiceArg::Number(n) => {
182                    n.to_bits().hash(&mut hasher);
183                }
184                VectorServiceArg::Vector(v) => {
185                    v.len().hash(&mut hasher);
186                }
187            }
188        }
189
190        format!("query_{:x}", hasher.finish())
191    }
192}
193
/// Vector operation enum for SPARQL queries.
///
/// Each variant can be rendered as the text of a SPARQL SERVICE query with
/// `to_sparql_service_query`.
#[derive(Debug, Clone)]
pub enum VectorOperation {
    /// Find similar resources to a given resource
    FindSimilar {
        /// IRI of the reference resource (placed between `<` and `>` in the query).
        resource: String,
        /// Maximum number of results; `None` emits no `LIMIT` clause.
        limit: Option<usize>,
        /// Minimum similarity; `None` is rendered as `0.0`.
        threshold: Option<f32>,
    },
    /// Calculate similarity between two resources
    CalculateSimilarity {
        /// IRI of the first resource.
        resource1: String,
        /// IRI of the second resource.
        resource2: String,
    },
    /// Search for resources similar to given text
    SearchText {
        /// Search text (placed inside a double-quoted literal in the query).
        query: String,
        /// Maximum number of results; `None` emits no `LIMIT` clause.
        limit: Option<usize>,
        /// Minimum similarity; `None` is rendered as `0.0`.
        threshold: Option<f32>,
    },
    /// Embed text into a vector
    EmbedText {
        /// Text to embed (placed inside a double-quoted literal in the query).
        text: String,
    },
    /// Calculate similarity between two vectors
    VectorSimilarity {
        /// First vector; not written into the generated query text.
        vector1: crate::Vector,
        /// Second vector; not written into the generated query text.
        vector2: crate::Vector,
    },
    /// Find nearest neighbors for a given vector
    VectorKNN {
        /// Query vector; not written into the generated query text.
        query_vector: crate::Vector,
        /// Number of neighbours, rendered as `LIMIT k`.
        k: usize,
        /// Minimum similarity; `None` is rendered as `0.0`.
        threshold: Option<f32>,
    },
}
228
229impl VectorOperation {
230    /// Generate a SPARQL SERVICE query for this operation
231    pub fn to_sparql_service_query(&self, service_uri: &str) -> String {
232        match self {
233            VectorOperation::FindSimilar {
234                resource,
235                limit,
236                threshold,
237            } => {
238                let limit_clause = limit.map(|l| format!("LIMIT {l}")).unwrap_or_default();
239                let threshold_param = threshold.unwrap_or(0.0);
240                format!(
241                    r#"
242                    SELECT ?resource ?similarity WHERE {{
243                        SERVICE <{service_uri}> {{
244                            SELECT ?resource ?similarity WHERE {{
245                                ?resource vec:similar <{resource}> .
246                                ?resource vec:similarity ?similarity .
247                                FILTER(?similarity >= {threshold_param})
248                            }}
249                            ORDER BY DESC(?similarity)
250                            {limit_clause}
251                        }}
252                    }}
253                    "#,
254                )
255            }
256            VectorOperation::CalculateSimilarity {
257                resource1,
258                resource2,
259            } => {
260                format!(
261                    r#"
262                    SELECT ?similarity WHERE {{
263                        SERVICE <{service_uri}> {{
264                            SELECT ?similarity WHERE {{
265                                BIND(vec:similarity(<{resource1}>, <{resource2}>) AS ?similarity)
266                            }}
267                        }}
268                    }}
269                    "#,
270                )
271            }
272            VectorOperation::SearchText {
273                query,
274                limit,
275                threshold,
276            } => {
277                let limit_clause = limit.map(|l| format!("LIMIT {l}")).unwrap_or_default();
278                let threshold_param = threshold.unwrap_or(0.0);
279                format!(
280                    r#"
281                    SELECT ?resource ?similarity WHERE {{
282                        SERVICE <{service_uri}> {{
283                            SELECT ?resource ?similarity WHERE {{
284                                ?resource vec:searchText "{query}" .
285                                ?resource vec:similarity ?similarity .
286                                FILTER(?similarity >= {threshold_param})
287                            }}
288                            ORDER BY DESC(?similarity)
289                            {limit_clause}
290                        }}
291                    }}
292                    "#,
293                )
294            }
295            VectorOperation::EmbedText { text } => {
296                format!(
297                    r#"
298                    SELECT ?vector WHERE {{
299                        SERVICE <{service_uri}> {{
300                            SELECT ?vector WHERE {{
301                                BIND(vec:embedText("{text}") AS ?vector)
302                            }}
303                        }}
304                    }}
305                    "#
306                )
307            }
308            VectorOperation::VectorSimilarity { .. } => {
309                format!(
310                    r#"
311                    SELECT ?similarity WHERE {{
312                        SERVICE <{service_uri}> {{
313                            SELECT ?similarity WHERE {{
314                                BIND(vec:vectorSimilarity(?vector1, ?vector2) AS ?similarity)
315                            }}
316                        }}
317                    }}
318                    "#
319                )
320            }
321            VectorOperation::VectorKNN { k, threshold, .. } => {
322                let threshold_param = threshold.unwrap_or(0.0);
323                format!(
324                    r#"
325                    SELECT ?resource ?similarity WHERE {{
326                        SERVICE <{service_uri}> {{
327                            SELECT ?resource ?similarity WHERE {{
328                                ?resource vec:knn ?queryVector .
329                                ?resource vec:similarity ?similarity .
330                                FILTER(?similarity >= {threshold_param})
331                            }}
332                            ORDER BY DESC(?similarity)
333                            LIMIT {k}
334                        }}
335                    }}
336                    "#
337                )
338            }
339        }
340    }
341}
342
/// Vector query result.
///
/// Holds the result pairs of a query together with bookkeeping about how the
/// result was obtained.
#[derive(Debug, Clone)]
pub struct VectorQueryResult {
    /// Result entries as `(String, f32)` pairs.
    // NOTE(review): presumably (resource identifier, similarity score) — confirm with producers.
    pub results: Vec<(String, f32)>,
    /// Free-form string metadata attached to the result.
    pub metadata: std::collections::HashMap<String, String>,
    /// Time recorded for executing the query.
    pub execution_time: std::time::Duration,
    /// Whether the result is marked as served from a cache.
    pub from_cache: bool,
}
351
352impl VectorQueryResult {
353    pub fn new(results: Vec<(String, f32)>, execution_time: std::time::Duration) -> Self {
354        Self {
355            results,
356            metadata: std::collections::HashMap::new(),
357            execution_time,
358            from_cache: false,
359        }
360    }
361
362    pub fn with_metadata(mut self, metadata: std::collections::HashMap<String, String>) -> Self {
363        self.metadata = metadata;
364        self
365    }
366
367    pub fn from_cache(mut self) -> Self {
368        self.from_cache = true;
369        self
370    }
371}