oxirs_vec/sparql_integration/
config.rs1use crate::similarity::SimilarityMetric;
4use serde::{Deserialize, Serialize};
5
6#[derive(Debug, Clone, Serialize, Deserialize)]
8pub struct VectorServiceConfig {
9 pub service_uri: String,
11 pub default_threshold: f32,
13 pub default_limit: usize,
15 pub default_metric: SimilarityMetric,
17 pub enable_caching: bool,
19 pub cache_size: usize,
21 pub enable_optimization: bool,
23 pub enable_explanations: bool,
25 pub enable_monitoring: bool,
27}
28
29impl Default for VectorServiceConfig {
30 fn default() -> Self {
31 Self {
32 service_uri: "http://oxirs.org/vec/".to_string(),
33 default_threshold: 0.7,
34 default_limit: 10,
35 default_metric: SimilarityMetric::Cosine,
36 enable_caching: true,
37 cache_size: 1000,
38 enable_optimization: true,
39 enable_explanations: false,
40 enable_monitoring: false,
41 }
42 }
43}
44
45#[derive(Debug, Clone)]
47pub struct VectorServiceFunction {
48 pub name: String,
49 pub arity: usize,
50 pub description: String,
51 pub parameters: Vec<VectorServiceParameter>,
52}
53
54#[derive(Debug, Clone)]
55pub struct VectorServiceParameter {
56 pub name: String,
57 pub param_type: VectorParameterType,
58 pub required: bool,
59 pub description: String,
60}
61
/// Kind of value a vector service parameter accepts.
///
/// The enum carries no data, so it also derives `Copy`, `PartialEq`, `Eq` and `Hash`:
/// parameter kinds can be compared with `==`, passed by value and used as map/set keys.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum VectorParameterType {
    /// An IRI.
    IRI,
    /// A literal.
    Literal,
    /// A vector.
    Vector,
    /// A number.
    Number,
    /// A string.
    String,
}
70
71#[derive(Debug, Clone)]
73pub struct VectorQueryOptimizer {
74 pub enable_caching: bool,
75 pub enable_parallel_execution: bool,
76 pub enable_index_selection: bool,
77 pub cost_model: CostModel,
78}
79
/// Cost figures for vector operations.
///
/// The unit of the figures is not established in this file; the defaults look like
/// relative weights (NOTE(review): confirm against the code that consumes them).
#[derive(Clone, Debug)]
pub struct CostModel {
    /// Cost of a linear search (default: `1.0`).
    pub linear_search_cost: f32,
    /// Cost of an index search (default: `0.1`).
    pub index_search_cost: f32,
    /// Cost of generating an embedding (default: `10.0`).
    pub embedding_generation_cost: f32,
    /// Cost of a cache lookup (default: `0.01`).
    pub cache_lookup_cost: f32,
}

impl Default for CostModel {
    /// Returns the stock cost figures, listed here from cheapest to most expensive.
    fn default() -> Self {
        let cache_lookup_cost = 0.01;
        let index_search_cost = 0.1;
        let linear_search_cost = 1.0;
        let embedding_generation_cost = 10.0;

        Self {
            linear_search_cost,
            index_search_cost,
            embedding_generation_cost,
            cache_lookup_cost,
        }
    }
}
98
99impl Default for VectorQueryOptimizer {
100 fn default() -> Self {
101 Self {
102 enable_caching: true,
103 enable_parallel_execution: true,
104 enable_index_selection: true,
105 cost_model: CostModel::default(),
106 }
107 }
108}
109
110#[derive(Debug, Clone)]
112pub enum VectorServiceArg {
113 IRI(String),
114 Literal(String),
115 Number(f32),
116 Vector(crate::Vector),
117 String(String),
118}
119
120#[derive(Debug, Clone)]
122pub enum VectorServiceResult {
123 Number(f32),
124 String(String),
125 Vector(crate::Vector),
126 Boolean(bool),
127 SimilarityList(Vec<(String, f32)>),
128 DetailedSimilarityList(Vec<(String, f32, std::collections::HashMap<String, String>)>),
129 Clusters(Vec<Vec<String>>),
130}
131
132#[derive(Debug, Clone)]
134pub struct VectorQuery {
135 pub operation_type: String,
136 pub args: Vec<VectorServiceArg>,
137 pub metadata: std::collections::HashMap<String, String>,
138 pub estimated_result_size: Option<usize>,
139 pub preferred_index: Option<String>,
140 pub use_cache: bool,
141 pub parallel_execution: bool,
142 pub timeout: Option<std::time::Duration>,
143}
144
145impl VectorQuery {
146 pub fn new(operation_type: String, args: Vec<VectorServiceArg>) -> Self {
147 Self {
148 operation_type,
149 args,
150 metadata: std::collections::HashMap::new(),
151 estimated_result_size: None,
152 preferred_index: None,
153 use_cache: false,
154 parallel_execution: false,
155 timeout: None,
156 }
157 }
158
159 pub fn can_parallelize(&self) -> bool {
160 matches!(
161 self.operation_type.as_str(),
162 "similarity_search" | "batch_search" | "cluster_search"
163 )
164 }
165
166 pub fn cache_key(&self) -> String {
167 use std::collections::hash_map::DefaultHasher;
168 use std::hash::{Hash, Hasher};
169
170 let mut hasher = DefaultHasher::new();
171 self.operation_type.hash(&mut hasher);
172
173 for arg in &self.args {
175 match arg {
176 VectorServiceArg::IRI(s)
177 | VectorServiceArg::Literal(s)
178 | VectorServiceArg::String(s) => {
179 s.hash(&mut hasher);
180 }
181 VectorServiceArg::Number(n) => {
182 n.to_bits().hash(&mut hasher);
183 }
184 VectorServiceArg::Vector(v) => {
185 v.len().hash(&mut hasher);
186 }
187 }
188 }
189
190 format!("query_{:x}", hasher.finish())
191 }
192}
193
194#[derive(Debug, Clone)]
196pub enum VectorOperation {
197 FindSimilar {
199 resource: String,
200 limit: Option<usize>,
201 threshold: Option<f32>,
202 },
203 CalculateSimilarity {
205 resource1: String,
206 resource2: String,
207 },
208 SearchText {
210 query: String,
211 limit: Option<usize>,
212 threshold: Option<f32>,
213 },
214 EmbedText { text: String },
216 VectorSimilarity {
218 vector1: crate::Vector,
219 vector2: crate::Vector,
220 },
221 VectorKNN {
223 query_vector: crate::Vector,
224 k: usize,
225 threshold: Option<f32>,
226 },
227}
228
229impl VectorOperation {
230 pub fn to_sparql_service_query(&self, service_uri: &str) -> String {
232 match self {
233 VectorOperation::FindSimilar {
234 resource,
235 limit,
236 threshold,
237 } => {
238 let limit_clause = limit.map(|l| format!("LIMIT {l}")).unwrap_or_default();
239 let threshold_param = threshold.unwrap_or(0.0);
240 format!(
241 r#"
242 SELECT ?resource ?similarity WHERE {{
243 SERVICE <{service_uri}> {{
244 SELECT ?resource ?similarity WHERE {{
245 ?resource vec:similar <{resource}> .
246 ?resource vec:similarity ?similarity .
247 FILTER(?similarity >= {threshold_param})
248 }}
249 ORDER BY DESC(?similarity)
250 {limit_clause}
251 }}
252 }}
253 "#,
254 )
255 }
256 VectorOperation::CalculateSimilarity {
257 resource1,
258 resource2,
259 } => {
260 format!(
261 r#"
262 SELECT ?similarity WHERE {{
263 SERVICE <{service_uri}> {{
264 SELECT ?similarity WHERE {{
265 BIND(vec:similarity(<{resource1}>, <{resource2}>) AS ?similarity)
266 }}
267 }}
268 }}
269 "#,
270 )
271 }
272 VectorOperation::SearchText {
273 query,
274 limit,
275 threshold,
276 } => {
277 let limit_clause = limit.map(|l| format!("LIMIT {l}")).unwrap_or_default();
278 let threshold_param = threshold.unwrap_or(0.0);
279 format!(
280 r#"
281 SELECT ?resource ?similarity WHERE {{
282 SERVICE <{service_uri}> {{
283 SELECT ?resource ?similarity WHERE {{
284 ?resource vec:searchText "{query}" .
285 ?resource vec:similarity ?similarity .
286 FILTER(?similarity >= {threshold_param})
287 }}
288 ORDER BY DESC(?similarity)
289 {limit_clause}
290 }}
291 }}
292 "#,
293 )
294 }
295 VectorOperation::EmbedText { text } => {
296 format!(
297 r#"
298 SELECT ?vector WHERE {{
299 SERVICE <{service_uri}> {{
300 SELECT ?vector WHERE {{
301 BIND(vec:embedText("{text}") AS ?vector)
302 }}
303 }}
304 }}
305 "#
306 )
307 }
308 VectorOperation::VectorSimilarity { .. } => {
309 format!(
310 r#"
311 SELECT ?similarity WHERE {{
312 SERVICE <{service_uri}> {{
313 SELECT ?similarity WHERE {{
314 BIND(vec:vectorSimilarity(?vector1, ?vector2) AS ?similarity)
315 }}
316 }}
317 }}
318 "#
319 )
320 }
321 VectorOperation::VectorKNN { k, threshold, .. } => {
322 let threshold_param = threshold.unwrap_or(0.0);
323 format!(
324 r#"
325 SELECT ?resource ?similarity WHERE {{
326 SERVICE <{service_uri}> {{
327 SELECT ?resource ?similarity WHERE {{
328 ?resource vec:knn ?queryVector .
329 ?resource vec:similarity ?similarity .
330 FILTER(?similarity >= {threshold_param})
331 }}
332 ORDER BY DESC(?similarity)
333 LIMIT {k}
334 }}
335 }}
336 "#
337 )
338 }
339 }
340 }
341}
342
/// Outcome of a vector query.
#[derive(Clone, Debug)]
pub struct VectorQueryResult {
    /// Result entries as `(String, f32)` pairs — presumably (resource, similarity
    /// score); TODO confirm against producers.
    pub results: Vec<(String, f32)>,
    /// Free-form string metadata attached to the result.
    pub metadata: std::collections::HashMap<String, String>,
    /// Time the query took to execute.
    pub execution_time: std::time::Duration,
    /// Whether the result is flagged as having come from a cache.
    pub from_cache: bool,
}

impl VectorQueryResult {
    /// Creates a result with empty metadata and the cache flag cleared.
    pub fn new(results: Vec<(String, f32)>, execution_time: std::time::Duration) -> Self {
        Self {
            results,
            execution_time,
            metadata: Default::default(),
            from_cache: false,
        }
    }

    /// Replaces the metadata map and returns the updated result.
    pub fn with_metadata(self, metadata: std::collections::HashMap<String, String>) -> Self {
        Self { metadata, ..self }
    }

    /// Sets the cache flag and returns the updated result.
    pub fn from_cache(self) -> Self {
        Self {
            from_cache: true,
            ..self
        }
    }
}