1use crate::Vector;
4use anyhow::{anyhow, Result};
5use oxirs_core::simd::SimdOps;
6use serde::{Deserialize, Serialize};
7use std::collections::hash_map::DefaultHasher;
8use std::collections::HashMap;
9use std::hash::{Hash, Hasher};
10use std::time::{SystemTime, UNIX_EPOCH};
11
/// Configuration shared by the similarity engines in this module.
///
/// The type can be serialized with serde and encoded/decoded with `oxicode`.
#[derive(Debug, Clone, Serialize, Deserialize, oxicode::Encode, oxicode::Decode)]
pub struct SimilarityConfig {
    /// Metric evaluated by `SemanticSimilarity::similarity`.
    pub primary_metric: SimilarityMetric,
    /// Metrics combined by `SemanticSimilarity::ensemble_similarity`.
    pub ensemble_metrics: Vec<SimilarityMetric>,
    /// One weight per entry of `ensemble_metrics`; `ensemble_similarity`
    /// rejects configurations where the two lengths differ.
    pub ensemble_weights: Vec<f32>,
    /// Similarity threshold.
    /// NOTE(review): not read by any code visible in this file — confirm its consumers.
    pub similarity_threshold: f32,
    /// When `true`, scores are blended with a magnitude-similarity term.
    pub semantic_boost: bool,
    /// Temporal-decay switch.
    /// NOTE(review): not read by any code visible in this file — confirm its consumers.
    pub temporal_decay: bool,
}
28
29impl Default for SimilarityConfig {
30 fn default() -> Self {
31 Self {
32 primary_metric: SimilarityMetric::Cosine,
33 ensemble_metrics: vec![
34 SimilarityMetric::Cosine,
35 SimilarityMetric::Pearson,
36 SimilarityMetric::Jaccard,
37 ],
38 ensemble_weights: vec![0.5, 0.3, 0.2],
39 similarity_threshold: 0.7,
40 semantic_boost: true,
41 temporal_decay: false,
42 }
43 }
44}
45
/// Similarity metrics supported by this module.
///
/// `SimilarityMetric::similarity` dispatches each variant to its scoring
/// function and clamps the result to `[0.0, 1.0]`.
#[derive(
    Debug, Clone, Copy, Serialize, Deserialize, PartialEq, oxicode::Encode, oxicode::Decode,
)]
pub enum SimilarityMetric {
    /// Cosine similarity.
    Cosine,
    /// Euclidean distance mapped to a similarity as `1 / (1 + d)`.
    Euclidean,
    /// Manhattan distance mapped to a similarity as `1 / (1 + d)`.
    Manhattan,
    /// Minkowski distance of the given order `p`, mapped as `1 / (1 + d)`;
    /// a non-positive `p` falls back to the Euclidean form.
    Minkowski(f32),
    /// Pearson correlation coefficient.
    Pearson,
    /// Spearman correlation (Pearson correlation of the value ranks).
    Spearman,
    /// Jaccard index over the components greater than 0.01.
    Jaccard,
    /// Dice coefficient over the components greater than 0.01.
    Dice,
    /// One minus the square root of the Jensen-Shannon divergence of the
    /// sum-normalized inputs.
    JensenShannon,
    /// Bhattacharyya coefficient of the sum-normalized inputs.
    Bhattacharyya,
    /// Mahalanobis; currently scored with the Euclidean similarity.
    Mahalanobis,
    /// Fraction of positions whose components fall on the same side of 0.5.
    Hamming,
    /// Canberra distance mapped to a similarity as `1 / (1 + d)`.
    Canberra,
    /// One minus the angle between the vectors divided by pi.
    Angular,
    /// Chebyshev distance mapped to a similarity as `1 / (1 + d)`.
    Chebyshev,
    /// Raw dot product.
    DotProduct,
}
84
85impl SimilarityMetric {
86 pub fn similarity(&self, a: &[f32], b: &[f32]) -> Result<f32> {
88 if a.len() != b.len() {
89 return Err(anyhow!("Vector dimensions must match"));
90 }
91
92 let similarity = match self {
93 SimilarityMetric::Cosine => cosine_similarity(a, b),
94 SimilarityMetric::Euclidean => euclidean_similarity(a, b),
95 SimilarityMetric::Manhattan => manhattan_similarity(a, b),
96 SimilarityMetric::Minkowski(p) => minkowski_similarity(a, b, *p),
97 SimilarityMetric::Pearson => pearson_correlation(a, b)?,
98 SimilarityMetric::Spearman => spearman_correlation(a, b)?,
99 SimilarityMetric::Jaccard => jaccard_similarity(a, b),
100 SimilarityMetric::Dice => dice_coefficient(a, b),
101 SimilarityMetric::JensenShannon => jensen_shannon_similarity(a, b)?,
102 SimilarityMetric::Bhattacharyya => bhattacharyya_similarity(a, b)?,
103 SimilarityMetric::Mahalanobis => {
104 euclidean_similarity(a, b)
106 }
107 SimilarityMetric::Hamming => hamming_similarity(a, b),
108 SimilarityMetric::Canberra => canberra_similarity(a, b),
109 SimilarityMetric::Angular => angular_similarity(a, b),
110 SimilarityMetric::Chebyshev => chebyshev_similarity(a, b),
111 SimilarityMetric::DotProduct => dot_product_similarity(a, b),
112 };
113
114 Ok(similarity.clamp(0.0, 1.0))
115 }
116
117 pub fn distance(&self, a: &Vector, b: &Vector) -> Result<f32> {
119 let a_f32 = a.as_f32();
120 let b_f32 = b.as_f32();
121
122 if a_f32.len() != b_f32.len() {
123 return Err(anyhow!("Vector dimensions must match"));
124 }
125
126 let distance = match self {
127 SimilarityMetric::Euclidean => euclidean_distance(&a_f32, &b_f32),
129 SimilarityMetric::Manhattan => manhattan_distance(&a_f32, &b_f32),
130 SimilarityMetric::Minkowski(p) => minkowski_distance(&a_f32, &b_f32, *p),
131 SimilarityMetric::Hamming => hamming_distance(&a_f32, &b_f32),
132 SimilarityMetric::Canberra => canberra_distance(&a_f32, &b_f32),
133 SimilarityMetric::Chebyshev => chebyshev_distance(&a_f32, &b_f32),
134
135 _ => {
137 let similarity = self.similarity(&a_f32, &b_f32)?;
138 1.0 - similarity
139 }
140 };
141
142 Ok(distance.max(0.0))
143 }
144
145 pub fn compute(&self, a: &Vector, b: &Vector) -> Result<f32> {
147 let a_f32 = a.as_f32();
148 let b_f32 = b.as_f32();
149 self.similarity(&a_f32, &b_f32)
150 }
151}
152
/// Similarity engine that layers optional feature weighting and a
/// magnitude-based boost on top of a configured metric.
pub struct SemanticSimilarity {
    /// Metric selection and feature switches.
    config: SimilarityConfig,
    /// Optional per-dimension weights; when set, `similarity` scores the
    /// weighted vectors with cosine similarity.
    feature_weights: Option<Vec<f32>>,
    /// Optional covariance matrix stored by `set_covariance_matrix`.
    /// NOTE(review): not read by any method visible in this file.
    covariance_matrix: Option<Vec<Vec<f32>>>,
}
159
160impl SemanticSimilarity {
161 pub fn new(config: SimilarityConfig) -> Self {
162 Self {
163 config,
164 feature_weights: None,
165 covariance_matrix: None,
166 }
167 }
168
169 pub fn set_feature_weights(&mut self, weights: Vec<f32>) {
171 self.feature_weights = Some(weights);
172 }
173
174 pub fn set_covariance_matrix(&mut self, matrix: Vec<Vec<f32>>) {
176 self.covariance_matrix = Some(matrix);
177 }
178
179 pub fn similarity(&self, a: &Vector, b: &Vector) -> Result<f32> {
181 let a_f32 = a.as_f32();
182 let b_f32 = b.as_f32();
183
184 let mut similarity = self.config.primary_metric.similarity(&a_f32, &b_f32)?;
185
186 if let Some(ref weights) = self.feature_weights {
188 similarity = self.apply_feature_weights(&a_f32, &b_f32, weights);
189 }
190
191 if self.config.semantic_boost {
193 similarity = self.apply_semantic_boost(similarity, a, b);
194 }
195
196 Ok(similarity)
197 }
198
199 pub fn ensemble_similarity(&self, a: &Vector, b: &Vector) -> Result<f32> {
201 if self.config.ensemble_metrics.len() != self.config.ensemble_weights.len() {
202 return Err(anyhow!("Ensemble metrics and weights length mismatch"));
203 }
204
205 let a_f32 = a.as_f32();
206 let b_f32 = b.as_f32();
207
208 let mut weighted_sum = 0.0;
209 let mut total_weight = 0.0;
210
211 for (metric, weight) in self
212 .config
213 .ensemble_metrics
214 .iter()
215 .zip(&self.config.ensemble_weights)
216 {
217 let similarity = metric.similarity(&a_f32, &b_f32)?;
218 weighted_sum += similarity * weight;
219 total_weight += weight;
220 }
221
222 if total_weight == 0.0 {
223 return Ok(0.0);
224 }
225
226 let ensemble_score = weighted_sum / total_weight;
227
228 if self.config.semantic_boost {
230 Ok(self.apply_semantic_boost(ensemble_score, a, b))
231 } else {
232 Ok(ensemble_score)
233 }
234 }
235
236 pub fn similarity_matrix(&self, vectors: &[Vector]) -> Result<Vec<Vec<f32>>> {
238 let n = vectors.len();
239 let mut matrix = vec![vec![0.0; n]; n];
240
241 for i in 0..n {
242 for j in i..n {
243 let similarity = if i == j {
244 1.0
245 } else {
246 self.similarity(&vectors[i], &vectors[j])?
247 };
248
249 matrix[i][j] = similarity;
250 matrix[j][i] = similarity;
251 }
252 }
253
254 Ok(matrix)
255 }
256
257 pub fn find_similar(
259 &self,
260 query: &Vector,
261 candidates: &[(String, Vector)],
262 k: usize,
263 ) -> Result<Vec<(String, f32)>> {
264 let mut similarities: Vec<(String, f32)> = candidates
265 .iter()
266 .map(|(uri, vector)| {
267 let sim = self.similarity(query, vector).unwrap_or(0.0);
268 (uri.clone(), sim)
269 })
270 .collect();
271
272 similarities.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
273 similarities.truncate(k);
274
275 Ok(similarities)
276 }
277
278 pub fn cluster_by_similarity(
280 &self,
281 vectors: &[(String, Vector)],
282 threshold: f32,
283 ) -> Result<Vec<Vec<String>>> {
284 let mut clusters: Vec<Vec<String>> = Vec::new();
285 let mut assigned: Vec<bool> = vec![false; vectors.len()];
286
287 for i in 0..vectors.len() {
288 if assigned[i] {
289 continue;
290 }
291
292 let mut cluster = vec![vectors[i].0.clone()];
293 assigned[i] = true;
294
295 for j in (i + 1)..vectors.len() {
296 if assigned[j] {
297 continue;
298 }
299
300 let similarity = self.similarity(&vectors[i].1, &vectors[j].1)?;
301 if similarity >= threshold {
302 cluster.push(vectors[j].0.clone());
303 assigned[j] = true;
304 }
305 }
306
307 clusters.push(cluster);
308 }
309
310 Ok(clusters)
311 }
312
313 fn apply_feature_weights(&self, a: &[f32], b: &[f32], weights: &[f32]) -> f32 {
314 let weighted_a: Vec<f32> = a.iter().zip(weights).map(|(x, w)| x * w).collect();
315 let weighted_b: Vec<f32> = b.iter().zip(weights).map(|(x, w)| x * w).collect();
316
317 cosine_similarity(&weighted_a, &weighted_b)
318 }
319
320 fn apply_semantic_boost(&self, similarity: f32, a: &Vector, b: &Vector) -> f32 {
321 let a_f32 = a.as_f32();
323 let b_f32 = b.as_f32();
324 let mag_a = vector_magnitude(&a_f32);
325 let mag_b = vector_magnitude(&b_f32);
326 let magnitude_similarity = 1.0 - (mag_a - mag_b).abs() / (mag_a + mag_b + f32::EPSILON);
327
328 0.8 * similarity + 0.2 * magnitude_similarity
330 }
331}
332
/// Similarity engine whose scores are shifted by per-URI feedback.
pub struct AdaptiveSimilarity {
    /// Engine that produces the unadjusted score.
    base_similarity: SemanticSimilarity,
    /// Accumulated adjustment per URI, updated by `add_feedback`.
    feedback_weights: HashMap<String, f32>,
    /// Factor applied to each feedback error before it is accumulated.
    learning_rate: f32,
}
339
340impl AdaptiveSimilarity {
341 pub fn new(config: SimilarityConfig, learning_rate: f32) -> Self {
342 Self {
343 base_similarity: SemanticSimilarity::new(config),
344 feedback_weights: HashMap::new(),
345 learning_rate,
346 }
347 }
348
349 pub fn add_feedback(&mut self, uri: &str, expected_similarity: f32, actual_similarity: f32) {
351 let error = expected_similarity - actual_similarity;
352 let adjustment = self.learning_rate * error;
353
354 *self.feedback_weights.entry(uri.to_string()).or_insert(0.0) += adjustment;
355 }
356
357 pub fn adaptive_similarity(
359 &self,
360 a: &Vector,
361 b: &Vector,
362 uri_a: &str,
363 uri_b: &str,
364 ) -> Result<f32> {
365 let base_sim = self.base_similarity.similarity(a, b)?;
366
367 let weight_a = self.feedback_weights.get(uri_a).unwrap_or(&0.0);
368 let weight_b = self.feedback_weights.get(uri_b).unwrap_or(&0.0);
369 let adjustment = (weight_a + weight_b) / 2.0;
370
371 Ok((base_sim + adjustment).clamp(0.0, 1.0))
372 }
373
374 pub fn get_feedback_weights(&self) -> &HashMap<String, f32> {
376 &self.feedback_weights
377 }
378}
379
/// Similarity engine that attenuates scores according to per-URI time weights.
pub struct TemporalSimilarity {
    /// Engine that produces the undecayed score.
    base_similarity: SemanticSimilarity,
    /// Rate used in the exponential decay factor.
    decay_rate: f32,
    /// Time weight per URI; URIs without an entry are treated as `1.0`.
    time_weights: HashMap<String, f32>,
}
386
387impl TemporalSimilarity {
388 pub fn new(config: SimilarityConfig, decay_rate: f32) -> Self {
389 Self {
390 base_similarity: SemanticSimilarity::new(config),
391 decay_rate,
392 time_weights: HashMap::new(),
393 }
394 }
395
396 pub fn set_time_weight(&mut self, uri: &str, time_weight: f32) {
398 self.time_weights.insert(uri.to_string(), time_weight);
399 }
400
401 pub fn temporal_similarity(
403 &self,
404 a: &Vector,
405 b: &Vector,
406 uri_a: &str,
407 uri_b: &str,
408 ) -> Result<f32> {
409 let base_sim = self.base_similarity.similarity(a, b)?;
410
411 let time_a = self.time_weights.get(uri_a).unwrap_or(&1.0);
412 let time_b = self.time_weights.get(uri_b).unwrap_or(&1.0);
413
414 let time_factor = (time_a + time_b) / 2.0;
415 let decay = (-self.decay_rate * (1.0 - time_factor)).exp();
416
417 Ok(base_sim * decay)
418 }
419}
420
/// Convenience wrapper that scores `a` against `b` with `metric` by calling
/// `SimilarityMetric::similarity`.
///
/// # Errors
///
/// Returns whatever error `SimilarityMetric::similarity` reports, e.g. when
/// the slices differ in length.
pub fn compute_similarity(a: &[f32], b: &[f32], metric: SimilarityMetric) -> Result<f32> {
    metric.similarity(a, b)
}
427
/// Scales `vector` in place to unit Euclidean length.
///
/// A vector whose norm is not strictly positive (for example all zeros) is
/// left untouched. The function always returns `Ok(())`.
pub fn normalize_vector(vector: &mut [f32]) -> Result<()> {
    let norm = vector.iter().map(|&c| c * c).sum::<f32>().sqrt();

    if norm > 0.0 {
        vector.iter_mut().for_each(|component| *component /= norm);
    }

    Ok(())
}
438
439pub fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
440 1.0 - f32::cosine_distance(a, b)
442}
443
444fn euclidean_similarity(a: &[f32], b: &[f32]) -> f32 {
445 let distance = f32::euclidean_distance(a, b);
447 1.0 / (1.0 + distance)
448}
449
450fn manhattan_similarity(a: &[f32], b: &[f32]) -> f32 {
451 let distance = f32::manhattan_distance(a, b);
453 1.0 / (1.0 + distance)
454}
455
456fn minkowski_similarity(a: &[f32], b: &[f32], p: f32) -> f32 {
457 if p <= 0.0 {
458 return euclidean_similarity(a, b);
460 }
461
462 let distance: f32 = a
463 .iter()
464 .zip(b)
465 .map(|(x, y)| (x - y).abs().powf(p))
466 .sum::<f32>()
467 .powf(1.0 / p);
468 1.0 / (1.0 + distance)
469}
470
/// Maps the Chebyshev distance `d` (largest absolute component difference)
/// to the similarity `1 / (1 + d)`.
fn chebyshev_similarity(a: &[f32], b: &[f32]) -> f32 {
    let mut largest_gap = 0.0_f32;
    for (x, y) in a.iter().zip(b) {
        largest_gap = largest_gap.max((x - y).abs());
    }
    (1.0 + largest_gap).recip()
}
479
480fn pearson_correlation(a: &[f32], b: &[f32]) -> Result<f32> {
481 let n = a.len() as f32;
482 if n == 0.0 {
483 return Ok(0.0);
484 }
485
486 let mean_a = f32::mean(a);
488 let mean_b = f32::mean(b);
489
490 let numerator: f32 = a
491 .iter()
492 .zip(b)
493 .map(|(x, y)| (x - mean_a) * (y - mean_b))
494 .sum();
495 let sum_sq_a: f32 = a.iter().map(|x| (x - mean_a).powi(2)).sum();
496 let sum_sq_b: f32 = b.iter().map(|x| (x - mean_b).powi(2)).sum();
497
498 let denominator = (sum_sq_a * sum_sq_b).sqrt();
499
500 if denominator == 0.0 {
501 Ok(0.0)
502 } else {
503 Ok(numerator / denominator)
504 }
505}
506
507fn spearman_correlation(a: &[f32], b: &[f32]) -> Result<f32> {
508 let ranks_a = compute_ranks(a);
509 let ranks_b = compute_ranks(b);
510 pearson_correlation(&ranks_a, &ranks_b)
511}
512
/// Returns the 1-based rank of every element of `values`.
///
/// Equal values share the average of the positions they occupy (fractional
/// ranking), so tied data does not receive an arbitrary order. Ordering uses
/// `f32::total_cmp`, which is a total order even when NaN is present; NaN
/// values never compare equal and therefore each keep their own rank.
fn compute_ranks(values: &[f32]) -> Vec<f32> {
    // Indices of `values`, sorted by the value they point at.
    let mut order: Vec<usize> = (0..values.len()).collect();
    order.sort_by(|&i, &j| values[i].total_cmp(&values[j]));

    let mut ranks = vec![0.0; values.len()];
    let mut start = 0;
    while start < order.len() {
        // Extend `end` over the run of values equal to the one at `start`.
        let value = values[order[start]];
        let mut end = start + 1;
        while end < order.len() && values[order[end]] == value {
            end += 1;
        }

        // The run occupies 1-based positions `start + 1 ..= end`; their mean
        // is `(start + 1 + end) / 2`.
        let average_rank = (start + 1 + end) as f32 / 2.0;
        for &index in &order[start..end] {
            ranks[index] = average_rank;
        }

        start = end;
    }

    ranks
}
524
/// Jaccard index of the two vectors viewed as sets: a position belongs to a
/// set when its component is greater than 0.01.
///
/// Returns `1.0` when neither vector has any member.
fn jaccard_similarity(a: &[f32], b: &[f32]) -> f32 {
    const MEMBERSHIP_THRESHOLD: f32 = 0.01;

    let mut shared = 0_usize;
    let mut either = 0_usize;
    for (&x, &y) in a.iter().zip(b) {
        let in_a = x > MEMBERSHIP_THRESHOLD;
        let in_b = y > MEMBERSHIP_THRESHOLD;
        shared += usize::from(in_a && in_b);
        either += usize::from(in_a || in_b);
    }

    match either {
        0 => 1.0,
        union => shared as f32 / union as f32,
    }
}
547
/// Dice coefficient of the two vectors viewed as sets: a position belongs to
/// a set when its component is greater than 0.01.
///
/// Returns `1.0` when both sets are empty.
fn dice_coefficient(a: &[f32], b: &[f32]) -> f32 {
    const MEMBERSHIP_THRESHOLD: f32 = 0.01;
    let is_member = |value: f32| value > MEMBERSHIP_THRESHOLD;

    let size_a = a.iter().filter(|&&x| is_member(x)).count();
    let size_b = b.iter().filter(|&&y| is_member(y)).count();
    let shared = a
        .iter()
        .zip(b)
        .filter(|&(&x, &y)| is_member(x) && is_member(y))
        .count();

    let combined = size_a + size_b;
    if combined == 0 {
        return 1.0;
    }

    2.0 * shared as f32 / combined as f32
}
567
568fn jensen_shannon_similarity(a: &[f32], b: &[f32]) -> Result<f32> {
569 let sum_a: f32 = a.iter().sum();
571 let sum_b: f32 = b.iter().sum();
572
573 if sum_a == 0.0 || sum_b == 0.0 {
574 return Ok(0.0);
575 }
576
577 let p: Vec<f32> = a.iter().map(|x| x / sum_a).collect();
578 let q: Vec<f32> = b.iter().map(|x| x / sum_b).collect();
579
580 let m: Vec<f32> = p.iter().zip(&q).map(|(x, y)| (x + y) / 2.0).collect();
582
583 let kl_pm = kl_divergence(&p, &m);
585 let kl_qm = kl_divergence(&q, &m);
586
587 let js_distance = (kl_pm + kl_qm) / 2.0;
588 Ok(1.0 - js_distance.sqrt()) }
590
/// Kullback-Leibler divergence `sum(p_i * ln(p_i / q_i))` using natural
/// logarithms.
///
/// Terms where either probability is not strictly positive contribute zero.
fn kl_divergence(p: &[f32], q: &[f32]) -> f32 {
    let term = |pi: f32, qi: f32| -> f32 {
        let both_positive = pi > 0.0 && qi > 0.0;
        if both_positive {
            pi * (pi / qi).ln()
        } else {
            0.0
        }
    };

    p.iter().zip(q).map(|(&pi, &qi)| term(pi, qi)).sum()
}
603
/// Bhattacharyya coefficient `sum(sqrt(p_i * q_i))` of `a` and `b`, each
/// normalized by its sum.
///
/// Returns `0.0` when either input sums to zero. The function never returns
/// an error.
fn bhattacharyya_similarity(a: &[f32], b: &[f32]) -> Result<f32> {
    let total_a = a.iter().sum::<f32>();
    let total_b = b.iter().sum::<f32>();

    if total_a == 0.0 || total_b == 0.0 {
        return Ok(0.0);
    }

    let coefficient = a
        .iter()
        .zip(b)
        .map(|(x, y)| ((x / total_a) * (y / total_b)).sqrt())
        .sum::<f32>();

    Ok(coefficient)
}
618
/// Fraction of positions at which `a` and `b` agree after binarizing each
/// component with the test `> 0.5`.
///
/// Empty input is treated as a perfect match (`1.0`) instead of evaluating
/// `0 / 0`, which would produce NaN. The inputs are expected to have the
/// same length; the ratio is taken over `a.len()`.
fn hamming_similarity(a: &[f32], b: &[f32]) -> f32 {
    if a.is_empty() {
        return 1.0;
    }

    let threshold = 0.5;
    let matches = a
        .iter()
        .zip(b)
        .filter(|(x, y)| (**x > threshold) == (**y > threshold))
        .count();

    matches as f32 / a.len() as f32
}
629
/// Maps the Canberra distance `d = sum(|x - y| / (|x| + |y|))` to the
/// similarity `1 / (1 + d)`.
///
/// Positions where both components are zero contribute nothing.
fn canberra_similarity(a: &[f32], b: &[f32]) -> f32 {
    let term = |x: f32, y: f32| -> f32 {
        let scale = x.abs() + y.abs();
        if scale > 0.0 {
            (x - y).abs() / scale
        } else {
            0.0
        }
    };

    let total: f32 = a.iter().zip(b).map(|(&x, &y)| term(x, y)).sum();
    (1.0 + total).recip()
}
647
648fn angular_similarity(a: &[f32], b: &[f32]) -> f32 {
649 let cosine_sim = cosine_similarity(a, b);
650 let angle = cosine_sim.acos();
651 1.0 - (angle / std::f32::consts::PI)
652}
653
/// Dot product of `a` and `b` (over the shorter of the two lengths).
fn dot_product_similarity(a: &[f32], b: &[f32]) -> f32 {
    let products = a.iter().zip(b).map(|(&x, &y)| x * y);
    products.sum()
}
658
/// Euclidean norm (L2 length) of `vector`.
fn vector_magnitude(vector: &[f32]) -> f32 {
    let sum_of_squares: f32 = vector.iter().map(|&component| component * component).sum();
    sum_of_squares.sqrt()
}
663
/// Euclidean distance between `a` and `b`, delegated to
/// `f32::euclidean_distance` (presumably supplied by the imported `SimdOps`
/// trait — confirm).
fn euclidean_distance(a: &[f32], b: &[f32]) -> f32 {
    f32::euclidean_distance(a, b)
}
670
/// Manhattan distance between `a` and `b`, delegated to
/// `f32::manhattan_distance` (presumably supplied by the imported `SimdOps`
/// trait — confirm).
fn manhattan_distance(a: &[f32], b: &[f32]) -> f32 {
    f32::manhattan_distance(a, b)
}
675
676fn minkowski_distance(a: &[f32], b: &[f32], p: f32) -> f32 {
677 if p <= 0.0 {
678 return euclidean_distance(a, b);
679 }
680
681 a.iter()
682 .zip(b)
683 .map(|(x, y)| (x - y).abs().powf(p))
684 .sum::<f32>()
685 .powf(1.0 / p)
686}
687
/// Chebyshev distance: the largest absolute difference between matching
/// components (`0.0` for empty input).
fn chebyshev_distance(a: &[f32], b: &[f32]) -> f32 {
    let gaps = a.iter().zip(b).map(|(&x, &y)| (x - y).abs());
    gaps.fold(0.0_f32, f32::max)
}
694
/// Fraction of positions at which `a` and `b` disagree after binarizing
/// each component with the test `> 0.5`.
///
/// Empty input has distance `0.0`; without the guard the ratio would be
/// `0 / 0`, i.e. NaN. The inputs are expected to have the same length; the
/// ratio is taken over `a.len()`.
fn hamming_distance(a: &[f32], b: &[f32]) -> f32 {
    if a.is_empty() {
        return 0.0;
    }

    let threshold = 0.5;
    let mismatches = a
        .iter()
        .zip(b)
        .filter(|(x, y)| (**x > threshold) != (**y > threshold))
        .count();

    mismatches as f32 / a.len() as f32
}
705
/// Canberra distance: `sum(|x - y| / (|x| + |y|))`.
///
/// Positions where both components are zero contribute nothing.
fn canberra_distance(a: &[f32], b: &[f32]) -> f32 {
    /// Contribution of a single pair of components.
    fn ratio(x: f32, y: f32) -> f32 {
        let scale = x.abs() + y.abs();
        if scale > 0.0 {
            (x - y).abs() / scale
        } else {
            0.0
        }
    }

    a.iter().zip(b).map(|(&x, &y)| ratio(x, y)).sum()
}
720
/// One scored candidate produced by a similarity search.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SimilarityResult {
    /// Identifier of this result; `BatchSimilarityProcessor::process_batch`
    /// fills it with the output of `generate_similarity_id`.
    pub id: String,
    /// URI of the candidate that was scored.
    pub uri: String,
    /// Similarity between the query and the candidate.
    pub similarity: f32,
    /// Additional named scores; `process_batch` leaves this map empty.
    pub metrics: HashMap<String, f32>,
    /// Optional free-form metadata; `process_batch` sets it to `None`.
    pub metadata: Option<HashMap<String, String>>,
}
730
/// Scores batches of queries against candidates, caching pairwise results.
pub struct BatchSimilarityProcessor {
    /// Engine used to score each query/candidate pair.
    similarity: SemanticSimilarity,
    /// Cached similarities keyed by the two URIs in lexicographic order, so
    /// `(a, b)` and `(b, a)` share one entry.
    cache: HashMap<(String, String), f32>,
    /// Entry count at which an existing entry is evicted before inserting.
    max_cache_size: usize,
}
737
738impl BatchSimilarityProcessor {
739 pub fn new(config: SimilarityConfig, max_cache_size: usize) -> Self {
740 Self {
741 similarity: SemanticSimilarity::new(config),
742 cache: HashMap::new(),
743 max_cache_size,
744 }
745 }
746
747 pub fn process_batch(
749 &mut self,
750 queries: &[(String, Vector)],
751 candidates: &[(String, Vector)],
752 ) -> Result<Vec<Vec<SimilarityResult>>> {
753 let mut results = Vec::new();
754
755 for (query_uri, query_vec) in queries {
756 let mut query_results = Vec::new();
757
758 for (candidate_uri, candidate_vec) in candidates {
759 let cache_key = if query_uri < candidate_uri {
760 (query_uri.clone(), candidate_uri.clone())
761 } else {
762 (candidate_uri.clone(), query_uri.clone())
763 };
764
765 let similarity = if let Some(&cached_sim) = self.cache.get(&cache_key) {
766 cached_sim
767 } else {
768 let sim = self.similarity.similarity(query_vec, candidate_vec)?;
769
770 if self.cache.len() >= self.max_cache_size {
772 if let Some(key) = self.cache.keys().next().cloned() {
774 self.cache.remove(&key);
775 }
776 }
777
778 self.cache.insert(cache_key, sim);
779 sim
780 };
781
782 query_results.push(SimilarityResult {
783 id: generate_similarity_id(candidate_uri, similarity),
784 uri: candidate_uri.clone(),
785 similarity,
786 metrics: HashMap::new(),
787 metadata: None,
788 });
789 }
790
791 query_results.sort_by(|a, b| {
793 b.similarity
794 .partial_cmp(&a.similarity)
795 .unwrap_or(std::cmp::Ordering::Equal)
796 });
797 results.push(query_results);
798 }
799
800 Ok(results)
801 }
802
803 pub fn cache_stats(&self) -> (usize, usize) {
804 (self.cache.len(), self.max_cache_size)
805 }
806
807 pub fn clear_cache(&mut self) {
808 self.cache.clear();
809 }
810}
811
/// Builds an identifier of the form `sim_<hex>` for a scored result.
///
/// The hex digest hashes the URI, the bit pattern of the similarity and the
/// current time in milliseconds since the Unix epoch (zero if the clock
/// reads earlier than the epoch), so the identifier varies with the time of
/// the call.
fn generate_similarity_id(uri: &str, similarity: f32) -> String {
    let millis_since_epoch = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .map(|elapsed| elapsed.as_millis())
        .unwrap_or(0);
    let similarity_bits = similarity.to_bits();

    let mut state = DefaultHasher::new();
    uri.hash(&mut state);
    similarity_bits.hash(&mut state);
    millis_since_epoch.hash(&mut state);
    let digest = state.finish();

    format!("sim_{:x}", digest)
}