1use crate::VectorStore;
7use anyhow::{anyhow, Result};
8use serde::{Deserialize, Serialize};
9use std::collections::{HashMap, HashSet, VecDeque};
10use std::sync::{Arc, RwLock};
11
12#[derive(Debug, Clone, Serialize, Deserialize)]
14pub struct HierarchicalSimilarityConfig {
15 pub enable_ontology_similarity: bool,
17 pub enable_hierarchy_traversal: bool,
19 pub enable_contextual_similarity: bool,
21 pub enable_adaptive_similarity: bool,
23 pub max_hierarchy_depth: usize,
25 pub direct_similarity_weight: f32,
27 pub hierarchical_similarity_weight: f32,
29 pub contextual_similarity_weight: f32,
31 pub similarity_cache_size: usize,
33}
34
35impl Default for HierarchicalSimilarityConfig {
36 fn default() -> Self {
37 Self {
38 enable_ontology_similarity: true,
39 enable_hierarchy_traversal: true,
40 enable_contextual_similarity: true,
41 enable_adaptive_similarity: false,
42 max_hierarchy_depth: 5,
43 direct_similarity_weight: 0.6,
44 hierarchical_similarity_weight: 0.3,
45 contextual_similarity_weight: 0.1,
46 similarity_cache_size: 10000,
47 }
48 }
49}
50
51#[derive(Debug, Clone, Serialize, Deserialize, Default)]
53pub struct ConceptHierarchy {
54 pub child_to_parent: HashMap<String, String>,
56 pub parent_to_children: HashMap<String, HashSet<String>>,
58 pub concept_levels: HashMap<String, usize>,
60 pub concept_types: HashMap<String, String>,
62 pub concept_weights: HashMap<String, f32>,
64}
65
66impl ConceptHierarchy {
67 pub fn add_relationship(&mut self, parent: String, child: String) {
69 self.child_to_parent.insert(child.clone(), parent.clone());
70 self.parent_to_children
71 .entry(parent)
72 .or_default()
73 .insert(child);
74
75 self.recompute_levels();
77 }
78
79 pub fn lowest_common_ancestor(&self, concept1: &str, concept2: &str) -> Option<String> {
81 let ancestors1 = self.get_ancestors(concept1);
82 let ancestors2 = self.get_ancestors(concept2);
83
84 let common_ancestors: HashSet<&String> = ancestors1.intersection(&ancestors2).collect();
86
87 common_ancestors
89 .into_iter()
90 .max_by_key(|ancestor| self.concept_levels.get(*ancestor).unwrap_or(&0))
91 .cloned()
92 }
93
94 pub fn get_ancestors(&self, concept: &str) -> HashSet<String> {
96 let mut ancestors = HashSet::new();
97 let mut current = concept.to_string();
98
99 while let Some(parent) = self.child_to_parent.get(¤t) {
100 ancestors.insert(parent.clone());
101 current = parent.clone();
102 }
103
104 ancestors
105 }
106
107 pub fn get_descendants(&self, concept: &str) -> HashSet<String> {
109 let mut descendants = HashSet::new();
110 let mut queue = VecDeque::new();
111
112 if let Some(children) = self.parent_to_children.get(concept) {
113 for child in children {
114 queue.push_back(child.clone());
115 }
116 }
117
118 while let Some(current) = queue.pop_front() {
119 if descendants.insert(current.clone()) {
120 if let Some(children) = self.parent_to_children.get(¤t) {
121 for child in children {
122 queue.push_back(child.clone());
123 }
124 }
125 }
126 }
127
128 descendants
129 }
130
131 pub fn concept_distance(&self, concept1: &str, concept2: &str) -> f32 {
133 if concept1 == concept2 {
134 return 0.0;
135 }
136
137 if let Some(lca) = self.lowest_common_ancestor(concept1, concept2) {
138 let level1 = self.concept_levels.get(concept1).unwrap_or(&0);
139 let level2 = self.concept_levels.get(concept2).unwrap_or(&0);
140 let lca_level = self.concept_levels.get(&lca).unwrap_or(&0);
141
142 let distance = (level1 - lca_level) + (level2 - lca_level);
144 distance as f32
145 } else {
146 f32::INFINITY
148 }
149 }
150
151 fn recompute_levels(&mut self) {
153 self.concept_levels.clear();
154
155 let roots: Vec<String> = self
157 .parent_to_children
158 .keys()
159 .filter(|concept| !self.child_to_parent.contains_key(*concept))
160 .cloned()
161 .collect();
162
163 let mut queue = VecDeque::new();
165 for root in roots {
166 self.concept_levels.insert(root.clone(), 0);
167 queue.push_back((root, 0));
168 }
169
170 while let Some((concept, level)) = queue.pop_front() {
171 if let Some(children) = self.parent_to_children.get(&concept) {
172 for child in children {
173 let child_level = level + 1;
174 if child_level > *self.concept_levels.get(child).unwrap_or(&0) {
176 self.concept_levels.insert(child.clone(), child_level);
177 queue.push_back((child.clone(), child_level));
178 }
179 }
180 }
181 }
182 }
183}
184
185#[derive(Debug, Clone, Serialize, Deserialize)]
187pub struct SimilarityContext {
188 pub domain: Option<String>,
190 pub temporal_weight: f32,
192 pub cultural_context: Option<String>,
194 pub user_context: HashMap<String, f32>,
196 pub task_type: SimilarityTaskType,
198}
199
200impl Default for SimilarityContext {
201 fn default() -> Self {
202 Self {
203 domain: None,
204 temporal_weight: 1.0,
205 cultural_context: None,
206 user_context: HashMap::new(),
207 task_type: SimilarityTaskType::General,
208 }
209 }
210}
211
212#[derive(Debug, Clone, Copy, PartialEq, Hash, Serialize, Deserialize)]
214pub enum SimilarityTaskType {
215 General,
217 Classification,
219 Retrieval,
221 Recommendation,
223 Clustering,
225}
226
227#[derive(Debug, Clone, Serialize, Deserialize)]
229pub struct HierarchicalSimilarityResult {
230 pub overall_similarity: f32,
232 pub direct_similarity: f32,
234 pub hierarchical_similarity: f32,
236 pub contextual_similarity: f32,
238 pub explanation: SimilarityExplanation,
240}
241
242#[derive(Debug, Clone, Serialize, Deserialize)]
244pub struct SimilarityExplanation {
245 pub concepts_involved: Vec<String>,
247 pub hierarchy_paths: Vec<String>,
249 pub contextual_factors: Vec<String>,
251 pub weight_breakdown: HashMap<String, f32>,
253}
254
255pub struct HierarchicalSimilarity {
257 config: HierarchicalSimilarityConfig,
258 concept_hierarchy: Arc<RwLock<ConceptHierarchy>>,
259 similarity_cache: Arc<RwLock<HashMap<String, f32>>>,
260 concept_to_resource: Arc<RwLock<HashMap<String, Vec<String>>>>,
261 adaptive_weights: Arc<RwLock<HashMap<String, f32>>>,
262}
263
264impl HierarchicalSimilarity {
265 pub fn new(config: HierarchicalSimilarityConfig) -> Self {
267 Self {
268 config,
269 concept_hierarchy: Arc::new(RwLock::new(ConceptHierarchy::default())),
270 similarity_cache: Arc::new(RwLock::new(HashMap::new())),
271 concept_to_resource: Arc::new(RwLock::new(HashMap::new())),
272 adaptive_weights: Arc::new(RwLock::new(HashMap::new())),
273 }
274 }
275
276 pub fn compute_similarity(
278 &self,
279 vector_store: &VectorStore,
280 resource1: &str,
281 resource2: &str,
282 context: &SimilarityContext,
283 ) -> Result<HierarchicalSimilarityResult> {
284 let cache_key = format!("{}:{}:{}", resource1, resource2, self.context_hash(context));
286 if let Ok(cache) = self.similarity_cache.read() {
287 if let Some(&cached_similarity) = cache.get(&cache_key) {
288 return Ok(HierarchicalSimilarityResult {
289 overall_similarity: cached_similarity,
290 direct_similarity: cached_similarity,
291 hierarchical_similarity: 0.0,
292 contextual_similarity: 0.0,
293 explanation: SimilarityExplanation {
294 concepts_involved: vec![],
295 hierarchy_paths: vec!["cached".to_string()],
296 contextual_factors: vec![],
297 weight_breakdown: HashMap::new(),
298 },
299 });
300 }
301 }
302
303 let direct_sim = vector_store.calculate_similarity(resource1, resource2)?;
305
306 let hierarchical_sim = if self.config.enable_ontology_similarity {
308 self.compute_ontology_similarity(resource1, resource2, context)?
309 } else {
310 0.0
311 };
312
313 let contextual_sim = if self.config.enable_contextual_similarity {
315 self.compute_contextual_similarity(resource1, resource2, context)?
316 } else {
317 0.0
318 };
319
320 let (direct_weight, hierarchical_weight, contextual_weight) =
322 if self.config.enable_adaptive_similarity {
323 self.get_adaptive_weights(context)
324 } else {
325 (
326 self.config.direct_similarity_weight,
327 self.config.hierarchical_similarity_weight,
328 self.config.contextual_similarity_weight,
329 )
330 };
331
332 let overall_sim = direct_sim * direct_weight
334 + hierarchical_sim * hierarchical_weight
335 + contextual_sim * contextual_weight;
336
337 if let Ok(mut cache) = self.similarity_cache.write() {
339 if cache.len() >= self.config.similarity_cache_size {
340 cache.clear(); }
342 cache.insert(cache_key, overall_sim);
343 }
344
345 let explanation = SimilarityExplanation {
346 concepts_involved: self
347 .get_concepts_for_resource(resource1)
348 .into_iter()
349 .chain(self.get_concepts_for_resource(resource2))
350 .collect(),
351 hierarchy_paths: vec![format!(
352 "hierarchy_depth_{}",
353 self.config.max_hierarchy_depth
354 )],
355 contextual_factors: vec![format!("domain_{:?}", context.domain)],
356 weight_breakdown: {
357 let mut breakdown = HashMap::new();
358 breakdown.insert("direct".to_string(), direct_weight);
359 breakdown.insert("hierarchical".to_string(), hierarchical_weight);
360 breakdown.insert("contextual".to_string(), contextual_weight);
361 breakdown
362 },
363 };
364
365 Ok(HierarchicalSimilarityResult {
366 overall_similarity: overall_sim,
367 direct_similarity: direct_sim,
368 hierarchical_similarity: hierarchical_sim,
369 contextual_similarity: contextual_sim,
370 explanation,
371 })
372 }
373
374 pub fn add_concept_relationship(&self, parent: &str, child: &str) -> Result<()> {
376 match self.concept_hierarchy.write() {
377 Ok(mut hierarchy) => {
378 hierarchy.add_relationship(parent.to_string(), child.to_string());
379 Ok(())
380 }
381 _ => Err(anyhow!("Failed to acquire write lock on concept hierarchy")),
382 }
383 }
384
385 pub fn associate_resource_with_concepts(
387 &self,
388 resource: &str,
389 concepts: Vec<String>,
390 ) -> Result<()> {
391 match self.concept_to_resource.write() {
392 Ok(mut mapping) => {
393 mapping.insert(resource.to_string(), concepts);
394 Ok(())
395 }
396 _ => Err(anyhow!(
397 "Failed to acquire write lock on concept-resource mapping"
398 )),
399 }
400 }
401
402 fn compute_ontology_similarity(
404 &self,
405 resource1: &str,
406 resource2: &str,
407 _context: &SimilarityContext,
408 ) -> Result<f32> {
409 let concepts1 = self.get_concepts_for_resource(resource1);
410 let concepts2 = self.get_concepts_for_resource(resource2);
411
412 if concepts1.is_empty() || concepts2.is_empty() {
413 return Ok(0.0);
414 }
415
416 let hierarchy = self
417 .concept_hierarchy
418 .read()
419 .map_err(|_| anyhow!("Failed to acquire read lock on concept hierarchy"))?;
420
421 let mut max_similarity = 0.0f32;
422
423 for concept1 in &concepts1 {
425 for concept2 in &concepts2 {
426 let distance = hierarchy.concept_distance(concept1, concept2);
427
428 let similarity = if distance.is_infinite() {
430 0.0
431 } else {
432 1.0 / (1.0 + distance)
433 };
434
435 max_similarity = max_similarity.max(similarity);
436 }
437 }
438
439 Ok(max_similarity)
440 }
441
442 fn compute_contextual_similarity(
444 &self,
445 _resource1: &str,
446 _resource2: &str,
447 context: &SimilarityContext,
448 ) -> Result<f32> {
449 let mut contextual_score = 0.0f32;
450
451 if context.domain.is_some() {
453 contextual_score += 0.3; }
455
456 contextual_score += context.temporal_weight * 0.2;
458
459 if context.cultural_context.is_some() {
461 contextual_score += 0.2;
462 }
463
464 let task_boost = match context.task_type {
466 SimilarityTaskType::General => 0.1,
467 SimilarityTaskType::Classification => 0.15,
468 SimilarityTaskType::Retrieval => 0.2,
469 SimilarityTaskType::Recommendation => 0.25,
470 SimilarityTaskType::Clustering => 0.15,
471 };
472 contextual_score += task_boost;
473
474 if !context.user_context.is_empty() {
476 let user_boost =
477 context.user_context.values().sum::<f32>() / context.user_context.len() as f32;
478 contextual_score += user_boost * 0.2;
479 }
480
481 Ok(contextual_score.clamp(0.0, 1.0))
482 }
483
484 fn get_adaptive_weights(&self, _context: &SimilarityContext) -> (f32, f32, f32) {
486 (
489 self.config.direct_similarity_weight,
490 self.config.hierarchical_similarity_weight,
491 self.config.contextual_similarity_weight,
492 )
493 }
494
495 fn get_concepts_for_resource(&self, resource: &str) -> Vec<String> {
497 match self.concept_to_resource.read() {
498 Ok(mapping) => mapping.get(resource).cloned().unwrap_or_default(),
499 _ => Vec::new(),
500 }
501 }
502
503 fn context_hash(&self, context: &SimilarityContext) -> u64 {
505 use std::collections::hash_map::DefaultHasher;
506 use std::hash::{Hash, Hasher};
507
508 let mut hasher = DefaultHasher::new();
509 context.domain.hash(&mut hasher);
510 context.cultural_context.hash(&mut hasher);
511 context.task_type.hash(&mut hasher);
512 (context.temporal_weight as u64).hash(&mut hasher);
513
514 hasher.finish()
515 }
516
517 pub fn build_hierarchy_from_ontology(&self, ontology_data: &[(String, String)]) -> Result<()> {
519 let mut hierarchy = self
520 .concept_hierarchy
521 .write()
522 .map_err(|_| anyhow!("Failed to acquire write lock on concept hierarchy"))?;
523
524 for (parent, child) in ontology_data {
525 hierarchy.add_relationship(parent.clone(), child.clone());
526 }
527
528 Ok(())
529 }
530
531 pub fn update_adaptive_weights(
533 &self,
534 context: &SimilarityContext,
535 feedback_score: f32,
536 ) -> Result<()> {
537 if !self.config.enable_adaptive_similarity {
538 return Ok(());
539 }
540
541 let context_key = format!("{:?}:{:?}", context.domain, context.task_type);
542
543 if let Ok(mut weights) = self.adaptive_weights.write() {
544 let current_weight = weights.get(&context_key).cloned().unwrap_or(0.5);
546 let new_weight = current_weight * 0.9 + feedback_score * 0.1;
547 weights.insert(context_key, new_weight);
548 }
549
550 Ok(())
551 }
552
553 pub fn get_statistics(&self) -> Result<HierarchicalSimilarityStats> {
555 let hierarchy = self
556 .concept_hierarchy
557 .read()
558 .map_err(|_| anyhow!("Failed to acquire read lock on concept hierarchy"))?;
559 let cache = self
560 .similarity_cache
561 .read()
562 .map_err(|_| anyhow!("Failed to acquire read lock on similarity cache"))?;
563 let concept_mapping = self
564 .concept_to_resource
565 .read()
566 .map_err(|_| anyhow!("Failed to acquire read lock on concept mapping"))?;
567
568 Ok(HierarchicalSimilarityStats {
569 total_concepts: hierarchy.concept_levels.len(),
570 max_hierarchy_depth: hierarchy
571 .concept_levels
572 .values()
573 .max()
574 .cloned()
575 .unwrap_or(0),
576 cached_similarities: cache.len(),
577 mapped_resources: concept_mapping.len(),
578 average_concepts_per_resource: if concept_mapping.is_empty() {
579 0.0
580 } else {
581 concept_mapping.values().map(|v| v.len()).sum::<usize>() as f32
582 / concept_mapping.len() as f32
583 },
584 })
585 }
586}
587
588#[derive(Debug, Clone, Serialize, Deserialize)]
590pub struct HierarchicalSimilarityStats {
591 pub total_concepts: usize,
592 pub max_hierarchy_depth: usize,
593 pub cached_similarities: usize,
594 pub mapped_resources: usize,
595 pub average_concepts_per_resource: f32,
596}
597
598#[cfg(test)]
599mod tests {
600 use super::*;
601
602 #[test]
603 fn test_concept_hierarchy() {
604 let mut hierarchy = ConceptHierarchy::default();
605
606 hierarchy.add_relationship("Animal".to_string(), "Mammal".to_string());
608 hierarchy.add_relationship("Mammal".to_string(), "Dog".to_string());
609 hierarchy.add_relationship("Animal".to_string(), "Bird".to_string());
610
611 let distance = hierarchy.concept_distance("Dog", "Bird");
613 assert!(distance > 0.0);
614
615 let distance_same = hierarchy.concept_distance("Dog", "Dog");
616 assert_eq!(distance_same, 0.0);
617
618 let lca = hierarchy.lowest_common_ancestor("Dog", "Bird");
620 assert_eq!(lca, Some("Animal".to_string()));
621 }
622
623 #[test]
624 fn test_hierarchical_similarity() -> Result<()> {
625 let config = HierarchicalSimilarityConfig::default();
626 let hierarchical_sim = HierarchicalSimilarity::new(config);
627
628 hierarchical_sim.add_concept_relationship("Animal", "Mammal")?;
630 hierarchical_sim.add_concept_relationship("Mammal", "Dog")?;
631
632 hierarchical_sim.associate_resource_with_concepts("resource1", vec!["Dog".to_string()])?;
634 hierarchical_sim
635 .associate_resource_with_concepts("resource2", vec!["Mammal".to_string()])?;
636
637 Ok(())
640 }
641}