1use crate::ai::AiConfig;
7use crate::model::Triple;
8use anyhow::Result;
9use serde::{Deserialize, Serialize};
10use std::collections::{HashMap, HashSet};
11
/// Resolves entities found in a set of triples into clusters of likely duplicates.
///
/// Bundles the resolution settings with the pluggable components (similarity,
/// clustering, feature extraction, blocking) that the resolution pipeline calls.
pub struct EntityResolver {
    /// Resolution settings; `similarity_threshold` is passed to the clustering step.
    config: ResolutionConfig,

    /// Scores the similarity of two entity records when building the per-block matrix.
    similarity_calculator: Box<dyn SimilarityCalculator>,

    /// Groups the entities of one block into clusters from their similarity matrix.
    clustering_algorithm: Box<dyn ClusteringAlgorithm>,

    // Installed by `new` but not read by any code visible in this file, hence the allowance.
    #[allow(dead_code)]
    /// Feature extraction component.
    feature_extractor: Box<dyn FeatureExtractor>,

    /// Produces the blocks (groups of entity indices) that are clustered independently.
    blocking_strategy: Box<dyn BlockingStrategy>,
}
30
#[derive(Debug, Clone, Serialize, Deserialize)]
/// Settings for entity resolution.
///
/// NOTE(review): in the code visible in this file only `similarity_threshold` is
/// read; the remaining fields are stored but not consulted here.
pub struct ResolutionConfig {
    /// Threshold handed to the clustering algorithm when grouping a block.
    pub similarity_threshold: f32,

    /// Requested clustering algorithm.
    pub clustering_algorithm: ClusteringType,

    /// Requested similarity features.
    pub features: Vec<FeatureType>,

    /// Requested blocking strategy.
    pub blocking_strategy: BlockingType,

    /// Upper bound on the number of entities in a cluster.
    pub max_cluster_size: usize,

    /// Flag for ML-based similarity.
    pub enable_ml_similarity: bool,

    /// Optional path to training data.
    pub training_data_path: Option<String>,
}
55
56impl Default for ResolutionConfig {
57 fn default() -> Self {
58 Self {
59 similarity_threshold: 0.8,
60 clustering_algorithm: ClusteringType::HierarchicalClustering,
61 features: vec![
62 FeatureType::StringSimilarity,
63 FeatureType::NumericSimilarity,
64 FeatureType::StructuralSimilarity,
65 ],
66 blocking_strategy: BlockingType::SortedNeighborhood,
67 max_cluster_size: 100,
68 enable_ml_similarity: true,
69 training_data_path: None,
70 }
71 }
72}
73
#[derive(Debug, Clone, Serialize, Deserialize)]
/// Identifies a clustering algorithm in [`ResolutionConfig`].
///
/// NOTE(review): no code visible in this file matches on these variants;
/// `EntityResolver::new` always installs `HierarchicalClusterer`.
pub enum ClusteringType {
    /// Hierarchical clustering (the default in `ResolutionConfig::default`).
    HierarchicalClustering,

    /// Connected-components clustering.
    ConnectedComponents,

    /// Correlation clustering.
    CorrelationClustering,

    /// DBSCAN, parameterized by `eps` and `min_samples`.
    DBSCAN { eps: f32, min_samples: usize },

    /// Markov clustering, parameterized by `inflation`.
    MarkovClustering { inflation: f32 },
}
92
#[derive(Debug, Clone, Serialize, Deserialize)]
/// Names a kind of similarity feature.
///
/// Used in `ResolutionConfig::features` and recorded in `MergeDecision::features_used`.
pub enum FeatureType {
    /// String-based similarity.
    StringSimilarity,

    /// Numeric similarity.
    NumericSimilarity,

    /// Structural similarity; the variant recorded by every merge decision created in this file.
    StructuralSimilarity,

    /// Semantic similarity.
    SemanticSimilarity,

    /// Temporal similarity.
    TemporalSimilarity,

    /// Contextual similarity.
    ContextualSimilarity,
}
114
#[derive(Debug, Clone, Serialize, Deserialize)]
/// Identifies a blocking strategy in [`ResolutionConfig`].
///
/// NOTE(review): no code visible in this file matches on these variants;
/// `EntityResolver::new` always installs `SortedNeighborhoodBlocking`.
pub enum BlockingType {
    /// Standard blocking.
    StandardBlocking,

    /// Sorted-neighborhood blocking (the default in `ResolutionConfig::default`).
    SortedNeighborhood,

    /// Locality-sensitive hashing, parameterized by hash count and hash length.
    LSH {
        num_hashes: usize,
        hash_length: usize,
    },

    /// Canopy clustering, parameterized by the two thresholds `t1` and `t2`.
    CanopyClustering { t1: f32, t2: f32 },

    /// A sequence of blocking strategies.
    MultiPass(Vec<BlockingType>),
}
136
#[derive(Debug, Clone, Serialize, Deserialize)]
/// A group of entity records judged to refer to the same entity.
pub struct EntityCluster {
    /// Cluster identifier (e.g. `cluster_{n}` or `{parent}_split_{n}` as built in this file).
    pub id: String,

    /// Member records of the cluster.
    pub entities: Vec<EntityRecord>,

    /// Representative record; the code in this file always picks the first member.
    pub canonical_entity: EntityRecord,

    /// Confidence score attached to the cluster.
    pub confidence: f32,

    /// Number of members; kept equal to `entities.len()` by the code in this file.
    pub size: usize,

    /// Record of the decisions that produced this cluster.
    pub merge_decisions: Vec<MergeDecision>,
}
158
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
/// One entity together with the data collected about it from the input triples.
pub struct EntityRecord {
    /// Record identifier (`entity_{n}` when created by `extract_entity_records`).
    pub id: String,

    /// String form of the entity's URI.
    pub uri: String,

    /// Attribute map; `extract_entity_records` stores predicate -> object strings,
    /// plus `{predicate}^-1` -> subject entries for incoming links.
    pub attributes: HashMap<String, String>,

    /// Triples collected for the entity (those where it is the subject, as filled in this file).
    pub triples: Vec<Triple>,

    /// Origin of the record (`"unknown"` when created by `extract_entity_records`).
    pub source: String,

    /// Quality score (`1.0` when created by `extract_entity_records`).
    pub quality_score: f32,
}
180
#[derive(Debug, Clone, Serialize, Deserialize)]
/// Audit entry describing one merge / no-merge decision.
///
/// In this file the source and target fields hold cluster ids (or the literal
/// `"initial"` for the decision created with a fresh cluster).
pub struct MergeDecision {
    /// Identifier of the item that was considered for merging.
    pub source_entity: String,

    /// Identifier of the item it was merged into (or split into).
    pub target_entity: String,

    /// Similarity value recorded for the decision.
    pub similarity: f32,

    /// Outcome of the decision.
    pub decision: DecisionType,

    /// Confidence recorded for the decision.
    pub confidence: f32,

    /// Feature kinds credited for the decision.
    pub features_used: Vec<FeatureType>,
}
202
#[derive(Debug, Clone, Serialize, Deserialize)]
/// Outcome recorded in a [`MergeDecision`].
pub enum DecisionType {
    /// The items were merged.
    Merge,
    /// The items were kept apart (also recorded for sub-clusters produced by a split).
    NoMerge,
    /// No clear outcome; not produced by any code visible in this file.
    Uncertain,
}
210
/// Pluggable pairwise similarity measure over entity records.
pub trait SimilarityCalculator: Send + Sync {
    /// Returns the similarity score of `entity1` and `entity2`.
    fn calculate_similarity(&self, entity1: &EntityRecord, entity2: &EntityRecord) -> Result<f32>;

    /// Returns a numeric feature vector describing `entity`.
    fn get_feature_vector(&self, entity: &EntityRecord) -> Result<Vec<f32>>;
}
219
/// Pluggable algorithm that groups entities into clusters.
pub trait ClusteringAlgorithm: Send + Sync {
    /// Clusters `entities` using `similarity_matrix` and `threshold`.
    ///
    /// The resolver passes a square matrix whose row/column `i` corresponds to
    /// `entities[i]`, and its configured similarity threshold.
    fn cluster_entities(
        &self,
        entities: &[EntityRecord],
        similarity_matrix: &[Vec<f32>],
        threshold: f32,
    ) -> Result<Vec<EntityCluster>>;
}
230
/// Pluggable extractor of named numeric features from an entity record.
pub trait FeatureExtractor: Send + Sync {
    /// Returns a map from feature name to value for `entity`.
    fn extract_features(&self, entity: &EntityRecord) -> Result<HashMap<String, f32>>;

    /// Returns the names of the features this extractor reports.
    fn feature_names(&self) -> Vec<String>;
}
239
/// Pluggable strategy that partitions entities into candidate blocks.
pub trait BlockingStrategy: Send + Sync {
    /// Returns blocks as lists of indices into `entities`; the resolver clusters each block on its own.
    fn generate_blocks(&self, entities: &[EntityRecord]) -> Result<Vec<Vec<usize>>>;

    /// Returns the blocking key of `entity`.
    fn get_blocking_key(&self, entity: &EntityRecord) -> Result<String>;
}
248
249impl EntityResolver {
250 pub fn new(_config: &AiConfig) -> Result<Self> {
252 let resolution_config = ResolutionConfig::default();
253
254 let similarity_calculator = Box::new(DefaultSimilarityCalculator::new());
256 let clustering_algorithm = Box::new(HierarchicalClusterer::new());
257 let feature_extractor = Box::new(DefaultFeatureExtractor::new());
258 let blocking_strategy = Box::new(SortedNeighborhoodBlocking::new());
259
260 Ok(Self {
261 config: resolution_config,
262 similarity_calculator,
263 clustering_algorithm,
264 feature_extractor,
265 blocking_strategy,
266 })
267 }
268
269 pub async fn resolve_entities(&self, triples: &[Triple]) -> Result<Vec<EntityCluster>> {
271 let entities = self.extract_entity_records(triples)?;
273
274 let blocks = self.blocking_strategy.generate_blocks(&entities)?;
276
277 let mut all_clusters = Vec::new();
278
279 for block in blocks {
281 let block_entities: Vec<&EntityRecord> = block.iter().map(|&i| &entities[i]).collect();
282
283 let similarity_matrix = self.calculate_similarity_matrix(&block_entities)?;
285
286 let block_entities_owned: Vec<EntityRecord> =
288 block_entities.into_iter().cloned().collect();
289 let clusters = self.clustering_algorithm.cluster_entities(
290 &block_entities_owned,
291 &similarity_matrix,
292 self.config.similarity_threshold,
293 )?;
294
295 all_clusters.extend(clusters);
296 }
297
298 let final_clusters = self.post_process_clusters(all_clusters)?;
300
301 Ok(final_clusters)
302 }
303
304 fn extract_entity_records(&self, triples: &[Triple]) -> Result<Vec<EntityRecord>> {
306 let mut entity_map: HashMap<String, EntityRecord> = HashMap::new();
307 let entity_counter = std::cell::RefCell::new(0);
308
309 for triple in triples {
310 let subject_uri = triple.subject().to_string();
311 let predicate_uri = triple.predicate().to_string();
312 let object_string = triple.object().to_string();
313
314 let subject_entry = entity_map.entry(subject_uri.clone()).or_insert_with(|| {
316 let id = {
317 let mut counter = entity_counter.borrow_mut();
318 *counter += 1;
319 *counter
320 };
321 EntityRecord {
322 id: format!("entity_{id}"),
323 uri: subject_uri.clone(),
324 attributes: HashMap::new(),
325 triples: Vec::new(),
326 source: "unknown".to_string(),
327 quality_score: 1.0,
328 }
329 });
330
331 subject_entry.triples.push(triple.clone());
332 subject_entry
333 .attributes
334 .insert(predicate_uri.clone(), object_string.clone());
335
336 if let crate::model::Object::NamedNode(node) = triple.object() {
338 let object_uri = node.to_string();
339 let object_entry = entity_map.entry(object_uri.clone()).or_insert_with(|| {
340 let id = {
341 let mut counter = entity_counter.borrow_mut();
342 *counter += 1;
343 *counter
344 };
345 EntityRecord {
346 id: format!("entity_{id}"),
347 uri: object_uri.clone(),
348 attributes: HashMap::new(),
349 triples: Vec::new(),
350 source: "unknown".to_string(),
351 quality_score: 1.0,
352 }
353 });
354
355 object_entry
357 .attributes
358 .insert(format!("{predicate_uri}^-1"), subject_uri.clone());
359 }
360 }
361
362 Ok(entity_map.into_values().collect())
363 }
364
365 fn calculate_similarity_matrix(&self, entities: &[&EntityRecord]) -> Result<Vec<Vec<f32>>> {
367 let n = entities.len();
368 let mut matrix = vec![vec![0.0; n]; n];
369
370 for i in 0..n {
371 for j in i..n {
372 if i == j {
373 matrix[i][j] = 1.0;
374 } else {
375 let similarity = self
376 .similarity_calculator
377 .calculate_similarity(entities[i], entities[j])?;
378 matrix[i][j] = similarity;
379 matrix[j][i] = similarity;
380 }
381 }
382 }
383
384 Ok(matrix)
385 }
386
387 fn post_process_clusters(&self, clusters: Vec<EntityCluster>) -> Result<Vec<EntityCluster>> {
389 let mut processed_clusters = clusters;
390
391 processed_clusters = self.merge_overlapping_clusters(processed_clusters)?;
393
394 processed_clusters = self.split_large_clusters(processed_clusters)?;
396
397 processed_clusters = self.validate_cluster_quality(processed_clusters)?;
399
400 Ok(processed_clusters)
401 }
402
403 fn merge_overlapping_clusters(
405 &self,
406 clusters: Vec<EntityCluster>,
407 ) -> Result<Vec<EntityCluster>> {
408 let mut merged_clusters = Vec::new();
409 let mut processed = vec![false; clusters.len()];
410
411 for (i, cluster_a) in clusters.iter().enumerate() {
412 if processed[i] {
413 continue;
414 }
415
416 let mut merged_cluster = cluster_a.clone();
417 processed[i] = true;
418
419 for (j, cluster_b) in clusters.iter().enumerate().skip(i + 1) {
421 if processed[j] {
422 continue;
423 }
424
425 let overlap_count = cluster_a
427 .entities
428 .iter()
429 .filter(|entity| cluster_b.entities.contains(entity))
430 .count();
431
432 let min_size = cluster_a.entities.len().min(cluster_b.entities.len());
433 let overlap_ratio = overlap_count as f64 / min_size as f64;
434
435 if overlap_ratio > 0.3 {
437 for entity in &cluster_b.entities {
439 if !merged_cluster.entities.contains(entity) {
440 merged_cluster.entities.push(entity.clone());
441 }
442 }
443
444 merged_cluster.size = merged_cluster.entities.len();
446 merged_cluster.confidence =
447 (merged_cluster.confidence + cluster_b.confidence) / 2.0;
448
449 merged_cluster.merge_decisions.push(MergeDecision {
451 source_entity: cluster_b.id.clone(),
452 target_entity: merged_cluster.id.clone(),
453 similarity: overlap_ratio as f32,
454 decision: DecisionType::Merge,
455 confidence: overlap_ratio as f32,
456 features_used: vec![FeatureType::StructuralSimilarity],
457 });
458
459 processed[j] = true;
460 }
461 }
462
463 merged_clusters.push(merged_cluster);
464 }
465
466 Ok(merged_clusters)
467 }
468
469 fn split_large_clusters(&self, clusters: Vec<EntityCluster>) -> Result<Vec<EntityCluster>> {
471 let mut split_clusters = Vec::new();
472 let max_cluster_size = 50; for cluster in clusters {
475 if cluster.entities.len() <= max_cluster_size {
476 split_clusters.push(cluster);
477 continue;
478 }
479
480 let sub_clusters = self.split_cluster_by_similarity(&cluster, max_cluster_size)?;
482 split_clusters.extend(sub_clusters);
483 }
484
485 Ok(split_clusters)
486 }
487
488 fn split_cluster_by_similarity(
490 &self,
491 cluster: &EntityCluster,
492 max_size: usize,
493 ) -> Result<Vec<EntityCluster>> {
494 let mut sub_clusters = Vec::new();
495 let mut remaining_entities = cluster.entities.clone();
496 let mut cluster_id_counter = 0;
497
498 while !remaining_entities.is_empty() {
499 let mut current_cluster_entities = Vec::new();
500 let seed_entity = remaining_entities.remove(0);
501 current_cluster_entities.push(seed_entity.clone());
502
503 let mut i = 0;
505 while i < remaining_entities.len() && current_cluster_entities.len() < max_size {
506 let entity = &remaining_entities[i];
507
508 let mut max_similarity = 0.0;
510 for cluster_entity in ¤t_cluster_entities {
511 let similarity = self.calculate_entity_similarity(entity, cluster_entity)?;
512 if similarity > max_similarity {
513 max_similarity = similarity;
514 }
515 }
516
517 if max_similarity > 0.7 {
519 current_cluster_entities.push(remaining_entities.remove(i));
520 } else {
521 i += 1;
522 }
523 }
524
525 let canonical_entity = current_cluster_entities[0].clone();
527 let sub_cluster = EntityCluster {
528 id: format!("{}_split_{}", cluster.id, cluster_id_counter),
529 entities: current_cluster_entities.clone(),
530 canonical_entity,
531 confidence: cluster.confidence * 0.9, size: current_cluster_entities.len(),
533 merge_decisions: vec![MergeDecision {
534 source_entity: cluster.id.clone(),
535 target_entity: format!("{}_split_{}", cluster.id, cluster_id_counter),
536 similarity: cluster.confidence,
537 decision: DecisionType::NoMerge,
538 confidence: cluster.confidence,
539 features_used: vec![FeatureType::StructuralSimilarity],
540 }],
541 };
542
543 sub_clusters.push(sub_cluster);
544 cluster_id_counter += 1;
545 }
546
547 Ok(sub_clusters)
548 }
549
550 fn validate_cluster_quality(&self, clusters: Vec<EntityCluster>) -> Result<Vec<EntityCluster>> {
552 let mut validated_clusters = Vec::new();
553
554 for cluster in clusters {
555 let min_cluster_size = 2;
557 let min_confidence = 0.5;
558
559 if cluster.entities.len() < min_cluster_size {
561 continue;
562 }
563
564 if cluster.confidence < min_confidence {
566 continue;
567 }
568
569 let internal_similarity = self.calculate_cluster_internal_similarity(&cluster)?;
571 if internal_similarity < 0.6 {
572 continue;
573 }
574
575 validated_clusters.push(cluster);
576 }
577
578 Ok(validated_clusters)
579 }
580
581 fn calculate_cluster_internal_similarity(&self, cluster: &EntityCluster) -> Result<f64> {
583 if cluster.entities.len() < 2 {
584 return Ok(1.0);
585 }
586
587 let mut total_similarity = 0.0;
588 let mut comparison_count = 0;
589
590 for i in 0..cluster.entities.len() {
591 for j in (i + 1)..cluster.entities.len() {
592 let similarity =
593 self.calculate_entity_similarity(&cluster.entities[i], &cluster.entities[j])?;
594 total_similarity += similarity;
595 comparison_count += 1;
596 }
597 }
598
599 if comparison_count > 0 {
600 Ok(total_similarity / comparison_count as f64)
601 } else {
602 Ok(0.0)
603 }
604 }
605
606 fn calculate_entity_similarity(
608 &self,
609 entity1: &EntityRecord,
610 entity2: &EntityRecord,
611 ) -> Result<f64> {
612 let label1 = entity1
614 .attributes
615 .get("label")
616 .unwrap_or(&entity1.uri)
617 .to_lowercase();
618 let label2 = entity2
619 .attributes
620 .get("label")
621 .unwrap_or(&entity2.uri)
622 .to_lowercase();
623
624 let ngrams1: std::collections::HashSet<String> = self.generate_character_ngrams(&label1, 2);
626 let ngrams2: std::collections::HashSet<String> = self.generate_character_ngrams(&label2, 2);
627
628 let intersection = ngrams1.intersection(&ngrams2).count();
629 let union = ngrams1.union(&ngrams2).count();
630
631 if union > 0 {
632 Ok(intersection as f64 / union as f64)
633 } else {
634 Ok(0.0)
635 }
636 }
637
638 fn generate_character_ngrams(&self, text: &str, n: usize) -> std::collections::HashSet<String> {
640 let mut ngrams = std::collections::HashSet::new();
641 let chars: Vec<char> = text.chars().collect();
642
643 if chars.len() >= n {
644 for i in 0..=(chars.len() - n) {
645 let ngram: String = chars[i..i + n].iter().collect();
646 ngrams.insert(ngram);
647 }
648 }
649
650 ngrams
651 }
652}
653
/// Similarity calculator based on character-set overlap of URIs and attribute values.
struct DefaultSimilarityCalculator;

impl DefaultSimilarityCalculator {
    /// Creates the calculator; it carries no state.
    fn new() -> Self {
        DefaultSimilarityCalculator
    }

    /// Jaccard similarity of the sets of distinct characters in `s1` and `s2`.
    ///
    /// Character order and repetition are ignored. Returns `0.0` when both
    /// strings are empty.
    fn string_similarity(&self, s1: &str, s2: &str) -> f32 {
        let left: HashSet<char> = s1.chars().collect();
        let right: HashSet<char> = s2.chars().collect();

        let shared = left.intersection(&right).count();
        // |A ∪ B| = |A| + |B| - |A ∩ B|
        let combined = left.len() + right.len() - shared;

        match combined {
            0 => 0.0,
            total => shared as f32 / total as f32,
        }
    }

    /// Mean `string_similarity` over the attribute keys present in both maps.
    ///
    /// Keys found in only one map are ignored. Returns `0.0` when the maps share no key.
    fn attribute_similarity(
        &self,
        attrs1: &HashMap<String, String>,
        attrs2: &HashMap<String, String>,
    ) -> f32 {
        let (score_sum, shared_keys) = attrs1
            .iter()
            .filter_map(|(key, left)| {
                attrs2
                    .get(key)
                    .map(|right| self.string_similarity(left, right))
            })
            .fold((0.0_f32, 0_u32), |(sum, seen), score| (sum + score, seen + 1));

        if shared_keys == 0 {
            return 0.0;
        }
        score_sum / shared_keys as f32
    }
}
699
700impl SimilarityCalculator for DefaultSimilarityCalculator {
701 fn calculate_similarity(&self, entity1: &EntityRecord, entity2: &EntityRecord) -> Result<f32> {
702 let uri_similarity = self.string_similarity(&entity1.uri, &entity2.uri);
704 let attr_similarity = self.attribute_similarity(&entity1.attributes, &entity2.attributes);
705
706 let similarity = 0.3 * uri_similarity + 0.7 * attr_similarity;
708
709 Ok(similarity)
710 }
711
712 fn get_feature_vector(&self, entity: &EntityRecord) -> Result<Vec<f32>> {
713 let features = vec![
715 entity.uri.len() as f32,
717 entity.attributes.len() as f32,
719 entity.triples.len() as f32,
721 entity.quality_score,
723 ];
724
725 Ok(features)
726 }
727}
728
729struct HierarchicalClusterer;
731
732impl HierarchicalClusterer {
733 fn new() -> Self {
734 Self
735 }
736}
737
738impl ClusteringAlgorithm for HierarchicalClusterer {
739 fn cluster_entities(
740 &self,
741 entities: &[EntityRecord],
742 similarity_matrix: &[Vec<f32>],
743 threshold: f32,
744 ) -> Result<Vec<EntityCluster>> {
745 let n = entities.len();
746 if n == 0 {
747 return Ok(Vec::new());
748 }
749
750 let mut clusters = Vec::new();
752 let mut visited = vec![false; n];
753
754 for i in 0..n {
755 if visited[i] {
756 continue;
757 }
758
759 let mut cluster_entities = vec![entities[i].clone()];
760 visited[i] = true;
761
762 for j in (i + 1)..n {
763 if !visited[j] && similarity_matrix[i][j] >= threshold {
764 cluster_entities.push(entities[j].clone());
765 visited[j] = true;
766 }
767 }
768
769 let canonical_entity = cluster_entities[0].clone(); let cluster = EntityCluster {
772 id: format!("cluster_{}", clusters.len()),
773 entities: cluster_entities.clone(),
774 canonical_entity,
775 confidence: 0.8, size: cluster_entities.len(),
777 merge_decisions: vec![MergeDecision {
778 source_entity: "initial".to_string(),
779 target_entity: format!("cluster_{}", clusters.len()),
780 similarity: 0.8,
781 decision: DecisionType::Merge,
782 confidence: 0.8,
783 features_used: vec![FeatureType::StructuralSimilarity],
784 }],
785 };
786
787 clusters.push(cluster);
788 }
789
790 Ok(clusters)
791 }
792}
793
794struct DefaultFeatureExtractor;
796
797impl DefaultFeatureExtractor {
798 fn new() -> Self {
799 Self
800 }
801}
802
803impl FeatureExtractor for DefaultFeatureExtractor {
804 fn extract_features(&self, entity: &EntityRecord) -> Result<HashMap<String, f32>> {
805 let mut features = HashMap::new();
806
807 features.insert("uri_length".to_string(), entity.uri.len() as f32);
809 features.insert("num_attributes".to_string(), entity.attributes.len() as f32);
810 features.insert("num_triples".to_string(), entity.triples.len() as f32);
811 features.insert("quality_score".to_string(), entity.quality_score);
812
813 for (key, value) in &entity.attributes {
815 features.insert(format!("attr_{key}_length"), value.len() as f32);
816 }
817
818 Ok(features)
819 }
820
821 fn feature_names(&self) -> Vec<String> {
822 vec![
823 "uri_length".to_string(),
824 "num_attributes".to_string(),
825 "num_triples".to_string(),
826 "quality_score".to_string(),
827 ]
828 }
829}
830
831struct SortedNeighborhoodBlocking;
833
834impl SortedNeighborhoodBlocking {
835 fn new() -> Self {
836 Self
837 }
838}
839
840impl BlockingStrategy for SortedNeighborhoodBlocking {
841 fn generate_blocks(&self, entities: &[EntityRecord]) -> Result<Vec<Vec<usize>>> {
842 let mut indexed_entities: Vec<(usize, String)> = entities
844 .iter()
845 .enumerate()
846 .map(|(i, entity)| (i, self.get_blocking_key(entity).unwrap_or_default()))
847 .collect();
848
849 indexed_entities.sort_by_key(|x| x.1.clone());
850
851 let window_size = 10; let mut blocks = Vec::new();
854
855 for start in 0..entities.len() {
856 if start + window_size <= entities.len() {
857 let block: Vec<usize> = indexed_entities[start..start + window_size]
858 .iter()
859 .map(|(i, _)| *i)
860 .collect();
861 blocks.push(block);
862 }
863 }
864
865 if blocks.is_empty() {
866 blocks.push((0..entities.len()).collect());
868 }
869
870 Ok(blocks)
871 }
872
873 fn get_blocking_key(&self, entity: &EntityRecord) -> Result<String> {
874 let key = entity
876 .uri
877 .chars()
878 .take(10)
879 .collect::<String>()
880 .to_lowercase();
881 Ok(key)
882 }
883}
884
#[cfg(test)]
mod tests {
    use super::*;
    use crate::ai::AiConfig;
    use crate::model::{Literal, NamedNode};

    /// Constructing a resolver from the default AI configuration succeeds.
    #[tokio::test]
    async fn test_entity_resolver_creation() {
        let config = AiConfig::default();
        let resolver = EntityResolver::new(&config);
        assert!(resolver.is_ok());
    }

    /// Resolving four `name` triples (two pairs with identical literals) yields
    /// at least one cluster.
    #[tokio::test]
    async fn test_entity_resolution() {
        let config = AiConfig::default();
        let mut resolver = EntityResolver::new(&config).expect("construction should succeed");

        // Lower the threshold from its default of 0.8 for this test.
        resolver.config.similarity_threshold = 0.3;

        let triples = vec![
            Triple::new(
                NamedNode::new("http://example.org/person1").expect("valid IRI"),
                NamedNode::new("http://example.org/name").expect("valid IRI"),
                Literal::new("John Smith"),
            ),
            Triple::new(
                NamedNode::new("http://example.org/person2").expect("valid IRI"),
                NamedNode::new("http://example.org/name").expect("valid IRI"),
                Literal::new("John Smith"),
            ),
            Triple::new(
                NamedNode::new("http://example.org/person3").expect("valid IRI"),
                NamedNode::new("http://example.org/name").expect("valid IRI"),
                Literal::new("Jane Doe"),
            ),
            Triple::new(
                NamedNode::new("http://example.org/person4").expect("valid IRI"),
                NamedNode::new("http://example.org/name").expect("valid IRI"),
                Literal::new("Jane Doe"),
            ),
        ];

        let clusters = resolver
            .resolve_entities(&triples)
            .await
            .expect("async operation should succeed");
        // Only checks that something was clustered, not the cluster contents.
        assert!(!clusters.is_empty());
    }

    /// The default calculator scores two related records strictly above 0 and at most 1.
    #[test]
    fn test_similarity_calculation() {
        let calculator = DefaultSimilarityCalculator::new();

        let entity1 = EntityRecord {
            id: "1".to_string(),
            uri: "http://example.org/john".to_string(),
            attributes: [("name".to_string(), "John".to_string())]
                .iter()
                .cloned()
                .collect(),
            triples: Vec::new(),
            source: "source1".to_string(),
            quality_score: 1.0,
        };

        let entity2 = EntityRecord {
            id: "2".to_string(),
            uri: "http://example.org/john_smith".to_string(),
            attributes: [("name".to_string(), "John Smith".to_string())]
                .iter()
                .cloned()
                .collect(),
            triples: Vec::new(),
            source: "source2".to_string(),
            quality_score: 1.0,
        };

        let similarity = calculator
            .calculate_similarity(&entity1, &entity2)
            .expect("operation should succeed");
        assert!(similarity > 0.0);
        assert!(similarity <= 1.0);
    }
}