oxirs_federate/
schema_alignment.rs

1//! Schema Alignment and Ontology Mapping for Federated RDF/SPARQL
2//!
3//! This module provides automatic schema alignment, ontology mapping, and vocabulary
4//! harmonization for federated SPARQL queries across heterogeneous RDF sources.
5//!
6//! Features:
7//! - Automatic ontology alignment using similarity metrics
8//! - Property and class mapping across vocabularies
9//! - Equivalence reasoning (owl:sameAs, owl:equivalentClass, owl:equivalentProperty)
10//! - Schema translation and query rewriting
11//! - ML-based mapping suggestion using scirs2
12//!
13//! Enhanced with scirs2 for similarity computations and ML-based mapping.
14
15use anyhow::{anyhow, Result};
16use serde::{Deserialize, Serialize};
17use std::collections::{HashMap, HashSet};
18use std::sync::Arc;
19use tokio::sync::RwLock;
20use tracing::{debug, info};
21
22// scirs2 integration for similarity and ML
23// Note: Advanced features simplified for initial release
24
25use crate::semantic_reasoner::{ReasonerConfig, SemanticReasoner};
26
/// Schema alignment configuration
///
/// Tunables for [`SchemaAligner`]. In the code visible in this file only
/// `similarity_threshold`, `enable_property_alignment`,
/// `enable_class_alignment`, `enable_ml_mapping` and
/// `max_suggestions_per_entity` are actually consulted; the remaining fields
/// are stored but not read here (NOTE(review): they may be used elsewhere).
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AlignmentConfig {
    /// Minimum similarity threshold for automatic alignment (0.0 - 1.0)
    ///
    /// Candidate pairs scoring below this value are discarded.
    pub similarity_threshold: f64,
    /// Enable property alignment
    pub enable_property_alignment: bool,
    /// Enable class alignment
    pub enable_class_alignment: bool,
    /// Enable instance alignment (owl:sameAs)
    pub enable_instance_alignment: bool,
    /// Use ML-based mapping suggestions
    ///
    /// When `false`, `SchemaAligner::train_ml_predictor` returns an error.
    pub enable_ml_mapping: bool,
    /// Confidence threshold for ML suggestions (0.0 - 1.0)
    pub ml_confidence_threshold: f64,
    /// Maximum number of alignment suggestions per entity
    ///
    /// Caps how many of the best-scoring targets are kept per source entity.
    pub max_suggestions_per_entity: usize,
    /// Enable reasoning with OWL axioms
    pub enable_owl_reasoning: bool,
}
47
48impl Default for AlignmentConfig {
49    fn default() -> Self {
50        Self {
51            similarity_threshold: 0.75,
52            enable_property_alignment: true,
53            enable_class_alignment: true,
54            enable_instance_alignment: true,
55            enable_ml_mapping: true,
56            ml_confidence_threshold: 0.8,
57            max_suggestions_per_entity: 5,
58            enable_owl_reasoning: true,
59        }
60    }
61}
62
/// Schema alignment engine
///
/// Holds the alignment configuration plus the mapping tables produced by
/// `align_vocabularies`. Every piece of mutable state sits behind an
/// `Arc<RwLock<..>>` (Tokio's async lock), so all methods take `&self`.
pub struct SchemaAligner {
    config: AlignmentConfig,
    /// Property mappings: (source_property, target_property) -> confidence
    property_mappings: Arc<RwLock<HashMap<(String, String), f64>>>,
    /// Class mappings: (source_class, target_class) -> confidence
    class_mappings: Arc<RwLock<HashMap<(String, String), f64>>>,
    /// Instance mappings: (source_instance, target_instance) -> confidence
    ///
    /// Read by `get_mapping`; nothing in the visible code writes to it.
    instance_mappings: Arc<RwLock<HashMap<(String, String), f64>>>,
    /// Vocabulary metadata cache
    ///
    /// Keyed by the vocabulary URI passed to `load_vocabulary_metadata`.
    vocabulary_cache: Arc<RwLock<HashMap<String, VocabularyMetadata>>>,
    /// ML model for mapping prediction (simplified)
    ///
    /// `None` until `train_ml_predictor` succeeds.
    ml_predictor: Arc<RwLock<Option<MappingPredictor>>>,
    /// Semantic reasoner for RDFS/OWL inference
    ///
    /// `None` until `enable_reasoning` is called.
    reasoner: Arc<RwLock<Option<SemanticReasoner>>>,
}
79
80impl SchemaAligner {
81    /// Create a new schema aligner
82    pub fn new(config: AlignmentConfig) -> Self {
83        Self {
84            config,
85            property_mappings: Arc::new(RwLock::new(HashMap::new())),
86            class_mappings: Arc::new(RwLock::new(HashMap::new())),
87            instance_mappings: Arc::new(RwLock::new(HashMap::new())),
88            vocabulary_cache: Arc::new(RwLock::new(HashMap::new())),
89            ml_predictor: Arc::new(RwLock::new(None)),
90            reasoner: Arc::new(RwLock::new(None)),
91        }
92    }
93
94    /// Enable semantic reasoning for enhanced alignment
95    pub async fn enable_reasoning(&self, reasoner_config: ReasonerConfig) -> Result<()> {
96        let reasoner = SemanticReasoner::new(reasoner_config);
97        let mut reasoner_guard = self.reasoner.write().await;
98        *reasoner_guard = Some(reasoner);
99        info!("Semantic reasoning enabled for schema alignment");
100        Ok(())
101    }
102
103    /// Align two RDF vocabularies/ontologies
104    pub async fn align_vocabularies(
105        &self,
106        source_vocab: &str,
107        target_vocab: &str,
108    ) -> Result<AlignmentResult> {
109        info!(
110            "Aligning vocabularies: {} -> {}",
111            source_vocab, target_vocab
112        );
113
114        // Load vocabulary metadata
115        let source_meta = self.load_vocabulary_metadata(source_vocab).await?;
116        let target_meta = self.load_vocabulary_metadata(target_vocab).await?;
117
118        let mut property_alignments = Vec::new();
119        let mut class_alignments = Vec::new();
120
121        // Align properties
122        if self.config.enable_property_alignment {
123            property_alignments = self
124                .align_properties(&source_meta.properties, &target_meta.properties)
125                .await?;
126        }
127
128        // Align classes
129        if self.config.enable_class_alignment {
130            class_alignments = self
131                .align_classes(&source_meta.classes, &target_meta.classes)
132                .await?;
133        }
134
135        // Store alignments
136        self.store_alignments(&property_alignments, &class_alignments)
137            .await?;
138
139        let overall_confidence =
140            self.calculate_overall_confidence(&property_alignments, &class_alignments);
141
142        Ok(AlignmentResult {
143            source_vocabulary: source_vocab.to_string(),
144            target_vocabulary: target_vocab.to_string(),
145            property_alignments,
146            class_alignments,
147            instance_alignments: vec![],
148            overall_confidence,
149        })
150    }
151
152    /// Align RDF properties between vocabularies
153    async fn align_properties(
154        &self,
155        source_properties: &[PropertyMetadata],
156        target_properties: &[PropertyMetadata],
157    ) -> Result<Vec<Alignment>> {
158        let mut alignments = Vec::new();
159
160        for source_prop in source_properties {
161            let mut candidates = Vec::new();
162
163            for target_prop in target_properties {
164                // Calculate similarity
165                let similarity = self
166                    .calculate_property_similarity(source_prop, target_prop)
167                    .await?;
168
169                if similarity >= self.config.similarity_threshold {
170                    candidates.push((target_prop, similarity));
171                }
172            }
173
174            // Sort by similarity (highest first)
175            candidates.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap());
176
177            // Take top N suggestions
178            for (target_prop, confidence) in candidates
179                .iter()
180                .take(self.config.max_suggestions_per_entity)
181            {
182                alignments.push(Alignment {
183                    source_entity: source_prop.uri.clone(),
184                    target_entity: target_prop.uri.clone(),
185                    alignment_type: AlignmentType::Property,
186                    confidence: *confidence,
187                    evidence: vec![format!(
188                        "String similarity: {:.3}, Domain/range match",
189                        confidence
190                    )],
191                });
192            }
193        }
194
195        Ok(alignments)
196    }
197
198    /// Align RDF classes between vocabularies
199    async fn align_classes(
200        &self,
201        source_classes: &[ClassMetadata],
202        target_classes: &[ClassMetadata],
203    ) -> Result<Vec<Alignment>> {
204        let mut alignments = Vec::new();
205
206        for source_class in source_classes {
207            let mut candidates = Vec::new();
208
209            for target_class in target_classes {
210                // Calculate similarity
211                let similarity = self
212                    .calculate_class_similarity(source_class, target_class)
213                    .await?;
214
215                if similarity >= self.config.similarity_threshold {
216                    candidates.push((target_class, similarity));
217                }
218            }
219
220            // Sort by similarity
221            candidates.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap());
222
223            // Take top N suggestions
224            for (target_class, confidence) in candidates
225                .iter()
226                .take(self.config.max_suggestions_per_entity)
227            {
228                alignments.push(Alignment {
229                    source_entity: source_class.uri.clone(),
230                    target_entity: target_class.uri.clone(),
231                    alignment_type: AlignmentType::Class,
232                    confidence: *confidence,
233                    evidence: vec![format!("Class similarity: {:.3}", confidence)],
234                });
235            }
236        }
237
238        Ok(alignments)
239    }
240
241    /// Calculate similarity between two properties
242    async fn calculate_property_similarity(
243        &self,
244        source: &PropertyMetadata,
245        target: &PropertyMetadata,
246    ) -> Result<f64> {
247        let mut similarities = Vec::new();
248
249        // Label similarity (string-based)
250        let label_sim = self.calculate_string_similarity(&source.label, &target.label);
251        similarities.push(label_sim * 0.4); // Weight: 40%
252
253        // Local name similarity
254        let local_name_sim = self.calculate_string_similarity(
255            &Self::extract_local_name(&source.uri),
256            &Self::extract_local_name(&target.uri),
257        );
258        similarities.push(local_name_sim * 0.3); // Weight: 30%
259
260        // Domain/range compatibility
261        let domain_range_sim = self.calculate_domain_range_similarity(source, target);
262        similarities.push(domain_range_sim * 0.2); // Weight: 20%
263
264        // Description similarity (if available)
265        if let (Some(ref src_desc), Some(ref tgt_desc)) = (&source.description, &target.description)
266        {
267            let desc_sim = self.calculate_string_similarity(src_desc, tgt_desc);
268            similarities.push(desc_sim * 0.1); // Weight: 10%
269        }
270
271        // Aggregate similarities
272        let total_similarity = similarities.iter().sum::<f64>();
273
274        Ok(total_similarity.min(1.0))
275    }
276
277    /// Calculate similarity between two classes
278    async fn calculate_class_similarity(
279        &self,
280        source: &ClassMetadata,
281        target: &ClassMetadata,
282    ) -> Result<f64> {
283        let mut similarities = Vec::new();
284
285        // Label similarity
286        let label_sim = self.calculate_string_similarity(&source.label, &target.label);
287        similarities.push(label_sim * 0.5); // Weight: 50%
288
289        // Local name similarity
290        let local_name_sim = self.calculate_string_similarity(
291            &Self::extract_local_name(&source.uri),
292            &Self::extract_local_name(&target.uri),
293        );
294        similarities.push(local_name_sim * 0.3); // Weight: 30%
295
296        // Superclass/subclass overlap
297        let hierarchy_sim = self.calculate_hierarchy_similarity(source, target);
298        similarities.push(hierarchy_sim * 0.2); // Weight: 20%
299
300        let total_similarity = similarities.iter().sum::<f64>();
301
302        Ok(total_similarity.min(1.0))
303    }
304
305    /// Calculate string similarity using multiple methods
306    fn calculate_string_similarity(&self, s1: &str, s2: &str) -> f64 {
307        // Normalize strings
308        let s1_norm = s1.to_lowercase().trim().to_string();
309        let s2_norm = s2.to_lowercase().trim().to_string();
310
311        // Exact match
312        if s1_norm == s2_norm {
313            return 1.0;
314        }
315
316        // Levenshtein distance (normalized) - simplified implementation
317        let lev_dist = Self::simple_levenshtein(&s1_norm, &s2_norm);
318        let max_len = s1_norm.len().max(s2_norm.len()) as f64;
319        let lev_sim = 1.0 - (lev_dist as f64 / max_len);
320
321        // Token-based similarity (Jaccard)
322        let tokens1: HashSet<&str> = s1_norm.split_whitespace().collect();
323        let tokens2: HashSet<&str> = s2_norm.split_whitespace().collect();
324        let intersection = tokens1.intersection(&tokens2).count();
325        let union = tokens1.union(&tokens2).count();
326        let jaccard_sim = if union > 0 {
327            intersection as f64 / union as f64
328        } else {
329            0.0
330        };
331
332        // Combine similarities
333        (lev_sim * 0.6 + jaccard_sim * 0.4).min(1.0)
334    }
335
336    /// Calculate domain/range similarity for properties
337    fn calculate_domain_range_similarity(
338        &self,
339        source: &PropertyMetadata,
340        target: &PropertyMetadata,
341    ) -> f64 {
342        let mut similarity: f64 = 0.0;
343
344        // Domain match
345        if let (Some(ref src_domain), Some(ref tgt_domain)) = (&source.domain, &target.domain) {
346            if src_domain == tgt_domain {
347                similarity += 0.5;
348            } else {
349                // Partial match based on local names
350                let src_local = Self::extract_local_name(src_domain);
351                let tgt_local = Self::extract_local_name(tgt_domain);
352                if src_local == tgt_local {
353                    similarity += 0.3;
354                }
355            }
356        }
357
358        // Range match
359        if let (Some(ref src_range), Some(ref tgt_range)) = (&source.range, &target.range) {
360            if src_range == tgt_range {
361                similarity += 0.5;
362            } else {
363                let src_local = Self::extract_local_name(src_range);
364                let tgt_local = Self::extract_local_name(tgt_range);
365                if src_local == tgt_local {
366                    similarity += 0.3;
367                }
368            }
369        }
370
371        similarity.min(1.0)
372    }
373
374    /// Calculate hierarchy similarity for classes
375    fn calculate_hierarchy_similarity(
376        &self,
377        source: &ClassMetadata,
378        target: &ClassMetadata,
379    ) -> f64 {
380        // Calculate Jaccard similarity of superclasses
381        let src_supers: HashSet<_> = source.superclasses.iter().collect();
382        let tgt_supers: HashSet<_> = target.superclasses.iter().collect();
383
384        let intersection = src_supers.intersection(&tgt_supers).count();
385        let union = src_supers.union(&tgt_supers).count();
386
387        if union > 0 {
388            intersection as f64 / union as f64
389        } else {
390            0.0
391        }
392    }
393
394    /// Extract local name from URI
395    fn extract_local_name(uri: &str) -> String {
396        uri.rsplit(['/', '#']).next().unwrap_or(uri).to_string()
397    }
398
399    /// Simple Levenshtein distance calculation
400    fn simple_levenshtein(s1: &str, s2: &str) -> usize {
401        let len1 = s1.len();
402        let len2 = s2.len();
403
404        if len1 == 0 {
405            return len2;
406        }
407        if len2 == 0 {
408            return len1;
409        }
410
411        let mut prev_row: Vec<usize> = (0..=len2).collect();
412        let mut curr_row = vec![0; len2 + 1];
413
414        for (i, c1) in s1.chars().enumerate() {
415            curr_row[0] = i + 1;
416
417            for (j, c2) in s2.chars().enumerate() {
418                let cost = if c1 == c2 { 0 } else { 1 };
419                curr_row[j + 1] = (curr_row[j] + 1)
420                    .min(prev_row[j + 1] + 1)
421                    .min(prev_row[j] + cost);
422            }
423
424            std::mem::swap(&mut prev_row, &mut curr_row);
425        }
426
427        prev_row[len2]
428    }
429
430    /// Load vocabulary metadata (simplified - in production would query SPARQL endpoints)
431    async fn load_vocabulary_metadata(&self, vocab_uri: &str) -> Result<VocabularyMetadata> {
432        // Check cache first
433        let cache = self.vocabulary_cache.read().await;
434        if let Some(meta) = cache.get(vocab_uri) {
435            return Ok(meta.clone());
436        }
437        drop(cache);
438
439        // In production, this would:
440        // 1. Query the vocabulary endpoint
441        // 2. Parse RDFS/OWL definitions
442        // 3. Extract class and property metadata
443        // For now, return mock data
444
445        let metadata = VocabularyMetadata {
446            namespace: vocab_uri.to_string(),
447            prefix: Self::extract_local_name(vocab_uri),
448            properties: vec![],
449            classes: vec![],
450            version: None,
451        };
452
453        // Cache the metadata
454        let mut cache = self.vocabulary_cache.write().await;
455        cache.insert(vocab_uri.to_string(), metadata.clone());
456
457        Ok(metadata)
458    }
459
460    /// Store alignments in the mapping tables
461    async fn store_alignments(
462        &self,
463        property_alignments: &[Alignment],
464        class_alignments: &[Alignment],
465    ) -> Result<()> {
466        let mut prop_mappings = self.property_mappings.write().await;
467        let mut class_mappings = self.class_mappings.write().await;
468
469        for alignment in property_alignments {
470            prop_mappings.insert(
471                (
472                    alignment.source_entity.clone(),
473                    alignment.target_entity.clone(),
474                ),
475                alignment.confidence,
476            );
477        }
478
479        for alignment in class_alignments {
480            class_mappings.insert(
481                (
482                    alignment.source_entity.clone(),
483                    alignment.target_entity.clone(),
484                ),
485                alignment.confidence,
486            );
487        }
488
489        info!(
490            "Stored {} property alignments and {} class alignments",
491            property_alignments.len(),
492            class_alignments.len()
493        );
494
495        Ok(())
496    }
497
498    /// Calculate overall alignment confidence
499    fn calculate_overall_confidence(
500        &self,
501        property_alignments: &[Alignment],
502        class_alignments: &[Alignment],
503    ) -> f64 {
504        let all_alignments: Vec<f64> = property_alignments
505            .iter()
506            .chain(class_alignments.iter())
507            .map(|a| a.confidence)
508            .collect();
509
510        if all_alignments.is_empty() {
511            return 0.0;
512        }
513
514        // Average confidence
515        all_alignments.iter().sum::<f64>() / all_alignments.len() as f64
516    }
517
518    /// Rewrite SPARQL query using schema alignments
519    pub async fn rewrite_query(&self, query: &str, target_vocabulary: &str) -> Result<String> {
520        debug!("Rewriting query for vocabulary: {}", target_vocabulary);
521
522        // Parse query (simplified - in production would use full SPARQL parser)
523        let mut rewritten = query.to_string();
524
525        // Apply property mappings
526        let prop_mappings = self.property_mappings.read().await;
527        for ((source, target), _confidence) in prop_mappings.iter() {
528            rewritten = rewritten.replace(source, target);
529        }
530
531        // Apply class mappings
532        let class_mappings = self.class_mappings.read().await;
533        for ((source, target), _confidence) in class_mappings.iter() {
534            rewritten = rewritten.replace(source, target);
535        }
536
537        Ok(rewritten)
538    }
539
540    /// Get mapping for a specific entity
541    pub async fn get_mapping(
542        &self,
543        source_entity: &str,
544        entity_type: AlignmentType,
545    ) -> Result<Option<String>> {
546        match entity_type {
547            AlignmentType::Property => {
548                let mappings = self.property_mappings.read().await;
549                Ok(mappings
550                    .iter()
551                    .find(|((s, _), _)| s == source_entity)
552                    .map(|((_, t), _)| t.clone()))
553            }
554            AlignmentType::Class => {
555                let mappings = self.class_mappings.read().await;
556                Ok(mappings
557                    .iter()
558                    .find(|((s, _), _)| s == source_entity)
559                    .map(|((_, t), _)| t.clone()))
560            }
561            AlignmentType::Instance => {
562                let mappings = self.instance_mappings.read().await;
563                Ok(mappings
564                    .iter()
565                    .find(|((s, _), _)| s == source_entity)
566                    .map(|((_, t), _)| t.clone()))
567            }
568        }
569    }
570
571    /// Train ML-based mapping predictor
572    pub async fn train_ml_predictor(&self, training_data: Vec<MappingExample>) -> Result<()> {
573        if !self.config.enable_ml_mapping {
574            return Err(anyhow!("ML mapping is disabled"));
575        }
576
577        info!(
578            "Training ML-based mapping predictor with {} examples",
579            training_data.len()
580        );
581
582        let predictor = MappingPredictor::train(training_data)?;
583
584        let mut ml_predictor = self.ml_predictor.write().await;
585        *ml_predictor = Some(predictor);
586
587        Ok(())
588    }
589}
590
/// Vocabulary metadata
///
/// Describes one vocabulary/ontology: its namespace plus the properties and
/// classes that take part in alignment.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct VocabularyMetadata {
    /// Namespace URI of the vocabulary.
    pub namespace: String,
    /// Short prefix for the vocabulary.
    pub prefix: String,
    /// Properties defined by the vocabulary.
    pub properties: Vec<PropertyMetadata>,
    /// Classes defined by the vocabulary.
    pub classes: Vec<ClassMetadata>,
    /// Version identifier, if known.
    pub version: Option<String>,
}
600
/// Property metadata
///
/// The fields an RDF property exposes to the similarity computation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct PropertyMetadata {
    /// URI of the property; its local name is compared during alignment.
    pub uri: String,
    /// Human-readable label, compared during alignment.
    pub label: String,
    /// Optional free-text description; compared only when both sides have one.
    pub description: Option<String>,
    /// Optional URI of the property's domain.
    pub domain: Option<String>,
    /// Optional URI of the property's range.
    pub range: Option<String>,
}
610
/// Class metadata
///
/// The fields an RDF class exposes to the similarity computation.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ClassMetadata {
    /// URI of the class; its local name is compared during alignment.
    pub uri: String,
    /// Human-readable label, compared during alignment.
    pub label: String,
    /// Optional free-text description (not used by the class similarity
    /// score in this file).
    pub description: Option<String>,
    /// Superclass identifiers; their overlap feeds the hierarchy score.
    pub superclasses: Vec<String>,
    /// Subclass identifiers (not used by the hierarchy score in this file).
    pub subclasses: Vec<String>,
}
620
/// Alignment result
///
/// Outcome of aligning one source vocabulary against one target vocabulary.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AlignmentResult {
    /// Identifier of the source vocabulary, as passed by the caller.
    pub source_vocabulary: String,
    /// Identifier of the target vocabulary, as passed by the caller.
    pub target_vocabulary: String,
    /// Suggested property-to-property alignments.
    pub property_alignments: Vec<Alignment>,
    /// Suggested class-to-class alignments.
    pub class_alignments: Vec<Alignment>,
    /// Suggested instance alignments (`align_vocabularies` leaves this empty).
    pub instance_alignments: Vec<Alignment>,
    /// Mean confidence over the property and class alignments; 0.0 if none.
    pub overall_confidence: f64,
}
631
/// Individual alignment
///
/// One suggested correspondence between a source entity and a target entity.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Alignment {
    /// URI of the entity in the source vocabulary.
    pub source_entity: String,
    /// URI of the matched entity in the target vocabulary.
    pub target_entity: String,
    /// Whether the pair relates properties, classes or instances.
    pub alignment_type: AlignmentType,
    /// Similarity score of the pair.
    pub confidence: f64,
    /// Human-readable notes on how the score was obtained.
    pub evidence: Vec<String>,
}
641
/// Alignment type
///
/// Selects which kind of entity an alignment (or a mapping lookup) is about.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
pub enum AlignmentType {
    /// Property-to-property correspondence.
    Property,
    /// Class-to-class correspondence.
    Class,
    /// Instance-to-instance correspondence.
    Instance,
}
649
/// Mapping example for ML training
///
/// One labelled candidate mapping together with its feature vector.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct MappingExample {
    /// Identifier of the source entity (not read by the trainer).
    pub source_entity: String,
    /// Identifier of the target entity (not read by the trainer).
    pub target_entity: String,
    /// Label: `true` when the mapping is known to be correct.
    pub is_correct: bool,
    /// Feature name -> feature value for this candidate.
    pub features: HashMap<String, f64>,
}
658
/// ML-based mapping predictor (simplified)
///
/// A linear scorer: each known feature carries a signed weight, and a
/// prediction adds the weighted feature values to a base score of 0.5.
#[derive(Debug, Clone)]
pub struct MappingPredictor {
    /// Feature weights learned from training
    ///
    /// Keyed by feature name; normalized so the absolute values sum to 1
    /// whenever that sum is positive.
    weights: HashMap<String, f64>,
}
665
666impl MappingPredictor {
667    /// Train predictor from examples
668    pub fn train(examples: Vec<MappingExample>) -> Result<Self> {
669        // Simplified training - in production would use scirs2's ML capabilities
670        let mut weights = HashMap::new();
671
672        // Calculate feature importance
673        for example in &examples {
674            for (feature_name, &feature_value) in &example.features {
675                let weight = if example.is_correct {
676                    feature_value
677                } else {
678                    -feature_value
679                };
680                *weights.entry(feature_name.clone()).or_insert(0.0) += weight;
681            }
682        }
683
684        // Normalize weights
685        let sum: f64 = weights.values().map(|w| w.abs()).sum();
686        if sum > 0.0 {
687            for weight in weights.values_mut() {
688                *weight /= sum;
689            }
690        }
691
692        Ok(Self { weights })
693    }
694
695    /// Predict if mapping is correct
696    pub fn predict(&self, features: &HashMap<String, f64>) -> f64 {
697        let mut score = 0.5; // Base score
698
699        for (feature_name, &feature_value) in features {
700            if let Some(&weight) = self.weights.get(feature_name) {
701                score += weight * feature_value;
702            }
703        }
704
705        score.clamp(0.0, 1.0)
706    }
707}
708
/// Unit tests for the configuration defaults, the string helpers and the
/// simplified mapping predictor.
#[cfg(test)]
mod tests {
    use super::*;

    /// The default configuration uses a 0.75 threshold and enables both
    /// property and class alignment.
    #[test]
    fn test_alignment_config_default() {
        let config = AlignmentConfig::default();
        assert_eq!(config.similarity_threshold, 0.75);
        assert!(config.enable_property_alignment);
        assert!(config.enable_class_alignment);
    }

    /// Local names are taken after the last `/` or `#`.
    #[test]
    fn test_extract_local_name() {
        assert_eq!(
            SchemaAligner::extract_local_name("http://xmlns.com/foaf/0.1/name"),
            "name"
        );
        assert_eq!(
            SchemaAligner::extract_local_name("http://schema.org#Person"),
            "Person"
        );
    }

    /// A freshly built aligner has empty property and class mapping tables.
    #[tokio::test]
    async fn test_schema_aligner_creation() {
        let config = AlignmentConfig::default();
        let aligner = SchemaAligner::new(config);

        // Test that aligner is created successfully
        assert!(aligner.property_mappings.read().await.is_empty());
        assert!(aligner.class_mappings.read().await.is_empty());
    }

    /// String similarity is 1.0 for equal strings, above 0.5 for near
    /// matches and below 0.5 for unrelated strings.
    #[test]
    fn test_string_similarity() {
        let config = AlignmentConfig::default();
        let aligner = SchemaAligner::new(config);

        // Exact match
        let sim1 = aligner.calculate_string_similarity("name", "name");
        assert_eq!(sim1, 1.0);

        // Similar strings
        let sim2 = aligner.calculate_string_similarity("firstName", "first_name");
        assert!(sim2 > 0.5);

        // Different strings
        let sim3 = aligner.calculate_string_similarity("name", "age");
        assert!(sim3 < 0.5);
    }

    /// Training on one positive and one negative example yields at least
    /// one learned weight.
    #[test]
    fn test_mapping_predictor_train() {
        let examples = vec![
            MappingExample {
                source_entity: "foaf:name".to_string(),
                target_entity: "schema:name".to_string(),
                is_correct: true,
                features: [("string_sim".to_string(), 0.9)].into(),
            },
            MappingExample {
                source_entity: "foaf:age".to_string(),
                target_entity: "schema:birthDate".to_string(),
                is_correct: false,
                features: [("string_sim".to_string(), 0.3)].into(),
            },
        ];

        let predictor = MappingPredictor::train(examples).unwrap();
        assert!(!predictor.weights.is_empty());
    }
}