scirs2_text/information_extraction/
linking.rs

1//! Entity linking and knowledge base integration
2
3use super::entities::{Entity, EntityType};
4use crate::error::Result;
5use std::collections::HashMap;
6
7/// Knowledge base entry for entity linking
8#[derive(Debug, Clone)]
9pub struct KnowledgeBaseEntry {
10    /// Canonical name of the entity
11    pub canonical_name: String,
12    /// Type of the entity
13    pub entity_type: EntityType,
14    /// Alternative names for the entity
15    pub aliases: Vec<String>,
16    /// Confidence score for this entry
17    pub confidence: f64,
18    /// Additional metadata about the entity
19    pub metadata: HashMap<String, String>,
20}
21
22/// Entity with knowledge base linking
23#[derive(Debug, Clone)]
24pub struct LinkedEntity {
25    /// The original entity
26    pub entity: Entity,
27    /// Canonical name from knowledge base
28    pub canonical_name: String,
29    /// Confidence score for the linking
30    pub linked_confidence: f64,
31    /// Additional metadata from knowledge base
32    pub metadata: HashMap<String, String>,
33}
34
35/// Entity linker for connecting entities to knowledge bases
36pub struct EntityLinker {
37    knowledge_base: HashMap<String, KnowledgeBaseEntry>,
38    alias_map: HashMap<String, String>,
39}
40
41impl Default for EntityLinker {
42    fn default() -> Self {
43        Self::new()
44    }
45}
46
47impl EntityLinker {
48    /// Create new entity linker
49    pub fn new() -> Self {
50        Self {
51            knowledge_base: HashMap::new(),
52            alias_map: HashMap::new(),
53        }
54    }
55
56    /// Add entity to knowledge base
57    pub fn add_entity(&mut self, entry: KnowledgeBaseEntry) {
58        let canonical = entry.canonical_name.clone();
59
60        // Add aliases to alias map (store in lowercase for case-insensitive lookup)
61        for alias in &entry.aliases {
62            self.alias_map
63                .insert(alias.to_lowercase(), canonical.clone());
64        }
65        self.alias_map
66            .insert(canonical.to_lowercase(), canonical.clone());
67
68        self.knowledge_base.insert(canonical, entry);
69    }
70
71    /// Link extracted entities to knowledge base
72    pub fn link_entities(&self, entities: &mut [Entity]) -> Result<Vec<LinkedEntity>> {
73        let mut linked_entities = Vec::new();
74
75        for entity in entities {
76            if let Some(canonical_name) = self.alias_map.get(&entity.text.to_lowercase()) {
77                if let Some(kb_entry) = self.knowledge_base.get(canonical_name) {
78                    let confidence = entity.confidence * kb_entry.confidence;
79                    linked_entities.push(LinkedEntity {
80                        entity: entity.clone(),
81                        canonical_name: kb_entry.canonical_name.clone(),
82                        linked_confidence: confidence,
83                        metadata: kb_entry.metadata.clone(),
84                    });
85                }
86            }
87        }
88
89        Ok(linked_entities)
90    }
91}