scirs2_text/information_extraction/
linking.rs1use super::entities::{Entity, EntityType};
4use crate::error::Result;
5use std::collections::HashMap;
6
7#[derive(Debug, Clone)]
9pub struct KnowledgeBaseEntry {
10 pub canonical_name: String,
12 pub entity_type: EntityType,
14 pub aliases: Vec<String>,
16 pub confidence: f64,
18 pub metadata: HashMap<String, String>,
20}
21
22#[derive(Debug, Clone)]
24pub struct LinkedEntity {
25 pub entity: Entity,
27 pub canonical_name: String,
29 pub linked_confidence: f64,
31 pub metadata: HashMap<String, String>,
33}
34
35pub struct EntityLinker {
37 knowledge_base: HashMap<String, KnowledgeBaseEntry>,
38 alias_map: HashMap<String, String>,
39}
40
41impl Default for EntityLinker {
42 fn default() -> Self {
43 Self::new()
44 }
45}
46
47impl EntityLinker {
48 pub fn new() -> Self {
50 Self {
51 knowledge_base: HashMap::new(),
52 alias_map: HashMap::new(),
53 }
54 }
55
56 pub fn add_entity(&mut self, entry: KnowledgeBaseEntry) {
58 let canonical = entry.canonical_name.clone();
59
60 for alias in &entry.aliases {
62 self.alias_map
63 .insert(alias.to_lowercase(), canonical.clone());
64 }
65 self.alias_map
66 .insert(canonical.to_lowercase(), canonical.clone());
67
68 self.knowledge_base.insert(canonical, entry);
69 }
70
71 pub fn link_entities(&self, entities: &mut [Entity]) -> Result<Vec<LinkedEntity>> {
73 let mut linked_entities = Vec::new();
74
75 for entity in entities {
76 if let Some(canonical_name) = self.alias_map.get(&entity.text.to_lowercase()) {
77 if let Some(kb_entry) = self.knowledge_base.get(canonical_name) {
78 let confidence = entity.confidence * kb_entry.confidence;
79 linked_entities.push(LinkedEntity {
80 entity: entity.clone(),
81 canonical_name: kb_entry.canonical_name.clone(),
82 linked_confidence: confidence,
83 metadata: kb_entry.metadata.clone(),
84 });
85 }
86 }
87 }
88
89 Ok(linked_entities)
90 }
91}