Skip to main content

cdx_core/extensions/semantic/
entity.rs

1//! Entity linking to external knowledge bases.
2
3use serde::{Deserialize, Serialize};
4
5/// A link to an external entity in a knowledge base.
6#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
7#[serde(rename_all = "camelCase")]
8pub struct EntityLink {
9    /// URI of the entity.
10    pub uri: String,
11
12    /// Type of entity.
13    pub entity_type: EntityType,
14
15    /// Display label.
16    #[serde(default, skip_serializing_if = "Option::is_none")]
17    pub label: Option<String>,
18
19    /// Confidence score (0.0 to 1.0).
20    #[serde(default, skip_serializing_if = "Option::is_none")]
21    pub confidence: Option<f64>,
22
23    /// Source knowledge base.
24    #[serde(default, skip_serializing_if = "Option::is_none")]
25    pub source: Option<KnowledgeBase>,
26}
27
28impl EntityLink {
29    /// Create a new entity link.
30    #[must_use]
31    pub fn new(uri: impl Into<String>, entity_type: EntityType) -> Self {
32        Self {
33            uri: uri.into(),
34            entity_type,
35            label: None,
36            confidence: None,
37            source: None,
38        }
39    }
40
41    /// Set the display label.
42    #[must_use]
43    pub fn with_label(mut self, label: impl Into<String>) -> Self {
44        self.label = Some(label.into());
45        self
46    }
47
48    /// Set confidence score.
49    #[must_use]
50    pub fn with_confidence(mut self, confidence: f64) -> Self {
51        self.confidence = Some(confidence.clamp(0.0, 1.0));
52        self
53    }
54
55    /// Set source knowledge base.
56    #[must_use]
57    pub fn with_source(mut self, source: KnowledgeBase) -> Self {
58        self.source = Some(source);
59        self
60    }
61
62    /// Create a Wikipedia entity link.
63    #[must_use]
64    pub fn wikipedia(title: impl Into<String>, entity_type: EntityType) -> Self {
65        let title = title.into();
66        let uri = format!("https://en.wikipedia.org/wiki/{}", title.replace(' ', "_"));
67        Self::new(uri, entity_type)
68            .with_label(title)
69            .with_source(KnowledgeBase::Wikipedia)
70    }
71
72    /// Create a Wikidata entity link.
73    #[must_use]
74    pub fn wikidata(qid: impl Into<String>, entity_type: EntityType) -> Self {
75        let qid = qid.into();
76        let uri = format!("https://www.wikidata.org/wiki/{qid}");
77        Self::new(uri, entity_type).with_source(KnowledgeBase::Wikidata)
78    }
79}
80
81/// Type of entity being linked.
82#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, strum::Display)]
83#[serde(rename_all = "camelCase")]
84#[strum(serialize_all = "kebab-case")]
85pub enum EntityType {
86    /// A person.
87    Person,
88    /// An organization or company.
89    Organization,
90    /// A geographic location.
91    Place,
92    /// A historical or scheduled event.
93    Event,
94    /// A product.
95    Product,
96    /// A creative work (book, film, etc.).
97    CreativeWork,
98    /// A concept or idea.
99    Concept,
100    /// A scientific term or phenomenon.
101    Scientific,
102    /// A time period or era.
103    TimePeriod,
104    /// Other entity type.
105    Other,
106}
107
108/// Known knowledge bases for entity linking.
109#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
110#[serde(rename_all = "lowercase")]
111pub enum KnowledgeBase {
112    /// Wikipedia.
113    Wikipedia,
114    /// Wikidata.
115    Wikidata,
116    /// `DBpedia`.
117    Dbpedia,
118    /// Schema.org.
119    Schema,
120    /// Library of Congress.
121    Loc,
122    /// `GeoNames`.
123    Geonames,
124    /// Other knowledge base.
125    Other,
126}