Skip to main content

bookforge_core/
entity.rs

//! Named entities — a structured extension of the glossary for
//! characters, places, and named items whose grammatical gender matters
//! in the target language.
//!
//! Glossary terms enforce a source → target substitution; entities go
//! further and tell the model how to inflect adjectives and articles
//! around the translated name. The rendered block reads like a
//! grammatical-agreement table (see [`render_entity_agreement_block`]),
//! so the model can keep gender concord consistent across paragraphs.
//!
//! Italian, French, Spanish, German, etc. all need this; English
//! doesn't. The feature is purely additive — empty input produces an
//! empty block.
14
15use std::collections::HashMap;
16
17use serde::{Deserialize, Serialize};
18use sha2::{Digest, Sha256};
19
20use crate::glossary::GlossaryScopeKind;
21
22#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
23pub enum EntityGender {
24    #[serde(rename = "m")]
25    Masculine,
26    #[serde(rename = "f")]
27    Feminine,
28    #[serde(rename = "n")]
29    Neuter,
30}
31
32impl EntityGender {
33    pub fn as_label(self) -> &'static str {
34        match self {
35            EntityGender::Masculine => "masculine",
36            EntityGender::Feminine => "feminine",
37            EntityGender::Neuter => "neuter",
38        }
39    }
40
41    pub fn as_short(self) -> &'static str {
42        match self {
43            EntityGender::Masculine => "m",
44            EntityGender::Feminine => "f",
45            EntityGender::Neuter => "n",
46        }
47    }
48}
49
50#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
51pub struct Entity {
52    #[serde(default, skip_serializing_if = "Option::is_none")]
53    pub id: Option<i64>,
54    pub scope_kind: GlossaryScopeKind,
55    #[serde(default, skip_serializing_if = "Option::is_none")]
56    pub scope_id: Option<String>,
57    pub source_name: String,
58    pub target_name: String,
59    #[serde(default, skip_serializing_if = "Option::is_none")]
60    pub gender_target: Option<EntityGender>,
61    #[serde(default, skip_serializing_if = "Option::is_none")]
62    pub role: Option<String>,
63    #[serde(default, skip_serializing_if = "Option::is_none")]
64    pub notes: Option<String>,
65    pub source_language: String,
66    pub target_language: String,
67}
68
69/// Merge entities across scopes with the same `book > series > global`
70/// precedence as glossary terms. Entities are keyed on
71/// `(source_name, source_language, target_language)`; the highest-
72/// priority row wins.
73pub fn merge_scope_entities(entities: &[Entity]) -> Vec<Entity> {
74    let mut by_key: HashMap<(String, String, String), Entity> = HashMap::new();
75    for entity in entities {
76        let key = (
77            entity.source_name.clone(),
78            entity.source_language.clone(),
79            entity.target_language.clone(),
80        );
81        match by_key.get(&key) {
82            Some(existing) if existing.scope_kind.priority() > entity.scope_kind.priority() => {}
83            _ => {
84                by_key.insert(key, entity.clone());
85            }
86        }
87    }
88    let mut merged: Vec<Entity> = by_key.into_values().collect();
89    merged.sort_by(|a, b| {
90        a.source_language
91            .cmp(&b.source_language)
92            .then_with(|| a.target_language.cmp(&b.target_language))
93            .then_with(|| a.source_name.cmp(&b.source_name))
94    });
95    merged
96}
97
98/// Render merged entities as a grammatical-agreement block. Empty input
99/// returns an empty string so the placeholder substitutes to nothing in
100/// templates that don't reference the table.
101pub fn render_entity_agreement_block(entities: &[Entity]) -> String {
102    if entities.is_empty() {
103        return String::new();
104    }
105    let mut out = String::from(
106        "=== Entity grammatical agreement (use this for adjective/article concord) ===\n",
107    );
108    for entity in entities {
109        let mut line = format!("- {}", entity.target_name);
110        if entity.target_name != entity.source_name {
111            line.push_str(&format!(" ({})", entity.source_name));
112        }
113        if let Some(gender) = entity.gender_target {
114            line.push_str(&format!(": {}", gender.as_label()));
115        } else {
116            line.push_str(": unspecified");
117        }
118        if let Some(role) = entity.role.as_deref().filter(|r| !r.is_empty()) {
119            line.push_str(&format!(" [{role}]"));
120        }
121        out.push_str(&line);
122        out.push('\n');
123    }
124    out.push_str("=== End ===\n");
125    out
126}
127
128/// Stable fingerprint of a merged entity set. Empty input still produces
129/// a stable fingerprint so the cache namespace can ignore-or-include
130/// uniformly.
131pub fn entities_fingerprint(entities: &[Entity]) -> String {
132    let mut normalized: Vec<Entity> = entities.to_vec();
133    // Strip ids and sort for stability — the same logical set must
134    // fingerprint identically regardless of insertion order.
135    for entity in &mut normalized {
136        entity.id = None;
137    }
138    normalized.sort_by(|a, b| {
139        a.scope_kind
140            .priority()
141            .cmp(&b.scope_kind.priority())
142            .then_with(|| a.scope_id.cmp(&b.scope_id))
143            .then_with(|| a.source_language.cmp(&b.source_language))
144            .then_with(|| a.target_language.cmp(&b.target_language))
145            .then_with(|| a.source_name.cmp(&b.source_name))
146            .then_with(|| a.target_name.cmp(&b.target_name))
147    });
148    let payload = serde_json::json!({
149        "schema": 1,
150        "entities": normalized,
151    });
152    let serialized = serde_json::to_vec(&payload).unwrap_or_default();
153    let digest = Sha256::digest(serialized);
154    let mut hex = String::with_capacity(digest.len() * 2);
155    for byte in digest {
156        use std::fmt::Write as _;
157        write!(&mut hex, "{byte:02x}").expect("write to string");
158    }
159    hex
160}
161
162/// Convenience: produce both the rendered block and the fingerprint
163/// from a single merge.
164pub fn render_and_fingerprint(merged: &[Entity]) -> (String, String) {
165    (
166        render_entity_agreement_block(merged),
167        entities_fingerprint(merged),
168    )
169}
170
#[cfg(test)]
mod tests {
    use super::*;

    /// Fixture: an English → Italian entity in scope id `"test"` with a
    /// known gender and no id, role, or notes.
    fn entity(name: &str, target: &str, scope: GlossaryScopeKind, gender: EntityGender) -> Entity {
        Entity {
            id: None,
            scope_kind: scope,
            scope_id: Some(String::from("test")),
            source_name: String::from(name),
            target_name: String::from(target),
            gender_target: Some(gender),
            role: None,
            notes: None,
            source_language: String::from("English"),
            target_language: String::from("Italian"),
        }
    }

    /// Fixture shorthand for the common book-scoped case.
    fn book_entity(name: &str, target: &str, gender: EntityGender) -> Entity {
        entity(name, target, GlossaryScopeKind::Book, gender)
    }

    #[test]
    fn render_block_returns_empty_for_no_entities() {
        let rendered = render_entity_agreement_block(&[]);
        assert_eq!(rendered, "");
    }

    #[test]
    fn render_block_includes_source_when_target_differs() {
        let ring = book_entity("the Ring", "l'Anello", EntityGender::Masculine);
        let rendered = render_entity_agreement_block(&[ring]);
        assert!(rendered.contains("l'Anello (the Ring): masculine"));
    }

    #[test]
    fn render_block_omits_source_when_target_matches() {
        let galadriel = book_entity("Galadriel", "Galadriel", EntityGender::Feminine);
        let rendered = render_entity_agreement_block(&[galadriel]);
        assert!(rendered.contains("- Galadriel: feminine"));
        assert!(!rendered.contains("(Galadriel)"));
    }

    #[test]
    fn merge_book_overrides_series_overrides_global() {
        // Lowest priority first, so the winner has to displace earlier rows.
        let rows = [
            entity(
                "Aragorn",
                "Aragorn-old",
                GlossaryScopeKind::Global,
                EntityGender::Masculine,
            ),
            entity(
                "Aragorn",
                "Aragorn-series",
                GlossaryScopeKind::Series,
                EntityGender::Masculine,
            ),
            book_entity("Aragorn", "Aragorn", EntityGender::Masculine),
        ];
        let merged = merge_scope_entities(&rows);
        assert_eq!(merged.len(), 1);
        assert_eq!(merged[0].target_name, "Aragorn");
        assert_eq!(merged[0].scope_kind, GlossaryScopeKind::Book);
    }

    #[test]
    fn merge_keeps_distinct_source_names() {
        let rows = [
            book_entity("Galadriel", "Galadriel", EntityGender::Feminine),
            book_entity("Boromir", "Boromir", EntityGender::Masculine),
        ];
        assert_eq!(merge_scope_entities(&rows).len(), 2);
    }

    #[test]
    fn entities_fingerprint_is_stable_across_input_order() {
        let galadriel = book_entity("Galadriel", "Galadriel", EntityGender::Feminine);
        let boromir = book_entity("Boromir", "Boromir", EntityGender::Masculine);
        let forward = entities_fingerprint(&[galadriel.clone(), boromir.clone()]);
        let reversed = entities_fingerprint(&[boromir, galadriel]);
        assert_eq!(forward, reversed);
    }

    #[test]
    fn entities_fingerprint_changes_when_gender_changes() {
        let masculine = entities_fingerprint(&[book_entity("X", "X", EntityGender::Masculine)]);
        let feminine = entities_fingerprint(&[book_entity("X", "X", EntityGender::Feminine)]);
        assert_ne!(masculine, feminine);
    }

    #[test]
    fn entities_fingerprint_of_empty_is_stable() {
        let first = entities_fingerprint(&[]);
        let second = entities_fingerprint(&[]);
        assert_eq!(first, second);
        assert!(!first.is_empty());
    }

    #[test]
    fn render_block_includes_role_when_present() {
        let galadriel = Entity {
            role: Some(String::from("elf-queen")),
            ..book_entity("Galadriel", "Galadriel", EntityGender::Feminine)
        };
        let rendered = render_entity_agreement_block(&[galadriel]);
        assert!(rendered.contains("[elf-queen]"));
    }
}