Skip to main content

prosaic_core/
reg.rs

1//! Referring Expression Generation (REG).
2//!
3//! Implements two algorithms:
4//!
5//! - **Dale & Reiter Incremental Algorithm** (1995): given a target entity and
6//!   a distractor set, select the shortest set of unary attributes that
7//!   uniquely identifies the target.
8//!
9//! - **Krahmer et al. Graph-Based Greedy Algorithm** (2003): extends D&R by
10//!   also considering labeled directed relations between entities. When
11//!   attributes alone do not disambiguate, the algorithm appends one
12//!   relation clause (e.g. "that calls AuthService") to the referring
13//!   expression.
14//!
15//! In an `prosaic` engine, this powers the `{name|refer}` pipe's *Full form*
16//! path. When multiple entities of the same type are known to the engine,
17//! the algorithm adds distinguishing adjectives until the target is
18//! unambiguous — producing "the domain class UserService" instead of the
19//! bare "the class UserService" when an ambiguity-causing "the infra class
20//! AuthService" has also been registered.
21
22#[cfg(not(feature = "std"))]
23use alloc::string::{String, ToString};
24#[cfg(not(feature = "std"))]
25use alloc::vec::Vec;
26
27use crate::collections::HashMap;
28
29/// A described entity. Attributes are intentionally ordered so the default
30/// preference ordering respects registration order.
31#[derive(Debug, Clone, PartialEq, Eq, Default)]
32#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
33pub struct EntityDescriptor {
34    pub name: String,
35    pub entity_type: String,
36    pub attributes: Vec<(String, String)>,
37    /// Labeled directed edges from this entity to other named entities.
38    ///
39    /// Each `(relation_label, target_name)` pair. The label is the
40    /// surface-form fragment that will be inserted verbatim after the head
41    /// noun in the referring expression — e.g. `("that calls", "AuthService")`
42    /// renders as `"that calls AuthService"`. Labels are chosen by the caller
43    /// to read naturally in context.
44    ///
45    /// `#[serde(default)]` ensures existing serialized `EntityDescriptor`
46    /// payloads that lack this field still deserialize cleanly.
47    #[cfg_attr(feature = "serde", serde(default))]
48    pub relations: Vec<(String, String)>,
49}
50
51impl EntityDescriptor {
52    pub fn new(name: impl Into<String>, entity_type: impl Into<String>) -> Self {
53        Self {
54            name: name.into(),
55            entity_type: entity_type.into(),
56            attributes: Vec::new(),
57            relations: Vec::new(),
58        }
59    }
60
61    /// Append an attribute. Chainable.
62    pub fn with_attribute(mut self, key: impl Into<String>, value: impl Into<String>) -> Self {
63        self.attributes.push((key.into(), value.into()));
64        self
65    }
66
67    /// Append a labeled directed relation to another entity. Chainable.
68    ///
69    /// `label` is the surface-form fragment inserted verbatim after the head
70    /// noun when this relation is selected by the graph-based algorithm
71    /// (e.g. `"that calls"`). `target` is the name of the target entity.
72    ///
73    /// Relations are considered in insertion order during REG.
74    pub fn with_relation(mut self, label: impl Into<String>, target: impl Into<String>) -> Self {
75        self.relations.push((label.into(), target.into()));
76        self
77    }
78
79    /// Look up an attribute by key.
80    pub fn attribute(&self, key: &str) -> Option<&str> {
81        self.attributes
82            .iter()
83            .find(|(k, _)| k == key)
84            .map(|(_, v)| v.as_str())
85    }
86
87    /// Look up a relation's target by label.
88    ///
89    /// Returns the target name of the first relation whose label matches, or
90    /// `None` if no such relation exists.
91    pub fn relation(&self, label: &str) -> Option<&str> {
92        self.relations
93            .iter()
94            .find(|(l, _)| l == label)
95            .map(|(_, t)| t.as_str())
96    }
97}
98
99/// Registry of descriptors for entities known to the engine.
100///
101/// Keyed by the tuple `(entity_type, name)` so the same name can represent
102/// distinct entities of different types (e.g. `UserService` as a `class`
103/// and `UserService` as a `trait` are independent entries). Callers whose
104/// entities collide on both type and name should use fully-qualified names
105/// (e.g. `"auth::init"`) to disambiguate.
106#[derive(Debug, Clone, Default)]
107pub struct EntityRegistry {
108    entries: HashMap<(String, String), EntityDescriptor>,
109}
110
111impl EntityRegistry {
112    pub fn new() -> Self {
113        Self::default()
114    }
115
116    /// Insert or replace an entity. Last write wins for a given
117    /// `(entity_type, name)` pair — different-type entries with the same
118    /// name coexist.
119    pub fn insert(&mut self, descriptor: EntityDescriptor) {
120        let key = (descriptor.entity_type.clone(), descriptor.name.clone());
121        self.entries.insert(key, descriptor);
122    }
123
124    /// Look up by `(entity_type, name)` — the canonical registry key.
125    pub fn get(&self, entity_type: &str, name: &str) -> Option<&EntityDescriptor> {
126        self.entries
127            .get(&(entity_type.to_string(), name.to_string()))
128    }
129
130    pub fn iter(&self) -> impl Iterator<Item = &EntityDescriptor> {
131        self.entries.values()
132    }
133
134    pub fn len(&self) -> usize {
135        self.entries.len()
136    }
137
138    pub fn is_empty(&self) -> bool {
139        self.entries.is_empty()
140    }
141}
142
143/// Output of the graph-based REG algorithm.
144///
145/// Contains the attribute values chosen to premodify the head noun
146/// (same as Dale & Reiter output), and an optional relation clause to
147/// append as a postmodifier when attributes alone do not disambiguate.
148#[derive(Debug, Clone, PartialEq, Eq, Default)]
149pub struct SubgraphDescription {
150    /// Attribute values to include as premodifiers, in preference order.
151    /// May be empty when the entity is the only one of its type.
152    pub attributes: Vec<String>,
153    /// Distinguishing relation, if one was needed: `(label, target_name)`.
154    ///
155    /// The label is inserted verbatim into the surface form after the head
156    /// noun — e.g. `Some(("that calls", "AuthService"))` renders as
157    /// `"that calls AuthService"`.
158    pub relation: Option<(String, String)>,
159}
160
161/// Shared core of both REG algorithms.
162///
163/// Runs the Dale & Reiter incremental attribute selection and returns
164/// both the chosen attribute values AND the surviving distractor set after
165/// those attributes have been applied. The surviving set is empty when
166/// attributes alone disambiguate; non-empty when they do not.
167///
168/// Both [`distinguishing_attributes`] and [`distinguishing_subgraph`] call
169/// this helper so the distractor-survival semantics are identical and
170/// cannot diverge.
171pub(crate) fn incremental_attributes_with_remaining<'a>(
172    target: &EntityDescriptor,
173    registry: &'a EntityRegistry,
174    preference_order: &[String],
175) -> (Vec<String>, Vec<&'a EntityDescriptor>) {
176    // Build distractor set: all registry entries with the same type,
177    // excluding the target itself.
178    let mut distractors: Vec<&EntityDescriptor> = registry
179        .iter()
180        .filter(|d| d.name != target.name && d.entity_type == target.entity_type)
181        .collect();
182
183    // If nothing could confuse the target, no distinguishers needed.
184    if distractors.is_empty() {
185        return (Vec::new(), Vec::new());
186    }
187
188    // Determine the attribute walk order: explicit preference first, then
189    // the target's own registration order (so any not covered by the
190    // preference list still get a chance).
191    let mut walked: Vec<&String> = preference_order.iter().collect();
192    for (k, _) in &target.attributes {
193        if !walked.iter().any(|s| s.as_str() == k.as_str()) {
194            walked.push(k);
195        }
196    }
197
198    let mut chosen: Vec<String> = Vec::new();
199
200    for attr_key in walked {
201        if distractors.is_empty() {
202            break;
203        }
204        let target_value = match target.attribute(attr_key) {
205            Some(v) => v,
206            None => continue,
207        };
208
209        // Would this attribute rule out any distractor?
210        let still_matching: Vec<&EntityDescriptor> = distractors
211            .iter()
212            .copied()
213            .filter(|d| d.attribute(attr_key) == Some(target_value))
214            .collect();
215
216        if still_matching.len() < distractors.len() {
217            chosen.push(target_value.to_string());
218            distractors = still_matching;
219        }
220    }
221
222    (chosen, distractors)
223}
224
225/// Dale & Reiter's Incremental Algorithm.
226///
227/// Returns the ordered list of attribute values that should premodify the
228/// head noun, in the order they should appear. The head noun (entity type)
229/// is always included implicitly — callers render it alongside the name
230/// themselves: `"the <attrs joined by space> <type> <name>"`.
231///
232/// Algorithm:
233/// 1. Filter distractors to those sharing the target's entity type (the
234///    type is always the head noun, so same-type entities are the only
235///    candidates that could still be confused).
236/// 2. Walk attributes in preference order. For each attribute the target
237///    has, include it if it rules out at least one remaining distractor.
238/// 3. Stop when no distractors remain.
239/// 4. If the attribute list is exhausted with distractors still present,
240///    return whatever was chosen — the result may still be ambiguous but
241///    uses all available discriminating information.
242pub fn distinguishing_attributes(
243    target: &EntityDescriptor,
244    registry: &EntityRegistry,
245    preference_order: &[String],
246) -> Vec<String> {
247    let (attrs, _) = incremental_attributes_with_remaining(target, registry, preference_order);
248    attrs
249}
250
251/// Krahmer et al. 2003 graph-based greedy REG algorithm.
252///
253/// Extends Dale & Reiter by also considering labeled directed relations
254/// between entities. When attributes alone do not fully disambiguate the
255/// target from all same-type distractors, the algorithm appends one
256/// distinguishing relation clause.
257///
258/// Algorithm:
259/// 1. Run D&R attribute selection (`incremental_attributes_with_remaining`).
260/// 2. If no distractors survive the attribute filter, return early — no
261///    relation needed (identical behaviour to D&R for this case).
262/// 3. Walk the target's relations in insertion order. Pick the first
263///    relation `(label, target_name)` that none of the surviving
264///    distractors also hold.
265/// 4. If all of the target's relations are shared with at least one
266///    surviving distractor, return the best-effort attribute list with
267///    no relation (may still be ambiguous — greedy fallback).
268///
269/// Complexity: O(|distractors| × (|attributes| + |relations|)) — linear
270/// in registry size. No backtracking, no B&B.
271pub fn distinguishing_subgraph(
272    target: &EntityDescriptor,
273    registry: &EntityRegistry,
274    preference_order: &[String],
275) -> SubgraphDescription {
276    let (attrs, remaining) =
277        incremental_attributes_with_remaining(target, registry, preference_order);
278
279    // Attributes alone were sufficient — no relation needed.
280    if remaining.is_empty() {
281        return SubgraphDescription {
282            attributes: attrs,
283            relation: None,
284        };
285    }
286
287    // Walk target's relations in insertion order. Pick the first relation
288    // that no surviving distractor also holds.
289    for (label, target_name) in &target.relations {
290        let any_shared = remaining.iter().any(|d| {
291            d.relations
292                .iter()
293                .any(|(l, t)| l == label && t == target_name)
294        });
295        if !any_shared {
296            return SubgraphDescription {
297                attributes: attrs,
298                relation: Some((label.clone(), target_name.clone())),
299            };
300        }
301    }
302
303    // Exhausted — return best-effort (may still be ambiguous).
304    SubgraphDescription {
305        attributes: attrs,
306        relation: None,
307    }
308}
309
310#[cfg(test)]
311mod tests {
312    use super::*;
313
314    fn reg_with(entities: Vec<EntityDescriptor>) -> EntityRegistry {
315        let mut r = EntityRegistry::new();
316        for e in entities {
317            r.insert(e);
318        }
319        r
320    }
321
322    #[test]
323    fn no_distractors_yields_empty_attribute_list() {
324        let target =
325            EntityDescriptor::new("UserService", "class").with_attribute("layer", "domain");
326        let registry = reg_with(vec![target.clone()]);
327        let attrs = distinguishing_attributes(&target, &registry, &[]);
328        assert!(attrs.is_empty());
329    }
330
331    #[test]
332    fn different_type_distractor_does_not_force_attribute() {
333        let target =
334            EntityDescriptor::new("UserService", "class").with_attribute("layer", "domain");
335        let other = EntityDescriptor::new("UserService", "trait").with_attribute("layer", "infra");
336        let registry = reg_with(vec![target.clone(), other]);
337        // Same name but different type — the head noun alone disambiguates.
338        let attrs = distinguishing_attributes(&target, &registry, &[]);
339        assert!(attrs.is_empty());
340    }
341
342    #[test]
343    fn same_type_requires_distinguishing_attribute() {
344        let target =
345            EntityDescriptor::new("UserService", "class").with_attribute("layer", "domain");
346        let distractor =
347            EntityDescriptor::new("AuthService", "class").with_attribute("layer", "infra");
348        let registry = reg_with(vec![target.clone(), distractor]);
349        let attrs = distinguishing_attributes(&target, &registry, &[]);
350        assert_eq!(attrs, vec!["domain".to_string()]);
351    }
352
353    #[test]
354    fn preference_order_is_respected() {
355        let target = EntityDescriptor::new("Foo", "class")
356            .with_attribute("color", "red")
357            .with_attribute("size", "small");
358        let d1 = EntityDescriptor::new("Bar", "class")
359            .with_attribute("color", "blue")
360            .with_attribute("size", "small");
361
362        let registry = reg_with(vec![target.clone(), d1]);
363
364        // With size preferred first, size is useless (both small) so color is used.
365        let attrs = distinguishing_attributes(
366            &target,
367            &registry,
368            &["size".to_string(), "color".to_string()],
369        );
370        assert_eq!(attrs, vec!["red".to_string()]);
371    }
372
373    #[test]
374    fn preferred_attribute_selected_when_sufficient() {
375        let target = EntityDescriptor::new("Foo", "widget")
376            .with_attribute("color", "red")
377            .with_attribute("size", "small");
378        let d1 = EntityDescriptor::new("Bar", "widget")
379            .with_attribute("color", "blue")
380            .with_attribute("size", "large");
381
382        let registry = reg_with(vec![target.clone(), d1]);
383        let attrs = distinguishing_attributes(&target, &registry, &["color".to_string()]);
384        assert_eq!(attrs, vec!["red".to_string()]);
385    }
386
387    #[test]
388    fn multiple_attributes_needed_for_full_disambiguation() {
389        let target = EntityDescriptor::new("Foo", "widget")
390            .with_attribute("color", "red")
391            .with_attribute("size", "small");
392        let same_color = EntityDescriptor::new("Bar", "widget")
393            .with_attribute("color", "red")
394            .with_attribute("size", "large");
395        let same_size = EntityDescriptor::new("Baz", "widget")
396            .with_attribute("color", "blue")
397            .with_attribute("size", "small");
398
399        let registry = reg_with(vec![target.clone(), same_color, same_size]);
400        let attrs = distinguishing_attributes(
401            &target,
402            &registry,
403            &["color".to_string(), "size".to_string()],
404        );
405        // color removes same_size (blue ≠ red); size then removes same_color (large ≠ small).
406        assert_eq!(attrs, vec!["red".to_string(), "small".to_string()]);
407    }
408
409    #[test]
410    fn useless_attribute_is_skipped() {
411        // All three widgets are red — color doesn't disambiguate.
412        let target = EntityDescriptor::new("Foo", "widget")
413            .with_attribute("color", "red")
414            .with_attribute("size", "small");
415        let d1 = EntityDescriptor::new("Bar", "widget")
416            .with_attribute("color", "red")
417            .with_attribute("size", "large");
418        let d2 = EntityDescriptor::new("Baz", "widget")
419            .with_attribute("color", "red")
420            .with_attribute("size", "medium");
421
422        let registry = reg_with(vec![target.clone(), d1, d2]);
423        let attrs = distinguishing_attributes(
424            &target,
425            &registry,
426            &["color".to_string(), "size".to_string()],
427        );
428        // color does nothing (all red), size alone distinguishes (small vs large, medium).
429        assert_eq!(attrs, vec!["small".to_string()]);
430    }
431
432    #[test]
433    fn stops_as_soon_as_unambiguous() {
434        let target = EntityDescriptor::new("Foo", "widget")
435            .with_attribute("color", "red")
436            .with_attribute("size", "small")
437            .with_attribute("shape", "round");
438        let d1 = EntityDescriptor::new("Bar", "widget").with_attribute("color", "blue");
439
440        let registry = reg_with(vec![target.clone(), d1]);
441        let attrs = distinguishing_attributes(
442            &target,
443            &registry,
444            &["color".to_string(), "size".to_string(), "shape".to_string()],
445        );
446        // color alone is enough; size and shape must not be included.
447        assert_eq!(attrs, vec!["red".to_string()]);
448    }
449
450    #[test]
451    fn falls_back_to_registration_order_when_no_preference() {
452        let target = EntityDescriptor::new("Foo", "widget")
453            .with_attribute("first_attr", "A")
454            .with_attribute("second_attr", "B");
455        let d1 = EntityDescriptor::new("Bar", "widget")
456            .with_attribute("first_attr", "X")
457            .with_attribute("second_attr", "B");
458
459        let registry = reg_with(vec![target.clone(), d1]);
460        // With no preference order, registration order picks first_attr,
461        // which is sufficient.
462        let attrs = distinguishing_attributes(&target, &registry, &[]);
463        assert_eq!(attrs, vec!["A".to_string()]);
464    }
465
466    #[test]
467    fn missing_attribute_on_target_skips_without_panic() {
468        let target = EntityDescriptor::new("Foo", "widget").with_attribute("size", "small");
469        let d1 = EntityDescriptor::new("Bar", "widget").with_attribute("size", "large");
470
471        let registry = reg_with(vec![target.clone(), d1]);
472        // Preference includes "color", which the target doesn't have; skip and try "size".
473        let attrs = distinguishing_attributes(
474            &target,
475            &registry,
476            &["color".to_string(), "size".to_string()],
477        );
478        assert_eq!(attrs, vec!["small".to_string()]);
479    }
480
481    #[test]
482    fn registry_insert_replaces_same_type_and_name() {
483        let mut r = EntityRegistry::new();
484        r.insert(EntityDescriptor::new("X", "t").with_attribute("a", "1"));
485        r.insert(EntityDescriptor::new("X", "t").with_attribute("a", "2"));
486        assert_eq!(r.get("t", "X").unwrap().attribute("a"), Some("2"));
487        assert_eq!(r.len(), 1);
488    }
489
490    #[test]
491    fn registry_keeps_same_name_different_type_as_separate_entries() {
492        let mut r = EntityRegistry::new();
493        r.insert(EntityDescriptor::new("UserService", "class").with_attribute("a", "1"));
494        r.insert(EntityDescriptor::new("UserService", "trait").with_attribute("a", "2"));
495        assert_eq!(r.len(), 2);
496        assert_eq!(
497            r.get("class", "UserService").unwrap().attribute("a"),
498            Some("1")
499        );
500        assert_eq!(
501            r.get("trait", "UserService").unwrap().attribute("a"),
502            Some("2")
503        );
504    }
505
506    // ── Relations on EntityDescriptor ────────────────────────────────────────
507
508    #[test]
509    fn with_relation_adds_edge() {
510        let e = EntityDescriptor::new("Handler", "function").with_relation("calls", "AuthService");
511        assert_eq!(
512            e.relations,
513            vec![("calls".to_string(), "AuthService".to_string())]
514        );
515    }
516
517    #[test]
518    fn relation_lookup_by_label() {
519        let e = EntityDescriptor::new("Handler", "function")
520            .with_relation("calls", "AuthService")
521            .with_relation("tests", "HandlerTests");
522        assert_eq!(e.relation("calls"), Some("AuthService"));
523        assert_eq!(e.relation("tests"), Some("HandlerTests"));
524        assert_eq!(e.relation("unknown"), None);
525    }
526
527    #[test]
528    fn default_has_empty_relations() {
529        let e = EntityDescriptor::default();
530        assert!(e.relations.is_empty());
531    }
532
533    // ── distinguishing_subgraph (graph-based REG) ─────────────────────────
534
535    #[test]
536    fn graph_reg_no_distractors_returns_empty() {
537        let target = EntityDescriptor::new("Foo", "class");
538        let registry = reg_with(vec![target.clone()]);
539        let desc = distinguishing_subgraph(&target, &registry, &[]);
540        assert!(desc.attributes.is_empty());
541        assert!(desc.relation.is_none());
542    }
543
544    #[test]
545    fn graph_reg_falls_back_to_dale_reiter_when_attributes_suffice() {
546        let target =
547            EntityDescriptor::new("UserService", "class").with_attribute("layer", "domain");
548        let other = EntityDescriptor::new("AuthService", "class").with_attribute("layer", "infra");
549        let registry = reg_with(vec![target.clone(), other]);
550        let desc = distinguishing_subgraph(&target, &registry, &[]);
551        assert_eq!(desc.attributes, vec!["domain".to_string()]);
552        assert!(desc.relation.is_none());
553    }
554
555    #[test]
556    fn graph_reg_adds_relation_when_attributes_dont_disambiguate() {
557        // Two handlers, both in the same layer, differentiated only by
558        // what they call.
559        let target = EntityDescriptor::new("LoginHandler", "function")
560            .with_attribute("layer", "api")
561            .with_relation("calls", "AuthService");
562        let other = EntityDescriptor::new("LogoutHandler", "function")
563            .with_attribute("layer", "api")
564            .with_relation("calls", "SessionService");
565        let registry = reg_with(vec![target.clone(), other]);
566        let desc = distinguishing_subgraph(&target, &registry, &[]);
567        // Attributes alone don't distinguish (both are `api` layer), so
568        // the relation must be included.
569        assert_eq!(
570            desc.relation,
571            Some(("calls".to_string(), "AuthService".to_string()))
572        );
573    }
574
575    #[test]
576    fn graph_reg_skips_shared_relation_picks_next() {
577        // Two handlers, both call the same logging service; but target has
578        // an additional distinguishing relation.
579        let target = EntityDescriptor::new("LoginHandler", "function")
580            .with_relation("calls", "LogService")
581            .with_relation("tests", "LoginTests");
582        let other = EntityDescriptor::new("LogoutHandler", "function")
583            .with_relation("calls", "LogService")
584            .with_relation("tests", "LogoutTests");
585        let registry = reg_with(vec![target.clone(), other]);
586        let desc = distinguishing_subgraph(&target, &registry, &[]);
587        assert_eq!(
588            desc.relation,
589            Some(("tests".to_string(), "LoginTests".to_string()))
590        );
591    }
592
593    #[test]
594    fn graph_reg_gives_up_when_nothing_distinguishes() {
595        // Two identical entities — no attributes, no distinguishing
596        // relations.
597        let target = EntityDescriptor::new("Foo", "thing").with_relation("calls", "X");
598        let other = EntityDescriptor::new("Bar", "thing").with_relation("calls", "X");
599        let registry = reg_with(vec![target.clone(), other]);
600        let desc = distinguishing_subgraph(&target, &registry, &[]);
601        // Returns whatever attributes D&R could find (none here) and no
602        // relation — greedy fallback.
603        assert!(desc.relation.is_none());
604    }
605
606    #[test]
607    fn graph_reg_combines_attributes_and_relation() {
608        // Three handlers:
609        // - Target:      layer=api, calls=AuthService
610        // - Distractor1: layer=api, calls=SessionService (differs by relation)
611        // - Distractor2: layer=web, calls=AuthService    (differs by attribute)
612        //
613        // D&R picks `layer=api` to exclude distractor2, leaving distractor1.
614        // Graph-based adds `calls=AuthService` to exclude distractor1.
615        let target = EntityDescriptor::new("LoginHandler", "function")
616            .with_attribute("layer", "api")
617            .with_relation("calls", "AuthService");
618        let d1 = EntityDescriptor::new("LogoutHandler", "function")
619            .with_attribute("layer", "api")
620            .with_relation("calls", "SessionService");
621        let d2 = EntityDescriptor::new("ProfileHandler", "function")
622            .with_attribute("layer", "web")
623            .with_relation("calls", "AuthService");
624        let registry = reg_with(vec![target.clone(), d1, d2]);
625        let desc = distinguishing_subgraph(&target, &registry, &[]);
626        assert_eq!(desc.attributes, vec!["api".to_string()]);
627        assert_eq!(
628            desc.relation,
629            Some(("calls".to_string(), "AuthService".to_string()))
630        );
631    }
632}