Skip to main content

panproto_schema/
normalize.rs

1//! Ref-normalization for schemas.
2//!
3//! When a schema theory includes a `Ref` sort, edges may pass through
4//! chains of ref vertices (`A -> Ref -> Ref -> B`). [`normalize`] collapses
5//! such chains into direct edges (`A -> B`), removing the intermediate
6//! ref vertices. The operation is idempotent.
7
8use std::collections::HashMap;
9
10use panproto_gat::Name;
11use rustc_hash::{FxHashMap, FxHashSet};
12use smallvec::SmallVec;
13
14use crate::schema::{Edge, Schema, Vertex};
15
16/// The vertex kind that represents a reference indirection.
17const REF_KIND: &str = "ref";
18
19/// Collapse ref-chains in a schema into direct edges.
20///
21/// For every path `A -> R1 -> R2 -> ... -> Rn -> B` where all
22/// intermediate vertices have kind `"ref"`, this function produces a
23/// single edge `A -> B`, inheriting the kind and name from the first
24/// edge in the chain.
25///
26/// Ref vertices that become unreachable (no longer sources or targets
27/// of any non-ref edge) are removed.
28///
29/// This operation is **idempotent**: `normalize(normalize(s)) == normalize(s)`.
30#[must_use]
31pub fn normalize(schema: &Schema) -> Schema {
32    let ref_targets = build_ref_targets(schema);
33
34    // If there are no ref vertices, return a clone unchanged.
35    if ref_targets.is_empty() {
36        return schema.clone();
37    }
38
39    let (new_edges, used_refs) = collapse_edges(schema, &ref_targets);
40    rebuild_schema(schema, &new_edges, &used_refs)
41}
42
43/// Build a map from ref vertex IDs to their outgoing edge targets.
44fn build_ref_targets(schema: &Schema) -> FxHashMap<Name, (Name, Edge)> {
45    let mut ref_targets: FxHashMap<Name, (Name, Edge)> = FxHashMap::default();
46    for (id, vertex) in &schema.vertices {
47        if vertex.kind == REF_KIND {
48            if let Some(edges) = schema.outgoing.get(id) {
49                if let Some(edge) = edges.first() {
50                    ref_targets.insert(id.clone(), (edge.tgt.clone(), edge.clone()));
51                }
52            }
53        }
54    }
55    ref_targets
56}
57
58/// Follow a ref chain to find the ultimate non-ref target.
59///
60/// Returns `None` if the chain contains a cycle.
61fn resolve_ref_chain(start: &Name, ref_targets: &FxHashMap<Name, (Name, Edge)>) -> Option<Name> {
62    let mut current = start.clone();
63    let mut visited = FxHashSet::default();
64    loop {
65        if !visited.insert(current.clone()) {
66            return None;
67        }
68        if let Some((target, _)) = ref_targets.get(&current) {
69            current.clone_from(target);
70        } else {
71            return Some(current);
72        }
73    }
74}
75
76/// Collapse edges through ref chains, returning the new edge list
77/// and the set of ref vertex IDs that were consumed.
78fn collapse_edges(
79    schema: &Schema,
80    ref_targets: &FxHashMap<Name, (Name, Edge)>,
81) -> (Vec<Edge>, FxHashSet<Name>) {
82    let mut new_edges: Vec<Edge> = Vec::new();
83    let mut used_refs: FxHashSet<Name> = FxHashSet::default();
84
85    for edge in schema.edges.keys() {
86        let src_vertex = schema.vertices.get(&edge.src);
87        // Skip edges that originate FROM a ref vertex (they are part of
88        // the chain and will be collapsed into the originating edge).
89        if src_vertex.is_some_and(|v| v.kind == REF_KIND) {
90            continue;
91        }
92
93        // If the target is a ref vertex, resolve the chain.
94        if let Some(resolved_tgt) = ref_targets
95            .get(&edge.tgt)
96            .and_then(|_| resolve_ref_chain(&edge.tgt, ref_targets))
97        {
98            // Track which ref vertices are traversed.
99            let mut cursor = edge.tgt.clone();
100            while let Some((next, _)) = ref_targets.get(&cursor) {
101                used_refs.insert(cursor.clone());
102                cursor.clone_from(next);
103            }
104
105            new_edges.push(Edge {
106                src: edge.src.clone(),
107                tgt: resolved_tgt,
108                kind: edge.kind.clone(),
109                name: edge.name.clone(),
110            });
111        } else {
112            // Not targeting a ref — keep as-is.
113            new_edges.push(edge.clone());
114        }
115    }
116
117    (new_edges, used_refs)
118}
119
120/// Rebuild the schema from the collapsed edges, removing consumed ref vertices.
121fn rebuild_schema(schema: &Schema, new_edges: &[Edge], used_refs: &FxHashSet<Name>) -> Schema {
122    let new_vertices: HashMap<Name, Vertex> = schema
123        .vertices
124        .iter()
125        .filter(|(id, v)| v.kind != REF_KIND || !used_refs.contains(*id))
126        .map(|(id, v)| (id.clone(), v.clone()))
127        .collect();
128
129    let mut edge_map = HashMap::with_capacity(new_edges.len());
130    let mut outgoing: HashMap<Name, SmallVec<Edge, 4>> = HashMap::new();
131    let mut incoming: HashMap<Name, SmallVec<Edge, 4>> = HashMap::new();
132    let mut between: HashMap<(Name, Name), SmallVec<Edge, 2>> = HashMap::new();
133
134    for edge in new_edges {
135        edge_map.insert(edge.clone(), edge.kind.clone());
136        outgoing
137            .entry(edge.src.clone())
138            .or_default()
139            .push(edge.clone());
140        incoming
141            .entry(edge.tgt.clone())
142            .or_default()
143            .push(edge.clone());
144        between
145            .entry((edge.src.clone(), edge.tgt.clone()))
146            .or_default()
147            .push(edge.clone());
148    }
149
150    let new_constraints = schema
151        .constraints
152        .iter()
153        .filter(|(id, _)| new_vertices.contains_key(*id))
154        .map(|(id, c)| (id.clone(), c.clone()))
155        .collect();
156
157    let new_required = schema
158        .required
159        .iter()
160        .filter(|(id, _)| new_vertices.contains_key(*id))
161        .map(|(id, r)| (id.clone(), r.clone()))
162        .collect();
163
164    let new_nsids = schema
165        .nsids
166        .iter()
167        .filter(|(id, _)| new_vertices.contains_key(*id))
168        .map(|(id, n)| (id.clone(), n.clone()))
169        .collect();
170
171    let new_hyper_edges = schema
172        .hyper_edges
173        .iter()
174        .filter(|(_, he)| he.signature.values().all(|v| new_vertices.contains_key(v)))
175        .map(|(id, he)| (id.clone(), he.clone()))
176        .collect();
177
178    let new_mergers = schema
179        .mergers
180        .iter()
181        .filter(|(id, _)| new_vertices.contains_key(*id))
182        .map(|(id, e)| (id.clone(), e.clone()))
183        .collect();
184
185    let new_defaults = schema
186        .defaults
187        .iter()
188        .filter(|(id, _)| new_vertices.contains_key(*id))
189        .map(|(id, e)| (id.clone(), e.clone()))
190        .collect();
191
192    // Filter coercions: keep only those whose kind pair references surviving vertex kinds.
193    let surviving_kinds: rustc_hash::FxHashSet<&Name> =
194        new_vertices.values().map(|v| &v.kind).collect();
195    let new_coercions = schema
196        .coercions
197        .iter()
198        .filter(|((from, to), _)| surviving_kinds.contains(from) && surviving_kinds.contains(to))
199        .map(|(k, v)| (k.clone(), v.clone()))
200        .collect();
201
202    // Filter policies: keep only those keyed by surviving vertex IDs.
203    let new_policies = schema
204        .policies
205        .iter()
206        .filter(|(id, _)| new_vertices.contains_key(*id))
207        .map(|(id, e)| (id.clone(), e.clone()))
208        .collect();
209
210    Schema {
211        protocol: schema.protocol.clone(),
212        vertices: new_vertices,
213        edges: edge_map,
214        hyper_edges: new_hyper_edges,
215        constraints: new_constraints,
216        required: new_required,
217        nsids: new_nsids,
218        variants: schema.variants.clone(),
219        orderings: schema.orderings.clone(),
220        recursion_points: schema.recursion_points.clone(),
221        spans: schema.spans.clone(),
222        usage_modes: schema.usage_modes.clone(),
223        nominal: schema.nominal.clone(),
224        coercions: new_coercions,
225        mergers: new_mergers,
226        defaults: new_defaults,
227        policies: new_policies,
228        outgoing,
229        incoming,
230        between,
231    }
232}
233
234#[cfg(test)]
235#[allow(clippy::unwrap_used, clippy::expect_used)]
236mod tests {
237    use super::*;
238    use crate::builder::SchemaBuilder;
239    use crate::protocol::{EdgeRule, Protocol};
240
241    /// Build a protocol that allows ref vertices and prop edges between anything.
242    fn ref_protocol() -> Protocol {
243        Protocol {
244            name: "test-ref".to_owned(),
245            schema_theory: "ThTestSchema".to_owned(),
246            instance_theory: "ThWType".to_owned(),
247            edge_rules: vec![EdgeRule {
248                edge_kind: "prop".to_owned(),
249                src_kinds: vec![],
250                tgt_kinds: vec![],
251            }],
252            obj_kinds: vec!["object".to_owned(), "string".to_owned(), "ref".to_owned()],
253            constraint_sorts: vec![],
254            ..Protocol::default()
255        }
256    }
257
258    #[test]
259    fn collapse_single_ref() {
260        let proto = ref_protocol();
261        let schema = SchemaBuilder::new(&proto)
262            .vertex("A", "object", None)
263            .expect("A")
264            .vertex("R", "ref", None)
265            .expect("R")
266            .vertex("B", "string", None)
267            .expect("B")
268            .edge("A", "R", "prop", Some("x"))
269            .expect("A->R")
270            .edge("R", "B", "prop", None)
271            .expect("R->B")
272            .build()
273            .expect("build");
274
275        let normalized = normalize(&schema);
276
277        // The ref vertex R should be removed.
278        assert!(
279            !normalized.has_vertex("R"),
280            "ref vertex R should be removed"
281        );
282        assert_eq!(normalized.vertex_count(), 2, "should have A and B");
283
284        // There should be a direct edge A -> B.
285        let ab = normalized.edges_between("A", "B");
286        assert_eq!(ab.len(), 1, "should have one edge A -> B");
287        assert_eq!(ab[0].kind, "prop");
288        assert_eq!(ab[0].name.as_deref(), Some("x"));
289    }
290
291    #[test]
292    fn collapse_double_ref_chain() {
293        let proto = ref_protocol();
294        let schema = SchemaBuilder::new(&proto)
295            .vertex("A", "object", None)
296            .expect("A")
297            .vertex("R1", "ref", None)
298            .expect("R1")
299            .vertex("R2", "ref", None)
300            .expect("R2")
301            .vertex("B", "string", None)
302            .expect("B")
303            .edge("A", "R1", "prop", Some("link"))
304            .expect("A->R1")
305            .edge("R1", "R2", "prop", None)
306            .expect("R1->R2")
307            .edge("R2", "B", "prop", None)
308            .expect("R2->B")
309            .build()
310            .expect("build");
311
312        let normalized = normalize(&schema);
313
314        // Both ref vertices should be removed.
315        assert!(!normalized.has_vertex("R1"));
316        assert!(!normalized.has_vertex("R2"));
317        assert_eq!(normalized.vertex_count(), 2);
318
319        // Direct edge A -> B.
320        let ab = normalized.edges_between("A", "B");
321        assert_eq!(ab.len(), 1);
322        assert_eq!(ab[0].name.as_deref(), Some("link"));
323    }
324
325    #[test]
326    fn normalize_is_idempotent() {
327        let proto = ref_protocol();
328        let schema = SchemaBuilder::new(&proto)
329            .vertex("A", "object", None)
330            .expect("A")
331            .vertex("R1", "ref", None)
332            .expect("R1")
333            .vertex("R2", "ref", None)
334            .expect("R2")
335            .vertex("B", "string", None)
336            .expect("B")
337            .edge("A", "R1", "prop", Some("link"))
338            .expect("A->R1")
339            .edge("R1", "R2", "prop", None)
340            .expect("R1->R2")
341            .edge("R2", "B", "prop", None)
342            .expect("R2->B")
343            .build()
344            .expect("build");
345
346        let once = normalize(&schema);
347        let twice = normalize(&once);
348
349        assert_eq!(once.vertex_count(), twice.vertex_count());
350        assert_eq!(once.edge_count(), twice.edge_count());
351
352        // Verify same vertices.
353        for id in once.vertices.keys() {
354            assert!(
355                twice.vertices.contains_key(id),
356                "vertex {id} missing after second normalize"
357            );
358        }
359
360        // Verify same edges.
361        for edge in once.edges.keys() {
362            assert!(
363                twice.edges.contains_key(edge),
364                "edge {edge:?} missing after second normalize"
365            );
366        }
367    }
368
369    #[test]
370    fn no_refs_is_noop() {
371        let proto = ref_protocol();
372        let schema = SchemaBuilder::new(&proto)
373            .vertex("A", "object", None)
374            .expect("A")
375            .vertex("B", "string", None)
376            .expect("B")
377            .edge("A", "B", "prop", Some("name"))
378            .expect("edge")
379            .build()
380            .expect("build");
381
382        let normalized = normalize(&schema);
383        assert_eq!(normalized.vertex_count(), schema.vertex_count());
384        assert_eq!(normalized.edge_count(), schema.edge_count());
385    }
386}