Skip to main content

panproto_inst/
wtype.rs

1//! W-type instance representation and the `wtype_restrict` pipeline.
2//!
3//! A [`WInstance`] is a tree-shaped data instance conforming to a schema.
4//! The restrict operation (`wtype_restrict`) is a fused single-pass pipeline
5//! that projects a W-type instance along a migration mapping.
6//!
7//! The pipeline fuses four concerns into one BFS traversal:
8//! 1. Anchor survival check: does this node's schema vertex survive?
9//! 2. Reachability: is this node reachable from the root?
10//! 3. Ancestor contraction: who is the nearest surviving ancestor?
11//! 4. Edge resolution: what edge connects the contracted arc?
12//!
13//! Fan reconstruction (step 5) runs as a separate pass since it operates
14//! on the original fan list, not the BFS tree.
15//!
16//! The five individual step functions are retained for testing and debugging.
17
18use std::collections::{HashMap, HashSet, VecDeque};
19
20use panproto_gat::Name;
21use panproto_schema::{Edge, Schema};
22use rustc_hash::{FxHashMap, FxHashSet};
23use serde::{Deserialize, Serialize};
24use smallvec::SmallVec;
25
26use crate::error::RestrictError;
27use crate::fan::Fan;
28use crate::metadata::Node;
29use crate::value::Value;
30
31/// A compiled migration specification (minimal version for panproto-inst).
32///
33/// The full `CompiledMigration` lives in `panproto-mig`. This type provides
34/// the subset of fields that `wtype_restrict` and `functor_restrict` need.
35#[derive(Clone, Debug, Default, Serialize, Deserialize)]
36pub struct CompiledMigration {
37    /// Vertices that survive the migration.
38    pub surviving_verts: HashSet<Name>,
39    /// Edges that survive the migration.
40    pub surviving_edges: HashSet<Edge>,
41    /// Vertex remapping: source vertex ID to target vertex ID.
42    pub vertex_remap: HashMap<Name, Name>,
43    /// Edge remapping: source edge to target edge.
44    pub edge_remap: HashMap<Edge, Edge>,
45    /// Binary contraction resolver: (`src_anchor`, `tgt_anchor`) to resolved edge.
46    pub resolver: HashMap<(Name, Name), Edge>,
47    /// Hyper-edge contraction resolver.
48    pub hyper_resolver: HashMap<Name, (Name, HashMap<Name, Name>)>,
49    /// Value-level field transforms applied to surviving nodes' `extra_fields`.
50    ///
51    /// Keyed by source vertex anchor. Each entry is a list of field operations
52    /// applied in order after the node survives and is remapped.
53    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
54    pub field_transforms: HashMap<Name, Vec<FieldTransform>>,
55    /// Value-dependent survival predicates.
56    ///
57    /// During `wtype_restrict`, after checking that a node's anchor vertex
58    /// is in `surviving_verts`, the conditional survival predicate (if any)
59    /// is evaluated with the node's `extra_fields` bound as variables.
60    /// If the predicate evaluates to `false`, the node is dropped despite
61    /// its anchor surviving.
62    ///
63    /// This enables value-dependent filtering: "keep this vertex only if
64    /// attrs.level == 2" (matchAttrs), or "keep this vertex only if
65    /// class contains 'u-url'" (matchAttrsAll).
66    ///
67    /// Categorically, this is a refinement of the survival predicate
68    /// from a structural predicate (vertex set membership) to a
69    /// value-dependent predicate (vertex set membership AND value predicate).
70    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
71    pub conditional_survival: HashMap<Name, panproto_expr::Expr>,
72    /// Multi-hop expansion paths for nest-style migrations.
73    ///
74    /// When a direct edge `src --> tgt` existed in the source schema but
75    /// only a multi-hop path `src --> i1 --> i2 --> ... --> tgt` exists in
76    /// the target (as happens after `combinators::nest_field`), this map
77    /// records the sequence of intermediate target anchor ids to insert
78    /// when walking the source arc during `wtype_restrict`. The value is
79    /// the intermediate anchors only (endpoints excluded), ordered from
80    /// parent-adjacent to child-adjacent.
81    ///
82    /// Dual of the ancestor-contraction mechanism: contraction collapses
83    /// a path to a direct arc (hoist), expansion fans a direct arc out
84    /// into a path by materializing fresh view nodes (nest).
85    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
86    pub expansion_path: HashMap<(Name, Name), Vec<Name>>,
87}
88
89/// A value-level transformation on a node's `extra_fields`.
90///
91/// These are applied during `wtype_restrict` after structural operations
92/// (anchor remapping, vertex survival). They enable the instance pipeline
93/// to handle value-dependent migrations (attribute renames, drops, value
94/// transforms) that go beyond pure structural schema changes.
95#[derive(Clone, Debug, Serialize, Deserialize)]
96pub enum FieldTransform {
97    /// Rename a field key: `old_key` → `new_key`.
98    RenameField {
99        /// The current field name.
100        old_key: String,
101        /// The new field name.
102        new_key: String,
103    },
104    /// Drop a field by key.
105    DropField {
106        /// The field to remove.
107        key: String,
108    },
109    /// Add a field with a constant default value.
110    AddField {
111        /// The field name to add.
112        key: String,
113        /// The default value.
114        value: Value,
115    },
116    /// Keep only the specified fields (all others are dropped).
117    KeepFields {
118        /// The field names to retain.
119        keys: Vec<String>,
120    },
121    /// Apply an expression to a field's value, storing the result.
122    ApplyExpr {
123        /// The field whose value is transformed.
124        key: String,
125        /// The expression to evaluate (receives the field value as input).
126        expr: panproto_expr::Expr,
127        /// Optional inverse expression for round-tripping.
128        inverse: Option<panproto_expr::Expr>,
129        /// Round-trip classification of this transformation.
130        coercion_class: panproto_gat::CoercionClass,
131    },
132    /// Apply a field transform at a nested path within the Value tree.
133    ///
134    /// The path is a sequence of string keys navigating through nested
135    /// `Value::Unknown` (object) structures. The inner transform is applied
136    /// to the `extra_fields` map at the resolved path.
137    ///
138    /// This generalizes flat field transforms to operate on the full
139    /// Value algebra. A `PathTransform` with an empty path is equivalent
140    /// to applying the inner transform directly.
141    ///
142    /// Categorically, this is the action of a path functor on the
143    /// endomorphism algebra of field transforms; it lifts a transform
144    /// from a leaf to an inner node of the Value tree.
145    PathTransform {
146        /// Path to navigate (e.g., `vec!["attrs"]` for nested attrs objects).
147        path: Vec<String>,
148        /// The transform to apply at the resolved path.
149        inner: Box<Self>,
150    },
151    /// Compute a field value from an expression with access to the full
152    /// fiber over the parent vertex.
153    ///
154    /// Unlike `ApplyExpr` which binds a single field, `ComputeField` binds
155    /// all `extra_fields`, nested attrs, AND scalar values from immediate
156    /// child nodes (the dependent-sum projection) as variables, evaluates
157    /// the expression, and stores the result in the target field.
158    ///
159    /// This means `ComputeField` can access any scalar property of the
160    /// parent object, whether it was parsed as an extra field or as a
161    /// schema-defined child vertex (e.g., a string field with a `"format"`
162    /// annotation like `"at-uri"`). Computed results are always written to
163    /// `extra_fields`, making them available to subsequent transforms and
164    /// to `to_json` serialization (where `extra_fields` overwrite child
165    /// values with the same key).
166    ///
167    /// Computed fields are classified by `coercion_class`:
168    /// - `Iso`: the computation is invertible via `inverse`; the lens law
169    ///   `PutGet` holds for modifications to the computed field.
170    /// - `Opaque`: no inverse exists; the complement stores the entire
171    ///   original value. Modifications to the computed field in the view
172    ///   are not independently round-trippable. This is analogous to SQL
173    ///   computed columns: the lens law holds for the independent
174    ///   (non-derived) components of the view, and the derived components
175    ///   are re-computed deterministically.
176    ///
177    /// This enables template name computation like
178    ///   `target_key`: "name",
179    ///   `expr`: `(concat "h" (int_to_str attrs.level))`
180    /// which computes "h1", "h2", etc. from the level attribute, as well
181    /// as AT-URI decomposition where the `repo` field is a schema-defined
182    /// child vertex.
183    ComputeField {
184        /// The field to store the computed result in.
185        target_key: String,
186        /// The expression, with all `extra_fields` bound as variables.
187        expr: panproto_expr::Expr,
188        /// Optional inverse expression for round-tripping.
189        inverse: Option<panproto_expr::Expr>,
190        /// Round-trip classification of this transformation.
191        coercion_class: panproto_gat::CoercionClass,
192    },
193    /// Case analysis on node values: the coproduct eliminator for the
194    /// field transform algebra.
195    ///
196    /// Each branch is a (predicate, transforms) pair. Branches are evaluated
197    /// in order with the node's `extra_fields` (and nested `attrs.*` keys)
198    /// bound as expression variables. The first branch whose predicate
199    /// evaluates to `true` has its transforms applied. If no branch matches,
200    /// the node passes through unchanged.
201    ///
202    /// This is the dependent function space lift of field transforms:
203    /// `Π(x : Value). FieldTransform`, a transform that depends on the
204    /// runtime value of the node, not just its schema vertex. It composes
205    /// naturally with all other transform variants (including nesting
206    /// inside `PathTransform`).
207    ///
208    /// Use cases:
209    /// - `matchAttrs`: "if `level == 1` then rename to `h1`, if `level == 2`
210    ///   then rename to `h2`", where each heading level is a branch.
211    /// - Conditional attribute injection: "if `list == 'ordered'` then add
212    ///   `type: ol`, else add `type: ul`".
213    Case {
214        /// Ordered branches: first matching predicate wins.
215        branches: Vec<CaseBranch>,
216    },
217    /// Update string values that reference vertex names.
218    ///
219    /// When vertices are renamed or dropped during migration, string fields
220    /// that reference those vertices by name must be updated to reflect the
221    /// new names. This is the functorial action of the vertex rename map
222    /// on the name-reference algebra.
223    ///
224    /// For each field value:
225    /// - If the value is a `Value::Str` matching a key in `rename_map`,
226    ///   it is replaced with the mapped value (or removed if mapped to None).
227    /// - If the value is a `Value::List`, each string element is checked
228    ///   and the list is rebuilt with renames applied and drops removed.
229    ///
230    /// This handles parent reference arrays, cross-annotation links,
231    /// and any other string fields that carry vertex identity.
232    MapReferences {
233        /// The field containing references (e.g., "parents").
234        field: String,
235        /// Map from old name to new name (None = remove the reference).
236        rename_map: HashMap<String, Option<String>>,
237    },
238}
239
240impl FieldTransform {
241    /// Compute the coercion class of this field transform.
242    ///
243    /// The class describes the round-trip properties: whether the transform
244    /// is lossless (`Iso`), has a left inverse (`Retraction`), is a
245    /// deterministic derivation (`Projection`), or has no structural
246    /// round-trip property (`Opaque`).
247    #[must_use]
248    pub fn coercion_class(&self) -> panproto_gat::CoercionClass {
249        match self {
250            Self::RenameField { .. } => panproto_gat::CoercionClass::Iso,
251            Self::DropField { .. } | Self::KeepFields { .. } => panproto_gat::CoercionClass::Opaque,
252            Self::AddField { .. } | Self::MapReferences { .. } => {
253                panproto_gat::CoercionClass::Retraction
254            }
255            Self::ApplyExpr { coercion_class, .. } | Self::ComputeField { coercion_class, .. } => {
256                *coercion_class
257            }
258            Self::PathTransform { inner, .. } => inner.coercion_class(),
259            Self::Case { branches } => branches
260                .iter()
261                .flat_map(|b| b.transforms.iter())
262                .fold(panproto_gat::CoercionClass::Iso, |acc, t| {
263                    acc.compose(t.coercion_class())
264                }),
265        }
266    }
267}
268
269/// A branch in a [`FieldTransform::Case`] analysis.
270///
271/// Contains a predicate expression and a sequence of transforms to apply
272/// if the predicate evaluates to `true`.
273#[derive(Clone, Debug, Serialize, Deserialize)]
274pub struct CaseBranch {
275    /// Predicate evaluated with the node's `extra_fields` as variables.
276    pub predicate: panproto_expr::Expr,
277    /// Transforms to apply if the predicate is true.
278    pub transforms: Vec<FieldTransform>,
279}
280
281impl CompiledMigration {
282    /// Compute the composite coercion class of all field transforms in this migration.
283    ///
284    /// Folds over every transform across all vertices using `CoercionClass::compose`,
285    /// starting from `Iso` (the identity element).
286    #[must_use]
287    pub fn coercion_class(&self) -> panproto_gat::CoercionClass {
288        self.field_transforms
289            .values()
290            .flat_map(|ts| ts.iter())
291            .fold(panproto_gat::CoercionClass::Iso, |acc, t| {
292                acc.compose(t.coercion_class())
293            })
294    }
295
296    /// Add a field rename transform for a vertex.
297    ///
298    /// After the node survives and its anchor is remapped, the field
299    /// `old_key` in `extra_fields` is renamed to `new_key`.
300    pub fn add_field_rename(&mut self, vertex: &str, old_key: &str, new_key: &str) {
301        self.field_transforms
302            .entry(Name::from(vertex))
303            .or_default()
304            .push(FieldTransform::RenameField {
305                old_key: old_key.to_owned(),
306                new_key: new_key.to_owned(),
307            });
308    }
309
310    /// Add a field drop transform for a vertex.
311    ///
312    /// The field `key` is removed from the node's `extra_fields`.
313    pub fn add_field_drop(&mut self, vertex: &str, key: &str) {
314        self.field_transforms
315            .entry(Name::from(vertex))
316            .or_default()
317            .push(FieldTransform::DropField {
318                key: key.to_owned(),
319            });
320    }
321
322    /// Add a field with a default value for a vertex.
323    ///
324    /// The field `key` is added to `extra_fields` with the given value
325    /// if it does not already exist.
326    pub fn add_field_default(&mut self, vertex: &str, key: &str, value: Value) {
327        self.field_transforms
328            .entry(Name::from(vertex))
329            .or_default()
330            .push(FieldTransform::AddField {
331                key: key.to_owned(),
332                value,
333            });
334    }
335
336    /// Add a keep-fields transform for a vertex.
337    ///
338    /// Only the specified fields are retained in `extra_fields`;
339    /// all others are dropped.
340    pub fn add_field_keep(&mut self, vertex: &str, keys: &[&str]) {
341        self.field_transforms
342            .entry(Name::from(vertex))
343            .or_default()
344            .push(FieldTransform::KeepFields {
345                keys: keys.iter().map(|k| (*k).to_owned()).collect(),
346            });
347    }
348
349    /// Add an expression transform for a field on a vertex.
350    ///
351    /// The expression is evaluated with the field's current value
352    /// bound to the variable named `key`, and the result replaces
353    /// the field value.
354    pub fn add_field_expr(&mut self, vertex: &str, key: &str, expr: panproto_expr::Expr) {
355        self.field_transforms
356            .entry(Name::from(vertex))
357            .or_default()
358            .push(FieldTransform::ApplyExpr {
359                key: key.to_owned(),
360                expr,
361                inverse: None,
362                coercion_class: panproto_gat::CoercionClass::Opaque,
363            });
364    }
365
366    /// Add a path-based field transform for a vertex.
367    ///
368    /// The inner transform is applied at the nested path within the
369    /// node's `extra_fields` tree, navigating through `Value::Unknown`
370    /// maps at each path segment.
371    pub fn add_path_transform(&mut self, vertex: &str, path: &[&str], inner: FieldTransform) {
372        self.field_transforms
373            .entry(Name::from(vertex))
374            .or_default()
375            .push(FieldTransform::PathTransform {
376                path: path.iter().map(|s| (*s).to_owned()).collect(),
377                inner: Box::new(inner),
378            });
379    }
380
381    /// Add a computed field transform for a vertex.
382    ///
383    /// The expression is evaluated with all `extra_fields` (and nested
384    /// attrs) bound as variables, and the result is stored in `target_key`.
385    pub fn add_computed_field(
386        &mut self,
387        vertex: &str,
388        target_key: &str,
389        expr: panproto_expr::Expr,
390    ) {
391        self.field_transforms
392            .entry(Name::from(vertex))
393            .or_default()
394            .push(FieldTransform::ComputeField {
395                target_key: target_key.to_owned(),
396                expr,
397                inverse: None,
398                coercion_class: panproto_gat::CoercionClass::Opaque,
399            });
400    }
401
402    /// Add a conditional survival predicate for a vertex.
403    ///
404    /// The expression is evaluated with the node's `extra_fields` bound
405    /// as variables. If it returns false, the node is dropped.
406    pub fn add_conditional_survival(&mut self, vertex: &str, predicate: panproto_expr::Expr) {
407        self.conditional_survival
408            .entry(Name::from(vertex))
409            .or_insert(predicate);
410    }
411
412    /// Add a reference map transform for a vertex's field.
413    ///
414    /// String values (or encoded array elements) in the given field
415    /// are renamed or removed according to the `rename_map`.
416    pub fn add_map_references(
417        &mut self,
418        vertex: &str,
419        field: &str,
420        rename_map: HashMap<String, Option<String>>,
421    ) {
422        self.field_transforms
423            .entry(Name::from(vertex))
424            .or_default()
425            .push(FieldTransform::MapReferences {
426                field: field.to_owned(),
427                rename_map,
428            });
429    }
430
431    /// Add a case-analysis transform for a vertex.
432    ///
433    /// The branches are evaluated in order; the first matching predicate's
434    /// transforms are applied. This is the dependent function space lift
435    /// of field transforms.
436    pub fn add_case_transform(&mut self, vertex: &str, branches: Vec<CaseBranch>) {
437        self.field_transforms
438            .entry(Name::from(vertex))
439            .or_default()
440            .push(FieldTransform::Case { branches });
441    }
442}
443
444/// A W-type instance: tree-shaped data conforming to a schema.
445///
446/// Nodes are anchored to schema vertices, connected by arcs that
447/// correspond to schema edges. The tree is rooted at `root`.
448/// Precomputed `parent_map` and `children_map` enable fast traversal.
449#[derive(Clone, Debug, Serialize, Deserialize)]
450pub struct WInstance {
451    /// All nodes keyed by their numeric ID.
452    pub nodes: HashMap<u32, Node>,
453    /// Arcs: (`parent_id`, `child_id`, `schema_edge`).
454    pub arcs: Vec<(u32, u32, Edge)>,
455    /// Hyper-edge fans.
456    pub fans: Vec<Fan>,
457    /// Root node ID.
458    pub root: u32,
459    /// Schema vertex that the root node is anchored to.
460    pub schema_root: Name,
461    /// Precomputed parent map: `child_id` -> `parent_id`.
462    pub parent_map: HashMap<u32, u32>,
463    /// Precomputed children map: `parent_id` -> child IDs.
464    pub children_map: HashMap<u32, SmallVec<u32, 4>>,
465}
466
467impl WInstance {
468    /// Build a new W-type instance, computing parent and children maps from arcs.
469    #[must_use]
470    pub fn new(
471        nodes: HashMap<u32, Node>,
472        arcs: Vec<(u32, u32, Edge)>,
473        fans: Vec<Fan>,
474        root: u32,
475        schema_root: Name,
476    ) -> Self {
477        let mut parent_map = HashMap::with_capacity(arcs.len());
478        let mut children_map: HashMap<u32, SmallVec<u32, 4>> = HashMap::new();
479        for &(parent, child, _) in &arcs {
480            parent_map.insert(child, parent);
481            children_map.entry(parent).or_default().push(child);
482        }
483        Self {
484            nodes,
485            arcs,
486            fans,
487            root,
488            schema_root,
489            parent_map,
490            children_map,
491        }
492    }
493
494    /// Returns the number of nodes.
495    #[inline]
496    #[must_use]
497    pub fn node_count(&self) -> usize {
498        self.nodes.len()
499    }
500
501    /// Returns the number of arcs.
502    #[inline]
503    #[must_use]
504    pub fn arc_count(&self) -> usize {
505        self.arcs.len()
506    }
507
508    /// Get a node by ID.
509    #[inline]
510    #[must_use]
511    pub fn node(&self, id: u32) -> Option<&Node> {
512        self.nodes.get(&id)
513    }
514
515    /// Get the children of a node.
516    #[inline]
517    #[must_use]
518    pub fn children(&self, id: u32) -> &[u32] {
519        self.children_map.get(&id).map_or(&[], SmallVec::as_slice)
520    }
521
522    /// Get the parent of a node.
523    #[inline]
524    #[must_use]
525    pub fn parent(&self, id: u32) -> Option<u32> {
526        self.parent_map.get(&id).copied()
527    }
528}
529
530// ---------------------------------------------------------------------------
531// Step 1: Signature restriction (retained for testing)
532// ---------------------------------------------------------------------------
533
534/// Keep nodes whose anchor vertex is in the surviving vertex set.
535#[must_use]
536pub fn anchor_surviving(instance: &WInstance, surviving_verts: &HashSet<Name>) -> HashSet<u32> {
537    instance
538        .nodes
539        .iter()
540        .filter(|(_, node)| surviving_verts.contains(&node.anchor))
541        .map(|(&id, _)| id)
542        .collect()
543}
544
545// ---------------------------------------------------------------------------
546// Step 2: Reachability BFS (retained for testing)
547// ---------------------------------------------------------------------------
548
549// ---------------------------------------------------------------------------
550// Step 3: Ancestor contraction with path compression (retained for testing)
551// ---------------------------------------------------------------------------
552
553/// For each surviving non-root node, find its nearest surviving ancestor.
554///
555/// Uses path compression: when we walk the parent chain for a node,
556/// we cache the result for every intermediate node visited. Subsequent
557/// queries hitting a cached node return in O(1). This gives O(n)
558/// amortized complexity instead of O(n × depth).
559#[must_use]
560pub fn ancestor_contraction(instance: &WInstance, surviving: &HashSet<u32>) -> HashMap<u32, u32> {
561    let mut cache: FxHashMap<u32, u32> = FxHashMap::default();
562    let mut ancestors = HashMap::new();
563
564    for &node_id in surviving {
565        if node_id == instance.root {
566            continue;
567        }
568
569        // Check cache first
570        if let Some(&cached) = cache.get(&node_id) {
571            ancestors.insert(node_id, cached);
572            continue;
573        }
574
575        // Walk the parent chain, recording the path for compression
576        let mut path = Vec::new();
577        let mut current = node_id;
578        let mut found_ancestor = None;
579
580        while let Some(parent) = instance.parent(current) {
581            if let Some(&cached) = cache.get(&parent) {
582                found_ancestor = Some(cached);
583                break;
584            }
585            if surviving.contains(&parent) {
586                found_ancestor = Some(parent);
587                break;
588            }
589            path.push(parent);
590            current = parent;
591        }
592
593        // Path compression: cache the ancestor for all nodes on the path
594        if let Some(ancestor) = found_ancestor {
595            ancestors.insert(node_id, ancestor);
596            cache.insert(node_id, ancestor);
597            for &intermediate in &path {
598                cache.insert(intermediate, ancestor);
599            }
600        }
601    }
602    ancestors
603}
604
605// ---------------------------------------------------------------------------
606// Step 4: Edge resolution (retained for testing)
607// ---------------------------------------------------------------------------
608
609/// Resolve the edge for a contracted arc in the target schema.
610///
611/// Avoids allocating a `(String, String)` tuple for the resolver lookup
612/// by checking the resolver with borrowed references.
613///
614/// # Errors
615///
616/// Returns `RestrictError::NoEdgeFound` if no edge exists, or
617/// `RestrictError::AmbiguousEdge` if multiple edges exist without
618/// a resolver entry.
619pub fn resolve_edge(
620    tgt_schema: &Schema,
621    resolver: &HashMap<(Name, Name), Edge>,
622    src_v: &str,
623    tgt_v: &str,
624) -> Result<Edge, RestrictError> {
625    // Check resolver: avoid allocation by scanning for matching key
626    for ((k_src, k_tgt), edge) in resolver {
627        if k_src == src_v && k_tgt == tgt_v {
628            return Ok(edge.clone());
629        }
630    }
631
632    // Fall back to unique-edge lookup
633    let candidates = tgt_schema.edges_between(src_v, tgt_v);
634    match candidates.len() {
635        0 => Err(RestrictError::NoEdgeFound {
636            src: src_v.to_string(),
637            tgt: tgt_v.to_string(),
638        }),
639        1 => Ok(candidates[0].clone()),
640        n => Err(RestrictError::AmbiguousEdge {
641            src: src_v.to_string(),
642            tgt: tgt_v.to_string(),
643            count: n,
644        }),
645    }
646}
647
648// ---------------------------------------------------------------------------
649// Step 5: Fan reconstruction (retained for testing)
650// ---------------------------------------------------------------------------
651
652/// Reconstruct fans after restriction.
653///
654/// # Errors
655///
656/// Returns `RestrictError::FanReconstructionFailed` if a fan cannot
657/// be validly reconstructed.
658pub fn reconstruct_fans(
659    instance: &WInstance,
660    surviving: &FxHashSet<u32>,
661    _ancestors: &FxHashMap<u32, u32>,
662    migration: &CompiledMigration,
663    _tgt_schema: &Schema,
664) -> Result<Vec<Fan>, RestrictError> {
665    let mut result = Vec::new();
666
667    for fan in &instance.fans {
668        if !surviving.contains(&fan.parent) {
669            continue;
670        }
671
672        let surviving_children: HashMap<String, u32> = fan
673            .children
674            .iter()
675            .filter(|(_, node_id)| surviving.contains(node_id))
676            .map(|(label, node_id)| (label.clone(), *node_id))
677            .collect();
678
679        if surviving_children.is_empty() {
680            continue;
681        }
682
683        if let Some((new_he_id, label_map)) =
684            migration.hyper_resolver.get(fan.hyper_edge_id.as_str())
685        {
686            let mut new_children = HashMap::new();
687            for (old_label, &node_id) in &surviving_children {
688                let new_label = label_map
689                    .get(old_label.as_str())
690                    .map_or_else(|| old_label.clone(), std::string::ToString::to_string);
691                new_children.insert(new_label, node_id);
692            }
693            result.push(Fan {
694                hyper_edge_id: new_he_id.to_string(),
695                parent: fan.parent,
696                children: new_children,
697            });
698        } else {
699            result.push(Fan {
700                hyper_edge_id: fan.hyper_edge_id.clone(),
701                parent: fan.parent,
702                children: surviving_children,
703            });
704        }
705    }
706
707    Ok(result)
708}
709
710// ---------------------------------------------------------------------------
711// Main restrict function: fused single-pass pipeline
712// ---------------------------------------------------------------------------
713
714/// The restrict operation for W-type instances.
715///
716/// Executes a fused single-pass pipeline that combines anchor checking,
717/// BFS reachability, ancestor contraction, and edge resolution into one
718/// traversal. Fan reconstruction runs as a separate pass.
719///
720/// The fused approach visits each node at most once (O(n)) versus
721/// the sequential 5-step approach which makes 3-4 passes.
722///
723/// # Errors
724///
725/// Returns `RestrictError` if edge resolution fails or the root
726/// is pruned during restriction.
727pub fn wtype_restrict(
728    instance: &WInstance,
729    _src_schema: &Schema,
730    tgt_schema: &Schema,
731    migration: &CompiledMigration,
732) -> Result<WInstance, RestrictError> {
733    // Check root survives
734    let root_node = instance
735        .nodes
736        .get(&instance.root)
737        .ok_or(RestrictError::RootPruned)?;
738    let root_target_anchor = migration
739        .vertex_remap
740        .get(&root_node.anchor)
741        .unwrap_or(&root_node.anchor);
742    if !migration.surviving_verts.contains(root_target_anchor) {
743        return Err(RestrictError::RootPruned);
744    }
745
746    let conditional_fail = precompute_conditional_fail(instance, migration);
747
748    // Fused BFS: traverse the tree from root, tracking the nearest
749    // surviving ancestor for each node as we go.
750    //
751    // For each node in the BFS:
752    //   - If its anchor survives (and not in conditional_fail): it
753    //     becomes part of the result. Its nearest surviving ancestor
754    //     is used to build an arc. It becomes the "current surviving
755    //     ancestor" for its subtree.
756    //   - If its anchor does not survive: skip it, but continue BFS
757    //     into its children (they might survive). Pass along the
758    //     current surviving ancestor unchanged.
759
760    let mut new_nodes: HashMap<u32, Node> = HashMap::new();
761    let mut new_arcs: Vec<(u32, u32, Edge)> = Vec::new();
762    let mut surviving_set: FxHashSet<u32> = FxHashSet::default();
763
764    // Counter for synthesized intermediate node ids used by nest-style
765    // `expansion_path` handling. Starts above any id present in the
766    // source so we never collide with instance-owned node ids.
767    let mut next_synth_id: u32 = instance
768        .nodes
769        .keys()
770        .copied()
771        .max()
772        .map_or(0, |m| m.saturating_add(1));
773
774    // Queue entries: (node_id, nearest_surviving_ancestor_id)
775    let mut queue: VecDeque<(u32, Option<u32>)> = VecDeque::new();
776
777    // Process root: remap, check conditional survival, apply field transforms.
778    let root_node_cloned = prepare_root_node(root_node, migration, instance)?;
779    new_nodes.insert(instance.root, root_node_cloned);
780    surviving_set.insert(instance.root);
781    queue.push_back((instance.root, None));
782
783    while let Some((current_id, ancestor_id)) = queue.pop_front() {
784        let current_survives = surviving_set.contains(&current_id);
785        // The ancestor for children: if current survives, it's the new ancestor;
786        // otherwise, pass along the existing ancestor.
787        let child_ancestor = if current_survives {
788            Some(current_id)
789        } else {
790            ancestor_id
791        };
792
793        for &child_id in instance.children(current_id) {
794            let Some(child_node) = instance.nodes.get(&child_id) else {
795                continue;
796            };
797
798            // Check if this vertex survives: look up the remapped target name,
799            // falling back to the source name for unmapped vertices.
800            let target_anchor = migration
801                .vertex_remap
802                .get(&child_node.anchor)
803                .unwrap_or(&child_node.anchor);
804            if migration.surviving_verts.contains(target_anchor)
805                && !conditional_fail.contains(&child_id)
806            {
807                // This child survives; add it to results
808                surviving_set.insert(child_id);
809                let mut new_node = child_node.clone();
810                if let Some(remapped) = migration.vertex_remap.get(&child_node.anchor) {
811                    new_node.anchor.clone_from(remapped);
812                }
813                // Apply value-level field transforms if any exist for this vertex.
814                // Collect scalar child values from the original instance so that
815                // ComputeField / Case / ApplyExpr can access the full fiber.
816                if let Some(transforms) = migration.field_transforms.get(&child_node.anchor) {
817                    let scalars = collect_scalar_child_values(instance, child_id);
818                    apply_field_transforms(&mut new_node, transforms, &scalars);
819                }
820                new_nodes.insert(child_id, new_node.clone());
821
822                // Build the arc from nearest surviving ancestor to this node.
823                //
824                // Fast path: a direct edge exists in the target between the
825                // ancestor and this node.
826                //
827                // Expansion path: `resolve_edge` fails because a nest-style
828                // migration removed the direct arc and replaced it with a
829                // multi-hop path through newly introduced intermediates.
830                // The compiled migration's `expansion_path` records the
831                // intermediate anchor ids; we synthesize fresh view nodes
832                // for each of them and stitch the chain.
833                if let Some(anc_id) = child_ancestor {
834                    connect_ancestor_to_child(
835                        anc_id,
836                        child_id,
837                        &new_node.anchor,
838                        &mut new_nodes,
839                        &mut new_arcs,
840                        &mut surviving_set,
841                        &mut next_synth_id,
842                        migration,
843                        tgt_schema,
844                    )?;
845                }
846            }
847
848            // Always continue BFS into children (non-surviving intermediate
849            // nodes may have surviving descendants)
850            queue.push_back((child_id, child_ancestor));
851        }
852    }
853
854    // Step 5: Fan reconstruction (separate pass: operates on original fans)
855    let fused_surviving = &surviving_set;
856    let empty_ancestors = FxHashMap::default();
857    let new_fans = reconstruct_fans(
858        instance,
859        fused_surviving,
860        &empty_ancestors,
861        migration,
862        tgt_schema,
863    )?;
864
865    let new_schema_root = migration
866        .vertex_remap
867        .get(&instance.schema_root)
868        .cloned()
869        .unwrap_or_else(|| instance.schema_root.clone());
870
871    Ok(WInstance::new(
872        new_nodes,
873        new_arcs,
874        new_fans,
875        instance.root,
876        new_schema_root,
877    ))
878}
879
880/// Precompute the set of node ids whose conditional-survival predicate
881/// evaluates to `false` against their original extra fields.
882///
883/// This ensures the BFS result is order-independent (functorial): the
884/// predicate is evaluated against original values, not values that may
885/// have been modified by ancestor contraction during restrict.
886fn precompute_conditional_fail(
887    instance: &WInstance,
888    migration: &CompiledMigration,
889) -> FxHashSet<u32> {
890    if migration.conditional_survival.is_empty() {
891        return FxHashSet::default();
892    }
893    instance
894        .nodes
895        .iter()
896        .filter_map(|(&id, node)| {
897            let pred = migration.conditional_survival.get(&node.anchor)?;
898            let env = build_env_from_extra_fields(&node.extra_fields);
899            let config = panproto_expr::EvalConfig::default();
900            matches!(
901                panproto_expr::eval(pred, &env, &config),
902                Ok(panproto_expr::Literal::Bool(false))
903            )
904            .then_some(id)
905        })
906        .collect()
907}
908
909/// Emit arcs connecting a surviving ancestor to a surviving child during
910/// restrict, handling both the direct-edge fast path and the nest-style
911/// expansion path that introduces synthesized intermediate nodes.
912#[allow(clippy::too_many_arguments)]
913fn connect_ancestor_to_child(
914    anc_id: u32,
915    child_id: u32,
916    child_anchor: &Name,
917    new_nodes: &mut HashMap<u32, Node>,
918    new_arcs: &mut Vec<(u32, u32, Edge)>,
919    surviving_set: &mut FxHashSet<u32>,
920    next_synth_id: &mut u32,
921    migration: &CompiledMigration,
922    tgt_schema: &Schema,
923) -> Result<(), RestrictError> {
924    let anc_anchor = new_nodes
925        .get(&anc_id)
926        .ok_or(RestrictError::RootPruned)?
927        .anchor
928        .clone();
929    let child_anchor = child_anchor.clone();
930    match resolve_edge(tgt_schema, &migration.resolver, &anc_anchor, &child_anchor) {
931        Ok(edge) => {
932            new_arcs.push((anc_id, child_id, edge));
933            Ok(())
934        }
935        Err(restrict_err) => {
936            let Some(intermediates) = migration
937                .expansion_path
938                .get(&(anc_anchor.clone(), child_anchor.clone()))
939            else {
940                return Err(restrict_err);
941            };
942            // Emit the expansion chain:
943            //   anc_id --> synth_1 --> synth_2 --> ... --> child_id
944            let mut prev_id = anc_id;
945            let mut prev_anchor = anc_anchor;
946            for intermediate_anchor in intermediates {
947                let synth_id = *next_synth_id;
948                *next_synth_id = next_synth_id.saturating_add(1);
949                let synth_node = Node::new(synth_id, intermediate_anchor.clone());
950                new_nodes.insert(synth_id, synth_node);
951                surviving_set.insert(synth_id);
952                let edge = resolve_edge(
953                    tgt_schema,
954                    &migration.resolver,
955                    &prev_anchor,
956                    intermediate_anchor,
957                )?;
958                new_arcs.push((prev_id, synth_id, edge));
959                prev_id = synth_id;
960                prev_anchor = intermediate_anchor.clone();
961            }
962            let final_edge =
963                resolve_edge(tgt_schema, &migration.resolver, &prev_anchor, &child_anchor)?;
964            new_arcs.push((prev_id, child_id, final_edge));
965            Ok(())
966        }
967    }
968}
969
970// ---------------------------------------------------------------------------
971// Value-level field transforms
972// ---------------------------------------------------------------------------
973
974/// Apply a sequence of field transforms to a node's `extra_fields`.
975///
976/// Called during `wtype_restrict` after a node survives and its anchor
977/// is remapped. Operations are applied in order.
978///
979/// The `child_scalars` parameter provides the dependent-sum projection:
980/// scalar values from the node's immediate child vertices, keyed by edge
981/// name. This extends the expression environment beyond `extra_fields`
982/// to include the full fiber data over the parent vertex in the
983/// Grothendieck fibration. Binding precedence: `extra_fields` override
984/// `child_scalars` on key collision, which is correct because
985/// `extra_fields` may contain values already transformed by prior steps
986/// in the transform sequence.
987///
988/// Computed fields (via `ComputeField`) are derived data in the sense of
989/// dependent projections: they are functionally determined by the source
990/// fiber data. The `CoercionClass` on each `ComputeField` classifies the
991/// round-trip behavior:
992/// - `Iso`: the computation is invertible; `PutGet` holds for
993///   modifications to the computed field (via the `inverse` expression).
994/// - `Opaque`: no inverse exists; the complement stores the entire
995///   original value. Modifications to the computed field in the view
996///   are not independently round-trippable. This is analogous to SQL
997///   computed columns or database views with derived columns. `PutGet`
998///   holds for the independent (non-derived) components of the view,
999///   and derived components are re-computed deterministically.
1000pub fn apply_field_transforms(
1001    node: &mut Node,
1002    transforms: &[FieldTransform],
1003    child_scalars: &HashMap<String, Value>,
1004) {
1005    for transform in transforms {
1006        match transform {
1007            FieldTransform::RenameField { old_key, new_key } => {
1008                if let Some(val) = node.extra_fields.remove(old_key) {
1009                    node.extra_fields.insert(new_key.clone(), val);
1010                }
1011            }
1012            FieldTransform::DropField { key } => {
1013                node.extra_fields.remove(key);
1014            }
1015            FieldTransform::AddField { key, value } => {
1016                node.extra_fields
1017                    .entry(key.clone())
1018                    .or_insert_with(|| value.clone());
1019            }
1020            FieldTransform::KeepFields { keys } => {
1021                node.extra_fields.retain(|k, _| keys.contains(k));
1022            }
1023            FieldTransform::ApplyExpr { key, expr, .. } => {
1024                // Special case: "__value__" targets node.value (leaf node primary value),
1025                // not extra_fields. This is how coercions (kind changes) are applied.
1026                if key == "__value__" {
1027                    if let Some(crate::value::FieldPresence::Present(val)) = &node.value {
1028                        let input = value_to_expr_literal(val);
1029                        let env = panproto_expr::Env::new()
1030                            .extend(std::sync::Arc::from("v"), input.clone())
1031                            .extend(std::sync::Arc::from("__value__"), input);
1032                        let config = panproto_expr::EvalConfig::default();
1033                        if let Ok(result) = panproto_expr::eval(expr, &env, &config) {
1034                            node.value = Some(crate::value::FieldPresence::Present(
1035                                expr_literal_to_value(&result),
1036                            ));
1037                        }
1038                    }
1039                } else if let Some(val) = node
1040                    .extra_fields
1041                    .get(key)
1042                    .or_else(|| child_scalars.get(key))
1043                {
1044                    // Check extra_fields first (may contain a value modified by
1045                    // an earlier transform in the sequence), then child_scalars.
1046                    // Result is always written to extra_fields regardless of where
1047                    // the source was found: in to_json, extra_fields are serialized
1048                    // after children, so the transform output is authoritative over
1049                    // the original child vertex value.
1050                    let input = value_to_expr_literal(val);
1051                    let env =
1052                        panproto_expr::Env::new().extend(std::sync::Arc::from(key.as_str()), input);
1053                    let config = panproto_expr::EvalConfig::default();
1054                    if let Ok(result) = panproto_expr::eval(expr, &env, &config) {
1055                        node.extra_fields
1056                            .insert(key.clone(), expr_literal_to_value(&result));
1057                    }
1058                }
1059            }
1060            FieldTransform::ComputeField {
1061                target_key, expr, ..
1062            } => {
1063                let env = build_env_with_children(&node.extra_fields, child_scalars);
1064                let config = panproto_expr::EvalConfig::default();
1065                if let Ok(result) = panproto_expr::eval(expr, &env, &config) {
1066                    node.extra_fields
1067                        .insert(target_key.clone(), expr_literal_to_value(&result));
1068                }
1069            }
1070            FieldTransform::PathTransform { path, inner } => {
1071                if path.is_empty() {
1072                    // Empty path = apply directly. PathTransform operates on nested
1073                    // extra_fields, not the instance tree, so child_scalars is empty.
1074                    apply_field_transforms(node, std::slice::from_ref(inner), &HashMap::new());
1075                } else {
1076                    apply_path_transform(node, path, inner);
1077                }
1078            }
1079            FieldTransform::MapReferences { field, rename_map } => {
1080                apply_map_references(node, field, rename_map);
1081            }
1082            FieldTransform::Case { branches } => {
1083                // Case predicates evaluate against the full fiber (extra_fields +
1084                // child scalars) so that branching can depend on schema-defined
1085                // scalar child values.
1086                let env = build_env_with_children(&node.extra_fields, child_scalars);
1087                let config = panproto_expr::EvalConfig::default();
1088                for branch in branches {
1089                    let result = panproto_expr::eval(&branch.predicate, &env, &config);
1090                    if matches!(result, Ok(panproto_expr::Literal::Bool(true))) {
1091                        apply_field_transforms(node, &branch.transforms, child_scalars);
1092                        break;
1093                    }
1094                }
1095            }
1096        }
1097    }
1098}
1099
1100/// Navigate into nested `Value::Unknown` maps along `path` and apply the
1101/// inner transform at the resolved location.
1102fn apply_path_transform(node: &mut Node, path: &[String], inner: &FieldTransform) {
1103    let first = &path[0];
1104    if let Some(Value::Unknown(map)) = node.extra_fields.get_mut(first) {
1105        if path.len() == 1 {
1106            // At the target; apply inner transform to this map.
1107            // PathTransform operates on nested extra_fields, not the
1108            // instance tree, so child_scalars is empty.
1109            let mut temp_node = Node::new(0, "");
1110            temp_node.extra_fields = std::mem::take(map);
1111            apply_field_transforms(&mut temp_node, std::slice::from_ref(inner), &HashMap::new());
1112            *map = temp_node.extra_fields;
1113        } else {
1114            // Recurse deeper; wrap the remaining path in a temporary node
1115            let rest = &path[1..];
1116            let mut temp_node = Node::new(0, "");
1117            temp_node.extra_fields = std::mem::take(map);
1118            apply_path_transform(&mut temp_node, rest, inner);
1119            *map = temp_node.extra_fields;
1120        }
1121    }
1122}
1123
1124/// Apply a `MapReferences` transform to a node's field, handling both
1125/// a scalar `Value::Str` reference and a `Value::List` of references.
1126///
1127/// This is the action of the rename map on string-typed leaves that
1128/// denote vertex names. The transform is functorial: it commutes with
1129/// the list constructor (renaming a list of references is the same as
1130/// mapping the rename over the list).
1131fn apply_map_references(
1132    node: &mut Node,
1133    field: &str,
1134    rename_map: &HashMap<String, Option<String>>,
1135) {
1136    if let Some(val) = node.extra_fields.get_mut(field) {
1137        match val {
1138            Value::Str(s) => {
1139                if let Some(replacement) = rename_map.get(s.as_str()) {
1140                    match replacement {
1141                        Some(new_name) => *s = new_name.clone(),
1142                        None => {
1143                            node.extra_fields.remove(field);
1144                        }
1145                    }
1146                }
1147            }
1148            Value::List(items) => {
1149                // Rebuild the list with renames applied and entries
1150                // mapped to None dropped. Non-string items pass through
1151                // unchanged: `MapReferences` is specifically the action
1152                // of the rename map on string leaves.
1153                let mut new_items = Vec::with_capacity(items.len());
1154                for item in items.iter() {
1155                    match item {
1156                        Value::Str(s) => match rename_map.get(s.as_str()) {
1157                            Some(Some(new_name)) => {
1158                                new_items.push(Value::Str(new_name.clone()));
1159                            }
1160                            Some(None) => {} // drop
1161                            None => new_items.push(Value::Str(s.clone())),
1162                        },
1163                        other => new_items.push(other.clone()),
1164                    }
1165                }
1166                *items = new_items;
1167            }
1168            _ => {}
1169        }
1170    }
1171}
1172
1173/// Collect scalar values from a node's immediate children, keyed by edge name.
1174///
1175/// This is the dependent-sum projection from the total fiber over vertex
1176/// `v` in the Grothendieck fibration. In the W-type model, a node at `v`
1177/// with children via edges `e_i: v -> w_i` has total fiber
1178///
1179/// ```text
1180/// Fiber(v) = ExtraFields(v) x Product_{i} Fiber(w_i)
1181/// ```
1182///
1183/// This function projects the leaf (scalar) components of the product
1184/// into a flat map, making them available to fiber endomorphisms (field
1185/// transforms). Only children with a present leaf value are included;
1186/// structural children (objects, arrays) are omitted because they are
1187/// not representable as flat `Value` entries.
1188#[must_use]
1189pub fn collect_scalar_child_values(instance: &WInstance, node_id: u32) -> HashMap<String, Value> {
1190    let mut result = HashMap::new();
1191    for &(parent, child, ref edge) in &instance.arcs {
1192        if parent != node_id {
1193            continue;
1194        }
1195        let Some(child_node) = instance.nodes.get(&child) else {
1196            continue;
1197        };
1198        if let Some(crate::value::FieldPresence::Present(val)) = &child_node.value {
1199            let field_name = edge.name.as_deref().unwrap_or(&*edge.tgt);
1200            result.insert(field_name.to_string(), val.clone());
1201        }
1202    }
1203    result
1204}
1205
1206/// Build an expression evaluation environment from the full fiber over a
1207/// vertex: both `extra_fields` and scalar child values.
1208///
1209/// The binding order is `child_scalars` first, then `extra_fields`. This
1210/// ensures that `extra_fields` take precedence on key collision, which
1211/// is correct because `extra_fields` may contain values modified by
1212/// earlier transforms in the same sequence, and the transform pipeline
1213/// must see the most recent values.
1214///
1215/// Categorically, this constructs the left-biased coproduct injection
1216/// `ExtraFields + ChildScalars → Env` where `ExtraFields` has priority:
1217/// both maps contribute bindings, but on key collision the `ExtraFields`
1218/// value wins. This models the fiber projection
1219/// `π : ExtraFields(v) × Π_e Fiber(target(e)) → Env` where
1220/// `ExtraFields` carries transform-local state and `ChildScalars`
1221/// carries the dependent-sum projection of the structural children.
1222#[must_use]
1223pub fn build_env_with_children(
1224    fields: &HashMap<String, Value>,
1225    child_scalars: &HashMap<String, Value>,
1226) -> panproto_expr::Env {
1227    // Start with child scalars, then overlay extra_fields so that
1228    // extra_fields take precedence.
1229    let mut combined = child_scalars.clone();
1230    for (key, val) in fields {
1231        combined.insert(key.clone(), val.clone());
1232    }
1233    build_env_from_extra_fields(&combined)
1234}
1235
1236/// Build an evaluation environment from a node's `extra_fields`.
1237///
1238/// Each field is bound as a top-level variable. If an `attrs` field
1239/// contains a `Value::Unknown` map, its entries are also bound with
1240/// qualified names (e.g., `attrs.level`).
1241#[must_use]
1242pub fn build_env_from_extra_fields(fields: &HashMap<String, Value>) -> panproto_expr::Env {
1243    let mut env = panproto_expr::Env::new();
1244    for (key, val) in fields {
1245        let lit = value_to_expr_literal(val);
1246        // Bind flat key
1247        env = env.extend(std::sync::Arc::from(key.as_str()), lit.clone());
1248        // Also bind as attrs.key (so predicates work regardless of nesting style)
1249        if key != "attrs" && key != "name" && key != "$type" && key != "parents" {
1250            let qualified = format!("attrs.{key}");
1251            env = env.extend(std::sync::Arc::from(qualified.as_str()), lit);
1252        }
1253    }
1254    // Also bind nested "attrs" entries as both qualified and flat
1255    if let Some(Value::Unknown(attrs)) = fields.get("attrs") {
1256        for (key, val) in attrs {
1257            let lit = value_to_expr_literal(val);
1258            let qualified = format!("attrs.{key}");
1259            env = env.extend(std::sync::Arc::from(qualified.as_str()), lit.clone());
1260            // Also bind as flat key if not already present
1261            if !fields.contains_key(key) {
1262                env = env.extend(std::sync::Arc::from(key.as_str()), lit);
1263            }
1264        }
1265    }
1266    env
1267}
1268
1269/// Convert an instance `Value` to a `panproto_expr::Literal` for expression evaluation.
1270///
1271/// `Value::List` is flattened to a comma-separated `Literal::Str`
1272/// containing only the list's string elements, so that predicate
1273/// builtins like `Contains` can test membership without needing a list
1274/// literal in the expression language. Non-string elements are
1275/// silently dropped from the joined form — `value_to_expr_literal`
1276/// represents the projection `List Value → List Str ↪ Str`, which is
1277/// the composition of the obvious forgetful map and the join. This
1278/// matches the pre-existing (pre-`Value::List`) semantics of the
1279/// encoded-array path, which also only contributed string leaves to
1280/// the joined representation.
1281#[must_use]
1282pub fn value_to_expr_literal(val: &Value) -> panproto_expr::Literal {
1283    match val {
1284        Value::Bool(b) => panproto_expr::Literal::Bool(*b),
1285        Value::Int(i) => panproto_expr::Literal::Int(*i),
1286        Value::Float(f) => panproto_expr::Literal::Float(*f),
1287        Value::Str(s) => panproto_expr::Literal::Str(s.clone()),
1288        Value::List(items) => {
1289            let parts: Vec<&str> = items
1290                .iter()
1291                .filter_map(|item| match item {
1292                    Value::Str(s) => Some(s.as_str()),
1293                    _ => None,
1294                })
1295                .collect();
1296            panproto_expr::Literal::Str(parts.join(","))
1297        }
1298        _ => panproto_expr::Literal::Null,
1299    }
1300}
1301
1302/// Convert a `panproto_expr::Literal` back to an instance `Value`.
1303///
1304/// Integer-valued floats are normalized to `Value::Int` for round-trip
1305/// fidelity with JSON (which doesn't distinguish int/float).
1306#[must_use]
1307pub fn expr_literal_to_value(lit: &panproto_expr::Literal) -> Value {
1308    match lit {
1309        panproto_expr::Literal::Bool(b) => Value::Bool(*b),
1310        panproto_expr::Literal::Int(i) => Value::Int(*i),
1311        panproto_expr::Literal::Float(f) => {
1312            // Normalize integer-valued floats to Int for JSON round-trip fidelity.
1313            // Use safe bounds that avoid precision loss in f64→i64 conversion.
1314            #[allow(clippy::cast_precision_loss)]
1315            let fits = f.fract() == 0.0 && *f >= i64::MIN as f64 && *f <= i64::MAX as f64;
1316            if fits {
1317                #[allow(clippy::cast_possible_truncation)]
1318                let i = *f as i64;
1319                Value::Int(i)
1320            } else {
1321                Value::Float(*f)
1322            }
1323        }
1324        panproto_expr::Literal::Str(s) => Value::Str(s.clone()),
1325        _ => Value::Null,
1326    }
1327}
1328
1329// ---------------------------------------------------------------------------
1330// Left Kan extension (Σ_F) for W-type instances
1331// ---------------------------------------------------------------------------
1332
1333/// Left Kan extension (`Sigma_F`) for W-type instances.
1334///
1335/// Pushes a W-type instance forward along a migration morphism.
1336/// Unlike [`wtype_restrict`] (which drops unmapped nodes), extend
1337/// maps all source nodes into the target schema, remapping anchors
1338/// Prepare the root node for restriction: remap anchor, check conditional
1339/// survival, and apply field transforms.
1340fn prepare_root_node(
1341    root_node: &Node,
1342    migration: &CompiledMigration,
1343    instance: &WInstance,
1344) -> Result<Node, RestrictError> {
1345    let mut node = root_node.clone();
1346    if let Some(remapped) = migration.vertex_remap.get(&root_node.anchor) {
1347        node.anchor.clone_from(remapped);
1348    }
1349    if let Some(pred) = migration.conditional_survival.get(&root_node.anchor) {
1350        let env = build_env_from_extra_fields(&root_node.extra_fields);
1351        let config = panproto_expr::EvalConfig::default();
1352        if matches!(
1353            panproto_expr::eval(pred, &env, &config),
1354            Ok(panproto_expr::Literal::Bool(false))
1355        ) {
1356            return Err(RestrictError::RootPruned);
1357        }
1358    }
1359    if let Some(transforms) = migration.field_transforms.get(&root_node.anchor) {
1360        let scalars = collect_scalar_child_values(instance, root_node.id);
1361        apply_field_transforms(&mut node, transforms, &scalars);
1362    }
1363    Ok(node)
1364}
1365
1366/// and edges according to the compiled migration.
1367///
1368/// # Errors
1369///
1370/// Returns [`RestrictError`] if edge resolution fails or the root
1371/// cannot be mapped.
1372pub fn wtype_extend(
1373    instance: &WInstance,
1374    tgt_schema: &Schema,
1375    migration: &CompiledMigration,
1376) -> Result<WInstance, RestrictError> {
1377    // Check root can be mapped
1378    let root_node = instance
1379        .nodes
1380        .get(&instance.root)
1381        .ok_or(RestrictError::RootPruned)?;
1382
1383    let root_anchor = &root_node.anchor;
1384    if !migration.surviving_verts.contains(root_anchor)
1385        && !migration.vertex_remap.contains_key(root_anchor)
1386    {
1387        return Err(RestrictError::RootPruned);
1388    }
1389
1390    // Build new nodes: remap anchors where applicable
1391    let mut new_nodes: HashMap<u32, Node> = HashMap::with_capacity(instance.nodes.len());
1392    for (&id, node) in &instance.nodes {
1393        let mut new_node = node.clone();
1394        if let Some(remapped) = migration.vertex_remap.get(&node.anchor) {
1395            new_node.anchor.clone_from(remapped);
1396        } else if !migration.surviving_verts.contains(&node.anchor) {
1397            // Node's anchor has no remap and doesn't survive; skip it
1398            continue;
1399        }
1400        // Apply field transforms (coercions) to the extended node.
1401        // Collect scalar child values from the original instance for the
1402        // full fiber projection.
1403        if let Some(transforms) = migration.field_transforms.get(&node.anchor) {
1404            let scalars = collect_scalar_child_values(instance, id);
1405            apply_field_transforms(&mut new_node, transforms, &scalars);
1406        }
1407        new_nodes.insert(id, new_node);
1408    }
1409
1410    // Build new arcs: remap edges where applicable
1411    let mut new_arcs: Vec<(u32, u32, Edge)> = Vec::with_capacity(instance.arcs.len());
1412    for &(parent, child, ref edge) in &instance.arcs {
1413        // Both endpoints must be in the new node set
1414        if !new_nodes.contains_key(&parent) || !new_nodes.contains_key(&child) {
1415            continue;
1416        }
1417
1418        if let Some(new_edge) = migration.edge_remap.get(edge) {
1419            new_arcs.push((parent, child, new_edge.clone()));
1420        } else if migration.surviving_edges.contains(edge) {
1421            // Edge survives unchanged, but anchors may have been remapped.
1422            // Rebuild the edge with the remapped src/tgt vertex names.
1423            let parent_anchor = &new_nodes[&parent].anchor;
1424            let child_anchor = &new_nodes[&child].anchor;
1425            if edge.src == *parent_anchor && edge.tgt == *child_anchor {
1426                new_arcs.push((parent, child, edge.clone()));
1427            } else {
1428                // Anchors were remapped; resolve the edge in the target schema
1429                let resolved =
1430                    resolve_edge(tgt_schema, &migration.resolver, parent_anchor, child_anchor)?;
1431                new_arcs.push((parent, child, resolved));
1432            }
1433        } else {
1434            // Edge not in surviving_edges or edge_remap; try to resolve
1435            // from remapped anchors
1436            let parent_anchor = &new_nodes[&parent].anchor;
1437            let child_anchor = &new_nodes[&child].anchor;
1438            let resolved =
1439                resolve_edge(tgt_schema, &migration.resolver, parent_anchor, child_anchor)?;
1440            new_arcs.push((parent, child, resolved));
1441        }
1442    }
1443
1444    // Handle fans similarly to restrict's reconstruct_fans
1445    let surviving_ids: FxHashSet<u32> = new_nodes.keys().copied().collect();
1446    let empty_ancestors = FxHashMap::default();
1447    let new_fans = reconstruct_fans(
1448        instance,
1449        &surviving_ids,
1450        &empty_ancestors,
1451        migration,
1452        tgt_schema,
1453    )?;
1454
1455    let new_schema_root = migration
1456        .vertex_remap
1457        .get(&instance.schema_root)
1458        .cloned()
1459        .unwrap_or_else(|| instance.schema_root.clone());
1460
1461    Ok(WInstance::new(
1462        new_nodes,
1463        new_arcs,
1464        new_fans,
1465        instance.root,
1466        new_schema_root,
1467    ))
1468}
1469
1470#[cfg(test)]
1471mod tests {
1472    use super::*;
1473    use crate::value::{FieldPresence, Value};
1474
1475    /// Helper: build a simple 3-node instance (object with two string children).
1476    fn three_node_instance() -> WInstance {
1477        let mut nodes = HashMap::new();
1478        nodes.insert(0, Node::new(0, panproto_gat::Name::from("post:body")));
1479        nodes.insert(
1480            1,
1481            Node::new(1, "post:body.text")
1482                .with_value(FieldPresence::Present(Value::Str("hello".into()))),
1483        );
1484        nodes.insert(
1485            2,
1486            Node::new(2, "post:body.createdAt")
1487                .with_value(FieldPresence::Present(Value::Str("2024-01-01".into()))),
1488        );
1489
1490        let arcs = vec![
1491            (
1492                0,
1493                1,
1494                Edge {
1495                    src: "post:body".into(),
1496                    tgt: "post:body.text".into(),
1497                    kind: "prop".into(),
1498                    name: Some("text".into()),
1499                },
1500            ),
1501            (
1502                0,
1503                2,
1504                Edge {
1505                    src: "post:body".into(),
1506                    tgt: "post:body.createdAt".into(),
1507                    kind: "prop".into(),
1508                    name: Some("createdAt".into()),
1509                },
1510            ),
1511        ];
1512
1513        WInstance::new(
1514            nodes,
1515            arcs,
1516            vec![],
1517            0,
1518            panproto_gat::Name::from("post:body"),
1519        )
1520    }
1521
1522    #[test]
1523    fn anchor_surviving_keeps_matching_nodes() {
1524        let inst = three_node_instance();
1525        let surviving_verts: HashSet<Name> = ["post:body", "post:body.text"]
1526            .iter()
1527            .map(|&s| Name::from(s))
1528            .collect();
1529
1530        let result = anchor_surviving(&inst, &surviving_verts);
1531        assert_eq!(result.len(), 2);
1532        assert!(result.contains(&0));
1533        assert!(result.contains(&1));
1534        assert!(!result.contains(&2));
1535    }
1536
1537    #[test]
1538    fn ancestor_contraction_direct_parent() {
1539        let inst = three_node_instance();
1540        let surviving: HashSet<u32> = [0, 1, 2].iter().copied().collect();
1541        let ancestors = ancestor_contraction(&inst, &surviving);
1542        assert_eq!(ancestors.get(&1), Some(&0));
1543        assert_eq!(ancestors.get(&2), Some(&0));
1544    }
1545
1546    #[test]
1547    fn resolve_edge_unique() {
1548        use smallvec::smallvec;
1549        let mut between = HashMap::new();
1550        let edge = Edge {
1551            src: "a".into(),
1552            tgt: "b".into(),
1553            kind: "prop".into(),
1554            name: Some("x".into()),
1555        };
1556        between.insert((Name::from("a"), Name::from("b")), smallvec![edge.clone()]);
1557
1558        let schema = Schema {
1559            protocol: "test".into(),
1560            vertices: HashMap::new(),
1561            edges: HashMap::new(),
1562            hyper_edges: HashMap::new(),
1563            constraints: HashMap::new(),
1564            required: HashMap::new(),
1565            nsids: HashMap::new(),
1566            entries: Vec::new(),
1567            variants: HashMap::new(),
1568            orderings: HashMap::new(),
1569            recursion_points: HashMap::new(),
1570            spans: HashMap::new(),
1571            usage_modes: HashMap::new(),
1572            nominal: HashMap::new(),
1573            coercions: HashMap::new(),
1574            mergers: HashMap::new(),
1575            defaults: HashMap::new(),
1576            policies: HashMap::new(),
1577            outgoing: HashMap::new(),
1578            incoming: HashMap::new(),
1579            between,
1580        };
1581
1582        let resolver = HashMap::new();
1583        let result = resolve_edge(&schema, &resolver, "a", "b");
1584        assert!(result.is_ok());
1585        assert_eq!(result.ok(), Some(edge));
1586    }
1587
1588    #[test]
1589    fn resolve_edge_uses_resolver() {
1590        let schema = Schema {
1591            protocol: "test".into(),
1592            vertices: HashMap::new(),
1593            edges: HashMap::new(),
1594            hyper_edges: HashMap::new(),
1595            constraints: HashMap::new(),
1596            required: HashMap::new(),
1597            nsids: HashMap::new(),
1598            entries: Vec::new(),
1599            variants: HashMap::new(),
1600            orderings: HashMap::new(),
1601            recursion_points: HashMap::new(),
1602            spans: HashMap::new(),
1603            usage_modes: HashMap::new(),
1604            nominal: HashMap::new(),
1605            coercions: HashMap::new(),
1606            mergers: HashMap::new(),
1607            defaults: HashMap::new(),
1608            policies: HashMap::new(),
1609            outgoing: HashMap::new(),
1610            incoming: HashMap::new(),
1611            between: HashMap::new(),
1612        };
1613
1614        let resolved_edge = Edge {
1615            src: "a".into(),
1616            tgt: "b".into(),
1617            kind: "prop".into(),
1618            name: Some("resolved".into()),
1619        };
1620        let mut resolver = HashMap::new();
1621        resolver.insert((Name::from("a"), Name::from("b")), resolved_edge.clone());
1622
1623        let result = resolve_edge(&schema, &resolver, "a", "b");
1624        assert!(result.is_ok());
1625        assert_eq!(result.ok(), Some(resolved_edge));
1626    }
1627
1628    // --- wtype_extend tests ---
1629
1630    #[allow(clippy::unwrap_used)]
1631    fn make_test_schema(vertices: &[&str], edges: &[Edge]) -> Schema {
1632        use smallvec::smallvec;
1633        let mut between = HashMap::new();
1634        for edge in edges {
1635            between
1636                .entry((Name::from(&*edge.src), Name::from(&*edge.tgt)))
1637                .or_insert_with(|| smallvec![])
1638                .push(edge.clone());
1639        }
1640        Schema {
1641            protocol: "test".into(),
1642            vertices: vertices
1643                .iter()
1644                .map(|&v| {
1645                    (
1646                        Name::from(v),
1647                        panproto_schema::Vertex {
1648                            id: Name::from(v),
1649                            kind: Name::from("object"),
1650                            nsid: None,
1651                        },
1652                    )
1653                })
1654                .collect(),
1655            edges: HashMap::new(),
1656            hyper_edges: HashMap::new(),
1657            constraints: HashMap::new(),
1658            required: HashMap::new(),
1659            nsids: HashMap::new(),
1660            entries: Vec::new(),
1661            variants: HashMap::new(),
1662            orderings: HashMap::new(),
1663            recursion_points: HashMap::new(),
1664            spans: HashMap::new(),
1665            usage_modes: HashMap::new(),
1666            nominal: HashMap::new(),
1667            coercions: HashMap::new(),
1668            mergers: HashMap::new(),
1669            defaults: HashMap::new(),
1670            policies: HashMap::new(),
1671            outgoing: HashMap::new(),
1672            incoming: HashMap::new(),
1673            between,
1674        }
1675    }
1676
1677    #[test]
1678    #[allow(clippy::unwrap_used)]
1679    fn extend_identity_migration() {
1680        let inst = three_node_instance();
1681        let edge_text = Edge {
1682            src: "post:body".into(),
1683            tgt: "post:body.text".into(),
1684            kind: "prop".into(),
1685            name: Some("text".into()),
1686        };
1687        let edge_time = Edge {
1688            src: "post:body".into(),
1689            tgt: "post:body.createdAt".into(),
1690            kind: "prop".into(),
1691            name: Some("createdAt".into()),
1692        };
1693        let surviving_edges = HashSet::from([edge_text.clone(), edge_time.clone()]);
1694        let schema = make_test_schema(
1695            &["post:body", "post:body.text", "post:body.createdAt"],
1696            &[edge_text, edge_time],
1697        );
1698        let migration = CompiledMigration {
1699            surviving_verts: HashSet::from([
1700                Name::from("post:body"),
1701                Name::from("post:body.text"),
1702                Name::from("post:body.createdAt"),
1703            ]),
1704            surviving_edges,
1705            vertex_remap: HashMap::new(),
1706            edge_remap: HashMap::new(),
1707            resolver: HashMap::new(),
1708            hyper_resolver: HashMap::new(),
1709            field_transforms: HashMap::new(),
1710            conditional_survival: HashMap::new(),
1711            expansion_path: HashMap::new(),
1712        };
1713        let result = wtype_extend(&inst, &schema, &migration).unwrap();
1714        assert_eq!(result.node_count(), 3);
1715        assert_eq!(result.arc_count(), 2);
1716        assert_eq!(result.schema_root, Name::from("post:body"));
1717    }
1718
1719    #[test]
1720    #[allow(clippy::unwrap_used)]
1721    fn extend_with_vertex_remap() {
1722        let inst = three_node_instance();
1723        let tgt_edge_text = Edge {
1724            src: "article:body".into(),
1725            tgt: "article:body.text".into(),
1726            kind: "prop".into(),
1727            name: Some("text".into()),
1728        };
1729        let tgt_edge_time = Edge {
1730            src: "article:body".into(),
1731            tgt: "article:body.createdAt".into(),
1732            kind: "prop".into(),
1733            name: Some("createdAt".into()),
1734        };
1735        let tgt_schema = make_test_schema(
1736            &[
1737                "article:body",
1738                "article:body.text",
1739                "article:body.createdAt",
1740            ],
1741            &[tgt_edge_text, tgt_edge_time],
1742        );
1743        let mut vertex_remap = HashMap::new();
1744        vertex_remap.insert(Name::from("post:body"), Name::from("article:body"));
1745        vertex_remap.insert(
1746            Name::from("post:body.text"),
1747            Name::from("article:body.text"),
1748        );
1749        vertex_remap.insert(
1750            Name::from("post:body.createdAt"),
1751            Name::from("article:body.createdAt"),
1752        );
1753        let migration = CompiledMigration {
1754            surviving_verts: HashSet::from([
1755                Name::from("article:body"),
1756                Name::from("article:body.text"),
1757                Name::from("article:body.createdAt"),
1758            ]),
1759            surviving_edges: HashSet::new(),
1760            vertex_remap,
1761            edge_remap: HashMap::new(),
1762            resolver: HashMap::new(),
1763            hyper_resolver: HashMap::new(),
1764            field_transforms: HashMap::new(),
1765            conditional_survival: HashMap::new(),
1766            expansion_path: HashMap::new(),
1767        };
1768        let result = wtype_extend(&inst, &tgt_schema, &migration).unwrap();
1769        assert_eq!(result.node_count(), 3);
1770        assert_eq!(result.arc_count(), 2);
1771        assert_eq!(result.schema_root, Name::from("article:body"));
1772        assert_eq!(result.nodes[&0].anchor, Name::from("article:body"));
1773        assert_eq!(result.nodes[&1].anchor, Name::from("article:body.text"));
1774    }
1775
1776    #[test]
1777    #[allow(clippy::unwrap_used)]
1778    fn extend_with_edge_remap() {
1779        let inst = three_node_instance();
1780        let src_edge_text = Edge {
1781            src: "post:body".into(),
1782            tgt: "post:body.text".into(),
1783            kind: "prop".into(),
1784            name: Some("text".into()),
1785        };
1786        let new_edge_text = Edge {
1787            src: "post:body".into(),
1788            tgt: "post:body.text".into(),
1789            kind: "prop".into(),
1790            name: Some("content".into()),
1791        };
1792        let edge_time = Edge {
1793            src: "post:body".into(),
1794            tgt: "post:body.createdAt".into(),
1795            kind: "prop".into(),
1796            name: Some("createdAt".into()),
1797        };
1798        let surviving_edges = HashSet::from([edge_time.clone()]);
1799        let tgt_schema = make_test_schema(
1800            &["post:body", "post:body.text", "post:body.createdAt"],
1801            &[new_edge_text.clone(), edge_time],
1802        );
1803        let mut edge_remap = HashMap::new();
1804        edge_remap.insert(src_edge_text, new_edge_text);
1805        let migration = CompiledMigration {
1806            surviving_verts: HashSet::from([
1807                Name::from("post:body"),
1808                Name::from("post:body.text"),
1809                Name::from("post:body.createdAt"),
1810            ]),
1811            surviving_edges,
1812            vertex_remap: HashMap::new(),
1813            edge_remap,
1814            resolver: HashMap::new(),
1815            hyper_resolver: HashMap::new(),
1816            field_transforms: HashMap::new(),
1817            conditional_survival: HashMap::new(),
1818            expansion_path: HashMap::new(),
1819        };
1820        let result = wtype_extend(&inst, &tgt_schema, &migration).unwrap();
1821        assert_eq!(result.arc_count(), 2);
1822        // Check that the remapped edge is used
1823        let text_arc = result.arcs.iter().find(|a| a.1 == 1).unwrap();
1824        assert_eq!(text_arc.2.name.as_deref(), Some("content"));
1825    }
1826
1827    #[test]
1828    #[allow(clippy::unwrap_used)]
1829    fn extend_preserves_structure() {
1830        let inst = three_node_instance();
1831        let edge_text = Edge {
1832            src: "post:body".into(),
1833            tgt: "post:body.text".into(),
1834            kind: "prop".into(),
1835            name: Some("text".into()),
1836        };
1837        let edge_time = Edge {
1838            src: "post:body".into(),
1839            tgt: "post:body.createdAt".into(),
1840            kind: "prop".into(),
1841            name: Some("createdAt".into()),
1842        };
1843        let surviving_edges = HashSet::from([edge_text.clone(), edge_time.clone()]);
1844        let schema = make_test_schema(
1845            &["post:body", "post:body.text", "post:body.createdAt"],
1846            &[edge_text, edge_time],
1847        );
1848        let migration = CompiledMigration {
1849            surviving_verts: HashSet::from([
1850                Name::from("post:body"),
1851                Name::from("post:body.text"),
1852                Name::from("post:body.createdAt"),
1853            ]),
1854            surviving_edges,
1855            vertex_remap: HashMap::new(),
1856            edge_remap: HashMap::new(),
1857            resolver: HashMap::new(),
1858            hyper_resolver: HashMap::new(),
1859            field_transforms: HashMap::new(),
1860            conditional_survival: HashMap::new(),
1861            expansion_path: HashMap::new(),
1862        };
1863        let result = wtype_extend(&inst, &schema, &migration).unwrap();
1864        // Verify parent/children maps are correctly rebuilt
1865        assert_eq!(result.parent(1), Some(0));
1866        assert_eq!(result.parent(2), Some(0));
1867        assert!(result.children(0).contains(&1));
1868        assert!(result.children(0).contains(&2));
1869        // Verify values are preserved
1870        assert!(result.nodes[&1].has_value());
1871        assert!(result.nodes[&2].has_value());
1872    }
1873
1874    /// Regression test: renamed vertices must survive restrict.
1875    ///
1876    /// When a migration maps source vertex `A` to target vertex `B`, the
1877    /// `surviving_verts` set contains `B` (the target). The restrict BFS
1878    /// must remap `A` → `B` before checking membership, otherwise the
1879    /// node is incorrectly pruned and its value is lost.
1880    #[test]
1881    #[allow(clippy::expect_used, clippy::too_many_lines)]
1882    fn restrict_renamed_vertex_preserves_value() {
1883        use smallvec::smallvec;
1884
1885        // Source instance: post:body { text: "hello", title: "world" }
1886        let mut nodes = HashMap::new();
1887        nodes.insert(0, Node::new(0, Name::from("post:body")));
1888        nodes.insert(
1889            1,
1890            Node::new(1, "post:text")
1891                .with_value(FieldPresence::Present(Value::Str("hello".into()))),
1892        );
1893        nodes.insert(
1894            2,
1895            Node::new(2, "post:title")
1896                .with_value(FieldPresence::Present(Value::Str("world".into()))),
1897        );
1898        let arcs = vec![
1899            (
1900                0,
1901                1,
1902                Edge {
1903                    src: "post:body".into(),
1904                    tgt: "post:text".into(),
1905                    kind: "prop".into(),
1906                    name: Some("text".into()),
1907                },
1908            ),
1909            (
1910                0,
1911                2,
1912                Edge {
1913                    src: "post:body".into(),
1914                    tgt: "post:title".into(),
1915                    kind: "prop".into(),
1916                    name: Some("title".into()),
1917                },
1918            ),
1919        ];
1920        let inst = WInstance::new(nodes, arcs, vec![], 0, Name::from("post:body"));
1921
1922        // Target schema: post:body has edges to post:content and post:title
1923        let tgt_content_edge = Edge {
1924            src: "post:body".into(),
1925            tgt: "post:content".into(),
1926            kind: "prop".into(),
1927            name: Some("content".into()),
1928        };
1929        let tgt_title_edge = Edge {
1930            src: "post:body".into(),
1931            tgt: "post:title".into(),
1932            kind: "prop".into(),
1933            name: Some("title".into()),
1934        };
1935        let mut tgt_between = HashMap::new();
1936        tgt_between.insert(
1937            (Name::from("post:body"), Name::from("post:content")),
1938            smallvec![tgt_content_edge],
1939        );
1940        tgt_between.insert(
1941            (Name::from("post:body"), Name::from("post:title")),
1942            smallvec![tgt_title_edge],
1943        );
1944        let tgt_schema = Schema {
1945            protocol: "test".into(),
1946            vertices: HashMap::new(),
1947            edges: HashMap::new(),
1948            hyper_edges: HashMap::new(),
1949            constraints: HashMap::new(),
1950            required: HashMap::new(),
1951            nsids: HashMap::new(),
1952            entries: Vec::new(),
1953            variants: HashMap::new(),
1954            orderings: HashMap::new(),
1955            recursion_points: HashMap::new(),
1956            spans: HashMap::new(),
1957            usage_modes: HashMap::new(),
1958            nominal: HashMap::new(),
1959            coercions: HashMap::new(),
1960            mergers: HashMap::new(),
1961            defaults: HashMap::new(),
1962            policies: HashMap::new(),
1963            outgoing: HashMap::new(),
1964            incoming: HashMap::new(),
1965            between: tgt_between,
1966        };
1967
1968        // Migration: post:text → post:content (renamed), post:title stays
1969        let mut surviving_verts = HashSet::new();
1970        surviving_verts.insert(Name::from("post:body"));
1971        surviving_verts.insert(Name::from("post:content")); // target name
1972        surviving_verts.insert(Name::from("post:title"));
1973
1974        let mut vertex_remap = HashMap::new();
1975        vertex_remap.insert(Name::from("post:text"), Name::from("post:content"));
1976
1977        let migration = CompiledMigration {
1978            surviving_verts,
1979            surviving_edges: HashSet::new(),
1980            vertex_remap,
1981            edge_remap: HashMap::new(),
1982            resolver: HashMap::new(),
1983            hyper_resolver: HashMap::new(),
1984            field_transforms: HashMap::new(),
1985            conditional_survival: HashMap::new(),
1986            expansion_path: HashMap::new(),
1987        };
1988
1989        let src_schema = Schema {
1990            protocol: "test".into(),
1991            vertices: HashMap::new(),
1992            edges: HashMap::new(),
1993            hyper_edges: HashMap::new(),
1994            constraints: HashMap::new(),
1995            required: HashMap::new(),
1996            nsids: HashMap::new(),
1997            entries: Vec::new(),
1998            variants: HashMap::new(),
1999            orderings: HashMap::new(),
2000            recursion_points: HashMap::new(),
2001            spans: HashMap::new(),
2002            usage_modes: HashMap::new(),
2003            nominal: HashMap::new(),
2004            coercions: HashMap::new(),
2005            mergers: HashMap::new(),
2006            defaults: HashMap::new(),
2007            policies: HashMap::new(),
2008            outgoing: HashMap::new(),
2009            incoming: HashMap::new(),
2010            between: HashMap::new(),
2011        };
2012
2013        let result = wtype_restrict(&inst, &src_schema, &tgt_schema, &migration)
2014            .expect("restrict should succeed");
2015
2016        // All three nodes must survive (root + renamed + unchanged)
2017        assert_eq!(result.nodes.len(), 3, "all three nodes should survive");
2018
2019        // The renamed node should now have anchor "post:content"
2020        let renamed_node = result.nodes.get(&1).expect("node 1 should survive");
2021        assert_eq!(renamed_node.anchor.as_ref(), "post:content");
2022        assert!(renamed_node.has_value(), "renamed node must keep its value");
2023
2024        // The value should be preserved
2025        assert!(
2026            matches!(
2027                &renamed_node.value,
2028                Some(FieldPresence::Present(Value::Str(s))) if s.as_str() == "hello"
2029            ),
2030            "expected Some(Present(Str(\"hello\"))), got {:?}",
2031            renamed_node.value,
2032        );
2033    }
2034
2035    // --- PathTransform tests ---
2036
2037    #[test]
2038    fn path_transform_renames_nested_field() {
2039        let mut node = Node::new(0, "v");
2040        let mut inner_map = HashMap::new();
2041        inner_map.insert("old_attr".to_string(), Value::Str("val".into()));
2042        node.extra_fields
2043            .insert("attrs".to_string(), Value::Unknown(inner_map));
2044
2045        let transform = FieldTransform::PathTransform {
2046            path: vec!["attrs".to_string()],
2047            inner: Box::new(FieldTransform::RenameField {
2048                old_key: "old_attr".to_string(),
2049                new_key: "new_attr".to_string(),
2050            }),
2051        };
2052        apply_field_transforms(&mut node, &[transform], &HashMap::new());
2053
2054        match node.extra_fields.get("attrs") {
2055            Some(Value::Unknown(map)) => {
2056                assert!(!map.contains_key("old_attr"));
2057                assert_eq!(map.get("new_attr"), Some(&Value::Str("val".into())));
2058            }
2059            other => panic!("expected Unknown map, got {other:?}"),
2060        }
2061    }
2062
2063    #[test]
2064    fn path_transform_empty_path_is_identity() {
2065        let mut node = Node::new(0, "v");
2066        node.extra_fields
2067            .insert("color".to_string(), Value::Str("red".into()));
2068
2069        let transform = FieldTransform::PathTransform {
2070            path: vec![],
2071            inner: Box::new(FieldTransform::RenameField {
2072                old_key: "color".to_string(),
2073                new_key: "colour".to_string(),
2074            }),
2075        };
2076        apply_field_transforms(&mut node, &[transform], &HashMap::new());
2077
2078        assert!(!node.extra_fields.contains_key("color"));
2079        assert_eq!(
2080            node.extra_fields.get("colour"),
2081            Some(&Value::Str("red".into()))
2082        );
2083    }
2084
2085    // --- MapReferences tests ---
2086
2087    #[test]
2088    fn map_references_renames_string_field() {
2089        let mut node = Node::new(0, "v");
2090        node.extra_fields
2091            .insert("parent".to_string(), Value::Str("old_name".into()));
2092
2093        let mut rename_map = HashMap::new();
2094        rename_map.insert("old_name".to_string(), Some("new_name".to_string()));
2095
2096        let transform = FieldTransform::MapReferences {
2097            field: "parent".to_string(),
2098            rename_map,
2099        };
2100        apply_field_transforms(&mut node, &[transform], &HashMap::new());
2101
2102        assert_eq!(
2103            node.extra_fields.get("parent"),
2104            Some(&Value::Str("new_name".into()))
2105        );
2106    }
2107
2108    #[test]
2109    fn map_references_filters_list() {
2110        let mut node = Node::new(0, "v");
2111        node.extra_fields.insert(
2112            "parents".to_string(),
2113            Value::List(vec![
2114                Value::Str("alpha".into()),
2115                Value::Str("beta".into()),
2116                Value::Str("gamma".into()),
2117            ]),
2118        );
2119
2120        let mut rename_map = HashMap::new();
2121        rename_map.insert("alpha".to_string(), Some("alpha_v2".to_string()));
2122        rename_map.insert("beta".to_string(), None); // drop
2123
2124        let transform = FieldTransform::MapReferences {
2125            field: "parents".to_string(),
2126            rename_map,
2127        };
2128        apply_field_transforms(&mut node, &[transform], &HashMap::new());
2129
2130        match node.extra_fields.get("parents") {
2131            Some(Value::List(items)) => {
2132                assert_eq!(items.len(), 2);
2133                assert_eq!(items[0], Value::Str("alpha_v2".into()));
2134                assert_eq!(items[1], Value::Str("gamma".into()));
2135            }
2136            other => panic!("expected List, got {other:?}"),
2137        }
2138    }
2139
2140    #[test]
2141    fn map_references_drops_removed_entries() {
2142        let mut node = Node::new(0, "v");
2143        node.extra_fields.insert(
2144            "refs".to_string(),
2145            Value::List(vec![
2146                Value::Str("gone".into()),
2147                Value::Str("also_gone".into()),
2148            ]),
2149        );
2150
2151        let mut rename_map = HashMap::new();
2152        rename_map.insert("gone".to_string(), None);
2153        rename_map.insert("also_gone".to_string(), None);
2154
2155        let transform = FieldTransform::MapReferences {
2156            field: "refs".to_string(),
2157            rename_map,
2158        };
2159        apply_field_transforms(&mut node, &[transform], &HashMap::new());
2160
2161        match node.extra_fields.get("refs") {
2162            Some(Value::List(items)) => {
2163                assert!(items.is_empty(), "expected empty list, got {items:?}");
2164            }
2165            other => panic!("expected List, got {other:?}"),
2166        }
2167    }
2168
2169    #[test]
2170    fn value_to_expr_literal_joins_string_list() {
2171        // A list of strings becomes a comma-separated `Literal::Str`.
2172        // This is what `Contains` substring predicates operate on.
2173        let val = Value::List(vec![
2174            Value::Str("a".into()),
2175            Value::Str("b".into()),
2176            Value::Str("c".into()),
2177        ]);
2178        match value_to_expr_literal(&val) {
2179            panproto_expr::Literal::Str(s) => assert_eq!(s, "a,b,c"),
2180            other => panic!("expected Literal::Str, got {other:?}"),
2181        }
2182    }
2183
2184    #[test]
2185    fn value_to_expr_literal_drops_non_string_list_elements() {
2186        // Non-string elements must be SILENTLY DROPPED from the joined
2187        // form (not Debug-formatted, which would make `Contains(s, "42")`
2188        // spuriously match on an `Int(42)` element). This preserves the
2189        // pre-existing `__array_len` semantics, under which non-string
2190        // entries simply didn't contribute to the joined string.
2191        let val = Value::List(vec![
2192            Value::Str("keep".into()),
2193            Value::Int(42),
2194            Value::Bool(true),
2195            Value::Str("alsokeep".into()),
2196            Value::Null,
2197        ]);
2198        match value_to_expr_literal(&val) {
2199            panproto_expr::Literal::Str(s) => assert_eq!(
2200                s, "keep,alsokeep",
2201                "non-string list elements should be filtered out, not Debug-formatted"
2202            ),
2203            other => panic!("expected Literal::Str, got {other:?}"),
2204        }
2205    }
2206
2207    #[test]
2208    fn value_to_expr_literal_empty_list_is_empty_string() {
2209        let val = Value::List(Vec::new());
2210        match value_to_expr_literal(&val) {
2211            panproto_expr::Literal::Str(s) => assert!(s.is_empty()),
2212            other => panic!("expected Literal::Str, got {other:?}"),
2213        }
2214    }
2215
2216    #[test]
2217    fn value_to_expr_literal_non_collection_variants_pass_through() {
2218        assert!(matches!(
2219            value_to_expr_literal(&Value::Bool(true)),
2220            panproto_expr::Literal::Bool(true)
2221        ));
2222        assert!(matches!(
2223            value_to_expr_literal(&Value::Int(7)),
2224            panproto_expr::Literal::Int(7)
2225        ));
2226        assert!(matches!(
2227            value_to_expr_literal(&Value::Null),
2228            panproto_expr::Literal::Null
2229        ));
2230        // Unknown (record) maps to Null because it has no natural
2231        // coercion to a Literal without a projection.
2232        assert!(matches!(
2233            value_to_expr_literal(&Value::Unknown(HashMap::new())),
2234            panproto_expr::Literal::Null
2235        ));
2236    }
2237
2238    #[test]
2239    fn map_references_preserves_non_string_elements() {
2240        // MapReferences is the action of the rename on *string leaves*;
2241        // non-string elements in a list pass through unchanged. This
2242        // pins the functoriality in the Kleisli category of the "rename
2243        // or drop" partial map.
2244        let mut node = Node::new(0, "v");
2245        node.extra_fields.insert(
2246            "mixed".to_string(),
2247            Value::List(vec![
2248                Value::Str("renameme".into()),
2249                Value::Int(42),
2250                Value::Bool(true),
2251                Value::Str("dropme".into()),
2252            ]),
2253        );
2254
2255        let mut rename_map = HashMap::new();
2256        rename_map.insert("renameme".to_string(), Some("renamed".to_string()));
2257        rename_map.insert("dropme".to_string(), None);
2258
2259        let transform = FieldTransform::MapReferences {
2260            field: "mixed".to_string(),
2261            rename_map,
2262        };
2263        apply_field_transforms(&mut node, &[transform], &HashMap::new());
2264
2265        match node.extra_fields.get("mixed") {
2266            Some(Value::List(items)) => {
2267                assert_eq!(items.len(), 3);
2268                assert_eq!(items[0], Value::Str("renamed".into()));
2269                assert_eq!(items[1], Value::Int(42));
2270                assert_eq!(items[2], Value::Bool(true));
2271            }
2272            other => panic!("expected List, got {other:?}"),
2273        }
2274    }
2275
2276    // --- ConditionalSurvival tests ---
2277
2278    #[test]
2279    #[allow(clippy::expect_used)]
2280    fn conditional_survival_drops_non_matching_node() {
2281        use smallvec::smallvec;
2282
2283        // Two child nodes anchored to "item", with different level values.
2284        let mut nodes = HashMap::new();
2285        nodes.insert(0, Node::new(0, Name::from("root")));
2286        nodes.insert(
2287            1,
2288            Node::new(1, "item").with_extra_field("level", Value::Int(2)),
2289        );
2290        nodes.insert(
2291            2,
2292            Node::new(2, "item").with_extra_field("level", Value::Int(1)),
2293        );
2294
2295        let edge = Edge {
2296            src: "root".into(),
2297            tgt: "item".into(),
2298            kind: "prop".into(),
2299            name: Some("child".into()),
2300        };
2301        let arcs = vec![(0, 1, edge.clone()), (0, 2, edge.clone())];
2302        let inst = WInstance::new(nodes, arcs, vec![], 0, Name::from("root"));
2303
2304        let mut between = HashMap::new();
2305        between.insert(
2306            (Name::from("root"), Name::from("item")),
2307            smallvec![edge.clone()],
2308        );
2309        let tgt_schema = Schema {
2310            protocol: "test".into(),
2311            vertices: HashMap::new(),
2312            edges: HashMap::new(),
2313            hyper_edges: HashMap::new(),
2314            constraints: HashMap::new(),
2315            required: HashMap::new(),
2316            nsids: HashMap::new(),
2317            entries: Vec::new(),
2318            variants: HashMap::new(),
2319            orderings: HashMap::new(),
2320            recursion_points: HashMap::new(),
2321            spans: HashMap::new(),
2322            usage_modes: HashMap::new(),
2323            nominal: HashMap::new(),
2324            coercions: HashMap::new(),
2325            mergers: HashMap::new(),
2326            defaults: HashMap::new(),
2327            policies: HashMap::new(),
2328            outgoing: HashMap::new(),
2329            incoming: HashMap::new(),
2330            between,
2331        };
2332        let src_schema = Schema {
2333            protocol: "test".into(),
2334            vertices: HashMap::new(),
2335            edges: HashMap::new(),
2336            hyper_edges: HashMap::new(),
2337            constraints: HashMap::new(),
2338            required: HashMap::new(),
2339            nsids: HashMap::new(),
2340            entries: Vec::new(),
2341            variants: HashMap::new(),
2342            orderings: HashMap::new(),
2343            recursion_points: HashMap::new(),
2344            spans: HashMap::new(),
2345            usage_modes: HashMap::new(),
2346            nominal: HashMap::new(),
2347            coercions: HashMap::new(),
2348            mergers: HashMap::new(),
2349            defaults: HashMap::new(),
2350            policies: HashMap::new(),
2351            outgoing: HashMap::new(),
2352            incoming: HashMap::new(),
2353            between: HashMap::new(),
2354        };
2355
2356        // Predicate: (== level 2)
2357        let predicate = panproto_expr::Expr::Builtin(
2358            panproto_expr::BuiltinOp::Eq,
2359            vec![
2360                panproto_expr::Expr::Var(std::sync::Arc::from("level")),
2361                panproto_expr::Expr::Lit(panproto_expr::Literal::Int(2)),
2362            ],
2363        );
2364
2365        let mut migration = CompiledMigration {
2366            surviving_verts: HashSet::from([Name::from("root"), Name::from("item")]),
2367            surviving_edges: HashSet::from([edge]),
2368            vertex_remap: HashMap::new(),
2369            edge_remap: HashMap::new(),
2370            resolver: HashMap::new(),
2371            hyper_resolver: HashMap::new(),
2372            field_transforms: HashMap::new(),
2373            conditional_survival: HashMap::new(),
2374            expansion_path: HashMap::new(),
2375        };
2376        migration.add_conditional_survival("item", predicate);
2377
2378        let result =
2379            wtype_restrict(&inst, &src_schema, &tgt_schema, &migration).expect("restrict ok");
2380
2381        // Node 1 (level=2) survives, node 2 (level=1) is dropped
2382        assert_eq!(result.node_count(), 2);
2383        assert!(result.nodes.contains_key(&0));
2384        assert!(result.nodes.contains_key(&1));
2385        assert!(!result.nodes.contains_key(&2));
2386    }
2387
2388    #[test]
2389    #[allow(clippy::expect_used)]
2390    fn conditional_survival_no_predicate_survives() {
2391        use smallvec::smallvec;
2392
2393        let mut nodes = HashMap::new();
2394        nodes.insert(0, Node::new(0, Name::from("root")));
2395        nodes.insert(
2396            1,
2397            Node::new(1, "item").with_extra_field("level", Value::Int(1)),
2398        );
2399
2400        let edge = Edge {
2401            src: "root".into(),
2402            tgt: "item".into(),
2403            kind: "prop".into(),
2404            name: Some("child".into()),
2405        };
2406        let arcs = vec![(0, 1, edge.clone())];
2407        let inst = WInstance::new(nodes, arcs, vec![], 0, Name::from("root"));
2408
2409        let mut between = HashMap::new();
2410        between.insert(
2411            (Name::from("root"), Name::from("item")),
2412            smallvec![edge.clone()],
2413        );
2414        let tgt_schema = Schema {
2415            protocol: "test".into(),
2416            vertices: HashMap::new(),
2417            edges: HashMap::new(),
2418            hyper_edges: HashMap::new(),
2419            constraints: HashMap::new(),
2420            required: HashMap::new(),
2421            nsids: HashMap::new(),
2422            entries: Vec::new(),
2423            variants: HashMap::new(),
2424            orderings: HashMap::new(),
2425            recursion_points: HashMap::new(),
2426            spans: HashMap::new(),
2427            usage_modes: HashMap::new(),
2428            nominal: HashMap::new(),
2429            coercions: HashMap::new(),
2430            mergers: HashMap::new(),
2431            defaults: HashMap::new(),
2432            policies: HashMap::new(),
2433            outgoing: HashMap::new(),
2434            incoming: HashMap::new(),
2435            between,
2436        };
2437        let src_schema = Schema {
2438            protocol: "test".into(),
2439            vertices: HashMap::new(),
2440            edges: HashMap::new(),
2441            hyper_edges: HashMap::new(),
2442            constraints: HashMap::new(),
2443            required: HashMap::new(),
2444            nsids: HashMap::new(),
2445            entries: Vec::new(),
2446            variants: HashMap::new(),
2447            orderings: HashMap::new(),
2448            recursion_points: HashMap::new(),
2449            spans: HashMap::new(),
2450            usage_modes: HashMap::new(),
2451            nominal: HashMap::new(),
2452            coercions: HashMap::new(),
2453            mergers: HashMap::new(),
2454            defaults: HashMap::new(),
2455            policies: HashMap::new(),
2456            outgoing: HashMap::new(),
2457            incoming: HashMap::new(),
2458            between: HashMap::new(),
2459        };
2460
2461        // No conditional_survival predicates; node should survive normally
2462        let migration = CompiledMigration {
2463            surviving_verts: HashSet::from([Name::from("root"), Name::from("item")]),
2464            surviving_edges: HashSet::from([edge]),
2465            vertex_remap: HashMap::new(),
2466            edge_remap: HashMap::new(),
2467            resolver: HashMap::new(),
2468            hyper_resolver: HashMap::new(),
2469            field_transforms: HashMap::new(),
2470            conditional_survival: HashMap::new(),
2471            expansion_path: HashMap::new(),
2472        };
2473
2474        let result =
2475            wtype_restrict(&inst, &src_schema, &tgt_schema, &migration).expect("restrict ok");
2476
2477        assert_eq!(result.node_count(), 2);
2478        assert!(result.nodes.contains_key(&1));
2479    }
2480
2481    // --- ComputeField tests ---
2482
2483    #[test]
2484    fn computed_field_template_name() {
2485        let mut node = Node::new(0, "heading");
2486        node.extra_fields.insert("level".to_string(), Value::Int(2));
2487
2488        // (concat "h" (int_to_str level))
2489        let expr = panproto_expr::Expr::Builtin(
2490            panproto_expr::BuiltinOp::Concat,
2491            vec![
2492                panproto_expr::Expr::Lit(panproto_expr::Literal::Str("h".to_string())),
2493                panproto_expr::Expr::Builtin(
2494                    panproto_expr::BuiltinOp::IntToStr,
2495                    vec![panproto_expr::Expr::Var(std::sync::Arc::from("level"))],
2496                ),
2497            ],
2498        );
2499
2500        let transform = FieldTransform::ComputeField {
2501            target_key: "name".to_string(),
2502            expr,
2503            inverse: None,
2504            coercion_class: panproto_gat::CoercionClass::Opaque,
2505        };
2506        apply_field_transforms(&mut node, &[transform], &HashMap::new());
2507
2508        assert_eq!(
2509            node.extra_fields.get("name"),
2510            Some(&Value::Str("h2".into()))
2511        );
2512    }
2513
2514    #[test]
2515    fn computed_field_reads_nested_attrs() {
2516        let mut node = Node::new(0, "heading");
2517        let mut attrs = HashMap::new();
2518        attrs.insert("level".to_string(), Value::Int(3));
2519        node.extra_fields
2520            .insert("attrs".to_string(), Value::Unknown(attrs));
2521
2522        // (concat "h" (int_to_str attrs.level))
2523        let expr = panproto_expr::Expr::Builtin(
2524            panproto_expr::BuiltinOp::Concat,
2525            vec![
2526                panproto_expr::Expr::Lit(panproto_expr::Literal::Str("h".to_string())),
2527                panproto_expr::Expr::Builtin(
2528                    panproto_expr::BuiltinOp::IntToStr,
2529                    vec![panproto_expr::Expr::Var(std::sync::Arc::from(
2530                        "attrs.level",
2531                    ))],
2532                ),
2533            ],
2534        );
2535
2536        let transform = FieldTransform::ComputeField {
2537            target_key: "name".to_string(),
2538            expr,
2539            inverse: None,
2540            coercion_class: panproto_gat::CoercionClass::Opaque,
2541        };
2542        apply_field_transforms(&mut node, &[transform], &HashMap::new());
2543
2544        assert_eq!(
2545            node.extra_fields.get("name"),
2546            Some(&Value::Str("h3".into()))
2547        );
2548    }
2549
2550    #[test]
2551    fn case_transform_sets_field_conditionally() {
2552        use crate::value::Value;
2553        use panproto_expr::{BuiltinOp, Expr, Literal};
2554        use std::sync::Arc;
2555
2556        let mut node = Node::new(0, "heading");
2557        node.extra_fields.insert("level".into(), Value::Int(1));
2558        node.extra_fields
2559            .insert("name".into(), Value::Str("heading".into()));
2560
2561        let case = FieldTransform::Case {
2562            branches: vec![
2563                CaseBranch {
2564                    predicate: Expr::builtin(
2565                        BuiltinOp::Eq,
2566                        vec![Expr::Var(Arc::from("level")), Expr::Lit(Literal::Int(1))],
2567                    ),
2568                    transforms: vec![FieldTransform::ComputeField {
2569                        target_key: "name".into(),
2570                        expr: Expr::Lit(Literal::Str("h1".into())),
2571                        inverse: None,
2572                        coercion_class: panproto_gat::CoercionClass::Opaque,
2573                    }],
2574                },
2575                CaseBranch {
2576                    predicate: Expr::builtin(
2577                        BuiltinOp::Eq,
2578                        vec![Expr::Var(Arc::from("level")), Expr::Lit(Literal::Int(2))],
2579                    ),
2580                    transforms: vec![FieldTransform::ComputeField {
2581                        target_key: "name".into(),
2582                        expr: Expr::Lit(Literal::Str("h2".into())),
2583                        inverse: None,
2584                        coercion_class: panproto_gat::CoercionClass::Opaque,
2585                    }],
2586                },
2587            ],
2588        };
2589
2590        apply_field_transforms(&mut node, &[case], &HashMap::new());
2591
2592        assert_eq!(
2593            node.extra_fields.get("name"),
2594            Some(&Value::Str("h1".into()))
2595        );
2596    }
2597
2598    // --- Child scalar access tests ---
2599
2600    /// Build a 3-node instance: root object + two string children.
2601    fn instance_with_scalar_children() -> (WInstance, HashMap<String, Value>) {
2602        let mut nodes = HashMap::new();
2603        nodes.insert(0, Node::new(0, "body"));
2604        nodes.insert(
2605            1,
2606            Node::new(1, "body.repo").with_value(FieldPresence::Present(Value::Str(
2607                "at://did:plc:abc/app.bsky.feed.post/rkey123".into(),
2608            ))),
2609        );
2610        nodes.insert(
2611            2,
2612            Node::new(2, "body.text")
2613                .with_value(FieldPresence::Present(Value::Str("hello world".into()))),
2614        );
2615
2616        let edge_repo = Edge {
2617            src: "body".into(),
2618            tgt: "body.repo".into(),
2619            kind: "prop".into(),
2620            name: Some("repo".into()),
2621        };
2622        let edge_text = Edge {
2623            src: "body".into(),
2624            tgt: "body.text".into(),
2625            kind: "prop".into(),
2626            name: Some("text".into()),
2627        };
2628
2629        let arcs = vec![(0, 1, edge_repo), (0, 2, edge_text)];
2630        let instance = WInstance::new(nodes, arcs, vec![], 0, "body".into());
2631        let scalars = collect_scalar_child_values(&instance, 0);
2632        (instance, scalars)
2633    }
2634
2635    #[test]
2636    fn compute_field_reads_scalar_child() {
2637        // ComputeField must read string fields stored as child
2638        // vertices, not just those in `extra_fields`. This unit test
2639        // covers the basic access path; the integration test
2640        // `at_uri_decomposition_end_to_end` exercises real Split /
2641        // Index expressions for full AT-URI parsing.
2642        let (_instance, scalars) = instance_with_scalar_children();
2643        let mut node = Node::new(0, "body");
2644
2645        let expr = panproto_expr::Expr::Var(std::sync::Arc::from("repo"));
2646
2647        let transform = FieldTransform::ComputeField {
2648            target_key: "repo_copy".to_string(),
2649            expr,
2650            inverse: None,
2651            coercion_class: panproto_gat::CoercionClass::Projection,
2652        };
2653        apply_field_transforms(&mut node, &[transform], &scalars);
2654
2655        assert_eq!(
2656            node.extra_fields.get("repo_copy"),
2657            Some(&Value::Str(
2658                "at://did:plc:abc/app.bsky.feed.post/rkey123".into()
2659            )),
2660            "ComputeField should read scalar child value via dependent-sum projection"
2661        );
2662    }
2663
2664    #[test]
2665    fn apply_expr_on_scalar_child() {
2666        let (_instance, scalars) = instance_with_scalar_children();
2667        let mut node = Node::new(0, "body");
2668
2669        // ApplyExpr on "text" (a child scalar): should find it and write
2670        // the transformed result to extra_fields.
2671        let expr = panproto_expr::Expr::Builtin(
2672            panproto_expr::BuiltinOp::Concat,
2673            vec![
2674                panproto_expr::Expr::Var(std::sync::Arc::from("text")),
2675                panproto_expr::Expr::Lit(panproto_expr::Literal::Str("!".into())),
2676            ],
2677        );
2678        let transform = FieldTransform::ApplyExpr {
2679            key: "text".to_string(),
2680            expr,
2681            inverse: None,
2682            coercion_class: panproto_gat::CoercionClass::Projection,
2683        };
2684        apply_field_transforms(&mut node, &[transform], &scalars);
2685
2686        assert_eq!(
2687            node.extra_fields.get("text"),
2688            Some(&Value::Str("hello world!".into())),
2689            "ApplyExpr should read child scalar and write result to extra_fields"
2690        );
2691    }
2692
2693    #[test]
2694    fn case_branch_on_scalar_child() {
2695        use panproto_expr::{BuiltinOp, Expr, Literal};
2696        use std::sync::Arc;
2697
2698        let (_instance, scalars) = instance_with_scalar_children();
2699        let mut node = Node::new(0, "body");
2700
2701        // Branch: if (contains repo "did:plc") then add field "has_did" = true
2702        let case = FieldTransform::Case {
2703            branches: vec![CaseBranch {
2704                predicate: Expr::builtin(
2705                    BuiltinOp::Contains,
2706                    vec![
2707                        Expr::Var(Arc::from("repo")),
2708                        Expr::Lit(Literal::Str("did:plc".into())),
2709                    ],
2710                ),
2711                transforms: vec![FieldTransform::AddField {
2712                    key: "has_did".into(),
2713                    value: Value::Bool(true),
2714                }],
2715            }],
2716        };
2717        apply_field_transforms(&mut node, &[case], &scalars);
2718
2719        assert_eq!(
2720            node.extra_fields.get("has_did"),
2721            Some(&Value::Bool(true)),
2722            "Case predicate should evaluate against child scalar values"
2723        );
2724    }
2725
2726    #[test]
2727    fn drop_field_on_extra_field_still_works() {
2728        let mut node = Node::new(0, "v");
2729        node.extra_fields
2730            .insert("keep".into(), Value::Str("yes".into()));
2731        node.extra_fields
2732            .insert("drop_me".into(), Value::Str("bye".into()));
2733
2734        let transform = FieldTransform::DropField {
2735            key: "drop_me".into(),
2736        };
2737        apply_field_transforms(&mut node, &[transform], &HashMap::new());
2738
2739        assert!(node.extra_fields.contains_key("keep"));
2740        assert!(!node.extra_fields.contains_key("drop_me"));
2741    }
2742
2743    #[test]
2744    fn child_scalars_do_not_override_extra_fields() {
2745        // When a key exists in both extra_fields and child_scalars,
2746        // extra_fields must take precedence (binding order correctness).
2747        let mut node = Node::new(0, "v");
2748        node.extra_fields
2749            .insert("repo".into(), Value::Str("from_extra_fields".into()));
2750
2751        let mut child_scalars = HashMap::new();
2752        child_scalars.insert("repo".into(), Value::Str("from_child".into()));
2753
2754        let expr = panproto_expr::Expr::Var(std::sync::Arc::from("repo"));
2755        let transform = FieldTransform::ComputeField {
2756            target_key: "repo_copy".to_string(),
2757            expr,
2758            inverse: None,
2759            coercion_class: panproto_gat::CoercionClass::Projection,
2760        };
2761        apply_field_transforms(&mut node, &[transform], &child_scalars);
2762
2763        assert_eq!(
2764            node.extra_fields.get("repo_copy"),
2765            Some(&Value::Str("from_extra_fields".into())),
2766            "extra_fields must take precedence over child_scalars"
2767        );
2768    }
2769
2770    #[test]
2771    fn collect_scalar_child_values_completeness() {
2772        let (instance, scalars) = instance_with_scalar_children();
2773        assert_eq!(scalars.len(), 2, "should collect both scalar children");
2774        assert_eq!(
2775            scalars.get("repo"),
2776            Some(&Value::Str(
2777                "at://did:plc:abc/app.bsky.feed.post/rkey123".into()
2778            ))
2779        );
2780        assert_eq!(scalars.get("text"), Some(&Value::Str("hello world".into())));
2781
2782        // Root node has no parent, so collecting from a non-existent parent returns empty
2783        assert!(collect_scalar_child_values(&instance, 99).is_empty());
2784    }
2785
2786    #[test]
2787    fn env_monotonicity() {
2788        // build_env_with_children must bind every key that
2789        // build_env_from_extra_fields binds, with the same value.
2790        let mut extra = HashMap::new();
2791        extra.insert("alpha".into(), Value::Str("a".into()));
2792        extra.insert("beta".into(), Value::Int(42));
2793
2794        let mut children = HashMap::new();
2795        children.insert("gamma".into(), Value::Str("g".into()));
2796        children.insert("delta".into(), Value::Bool(true));
2797
2798        let env_base = build_env_from_extra_fields(&extra);
2799        let env_extended = build_env_with_children(&extra, &children);
2800
2801        // Every binding from base must be present in extended
2802        let config = panproto_expr::EvalConfig::default();
2803        for key in ["alpha", "beta"] {
2804            let var = panproto_expr::Expr::Var(std::sync::Arc::from(key));
2805            let base_result = panproto_expr::eval(&var, &env_base, &config).ok();
2806            let ext_result = panproto_expr::eval(&var, &env_extended, &config).ok();
2807            assert_eq!(
2808                base_result, ext_result,
2809                "binding for {key} must match between base and extended env"
2810            );
2811        }
2812
2813        // Extended env should also have child bindings
2814        for key in ["gamma", "delta"] {
2815            let var = panproto_expr::Expr::Var(std::sync::Arc::from(key));
2816            assert!(
2817                panproto_expr::eval(&var, &env_extended, &config).is_ok(),
2818                "extended env should bind child scalar {key}"
2819            );
2820        }
2821    }
2822
2823    #[test]
2824    fn compute_field_deterministic() {
2825        // Applying the same ComputeField twice produces the same result
2826        // (fiber endomorphism idempotence when source data is unchanged).
2827        let (_instance, scalars) = instance_with_scalar_children();
2828        let expr = panproto_expr::Expr::Var(std::sync::Arc::from("repo"));
2829        let transform = FieldTransform::ComputeField {
2830            target_key: "derived".to_string(),
2831            expr,
2832            inverse: None,
2833            coercion_class: panproto_gat::CoercionClass::Projection,
2834        };
2835
2836        let mut node1 = Node::new(0, "body");
2837        apply_field_transforms(&mut node1, std::slice::from_ref(&transform), &scalars);
2838        let result1 = node1.extra_fields.get("derived").cloned();
2839
2840        let mut node2 = Node::new(0, "body");
2841        apply_field_transforms(&mut node2, std::slice::from_ref(&transform), &scalars);
2842        let result2 = node2.extra_fields.get("derived").cloned();
2843
2844        assert_eq!(result1, result2, "ComputeField must be deterministic");
2845    }
2846
2847    // --- Property-based tests ---
2848
2849    #[cfg(test)]
2850    #[allow(clippy::unwrap_used)]
2851    mod property {
2852        use super::*;
2853        use proptest::prelude::*;
2854
2855        /// Generate a random schema + instance with N scalar children
2856        /// under a root object node.
2857        fn arb_instance_with_scalars()
2858        -> impl Strategy<Value = (WInstance, HashMap<String, Value>, Vec<String>)> {
2859            (1..=5usize).prop_flat_map(|n| {
2860                prop::collection::vec("[a-z]{1,8}".prop_map(String::from), n..=n).prop_flat_map(
2861                    move |values| {
2862                        prop::collection::vec("[a-z]{1,6}".prop_map(String::from), n..=n).prop_map(
2863                            move |names| {
2864                                let values = values.clone();
2865                                // Deduplicate names
2866                                let mut seen = std::collections::HashSet::new();
2867                                let deduped: Vec<String> = names
2868                                    .iter()
2869                                    .map(|name| {
2870                                        let mut candidate = name.clone();
2871                                        let mut i = 0;
2872                                        while seen.contains(&candidate) {
2873                                            candidate = format!("{name}{i}");
2874                                            i += 1;
2875                                        }
2876                                        seen.insert(candidate.clone());
2877                                        candidate
2878                                    })
2879                                    .collect();
2880
2881                                let mut nodes = HashMap::new();
2882                                nodes.insert(0, Node::new(0, "root"));
2883
2884                                let mut arcs = Vec::new();
2885                                for (i, (name, val)) in
2886                                    deduped.iter().zip(values.iter()).enumerate()
2887                                {
2888                                    let nid = u32::try_from(i + 1).unwrap();
2889                                    let anchor = format!("root.{name}");
2890                                    nodes.insert(
2891                                        nid,
2892                                        Node::new(nid, anchor.as_str()).with_value(
2893                                            FieldPresence::Present(Value::Str(val.clone())),
2894                                        ),
2895                                    );
2896                                    arcs.push((
2897                                        0,
2898                                        nid,
2899                                        Edge {
2900                                            src: "root".into(),
2901                                            tgt: Name::from(anchor.as_str()),
2902                                            kind: "prop".into(),
2903                                            name: Some(Name::from(name.as_str())),
2904                                        },
2905                                    ));
2906                                }
2907
2908                                let instance =
2909                                    WInstance::new(nodes, arcs, vec![], 0, "root".into());
2910                                let scalars = collect_scalar_child_values(&instance, 0);
2911                                (instance, scalars, deduped)
2912                            },
2913                        )
2914                    },
2915                )
2916            })
2917        }
2918
2919        proptest! {
2920            #![proptest_config(ProptestConfig::with_cases(128))]
2921
2922            #[test]
2923            fn prop_child_scalar_collection_complete(
2924                (_instance, scalars, names) in arb_instance_with_scalars()
2925            ) {
2926                // Every child name must appear in the scalar collection.
2927                for name in &names {
2928                    prop_assert!(
2929                        scalars.contains_key(name),
2930                        "child scalar {name} missing from collection"
2931                    );
2932                }
2933                prop_assert_eq!(
2934                    scalars.len(), names.len(),
2935                    "scalar count must match child count"
2936                );
2937            }
2938
2939            #[test]
2940            fn prop_compute_field_reads_any_child(
2941                (_instance, scalars, names) in arb_instance_with_scalars()
2942            ) {
2943                // ComputeField should be able to read any child scalar by name.
2944                for name in &names {
2945                    let expr = panproto_expr::Expr::Var(std::sync::Arc::from(name.as_str()));
2946                    let transform = FieldTransform::ComputeField {
2947                        target_key: format!("{name}_copy"),
2948                        expr,
2949                        inverse: None,
2950                        coercion_class: panproto_gat::CoercionClass::Projection,
2951                    };
2952                    let mut node = Node::new(0, "root");
2953                    apply_field_transforms(&mut node, &[transform], &scalars);
2954                    let expected = scalars.get(name);
2955                    let actual = node.extra_fields.get(&format!("{name}_copy"));
2956                    prop_assert_eq!(
2957                        actual, expected,
2958                        "ComputeField should read child scalar"
2959                    );
2960                }
2961            }
2962
2963            #[test]
2964            fn prop_env_monotonicity(
2965                (_instance, scalars, _names) in arb_instance_with_scalars()
2966            ) {
2967                // Adding child_scalars must not remove or change any existing
2968                // extra_field binding. (Monotonicity of environment extension.)
2969                let mut extra = HashMap::new();
2970                extra.insert("sentinel".into(), Value::Str("sentinel_val".into()));
2971
2972                let env_base = build_env_from_extra_fields(&extra);
2973                let env_extended = build_env_with_children(&extra, &scalars);
2974
2975                let var = panproto_expr::Expr::Var(std::sync::Arc::from("sentinel"));
2976                let config = panproto_expr::EvalConfig::default();
2977                let base_result = panproto_expr::eval(&var, &env_base, &config).ok();
2978                let ext_result = panproto_expr::eval(&var, &env_extended, &config).ok();
2979                prop_assert_eq!(
2980                    base_result, ext_result,
2981                    "existing extra_field binding must be preserved"
2982                );
2983            }
2984
2985            #[test]
2986            fn prop_identity_restrict_preserves_all_values(
2987                (instance, _scalars, _names) in arb_instance_with_scalars()
2988            ) {
2989                // Identity migration with empty field_transforms: passing
2990                // child_scalars must not corrupt the instance.
2991                use smallvec::SmallVec;
2992
2993                let mut vertices = HashMap::new();
2994                let mut edges_map = HashMap::new();
2995                let mut outgoing: HashMap<Name, SmallVec<Edge, 4>> = HashMap::new();
2996                let mut incoming: HashMap<Name, SmallVec<Edge, 4>> = HashMap::new();
2997                let mut between: HashMap<(Name, Name), SmallVec<Edge, 2>> = HashMap::new();
2998
2999                for node in instance.nodes.values() {
3000                    vertices.insert(
3001                        node.anchor.clone(),
3002                        panproto_schema::Vertex {
3003                            id: node.anchor.clone(),
3004                            kind: if node.value.is_some() { "string".into() } else { "object".into() },
3005                            nsid: None,
3006                        },
3007                    );
3008                }
3009                for (p, c, e) in &instance.arcs {
3010                    let _ = p;
3011                    let _ = c;
3012                    edges_map.insert(e.clone(), e.kind.clone());
3013                    outgoing.entry(e.src.clone()).or_default().push(e.clone());
3014                    incoming.entry(e.tgt.clone()).or_default().push(e.clone());
3015                    between.entry((e.src.clone(), e.tgt.clone())).or_default().push(e.clone());
3016                }
3017
3018                let schema = panproto_schema::Schema {
3019                    protocol: "test".into(),
3020                    vertices,
3021                    edges: edges_map,
3022                    hyper_edges: HashMap::new(),
3023                    constraints: HashMap::new(),
3024                    required: HashMap::new(),
3025                    nsids: HashMap::new(),
3026            entries: Vec::new(),
3027                    variants: HashMap::new(),
3028                    orderings: HashMap::new(),
3029                    recursion_points: HashMap::new(),
3030                    spans: HashMap::new(),
3031                    usage_modes: HashMap::new(),
3032                    nominal: HashMap::new(),
3033                    coercions: HashMap::new(),
3034                    mergers: HashMap::new(),
3035                    defaults: HashMap::new(),
3036                    policies: HashMap::new(),
3037                    outgoing,
3038                    incoming,
3039                    between,
3040                };
3041
3042                let surviving_verts = schema.vertices.keys().cloned().collect();
3043                let surviving_edges = schema.edges.keys().cloned().collect();
3044                let migration = CompiledMigration {
3045                    surviving_verts,
3046                    surviving_edges,
3047                    vertex_remap: HashMap::new(),
3048                    edge_remap: HashMap::new(),
3049                    resolver: HashMap::new(),
3050                    hyper_resolver: HashMap::new(),
3051                    field_transforms: HashMap::new(),
3052                    conditional_survival: HashMap::new(),
3053                    expansion_path: HashMap::new(),
3054                };
3055
3056                let result = wtype_restrict(&instance, &schema, &schema, &migration);
3057                prop_assert!(result.is_ok(), "identity restrict should succeed");
3058                let restricted = result.unwrap();
3059                prop_assert_eq!(
3060                    restricted.node_count(), instance.node_count(),
3061                    "identity restrict must preserve node count"
3062                );
3063                for (&id, node) in &instance.nodes {
3064                    let r_node = restricted.nodes.get(&id).unwrap();
3065                    prop_assert_eq!(&node.anchor, &r_node.anchor);
3066                    prop_assert_eq!(&node.value, &r_node.value);
3067                    prop_assert_eq!(&node.extra_fields, &r_node.extra_fields);
3068                }
3069            }
3070        }
3071    }
3072}