Skip to main content

panproto_inst/
functor.rs

//! Set-valued functor instance representation.
//!
//! An [`FInstance`] represents relational (tabular) data as a set-valued
//! functor: each schema vertex maps to a table (set of rows), and each
//! edge maps to a foreign-key relationship.
//!
//! The restrict operation ([`functor_restrict`]) is precomposition
//! (`Delta_F`): for each table in the target, look up the corresponding
//! source table. The extend operation ([`functor_extend`], `Sigma_F`)
//! copies source tables forward to their mapped target vertices.
10
11use std::collections::HashMap;
12
13use panproto_schema::Edge;
14use serde::{Deserialize, Serialize};
15
16use crate::error::RestrictError;
17use crate::value::Value;
18use crate::wtype::CompiledMigration;
19
20/// A set-valued functor instance (relational data).
21///
22/// Tables map schema vertex IDs to rows (each row is a map of column
23/// names to values). Foreign keys map schema edges to pairs of
24/// (source row index, target row index).
25#[derive(Clone, Debug, Serialize, Deserialize)]
26pub struct FInstance {
27    /// Tables: vertex ID to rows. Each row is a column-name to value map.
28    pub tables: HashMap<String, Vec<HashMap<String, Value>>>,
29    /// Foreign keys: edge to row-index pairs.
30    pub foreign_keys: HashMap<Edge, Vec<(usize, usize)>>,
31}
32
33impl FInstance {
34    /// Create a new empty functor instance.
35    #[must_use]
36    pub fn new() -> Self {
37        Self {
38            tables: HashMap::new(),
39            foreign_keys: HashMap::new(),
40        }
41    }
42
43    /// Add a table for the given vertex.
44    #[must_use]
45    pub fn with_table(
46        mut self,
47        vertex_id: impl Into<String>,
48        rows: Vec<HashMap<String, Value>>,
49    ) -> Self {
50        self.tables.insert(vertex_id.into(), rows);
51        self
52    }
53
54    /// Add a foreign key for the given edge.
55    #[must_use]
56    pub fn with_foreign_key(mut self, edge: Edge, pairs: Vec<(usize, usize)>) -> Self {
57        self.foreign_keys.insert(edge, pairs);
58        self
59    }
60
61    /// Returns the number of tables.
62    #[must_use]
63    pub fn table_count(&self) -> usize {
64        self.tables.len()
65    }
66
67    /// Returns the number of rows in a specific table.
68    #[must_use]
69    pub fn row_count(&self, vertex_id: &str) -> usize {
70        self.tables.get(vertex_id).map_or(0, Vec::len)
71    }
72}
73
74impl Default for FInstance {
75    fn default() -> Self {
76        Self::new()
77    }
78}
79
80/// The restrict operation for set-valued functor instances.
81///
82/// This is `Delta_F` (precomposition): for each vertex in the target
83/// schema, look up the corresponding table in the source via the
84/// migration's vertex map.
85///
86/// # Errors
87///
88/// Returns `RestrictError` if a required source table is missing
89/// (though this typically means the migration is malformed).
90pub fn functor_restrict(
91    instance: &FInstance,
92    migration: &CompiledMigration,
93) -> Result<FInstance, RestrictError> {
94    let mut new_tables = HashMap::new();
95    let mut new_fks = HashMap::new();
96
97    // For each surviving vertex, pull the table from the source.
98    // vertex_remap maps src -> tgt, so invert to find all sources.
99    // When multiple source vertices map to the same target, collect all.
100    for tgt_vertex in &migration.surviving_verts {
101        let src_vertices: Vec<&str> = migration
102            .vertex_remap
103            .iter()
104            .filter(|(_, v)| *v == tgt_vertex)
105            .map(|(k, _)| &**k)
106            .collect();
107
108        let sources = if src_vertices.is_empty() {
109            vec![&**tgt_vertex]
110        } else {
111            src_vertices
112        };
113
114        let mut combined_rows = Vec::new();
115        for src_vertex in &sources {
116            if let Some(rows) = instance.tables.get(*src_vertex) {
117                combined_rows.extend(rows.iter().cloned());
118            }
119        }
120        if !combined_rows.is_empty() {
121            new_tables.insert(tgt_vertex.to_string(), combined_rows);
122        }
123    }
124
125    // Remap foreign keys for surviving edges
126    for (edge, pairs) in &instance.foreign_keys {
127        if let Some(new_edge) = migration.edge_remap.get(edge) {
128            if migration.surviving_verts.contains(&new_edge.src)
129                && migration.surviving_verts.contains(&new_edge.tgt)
130            {
131                new_fks.insert(new_edge.clone(), pairs.clone());
132            }
133        } else if migration.surviving_edges.contains(edge) {
134            new_fks.insert(edge.clone(), pairs.clone());
135        }
136    }
137
138    Ok(FInstance {
139        tables: new_tables,
140        foreign_keys: new_fks,
141    })
142}
143
144/// The extend operation for set-valued functor instances (`Sigma_F`).
145///
146/// This is the left Kan extension: given an instance of the source schema
147/// and a migration mapping (source -> target), produce an instance of the
148/// target schema by copying tables forward and initializing unmapped tables
149/// as empty.
150///
151/// # Errors
152///
153/// Returns `RestrictError` if the migration references inconsistent mappings.
154pub fn functor_extend(
155    instance: &FInstance,
156    migration: &CompiledMigration,
157) -> Result<FInstance, RestrictError> {
158    let mut new_tables = HashMap::new();
159    let mut new_fks = HashMap::new();
160
161    // Copy tables from source to their mapped names in the target.
162    // vertex_remap maps src -> tgt. When multiple source vertices map
163    // to the same target (many-to-one), compute the coproduct: disjoint
164    // union of rows with original column names (they share the same
165    // schema vertex, so columns should match). Row indices in FK pairs
166    // are offset by the cumulative row count to remain valid after
167    // concatenation. Missing columns across source tables are filled
168    // with Value::Null.
169
170    // First pass: collect rows per target vertex and track row offsets
171    // per source vertex for FK index offsetting.
172    let mut row_offsets: HashMap<String, usize> = HashMap::with_capacity(instance.tables.len());
173    for (src_vertex, rows) in &instance.tables {
174        let tgt_vertex = migration
175            .vertex_remap
176            .get(src_vertex.as_str())
177            .map_or_else(|| src_vertex.clone(), std::string::ToString::to_string);
178        let entry = new_tables.entry(tgt_vertex).or_insert_with(Vec::new);
179        let offset = entry.len();
180        row_offsets.insert(src_vertex.clone(), offset);
181        entry.extend(rows.iter().cloned());
182    }
183
184    // Second pass: union column sets within each target table and fill
185    // missing values with Value::Null.
186    for rows in new_tables.values_mut() {
187        // Collect the union of all column names across rows.
188        let all_columns: std::collections::HashSet<String> =
189            rows.iter().flat_map(|row| row.keys().cloned()).collect();
190        // Fill missing columns with null.
191        for row in rows.iter_mut() {
192            for col in &all_columns {
193                row.entry(col.clone()).or_insert(Value::Null);
194            }
195        }
196    }
197
198    // Initialize tables that exist in surviving_verts but were not
199    // populated by the source instance.
200    for tgt_vertex in &migration.surviving_verts {
201        new_tables
202            .entry(tgt_vertex.to_string())
203            .or_insert_with(Vec::new);
204    }
205
206    // Remap foreign keys, offsetting row indices by the cumulative row
207    // count so they remain valid after concatenation.
208    for (edge, pairs) in &instance.foreign_keys {
209        let resolved_edge = migration.edge_remap.get(edge).map_or_else(
210            || {
211                if migration.surviving_edges.contains(edge) {
212                    Some(edge.clone())
213                } else {
214                    None
215                }
216            },
217            |new_edge| Some(new_edge.clone()),
218        );
219
220        if let Some(new_edge) = resolved_edge {
221            let src_offset = row_offsets.get(&*edge.src).copied().unwrap_or(0);
222            let tgt_offset = row_offsets.get(&*edge.tgt).copied().unwrap_or(0);
223            let offset_pairs: Vec<(usize, usize)> = pairs
224                .iter()
225                .map(|(s, t)| (s + src_offset, t + tgt_offset))
226                .collect();
227            new_fks.insert(new_edge, offset_pairs);
228        }
229    }
230
231    Ok(FInstance {
232        tables: new_tables,
233        foreign_keys: new_fks,
234    })
235}
236
#[cfg(test)]
mod tests {
    use std::collections::HashSet;

    use super::*;

    /// Builds a single-column row holding a string value.
    fn str_row(column: &'static str, value: &'static str) -> HashMap<String, Value> {
        let mut row = HashMap::new();
        row.insert(column.to_string(), Value::Str(value.into()));
        row
    }

    /// Builds a migration that keeps exactly `verts`, keeps no edges, and
    /// remaps nothing.
    fn migration_keeping(verts: &[&'static str]) -> CompiledMigration {
        CompiledMigration {
            surviving_verts: verts
                .iter()
                .copied()
                .map(panproto_gat::Name::from)
                .collect(),
            surviving_edges: HashSet::new(),
            vertex_remap: HashMap::new(),
            edge_remap: HashMap::new(),
            resolver: HashMap::new(),
            hyper_resolver: HashMap::new(),
            field_transforms: HashMap::new(),
            conditional_survival: HashMap::new(),
            expansion_path: HashMap::new(),
        }
    }

    #[test]
    fn empty_functor_instance() {
        let inst = FInstance::new();
        assert_eq!(inst.table_count(), 0);
    }

    #[test]
    fn functor_with_tables() {
        let inst = FInstance::new().with_table("users", vec![str_row("name", "Alice")]);
        assert_eq!(inst.table_count(), 1);
        assert_eq!(inst.row_count("users"), 1);
        assert_eq!(inst.row_count("posts"), 0);
    }

    #[test]
    fn functor_restrict_drops_table() {
        let fk_edge = Edge {
            src: "posts".into(),
            tgt: "users".into(),
            kind: "fk".into(),
            name: Some("author".into()),
        };

        let inst = FInstance::new()
            .with_table("users", vec![str_row("name", "Alice")])
            .with_table("posts", vec![str_row("title", "Hello")])
            .with_foreign_key(fk_edge, vec![(0, 0)]);

        // Migration that only keeps "users"
        let migration = migration_keeping(&["users"]);

        let result = functor_restrict(&inst, &migration);
        assert!(result.is_ok());
        let restricted = result.unwrap_or_else(|_| FInstance::new());
        assert_eq!(restricted.table_count(), 1);
        assert!(restricted.tables.contains_key("users"));
        assert!(!restricted.tables.contains_key("posts"));
        assert!(restricted.foreign_keys.is_empty());
    }

    #[test]
    fn functor_extend_initializes_missing_tables() {
        let inst = FInstance::new().with_table("users", vec![str_row("name", "Alice")]);

        // "posts" survives in the target but has no rows in the source.
        let migration = migration_keeping(&["users", "posts"]);

        let result = functor_extend(&inst, &migration);
        assert!(result.is_ok());
        let extended = result.unwrap_or_else(|_| FInstance::new());
        assert_eq!(extended.table_count(), 2);
        assert_eq!(extended.row_count("users"), 1);
        assert!(extended.tables.contains_key("posts"));
        assert_eq!(extended.row_count("posts"), 0);
        assert!(extended.foreign_keys.is_empty());
    }
}