Skip to main content

panproto_inst/
provenance.rs

//! Data lineage tracking through transforms.
//!
//! Provenance records which source fields contributed to each target field
//! and through which transform steps, enabling debugging, incremental
//! recomputation, and audit/compliance.
6
7use std::collections::HashMap;
8
9use serde::{Deserialize, Serialize};
10
11/// Provenance information for a single node in the target instance.
12#[derive(Debug, Clone, Serialize, Deserialize)]
13pub struct Provenance {
14    /// The node ID in the target instance.
15    pub node_id: u32,
16    /// Source fields that contributed to this node's value.
17    pub source_fields: Vec<SourceField>,
18    /// Transform steps that were applied.
19    pub transform_chain: Vec<TransformStep>,
20}
21
22/// A reference to a source field that contributed to a target value.
23#[derive(Debug, Clone, Serialize, Deserialize)]
24pub struct SourceField {
25    /// Path of schema vertex names from root to the source field.
26    pub schema_path: Vec<String>,
27    /// Node ID in the source instance.
28    pub node_id: u32,
29}
30
31/// A step in the transform chain that produced a value.
32#[derive(Debug, Clone, Serialize, Deserialize)]
33pub struct TransformStep {
34    /// Name of the protolens that performed this step.
35    pub protolens_name: String,
36    /// Index of this step in the protolens chain.
37    pub step_index: usize,
38}
39
40/// A map from target node IDs to their provenance information.
41pub type ProvenanceMap = HashMap<u32, Provenance>;
42
43/// Compute provenance for a restriction operation.
44///
45/// Given source and target node lists and a vertex remapping,
46/// build a provenance map recording which source nodes contributed
47/// to each target node.
48#[must_use]
49pub fn compute_provenance(
50    src_nodes: &[(u32, String)],
51    tgt_nodes: &[(u32, String)],
52    vertex_remap: &HashMap<String, String>,
53) -> ProvenanceMap {
54    let mut map = ProvenanceMap::new();
55    for (tgt_id, tgt_anchor) in tgt_nodes {
56        let source_fields: Vec<SourceField> = src_nodes
57            .iter()
58            .filter(|(_, src_anchor)| {
59                vertex_remap
60                    .get(src_anchor.as_str())
61                    .is_some_and(|mapped| mapped == tgt_anchor)
62                    || src_anchor == tgt_anchor
63            })
64            .map(|(src_id, src_anchor)| SourceField {
65                schema_path: vec![src_anchor.clone()],
66                node_id: *src_id,
67            })
68            .collect();
69
70        map.insert(
71            *tgt_id,
72            Provenance {
73                node_id: *tgt_id,
74                source_fields,
75                transform_chain: vec![],
76            },
77        );
78    }
79    map
80}
81
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used)]
mod tests {
    use super::*;

    /// Builds an owned `(node_id, anchor)` list from string literals.
    fn nodes(pairs: &[(u32, &str)]) -> Vec<(u32, String)> {
        pairs
            .iter()
            .map(|&(id, anchor)| (id, anchor.to_owned()))
            .collect()
    }

    /// Collects the source node IDs recorded for one provenance entry.
    fn source_ids(p: &Provenance) -> Vec<u32> {
        p.source_fields.iter().map(|s| s.node_id).collect()
    }

    #[test]
    fn identity_provenance_maps_nodes_to_themselves() {
        let src = nodes(&[(0, "root"), (1, "field_a"), (2, "field_b")]);
        let tgt = src.clone();
        let remap = HashMap::new();
        let prov = compute_provenance(&src, &tgt, &remap);

        assert_eq!(prov.len(), 3);
        // Each target node should have exactly one source field (itself).
        for (tgt_id, p) in &prov {
            assert_eq!(p.source_fields.len(), 1, "node {tgt_id} source count");
            assert_eq!(p.source_fields[0].node_id, *tgt_id);
        }
    }

    #[test]
    fn renamed_vertex_provenance_follows_remap() {
        let src = nodes(&[(1, "old_name")]);
        let tgt = nodes(&[(1, "new_name")]);
        let mut remap = HashMap::new();
        remap.insert("old_name".to_owned(), "new_name".to_owned());

        let prov = compute_provenance(&src, &tgt, &remap);
        assert_eq!(prov.len(), 1);
        let p = &prov[&1];
        assert_eq!(p.source_fields.len(), 1);
        // The recorded path names the *source* vertex, not the renamed one.
        assert_eq!(p.source_fields[0].schema_path, vec!["old_name".to_owned()]);
    }

    #[test]
    fn no_matching_source_yields_empty_sources() {
        let src = nodes(&[(1, "unrelated")]);
        let tgt = nodes(&[(2, "target_only")]);
        let remap = HashMap::new();

        let prov = compute_provenance(&src, &tgt, &remap);
        assert_eq!(prov.len(), 1);
        assert!(prov[&2].source_fields.is_empty());
    }

    #[test]
    fn merged_sources_are_listed_in_source_order() {
        let src = nodes(&[(1, "merged"), (2, "first"), (3, "other"), (4, "merged")]);
        let tgt = nodes(&[(10, "merged")]);
        let mut remap = HashMap::new();
        remap.insert("first".to_owned(), "merged".to_owned());

        let prov = compute_provenance(&src, &tgt, &remap);
        // Direct and remapped matches interleave exactly as in `src`.
        assert_eq!(source_ids(&prov[&10]), vec![1, 2, 4]);
    }

    #[test]
    fn self_mapped_anchor_is_listed_once() {
        let src = nodes(&[(1, "same")]);
        let tgt = nodes(&[(1, "same")]);
        let mut remap = HashMap::new();
        remap.insert("same".to_owned(), "same".to_owned());

        let prov = compute_provenance(&src, &tgt, &remap);
        assert_eq!(source_ids(&prov[&1]), vec![1]);
    }

    #[test]
    fn duplicate_target_ids_keep_last_occurrence() {
        let src = nodes(&[(1, "a"), (2, "b")]);
        let tgt = nodes(&[(7, "a"), (7, "b")]);
        let remap = HashMap::new();

        let prov = compute_provenance(&src, &tgt, &remap);
        assert_eq!(prov.len(), 1);
        assert_eq!(source_ids(&prov[&7]), vec![2]);
    }

    #[test]
    fn every_entry_has_matching_id_and_empty_chain() {
        let src = nodes(&[(1, "a")]);
        let tgt = nodes(&[(5, "a"), (6, "b")]);
        let remap = HashMap::new();

        let prov = compute_provenance(&src, &tgt, &remap);
        for (tgt_id, p) in &prov {
            assert_eq!(p.node_id, *tgt_id);
            assert!(p.transform_chain.is_empty());
        }
    }
}