use std::collections::HashMap;
use serde::{Deserialize, Serialize};
/// Provenance record for a single node: the source fields it was matched
/// to and the chain of transform steps associated with it.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Provenance {
/// Identifier of the node this record describes (`compute_provenance`
/// stores the target node id here).
pub node_id: u32,
/// Source-side fields matched to this node; empty when nothing matched.
pub source_fields: Vec<SourceField>,
/// Transform steps recorded for this node. `compute_provenance` always
/// leaves this empty; presumably it is filled in elsewhere — TODO confirm.
pub transform_chain: Vec<TransformStep>,
}
/// A reference to one source-side node that contributed to a target node.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SourceField {
/// Path segments locating the field. `compute_provenance` stores a
/// single-element path holding the source node's anchor string.
pub schema_path: Vec<String>,
/// Identifier of the source node.
pub node_id: u32,
}
/// One entry of a [`Provenance::transform_chain`].
///
/// NOTE(review): nothing in this file constructs a `TransformStep`; the
/// field descriptions below are inferred from the names — confirm against
/// the code that populates the chain.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TransformStep {
/// Name of the protolens this step refers to (presumably the one applied).
pub protolens_name: String,
/// Index of the step (presumably its position within the chain).
pub step_index: usize,
}
/// Map from a node id to that node's [`Provenance`] record;
/// `compute_provenance` keys it by target node id.
pub type ProvenanceMap = HashMap<u32, Provenance>;
/// Computes, for every target node, which source nodes it is derived from.
///
/// A source node is recorded as a source of a target node when either
///
/// * `vertex_remap` maps the source anchor to the target anchor, or
/// * the two anchors are equal. This identity match applies even when the
///   source anchor also has a remap entry pointing at a different anchor.
///
/// # Arguments
///
/// * `src_nodes` - `(node id, anchor)` pairs of the source side.
/// * `tgt_nodes` - `(node id, anchor)` pairs of the target side.
/// * `vertex_remap` - maps a source anchor to the target anchor it became.
///
/// # Returns
///
/// A [`ProvenanceMap`] keyed by target node id. Each entry lists its matching
/// sources in `src_nodes` order (possibly none), with a single-element
/// `schema_path` holding the source anchor, and an empty `transform_chain`.
/// If a target id occurs more than once, the last occurrence wins.
#[must_use]
pub fn compute_provenance(
    src_nodes: &[(u32, String)],
    tgt_nodes: &[(u32, String)],
    vertex_remap: &HashMap<String, String>,
) -> ProvenanceMap {
    // Index the sources once by every target anchor they can match, so each
    // target is a single lookup instead of a full scan of `src_nodes`.
    // Sources are pushed in input order, which keeps each bucket in
    // `src_nodes` order.
    let mut sources_by_anchor: HashMap<&str, Vec<&(u32, String)>> = HashMap::new();
    for src in src_nodes {
        let src_anchor = src.1.as_str();

        // Identity match: a target carrying the very same anchor.
        sources_by_anchor.entry(src_anchor).or_default().push(src);

        // Remap match. A self-mapping (`a -> a`) is already covered by the
        // identity bucket, so skip it to avoid listing the source twice.
        if let Some(mapped) = vertex_remap
            .get(src_anchor)
            .filter(|mapped| mapped.as_str() != src_anchor)
        {
            sources_by_anchor
                .entry(mapped.as_str())
                .or_default()
                .push(src);
        }
    }

    tgt_nodes
        .iter()
        .map(|(tgt_id, tgt_anchor)| {
            let source_fields: Vec<SourceField> = sources_by_anchor
                .get(tgt_anchor.as_str())
                .map(|matches| {
                    matches
                        .iter()
                        .map(|(src_id, src_anchor)| SourceField {
                            schema_path: vec![src_anchor.clone()],
                            node_id: *src_id,
                        })
                        .collect()
                })
                .unwrap_or_default();

            (
                *tgt_id,
                Provenance {
                    node_id: *tgt_id,
                    source_fields,
                    transform_chain: vec![],
                },
            )
        })
        // Collecting into a map inserts in order, so a repeated target id
        // is overwritten by its last occurrence.
        .collect()
}
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used)]
mod tests {
    use super::*;

    /// Builds an owned `(id, anchor)` node list from borrowed literals.
    fn nodes(entries: &[(u32, &str)]) -> Vec<(u32, String)> {
        entries
            .iter()
            .map(|&(id, anchor)| (id, anchor.to_owned()))
            .collect()
    }

    /// With no remap and identical anchors on both sides, every target node
    /// has exactly one source: the node with the same id.
    #[test]
    fn identity_provenance_maps_nodes_to_themselves() {
        let src = nodes(&[(0, "root"), (1, "field_a"), (2, "field_b")]);
        let tgt = nodes(&[(0, "root"), (1, "field_a"), (2, "field_b")]);
        let no_renames = HashMap::new();

        let prov = compute_provenance(&src, &tgt, &no_renames);

        assert_eq!(prov.len(), 3);
        for (tgt_id, record) in &prov {
            assert_eq!(record.source_fields.len(), 1, "node {tgt_id} source count");
            assert_eq!(record.source_fields[0].node_id, *tgt_id);
        }
    }

    /// A renamed vertex is traced back to its old anchor through the remap.
    #[test]
    fn renamed_vertex_provenance_follows_remap() {
        let src = nodes(&[(1, "old_name")]);
        let tgt = nodes(&[(1, "new_name")]);
        let renames = HashMap::from([("old_name".to_owned(), "new_name".to_owned())]);

        let prov = compute_provenance(&src, &tgt, &renames);

        assert_eq!(prov.len(), 1);
        let record = &prov[&1];
        assert_eq!(record.source_fields.len(), 1);
        assert_eq!(
            record.source_fields[0].schema_path,
            vec!["old_name".to_owned()]
        );
    }

    /// A target with no matching source still gets an entry, with no sources.
    #[test]
    fn no_matching_source_yields_empty_sources() {
        let src = nodes(&[(1, "unrelated")]);
        let tgt = nodes(&[(2, "target_only")]);
        let no_renames = HashMap::new();

        let prov = compute_provenance(&src, &tgt, &no_renames);

        assert_eq!(prov.len(), 1);
        assert!(prov[&2].source_fields.is_empty());
    }
}