Skip to main content

datasynth_runtime/
lineage.rs

1//! Data lineage graph tracking for generation provenance.
2//!
3//! Tracks which config sections produced which output files via a directed graph.
4
5use serde::{Deserialize, Serialize};
6use std::collections::HashMap;
7
8/// A lineage graph tracking data flow from config → generators → output files.
9#[derive(Debug, Clone, Default, Serialize, Deserialize)]
10pub struct LineageGraph {
11    /// All nodes in the lineage graph.
12    pub nodes: Vec<LineageNode>,
13    /// Directed edges between nodes.
14    pub edges: Vec<LineageEdge>,
15}
16
17/// A node in the lineage graph.
18#[derive(Debug, Clone, Serialize, Deserialize)]
19pub struct LineageNode {
20    /// Unique node identifier.
21    pub id: String,
22    /// Type of node.
23    pub node_type: LineageNodeType,
24    /// Human-readable label.
25    pub label: String,
26    /// Additional attributes.
27    #[serde(default, skip_serializing_if = "HashMap::is_empty")]
28    pub attributes: HashMap<String, String>,
29}
30
31/// Types of nodes in the lineage graph.
32#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
33#[serde(rename_all = "snake_case")]
34pub enum LineageNodeType {
35    /// A configuration section (input).
36    ConfigSection,
37    /// A generator phase (processing).
38    GeneratorPhase,
39    /// An output file (output).
40    OutputFile,
41}
42
43/// A directed edge in the lineage graph.
44#[derive(Debug, Clone, Serialize, Deserialize)]
45pub struct LineageEdge {
46    /// Source node ID.
47    pub source: String,
48    /// Target node ID.
49    pub target: String,
50    /// Relationship type.
51    pub relationship: LineageRelationship,
52}
53
54/// Types of relationships between lineage nodes.
55#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
56#[serde(rename_all = "snake_case")]
57pub enum LineageRelationship {
58    /// Config section configures a generator phase.
59    ConfiguredBy,
60    /// Generator phase produces an output file.
61    ProducedBy,
62    /// One output is derived from another.
63    DerivedFrom,
64    /// One output serves as input to a phase.
65    InputTo,
66}
67
68/// Builder for constructing lineage graphs with a fluent API.
69#[derive(Debug, Default)]
70pub struct LineageGraphBuilder {
71    nodes: Vec<LineageNode>,
72    edges: Vec<LineageEdge>,
73    node_ids: std::collections::HashSet<String>,
74}
75
76impl LineageGraphBuilder {
77    /// Creates a new empty builder.
78    pub fn new() -> Self {
79        Self::default()
80    }
81
82    /// Adds a config section node.
83    pub fn add_config_section(&mut self, id: &str, label: &str) -> &mut Self {
84        self.add_node(id, LineageNodeType::ConfigSection, label, HashMap::new())
85    }
86
87    /// Adds a generator phase node.
88    pub fn add_generator_phase(&mut self, id: &str, label: &str) -> &mut Self {
89        self.add_node(id, LineageNodeType::GeneratorPhase, label, HashMap::new())
90    }
91
92    /// Adds an output file node.
93    pub fn add_output_file(&mut self, id: &str, label: &str, path: &str) -> &mut Self {
94        let mut attrs = HashMap::new();
95        attrs.insert("path".to_string(), path.to_string());
96        self.add_node(id, LineageNodeType::OutputFile, label, attrs)
97    }
98
99    /// Adds a node with attributes.
100    pub fn add_node(
101        &mut self,
102        id: &str,
103        node_type: LineageNodeType,
104        label: &str,
105        attributes: HashMap<String, String>,
106    ) -> &mut Self {
107        if self.node_ids.insert(id.to_string()) {
108            self.nodes.push(LineageNode {
109                id: id.to_string(),
110                node_type,
111                label: label.to_string(),
112                attributes,
113            });
114        }
115        self
116    }
117
118    /// Adds a "configured by" edge: config section → generator phase.
119    pub fn configured_by(&mut self, generator_id: &str, config_id: &str) -> &mut Self {
120        self.add_edge(config_id, generator_id, LineageRelationship::ConfiguredBy)
121    }
122
123    /// Adds a "produced by" edge: generator phase → output file.
124    pub fn produced_by(&mut self, output_id: &str, generator_id: &str) -> &mut Self {
125        self.add_edge(generator_id, output_id, LineageRelationship::ProducedBy)
126    }
127
128    /// Adds a "derived from" edge: output → output.
129    pub fn derived_from(&mut self, derived_id: &str, source_id: &str) -> &mut Self {
130        self.add_edge(source_id, derived_id, LineageRelationship::DerivedFrom)
131    }
132
133    /// Adds an "input to" edge: output → generator phase.
134    pub fn input_to(&mut self, output_id: &str, phase_id: &str) -> &mut Self {
135        self.add_edge(output_id, phase_id, LineageRelationship::InputTo)
136    }
137
138    /// Adds an edge.
139    pub fn add_edge(
140        &mut self,
141        source: &str,
142        target: &str,
143        relationship: LineageRelationship,
144    ) -> &mut Self {
145        self.edges.push(LineageEdge {
146            source: source.to_string(),
147            target: target.to_string(),
148            relationship,
149        });
150        self
151    }
152
153    /// Builds the lineage graph.
154    pub fn build(self) -> LineageGraph {
155        LineageGraph {
156            nodes: self.nodes,
157            edges: self.edges,
158        }
159    }
160}
161
162impl LineageGraph {
163    /// Serializes the lineage graph to JSON.
164    pub fn to_json(&self) -> Result<String, serde_json::Error> {
165        serde_json::to_string_pretty(self)
166    }
167
168    /// Exports the lineage graph in DOT (Graphviz) format.
169    pub fn to_dot(&self) -> String {
170        let mut dot = String::from("digraph lineage {\n");
171        dot.push_str("  rankdir=LR;\n");
172        dot.push_str("  node [shape=box];\n\n");
173
174        // Define node styles by type
175        for node in &self.nodes {
176            let (shape, color) = match node.node_type {
177                LineageNodeType::ConfigSection => ("note", "lightblue"),
178                LineageNodeType::GeneratorPhase => ("component", "lightyellow"),
179                LineageNodeType::OutputFile => ("folder", "lightgreen"),
180            };
181            dot.push_str(&format!(
182                "  \"{}\" [label=\"{}\" shape={} style=filled fillcolor={}];\n",
183                node.id, node.label, shape, color
184            ));
185        }
186
187        dot.push('\n');
188
189        // Define edges
190        for edge in &self.edges {
191            let label = match edge.relationship {
192                LineageRelationship::ConfiguredBy => "configures",
193                LineageRelationship::ProducedBy => "produces",
194                LineageRelationship::DerivedFrom => "derives",
195                LineageRelationship::InputTo => "input_to",
196            };
197            dot.push_str(&format!(
198                "  \"{}\" -> \"{}\" [label=\"{}\"];\n",
199                edge.source, edge.target, label
200            ));
201        }
202
203        dot.push_str("}\n");
204        dot
205    }
206
207    /// Returns the number of nodes.
208    pub fn node_count(&self) -> usize {
209        self.nodes.len()
210    }
211
212    /// Returns the number of edges.
213    pub fn edge_count(&self) -> usize {
214        self.edges.len()
215    }
216}
217
218/// Builds a standard lineage graph for an enhanced generation run.
219pub fn build_generation_lineage(
220    config_sections: &[&str],
221    phases: &[(&str, &str)],
222    output_files: &[(&str, &str, &str)],
223    phase_config_map: &[(&str, &str)],
224    phase_output_map: &[(&str, &str)],
225) -> LineageGraph {
226    let mut builder = LineageGraphBuilder::new();
227
228    for section in config_sections {
229        builder.add_config_section(
230            &format!("config:{}", section),
231            &format!("Config: {}", section),
232        );
233    }
234
235    for (id, label) in phases {
236        builder.add_generator_phase(&format!("phase:{}", id), label);
237    }
238
239    for (id, label, path) in output_files {
240        builder.add_output_file(&format!("output:{}", id), label, path);
241    }
242
243    for (phase, config) in phase_config_map {
244        builder.configured_by(&format!("phase:{}", phase), &format!("config:{}", config));
245    }
246
247    for (phase, output) in phase_output_map {
248        builder.produced_by(&format!("output:{}", output), &format!("phase:{}", phase));
249    }
250
251    builder.build()
252}
253
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;

    /// Builder holding one config section, one generator phase and one output
    /// file, wired together by a "configured by" and a "produced by" edge.
    fn coa_builder() -> LineageGraphBuilder {
        let mut builder = LineageGraphBuilder::new();
        builder
            .add_config_section("cfg:global", "Global Config")
            .add_generator_phase("gen:coa", "CoA Generator")
            .add_output_file("out:coa", "Chart of Accounts", "chart_of_accounts.csv")
            .configured_by("gen:coa", "cfg:global")
            .produced_by("out:coa", "gen:coa");
        builder
    }

    /// Three distinct nodes and two edges end up in the built graph.
    #[test]
    fn test_builder_basic() {
        let graph = coa_builder().build();

        assert_eq!(graph.node_count(), 3);
        assert_eq!(graph.edge_count(), 2);
    }

    /// Registering the same ID twice keeps a single node.
    #[test]
    fn test_no_duplicate_nodes() {
        let mut builder = LineageGraphBuilder::new();
        builder.add_config_section("cfg:global", "Global Config");
        builder.add_config_section("cfg:global", "Global Config Again");

        assert_eq!(builder.build().node_count(), 1);
    }

    /// Serializing to JSON and back preserves node and edge counts.
    #[test]
    fn test_json_roundtrip() {
        let original = coa_builder().build();

        let json = original.to_json().expect("serialize");
        let restored: LineageGraph = serde_json::from_str(&json).expect("deserialize");

        assert_eq!(restored.node_count(), original.node_count());
        assert_eq!(restored.edge_count(), original.edge_count());
    }

    /// DOT output is framed correctly and mentions the nodes and edge label.
    #[test]
    fn test_dot_output() {
        let mut builder = LineageGraphBuilder::new();
        builder
            .add_config_section("cfg:global", "Global Config")
            .add_generator_phase("gen:coa", "CoA Generator")
            .configured_by("gen:coa", "cfg:global");

        let dot = builder.build().to_dot();

        assert!(dot.starts_with("digraph lineage {"));
        for expected in ["cfg:global", "gen:coa", "configures"] {
            assert!(dot.contains(expected));
        }
        assert!(dot.ends_with("}\n"));
    }

    /// The convenience constructor creates every node and edge it is given.
    #[test]
    fn test_build_generation_lineage() {
        let config_sections = ["global", "transactions"];
        let phases = [("coa", "CoA Generation"), ("je", "Journal Entries")];
        let output_files = [
            ("coa_csv", "CoA CSV", "chart_of_accounts.csv"),
            ("je_csv", "JE CSV", "journal_entries.csv"),
        ];
        let phase_config_map = [("coa", "global"), ("je", "transactions")];
        let phase_output_map = [("coa", "coa_csv"), ("je", "je_csv")];

        let graph = build_generation_lineage(
            &config_sections,
            &phases,
            &output_files,
            &phase_config_map,
            &phase_output_map,
        );

        assert_eq!(graph.node_count(), 6); // 2 config + 2 phase + 2 output
        assert_eq!(graph.edge_count(), 4); // 2 configured_by + 2 produced_by
    }

    /// `derived_from` records exactly one edge of the matching relationship.
    #[test]
    fn test_derived_from_edge() {
        let mut builder = LineageGraphBuilder::new();
        builder
            .add_output_file("out:raw", "Raw Data", "raw.csv")
            .add_output_file("out:agg", "Aggregated", "aggregated.csv")
            .derived_from("out:agg", "out:raw");

        let graph = builder.build();
        assert_eq!(graph.edge_count(), 1);

        let only_edge = &graph.edges[0];
        assert_eq!(only_edge.relationship, LineageRelationship::DerivedFrom);
    }
}
353}