1use serde::{Deserialize, Serialize};
6use std::collections::HashMap;
7
8#[derive(Debug, Clone, Default, Serialize, Deserialize)]
10pub struct LineageGraph {
11 pub nodes: Vec<LineageNode>,
13 pub edges: Vec<LineageEdge>,
15}
16
17#[derive(Debug, Clone, Serialize, Deserialize)]
19pub struct LineageNode {
20 pub id: String,
22 pub node_type: LineageNodeType,
24 pub label: String,
26 #[serde(default, skip_serializing_if = "HashMap::is_empty")]
28 pub attributes: HashMap<String, String>,
29}
30
31#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
33#[serde(rename_all = "snake_case")]
34pub enum LineageNodeType {
35 ConfigSection,
37 GeneratorPhase,
39 OutputFile,
41}
42
43#[derive(Debug, Clone, Serialize, Deserialize)]
45pub struct LineageEdge {
46 pub source: String,
48 pub target: String,
50 pub relationship: LineageRelationship,
52}
53
54#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
56#[serde(rename_all = "snake_case")]
57pub enum LineageRelationship {
58 ConfiguredBy,
60 ProducedBy,
62 DerivedFrom,
64 InputTo,
66}
67
68#[derive(Debug, Default)]
70pub struct LineageGraphBuilder {
71 nodes: Vec<LineageNode>,
72 edges: Vec<LineageEdge>,
73 node_ids: std::collections::HashSet<String>,
74}
75
76impl LineageGraphBuilder {
77 pub fn new() -> Self {
79 Self::default()
80 }
81
82 pub fn add_config_section(&mut self, id: &str, label: &str) -> &mut Self {
84 self.add_node(id, LineageNodeType::ConfigSection, label, HashMap::new())
85 }
86
87 pub fn add_generator_phase(&mut self, id: &str, label: &str) -> &mut Self {
89 self.add_node(id, LineageNodeType::GeneratorPhase, label, HashMap::new())
90 }
91
92 pub fn add_output_file(&mut self, id: &str, label: &str, path: &str) -> &mut Self {
94 let mut attrs = HashMap::new();
95 attrs.insert("path".to_string(), path.to_string());
96 self.add_node(id, LineageNodeType::OutputFile, label, attrs)
97 }
98
99 pub fn add_node(
101 &mut self,
102 id: &str,
103 node_type: LineageNodeType,
104 label: &str,
105 attributes: HashMap<String, String>,
106 ) -> &mut Self {
107 if self.node_ids.insert(id.to_string()) {
108 self.nodes.push(LineageNode {
109 id: id.to_string(),
110 node_type,
111 label: label.to_string(),
112 attributes,
113 });
114 }
115 self
116 }
117
118 pub fn configured_by(&mut self, generator_id: &str, config_id: &str) -> &mut Self {
120 self.add_edge(config_id, generator_id, LineageRelationship::ConfiguredBy)
121 }
122
123 pub fn produced_by(&mut self, output_id: &str, generator_id: &str) -> &mut Self {
125 self.add_edge(generator_id, output_id, LineageRelationship::ProducedBy)
126 }
127
128 pub fn derived_from(&mut self, derived_id: &str, source_id: &str) -> &mut Self {
130 self.add_edge(source_id, derived_id, LineageRelationship::DerivedFrom)
131 }
132
133 pub fn input_to(&mut self, output_id: &str, phase_id: &str) -> &mut Self {
135 self.add_edge(output_id, phase_id, LineageRelationship::InputTo)
136 }
137
138 pub fn add_edge(
140 &mut self,
141 source: &str,
142 target: &str,
143 relationship: LineageRelationship,
144 ) -> &mut Self {
145 self.edges.push(LineageEdge {
146 source: source.to_string(),
147 target: target.to_string(),
148 relationship,
149 });
150 self
151 }
152
153 pub fn build(self) -> LineageGraph {
155 LineageGraph {
156 nodes: self.nodes,
157 edges: self.edges,
158 }
159 }
160}
161
162impl LineageGraph {
163 pub fn to_json(&self) -> Result<String, serde_json::Error> {
165 serde_json::to_string_pretty(self)
166 }
167
168 pub fn to_dot(&self) -> String {
170 let mut dot = String::from("digraph lineage {\n");
171 dot.push_str(" rankdir=LR;\n");
172 dot.push_str(" node [shape=box];\n\n");
173
174 for node in &self.nodes {
176 let (shape, color) = match node.node_type {
177 LineageNodeType::ConfigSection => ("note", "lightblue"),
178 LineageNodeType::GeneratorPhase => ("component", "lightyellow"),
179 LineageNodeType::OutputFile => ("folder", "lightgreen"),
180 };
181 dot.push_str(&format!(
182 " \"{}\" [label=\"{}\" shape={} style=filled fillcolor={}];\n",
183 node.id, node.label, shape, color
184 ));
185 }
186
187 dot.push('\n');
188
189 for edge in &self.edges {
191 let label = match edge.relationship {
192 LineageRelationship::ConfiguredBy => "configures",
193 LineageRelationship::ProducedBy => "produces",
194 LineageRelationship::DerivedFrom => "derives",
195 LineageRelationship::InputTo => "input_to",
196 };
197 dot.push_str(&format!(
198 " \"{}\" -> \"{}\" [label=\"{}\"];\n",
199 edge.source, edge.target, label
200 ));
201 }
202
203 dot.push_str("}\n");
204 dot
205 }
206
207 pub fn node_count(&self) -> usize {
209 self.nodes.len()
210 }
211
212 pub fn edge_count(&self) -> usize {
214 self.edges.len()
215 }
216}
217
218pub fn build_generation_lineage(
220 config_sections: &[&str],
221 phases: &[(&str, &str)],
222 output_files: &[(&str, &str, &str)],
223 phase_config_map: &[(&str, &str)],
224 phase_output_map: &[(&str, &str)],
225) -> LineageGraph {
226 let mut builder = LineageGraphBuilder::new();
227
228 for section in config_sections {
229 builder.add_config_section(&format!("config:{section}"), &format!("Config: {section}"));
230 }
231
232 for (id, label) in phases {
233 builder.add_generator_phase(&format!("phase:{id}"), label);
234 }
235
236 for (id, label, path) in output_files {
237 builder.add_output_file(&format!("output:{id}"), label, path);
238 }
239
240 for (phase, config) in phase_config_map {
241 builder.configured_by(&format!("phase:{phase}"), &format!("config:{config}"));
242 }
243
244 for (phase, output) in phase_output_map {
245 builder.produced_by(&format!("output:{output}"), &format!("phase:{phase}"));
246 }
247
248 builder.build()
249}
250
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the chart-of-accounts fixture shared by several tests: one
    /// config section, one generator phase, one output file and two edges.
    fn coa_graph() -> LineageGraph {
        let mut builder = LineageGraphBuilder::new();
        builder
            .add_config_section("cfg:global", "Global Config")
            .add_generator_phase("gen:coa", "CoA Generator")
            .add_output_file("out:coa", "Chart of Accounts", "chart_of_accounts.csv")
            .configured_by("gen:coa", "cfg:global")
            .produced_by("out:coa", "gen:coa");
        builder.build()
    }

    #[test]
    fn test_builder_basic() {
        let graph = coa_graph();

        assert_eq!(graph.node_count(), 3);
        assert_eq!(graph.edge_count(), 2);
    }

    #[test]
    fn test_no_duplicate_nodes() {
        let mut builder = LineageGraphBuilder::new();
        builder.add_config_section("cfg:global", "Global Config");
        // Same id again: must not create a second node.
        builder.add_config_section("cfg:global", "Global Config Again");

        let graph = builder.build();
        assert_eq!(graph.node_count(), 1);
    }

    #[test]
    fn test_json_roundtrip() {
        let graph = coa_graph();

        let json = graph.to_json().expect("serialize");
        let deserialized: LineageGraph = serde_json::from_str(&json).expect("deserialize");

        assert_eq!(deserialized.node_count(), graph.node_count());
        assert_eq!(deserialized.edge_count(), graph.edge_count());
    }

    #[test]
    fn test_dot_output() {
        let mut builder = LineageGraphBuilder::new();
        builder
            .add_config_section("cfg:global", "Global Config")
            .add_generator_phase("gen:coa", "CoA Generator")
            .configured_by("gen:coa", "cfg:global");

        let dot = builder.build().to_dot();

        assert!(dot.starts_with("digraph lineage {"));
        assert!(dot.contains("cfg:global"));
        assert!(dot.contains("gen:coa"));
        assert!(dot.contains("configures"));
        assert!(dot.ends_with("}\n"));
    }

    #[test]
    fn test_build_generation_lineage() {
        let config_sections = ["global", "transactions"];
        let phases = [("coa", "CoA Generation"), ("je", "Journal Entries")];
        let output_files = [
            ("coa_csv", "CoA CSV", "chart_of_accounts.csv"),
            ("je_csv", "JE CSV", "journal_entries.csv"),
        ];
        let phase_config_map = [("coa", "global"), ("je", "transactions")];
        let phase_output_map = [("coa", "coa_csv"), ("je", "je_csv")];

        let graph = build_generation_lineage(
            &config_sections,
            &phases,
            &output_files,
            &phase_config_map,
            &phase_output_map,
        );

        // 2 config sections + 2 phases + 2 output files.
        assert_eq!(graph.node_count(), 6);
        // 2 configured-by edges + 2 produced-by edges.
        assert_eq!(graph.edge_count(), 4);
    }

    #[test]
    fn test_derived_from_edge() {
        let mut builder = LineageGraphBuilder::new();
        builder
            .add_output_file("out:raw", "Raw Data", "raw.csv")
            .add_output_file("out:agg", "Aggregated", "aggregated.csv")
            .derived_from("out:agg", "out:raw");

        let graph = builder.build();
        assert_eq!(graph.edge_count(), 1);

        let only_edge = &graph.edges[0];
        assert_eq!(only_edge.relationship, LineageRelationship::DerivedFrom);
    }
}