1use serde::{Deserialize, Serialize};
6use std::collections::HashMap;
7
8#[derive(Debug, Clone, Default, Serialize, Deserialize)]
10pub struct LineageGraph {
11 pub nodes: Vec<LineageNode>,
13 pub edges: Vec<LineageEdge>,
15}
16
17#[derive(Debug, Clone, Serialize, Deserialize)]
19pub struct LineageNode {
20 pub id: String,
22 pub node_type: LineageNodeType,
24 pub label: String,
26 #[serde(default, skip_serializing_if = "HashMap::is_empty")]
28 pub attributes: HashMap<String, String>,
29}
30
31#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
33#[serde(rename_all = "snake_case")]
34pub enum LineageNodeType {
35 ConfigSection,
37 GeneratorPhase,
39 OutputFile,
41}
42
43#[derive(Debug, Clone, Serialize, Deserialize)]
45pub struct LineageEdge {
46 pub source: String,
48 pub target: String,
50 pub relationship: LineageRelationship,
52}
53
54#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
56#[serde(rename_all = "snake_case")]
57pub enum LineageRelationship {
58 ConfiguredBy,
60 ProducedBy,
62 DerivedFrom,
64 InputTo,
66}
67
68#[derive(Debug, Default)]
70pub struct LineageGraphBuilder {
71 nodes: Vec<LineageNode>,
72 edges: Vec<LineageEdge>,
73 node_ids: std::collections::HashSet<String>,
74}
75
76impl LineageGraphBuilder {
77 pub fn new() -> Self {
79 Self::default()
80 }
81
82 pub fn add_config_section(&mut self, id: &str, label: &str) -> &mut Self {
84 self.add_node(id, LineageNodeType::ConfigSection, label, HashMap::new())
85 }
86
87 pub fn add_generator_phase(&mut self, id: &str, label: &str) -> &mut Self {
89 self.add_node(id, LineageNodeType::GeneratorPhase, label, HashMap::new())
90 }
91
92 pub fn add_output_file(&mut self, id: &str, label: &str, path: &str) -> &mut Self {
94 let mut attrs = HashMap::new();
95 attrs.insert("path".to_string(), path.to_string());
96 self.add_node(id, LineageNodeType::OutputFile, label, attrs)
97 }
98
99 pub fn add_node(
101 &mut self,
102 id: &str,
103 node_type: LineageNodeType,
104 label: &str,
105 attributes: HashMap<String, String>,
106 ) -> &mut Self {
107 if self.node_ids.insert(id.to_string()) {
108 self.nodes.push(LineageNode {
109 id: id.to_string(),
110 node_type,
111 label: label.to_string(),
112 attributes,
113 });
114 }
115 self
116 }
117
118 pub fn configured_by(&mut self, generator_id: &str, config_id: &str) -> &mut Self {
120 self.add_edge(config_id, generator_id, LineageRelationship::ConfiguredBy)
121 }
122
123 pub fn produced_by(&mut self, output_id: &str, generator_id: &str) -> &mut Self {
125 self.add_edge(generator_id, output_id, LineageRelationship::ProducedBy)
126 }
127
128 pub fn derived_from(&mut self, derived_id: &str, source_id: &str) -> &mut Self {
130 self.add_edge(source_id, derived_id, LineageRelationship::DerivedFrom)
131 }
132
133 pub fn input_to(&mut self, output_id: &str, phase_id: &str) -> &mut Self {
135 self.add_edge(output_id, phase_id, LineageRelationship::InputTo)
136 }
137
138 pub fn add_edge(
140 &mut self,
141 source: &str,
142 target: &str,
143 relationship: LineageRelationship,
144 ) -> &mut Self {
145 self.edges.push(LineageEdge {
146 source: source.to_string(),
147 target: target.to_string(),
148 relationship,
149 });
150 self
151 }
152
153 pub fn build(self) -> LineageGraph {
155 LineageGraph {
156 nodes: self.nodes,
157 edges: self.edges,
158 }
159 }
160}
161
162impl LineageGraph {
163 pub fn to_json(&self) -> Result<String, serde_json::Error> {
165 serde_json::to_string_pretty(self)
166 }
167
168 pub fn to_dot(&self) -> String {
170 let mut dot = String::from("digraph lineage {\n");
171 dot.push_str(" rankdir=LR;\n");
172 dot.push_str(" node [shape=box];\n\n");
173
174 for node in &self.nodes {
176 let (shape, color) = match node.node_type {
177 LineageNodeType::ConfigSection => ("note", "lightblue"),
178 LineageNodeType::GeneratorPhase => ("component", "lightyellow"),
179 LineageNodeType::OutputFile => ("folder", "lightgreen"),
180 };
181 dot.push_str(&format!(
182 " \"{}\" [label=\"{}\" shape={} style=filled fillcolor={}];\n",
183 node.id, node.label, shape, color
184 ));
185 }
186
187 dot.push('\n');
188
189 for edge in &self.edges {
191 let label = match edge.relationship {
192 LineageRelationship::ConfiguredBy => "configures",
193 LineageRelationship::ProducedBy => "produces",
194 LineageRelationship::DerivedFrom => "derives",
195 LineageRelationship::InputTo => "input_to",
196 };
197 dot.push_str(&format!(
198 " \"{}\" -> \"{}\" [label=\"{}\"];\n",
199 edge.source, edge.target, label
200 ));
201 }
202
203 dot.push_str("}\n");
204 dot
205 }
206
207 pub fn node_count(&self) -> usize {
209 self.nodes.len()
210 }
211
212 pub fn edge_count(&self) -> usize {
214 self.edges.len()
215 }
216}
217
218pub fn build_generation_lineage(
220 config_sections: &[&str],
221 phases: &[(&str, &str)],
222 output_files: &[(&str, &str, &str)],
223 phase_config_map: &[(&str, &str)],
224 phase_output_map: &[(&str, &str)],
225) -> LineageGraph {
226 let mut builder = LineageGraphBuilder::new();
227
228 for section in config_sections {
229 builder.add_config_section(&format!("config:{section}"), &format!("Config: {section}"));
230 }
231
232 for (id, label) in phases {
233 builder.add_generator_phase(&format!("phase:{id}"), label);
234 }
235
236 for (id, label, path) in output_files {
237 builder.add_output_file(&format!("output:{id}"), label, path);
238 }
239
240 for (phase, config) in phase_config_map {
241 builder.configured_by(&format!("phase:{phase}"), &format!("config:{config}"));
242 }
243
244 for (phase, output) in phase_output_map {
245 builder.produced_by(&format!("output:{output}"), &format!("phase:{phase}"));
246 }
247
248 builder.build()
249}
250
#[cfg(test)]
// Carried over from the original module; presumably the workspace denies
// `unwrap` outside tests (confirm against the lint configuration).
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;

    /// Builds the small chart-of-accounts graph shared by several tests: one
    /// config section, one generator phase, one output file and two edges.
    fn coa_graph() -> LineageGraph {
        let mut builder = LineageGraphBuilder::new();
        builder
            .add_config_section("cfg:global", "Global Config")
            .add_generator_phase("gen:coa", "CoA Generator")
            .add_output_file("out:coa", "Chart of Accounts", "chart_of_accounts.csv")
            .configured_by("gen:coa", "cfg:global")
            .produced_by("out:coa", "gen:coa");
        builder.build()
    }

    #[test]
    fn test_builder_basic() {
        let graph = coa_graph();
        assert_eq!(graph.node_count(), 3);
        assert_eq!(graph.edge_count(), 2);

        // The output file node carries its path as an attribute.
        assert_eq!(graph.nodes[2].node_type, LineageNodeType::OutputFile);
        assert_eq!(
            graph.nodes[2].attributes.get("path").map(String::as_str),
            Some("chart_of_accounts.csv")
        );

        // Edges run config -> generator -> output.
        assert_eq!(graph.edges[0].source, "cfg:global");
        assert_eq!(graph.edges[0].target, "gen:coa");
        assert_eq!(
            graph.edges[0].relationship,
            LineageRelationship::ConfiguredBy
        );
        assert_eq!(graph.edges[1].source, "gen:coa");
        assert_eq!(graph.edges[1].target, "out:coa");
        assert_eq!(graph.edges[1].relationship, LineageRelationship::ProducedBy);
    }

    #[test]
    fn test_no_duplicate_nodes() {
        let mut builder = LineageGraphBuilder::new();
        builder
            .add_config_section("cfg:global", "Global Config")
            .add_config_section("cfg:global", "Global Config Again");

        let graph = builder.build();
        assert_eq!(graph.node_count(), 1);
        // The first registration wins; the repeated id is ignored.
        assert_eq!(graph.nodes[0].label, "Global Config");
    }

    #[test]
    fn test_json_roundtrip() {
        let graph = coa_graph();
        let json = graph.to_json().expect("serialize");

        // Enum values are written in snake_case.
        assert!(json.contains("\"config_section\""));
        assert!(json.contains("\"configured_by\""));

        let deserialized: LineageGraph = serde_json::from_str(&json).expect("deserialize");

        assert_eq!(deserialized.node_count(), graph.node_count());
        assert_eq!(deserialized.edge_count(), graph.edge_count());

        // Compare contents, not just sizes.
        for (restored, original) in deserialized.nodes.iter().zip(&graph.nodes) {
            assert_eq!(restored.id, original.id);
            assert_eq!(restored.node_type, original.node_type);
            assert_eq!(restored.label, original.label);
            assert_eq!(restored.attributes, original.attributes);
        }
        for (restored, original) in deserialized.edges.iter().zip(&graph.edges) {
            assert_eq!(restored.source, original.source);
            assert_eq!(restored.target, original.target);
            assert_eq!(restored.relationship, original.relationship);
        }
    }

    #[test]
    fn test_dot_output() {
        let mut builder = LineageGraphBuilder::new();
        builder
            .add_config_section("cfg:global", "Global Config")
            .add_generator_phase("gen:coa", "CoA Generator")
            .configured_by("gen:coa", "cfg:global");

        let graph = builder.build();
        let dot = graph.to_dot();

        assert!(dot.starts_with("digraph lineage {"));
        assert!(dot.contains("cfg:global"));
        assert!(dot.contains("gen:coa"));
        assert!(dot.contains("configures"));
        // The edge points from the config section to the generator.
        assert!(dot.contains("\"cfg:global\" -> \"gen:coa\""));
        assert!(dot.ends_with("}\n"));
    }

    #[test]
    fn test_build_generation_lineage() {
        let graph = build_generation_lineage(
            &["global", "transactions"],
            &[("coa", "CoA Generation"), ("je", "Journal Entries")],
            &[
                ("coa_csv", "CoA CSV", "chart_of_accounts.csv"),
                ("je_csv", "JE CSV", "journal_entries.csv"),
            ],
            &[("coa", "global"), ("je", "transactions")],
            &[("coa", "coa_csv"), ("je", "je_csv")],
        );

        assert_eq!(graph.node_count(), 6);
        assert_eq!(graph.edge_count(), 4);

        // Ids are namespaced by node kind.
        assert_eq!(graph.nodes[0].id, "config:global");
        assert_eq!(graph.nodes[0].label, "Config: global");
        assert_eq!(graph.nodes[2].id, "phase:coa");
        assert_eq!(graph.nodes[4].id, "output:coa_csv");

        // Configuration edges come first, then production edges.
        assert_eq!(graph.edges[0].source, "config:global");
        assert_eq!(graph.edges[0].target, "phase:coa");
        assert_eq!(graph.edges[2].source, "phase:coa");
        assert_eq!(graph.edges[2].target, "output:coa_csv");
    }

    #[test]
    fn test_derived_from_edge() {
        let mut builder = LineageGraphBuilder::new();
        builder
            .add_output_file("out:raw", "Raw Data", "raw.csv")
            .add_output_file("out:agg", "Aggregated", "aggregated.csv")
            .derived_from("out:agg", "out:raw");

        let graph = builder.build();
        assert_eq!(graph.edge_count(), 1);
        assert_eq!(
            graph.edges[0].relationship,
            LineageRelationship::DerivedFrom
        );
        // The edge points from the source data to the derived data.
        assert_eq!(graph.edges[0].source, "out:raw");
        assert_eq!(graph.edges[0].target, "out:agg");
    }
}