1use serde::{Deserialize, Serialize};
6use std::collections::HashMap;
7
8#[derive(Debug, Clone, Default, Serialize, Deserialize)]
10pub struct LineageGraph {
11 pub nodes: Vec<LineageNode>,
13 pub edges: Vec<LineageEdge>,
15}
16
17#[derive(Debug, Clone, Serialize, Deserialize)]
19pub struct LineageNode {
20 pub id: String,
22 pub node_type: LineageNodeType,
24 pub label: String,
26 #[serde(default, skip_serializing_if = "HashMap::is_empty")]
28 pub attributes: HashMap<String, String>,
29}
30
31#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
33#[serde(rename_all = "snake_case")]
34pub enum LineageNodeType {
35 ConfigSection,
37 GeneratorPhase,
39 OutputFile,
41}
42
43#[derive(Debug, Clone, Serialize, Deserialize)]
45pub struct LineageEdge {
46 pub source: String,
48 pub target: String,
50 pub relationship: LineageRelationship,
52}
53
54#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
56#[serde(rename_all = "snake_case")]
57pub enum LineageRelationship {
58 ConfiguredBy,
60 ProducedBy,
62 DerivedFrom,
64 InputTo,
66}
67
68#[derive(Debug, Default)]
70pub struct LineageGraphBuilder {
71 nodes: Vec<LineageNode>,
72 edges: Vec<LineageEdge>,
73 node_ids: std::collections::HashSet<String>,
74}
75
76impl LineageGraphBuilder {
77 pub fn new() -> Self {
79 Self::default()
80 }
81
82 pub fn add_config_section(&mut self, id: &str, label: &str) -> &mut Self {
84 self.add_node(id, LineageNodeType::ConfigSection, label, HashMap::new())
85 }
86
87 pub fn add_generator_phase(&mut self, id: &str, label: &str) -> &mut Self {
89 self.add_node(id, LineageNodeType::GeneratorPhase, label, HashMap::new())
90 }
91
92 pub fn add_output_file(&mut self, id: &str, label: &str, path: &str) -> &mut Self {
94 let mut attrs = HashMap::new();
95 attrs.insert("path".to_string(), path.to_string());
96 self.add_node(id, LineageNodeType::OutputFile, label, attrs)
97 }
98
99 pub fn add_node(
101 &mut self,
102 id: &str,
103 node_type: LineageNodeType,
104 label: &str,
105 attributes: HashMap<String, String>,
106 ) -> &mut Self {
107 if self.node_ids.insert(id.to_string()) {
108 self.nodes.push(LineageNode {
109 id: id.to_string(),
110 node_type,
111 label: label.to_string(),
112 attributes,
113 });
114 }
115 self
116 }
117
118 pub fn configured_by(&mut self, generator_id: &str, config_id: &str) -> &mut Self {
120 self.add_edge(config_id, generator_id, LineageRelationship::ConfiguredBy)
121 }
122
123 pub fn produced_by(&mut self, output_id: &str, generator_id: &str) -> &mut Self {
125 self.add_edge(generator_id, output_id, LineageRelationship::ProducedBy)
126 }
127
128 pub fn derived_from(&mut self, derived_id: &str, source_id: &str) -> &mut Self {
130 self.add_edge(source_id, derived_id, LineageRelationship::DerivedFrom)
131 }
132
133 pub fn input_to(&mut self, output_id: &str, phase_id: &str) -> &mut Self {
135 self.add_edge(output_id, phase_id, LineageRelationship::InputTo)
136 }
137
138 pub fn add_edge(
140 &mut self,
141 source: &str,
142 target: &str,
143 relationship: LineageRelationship,
144 ) -> &mut Self {
145 self.edges.push(LineageEdge {
146 source: source.to_string(),
147 target: target.to_string(),
148 relationship,
149 });
150 self
151 }
152
153 pub fn build(self) -> LineageGraph {
155 LineageGraph {
156 nodes: self.nodes,
157 edges: self.edges,
158 }
159 }
160}
161
162impl LineageGraph {
163 pub fn to_json(&self) -> Result<String, serde_json::Error> {
165 serde_json::to_string_pretty(self)
166 }
167
168 pub fn to_dot(&self) -> String {
170 let mut dot = String::from("digraph lineage {\n");
171 dot.push_str(" rankdir=LR;\n");
172 dot.push_str(" node [shape=box];\n\n");
173
174 for node in &self.nodes {
176 let (shape, color) = match node.node_type {
177 LineageNodeType::ConfigSection => ("note", "lightblue"),
178 LineageNodeType::GeneratorPhase => ("component", "lightyellow"),
179 LineageNodeType::OutputFile => ("folder", "lightgreen"),
180 };
181 dot.push_str(&format!(
182 " \"{}\" [label=\"{}\" shape={} style=filled fillcolor={}];\n",
183 node.id, node.label, shape, color
184 ));
185 }
186
187 dot.push('\n');
188
189 for edge in &self.edges {
191 let label = match edge.relationship {
192 LineageRelationship::ConfiguredBy => "configures",
193 LineageRelationship::ProducedBy => "produces",
194 LineageRelationship::DerivedFrom => "derives",
195 LineageRelationship::InputTo => "input_to",
196 };
197 dot.push_str(&format!(
198 " \"{}\" -> \"{}\" [label=\"{}\"];\n",
199 edge.source, edge.target, label
200 ));
201 }
202
203 dot.push_str("}\n");
204 dot
205 }
206
207 pub fn node_count(&self) -> usize {
209 self.nodes.len()
210 }
211
212 pub fn edge_count(&self) -> usize {
214 self.edges.len()
215 }
216}
217
218pub fn build_generation_lineage(
220 config_sections: &[&str],
221 phases: &[(&str, &str)],
222 output_files: &[(&str, &str, &str)],
223 phase_config_map: &[(&str, &str)],
224 phase_output_map: &[(&str, &str)],
225) -> LineageGraph {
226 let mut builder = LineageGraphBuilder::new();
227
228 for section in config_sections {
229 builder.add_config_section(
230 &format!("config:{}", section),
231 &format!("Config: {}", section),
232 );
233 }
234
235 for (id, label) in phases {
236 builder.add_generator_phase(&format!("phase:{}", id), label);
237 }
238
239 for (id, label, path) in output_files {
240 builder.add_output_file(&format!("output:{}", id), label, path);
241 }
242
243 for (phase, config) in phase_config_map {
244 builder.configured_by(&format!("phase:{}", phase), &format!("config:{}", config));
245 }
246
247 for (phase, output) in phase_output_map {
248 builder.produced_by(&format!("output:{}", output), &format!("phase:{}", phase));
249 }
250
251 builder.build()
252}
253
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;

    /// Nodes and edges added through the fluent API end up in the graph,
    /// with edges oriented from cause to effect.
    #[test]
    fn test_builder_basic() {
        let mut builder = LineageGraphBuilder::new();
        builder
            .add_config_section("cfg:global", "Global Config")
            .add_generator_phase("gen:coa", "CoA Generator")
            .add_output_file("out:coa", "Chart of Accounts", "chart_of_accounts.csv")
            .configured_by("gen:coa", "cfg:global")
            .produced_by("out:coa", "gen:coa");

        let graph = builder.build();
        assert_eq!(graph.node_count(), 3);
        assert_eq!(graph.edge_count(), 2);

        // configured_by stores config -> generator.
        assert_eq!(graph.edges[0].source, "cfg:global");
        assert_eq!(graph.edges[0].target, "gen:coa");
        assert_eq!(
            graph.edges[0].relationship,
            LineageRelationship::ConfiguredBy
        );

        // produced_by stores generator -> output.
        assert_eq!(graph.edges[1].source, "gen:coa");
        assert_eq!(graph.edges[1].target, "out:coa");
        assert_eq!(graph.edges[1].relationship, LineageRelationship::ProducedBy);
    }

    /// Re-adding an id is a no-op: the first node wins.
    #[test]
    fn test_no_duplicate_nodes() {
        let mut builder = LineageGraphBuilder::new();
        builder
            .add_config_section("cfg:global", "Global Config")
            .add_config_section("cfg:global", "Global Config Again");

        let graph = builder.build();
        assert_eq!(graph.node_count(), 1);
        assert_eq!(graph.nodes[0].label, "Global Config");
    }

    /// JSON serialization uses snake_case names and survives a round trip.
    #[test]
    fn test_json_roundtrip() {
        let mut builder = LineageGraphBuilder::new();
        builder
            .add_config_section("cfg:global", "Global Config")
            .add_generator_phase("gen:coa", "CoA Generator")
            .add_output_file("out:coa", "Chart of Accounts", "chart_of_accounts.csv")
            .configured_by("gen:coa", "cfg:global")
            .produced_by("out:coa", "gen:coa");

        let graph = builder.build();
        let json = graph.to_json().expect("serialize");
        assert!(json.contains("config_section"));
        assert!(json.contains("configured_by"));

        let deserialized: LineageGraph = serde_json::from_str(&json).expect("deserialize");

        assert_eq!(deserialized.node_count(), graph.node_count());
        assert_eq!(deserialized.edge_count(), graph.edge_count());
        assert_eq!(deserialized.nodes[0].id, "cfg:global");
        assert_eq!(
            deserialized.nodes[2]
                .attributes
                .get("path")
                .map(String::as_str),
            Some("chart_of_accounts.csv")
        );
    }

    /// DOT output is a complete digraph mentioning every node and edge label.
    #[test]
    fn test_dot_output() {
        let mut builder = LineageGraphBuilder::new();
        builder
            .add_config_section("cfg:global", "Global Config")
            .add_generator_phase("gen:coa", "CoA Generator")
            .configured_by("gen:coa", "cfg:global");

        let graph = builder.build();
        let dot = graph.to_dot();

        assert!(dot.starts_with("digraph lineage {"));
        assert!(dot.contains("cfg:global"));
        assert!(dot.contains("gen:coa"));
        assert!(dot.contains("configures"));
        assert!(dot.ends_with("}\n"));
    }

    /// The convenience constructor namespaces ids and wires both edge kinds.
    #[test]
    fn test_build_generation_lineage() {
        let graph = build_generation_lineage(
            &["global", "transactions"],
            &[("coa", "CoA Generation"), ("je", "Journal Entries")],
            &[
                ("coa_csv", "CoA CSV", "chart_of_accounts.csv"),
                ("je_csv", "JE CSV", "journal_entries.csv"),
            ],
            &[("coa", "global"), ("je", "transactions")],
            &[("coa", "coa_csv"), ("je", "je_csv")],
        );

        // 2 config sections + 2 phases + 2 output files.
        assert_eq!(graph.node_count(), 6);
        // 2 configured-by edges + 2 produced-by edges.
        assert_eq!(graph.edge_count(), 4);

        assert_eq!(graph.nodes[0].id, "config:global");
        assert_eq!(graph.nodes[0].label, "Config: global");
        assert_eq!(graph.edges[0].source, "config:global");
        assert_eq!(graph.edges[0].target, "phase:coa");
    }

    /// derived_from stores the edge from the original to the derived node.
    #[test]
    fn test_derived_from_edge() {
        let mut builder = LineageGraphBuilder::new();
        builder
            .add_output_file("out:raw", "Raw Data", "raw.csv")
            .add_output_file("out:agg", "Aggregated", "aggregated.csv")
            .derived_from("out:agg", "out:raw");

        let graph = builder.build();
        assert_eq!(graph.edge_count(), 1);
        assert_eq!(graph.edges[0].source, "out:raw");
        assert_eq!(graph.edges[0].target, "out:agg");
        assert_eq!(
            graph.edges[0].relationship,
            LineageRelationship::DerivedFrom
        );
    }
}