1use std::collections::{BTreeMap, BTreeSet, HashMap};
2
3use flowscope_core::{AnalyzeResult, EdgeType, NodeType};
4use serde::{Deserialize, Serialize};
5
6#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
7#[serde(rename_all = "camelCase")]
8pub struct ScriptInfo {
9 pub source_name: String,
10 pub statement_count: usize,
11 pub tables_read: Vec<String>,
12 pub tables_written: Vec<String>,
13}
14
15#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
16#[serde(rename_all = "camelCase")]
17pub struct TableInfo {
18 pub name: String,
19 pub qualified_name: String,
20 #[serde(rename = "type")]
21 pub table_type: TableType,
22 pub columns: Vec<String>,
23 #[serde(skip_serializing_if = "Option::is_none")]
24 pub source_name: Option<String>,
25}
26
27#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq)]
28#[serde(rename_all = "lowercase")]
29pub enum TableType {
30 Table,
31 View,
32 Cte,
33}
34
35impl TableType {
36 pub fn as_str(&self) -> &'static str {
37 match self {
38 TableType::Table => "table",
39 TableType::View => "view",
40 TableType::Cte => "cte",
41 }
42 }
43}
44
45impl std::fmt::Display for TableType {
46 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
47 f.write_str(self.as_str())
48 }
49}
50
51#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
52#[serde(rename_all = "camelCase")]
53pub struct ColumnMapping {
54 pub source_table: String,
55 pub source_column: String,
56 pub target_table: String,
57 pub target_column: String,
58 #[serde(skip_serializing_if = "Option::is_none")]
59 pub expression: Option<String>,
60 pub edge_type: String,
61}
62
63#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
64#[serde(rename_all = "camelCase")]
65pub struct TableDependency {
66 pub source_table: String,
67 pub target_table: String,
68}
69
70pub fn extract_script_info(result: &AnalyzeResult) -> Vec<ScriptInfo> {
71 let mut script_map: HashMap<String, ScriptInfo> = HashMap::new();
72
73 for stmt in &result.statements {
74 let source_name = stmt
75 .source_name
76 .clone()
77 .unwrap_or_else(|| "default".to_string());
78 let entry = script_map
79 .entry(source_name.clone())
80 .or_insert_with(|| ScriptInfo {
81 source_name: source_name.clone(),
82 statement_count: 0,
83 tables_read: Vec::new(),
84 tables_written: Vec::new(),
85 });
86
87 entry.statement_count += 1;
88
89 let mut tables_read: BTreeSet<String> = entry.tables_read.iter().cloned().collect();
90 let mut tables_written: BTreeSet<String> = entry.tables_written.iter().cloned().collect();
91
92 for node in &stmt.nodes {
93 if matches!(node.node_type, NodeType::Table | NodeType::View) {
94 let is_written = stmt
95 .edges
96 .iter()
97 .any(|edge| edge.to == node.id && edge.edge_type == EdgeType::DataFlow);
98 let is_read = stmt
99 .edges
100 .iter()
101 .any(|edge| edge.from == node.id && edge.edge_type == EdgeType::DataFlow);
102
103 let table_name = node
104 .qualified_name
105 .as_deref()
106 .unwrap_or(&node.label)
107 .to_string();
108
109 if is_written {
110 tables_written.insert(table_name.clone());
111 }
112 if is_read || !is_written {
115 tables_read.insert(table_name);
116 }
117 }
118 }
119
120 entry.tables_read = tables_read.into_iter().collect();
121 entry.tables_written = tables_written.into_iter().collect();
122 }
123
124 let mut values: Vec<_> = script_map.into_values().collect();
125 values.sort_by(|a, b| a.source_name.cmp(&b.source_name));
126 values
127}
128
129pub fn extract_table_info(result: &AnalyzeResult) -> Vec<TableInfo> {
130 let mut table_map: BTreeMap<String, TableInfo> = BTreeMap::new();
131
132 for stmt in &result.statements {
133 let table_nodes: Vec<_> = stmt
134 .nodes
135 .iter()
136 .filter(|node| node.node_type.is_table_like())
137 .collect();
138 let column_nodes: Vec<_> = stmt
139 .nodes
140 .iter()
141 .filter(|node| node.node_type == NodeType::Column)
142 .collect();
143
144 for table_node in table_nodes {
145 let key = table_node
146 .qualified_name
147 .as_deref()
148 .unwrap_or(&table_node.label)
149 .to_string();
150
151 let owned_column_ids: BTreeSet<_> = stmt
152 .edges
153 .iter()
154 .filter(|edge| edge.edge_type == EdgeType::Ownership && edge.from == table_node.id)
155 .map(|edge| edge.to.as_ref())
156 .collect();
157
158 let columns: BTreeSet<String> = column_nodes
159 .iter()
160 .filter(|col| owned_column_ids.contains(col.id.as_ref()))
161 .map(|col| col.label.to_string())
162 .collect();
163
164 let table_type = match table_node.node_type {
165 NodeType::View => TableType::View,
166 NodeType::Cte => TableType::Cte,
167 _ => TableType::Table,
168 };
169
170 let entry = table_map.entry(key.clone()).or_insert_with(|| TableInfo {
171 name: table_node.label.to_string(),
172 qualified_name: key.clone(),
173 table_type,
174 columns: Vec::new(),
175 source_name: stmt.source_name.clone(),
176 });
177
178 let mut merged: BTreeSet<String> = entry.columns.iter().cloned().collect();
179 merged.extend(columns);
180 entry.columns = merged.into_iter().collect();
181 }
182 }
183
184 table_map.into_values().collect()
185}
186
187pub fn extract_column_mappings(result: &AnalyzeResult) -> Vec<ColumnMapping> {
188 let mut mappings = Vec::new();
189
190 for stmt in &result.statements {
191 let table_nodes: Vec<_> = stmt
192 .nodes
193 .iter()
194 .filter(|node| node.node_type.is_table_like())
195 .collect();
196 let column_nodes: Vec<_> = stmt
197 .nodes
198 .iter()
199 .filter(|node| node.node_type == NodeType::Column)
200 .collect();
201
202 let mut column_to_table: HashMap<&str, &str> = HashMap::new();
203 for edge in &stmt.edges {
204 if edge.edge_type == EdgeType::Ownership {
205 if let Some(table_node) = table_nodes.iter().find(|node| node.id == edge.from) {
206 let table_name = table_node
207 .qualified_name
208 .as_deref()
209 .unwrap_or(&table_node.label);
210 column_to_table.insert(edge.to.as_ref(), table_name);
211 }
212 }
213 }
214
215 for edge in &stmt.edges {
216 if edge.edge_type == EdgeType::Derivation || edge.edge_type == EdgeType::DataFlow {
217 let source_col = column_nodes.iter().find(|col| col.id == edge.from);
218 let target_col = column_nodes.iter().find(|col| col.id == edge.to);
219
220 if let (Some(source), Some(target)) = (source_col, target_col) {
221 let source_table = column_to_table
222 .get(edge.from.as_ref())
223 .copied()
224 .unwrap_or("Output");
225 let target_table = column_to_table
226 .get(edge.to.as_ref())
227 .copied()
228 .unwrap_or("Output");
229
230 let expression = edge
231 .expression
232 .as_ref()
233 .map(|value| value.to_string())
234 .or_else(|| target.expression.as_ref().map(|value| value.to_string()));
235
236 mappings.push(ColumnMapping {
237 source_table: source_table.to_string(),
238 source_column: source.label.to_string(),
239 target_table: target_table.to_string(),
240 target_column: target.label.to_string(),
241 expression,
242 edge_type: edge_type_label(edge.edge_type).to_string(),
243 });
244 }
245 }
246 }
247 }
248
249 mappings
250}
251
252pub fn extract_table_dependencies(result: &AnalyzeResult) -> Vec<TableDependency> {
253 let mut dependencies = Vec::new();
254 let mut seen: BTreeSet<String> = BTreeSet::new();
255
256 for stmt in &result.statements {
257 let relation_nodes: Vec<_> = stmt
258 .nodes
259 .iter()
260 .filter(|node| node.node_type.is_relation())
261 .collect();
262
263 for edge in &stmt.edges {
264 if edge.edge_type == EdgeType::DataFlow || edge.edge_type == EdgeType::JoinDependency {
265 let source_node = relation_nodes.iter().find(|node| node.id == edge.from);
266 let target_node = relation_nodes.iter().find(|node| node.id == edge.to);
267
268 if let (Some(source), Some(target)) = (source_node, target_node) {
269 let source_key = source
270 .qualified_name
271 .as_deref()
272 .unwrap_or(&source.label)
273 .to_string();
274 let target_key = target
275 .qualified_name
276 .as_deref()
277 .unwrap_or(&target.label)
278 .to_string();
279 let dep_key = format!("{source_key}->{target_key}");
280
281 if source_key != target_key && seen.insert(dep_key) {
282 dependencies.push(TableDependency {
283 source_table: source_key,
284 target_table: target_key,
285 });
286 }
287 }
288 }
289 }
290 }
291
292 dependencies
293}
294
295fn edge_type_label(edge_type: EdgeType) -> &'static str {
296 match edge_type {
297 EdgeType::Ownership => "ownership",
298 EdgeType::DataFlow => "data_flow",
299 EdgeType::Derivation => "derivation",
300 EdgeType::JoinDependency => "join_dependency",
301 EdgeType::CrossStatement => "cross_statement",
302 }
303}