Skip to main content

dlin_core/parser/
manifest.rs

1use std::collections::{BTreeSet, HashMap};
2use std::path::Path;
3
4use anyhow::Result;
5use petgraph::stable_graph::NodeIndex;
6use serde::Deserialize;
7
8use crate::graph::types::*;
9
10/// Top-level manifest.json structure
11#[derive(Debug, Deserialize)]
12pub struct Manifest {
13    /// Nodes keyed by unique_id (models, seeds, snapshots, tests, analyses)
14    #[serde(default)]
15    pub nodes: HashMap<String, ManifestNode>,
16    /// Sources keyed by unique_id
17    #[serde(default)]
18    pub sources: HashMap<String, ManifestSource>,
19    /// Exposures keyed by unique_id
20    #[serde(default)]
21    pub exposures: HashMap<String, ManifestExposure>,
22}
23
24/// A node entry in the manifest (model, seed, snapshot, test, analysis)
25#[derive(Debug, Deserialize)]
26pub struct ManifestNode {
27    pub unique_id: String,
28    pub name: String,
29    pub resource_type: String,
30    #[serde(default)]
31    pub depends_on: DependsOn,
32    #[serde(default)]
33    pub config: ManifestConfig,
34    pub description: Option<String>,
35    pub path: Option<String>,
36    /// Column definitions keyed by column name
37    #[serde(default)]
38    pub columns: HashMap<String, ManifestColumn>,
39    /// Compiled SQL code (Jinja resolved) — present after `dbt compile` or `dbt run`
40    pub compiled_code: Option<String>,
41}
42
43/// A source entry in the manifest
44#[derive(Debug, Deserialize)]
45pub struct ManifestSource {
46    pub unique_id: String,
47    pub name: String,
48    pub source_name: String,
49    #[serde(default)]
50    pub resource_type: String,
51    pub description: Option<String>,
52    pub path: Option<String>,
53    /// Column definitions keyed by column name
54    #[serde(default)]
55    pub columns: HashMap<String, ManifestColumn>,
56}
57
58/// A column entry in the manifest
59#[derive(Debug, Deserialize)]
60pub struct ManifestColumn {
61    pub name: String,
62}
63
64/// An exposure entry in the manifest
65#[derive(Debug, Deserialize)]
66pub struct ManifestExposure {
67    pub unique_id: String,
68    pub name: String,
69    #[serde(default)]
70    pub depends_on: DependsOn,
71    pub description: Option<String>,
72    pub label: Option<String>,
73    #[serde(rename = "type")]
74    pub exposure_type: Option<String>,
75    pub url: Option<String>,
76    pub maturity: Option<String>,
77    pub owner: Option<ManifestExposureOwner>,
78}
79
80/// Owner information in a manifest exposure entry
81#[derive(Debug, Deserialize)]
82pub struct ManifestExposureOwner {
83    pub name: Option<String>,
84    pub email: Option<String>,
85}
86
87/// depends_on section with a list of node unique_ids
88#[derive(Debug, Default, Deserialize)]
89pub struct DependsOn {
90    #[serde(default)]
91    pub nodes: Vec<String>,
92}
93
94/// Config section for nodes
95#[derive(Debug, Default, Deserialize)]
96pub struct ManifestConfig {
97    pub materialized: Option<String>,
98    #[serde(default)]
99    pub tags: Vec<String>,
100}
101
102/// Map a manifest resource_type string to our NodeType enum
103fn resource_type_to_node_type(resource_type: &str) -> NodeType {
104    match resource_type {
105        "model" => NodeType::Model,
106        "source" => NodeType::Source,
107        "seed" => NodeType::Seed,
108        "snapshot" => NodeType::Snapshot,
109        "test" => NodeType::Test,
110        "analysis" => NodeType::Model,
111        "exposure" => NodeType::Exposure,
112        _ => NodeType::Model,
113    }
114}
115
/// Simplify a dbt manifest unique_id to the short form used in this tool's
/// graph by dropping the project segment.
///
/// * Sources: `source.my_project.raw.orders` -> `source.raw.orders`
/// * Tests:   `test.my_project.test_name.hash` -> `test.test_name`
///   (the trailing hash is discarded)
/// * Everything else: `model.my_project.stg_orders` -> `model.stg_orders`
///
/// For the "everything else" case, all segments after the project are kept,
/// not just the last one. A versioned model id such as
/// `model.my_project.dim_customers.v2` therefore becomes
/// `model.dim_customers.v2` rather than `model.v2`, which would collide with
/// the `v2` of every other model.
///
/// Ids that are too short to contain a project segment (fewer than four
/// segments for sources, fewer than three otherwise) are returned unchanged.
fn simplify_unique_id(unique_id: &str, resource_type: &str) -> String {
    let parts: Vec<&str> = unique_id.split('.').collect();
    match (resource_type, parts.as_slice()) {
        // source.project.source_name.table_name -> source.source_name.table_name
        ("source", [kind, _project, source_name, table, ..]) => {
            format!("{}.{}.{}", kind, source_name, table)
        }
        // A short source id must not fall through to the generic arm below,
        // which would strip its source name.
        ("source", _) => unique_id.to_string(),
        // test.project.test_name[.hash] -> test.test_name
        ("test", [kind, _project, test_name, ..]) => format!("{}.{}", kind, test_name),
        ("test", _) => unique_id.to_string(),
        // type.project.name[.more] -> type.name[.more]
        (_, [kind, _project, name @ ..]) if !name.is_empty() => {
            format!("{}.{}", kind, name.join("."))
        }
        _ => unique_id.to_string(),
    }
}
149
150/// Load and parse a manifest.json file without building a graph.
151pub fn load_manifest(manifest_path: &Path) -> Result<Manifest> {
152    let content = std::fs::read_to_string(manifest_path).map_err(|e| {
153        crate::error::DbtLineageError::FileReadError {
154            path: manifest_path.to_path_buf(),
155            source: e,
156        }
157    })?;
158
159    let manifest: Manifest = serde_json::from_str(&content).map_err(|e| {
160        crate::error::DbtLineageError::ArtifactParseError {
161            path: manifest_path.to_path_buf(),
162            source: e,
163        }
164    })?;
165
166    Ok(manifest)
167}
168
169impl Manifest {
170    /// Collect `compiled_code` from manifest nodes as a mapping from simplified
171    /// unique_id to SQL string.  Nodes without `compiled_code` are omitted.
172    ///
173    /// This is the manifest-mode counterpart of the file-based
174    /// `collect_sql_contents` used in SQL-parse mode.  Users must run
175    /// `dbt compile` (or `dbt run`) before invoking dlin so that the manifest
176    /// contains compiled SQL.
177    pub fn collect_sql_contents(&self) -> HashMap<String, String> {
178        let mut map = HashMap::new();
179        for (orig_id, node) in &self.nodes {
180            if let Some(ref code) = node.compiled_code {
181                let simple_id = simplify_unique_id(orig_id, &node.resource_type);
182                map.insert(simple_id, code.clone());
183            }
184        }
185        map
186    }
187
188    /// Collect all unique file paths referenced by nodes and sources.
189    /// Returns relative paths as stored in the manifest (e.g. "models/staging/stg_orders.sql").
190    pub fn collect_file_paths(&self) -> BTreeSet<String> {
191        let mut paths = BTreeSet::new();
192        for node in self.nodes.values() {
193            if let Some(ref p) = node.path {
194                paths.insert(p.clone());
195            }
196        }
197        for source in self.sources.values() {
198            if let Some(ref p) = source.path {
199                paths.insert(p.clone());
200            }
201        }
202        paths
203    }
204}
205
206/// Build a LineageGraph from a parsed manifest.json file.
207pub fn build_graph_from_manifest(manifest_path: &Path) -> Result<LineageGraph> {
208    let manifest = load_manifest(manifest_path)?;
209    build_graph_from_parsed_manifest(&manifest)
210}
211
212/// Build a LineageGraph from an already-parsed Manifest struct.
213/// This is separated for testability and reuse by the diff feature.
214pub fn build_graph_from_parsed_manifest(manifest: &Manifest) -> Result<LineageGraph> {
215    let mut graph = LineageGraph::new();
216    // Map from original manifest unique_id to graph NodeIndex
217    let mut node_map: HashMap<String, NodeIndex> = HashMap::new();
218
219    // 1. Add source nodes
220    add_source_nodes(&mut graph, &mut node_map, &manifest.sources);
221
222    // 2. Add regular nodes (models, seeds, snapshots, tests, analyses)
223    add_regular_nodes(&mut graph, &mut node_map, &manifest.nodes);
224
225    // 3. Add exposure nodes
226    add_exposure_nodes(&mut graph, &mut node_map, &manifest.exposures);
227
228    // 4. Add edges from depends_on for regular nodes
229    add_node_edges(&mut graph, &node_map, &manifest.nodes);
230
231    // 5. Add edges from depends_on for exposures
232    add_exposure_edges(&mut graph, &node_map, &manifest.exposures);
233
234    Ok(graph)
235}
236
237fn add_source_nodes(
238    graph: &mut LineageGraph,
239    node_map: &mut HashMap<String, NodeIndex>,
240    sources: &HashMap<String, ManifestSource>,
241) {
242    for (orig_id, source) in sources {
243        let simple_id = simplify_unique_id(orig_id, "source");
244        let label = format!("{}.{}", source.source_name, source.name);
245
246        let idx = graph.add_node(NodeData {
247            unique_id: simple_id.clone(),
248            label,
249            node_type: NodeType::Source,
250            file_path: source.path.as_ref().map(|p| p.into()),
251            description: non_empty_string(&source.description),
252            materialization: None,
253            tags: vec![],
254            columns: {
255                let mut cols: Vec<String> = source.columns.keys().cloned().collect();
256                cols.sort();
257                cols
258            },
259            exposure: None,
260        });
261        node_map.insert(orig_id.clone(), idx);
262        // Also index by simplified id for edge resolution
263        node_map.insert(simple_id, idx);
264    }
265}
266
267fn add_regular_nodes(
268    graph: &mut LineageGraph,
269    node_map: &mut HashMap<String, NodeIndex>,
270    nodes: &HashMap<String, ManifestNode>,
271) {
272    for (orig_id, node) in nodes {
273        let node_type = resource_type_to_node_type(&node.resource_type);
274        let simple_id = simplify_unique_id(orig_id, &node.resource_type);
275
276        let idx = graph.add_node(NodeData {
277            unique_id: simple_id.clone(),
278            label: node.name.clone(),
279            node_type,
280            file_path: node.path.as_ref().map(|p| p.into()),
281            description: non_empty_string(&node.description),
282            materialization: node.config.materialized.clone(),
283            tags: node.config.tags.clone(),
284            columns: {
285                let mut cols: Vec<String> = node.columns.keys().cloned().collect();
286                cols.sort();
287                cols
288            },
289            exposure: None,
290        });
291        node_map.insert(orig_id.clone(), idx);
292        node_map.insert(simple_id, idx);
293    }
294}
295
296fn add_exposure_nodes(
297    graph: &mut LineageGraph,
298    node_map: &mut HashMap<String, NodeIndex>,
299    exposures: &HashMap<String, ManifestExposure>,
300) {
301    for (orig_id, exposure) in exposures {
302        let simple_id = simplify_unique_id(orig_id, "exposure");
303
304        let idx = graph.add_node(NodeData {
305            unique_id: simple_id.clone(),
306            label: exposure.name.clone(),
307            node_type: NodeType::Exposure,
308            file_path: None,
309            description: non_empty_string(&exposure.description),
310            materialization: None,
311            tags: vec![],
312            columns: vec![],
313            exposure: Some(ExposureInfo {
314                label: non_empty_string(&exposure.label),
315                exposure_type: non_empty_string(&exposure.exposure_type),
316                url: non_empty_string(&exposure.url),
317                maturity: non_empty_string(&exposure.maturity),
318                owner: exposure.owner.as_ref().map(|o| OwnerInfo {
319                    name: non_empty_string(&o.name),
320                    email: non_empty_string(&o.email),
321                }),
322            }),
323        });
324        node_map.insert(orig_id.clone(), idx);
325        node_map.insert(simple_id, idx);
326    }
327}
328
329fn add_node_edges(
330    graph: &mut LineageGraph,
331    node_map: &HashMap<String, NodeIndex>,
332    nodes: &HashMap<String, ManifestNode>,
333) {
334    for (orig_id, node) in nodes {
335        let current_idx = match node_map.get(orig_id) {
336            Some(&idx) => idx,
337            None => continue,
338        };
339
340        // Use EdgeType::Test when the target node is a test, regardless of
341        // the dependency's type prefix, so all test relationships are consistent.
342        let current_is_test = graph[current_idx].node_type == NodeType::Test;
343
344        for dep_id in &node.depends_on.nodes {
345            if let Some(&dep_idx) = node_map.get(dep_id) {
346                let edge_type = if current_is_test {
347                    EdgeType::Test
348                } else {
349                    infer_edge_type(dep_id)
350                };
351                graph.add_edge(dep_idx, current_idx, EdgeData::direct(edge_type));
352            }
353        }
354    }
355}
356
357fn add_exposure_edges(
358    graph: &mut LineageGraph,
359    node_map: &HashMap<String, NodeIndex>,
360    exposures: &HashMap<String, ManifestExposure>,
361) {
362    for (orig_id, exposure) in exposures {
363        let current_idx = match node_map.get(orig_id) {
364            Some(&idx) => idx,
365            None => continue,
366        };
367
368        for dep_id in &exposure.depends_on.nodes {
369            if let Some(&dep_idx) = node_map.get(dep_id) {
370                graph.add_edge(dep_idx, current_idx, EdgeData::direct(EdgeType::Exposure));
371            }
372        }
373    }
374}
375
376/// Infer the edge type from a dependency unique_id
377fn infer_edge_type(dep_unique_id: &str) -> EdgeType {
378    if dep_unique_id.starts_with("source.") {
379        EdgeType::Source
380    } else if dep_unique_id.starts_with("test.") {
381        EdgeType::Test
382    } else {
383        EdgeType::Ref
384    }
385}
386
/// Clone the contained string unless it is missing, empty, or made up solely
/// of whitespace — in those cases return `None`.
///
/// The returned string is an untrimmed copy of the input.
fn non_empty_string(s: &Option<String>) -> Option<String> {
    match s.as_deref() {
        Some(text) if !text.trim().is_empty() => Some(text.to_owned()),
        _ => None,
    }
}
391
392#[cfg(test)]
393mod tests {
394    use super::*;
395    use std::fs;
396
397    #[test]
398    fn test_resource_type_to_node_type() {
399        assert_eq!(resource_type_to_node_type("model"), NodeType::Model);
400        assert_eq!(resource_type_to_node_type("source"), NodeType::Source);
401        assert_eq!(resource_type_to_node_type("seed"), NodeType::Seed);
402        assert_eq!(resource_type_to_node_type("snapshot"), NodeType::Snapshot);
403        assert_eq!(resource_type_to_node_type("test"), NodeType::Test);
404        assert_eq!(resource_type_to_node_type("analysis"), NodeType::Model);
405        assert_eq!(resource_type_to_node_type("exposure"), NodeType::Exposure);
406        assert_eq!(resource_type_to_node_type("unknown"), NodeType::Model);
407    }
408
409    #[test]
410    fn test_simplify_unique_id_model() {
411        assert_eq!(
412            simplify_unique_id("model.my_project.stg_orders", "model"),
413            "model.stg_orders"
414        );
415    }
416
417    #[test]
418    fn test_simplify_unique_id_source() {
419        assert_eq!(
420            simplify_unique_id("source.my_project.raw.orders", "source"),
421            "source.raw.orders"
422        );
423    }
424
425    #[test]
426    fn test_simplify_unique_id_short() {
427        assert_eq!(
428            simplify_unique_id("model.stg_orders", "model"),
429            "model.stg_orders"
430        );
431    }
432
433    #[test]
434    fn test_simplify_unique_id_source_short() {
435        assert_eq!(
436            simplify_unique_id("source.raw.orders", "source"),
437            "source.raw.orders"
438        );
439    }
440
441    #[test]
442    fn test_simplify_unique_id_test() {
443        // test.project.test_name.hash -> test.test_name
444        assert_eq!(
445            simplify_unique_id(
446                "test.jaffle_shop.not_null_orders_order_id.cf6c17daed",
447                "test"
448            ),
449            "test.not_null_orders_order_id"
450        );
451    }
452
453    #[test]
454    fn test_simplify_unique_id_test_short() {
455        assert_eq!(
456            simplify_unique_id("test.not_null_orders_order_id", "test"),
457            "test.not_null_orders_order_id"
458        );
459    }
460
461    #[test]
462    fn test_infer_edge_type() {
463        assert_eq!(
464            infer_edge_type("source.my_project.raw.orders"),
465            EdgeType::Source
466        );
467        assert_eq!(
468            infer_edge_type("model.my_project.stg_orders"),
469            EdgeType::Ref
470        );
471        assert_eq!(infer_edge_type("test.my_project.some_test"), EdgeType::Test);
472        assert_eq!(infer_edge_type("seed.my_project.countries"), EdgeType::Ref);
473    }
474
475    #[test]
476    fn test_non_empty_string() {
477        assert_eq!(non_empty_string(&None), None);
478        assert_eq!(non_empty_string(&Some("".to_string())), None);
479        assert_eq!(non_empty_string(&Some("  ".to_string())), None);
480        assert_eq!(
481            non_empty_string(&Some("hello".to_string())),
482            Some("hello".to_string())
483        );
484    }
485
486    #[test]
487    fn test_build_graph_from_minimal_manifest() {
488        let manifest = Manifest {
489            nodes: HashMap::from([(
490                "model.proj.stg_orders".to_string(),
491                ManifestNode {
492                    unique_id: "model.proj.stg_orders".to_string(),
493                    name: "stg_orders".to_string(),
494                    resource_type: "model".to_string(),
495                    depends_on: DependsOn {
496                        nodes: vec!["source.proj.raw.orders".to_string()],
497                    },
498                    config: ManifestConfig {
499                        materialized: Some("view".to_string()),
500                        tags: vec!["staging".to_string()],
501                    },
502                    description: Some("Staged orders".to_string()),
503                    path: Some("models/staging/stg_orders.sql".to_string()),
504                    columns: HashMap::new(),
505                    compiled_code: None,
506                },
507            )]),
508            sources: HashMap::from([(
509                "source.proj.raw.orders".to_string(),
510                ManifestSource {
511                    unique_id: "source.proj.raw.orders".to_string(),
512                    name: "orders".to_string(),
513                    source_name: "raw".to_string(),
514                    resource_type: "source".to_string(),
515                    description: Some("Raw orders table".to_string()),
516                    path: Some("models/staging/schema.yml".to_string()),
517                    columns: HashMap::new(),
518                },
519            )]),
520            exposures: HashMap::new(),
521        };
522
523        let graph = build_graph_from_parsed_manifest(&manifest).unwrap();
524
525        assert_eq!(graph.node_count(), 2);
526        assert_eq!(graph.edge_count(), 1);
527
528        // Find the model node
529        let model = graph
530            .node_indices()
531            .find(|&i| graph[i].node_type == NodeType::Model)
532            .expect("Should have a model node");
533        assert_eq!(graph[model].label, "stg_orders");
534        assert_eq!(graph[model].unique_id, "model.stg_orders");
535        assert_eq!(graph[model].materialization.as_deref(), Some("view"));
536        assert_eq!(graph[model].tags, vec!["staging"]);
537        assert_eq!(graph[model].description.as_deref(), Some("Staged orders"));
538
539        // Find the source node
540        let source = graph
541            .node_indices()
542            .find(|&i| graph[i].node_type == NodeType::Source)
543            .expect("Should have a source node");
544        assert_eq!(graph[source].label, "raw.orders");
545        assert_eq!(graph[source].unique_id, "source.raw.orders");
546    }
547
548    #[test]
549    fn test_build_graph_with_exposures() {
550        let manifest = Manifest {
551            nodes: HashMap::from([(
552                "model.proj.orders".to_string(),
553                ManifestNode {
554                    unique_id: "model.proj.orders".to_string(),
555                    name: "orders".to_string(),
556                    resource_type: "model".to_string(),
557                    depends_on: DependsOn::default(),
558                    config: ManifestConfig::default(),
559                    description: None,
560                    path: None,
561                    columns: HashMap::new(),
562                    compiled_code: None,
563                },
564            )]),
565            sources: HashMap::new(),
566            exposures: HashMap::from([(
567                "exposure.proj.weekly_report".to_string(),
568                ManifestExposure {
569                    unique_id: "exposure.proj.weekly_report".to_string(),
570                    name: "weekly_report".to_string(),
571                    depends_on: DependsOn {
572                        nodes: vec!["model.proj.orders".to_string()],
573                    },
574                    description: Some("Weekly dashboard".to_string()),
575                    label: None,
576                    exposure_type: None,
577                    url: None,
578                    maturity: None,
579                    owner: None,
580                },
581            )]),
582        };
583
584        let graph = build_graph_from_parsed_manifest(&manifest).unwrap();
585        assert_eq!(graph.node_count(), 2);
586        assert_eq!(graph.edge_count(), 1);
587
588        let exposure = graph
589            .node_indices()
590            .find(|&i| graph[i].node_type == NodeType::Exposure)
591            .expect("Should have an exposure node");
592        assert_eq!(graph[exposure].label, "weekly_report");
593        assert_eq!(
594            graph[exposure].description.as_deref(),
595            Some("Weekly dashboard")
596        );
597    }
598
599    #[test]
600    fn test_exposure_metadata_parsed() {
601        let manifest = Manifest {
602            nodes: HashMap::new(),
603            sources: HashMap::new(),
604            exposures: HashMap::from([(
605                "exposure.proj.dashboard".to_string(),
606                ManifestExposure {
607                    unique_id: "exposure.proj.dashboard".to_string(),
608                    name: "dashboard".to_string(),
609                    depends_on: DependsOn { nodes: vec![] },
610                    description: Some("Main dashboard".to_string()),
611                    label: Some("Main Dashboard".to_string()),
612                    exposure_type: Some("dashboard".to_string()),
613                    url: Some("https://bi.example.com".to_string()),
614                    maturity: Some("high".to_string()),
615                    owner: Some(ManifestExposureOwner {
616                        name: Some("Data Team".to_string()),
617                        email: Some("data@example.com".to_string()),
618                    }),
619                },
620            )]),
621        };
622
623        let graph = build_graph_from_parsed_manifest(&manifest).unwrap();
624        let exp_idx = graph
625            .node_indices()
626            .find(|&i| graph[i].node_type == NodeType::Exposure)
627            .expect("Should have an exposure node");
628        let exp = &graph[exp_idx];
629
630        let info = exp.exposure.as_ref().expect("Should have exposure info");
631        assert_eq!(info.label.as_deref(), Some("Main Dashboard"));
632        assert_eq!(info.exposure_type.as_deref(), Some("dashboard"));
633        assert_eq!(info.url.as_deref(), Some("https://bi.example.com"));
634        assert_eq!(info.maturity.as_deref(), Some("high"));
635
636        let owner = info.owner.as_ref().expect("Should have owner");
637        assert_eq!(owner.name.as_deref(), Some("Data Team"));
638        assert_eq!(owner.email.as_deref(), Some("data@example.com"));
639    }
640
641    #[test]
642    fn test_exposure_metadata_from_fixture() {
643        let manifest_path = std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
644            .join("../../tests/fixtures/simple_project/target/manifest.json");
645        let graph = build_graph_from_manifest(&manifest_path).unwrap();
646
647        let exp_idx = graph
648            .node_indices()
649            .find(|&i| graph[i].node_type == NodeType::Exposure)
650            .expect("Should have an exposure node from fixture");
651        let exp = &graph[exp_idx];
652        assert_eq!(exp.label, "weekly_report");
653
654        let info = exp.exposure.as_ref().expect("Should have exposure info");
655        assert_eq!(info.label.as_deref(), Some("Weekly Report"));
656        assert_eq!(info.exposure_type.as_deref(), Some("dashboard"));
657        assert_eq!(info.url.as_deref(), Some("https://bi.example.com/weekly"));
658        assert_eq!(info.maturity.as_deref(), Some("high"));
659
660        let owner = info.owner.as_ref().expect("Should have owner");
661        assert_eq!(owner.name.as_deref(), Some("Data Team"));
662        assert_eq!(owner.email.as_deref(), Some("data@example.com"));
663    }
664
665    #[test]
666    fn test_build_graph_with_seeds_and_snapshots() {
667        let manifest = Manifest {
668            nodes: HashMap::from([
669                (
670                    "seed.proj.countries".to_string(),
671                    ManifestNode {
672                        unique_id: "seed.proj.countries".to_string(),
673                        name: "countries".to_string(),
674                        resource_type: "seed".to_string(),
675                        depends_on: DependsOn::default(),
676                        config: ManifestConfig::default(),
677                        description: None,
678                        path: Some("seeds/countries.csv".to_string()),
679                        columns: HashMap::new(),
680                        compiled_code: None,
681                    },
682                ),
683                (
684                    "snapshot.proj.snap_orders".to_string(),
685                    ManifestNode {
686                        unique_id: "snapshot.proj.snap_orders".to_string(),
687                        name: "snap_orders".to_string(),
688                        resource_type: "snapshot".to_string(),
689                        depends_on: DependsOn::default(),
690                        config: ManifestConfig {
691                            materialized: Some("snapshot".to_string()),
692                            tags: vec![],
693                        },
694                        description: None,
695                        path: Some("snapshots/snap_orders.sql".to_string()),
696                        columns: HashMap::new(),
697                        compiled_code: None,
698                    },
699                ),
700            ]),
701            sources: HashMap::new(),
702            exposures: HashMap::new(),
703        };
704
705        let graph = build_graph_from_parsed_manifest(&manifest).unwrap();
706        assert_eq!(graph.node_count(), 2);
707
708        let seed = graph
709            .node_indices()
710            .find(|&i| graph[i].node_type == NodeType::Seed)
711            .expect("Should have a seed node");
712        assert_eq!(graph[seed].label, "countries");
713
714        let snap = graph
715            .node_indices()
716            .find(|&i| graph[i].node_type == NodeType::Snapshot)
717            .expect("Should have a snapshot node");
718        assert_eq!(graph[snap].label, "snap_orders");
719    }
720
721    #[test]
722    fn test_build_graph_with_tests() {
723        let manifest = Manifest {
724            nodes: HashMap::from([
725                (
726                    "model.proj.orders".to_string(),
727                    ManifestNode {
728                        unique_id: "model.proj.orders".to_string(),
729                        name: "orders".to_string(),
730                        resource_type: "model".to_string(),
731                        depends_on: DependsOn::default(),
732                        config: ManifestConfig::default(),
733                        description: None,
734                        path: None,
735                        columns: HashMap::new(),
736                        compiled_code: None,
737                    },
738                ),
739                (
740                    "test.proj.assert_positive".to_string(),
741                    ManifestNode {
742                        unique_id: "test.proj.assert_positive".to_string(),
743                        name: "assert_positive".to_string(),
744                        resource_type: "test".to_string(),
745                        depends_on: DependsOn {
746                            nodes: vec!["model.proj.orders".to_string()],
747                        },
748                        config: ManifestConfig::default(),
749                        description: None,
750                        path: Some("tests/assert_positive.sql".to_string()),
751                        columns: HashMap::new(),
752                        compiled_code: None,
753                    },
754                ),
755            ]),
756            sources: HashMap::new(),
757            exposures: HashMap::new(),
758        };
759
760        let graph = build_graph_from_parsed_manifest(&manifest).unwrap();
761        assert_eq!(graph.node_count(), 2);
762        assert_eq!(graph.edge_count(), 1);
763
764        let test_node = graph
765            .node_indices()
766            .find(|&i| graph[i].node_type == NodeType::Test)
767            .expect("Should have a test node");
768        assert_eq!(graph[test_node].label, "assert_positive");
769
770        // Edge to test node should use EdgeType::Test, not EdgeType::Ref
771        use petgraph::visit::IntoEdgeReferences;
772        let edge = graph.edge_references().next().unwrap();
773        assert_eq!(edge.weight().edge_type, EdgeType::Test);
774    }
775
776    #[test]
777    fn test_build_graph_empty_manifest() {
778        let manifest = Manifest {
779            nodes: HashMap::new(),
780            sources: HashMap::new(),
781            exposures: HashMap::new(),
782        };
783
784        let graph = build_graph_from_parsed_manifest(&manifest).unwrap();
785        assert_eq!(graph.node_count(), 0);
786        assert_eq!(graph.edge_count(), 0);
787    }
788
789    #[test]
790    fn test_build_graph_missing_dependency() {
791        // A node depends on something not in the manifest -- edge is skipped gracefully
792        let manifest = Manifest {
793            nodes: HashMap::from([(
794                "model.proj.orders".to_string(),
795                ManifestNode {
796                    unique_id: "model.proj.orders".to_string(),
797                    name: "orders".to_string(),
798                    resource_type: "model".to_string(),
799                    depends_on: DependsOn {
800                        nodes: vec!["model.proj.nonexistent".to_string()],
801                    },
802                    config: ManifestConfig::default(),
803                    description: None,
804                    path: None,
805                    columns: HashMap::new(),
806                    compiled_code: None,
807                },
808            )]),
809            sources: HashMap::new(),
810            exposures: HashMap::new(),
811        };
812
813        let graph = build_graph_from_parsed_manifest(&manifest).unwrap();
814        assert_eq!(graph.node_count(), 1);
815        assert_eq!(graph.edge_count(), 0); // Edge to nonexistent node is skipped
816    }
817
818    #[test]
819    fn test_build_graph_optional_fields() {
820        let manifest = Manifest {
821            nodes: HashMap::from([(
822                "model.proj.bare".to_string(),
823                ManifestNode {
824                    unique_id: "model.proj.bare".to_string(),
825                    name: "bare".to_string(),
826                    resource_type: "model".to_string(),
827                    depends_on: DependsOn::default(),
828                    config: ManifestConfig {
829                        materialized: None,
830                        tags: vec![],
831                    },
832                    description: None,
833                    path: None,
834                    columns: HashMap::new(),
835                    compiled_code: None,
836                },
837            )]),
838            sources: HashMap::new(),
839            exposures: HashMap::new(),
840        };
841
842        let graph = build_graph_from_parsed_manifest(&manifest).unwrap();
843        let node = &graph[graph.node_indices().next().unwrap()];
844        assert!(node.description.is_none());
845        assert!(node.materialization.is_none());
846        assert!(node.tags.is_empty());
847        assert!(node.file_path.is_none());
848    }
849
850    #[test]
851    fn test_build_graph_from_manifest_file() {
852        let tmp = tempfile::tempdir().unwrap();
853        let manifest_path = tmp.path().join("manifest.json");
854
855        let manifest_json = r#"{
856            "nodes": {
857                "model.proj.stg_orders": {
858                    "unique_id": "model.proj.stg_orders",
859                    "name": "stg_orders",
860                    "resource_type": "model",
861                    "depends_on": { "nodes": ["source.proj.raw.orders"] },
862                    "config": { "materialized": "view", "tags": [] },
863                    "description": "Staged orders",
864                    "path": "models/staging/stg_orders.sql"
865                }
866            },
867            "sources": {
868                "source.proj.raw.orders": {
869                    "unique_id": "source.proj.raw.orders",
870                    "name": "orders",
871                    "source_name": "raw",
872                    "resource_type": "source",
873                    "description": "Raw orders",
874                    "path": "models/staging/schema.yml"
875                }
876            },
877            "exposures": {}
878        }"#;
879
880        fs::write(&manifest_path, manifest_json).unwrap();
881
882        let graph = build_graph_from_manifest(&manifest_path).unwrap();
883        assert_eq!(graph.node_count(), 2);
884        assert_eq!(graph.edge_count(), 1);
885    }
886
887    #[test]
888    fn test_build_graph_from_manifest_file_not_found() {
889        let result = build_graph_from_manifest(Path::new("/nonexistent/manifest.json"));
890        assert!(result.is_err());
891    }
892
893    #[test]
894    fn test_build_graph_from_manifest_invalid_json() {
895        let tmp = tempfile::tempdir().unwrap();
896        let manifest_path = tmp.path().join("manifest.json");
897        fs::write(&manifest_path, "not valid json").unwrap();
898
899        let result = build_graph_from_manifest(&manifest_path);
900        assert!(result.is_err());
901    }
902
903    #[test]
904    fn test_build_graph_analysis_maps_to_model() {
905        let manifest = Manifest {
906            nodes: HashMap::from([(
907                "analysis.proj.my_analysis".to_string(),
908                ManifestNode {
909                    unique_id: "analysis.proj.my_analysis".to_string(),
910                    name: "my_analysis".to_string(),
911                    resource_type: "analysis".to_string(),
912                    depends_on: DependsOn::default(),
913                    config: ManifestConfig::default(),
914                    description: None,
915                    path: None,
916                    columns: HashMap::new(),
917                    compiled_code: None,
918                },
919            )]),
920            sources: HashMap::new(),
921            exposures: HashMap::new(),
922        };
923
924        let graph = build_graph_from_parsed_manifest(&manifest).unwrap();
925        let node = &graph[graph.node_indices().next().unwrap()];
926        assert_eq!(node.node_type, NodeType::Model);
927    }
928
929    #[test]
930    fn test_build_graph_complex_chain() {
931        // source -> stg_orders -> orders (with multiple deps)
932        let manifest = Manifest {
933            nodes: HashMap::from([
934                (
935                    "model.proj.stg_orders".to_string(),
936                    ManifestNode {
937                        unique_id: "model.proj.stg_orders".to_string(),
938                        name: "stg_orders".to_string(),
939                        resource_type: "model".to_string(),
940                        depends_on: DependsOn {
941                            nodes: vec!["source.proj.raw.orders".to_string()],
942                        },
943                        config: ManifestConfig {
944                            materialized: Some("view".to_string()),
945                            tags: vec![],
946                        },
947                        description: None,
948                        path: None,
949                        columns: HashMap::new(),
950                        compiled_code: None,
951                    },
952                ),
953                (
954                    "model.proj.stg_payments".to_string(),
955                    ManifestNode {
956                        unique_id: "model.proj.stg_payments".to_string(),
957                        name: "stg_payments".to_string(),
958                        resource_type: "model".to_string(),
959                        depends_on: DependsOn {
960                            nodes: vec!["source.proj.raw.payments".to_string()],
961                        },
962                        config: ManifestConfig::default(),
963                        description: None,
964                        path: None,
965                        columns: HashMap::new(),
966                        compiled_code: None,
967                    },
968                ),
969                (
970                    "model.proj.orders".to_string(),
971                    ManifestNode {
972                        unique_id: "model.proj.orders".to_string(),
973                        name: "orders".to_string(),
974                        resource_type: "model".to_string(),
975                        depends_on: DependsOn {
976                            nodes: vec![
977                                "model.proj.stg_orders".to_string(),
978                                "model.proj.stg_payments".to_string(),
979                            ],
980                        },
981                        config: ManifestConfig {
982                            materialized: Some("table".to_string()),
983                            tags: vec!["marts".to_string()],
984                        },
985                        description: Some("Order fact table".to_string()),
986                        path: None,
987                        columns: HashMap::new(),
988                        compiled_code: None,
989                    },
990                ),
991            ]),
992            sources: HashMap::from([
993                (
994                    "source.proj.raw.orders".to_string(),
995                    ManifestSource {
996                        unique_id: "source.proj.raw.orders".to_string(),
997                        name: "orders".to_string(),
998                        source_name: "raw".to_string(),
999                        resource_type: "source".to_string(),
1000                        description: None,
1001                        path: None,
1002                        columns: HashMap::new(),
1003                    },
1004                ),
1005                (
1006                    "source.proj.raw.payments".to_string(),
1007                    ManifestSource {
1008                        unique_id: "source.proj.raw.payments".to_string(),
1009                        name: "payments".to_string(),
1010                        source_name: "raw".to_string(),
1011                        resource_type: "source".to_string(),
1012                        description: None,
1013                        path: None,
1014                        columns: HashMap::new(),
1015                    },
1016                ),
1017            ]),
1018            exposures: HashMap::new(),
1019        };
1020
1021        let graph = build_graph_from_parsed_manifest(&manifest).unwrap();
1022        // 2 sources + 3 models = 5 nodes
1023        assert_eq!(graph.node_count(), 5);
1024        // source.raw.orders -> stg_orders, source.raw.payments -> stg_payments,
1025        // stg_orders -> orders, stg_payments -> orders = 4 edges
1026        assert_eq!(graph.edge_count(), 4);
1027    }
1028
1029    #[test]
1030    fn test_build_graph_from_fixture_manifest() {
1031        let fixture_path = Path::new(env!("CARGO_MANIFEST_DIR"))
1032            .join("../../tests/fixtures/simple_project/target/manifest.json");
1033
1034        if !fixture_path.exists() {
1035            // Skip if fixture not yet created
1036            return;
1037        }
1038
1039        let graph = build_graph_from_manifest(&fixture_path).unwrap();
1040
1041        // The fixture has: 3 sources, 3 staging models, 2 mart models, 1 seed, 1 test, 1 exposure
1042        // = 11 nodes total
1043        assert!(
1044            graph.node_count() >= 10,
1045            "Expected at least 10 nodes, got {}",
1046            graph.node_count()
1047        );
1048
1049        // Check we have all node types present
1050        let has_source = graph
1051            .node_indices()
1052            .any(|i| graph[i].node_type == NodeType::Source);
1053        let has_model = graph
1054            .node_indices()
1055            .any(|i| graph[i].node_type == NodeType::Model);
1056        let has_seed = graph
1057            .node_indices()
1058            .any(|i| graph[i].node_type == NodeType::Seed);
1059        let has_test = graph
1060            .node_indices()
1061            .any(|i| graph[i].node_type == NodeType::Test);
1062        let has_exposure = graph
1063            .node_indices()
1064            .any(|i| graph[i].node_type == NodeType::Exposure);
1065
1066        assert!(has_source, "Should have source nodes");
1067        assert!(has_model, "Should have model nodes");
1068        assert!(has_seed, "Should have seed nodes");
1069        assert!(has_test, "Should have test nodes");
1070        assert!(has_exposure, "Should have exposure nodes");
1071
1072        // Check edges exist
1073        assert!(graph.edge_count() > 0, "Should have edges");
1074    }
1075
1076    #[test]
1077    fn test_collect_file_paths() {
1078        let manifest = Manifest {
1079            nodes: HashMap::from([
1080                (
1081                    "model.proj.stg_orders".to_string(),
1082                    ManifestNode {
1083                        unique_id: "model.proj.stg_orders".to_string(),
1084                        name: "stg_orders".to_string(),
1085                        resource_type: "model".to_string(),
1086                        depends_on: DependsOn::default(),
1087                        config: ManifestConfig::default(),
1088                        description: None,
1089                        path: Some("models/staging/stg_orders.sql".to_string()),
1090                        columns: HashMap::new(),
1091                        compiled_code: None,
1092                    },
1093                ),
1094                (
1095                    "model.proj.orders".to_string(),
1096                    ManifestNode {
1097                        unique_id: "model.proj.orders".to_string(),
1098                        name: "orders".to_string(),
1099                        resource_type: "model".to_string(),
1100                        depends_on: DependsOn::default(),
1101                        config: ManifestConfig::default(),
1102                        description: None,
1103                        path: Some("models/marts/orders.sql".to_string()),
1104                        columns: HashMap::new(),
1105                        compiled_code: None,
1106                    },
1107                ),
1108                (
1109                    "model.proj.bare".to_string(),
1110                    ManifestNode {
1111                        unique_id: "model.proj.bare".to_string(),
1112                        name: "bare".to_string(),
1113                        resource_type: "model".to_string(),
1114                        depends_on: DependsOn::default(),
1115                        config: ManifestConfig::default(),
1116                        description: None,
1117                        path: None,
1118                        columns: HashMap::new(),
1119                        compiled_code: None,
1120                    },
1121                ),
1122            ]),
1123            sources: HashMap::from([(
1124                "source.proj.raw.orders".to_string(),
1125                ManifestSource {
1126                    unique_id: "source.proj.raw.orders".to_string(),
1127                    name: "orders".to_string(),
1128                    source_name: "raw".to_string(),
1129                    resource_type: "source".to_string(),
1130                    description: None,
1131                    path: Some("models/staging/schema.yml".to_string()),
1132                    columns: HashMap::new(),
1133                },
1134            )]),
1135            exposures: HashMap::new(),
1136        };
1137
1138        let paths = manifest.collect_file_paths();
1139        assert_eq!(paths.len(), 3);
1140        assert!(paths.contains("models/staging/stg_orders.sql"));
1141        assert!(paths.contains("models/marts/orders.sql"));
1142        assert!(paths.contains("models/staging/schema.yml"));
1143        // bare has no path, should not appear
1144        assert!(!paths.iter().any(|p| p.contains("bare")));
1145    }
1146
1147    #[test]
1148    fn test_collect_file_paths_deduplicates() {
1149        // Multiple sources can reference the same YAML file
1150        let manifest = Manifest {
1151            nodes: HashMap::new(),
1152            sources: HashMap::from([
1153                (
1154                    "source.proj.raw.orders".to_string(),
1155                    ManifestSource {
1156                        unique_id: "source.proj.raw.orders".to_string(),
1157                        name: "orders".to_string(),
1158                        source_name: "raw".to_string(),
1159                        resource_type: "source".to_string(),
1160                        description: None,
1161                        path: Some("models/staging/schema.yml".to_string()),
1162                        columns: HashMap::new(),
1163                    },
1164                ),
1165                (
1166                    "source.proj.raw.customers".to_string(),
1167                    ManifestSource {
1168                        unique_id: "source.proj.raw.customers".to_string(),
1169                        name: "customers".to_string(),
1170                        source_name: "raw".to_string(),
1171                        resource_type: "source".to_string(),
1172                        description: None,
1173                        path: Some("models/staging/schema.yml".to_string()),
1174                        columns: HashMap::new(),
1175                    },
1176                ),
1177            ]),
1178            exposures: HashMap::new(),
1179        };
1180
1181        let paths = manifest.collect_file_paths();
1182        assert_eq!(paths.len(), 1, "Duplicate paths should be deduplicated");
1183    }
1184
1185    #[test]
1186    fn test_load_manifest() {
1187        let fixture_path = Path::new(env!("CARGO_MANIFEST_DIR"))
1188            .join("../../tests/fixtures/simple_project/target/manifest.json");
1189
1190        let manifest = load_manifest(&fixture_path).unwrap();
1191        assert!(!manifest.nodes.is_empty());
1192        assert!(!manifest.sources.is_empty());
1193
1194        let paths = manifest.collect_file_paths();
1195        assert!(paths.contains("models/staging/stg_orders.sql"));
1196        assert!(paths.contains("models/staging/schema.yml"));
1197    }
1198
1199    #[test]
1200    fn test_collect_sql_contents_from_manifest() {
1201        let manifest = Manifest {
1202            nodes: HashMap::from([
1203                (
1204                    "model.proj.stg_orders".to_string(),
1205                    ManifestNode {
1206                        unique_id: "model.proj.stg_orders".to_string(),
1207                        name: "stg_orders".to_string(),
1208                        resource_type: "model".to_string(),
1209                        depends_on: DependsOn::default(),
1210                        config: ManifestConfig::default(),
1211                        description: None,
1212                        path: None,
1213                        columns: HashMap::new(),
1214                        compiled_code: Some("select * from raw.orders".to_string()),
1215                    },
1216                ),
1217                (
1218                    "test.proj.not_null_orders_id.abc123".to_string(),
1219                    ManifestNode {
1220                        unique_id: "test.proj.not_null_orders_id.abc123".to_string(),
1221                        name: "not_null_orders_id".to_string(),
1222                        resource_type: "test".to_string(),
1223                        depends_on: DependsOn::default(),
1224                        config: ManifestConfig::default(),
1225                        description: None,
1226                        path: None,
1227                        columns: HashMap::new(),
1228                        compiled_code: Some(
1229                            "select count(*) from orders where id is null".to_string(),
1230                        ),
1231                    },
1232                ),
1233                (
1234                    "model.proj.no_compile".to_string(),
1235                    ManifestNode {
1236                        unique_id: "model.proj.no_compile".to_string(),
1237                        name: "no_compile".to_string(),
1238                        resource_type: "model".to_string(),
1239                        depends_on: DependsOn::default(),
1240                        config: ManifestConfig::default(),
1241                        description: None,
1242                        path: None,
1243                        columns: HashMap::new(),
1244                        compiled_code: None,
1245                    },
1246                ),
1247            ]),
1248            sources: HashMap::new(),
1249            exposures: HashMap::new(),
1250        };
1251
1252        let sql_contents = manifest.collect_sql_contents();
1253
1254        // compiled_code present → included
1255        assert_eq!(
1256            sql_contents.get("model.stg_orders").map(|s| s.as_str()),
1257            Some("select * from raw.orders")
1258        );
1259        // test unique_id is simplified (test.proj.name.hash → test.name)
1260        assert_eq!(
1261            sql_contents
1262                .get("test.not_null_orders_id")
1263                .map(|s| s.as_str()),
1264            Some("select count(*) from orders where id is null")
1265        );
1266        // compiled_code absent → omitted
1267        assert!(!sql_contents.contains_key("model.no_compile"));
1268    }
1269
1270    #[test]
1271    fn test_collect_sql_contents_from_fixture() {
1272        let fixture_path = Path::new(env!("CARGO_MANIFEST_DIR"))
1273            .join("../../tests/fixtures/simple_project/target/manifest.json");
1274
1275        let manifest = load_manifest(&fixture_path).unwrap();
1276        let sql_contents = manifest.collect_sql_contents();
1277
1278        // The fixture has compiled_code for stg_orders and the test node
1279        assert!(
1280            sql_contents.contains_key("model.stg_orders"),
1281            "stg_orders should have compiled_code"
1282        );
1283        assert!(
1284            sql_contents.contains_key("test.assert_orders_positive_amount"),
1285            "test node should have compiled_code"
1286        );
1287        // Nodes without compiled_code should not appear
1288        assert!(
1289            !sql_contents.contains_key("model.customers"),
1290            "customers has no compiled_code in fixture"
1291        );
1292    }
1293}