use std::collections::{BTreeSet, HashMap};
use std::path::Path;
use anyhow::Result;
use petgraph::stable_graph::NodeIndex;
use serde::Deserialize;
use crate::graph::types::*;
/// Deserialized view of a manifest JSON document, limited to the three
/// sections this module reads.
///
/// Every section is a map keyed by an id string; a section that is absent
/// from the JSON deserializes to an empty map because of `#[serde(default)]`.
#[derive(Debug, Deserialize)]
pub struct Manifest {
/// Entries that `add_regular_nodes` turns into graph nodes and whose
/// `depends_on` lists `add_node_edges` turns into edges.
#[serde(default)]
pub nodes: HashMap<String, ManifestNode>,
/// Source entries, turned into graph nodes by `add_source_nodes`.
#[serde(default)]
pub sources: HashMap<String, ManifestSource>,
/// Exposure entries, turned into graph nodes and edges by
/// `add_exposure_nodes` / `add_exposure_edges`.
#[serde(default)]
pub exposures: HashMap<String, ManifestExposure>,
}
/// One entry of the manifest's `nodes` section.
///
/// `unique_id`, `name` and `resource_type` are required when deserializing;
/// every other field falls back to an empty/`None` value when missing.
#[derive(Debug, Deserialize)]
pub struct ManifestNode {
/// Id string as written inside the entry (the graph builders in this file
/// use the map key instead of this field).
pub unique_id: String,
/// Used as the graph node's label.
pub name: String,
/// Kind string such as `"model"`, `"seed"`, `"snapshot"`, `"test"` or
/// `"analysis"`; see `resource_type_to_node_type` for the mapping.
pub resource_type: String,
/// Ids of the entries this node depends on.
#[serde(default)]
pub depends_on: DependsOn,
/// Materialization and tags copied onto the graph node.
#[serde(default)]
pub config: ManifestConfig,
/// Free-text description; blank values are dropped by `non_empty_string`.
pub description: Option<String>,
/// File path recorded for the node, if any.
pub path: Option<String>,
/// Column entries keyed by column name; only the keys are used in this file.
#[serde(default)]
pub columns: HashMap<String, ManifestColumn>,
/// Compiled code text, exposed through `Manifest::collect_sql_contents`.
pub compiled_code: Option<String>,
/// Not read anywhere in this file.
#[serde(default)]
pub database: Option<String>,
/// Not read anywhere in this file.
#[serde(default)]
pub schema: Option<String>,
}
/// One entry of the manifest's `sources` section.
///
/// `unique_id`, `name` and `source_name` are required when deserializing.
#[derive(Debug, Deserialize)]
pub struct ManifestSource {
/// Id string as written inside the entry (the graph builders in this file
/// use the map key instead of this field).
pub unique_id: String,
/// Second half of the graph label, which is `"<source_name>.<name>"`.
pub name: String,
/// First half of the graph label, which is `"<source_name>.<name>"`.
pub source_name: String,
/// Defaults to an empty string when missing; not read in this file, where
/// sources are always treated as `NodeType::Source`.
#[serde(default)]
pub resource_type: String,
/// Free-text description; blank values are dropped by `non_empty_string`.
pub description: Option<String>,
/// File path recorded for the source, if any.
pub path: Option<String>,
/// Column entries keyed by column name; only the keys are used in this file.
#[serde(default)]
pub columns: HashMap<String, ManifestColumn>,
/// Not read anywhere in this file.
#[serde(default)]
pub database: Option<String>,
/// Not read anywhere in this file.
#[serde(default)]
pub schema: Option<String>,
/// Not read anywhere in this file.
#[serde(default)]
pub identifier: Option<String>,
}
/// A single column entry inside a node's or source's `columns` map.
///
/// Only `name` is deserialized, and it is required.
#[derive(Debug, Deserialize)]
pub struct ManifestColumn {
/// Column name as written inside the entry (graph building in this file
/// uses the map key, not this field).
pub name: String,
}
/// One entry of the manifest's `exposures` section.
///
/// `unique_id` and `name` are required when deserializing; the remaining
/// fields are optional metadata copied into the graph node's `ExposureInfo`.
#[derive(Debug, Deserialize)]
pub struct ManifestExposure {
/// Id string as written inside the entry (the graph builders in this file
/// use the map key instead of this field).
pub unique_id: String,
/// Used as the graph node's label.
pub name: String,
/// Ids of the entries this exposure depends on.
#[serde(default)]
pub depends_on: DependsOn,
/// Free-text description; blank values are dropped by `non_empty_string`.
pub description: Option<String>,
/// Display label, distinct from `name`.
pub label: Option<String>,
/// Read from the JSON key `type` (renamed because `type` is a Rust keyword).
#[serde(rename = "type")]
pub exposure_type: Option<String>,
/// URL associated with the exposure.
pub url: Option<String>,
/// Maturity string, passed through without interpretation.
pub maturity: Option<String>,
/// Owner contact details, if present.
pub owner: Option<ManifestExposureOwner>,
}
/// Owner block of an exposure; both fields are optional.
#[derive(Debug, Deserialize)]
pub struct ManifestExposureOwner {
/// Owner name; blank values are dropped by `non_empty_string`.
pub name: Option<String>,
/// Owner e-mail; blank values are dropped by `non_empty_string`.
pub email: Option<String>,
}
/// The `depends_on` object of a node or exposure.
///
/// `Default` yields an empty dependency list, which is also what a missing
/// `nodes` key deserializes to.
#[derive(Debug, Default, Deserialize)]
pub struct DependsOn {
/// Ids of upstream entries; ids that are not present in the manifest are
/// skipped when edges are created.
#[serde(default)]
pub nodes: Vec<String>,
}
/// The subset of a node's `config` object that is copied onto graph nodes.
#[derive(Debug, Default, Deserialize)]
pub struct ManifestConfig {
/// Materialization string (the tests use values such as `"view"` and
/// `"table"`); copied verbatim.
pub materialized: Option<String>,
/// Tags attached to the node; empty when the key is missing.
#[serde(default)]
pub tags: Vec<String>,
}
/// Translates a manifest `resource_type` string into the graph's `NodeType`.
///
/// `"model"`, `"analysis"` and every unrecognized string all map to
/// `NodeType::Model`; the other known kinds map to their own variant.
fn resource_type_to_node_type(resource_type: &str) -> NodeType {
    match resource_type {
        "source" => NodeType::Source,
        "seed" => NodeType::Seed,
        "snapshot" => NodeType::Snapshot,
        "test" => NodeType::Test,
        "exposure" => NodeType::Exposure,
        // "model", "analysis" and anything unknown share the Model variant.
        _ => NodeType::Model,
    }
}
/// Shortens a dotted manifest id by dropping its second segment.
///
/// The result depends on `resource_type`:
/// * `"source"` with at least four segments: segments 0, 2 and 3
///   (`source.proj.raw.orders` -> `source.raw.orders`);
/// * `"test"` with at least three segments: segments 0 and 2, so a trailing
///   hash segment is dropped;
/// * anything else with at least three segments: the first and the *last*
///   segment.
///
/// Ids with too few segments are returned unchanged.
///
/// NOTE(review): for the default case an id with four or more segments keeps
/// only its final segment (`model.proj.dim.v2` -> `model.v2`) — confirm this
/// is intended.
fn simplify_unique_id(unique_id: &str, resource_type: &str) -> String {
    let segments: Vec<&str> = unique_id.split('.').collect();

    let shortened = match resource_type {
        "source" => match segments.as_slice() {
            [kind, _package, source_name, table, ..] => {
                Some(format!("{}.{}.{}", kind, source_name, table))
            }
            _ => None,
        },
        "test" => match segments.as_slice() {
            [kind, _package, test_name, ..] => Some(format!("{}.{}", kind, test_name)),
            _ => None,
        },
        _ => match segments.as_slice() {
            // Requires at least three segments; `last` is the final one.
            [kind, _package, .., last] => Some(format!("{}.{}", kind, last)),
            _ => None,
        },
    };

    // Too few segments for the requested kind: hand the id back untouched.
    shortened.unwrap_or_else(|| unique_id.to_string())
}
pub fn load_manifest(manifest_path: &Path) -> Result<Manifest> {
let content = std::fs::read_to_string(manifest_path).map_err(|e| {
crate::error::DbtLineageError::FileReadError {
path: manifest_path.to_path_buf(),
source: e,
}
})?;
let manifest: Manifest = serde_json::from_str(&content).map_err(|e| {
crate::error::DbtLineageError::ArtifactParseError {
path: manifest_path.to_path_buf(),
source: e,
}
})?;
Ok(manifest)
}
impl Manifest {
pub fn collect_sql_contents(&self) -> HashMap<String, String> {
let mut map = HashMap::new();
for (orig_id, node) in &self.nodes {
if let Some(ref code) = node.compiled_code {
let simple_id = simplify_unique_id(orig_id, &node.resource_type);
map.insert(simple_id, code.clone());
}
}
map
}
pub fn collect_file_paths(&self) -> BTreeSet<String> {
let mut paths = BTreeSet::new();
for node in self.nodes.values() {
if let Some(ref p) = node.path {
paths.insert(p.clone());
}
}
for source in self.sources.values() {
if let Some(ref p) = source.path {
paths.insert(p.clone());
}
}
paths
}
}
/// Loads the manifest file at `manifest_path` and builds a lineage graph from it.
///
/// # Errors
///
/// Propagates any error from `load_manifest` (unreadable file, invalid
/// content) or from `build_graph_from_parsed_manifest`.
pub fn build_graph_from_manifest(manifest_path: &Path) -> Result<LineageGraph> {
    load_manifest(manifest_path).and_then(|parsed| build_graph_from_parsed_manifest(&parsed))
}
pub fn build_graph_from_parsed_manifest(manifest: &Manifest) -> Result<LineageGraph> {
let mut graph = LineageGraph::new();
let mut node_map: HashMap<String, NodeIndex> = HashMap::new();
add_source_nodes(&mut graph, &mut node_map, &manifest.sources);
add_regular_nodes(&mut graph, &mut node_map, &manifest.nodes);
add_exposure_nodes(&mut graph, &mut node_map, &manifest.exposures);
add_node_edges(&mut graph, &node_map, &manifest.nodes);
add_exposure_edges(&mut graph, &node_map, &manifest.exposures);
Ok(graph)
}
/// Adds one `NodeType::Source` graph node per manifest source.
///
/// The node is labelled `"<source_name>.<name>"`, carries the sorted column
/// names, and has no materialization, tags or exposure info. Its graph index
/// is recorded in `node_map` under both the original map key and the
/// simplified id; an existing entry under either key is overwritten.
fn add_source_nodes(
    graph: &mut LineageGraph,
    node_map: &mut HashMap<String, NodeIndex>,
    sources: &HashMap<String, ManifestSource>,
) {
    for (full_id, entry) in sources.iter() {
        let short_id = simplify_unique_id(full_id, "source");

        // Map keys are unique, so an unstable sort gives the same order.
        let mut column_names: Vec<String> = entry.columns.keys().cloned().collect();
        column_names.sort_unstable();

        let payload = NodeData {
            unique_id: short_id.clone(),
            label: format!("{}.{}", entry.source_name, entry.name),
            node_type: NodeType::Source,
            file_path: entry.path.as_ref().map(|raw| raw.into()),
            description: non_empty_string(&entry.description),
            materialization: None,
            tags: Vec::new(),
            columns: column_names,
            exposure: None,
        };
        let graph_idx = graph.add_node(payload);

        node_map.insert(full_id.clone(), graph_idx);
        node_map.insert(short_id, graph_idx);
    }
}
/// Adds one graph node per entry of the manifest's `nodes` section.
///
/// The node type comes from `resource_type_to_node_type`, the label is the
/// entry's `name`, and materialization, tags and sorted column names are
/// copied over. The graph index is recorded in `node_map` under both the
/// original map key and the simplified id; an existing entry under either
/// key is overwritten.
fn add_regular_nodes(
    graph: &mut LineageGraph,
    node_map: &mut HashMap<String, NodeIndex>,
    nodes: &HashMap<String, ManifestNode>,
) {
    for (full_id, entry) in nodes.iter() {
        let kind = resource_type_to_node_type(&entry.resource_type);
        let short_id = simplify_unique_id(full_id, &entry.resource_type);

        // Map keys are unique, so an unstable sort gives the same order.
        let mut column_names: Vec<String> = entry.columns.keys().cloned().collect();
        column_names.sort_unstable();

        let payload = NodeData {
            unique_id: short_id.clone(),
            label: entry.name.clone(),
            node_type: kind,
            file_path: entry.path.as_ref().map(|raw| raw.into()),
            description: non_empty_string(&entry.description),
            materialization: entry.config.materialized.clone(),
            tags: entry.config.tags.clone(),
            columns: column_names,
            exposure: None,
        };
        let graph_idx = graph.add_node(payload);

        node_map.insert(full_id.clone(), graph_idx);
        node_map.insert(short_id, graph_idx);
    }
}
/// Adds one `NodeType::Exposure` graph node per manifest exposure.
///
/// Exposure nodes have no file path, materialization, tags or columns; their
/// metadata (label, type, url, maturity, owner) is stored in `ExposureInfo`,
/// with blank strings normalized to `None`. The graph index is recorded in
/// `node_map` under both the original map key and the simplified id; an
/// existing entry under either key is overwritten.
fn add_exposure_nodes(
    graph: &mut LineageGraph,
    node_map: &mut HashMap<String, NodeIndex>,
    exposures: &HashMap<String, ManifestExposure>,
) {
    for (full_id, entry) in exposures.iter() {
        let short_id = simplify_unique_id(full_id, "exposure");

        let owner = entry.owner.as_ref().map(|contact| OwnerInfo {
            name: non_empty_string(&contact.name),
            email: non_empty_string(&contact.email),
        });
        let details = ExposureInfo {
            label: non_empty_string(&entry.label),
            exposure_type: non_empty_string(&entry.exposure_type),
            url: non_empty_string(&entry.url),
            maturity: non_empty_string(&entry.maturity),
            owner,
        };

        let payload = NodeData {
            unique_id: short_id.clone(),
            label: entry.name.clone(),
            node_type: NodeType::Exposure,
            file_path: None,
            description: non_empty_string(&entry.description),
            materialization: None,
            tags: Vec::new(),
            columns: Vec::new(),
            exposure: Some(details),
        };
        let graph_idx = graph.add_node(payload);

        node_map.insert(full_id.clone(), graph_idx);
        node_map.insert(short_id, graph_idx);
    }
}
/// Adds an edge `dependency -> node` for every resolvable `depends_on` id of
/// every manifest node.
///
/// Nodes or dependencies whose id is missing from `node_map` are skipped
/// silently. Every edge pointing into a test node is typed `EdgeType::Test`;
/// otherwise the type is derived from the dependency id by `infer_edge_type`.
fn add_node_edges(
    graph: &mut LineageGraph,
    node_map: &HashMap<String, NodeIndex>,
    nodes: &HashMap<String, ManifestNode>,
) {
    for (full_id, entry) in nodes {
        if let Some(&target_idx) = node_map.get(full_id) {
            let target_is_test = graph[target_idx].node_type == NodeType::Test;

            for upstream_id in &entry.depends_on.nodes {
                let upstream_idx = match node_map.get(upstream_id) {
                    Some(&found) => found,
                    // Dependency not present in the manifest: no edge.
                    None => continue,
                };
                let kind = if target_is_test {
                    EdgeType::Test
                } else {
                    infer_edge_type(upstream_id)
                };
                graph.add_edge(upstream_idx, target_idx, EdgeData::direct(kind));
            }
        }
    }
}
/// Adds an `EdgeType::Exposure` edge `dependency -> exposure` for every
/// resolvable `depends_on` id of every manifest exposure.
///
/// Exposures or dependencies whose id is missing from `node_map` are skipped
/// silently.
fn add_exposure_edges(
    graph: &mut LineageGraph,
    node_map: &HashMap<String, NodeIndex>,
    exposures: &HashMap<String, ManifestExposure>,
) {
    for (full_id, entry) in exposures {
        let exposure_idx = match node_map.get(full_id).copied() {
            Some(found) => found,
            None => continue,
        };

        // Keep only the dependencies that resolve to a known graph node.
        let upstream_indices = entry
            .depends_on
            .nodes
            .iter()
            .filter_map(|upstream_id| node_map.get(upstream_id).copied());

        for upstream_idx in upstream_indices {
            graph.add_edge(
                upstream_idx,
                exposure_idx,
                EdgeData::direct(EdgeType::Exposure),
            );
        }
    }
}
/// Derives an edge type from the prefix of the dependency's id.
///
/// Ids starting with `"source."` give `EdgeType::Source`, ids starting with
/// `"test."` give `EdgeType::Test`, and everything else gives `EdgeType::Ref`.
fn infer_edge_type(dep_unique_id: &str) -> EdgeType {
    match dep_unique_id {
        id if id.starts_with("source.") => EdgeType::Source,
        id if id.starts_with("test.") => EdgeType::Test,
        _ => EdgeType::Ref,
    }
}
/// Returns a clone of the contained string unless it is absent, empty or
/// whitespace-only, in which case `None` is returned.
///
/// The returned string is not trimmed.
fn non_empty_string(s: &Option<String>) -> Option<String> {
    match s {
        Some(text) if !text.trim().is_empty() => Some(text.clone()),
        _ => None,
    }
}
#[cfg(test)]
mod tests {
use super::*;
use std::fs;
/// Every known resource type maps to its node type; "analysis" and unknown
/// strings map to `NodeType::Model`.
#[test]
fn test_resource_type_to_node_type() {
    let expectations = [
        ("model", NodeType::Model),
        ("source", NodeType::Source),
        ("seed", NodeType::Seed),
        ("snapshot", NodeType::Snapshot),
        ("test", NodeType::Test),
        ("analysis", NodeType::Model),
        ("exposure", NodeType::Exposure),
        ("unknown", NodeType::Model),
    ];
    for (resource_type, expected) in expectations {
        assert_eq!(resource_type_to_node_type(resource_type), expected);
    }
}
/// A three-segment model id loses its project segment.
#[test]
fn test_simplify_unique_id_model() {
    let simplified = simplify_unique_id("model.my_project.stg_orders", "model");
    assert_eq!(simplified, "model.stg_orders");
}
/// A four-segment source id loses its project segment.
#[test]
fn test_simplify_unique_id_source() {
    let simplified = simplify_unique_id("source.my_project.raw.orders", "source");
    assert_eq!(simplified, "source.raw.orders");
}
/// A two-segment model id is already short and comes back unchanged.
#[test]
fn test_simplify_unique_id_short() {
    let simplified = simplify_unique_id("model.stg_orders", "model");
    assert_eq!(simplified, "model.stg_orders");
}
/// A three-segment source id has too few segments and comes back unchanged.
#[test]
fn test_simplify_unique_id_source_short() {
    let simplified = simplify_unique_id("source.raw.orders", "source");
    assert_eq!(simplified, "source.raw.orders");
}
/// A test id loses both its project segment and its trailing hash segment.
#[test]
fn test_simplify_unique_id_test() {
    let full_id = "test.jaffle_shop.not_null_orders_order_id.cf6c17daed";
    let simplified = simplify_unique_id(full_id, "test");
    assert_eq!(simplified, "test.not_null_orders_order_id");
}
/// A two-segment test id comes back unchanged.
#[test]
fn test_simplify_unique_id_test_short() {
    let simplified = simplify_unique_id("test.not_null_orders_order_id", "test");
    assert_eq!(simplified, "test.not_null_orders_order_id");
}
/// Edge types follow the id prefix: "source." and "test." are special, all
/// other prefixes (model, seed, ...) are plain refs.
#[test]
fn test_infer_edge_type() {
    let expectations = [
        ("source.my_project.raw.orders", EdgeType::Source),
        ("model.my_project.stg_orders", EdgeType::Ref),
        ("test.my_project.some_test", EdgeType::Test),
        ("seed.my_project.countries", EdgeType::Ref),
    ];
    for (dep_id, expected) in expectations {
        assert_eq!(infer_edge_type(dep_id), expected);
    }
}
/// Absent, empty and whitespace-only inputs collapse to `None`; real text is
/// returned as-is.
#[test]
fn test_non_empty_string() {
    let absent: Option<String> = None;
    let empty = Some("".to_string());
    let blank = Some(" ".to_string());
    let filled = Some("hello".to_string());

    assert_eq!(non_empty_string(&absent), None);
    assert_eq!(non_empty_string(&empty), None);
    assert_eq!(non_empty_string(&blank), None);
    assert_eq!(non_empty_string(&filled), Some("hello".to_string()));
}
/// One model depending on one source produces two graph nodes joined by a
/// single edge, with labels, simplified ids and model metadata carried over.
#[test]
fn test_build_graph_from_minimal_manifest() {
let manifest = Manifest {
nodes: HashMap::from([(
"model.proj.stg_orders".to_string(),
ManifestNode {
unique_id: "model.proj.stg_orders".to_string(),
name: "stg_orders".to_string(),
resource_type: "model".to_string(),
depends_on: DependsOn {
nodes: vec!["source.proj.raw.orders".to_string()],
},
config: ManifestConfig {
materialized: Some("view".to_string()),
tags: vec!["staging".to_string()],
},
description: Some("Staged orders".to_string()),
path: Some("models/staging/stg_orders.sql".to_string()),
columns: HashMap::new(),
compiled_code: None,
database: None,
schema: None,
},
)]),
sources: HashMap::from([(
"source.proj.raw.orders".to_string(),
ManifestSource {
unique_id: "source.proj.raw.orders".to_string(),
name: "orders".to_string(),
source_name: "raw".to_string(),
resource_type: "source".to_string(),
description: Some("Raw orders table".to_string()),
path: Some("models/staging/schema.yml".to_string()),
columns: HashMap::new(),
database: None,
schema: None,
identifier: None,
},
)]),
exposures: HashMap::new(),
};
let graph = build_graph_from_parsed_manifest(&manifest).unwrap();
assert_eq!(graph.node_count(), 2);
assert_eq!(graph.edge_count(), 1);
// The model node keeps its name as label and gets the simplified id.
let model = graph
.node_indices()
.find(|&i| graph[i].node_type == NodeType::Model)
.expect("Should have a model node");
assert_eq!(graph[model].label, "stg_orders");
assert_eq!(graph[model].unique_id, "model.stg_orders");
assert_eq!(graph[model].materialization.as_deref(), Some("view"));
assert_eq!(graph[model].tags, vec!["staging"]);
assert_eq!(graph[model].description.as_deref(), Some("Staged orders"));
// The source node is labelled "<source_name>.<name>".
let source = graph
.node_indices()
.find(|&i| graph[i].node_type == NodeType::Source)
.expect("Should have a source node");
assert_eq!(graph[source].label, "raw.orders");
assert_eq!(graph[source].unique_id, "source.raw.orders");
}
/// An exposure depending on a model becomes its own node, connected to the
/// model by one edge, and keeps its name and description.
#[test]
fn test_build_graph_with_exposures() {
let manifest = Manifest {
nodes: HashMap::from([(
"model.proj.orders".to_string(),
ManifestNode {
unique_id: "model.proj.orders".to_string(),
name: "orders".to_string(),
resource_type: "model".to_string(),
depends_on: DependsOn::default(),
config: ManifestConfig::default(),
description: None,
path: None,
columns: HashMap::new(),
compiled_code: None,
database: None,
schema: None,
},
)]),
sources: HashMap::new(),
exposures: HashMap::from([(
"exposure.proj.weekly_report".to_string(),
ManifestExposure {
unique_id: "exposure.proj.weekly_report".to_string(),
name: "weekly_report".to_string(),
depends_on: DependsOn {
nodes: vec!["model.proj.orders".to_string()],
},
description: Some("Weekly dashboard".to_string()),
label: None,
exposure_type: None,
url: None,
maturity: None,
owner: None,
},
)]),
};
let graph = build_graph_from_parsed_manifest(&manifest).unwrap();
assert_eq!(graph.node_count(), 2);
assert_eq!(graph.edge_count(), 1);
let exposure = graph
.node_indices()
.find(|&i| graph[i].node_type == NodeType::Exposure)
.expect("Should have an exposure node");
assert_eq!(graph[exposure].label, "weekly_report");
assert_eq!(
graph[exposure].description.as_deref(),
Some("Weekly dashboard")
);
}
/// All optional exposure metadata (label, type, url, maturity, owner name and
/// e-mail) is copied into the node's exposure info.
#[test]
fn test_exposure_metadata_parsed() {
let manifest = Manifest {
nodes: HashMap::new(),
sources: HashMap::new(),
exposures: HashMap::from([(
"exposure.proj.dashboard".to_string(),
ManifestExposure {
unique_id: "exposure.proj.dashboard".to_string(),
name: "dashboard".to_string(),
depends_on: DependsOn { nodes: vec![] },
description: Some("Main dashboard".to_string()),
label: Some("Main Dashboard".to_string()),
exposure_type: Some("dashboard".to_string()),
url: Some("https://bi.example.com".to_string()),
maturity: Some("high".to_string()),
owner: Some(ManifestExposureOwner {
name: Some("Data Team".to_string()),
email: Some("data@example.com".to_string()),
}),
},
)]),
};
let graph = build_graph_from_parsed_manifest(&manifest).unwrap();
let exp_idx = graph
.node_indices()
.find(|&i| graph[i].node_type == NodeType::Exposure)
.expect("Should have an exposure node");
let exp = &graph[exp_idx];
let info = exp.exposure.as_ref().expect("Should have exposure info");
assert_eq!(info.label.as_deref(), Some("Main Dashboard"));
assert_eq!(info.exposure_type.as_deref(), Some("dashboard"));
assert_eq!(info.url.as_deref(), Some("https://bi.example.com"));
assert_eq!(info.maturity.as_deref(), Some("high"));
let owner = info.owner.as_ref().expect("Should have owner");
assert_eq!(owner.name.as_deref(), Some("Data Team"));
assert_eq!(owner.email.as_deref(), Some("data@example.com"));
}
/// Exposure metadata survives a round trip through the on-disk fixture
/// manifest of the `simple_project` fixture.
///
/// Unlike `test_build_graph_from_fixture_manifest`, this test does not return
/// early when the fixture file is missing; the `unwrap` fails instead.
#[test]
fn test_exposure_metadata_from_fixture() {
let manifest_path = std::path::Path::new(env!("CARGO_MANIFEST_DIR"))
.join("../../tests/fixtures/simple_project/target/manifest.json");
let graph = build_graph_from_manifest(&manifest_path).unwrap();
let exp_idx = graph
.node_indices()
.find(|&i| graph[i].node_type == NodeType::Exposure)
.expect("Should have an exposure node from fixture");
let exp = &graph[exp_idx];
assert_eq!(exp.label, "weekly_report");
let info = exp.exposure.as_ref().expect("Should have exposure info");
assert_eq!(info.label.as_deref(), Some("Weekly Report"));
assert_eq!(info.exposure_type.as_deref(), Some("dashboard"));
assert_eq!(info.url.as_deref(), Some("https://bi.example.com/weekly"));
assert_eq!(info.maturity.as_deref(), Some("high"));
let owner = info.owner.as_ref().expect("Should have owner");
assert_eq!(owner.name.as_deref(), Some("Data Team"));
assert_eq!(owner.email.as_deref(), Some("data@example.com"));
}
/// Seed and snapshot entries of the `nodes` section get their own node types
/// and keep their names as labels.
#[test]
fn test_build_graph_with_seeds_and_snapshots() {
let manifest = Manifest {
nodes: HashMap::from([
(
"seed.proj.countries".to_string(),
ManifestNode {
unique_id: "seed.proj.countries".to_string(),
name: "countries".to_string(),
resource_type: "seed".to_string(),
depends_on: DependsOn::default(),
config: ManifestConfig::default(),
description: None,
path: Some("seeds/countries.csv".to_string()),
columns: HashMap::new(),
compiled_code: None,
database: None,
schema: None,
},
),
(
"snapshot.proj.snap_orders".to_string(),
ManifestNode {
unique_id: "snapshot.proj.snap_orders".to_string(),
name: "snap_orders".to_string(),
resource_type: "snapshot".to_string(),
depends_on: DependsOn::default(),
config: ManifestConfig {
materialized: Some("snapshot".to_string()),
tags: vec![],
},
description: None,
path: Some("snapshots/snap_orders.sql".to_string()),
columns: HashMap::new(),
compiled_code: None,
database: None,
schema: None,
},
),
]),
sources: HashMap::new(),
exposures: HashMap::new(),
};
let graph = build_graph_from_parsed_manifest(&manifest).unwrap();
assert_eq!(graph.node_count(), 2);
let seed = graph
.node_indices()
.find(|&i| graph[i].node_type == NodeType::Seed)
.expect("Should have a seed node");
assert_eq!(graph[seed].label, "countries");
let snap = graph
.node_indices()
.find(|&i| graph[i].node_type == NodeType::Snapshot)
.expect("Should have a snapshot node");
assert_eq!(graph[snap].label, "snap_orders");
}
/// A test entry depending on a model yields a test node, and the single edge
/// into it is typed `EdgeType::Test` even though the dependency is a model.
#[test]
fn test_build_graph_with_tests() {
let manifest = Manifest {
nodes: HashMap::from([
(
"model.proj.orders".to_string(),
ManifestNode {
unique_id: "model.proj.orders".to_string(),
name: "orders".to_string(),
resource_type: "model".to_string(),
depends_on: DependsOn::default(),
config: ManifestConfig::default(),
description: None,
path: None,
columns: HashMap::new(),
compiled_code: None,
database: None,
schema: None,
},
),
(
"test.proj.assert_positive".to_string(),
ManifestNode {
unique_id: "test.proj.assert_positive".to_string(),
name: "assert_positive".to_string(),
resource_type: "test".to_string(),
depends_on: DependsOn {
nodes: vec!["model.proj.orders".to_string()],
},
config: ManifestConfig::default(),
description: None,
path: Some("tests/assert_positive.sql".to_string()),
columns: HashMap::new(),
compiled_code: None,
database: None,
schema: None,
},
),
]),
sources: HashMap::new(),
exposures: HashMap::new(),
};
let graph = build_graph_from_parsed_manifest(&manifest).unwrap();
assert_eq!(graph.node_count(), 2);
assert_eq!(graph.edge_count(), 1);
let test_node = graph
.node_indices()
.find(|&i| graph[i].node_type == NodeType::Test)
.expect("Should have a test node");
assert_eq!(graph[test_node].label, "assert_positive");
// The graph has exactly one edge, so the first edge reference is the one under test.
use petgraph::visit::IntoEdgeReferences;
let edge = graph.edge_references().next().unwrap();
assert_eq!(edge.weight().edge_type, EdgeType::Test);
}
/// A manifest with no nodes, sources or exposures yields an empty graph.
#[test]
fn test_build_graph_empty_manifest() {
let manifest = Manifest {
nodes: HashMap::new(),
sources: HashMap::new(),
exposures: HashMap::new(),
};
let graph = build_graph_from_parsed_manifest(&manifest).unwrap();
assert_eq!(graph.node_count(), 0);
assert_eq!(graph.edge_count(), 0);
}
/// A dependency id that is not present in the manifest is ignored: the node
/// is still created but no edge is added.
#[test]
fn test_build_graph_missing_dependency() {
let manifest = Manifest {
nodes: HashMap::from([(
"model.proj.orders".to_string(),
ManifestNode {
unique_id: "model.proj.orders".to_string(),
name: "orders".to_string(),
resource_type: "model".to_string(),
depends_on: DependsOn {
nodes: vec!["model.proj.nonexistent".to_string()],
},
config: ManifestConfig::default(),
description: None,
path: None,
columns: HashMap::new(),
compiled_code: None,
database: None,
schema: None,
},
)]),
sources: HashMap::new(),
exposures: HashMap::new(),
};
let graph = build_graph_from_parsed_manifest(&manifest).unwrap();
assert_eq!(graph.node_count(), 1);
// The unresolved dependency must not create an edge.
assert_eq!(graph.edge_count(), 0); }
/// A node with none of its optional fields set produces a graph node whose
/// description, materialization, tags and file path are all empty.
#[test]
fn test_build_graph_optional_fields() {
let manifest = Manifest {
nodes: HashMap::from([(
"model.proj.bare".to_string(),
ManifestNode {
unique_id: "model.proj.bare".to_string(),
name: "bare".to_string(),
resource_type: "model".to_string(),
depends_on: DependsOn::default(),
config: ManifestConfig {
materialized: None,
tags: vec![],
},
description: None,
path: None,
columns: HashMap::new(),
compiled_code: None,
database: None,
schema: None,
},
)]),
sources: HashMap::new(),
exposures: HashMap::new(),
};
let graph = build_graph_from_parsed_manifest(&manifest).unwrap();
let node = &graph[graph.node_indices().next().unwrap()];
assert!(node.description.is_none());
assert!(node.materialization.is_none());
assert!(node.tags.is_empty());
assert!(node.file_path.is_none());
}
/// End-to-end check through the file system: a manifest written to a
/// temporary directory is loaded and produces two nodes and one edge.
///
/// The JSON omits several optional keys (`columns`, `compiled_code`, ...),
/// so it also exercises the serde defaults.
#[test]
fn test_build_graph_from_manifest_file() {
let tmp = tempfile::tempdir().unwrap();
let manifest_path = tmp.path().join("manifest.json");
let manifest_json = r#"{
"nodes": {
"model.proj.stg_orders": {
"unique_id": "model.proj.stg_orders",
"name": "stg_orders",
"resource_type": "model",
"depends_on": { "nodes": ["source.proj.raw.orders"] },
"config": { "materialized": "view", "tags": [] },
"description": "Staged orders",
"path": "models/staging/stg_orders.sql"
}
},
"sources": {
"source.proj.raw.orders": {
"unique_id": "source.proj.raw.orders",
"name": "orders",
"source_name": "raw",
"resource_type": "source",
"description": "Raw orders",
"path": "models/staging/schema.yml"
}
},
"exposures": {}
}"#;
fs::write(&manifest_path, manifest_json).unwrap();
let graph = build_graph_from_manifest(&manifest_path).unwrap();
assert_eq!(graph.node_count(), 2);
assert_eq!(graph.edge_count(), 1);
}
/// A path that does not exist is reported as an error instead of panicking.
#[test]
fn test_build_graph_from_manifest_file_not_found() {
let result = build_graph_from_manifest(Path::new("/nonexistent/manifest.json"));
assert!(result.is_err());
}
/// A file whose content is not valid JSON is reported as an error.
#[test]
fn test_build_graph_from_manifest_invalid_json() {
let tmp = tempfile::tempdir().unwrap();
let manifest_path = tmp.path().join("manifest.json");
fs::write(&manifest_path, "not valid json").unwrap();
let result = build_graph_from_manifest(&manifest_path);
assert!(result.is_err());
}
/// An entry with resource type "analysis" is represented as a model node in
/// the graph.
#[test]
fn test_build_graph_analysis_maps_to_model() {
let manifest = Manifest {
nodes: HashMap::from([(
"analysis.proj.my_analysis".to_string(),
ManifestNode {
unique_id: "analysis.proj.my_analysis".to_string(),
name: "my_analysis".to_string(),
resource_type: "analysis".to_string(),
depends_on: DependsOn::default(),
config: ManifestConfig::default(),
description: None,
path: None,
columns: HashMap::new(),
compiled_code: None,
database: None,
schema: None,
},
)]),
sources: HashMap::new(),
exposures: HashMap::new(),
};
let graph = build_graph_from_parsed_manifest(&manifest).unwrap();
let node = &graph[graph.node_indices().next().unwrap()];
assert_eq!(node.node_type, NodeType::Model);
}
/// Two sources feed two staging models, which both feed one final model:
/// five nodes and four edges in total.
#[test]
fn test_build_graph_complex_chain() {
let manifest = Manifest {
nodes: HashMap::from([
(
"model.proj.stg_orders".to_string(),
ManifestNode {
unique_id: "model.proj.stg_orders".to_string(),
name: "stg_orders".to_string(),
resource_type: "model".to_string(),
depends_on: DependsOn {
nodes: vec!["source.proj.raw.orders".to_string()],
},
config: ManifestConfig {
materialized: Some("view".to_string()),
tags: vec![],
},
description: None,
path: None,
columns: HashMap::new(),
compiled_code: None,
database: None,
schema: None,
},
),
(
"model.proj.stg_payments".to_string(),
ManifestNode {
unique_id: "model.proj.stg_payments".to_string(),
name: "stg_payments".to_string(),
resource_type: "model".to_string(),
depends_on: DependsOn {
nodes: vec!["source.proj.raw.payments".to_string()],
},
config: ManifestConfig::default(),
description: None,
path: None,
columns: HashMap::new(),
compiled_code: None,
database: None,
schema: None,
},
),
(
"model.proj.orders".to_string(),
ManifestNode {
unique_id: "model.proj.orders".to_string(),
name: "orders".to_string(),
resource_type: "model".to_string(),
depends_on: DependsOn {
nodes: vec![
"model.proj.stg_orders".to_string(),
"model.proj.stg_payments".to_string(),
],
},
config: ManifestConfig {
materialized: Some("table".to_string()),
tags: vec!["marts".to_string()],
},
description: Some("Order fact table".to_string()),
path: None,
columns: HashMap::new(),
compiled_code: None,
database: None,
schema: None,
},
),
]),
sources: HashMap::from([
(
"source.proj.raw.orders".to_string(),
ManifestSource {
unique_id: "source.proj.raw.orders".to_string(),
name: "orders".to_string(),
source_name: "raw".to_string(),
resource_type: "source".to_string(),
description: None,
path: None,
columns: HashMap::new(),
database: None,
schema: None,
identifier: None,
},
),
(
"source.proj.raw.payments".to_string(),
ManifestSource {
unique_id: "source.proj.raw.payments".to_string(),
name: "payments".to_string(),
source_name: "raw".to_string(),
resource_type: "source".to_string(),
description: None,
path: None,
columns: HashMap::new(),
database: None,
schema: None,
identifier: None,
},
),
]),
exposures: HashMap::new(),
};
let graph = build_graph_from_parsed_manifest(&manifest).unwrap();
// 2 sources + 3 models; 2 source->staging edges + 2 staging->orders edges.
assert_eq!(graph.node_count(), 5);
assert_eq!(graph.edge_count(), 4);
}
/// Smoke test against the on-disk `simple_project` fixture: the graph must
/// contain at least ten nodes, at least one node of each kind, and some edges.
///
/// The test returns early (and therefore passes) when the fixture file does
/// not exist.
#[test]
fn test_build_graph_from_fixture_manifest() {
let fixture_path = Path::new(env!("CARGO_MANIFEST_DIR"))
.join("../../tests/fixtures/simple_project/target/manifest.json");
if !fixture_path.exists() {
return;
}
let graph = build_graph_from_manifest(&fixture_path).unwrap();
assert!(
graph.node_count() >= 10,
"Expected at least 10 nodes, got {}",
graph.node_count()
);
let has_source = graph
.node_indices()
.any(|i| graph[i].node_type == NodeType::Source);
let has_model = graph
.node_indices()
.any(|i| graph[i].node_type == NodeType::Model);
let has_seed = graph
.node_indices()
.any(|i| graph[i].node_type == NodeType::Seed);
let has_test = graph
.node_indices()
.any(|i| graph[i].node_type == NodeType::Test);
let has_exposure = graph
.node_indices()
.any(|i| graph[i].node_type == NodeType::Exposure);
assert!(has_source, "Should have source nodes");
assert!(has_model, "Should have model nodes");
assert!(has_seed, "Should have seed nodes");
assert!(has_test, "Should have test nodes");
assert!(has_exposure, "Should have exposure nodes");
assert!(graph.edge_count() > 0, "Should have edges");
}
/// `collect_file_paths` gathers the paths of nodes and sources and skips
/// entries that have no path.
#[test]
fn test_collect_file_paths() {
let manifest = Manifest {
nodes: HashMap::from([
(
"model.proj.stg_orders".to_string(),
ManifestNode {
unique_id: "model.proj.stg_orders".to_string(),
name: "stg_orders".to_string(),
resource_type: "model".to_string(),
depends_on: DependsOn::default(),
config: ManifestConfig::default(),
description: None,
path: Some("models/staging/stg_orders.sql".to_string()),
columns: HashMap::new(),
compiled_code: None,
database: None,
schema: None,
},
),
(
"model.proj.orders".to_string(),
ManifestNode {
unique_id: "model.proj.orders".to_string(),
name: "orders".to_string(),
resource_type: "model".to_string(),
depends_on: DependsOn::default(),
config: ManifestConfig::default(),
description: None,
path: Some("models/marts/orders.sql".to_string()),
columns: HashMap::new(),
compiled_code: None,
database: None,
schema: None,
},
),
(
"model.proj.bare".to_string(),
ManifestNode {
unique_id: "model.proj.bare".to_string(),
name: "bare".to_string(),
resource_type: "model".to_string(),
depends_on: DependsOn::default(),
config: ManifestConfig::default(),
description: None,
path: None,
columns: HashMap::new(),
compiled_code: None,
database: None,
schema: None,
},
),
]),
sources: HashMap::from([(
"source.proj.raw.orders".to_string(),
ManifestSource {
unique_id: "source.proj.raw.orders".to_string(),
name: "orders".to_string(),
source_name: "raw".to_string(),
resource_type: "source".to_string(),
description: None,
path: Some("models/staging/schema.yml".to_string()),
columns: HashMap::new(),
database: None,
schema: None,
identifier: None,
},
)]),
exposures: HashMap::new(),
};
let paths = manifest.collect_file_paths();
// Two model paths plus one source path; the path-less "bare" model adds nothing.
assert_eq!(paths.len(), 3);
assert!(paths.contains("models/staging/stg_orders.sql"));
assert!(paths.contains("models/marts/orders.sql"));
assert!(paths.contains("models/staging/schema.yml"));
assert!(!paths.iter().any(|p| p.contains("bare")));
}
/// Two sources declared in the same file contribute that path only once.
#[test]
fn test_collect_file_paths_deduplicates() {
let manifest = Manifest {
nodes: HashMap::new(),
sources: HashMap::from([
(
"source.proj.raw.orders".to_string(),
ManifestSource {
unique_id: "source.proj.raw.orders".to_string(),
name: "orders".to_string(),
source_name: "raw".to_string(),
resource_type: "source".to_string(),
description: None,
path: Some("models/staging/schema.yml".to_string()),
columns: HashMap::new(),
database: None,
schema: None,
identifier: None,
},
),
(
"source.proj.raw.customers".to_string(),
ManifestSource {
unique_id: "source.proj.raw.customers".to_string(),
name: "customers".to_string(),
source_name: "raw".to_string(),
resource_type: "source".to_string(),
description: None,
path: Some("models/staging/schema.yml".to_string()),
columns: HashMap::new(),
database: None,
schema: None,
identifier: None,
},
),
]),
exposures: HashMap::new(),
};
let paths = manifest.collect_file_paths();
assert_eq!(paths.len(), 1, "Duplicate paths should be deduplicated");
}
/// Loading the on-disk `simple_project` fixture yields non-empty node and
/// source sections and the expected file paths.
///
/// There is no early return for a missing fixture file; the `unwrap` fails
/// in that case.
#[test]
fn test_load_manifest() {
let fixture_path = Path::new(env!("CARGO_MANIFEST_DIR"))
.join("../../tests/fixtures/simple_project/target/manifest.json");
let manifest = load_manifest(&fixture_path).unwrap();
assert!(!manifest.nodes.is_empty());
assert!(!manifest.sources.is_empty());
let paths = manifest.collect_file_paths();
assert!(paths.contains("models/staging/stg_orders.sql"));
assert!(paths.contains("models/staging/schema.yml"));
}
/// `collect_sql_contents` keys compiled code by simplified id (dropping the
/// hash segment of test ids) and omits nodes without compiled code.
#[test]
fn test_collect_sql_contents_from_manifest() {
let manifest = Manifest {
nodes: HashMap::from([
(
"model.proj.stg_orders".to_string(),
ManifestNode {
unique_id: "model.proj.stg_orders".to_string(),
name: "stg_orders".to_string(),
resource_type: "model".to_string(),
depends_on: DependsOn::default(),
config: ManifestConfig::default(),
description: None,
path: None,
columns: HashMap::new(),
compiled_code: Some("select * from raw.orders".to_string()),
database: None,
schema: None,
},
),
(
"test.proj.not_null_orders_id.abc123".to_string(),
ManifestNode {
unique_id: "test.proj.not_null_orders_id.abc123".to_string(),
name: "not_null_orders_id".to_string(),
resource_type: "test".to_string(),
depends_on: DependsOn::default(),
config: ManifestConfig::default(),
description: None,
path: None,
columns: HashMap::new(),
compiled_code: Some(
"select count(*) from orders where id is null".to_string(),
),
database: None,
schema: None,
},
),
(
"model.proj.no_compile".to_string(),
ManifestNode {
unique_id: "model.proj.no_compile".to_string(),
name: "no_compile".to_string(),
resource_type: "model".to_string(),
depends_on: DependsOn::default(),
config: ManifestConfig::default(),
description: None,
path: None,
columns: HashMap::new(),
compiled_code: None,
database: None,
schema: None,
},
),
]),
sources: HashMap::new(),
exposures: HashMap::new(),
};
let sql_contents = manifest.collect_sql_contents();
assert_eq!(
sql_contents.get("model.stg_orders").map(|s| s.as_str()),
Some("select * from raw.orders")
);
// The ".abc123" hash segment is not part of the simplified key.
assert_eq!(
sql_contents
.get("test.not_null_orders_id")
.map(|s| s.as_str()),
Some("select count(*) from orders where id is null")
);
assert!(!sql_contents.contains_key("model.no_compile"));
}
/// Against the on-disk `simple_project` fixture, compiled code is found for
/// `stg_orders` and for a test node, and is absent for `customers`.
///
/// There is no early return for a missing fixture file; the `unwrap` fails
/// in that case.
#[test]
fn test_collect_sql_contents_from_fixture() {
let fixture_path = Path::new(env!("CARGO_MANIFEST_DIR"))
.join("../../tests/fixtures/simple_project/target/manifest.json");
let manifest = load_manifest(&fixture_path).unwrap();
let sql_contents = manifest.collect_sql_contents();
assert!(
sql_contents.contains_key("model.stg_orders"),
"stg_orders should have compiled_code"
);
assert!(
sql_contents.contains_key("test.assert_orders_positive_amount"),
"test node should have compiled_code"
);
assert!(
!sql_contents.contains_key("model.customers"),
"customers has no compiled_code in fixture"
);
}
}