// dlin_core/parser/project.rs

1use anyhow::{Context, Result};
2use serde::Deserialize;
3use std::collections::HashMap;
4use std::path::{Path, PathBuf};
5
6use crate::input::normalize_path;
7
8use crate::error::DbtLineageError;
9
10#[derive(Debug, Deserialize)]
11#[allow(dead_code)]
12pub struct DbtProject {
13    pub name: String,
14
15    #[serde(rename = "model-paths", default = "default_model_paths")]
16    pub model_paths: Vec<String>,
17
18    #[serde(rename = "seed-paths", default = "default_seed_paths")]
19    pub seed_paths: Vec<String>,
20
21    #[serde(rename = "snapshot-paths", default = "default_snapshot_paths")]
22    pub snapshot_paths: Vec<String>,
23
24    #[serde(rename = "test-paths", default = "default_test_paths")]
25    pub test_paths: Vec<String>,
26
27    #[serde(rename = "macro-paths", default = "default_macro_paths")]
28    pub macro_paths: Vec<String>,
29
30    #[serde(rename = "analysis-paths", default = "default_analysis_paths")]
31    pub analysis_paths: Vec<String>,
32
33    #[serde(default)]
34    pub vars: HashMap<String, serde_json::Value>,
35}
36
/// Serde fallback for `model-paths`: the single directory `models`.
fn default_model_paths() -> Vec<String> {
    vec![String::from("models")]
}
40
/// Serde fallback for `seed-paths`: the single directory `seeds`.
fn default_seed_paths() -> Vec<String> {
    vec![String::from("seeds")]
}
44
/// Serde fallback for `snapshot-paths`: the single directory `snapshots`.
fn default_snapshot_paths() -> Vec<String> {
    vec![String::from("snapshots")]
}
48
/// Serde fallback for `test-paths`: the single directory `tests`.
fn default_test_paths() -> Vec<String> {
    vec![String::from("tests")]
}
52
/// Serde fallback for `macro-paths`: the single directory `macros`.
fn default_macro_paths() -> Vec<String> {
    vec![String::from("macros")]
}
56
/// Serde fallback for `analysis-paths`: the single directory `analyses`.
fn default_analysis_paths() -> Vec<String> {
    vec![String::from("analyses")]
}
60
61impl DbtProject {
62    pub fn load(project_dir: &Path) -> Result<Self> {
63        let project_file = project_dir.join("dbt_project.yml");
64        if !project_file.exists() {
65            return Err(DbtLineageError::ProjectNotFound(project_dir.to_path_buf()).into());
66        }
67
68        let content =
69            std::fs::read_to_string(&project_file).map_err(|e| DbtLineageError::FileReadError {
70                path: project_file.clone(),
71                source: e,
72            })?;
73
74        let project: DbtProject =
75            super::yaml_from_str(&content, &project_file.display().to_string())
76                .context(format!("Failed to parse {}", project_file.display()))?;
77
78        Ok(project)
79    }
80
81    pub fn resolve_paths(&self, project_dir: &Path) -> ResolvedPaths {
82        let resolve = |paths: &[String]| -> Vec<PathBuf> {
83            paths
84                .iter()
85                .map(|p| normalize_path(&project_dir.join(p)))
86                .collect()
87        };
88        ResolvedPaths {
89            model_paths: resolve(&self.model_paths),
90            seed_paths: resolve(&self.seed_paths),
91            snapshot_paths: resolve(&self.snapshot_paths),
92            test_paths: resolve(&self.test_paths),
93            macro_paths: resolve(&self.macro_paths),
94            analysis_paths: resolve(&self.analysis_paths),
95        }
96    }
97}
98
/// The path lists of a [`DbtProject`] after `DbtProject::resolve_paths` has
/// joined each entry onto the project directory and passed it through
/// `normalize_path`.
#[derive(Debug)]
pub struct ResolvedPaths {
    /// Resolved entries of `DbtProject::model_paths`.
    pub model_paths: Vec<PathBuf>,
    /// Resolved entries of `DbtProject::seed_paths`.
    pub seed_paths: Vec<PathBuf>,
    /// Resolved entries of `DbtProject::snapshot_paths`.
    pub snapshot_paths: Vec<PathBuf>,
    /// Resolved entries of `DbtProject::test_paths`.
    pub test_paths: Vec<PathBuf>,
    /// Resolved entries of `DbtProject::macro_paths`.
    pub macro_paths: Vec<PathBuf>,
    /// Resolved entries of `DbtProject::analysis_paths`.
    pub analysis_paths: Vec<PathBuf>,
}
108
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    /// Deserializes a project directly from a YAML string, without touching disk.
    fn parse(yaml: &str) -> DbtProject {
        serde_saphyr::from_str(yaml).unwrap()
    }

    /// Creates a temporary directory containing a `dbt_project.yml` with `contents`.
    /// The returned guard must stay alive for as long as the directory is needed.
    fn project_dir_with(contents: &str) -> tempfile::TempDir {
        let dir = tempfile::tempdir().unwrap();
        fs::write(dir.path().join("dbt_project.yml"), contents).unwrap();
        dir
    }

    #[test]
    fn test_defaults() {
        let parsed = parse("name: my_project\n");
        assert_eq!(parsed.name, "my_project");
        assert_eq!(parsed.model_paths, ["models"]);
        assert_eq!(parsed.seed_paths, ["seeds"]);
        assert_eq!(parsed.snapshot_paths, ["snapshots"]);
        assert_eq!(parsed.test_paths, ["tests"]);
        assert_eq!(parsed.macro_paths, ["macros"]);
        assert_eq!(parsed.analysis_paths, ["analyses"]);
    }

    #[test]
    fn test_custom_paths() {
        let parsed = parse(
            r#"
name: my_project
model-paths: ["models", "extra_models"]
seed-paths: ["data"]
macro-paths: ["macros", "custom_macros"]
"#,
        );
        assert_eq!(parsed.model_paths, ["models", "extra_models"]);
        assert_eq!(parsed.seed_paths, ["data"]);
        assert_eq!(parsed.macro_paths, ["macros", "custom_macros"]);
        // Keys left out of the YAML keep their defaults.
        assert_eq!(parsed.snapshot_paths, ["snapshots"]);
        assert_eq!(parsed.analysis_paths, ["analyses"]);
    }

    #[test]
    fn test_custom_analysis_paths() {
        let parsed = parse(
            r#"
name: my_project
analysis-paths: ["analyses", "custom_analyses"]
"#,
        );
        assert_eq!(parsed.analysis_paths, ["analyses", "custom_analyses"]);
    }

    #[test]
    fn test_load_from_file() {
        let dir = project_dir_with("name: test_project\n");

        let loaded = DbtProject::load(dir.path()).unwrap();
        assert_eq!(loaded.name, "test_project");
        assert_eq!(loaded.model_paths, ["models"]);
    }

    #[test]
    fn test_load_duplicate_keys() {
        // Duplicate YAML keys must be accepted, with the last occurrence winning
        // (matching PyYAML behavior). dbt users may have duplicate keys in
        // dbt_project.yml (e.g. vars sections).
        let dir = project_dir_with("name: my_project\nversion: '1.0.0'\nname: my_project_dup\n");

        let loaded = DbtProject::load(dir.path()).unwrap();
        assert_eq!(loaded.name, "my_project_dup"); // last wins
    }

    #[test]
    fn test_load_not_found() {
        // An empty directory: there is no dbt_project.yml to find.
        let dir = tempfile::tempdir().unwrap();
        let msg = DbtProject::load(dir.path()).unwrap_err().to_string();
        assert!(msg.contains("dbt project not found"), "Got: {}", msg);
    }

    #[test]
    fn test_load_invalid_yaml() {
        let dir = project_dir_with(": : : bad yaml");
        let msg = DbtProject::load(dir.path()).unwrap_err().to_string();
        assert!(msg.contains("Failed to parse"), "Got: {}", msg);
    }

    #[test]
    fn test_resolve_paths() {
        let root = Path::new("/proj");
        let resolved = parse("name: my_project\n").resolve_paths(root);
        // resolve_paths normalizes its output, so each expectation is normalized too.
        let checks = [
            (&resolved.model_paths, "models"),
            (&resolved.seed_paths, "seeds"),
            (&resolved.snapshot_paths, "snapshots"),
            (&resolved.test_paths, "tests"),
            (&resolved.macro_paths, "macros"),
            (&resolved.analysis_paths, "analyses"),
        ];
        for (actual, dir_name) in checks {
            assert_eq!(*actual, vec![normalize_path(&root.join(dir_name))]);
        }
    }
}