Skip to main content

dlin_core/parser/
project.rs

1use anyhow::{Context, Result};
2use serde::Deserialize;
3use std::collections::HashMap;
4use std::path::{Path, PathBuf};
5
6use crate::input::normalize_path;
7
8use crate::error::DbtLineageError;
9
10#[derive(Debug, Deserialize)]
11#[allow(dead_code)]
12pub struct DbtProject {
13    pub name: String,
14
15    #[serde(rename = "model-paths", default = "default_model_paths")]
16    pub model_paths: Vec<String>,
17
18    #[serde(rename = "seed-paths", default = "default_seed_paths")]
19    pub seed_paths: Vec<String>,
20
21    #[serde(rename = "snapshot-paths", default = "default_snapshot_paths")]
22    pub snapshot_paths: Vec<String>,
23
24    #[serde(rename = "test-paths", default = "default_test_paths")]
25    pub test_paths: Vec<String>,
26
27    #[serde(rename = "macro-paths", default = "default_macro_paths")]
28    pub macro_paths: Vec<String>,
29
30    #[serde(rename = "analysis-paths", default = "default_analysis_paths")]
31    pub analysis_paths: Vec<String>,
32
33    #[serde(default)]
34    pub vars: HashMap<String, serde_json::Value>,
35}
36
/// Serde fallback for `model-paths`: the single directory `models`.
fn default_model_paths() -> Vec<String> {
    vec![String::from("models")]
}
40
/// Serde fallback for `seed-paths`: the single directory `seeds`.
fn default_seed_paths() -> Vec<String> {
    vec!["seeds".to_owned()]
}
44
/// Serde fallback for `snapshot-paths`: the single directory `snapshots`.
fn default_snapshot_paths() -> Vec<String> {
    Vec::from([String::from("snapshots")])
}
48
/// Serde fallback for `test-paths`: the single directory `tests`.
fn default_test_paths() -> Vec<String> {
    let dir = String::from("tests");
    vec![dir]
}
52
/// Serde fallback for `macro-paths`: the single directory `macros`.
fn default_macro_paths() -> Vec<String> {
    vec!["macros".into()]
}
56
/// Serde fallback for `analysis-paths`: the single directory `analyses`.
fn default_analysis_paths() -> Vec<String> {
    ["analyses"].iter().map(|dir| dir.to_string()).collect()
}
60
61impl DbtProject {
62    pub fn load(project_dir: &Path) -> Result<Self> {
63        let project_file = project_dir.join("dbt_project.yml");
64        if !project_file.exists() {
65            return Err(DbtLineageError::ProjectNotFound(project_dir.to_path_buf()).into());
66        }
67
68        let content =
69            std::fs::read_to_string(&project_file).map_err(|e| DbtLineageError::FileReadError {
70                path: project_file.clone(),
71                source: e,
72            })?;
73
74        let project: DbtProject =
75            super::yaml_from_str(&content, &project_file.display().to_string())
76                .context(format!("Failed to parse {}", project_file.display()))?;
77
78        Ok(project)
79    }
80
81    pub fn resolve_paths(&self, project_dir: &Path) -> ResolvedPaths {
82        let resolve = |paths: &[String]| -> Vec<PathBuf> {
83            paths
84                .iter()
85                .map(|p| normalize_path(&project_dir.join(p)))
86                .collect()
87        };
88        ResolvedPaths {
89            model_paths: resolve(&self.model_paths),
90            seed_paths: resolve(&self.seed_paths),
91            snapshot_paths: resolve(&self.snapshot_paths),
92            test_paths: resolve(&self.test_paths),
93            macro_paths: resolve(&self.macro_paths),
94            analysis_paths: resolve(&self.analysis_paths),
95        }
96    }
97}
98
/// The six groups of project directories after being joined onto a project
/// root, as produced by `DbtProject::resolve_paths` and
/// `ResolvedPaths::default_for`.
///
/// Derives `Clone`, `PartialEq` and `Eq` so that values can be copied and
/// compared as a whole rather than field by field.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ResolvedPaths {
    /// Directories resolved from `model-paths`.
    pub model_paths: Vec<PathBuf>,
    /// Directories resolved from `seed-paths`.
    pub seed_paths: Vec<PathBuf>,
    /// Directories resolved from `snapshot-paths`.
    pub snapshot_paths: Vec<PathBuf>,
    /// Directories resolved from `test-paths`.
    pub test_paths: Vec<PathBuf>,
    /// Directories resolved from `macro-paths`.
    pub macro_paths: Vec<PathBuf>,
    /// Directories resolved from `analysis-paths`.
    pub analysis_paths: Vec<PathBuf>,
}
108
109impl ResolvedPaths {
110    pub fn default_for(project_dir: &Path) -> Self {
111        let resolve = |dirs: &[&str]| -> Vec<PathBuf> {
112            dirs.iter()
113                .map(|d| normalize_path(&project_dir.join(d)))
114                .collect()
115        };
116        ResolvedPaths {
117            model_paths: resolve(&["models"]),
118            seed_paths: resolve(&["seeds"]),
119            snapshot_paths: resolve(&["snapshots"]),
120            test_paths: resolve(&["tests"]),
121            macro_paths: resolve(&["macros"]),
122            analysis_paths: resolve(&["analyses"]),
123        }
124    }
125}
126
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    /// A file with only `name` gets every `*-paths` default.
    #[test]
    fn test_defaults() {
        let yaml = "name: my_project\n";
        let project: DbtProject = serde_saphyr::from_str(yaml).unwrap();
        assert_eq!(project.name, "my_project");
        assert_eq!(project.model_paths, vec!["models"]);
        assert_eq!(project.seed_paths, vec!["seeds"]);
        assert_eq!(project.snapshot_paths, vec!["snapshots"]);
        assert_eq!(project.test_paths, vec!["tests"]);
        assert_eq!(project.macro_paths, vec!["macros"]);
        assert_eq!(project.analysis_paths, vec!["analyses"]);
    }

    /// Explicit keys override their defaults; omitted keys keep theirs.
    #[test]
    fn test_custom_paths() {
        let yaml = r#"
name: my_project
model-paths: ["models", "extra_models"]
seed-paths: ["data"]
macro-paths: ["macros", "custom_macros"]
"#;
        let project: DbtProject = serde_saphyr::from_str(yaml).unwrap();
        assert_eq!(project.model_paths, vec!["models", "extra_models"]);
        assert_eq!(project.seed_paths, vec!["data"]);
        assert_eq!(project.snapshot_paths, vec!["snapshots"]); // default
        assert_eq!(project.test_paths, vec!["tests"]); // default
        assert_eq!(project.macro_paths, vec!["macros", "custom_macros"]);
        assert_eq!(project.analysis_paths, vec!["analyses"]); // default
    }

    #[test]
    fn test_custom_analysis_paths() {
        let yaml = r#"
name: my_project
analysis-paths: ["analyses", "custom_analyses"]
"#;
        let project: DbtProject = serde_saphyr::from_str(yaml).unwrap();
        assert_eq!(project.analysis_paths, vec!["analyses", "custom_analyses"]);
    }

    #[test]
    fn test_load_from_file() {
        let tmp = tempfile::tempdir().unwrap();
        fs::write(tmp.path().join("dbt_project.yml"), "name: test_project\n").unwrap();

        let project = DbtProject::load(tmp.path()).unwrap();
        assert_eq!(project.name, "test_project");
        assert_eq!(project.model_paths, vec!["models"]);
    }

    #[test]
    fn test_load_duplicate_keys() {
        // Test that duplicate YAML keys are accepted (last wins, matching PyYAML behavior).
        // dbt users may have duplicate keys in dbt_project.yml (e.g. vars sections).
        let tmp = tempfile::tempdir().unwrap();
        fs::write(
            tmp.path().join("dbt_project.yml"),
            "name: my_project\nversion: '1.0.0'\nname: my_project_dup\n",
        )
        .unwrap();

        let project = DbtProject::load(tmp.path()).unwrap();
        assert_eq!(project.name, "my_project_dup"); // last wins
    }

    /// A directory without `dbt_project.yml` yields the not-found error.
    #[test]
    fn test_load_not_found() {
        let tmp = tempfile::tempdir().unwrap();
        let err = DbtProject::load(tmp.path()).unwrap_err();
        let msg = err.to_string();
        assert!(msg.contains("dbt project not found"), "Got: {}", msg);
    }

    /// Unparseable contents surface the `Failed to parse` context.
    #[test]
    fn test_load_invalid_yaml() {
        let tmp = tempfile::tempdir().unwrap();
        fs::write(tmp.path().join("dbt_project.yml"), ": : : bad yaml").unwrap();
        let err = DbtProject::load(tmp.path()).unwrap_err();
        let msg = err.to_string();
        assert!(msg.contains("Failed to parse"), "Got: {}", msg);
    }

    #[test]
    fn test_resolve_paths() {
        let yaml = "name: my_project\n";
        let project: DbtProject = serde_saphyr::from_str(yaml).unwrap();
        let base = Path::new("/proj");
        let paths = project.resolve_paths(base);
        // resolve_paths normalizes paths, so expected values must also be normalized
        let expected = |name: &str| vec![normalize_path(&base.join(name))];
        assert_eq!(paths.model_paths, expected("models"));
        assert_eq!(paths.seed_paths, expected("seeds"));
        assert_eq!(paths.snapshot_paths, expected("snapshots"));
        assert_eq!(paths.test_paths, expected("tests"));
        assert_eq!(paths.macro_paths, expected("macros"));
        assert_eq!(paths.analysis_paths, expected("analyses"));
    }

    /// `ResolvedPaths::default_for` must agree with resolving a project that
    /// sets no `*-paths` keys; this guards against the two sets of defaults
    /// drifting apart.
    #[test]
    fn test_default_for_matches_resolved_defaults() {
        let project: DbtProject = serde_saphyr::from_str("name: my_project\n").unwrap();
        let base = Path::new("/proj");
        let resolved = project.resolve_paths(base);
        let defaults = ResolvedPaths::default_for(base);
        // Compared field by field so the test needs no `PartialEq` on `ResolvedPaths`.
        assert_eq!(defaults.model_paths, resolved.model_paths);
        assert_eq!(defaults.seed_paths, resolved.seed_paths);
        assert_eq!(defaults.snapshot_paths, resolved.snapshot_paths);
        assert_eq!(defaults.test_paths, resolved.test_paths);
        assert_eq!(defaults.macro_paths, resolved.macro_paths);
        assert_eq!(defaults.analysis_paths, resolved.analysis_paths);
    }
}