dlin-core 0.2.0-alpha.2

Core library for dbt model lineage analysis
Documentation
use anyhow::{Context, Result};
use serde::Deserialize;
use std::collections::HashMap;
use std::path::{Path, PathBuf};

use crate::input::normalize_path;

use crate::error::DbtLineageError;

/// The portion of `dbt_project.yml` that this module deserializes.
///
/// Field names are mapped to their kebab-case YAML keys (for example
/// `model_paths` is read from `model-paths`). Every `*_paths` field falls
/// back to its matching `default_*_paths` function when the key is absent.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "kebab-case")]
#[allow(dead_code)]
pub struct DbtProject {
    /// Value of the `name` key. No default is declared, so the key must be present.
    pub name: String,

    /// Entries of the `model-paths` key.
    #[serde(default = "default_model_paths")]
    pub model_paths: Vec<String>,

    /// Entries of the `seed-paths` key.
    #[serde(default = "default_seed_paths")]
    pub seed_paths: Vec<String>,

    /// Entries of the `snapshot-paths` key.
    #[serde(default = "default_snapshot_paths")]
    pub snapshot_paths: Vec<String>,

    /// Entries of the `test-paths` key.
    #[serde(default = "default_test_paths")]
    pub test_paths: Vec<String>,

    /// Entries of the `macro-paths` key.
    #[serde(default = "default_macro_paths")]
    pub macro_paths: Vec<String>,

    /// Entries of the `analysis-paths` key.
    #[serde(default = "default_analysis_paths")]
    pub analysis_paths: Vec<String>,

    /// Contents of the `vars` key as untyped JSON values; empty when the key is absent.
    #[serde(default)]
    pub vars: HashMap<String, serde_json::Value>,
}

/// Serde fallback for `model-paths`: the single directory `models`.
fn default_model_paths() -> Vec<String> {
    vec![String::from("models")]
}

/// Serde fallback for `seed-paths`: the single directory `seeds`.
fn default_seed_paths() -> Vec<String> {
    vec![String::from("seeds")]
}

/// Serde fallback for `snapshot-paths`: the single directory `snapshots`.
fn default_snapshot_paths() -> Vec<String> {
    vec![String::from("snapshots")]
}

/// Serde fallback for `test-paths`: the single directory `tests`.
fn default_test_paths() -> Vec<String> {
    vec![String::from("tests")]
}

/// Serde fallback for `macro-paths`: the single directory `macros`.
fn default_macro_paths() -> Vec<String> {
    vec![String::from("macros")]
}

/// Serde fallback for `analysis-paths`: the single directory `analyses`.
fn default_analysis_paths() -> Vec<String> {
    vec![String::from("analyses")]
}

impl DbtProject {
    /// Reads `dbt_project.yml` from `project_dir` and deserializes it.
    ///
    /// # Errors
    ///
    /// - [`DbtLineageError::ProjectNotFound`] when `project_dir/dbt_project.yml`
    ///   does not exist.
    /// - [`DbtLineageError::FileReadError`] when the file exists but reading it
    ///   as a string fails.
    /// - The deserialization error, wrapped with the context
    ///   `Failed to parse <path>`, when the content cannot be parsed.
    pub fn load(project_dir: &Path) -> Result<Self> {
        let project_file = project_dir.join("dbt_project.yml");
        if !project_file.exists() {
            return Err(DbtLineageError::ProjectNotFound(project_dir.to_path_buf()).into());
        }

        let content =
            std::fs::read_to_string(&project_file).map_err(|e| DbtLineageError::FileReadError {
                path: project_file.clone(),
                source: e,
            })?;

        // `with_context` takes a closure, so the message is only formatted (and
        // allocated) when parsing actually fails, not on every successful load.
        let project: DbtProject =
            super::yaml_from_str(&content, &project_file.display().to_string())
                .with_context(|| format!("Failed to parse {}", project_file.display()))?;

        Ok(project)
    }

    /// Builds a [`ResolvedPaths`] by joining every configured path onto
    /// `project_dir` and passing the result through `normalize_path`.
    ///
    /// The order of entries within each list is preserved. No filesystem
    /// access happens in this method itself.
    pub fn resolve_paths(&self, project_dir: &Path) -> ResolvedPaths {
        // Shared conversion applied to each of the six path lists.
        let resolve = |paths: &[String]| -> Vec<PathBuf> {
            paths
                .iter()
                .map(|p| normalize_path(&project_dir.join(p)))
                .collect()
        };
        ResolvedPaths {
            model_paths: resolve(&self.model_paths),
            seed_paths: resolve(&self.seed_paths),
            snapshot_paths: resolve(&self.snapshot_paths),
            test_paths: resolve(&self.test_paths),
            macro_paths: resolve(&self.macro_paths),
            analysis_paths: resolve(&self.analysis_paths),
        }
    }
}

/// Path lists produced by `DbtProject::resolve_paths`: each configured path
/// joined onto the project directory and passed through `normalize_path`.
#[derive(Debug)]
pub struct ResolvedPaths {
    /// Resolved entries of `DbtProject::model_paths`.
    pub model_paths: Vec<PathBuf>,
    /// Resolved entries of `DbtProject::seed_paths`.
    pub seed_paths: Vec<PathBuf>,
    /// Resolved entries of `DbtProject::snapshot_paths`.
    pub snapshot_paths: Vec<PathBuf>,
    /// Resolved entries of `DbtProject::test_paths`.
    pub test_paths: Vec<PathBuf>,
    /// Resolved entries of `DbtProject::macro_paths`.
    pub macro_paths: Vec<PathBuf>,
    /// Resolved entries of `DbtProject::analysis_paths`.
    pub analysis_paths: Vec<PathBuf>,
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    /// Deserializes a `DbtProject` directly from a YAML string, panicking on failure.
    fn parse(yaml: &str) -> DbtProject {
        serde_saphyr::from_str(yaml).unwrap()
    }

    /// Creates a temporary directory containing a `dbt_project.yml` with `contents`.
    /// The returned guard must stay alive for as long as the directory is used.
    fn project_dir_with(contents: &str) -> tempfile::TempDir {
        let dir = tempfile::tempdir().unwrap();
        fs::write(dir.path().join("dbt_project.yml"), contents).unwrap();
        dir
    }

    #[test]
    fn test_defaults() {
        let parsed = parse("name: my_project\n");
        assert_eq!(parsed.name, "my_project");
        assert_eq!(parsed.model_paths, ["models"]);
        assert_eq!(parsed.seed_paths, ["seeds"]);
        assert_eq!(parsed.snapshot_paths, ["snapshots"]);
        assert_eq!(parsed.test_paths, ["tests"]);
        assert_eq!(parsed.macro_paths, ["macros"]);
        assert_eq!(parsed.analysis_paths, ["analyses"]);
    }

    #[test]
    fn test_custom_paths() {
        let parsed = parse(
            r#"
name: my_project
model-paths: ["models", "extra_models"]
seed-paths: ["data"]
macro-paths: ["macros", "custom_macros"]
"#,
        );
        // Keys given in the YAML override the defaults...
        assert_eq!(parsed.model_paths, ["models", "extra_models"]);
        assert_eq!(parsed.seed_paths, ["data"]);
        assert_eq!(parsed.macro_paths, ["macros", "custom_macros"]);
        // ...while omitted keys keep them.
        assert_eq!(parsed.snapshot_paths, ["snapshots"]);
        assert_eq!(parsed.analysis_paths, ["analyses"]);
    }

    #[test]
    fn test_custom_analysis_paths() {
        let parsed = parse(
            r#"
name: my_project
analysis-paths: ["analyses", "custom_analyses"]
"#,
        );
        assert_eq!(parsed.analysis_paths, ["analyses", "custom_analyses"]);
    }

    #[test]
    fn test_load_from_file() {
        let dir = project_dir_with("name: test_project\n");

        let loaded = DbtProject::load(dir.path()).unwrap();
        assert_eq!(loaded.name, "test_project");
        assert_eq!(loaded.model_paths, ["models"]);
    }

    #[test]
    fn test_load_duplicate_keys() {
        // Duplicate YAML keys must be accepted with the last occurrence winning
        // (matching PyYAML behavior); dbt users may have duplicate keys in
        // dbt_project.yml (e.g. vars sections).
        let dir = project_dir_with("name: my_project\nversion: '1.0.0'\nname: my_project_dup\n");

        let loaded = DbtProject::load(dir.path()).unwrap();
        assert_eq!(loaded.name, "my_project_dup"); // last wins
    }

    #[test]
    fn test_load_not_found() {
        // An empty directory has no dbt_project.yml to load.
        let empty_dir = tempfile::tempdir().unwrap();
        let message = DbtProject::load(empty_dir.path())
            .unwrap_err()
            .to_string();
        assert!(message.contains("dbt project not found"), "Got: {}", message);
    }

    #[test]
    fn test_load_invalid_yaml() {
        let dir = project_dir_with(": : : bad yaml");
        let message = DbtProject::load(dir.path()).unwrap_err().to_string();
        assert!(message.contains("Failed to parse"), "Got: {}", message);
    }

    #[test]
    fn test_resolve_paths() {
        let root = Path::new("/proj");
        let resolved = parse("name: my_project\n").resolve_paths(root);

        // resolve_paths normalizes paths, so expected values must also be normalized.
        let cases = [
            (&resolved.model_paths, "models"),
            (&resolved.seed_paths, "seeds"),
            (&resolved.snapshot_paths, "snapshots"),
            (&resolved.test_paths, "tests"),
            (&resolved.macro_paths, "macros"),
            (&resolved.analysis_paths, "analyses"),
        ];
        for (actual, dir_name) in cases {
            assert_eq!(
                *actual,
                vec![normalize_path(&root.join(dir_name))],
                "unexpected resolved path for {}",
                dir_name
            );
        }
    }
}