Skip to main content

dlin_core/parser/
discovery.rs

1use anyhow::Result;
2use std::path::{Path, PathBuf};
3use walkdir::WalkDir;
4
5use super::project::ResolvedPaths;
6
/// All files discovered in the dbt project, grouped by the role they play.
///
/// Every field is a flat list of paths; `Default` yields a value in which all
/// lists are empty. `Clone`, `PartialEq` and `Eq` are derived so discovery
/// results can be duplicated and compared directly (for example in tests).
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct DiscoveredFiles {
    /// SQL files treated as models.
    pub model_sql_files: Vec<PathBuf>,
    /// Seed data files (CSV).
    pub seed_files: Vec<PathBuf>,
    /// SQL files found under the snapshot paths.
    pub snapshot_sql_files: Vec<PathBuf>,
    /// SQL files found under the test paths.
    pub test_sql_files: Vec<PathBuf>,
    /// YAML files (schema / properties), pooled across resource types.
    pub yaml_files: Vec<PathBuf>,
    /// SQL files found under the macro paths.
    pub macro_sql_files: Vec<PathBuf>,
}
17
18/// Walk all configured paths and collect SQL/YAML files
19pub fn discover_files(paths: &ResolvedPaths) -> Result<DiscoveredFiles> {
20    let mut discovered = DiscoveredFiles::default();
21
22    // Models
23    for dir in &paths.model_paths {
24        let (sql, yaml) = walk_directory(dir);
25        discovered.model_sql_files.extend(sql);
26        discovered.yaml_files.extend(yaml);
27    }
28
29    // Seeds
30    for dir in &paths.seed_paths {
31        let (_, yaml) = walk_directory(dir);
32        // Seeds are CSV files typically, but we collect their YAML schema files
33        discovered.yaml_files.extend(yaml);
34        // Also look for .csv files
35        discovered.seed_files.extend(walk_csv_files(dir));
36    }
37
38    // Snapshots
39    for dir in &paths.snapshot_paths {
40        let (sql, yaml) = walk_directory(dir);
41        discovered.snapshot_sql_files.extend(sql);
42        discovered.yaml_files.extend(yaml);
43    }
44
45    // Tests
46    for dir in &paths.test_paths {
47        let (sql, yaml) = walk_directory(dir);
48        discovered.test_sql_files.extend(sql);
49        discovered.yaml_files.extend(yaml);
50    }
51
52    // Analyses (treated as models, consistent with manifest mode)
53    for dir in &paths.analysis_paths {
54        let (sql, yaml) = walk_directory(dir);
55        discovered.model_sql_files.extend(sql);
56        discovered.yaml_files.extend(yaml);
57    }
58
59    // Macros
60    for dir in &paths.macro_paths {
61        let (sql, _yaml) = walk_directory(dir);
62        discovered.macro_sql_files.extend(sql);
63    }
64
65    Ok(discovered)
66}
67
68/// Walk a directory and return (sql_files, yaml_files)
69fn walk_directory(dir: &Path) -> (Vec<PathBuf>, Vec<PathBuf>) {
70    let mut sql_files = Vec::new();
71    let mut yaml_files = Vec::new();
72
73    if !dir.exists() {
74        return (sql_files, yaml_files);
75    }
76
77    for entry in WalkDir::new(dir)
78        .into_iter()
79        .filter_map(|e| e.ok())
80        .filter(|e| e.file_type().is_file())
81    {
82        let path = entry.path();
83        match path.extension().and_then(|e| e.to_str()) {
84            Some("sql") => sql_files.push(path.to_path_buf()),
85            Some("yml" | "yaml") => yaml_files.push(path.to_path_buf()),
86            _ => {}
87        }
88    }
89
90    (sql_files, yaml_files)
91}
92
93/// Walk a directory and return CSV files (for seeds)
94fn walk_csv_files(dir: &Path) -> Vec<PathBuf> {
95    if !dir.exists() {
96        return Vec::new();
97    }
98
99    WalkDir::new(dir)
100        .into_iter()
101        .filter_map(|e| e.ok())
102        .filter(|e| e.file_type().is_file())
103        .filter(|e| e.path().extension().and_then(|ext| ext.to_str()) == Some("csv"))
104        .map(|e| e.path().to_path_buf())
105        .collect()
106}
107
/// Unit tests for file discovery. Each test that touches the filesystem builds
/// its own throwaway project layout under a `tempfile` temporary directory.
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    #[test]
    fn test_walk_nonexistent_directory() {
        let (sql, yaml) = walk_directory(Path::new("/nonexistent/path"));
        assert!(sql.is_empty());
        assert!(yaml.is_empty());
    }

    #[test]
    fn test_walk_directory() {
        let tmp = tempfile::tempdir().unwrap();
        let models_dir = tmp.path().join("models");
        fs::create_dir_all(&models_dir).unwrap();
        fs::write(models_dir.join("model_a.sql"), "SELECT 1").unwrap();
        fs::write(models_dir.join("schema.yml"), "version: 2").unwrap();
        // Unrelated extension: must not show up in either list.
        fs::write(models_dir.join("readme.md"), "# Readme").unwrap();

        let (sql, yaml) = walk_directory(&models_dir);
        assert_eq!(sql.len(), 1);
        assert_eq!(yaml.len(), 1);
    }

    #[test]
    fn test_walk_csv_files() {
        let tmp = tempfile::tempdir().unwrap();
        let seeds_dir = tmp.path().join("seeds");
        fs::create_dir_all(&seeds_dir).unwrap();
        fs::write(seeds_dir.join("countries.csv"), "id,name\n1,US").unwrap();
        fs::write(seeds_dir.join("schema.yml"), "version: 2").unwrap();
        fs::write(seeds_dir.join("notes.txt"), "notes").unwrap();

        let csv_files = walk_csv_files(&seeds_dir);
        assert_eq!(csv_files.len(), 1);
        assert!(csv_files[0].ends_with("countries.csv"));
    }

    #[test]
    fn test_walk_csv_files_nonexistent() {
        let csv_files = walk_csv_files(Path::new("/nonexistent/path"));
        assert!(csv_files.is_empty());
    }

    #[test]
    fn test_walk_directory_nested() {
        let tmp = tempfile::tempdir().unwrap();
        let models_dir = tmp.path().join("models");
        let staging_dir = models_dir.join("staging");
        fs::create_dir_all(&staging_dir).unwrap();
        // SQL lives one level down; the YAML sits at the root of the walk.
        fs::write(staging_dir.join("stg_a.sql"), "SELECT 1").unwrap();
        fs::write(staging_dir.join("stg_b.sql"), "SELECT 2").unwrap();
        fs::write(models_dir.join("schema.yaml"), "version: 2").unwrap();

        let (sql, yaml) = walk_directory(&models_dir);
        assert_eq!(sql.len(), 2);
        assert_eq!(yaml.len(), 1);
    }

    #[test]
    fn test_discover_files_full() {
        let tmp = tempfile::tempdir().unwrap();
        let project_dir = tmp.path();

        // Models
        let models_dir = project_dir.join("models");
        fs::create_dir_all(&models_dir).unwrap();
        fs::write(models_dir.join("model_a.sql"), "SELECT 1").unwrap();
        fs::write(models_dir.join("schema.yml"), "version: 2").unwrap();

        // Seeds
        let seeds_dir = project_dir.join("seeds");
        fs::create_dir_all(&seeds_dir).unwrap();
        fs::write(seeds_dir.join("seed.csv"), "a,b\n1,2").unwrap();

        // Snapshots
        let snap_dir = project_dir.join("snapshots");
        fs::create_dir_all(&snap_dir).unwrap();
        fs::write(snap_dir.join("snap.sql"), "SELECT 1").unwrap();

        // Tests
        let test_dir = project_dir.join("tests");
        fs::create_dir_all(&test_dir).unwrap();
        fs::write(test_dir.join("test_a.sql"), "SELECT 1").unwrap();

        let paths = ResolvedPaths {
            model_paths: vec![models_dir],
            seed_paths: vec![seeds_dir],
            snapshot_paths: vec![snap_dir],
            test_paths: vec![test_dir],
            macro_paths: vec![],
            analysis_paths: vec![],
        };

        let discovered = discover_files(&paths).unwrap();
        assert_eq!(discovered.model_sql_files.len(), 1);
        assert_eq!(discovered.seed_files.len(), 1);
        assert_eq!(discovered.snapshot_sql_files.len(), 1);
        assert_eq!(discovered.test_sql_files.len(), 1);
        assert_eq!(discovered.yaml_files.len(), 1);
    }

    #[test]
    fn test_discover_files_with_analyses() {
        let tmp = tempfile::tempdir().unwrap();
        let project_dir = tmp.path();

        // Models
        let models_dir = project_dir.join("models");
        fs::create_dir_all(&models_dir).unwrap();
        fs::write(models_dir.join("model_a.sql"), "SELECT 1").unwrap();

        // Analyses (should be treated as models)
        let analyses_dir = project_dir.join("analyses");
        fs::create_dir_all(&analyses_dir).unwrap();
        fs::write(analyses_dir.join("my_analysis.sql"), "SELECT 1").unwrap();
        fs::write(analyses_dir.join("schema.yml"), "version: 2").unwrap();

        let paths = ResolvedPaths {
            model_paths: vec![models_dir],
            seed_paths: vec![],
            snapshot_paths: vec![],
            test_paths: vec![],
            macro_paths: vec![],
            analysis_paths: vec![analyses_dir],
        };

        let discovered = discover_files(&paths).unwrap();
        // Analysis SQL files are added to model_sql_files
        assert_eq!(discovered.model_sql_files.len(), 2);
        // Analysis YAML files are collected
        assert_eq!(discovered.yaml_files.len(), 1);
    }

    #[test]
    fn test_discover_files_macros() {
        let tmp = tempfile::tempdir().unwrap();
        let macros_dir = tmp.path().join("macros");
        fs::create_dir_all(&macros_dir).unwrap();
        fs::write(macros_dir.join("my_macro.sql"), "{% macro one() %}1{% endmacro %}").unwrap();
        fs::write(macros_dir.join("macros.yml"), "version: 2").unwrap();

        let paths = ResolvedPaths {
            model_paths: vec![],
            seed_paths: vec![],
            snapshot_paths: vec![],
            test_paths: vec![],
            macro_paths: vec![macros_dir],
            analysis_paths: vec![],
        };

        let discovered = discover_files(&paths).unwrap();
        // Macro SQL lands in its own bucket, not among the models
        assert_eq!(discovered.macro_sql_files.len(), 1);
        assert!(discovered.macro_sql_files[0].ends_with("my_macro.sql"));
        assert!(discovered.model_sql_files.is_empty());
        // discover_files discards YAML found under macro paths
        assert!(discovered.yaml_files.is_empty());
    }

    #[test]
    fn test_discover_files_missing_dirs() {
        let paths = ResolvedPaths {
            model_paths: vec![PathBuf::from("/nonexistent/models")],
            seed_paths: vec![PathBuf::from("/nonexistent/seeds")],
            snapshot_paths: vec![PathBuf::from("/nonexistent/snapshots")],
            test_paths: vec![PathBuf::from("/nonexistent/tests")],
            macro_paths: vec![],
            analysis_paths: vec![],
        };

        // Missing directories are not an error; they simply contribute nothing.
        let discovered = discover_files(&paths).unwrap();
        assert!(discovered.model_sql_files.is_empty());
        assert!(discovered.seed_files.is_empty());
        assert!(discovered.snapshot_sql_files.is_empty());
        assert!(discovered.test_sql_files.is_empty());
        assert!(discovered.yaml_files.is_empty());
    }
}