dlin_core/parser/
discovery.rs1use anyhow::Result;
2use std::path::{Path, PathBuf};
3use walkdir::WalkDir;
4
5use super::project::ResolvedPaths;
6
/// Files found on disk by [`discover_files`], grouped by the role they play in the project.
///
/// Every bucket starts empty (`Default`) and is only ever appended to, so paths keep the
/// order in which the directory walks yielded them.
#[derive(Debug, Default, Clone, PartialEq, Eq)]
pub struct DiscoveredFiles {
    /// `.sql` files found under the model paths and, after those, under the analysis paths.
    pub model_sql_files: Vec<PathBuf>,
    /// `.csv` files found under the seed paths.
    pub seed_files: Vec<PathBuf>,
    /// `.sql` files found under the snapshot paths.
    pub snapshot_sql_files: Vec<PathBuf>,
    /// `.sql` files found under the test paths.
    pub test_sql_files: Vec<PathBuf>,
    /// `.yml` / `.yaml` files found under the model, seed, snapshot, test and analysis paths.
    /// YAML located under macro paths is not collected.
    pub yaml_files: Vec<PathBuf>,
    /// `.sql` files found under the macro paths.
    pub macro_sql_files: Vec<PathBuf>,
}
17
18pub fn discover_files(paths: &ResolvedPaths) -> Result<DiscoveredFiles> {
20 let mut discovered = DiscoveredFiles::default();
21
22 for dir in &paths.model_paths {
24 let (sql, yaml) = walk_directory(dir);
25 discovered.model_sql_files.extend(sql);
26 discovered.yaml_files.extend(yaml);
27 }
28
29 for dir in &paths.seed_paths {
31 let (_, yaml) = walk_directory(dir);
32 discovered.yaml_files.extend(yaml);
34 discovered.seed_files.extend(walk_csv_files(dir));
36 }
37
38 for dir in &paths.snapshot_paths {
40 let (sql, yaml) = walk_directory(dir);
41 discovered.snapshot_sql_files.extend(sql);
42 discovered.yaml_files.extend(yaml);
43 }
44
45 for dir in &paths.test_paths {
47 let (sql, yaml) = walk_directory(dir);
48 discovered.test_sql_files.extend(sql);
49 discovered.yaml_files.extend(yaml);
50 }
51
52 for dir in &paths.analysis_paths {
54 let (sql, yaml) = walk_directory(dir);
55 discovered.model_sql_files.extend(sql);
56 discovered.yaml_files.extend(yaml);
57 }
58
59 for dir in &paths.macro_paths {
61 let (sql, _yaml) = walk_directory(dir);
62 discovered.macro_sql_files.extend(sql);
63 }
64
65 Ok(discovered)
66}
67
68fn walk_directory(dir: &Path) -> (Vec<PathBuf>, Vec<PathBuf>) {
70 let mut sql_files = Vec::new();
71 let mut yaml_files = Vec::new();
72
73 if !dir.exists() {
74 return (sql_files, yaml_files);
75 }
76
77 for entry in WalkDir::new(dir)
78 .into_iter()
79 .filter_map(|e| e.ok())
80 .filter(|e| e.file_type().is_file())
81 {
82 let path = entry.path();
83 match path.extension().and_then(|e| e.to_str()) {
84 Some("sql") => sql_files.push(path.to_path_buf()),
85 Some("yml" | "yaml") => yaml_files.push(path.to_path_buf()),
86 _ => {}
87 }
88 }
89
90 (sql_files, yaml_files)
91}
92
93fn walk_csv_files(dir: &Path) -> Vec<PathBuf> {
95 if !dir.exists() {
96 return Vec::new();
97 }
98
99 WalkDir::new(dir)
100 .into_iter()
101 .filter_map(|e| e.ok())
102 .filter(|e| e.file_type().is_file())
103 .filter(|e| e.path().extension().and_then(|ext| ext.to_str()) == Some("csv"))
104 .map(|e| e.path().to_path_buf())
105 .collect()
106}
107
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;

    /// Creates `parent/name`, including any missing ancestors, and returns its path.
    fn make_dir(parent: &Path, name: &str) -> PathBuf {
        let dir = parent.join(name);
        fs::create_dir_all(&dir).unwrap();
        dir
    }

    /// Writes `contents` to the file `dir/name`.
    fn put_file(dir: &Path, name: &str, contents: &str) {
        fs::write(dir.join(name), contents).unwrap();
    }

    #[test]
    fn test_walk_nonexistent_directory() {
        let missing = Path::new("/nonexistent/path");
        let (sql, yaml) = walk_directory(missing);
        assert!(sql.is_empty());
        assert!(yaml.is_empty());
    }

    #[test]
    fn test_walk_directory() {
        let tmp = tempfile::tempdir().unwrap();
        let models_dir = make_dir(tmp.path(), "models");
        put_file(&models_dir, "model_a.sql", "SELECT 1");
        put_file(&models_dir, "schema.yml", "version: 2");
        // A file with an unrelated extension must land in neither list.
        put_file(&models_dir, "readme.md", "# Readme");

        let (sql, yaml) = walk_directory(&models_dir);
        assert_eq!(sql.len(), 1);
        assert_eq!(yaml.len(), 1);
    }

    #[test]
    fn test_walk_csv_files() {
        let tmp = tempfile::tempdir().unwrap();
        let seeds_dir = make_dir(tmp.path(), "seeds");
        put_file(&seeds_dir, "countries.csv", "id,name\n1,US");
        put_file(&seeds_dir, "schema.yml", "version: 2");
        put_file(&seeds_dir, "notes.txt", "notes");

        let csv_files = walk_csv_files(&seeds_dir);
        assert_eq!(csv_files.len(), 1);
        assert!(csv_files[0].ends_with("countries.csv"));
    }

    #[test]
    fn test_walk_csv_files_nonexistent() {
        let missing = Path::new("/nonexistent/path");
        assert!(walk_csv_files(missing).is_empty());
    }

    #[test]
    fn test_walk_directory_nested() {
        let tmp = tempfile::tempdir().unwrap();
        let models_dir = tmp.path().join("models");
        // Creating the nested directory also creates `models` itself.
        let staging_dir = make_dir(&models_dir, "staging");
        put_file(&staging_dir, "stg_a.sql", "SELECT 1");
        put_file(&staging_dir, "stg_b.sql", "SELECT 2");
        put_file(&models_dir, "schema.yaml", "version: 2");

        let (sql, yaml) = walk_directory(&models_dir);
        assert_eq!(sql.len(), 2);
        assert_eq!(yaml.len(), 1);
    }

    #[test]
    fn test_discover_files_full() {
        let tmp = tempfile::tempdir().unwrap();
        let project_dir = tmp.path();

        let models_dir = make_dir(project_dir, "models");
        put_file(&models_dir, "model_a.sql", "SELECT 1");
        put_file(&models_dir, "schema.yml", "version: 2");

        let seeds_dir = make_dir(project_dir, "seeds");
        put_file(&seeds_dir, "seed.csv", "a,b\n1,2");

        let snap_dir = make_dir(project_dir, "snapshots");
        put_file(&snap_dir, "snap.sql", "SELECT 1");

        let test_dir = make_dir(project_dir, "tests");
        put_file(&test_dir, "test_a.sql", "SELECT 1");

        let paths = ResolvedPaths {
            model_paths: vec![models_dir],
            seed_paths: vec![seeds_dir],
            snapshot_paths: vec![snap_dir],
            test_paths: vec![test_dir],
            macro_paths: vec![],
            analysis_paths: vec![],
        };

        let discovered = discover_files(&paths).unwrap();
        assert_eq!(discovered.model_sql_files.len(), 1);
        assert_eq!(discovered.seed_files.len(), 1);
        assert_eq!(discovered.snapshot_sql_files.len(), 1);
        assert_eq!(discovered.test_sql_files.len(), 1);
        assert_eq!(discovered.yaml_files.len(), 1);
    }

    #[test]
    fn test_discover_files_with_analyses() {
        let tmp = tempfile::tempdir().unwrap();
        let project_dir = tmp.path();

        let models_dir = make_dir(project_dir, "models");
        put_file(&models_dir, "model_a.sql", "SELECT 1");

        let analyses_dir = make_dir(project_dir, "analyses");
        put_file(&analyses_dir, "my_analysis.sql", "SELECT 1");
        put_file(&analyses_dir, "schema.yml", "version: 2");

        let paths = ResolvedPaths {
            model_paths: vec![models_dir],
            seed_paths: vec![],
            snapshot_paths: vec![],
            test_paths: vec![],
            macro_paths: vec![],
            analysis_paths: vec![analyses_dir],
        };

        let discovered = discover_files(&paths).unwrap();
        // Analysis SQL is counted together with the model SQL.
        assert_eq!(discovered.model_sql_files.len(), 2);
        assert_eq!(discovered.yaml_files.len(), 1);
    }

    #[test]
    fn test_discover_files_missing_dirs() {
        let paths = ResolvedPaths {
            model_paths: vec![PathBuf::from("/nonexistent/models")],
            seed_paths: vec![PathBuf::from("/nonexistent/seeds")],
            snapshot_paths: vec![PathBuf::from("/nonexistent/snapshots")],
            test_paths: vec![PathBuf::from("/nonexistent/tests")],
            macro_paths: vec![],
            analysis_paths: vec![],
        };

        let discovered = discover_files(&paths).unwrap();
        assert!(discovered.model_sql_files.is_empty());
        assert!(discovered.seed_files.is_empty());
        assert!(discovered.snapshot_sql_files.is_empty());
        assert!(discovered.test_sql_files.is_empty());
        assert!(discovered.yaml_files.is_empty());
    }
}