1use serde::Deserialize;
2
3#[derive(Debug, Deserialize, Default)]
5pub struct SchemaFile {
6 #[serde(default)]
7 pub sources: Vec<SourceDefinition>,
8
9 #[serde(default)]
10 pub models: Vec<ModelDefinition>,
11
12 #[serde(default)]
13 pub exposures: Vec<ExposureDefinition>,
14}
15
16#[derive(Debug, Deserialize, Clone)]
17pub struct SourceDefinition {
18 pub name: String,
19 #[serde(default)]
20 pub description: Option<String>,
21 #[serde(default)]
22 pub tables: Vec<SourceTable>,
23}
24
25#[derive(Debug, Deserialize, Clone)]
26pub struct SourceTable {
27 pub name: String,
28 #[serde(default)]
29 pub description: Option<String>,
30 #[serde(default)]
31 pub columns: Vec<ColumnDefinition>,
32}
33
34#[derive(Debug, Deserialize, Clone)]
35pub struct ColumnDefinition {
36 pub name: String,
37 #[serde(default)]
38 pub description: Option<String>,
39 #[serde(default, alias = "data_tests")]
40 pub tests: Vec<TestDefinition>,
41}
42
43#[derive(Debug, Deserialize, Clone)]
48#[serde(untagged)]
49pub enum TestDefinition {
50 Simple(String),
51 Complex(serde_json::Value),
52}
53
54impl TestDefinition {
55 pub fn test_name(&self) -> Option<&str> {
61 match self {
62 TestDefinition::Simple(s) => Some(s.as_str()),
63 TestDefinition::Complex(v) => {
64 let obj = v.as_object()?;
65 if let Some(tn) = obj.get("test_name").and_then(|v| v.as_str()) {
67 return Some(tn);
68 }
69 for key in obj.keys() {
73 if !matches!(key.as_str(), "config" | "arguments" | "name") {
74 return Some(key.as_str());
75 }
76 }
77 None
78 }
79 }
80 }
81}
82
83#[derive(Debug, Deserialize, Clone)]
84pub struct ModelDefinition {
85 pub name: String,
86 #[serde(default)]
87 pub description: Option<String>,
88 #[serde(default)]
89 pub columns: Vec<ColumnDefinition>,
90 #[serde(default)]
91 pub config: Option<ModelConfig>,
92 #[serde(default)]
93 pub tags: Vec<String>,
94 #[serde(default, alias = "data_tests")]
96 pub tests: Vec<TestDefinition>,
97}
98
99#[derive(Debug, Deserialize, Clone, Default)]
100pub struct ModelConfig {
101 #[serde(default)]
102 pub materialized: Option<String>,
103 #[serde(default)]
104 pub tags: Vec<String>,
105}
106
107#[derive(Debug, Deserialize, Clone)]
108pub struct ExposureDefinition {
109 pub name: String,
110 #[serde(default)]
111 pub description: Option<String>,
112 #[serde(default)]
113 pub label: Option<String>,
114 #[serde(rename = "type", default)]
115 pub exposure_type: Option<String>,
116 #[serde(default)]
117 pub url: Option<String>,
118 #[serde(default)]
119 pub maturity: Option<String>,
120 #[serde(default)]
121 pub depends_on: Vec<String>,
122 #[serde(default)]
123 pub owner: Option<ExposureOwner>,
124}
125
126#[derive(Debug, Deserialize, Clone)]
127pub struct ExposureOwner {
128 pub name: Option<String>,
129 pub email: Option<String>,
130}
131
132pub fn parse_schema_file(
134 content: &str,
135 path: Option<&std::path::Path>,
136) -> anyhow::Result<SchemaFile> {
137 let location = path
138 .map(|p| p.display().to_string())
139 .unwrap_or_else(|| "<input>".to_string());
140 super::yaml_from_str(content, &location)
141}
142
#[cfg(test)]
mod tests {
    use super::*;

    // All tests call `parse_schema_file` with `path = None`, so they exercise
    // the `<input>` location label path.

    /// A source with a description and two tables parses into one
    /// `SourceDefinition` holding both tables in order.
    #[test]
    fn test_parse_sources() {
        let yaml = r#"
sources:
  - name: raw
    description: Raw data from the warehouse
    tables:
      - name: orders
        description: Raw orders table
      - name: customers
"#;
        let schema = parse_schema_file(yaml, None).unwrap();
        assert_eq!(schema.sources.len(), 1);
        assert_eq!(schema.sources[0].name, "raw");
        assert_eq!(schema.sources[0].tables.len(), 2);
        assert_eq!(schema.sources[0].tables[0].name, "orders");
    }

    /// Column tests given under the `data_tests` key end up in
    /// `ColumnDefinition::tests` (via the serde alias).
    #[test]
    fn test_parse_models_with_data_tests() {
        let yaml = r#"
models:
  - name: stg_orders
    description: Staged orders
    columns:
      - name: order_id
        data_tests:
          - not_null
          - unique
"#;
        let schema = parse_schema_file(yaml, None).unwrap();
        assert_eq!(schema.models.len(), 1);
        assert_eq!(schema.models[0].name, "stg_orders");
        assert_eq!(schema.models[0].columns.len(), 1);
        assert_eq!(schema.models[0].columns[0].tests.len(), 2);
    }

    /// The same column tests given under the plain `tests` key are accepted
    /// as well.
    #[test]
    fn test_parse_models_with_legacy_tests_key() {
        let yaml = r#"
models:
  - name: stg_orders
    columns:
      - name: order_id
        tests:
          - not_null
          - unique
"#;
        let schema = parse_schema_file(yaml, None).unwrap();
        assert_eq!(schema.models[0].columns[0].tests.len(), 2);
    }

    /// Covers every test-entry shape in one fixture: a bare string, a
    /// single-key mapping with `config`, a single-key mapping with
    /// `arguments` + `config`, and a flat mapping with `name` / `test_name`.
    #[test]
    fn test_parse_data_tests_all_formats() {
        let yaml = r#"
models:
  - name: orders
    columns:
      - name: order_id
        data_tests:
          - not_null
          - unique:
              config:
                where: "order_id > 21"
      - name: status
        data_tests:
          - accepted_values:
              arguments:
                values:
                  - placed
                  - shipped
                  - completed
                  - returned
              config:
                severity: warn
      - name: customer_id
        data_tests:
          - relationships:
              arguments:
                to: ref('customers')
                field: id
          - name: custom_test_name
            test_name: accepted_values
            arguments:
              values:
                - 1
                - 2
                - 3
            config:
              where: "order_date = current_date"
"#;
        let schema = parse_schema_file(yaml, None).unwrap();
        let model = &schema.models[0];
        assert_eq!(model.columns.len(), 3);

        // order_id: a bare string followed by a mapping.
        assert_eq!(model.columns[0].tests.len(), 2);
        assert!(
            matches!(model.columns[0].tests[0], TestDefinition::Simple(ref s) if s == "not_null")
        );
        assert!(matches!(
            model.columns[0].tests[1],
            TestDefinition::Complex(_)
        ));

        // status: a single mapping entry.
        assert_eq!(model.columns[1].tests.len(), 1);
        assert!(matches!(
            model.columns[1].tests[0],
            TestDefinition::Complex(_)
        ));

        // customer_id: a single-key mapping and a flat `name`/`test_name`
        // mapping; both are captured as `Complex`.
        assert_eq!(model.columns[2].tests.len(), 2);
        assert!(matches!(
            model.columns[2].tests[0],
            TestDefinition::Complex(_)
        ));
        assert!(matches!(
            model.columns[2].tests[1],
            TestDefinition::Complex(_)
        ));
    }

    /// An exposure with `type`, `depends_on` and `owner` parses, and the
    /// `depends_on` entries are kept as a list.
    #[test]
    fn test_parse_exposures() {
        let yaml = r#"
exposures:
  - name: weekly_report
    description: Weekly business report
    type: dashboard
    depends_on:
      - ref('orders')
      - ref('customers')
    owner:
      name: Data Team
      email: data@example.com
"#;
        let schema = parse_schema_file(yaml, None).unwrap();
        assert_eq!(schema.exposures.len(), 1);
        assert_eq!(schema.exposures[0].name, "weekly_report");
        assert_eq!(schema.exposures[0].depends_on.len(), 2);
    }

    /// Pins the behavior for a repeated top-level key: parsing succeeds and
    /// the later `sources` mapping replaces the earlier one.
    #[test]
    fn test_parse_duplicate_mapping_keys() {
        let yaml = r#"
sources:
  - name: raw
    tables:
      - name: orders
sources:
  - name: other
    tables:
      - name: users
"#;
        let schema = parse_schema_file(yaml, None).unwrap();
        assert_eq!(schema.sources.len(), 1);
        // Last occurrence wins.
        assert_eq!(schema.sources[0].name, "other");
    }

    /// Empty input is not an error; it yields a `SchemaFile` with every
    /// section empty.
    #[test]
    fn test_empty_file() {
        let yaml = "";
        let schema = parse_schema_file(yaml, None).unwrap();
        assert!(schema.sources.is_empty());
        assert!(schema.models.is_empty());
        assert!(schema.exposures.is_empty());
    }

    /// Exercises `TestDefinition::test_name` directly on hand-built values,
    /// without going through the YAML parser.
    #[test]
    fn test_test_name_extraction() {
        // Bare string: the string is the name.
        let simple = TestDefinition::Simple("not_null".to_string());
        assert_eq!(simple.test_name(), Some("not_null"));

        // Single-key mapping: the key is the name.
        let complex_single = TestDefinition::Complex(serde_json::json!({
            "unique": {"config": {"where": "id > 0"}}
        }));
        assert_eq!(complex_single.test_name(), Some("unique"));

        // Flat mapping: the explicit `test_name` value wins over `name`.
        let complex_named = TestDefinition::Complex(serde_json::json!({
            "name": "custom_test_name",
            "test_name": "accepted_values",
            "arguments": {"values": [1, 2]}
        }));
        assert_eq!(complex_named.test_name(), Some("accepted_values"));

        // Single-key mapping whose value nests `arguments`.
        let relationships = TestDefinition::Complex(serde_json::json!({
            "relationships": {"arguments": {"to": "ref('customers')", "field": "id"}}
        }));
        assert_eq!(relationships.test_name(), Some("relationships"));

        // Only a reserved key present: no name can be determined.
        let name_only = TestDefinition::Complex(serde_json::json!({"name": "something"}));
        assert_eq!(name_only.test_name(), None);
    }
}