use serde::Deserialize;
/// Deserialized top-level contents of a schema file.
///
/// Every section is optional in the input: a missing key deserializes to an
/// empty list (`#[serde(default)]`), and an entirely empty value is available
/// through the derived `Default`.
#[derive(Debug, Deserialize, Default)]
pub struct SchemaFile {
/// Entries found under the `sources` key; empty when the key is absent.
#[serde(default)]
pub sources: Vec<SourceDefinition>,
/// Entries found under the `models` key; empty when the key is absent.
#[serde(default)]
pub models: Vec<ModelDefinition>,
/// Entries found under the `exposures` key; empty when the key is absent.
#[serde(default)]
pub exposures: Vec<ExposureDefinition>,
}
/// One entry of the `sources` list: a named source and the tables it contains.
#[derive(Debug, Deserialize, Clone)]
pub struct SourceDefinition {
/// Source name; the only required field.
pub name: String,
/// Free-form description; `None` when omitted.
#[serde(default)]
pub description: Option<String>,
/// Tables declared under this source; empty when the `tables` key is absent.
#[serde(default)]
pub tables: Vec<SourceTable>,
}
/// One table listed under a [`SourceDefinition`].
#[derive(Debug, Deserialize, Clone)]
pub struct SourceTable {
/// Table name; the only required field.
pub name: String,
/// Free-form description; `None` when omitted.
#[serde(default)]
pub description: Option<String>,
/// Column declarations for this table; empty when the `columns` key is absent.
#[serde(default)]
pub columns: Vec<ColumnDefinition>,
}
/// A column declaration, used by both source tables and models.
#[derive(Debug, Deserialize, Clone)]
pub struct ColumnDefinition {
/// Column name; the only required field.
pub name: String,
/// Free-form description; `None` when omitted.
#[serde(default)]
pub description: Option<String>,
/// Tests attached to this column. Read from the `tests` key or from its
/// alias `data_tests`; empty when neither key is present.
#[serde(default, alias = "data_tests")]
pub tests: Vec<TestDefinition>,
}
/// A single entry of a `tests` / `data_tests` list.
///
/// The enum is `untagged`, so serde tries the variants in declaration order:
/// a plain string becomes [`TestDefinition::Simple`], and any other value is
/// captured as-is in [`TestDefinition::Complex`].
#[derive(Debug, Deserialize, Clone)]
#[serde(untagged)]
pub enum TestDefinition {
/// A test given as a bare string, e.g. `not_null`.
Simple(String),
/// Any non-string entry, kept as raw JSON-like data (typically a mapping).
Complex(serde_json::Value),
}
impl TestDefinition {
    /// Returns the name of the test this entry refers to, if one can be determined.
    ///
    /// * A `Simple` entry is the test name itself.
    /// * A `Complex` entry must be an object; any other value yields `None`.
    ///   A string-valued `test_name` field takes precedence. Otherwise the
    ///   first key (in the map's iteration order) that is not `config`,
    ///   `arguments` or `name` is used. `None` is returned when no such key
    ///   exists.
    pub fn test_name(&self) -> Option<&str> {
        // Bare-string entries resolve immediately; everything else is inspected below.
        let value = match self {
            TestDefinition::Simple(name) => return Some(name.as_str()),
            TestDefinition::Complex(value) => value,
        };
        let fields = value.as_object()?;

        // An explicit string `test_name` wins over key-based detection.
        let explicit = fields.get("test_name").and_then(|raw| raw.as_str());
        explicit.or_else(|| {
            // Fall back to the first key that is not a known option key.
            fields
                .keys()
                .map(|key| key.as_str())
                .find(|key| !matches!(*key, "config" | "arguments" | "name"))
        })
    }
}
/// One entry of the `models` list.
#[derive(Debug, Deserialize, Clone)]
pub struct ModelDefinition {
/// Model name; the only required field.
pub name: String,
/// Free-form description; `None` when omitted.
#[serde(default)]
pub description: Option<String>,
/// Column declarations for this model; empty when the `columns` key is absent.
#[serde(default)]
pub columns: Vec<ColumnDefinition>,
/// Optional `config` mapping; `None` when omitted.
#[serde(default)]
pub config: Option<ModelConfig>,
/// Tags listed directly on the model (separate from `config.tags`);
/// empty when omitted.
#[serde(default)]
pub tags: Vec<String>,
/// Model-level tests. Read from the `tests` key or from its alias
/// `data_tests`; empty when neither key is present.
#[serde(default, alias = "data_tests")]
pub tests: Vec<TestDefinition>,
}
/// The subset of a model's `config` mapping that this module reads.
#[derive(Debug, Deserialize, Clone, Default)]
pub struct ModelConfig {
/// Value of the `materialized` key, kept as a raw string (not validated
/// here); `None` when omitted.
#[serde(default)]
pub materialized: Option<String>,
/// Tags listed under `config`; empty when omitted.
#[serde(default)]
pub tags: Vec<String>,
}
/// One entry of the `exposures` list.
///
/// Only `name` is required; every other field falls back to `None` or an
/// empty list when its key is absent.
#[derive(Debug, Deserialize, Clone)]
pub struct ExposureDefinition {
/// Exposure name; the only required field.
pub name: String,
/// Free-form description.
#[serde(default)]
pub description: Option<String>,
/// Value of the `label` key, if present.
#[serde(default)]
pub label: Option<String>,
/// Value of the `type` key (renamed because `type` is a Rust keyword),
/// kept as a raw string.
#[serde(rename = "type", default)]
pub exposure_type: Option<String>,
/// Value of the `url` key, if present.
#[serde(default)]
pub url: Option<String>,
/// Value of the `maturity` key, kept as a raw string.
#[serde(default)]
pub maturity: Option<String>,
/// Raw `depends_on` entries, stored exactly as written
/// (e.g. `ref('orders')`); they are not parsed here.
#[serde(default)]
pub depends_on: Vec<String>,
/// Optional `owner` mapping.
#[serde(default)]
pub owner: Option<ExposureOwner>,
}
/// The `owner` mapping of an [`ExposureDefinition`].
#[derive(Debug, Deserialize, Clone)]
pub struct ExposureOwner {
/// Owner's name, if given.
pub name: Option<String>,
/// Owner's email address, if given.
pub email: Option<String>,
}
/// Parses `content` into a [`SchemaFile`].
///
/// `path` is used only to build the location label handed to
/// `super::yaml_from_str`: the path's display form when given, otherwise the
/// literal `"<input>"`. Presumably that label is used for error reporting —
/// confirm against `yaml_from_str`.
///
/// # Errors
///
/// Propagates whatever error `super::yaml_from_str` returns.
pub fn parse_schema_file(
    content: &str,
    path: Option<&std::path::Path>,
) -> anyhow::Result<SchemaFile> {
    let location = match path {
        Some(file) => file.display().to_string(),
        None => "<input>".to_string(),
    };
    super::yaml_from_str(content, &location)
}
// Unit tests for `parse_schema_file` and `TestDefinition::test_name`.
#[cfg(test)]
mod tests {
use super::*;
// A source with two tables parses into one `SourceDefinition` whose tables
// keep their input order.
#[test]
fn test_parse_sources() {
let yaml = r#"
sources:
- name: raw
description: Raw data from the warehouse
tables:
- name: orders
description: Raw orders table
- name: customers
"#;
let schema = parse_schema_file(yaml, None).unwrap();
assert_eq!(schema.sources.len(), 1);
assert_eq!(schema.sources[0].name, "raw");
assert_eq!(schema.sources[0].tables.len(), 2);
assert_eq!(schema.sources[0].tables[0].name, "orders");
}
// Column tests written under the `data_tests` alias land in `tests`.
#[test]
fn test_parse_models_with_data_tests() {
let yaml = r#"
models:
- name: stg_orders
description: Staged orders
columns:
- name: order_id
data_tests:
- not_null
- unique
"#;
let schema = parse_schema_file(yaml, None).unwrap();
assert_eq!(schema.models.len(), 1);
assert_eq!(schema.models[0].name, "stg_orders");
assert_eq!(schema.models[0].columns.len(), 1);
assert_eq!(schema.models[0].columns[0].tests.len(), 2);
}
// The primary `tests` key is accepted as well.
#[test]
fn test_parse_models_with_legacy_tests_key() {
let yaml = r#"
models:
- name: stg_orders
columns:
- name: order_id
tests:
- not_null
- unique
"#;
let schema = parse_schema_file(yaml, None).unwrap();
assert_eq!(schema.models[0].columns[0].tests.len(), 2);
}
// Exercises the entry shapes a test list can contain: a bare string, a
// mapping keyed by the test name (with `config` and/or `arguments`), and a
// mapping that carries `name` plus `test_name`. Only the resulting variant
// (`Simple` vs `Complex`) is checked here.
#[test]
fn test_parse_data_tests_all_formats() {
let yaml = r#"
models:
- name: orders
columns:
- name: order_id
data_tests:
- not_null
- unique:
config:
where: "order_id > 21"
- name: status
data_tests:
- accepted_values:
arguments:
values:
- placed
- shipped
- completed
- returned
config:
severity: warn
- name: customer_id
data_tests:
- relationships:
arguments:
to: ref('customers')
field: id
- name: custom_test_name
test_name: accepted_values
arguments:
values:
- 1
- 2
- 3
config:
where: "order_date = current_date"
"#;
let schema = parse_schema_file(yaml, None).unwrap();
let model = &schema.models[0];
assert_eq!(model.columns.len(), 3);
assert_eq!(model.columns[0].tests.len(), 2);
assert!(
matches!(model.columns[0].tests[0], TestDefinition::Simple(ref s) if s == "not_null")
);
assert!(matches!(
model.columns[0].tests[1],
TestDefinition::Complex(_)
));
assert_eq!(model.columns[1].tests.len(), 1);
assert!(matches!(
model.columns[1].tests[0],
TestDefinition::Complex(_)
));
assert_eq!(model.columns[2].tests.len(), 2);
assert!(matches!(
model.columns[2].tests[0],
TestDefinition::Complex(_)
));
assert!(matches!(
model.columns[2].tests[1],
TestDefinition::Complex(_)
));
}
// An exposure with `type`, `depends_on` and `owner` parses; the two
// `depends_on` entries are kept as raw strings.
#[test]
fn test_parse_exposures() {
let yaml = r#"
exposures:
- name: weekly_report
description: Weekly business report
type: dashboard
depends_on:
- ref('orders')
- ref('customers')
owner:
name: Data Team
email: data@example.com
"#;
let schema = parse_schema_file(yaml, None).unwrap();
assert_eq!(schema.exposures.len(), 1);
assert_eq!(schema.exposures[0].name, "weekly_report");
assert_eq!(schema.exposures[0].depends_on.len(), 2);
}
// A repeated top-level `sources` key is expected to parse without error,
// with the later occurrence replacing the earlier one.
#[test]
fn test_parse_duplicate_mapping_keys() {
let yaml = r#"
sources:
- name: raw
tables:
- name: orders
sources:
- name: other
tables:
- name: users
"#;
let schema = parse_schema_file(yaml, None).unwrap();
assert_eq!(schema.sources.len(), 1);
assert_eq!(schema.sources[0].name, "other");
}
// Empty input is expected to yield a `SchemaFile` with all sections empty.
#[test]
fn test_empty_file() {
let yaml = "";
let schema = parse_schema_file(yaml, None).unwrap();
assert!(schema.sources.is_empty());
assert!(schema.models.is_empty());
assert!(schema.exposures.is_empty());
}
// `test_name` resolution: bare string, single-key mapping, explicit
// `test_name` field, and a mapping holding only the ignored `name` key.
#[test]
fn test_test_name_extraction() {
let simple = TestDefinition::Simple("not_null".to_string());
assert_eq!(simple.test_name(), Some("not_null"));
let complex_single = TestDefinition::Complex(serde_json::json!({
"unique": {"config": {"where": "id > 0"}}
}));
assert_eq!(complex_single.test_name(), Some("unique"));
let complex_named = TestDefinition::Complex(serde_json::json!({
"name": "custom_test_name",
"test_name": "accepted_values",
"arguments": {"values": [1, 2]}
}));
assert_eq!(complex_named.test_name(), Some("accepted_values"));
let relationships = TestDefinition::Complex(serde_json::json!({
"relationships": {"arguments": {"to": "ref('customers')", "field": "id"}}
}));
assert_eq!(relationships.test_name(), Some("relationships"));
// `name` alone is not treated as a test name.
let name_only = TestDefinition::Complex(serde_json::json!({"name": "something"}));
assert_eq!(name_only.test_name(), None);
}
}