use serde::Deserialize;
/// Top-level contents of one schema YAML document.
///
/// Every section is optional: a missing key deserializes to an empty list
/// (`#[serde(default)]`), and the struct as a whole implements `Default`.
#[derive(Debug, Deserialize, Default)]
pub struct SchemaFile {
/// Entries under the `sources:` key.
#[serde(default)]
pub sources: Vec<SourceDefinition>,
/// Entries under the `models:` key.
#[serde(default)]
pub models: Vec<ModelDefinition>,
/// Entries under the `snapshots:` key.
#[serde(default)]
pub snapshots: Vec<SnapshotDefinition>,
/// Entries under the `exposures:` key.
#[serde(default)]
pub exposures: Vec<ExposureDefinition>,
/// Entries under the `semantic_models:` key.
#[serde(default)]
pub semantic_models: Vec<SemanticModelDefinition>,
/// Entries under the `metrics:` key.
#[serde(default)]
pub metrics: Vec<MetricDefinition>,
/// Entries under the `saved_queries:` key.
#[serde(default)]
pub saved_queries: Vec<SavedQueryDefinition>,
}
/// One entry of the `sources:` list.
#[derive(Debug, Deserialize, Clone)]
pub struct SourceDefinition {
/// Source name; the only required key.
pub name: String,
/// Optional free-text description.
#[serde(default)]
pub description: Option<String>,
/// Tables declared under this source; empty when the key is absent.
#[serde(default)]
pub tables: Vec<SourceTable>,
}
/// One table listed under a source's `tables:` key.
#[derive(Debug, Deserialize, Clone)]
pub struct SourceTable {
/// Table name; the only required key.
pub name: String,
/// Optional free-text description.
#[serde(default)]
pub description: Option<String>,
/// Column entries for this table; empty when the key is absent.
#[serde(default)]
pub columns: Vec<ColumnDefinition>,
}
/// One column entry, used by both source tables and models.
#[derive(Debug, Deserialize, Clone)]
pub struct ColumnDefinition {
/// Column name; the only required key.
pub name: String,
/// Optional free-text description.
#[serde(default)]
pub description: Option<String>,
/// Tests attached to the column. Read from the `tests:` key or its alias
/// `data_tests:`; empty when neither is present.
#[serde(default, alias = "data_tests")]
pub tests: Vec<TestDefinition>,
}
/// A single test entry as written in YAML.
///
/// The enum is `untagged`, so serde tries the variants in declaration order:
/// a plain string becomes `Simple`, anything else is captured as `Complex`.
#[derive(Debug, Deserialize, Clone)]
#[serde(untagged)]
pub enum TestDefinition {
/// A test given as a bare string, e.g. `not_null`.
Simple(String),
/// Any non-string entry, kept as an untyped JSON value for later inspection.
Complex(serde_json::Value),
}
impl TestDefinition {
pub fn test_name(&self) -> Option<&str> {
match self {
TestDefinition::Simple(s) => Some(s.as_str()),
TestDefinition::Complex(v) => {
let obj = v.as_object()?;
if let Some(tn) = obj.get("test_name").and_then(|v| v.as_str()) {
return Some(tn);
}
for key in obj.keys() {
if !matches!(key.as_str(), "config" | "arguments" | "name") {
return Some(key.as_str());
}
}
None
}
}
}
}
/// Renders a version identifier (number or string) as a canonical string.
///
/// * Integers (signed or unsigned) are printed as-is.
/// * Floats with no fractional part that fit in an `i64` are printed without
///   a decimal point (`2.0` becomes `"2"`); all other floats use their normal
///   decimal rendering.
/// * Strings that parse as an `i64` are normalized through that integer;
///   other strings are returned unchanged.
/// * Any other JSON value falls back to its JSON text.
fn version_value_to_str(v: &serde_json::Value) -> String {
    if let Some(n) = v.as_i64() {
        return n.to_string();
    }
    if let Some(n) = v.as_u64() {
        return n.to_string();
    }
    if let Some(f) = v.as_f64() {
        // 2^63 exactly. `f as i64` saturates outside [-2^63, 2^63), which
        // would silently turn a huge integral float into i64::MAX / i64::MIN,
        // so only take the integer path when the value really fits.
        const I64_BOUND: f64 = 9_223_372_036_854_775_808.0;
        let fits_i64 = f.fract() == 0.0 && f >= -I64_BOUND && f < I64_BOUND;
        return if fits_i64 {
            (f as i64).to_string()
        } else {
            f.to_string()
        };
    }
    if let Some(s) = v.as_str() {
        // Quoted numerics such as "2" should compare equal to the bare number.
        return match s.parse::<i64>() {
            Ok(n) => n.to_string(),
            Err(_) => s.to_string(),
        };
    }
    v.to_string()
}
/// One entry of a model's `versions:` list.
#[derive(Debug, Deserialize, Clone)]
pub struct VersionSpec {
/// Version identifier exactly as written (required key). Kept as a raw
/// value because it may be a number or a string; use `v_str` for a string form.
pub v: serde_json::Value,
/// Optional override for the file stem returned by `sql_stem`.
#[serde(default)]
pub defined_in: Option<String>,
}
impl VersionSpec {
    /// Returns the version identifier as a string, as rendered by
    /// `version_value_to_str`.
    pub fn v_str(&self) -> String {
        let raw = &self.v;
        version_value_to_str(raw)
    }

    /// Returns the stem associated with this version of `model_name`.
    ///
    /// An explicit `defined_in` value is returned verbatim; otherwise the stem
    /// is `<model_name>_v<version>`.
    pub fn sql_stem(&self, model_name: &str) -> String {
        match &self.defined_in {
            Some(stem) => stem.clone(),
            None => format!("{}_v{}", model_name, self.v_str()),
        }
    }
}
/// One entry of the `models:` list.
#[derive(Debug, Deserialize, Clone)]
pub struct ModelDefinition {
/// Model name; the only required key.
pub name: String,
/// Optional free-text description.
#[serde(default)]
pub description: Option<String>,
/// Column entries for this model; empty when the key is absent.
#[serde(default)]
pub columns: Vec<ColumnDefinition>,
/// Optional `config:` block.
#[serde(default)]
pub config: Option<ModelConfig>,
/// Tags listed directly on the model (separate from `config.tags`).
#[serde(default)]
pub tags: Vec<String>,
/// Model-level tests. Read from the `tests:` key or its alias `data_tests:`.
#[serde(default, alias = "data_tests")]
pub tests: Vec<TestDefinition>,
/// Declared versions; empty for an unversioned model.
#[serde(default)]
pub versions: Vec<VersionSpec>,
/// Explicit `latest_version:` value, kept raw (number or string).
/// See `resolved_latest_version_str` for the fallback when it is absent.
#[serde(default)]
pub latest_version: Option<serde_json::Value>,
}
impl ModelDefinition {
    /// Returns the latest version of this model as a string, if it has one.
    ///
    /// An explicit `latest_version` always wins. Otherwise the result is
    /// inferred from `versions`: the numerically greatest identifier when every
    /// identifier parses as an `i64`, else the lexicographically greatest
    /// string. Returns `None` when neither `latest_version` nor any version is
    /// present.
    pub fn resolved_latest_version_str(&self) -> Option<String> {
        if let Some(explicit) = self.latest_version.as_ref() {
            return Some(version_value_to_str(explicit));
        }

        let labels: Vec<String> = self.versions.iter().map(|spec| spec.v_str()).collect();

        // `Some` only if *every* label is an integer; a single non-numeric
        // label switches the whole comparison to string ordering.
        let as_numbers: Option<Vec<i64>> = labels
            .iter()
            .map(|label| label.parse::<i64>().ok())
            .collect();

        // With no versions at all both arms yield `None` via `max()`.
        match as_numbers {
            Some(numbers) => numbers.into_iter().max().map(|n| n.to_string()),
            None => labels.into_iter().max(),
        }
    }
}
/// The subset of a model's `config:` block that this file reads.
#[derive(Debug, Deserialize, Clone, Default)]
pub struct ModelConfig {
/// Value of the `materialized:` key, if present.
#[serde(default)]
pub materialized: Option<String>,
/// Tags listed under `config.tags`; empty when the key is absent.
#[serde(default)]
pub tags: Vec<String>,
}
/// One entry of the `snapshots:` list.
#[derive(Debug, Deserialize, Clone)]
pub struct SnapshotDefinition {
/// Snapshot name; the only required key.
pub name: String,
/// Optional free-text description.
#[serde(default)]
pub description: Option<String>,
/// Value of the `relation:` key, stored as the raw string
/// (the tests in this file use values such as `ref('stg_orders')`).
#[serde(default)]
pub relation: Option<String>,
}
/// One entry of the `exposures:` list.
#[derive(Debug, Deserialize, Clone)]
pub struct ExposureDefinition {
/// Exposure name; the only required key.
pub name: String,
/// Optional free-text description.
#[serde(default)]
pub description: Option<String>,
/// Optional display label.
#[serde(default)]
pub label: Option<String>,
/// Value of the YAML key `type` (renamed because `type` is a Rust keyword).
#[serde(rename = "type", default)]
pub exposure_type: Option<String>,
/// Value of the `url:` key, if present.
#[serde(default)]
pub url: Option<String>,
/// Value of the `maturity:` key, if present.
#[serde(default)]
pub maturity: Option<String>,
/// Raw `depends_on:` entries, stored as written
/// (the tests in this file use values such as `ref('orders')`).
#[serde(default)]
pub depends_on: Vec<String>,
/// Optional `owner:` block.
#[serde(default)]
pub owner: Option<ExposureOwner>,
}
/// The `owner:` block of an exposure.
#[derive(Debug, Deserialize, Clone)]
pub struct ExposureOwner {
/// Owner name, if given.
pub name: Option<String>,
/// Owner email address, if given.
pub email: Option<String>,
}
/// One entry of the `semantic_models:` list.
#[derive(Debug, Deserialize, Clone)]
pub struct SemanticModelDefinition {
/// Semantic model name; the only required key.
pub name: String,
/// Optional free-text description.
#[serde(default)]
pub description: Option<String>,
/// Optional display label.
#[serde(default)]
pub label: Option<String>,
/// Value of the `model:` key, stored as the raw string
/// (the tests in this file use `ref('orders')`).
#[serde(default)]
pub model: Option<String>,
/// Measures declared on this semantic model; empty when the key is absent.
#[serde(default)]
pub measures: Vec<MeasureDefinition>,
}
/// One measure of a semantic model; only its name is captured.
#[derive(Debug, Deserialize, Clone)]
pub struct MeasureDefinition {
/// Measure name (required key).
pub name: String,
}
/// One entry of the `metrics:` list.
///
/// The reference-bearing keys are kept as raw JSON values; `measure_refs` and
/// `metric_refs` extract names from them.
#[derive(Debug, Deserialize, Clone)]
pub struct MetricDefinition {
/// Metric name; the only required key.
pub name: String,
/// Optional free-text description.
#[serde(default)]
pub description: Option<String>,
/// Optional display label.
#[serde(default)]
pub label: Option<String>,
/// Raw `type_params:` block, if present.
#[serde(default)]
pub type_params: Option<serde_json::Value>,
/// Raw top-level `base_metric:` value, if present.
#[serde(default)]
pub base_metric: Option<serde_json::Value>,
/// Raw top-level `conversion_metric:` value, if present.
#[serde(default)]
pub conversion_metric: Option<serde_json::Value>,
/// Raw top-level `input_metric:` value, if present.
#[serde(default)]
pub input_metric: Option<serde_json::Value>,
}
impl MetricDefinition {
    /// Extracts a referenced name from `value`.
    ///
    /// Accepts either a bare string or an object with a string `name` key;
    /// anything else yields `None`.
    fn name_ref(value: &serde_json::Value) -> Option<&str> {
        match value.as_str() {
            Some(name) => Some(name),
            None => value.get("name")?.as_str(),
        }
    }

    /// Returns the measure names referenced under `type_params`.
    ///
    /// Looks at `measure`, `base_measure` and `conversion_measure`, in that
    /// order. Returns an empty list when `type_params` is absent.
    pub fn measure_refs(&self) -> Vec<&str> {
        let Some(params) = self.type_params.as_ref() else {
            return Vec::new();
        };

        ["measure", "base_measure", "conversion_measure"]
            .into_iter()
            .filter_map(|key| params.get(key).and_then(Self::name_ref))
            .collect()
    }

    /// Returns the metric names this metric refers to.
    ///
    /// References are collected in this order:
    /// 1. `type_params.numerator`, `type_params.denominator`
    /// 2. each item of `type_params.input_metrics`, then `type_params.metrics`
    /// 3. top-level `base_metric`, `conversion_metric`, `input_metric`
    pub fn metric_refs(&self) -> Vec<&str> {
        let mut found = Vec::new();

        if let Some(params) = self.type_params.as_ref() {
            found.extend(
                ["numerator", "denominator"]
                    .into_iter()
                    .filter_map(|key| params.get(key).and_then(Self::name_ref)),
            );

            for key in ["input_metrics", "metrics"] {
                let items = params.get(key).and_then(serde_json::Value::as_array);
                if let Some(items) = items {
                    found.extend(items.iter().filter_map(Self::name_ref));
                }
            }
        }

        // Top-level reference keys; absent ones are skipped by `flatten`.
        let top_level = [
            &self.base_metric,
            &self.conversion_metric,
            &self.input_metric,
        ];
        found.extend(top_level.into_iter().flatten().filter_map(Self::name_ref));

        found
    }
}
/// One entry of the `saved_queries:` list.
#[derive(Debug, Deserialize, Clone)]
pub struct SavedQueryDefinition {
/// Saved query name; the only required key.
pub name: String,
/// Optional free-text description.
#[serde(default)]
pub description: Option<String>,
/// Optional display label.
#[serde(default)]
pub label: Option<String>,
/// Optional `query_params:` block.
#[serde(default)]
pub query_params: Option<SavedQueryQueryParams>,
}
/// The `query_params:` block of a saved query; only `metrics` is captured.
#[derive(Debug, Deserialize, Clone)]
pub struct SavedQueryQueryParams {
/// Metric names listed under `metrics:`; empty when the key is absent.
#[serde(default)]
pub metrics: Vec<String>,
}
/// Parses the text of a schema YAML document into a [`SchemaFile`].
///
/// `path` is used only to build the location label handed to
/// `super::yaml_from_str` (presumably for error reporting; confirm against
/// that helper). When no path is given the label is `<input>`.
///
/// # Errors
///
/// Returns whatever error `super::yaml_from_str` produces.
pub fn parse_schema_file(
    content: &str,
    path: Option<&std::path::Path>,
) -> anyhow::Result<SchemaFile> {
    let location = match path {
        Some(p) => p.display().to_string(),
        None => "<input>".to_string(),
    };
    super::yaml_from_str(content, &location)
}
#[cfg(test)]
mod tests {
use super::*;
// Parses a `sources:` document and checks the source name, the number of
// tables, and the first table's name.
#[test]
fn test_parse_sources() {
let yaml = r#"
sources:
- name: raw
description: Raw data from the warehouse
tables:
- name: orders
description: Raw orders table
- name: customers
"#;
let schema = parse_schema_file(yaml, None).unwrap();
assert_eq!(schema.sources.len(), 1);
assert_eq!(schema.sources[0].name, "raw");
assert_eq!(schema.sources[0].tables.len(), 2);
assert_eq!(schema.sources[0].tables[0].name, "orders");
}
// Column tests written under the `data_tests:` key must land in
// `ColumnDefinition::tests` (via the serde alias).
#[test]
fn test_parse_models_with_data_tests() {
let yaml = r#"
models:
- name: stg_orders
description: Staged orders
columns:
- name: order_id
data_tests:
- not_null
- unique
"#;
let schema = parse_schema_file(yaml, None).unwrap();
assert_eq!(schema.models.len(), 1);
assert_eq!(schema.models[0].name, "stg_orders");
assert_eq!(schema.models[0].columns.len(), 1);
assert_eq!(schema.models[0].columns[0].tests.len(), 2);
}
// Column tests written under the plain `tests:` key are accepted as well.
#[test]
fn test_parse_models_with_legacy_tests_key() {
let yaml = r#"
models:
- name: stg_orders
columns:
- name: order_id
tests:
- not_null
- unique
"#;
let schema = parse_schema_file(yaml, None).unwrap();
assert_eq!(schema.models[0].columns[0].tests.len(), 2);
}
// Exercises the different ways a test entry can be written: a bare string,
// a single-key mapping with `config`/`arguments`, and a mapping with explicit
// `name`/`test_name` keys. Checks which `TestDefinition` variant each becomes.
#[test]
fn test_parse_data_tests_all_formats() {
let yaml = r#"
models:
- name: orders
columns:
- name: order_id
data_tests:
- not_null
- unique:
config:
where: "order_id > 21"
- name: status
data_tests:
- accepted_values:
arguments:
values:
- placed
- shipped
- completed
- returned
config:
severity: warn
- name: customer_id
data_tests:
- relationships:
arguments:
to: ref('customers')
field: id
- name: custom_test_name
test_name: accepted_values
arguments:
values:
- 1
- 2
- 3
config:
where: "order_date = current_date"
"#;
let schema = parse_schema_file(yaml, None).unwrap();
let model = &schema.models[0];
assert_eq!(model.columns.len(), 3);
// First column: one bare-string test and one mapping test.
assert_eq!(model.columns[0].tests.len(), 2);
assert!(
matches!(model.columns[0].tests[0], TestDefinition::Simple(ref s) if s == "not_null")
);
assert!(matches!(
model.columns[0].tests[1],
TestDefinition::Complex(_)
));
// Second column: a single mapping test.
assert_eq!(model.columns[1].tests.len(), 1);
assert!(matches!(
model.columns[1].tests[0],
TestDefinition::Complex(_)
));
// Third column: two mapping tests.
assert_eq!(model.columns[2].tests.len(), 2);
assert!(matches!(
model.columns[2].tests[0],
TestDefinition::Complex(_)
));
assert!(matches!(
model.columns[2].tests[1],
TestDefinition::Complex(_)
));
}
// Parses an `exposures:` document and checks the exposure name and the
// number of `depends_on` entries.
#[test]
fn test_parse_exposures() {
let yaml = r#"
exposures:
- name: weekly_report
description: Weekly business report
type: dashboard
depends_on:
- ref('orders')
- ref('customers')
owner:
name: Data Team
email: data@example.com
"#;
let schema = parse_schema_file(yaml, None).unwrap();
assert_eq!(schema.exposures.len(), 1);
assert_eq!(schema.exposures[0].name, "weekly_report");
assert_eq!(schema.exposures[0].depends_on.len(), 2);
}
// A document that repeats the top-level `sources:` key must still parse;
// the assertions expect the last occurrence to win.
#[test]
fn test_parse_duplicate_mapping_keys() {
let yaml = r#"
sources:
- name: raw
tables:
- name: orders
sources:
- name: other
tables:
- name: users
"#;
let schema = parse_schema_file(yaml, None).unwrap();
assert_eq!(schema.sources.len(), 1);
assert_eq!(schema.sources[0].name, "other");
}
#[test]
fn test_empty_file() {
    // An empty document must parse and leave every checked section empty.
    let SchemaFile {
        sources,
        models,
        snapshots,
        exposures,
        ..
    } = parse_schema_file("", None).unwrap();

    assert!(sources.is_empty());
    assert!(models.is_empty());
    assert!(snapshots.is_empty());
    assert!(exposures.is_empty());
}
// Parses a `snapshots:` document and checks names, descriptions and the raw
// `relation` strings, including a snapshot that has no relation at all.
#[test]
fn test_parse_yaml_only_snapshots() {
let yaml = r#"
snapshots:
- name: snap_orders
description: Orders snapshot
relation: ref('stg_orders')
- name: snap_customers
relation: ref('stg_customers', version=2)
- name: snap_no_relation
description: Snapshot without upstream relation
"#;
let schema = parse_schema_file(yaml, None).unwrap();
assert_eq!(schema.snapshots.len(), 3);
assert_eq!(schema.snapshots[0].name, "snap_orders");
assert_eq!(
schema.snapshots[0].description.as_deref(),
Some("Orders snapshot")
);
assert_eq!(
schema.snapshots[0].relation.as_deref(),
Some("ref('stg_orders')")
);
assert_eq!(schema.snapshots[1].name, "snap_customers");
assert_eq!(
schema.snapshots[1].relation.as_deref(),
Some("ref('stg_customers', version=2)")
);
assert!(schema.snapshots[2].relation.is_none());
}
// A model with explicit `latest_version` and two versions: checks `v_str`,
// the default `<name>_v<version>` stem, the `defined_in` override, and the
// resolved latest version.
#[test]
fn test_parse_versioned_model() {
let yaml = r#"
models:
- name: my_model
description: A versioned model
latest_version: 2
versions:
- v: 1
- v: 2
defined_in: my_model_custom
"#;
let schema = parse_schema_file(yaml, None).unwrap();
assert_eq!(schema.models.len(), 1);
let m = &schema.models[0];
assert_eq!(m.name, "my_model");
assert_eq!(m.versions.len(), 2);
assert_eq!(m.versions[0].v_str(), "1");
assert_eq!(m.versions[0].sql_stem("my_model"), "my_model_v1");
assert_eq!(m.versions[1].v_str(), "2");
assert_eq!(m.versions[1].sql_stem("my_model"), "my_model_custom");
assert_eq!(m.resolved_latest_version_str().as_deref(), Some("2"));
}
// Without `latest_version`, the latest version is the numerically greatest
// `v`, regardless of the order in which versions are listed.
#[test]
fn test_versioned_model_infers_latest_from_max_v() {
let yaml = r#"
models:
- name: orders
versions:
- v: 1
- v: 3
- v: 2
"#;
let schema = parse_schema_file(yaml, None).unwrap();
let m = &schema.models[0];
assert_eq!(m.resolved_latest_version_str().as_deref(), Some("3"));
}
// Same inference as the unquoted case, but with the version identifiers
// written as quoted strings.
#[test]
fn test_versioned_model_infers_latest_from_quoted_v() {
let yaml = r#"
models:
- name: orders
versions:
- v: "1"
- v: "3"
- v: "2"
"#;
let schema = parse_schema_file(yaml, None).unwrap();
let m = &schema.models[0];
assert_eq!(m.resolved_latest_version_str().as_deref(), Some("3"));
}
#[test]
fn test_v_str_normalizes_quoted_numeric() {
    // String identifiers: numeric text and non-numeric text must both come
    // back as the same text, including an integer above 2^53.
    let string_cases = [
        ("2", "2"),
        ("10", "10"),
        ("alpha", "alpha"),
        ("9007199254740993", "9007199254740993"),
    ];
    for (input, expected) in string_cases {
        let spec = VersionSpec {
            v: serde_json::Value::String(input.to_string()),
            defined_in: None,
        };
        assert_eq!(spec.v_str(), expected);
    }

    // An unsigned number just past i64::MAX must not be truncated.
    let beyond_i64 = i64::MAX as u64 + 1;
    let spec = VersionSpec {
        v: serde_json::Value::Number(serde_json::Number::from(beyond_i64)),
        defined_in: None,
    };
    assert_eq!(spec.v_str(), beyond_i64.to_string());
}
// A model with no version keys has no versions, no `latest_version`, and
// resolves to no latest version.
#[test]
fn test_unversioned_model_has_empty_versions() {
let yaml = r#"
models:
- name: plain_model
description: Not versioned
"#;
let schema = parse_schema_file(yaml, None).unwrap();
let m = &schema.models[0];
assert!(m.versions.is_empty());
assert!(m.latest_version.is_none());
assert!(m.resolved_latest_version_str().is_none());
}
#[test]
fn test_test_name_extraction() {
    // Each case pairs a test entry with the name `test_name` should report.
    let cases = [
        // Bare string: the string is the name.
        (
            TestDefinition::Simple("not_null".to_string()),
            Some("not_null"),
        ),
        // Single-key mapping: the key is the name.
        (
            TestDefinition::Complex(serde_json::json!({
                "unique": {"config": {"where": "id > 0"}}
            })),
            Some("unique"),
        ),
        // Explicit `test_name` wins over `name`.
        (
            TestDefinition::Complex(serde_json::json!({
                "name": "custom_test_name",
                "test_name": "accepted_values",
                "arguments": {"values": [1, 2]}
            })),
            Some("accepted_values"),
        ),
        (
            TestDefinition::Complex(serde_json::json!({
                "relationships": {"arguments": {"to": "ref('customers')", "field": "id"}}
            })),
            Some("relationships"),
        ),
        // Only a reserved key present: no name can be derived.
        (
            TestDefinition::Complex(serde_json::json!({"name": "something"})),
            None,
        ),
    ];

    for (definition, expected) in cases {
        assert_eq!(definition.test_name(), expected);
    }
}
// Parses a `semantic_models:` document and checks name, description, model
// reference and measures. The fixture also contains a `dimensions:` key that
// has no matching struct field.
#[test]
fn test_parse_semantic_models() {
let yaml = r#"
semantic_models:
- name: orders
description: Order semantic model
model: ref('orders')
measures:
- name: order_total
- name: order_count
dimensions:
- name: ordered_at
type: time
"#;
let schema = parse_schema_file(yaml, None).unwrap();
assert_eq!(schema.semantic_models.len(), 1);
let sm = &schema.semantic_models[0];
assert_eq!(sm.name, "orders");
assert_eq!(sm.description.as_deref(), Some("Order semantic model"));
assert_eq!(sm.model.as_deref(), Some("ref('orders')"));
assert_eq!(sm.measures.len(), 2);
assert_eq!(sm.measures[0].name, "order_total");
assert_eq!(sm.measures[1].name, "order_count");
}
// A simple metric whose `type_params.measure` is a bare string: it must show
// up in `measure_refs` and contribute nothing to `metric_refs`.
#[test]
fn test_parse_metrics_simple() {
let yaml = r#"
metrics:
- name: order_total
label: Order Total
description: Sum of orders
type: simple
type_params:
measure: order_total
"#;
let schema = parse_schema_file(yaml, None).unwrap();
assert_eq!(schema.metrics.len(), 1);
let m = &schema.metrics[0];
assert_eq!(m.name, "order_total");
assert_eq!(m.label.as_deref(), Some("Order Total"));
assert_eq!(m.measure_refs(), vec!["order_total"]);
assert!(m.metric_refs().is_empty());
}
// Same as the simple-metric case, but with `measure` written as an object
// carrying a `name` key.
#[test]
fn test_parse_metrics_simple_with_object_measure() {
let yaml = r#"
metrics:
- name: order_total
type: simple
type_params:
measure:
name: order_total
fill_nulls_with: 0
"#;
let schema = parse_schema_file(yaml, None).unwrap();
let m = &schema.metrics[0];
assert_eq!(m.measure_refs(), vec!["order_total"]);
assert!(m.metric_refs().is_empty());
}
// A ratio metric: `numerator` and `denominator` are metric references, and
// there are no measure references.
#[test]
fn test_parse_metrics_ratio() {
let yaml = r#"
metrics:
- name: revenue_per_order
type: ratio
type_params:
numerator: revenue
denominator: orders
"#;
let schema = parse_schema_file(yaml, None).unwrap();
let m = &schema.metrics[0];
assert!(m.measure_refs().is_empty());
let refs = m.metric_refs();
assert!(refs.contains(&"revenue"));
assert!(refs.contains(&"orders"));
}
// A derived metric: items of both `input_metrics` and `metrics` are metric
// references, whether written as objects with `name` or as bare strings.
#[test]
fn test_parse_metrics_derived_with_input_metrics_and_metrics() {
let yaml = r#"
metrics:
- name: pct_change
type: derived
type_params:
input_metrics:
- name: revenue
- orders
metrics:
- name: margin
- customer_count
"#;
let schema = parse_schema_file(yaml, None).unwrap();
let m = &schema.metrics[0];
assert!(m.measure_refs().is_empty());
let refs = m.metric_refs();
assert!(refs.contains(&"revenue"));
assert!(refs.contains(&"orders"));
assert!(refs.contains(&"margin"));
assert!(refs.contains(&"customer_count"));
}
// A conversion metric expressed with measures: `base_measure` and
// `conversion_measure` are measure references, not metric references.
#[test]
fn test_parse_metrics_conversion_measure() {
let yaml = r#"
metrics:
- name: visitors_who_bought
type: conversion
type_params:
base_measure:
name: visitors
conversion_measure:
name: buyers
entity: user
"#;
let schema = parse_schema_file(yaml, None).unwrap();
let m = &schema.metrics[0];
let refs = m.measure_refs();
assert!(
refs.contains(&"visitors"),
"base_measure should be included"
);
assert!(
refs.contains(&"buyers"),
"conversion_measure should be included"
);
assert!(m.metric_refs().is_empty());
}
// A conversion metric expressed with top-level `base_metric` (string) and
// `conversion_metric` (object): both must appear in `metric_refs`.
#[test]
fn test_parse_metrics_conversion_metric() {
let yaml = r#"
metrics:
- name: visit_to_purchase
type: conversion
base_metric: visits
conversion_metric:
name: purchases
filter: "{{ Dimension('user__country') }} = 'US'"
entity: user
window: 7 days
"#;
let schema = parse_schema_file(yaml, None).unwrap();
let m = &schema.metrics[0];
assert!(m.measure_refs().is_empty());
let refs = m.metric_refs();
assert!(
refs.contains(&"visits"),
"base_metric (string) should be in metric_refs"
);
assert!(
refs.contains(&"purchases"),
"conversion_metric (object) should be in metric_refs"
);
}
// A cumulative metric with a top-level `input_metric`: it must appear in
// `metric_refs`, and there are no measure references.
#[test]
fn test_parse_metrics_cumulative_input_metric() {
let yaml = r#"
metrics:
- name: cumulative_revenue
type: cumulative
input_metric: revenue
window: 1 month
"#;
let schema = parse_schema_file(yaml, None).unwrap();
let m = &schema.metrics[0];
assert!(m.measure_refs().is_empty());
let refs = m.metric_refs();
assert!(
refs.contains(&"revenue"),
"input_metric should be in metric_refs"
);
}
// Parses a `saved_queries:` document and checks the name, description and
// the ordered list of metric names under `query_params`.
#[test]
fn test_parse_saved_queries() {
let yaml = r#"
saved_queries:
- name: order_metrics
description: Key order metrics
query_params:
metrics:
- orders
- order_total
- food_orders
"#;
let schema = parse_schema_file(yaml, None).unwrap();
assert_eq!(schema.saved_queries.len(), 1);
let sq = &schema.saved_queries[0];
assert_eq!(sq.name, "order_metrics");
assert_eq!(sq.description.as_deref(), Some("Key order metrics"));
let metrics = sq.query_params.as_ref().unwrap().metrics.as_slice();
assert_eq!(metrics, &["orders", "order_total", "food_orders"]);
}
// One document containing models, semantic models, metrics and saved queries
// together: checks that each section is populated with the expected count.
#[test]
fn test_parse_full_semantic_layer_yaml() {
let yaml = r#"
models:
- name: orders
description: Orders table
semantic_models:
- name: orders
model: ref('orders')
measures:
- name: order_count
- name: order_total
metrics:
- name: orders
type: simple
type_params:
measure: order_count
- name: order_total
type: simple
type_params:
measure: order_total
saved_queries:
- name: order_kpis
query_params:
metrics:
- orders
- order_total
"#;
let schema = parse_schema_file(yaml, None).unwrap();
assert_eq!(schema.models.len(), 1);
assert_eq!(schema.semantic_models.len(), 1);
assert_eq!(schema.metrics.len(), 2);
assert_eq!(schema.saved_queries.len(), 1);
assert_eq!(
schema.saved_queries[0]
.query_params
.as_ref()
.unwrap()
.metrics
.len(),
2
);
}
}