Skip to main content

data_modelling_core/models/
dbmv.rs

//! Databricks Metric Views (DBMV) model
//!
//! Defines the data structures for Databricks Metric Views — a semantic layer
//! format that transforms raw tables into standardised business metrics.
//!
//! ## File Format
//!
//! DBMV documents use the `.dbmv.yaml` extension and contain one or more
//! metric view definitions per file, wrapped in an SDK envelope format.
//!
//! The envelope uses **camelCase** keys (`apiVersion`, `kind`, `metricViews`)
//! while the inner Databricks-native content uses **snake_case** keys
//! (`display_name`, `materialized_views`).
//!
//! ## Example
//!
//! ```yaml
//! apiVersion: v1.0.0
//! kind: MetricViews
//! system: my-databricks-system
//! metricViews:
//!   - name: orders_metrics
//!     source: catalog.schema.orders
//!     dimensions:
//!       - name: order_date
//!         expr: order_date
//!     measures:
//!       - name: total_revenue
//!         expr: SUM(revenue)
//! ```
31
32use serde::{Deserialize, Serialize};
33
/// Version string applied to a metric view when none is supplied.
///
/// Used both as the serde fallback for a missing `version` key and by the
/// `Default` implementation of the metric view type.
fn default_version() -> String {
    String::from("1.1")
}
38
/// API version written into the document envelope when none is supplied.
fn default_api_version() -> String {
    String::from("v1.0.0")
}
43
/// Document kind written into the envelope when none is supplied.
fn default_kind() -> String {
    String::from("MetricViews")
}
48
49/// DBMV Document — wrapper envelope for multiple metric views
50///
51/// Uses camelCase for the envelope fields to match SDK conventions.
52/// One document per system, containing multiple metric view definitions.
53#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
54#[serde(rename_all = "camelCase")]
55pub struct DBMVDocument {
56    /// API version of the DBMV format (e.g., "v1.0.0")
57    #[serde(default = "default_api_version")]
58    pub api_version: String,
59    /// Document kind — always "MetricViews"
60    #[serde(default = "default_kind")]
61    pub kind: String,
62    /// System name this document belongs to
63    pub system: String,
64    /// Optional description of the metric views collection
65    #[serde(skip_serializing_if = "Option::is_none")]
66    pub description: Option<String>,
67    /// Metric view definitions
68    #[serde(default)]
69    pub metric_views: Vec<DBMVMetricView>,
70}
71
72impl Default for DBMVDocument {
73    fn default() -> Self {
74        Self {
75            api_version: default_api_version(),
76            kind: default_kind(),
77            system: String::new(),
78            description: None,
79            metric_views: Vec::new(),
80        }
81    }
82}
83
84impl DBMVDocument {
85    /// Create a new DBMV document for a system
86    pub fn new(system: impl Into<String>) -> Self {
87        Self {
88            system: system.into(),
89            ..Default::default()
90        }
91    }
92
93    /// Add a metric view to the document
94    pub fn add_metric_view(&mut self, view: DBMVMetricView) {
95        self.metric_views.push(view);
96    }
97
98    /// Get a metric view by name
99    pub fn get_metric_view(&self, name: &str) -> Option<&DBMVMetricView> {
100        self.metric_views.iter().find(|v| v.name == name)
101    }
102
103    /// Import from YAML
104    pub fn from_yaml(yaml_content: &str) -> Result<Self, serde_yaml::Error> {
105        serde_yaml::from_str(yaml_content)
106    }
107
108    /// Export to YAML
109    pub fn to_yaml(&self) -> Result<String, serde_yaml::Error> {
110        serde_yaml::to_string(self)
111    }
112}
113
114/// Databricks Metric View definition
115///
116/// Uses snake_case (Rust default) to match Databricks native YAML format.
117#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
118pub struct DBMVMetricView {
119    /// Metric view name
120    pub name: String,
121    /// Version of the metric view definition
122    #[serde(default = "default_version")]
123    pub version: String,
124    /// Fully qualified source table (e.g., "catalog.schema.table")
125    pub source: String,
126    /// Optional SQL filter expression applied to the source
127    #[serde(skip_serializing_if = "Option::is_none")]
128    pub filter: Option<String>,
129    /// Optional comment/description
130    #[serde(skip_serializing_if = "Option::is_none")]
131    pub comment: Option<String>,
132    /// Dimension definitions
133    #[serde(default, skip_serializing_if = "Vec::is_empty")]
134    pub dimensions: Vec<DBMVDimension>,
135    /// Measure definitions
136    #[serde(default, skip_serializing_if = "Vec::is_empty")]
137    pub measures: Vec<DBMVMeasure>,
138    /// Join definitions (supports nested joins for snowflake schemas)
139    #[serde(default, skip_serializing_if = "Vec::is_empty")]
140    pub joins: Vec<DBMVJoin>,
141    /// Materialization configuration
142    #[serde(skip_serializing_if = "Option::is_none")]
143    pub materialization: Option<DBMVMaterialization>,
144}
145
146impl Default for DBMVMetricView {
147    fn default() -> Self {
148        Self {
149            name: String::new(),
150            version: default_version(),
151            source: String::new(),
152            filter: None,
153            comment: None,
154            dimensions: Vec::new(),
155            measures: Vec::new(),
156            joins: Vec::new(),
157            materialization: None,
158        }
159    }
160}
161
162/// Dimension definition in a metric view
163#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
164pub struct DBMVDimension {
165    /// Dimension name
166    pub name: String,
167    /// SQL expression for the dimension
168    pub expr: String,
169    /// Human-readable display name
170    #[serde(skip_serializing_if = "Option::is_none")]
171    pub display_name: Option<String>,
172    /// Optional comment/description
173    #[serde(skip_serializing_if = "Option::is_none")]
174    pub comment: Option<String>,
175}
176
177/// Measure definition in a metric view
178#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
179pub struct DBMVMeasure {
180    /// Measure name
181    pub name: String,
182    /// SQL aggregation expression (e.g., "SUM(revenue)")
183    pub expr: String,
184    /// Human-readable display name
185    #[serde(skip_serializing_if = "Option::is_none")]
186    pub display_name: Option<String>,
187    /// Optional comment/description
188    #[serde(skip_serializing_if = "Option::is_none")]
189    pub comment: Option<String>,
190    /// Format specification for the measure
191    #[serde(skip_serializing_if = "Option::is_none")]
192    pub format: Option<DBMVMeasureFormat>,
193    /// Window function specifications
194    #[serde(default, skip_serializing_if = "Vec::is_empty")]
195    pub window: Vec<DBMVWindow>,
196}
197
198/// Format specification for a measure
199#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
200pub struct DBMVMeasureFormat {
201    /// Format type (e.g., "currency", "percentage", "number")
202    #[serde(rename = "type")]
203    pub format_type: String,
204}
205
206/// Window function specification for a measure
207#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
208pub struct DBMVWindow {
209    /// Column to order by
210    pub order: String,
211    /// Window range (e.g., "cumulative", "unbounded")
212    #[serde(skip_serializing_if = "Option::is_none")]
213    pub range: Option<String>,
214    /// Semi-additive behaviour (e.g., "last", "first")
215    #[serde(skip_serializing_if = "Option::is_none")]
216    pub semiadditive: Option<String>,
217}
218
219/// Join definition (supports recursive nesting for snowflake schemas)
220#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
221pub struct DBMVJoin {
222    /// Join alias name
223    pub name: String,
224    /// Fully qualified source table for the join
225    pub source: String,
226    /// Join condition expression (e.g., "source.customer_id = customers.id")
227    #[serde(skip_serializing_if = "Option::is_none")]
228    pub on: Option<String>,
229    /// Column names for equi-join (alternative to `on`)
230    #[serde(default, skip_serializing_if = "Vec::is_empty")]
231    pub using: Vec<String>,
232    /// Nested joins (for snowflake schema patterns)
233    #[serde(default, skip_serializing_if = "Vec::is_empty")]
234    pub joins: Vec<DBMVJoin>,
235}
236
237/// Materialization configuration for a metric view
238#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
239pub struct DBMVMaterialization {
240    /// Refresh schedule (e.g., "every 6 hours", "daily")
241    pub schedule: String,
242    /// Materialization mode (e.g., "relaxed", "strict")
243    pub mode: String,
244    /// Pre-computed materialized views
245    #[serde(default, skip_serializing_if = "Vec::is_empty")]
246    pub materialized_views: Vec<DBMVMaterializedView>,
247}
248
249/// Pre-computed materialized view definition
250#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
251pub struct DBMVMaterializedView {
252    /// Materialized view name
253    pub name: String,
254    /// View type: "aggregated" or "unaggregated"
255    #[serde(rename = "type")]
256    pub view_type: String,
257    /// Dimensions to include (for aggregated views)
258    #[serde(default, skip_serializing_if = "Vec::is_empty")]
259    pub dimensions: Vec<String>,
260    /// Measures to include (for aggregated views)
261    #[serde(default, skip_serializing_if = "Vec::is_empty")]
262    pub measures: Vec<String>,
263}
264
#[cfg(test)]
mod tests {
    use super::*;

    /// `new` stores the system name and fills the envelope defaults.
    #[test]
    fn test_document_new() {
        let document = DBMVDocument::new("my-system");

        assert_eq!(document.system, "my-system");
        assert_eq!(document.api_version, "v1.0.0");
        assert_eq!(document.kind, "MetricViews");
        assert!(document.metric_views.is_empty());
    }

    /// Added views are stored and can be found again by name.
    #[test]
    fn test_document_add_metric_view() {
        let orders = DBMVMetricView {
            name: String::from("orders"),
            source: String::from("catalog.schema.orders"),
            ..Default::default()
        };

        let mut document = DBMVDocument::new("test-system");
        document.add_metric_view(orders);

        assert_eq!(document.metric_views.len(), 1);
        let found = document.get_metric_view("orders").unwrap();
        assert_eq!(found.name, "orders");
        assert!(document.get_metric_view("nonexistent").is_none());
    }

    /// A default metric view carries version "1.1".
    #[test]
    fn test_default_version() {
        let blank_view = DBMVMetricView::default();

        assert_eq!(blank_view.version, "1.1");
    }

    /// `format_type` is written under the YAML key `type`.
    #[test]
    fn test_measure_format_type_rename() {
        let currency = DBMVMeasureFormat {
            format_type: String::from("currency"),
        };

        let rendered = serde_yaml::to_string(&currency).unwrap();

        assert!(rendered.contains("type: currency"));
    }

    /// `view_type` is written under the YAML key `type`.
    #[test]
    fn test_materialized_view_type_rename() {
        let aggregated = DBMVMaterializedView {
            name: String::from("test"),
            view_type: String::from("aggregated"),
            dimensions: Vec::new(),
            measures: Vec::new(),
        };

        let rendered = serde_yaml::to_string(&aggregated).unwrap();

        assert!(rendered.contains("type: aggregated"));
    }

    /// A populated document survives a YAML serialise/deserialise cycle unchanged.
    #[test]
    fn test_document_yaml_roundtrip() {
        let order_date = DBMVDimension {
            name: String::from("order_date"),
            expr: String::from("order_date"),
            display_name: Some(String::from("Order Date")),
            comment: None,
        };
        let total_revenue = DBMVMeasure {
            name: String::from("total_revenue"),
            expr: String::from("SUM(revenue)"),
            display_name: Some(String::from("Total Revenue")),
            comment: None,
            format: Some(DBMVMeasureFormat {
                format_type: String::from("currency"),
            }),
            window: Vec::new(),
        };

        let mut original = DBMVDocument::new("test-system");
        original.description = Some(String::from("Test metrics"));
        original.add_metric_view(DBMVMetricView {
            name: String::from("orders_metrics"),
            source: String::from("catalog.schema.orders"),
            dimensions: vec![order_date],
            measures: vec![total_revenue],
            ..Default::default()
        });

        let rendered = original.to_yaml().unwrap();
        let restored = DBMVDocument::from_yaml(&rendered).unwrap();

        assert_eq!(original, restored);
    }

    /// Envelope keys are emitted in camelCase, never in snake_case.
    #[test]
    fn test_camel_case_envelope_snake_case_inner() {
        let rendered = DBMVDocument::new("test").to_yaml().unwrap();

        // Envelope fields should be camelCase
        for expected_key in ["apiVersion:", "metricViews:"].iter() {
            assert!(rendered.contains(*expected_key));
        }

        // These should NOT appear (wrong casing)
        for wrong_key in ["api_version:", "metric_views:"].iter() {
            assert!(!rendered.contains(*wrong_key));
        }
    }

    /// Keys of the nested types keep their snake_case field names.
    #[test]
    fn test_inner_fields_snake_case() {
        let dimension = DBMVDimension {
            name: String::from("dim1"),
            expr: String::from("col1"),
            display_name: Some(String::from("Dimension 1")),
            comment: None,
        };
        let measure = DBMVMeasure {
            name: String::from("measure1"),
            expr: String::from("SUM(col2)"),
            display_name: None,
            comment: None,
            format: None,
            window: Vec::new(),
        };

        let mut document = DBMVDocument::new("test");
        document.add_metric_view(DBMVMetricView {
            name: String::from("test_view"),
            source: String::from("catalog.schema.table"),
            dimensions: vec![dimension],
            measures: vec![measure],
            ..Default::default()
        });

        let rendered = document.to_yaml().unwrap();

        // Inner fields should be snake_case (Rust default, no rename)
        assert!(rendered.contains("display_name:"));
    }

    /// A join nested inside another join is serialised and restored intact.
    #[test]
    fn test_nested_joins() {
        let nation = DBMVJoin {
            name: String::from("nation"),
            source: String::from("catalog.schema.nations"),
            on: Some(String::from("customers.nation_id = nation.id")),
            using: Vec::new(),
            joins: Vec::new(),
        };
        let customers = DBMVJoin {
            name: String::from("customers"),
            source: String::from("catalog.schema.customers"),
            on: Some(String::from("source.customer_id = customers.id")),
            using: Vec::new(),
            joins: vec![nation],
        };

        let rendered = serde_yaml::to_string(&customers).unwrap();
        assert!(rendered.contains("nation"));
        assert!(rendered.contains("customers.nation_id"));

        // Roundtrip
        let restored: DBMVJoin = serde_yaml::from_str(&rendered).unwrap();
        assert_eq!(customers, restored);
    }

    /// A measure with a window specification round-trips through YAML.
    #[test]
    fn test_window_measure() {
        let cumulative_window = DBMVWindow {
            order: String::from("order_date"),
            range: Some(String::from("cumulative")),
            semiadditive: Some(String::from("last")),
        };
        let ytd_revenue = DBMVMeasure {
            name: String::from("ytd_revenue"),
            expr: String::from("SUM(revenue)"),
            display_name: None,
            comment: None,
            format: None,
            window: vec![cumulative_window],
        };

        let rendered = serde_yaml::to_string(&ytd_revenue).unwrap();
        let restored: DBMVMeasure = serde_yaml::from_str(&rendered).unwrap();

        assert_eq!(ytd_revenue, restored);
    }

    /// Materialization settings use the snake_case key and round-trip through YAML.
    #[test]
    fn test_materialization() {
        let baseline = DBMVMaterializedView {
            name: String::from("baseline"),
            view_type: String::from("unaggregated"),
            dimensions: Vec::new(),
            measures: Vec::new(),
        };
        let revenue_by_date = DBMVMaterializedView {
            name: String::from("revenue_by_date"),
            view_type: String::from("aggregated"),
            dimensions: vec![String::from("order_date")],
            measures: vec![String::from("total_revenue")],
        };
        let materialization = DBMVMaterialization {
            schedule: String::from("every 6 hours"),
            mode: String::from("relaxed"),
            materialized_views: vec![baseline, revenue_by_date],
        };

        let rendered = serde_yaml::to_string(&materialization).unwrap();
        assert!(rendered.contains("materialized_views:"));

        let restored: DBMVMaterialization = serde_yaml::from_str(&rendered).unwrap();
        assert_eq!(materialization, restored);
    }

    /// Unset optional fields and empty lists do not appear in the output.
    #[test]
    fn test_optional_fields_omitted() {
        let minimal = DBMVMetricView {
            name: String::from("simple"),
            source: String::from("catalog.schema.table"),
            ..Default::default()
        };

        let rendered = serde_yaml::to_string(&minimal).unwrap();

        let omitted_keys = [
            "filter:",
            "comment:",
            "dimensions:",
            "measures:",
            "joins:",
            "materialization:",
        ];
        for key in omitted_keys.iter() {
            assert!(!rendered.contains(*key));
        }
    }
}