iceberg_rust_spec/spec/
materialized_view_metadata.rs

1//! Materialized view metadata types and functionality
2//!
3//! This module contains the types and implementations for managing materialized view metadata in Apache Iceberg.
4//! It includes structures for tracking view states, source tables, and refresh operations.
5//!
6//! The main types are:
7//! - [`MaterializedViewMetadata`]: The top-level metadata for a materialized view
8//! - [`RefreshState`]: Information about the last refresh operation
9//! - [`SourceTables`]: Collection of source table states
10//! - [`SourceViews`]: Collection of source view states
11
12use std::{collections::HashMap, ops::Deref};
13
14use serde::{Deserialize, Serialize};
15use uuid::Uuid;
16
17use crate::identifier::FullIdentifier;
18
19use super::{
20    tabular::TabularMetadataRef,
21    view_metadata::{GeneralViewMetadata, GeneralViewMetadataBuilder},
22};
23
/// The literal key `"refresh-state"`.
///
/// NOTE(review): presumably the property name under which a serialized [`RefreshState`] is
/// stored; the users of this constant live outside this file, so confirm against them.
///
/// Declared as `const` rather than `static`: the value is a plain string literal that needs
/// neither interior mutability nor a single fixed address, and a `const` can additionally be
/// used in patterns.
pub const REFRESH_STATE: &str = "refresh-state";
25
26/// Fields for the version 1 of the view metadata.
27pub type MaterializedViewMetadata = GeneralViewMetadata<FullIdentifier>;
28/// Builder for materialized view metadata
29pub type MaterializedViewMetadataBuilder = GeneralViewMetadataBuilder<FullIdentifier>;
30
31impl MaterializedViewMetadata {
32    pub fn as_ref(&self) -> TabularMetadataRef<'_> {
33        TabularMetadataRef::MaterializedView(self)
34    }
35}
36
37#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)]
38#[serde(rename_all = "kebab-case")]
39/// Freshness information of the materialized view
40pub struct RefreshState {
41    /// The version-id of the materialized view when the refresh operation was performed.
42    pub refresh_version_id: i64,
43    /// A map from sequence-id (as defined in the view lineage) to the source tables’ snapshot-id of when the last refresh operation was performed.
44    pub source_table_states: SourceTables,
45    /// A map from sequence-id (as defined in the view lineage) to the source views’ version-id of when the last refresh operation was performed.
46    pub source_view_states: SourceViews,
47}
48
49/// Represents a collection of source table states in a materialized view refresh
50///
51/// # Fields
52/// * `0` - A HashMap mapping (table UUID, optional reference) pairs to snapshot IDs
53#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)]
54#[serde(from = "Vec<SourceTable>", into = "Vec<SourceTable>")]
55pub struct SourceTables(pub HashMap<(Uuid, Option<String>), i64>);
56
57/// Represents a collection of source view states in a materialized view refresh
58///
59/// # Fields
60/// * `0` - A HashMap mapping (table UUID, optional reference) pairs to version IDs
61#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)]
62#[serde(from = "Vec<SourceView>", into = "Vec<SourceView>")]
63pub struct SourceViews(pub HashMap<(Uuid, Option<String>), i64>);
64
65/// Represents a source table state in a materialized view refresh
66///
67/// # Fields
68/// * `uuid` - The UUID of the source table
69/// * `snapshot_id` - The snapshot ID of the source table at refresh time
70/// * `ref` - Optional reference name (e.g. branch or tag) used to access the source table
71#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)]
72#[serde(rename_all = "kebab-case")]
73pub struct SourceTable {
74    uuid: Uuid,
75    snapshot_id: i64,
76    r#ref: Option<String>,
77}
78
79/// Represents a source view state in a materialized view refresh
80///
81/// # Fields
82/// * `uuid` - The UUID of the source view
83/// * `version_id` - The version ID of the source view at refresh time
84#[derive(Debug, Serialize, Deserialize, PartialEq, Eq, Clone)]
85#[serde(rename_all = "kebab-case")]
86pub struct SourceView {
87    uuid: Uuid,
88    version_id: i64,
89}
90
91impl From<Vec<SourceTable>> for SourceTables {
92    fn from(value: Vec<SourceTable>) -> Self {
93        SourceTables(
94            value
95                .into_iter()
96                .map(|x| ((x.uuid, x.r#ref), x.snapshot_id))
97                .collect(),
98        )
99    }
100}
101
102impl From<SourceTables> for Vec<SourceTable> {
103    fn from(value: SourceTables) -> Self {
104        value
105            .0
106            .into_iter()
107            .map(|((uuid, r#ref), snapshot_id)| SourceTable {
108                uuid,
109                snapshot_id,
110                r#ref,
111            })
112            .collect()
113    }
114}
115
116impl From<Vec<SourceView>> for SourceViews {
117    fn from(value: Vec<SourceView>) -> Self {
118        SourceViews(
119            value
120                .into_iter()
121                .map(|x| ((x.uuid, None), x.version_id))
122                .collect(),
123        )
124    }
125}
126
127impl From<SourceViews> for Vec<SourceView> {
128    fn from(value: SourceViews) -> Self {
129        value
130            .0
131            .into_iter()
132            .map(|((uuid, _), version_id)| SourceView { uuid, version_id })
133            .collect()
134    }
135}
136
137impl Deref for SourceTables {
138    type Target = HashMap<(Uuid, Option<String>), i64>;
139    fn deref(&self) -> &Self::Target {
140        &self.0
141    }
142}
143
144impl Deref for SourceViews {
145    type Target = HashMap<(Uuid, Option<String>), i64>;
146    fn deref(&self) -> &Self::Target {
147        &self.0
148    }
149}
150
#[cfg(test)]
mod tests {

    use crate::{error::Error, spec::materialized_view_metadata::MaterializedViewMetadata};

    /// Deserializes a version 1 materialized view metadata document and checks that a
    /// serialize/deserialize round trip reproduces an equal value.
    #[test]
    fn test_deserialize_materialized_view_metadata_v1() -> Result<(), Error> {
        let data = r#"
        {
        "view-uuid": "fa6506c3-7681-40c8-86dc-e36561f83385",
        "format-version" : 1,
        "location" : "s3://bucket/warehouse/default.db/event_agg",
        "current-version-id" : 1,
        "properties" : {
            "comment" : "Daily event counts"
        },
        "versions" : [ {
            "version-id" : 1,
            "timestamp-ms" : 1573518431292,
            "schema-id" : 1,
            "default-catalog" : "prod",
            "default-namespace" : [ "default" ],
            "summary" : {
            "operation" : "create",
            "engine-name" : "Spark",
            "engineVersion" : "3.3.2"
            },
            "representations" : [ {
            "type" : "sql",
            "sql" : "SELECT\n    COUNT(1), CAST(event_ts AS DATE)\nFROM events\nGROUP BY 2",
            "dialect" : "spark"
            } ],
            "storage-table": {
                "catalog": "prod",
                "namespace": ["default"],
                "name": "event_agg_storage"
            }
        } ],
        "schemas": [ {
            "schema-id": 1,
            "type" : "struct",
            "fields" : [ {
            "id" : 1,
            "name" : "event_count",
            "required" : false,
            "type" : "int",
            "doc" : "Count of events"
            }, {
            "id" : 2,
            "name" : "event_date",
            "required" : false,
            "type" : "date"
            } ]
        } ],
        "version-log" : [ {
            "timestamp-ms" : 1573518431292,
            "version-id" : 1
        } ]
        }
        "#;
        let metadata = serde_json::from_str::<MaterializedViewMetadata>(data)
            .expect("Failed to deserialize json");
        // Round trip: serializing and then deserializing again must reproduce the same value.
        let serialized = serde_json::to_string(&metadata).expect("Failed to serialize metadata");
        let metadata_two: MaterializedViewMetadata =
            serde_json::from_str(&serialized).expect("Failed to deserialize serialized metadata");
        assert_eq!(metadata, metadata_two);

        Ok(())
    }
}
222}