iceberg_rust_spec/spec/
tabular.rs

1/*!
2 * Tabular metadata types and traits
3 *
4 * This module provides types for working with metadata for different tabular data structures
5 * in Iceberg, including tables, views, and materialized views. It defines common traits and
6 * implementations that allow working with these different types through a unified interface.
7 *
8 * The main types are:
9 * - TabularMetadata: An enum for owned metadata of different tabular types
10 * - TabularMetadataRef: A reference-based version for borrowed metadata
11 *
12 * These types allow code to handle tables, views, and materialized views generically while
13 * preserving their specific metadata structures and behaviors.
14 */
15
16use std::{fmt, str};
17
18use serde::{Deserialize, Serialize};
19use uuid::Uuid;
20
21use crate::{error::Error, schema::Schema};
22
23use super::{
24    materialized_view_metadata::MaterializedViewMetadata, table_metadata::TableMetadata,
25    view_metadata::ViewMetadata,
26};
27
/// Represents metadata for different types of tabular data structures in Iceberg
///
/// This enum provides a unified way to handle metadata for tables, views, and materialized views.
/// It allows working with different tabular types through a common interface while preserving
/// their specific metadata structures.
///
/// The enum is `#[serde(untagged)]`: it serializes as the inner metadata document without any
/// variant tag, and on deserialization serde tries the variants in declaration order
/// (`Table`, then `View`, then `MaterializedView`) and keeps the first one that parses.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq, Eq)]
#[serde(untagged)]
// The variants store their metadata structs inline (unboxed), so the size-difference lint is
// silenced here instead of boxing a variant.
// NOTE(review): presumably boxing was avoided to keep pattern matching on the variants simple
// for callers — confirm before changing.
#[allow(clippy::large_enum_variant)]
pub enum TabularMetadata {
    /// Table metadata
    Table(TableMetadata),
    /// View metadata
    View(ViewMetadata),
    /// Materialized view metadata
    MaterializedView(MaterializedViewMetadata),
}
44
45impl TabularMetadata {
46    pub fn as_ref(&self) -> TabularMetadataRef<'_> {
47        match self {
48            TabularMetadata::Table(table) => TabularMetadataRef::Table(table),
49            TabularMetadata::View(view) => TabularMetadataRef::View(view),
50            TabularMetadata::MaterializedView(matview) => {
51                TabularMetadataRef::MaterializedView(matview)
52            }
53        }
54    }
55}
56
57impl fmt::Display for TabularMetadata {
58    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
59        write!(
60            f,
61            "{}",
62            &serde_json::to_string(self).map_err(|_| fmt::Error)?,
63        )
64    }
65}
66
67impl str::FromStr for TabularMetadata {
68    type Err = Error;
69    fn from_str(s: &str) -> Result<Self, Self::Err> {
70        serde_json::from_str(s).map_err(Error::from)
71    }
72}
73
74impl From<TableMetadata> for TabularMetadata {
75    fn from(value: TableMetadata) -> Self {
76        TabularMetadata::Table(value)
77    }
78}
79
80impl From<ViewMetadata> for TabularMetadata {
81    fn from(value: ViewMetadata) -> Self {
82        TabularMetadata::View(value)
83    }
84}
85
86impl From<MaterializedViewMetadata> for TabularMetadata {
87    fn from(value: MaterializedViewMetadata) -> Self {
88        TabularMetadata::MaterializedView(value)
89    }
90}
91
92/// A reference wrapper for different types of tabular metadata
93///
94/// This enum provides a way to reference the different types of tabular metadata
95/// (tables, views, materialized views) without taking ownership. It implements
96/// common functionality for accessing metadata properties across all tabular types.
97#[derive(Serialize)]
98#[serde(untagged)]
99pub enum TabularMetadataRef<'a> {
100    /// Table metadata
101    Table(&'a TableMetadata),
102    /// View metadata
103    View(&'a ViewMetadata),
104    /// Materialized view metadata
105    MaterializedView(&'a MaterializedViewMetadata),
106}
107
108impl TabularMetadataRef<'_> {
109    /// Returns the UUID of the tabular object
110    ///
111    /// # Returns
112    /// * A reference to the UUID that uniquely identifies this table, view, or materialized view
113    pub fn uuid(&self) -> &Uuid {
114        match self {
115            TabularMetadataRef::Table(table) => &table.table_uuid,
116            TabularMetadataRef::View(view) => &view.view_uuid,
117            TabularMetadataRef::MaterializedView(matview) => &matview.view_uuid,
118        }
119    }
120
121    /// Returns the storage location of the tabular object
122    ///
123    /// # Returns
124    /// * A string reference to the base storage location (e.g. S3 path, file path)
125    ///   where this table, view, or materialized view's data is stored
126    pub fn location(&self) -> &str {
127        match self {
128            TabularMetadataRef::Table(table) => &table.location,
129            TabularMetadataRef::View(view) => &view.location,
130            TabularMetadataRef::MaterializedView(matview) => &matview.location,
131        }
132    }
133
134    /// Returns the current sequence number or version ID of the tabular object
135    ///
136    /// # Returns
137    /// * For tables: The last sequence number used to create a snapshot
138    /// * For views and materialized views: The current version ID
139    pub fn sequence_number(&self) -> i64 {
140        match self {
141            TabularMetadataRef::Table(table) => table.last_sequence_number,
142            TabularMetadataRef::View(view) => view.current_version_id,
143            TabularMetadataRef::MaterializedView(matview) => matview.current_version_id,
144        }
145    }
146
147    /// Returns the current schema for the tabular object
148    ///
149    /// # Arguments
150    /// * `branch` - Optional branch name to get schema from
151    ///
152    /// # Returns
153    /// * `Ok(&Schema)` - The current schema for this table, view, or materialized view
154    /// * `Err(Error)` - If the schema cannot be retrieved
155    pub fn current_schema(&self, branch: Option<&str>) -> Result<&Schema, Error> {
156        match self {
157            TabularMetadataRef::Table(table) => table.current_schema(branch),
158            TabularMetadataRef::View(view) => view.current_schema(branch),
159            TabularMetadataRef::MaterializedView(matview) => matview.current_schema(branch),
160        }
161    }
162}
163
164impl<'a> From<&'a TableMetadata> for TabularMetadataRef<'a> {
165    fn from(value: &'a TableMetadata) -> Self {
166        TabularMetadataRef::Table(value)
167    }
168}
169
170impl<'a> From<&'a ViewMetadata> for TabularMetadataRef<'a> {
171    fn from(value: &'a ViewMetadata) -> Self {
172        TabularMetadataRef::View(value)
173    }
174}
175
176impl<'a> From<&'a MaterializedViewMetadata> for TabularMetadataRef<'a> {
177    fn from(value: &'a MaterializedViewMetadata) -> Self {
178        TabularMetadataRef::MaterializedView(value)
179    }
180}
181
182impl<'a> From<&'a TabularMetadata> for TabularMetadataRef<'a> {
183    fn from(value: &'a TabularMetadata) -> Self {
184        match value {
185            TabularMetadata::Table(table) => TabularMetadataRef::Table(table),
186            TabularMetadata::View(view) => TabularMetadataRef::View(view),
187            TabularMetadata::MaterializedView(matview) => {
188                TabularMetadataRef::MaterializedView(matview)
189            }
190        }
191    }
192}
193
#[cfg(test)]
mod tests {

    use crate::{error::Error, tabular::TabularMetadata};

    /// Deserializes a format-version 1 view metadata document through the untagged
    /// `TabularMetadata` enum and checks that a serialize/deserialize round trip yields an
    /// equal value.
    #[test]
    fn test_deserialize_tabular_view_data_v1() -> Result<(), Error> {
        let data = r#"
        {
        "view-uuid": "fa6506c3-7681-40c8-86dc-e36561f83385",
        "format-version" : 1,
        "location" : "s3://bucket/warehouse/default.db/event_agg",
        "current-version-id" : 1,
        "properties" : {
            "comment" : "Daily event counts"
        },
        "versions" : [ {
            "version-id" : 1,
            "timestamp-ms" : 1573518431292,
            "schema-id" : 1,
            "default-catalog" : "prod",
            "default-namespace" : [ "default" ],
            "summary" : {
            "operation" : "create",
            "engine-name" : "Spark",
            "engineVersion" : "3.3.2"
            },
            "representations" : [ {
            "type" : "sql",
            "sql" : "SELECT\n    COUNT(1), CAST(event_ts AS DATE)\nFROM events\nGROUP BY 2",
            "dialect" : "spark"
            } ]
        } ],
        "schemas": [ {
            "schema-id": 1,
            "type" : "struct",
            "fields" : [ {
            "id" : 1,
            "name" : "event_count",
            "required" : false,
            "type" : "int",
            "doc" : "Count of events"
            }, {
            "id" : 2,
            "name" : "event_date",
            "required" : false,
            "type" : "date"
            } ]
        } ],
        "version-log" : [ {
            "timestamp-ms" : 1573518431292,
            "version-id" : 1
        } ]
        }
        "#;
        let metadata =
            serde_json::from_str::<TabularMetadata>(data).expect("Failed to deserialize json");

        // Round trip: serialize the parsed value and parse it again. The two steps are kept
        // separate so a failure message names the step that actually failed.
        let serialized = serde_json::to_string(&metadata).expect("Failed to serialize metadata");
        let metadata_two: TabularMetadata =
            serde_json::from_str(&serialized).expect("Failed to deserialize serialized metadata");
        assert_eq!(metadata, metadata_two);

        Ok(())
    }
}
260}