iceberg_rust/catalog/
mod.rs

//! Catalog module providing interfaces for managing Iceberg tables and metadata.
//!
//! The catalog system is a core component of Apache Iceberg that manages:
//! - Table metadata and schemas
//! - Namespace organization
//! - Storage locations and object stores
//! - Atomic updates and versioning
//!
//! # Key Components
//!
//! - [`Catalog`]: Core trait for managing tables, views, and namespaces
//! - [`CatalogList`]: Interface for managing multiple catalogs
//! - [`namespace`]: Types for organizing tables into hierarchies
//! - [`identifier`]: Types for uniquely identifying catalog objects
//!
//! # Common Operations
//!
//! - Creating and managing tables and views
//! - Organizing tables into namespaces
//! - Tracking table metadata and history
//! - Managing storage locations
//! - Performing atomic updates
//!
24
25use std::collections::HashMap;
26use std::fmt::Debug;
27use std::sync::Arc;
28
29use iceberg_rust_spec::identifier::FullIdentifier;
30use identifier::Identifier;
31
32use crate::error::Error;
33use crate::materialized_view::MaterializedView;
34use crate::table::Table;
35use crate::view::View;
36
37use self::commit::{CommitTable, CommitView};
38use self::create::{CreateMaterializedView, CreateTable, CreateView};
39use self::namespace::Namespace;
40use self::tabular::Tabular;
41
42pub mod commit;
43pub mod create;
44pub mod tabular;
45
46/// A trait representing an Iceberg catalog that manages tables, views, and namespaces.
47///
48/// The Catalog trait provides methods to:
49/// - Create, update, and delete namespaces
50/// - Create, load, and drop tables and views
51/// - List available tables and namespaces
52/// - Manage table and view metadata
53/// - Access object storage
54///
55/// Implementations must be Send + Sync for concurrent access and Debug for logging/debugging.
56#[async_trait::async_trait]
57pub trait Catalog: Send + Sync + Debug {
58    /// Returns the name of this catalog.
59    ///
60    /// The catalog name is a unique identifier used to:
61    /// - Distinguish between multiple catalogs in a catalog list
62    /// - Reference this catalog in configuration
63    /// - Identify the catalog in logging and error messages
64    fn name(&self) -> &str;
65    /// Creates a new namespace in the catalog with optional properties.
66    ///
67    /// # Arguments
68    /// * `namespace` - The namespace to create
69    /// * `properties` - Optional key-value properties to associate with the namespace
70    ///
71    /// # Returns
72    /// * `Result<HashMap<String, String>, Error>` - The namespace properties after creation
73    ///
74    /// # Errors
75    /// Returns an error if:
76    /// * The namespace already exists
77    /// * The namespace name is invalid
78    /// * The catalog fails to create the namespace
79    /// * Properties cannot be set
80    async fn create_namespace(
81        &self,
82        namespace: &Namespace,
83        properties: Option<HashMap<String, String>>,
84    ) -> Result<HashMap<String, String>, Error>;
85    /// Removes a namespace and all its properties from the catalog.
86    ///
87    /// # Arguments
88    /// * `namespace` - The namespace to remove
89    ///
90    /// # Returns
91    /// * `Result<(), Error>` - Ok if the namespace was successfully removed
92    ///
93    /// # Errors
94    /// Returns an error if:
95    /// * The namespace doesn't exist
96    /// * The namespace contains tables or views
97    /// * The catalog fails to remove the namespace
98    async fn drop_namespace(&self, namespace: &Namespace) -> Result<(), Error>;
99    /// Loads a namespace's properties from the catalog.
100    ///
101    /// # Arguments
102    /// * `namespace` - The namespace to load properties for
103    ///
104    /// # Returns
105    /// * `Result<HashMap<String, String>, Error>` - The namespace properties if found
106    ///
107    /// # Errors
108    /// Returns an error if:
109    /// * The namespace doesn't exist
110    /// * The catalog fails to load the namespace properties
111    /// * The properties cannot be deserialized
112    async fn load_namespace(&self, namespace: &Namespace)
113        -> Result<HashMap<String, String>, Error>;
114    /// Updates a namespace's properties by applying updates and removals.
115    ///
116    /// # Arguments
117    /// * `namespace` - The namespace to update
118    /// * `updates` - Optional map of property key-value pairs to add or update
119    /// * `removals` - Optional list of property keys to remove
120    ///
121    /// # Returns
122    /// * `Result<(), Error>` - Ok if the namespace was successfully updated
123    ///
124    /// # Errors
125    /// Returns an error if:
126    /// * The namespace doesn't exist
127    /// * The properties cannot be updated
128    /// * The catalog fails to persist the changes
129    async fn update_namespace(
130        &self,
131        namespace: &Namespace,
132        updates: Option<HashMap<String, String>>,
133        removals: Option<Vec<String>>,
134    ) -> Result<(), Error>;
135    /// Checks if a namespace exists in the catalog.
136    ///
137    /// # Arguments
138    /// * `namespace` - The namespace to check for existence
139    ///
140    /// # Returns
141    /// * `Result<bool, Error>` - True if the namespace exists, false otherwise
142    ///
143    /// # Errors
144    /// Returns an error if:
145    /// * The catalog cannot be accessed
146    /// * The namespace check operation fails
147    async fn namespace_exists(&self, namespace: &Namespace) -> Result<bool, Error>;
148    /// Lists all tables, views, and materialized views in the given namespace.
149    ///
150    /// # Arguments
151    /// * `namespace` - The namespace to list tabular objects from
152    ///
153    /// # Returns
154    /// * `Result<Vec<Identifier>, Error>` - List of identifiers for all tabular objects
155    ///
156    /// # Errors
157    /// Returns an error if:
158    /// * The namespace doesn't exist
159    /// * The catalog cannot be accessed
160    /// * The listing operation fails
161    async fn list_tabulars(&self, namespace: &Namespace) -> Result<Vec<Identifier>, Error>;
162    /// Lists all namespaces under an optional parent namespace.
163    ///
164    /// # Arguments
165    /// * `parent` - Optional parent namespace to list children under. If None, lists top-level namespaces.
166    ///
167    /// # Returns
168    /// * `Result<Vec<Namespace>, Error>` - List of namespace objects
169    ///
170    /// # Errors
171    /// Returns an error if:
172    /// * The parent namespace doesn't exist (if specified)
173    /// * The catalog cannot be accessed
174    /// * The listing operation fails
175    async fn list_namespaces(&self, parent: Option<&str>) -> Result<Vec<Namespace>, Error>;
176    /// Checks if a table, view, or materialized view exists in the catalog.
177    ///
178    /// # Arguments
179    /// * `identifier` - The identifier of the tabular object to check
180    ///
181    /// # Returns
182    /// * `Result<bool, Error>` - True if the tabular object exists, false otherwise
183    ///
184    /// # Errors
185    /// Returns an error if:
186    /// * The namespace doesn't exist
187    /// * The catalog cannot be accessed
188    /// * The existence check operation fails
189    async fn tabular_exists(&self, identifier: &Identifier) -> Result<bool, Error>;
190    /// Drops a table from the catalog and deletes all associated data and metadata files.
191    ///
192    /// # Arguments
193    /// * `identifier` - The identifier of the table to drop
194    ///
195    /// # Returns
196    /// * `Result<(), Error>` - Ok if the table was successfully dropped
197    ///
198    /// # Errors
199    /// Returns an error if:
200    /// * The table doesn't exist
201    /// * The table is locked or in use
202    /// * The catalog fails to delete the table metadata
203    /// * The data files cannot be deleted
204    async fn drop_table(&self, identifier: &Identifier) -> Result<(), Error>;
205    /// Drops a view from the catalog and deletes its metadata.
206    ///
207    /// # Arguments
208    /// * `identifier` - The identifier of the view to drop
209    ///
210    /// # Returns
211    /// * `Result<(), Error>` - Ok if the view was successfully dropped
212    ///
213    /// # Errors
214    /// Returns an error if:
215    /// * The view doesn't exist
216    /// * The view is in use
217    /// * The catalog fails to delete the view metadata
218    async fn drop_view(&self, identifier: &Identifier) -> Result<(), Error>;
219    /// Drops a materialized view from the catalog and deletes its metadata and data files.
220    ///
221    /// # Arguments
222    /// * `identifier` - The identifier of the materialized view to drop
223    ///
224    /// # Returns
225    /// * `Result<(), Error>` - Ok if the materialized view was successfully dropped
226    ///
227    /// # Errors
228    /// Returns an error if:
229    /// * The materialized view doesn't exist
230    /// * The materialized view is in use
231    /// * The catalog fails to delete the view metadata
232    /// * The associated data files cannot be deleted
233    async fn drop_materialized_view(&self, identifier: &Identifier) -> Result<(), Error>;
234    /// Loads a table, view, or materialized view from the catalog.
235    ///
236    /// # Arguments
237    /// * `identifier` - The identifier of the tabular object to load
238    ///
239    /// # Returns
240    /// * `Result<Tabular, Error>` - The loaded tabular object wrapped in an enum
241    ///
242    /// # Errors
243    /// Returns an error if:
244    /// * The tabular object doesn't exist
245    /// * The metadata cannot be loaded
246    /// * The metadata is invalid or corrupted
247    /// * The catalog cannot be accessed
248    async fn load_tabular(self: Arc<Self>, identifier: &Identifier) -> Result<Tabular, Error>;
249    /// Creates a new table in the catalog with the specified configuration.
250    ///
251    /// # Arguments
252    /// * `identifier` - The identifier for the new table
253    /// * `create_table` - Configuration for the table creation including schema, partitioning, etc.
254    ///
255    /// # Returns
256    /// * `Result<Table, Error>` - The newly created table object
257    ///
258    /// # Errors
259    /// Returns an error if:
260    /// * The table already exists
261    /// * The namespace doesn't exist
262    /// * The schema is invalid
263    /// * The catalog fails to create the table metadata
264    /// * The table location cannot be initialized
265    async fn create_table(
266        self: Arc<Self>,
267        identifier: Identifier,
268        create_table: CreateTable,
269    ) -> Result<Table, Error>;
270    /// Creates a new view in the catalog with the specified configuration.
271    ///
272    /// # Arguments
273    /// * `identifier` - The identifier for the new view
274    /// * `create_view` - Configuration for the view creation including view definition and properties
275    ///
276    /// # Returns
277    /// * `Result<View, Error>` - The newly created view object
278    ///
279    /// # Errors
280    /// Returns an error if:
281    /// * The view already exists
282    /// * The namespace doesn't exist
283    /// * The view definition is invalid
284    /// * The catalog fails to create the view metadata
285    async fn create_view(
286        self: Arc<Self>,
287        identifier: Identifier,
288        create_view: CreateView<Option<()>>,
289    ) -> Result<View, Error>;
290    /// Creates a new materialized view in the catalog with the specified configuration.
291    ///
292    /// # Arguments
293    /// * `identifier` - The identifier for the new materialized view
294    /// * `create_view` - Configuration for the materialized view creation including view definition,
295    ///                  storage properties, and refresh policies
296    ///
297    /// # Returns
298    /// * `Result<MaterializedView, Error>` - The newly created materialized view object
299    ///
300    /// # Errors
301    /// Returns an error if:
302    /// * The materialized view already exists
303    /// * The namespace doesn't exist
304    /// * The view definition is invalid
305    /// * The catalog fails to create the view metadata
306    /// * The storage location cannot be initialized
307    async fn create_materialized_view(
308        self: Arc<Self>,
309        identifier: Identifier,
310        create_view: CreateMaterializedView,
311    ) -> Result<MaterializedView, Error>;
312    /// Updates a table's metadata by applying the specified commit operation.
313    ///
314    /// # Arguments
315    /// * `commit` - The commit operation containing metadata updates to apply
316    ///
317    /// # Returns
318    /// * `Result<Table, Error>` - The updated table object
319    ///
320    /// # Errors
321    /// Returns an error if:
322    /// * The table doesn't exist
323    /// * The table is locked by another operation
324    /// * The commit operation is invalid
325    /// * The catalog fails to update the metadata
326    /// * Concurrent modifications conflict with this update
327    async fn update_table(self: Arc<Self>, commit: CommitTable) -> Result<Table, Error>;
328    /// Updates a view's metadata by applying the specified commit operation.
329    ///
330    /// # Arguments
331    /// * `commit` - The commit operation containing metadata updates to apply
332    ///
333    /// # Returns
334    /// * `Result<View, Error>` - The updated view object
335    ///
336    /// # Errors
337    /// Returns an error if:
338    /// * The view doesn't exist
339    /// * The view is locked by another operation
340    /// * The commit operation is invalid
341    /// * The catalog fails to update the metadata
342    /// * Concurrent modifications conflict with this update
343    async fn update_view(self: Arc<Self>, commit: CommitView<Option<()>>) -> Result<View, Error>;
344    /// Updates a materialized view's metadata by applying the specified commit operation.
345    ///
346    /// # Arguments
347    /// * `commit` - The commit operation containing metadata updates to apply
348    ///
349    /// # Returns
350    /// * `Result<MaterializedView, Error>` - The updated materialized view object
351    ///
352    /// # Errors
353    /// Returns an error if:
354    /// * The materialized view doesn't exist
355    /// * The materialized view is locked by another operation
356    /// * The commit operation is invalid
357    /// * The catalog fails to update the metadata
358    /// * Concurrent modifications conflict with this update
359    /// * The underlying storage cannot be updated
360    async fn update_materialized_view(
361        self: Arc<Self>,
362        commit: CommitView<FullIdentifier>,
363    ) -> Result<MaterializedView, Error>;
364    /// Registers an existing table in the catalog using its metadata location.
365    ///
366    /// # Arguments
367    /// * `identifier` - The identifier to register the table under
368    /// * `metadata_location` - Location of the table's metadata file
369    ///
370    /// # Returns
371    /// * `Result<Table, Error>` - The registered table object
372    ///
373    /// # Errors
374    /// Returns an error if:
375    /// * A table already exists with the given identifier
376    /// * The metadata location is invalid or inaccessible
377    /// * The metadata file cannot be read or parsed
378    /// * The catalog fails to register the table
379    async fn register_table(
380        self: Arc<Self>,
381        identifier: Identifier,
382        metadata_location: &str,
383    ) -> Result<Table, Error>;
384}
385
386/// A trait representing a collection of Iceberg catalogs that can be accessed by name.
387///
388/// The CatalogList trait provides methods to:
389/// - Look up individual catalogs by name
390/// - List all available catalogs
391/// - Manage multiple catalogs in a unified interface
392///
393/// Implementations must be Send + Sync for concurrent access and Debug for logging/debugging.
394#[async_trait::async_trait]
395pub trait CatalogList: Send + Sync + Debug {
396    /// Get catalog from list by name
397    fn catalog(&self, name: &str) -> Option<Arc<dyn Catalog>>;
398    /// Get the list of available catalogs
399    async fn list_catalogs(&self) -> Vec<String>;
400}
401
402pub mod identifier {
403    //! Catalog identifier
404    pub use iceberg_rust_spec::identifier::Identifier;
405}
406
407pub mod namespace {
408    //! Catalog namespace
409    pub use iceberg_rust_spec::namespace::Namespace;
410}