iceberg_rust/catalog/mod.rs
1//! Catalog module providing interfaces for managing Iceberg tables and metadata.
2//!
3//! The catalog system is a core component of Apache Iceberg that manages:
4//! - Table metadata and schemas
5//! - Namespace organization
6//! - Storage locations and object stores
7//! - Atomic updates and versioning
8//!
9//! # Key Components
10//!
11//! - [`Catalog`]: Core trait for managing tables, views, and namespaces
12//! - [`CatalogList`]: Interface for managing multiple catalogs
13//! - [`namespace`]: Types for organizing tables into hierarchies
14//! - [`identifier`]: Types for uniquely identifying catalog objects
15//!
16//! # Common Operations
17//!
18//! - Creating and managing tables and views
19//! - Organizing tables into namespaces
20//! - Tracking table metadata and history
21//! - Managing storage locations
22//! - Performing atomic updates
23//!
24
25use std::collections::HashMap;
26use std::fmt::Debug;
27use std::sync::Arc;
28
29use iceberg_rust_spec::identifier::FullIdentifier;
30use identifier::Identifier;
31
32use crate::error::Error;
33use crate::materialized_view::MaterializedView;
34use crate::table::Table;
35use crate::view::View;
36
37use self::commit::{CommitTable, CommitView};
38use self::create::{CreateMaterializedView, CreateTable, CreateView};
39use self::namespace::Namespace;
40use self::tabular::Tabular;
41
42pub mod commit;
43pub mod create;
44pub mod tabular;
45
46/// A trait representing an Iceberg catalog that manages tables, views, and namespaces.
47///
48/// The Catalog trait provides methods to:
49/// - Create, update, and delete namespaces
50/// - Create, load, register, and drop tables, views, and materialized views
51/// - List available tables and namespaces
52/// - Manage table and view metadata
53/// - Access object storage (NOTE(review): no method declared in this trait does this; the bullet may be stale — confirm)
54///
55/// Implementations must be Send + Sync for concurrent access and Debug for logging/debugging.
///
/// # Receivers
///
/// Namespace operations, listings, existence checks, and drops borrow the catalog (`&self`).
/// `load_tabular`, the `create_*` and `update_*` methods for tables and views, and
/// `register_table` take `self: Arc<Self>`, so they must be called on an `Arc`-wrapped catalog
/// and consume that handle (clone the `Arc` first if it is still needed afterwards).
///
/// Every method except `name` is `async`; the trait is declared through `async_trait`.
56#[async_trait::async_trait]
57pub trait Catalog: Send + Sync + Debug {
58 /// Returns the name of this catalog.
59 ///
60 /// The catalog name is a unique identifier used to:
61 /// - Distinguish between multiple catalogs in a catalog list
62 /// - Reference this catalog in configuration
63 /// - Identify the catalog in logging and error messages
64 fn name(&self) -> &str;
65 /// Creates a new namespace in the catalog with optional properties.
66 ///
67 /// # Arguments
68 /// * `namespace` - The namespace to create
69 /// * `properties` - Optional key-value properties to associate with the namespace
70 ///
71 /// # Returns
72 /// * `Result<HashMap<String, String>, Error>` - The namespace properties after creation
73 ///
74 /// # Errors
75 /// Returns an error if:
76 /// * The namespace already exists
77 /// * The namespace name is invalid
78 /// * The catalog fails to create the namespace
79 /// * Properties cannot be set
80 async fn create_namespace(
81 &self,
82 namespace: &Namespace,
83 properties: Option<HashMap<String, String>>,
84 ) -> Result<HashMap<String, String>, Error>;
85 /// Removes a namespace and all its properties from the catalog.
86 ///
87 /// # Arguments
88 /// * `namespace` - The namespace to remove
89 ///
90 /// # Returns
91 /// * `Result<(), Error>` - Ok if the namespace was successfully removed
92 ///
93 /// # Errors
94 /// Returns an error if:
95 /// * The namespace doesn't exist
96 /// * The namespace contains tables or views
97 /// * The catalog fails to remove the namespace
98 async fn drop_namespace(&self, namespace: &Namespace) -> Result<(), Error>;
99 /// Loads a namespace's properties from the catalog.
100 ///
101 /// # Arguments
102 /// * `namespace` - The namespace to load properties for
103 ///
104 /// # Returns
105 /// * `Result<HashMap<String, String>, Error>` - The namespace properties if found
106 ///
107 /// # Errors
108 /// Returns an error if:
109 /// * The namespace doesn't exist
110 /// * The catalog fails to load the namespace properties
111 /// * The properties cannot be deserialized
112 async fn load_namespace(&self, namespace: &Namespace)
113 -> Result<HashMap<String, String>, Error>;
114 /// Updates a namespace's properties by applying updates and removals.
115 ///
116 /// # Arguments
117 /// * `namespace` - The namespace to update
118 /// * `updates` - Optional map of property key-value pairs to add or update
119 /// * `removals` - Optional list of property keys to remove
120 ///
121 /// # Returns
122 /// * `Result<(), Error>` - Ok if the namespace was successfully updated
123 ///
124 /// # Errors
125 /// Returns an error if:
126 /// * The namespace doesn't exist
127 /// * The properties cannot be updated
128 /// * The catalog fails to persist the changes
129 async fn update_namespace(
130 &self,
131 namespace: &Namespace,
132 updates: Option<HashMap<String, String>>,
133 removals: Option<Vec<String>>,
134 ) -> Result<(), Error>;
135 /// Checks if a namespace exists in the catalog.
136 ///
137 /// # Arguments
138 /// * `namespace` - The namespace to check for existence
139 ///
140 /// # Returns
141 /// * `Result<bool, Error>` - True if the namespace exists, false otherwise
142 ///
143 /// # Errors
144 /// Returns an error if:
145 /// * The catalog cannot be accessed
146 /// * The namespace check operation fails
147 async fn namespace_exists(&self, namespace: &Namespace) -> Result<bool, Error>;
148 /// Lists all tables, views, and materialized views in the given namespace.
149 ///
150 /// # Arguments
151 /// * `namespace` - The namespace to list tabular objects from
152 ///
153 /// # Returns
154 /// * `Result<Vec<Identifier>, Error>` - List of identifiers for all tabular objects
155 ///
156 /// # Errors
157 /// Returns an error if:
158 /// * The namespace doesn't exist
159 /// * The catalog cannot be accessed
160 /// * The listing operation fails
161 async fn list_tabulars(&self, namespace: &Namespace) -> Result<Vec<Identifier>, Error>;
162 /// Lists all namespaces under an optional parent namespace.
163 ///
164 /// # Arguments
165 /// * `parent` - Optional parent namespace to list children under. If None, lists top-level namespaces.
 ///   Unlike the other namespace methods, the parent is passed as a string (`Option<&str>`)
 ///   rather than a `Namespace`; the expected string format is not specified here — TODO confirm.
166 ///
167 /// # Returns
168 /// * `Result<Vec<Namespace>, Error>` - List of namespace objects
169 ///
170 /// # Errors
171 /// Returns an error if:
172 /// * The parent namespace doesn't exist (if specified)
173 /// * The catalog cannot be accessed
174 /// * The listing operation fails
175 async fn list_namespaces(&self, parent: Option<&str>) -> Result<Vec<Namespace>, Error>;
176 /// Checks if a table, view, or materialized view exists in the catalog.
177 ///
178 /// # Arguments
179 /// * `identifier` - The identifier of the tabular object to check
180 ///
181 /// # Returns
182 /// * `Result<bool, Error>` - True if the tabular object exists, false otherwise
183 ///
184 /// # Errors
185 /// Returns an error if:
186 /// * The namespace doesn't exist
187 /// * The catalog cannot be accessed
188 /// * The existence check operation fails
189 async fn tabular_exists(&self, identifier: &Identifier) -> Result<bool, Error>;
190 /// Drops a table from the catalog and deletes all associated data and metadata files.
191 ///
192 /// # Arguments
193 /// * `identifier` - The identifier of the table to drop
194 ///
195 /// # Returns
196 /// * `Result<(), Error>` - Ok if the table was successfully dropped
197 ///
198 /// # Errors
199 /// Returns an error if:
200 /// * The table doesn't exist
201 /// * The table is locked or in use
202 /// * The catalog fails to delete the table metadata
203 /// * The data files cannot be deleted
204 async fn drop_table(&self, identifier: &Identifier) -> Result<(), Error>;
205 /// Drops a view from the catalog and deletes its metadata.
206 ///
207 /// # Arguments
208 /// * `identifier` - The identifier of the view to drop
209 ///
210 /// # Returns
211 /// * `Result<(), Error>` - Ok if the view was successfully dropped
212 ///
213 /// # Errors
214 /// Returns an error if:
215 /// * The view doesn't exist
216 /// * The view is in use
217 /// * The catalog fails to delete the view metadata
218 async fn drop_view(&self, identifier: &Identifier) -> Result<(), Error>;
219 /// Drops a materialized view from the catalog and deletes its metadata and data files.
220 ///
221 /// # Arguments
222 /// * `identifier` - The identifier of the materialized view to drop
223 ///
224 /// # Returns
225 /// * `Result<(), Error>` - Ok if the materialized view was successfully dropped
226 ///
227 /// # Errors
228 /// Returns an error if:
229 /// * The materialized view doesn't exist
230 /// * The materialized view is in use
231 /// * The catalog fails to delete the view metadata
232 /// * The associated data files cannot be deleted
233 async fn drop_materialized_view(&self, identifier: &Identifier) -> Result<(), Error>;
234 /// Loads a table, view, or materialized view from the catalog.
 ///
 /// Takes `self: Arc<Self>`, so the call consumes the `Arc` handle it is invoked on.
235 ///
236 /// # Arguments
237 /// * `identifier` - The identifier of the tabular object to load
238 ///
239 /// # Returns
240 /// * `Result<Tabular, Error>` - The loaded tabular object wrapped in an enum
241 ///
242 /// # Errors
243 /// Returns an error if:
244 /// * The tabular object doesn't exist
245 /// * The metadata cannot be loaded
246 /// * The metadata is invalid or corrupted
247 /// * The catalog cannot be accessed
248 async fn load_tabular(self: Arc<Self>, identifier: &Identifier) -> Result<Tabular, Error>;
249 /// Creates a new table in the catalog with the specified configuration.
250 ///
251 /// # Arguments
252 /// * `identifier` - The identifier for the new table
253 /// * `create_table` - Configuration for the table creation including schema, partitioning, etc.
254 ///
255 /// # Returns
256 /// * `Result<Table, Error>` - The newly created table object
257 ///
258 /// # Errors
259 /// Returns an error if:
260 /// * The table already exists
261 /// * The namespace doesn't exist
262 /// * The schema is invalid
263 /// * The catalog fails to create the table metadata
264 /// * The table location cannot be initialized
265 async fn create_table(
266 self: Arc<Self>,
267 identifier: Identifier,
268 create_table: CreateTable,
269 ) -> Result<Table, Error>;
270 /// Creates a new view in the catalog with the specified configuration.
271 ///
272 /// # Arguments
273 /// * `identifier` - The identifier for the new view
274 /// * `create_view` - Configuration for the view creation including view definition and properties
 ///   NOTE(review): the configuration type is `CreateView<Option<()>>`; the `Option<()>`
 ///   parameter presumably marks a plain view with no storage table — confirm against `CreateView`.
275 ///
276 /// # Returns
277 /// * `Result<View, Error>` - The newly created view object
278 ///
279 /// # Errors
280 /// Returns an error if:
281 /// * The view already exists
282 /// * The namespace doesn't exist
283 /// * The view definition is invalid
284 /// * The catalog fails to create the view metadata
285 async fn create_view(
286 self: Arc<Self>,
287 identifier: Identifier,
288 create_view: CreateView<Option<()>>,
289 ) -> Result<View, Error>;
290 /// Creates a new materialized view in the catalog with the specified configuration.
291 ///
292 /// # Arguments
293 /// * `identifier` - The identifier for the new materialized view
294 /// * `create_view` - Configuration for the materialized view creation including view definition,
295 /// storage properties, and refresh policies
296 ///
297 /// # Returns
298 /// * `Result<MaterializedView, Error>` - The newly created materialized view object
299 ///
300 /// # Errors
301 /// Returns an error if:
302 /// * The materialized view already exists
303 /// * The namespace doesn't exist
304 /// * The view definition is invalid
305 /// * The catalog fails to create the view metadata
306 /// * The storage location cannot be initialized
307 async fn create_materialized_view(
308 self: Arc<Self>,
309 identifier: Identifier,
310 create_view: CreateMaterializedView,
311 ) -> Result<MaterializedView, Error>;
312 /// Updates a table's metadata by applying the specified commit operation.
313 ///
314 /// # Arguments
315 /// * `commit` - The commit operation containing metadata updates to apply
 ///   (no separate identifier is passed, so the commit presumably names the target table —
 ///   confirm against `CommitTable`)
316 ///
317 /// # Returns
318 /// * `Result<Table, Error>` - The updated table object
319 ///
320 /// # Errors
321 /// Returns an error if:
322 /// * The table doesn't exist
323 /// * The table is locked by another operation
324 /// * The commit operation is invalid
325 /// * The catalog fails to update the metadata
326 /// * Concurrent modifications conflict with this update
327 async fn update_table(self: Arc<Self>, commit: CommitTable) -> Result<Table, Error>;
328 /// Updates a view's metadata by applying the specified commit operation.
329 ///
330 /// # Arguments
331 /// * `commit` - The commit operation containing metadata updates to apply
 ///   (typed `CommitView<Option<()>>`, the same type parameter `create_view` uses)
332 ///
333 /// # Returns
334 /// * `Result<View, Error>` - The updated view object
335 ///
336 /// # Errors
337 /// Returns an error if:
338 /// * The view doesn't exist
339 /// * The view is locked by another operation
340 /// * The commit operation is invalid
341 /// * The catalog fails to update the metadata
342 /// * Concurrent modifications conflict with this update
343 async fn update_view(self: Arc<Self>, commit: CommitView<Option<()>>) -> Result<View, Error>;
344 /// Updates a materialized view's metadata by applying the specified commit operation.
345 ///
346 /// # Arguments
347 /// * `commit` - The commit operation containing metadata updates to apply
 ///   NOTE(review): typed `CommitView<FullIdentifier>`, whereas `update_view` uses
 ///   `CommitView<Option<()>>`; the identifier presumably refers to the view's storage table — confirm.
348 ///
349 /// # Returns
350 /// * `Result<MaterializedView, Error>` - The updated materialized view object
351 ///
352 /// # Errors
353 /// Returns an error if:
354 /// * The materialized view doesn't exist
355 /// * The materialized view is locked by another operation
356 /// * The commit operation is invalid
357 /// * The catalog fails to update the metadata
358 /// * Concurrent modifications conflict with this update
359 /// * The underlying storage cannot be updated
360 async fn update_materialized_view(
361 self: Arc<Self>,
362 commit: CommitView<FullIdentifier>,
363 ) -> Result<MaterializedView, Error>;
364 /// Registers an existing table in the catalog using its metadata location.
365 ///
366 /// # Arguments
367 /// * `identifier` - The identifier to register the table under
368 /// * `metadata_location` - Location of the table's metadata file
369 ///
370 /// # Returns
371 /// * `Result<Table, Error>` - The registered table object
372 ///
373 /// # Errors
374 /// Returns an error if:
375 /// * A table already exists with the given identifier
376 /// * The metadata location is invalid or inaccessible
377 /// * The metadata file cannot be read or parsed
378 /// * The catalog fails to register the table
379 async fn register_table(
380 self: Arc<Self>,
381 identifier: Identifier,
382 metadata_location: &str,
383 ) -> Result<Table, Error>;
384}
385
386/// A trait representing a collection of Iceberg catalogs that can be accessed by name.
387///
388/// The CatalogList trait provides methods to:
389/// - Look up individual catalogs by name
390/// - List all available catalogs
391/// - Manage multiple catalogs in a unified interface
392///
393/// Implementations must be Send + Sync for concurrent access and Debug for logging/debugging.
394#[async_trait::async_trait]
395pub trait CatalogList: Send + Sync + Debug {
396 /// Looks up a catalog in the list by name.
 ///
 /// This lookup is synchronous, unlike `list_catalogs`.
 ///
 /// # Arguments
 /// * `name` - The name of the catalog to look up
 ///
 /// # Returns
 /// * `Option<Arc<dyn Catalog>>` - A shared handle to the catalog. `None` presumably means
 ///   no catalog is known under `name` (implementation-defined — confirm).
397 fn catalog(&self, name: &str) -> Option<Arc<dyn Catalog>>;
398 /// Lists the available catalogs.
 ///
 /// # Returns
 /// * `Vec<String>` - Presumably the catalog names that `catalog` accepts — confirm with
 ///   implementations.
 ///
 /// The return type carries no `Result`, so an implementation has no way to report a
 /// listing failure through it.
399 async fn list_catalogs(&self) -> Vec<String>;
400}
401
402pub mod identifier {
403 //! Catalog identifier
 //!
 //! This module defines nothing of its own: it re-exports `Identifier` from
 //! `iceberg_rust_spec::identifier` so the type can be named through the catalog module.
404 pub use iceberg_rust_spec::identifier::Identifier;
405}
406
407pub mod namespace {
408 //! Catalog namespace
 //!
 //! This module defines nothing of its own: it re-exports `Namespace` from
 //! `iceberg_rust_spec::namespace` so the type can be named through the catalog module.
409 pub use iceberg_rust_spec::namespace::Namespace;
410}