supertable-core 0.1.0

Core library for SuperTable, a next-generation open table format
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
//! # SuperTable Catalog
//!
//! This module defines the catalog abstraction for SuperTable. The catalog
//! is responsible for:
//!
//! - **Namespace management**: Organizing tables into hierarchical namespaces
//! - **Table metadata tracking**: Storing the location of current metadata files
//! - **Atomic commits**: Ensuring ACID guarantees through compare-and-swap
//!
//! ## Catalog Implementations
//!
//! SuperTable supports multiple catalog backends:
//!
//! - **In-Memory**: For testing and development
//! - **SQLite**: For single-node deployments
//! - **PostgreSQL**: For production multi-node deployments
//! - **REST**: For integration with existing Iceberg REST catalogs
//!
//! ## Concurrency
//!
//! All catalog implementations must support atomic compare-and-swap operations
//! to enable optimistic concurrency control. This is typically achieved through:
//!
//! - Database transactions with row-level locking
//! - Conditional writes (e.g., etags, version numbers)
//! - Distributed consensus (e.g., Raft, Paxos)

use async_trait::async_trait;
use std::collections::HashMap;
use std::sync::Arc;
use thiserror::Error;
use tokio::sync::RwLock;

use crate::metadata::TableMetadata;


/// Errors that can occur during catalog operations.
///
/// Every fallible [`Catalog`] method reports failures through this enum (see
/// [`CatalogResult`]). The `Display` text of each variant comes from its
/// `#[error(...)]` attribute.
#[derive(Debug, Error)]
pub enum CatalogError {
    /// The requested table was not found.
    ///
    /// The in-memory catalog fills the payload with the identifier's fully
    /// qualified name (`TableIdentifier::full_name`).
    #[error("table not found: {0}")]
    TableNotFound(String),

    /// The requested namespace was not found.
    ///
    /// The in-memory catalog fills the payload with the namespace levels
    /// joined by `.`.
    #[error("namespace not found: {0}")]
    NamespaceNotFound(String),

    /// The table already exists.
    ///
    /// Returned by the in-memory catalog when creating a table, or renaming
    /// onto an identifier, that is already registered.
    #[error("table already exists: {0}")]
    TableAlreadyExists(String),

    /// The namespace already exists.
    ///
    /// The payload is the namespace levels joined by `.` in the in-memory
    /// catalog.
    #[error("namespace already exists: {0}")]
    NamespaceAlreadyExists(String),

    /// A conflict occurred during an atomic operation.
    ///
    /// `expected` is the base version supplied by the committer and `actual`
    /// is the version the catalog currently holds; the in-memory catalog uses
    /// the stored metadata's `last_sequence_number` for `actual`.
    #[error("commit conflict: expected version {expected}, found {actual}")]
    CommitConflict { expected: i64, actual: i64 },

    /// An I/O error occurred.
    ///
    /// `#[from]` lets `?` convert a `std::io::Error` into this variant.
    #[error("io error: {0}")]
    Io(#[from] std::io::Error),

    /// A serialization error occurred.
    ///
    /// Carries a free-form message. Nothing in this module constructs it;
    /// presumably it is produced by persistent catalog backends (confirm).
    #[error("serialization error: {0}")]
    Serialization(String),
}

/// Result type for catalog operations.
///
/// Shorthand for `Result<T, CatalogError>`; every fallible [`Catalog`] method
/// returns it.
pub type CatalogResult<T> = Result<T, CatalogError>;

/// Identifies a table by its namespace path plus its table name.
///
/// Equality and hashing are derived from both fields, so the type can be used
/// directly as a map key.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct TableIdentifier {
    /// Namespace levels, outermost first (e.g., ["db", "schema"]); may be empty.
    pub namespace: Vec<String>,

    /// Name of the table inside the namespace.
    pub name: String,
}

impl TableIdentifier {
    /// Builds an identifier from a (possibly multi-level) namespace and a name.
    pub fn new(
        namespace: impl IntoIterator<Item = impl Into<String>>,
        name: impl Into<String>,
    ) -> Self {
        let namespace: Vec<String> = namespace.into_iter().map(Into::into).collect();
        let name: String = name.into();
        Self { namespace, name }
    }

    /// Builds an identifier whose namespace has exactly one level.
    pub fn of(namespace: impl Into<String>, name: impl Into<String>) -> Self {
        // A one-element iterator gives `new` the same single-level namespace.
        Self::new(std::iter::once(namespace), name)
    }

    /// Returns the dot-separated qualified name (`level1.level2.table`).
    ///
    /// With an empty namespace this is just the table name.
    pub fn full_name(&self) -> String {
        let mut segments: Vec<&str> = self.namespace.iter().map(String::as_str).collect();
        segments.push(self.name.as_str());
        segments.join(".")
    }

    /// Splits a dot-separated qualified name into namespace levels and table name.
    ///
    /// The text after the last `.` becomes the table name and everything before
    /// it is split on `.` into namespace levels. Input without any `.` yields an
    /// empty namespace. No validation is performed, so empty segments are kept.
    pub fn parse(full_name: &str) -> Self {
        match full_name.rsplit_once('.') {
            Some((levels, table)) => Self {
                namespace: levels.split('.').map(String::from).collect(),
                name: String::from(table),
            },
            None => Self {
                namespace: Vec::new(),
                name: String::from(full_name),
            },
        }
    }
}

/// Displays the identifier as its fully qualified name.
impl std::fmt::Display for TableIdentifier {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(&self.full_name())
    }
}

/// Properties for a namespace.
///
/// A plain string-to-string map; this module stores and returns it as-is
/// without interpreting any keys.
pub type NamespaceProperties = HashMap<String, String>;

/// The main catalog trait for managing tables.
///
/// Implementations must be `Send + Sync`. All operations except [`Catalog::name`]
/// are async and report failures as [`CatalogError`].
#[async_trait]
pub trait Catalog: Send + Sync {
    /// Returns the name of this catalog.
    fn name(&self) -> &str;

    /// Lists all namespaces, optionally under a parent namespace.
    ///
    /// The in-memory implementation returns only direct children: namespaces
    /// exactly one level below `parent`, or single-level namespaces when
    /// `parent` is `None`.
    async fn list_namespaces(&self, parent: Option<&[String]>) -> CatalogResult<Vec<Vec<String>>>;

    /// Creates a new namespace.
    ///
    /// `namespace` is the full path of levels and `properties` is stored with
    /// it. The in-memory implementation fails with
    /// [`CatalogError::NamespaceAlreadyExists`] if the path is already present.
    async fn create_namespace(
        &self,
        namespace: &[String],
        properties: NamespaceProperties,
    ) -> CatalogResult<()>;

    /// Drops a namespace (must be empty).
    ///
    /// NOTE(review): the in-memory implementation in this file does not check
    /// that the namespace is empty before removing it.
    async fn drop_namespace(&self, namespace: &[String]) -> CatalogResult<()>;

    /// Gets namespace properties.
    ///
    /// The in-memory implementation returns a copy of the stored properties,
    /// or [`CatalogError::NamespaceNotFound`] if the namespace is unknown.
    async fn namespace_properties(
        &self,
        namespace: &[String],
    ) -> CatalogResult<NamespaceProperties>;

    /// Lists all tables in a namespace.
    ///
    /// The in-memory implementation matches on the exact namespace path, so
    /// tables in nested namespaces are not included.
    async fn list_tables(&self, namespace: &[String]) -> CatalogResult<Vec<TableIdentifier>>;

    /// Creates a new table.
    ///
    /// Returns the metadata that was registered. The in-memory implementation
    /// fails with [`CatalogError::TableAlreadyExists`] when the identifier is
    /// already taken.
    async fn create_table(
        &self,
        identifier: &TableIdentifier,
        metadata: TableMetadata,
    ) -> CatalogResult<TableMetadata>;

    /// Loads a table's metadata.
    ///
    /// The in-memory implementation fails with [`CatalogError::TableNotFound`]
    /// when the identifier is unknown.
    async fn load_table(&self, identifier: &TableIdentifier) -> CatalogResult<TableMetadata>;

    /// Drops a table.
    ///
    /// `purge` presumably requests removal of the table's underlying data as
    /// well (confirm); the in-memory implementation ignores it.
    async fn drop_table(&self, identifier: &TableIdentifier, purge: bool) -> CatalogResult<()>;

    /// Renames a table.
    ///
    /// The in-memory implementation fails with [`CatalogError::TableNotFound`]
    /// if `from` is missing and [`CatalogError::TableAlreadyExists`] if `to`
    /// is already taken.
    async fn rename_table(&self, from: &TableIdentifier, to: &TableIdentifier)
    -> CatalogResult<()>;

    /// Checks if a table exists.
    async fn table_exists(&self, identifier: &TableIdentifier) -> CatalogResult<bool>;

    /// Atomically updates table metadata using compare-and-swap.
    ///
    /// # Arguments
    ///
    /// * `identifier` - The table to update
    /// * `base_version` - The expected current version (sequence number)
    /// * `metadata` - The new metadata to commit
    ///
    /// # Returns
    ///
    /// The committed metadata on success.
    ///
    /// # Errors
    ///
    /// The in-memory implementation returns [`CatalogError::TableNotFound`]
    /// when the table is unknown and [`CatalogError::CommitConflict`] when the
    /// stored sequence number differs from `base_version`.
    async fn commit_table(
        &self,
        identifier: &TableIdentifier,
        base_version: i64,
        metadata: TableMetadata,
    ) -> CatalogResult<TableMetadata>;
}

/// An in-memory catalog implementation for testing and development.
///
/// This implementation stores all metadata in memory and is not persistent.
/// It's useful for unit tests and local development.
///
/// Namespaces and tables live in two separate maps, each behind its own
/// `tokio::sync::RwLock`; no operation in this file holds both locks at once.
#[derive(Debug)]
pub struct InMemoryCatalog {
    // Catalog name, returned by `Catalog::name`.
    name: String,
    // Namespace path -> properties supplied at creation.
    namespaces: RwLock<HashMap<Vec<String>, NamespaceProperties>>,
    // Table identifier -> most recently stored metadata.
    tables: RwLock<HashMap<TableIdentifier, TableMetadata>>,
}

impl InMemoryCatalog {
    /// Creates an empty in-memory catalog with the given name.
    ///
    /// The catalog starts with no namespaces and no tables.
    pub fn new(name: impl Into<String>) -> Self {
        let name = name.into();
        let namespaces = RwLock::new(HashMap::new());
        let tables = RwLock::new(HashMap::new());
        Self {
            name,
            namespaces,
            tables,
        }
    }

    /// Creates an empty catalog and wraps it in an `Arc` so it can be shared.
    pub fn shared(name: impl Into<String>) -> Arc<Self> {
        let catalog = Self::new(name);
        Arc::new(catalog)
    }
}

#[async_trait]
impl Catalog for InMemoryCatalog {
    /// Returns the name given at construction.
    fn name(&self) -> &str {
        self.name.as_str()
    }

    /// Lists the direct children of `parent`, or the single-level namespaces
    /// when `parent` is `None`. The order follows map iteration and is
    /// therefore unspecified.
    async fn list_namespaces(&self, parent: Option<&[String]>) -> CatalogResult<Vec<Vec<String>>> {
        let guard = self.namespaces.read().await;

        let mut children: Vec<Vec<String>> = Vec::new();
        for path in guard.keys() {
            let is_direct_child = match parent {
                Some(prefix) => path.len() == prefix.len() + 1 && path.starts_with(prefix),
                None => path.len() == 1,
            };
            if is_direct_child {
                children.push(path.clone());
            }
        }

        Ok(children)
    }

    /// Registers `namespace` with `properties`; fails if the path already exists.
    async fn create_namespace(
        &self,
        namespace: &[String],
        properties: NamespaceProperties,
    ) -> CatalogResult<()> {
        let mut guard = self.namespaces.write().await;

        match guard.entry(namespace.to_vec()) {
            std::collections::hash_map::Entry::Occupied(_) => {
                return Err(CatalogError::NamespaceAlreadyExists(namespace.join(".")));
            }
            std::collections::hash_map::Entry::Vacant(slot) => {
                slot.insert(properties);
            }
        }

        Ok(())
    }

    /// Removes `namespace`; fails if it is not registered.
    ///
    /// NOTE(review): the trait documents "must be empty", but no check for
    /// tables or child namespaces is made here.
    async fn drop_namespace(&self, namespace: &[String]) -> CatalogResult<()> {
        let mut guard = self.namespaces.write().await;
        let removed = guard.remove(namespace);

        removed
            .map(drop)
            .ok_or_else(|| CatalogError::NamespaceNotFound(namespace.join(".")))
    }

    /// Returns a copy of the properties stored for `namespace`.
    async fn namespace_properties(
        &self,
        namespace: &[String],
    ) -> CatalogResult<NamespaceProperties> {
        let guard = self.namespaces.read().await;

        match guard.get(namespace) {
            Some(properties) => Ok(properties.clone()),
            None => Err(CatalogError::NamespaceNotFound(namespace.join("."))),
        }
    }

    /// Lists tables whose namespace equals `namespace` exactly.
    ///
    /// Whether the namespace itself is registered is not checked.
    async fn list_tables(&self, namespace: &[String]) -> CatalogResult<Vec<TableIdentifier>> {
        let guard = self.tables.read().await;

        let mut found: Vec<TableIdentifier> = Vec::new();
        for id in guard.keys() {
            if id.namespace == namespace {
                found.push(id.clone());
            }
        }

        Ok(found)
    }

    /// Stores `metadata` under `identifier` and returns it; fails if the
    /// identifier is already taken.
    ///
    /// Whether the identifier's namespace is registered is not checked.
    async fn create_table(
        &self,
        identifier: &TableIdentifier,
        metadata: TableMetadata,
    ) -> CatalogResult<TableMetadata> {
        let mut guard = self.tables.write().await;

        match guard.entry(identifier.clone()) {
            std::collections::hash_map::Entry::Occupied(_) => {
                return Err(CatalogError::TableAlreadyExists(identifier.full_name()));
            }
            std::collections::hash_map::Entry::Vacant(slot) => {
                slot.insert(metadata.clone());
            }
        }

        Ok(metadata)
    }

    /// Returns a copy of the metadata stored for `identifier`.
    async fn load_table(&self, identifier: &TableIdentifier) -> CatalogResult<TableMetadata> {
        let guard = self.tables.read().await;

        match guard.get(identifier) {
            Some(metadata) => Ok(metadata.clone()),
            None => Err(CatalogError::TableNotFound(identifier.full_name())),
        }
    }

    /// Removes the table entry; `_purge` is ignored because nothing but the
    /// in-memory entry exists.
    async fn drop_table(&self, identifier: &TableIdentifier, _purge: bool) -> CatalogResult<()> {
        let mut guard = self.tables.write().await;
        let removed = guard.remove(identifier);

        removed
            .map(drop)
            .ok_or_else(|| CatalogError::TableNotFound(identifier.full_name()))
    }

    /// Moves the metadata stored under `from` to `to`.
    ///
    /// Fails with `TableNotFound` if `from` is missing, and with
    /// `TableAlreadyExists` if a different table already occupies `to`.
    async fn rename_table(
        &self,
        from: &TableIdentifier,
        to: &TableIdentifier,
    ) -> CatalogResult<()> {
        let mut guard = self.tables.write().await;

        // Validate both ends before touching the map, so nothing has to be undone.
        if !guard.contains_key(from) {
            return Err(CatalogError::TableNotFound(from.full_name()));
        }
        // Renaming a table onto itself is not a collision.
        if from != to && guard.contains_key(to) {
            return Err(CatalogError::TableAlreadyExists(to.full_name()));
        }

        if let Some(metadata) = guard.remove(from) {
            guard.insert(to.clone(), metadata);
        }
        Ok(())
    }

    /// Reports whether an entry for `identifier` is present.
    async fn table_exists(&self, identifier: &TableIdentifier) -> CatalogResult<bool> {
        let guard = self.tables.read().await;
        let present = guard.contains_key(identifier);
        Ok(present)
    }

    /// Replaces the stored metadata if its sequence number matches `base_version`.
    ///
    /// The check and the replacement happen under a single write lock. The new
    /// metadata's own sequence number is not validated.
    async fn commit_table(
        &self,
        identifier: &TableIdentifier,
        base_version: i64,
        metadata: TableMetadata,
    ) -> CatalogResult<TableMetadata> {
        let mut guard = self.tables.write().await;

        let actual = match guard.get(identifier) {
            Some(current) => current.last_sequence_number,
            None => return Err(CatalogError::TableNotFound(identifier.full_name())),
        };

        // Compare-and-swap: reject when the committer's base is stale.
        if actual != base_version {
            return Err(CatalogError::CommitConflict {
                expected: base_version,
                actual,
            });
        }

        guard.insert(identifier.clone(), metadata.clone());
        Ok(metadata)
    }
}



#[cfg(test)]
mod tests {
    use super::*;
    use crate::schema::{Schema, Type};

    /// Builds table metadata at `location` with a one-field (`id`) schema.
    fn sample_metadata(location: &str) -> TableMetadata {
        let id_only_schema = Schema::builder(0)
            .with_field(1, "id", Type::Long, true)
            .build();
        TableMetadata::builder(location, id_only_schema).build()
    }

    /// A created namespace is the only top-level namespace listed.
    #[tokio::test]
    async fn test_create_namespace() {
        let catalog = InMemoryCatalog::new("test");
        let db = vec![String::from("db")];

        let created = catalog
            .create_namespace(&db, NamespaceProperties::new())
            .await;
        created.unwrap();

        let top_level = catalog.list_namespaces(None).await.unwrap();
        assert_eq!(top_level, vec![db]);
    }

    /// A created table is reported as existing.
    #[tokio::test]
    async fn test_create_table() {
        let catalog = InMemoryCatalog::new("test");
        let users = TableIdentifier::of("db", "users");

        catalog
            .create_table(&users, sample_metadata("s3://bucket/users"))
            .await
            .unwrap();

        assert!(catalog.table_exists(&users).await.unwrap());
    }

    /// Committing against a stale base version is rejected as a conflict.
    #[tokio::test]
    async fn test_commit_conflict() {
        let catalog = InMemoryCatalog::new("test");
        let users = TableIdentifier::of("db", "users");
        let metadata = sample_metadata("s3://bucket/users");

        catalog
            .create_table(&users, metadata.clone())
            .await
            .unwrap();

        // 999 is assumed not to match the sequence number of fresh metadata.
        let outcome = catalog.commit_table(&users, 999, metadata).await;

        assert!(matches!(outcome, Err(CatalogError::CommitConflict { .. })));
    }

    /// Qualified names split on the last dot; undotted names have no namespace.
    #[tokio::test]
    async fn test_table_identifier_parsing() {
        let nested = TableIdentifier::parse("db.schema.users");
        assert_eq!(nested, TableIdentifier::new(vec!["db", "schema"], "users"));
        assert_eq!(nested.full_name(), "db.schema.users");

        let bare = TableIdentifier::parse("simple_table");
        assert_eq!(bare.namespace, Vec::<String>::new());
        assert_eq!(bare.name, "simple_table");
    }
}